diff --git a/src/ASPHERE/pair_gayberne.cpp b/src/ASPHERE/pair_gayberne.cpp
index 289adef7f..b22a4fc70 100755
--- a/src/ASPHERE/pair_gayberne.cpp
+++ b/src/ASPHERE/pair_gayberne.cpp
@@ -1,953 +1,953 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Mike Brown (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_gayberne.h"
 #include "math_extra.h"
 #include "atom.h"
 #include "atom_vec_ellipsoid.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "citeme.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 // BibTeX-style citation text for this pair style; the constructor hands it
 // to lmp->citeme->add() when a citeme object exists.
 static const char cite_pair_gayberne[] =
   "pair gayberne command:\n\n"
   "@Article{Brown09,\n"
   " author =  {W. M. Brown, M. K. Petersen, S. J. Plimpton, and G. S. Grest},\n"
   " title =   {Liquid crystal nanodroplets in solution},\n"
   " journal = {J.~Chem.~Phys.},\n"
   " year =    2009,\n"
   " volume =  130,\n"
   " pages =   {044901}\n"
   "}\n\n";
 
 /* ---------------------------------------------------------------------- */
 
 PairGayBerne::PairGayBerne(LAMMPS *lmp) : Pair(lmp)
 {
   // register the citation text only when a citeme object is present
   if (lmp->citeme != NULL) {
     lmp->citeme->add(cite_pair_gayberne);
   }
 
   // pair-style flags: writedata switched on, single_enable switched off
   writedata = 1;
   single_enable = 0;
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairGayBerne::~PairGayBerne()
 {
   // allocate() never ran, so there is nothing to release
   if (!allocated) return;
 
   // arrays obtained through memory->create()
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   memory->destroy(form);
   memory->destroy(epsilon);
   memory->destroy(sigma);
 
   memory->destroy(shape1);
   memory->destroy(shape2);
   memory->destroy(well);
 
   memory->destroy(cut);
   memory->destroy(lj1);
   memory->destroy(lj2);
   memory->destroy(lj3);
   memory->destroy(lj4);
   memory->destroy(offset);
 
   // arrays obtained with new[] in allocate()
   delete [] lshape;
   delete [] setwell;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Loop over the neighbor list and, for every pair closer than
 // cutsq[itype][jtype], accumulate force into atom->f and torque into
 // atom->torque. The interaction formula is chosen per type pair via
 // form[itype][jtype]. Energy/virial are tallied with ev_tally_xyz() when
 // evflag is set.
 //   eflag, vflag : forwarded to ev_setup(); eflag also gates energy terms
 void PairGayBerne::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double evdwl,one_eng,rsq,r2inv,r6inv,forcelj,factor_lj;
   double fforce[3],ttor[3],rtor[3],r12[3];
   double a1[3][3],b1[3][3],g1[3][3],a2[3][3],b2[3][3],g2[3][3],temp[3][3];
   int *ilist,*jlist,*numneigh,**firstneigh;
   double *iquat,*jquat;
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   AtomVecEllipsoid::Bonus *bonus = avec->bonus;
   int *ellipsoid = atom->ellipsoid;
   double **x = atom->x;
   double **f = atom->f;
   double **tor = atom->torque;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     itype = type[i];
 
     // a1, b1, g1 depend only on atom i, so build them once per i:
     // a1 from the quaternion, b1 = a1^T * diag(well) * a1,
     // g1 = a1^T * diag(shape2) * a1
     // skipped when type i is not an ellipsoid type (form[itype][itype])
 
     if (form[itype][itype] == ELLIPSE_ELLIPSE) {
       iquat = bonus[ellipsoid[i]].quat;
       MathExtra::quat_to_mat_trans(iquat,a1);
       MathExtra::diag_times3(well[itype],a1,temp);
       MathExtra::transpose_times3(a1,temp,b1);
       MathExtra::diag_times3(shape2[itype],a1,temp);
       MathExtra::transpose_times3(a1,temp,g1);
     }
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // read the special_lj scaling selected by sbmask(j) before masking
       // the neighbor entry down to a plain atom index
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       // r12 = center to center vector
 
       r12[0] = x[j][0]-x[i][0];
       r12[1] = x[j][1]-x[i][1];
       r12[2] = x[j][2]-x[i][2];
       rsq = MathExtra::dot3(r12,r12);
       jtype = type[j];
 
       // compute if less than cutoff
 
       if (rsq < cutsq[itype][jtype]) {
 
         switch (form[itype][jtype]) {
         case SPHERE_SPHERE:
           // 12-6 Lennard-Jones using the lj1..lj4 prefactors from init_one()
           // one_eng is only assigned here when eflag is set
           r2inv = 1.0/rsq;
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
           forcelj *= -r2inv;
           if (eflag) one_eng =
                        r6inv*(r6inv*lj3[itype][jtype]-lj4[itype][jtype]) -
                        offset[itype][jtype];
           fforce[0] = r12[0]*forcelj;
           fforce[1] = r12[1]*forcelj;
           fforce[2] = r12[2]*forcelj;
           ttor[0] = ttor[1] = ttor[2] = 0.0;
           rtor[0] = rtor[1] = rtor[2] = 0.0;
           break;
 
         case SPHERE_ELLIPSE:
           // j is the ellipsoid: gayberne_lj() is called with j's matrices
           // and (j,i) as its index arguments, and the torque it returns is
           // stored in rtor (applied to j below)
           jquat = bonus[ellipsoid[j]].quat;
           MathExtra::quat_to_mat_trans(jquat,a2);
           MathExtra::diag_times3(well[jtype],a2,temp);
           MathExtra::transpose_times3(a2,temp,b2);
           MathExtra::diag_times3(shape2[jtype],a2,temp);
           MathExtra::transpose_times3(a2,temp,g2);
           one_eng = gayberne_lj(j,i,a2,b2,g2,r12,rsq,fforce,rtor);
           ttor[0] = ttor[1] = ttor[2] = 0.0;
           break;
 
         case ELLIPSE_SPHERE:
           // i is the ellipsoid: reuse a1,b1,g1; torque goes to ttor (atom i)
           one_eng = gayberne_lj(i,j,a1,b1,g1,r12,rsq,fforce,ttor);
           rtor[0] = rtor[1] = rtor[2] = 0.0;
           break;
 
         default:
           // remaining form value: both particles handled as ellipsoids
           jquat = bonus[ellipsoid[j]].quat;
           MathExtra::quat_to_mat_trans(jquat,a2);
           MathExtra::diag_times3(well[jtype],a2,temp);
           MathExtra::transpose_times3(a2,temp,b2);
           MathExtra::diag_times3(shape2[jtype],a2,temp);
           MathExtra::transpose_times3(a2,temp,g2);
           one_eng = gayberne_analytic(i,j,a1,a2,b1,b2,g1,g2,r12,rsq,
                                       fforce,ttor,rtor);
           break;
         }
 
         // scale force and torque on i by the special-bond factor
 
         fforce[0] *= factor_lj;
         fforce[1] *= factor_lj;
         fforce[2] *= factor_lj;
         ttor[0] *= factor_lj;
         ttor[1] *= factor_lj;
         ttor[2] *= factor_lj;
 
         f[i][0] += fforce[0];
         f[i][1] += fforce[1];
         f[i][2] += fforce[2];
         tor[i][0] += ttor[0];
         tor[i][1] += ttor[1];
         tor[i][2] += ttor[2];
 
         // opposite force and torque rtor on j are applied only when
         // newton_pair is on or j is a local atom (j < nlocal)
 
         if (newton_pair || j < nlocal) {
           rtor[0] *= factor_lj;
           rtor[1] *= factor_lj;
           rtor[2] *= factor_lj;
           f[j][0] -= fforce[0];
           f[j][1] -= fforce[1];
           f[j][2] -= fforce[2];
           tor[j][0] += rtor[0];
           tor[j][1] += rtor[1];
           tor[j][2] += rtor[2];
         }
 
         if (eflag) evdwl = factor_lj*one_eng;
 
         // tally with the separation passed as -r12 (i.e. x[i]-x[j])
 
         if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
                                  evdwl,0.0,fforce[0],fforce[1],fforce[2],
                                  -r12[0],-r12[1],-r12[2]);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairGayBerne::allocate()
 {
   // all tables are indexed 1..ntypes, hence the extra slot
   const int np1 = atom->ntypes + 1;
   allocated = 1;
 
   // setflag: only the upper triangle (j >= i) is cleared here
   memory->create(setflag,np1,np1,"pair:setflag");
   for (int itype = 1; itype < np1; itype++) {
     for (int jtype = itype; jtype < np1; jtype++) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,np1,np1,"pair:cutsq");
 
   // per-type-pair and per-type (x3) coefficient tables
   memory->create(form,np1,np1,"pair:form");
   memory->create(epsilon,np1,np1,"pair:epsilon");
   memory->create(sigma,np1,np1,"pair:sigma");
   memory->create(shape1,np1,3,"pair:shape1");
   memory->create(shape2,np1,3,"pair:shape2");
   memory->create(well,np1,3,"pair:well");
   memory->create(cut,np1,np1,"pair:cut");
   memory->create(lj1,np1,np1,"pair:lj1");
   memory->create(lj2,np1,np1,"pair:lj2");
   memory->create(lj3,np1,np1,"pair:lj3");
   memory->create(lj4,np1,np1,"pair:lj4");
   memory->create(offset,np1,np1,"pair:offset");
 
   // per-type arrays; setwell starts out as "not set" for every type
   lshape = new double[np1];
   setwell = new int[np1];
   for (int itype = 1; itype < np1; itype++) {
     setwell[itype] = 0;
   }
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 // Parse the four pair_style arguments: gamma, upsilon, mu, global cutoff.
 //   narg : number of arguments, must be exactly 4
 //   arg  : argument strings, converted with force->numeric()
 // Raises an error via error->all() on a wrong argument count.
 void PairGayBerne::settings(int narg, char **arg)
 {
   if (narg != 4) error->all(FLERR,"Illegal pair_style command");
 
   gamma = force->numeric(FLERR,arg[0]);
   // upsilon is stored already divided by two
   upsilon = force->numeric(FLERR,arg[1])/2.0;
   mu = force->numeric(FLERR,arg[2]);
   cut_global = force->numeric(FLERR,arg[3]);
 
   // reset cutoffs that have been explicitly set
   // the inner loop starts at j = i so that like-type cutoffs cut[i][i] are
   // reset too; init_one() mixes unset pairs from cut[i][i] and cut[j][j],
   // so a stale diagonal value would leak into every mixed pair
 
   if (allocated) {
     const int ntypes = atom->ntypes;
     for (int i = 1; i <= ntypes; i++)
       for (int j = i; j <= ntypes; j++)
         if (setflag[i][j]) cut[i][j] = cut_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairGayBerne::coeff(int narg, char **arg)
 {
   if (narg < 10 || narg > 11)
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double epsilon_one = force->numeric(FLERR,arg[2]);
   double sigma_one = force->numeric(FLERR,arg[3]);
   double eia_one = force->numeric(FLERR,arg[4]);
   double eib_one = force->numeric(FLERR,arg[5]);
   double eic_one = force->numeric(FLERR,arg[6]);
   double eja_one = force->numeric(FLERR,arg[7]);
   double ejb_one = force->numeric(FLERR,arg[8]);
   double ejc_one = force->numeric(FLERR,arg[9]);
 
   double cut_one = cut_global;
   if (narg == 11) cut_one = force->numeric(FLERR,arg[10]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       epsilon[i][j] = epsilon_one;
       sigma[i][j] = sigma_one;
       cut[i][j] = cut_one;
       if (eia_one != 0.0 || eib_one != 0.0 || eic_one != 0.0) {
         well[i][0] = pow(eia_one,-1.0/mu);
         well[i][1] = pow(eib_one,-1.0/mu);
         well[i][2] = pow(eic_one,-1.0/mu);
         if (eia_one == eib_one && eib_one == eic_one) setwell[i] = 2;
         else setwell[i] = 1;
       }
       if (eja_one != 0.0 || ejb_one != 0.0 || ejc_one != 0.0) {
         well[j][0] = pow(eja_one,-1.0/mu);
         well[j][1] = pow(ejb_one,-1.0/mu);
         well[j][2] = pow(ejc_one,-1.0/mu);
         if (eja_one == ejb_one && ejb_one == ejc_one) setwell[j] = 2;
         else setwell[j] = 1;
       }
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 // Style-level initialization: look up the ellipsoid atom style, issue the
 // neighbor list request, and fill the per-type shape tables shape1, shape2
 // and lshape. Errors out if no "ellipsoid" atom style matches or if
 // atom->shape_consistency() fails for a type.
 void PairGayBerne::init_style()
 {
   avec = (AtomVecEllipsoid *) atom->style_match("ellipsoid");
   if (!avec) error->all(FLERR,"Pair gayberne requires atom style ellipsoid");
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 
   // per-type shape precalculations
   // require that atom shapes are identical within each type
   // if shape = 0 for point particle, set shape = 1 as required by Gay-Berne
 
   for (int i = 1; i <= atom->ntypes; i++) {
     if (!atom->shape_consistency(i,shape1[i][0],shape1[i][1],shape1[i][2]))
       error->all(FLERR,
                  "Pair gayberne requires atoms with same type have same shape");
     // only component 0 is tested to detect the shape = 0 case
     if (shape1[i][0] == 0.0)
       shape1[i][0] = shape1[i][1] = shape1[i][2] = 1.0;
     // shape2 = component-wise square of shape1
     shape2[i][0] = shape1[i][0]*shape1[i][0];
     shape2[i][1] = shape1[i][1]*shape1[i][1];
     shape2[i][2] = shape1[i][2]*shape1[i][2];
     // lshape = (a*b + c*c) * sqrt(a*b) with (a,b,c) = shape1[i]
     lshape[i] = (shape1[i][0]*shape1[i][1]+shape1[i][2]*shape1[i][2]) *
       sqrt(shape1[i][0]*shape1[i][1]);
   }
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairGayBerne::init_one(int i, int j)
 {
   if (setwell[i] == 0 || setwell[j] == 0)
     error->all(FLERR,"Pair gayberne epsilon a,b,c coeffs are not all set");
 
   if (setflag[i][j] == 0) {
     epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j],
                                sigma[i][i],sigma[j][j]);
     sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
     cut[i][j] = mix_distance(cut[i][i],cut[j][j]);
   }
 
   lj1[i][j] = 48.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj2[i][j] = 24.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
   lj3[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj4[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
 
   if (offset_flag) {
     double ratio = sigma[i][j] / cut[i][j];
     offset[i][j] = 4.0 * epsilon[i][j] * (pow(ratio,12.0) - pow(ratio,6.0));
   } else offset[i][j] = 0.0;
 
   int ishape = 0;
   if (shape1[i][0] != shape1[i][1] ||
       shape1[i][0] != shape1[i][2] ||
       shape1[i][1] != shape1[i][2]) ishape = 1;
   if (setwell[i] == 1) ishape = 1;
   int jshape = 0;
   if (shape1[j][0] != shape1[j][1] ||
       shape1[j][0] != shape1[j][2] ||
       shape1[j][1] != shape1[j][2]) jshape = 1;
   if (setwell[j] == 1) jshape = 1;
 
   if (ishape == 0 && jshape == 0)
     form[i][i] = form[j][j] = form[i][j] = form[j][i] = SPHERE_SPHERE;
   else if (ishape == 0) {
     form[i][i] = SPHERE_SPHERE; form[j][j] = ELLIPSE_ELLIPSE;
     form[i][j] = SPHERE_ELLIPSE; form[j][i] = ELLIPSE_SPHERE;
   } else if (jshape == 0) {
     form[j][j] = SPHERE_SPHERE; form[i][i] = ELLIPSE_ELLIPSE;
     form[j][i] = SPHERE_ELLIPSE; form[i][j] = ELLIPSE_SPHERE;
   } else
     form[i][i] = form[j][j] = form[i][j] = form[j][i] = ELLIPSE_ELLIPSE;
 
   epsilon[j][i] = epsilon[i][j];
   sigma[j][i] = sigma[i][j];
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   lj4[j][i] = lj4[i][j];
   offset[j][i] = offset[i][j];
 
   return cut[i][j];
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairGayBerne::write_restart(FILE *fp)
 {
   // global settings first, then the per-type and per-pair records
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; itype++) {
     // well-depth flag, followed by the three wells when the flag is set
     fwrite(&setwell[itype],sizeof(int),1,fp);
     if (setwell[itype] != 0) fwrite(well[itype],sizeof(double),3,fp);
 
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       // pair flag, followed by epsilon/sigma/cut only for pairs that are set
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
       if (setflag[itype][jtype] == 0) continue;
 
       fwrite(&epsilon[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigma[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairGayBerne::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   // only rank 0 touches the file; every value is then broadcast from rank 0
   const bool reader = (comm->me == 0);
   const int ntypes = atom->ntypes;
 
   for (int itype = 1; itype <= ntypes; itype++) {
     if (reader) fread(&setwell[itype],sizeof(int),1,fp);
     MPI_Bcast(&setwell[itype],1,MPI_INT,0,world);
 
     // the three wells are present in the file only when the flag is set
     if (setwell[itype] != 0) {
       if (reader) fread(well[itype],sizeof(double),3,fp);
       MPI_Bcast(well[itype],3,MPI_DOUBLE,0,world);
     }
 
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       if (reader) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
 
       // pair coefficients follow only for pairs whose flag is set
       if (setflag[itype][jtype] == 0) continue;
 
       if (reader) {
         fread(&epsilon[itype][jtype],sizeof(double),1,fp);
         fread(&sigma[itype][jtype],sizeof(double),1,fp);
         fread(&cut[itype][jtype],sizeof(double),1,fp);
       }
       MPI_Bcast(&epsilon[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&sigma[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairGayBerne::write_restart_settings(FILE *fp)
 {
   // record layout: four doubles followed by two ints, in this fixed order
   double *const real_settings[4] = {&gamma,&upsilon,&mu,&cut_global};
   int *const int_settings[2] = {&offset_flag,&mix_flag};
 
   for (int k = 0; k < 4; k++) fwrite(real_settings[k],sizeof(double),1,fp);
   for (int k = 0; k < 2; k++) fwrite(int_settings[k],sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairGayBerne::read_restart_settings(FILE *fp)
 {
   int me = comm->me;
   if (me == 0) {
     fread(&gamma,sizeof(double),1,fp);
     fread(&upsilon,sizeof(double),1,fp);
     fread(&mu,sizeof(double),1,fp);
     fread(&cut_global,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
   }
   MPI_Bcast(&gamma,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&upsilon,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&mu,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to data file
 ------------------------------------------------------------------------- */
 
 void PairGayBerne::write_data(FILE *fp)
 {
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; itype++) {
     // undo the pow(eps,-1/mu) transform applied in coeff()
     double eps_abc[3];
     for (int k = 0; k < 3; k++) eps_abc[k] = pow(well[itype][k],-mu);
 
     // like-type line: the same triple is printed for both the i and j slots
     fprintf(fp,"%d %g %g %g %g %g %g %g %g\n",itype,
             epsilon[itype][itype],sigma[itype][itype],
             eps_abc[0],eps_abc[1],eps_abc[2],
             eps_abc[0],eps_abc[1],eps_abc[2]);
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes all pairs to data file
 ------------------------------------------------------------------------- */
 
 // Write one coefficient line per type pair (i,j) with j >= i to fp:
 // "i j epsilon sigma eia eib eic eja ejb ejc cutoff".
 // The well values are converted back with pow(well,-mu), the inverse of
 // the pow(eps,-1/mu) transform applied in coeff().
 void PairGayBerne::write_data_all(FILE *fp)
 {
   for (int i = 1; i <= atom->ntypes; i++)
     for (int j = i; j <= atom->ntypes; j++)
       // epsilon and sigma must be the (i,j) entries, not the like-type
       // (i,i) ones, otherwise explicit or mixed cross terms are lost
       fprintf(fp,"%d %d %g %g %g %g %g %g %g %g %g\n",i,j,
               epsilon[i][j],sigma[i][j],
               pow(well[i][0],-mu),pow(well[i][1],-mu),pow(well[i][2],-mu),
               pow(well[j][0],-mu),pow(well[j][1],-mu),pow(well[j][2],-mu),
               cut[i][j]);
 }
 
 /* ----------------------------------------------------------------------
    compute analytic energy, force (fforce), and torque (ttor & rtor)
    based on rotation matrices a and precomputed matrices b and g
    if newton is off, rtor is not calculated for ghost atoms
 ------------------------------------------------------------------------- */
 
 // Ellipsoid-ellipsoid interaction.
 //   i, j     : atom indices; used for type lookup, j is also tested
 //              against nlocal
 //   a1, a2   : rotation matrices of i and j
 //   b1, b2   : precomputed well matrices of i and j
 //   g1, g2   : precomputed shape matrices of i and j
 //   r12, rsq : center-to-center vector and its squared length
 //   fforce   : output, force vector
 //   ttor     : output, torque on i
 //   rtor     : output, torque on j; written only if newton_pair is set or
 //              j < nlocal, otherwise left untouched
 // Returns the pair energy u_r*eta*chi.
 // Calls error->all() if either 3x3 solve reports failure.
 double PairGayBerne::gayberne_analytic(const int i,const int j,double a1[3][3],
                                        double a2[3][3], double b1[3][3],
                                        double b2[3][3], double g1[3][3],
                                        double g2[3][3], double *r12,
                                        const double rsq, double *fforce,
                                        double *ttor, double *rtor)
 {
   double tempv[3], tempv2[3];
   double temp[3][3];
   double temp1,temp2,temp3;
 
   int *type = atom->type;
   int newton_pair = force->newton_pair;
   int nlocal = atom->nlocal;
 
   double r12hat[3];
   MathExtra::normalize3(r12,r12hat);
   double r = sqrt(rsq);
 
   // compute distance of closest approach
   // kappa solves G12*kappa = r12 with G12 = g1+g2
 
   double g12[3][3];
   MathExtra::plus3(g1,g2,g12);
   double kappa[3];
   int ierror = MathExtra::mldivide3(g12,r12,kappa);
   if (ierror) error->all(FLERR,"Bad matrix inversion in mldivide3");
 
   // tempv = G12^-1*r12hat
 
   tempv[0] = kappa[0]/r;
   tempv[1] = kappa[1]/r;
   tempv[2] = kappa[2]/r;
   double sigma12 = MathExtra::dot3(r12hat,tempv);
   sigma12 = pow(0.5*sigma12,-0.5);
   double h12 = r-sigma12;
 
   // energy
   // compute u_r
 
   double varrho = sigma[type[i]][type[j]]/(h12+gamma*sigma[type[i]][type[j]]);
   double varrho6 = pow(varrho,6.0);
   double varrho12 = varrho6*varrho6;
   double u_r = 4.0*epsilon[type[i]][type[j]]*(varrho12-varrho6);
 
   // compute eta_12
 
   double eta = 2.0*lshape[type[i]]*lshape[type[j]];
   double det_g12 = MathExtra::det3(g12);
   eta = pow(eta/det_g12,upsilon);
 
   // compute chi_12
   // iota solves B12*iota = r12 with B12 = b1+b2
 
   double b12[3][3];
   double iota[3];
   MathExtra::plus3(b1,b2,b12);
   ierror = MathExtra::mldivide3(b12,r12,iota);
   if (ierror) error->all(FLERR,"Bad matrix inversion in mldivide3");
 
   // tempv = B12^-1*r12hat
 
   tempv[0] = iota[0]/r;
   tempv[1] = iota[1]/r;
   tempv[2] = iota[2]/r;
   double chi = MathExtra::dot3(r12hat,tempv);
   chi = pow(chi*2.0,mu);
 
   // force
   // compute dUr/dr
 
   temp1 = (2.0*varrho12*varrho-varrho6*varrho)/sigma[type[i]][type[j]];
   temp1 = temp1*24.0*epsilon[type[i]][type[j]];
   double u_slj = temp1*pow(sigma12,3.0)/2.0;
   double dUr[3];
   temp2 = MathExtra::dot3(kappa,r12hat);
   double uslj_rsq = u_slj/rsq;
   dUr[0] = temp1*r12hat[0]+uslj_rsq*(kappa[0]-temp2*r12hat[0]);
   dUr[1] = temp1*r12hat[1]+uslj_rsq*(kappa[1]-temp2*r12hat[1]);
   dUr[2] = temp1*r12hat[2]+uslj_rsq*(kappa[2]-temp2*r12hat[2]);
 
   // compute dChi_12/dr
 
   double dchi[3];
   temp1 = MathExtra::dot3(iota,r12hat);
   temp2 = -4.0/rsq*mu*pow(chi,(mu-1.0)/mu);
   dchi[0] = temp2*(iota[0]-temp1*r12hat[0]);
   dchi[1] = temp2*(iota[1]-temp1*r12hat[1]);
   dchi[2] = temp2*(iota[2]-temp1*r12hat[2]);
 
   // force = -eta*u_r*dchi - eta*chi*dUr
 
   temp1 = -eta*u_r;
   temp2 = eta*chi;
   fforce[0] = temp1*dchi[0]-temp2*dUr[0];
   fforce[1] = temp1*dchi[1]-temp2*dUr[1];
   fforce[2] = temp1*dchi[2]-temp2*dUr[2];
 
   // torque for particle 1 and 2
   // compute dUr
   // from here on dUr and dchi are reused to hold the torque terms
 
   tempv[0] = -uslj_rsq*kappa[0];
   tempv[1] = -uslj_rsq*kappa[1];
   tempv[2] = -uslj_rsq*kappa[2];
   MathExtra::vecmat(kappa,g1,tempv2);
   MathExtra::cross3(tempv,tempv2,dUr);
   double dUr2[3];
 
   // the particle-2 terms are skipped when j gets no torque
 
   if (newton_pair || j < nlocal) {
     MathExtra::vecmat(kappa,g2,tempv2);
     MathExtra::cross3(tempv,tempv2,dUr2);
   }
 
   // compute d_chi
 
   MathExtra::vecmat(iota,b1,tempv);
   MathExtra::cross3(tempv,iota,dchi);
   temp1 = -4.0/rsq;
   dchi[0] *= temp1;
   dchi[1] *= temp1;
   dchi[2] *= temp1;
   double dchi2[3];
 
   if (newton_pair || j < nlocal) {
     MathExtra::vecmat(iota,b2,tempv);
     MathExtra::cross3(tempv,iota,dchi2);
     dchi2[0] *= temp1;
     dchi2[1] *= temp1;
     dchi2[2] *= temp1;
   }
 
   // compute d_eta
   // sum over the three rows m of a1 of cross(a1[m], -eta*upsilon*temp[m])
 
   double deta[3];
   deta[0] = deta[1] = deta[2] = 0.0;
   compute_eta_torque(g12,a1,shape2[type[i]],temp);
   temp1 = -eta*upsilon;
   for (int m = 0; m < 3; m++) {
     for (int y = 0; y < 3; y++) tempv[y] = temp1*temp[m][y];
     MathExtra::cross3(a1[m],tempv,tempv2);
     deta[0] += tempv2[0];
     deta[1] += tempv2[1];
     deta[2] += tempv2[2];
   }
 
   // compute d_eta for particle 2
 
   double deta2[3];
   if (newton_pair || j < nlocal) {
     deta2[0] = deta2[1] = deta2[2] = 0.0;
     compute_eta_torque(g12,a2,shape2[type[j]],temp);
     for (int m = 0; m < 3; m++) {
       for (int y = 0; y < 3; y++) tempv[y] = temp1*temp[m][y];
       MathExtra::cross3(a2[m],tempv,tempv2);
       deta2[0] += tempv2[0];
       deta2[1] += tempv2[1];
       deta2[2] += tempv2[2];
     }
   }
 
   // torque
 
   temp1 = u_r*eta;
   temp2 = u_r*chi;
   temp3 = chi*eta;
 
   ttor[0] = (temp1*dchi[0]+temp2*deta[0]+temp3*dUr[0]) * -1.0;
   ttor[1] = (temp1*dchi[1]+temp2*deta[1]+temp3*dUr[1]) * -1.0;
   ttor[2] = (temp1*dchi[2]+temp2*deta[2]+temp3*dUr[2]) * -1.0;
 
   if (newton_pair || j < nlocal) {
     rtor[0] = (temp1*dchi2[0]+temp2*deta2[0]+temp3*dUr2[0]) * -1.0;
     rtor[1] = (temp1*dchi2[1]+temp2*deta2[1]+temp3*dUr2[1]) * -1.0;
     rtor[2] = (temp1*dchi2[2]+temp2*deta2[2]+temp3*dUr2[2]) * -1.0;
   }
 
   // energy = u_r*eta*chi
 
   return temp1*chi;
 }
 
 /* ----------------------------------------------------------------------
    compute analytic energy, force (fforce), and torque (ttor)
    between ellipsoid and lj particle
 ------------------------------------------------------------------------- */
 
 // Ellipsoid (particle i) interacting with an LJ particle (particle j).
 //   i, j       : atom indices, used only for type lookup
 //   a1, b1, g1 : rotation, well and shape matrices of the ellipsoid i
 //   r12, rsq   : center-to-center vector and its squared length
 //   fforce     : output, force vector
 //   ttor       : output, torque on the ellipsoid i
 // Returns the pair energy u_r*eta*chi.
 // Calls error->all() if either 3x3 solve reports failure.
 double PairGayBerne::gayberne_lj(const int i,const int j,double a1[3][3],
                                  double b1[3][3],double g1[3][3],
                                  double *r12,const double rsq,double *fforce,
                                  double *ttor)
 {
   double tempv[3], tempv2[3];
   double temp[3][3];
   double temp1,temp2,temp3;
 
   int *type = atom->type;
 
   double r12hat[3];
   MathExtra::normalize3(r12,r12hat);
   double r = sqrt(rsq);
 
   // compute distance of closest approach
   // G12 = g1 plus shape2[type[j]][0] on the diagonal: only component 0 of
   // j's shape2 is used, on all three diagonal entries
 
   double g12[3][3];
   g12[0][0] = g1[0][0]+shape2[type[j]][0];
   g12[1][1] = g1[1][1]+shape2[type[j]][0];
   g12[2][2] = g1[2][2]+shape2[type[j]][0];
   g12[0][1] = g1[0][1]; g12[1][0] = g1[1][0];
   g12[0][2] = g1[0][2]; g12[2][0] = g1[2][0];
   g12[1][2] = g1[1][2]; g12[2][1] = g1[2][1];
   double kappa[3];
   int ierror = MathExtra::mldivide3(g12,r12,kappa);
   if (ierror) error->all(FLERR,"Bad matrix inversion in mldivide3");
 
   // tempv = G12^-1*r12hat
 
   tempv[0] = kappa[0]/r;
   tempv[1] = kappa[1]/r;
   tempv[2] = kappa[2]/r;
   double sigma12 = MathExtra::dot3(r12hat,tempv);
   sigma12 = pow(0.5*sigma12,-0.5);
   double h12 = r-sigma12;
 
   // energy
   // compute u_r
 
   double varrho = sigma[type[i]][type[j]]/(h12+gamma*sigma[type[i]][type[j]]);
   double varrho6 = pow(varrho,6.0);
   double varrho12 = varrho6*varrho6;
   double u_r = 4.0*epsilon[type[i]][type[j]]*(varrho12-varrho6);
 
   // compute eta_12
 
   double eta = 2.0*lshape[type[i]]*lshape[type[j]];
   double det_g12 = MathExtra::det3(g12);
   eta = pow(eta/det_g12,upsilon);
 
   // compute chi_12
   // B12 = b1 plus well[type[j]][0] on the diagonal, same pattern as G12
 
   double b12[3][3];
   double iota[3];
   b12[0][0] = b1[0][0] + well[type[j]][0];
   b12[1][1] = b1[1][1] + well[type[j]][0];
   b12[2][2] = b1[2][2] + well[type[j]][0];
   b12[0][1] = b1[0][1]; b12[1][0] = b1[1][0];
   b12[0][2] = b1[0][2]; b12[2][0] = b1[2][0];
   b12[1][2] = b1[1][2]; b12[2][1] = b1[2][1];
   ierror = MathExtra::mldivide3(b12,r12,iota);
   if (ierror) error->all(FLERR,"Bad matrix inversion in mldivide3");
 
   // tempv = B12^-1*r12hat
 
   tempv[0] = iota[0]/r;
   tempv[1] = iota[1]/r;
   tempv[2] = iota[2]/r;
   double chi = MathExtra::dot3(r12hat,tempv);
   chi = pow(chi*2.0,mu);
 
   // force
   // compute dUr/dr
 
   temp1 = (2.0*varrho12*varrho-varrho6*varrho)/sigma[type[i]][type[j]];
   temp1 = temp1*24.0*epsilon[type[i]][type[j]];
   double u_slj = temp1*pow(sigma12,3.0)/2.0;
   double dUr[3];
   temp2 = MathExtra::dot3(kappa,r12hat);
   double uslj_rsq = u_slj/rsq;
   dUr[0] = temp1*r12hat[0]+uslj_rsq*(kappa[0]-temp2*r12hat[0]);
   dUr[1] = temp1*r12hat[1]+uslj_rsq*(kappa[1]-temp2*r12hat[1]);
   dUr[2] = temp1*r12hat[2]+uslj_rsq*(kappa[2]-temp2*r12hat[2]);
 
   // compute dChi_12/dr
 
   double dchi[3];
   temp1 = MathExtra::dot3(iota,r12hat);
   temp2 = -4.0/rsq*mu*pow(chi,(mu-1.0)/mu);
   dchi[0] = temp2*(iota[0]-temp1*r12hat[0]);
   dchi[1] = temp2*(iota[1]-temp1*r12hat[1]);
   dchi[2] = temp2*(iota[2]-temp1*r12hat[2]);
 
   // force = -eta*u_r*dchi - eta*chi*dUr
 
   temp1 = -eta*u_r;
   temp2 = eta*chi;
   fforce[0] = temp1*dchi[0]-temp2*dUr[0];
   fforce[1] = temp1*dchi[1]-temp2*dUr[1];
   fforce[2] = temp1*dchi[2]-temp2*dUr[2];
 
   // torque for the ellipsoid (particle i) only
   // compute dUr
   // from here on dUr and dchi are reused to hold the torque terms
 
   tempv[0] = -uslj_rsq*kappa[0];
   tempv[1] = -uslj_rsq*kappa[1];
   tempv[2] = -uslj_rsq*kappa[2];
   MathExtra::vecmat(kappa,g1,tempv2);
   MathExtra::cross3(tempv,tempv2,dUr);
 
   // compute d_chi
 
   MathExtra::vecmat(iota,b1,tempv);
   MathExtra::cross3(tempv,iota,dchi);
   temp1 = -4.0/rsq;
   dchi[0] *= temp1;
   dchi[1] *= temp1;
   dchi[2] *= temp1;
 
   // compute d_eta
   // sum over the three rows m of a1 of cross(a1[m], -eta*upsilon*temp[m])
 
   double deta[3];
   deta[0] = deta[1] = deta[2] = 0.0;
   compute_eta_torque(g12,a1,shape2[type[i]],temp);
   temp1 = -eta*upsilon;
   for (int m = 0; m < 3; m++) {
     for (int y = 0; y < 3; y++) tempv[y] = temp1*temp[m][y];
     MathExtra::cross3(a1[m],tempv,tempv2);
     deta[0] += tempv2[0];
     deta[1] += tempv2[1];
     deta[2] += tempv2[2];
   }
 
   // torque
 
   temp1 = u_r*eta;
   temp2 = u_r*chi;
   temp3 = chi*eta;
 
   ttor[0] = (temp1*dchi[0]+temp2*deta[0]+temp3*dUr[0]) * -1.0;
   ttor[1] = (temp1*dchi[1]+temp2*deta[1]+temp3*dUr[1]) * -1.0;
   ttor[2] = (temp1*dchi[2]+temp2*deta[2]+temp3*dUr[2]) * -1.0;
 
   // energy = u_r*eta*chi
 
   return temp1*chi;
 }
 
 /* ----------------------------------------------------------------------
    torque contribution from eta
    computes trace in the last doc equation for the torque derivative
    code comes from symbolic solver dump
    m is g12, m2 is a_i, s is the shape for the particle
 ------------------------------------------------------------------------- */
 
 // Fill ans (3x3) with the eta torque terms.
 //   m   : 3x3 matrix (callers pass g12)
 //   m2  : 3x3 matrix (callers pass the rotation matrix of the particle)
 //   s   : 3 values (callers pass shape2 of the particle)
 //   ans : output; row r is built from row r of m2, scaled by s[r]
 // Every entry is divided by den, which is the determinant of m written
 // out term by term; there is no guard against den == 0.
 // The long expressions are generated code; do not reorder terms by hand.
 void PairGayBerne::compute_eta_torque(double m[3][3], double m2[3][3],
                                       double *s, double ans[3][3])
 {
   // den = det(m)
   double den = m[1][0]*m[0][2]*m[2][1]-m[0][0]*m[1][2]*m[2][1]-
     m[0][2]*m[2][0]*m[1][1]+m[0][1]*m[2][0]*m[1][2]-
     m[1][0]*m[0][1]*m[2][2]+m[0][0]*m[1][1]*m[2][2];
 
   // row 0: uses m2[0][*] and s[0]
 
   ans[0][0] = s[0]*(m[1][2]*m[0][1]*m2[0][2]+2.0*m[1][1]*m[2][2]*m2[0][0]-
                     m[1][1]*m2[0][2]*m[0][2]-2.0*m[1][2]*m2[0][0]*m[2][1]+
                     m2[0][1]*m[0][2]*m[2][1]-m2[0][1]*m[0][1]*m[2][2]-
                     m[1][0]*m[2][2]*m2[0][1]+m[2][0]*m[1][2]*m2[0][1]+
                     m[1][0]*m2[0][2]*m[2][1]-m2[0][2]*m[2][0]*m[1][1])/den;
 
   ans[0][1] = s[0]*(m[0][2]*m2[0][0]*m[2][1]-m[2][2]*m2[0][0]*m[0][1]+
                     2.0*m[0][0]*m[2][2]*m2[0][1]-m[0][0]*m2[0][2]*m[1][2]-
                     2.0*m[2][0]*m[0][2]*m2[0][1]+m2[0][2]*m[1][0]*m[0][2]-
                     m[2][2]*m[1][0]*m2[0][0]+m[2][0]*m2[0][0]*m[1][2]+
                     m[2][0]*m2[0][2]*m[0][1]-m2[0][2]*m[0][0]*m[2][1])/den;
 
   ans[0][2] = s[0]*(m[0][1]*m[1][2]*m2[0][0]-m[0][2]*m2[0][0]*m[1][1]-
                     m[0][0]*m[1][2]*m2[0][1]+m[1][0]*m[0][2]*m2[0][1]-
                     m2[0][1]*m[0][0]*m[2][1]-m[2][0]*m[1][1]*m2[0][0]+
                     2.0*m[1][1]*m[0][0]*m2[0][2]-2.0*m[1][0]*m2[0][2]*m[0][1]+
                     m[1][0]*m[2][1]*m2[0][0]+m[2][0]*m2[0][1]*m[0][1])/den;
 
   // row 1: uses m2[1][*] and s[1]
 
   ans[1][0] = s[1]*(-m[1][1]*m2[1][2]*m[0][2]+2.0*m[1][1]*m[2][2]*m2[1][0]+
                     m[1][2]*m[0][1]*m2[1][2]-2.0*m[1][2]*m2[1][0]*m[2][1]+
                     m2[1][1]*m[0][2]*m[2][1]-m2[1][1]*m[0][1]*m[2][2]-
                     m[1][0]*m[2][2]*m2[1][1]+m[2][0]*m[1][2]*m2[1][1]-
                     m2[1][2]*m[2][0]*m[1][1]+m[1][0]*m2[1][2]*m[2][1])/den;
 
   ans[1][1] = s[1]*(m[0][2]*m2[1][0]*m[2][1]-m[0][1]*m[2][2]*m2[1][0]+
                     2.0*m[2][2]*m[0][0]*m2[1][1]-m2[1][2]*m[0][0]*m[1][2]-
                     2.0*m[2][0]*m2[1][1]*m[0][2]-m[1][0]*m[2][2]*m2[1][0]+
                     m[2][0]*m[1][2]*m2[1][0]+m[1][0]*m2[1][2]*m[0][2]-
                     m[0][0]*m2[1][2]*m[2][1]+m2[1][2]*m[0][1]*m[2][0])/den;
 
   ans[1][2] = s[1]*(m[0][1]*m[1][2]*m2[1][0]-m[0][2]*m2[1][0]*m[1][1]-
                     m[0][0]*m[1][2]*m2[1][1]+m[1][0]*m[0][2]*m2[1][1]+
                     2.0*m[1][1]*m[0][0]*m2[1][2]-m[0][0]*m2[1][1]*m[2][1]+
                     m[0][1]*m[2][0]*m2[1][1]-m2[1][0]*m[2][0]*m[1][1]-
                     2.0*m[1][0]*m[0][1]*m2[1][2]+m[1][0]*m2[1][0]*m[2][1])/den;
 
   // row 2: uses m2[2][*] and s[2]
 
   ans[2][0] = s[2]*(-m[1][1]*m[0][2]*m2[2][2]+m[0][1]*m[1][2]*m2[2][2]+
                     2.0*m[1][1]*m2[2][0]*m[2][2]-m[0][1]*m2[2][1]*m[2][2]+
                     m[0][2]*m[2][1]*m2[2][1]-2.0*m2[2][0]*m[2][1]*m[1][2]-
                     m[1][0]*m2[2][1]*m[2][2]+m[1][2]*m[2][0]*m2[2][1]-
                     m[1][1]*m[2][0]*m2[2][2]+m[2][1]*m[1][0]*m2[2][2])/den;
 
   // note the overall minus sign in front of this parenthesis
 
   ans[2][1] = s[2]*-(m[0][1]*m[2][2]*m2[2][0]-m[0][2]*m2[2][0]*m[2][1]-
                      2.0*m2[2][1]*m[0][0]*m[2][2]+m[1][2]*m2[2][2]*m[0][0]+
                      2.0*m2[2][1]*m[0][2]*m[2][0]+m[1][0]*m2[2][0]*m[2][2]-
                      m[1][0]*m[0][2]*m2[2][2]-m[1][2]*m[2][0]*m2[2][0]+
                      m[0][0]*m2[2][2]*m[2][1]-m2[2][2]*m[0][1]*m[2][0])/den;
 
   ans[2][2] = s[2]*(m[0][1]*m[1][2]*m2[2][0]-m[0][2]*m2[2][0]*m[1][1]-
                     m[0][0]*m[1][2]*m2[2][1]+m[1][0]*m[0][2]*m2[2][1]-
                     m[1][1]*m[2][0]*m2[2][0]-m[2][1]*m2[2][1]*m[0][0]+
                     2.0*m[1][1]*m2[2][2]*m[0][0]+m[2][1]*m[1][0]*m2[2][0]+
                     m[2][0]*m[0][1]*m2[2][1]-2.0*m2[2][2]*m[1][0]*m[0][1])/den;
 }
diff --git a/src/ASPHERE/pair_line_lj.cpp b/src/ASPHERE/pair_line_lj.cpp
index 219a8ea97..7f97902c5 100644
--- a/src/ASPHERE/pair_line_lj.cpp
+++ b/src/ASPHERE/pair_line_lj.cpp
@@ -1,454 +1,454 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_line_lj.h"
 #include "atom.h"
 #include "atom_vec_line.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 #define DELTA 10000
 
 /* ---------------------------------------------------------------------- */
 
 // Construct the pair style with empty discretization buffers and with the
 // single_enable and restartinfo flags switched off.
 PairLineLJ::PairLineLJ(LAMMPS *lmp) : Pair(lmp)
 {
   // flags: both are cleared for this style
   restartinfo = 0;
   single_enable = 0;
 
   // per-atom bookkeeping for discretized lines; grown lazily in compute()
   dfirst = NULL;
   dnum = NULL;
   nmax = 0;
 
   // pool of sub-particles; grown lazily in discretize()
   discrete = NULL;
   dmax = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Release the discretization buffers and, if allocate() has run, all
 // per-type coefficient tables.
 PairLineLJ::~PairLineLJ()
 {
   // discretization storage is freed unconditionally
   memory->destroy(dfirst);
   memory->destroy(dnum);
   memory->sfree(discrete);
 
   // the per-type tables only exist once allocate() has been called
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   memory->destroy(lj4);
   memory->destroy(lj3);
   memory->destroy(lj2);
   memory->destroy(lj1);
   memory->destroy(sigma);
   memory->destroy(epsilon);
   memory->destroy(cut);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Compute forces, z-torques and (when eflag/vflag request it) energy and
 // virial for every neighbor pair whose center-to-center distance is inside
 // the cutoff.
 //
 // An atom with line[i] >= 0 is treated as a line segment and is replaced,
 // on demand, by a row of sub-particles built by discretize().  Four cases
 // are handled: line/line, line/particle, particle/line and
 // particle/particle.  Interactions that involve a line use only the x and
 // y components and accumulate torque about z.
 //
 //   eflag  nonzero -> accumulate the pair energy
 //   vflag  nonzero -> accumulate the virial
 void PairLineLJ::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   int ni,nj,npi,npj,ifirst,jfirst;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r2inv,r6inv,term1,term2,sig,sig3,forcelj;
   double xi[2],xj[2],fi[2],fj[2],dxi,dxj,dyi,dyj;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double **torque = atom->torque;
   int *line = atom->line;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int nall = nlocal + atom->nghost;
   int newton_pair = force->newton_pair;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // grow discrete list if necessary and initialize
   // dnum[i] == 0 marks atom i as "not yet discretized this step"
 
   if (nall > nmax) {
     nmax = nall;
     memory->destroy(dnum);
     memory->destroy(dfirst);
     memory->create(dnum,nall,"pair:dnum");
     memory->create(dfirst,nall,"pair:dfirst");
   }
   for (i = 0; i < nall; i++) dnum[i] = 0;
   ndiscrete = 0;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       // cutoff test uses the center-to-center distance only
 
       if (rsq >= cutsq[itype][jtype]) continue;
 
       // line/line interactions = NxN particles
 
       evdwl = 0.0;
       if (line[i] >= 0 && line[j] >= 0) {
         if (dnum[i] == 0) discretize(i,sigma[itype][itype]);
         npi = dnum[i];
         ifirst = dfirst[i];
         if (dnum[j] == 0) discretize(j,sigma[jtype][jtype]);
         npj = dnum[j];
         jfirst = dfirst[j];
 
         for (ni = 0; ni < npi; ni++) {
           dxi = discrete[ifirst+ni].dx;
           dyi = discrete[ifirst+ni].dy;
 
           for (nj = 0; nj < npj; nj++) {
             dxj = discrete[jfirst+nj].dx;
             dyj = discrete[jfirst+nj].dy;
 
             xi[0] = x[i][0] + dxi;
             xi[1] = x[i][1] + dyi;
             xj[0] = x[j][0] + dxj;
             xj[1] = x[j][1] + dyj;
 
             delx = xi[0] - xj[0];
             dely = xi[1] - xj[1];
             rsq = delx*delx + dely*dely;
 
             // LJ with sigma = mean of the two sub-particle sizes
             // term1 = 48*eps*sig^12, term2 = 24*eps*sig^6
 
             sig = 0.5 * (discrete[ifirst+ni].sigma+discrete[jfirst+nj].sigma);
             sig3 = sig*sig*sig;
             term2 = 24.0*epsilon[itype][jtype] * sig3*sig3;
             term1 = 2.0 * term2 * sig3*sig3;
             r2inv = 1.0/rsq;
             r6inv = r2inv*r2inv*r2inv;
             forcelj = r6inv * (term1*r6inv - term2);
             fpair = forcelj*r2inv;
 
             if (eflag) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0);
 
             fi[0] = delx*fpair;
             fi[1] = dely*fpair;
             f[i][0] += fi[0];
             f[i][1] += fi[1];
             torque[i][2] += dxi*fi[1] - dyi*fi[0];
 
             if (newton_pair || j < nlocal) {
               fj[0] = -delx*fpair;
               fj[1] = -dely*fpair;
               f[j][0] += fj[0];
               f[j][1] += fj[1];
               torque[j][2] += dxj*fj[1] - dyj*fj[0];
             }
           }
         }
 
       // line/particle interaction = Nx1 particles
       // convert line into Np particles based on sigma and line length
 
       } else if (line[i] >= 0) {
         if (dnum[i] == 0) discretize(i,sigma[itype][itype]);
         npi = dnum[i];
         ifirst = dfirst[i];
 
         for (ni = 0; ni < npi; ni++) {
           dxi = discrete[ifirst+ni].dx;
           dyi = discrete[ifirst+ni].dy;
 
           xi[0] = x[i][0] + dxi;
           xi[1] = x[i][1] + dyi;
           xj[0] = x[j][0];
           xj[1] = x[j][1];
 
           delx = xi[0] - xj[0];
           dely = xi[1] - xj[1];
           rsq = delx*delx + dely*dely;
 
           sig = 0.5 * (discrete[ifirst+ni].sigma+sigma[jtype][jtype]);
           sig3 = sig*sig*sig;
           term2 = 24.0*epsilon[itype][jtype] * sig3*sig3;
           term1 = 2.0 * term2 * sig3*sig3;
           r2inv = 1.0/rsq;
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (term1*r6inv - term2);
           fpair = forcelj*r2inv;
 
           if (eflag) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0);
 
           fi[0] = delx*fpair;
           fi[1] = dely*fpair;
           f[i][0] += fi[0];
           f[i][1] += fi[1];
           torque[i][2] += dxi*fi[1] - dyi*fi[0];
 
           // j is a point particle here: force only, no torque
 
           if (newton_pair || j < nlocal) {
             fj[0] = -delx*fpair;
             fj[1] = -dely*fpair;
             f[j][0] += fj[0];
             f[j][1] += fj[1];
           }
         }
 
       // particle/line interaction = 1xN particles
       // convert line into Np particles based on sigma and line length
 
       } else if (line[j] >= 0) {
         if (dnum[j] == 0) discretize(j,sigma[jtype][jtype]);
         npj = dnum[j];
         jfirst = dfirst[j];
 
         for (nj = 0; nj < npj; nj++) {
           dxj = discrete[jfirst+nj].dx;
           dyj = discrete[jfirst+nj].dy;
 
           xi[0] = x[i][0];
           xi[1] = x[i][1];
           xj[0] = x[j][0] + dxj;
           xj[1] = x[j][1] + dyj;
 
           delx = xi[0] - xj[0];
           dely = xi[1] - xj[1];
           rsq = delx*delx + dely*dely;
 
           sig = 0.5 * (sigma[itype][itype]+discrete[jfirst+nj].sigma);
           sig3 = sig*sig*sig;
           term2 = 24.0*epsilon[itype][jtype] * sig3*sig3;
           term1 = 2.0 * term2 * sig3*sig3;
           r2inv = 1.0/rsq;
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (term1*r6inv - term2);
           fpair = forcelj*r2inv;
 
           if (eflag) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0);
 
           // i is a point particle here: force only, no torque
 
           fi[0] = delx*fpair;
           fi[1] = dely*fpair;
           f[i][0] += fi[0];
           f[i][1] += fi[1];
 
           // fj must be computed BEFORE it is accumulated; otherwise the
           // values left over from an earlier pair (or uninitialized data)
           // would be added to f[j]
 
           if (newton_pair || j < nlocal) {
             fj[0] = -delx*fpair;
             fj[1] = -dely*fpair;
             f[j][0] += fj[0];
             f[j][1] += fj[1];
             torque[j][2] += dxj*fj[1] - dyj*fj[0];
           }
         }
 
       // particle/particle interaction = 1x1 particles
 
       } else {
         r2inv = 1.0/rsq;
         r6inv = r2inv*r2inv*r2inv;
         forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
         fpair = forcelj*r2inv;
 
         if (eflag)
           evdwl += r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
       }
 
       // NOTE(review): for the line cases fpair, delx and dely hold the
       // values of the last sub-particle pair processed above, while delz
       // is still the center-to-center value; evdwl is the sum over all
       // sub-particle pairs
 
       if (evflag) ev_tally(i,j,nlocal,newton_pair,
                            evdwl,0.0,fpair,delx,dely,delz);
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 // Create all per-type-pair tables, each sized (ntypes+1) x (ntypes+1),
 // and clear the upper triangle (j >= i) of setflag.
 void PairLineLJ::allocate()
 {
   const int ntypes = atom->ntypes;
   const int dim = ntypes + 1;
 
   allocated = 1;
 
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   memory->create(cut,dim,dim,"pair:cut");
   memory->create(epsilon,dim,dim,"pair:epsilon");
   memory->create(sigma,dim,dim,"pair:sigma");
   memory->create(lj1,dim,dim,"pair:lj1");
   memory->create(lj2,dim,dim,"pair:lj2");
   memory->create(lj3,dim,dim,"pair:lj3");
   memory->create(lj4,dim,dim,"pair:lj4");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 // Parse the pair_style arguments: exactly one value, the global cutoff.
 //
 //   narg  number of arguments, must be 1
 //   arg   arg[0] = global cutoff
 //
 // If coefficient tables already exist, every type pair whose coefficients
 // were set explicitly (setflag != 0) has its cutoff reset to the new global
 // value.
 void PairLineLJ::settings(int narg, char **arg)
 {
   if (narg != 1) error->all(FLERR,"Illegal pair_style command");
 
   cut_global = force->numeric(FLERR,arg[0]);
 
   // reset cutoffs that have been explicitly set
   // start at j = i so that the diagonal entries cut[i][i], which are also
   // set explicitly by coeff(), are reset as well
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut[i][j] = cut_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairLineLJ::coeff(int narg, char **arg)
 {
   if (narg < 4 || narg > 5)
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double epsilon_one = force->numeric(FLERR,arg[2]);
   double sigma_one = force->numeric(FLERR,arg[3]);
 
   double cut_one = cut_global;
   if (narg == 5) cut_one = force->numeric(FLERR,arg[4]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       epsilon[i][j] = epsilon_one;
       sigma[i][j] = sigma_one;
       cut[i][j] = cut_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 // Style-specific initialization: look up the "line" atom style (fatal error
 // if it is not found) and request a neighbor list for this pair style.
 void PairLineLJ::init_style()
 {
   // avec is read later by discretize() to obtain per-line length and theta
   avec = (AtomVecLine *) atom->style_match("line");
   if (!avec) error->all(FLERR,"Pair line/lj requires atom style line");
 
   // patch hunk: the request now additionally passes instance_me
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairLineLJ::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) {
     epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j],
                                sigma[i][i],sigma[j][j]);
     sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
     cut[i][j] = mix_distance(cut[i][i],cut[j][j]);
   }
 
   lj1[i][j] = 48.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj2[i][j] = 24.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
   lj3[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj4[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
 
   epsilon[j][i] = epsilon[i][j];
   sigma[j][i] = sigma[i][j];
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   lj4[j][i] = lj4[i][j];
 
   return cut[i][j];
 }
 
 /* ----------------------------------------------------------------------
    discretize line segment I into N sub-segments no more than sigma in length
    store new discrete particles in Discrete list
 ------------------------------------------------------------------------- */
 
 // Replace line segment i by n = int(length/sigma) + 1 equally sized
 // sub-particles and append them to the discrete list.
 //
 //   i      atom index; atom->line[i] selects its entry in avec->bonus
 //   sigma  upper bound on the sub-segment length
 //
 // On return dnum[i] = n and dfirst[i] = index of the first new entry.
 // Each entry stores the offset (dx,dy) of a sub-segment center from the
 // line center and the actual sub-segment length length/n.
 void PairLineLJ::discretize(int i, double sigma)
 {
   AtomVecLine::Bonus *bonus = avec->bonus;
   double length = bonus[atom->line[i]].length;
   double theta = bonus[atom->line[i]].theta;
   int n = static_cast<int> (length/sigma) + 1;
   dnum[i] = n;
   dfirst[i] = ndiscrete;
 
   // grow the pool in DELTA-sized steps until all n new entries fit;
   // a single step is not enough when one line needs more than DELTA entries
 
   if (ndiscrete + n > dmax) {
     while (ndiscrete + n > dmax) dmax += DELTA;
     discrete = (Discrete *)
       memory->srealloc(discrete,dmax*sizeof(Discrete),"pair:discrete");
   }
 
   // from here on sigma is the actual sub-segment length
 
   sigma = length/n;
   const double costheta = cos(theta);
   const double sintheta = sin(theta);
   double delta;
 
   // delta = fractional position of the m-th sub-segment center, in (-0.5,0.5)
 
   for (int m = 0; m < n; m++) {
     delta = -0.5 + (2*m+1)/(2.0*n);
     discrete[ndiscrete].dx = delta*length*costheta;
     discrete[ndiscrete].dy = delta*length*sintheta;
     discrete[ndiscrete].sigma = sigma;
     ndiscrete++;
   }
 }
diff --git a/src/ASPHERE/pair_resquared.cpp b/src/ASPHERE/pair_resquared.cpp
index 9764fe61d..adf8c5466 100755
--- a/src/ASPHERE/pair_resquared.cpp
+++ b/src/ASPHERE/pair_resquared.cpp
@@ -1,990 +1,990 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Mike Brown (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_resquared.h"
 #include "math_extra.h"
 #include "atom.h"
 #include "atom_vec_ellipsoid.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 // Construct the pair style and set its numerical constants.
 // cr60 = 60^(1/3) and b_alpha = 45/56 are set once in the initializer
 // list; the body no longer assigns the same values a second time.
 PairRESquared::PairRESquared(LAMMPS *lmp) : Pair(lmp),
                                             cr60(pow(60.0,1.0/3.0)),
                                             b_alpha(45.0/56.0)
 {
   single_enable = 0;
 
   // 16*atan(1) equals 4*pi
   solv_f_a = 3.0/(16.0*atan(1.0)*-36.0);
   solv_f_r = 3.0/(16.0*atan(1.0)*2025.0);
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 // Release every table created by allocate(); nothing to do if allocate()
 // was never called.
 PairRESquared::~PairRESquared()
 {
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   // pair coefficient tables
   memory->destroy(form);
   memory->destroy(epsilon);
   memory->destroy(sigma);
   memory->destroy(cut);
   memory->destroy(lj1);
   memory->destroy(lj2);
   memory->destroy(lj3);
   memory->destroy(lj4);
   memory->destroy(offset);
 
   // per-type tables
   memory->destroy(shape1);
   memory->destroy(shape2);
   memory->destroy(well);
 
   // arrays obtained with new[]
   delete [] setwell;
   delete [] lshape;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairRESquared::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double evdwl,one_eng,rsq,r2inv,r6inv,forcelj,factor_lj;
   double fforce[3],ttor[3],rtor[3],r12[3];
   int *ilist,*jlist,*numneigh,**firstneigh;
   RE2Vars wi,wj;
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double **tor = atom->torque;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     itype = type[i];
 
     // not a LJ sphere
 
     if (lshape[itype] != 0.0) precompute_i(i,wi);
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       // r12 = center to center vector
 
       r12[0] = x[j][0]-x[i][0];
       r12[1] = x[j][1]-x[i][1];
       r12[2] = x[j][2]-x[i][2];
       rsq = MathExtra::dot3(r12,r12);
       jtype = type[j];
 
       // compute if less than cutoff
 
       if (rsq < cutsq[itype][jtype]) {
         fforce[0] = fforce[1] = fforce[2] = 0.0;
 
         switch (form[itype][jtype]) {
 
          case SPHERE_SPHERE:
           r2inv = 1.0/rsq;
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
           forcelj *= -r2inv;
           if (eflag) one_eng =
               r6inv*(r6inv*lj3[itype][jtype]-lj4[itype][jtype]) -
               offset[itype][jtype];
           fforce[0] = r12[0]*forcelj;
           fforce[1] = r12[1]*forcelj;
           fforce[2] = r12[2]*forcelj;
           break;
 
          case SPHERE_ELLIPSE:
           precompute_i(j,wj);
           if (newton_pair || j < nlocal) {
             one_eng = resquared_lj(j,i,wj,r12,rsq,fforce,rtor,true);
             tor[j][0] += rtor[0]*factor_lj;
             tor[j][1] += rtor[1]*factor_lj;
             tor[j][2] += rtor[2]*factor_lj;
           } else
             one_eng = resquared_lj(j,i,wj,r12,rsq,fforce,rtor,false);
           break;
 
          case ELLIPSE_SPHERE:
           one_eng = resquared_lj(i,j,wi,r12,rsq,fforce,ttor,true);
           tor[i][0] += ttor[0]*factor_lj;
           tor[i][1] += ttor[1]*factor_lj;
           tor[i][2] += ttor[2]*factor_lj;
           break;
 
          default:
           precompute_i(j,wj);
           one_eng = resquared_analytic(i,j,wi,wj,r12,rsq,fforce,ttor,rtor);
           tor[i][0] += ttor[0]*factor_lj;
           tor[i][1] += ttor[1]*factor_lj;
           tor[i][2] += ttor[2]*factor_lj;
           if (newton_pair || j < nlocal) {
             tor[j][0] += rtor[0]*factor_lj;
             tor[j][1] += rtor[1]*factor_lj;
             tor[j][2] += rtor[2]*factor_lj;
           }
          break;
         }
 
         fforce[0] *= factor_lj;
         fforce[1] *= factor_lj;
         fforce[2] *= factor_lj;
         f[i][0] += fforce[0];
         f[i][1] += fforce[1];
         f[i][2] += fforce[2];
 
         if (newton_pair || j < nlocal) {
           f[j][0] -= fforce[0];
           f[j][1] -= fforce[1];
           f[j][2] -= fforce[2];
         }
 
         if (eflag) evdwl = factor_lj*one_eng;
 
         if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
                                  evdwl,0.0,fforce[0],fforce[1],fforce[2],
                                  -r12[0],-r12[1],-r12[2]);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 // Create all per-type and per-type-pair tables.  Pair tables are
 // (ntypes+1) x (ntypes+1); shape1, shape2 and well are (ntypes+1) x 3;
 // lshape and setwell are plain arrays of length ntypes+1.
 void PairRESquared::allocate()
 {
   allocated = 1;
   int n = atom->ntypes;
 
   // clear the full 1..n square, not just the upper triangle: init_one()
   // also tests setflag[j][i], so those entries must hold a defined value
   // (not relying on memory->create() to zero the storage)
 
   memory->create(setflag,n+1,n+1,"pair:setflag");
   for (int i = 1; i <= n; i++)
     for (int j = 1; j <= n; j++)
       setflag[i][j] = 0;
 
   memory->create(cutsq,n+1,n+1,"pair:cutsq");
 
   memory->create(form,n+1,n+1,"pair:form");
   memory->create(epsilon,n+1,n+1,"pair:epsilon");
   memory->create(sigma,n+1,n+1,"pair:sigma");
   memory->create(shape1,n+1,3,"pair:shape1");
   memory->create(shape2,n+1,3,"pair:shape2");
   memory->create(well,n+1,3,"pair:well");
   memory->create(cut,n+1,n+1,"pair:cut");
   memory->create(lj1,n+1,n+1,"pair:lj1");
   memory->create(lj2,n+1,n+1,"pair:lj2");
   memory->create(lj3,n+1,n+1,"pair:lj3");
   memory->create(lj4,n+1,n+1,"pair:lj4");
   memory->create(offset,n+1,n+1,"pair:offset");
 
   // setwell[i] == 0 means no well depths have been given for type i yet
 
   lshape = new double[n+1];
   setwell = new int[n+1];
   for (int i = 1; i <= n; i++) setwell[i] = 0;
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 // Parse the pair_style arguments: exactly one value, the global cutoff.
 //
 //   narg  number of arguments, must be 1
 //   arg   arg[0] = global cutoff
 //
 // If coefficient tables already exist, every type pair whose coefficients
 // were set explicitly (setflag != 0) has its cutoff reset to the new global
 // value.
 void PairRESquared::settings(int narg, char **arg)
 {
   if (narg != 1) error->all(FLERR,"Illegal pair_style command");
 
   cut_global = force->numeric(FLERR,arg[0]);
 
   // reset cutoffs that have been explicitly set
   // start at j = i so that the diagonal entries cut[i][i], which are also
   // set explicitly by coeff(), are reset as well
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut[i][j] = cut_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairRESquared::coeff(int narg, char **arg)
 {
   if (narg < 10 || narg > 11)
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double epsilon_one = force->numeric(FLERR,arg[2]);
   double sigma_one = force->numeric(FLERR,arg[3]);
   double eia_one = force->numeric(FLERR,arg[4]);
   double eib_one = force->numeric(FLERR,arg[5]);
   double eic_one = force->numeric(FLERR,arg[6]);
   double eja_one = force->numeric(FLERR,arg[7]);
   double ejb_one = force->numeric(FLERR,arg[8]);
   double ejc_one = force->numeric(FLERR,arg[9]);
 
   double cut_one = cut_global;
   if (narg == 11) cut_one = force->numeric(FLERR,arg[10]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       epsilon[i][j] = epsilon_one;
       sigma[i][j] = sigma_one;
       cut[i][j] = cut_one;
       if (eia_one != 0.0 || eib_one != 0.0 || eic_one != 0.0) {
         well[i][0] = eia_one;
         well[i][1] = eib_one;
         well[i][2] = eic_one;
         if (eia_one == 1.0 && eib_one == 1.0 && eic_one == 1.0) setwell[i] = 2;
         else setwell[i] = 1;
       }
       if (eja_one != 0.0 || ejb_one != 0.0 || ejc_one != 0.0) {
         well[j][0] = eja_one;
         well[j][1] = ejb_one;
         well[j][2] = ejc_one;
         if (eja_one == 1.0 && ejb_one == 1.0 && ejc_one == 1.0) setwell[j] = 2;
         else setwell[j] = 1;
       }
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 // Style-specific initialization: look up the "ellipsoid" atom style (fatal
 // error if it is not found), request a neighbor list, and precompute the
 // per-type shape quantities shape2 and lshape.
 void PairRESquared::init_style()
 {
   // avec is read later by precompute_i() to obtain per-particle quaternions
   avec = (AtomVecEllipsoid *) atom->style_match("ellipsoid");
   if (!avec) error->all(FLERR,"Pair resquared requires atom style ellipsoid");
 
   // patch hunk: the request now additionally passes instance_me
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 
   // per-type shape precalculations
   // require that atom shapes are identical within each type
   // NOTE(review): shape_consistency() presumably fills shape1[i][0..2] with
   // the common shape of type i - confirm against Atom::shape_consistency
 
   for (int i = 1; i <= atom->ntypes; i++) {
     if (!atom->shape_consistency(i,shape1[i][0],shape1[i][1],shape1[i][2]))
       error->all(FLERR,"Pair resquared requires atoms with same type have same shape");
     // shape2 = element-wise square of shape1, lshape = product of the three
     // shape1 entries; both are set only for types whose well depths were given
     if (setwell[i]) {
       shape2[i][0] = shape1[i][0]*shape1[i][0];
       shape2[i][1] = shape1[i][1]*shape1[i][1];
       shape2[i][2] = shape1[i][2]*shape1[i][2];
       lshape[i] = shape1[i][0]*shape1[i][1]*shape1[i][2];
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 // Finalize coefficients for type pair (i,j) and its mirror (j,i).
 //
 // Steps:
 //   1. require well depths for both types (fatal error otherwise)
 //   2. classify the pair into form[i][j] / form[j][i]; a type counts as an
 //      ellipsoid only if all three of its shape1 entries are nonzero
 //   3. if (i,j) was not set explicitly: take the values from (j,i) when
 //      that ordering was set, else mix from the diagonal entries when both
 //      types are spheres, else raise a fatal error
 //   4. derive the LJ prefactors and the optional energy offset
 //
 // Returns the cutoff for the pair.
 double PairRESquared::init_one(int i, int j)
 {
   if (setwell[i] == 0 || setwell[j] == 0)
     error->all(FLERR,"Pair resquared epsilon a,b,c coeffs are not all set");
 
   int ishape = 0;
   if (shape1[i][0] != 0.0 && shape1[i][1] != 0.0 && shape1[i][2] != 0.0)
     ishape = 1;
   int jshape = 0;
   if (shape1[j][0] != 0.0 && shape1[j][1] != 0.0 && shape1[j][2] != 0.0)
     jshape = 1;
 
   if (ishape == 0 && jshape == 0) {
     form[i][j] = SPHERE_SPHERE;
     form[j][i] = SPHERE_SPHERE;
   } else if (ishape == 0) {
     form[i][j] = SPHERE_ELLIPSE;
     form[j][i] = ELLIPSE_SPHERE;
   } else if (jshape == 0) {
     form[i][j] = ELLIPSE_SPHERE;
     form[j][i] = SPHERE_ELLIPSE;
   } else {
     form[i][j] = ELLIPSE_ELLIPSE;
     form[j][i] = ELLIPSE_ELLIPSE;
   }
 
   // allow mixing only for LJ spheres
   // the copy from (j,i) happens only when (j,i) was actually set, so that
   // freshly mixed values are not overwritten by unset (j,i) entries
 
   if (setflag[i][j] == 0) {
     if (setflag[j][i] == 0) {
       if (ishape == 0 && jshape == 0) {
         epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j],
                                    sigma[i][i],sigma[j][j]);
         sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
         cut[i][j] = mix_distance(cut[i][i],cut[j][j]);
       } else
         error->all(FLERR,
                    "Pair resquared epsilon and sigma coeffs are not all set");
     } else {
       epsilon[i][j] = epsilon[j][i];
       sigma[i][j] = sigma[j][i];
       cut[i][j] = cut[j][i];
     }
   }
 
   // force prefactors (lj1, lj2) and energy prefactors (lj3, lj4)
 
   lj1[i][j] = 48.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj2[i][j] = 24.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
   lj3[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj4[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
 
   // offset = LJ energy evaluated at the cutoff, or zero when disabled
 
   if (offset_flag) {
     double ratio = sigma[i][j] / cut[i][j];
     offset[i][j] = 4.0 * epsilon[i][j] * (pow(ratio,12.0) - pow(ratio,6.0));
   } else offset[i][j] = 0.0;
 
   // symmetric copies for the (j,i) ordering
 
   epsilon[j][i] = epsilon[i][j];
   sigma[j][i] = sigma[i][j];
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   lj4[j][i] = lj4[i][j];
   offset[j][i] = offset[i][j];
 
   return cut[i][j];
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 // Write the global settings followed by, for each type i, its setwell flag
 // (plus the three well depths when set) and, for each j >= i, the setflag
 // value (plus epsilon, sigma and cut when set).
 void PairRESquared::write_restart(FILE *fp)
 {
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
 
   for (int itype = 1; itype <= ntypes; ++itype) {
     // per-type record
     fwrite(&setwell[itype],sizeof(int),1,fp);
     if (setwell[itype]) fwrite(&well[itype][0],sizeof(double),3,fp);
 
     // per-pair records, upper triangle only
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
       if (!setflag[itype][jtype]) continue;
       fwrite(&epsilon[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigma[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 // Read back what write_restart() produced.  Only rank 0 touches the file;
 // every value is then broadcast from rank 0 to all ranks.
 void PairRESquared::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   const bool reader = (comm->me == 0);
   const int ntypes = atom->ntypes;
 
   for (int itype = 1; itype <= ntypes; ++itype) {
     // per-type record: setwell flag and, if set, three well depths
     if (reader) fread(&setwell[itype],sizeof(int),1,fp);
     MPI_Bcast(&setwell[itype],1,MPI_INT,0,world);
     if (setwell[itype]) {
       if (reader) fread(&well[itype][0],sizeof(double),3,fp);
       MPI_Bcast(&well[itype][0],3,MPI_DOUBLE,0,world);
     }
 
     // per-pair records, upper triangle only
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       if (reader) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
       if (!setflag[itype][jtype]) continue;
 
       if (reader) {
         fread(&epsilon[itype][jtype],sizeof(double),1,fp);
         fread(&sigma[itype][jtype],sizeof(double),1,fp);
         fread(&cut[itype][jtype],sizeof(double),1,fp);
       }
       MPI_Bcast(&epsilon[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&sigma[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 // Write the global settings: cut_global (double) then mix_flag (int).
 void PairRESquared::write_restart_settings(FILE *fp)
 {
   const size_t one = 1;
   fwrite(&cut_global,sizeof(double),one,fp);
   fwrite(&mix_flag,sizeof(int),one,fp);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 // Read cut_global and mix_flag on rank 0, then broadcast both to all ranks.
 void PairRESquared::read_restart_settings(FILE *fp)
 {
   const bool reader = (comm->me == 0);
 
   if (reader) {
     fread(&cut_global,sizeof(double),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
   }
 
   MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
 }
 
 /* ----------------------------------------------------------------------
    Precompute per-particle temporaries for RE-squared calculation
 ------------------------------------------------------------------------- */
 
 void PairRESquared::precompute_i(const int i,RE2Vars &ws)
 {
   double aTs[3][3];       // A1'*S1^2
   int *ellipsoid = atom->ellipsoid;
   AtomVecEllipsoid::Bonus *bonus = avec->bonus;
   MathExtra::quat_to_mat_trans(bonus[ellipsoid[i]].quat,ws.A);
   MathExtra::transpose_diag3(ws.A,well[atom->type[i]],ws.aTe);
   MathExtra::transpose_diag3(ws.A,shape2[atom->type[i]],aTs);
   MathExtra::diag_times3(shape2[atom->type[i]],ws.A,ws.sa);
   MathExtra::times3(aTs,ws.A,ws.gamma);
   MathExtra::rotation_generator_x(ws.A,ws.lA[0]);
   MathExtra::rotation_generator_y(ws.A,ws.lA[1]);
   MathExtra::rotation_generator_z(ws.A,ws.lA[2]);
   for (int i=0; i<3; i++) {
     MathExtra::times3(aTs,ws.lA[i],ws.lAtwo[i]);
     MathExtra::transpose_times3(ws.lA[i],ws.sa,ws.lAsa[i]);
     MathExtra::plus3(ws.lAsa[i],ws.lAtwo[i],ws.lAsa[i]);
   }
 }
 
 /* ----------------------------------------------------------------------
    Compute the derivative of the determinant of m, using m and the
    derivative of m (m2)
 ------------------------------------------------------------------------- */
 
 // Return the derivative of det(m), given m and its element-wise
 // derivative m2.  The result is a sum of 18 triple products, each holding
 // exactly one entry of m2; terms are accumulated one at a time in a fixed
 // order.
 double PairRESquared::det_prime(const double m[3][3], const double m2[3][3])
 {
   double sum;
 
   // terms holding an entry of m2[0][*]
   sum  = m2[0][0]*m[1][1]*m[2][2];
   sum -= m2[0][0]*m[1][2]*m[2][1];
   sum -= m[1][0]*m2[0][1]*m[2][2];
   sum += m[1][0]*m2[0][2]*m[2][1];
   sum += m[2][0]*m2[0][1]*m[1][2];
   sum -= m[2][0]*m2[0][2]*m[1][1];
 
   // terms holding an entry of m2[1][*]
   sum += m[0][0]*m2[1][1]*m[2][2];
   sum -= m[0][0]*m2[1][2]*m[2][1];
   sum -= m2[1][0]*m[0][1]*m[2][2];
   sum += m2[1][0]*m[0][2]*m[2][1];
   sum += m[2][0]*m[0][1]*m2[1][2];
   sum -= m[2][0]*m[0][2]*m2[1][1];
 
   // terms holding an entry of m2[2][*]
   sum += m[0][0]*m[1][1]*m2[2][2];
   sum -= m[0][0]*m[1][2]*m2[2][1];
   sum -= m[1][0]*m[0][1]*m2[2][2];
   sum += m[1][0]*m[0][2]*m2[2][1];
   sum += m2[2][0]*m[0][1]*m[1][2];
   sum -= m2[2][0]*m[0][2]*m[1][1];
 
   return sum;
 }
 
 /* ----------------------------------------------------------------------
    Compute the energy, force, torque for a pair (INTEGRATED-INTEGRATED)
 ------------------------------------------------------------------------- */
 
 double PairRESquared::resquared_analytic(const int i, const int j,
                                          const RE2Vars &wi, const RE2Vars &wj,
                                          const double *r, const double rsq,
                                          double *fforce, double *ttor,
                                          double *rtor)
 {
   int *type = atom->type;
 
   // pair computations for energy, force, torque
 
   double z1[3],z2[3];        // A1*rhat  # don't need to store
   double v1[3],v2[3];        // inv(S1^2)*z1 # don't need to store
   double sigma1,sigma2;      // 1/sqrt(z1'*v1)
   double sigma1p2,sigma2p2;  // sigma1^2
   double rnorm;              // L2 norm of r
   double rhat[3];            // r/rnorm
   double s[3];               // inv(gamma1+gamma2)*rhat
   double sigma12;            // 1/sqrt(0.5*s'*rhat)
   double H12[3][3];          // gamma1/sigma1+gamma2/sigma2
   double dH;                 // det(H12)
   double lambda;             // dS1/sigma1p2+dS2/sigma2p2
   double nu;                 // sqrt(dH/(sigma1+sigma2))
   double w[3];               // inv(A1'*E1*A1+A2'*E2*A2)*rhat
   double h12;                // rnorm-sigma12;
   double eta;                // lambda/nu
   double chi;                // 2*rhat'*w
   double sprod;              // dS1*dS2
   double sigh;               // sigma/h12
   double tprod;              // eta*chi*sigh
   double Ua,Ur;              // attractive/repulsive parts of potential
 
   // pair computations for force, torque
 
   double sec;                          // sigma*eta*chi
   double sigma1p3, sigma2p3;           // sigma1^3
   double vsigma1[3], vsigma2[3];       // sigma1^3*v1;
   double sigma12p3;                    // sigma12^3
   double gsigma1[3][3], gsigma2[3][3]; // -gamma1/sigma1^2
   double tsig1sig2;                    // eta/(2*(sigma1+sigma2))
   double tdH;                          // eta/(2*dH)
   double teta1,teta2;                  // 2*eta/lambda*dS1/sigma1p3
   double fourw[3];                     // 4*w;
   double spr[3];                       // 0.5*sigma12^3*s
   double hsec;                         // h12+[3,b_alpha]*sec
   double dspu;                         // 1/h12 - 1/hsec + temp
   double pbsu;                         // 3*sigma/hsec
   double dspr;                         // 7/h12-1/hsec+temp
   double pbsr;                         // b_alpha*sigma/hsec;
   double u[3];                         // (-rhat(i)*rhat+eye(:,i))/rnorm
   double u1[3],u2[3];                  // A1*u
   double dsigma1,dsigma2;              // u1'*vsigma1 (force) p'*vsigma1 (tor)
   double dH12[3][3];                   // dsigma1*gsigma1 + dsigma2*gsigma2
   double ddH;                          // derivative of det(H12)
   double deta,dchi,dh12;               // derivatives of eta,chi,h12
   double dUr,dUa;                      // derivatives of Ua,Ur
 
   // pair computations for torque
 
   double fwae[3];        // -fourw'*aTe
   double p[3];           // lA*rhat
 
   rnorm = sqrt(rsq);
   rhat[0] = r[0]/rnorm;
   rhat[1] = r[1]/rnorm;
   rhat[2] = r[2]/rnorm;
 
   // energy
 
   double temp[3][3];
   MathExtra::plus3(wi.gamma,wj.gamma,temp);
   int ierror = MathExtra::mldivide3(temp,rhat,s);
   if (ierror) error->all(FLERR,"Bad matrix inversion in mldivide3");
 
   sigma12 = 1.0/sqrt(0.5*MathExtra::dot3(s,rhat));
   MathExtra::matvec(wi.A,rhat,z1);
   MathExtra::matvec(wj.A,rhat,z2);
   v1[0] = z1[0]/shape2[type[i]][0];
   v1[1] = z1[1]/shape2[type[i]][1];
   v1[2] = z1[2]/shape2[type[i]][2];
   v2[0] = z2[0]/shape2[type[j]][0];
   v2[1] = z2[1]/shape2[type[j]][1];
   v2[2] = z2[2]/shape2[type[j]][2];
   sigma1 = 1.0/sqrt(MathExtra::dot3(z1,v1));
   sigma2 = 1.0/sqrt(MathExtra::dot3(z2,v2));
   H12[0][0] = wi.gamma[0][0]/sigma1+wj.gamma[0][0]/sigma2;
   H12[0][1] = wi.gamma[0][1]/sigma1+wj.gamma[0][1]/sigma2;
   H12[0][2] = wi.gamma[0][2]/sigma1+wj.gamma[0][2]/sigma2;
   H12[1][0] = wi.gamma[1][0]/sigma1+wj.gamma[1][0]/sigma2;
   H12[1][1] = wi.gamma[1][1]/sigma1+wj.gamma[1][1]/sigma2;
   H12[1][2] = wi.gamma[1][2]/sigma1+wj.gamma[1][2]/sigma2;
   H12[2][0] = wi.gamma[2][0]/sigma1+wj.gamma[2][0]/sigma2;
   H12[2][1] = wi.gamma[2][1]/sigma1+wj.gamma[2][1]/sigma2;
   H12[2][2] = wi.gamma[2][2]/sigma1+wj.gamma[2][2]/sigma2;
   dH=MathExtra::det3(H12);
   sigma1p2 = sigma1*sigma1;
   sigma2p2 = sigma2*sigma2;
   lambda = lshape[type[i]]/sigma1p2 + lshape[type[j]]/sigma2p2;
   nu = sqrt(dH/(sigma1+sigma2));
   MathExtra::times3(wi.aTe,wi.A,temp);
   double temp2[3][3];
   MathExtra::times3(wj.aTe,wj.A,temp2);
   MathExtra::plus3(temp,temp2,temp);
   ierror = MathExtra::mldivide3(temp,rhat,w);
   if (ierror) error->all(FLERR,"Bad matrix inversion in mldivide3");
 
   h12 = rnorm-sigma12;
   eta = lambda/nu;
   chi = 2.0*MathExtra::dot3(rhat,w);
   sprod = lshape[type[i]] * lshape[type[j]];
   sigh = sigma[type[i]][type[j]]/h12;
   tprod = eta*chi*sigh;
 
   double stemp = h12/2.0;
   Ua = (shape1[type[i]][0]+stemp)*(shape1[type[i]][1]+stemp)*
        (shape1[type[i]][2]+stemp)*(shape1[type[j]][0]+stemp)*
        (shape1[type[j]][1]+stemp)*(shape1[type[j]][2]+stemp);
   Ua = (1.0+3.0*tprod)*sprod/Ua;
   Ua = epsilon[type[i]][type[j]]*Ua/-36.0;
 
   stemp = h12/cr60;
   Ur = (shape1[type[i]][0]+stemp)*(shape1[type[i]][1]+stemp)*
        (shape1[type[i]][2]+stemp)*(shape1[type[j]][0]+stemp)*
        (shape1[type[j]][1]+stemp)*(shape1[type[j]][2]+stemp);
   Ur = (1.0+b_alpha*tprod)*sprod/Ur;
   Ur = epsilon[type[i]][type[j]]*Ur*pow(sigh,6.0)/2025.0;
 
   // force
 
   sec = sigma[type[i]][type[j]]*eta*chi;
   sigma12p3 = pow(sigma12,3.0);
   sigma1p3 = sigma1p2*sigma1;
   sigma2p3 = sigma2p2*sigma2;
   vsigma1[0] = -sigma1p3*v1[0];
   vsigma1[1] = -sigma1p3*v1[1];
   vsigma1[2] = -sigma1p3*v1[2];
   vsigma2[0] = -sigma2p3*v2[0];
   vsigma2[1] = -sigma2p3*v2[1];
   vsigma2[2] = -sigma2p3*v2[2];
   gsigma1[0][0] = -wi.gamma[0][0]/sigma1p2;
   gsigma1[0][1] = -wi.gamma[0][1]/sigma1p2;
   gsigma1[0][2] = -wi.gamma[0][2]/sigma1p2;
   gsigma1[1][0] = -wi.gamma[1][0]/sigma1p2;
   gsigma1[1][1] = -wi.gamma[1][1]/sigma1p2;
   gsigma1[1][2] = -wi.gamma[1][2]/sigma1p2;
   gsigma1[2][0] = -wi.gamma[2][0]/sigma1p2;
   gsigma1[2][1] = -wi.gamma[2][1]/sigma1p2;
   gsigma1[2][2] = -wi.gamma[2][2]/sigma1p2;
   gsigma2[0][0] = -wj.gamma[0][0]/sigma2p2;
   gsigma2[0][1] = -wj.gamma[0][1]/sigma2p2;
   gsigma2[0][2] = -wj.gamma[0][2]/sigma2p2;
   gsigma2[1][0] = -wj.gamma[1][0]/sigma2p2;
   gsigma2[1][1] = -wj.gamma[1][1]/sigma2p2;
   gsigma2[1][2] = -wj.gamma[1][2]/sigma2p2;
   gsigma2[2][0] = -wj.gamma[2][0]/sigma2p2;
   gsigma2[2][1] = -wj.gamma[2][1]/sigma2p2;
   gsigma2[2][2] = -wj.gamma[2][2]/sigma2p2;
   tsig1sig2 = eta/(2.0*(sigma1+sigma2));
   tdH = eta/(2.0*dH);
   teta1 = 2.0*eta/lambda;
   teta2 = teta1*lshape[type[j]]/sigma2p3;
   teta1 = teta1*lshape[type[i]]/sigma1p3;
   fourw[0] = 4.0*w[0];
   fourw[1] = 4.0*w[1];
   fourw[2] = 4.0*w[2];
   spr[0] = 0.5*sigma12p3*s[0];
   spr[1] = 0.5*sigma12p3*s[1];
   spr[2] = 0.5*sigma12p3*s[2];
 
   stemp = 1.0/(shape1[type[i]][0]*2.0+h12)+
           1.0/(shape1[type[i]][1]*2.0+h12)+
           1.0/(shape1[type[i]][2]*2.0+h12)+
           1.0/(shape1[type[j]][0]*2.0+h12)+
           1.0/(shape1[type[j]][1]*2.0+h12)+
           1.0/(shape1[type[j]][2]*2.0+h12);
   hsec = h12+3.0*sec;
   dspu = 1.0/h12-1.0/hsec+stemp;
   pbsu = 3.0*sigma[type[i]][type[j]]/hsec;
 
   stemp = 1.0/(shape1[type[i]][0]*cr60+h12)+
           1.0/(shape1[type[i]][1]*cr60+h12)+
           1.0/(shape1[type[i]][2]*cr60+h12)+
           1.0/(shape1[type[j]][0]*cr60+h12)+
           1.0/(shape1[type[j]][1]*cr60+h12)+
           1.0/(shape1[type[j]][2]*cr60+h12);
   hsec = h12+b_alpha*sec;
   dspr = 7.0/h12-1.0/hsec+stemp;
   pbsr = b_alpha*sigma[type[i]][type[j]]/hsec;
 
   for (int i=0; i<3; i++) {
     u[0] = -rhat[i]*rhat[0];
     u[1] = -rhat[i]*rhat[1];
     u[2] = -rhat[i]*rhat[2];
     u[i] += 1.0;
     u[0] /= rnorm;
     u[1] /= rnorm;
     u[2] /= rnorm;
     MathExtra::matvec(wi.A,u,u1);
     MathExtra::matvec(wj.A,u,u2);
     dsigma1=MathExtra::dot3(u1,vsigma1);
     dsigma2=MathExtra::dot3(u2,vsigma2);
     dH12[0][0] = dsigma1*gsigma1[0][0]+dsigma2*gsigma2[0][0];
     dH12[0][1] = dsigma1*gsigma1[0][1]+dsigma2*gsigma2[0][1];
     dH12[0][2] = dsigma1*gsigma1[0][2]+dsigma2*gsigma2[0][2];
     dH12[1][0] = dsigma1*gsigma1[1][0]+dsigma2*gsigma2[1][0];
     dH12[1][1] = dsigma1*gsigma1[1][1]+dsigma2*gsigma2[1][1];
     dH12[1][2] = dsigma1*gsigma1[1][2]+dsigma2*gsigma2[1][2];
     dH12[2][0] = dsigma1*gsigma1[2][0]+dsigma2*gsigma2[2][0];
     dH12[2][1] = dsigma1*gsigma1[2][1]+dsigma2*gsigma2[2][1];
     dH12[2][2] = dsigma1*gsigma1[2][2]+dsigma2*gsigma2[2][2];
     ddH = det_prime(H12,dH12);
     deta = (dsigma1+dsigma2)*tsig1sig2;
     deta -= ddH*tdH;
     deta -= dsigma1*teta1+dsigma2*teta2;
     dchi = MathExtra::dot3(u,fourw);
     dh12 = rhat[i]+MathExtra::dot3(u,spr);
     dUa = pbsu*(eta*dchi+deta*chi)-dh12*dspu;
     dUr = pbsr*(eta*dchi+deta*chi)-dh12*dspr;
     fforce[i]=dUr*Ur+dUa*Ua;
   }
 
   // torque on i
 
   MathExtra::vecmat(fourw,wi.aTe,fwae);
 
   for (int i=0; i<3; i++) {
     MathExtra::matvec(wi.lA[i],rhat,p);
     dsigma1 = MathExtra::dot3(p,vsigma1);
     dH12[0][0] = wi.lAsa[i][0][0]/sigma1+dsigma1*gsigma1[0][0];
     dH12[0][1] = wi.lAsa[i][0][1]/sigma1+dsigma1*gsigma1[0][1];
     dH12[0][2] = wi.lAsa[i][0][2]/sigma1+dsigma1*gsigma1[0][2];
     dH12[1][0] = wi.lAsa[i][1][0]/sigma1+dsigma1*gsigma1[1][0];
     dH12[1][1] = wi.lAsa[i][1][1]/sigma1+dsigma1*gsigma1[1][1];
     dH12[1][2] = wi.lAsa[i][1][2]/sigma1+dsigma1*gsigma1[1][2];
     dH12[2][0] = wi.lAsa[i][2][0]/sigma1+dsigma1*gsigma1[2][0];
     dH12[2][1] = wi.lAsa[i][2][1]/sigma1+dsigma1*gsigma1[2][1];
     dH12[2][2] = wi.lAsa[i][2][2]/sigma1+dsigma1*gsigma1[2][2];
     ddH = det_prime(H12,dH12);
     deta = tsig1sig2*dsigma1-tdH*ddH;
     deta -= teta1*dsigma1;
     double tempv[3];
     MathExtra::matvec(wi.lA[i],w,tempv);
     dchi = -MathExtra::dot3(fwae,tempv);
     MathExtra::matvec(wi.lAtwo[i],spr,tempv);
     dh12 = -MathExtra::dot3(s,tempv);
 
     dUa = pbsu*(eta*dchi + deta*chi)-dh12*dspu;
     dUr = pbsr*(eta*dchi + deta*chi)-dh12*dspr;
     ttor[i] = -(dUa*Ua+dUr*Ur);
   }
 
   // torque on j
 
   if (!(force->newton_pair || j < atom->nlocal))
     return Ua+Ur;
 
   MathExtra::vecmat(fourw,wj.aTe,fwae);
 
   for (int i=0; i<3; i++) {
     MathExtra::matvec(wj.lA[i],rhat,p);
     dsigma2 = MathExtra::dot3(p,vsigma2);
     dH12[0][0] = wj.lAsa[i][0][0]/sigma2+dsigma2*gsigma2[0][0];
     dH12[0][1] = wj.lAsa[i][0][1]/sigma2+dsigma2*gsigma2[0][1];
     dH12[0][2] = wj.lAsa[i][0][2]/sigma2+dsigma2*gsigma2[0][2];
     dH12[1][0] = wj.lAsa[i][1][0]/sigma2+dsigma2*gsigma2[1][0];
     dH12[1][1] = wj.lAsa[i][1][1]/sigma2+dsigma2*gsigma2[1][1];
     dH12[1][2] = wj.lAsa[i][1][2]/sigma2+dsigma2*gsigma2[1][2];
     dH12[2][0] = wj.lAsa[i][2][0]/sigma2+dsigma2*gsigma2[2][0];
     dH12[2][1] = wj.lAsa[i][2][1]/sigma2+dsigma2*gsigma2[2][1];
     dH12[2][2] = wj.lAsa[i][2][2]/sigma2+dsigma2*gsigma2[2][2];
     ddH = det_prime(H12,dH12);
     deta = tsig1sig2*dsigma2-tdH*ddH;
     deta -= teta2*dsigma2;
     double tempv[3];
     MathExtra::matvec(wj.lA[i],w,tempv);
     dchi = -MathExtra::dot3(fwae,tempv);
     MathExtra::matvec(wj.lAtwo[i],spr,tempv);
     dh12 = -MathExtra::dot3(s,tempv);
 
     dUa = pbsu*(eta*dchi + deta*chi)-dh12*dspu;
     dUr = pbsr*(eta*dchi + deta*chi)-dh12*dspr;
     rtor[i] = -(dUa*Ua+dUr*Ur);
   }
 
   return Ua+Ur;
 }
 
 /* ----------------------------------------------------------------------
    Compute the energy, force, torque for a pair (INTEGRATED-LJ)
 ------------------------------------------------------------------------- */
 
 double PairRESquared::resquared_lj(const int i, const int j,
                                    const RE2Vars &wi, const double *r,
                                    const double rsq, double *fforce,
                                    double *ttor, bool calc_torque)
 {
   int *type = atom->type;
 
   // pair computations for energy, force, torque
 
   double rnorm;              // L2 norm of r
   double rhat[3];            // r/rnorm
   double s[3];               // inv(gamma1)*rhat
   double sigma12;            // 1/sqrt(0.5*s'*rhat)
   double w[3];               // inv(A1'*E1*A1+I)*rhat
   double h12;                // rnorm-sigma12;
   double chi;                // 2*rhat'*w
   double sigh;               // sigma/h12
   double tprod;              // chi*sigh
   double Ua,Ur;              // attractive/repulsive parts of potential
 
   // pair computations for force, torque
 
   double sec;                          // sigma*chi
   double sigma12p3;                    // sigma12^3
   double fourw[3];                     // 4*w;
   double spr[3];                       // 0.5*sigma12^3*s
   double hsec;                         // h12+[3,b_alpha]*sec
   double dspu;                         // 1/h12 - 1/hsec + temp
   double pbsu;                         // 3*sigma/hsec
   double dspr;                         // 7/h12-1/hsec+temp
   double pbsr;                         // b_alpha*sigma/hsec;
   double u[3];                         // (-rhat(i)*rhat+eye(:,i))/rnorm
   double dchi,dh12;                    // derivatives of chi,h12
   double dUr,dUa;                      // derivatives of Ua,Ur
   double h12p3;                        // h12^3
 
   // pair computations for torque
 
   double fwae[3];        // -fourw'*aTe
   double p[3];           // lA*rhat
 
   // distance of closest approach correction
 
   double aTs[3][3];       // A1'*S1^2
   double gamma[3][3];     // A1'*S1^2*A
   double lAtwo[3][3][3];  // A1'*S1^2*wi.lA
   double scorrect[3];
   double half_sigma=sigma[type[i]][type[j]] / 2.0;
   scorrect[0] = shape1[type[i]][0]+half_sigma;
   scorrect[1] = shape1[type[i]][1]+half_sigma;
   scorrect[2] = shape1[type[i]][2]+half_sigma;
   scorrect[0] = scorrect[0] * scorrect[0] / 2.0;
   scorrect[1] = scorrect[1] * scorrect[1] / 2.0;
   scorrect[2] = scorrect[2] * scorrect[2] / 2.0;
   MathExtra::transpose_diag3(wi.A,scorrect,aTs);
   MathExtra::times3(aTs,wi.A,gamma);
   for (int ii=0; ii<3; ii++)
     MathExtra::times3(aTs,wi.lA[ii],lAtwo[ii]);
 
   rnorm=sqrt(rsq);
   rhat[0] = r[0]/rnorm;
   rhat[1] = r[1]/rnorm;
   rhat[2] = r[2]/rnorm;
 
   // energy
 
   int ierror = MathExtra::mldivide3(gamma,rhat,s);
   if (ierror) error->all(FLERR,"Bad matrix inversion in mldivide3");
 
   sigma12 = 1.0/sqrt(0.5*MathExtra::dot3(s,rhat));
   double temp[3][3];
   MathExtra::times3(wi.aTe,wi.A,temp);
   temp[0][0] += 1.0;
   temp[1][1] += 1.0;
   temp[2][2] += 1.0;
   ierror = MathExtra::mldivide3(temp,rhat,w);
   if (ierror) error->all(FLERR,"Bad matrix inversion in mldivide3");
 
   h12 = rnorm-sigma12;
   chi = 2.0*MathExtra::dot3(rhat,w);
   sigh = sigma[type[i]][type[j]]/h12;
   tprod = chi*sigh;
 
   h12p3 = pow(h12,3.0);
   double sigmap3 = pow(sigma[type[i]][type[j]],3.0);
   double stemp = h12/2.0;
   Ua = (shape1[type[i]][0]+stemp)*(shape1[type[i]][1]+stemp)*
        (shape1[type[i]][2]+stemp)*h12p3/8.0;
   Ua = (1.0+3.0*tprod)*lshape[type[i]]/Ua;
   Ua = epsilon[type[i]][type[j]]*Ua*sigmap3*solv_f_a;
 
   stemp = h12/cr60;
   Ur = (shape1[type[i]][0]+stemp)*(shape1[type[i]][1]+stemp)*
        (shape1[type[i]][2]+stemp)*h12p3/60.0;
   Ur = (1.0+b_alpha*tprod)*lshape[type[i]]/Ur;
   Ur = epsilon[type[i]][type[j]]*Ur*sigmap3*pow(sigh,6.0)*solv_f_r;
 
   // force
 
   sec = sigma[type[i]][type[j]]*chi;
   sigma12p3 = pow(sigma12,3.0);
   fourw[0] = 4.0*w[0];
   fourw[1] = 4.0*w[1];
   fourw[2] = 4.0*w[2];
   spr[0] = 0.5*sigma12p3*s[0];
   spr[1] = 0.5*sigma12p3*s[1];
   spr[2] = 0.5*sigma12p3*s[2];
 
   stemp = 1.0/(shape1[type[i]][0]*2.0+h12)+
           1.0/(shape1[type[i]][1]*2.0+h12)+
           1.0/(shape1[type[i]][2]*2.0+h12)+
           3.0/h12;
   hsec = h12+3.0*sec;
   dspu = 1.0/h12-1.0/hsec+stemp;
   pbsu = 3.0*sigma[type[i]][type[j]]/hsec;
 
   stemp = 1.0/(shape1[type[i]][0]*cr60+h12)+
           1.0/(shape1[type[i]][1]*cr60+h12)+
           1.0/(shape1[type[i]][2]*cr60+h12)+
           3.0/h12;
   hsec = h12+b_alpha*sec;
   dspr = 7.0/h12-1.0/hsec+stemp;
   pbsr = b_alpha*sigma[type[i]][type[j]]/hsec;
 
   for (int i=0; i<3; i++) {
     u[0] = -rhat[i]*rhat[0];
     u[1] = -rhat[i]*rhat[1];
     u[2] = -rhat[i]*rhat[2];
     u[i] += 1.0;
     u[0] /= rnorm;
     u[1] /= rnorm;
     u[2] /= rnorm;
     dchi = MathExtra::dot3(u,fourw);
     dh12 = rhat[i]+MathExtra::dot3(u,spr);
     dUa = pbsu*dchi-dh12*dspu;
     dUr = pbsr*dchi-dh12*dspr;
     fforce[i]=dUr*Ur+dUa*Ua;
   }
 
   // torque on i
 
   if (calc_torque) {
     MathExtra::vecmat(fourw,wi.aTe,fwae);
 
     for (int i=0; i<3; i++) {
       MathExtra::matvec(wi.lA[i],rhat,p);
       double tempv[3];
       MathExtra::matvec(wi.lA[i],w,tempv);
       dchi = -MathExtra::dot3(fwae,tempv);
       MathExtra::matvec(lAtwo[i],spr,tempv);
       dh12 = -MathExtra::dot3(s,tempv);
 
       dUa = pbsu*dchi-dh12*dspu;
       dUr = pbsr*dchi-dh12*dspr;
       ttor[i] = -(dUa*Ua+dUr*Ur);
     }
   }
 
   return Ua+Ur;
 }
diff --git a/src/ASPHERE/pair_tri_lj.cpp b/src/ASPHERE/pair_tri_lj.cpp
index 72e6588a2..257341e60 100644
--- a/src/ASPHERE/pair_tri_lj.cpp
+++ b/src/ASPHERE/pair_tri_lj.cpp
@@ -1,646 +1,646 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_tri_lj.h"
 #include "math_extra.h"
 #include "atom.h"
 #include "atom_vec_tri.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 #define DELTA 20
 
 /* ---------------------------------------------------------------------- */
 
 PairTriLJ::PairTriLJ(LAMMPS *lmp) : Pair(lmp)
 {
   // no discrete sub-particles and no per-atom bookkeeping allocated yet
 
   discrete = NULL;
   dnum = NULL;
   dfirst = NULL;
   dmax = 0;
   nmax = 0;
 
   // flags inherited from Pair: both features are switched off
 
   single_enable = 0;
   restartinfo = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairTriLJ::~PairTriLJ()
 {
   // storage grown on demand in compute() and discretize()
 
   memory->sfree(discrete);
   memory->destroy(dnum);
   memory->destroy(dfirst);
 
   // per-type arrays exist only if allocate() was called
 
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   memory->destroy(cut);
   memory->destroy(epsilon);
   memory->destroy(sigma);
   memory->destroy(lj1);
   memory->destroy(lj2);
   memory->destroy(lj3);
   memory->destroy(lj4);
 }
 
 /* ---------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    compute forces and torques for all pairs in the neighbor list
    a triangle (tri[i] >= 0) is represented by a set of discrete LJ
      sub-particles, created on first use in this call via discretize()
    each pair falls into one of four cases:
      tri/tri, tri/particle, particle/tri, particle/particle
    eflag,vflag = energy and virial flags, passed on to ev_setup()
 ------------------------------------------------------------------------- */
 
 void PairTriLJ::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   int ni,nj,npi,npj,ifirst,jfirst;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r2inv,r6inv,term1,term2,sig,sig3,forcelj;
   double dxi,dxj,dyi,dyj,dzi,dzj;
   double xi[3],xj[3],fi[3],fj[3],ti[3],tj[3],p[3][3];
   double dc1[3],dc2[3],dc3[3];
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   AtomVecTri::Bonus *bonus = avec->bonus;
   double **x = atom->x;
   double **f = atom->f;
   double **torque = atom->torque;
   int *tri = atom->tri;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int nall = nlocal + atom->nghost;
   int newton_pair = force->newton_pair;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // grow discrete list if necessary and initialize
   // dnum[i] = 0 marks atom i as not yet discretized in this call
 
   if (nall > nmax) {
     nmax = nall;
     memory->destroy(dnum);
     memory->destroy(dfirst);
     memory->create(dnum,nall,"pair:dnum");
     memory->create(dfirst,nall,"pair:dfirst");
   }
   for (i = 0; i < nall; i++) dnum[i] = 0;
   ndiscrete = 0;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
 
       // cutoff test uses the distance between the two atom coordinates
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq >= cutsq[itype][jtype]) continue;
 
       // tri/tri interactions = NxN particles
       // c1,c2,c3 = corner pts of triangle I or J
 
       evdwl = 0.0;
       if (tri[i] >= 0 && tri[j] >= 0) {
         if (dnum[i] == 0) {
           MathExtra::quat_to_mat(bonus[tri[i]].quat,p);
           MathExtra::matvec(p,bonus[tri[i]].c1,dc1);
           MathExtra::matvec(p,bonus[tri[i]].c2,dc2);
           MathExtra::matvec(p,bonus[tri[i]].c3,dc3);
           dfirst[i] = ndiscrete;
           discretize(i,sigma[itype][itype],dc1,dc2,dc3);
           dnum[i] = ndiscrete - dfirst[i];
         }
         npi = dnum[i];
         ifirst = dfirst[i];
 
         if (dnum[j] == 0) {
           MathExtra::quat_to_mat(bonus[tri[j]].quat,p);
           MathExtra::matvec(p,bonus[tri[j]].c1,dc1);
           MathExtra::matvec(p,bonus[tri[j]].c2,dc2);
           MathExtra::matvec(p,bonus[tri[j]].c3,dc3);
           dfirst[j] = ndiscrete;
           discretize(j,sigma[jtype][jtype],dc1,dc2,dc3);
           dnum[j] = ndiscrete - dfirst[j];
         }
         npj = dnum[j];
         jfirst = dfirst[j];
 
         for (ni = 0; ni < npi; ni++) {
           dxi = discrete[ifirst+ni].dx;
           dyi = discrete[ifirst+ni].dy;
           dzi = discrete[ifirst+ni].dz;
 
           for (nj = 0; nj < npj; nj++) {
             dxj = discrete[jfirst+nj].dx;
             dyj = discrete[jfirst+nj].dy;
             dzj = discrete[jfirst+nj].dz;
 
             xi[0] = x[i][0] + dxi;
             xi[1] = x[i][1] + dyi;
             xi[2] = x[i][2] + dzi;
             xj[0] = x[j][0] + dxj;
             xj[1] = x[j][1] + dyj;
             xj[2] = x[j][2] + dzj;
 
             delx = xi[0] - xj[0];
             dely = xi[1] - xj[1];
             delz = xi[2] - xj[2];
             rsq = delx*delx + dely*dely + delz*delz;
 
             // LJ between two sub-particles, sig = mean of their sigmas
             // term1 = 48*eps*sig^12, term2 = 24*eps*sig^6
 
             sig = 0.5 * (discrete[ifirst+ni].sigma+discrete[jfirst+nj].sigma);
             sig3 = sig*sig*sig;
             term2 = 24.0*epsilon[itype][jtype] * sig3*sig3;
             term1 = 2.0 * term2 * sig3*sig3;
             r2inv = 1.0/rsq;
             r6inv = r2inv*r2inv*r2inv;
             forcelj = r6inv * (term1*r6inv - term2);
             fpair = forcelj*r2inv;
 
             if (eflag) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0);
 
             // force on I plus torque = (sub-particle offset) x (force)
 
             fi[0] = delx*fpair;
             fi[1] = dely*fpair;
             fi[2] = delz*fpair;
             f[i][0] += fi[0];
             f[i][1] += fi[1];
             f[i][2] += fi[2];
             ti[0] = dyi*fi[2] - dzi*fi[1];
             ti[1] = dzi*fi[0] - dxi*fi[2];
             ti[2] = dxi*fi[1] - dyi*fi[0];
             torque[i][0] += ti[0];
             torque[i][1] += ti[1];
             torque[i][2] += ti[2];
 
             if (newton_pair || j < nlocal) {
               fj[0] = -delx*fpair;
               fj[1] = -dely*fpair;
               fj[2] = -delz*fpair;
               f[j][0] += fj[0];
               f[j][1] += fj[1];
               f[j][2] += fj[2];
               tj[0] = dyj*fj[2] - dzj*fj[1];
               tj[1] = dzj*fj[0] - dxj*fj[2];
               tj[2] = dxj*fj[1] - dyj*fj[0];
               torque[j][0] += tj[0];
               torque[j][1] += tj[1];
               torque[j][2] += tj[2];
             }
           }
         }
 
       // tri/particle interaction = Nx1 particles
       // c1,c2,c3 = corner pts of triangle I
 
       } else if (tri[i] >= 0) {
 
         if (dnum[i] == 0) {
           MathExtra::quat_to_mat(bonus[tri[i]].quat,p);
           MathExtra::matvec(p,bonus[tri[i]].c1,dc1);
           MathExtra::matvec(p,bonus[tri[i]].c2,dc2);
           MathExtra::matvec(p,bonus[tri[i]].c3,dc3);
           dfirst[i] = ndiscrete;
           discretize(i,sigma[itype][itype],dc1,dc2,dc3);
           dnum[i] = ndiscrete - dfirst[i];
         }
         npi = dnum[i];
         ifirst = dfirst[i];
 
         for (ni = 0; ni < npi; ni++) {
           dxi = discrete[ifirst+ni].dx;
           dyi = discrete[ifirst+ni].dy;
           dzi = discrete[ifirst+ni].dz;
 
           xi[0] = x[i][0] + dxi;
           xi[1] = x[i][1] + dyi;
           xi[2] = x[i][2] + dzi;
           xj[0] = x[j][0];
           xj[1] = x[j][1];
           xj[2] = x[j][2];
 
           delx = xi[0] - xj[0];
           dely = xi[1] - xj[1];
           delz = xi[2] - xj[2];
           rsq = delx*delx + dely*dely + delz*delz;
 
           sig = 0.5 * (discrete[ifirst+ni].sigma+sigma[jtype][jtype]);
           sig3 = sig*sig*sig;
           term2 = 24.0*epsilon[itype][jtype] * sig3*sig3;
           term1 = 2.0 * term2 * sig3*sig3;
           r2inv = 1.0/rsq;
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (term1*r6inv - term2);
           fpair = forcelj*r2inv;
 
           if (eflag) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0);
 
           fi[0] = delx*fpair;
           fi[1] = dely*fpair;
           fi[2] = delz*fpair;
           f[i][0] += fi[0];
           f[i][1] += fi[1];
           f[i][2] += fi[2];
           ti[0] = dyi*fi[2] - dzi*fi[1];
           ti[1] = dzi*fi[0] - dxi*fi[2];
           ti[2] = dxi*fi[1] - dyi*fi[0];
 
           // each torque component goes to its own axis, as in tri/tri case
           // (the x component was previously added to torque[i][2])
 
           torque[i][0] += ti[0];
           torque[i][1] += ti[1];
           torque[i][2] += ti[2];
 
           // J is a point particle: it receives force but no torque
 
           if (newton_pair || j < nlocal) {
             fj[0] = -delx*fpair;
             fj[1] = -dely*fpair;
             fj[2] = -delz*fpair;
             f[j][0] += fj[0];
             f[j][1] += fj[1];
             f[j][2] += fj[2];
           }
         }
 
       // particle/tri interaction = Nx1 particles
       // c1,c2,c3 = corner pts of triangle J
 
       } else if (tri[j] >= 0) {
         if (dnum[j] == 0) {
           MathExtra::quat_to_mat(bonus[tri[j]].quat,p);
           MathExtra::matvec(p,bonus[tri[j]].c1,dc1);
           MathExtra::matvec(p,bonus[tri[j]].c2,dc2);
           MathExtra::matvec(p,bonus[tri[j]].c3,dc3);
           dfirst[j] = ndiscrete;
           discretize(j,sigma[jtype][jtype],dc1,dc2,dc3);
           dnum[j] = ndiscrete - dfirst[j];
         }
         npj = dnum[j];
         jfirst = dfirst[j];
 
         for (nj = 0; nj < npj; nj++) {
           dxj = discrete[jfirst+nj].dx;
           dyj = discrete[jfirst+nj].dy;
           dzj = discrete[jfirst+nj].dz;
 
           xi[0] = x[i][0];
           xi[1] = x[i][1];
           xi[2] = x[i][2];
           xj[0] = x[j][0] + dxj;
           xj[1] = x[j][1] + dyj;
           xj[2] = x[j][2] + dzj;
 
           delx = xi[0] - xj[0];
           dely = xi[1] - xj[1];
           delz = xi[2] - xj[2];
           rsq = delx*delx + dely*dely + delz*delz;
 
           sig = 0.5 * (sigma[itype][itype]+discrete[jfirst+nj].sigma);
           sig3 = sig*sig*sig;
           term2 = 24.0*epsilon[itype][jtype] * sig3*sig3;
           term1 = 2.0 * term2 * sig3*sig3;
           r2inv = 1.0/rsq;
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (term1*r6inv - term2);
           fpair = forcelj*r2inv;
 
           if (eflag) evdwl += r6inv*(term1/12.0*r6inv-term2/6.0);
 
           // I is a point particle: it receives force but no torque
 
           fi[0] = delx*fpair;
           fi[1] = dely*fpair;
           fi[2] = delz*fpair;
           f[i][0] += fi[0];
           f[i][1] += fi[1];
           f[i][2] += fi[2];
 
           if (newton_pair || j < nlocal) {
             fj[0] = -delx*fpair;
             fj[1] = -dely*fpair;
             fj[2] = -delz*fpair;
             f[j][0] += fj[0];
             f[j][1] += fj[1];
             f[j][2] += fj[2];
             tj[0] = dyj*fj[2] - dzj*fj[1];
             tj[1] = dzj*fj[0] - dxj*fj[2];
             tj[2] = dxj*fj[1] - dyj*fj[0];
             torque[j][0] += tj[0];
             torque[j][1] += tj[1];
             torque[j][2] += tj[2];
           }
         }
 
       // particle/particle interaction = 1x1 particles
       // uses the per-type lj1..lj4 coefficients set in init_one()
 
       } else {
         r2inv = 1.0/rsq;
         r6inv = r2inv*r2inv*r2inv;
         forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
         fpair = forcelj*r2inv;
 
         if (eflag)
           evdwl += r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
       }
 
       // NOTE(review): for the triangle cases fpair,delx,dely,delz hold the
       // values of the last sub-particle pair evaluated above
 
       if (evflag) ev_tally(i,j,nlocal,newton_pair,
                            evdwl,0.0,fpair,delx,dely,delz);
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairTriLJ::allocate()
 {
   // per-type arrays are indexed 1..ntypes, hence ntypes+1 in each dimension
 
   const int ntypes = atom->ntypes;
   const int dim = ntypes + 1;
 
   allocated = 1;
 
   // setflag: only the upper triangle (j >= i) is cleared here
 
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   // coefficients specific to this pair style
 
   memory->create(cut,dim,dim,"pair:cut");
   memory->create(epsilon,dim,dim,"pair:epsilon");
   memory->create(sigma,dim,dim,"pair:sigma");
   memory->create(lj1,dim,dim,"pair:lj1");
   memory->create(lj2,dim,dim,"pair:lj2");
   memory->create(lj3,dim,dim,"pair:lj3");
   memory->create(lj4,dim,dim,"pair:lj4");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairTriLJ::settings(int narg, char **arg)
 {
   // exactly one argument is accepted: the global cutoff
 
   if (narg != 1) error->all(FLERR,"Illegal pair_style command");
 
   cut_global = force->numeric(FLERR,arg[0]);
 
   // reset cutoffs that have been explicitly set
   // loop starts at j = i so that diagonal entries cut[i][i], which coeff()
   //   also sets (its j loop starts at MAX(jlo,i)), are reset as well
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut[i][j] = cut_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairTriLJ::coeff(int narg, char **arg)
 {
   // expected args: itype-range jtype-range epsilon sigma [cutoff]
 
   if (narg != 4 && narg != 5)
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   // expand the two type-range arguments into inclusive bounds
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   const double epsilon_one = force->numeric(FLERR,arg[2]);
   const double sigma_one = force->numeric(FLERR,arg[3]);
 
   // the cutoff falls back to the global value when not given
 
   double cut_one = cut_global;
   if (narg == 5) cut_one = force->numeric(FLERR,arg[4]);
 
   // store values for every pair with j >= i and count how many were set
 
   int count = 0;
   for (int itype = ilo; itype <= ihi; itype++) {
     const int jstart = MAX(jlo,itype);
     for (int jtype = jstart; jtype <= jhi; jtype++) {
       epsilon[itype][jtype] = epsilon_one;
       sigma[itype][jtype] = sigma_one;
       cut[itype][jtype] = cut_one;
       setflag[itype][jtype] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairTriLJ::init_style()
 {
   // look up atom style "tri" and keep it in avec (compute() reads
   // avec->bonus); stop with an error if no such atom style is found
   avec = (AtomVecTri *) atom->style_match("tri");
   if (!avec) error->all(FLERR,"Pair tri/lj requires atom style tri");
 
   // issue this pair style's request to the neighbor object
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 }
  
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairTriLJ::init_one(int i, int j)
 {
   // pair not set explicitly: mix the i,i and j,j values
 
   if (!setflag[i][j]) {
     epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j],
                                sigma[i][i],sigma[j][j]);
     sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
     cut[i][j] = mix_distance(cut[i][i],cut[j][j]);
   }
 
   // precomputed LJ prefactors: lj1,lj2 are used for the force and
   // lj3,lj4 for the energy in compute()
 
   const double eps = epsilon[i][j];
   const double sig6 = pow(sigma[i][j],6.0);
   const double sig12 = pow(sigma[i][j],12.0);
 
   lj1[i][j] = 48.0 * eps * sig12;
   lj2[i][j] = 24.0 * eps * sig6;
   lj3[i][j] = 4.0 * eps * sig12;
   lj4[i][j] = 4.0 * eps * sig6;
 
   // copy into the j,i entries
 
   epsilon[j][i] = epsilon[i][j];
   sigma[j][i] = sigma[i][j];
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   lj4[j][i] = lj4[i][j];
 
   return cut[i][j];
 }
 
 /* ----------------------------------------------------------------------
    recursively discretize triangle I with displaced corners c1,c2,c3
    into N sub-tris no more than sigma in size
    recurse by making 2 tris via bisecting longest side
    store new discrete particles in Discrete list
 ------------------------------------------------------------------------- */
 
 void PairTriLJ::discretize(int i, double sigma,
                           double *c1, double *c2, double *c3)
 {
   double centroid[3],dc1[3],dc2[3],dc3[3];
 
   centroid[0] = (c1[0] + c2[0] + c3[0]) / 3.0;
   centroid[1] = (c1[1] + c2[1] + c3[1]) / 3.0;
   centroid[2] = (c1[2] + c2[2] + c3[2]) / 3.0;
 
   MathExtra::sub3(c1,centroid,dc1);
   MathExtra::sub3(c2,centroid,dc2);
   MathExtra::sub3(c3,centroid,dc3);
 
   double sigmasq = 0.25 * sigma*sigma;
   double len1sq = MathExtra::lensq3(dc1);
   double len2sq = MathExtra::lensq3(dc2);
   double len3sq = MathExtra::lensq3(dc3);
 
   // if sigma sphere overlaps all corner points, add particle at centroid
 
   if ((len1sq <= sigmasq) && (len2sq <= sigmasq) && (len3sq <= sigmasq)) {
     if (ndiscrete == dmax) {
       dmax += DELTA;
       discrete = (Discrete *)
         memory->srealloc(discrete,dmax*sizeof(Discrete),"pair:discrete");
     }
     discrete[ndiscrete].dx = centroid[0];
     discrete[ndiscrete].dy = centroid[1];
     discrete[ndiscrete].dz = centroid[2];
     sigmasq = MAX(len1sq,len2sq);
     sigmasq = MAX(sigmasq,len3sq);
     discrete[ndiscrete].sigma = 2.0 * sqrt(sigmasq);
     ndiscrete++;
     return;
   }
 
   // else break triangle into 2 sub-triangles and recurse
 
   double c12[3],c23[3],c13[3],mid[3];
 
   MathExtra::sub3(c2,c3,c23);
   len1sq = MathExtra::lensq3(c23);
   MathExtra::sub3(c1,c3,c13);
   len2sq = MathExtra::lensq3(c13);
   MathExtra::sub3(c1,c2,c12);
   len3sq = MathExtra::lensq3(c12);
 
   double maxsq = MAX(len1sq,len2sq);
   maxsq = MAX(maxsq,len3sq);
 
   if (len1sq == maxsq) {
     MathExtra::add3(c2,c3,mid);
     MathExtra::scale3(0.5,mid);
     discretize(i,sigma,c1,c2,mid);
     discretize(i,sigma,c1,c3,mid);
   } else if (len2sq == maxsq) {
     MathExtra::add3(c1,c3,mid);
     MathExtra::scale3(0.5,mid);
     discretize(i,sigma,c2,c1,mid);
     discretize(i,sigma,c2,c3,mid);
   } else {
     MathExtra::add3(c1,c2,mid);
     MathExtra::scale3(0.5,mid);
     discretize(i,sigma,c3,c1,mid);
     discretize(i,sigma,c3,c2,mid);
   }
 }
 
 /* ----------------------------------------------------------------------
    recursively discretize triangle I with displaced corners c1,c2,c3
    into N sub-tris no more than sigma in size
    recurse by making 6 tris via centroid
    store new discrete particles in Discrete list
 ------------------------------------------------------------------------- */
 
 /*
 void PairTriLJ::discretize(int i, double sigma,
                           double *c1, double *c2, double *c3)
 {
   double centroid[3],dc1[3],dc2[3],dc3[3];
 
   centroid[0] = (c1[0] + c2[0] + c3[0]) / 3.0;
   centroid[1] = (c1[1] + c2[1] + c3[1]) / 3.0;
   centroid[2] = (c1[2] + c2[2] + c3[2]) / 3.0;
 
   MathExtra::sub3(c1,centroid,dc1);
   MathExtra::sub3(c2,centroid,dc2);
   MathExtra::sub3(c3,centroid,dc3);
 
   double sigmasq = 0.25 * sigma*sigma;
   double len1sq = MathExtra::lensq3(dc1);
   double len2sq = MathExtra::lensq3(dc2);
   double len3sq = MathExtra::lensq3(dc3);
 
   // if sigma sphere overlaps all corner points, add particle at centroid
 
   if (len1sq <= sigmasq && len2sq <= sigmasq & len3sq <= sigmasq) {
     if (ndiscrete == dmax) {
       dmax += DELTA;
       discrete = (Discrete *)
         memory->srealloc(discrete,dmax*sizeof(Discrete),"pair:discrete");
     }
     discrete[ndiscrete].dx = centroid[0];
     discrete[ndiscrete].dy = centroid[1];
     discrete[ndiscrete].dz = centroid[2];
     sigmasq = MAX(len1sq,len2sq);
     sigmasq = MAX(sigmasq,len3sq);
     discrete[ndiscrete].sigma = 2.0 * sqrt(sigmasq);
     ndiscrete++;
     return;
   }
 
   // else break triangle into 6 sub-triangles and recurse
 
   double c1c2mid[3],c2c3mid[3],c1c3mid[3];
 
   MathExtra::add3(c1,c2,c1c2mid);
   MathExtra::scale3(0.5,c1c2mid);
   MathExtra::add3(c2,c3,c2c3mid);
   MathExtra::scale3(0.5,c2c3mid);
   MathExtra::add3(c1,c3,c1c3mid);
   MathExtra::scale3(0.5,c1c3mid);
 
   discretize(i,sigma,c1,c1c2mid,centroid);
   discretize(i,sigma,c1,c1c3mid,centroid);
   discretize(i,sigma,c2,c2c3mid,centroid);
   discretize(i,sigma,c2,c1c2mid,centroid);
   discretize(i,sigma,c3,c1c3mid,centroid);
   discretize(i,sigma,c3,c2c3mid,centroid);
 }
 
 */
diff --git a/src/BODY/pair_body.cpp b/src/BODY/pair_body.cpp
index 494bc4924..4d565e0ed 100644
--- a/src/BODY/pair_body.cpp
+++ b/src/BODY/pair_body.cpp
@@ -1,484 +1,484 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_body.h"
 #include "math_extra.h"
 #include "atom.h"
 #include "atom_vec_body.h"
 #include "body_nparticle.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 #define DELTA 10000
 
 /* ---------------------------------------------------------------------- */
 
 PairBody::PairBody(LAMMPS *lmp) : Pair(lmp)
 {
   // sub-particle work arrays start out empty; compute() sizes them on demand
 
   discrete = NULL;
   dfirst = NULL;
   dnum = NULL;
   nmax = 0;
   dmax = 0;
 
   // flags read by code outside this file
   // (presumably: no single() support, no restart info -- confirm in Pair)
 
   restartinfo = 0;
   single_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairBody::~PairBody()
 {
   // sub-particle work arrays are released unconditionally
 
   memory->destroy(discrete);
   memory->destroy(dnum);
   memory->destroy(dfirst);
 
   // per-type tables exist only if allocate() has run
 
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   memory->destroy(cut);
   memory->destroy(epsilon);
   memory->destroy(sigma);
   memory->destroy(lj1);
   memory->destroy(lj2);
   memory->destroy(lj3);
   memory->destroy(lj4);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBody::compute(int eflag, int vflag)
 {
   // Accumulate 12-6 LJ forces (and torques on bodies) for every neighbor
   // pair within the type-pair cutoff.  An atom with body[] >= 0 is expanded
   // into its sub-particles; an atom with body[] < 0 is treated as a point.
 
   int i,j,ii,jj,inum,jnum,itype,jtype;
   int ni,nj,npi,npj,ifirst,jfirst;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r2inv,r6inv,forcelj;
   double xi[3],xj[3],fi[3],fj[3],ti[3],tj[3];
   double *dxi,*dxj;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double **torque = atom->torque;
   int *body = atom->body;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int nall = nlocal + atom->nghost;
   int newton_pair = force->newton_pair;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // grow discrete list if necessary and initialize
   // dnum[i] == 0 doubles as "sub-particles of atom i not yet converted
   // during this call"; body2space() fills dnum[i] and dfirst[i] lazily
 
   if (nall > nmax) {
     nmax = nall;
     memory->destroy(dnum);
     memory->destroy(dfirst);
     memory->create(dnum,nall,"pair:dnum");
     memory->create(dfirst,nall,"pair:dfirst");
   }
   for (i = 0; i < nall; i++) dnum[i] = 0;
   ndiscrete = 0;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
 
       // cutoff test uses the distance between the two atom coordinates;
       // no further cutoff test is applied to individual sub-particles
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq >= cutsq[itype][jtype]) continue;
 
       // body/body interactions = NxM sub-particles
 
       evdwl = 0.0;
       if (body[i] >= 0 && body[j] >= 0) {
         if (dnum[i] == 0) body2space(i);
         npi = dnum[i];
         ifirst = dfirst[i];
         if (dnum[j] == 0) body2space(j);
         npj = dnum[j];
         jfirst = dfirst[j];
 
         for (ni = 0; ni < npi; ni++) {
           dxi = discrete[ifirst+ni];
 
           for (nj = 0; nj < npj; nj++) {
             dxj = discrete[jfirst+nj];
 
             // sub-particle positions = atom coordinate + stored displacement
 
             xi[0] = x[i][0] + dxi[0];
             xi[1] = x[i][1] + dxi[1];
             xi[2] = x[i][2] + dxi[2];
             xj[0] = x[j][0] + dxj[0];
             xj[1] = x[j][1] + dxj[1];
             xj[2] = x[j][2] + dxj[2];
 
             delx = xi[0] - xj[0];
             dely = xi[1] - xj[1];
             delz = xi[2] - xj[2];
             rsq = delx*delx + dely*dely + delz*delz;
 
             r2inv = 1.0/rsq;
             r6inv = r2inv*r2inv*r2inv;
             forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
             fpair = forcelj*r2inv;
 
             if (eflag)
               evdwl += r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
 
             // force on I, plus torque = displacement x force
 
             fi[0] = delx*fpair;
             fi[1] = dely*fpair;
             fi[2] = delz*fpair;
             f[i][0] += fi[0];
             f[i][1] += fi[1];
             f[i][2] += fi[2];
             ti[0] = dxi[1]*fi[2] - dxi[2]*fi[1];
             ti[1] = dxi[2]*fi[0] - dxi[0]*fi[2];
             ti[2] = dxi[0]*fi[1] - dxi[1]*fi[0];
             torque[i][0] += ti[0];
             torque[i][1] += ti[1];
             torque[i][2] += ti[2];
             
             // equal and opposite force on J, with its own torque
 
             if (newton_pair || j < nlocal) {
               fj[0] = -delx*fpair;
               fj[1] = -dely*fpair;
               fj[2] = -delz*fpair;
               f[j][0] += fj[0];
               f[j][1] += fj[1];
               f[j][2] += fj[2];
               tj[0] = dxj[1]*fj[2] - dxj[2]*fj[1];
               tj[1] = dxj[2]*fj[0] - dxj[0]*fj[2];
               tj[2] = dxj[0]*fj[1] - dxj[1]*fj[0];
               torque[j][0] += tj[0];
               torque[j][1] += tj[1];
               torque[j][2] += tj[2];
             }
           }
         }
 
       // body/particle interaction = Nx1 sub-particles
 
       } else if (body[i] >= 0) {
         if (dnum[i] == 0) body2space(i);
         npi = dnum[i];
         ifirst = dfirst[i];
 
         for (ni = 0; ni < npi; ni++) {
           dxi = discrete[ifirst+ni];
 
           xi[0] = x[i][0] + dxi[0];
           xi[1] = x[i][1] + dxi[1];
           xi[2] = x[i][2] + dxi[2];
           xj[0] = x[j][0];
           xj[1] = x[j][1];
           xj[2] = x[j][2];
 
           delx = xi[0] - xj[0];
           dely = xi[1] - xj[1];
           delz = xi[2] - xj[2];
           rsq = delx*delx + dely*dely + delz*delz;
 
           r2inv = 1.0/rsq;
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
           fpair = forcelj*r2inv;
 
           if (eflag)
             evdwl += r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
 
           fi[0] = delx*fpair;
           fi[1] = dely*fpair;
           fi[2] = delz*fpair;
           f[i][0] += fi[0];
           f[i][1] += fi[1];
           f[i][2] += fi[2];
           ti[0] = dxi[1]*fi[2] - dxi[2]*fi[1];
           ti[1] = dxi[2]*fi[0] - dxi[0]*fi[2];
           ti[2] = dxi[0]*fi[1] - dxi[1]*fi[0];
           torque[i][0] += ti[0];
           torque[i][1] += ti[1];
           torque[i][2] += ti[2];
 
           // J is a point particle here: force only, no torque
 
           if (newton_pair || j < nlocal) {
             fj[0] = -delx*fpair;
             fj[1] = -dely*fpair;
             fj[2] = -delz*fpair;
             f[j][0] += fj[0];
             f[j][1] += fj[1];
             f[j][2] += fj[2];
           }
         }
 
 
       // particle/body interaction = Nx1 sub-particles
 
       } else if (body[j] >= 0) {
         if (dnum[j] == 0) body2space(j);
         npj = dnum[j];
         jfirst = dfirst[j];
 
         for (nj = 0; nj < npj; nj++) {
           dxj = discrete[jfirst+nj];
 
           xi[0] = x[i][0];
           xi[1] = x[i][1];
           xi[2] = x[i][2];
           xj[0] = x[j][0] + dxj[0];
           xj[1] = x[j][1] + dxj[1];
           xj[2] = x[j][2] + dxj[2];
 
           delx = xi[0] - xj[0];
           dely = xi[1] - xj[1];
           delz = xi[2] - xj[2];
           rsq = delx*delx + dely*dely + delz*delz;
 
           r2inv = 1.0/rsq;
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
           fpair = forcelj*r2inv;
 
           if (eflag)
             evdwl += r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
 
           // I is a point particle here: force only, no torque
 
           fi[0] = delx*fpair;
           fi[1] = dely*fpair;
           fi[2] = delz*fpair;
           f[i][0] += fi[0];
           f[i][1] += fi[1];
           f[i][2] += fi[2];
 
           if (newton_pair || j < nlocal) {
             fj[0] = -delx*fpair;
             fj[1] = -dely*fpair;
             fj[2] = -delz*fpair;
             f[j][0] += fj[0];
             f[j][1] += fj[1];
             f[j][2] += fj[2];
             tj[0] = dxj[1]*fj[2] - dxj[2]*fj[1];
             tj[1] = dxj[2]*fj[0] - dxj[0]*fj[2];
             tj[2] = dxj[0]*fj[1] - dxj[1]*fj[0];
             torque[j][0] += tj[0];
             torque[j][1] += tj[1];
             torque[j][2] += tj[2];
           }
         }
 
       // particle/particle interaction = 1x1 particles
 
       } else {
         r2inv = 1.0/rsq;
         r6inv = r2inv*r2inv*r2inv;
         forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
         fpair = forcelj*r2inv;
 
         if (eflag)
           evdwl += r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
       }
 
       // NOTE(review): in the body branches fpair,delx,dely,delz hold the
       // values of the last sub-particle pair evaluated (and are unset if a
       // body has zero sub-particles), while evdwl is the sum over all
       // sub-particle pairs -- confirm the per-pair virial tally is intended
 
       if (evflag) ev_tally(i,j,nlocal,newton_pair,
                            evdwl,0.0,fpair,delx,dely,delz);
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairBody::allocate()
 {
   // tables are indexed by atom type 1..ntypes, hence ntypes+1 per dimension
 
   const int np1 = atom->ntypes + 1;
   allocated = 1;
 
   // only the upper triangle (J >= I) of setflag is cleared
 
   memory->create(setflag,np1,np1,"pair:setflag");
   for (int itype = 1; itype < np1; itype++) {
     for (int jtype = itype; jtype < np1; jtype++) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,np1,np1,"pair:cutsq");
 
   memory->create(cut,np1,np1,"pair:cut");
   memory->create(epsilon,np1,np1,"pair:epsilon");
   memory->create(sigma,np1,np1,"pair:sigma");
   memory->create(lj1,np1,np1,"pair:lj1");
   memory->create(lj2,np1,np1,"pair:lj2");
   memory->create(lj3,np1,np1,"pair:lj3");
   memory->create(lj4,np1,np1,"pair:lj4");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairBody::settings(int narg, char **arg)
 {
   // the pair_style command takes exactly one argument: the global cutoff
 
   if (narg != 1) error->all(FLERR,"Illegal pair_style command");
 
   cut_global = force->numeric(FLERR,arg[0]);
 
   // reset cutoffs that have been explicitly set
   // the inner loop starts at j = i so that diagonal I,I entries are reset
   // too; starting at i+1 left them at their previous value
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut[i][j] = cut_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairBody::coeff(int narg, char **arg)
 {
   if (narg < 4 || narg > 5)
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double epsilon_one = force->numeric(FLERR,arg[2]);
   double sigma_one = force->numeric(FLERR,arg[3]);
 
   double cut_one = cut_global;
   if (narg == 5) cut_one = force->numeric(FLERR,arg[4]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       epsilon[i][j] = epsilon_one;
       sigma[i][j] = sigma_one;
       cut[i][j] = cut_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairBody::init_style()
 {
   // require atom style "body" whose body style is "nparticle";
   // either check failing is a fatal error
 
   avec = (AtomVecBody *) atom->style_match("body");
   if (!avec) error->all(FLERR,"Pair body requires atom style body");
   if (strcmp(avec->bptr->style,"nparticle") != 0)
     error->all(FLERR,"Pair body requires body style nparticle");
 
   // cache the body pointer with its concrete type; body2space() uses it
 
   bptr = (BodyNparticle *) avec->bptr;
 
   // submit a neighbor list request on behalf of this pair style
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairBody::init_one(int i, int j)
 {
   // pair I,J was never set explicitly: build it by mixing I,I and J,J
 
   if (!setflag[i][j]) {
     epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j],
                                sigma[i][i],sigma[j][j]);
     sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
     cut[i][j] = mix_distance(cut[i][i],cut[j][j]);
   }
 
   // sigma^12 and sigma^6 feed both the force (lj1,lj2)
   // and the energy (lj3,lj4) prefactors
 
   const double sig12 = pow(sigma[i][j],12.0);
   const double sig6 = pow(sigma[i][j],6.0);
 
   lj1[i][j] = 48.0 * epsilon[i][j] * sig12;
   lj2[i][j] = 24.0 * epsilon[i][j] * sig6;
   lj3[i][j] = 4.0 * epsilon[i][j] * sig12;
   lj4[i][j] = 4.0 * epsilon[i][j] * sig6;
 
   // copy everything to the transposed J,I entry
 
   lj4[j][i] = lj4[i][j];
   lj3[j][i] = lj3[i][j];
   lj2[j][i] = lj2[i][j];
   lj1[j][i] = lj1[i][j];
   sigma[j][i] = sigma[i][j];
   epsilon[j][i] = epsilon[i][j];
 
   return cut[i][j];
 }
 
 /* ----------------------------------------------------------------------
    convert N sub-particles in body I to space frame using current quaternion
    store sub-particle space-frame displacements from COM in discrete list
 ------------------------------------------------------------------------- */
 
 void PairBody::body2space(int i)
 {
   // fetch the sub-particle count and body-frame coords of atom I's body
 
   int ibonus = atom->body[i];
   AtomVecBody::Bonus *bonus = &avec->bonus[ibonus];
   int nsub = bptr->nsub(bonus);
   double *coords = bptr->coords(bonus);
 
   // atom I's entries occupy discrete[dfirst[i]] .. discrete[dfirst[i]+nsub-1]
 
   dnum[i] = nsub;
   dfirst[i] = ndiscrete;
 
   // grow in DELTA-sized steps until all nsub new entries fit;
   // a single increment is not enough when nsub exceeds DELTA
 
   if (ndiscrete + nsub > dmax) {
     while (ndiscrete + nsub > dmax) dmax += DELTA;
     memory->grow(discrete,dmax,3,"pair:discrete");
   }
 
   // rotate each body-frame coordinate by the body's quaternion
 
   double p[3][3];
   MathExtra::quat_to_mat(bonus->quat,p);
 
   for (int m = 0; m < nsub; m++) {
     MathExtra::matvec(p,&coords[3*m],discrete[ndiscrete]);
     ndiscrete++;
   }
 }
diff --git a/src/CLASS2/pair_lj_class2_coul_cut.cpp b/src/CLASS2/pair_lj_class2_coul_cut.cpp
index b4ede4e97..7f851fd23 100644
--- a/src/CLASS2/pair_lj_class2_coul_cut.cpp
+++ b/src/CLASS2/pair_lj_class2_coul_cut.cpp
@@ -1,466 +1,466 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_lj_class2_coul_cut.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 /* ---------------------------------------------------------------------- */
 
 PairLJClass2CoulCut::PairLJClass2CoulCut(LAMMPS *lmp) : Pair(lmp)
 {
   // this class defines write_data() and write_data_all();
   // presumably this flag advertises that to the data-file writer
 
   writedata = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairLJClass2CoulCut::~PairLJClass2CoulCut()
 {
   // per-type tables exist only if allocate() has run
 
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   memory->destroy(cut_lj);
   memory->destroy(cut_ljsq);
   memory->destroy(cut_coul);
   memory->destroy(cut_coulsq);
   memory->destroy(epsilon);
   memory->destroy(sigma);
   memory->destroy(lj1);
   memory->destroy(lj2);
   memory->destroy(lj3);
   memory->destroy(lj4);
   memory->destroy(offset);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJClass2CoulCut::compute(int eflag, int vflag)
 {
   // Accumulate pairwise forces from a 9-6 LJ term plus a cutoff Coulomb
   // term, each with its own per-type-pair cutoff, and optionally tally
   // energy/virial.
 
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double rsq,rinv,r2inv,r3inv,r6inv,forcecoul,forcelj;
   double factor_coul,factor_lj;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       // the raw neighbor entry selects the LJ/Coulomb scaling factors via
       // sbmask(); NEIGHMASK then reduces it to the plain atom index
 
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       // cutsq is the outer test; each term is then checked against its
       // own squared cutoff
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
 
         if (rsq < cut_coulsq[itype][jtype])
           forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
         else forcecoul = 0.0;
 
         // 9-6 form: lj1 multiplies r^-9, lj2 multiplies r^-6
 
         if (rsq < cut_ljsq[itype][jtype]) {
           rinv = sqrt(r2inv);
           r3inv = r2inv*rinv;
           r6inv = r3inv*r3inv;
           forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
         } else forcelj = 0.0;
 
         fpair = (factor_coul*forcecoul + factor_lj*forcelj) * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         // r3inv,r6inv are valid below because the same cut_ljsq test
         // guarded their assignment above
 
         if (eflag) {
           if (rsq < cut_coulsq[itype][jtype])
             ecoul = factor_coul * qqrd2e * qtmp*q[j]*sqrt(r2inv);
           else ecoul = 0.0;
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
               offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairLJClass2CoulCut::allocate()
 {
   // tables are indexed by atom type 1..ntypes, hence ntypes+1 per dimension
 
   const int np1 = atom->ntypes + 1;
   allocated = 1;
 
   // only the upper triangle (J >= I) of setflag is cleared
 
   memory->create(setflag,np1,np1,"pair:setflag");
   for (int itype = 1; itype < np1; itype++) {
     for (int jtype = itype; jtype < np1; jtype++) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,np1,np1,"pair:cutsq");
 
   memory->create(cut_lj,np1,np1,"pair:cut_lj");
   memory->create(cut_ljsq,np1,np1,"pair:cut_ljsq");
   memory->create(cut_coul,np1,np1,"pair:cut_coul");
   memory->create(cut_coulsq,np1,np1,"pair:cut_coulsq");
   memory->create(epsilon,np1,np1,"pair:epsilon");
   memory->create(sigma,np1,np1,"pair:sigma");
   memory->create(lj1,np1,np1,"pair:lj1");
   memory->create(lj2,np1,np1,"pair:lj2");
   memory->create(lj3,np1,np1,"pair:lj3");
   memory->create(lj4,np1,np1,"pair:lj4");
   memory->create(offset,np1,np1,"pair:offset");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairLJClass2CoulCut::settings(int narg, char **arg)
 {
   // args: LJ cutoff, optionally followed by a Coulomb cutoff;
   // with one arg the Coulomb cutoff equals the LJ cutoff
 
   if (narg < 1 || narg > 2) error->all(FLERR,"Illegal pair_style command");
 
   cut_lj_global = force->numeric(FLERR,arg[0]);
   if (narg == 1) cut_coul_global = cut_lj_global;
   else cut_coul_global = force->numeric(FLERR,arg[1]);
 
   // reset cutoffs that have been explicitly set
   // the inner loop starts at j = i so that diagonal I,I entries are reset
   // too; starting at i+1 left them at their previous value
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) {
           cut_lj[i][j] = cut_lj_global;
           cut_coul[i][j] = cut_coul_global;
         }
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairLJClass2CoulCut::coeff(int narg, char **arg)
 {
   if (narg < 4 || narg > 6) error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double epsilon_one = force->numeric(FLERR,arg[2]);
   double sigma_one = force->numeric(FLERR,arg[3]);
 
   double cut_lj_one = cut_lj_global;
   double cut_coul_one = cut_coul_global;
   if (narg >= 5) cut_coul_one = cut_lj_one = force->numeric(FLERR,arg[4]);
   if (narg == 6) cut_coul_one = force->numeric(FLERR,arg[5]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       epsilon[i][j] = epsilon_one;
       sigma[i][j] = sigma_one;
       cut_lj[i][j] = cut_lj_one;
       cut_coul[i][j] = cut_coul_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLJClass2CoulCut::init_style()
 {
   // compute() reads atom->q, so a missing charge attribute is a fatal error
 
   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/class2/coul/cut requires atom attribute q");
 
   // submit a neighbor list request on behalf of this pair style
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairLJClass2CoulCut::init_one(int i, int j)
 {
   // always mix epsilon,sigma via sixthpower rules
   // mix distance via user-defined rule
 
   if (setflag[i][j] == 0) {
     epsilon[i][j] = 2.0 * sqrt(epsilon[i][i]*epsilon[j][j]) *
       pow(sigma[i][i],3.0) * pow(sigma[j][j],3.0) /
       (pow(sigma[i][i],6.0) + pow(sigma[j][j],6.0));
     sigma[i][j] =
       pow((0.5 * (pow(sigma[i][i],6.0) + pow(sigma[j][j],6.0))),1.0/6.0);
     cut_lj[i][j] = mix_distance(cut_lj[i][i],cut_lj[j][j]);
     cut_coul[i][j] = mix_distance(cut_coul[i][i],cut_coul[j][j]);
   }
 
   // returned cutoff is the larger of the LJ and Coulomb cutoffs
 
   double cut = MAX(cut_lj[i][j],cut_coul[i][j]);
   cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
   cut_coulsq[i][j] = cut_coul[i][j] * cut_coul[i][j];
 
   // prefactors of the 9-6 force (lj1,lj2) and energy (lj3,lj4) terms
 
   lj1[i][j] = 18.0 * epsilon[i][j] * pow(sigma[i][j],9.0);
   lj2[i][j] = 18.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
   lj3[i][j] = 2.0 * epsilon[i][j] * pow(sigma[i][j],9.0);
   lj4[i][j] = 3.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
 
   // energy offset = 9-6 energy evaluated at the LJ cutoff
   // skipped for a zero cutoff, where sigma/cut_lj would divide by zero
   // and store NaN; no pair can fall inside a zero cutoff anyway
 
   if (offset_flag && (cut_lj[i][j] > 0.0)) {
     double ratio = sigma[i][j] / cut_lj[i][j];
     offset[i][j] = epsilon[i][j] * (2.0*pow(ratio,9.0) - 3.0*pow(ratio,6.0));
   } else offset[i][j] = 0.0;
 
   // copy derived quantities to the transposed J,I entries
 
   cut_ljsq[j][i] = cut_ljsq[i][j];
   cut_coulsq[j][i] = cut_coulsq[i][j];
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   lj4[j][i] = lj4[i][j];
   offset[j][i] = offset[i][j];
 
   // compute I,J contribution to long-range tail correction
   // count total # of atoms of type I and J via Allreduce
 
   if (tail_flag) {
     int *type = atom->type;
     int nlocal = atom->nlocal;
 
     double count[2],all[2];
     count[0] = count[1] = 0.0;
     for (int k = 0; k < nlocal; k++) {
       if (type[k] == i) count[0] += 1.0;
       if (type[k] == j) count[1] += 1.0;
     }
     MPI_Allreduce(count,all,2,MPI_DOUBLE,MPI_SUM,world);
 
     double sig3 = sigma[i][j]*sigma[i][j]*sigma[i][j];
     double sig6 = sig3*sig3;
     double rc3 = cut_lj[i][j]*cut_lj[i][j]*cut_lj[i][j];
     double rc6 = rc3*rc3;
     etail_ij = 2.0*MY_PI*all[0]*all[1]*epsilon[i][j] *
       sig6 * (sig3 - 3.0*rc3) / (3.0*rc6);
     ptail_ij = 2.0*MY_PI*all[0]*all[1]*epsilon[i][j] *
       sig6 * (sig3 - 2.0*rc3) / rc6;
   }
 
   return cut;
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJClass2CoulCut::write_restart(FILE *fp)
 {
   // global settings go first, then one record per type pair with J >= I
 
   write_restart_settings(fp);
 
   for (int itype = 1; itype <= atom->ntypes; itype++) {
     for (int jtype = itype; jtype <= atom->ntypes; jtype++) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
 
       // coefficients follow the flag only for pairs that were set
 
       if (!setflag[itype][jtype]) continue;
       fwrite(&epsilon[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigma[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut_lj[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut_coul[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJClass2CoulCut::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   // only proc 0 touches the file; every value is then broadcast to all procs
 
   const bool reader = (comm->me == 0);
 
   for (int itype = 1; itype <= atom->ntypes; itype++) {
     for (int jtype = itype; jtype <= atom->ntypes; jtype++) {
       if (reader) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
 
       // coefficients are stored only for pairs whose flag is set
 
       if (!setflag[itype][jtype]) continue;
 
       if (reader) {
         fread(&epsilon[itype][jtype],sizeof(double),1,fp);
         fread(&sigma[itype][jtype],sizeof(double),1,fp);
         fread(&cut_lj[itype][jtype],sizeof(double),1,fp);
         fread(&cut_coul[itype][jtype],sizeof(double),1,fp);
       }
       MPI_Bcast(&epsilon[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&sigma[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut_lj[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut_coul[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJClass2CoulCut::write_restart_settings(FILE *fp)
 {
   // raw binary dump of the global settings: two doubles, then three ints
   // read_restart_settings() reads them back in exactly this order
 
   fwrite(&cut_lj_global,sizeof(double),1,fp);
   fwrite(&cut_coul_global,sizeof(double),1,fp);
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
   fwrite(&tail_flag,sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJClass2CoulCut::read_restart_settings(FILE *fp)
 {
   // proc 0 reads the values in the order write_restart_settings() wrote
   // them; fread return values are not checked
 
   if (comm->me == 0) {
     fread(&cut_lj_global,sizeof(double),1,fp);
     fread(&cut_coul_global,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
     fread(&tail_flag,sizeof(int),1,fp);
   }
 
   // every proc receives the settings from proc 0
 
   MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
   MPI_Bcast(&tail_flag,1,MPI_INT,0,world);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to data file
 ------------------------------------------------------------------------- */
 
 void PairLJClass2CoulCut::write_data(FILE *fp)
 {
   // one line per atom type: type index, then the I,I epsilon and sigma
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; itype++) {
     fprintf(fp,"%d %g %g\n",itype,epsilon[itype][itype],sigma[itype][itype]);
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes all pairs to data file
 ------------------------------------------------------------------------- */
 
 void PairLJClass2CoulCut::write_data_all(FILE *fp)
 {
   // one line per type pair with J >= I: I J epsilon sigma cut_lj
   // (cut_coul is not written)
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       fprintf(fp,"%d %d %g %g %g\n",itype,jtype,
               epsilon[itype][jtype],sigma[itype][jtype],cut_lj[itype][jtype]);
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLJClass2CoulCut::single(int i, int j, int itype, int jtype,
                                    double rsq,
                                    double factor_coul, double factor_lj,
                                    double &fforce)
 {
   // Energy of one I,J pair at squared distance rsq; the matching force
   // factor is returned through fforce.  Each term contributes only inside
   // its own cutoff.
 
   const bool in_coul = rsq < cut_coulsq[itype][jtype];
   const bool in_lj = rsq < cut_ljsq[itype][jtype];
 
   const double r2inv = 1.0/rsq;
 
   // Coulomb force and energy are the same expression here
 
   double coul_term = 0.0;
   if (in_coul)
     coul_term = force->qqrd2e * atom->q[i]*atom->q[j]*sqrt(r2inv);
 
   // 9-6 LJ: force from lj1,lj2 and energy from lj3,lj4 minus the offset
 
   double forcelj = 0.0;
   double philj = 0.0;
   if (in_lj) {
     const double rinv = sqrt(r2inv);
     const double r3inv = r2inv*rinv;
     const double r6inv = r3inv*r3inv;
     forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
     philj = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
       offset[itype][jtype];
   }
 
   fforce = (factor_coul*coul_term + factor_lj*forcelj) * r2inv;
 
   double eng = 0.0;
   if (in_coul) eng += factor_coul*coul_term;
   if (in_lj) eng += factor_lj*philj;
 
   return eng;
 }
diff --git a/src/CLASS2/pair_lj_class2_coul_long.cpp b/src/CLASS2/pair_lj_class2_coul_long.cpp
index 60c3ca7e0..2460515b9 100644
--- a/src/CLASS2/pair_lj_class2_coul_long.cpp
+++ b/src/CLASS2/pair_lj_class2_coul_long.cpp
@@ -1,550 +1,550 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_lj_class2_coul_long.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 /* ---------------------------------------------------------------------- */
 
 PairLJClass2CoulLong::PairLJClass2CoulLong(LAMMPS *lmp) : Pair(lmp)
 {
   // no Coulomb lookup table exists until init_style() calls init_tables()
   ftable = NULL;
 
   // capability flags of this pair style
   writedata = 1;
   pppmflag = 1;
   ewaldflag = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairLJClass2CoulLong::~PairLJClass2CoulLong()
 {
   // per-type arrays exist only after allocate() has run
   if (allocated) {
     memory->destroy(setflag);
     memory->destroy(cutsq);
 
     memory->destroy(cut_lj);
     memory->destroy(cut_ljsq);
     memory->destroy(epsilon);
     memory->destroy(sigma);
     memory->destroy(lj1);
     memory->destroy(lj2);
     memory->destroy(lj3);
     memory->destroy(lj4);
     memory->destroy(offset);
   }
 
   // Coulomb lookup tables are built by init_tables() in init_style(),
   // independently of the per-type arrays, so release them separately.
   // ftable is NULL (set in the constructor) when no tables were built.
   if (ftable) free_tables();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJClass2CoulLong::compute(int eflag, int vflag)
 {
   // Accumulate pairwise forces over the neighbor list and, when
   // requested through eflag/vflag, tally energy and virial.
   // Each pair gets an erfc-damped Coulomb term inside cut_coul plus a
   // 9-6 LJ term inside the per-type LJ cutoff.
   int i,j,ii,jj,inum,jnum,itable,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double fraction,table;
   double rsq,r,rinv,r2inv,r3inv,r6inv,forcecoul,forcelj;
   double grij,expm2,prefactor,t,erfc;
   double factor_coul,factor_lj;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       // the neighbor entry carries special-bond bits on top of the atom
       // index: read the scaling factors first, then mask down to the index
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
 
         if (rsq < cut_coulsq) {
           // analytic path: tables disabled or rsq at/below the table's
           // inner bound; erfc is a polynomial in t times exp(-grij^2)
           if (!ncoultablebits || rsq <= tabinnersq) {
             r = sqrt(rsq);
             grij = g_ewald * r;
             expm2 = exp(-grij*grij);
             t = 1.0 / (1.0 + EWALD_P*grij);
             erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
             prefactor = qqrd2e * qtmp*q[j]/r;
             forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
             // special-bond pair: subtract the excluded share of the
             // bare Coulomb interaction
             if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
           } else { 
             // tabulated path: the table index comes from the bit pattern
             // of rsq viewed as a float, followed by linear interpolation
             union_int_float_t rsq_lookup; 
             rsq_lookup.f = rsq; 
             itable = rsq_lookup.i & ncoulmask; 
             itable >>= ncoulshiftbits; 
             fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; 
             table = ftable[itable] + fraction*dftable[itable]; 
             forcecoul = qtmp*q[j] * table; 
             // prefactor is only assigned on this path when factor_coul < 1.0;
             // the energy block below reads it under the same condition
             if (factor_coul < 1.0) { 
               table = ctable[itable] + fraction*dctable[itable]; 
               prefactor = qtmp*q[j] * table; 
               forcecoul -= (1.0-factor_coul)*prefactor; 
             }
           }
         } else forcecoul = 0.0;
 
         // 9-6 LJ force; r3inv and r6inv are reused by the energy block
         if (rsq < cut_ljsq[itype][jtype]) {
           rinv = sqrt(r2inv);
           r3inv = r2inv*rinv;
           r6inv = r3inv*r3inv;
           forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
         } else forcelj = 0.0;
 
         // factor_coul is already folded into forcecoul above
         fpair = (forcecoul + factor_lj*forcelj) * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         // update j only with newton_pair on or when j is a local atom
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
           if (rsq < cut_coulsq) {
             // same analytic/tabulated split as the force above, reusing
             // prefactor/erfc or itable/fraction computed there
             if (!ncoultablebits || rsq <= tabinnersq) 
               ecoul = prefactor*erfc; 
             else { 
               table = etable[itable] + fraction*detable[itable]; 
               ecoul = qtmp*q[j] * table; 
             }
             if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
           } else ecoul = 0.0;
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
               offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairLJClass2CoulLong::allocate()
 {
   // every per-type table is (ntypes+1) x (ntypes+1) so that it can be
   // indexed directly by the 1-based atom type
   const int ntypes = atom->ntypes;
   const int dim = ntypes + 1;
   allocated = 1;
 
   // only the upper triangle (j >= i) of setflag is cleared here
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   memory->create(cut_lj,dim,dim,"pair:cut_lj");
   memory->create(cut_ljsq,dim,dim,"pair:cut_ljsq");
   memory->create(epsilon,dim,dim,"pair:epsilon");
   memory->create(sigma,dim,dim,"pair:sigma");
   memory->create(lj1,dim,dim,"pair:lj1");
   memory->create(lj2,dim,dim,"pair:lj2");
   memory->create(lj3,dim,dim,"pair:lj3");
   memory->create(lj4,dim,dim,"pair:lj4");
   memory->create(offset,dim,dim,"pair:offset");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairLJClass2CoulLong::settings(int narg, char **arg)
 {
   // pair_style arguments: LJ cutoff, then an optional Coulomb cutoff
   // that defaults to the LJ cutoff
   if (narg < 1 || narg > 2) error->all(FLERR,"Illegal pair_style command");
 
   cut_lj_global = force->numeric(FLERR,arg[0]);
   if (narg == 1) cut_coul = cut_lj_global;
   else cut_coul = force->numeric(FLERR,arg[1]);
 
   // reset cutoffs that have been explicitly set
   // the inner loop starts at j = i so that like-type (i,i) pairs are
   // reset too; starting at i+1 would leave the diagonal cutoffs stale
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut_lj[i][j] = cut_lj_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairLJClass2CoulLong::coeff(int narg, char **arg)
 {
   // pair_coeff arguments: I J epsilon sigma [cut_lj]
   // Only the LJ cutoff can be given per pair, so at most 5 arguments
   // are meaningful. A 6th argument used to be accepted, and it also
   // caused the 5th (the LJ cutoff) to be ignored without any message.
   if (narg < 4 || narg > 5)
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double epsilon_one = force->numeric(FLERR,arg[2]);
   double sigma_one = force->numeric(FLERR,arg[3]);
 
   // per-pair LJ cutoff falls back to the global one from settings()
   double cut_lj_one = cut_lj_global;
   if (narg == 5) cut_lj_one = force->numeric(FLERR,arg[4]);
 
   // only the upper triangle (j >= i) is stored and flagged
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       epsilon[i][j] = epsilon_one;
       sigma[i][j] = sigma_one;
       cut_lj[i][j] = cut_lj_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   // the given type ranges selected no pair with j >= i
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLJClass2CoulLong::init_style()
 {
   // the Coulomb term needs per-atom charges
   if (!atom->q_flag)
     error->all(FLERR,
                "Pair style lj/class2/coul/long requires atom attribute q");
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 
   // squared Coulomb cutoff, compared against rsq in compute() and single()
   cut_coulsq = cut_coul * cut_coul;
 
   // ensure use of KSpace long-range solver, set g_ewald
 
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style requires a KSpace style");
   g_ewald = force->kspace->g_ewald;
 
   // setup force tables 
   if (ncoultablebits) init_tables(cut_coul,NULL);
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairLJClass2CoulLong::init_one(int i, int j)
 {
   // always mix epsilon,sigma via sixthpower rules
   // mix distance via user-defined rule
 
   if (setflag[i][j] == 0) {
     epsilon[i][j] = 2.0 * sqrt(epsilon[i][i]*epsilon[j][j]) *
       pow(sigma[i][i],3.0) * pow(sigma[j][j],3.0) /
       (pow(sigma[i][i],6.0) + pow(sigma[j][j],6.0));
     sigma[i][j] =
       pow((0.5 * (pow(sigma[i][i],6.0) + pow(sigma[j][j],6.0))),1.0/6.0);
     cut_lj[i][j] = mix_distance(cut_lj[i][i],cut_lj[j][j]);
   }
 
   double cut = MAX(cut_lj[i][j],cut_coul);
   cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
 
   lj1[i][j] = 18.0 * epsilon[i][j] * pow(sigma[i][j],9.0);
   lj2[i][j] = 18.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
   lj3[i][j] = 2.0 * epsilon[i][j] * pow(sigma[i][j],9.0);
   lj4[i][j] = 3.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
 
   if (offset_flag) {
     double ratio = sigma[i][j] / cut_lj[i][j];
     offset[i][j] = epsilon[i][j] * (2.0*pow(ratio,9.0) - 3.0*pow(ratio,6.0));
   } else offset[i][j] = 0.0;
 
   cut_ljsq[j][i] = cut_ljsq[i][j];
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   lj4[j][i] = lj4[i][j];
   offset[j][i] = offset[i][j];
 
   // compute I,J contribution to long-range tail correction
   // count total # of atoms of type I and J via Allreduce
 
   if (tail_flag) {
     int *type = atom->type;
     int nlocal = atom->nlocal;
 
     double count[2],all[2];
     count[0] = count[1] = 0.0;
     for (int k = 0; k < nlocal; k++) {
       if (type[k] == i) count[0] += 1.0;
       if (type[k] == j) count[1] += 1.0;
     }
     MPI_Allreduce(count,all,2,MPI_DOUBLE,MPI_SUM,world);
 
     double sig3 = sigma[i][j]*sigma[i][j]*sigma[i][j];
     double sig6 = sig3*sig3;
     double rc3 = cut_lj[i][j]*cut_lj[i][j]*cut_lj[i][j];
     double rc6 = rc3*rc3;
     etail_ij = 2.0*MY_PI*all[0]*all[1]*epsilon[i][j] *
       sig6 * (sig3 - 3.0*rc3) / (3.0*rc6);
     ptail_ij = 2.0*MY_PI*all[0]*all[1]*epsilon[i][j] *
       sig6 * (sig3 - 2.0*rc3) / rc6;
   }
 
   return cut;
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJClass2CoulLong::write_restart(FILE *fp)
 {
   // global settings first, then one record per type pair with j >= i
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
 
       // coefficients follow the flag only for explicitly set pairs
       if (!setflag[itype][jtype]) continue;
       fwrite(&epsilon[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigma[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut_lj[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJClass2CoulLong::read_restart(FILE *fp)
 {
   // settings come first in the file; the arrays must exist before the
   // per-pair records can be stored
   read_restart_settings(fp);
   allocate();
 
   const bool is_root = (comm->me == 0);
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       // proc 0 reads each value from the file, then it is broadcast
       if (is_root) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
 
       // coefficients are present only for explicitly set pairs
       if (!setflag[itype][jtype]) continue;
 
       if (is_root) {
         fread(&epsilon[itype][jtype],sizeof(double),1,fp);
         fread(&sigma[itype][jtype],sizeof(double),1,fp);
         fread(&cut_lj[itype][jtype],sizeof(double),1,fp);
       }
       MPI_Bcast(&epsilon[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&sigma[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut_lj[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJClass2CoulLong::write_restart_settings(FILE *fp)
 {
   // raw binary dump of the global settings; read_restart_settings()
   // reads the same fields back in exactly this order, so any change
   // here must be mirrored there
   fwrite(&cut_lj_global,sizeof(double),1,fp);
   fwrite(&cut_coul,sizeof(double),1,fp);
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
   fwrite(&tail_flag,sizeof(int),1,fp);
   fwrite(&ncoultablebits,sizeof(int),1,fp);
   fwrite(&tabinner,sizeof(double),1,fp);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJClass2CoulLong::read_restart_settings(FILE *fp)
 {
   // only proc 0 touches the file; the field order matches
   // write_restart_settings()
   if (comm->me == 0) {
     fread(&cut_lj_global,sizeof(double),1,fp);
     fread(&cut_coul,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
     fread(&tail_flag,sizeof(int),1,fp);
     fread(&ncoultablebits,sizeof(int),1,fp);
     fread(&tabinner,sizeof(double),1,fp);
   }
   // broadcast every setting from proc 0 to all procs
   MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
   MPI_Bcast(&tail_flag,1,MPI_INT,0,world);
   MPI_Bcast(&ncoultablebits,1,MPI_INT,0,world);
   MPI_Bcast(&tabinner,1,MPI_DOUBLE,0,world);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to data file
 ------------------------------------------------------------------------- */
 
 void PairLJClass2CoulLong::write_data(FILE *fp)
 {
   // one "type epsilon sigma" record per atom type, taken from the
   // like-type (i,i) coefficients only
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     fprintf(fp,"%d %g %g\n",itype,epsilon[itype][itype],sigma[itype][itype]);
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes all pairs to data file
 ------------------------------------------------------------------------- */
 
 void PairLJClass2CoulLong::write_data_all(FILE *fp)
 {
   // one "i j epsilon sigma cut_lj" record for every type pair with j >= i
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fprintf(fp,"%d %d %g %g %g\n",itype,jtype,
               epsilon[itype][jtype],sigma[itype][jtype],cut_lj[itype][jtype]);
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLJClass2CoulLong::single(int i, int j, int itype, int jtype,
                                     double rsq,
                                     double factor_coul, double factor_lj,
                                     double &fforce)
 {
   // Energy of a single i-j pair at squared separation rsq, using the
   // same formulas as compute(). fforce receives the combined force
   // multiplied by r2inv. The Coulomb term applies inside cut_coulsq,
   // the LJ term inside the per-type cut_ljsq.
   double r2inv,r,rinv,r3inv,r6inv,grij,expm2,t,erfc,prefactor;
   double fraction,table,forcecoul,forcelj,phicoul,philj;
   int itable;
 
   r2inv = 1.0/rsq;
   if (rsq < cut_coulsq) {
     // analytic path: tables disabled or rsq at/below the table's inner
     // bound; erfc is a polynomial in t times exp(-grij^2)
     if (!ncoultablebits || rsq <= tabinnersq) {
       r = sqrt(rsq);
       grij = g_ewald * r;
       expm2 = exp(-grij*grij);
       t = 1.0 / (1.0 + EWALD_P*grij);
       erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
       prefactor = force->qqrd2e * atom->q[i]*atom->q[j]/r;
       forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
       if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
     } else {
       // tabulated path: the table index comes from the bit pattern of
       // rsq viewed as a float, followed by linear interpolation
       union_int_float_t rsq_lookup;
       rsq_lookup.f = rsq;
       itable = rsq_lookup.i & ncoulmask;
       itable >>= ncoulshiftbits;
       fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
       table = ftable[itable] + fraction*dftable[itable];
       forcecoul = atom->q[i]*atom->q[j] * table;
       // prefactor is only assigned on this path when factor_coul < 1.0;
       // the energy section reads it under the same condition
       if (factor_coul < 1.0) {
         table = ctable[itable] + fraction*dctable[itable];
         prefactor = atom->q[i]*atom->q[j] * table;
         forcecoul -= (1.0-factor_coul)*prefactor;
       }
     }
   } else forcecoul = 0.0;
   // 9-6 LJ force; r3inv and r6inv are reused by the energy section
   if (rsq < cut_ljsq[itype][jtype]) {
     rinv = sqrt(r2inv);
     r3inv = r2inv*rinv;
     r6inv = r3inv*r3inv;
     forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
   } else forcelj = 0.0;
   // factor_coul is already folded into forcecoul above
   fforce = (forcecoul + factor_lj*forcelj) * r2inv;
 
   double eng = 0.0;
   if (rsq < cut_coulsq) {
     // same analytic/tabulated split as the force, reusing the
     // intermediates computed above
     if (!ncoultablebits || rsq <= tabinnersq)
       phicoul = prefactor*erfc;
     else {
       table = etable[itable] + fraction*detable[itable];
       phicoul = atom->q[i]*atom->q[j] * table;
     }
     if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*prefactor;
     eng += phicoul;
   }
   if (rsq < cut_ljsq[itype][jtype]) {
     philj = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
       offset[itype][jtype];
     eng += factor_lj*philj;
   }
 
   return eng;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void *PairLJClass2CoulLong::extract(const char *str, int &dim)
 {
   // the only quantity exposed by name is the Coulomb cutoff, reported
   // with dim = 0; any other name yields NULL
   dim = 0;
   if (strcmp(str,"cut_coul") != 0) return NULL;
   return (void *) &cut_coul;
 }
diff --git a/src/COLLOID/pair_yukawa_colloid.cpp b/src/COLLOID/pair_yukawa_colloid.cpp
index b0b33c46e..13d632526 100644
--- a/src/COLLOID/pair_yukawa_colloid.cpp
+++ b/src/COLLOID/pair_yukawa_colloid.cpp
@@ -1,178 +1,178 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Randy Schunk (Sandia)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdlib.h"
 #include "pair_yukawa_colloid.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "force.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairYukawaColloid::PairYukawaColloid(LAMMPS *lmp) : PairYukawa(lmp)
 {
   // everything else is set up by the PairYukawa base constructor
   // NOTE(review): writedata presumably advertises write_data support - confirm in Pair
   writedata = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairYukawaColloid::compute(int eflag, int vflag)
 {
   // Accumulate Yukawa forces between finite-size particles over the
   // neighbor list. The screening exponent uses the surface-to-surface
   // distance r - (radi + radj) built from the per-atom radii.
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair,radi,radj;
   double rsq,r,rinv,screening,forceyukawa,factor;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *radius = atom->radius;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     radi = radius[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       // the neighbor entry carries special-bond bits on top of the atom
       // index: read the scaling factor first, then mask down to the index
       j = jlist[jj];
       factor = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
       radj = radius[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r = sqrt(rsq);
         rinv = 1.0/r;
         screening = exp(-kappa*(r-(radi+radj)));
         forceyukawa = a[itype][jtype] * screening;
 
         // multiply by rinv so that fpair scales the displacement components
         fpair = factor*forceyukawa * rinv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         // update j only with newton_pair on or when j is a local atom
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
           evdwl = a[itype][jtype]/kappa * screening - offset[itype][jtype];
           evdwl *= factor;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairYukawaColloid::init_style()
 {
   // per-atom radii are read in compute(), so a sphere atom style is needed
   if (!atom->sphere_flag)
     error->all(FLERR,"Pair yukawa/colloid requires atom style sphere");
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 
   // require that atom radii are identical within each type
   // NOTE(review): rad[i] is passed to radius_consistency() and later read
   // by init_one()/single(); presumably the call stores the common radius
   // of type i in it - confirm against Atom::radius_consistency
 
   for (int i = 1; i <= atom->ntypes; i++)
     if (!atom->radius_consistency(i,rad[i]))
       error->all(FLERR,"Pair yukawa/colloid requires atoms with same type "
                  "have same radius");
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairYukawaColloid::init_one(int i, int j)
 {
   // mix the prefactor and the cutoff for pairs without explicit coefficients
   if (setflag[i][j] == 0) {
     a[i][j] = mix_energy(a[i][i],a[j][j],1.0,1.0);
     cut[i][j] = mix_distance(cut[i][i],cut[j][j]);
   }
 
   // optional energy shift: the pair energy evaluated at the cutoff,
   // using the per-type radii in the screening exponent
   double shift = 0.0;
   if (offset_flag) {
     const double screening = exp(-kappa * (cut[i][j] - (rad[i]+rad[j])));
     shift = a[i][j]/kappa * screening;
   }
   offset[i][j] = shift;
 
   // mirror into the (j,i) entries
   a[j][i] = a[i][j];
   offset[j][i] = offset[i][j];
 
   return cut[i][j];
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairYukawaColloid::single(int i, int j, int itype, int jtype,
                                  double rsq,
                                  double factor_coul, double factor_lj,
                                  double &fforce)
 {
   // Energy of a single pair at squared separation rsq, scaled by
   // factor_lj. The per-type radii rad[] stand in for the per-atom radii
   // used in compute(); i, j and factor_coul are not used.
   const double r = sqrt(rsq);
   const double rinv = 1.0/r;
   const double contact = rad[itype]+rad[jtype];
   const double screening = exp(-kappa*(r-(contact)));
 
   const double forceyukawa = a[itype][jtype] * screening;
   fforce = factor_lj*forceyukawa * rinv;
 
   const double phi = a[itype][jtype]/kappa * screening  - offset[itype][jtype];
   return factor_lj*phi;
 }
diff --git a/src/DIPOLE/pair_lj_cut_dipole_cut.cpp b/src/DIPOLE/pair_lj_cut_dipole_cut.cpp
index b0c247b66..d87434871 100755
--- a/src/DIPOLE/pair_lj_cut_dipole_cut.cpp
+++ b/src/DIPOLE/pair_lj_cut_dipole_cut.cpp
@@ -1,486 +1,486 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdlib.h"
 #include "pair_lj_cut_dipole_cut.h"
 #include "atom.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "comm.h"
 #include "force.h"
 #include "memory.h"
 #include "error.h"
 #include "update.h"
 #include "string.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCutDipoleCut::PairLJCutDipoleCut(LAMMPS *lmp) : Pair(lmp)
 {
   // NOTE(review): presumably marks single() as unavailable for this
   // style - confirm against the Pair base class
   single_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCutDipoleCut::~PairLJCutDipoleCut()
 {
   // nothing to release unless allocate() has created the per-type arrays
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   // cutoffs
   memory->destroy(cut_lj);
   memory->destroy(cut_ljsq);
   memory->destroy(cut_coul);
   memory->destroy(cut_coulsq);
 
   // LJ coefficients and derived prefactors
   memory->destroy(epsilon);
   memory->destroy(sigma);
   memory->destroy(lj1);
   memory->destroy(lj2);
   memory->destroy(lj3);
   memory->destroy(lj4);
   memory->destroy(offset);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutDipoleCut::compute(int eflag, int vflag)
 {
   // Accumulate forces and torques over the neighbor list for atoms
   // that may carry a charge, a dipole, or both, plus a 12-6 LJ term.
   // Energy and virial are tallied when requested through eflag/vflag.
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fx,fy,fz;
   double rsq,rinv,r2inv,r6inv,r3inv,r5inv,r7inv;
   double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz;
   double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul;
   double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4;
   double forcelj,factor_coul,factor_lj;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   double **mu = atom->mu;
   double **torque = atom->torque;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       // the neighbor entry carries special-bond bits on top of the atom
       // index: read the scaling factors first, then mask down to the index
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         rinv = sqrt(r2inv);
 
         // atom can have both a charge and dipole
         // i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole
         // the four branches below add into the same force/torque sums;
         // mu[.][3] > 0.0 gates the dipole terms (presumably the dipole
         // magnitude - confirm against the atom style)
 
         forcecoulx = forcecouly = forcecoulz = 0.0;
         tixcoul = tiycoul = tizcoul = 0.0;
         tjxcoul = tjycoul = tjzcoul = 0.0;
 
         if (rsq < cut_coulsq[itype][jtype]) {
 
           // charge-charge
           if (qtmp != 0.0 && q[j] != 0.0) {
             r3inv = r2inv*rinv;
             pre1 = qtmp*q[j]*r3inv;
 
             forcecoulx += pre1*delx;
             forcecouly += pre1*dely;
             forcecoulz += pre1*delz;
           }
 
           // dipole-dipole; pdotp, pidotr, pjdotr, r3inv and r5inv are
           // reused by the energy block further down
           if (mu[i][3] > 0.0 && mu[j][3] > 0.0) {
             r3inv = r2inv*rinv;
             r5inv = r3inv*r2inv;
             r7inv = r5inv*r2inv;
 
             pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2];
             pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz;
             pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
 
             pre1 = 3.0*r5inv*pdotp - 15.0*r7inv*pidotr*pjdotr;
             pre2 = 3.0*r5inv*pjdotr;
             pre3 = 3.0*r5inv*pidotr;
             pre4 = -1.0*r3inv;
 
             forcecoulx += pre1*delx + pre2*mu[i][0] + pre3*mu[j][0];
             forcecouly += pre1*dely + pre2*mu[i][1] + pre3*mu[j][1];
             forcecoulz += pre1*delz + pre2*mu[i][2] + pre3*mu[j][2];
 
             crossx = pre4 * (mu[i][1]*mu[j][2] - mu[i][2]*mu[j][1]);
             crossy = pre4 * (mu[i][2]*mu[j][0] - mu[i][0]*mu[j][2]);
             crossz = pre4 * (mu[i][0]*mu[j][1] - mu[i][1]*mu[j][0]);
 
             tixcoul += crossx + pre2 * (mu[i][1]*delz - mu[i][2]*dely);
             tiycoul += crossy + pre2 * (mu[i][2]*delx - mu[i][0]*delz);
             tizcoul += crossz + pre2 * (mu[i][0]*dely - mu[i][1]*delx);
             tjxcoul += -crossx + pre3 * (mu[j][1]*delz - mu[j][2]*dely);
             tjycoul += -crossy + pre3 * (mu[j][2]*delx - mu[j][0]*delz);
             tjzcoul += -crossz + pre3 * (mu[j][0]*dely - mu[j][1]*delx);
           }
 
           // dipole on i, charge on j: torque acts on i only
           if (mu[i][3] > 0.0 && q[j] != 0.0) {
             r3inv = r2inv*rinv;
             r5inv = r3inv*r2inv;
             pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz;
             pre1 = 3.0*q[j]*r5inv * pidotr;
             pre2 = q[j]*r3inv;
 
             forcecoulx += pre2*mu[i][0] - pre1*delx;
             forcecouly += pre2*mu[i][1] - pre1*dely;
             forcecoulz += pre2*mu[i][2] - pre1*delz;
             tixcoul += pre2 * (mu[i][1]*delz - mu[i][2]*dely);
             tiycoul += pre2 * (mu[i][2]*delx - mu[i][0]*delz);
             tizcoul += pre2 * (mu[i][0]*dely - mu[i][1]*delx);
           }
 
           // charge on i, dipole on j: torque acts on j only
           if (mu[j][3] > 0.0 && qtmp != 0.0) {
             r3inv = r2inv*rinv;
             r5inv = r3inv*r2inv;
             pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
             pre1 = 3.0*qtmp*r5inv * pjdotr;
             pre2 = qtmp*r3inv;
 
             forcecoulx += pre1*delx - pre2*mu[j][0];
             forcecouly += pre1*dely - pre2*mu[j][1];
             forcecoulz += pre1*delz - pre2*mu[j][2];
             tjxcoul += -pre2 * (mu[j][1]*delz - mu[j][2]*dely);
             tjycoul += -pre2 * (mu[j][2]*delx - mu[j][0]*delz);
             tjzcoul += -pre2 * (mu[j][0]*dely - mu[j][1]*delx);
           }
         }
 
         // LJ interaction
         // forcelj already includes factor_lj and r2inv; r6inv is reused
         // by the energy block
 
         if (rsq < cut_ljsq[itype][jtype]) {
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
           forcelj *= factor_lj * r2inv;
         } else forcelj = 0.0;
 
         // total force
         // fq applies the special-bond scaling and qqrd2e to all
         // electrostatic contributions
 
         fq = factor_coul*qqrd2e;
         fx = fq*forcecoulx + delx*forcelj;
         fy = fq*forcecouly + dely*forcelj;
         fz = fq*forcecoulz + delz*forcelj;
 
         // force & torque accumulation
 
         f[i][0] += fx;
         f[i][1] += fy;
         f[i][2] += fz;
         torque[i][0] += fq*tixcoul;
         torque[i][1] += fq*tiycoul;
         torque[i][2] += fq*tizcoul;
 
         // update j only with newton_pair on or when j is a local atom;
         // the torque on j is its own sum, not the negated torque on i
         if (newton_pair || j < nlocal) {
           f[j][0] -= fx;
           f[j][1] -= fy;
           f[j][2] -= fz;
           torque[j][0] += fq*tjxcoul;
           torque[j][1] += fq*tjycoul;
           torque[j][2] += fq*tjzcoul;
         }
 
         if (eflag) {
           // each energy term is guarded by the same condition as the
           // force branch that computed the intermediates it reads
           if (rsq < cut_coulsq[itype][jtype]) {
             ecoul = qtmp*q[j]*rinv;
             if (mu[i][3] > 0.0 && mu[j][3] > 0.0)
               ecoul += r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr;
             if (mu[i][3] > 0.0 && q[j] != 0.0)
               ecoul += -q[j]*r3inv*pidotr;
             if (mu[j][3] > 0.0 && qtmp != 0.0)
               ecoul += qtmp*r3inv*pjdotr;
             ecoul *= factor_coul*qqrd2e;
           } else ecoul = 0.0;
 
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
               offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         // the force is not along the separation vector, so the tally
         // takes explicit x/y/z components
         if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
                                  evdwl,ecoul,fx,fy,fz,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    create the per-type-pair tables, each (ntypes+1) x (ntypes+1);
    only the upper triangle 1 <= i <= j <= ntypes of setflag is cleared here
 ------------------------------------------------------------------------- */
 
 void PairLJCutDipoleCut::allocate()
 {
   allocated = 1;
 
   const int ntypes = atom->ntypes;
   const int dim = ntypes + 1;
 
   // setflag marks which type pairs have had coefficients assigned
 
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   // cutoffs, LJ parameters and the coefficients derived from them
 
   memory->create(cut_lj,dim,dim,"pair:cut_lj");
   memory->create(cut_ljsq,dim,dim,"pair:cut_ljsq");
   memory->create(cut_coul,dim,dim,"pair:cut_coul");
   memory->create(cut_coulsq,dim,dim,"pair:cut_coulsq");
   memory->create(epsilon,dim,dim,"pair:epsilon");
   memory->create(sigma,dim,dim,"pair:sigma");
   memory->create(lj1,dim,dim,"pair:lj1");
   memory->create(lj2,dim,dim,"pair:lj2");
   memory->create(lj3,dim,dim,"pair:lj3");
   memory->create(lj4,dim,dim,"pair:lj4");
   memory->create(offset,dim,dim,"pair:offset");
 }
 
 /* ----------------------------------------------------------------------
    global settings
    arg[0] = global LJ cutoff
    arg[1] = global Coulomb cutoff (optional, defaults to the LJ cutoff)
 ------------------------------------------------------------------------- */
 
 void PairLJCutDipoleCut::settings(int narg, char **arg)
 {
   if (narg < 1 || narg > 2)
     error->all(FLERR,"Incorrect args in pair_style command");
 
   if (strcmp(update->unit_style,"electron") == 0)
     error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles");
 
   cut_lj_global = force->numeric(FLERR,arg[0]);
   if (narg == 1) cut_coul_global = cut_lj_global;
   else cut_coul_global = force->numeric(FLERR,arg[1]);
 
   // reset cutoffs that have been explicitly set
   // the inner loop starts at j = i: coeff() also flags like-type (i,i)
   // pairs, and skipping the diagonal would leave their cutoffs at the
   // values from before this call
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) {
           cut_lj[i][j] = cut_lj_global;
           cut_coul[i][j] = cut_coul_global;
         }
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairLJCutDipoleCut::coeff(int narg, char **arg)
 {
   if (narg < 4 || narg > 6)
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double epsilon_one = force->numeric(FLERR,arg[2]);
   double sigma_one = force->numeric(FLERR,arg[3]);
 
   double cut_lj_one = cut_lj_global;
   double cut_coul_one = cut_coul_global;
   if (narg >= 5) cut_coul_one = cut_lj_one = force->numeric(FLERR,arg[4]);
   if (narg == 6) cut_coul_one = force->numeric(FLERR,arg[5]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       epsilon[i][j] = epsilon_one;
       sigma[i][j] = sigma_one;
       cut_lj[i][j] = cut_lj_one;
       cut_coul[i][j] = cut_coul_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style:
    check that the atom style provides q, mu and torque, then pass this
    pair instance to neighbor->request()
 ------------------------------------------------------------------------- */
 
 void PairLJCutDipoleCut::init_style()
 {
   // abort unless charge, dipole and torque attributes are all present
   if (!atom->q_flag || !atom->mu_flag || !atom->torque_flag)
     error->all(FLERR,"Pair dipole/cut requires atom attributes q, mu, torque");
 
   // this patch replaces the one-argument request() call with the
   // two-argument form that also passes instance_me
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
    fills the squared cutoffs, the lj1-lj4 coefficients and the energy
    offset for (i,j), mirrors them to (j,i), and returns the larger of the
    LJ and Coulomb cutoffs for the pair
 ------------------------------------------------------------------------- */
 
 double PairLJCutDipoleCut::init_one(int i, int j)
 {
   // no explicit coefficients for this pair: mix them from the i,i and j,j
   // entries
   if (setflag[i][j] == 0) {
     epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j],
                                sigma[i][i],sigma[j][j]);
     sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
     cut_lj[i][j] = mix_distance(cut_lj[i][i],cut_lj[j][j]);
     cut_coul[i][j] = mix_distance(cut_coul[i][i],cut_coul[j][j]);
   }
 
   double cut = MAX(cut_lj[i][j],cut_coul[i][j]);
   cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
   cut_coulsq[i][j] = cut_coul[i][j] * cut_coul[i][j];
 
   // lj1,lj2 are the 48*eps*sigma^12 and 24*eps*sigma^6 force prefactors;
   // lj3,lj4 are the 4*eps*sigma^12 and 4*eps*sigma^6 energy prefactors
   lj1[i][j] = 48.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj2[i][j] = 24.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
   lj3[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj4[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
 
   // with offset_flag set, offset is the 12-6 LJ energy evaluated at
   // r = cut_lj; compute() subtracts it from evdwl
   if (offset_flag) {
     double ratio = sigma[i][j] / cut_lj[i][j];
     offset[i][j] = 4.0 * epsilon[i][j] * (pow(ratio,12.0) - pow(ratio,6.0));
   } else offset[i][j] = 0.0;
 
   // mirror the derived quantities into the (j,i) entries;
   // epsilon, sigma, cut_lj and cut_coul themselves are not mirrored here
   cut_ljsq[j][i] = cut_ljsq[i][j];
   cut_coulsq[j][i] = cut_coulsq[i][j];
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   lj4[j][i] = lj4[i][j];
   offset[j][i] = offset[i][j];
 
   return cut;
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
    layout: global settings, then for every pair i <= j the setflag value
    followed (only if set) by epsilon, sigma, cut_lj, cut_coul
 ------------------------------------------------------------------------- */
 
 void PairLJCutDipoleCut::write_restart(FILE *fp)
 {
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
 
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
 
       // pairs without explicit coefficients store the flag only
       if (!setflag[itype][jtype]) continue;
 
       fwrite(&epsilon[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigma[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut_lj[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut_coul[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
    reads the records in the order write_restart() emits them: global
    settings first, then per pair i <= j the setflag value and, if set,
    epsilon, sigma, cut_lj, cut_coul
 ------------------------------------------------------------------------- */
 
 void PairLJCutDipoleCut::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
 
   allocate();
 
   // NOTE(review): the fread() return values below are not checked, so a
   // truncated restart file would go unnoticed here
   int i,j;
   int me = comm->me;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       // only rank 0 touches the file; every value is then broadcast
       if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
       MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
       if (setflag[i][j]) {
         if (me == 0) {
           fread(&epsilon[i][j],sizeof(double),1,fp);
           fread(&sigma[i][j],sizeof(double),1,fp);
           fread(&cut_lj[i][j],sizeof(double),1,fp);
           fread(&cut_coul[i][j],sizeof(double),1,fp);
         }
         MPI_Bcast(&epsilon[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&sigma[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&cut_lj[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&cut_coul[i][j],1,MPI_DOUBLE,0,world);
       }
     }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
    emits the global settings in a fixed order: cut_lj_global,
    cut_coul_global, offset_flag, mix_flag
    read_restart_settings() reads them back in the same order
 ------------------------------------------------------------------------- */
 
 void PairLJCutDipoleCut::write_restart_settings(FILE *fp)
 {
   fwrite(&cut_lj_global,sizeof(double),1,fp);
   fwrite(&cut_coul_global,sizeof(double),1,fp);
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
    reads cut_lj_global, cut_coul_global, offset_flag, mix_flag in the order
    write_restart_settings() wrote them, then broadcasts each from rank 0
 ------------------------------------------------------------------------- */
 
 void PairLJCutDipoleCut::read_restart_settings(FILE *fp)
 {
   // NOTE(review): fread() return values are not checked
   if (comm->me == 0) {
     fread(&cut_lj_global,sizeof(double),1,fp);
     fread(&cut_coul_global,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
   }
   MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
 }
diff --git a/src/DIPOLE/pair_lj_cut_dipole_long.cpp b/src/DIPOLE/pair_lj_cut_dipole_long.cpp
index 2796b54ac..60190ce90 100755
--- a/src/DIPOLE/pair_lj_cut_dipole_long.cpp
+++ b/src/DIPOLE/pair_lj_cut_dipole_long.cpp
@@ -1,559 +1,559 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    www.cs.sandia.gov/~sjplimp/lammps.html
    Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_lj_cut_dipole_long.h"
 #include "atom.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "force.h"
 #include "kspace.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 #include "update.h"
 #include "string.h"
 
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 /* ----------------------------------------------------------------------
    constructor: set the capability flags of this pair style
 ------------------------------------------------------------------------- */
 
 PairLJCutDipoleLong::PairLJCutDipoleLong(LAMMPS *lmp) : Pair(lmp)
 {
   // switched off
   single_enable = 0;
   respa_enable = 0;
 
   // switched on
   dipoleflag = 1;
   ewaldflag = 1;
 }
 
 /* ----------------------------------------------------------------------
    release every array created by allocate();
    nothing to free if allocate() never ran
 ------------------------------------------------------------------------- */
 
 PairLJCutDipoleLong::~PairLJCutDipoleLong()
 {
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   memory->destroy(cut_lj);
   memory->destroy(cut_ljsq);
   memory->destroy(epsilon);
   memory->destroy(sigma);
   memory->destroy(lj1);
   memory->destroy(lj2);
   memory->destroy(lj3);
   memory->destroy(lj4);
   memory->destroy(offset);
 }
 
 /* ----------------------------------------------------------------------
    compute forces, torques and (if requested) energy/virial for all pairs
    in the neighbor list
    inside cut_coulsq the charge/dipole terms are damped with an erfc-based
    factor built from g_ewald; inside cut_ljsq a 12-6 LJ term is added
    eflag/vflag are forwarded to ev_setup() and control the tallying
 ------------------------------------------------------------------------- */
 
 void PairLJCutDipoleLong::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz;
   double rsq,r,rinv,r2inv,r6inv;
   double forcecoulx,forcecouly,forcecoulz,fforce;
   double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul;
   double fx,fy,fz,fdx,fdy,fdz,fax,fay,faz;
   double pdotp,pidotr,pjdotr,pre1,pre2,pre3;
   double grij,expm2,t,erfc;
   double g0,g1,g2,b0,b1,b2,b3,d0,d1,d2,d3;
   double zdix,zdiy,zdiz,zdjx,zdjy,zdjz,zaix,zaiy,zaiz,zajx,zajy,zajz;
   double g0b1_g1b2_g2b3,g0d1_g1d2_g2d3;
   double forcelj,factor_coul,factor_lj,facm1;
   double evdwl,ecoul;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   double **mu = atom->mu;
   double **torque = atom->torque;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // prefactors of the exp(-(g_ewald*r)^2) terms in the b1-b3 / d1-d3
   // recursions below; they do not depend on the pair
 
   pre1 = 2.0 * g_ewald / MY_PIS;
   pre2 = 4.0 * pow(g_ewald,3.0) / MY_PIS;
   pre3 = 8.0 * pow(g_ewald,5.0) / MY_PIS;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         rinv = sqrt(r2inv);
 
         if (rsq < cut_coulsq) {
           r = sqrt(rsq);
           grij = g_ewald * r;
           expm2 = exp(-grij*grij);
 
           // polynomial approximation of the complementary error function
 
           t = 1.0 / (1.0 + EWALD_P*grij);
           erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
 
           pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2];
           pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz;
           pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
 
           g0 = qtmp*q[j];
           g1 = qtmp*pjdotr - q[j]*pidotr + pdotp;
           g2 = -pidotr*pjdotr;
 
           // term weighted by factor_coul, built on erfc
 
           if (factor_coul > 0.0) {
             b0 = erfc * rinv;
             b1 = (b0 + pre1*expm2) * r2inv;
             b2 = (3.0*b1 + pre2*expm2) * r2inv;
             b3 = (5.0*b2 + pre3*expm2) * r2inv;
 
             g0b1_g1b2_g2b3 = g0*b1 + g1*b2 + g2*b3;
             fdx = delx * g0b1_g1b2_g2b3 -
               b1 * (qtmp*mu[j][0] - q[j]*mu[i][0]) +
               b2 * (pjdotr*mu[i][0] + pidotr*mu[j][0]);
             fdy = dely * g0b1_g1b2_g2b3 -
               b1 * (qtmp*mu[j][1] - q[j]*mu[i][1]) +
               b2 * (pjdotr*mu[i][1] + pidotr*mu[j][1]);
             fdz = delz * g0b1_g1b2_g2b3 -
               b1 * (qtmp*mu[j][2] - q[j]*mu[i][2]) +
               b2 * (pjdotr*mu[i][2] + pidotr*mu[j][2]);
 
             zdix = delx * (q[j]*b1 + b2*pjdotr) - b1*mu[j][0];
             zdiy = dely * (q[j]*b1 + b2*pjdotr) - b1*mu[j][1];
             zdiz = delz * (q[j]*b1 + b2*pjdotr) - b1*mu[j][2];
             zdjx = delx * (-qtmp*b1 + b2*pidotr) - b1*mu[i][0];
             zdjy = dely * (-qtmp*b1 + b2*pidotr) - b1*mu[i][1];
             zdjz = delz * (-qtmp*b1 + b2*pidotr) - b1*mu[i][2];
 
             if (factor_coul < 1.0) {
               fdx *= factor_coul;
               fdy *= factor_coul;
               fdz *= factor_coul;
               zdix *= factor_coul;
               zdiy *= factor_coul;
               zdiz *= factor_coul;
               zdjx *= factor_coul;
               zdjy *= factor_coul;
               zdjz *= factor_coul;
             }
           } else {
             // this branch contributes nothing; zero b0-b2 as well so the
             // energy expression below never reads unset or stale values
             b0 = b1 = b2 = 0.0;
             fdx = fdy = fdz = 0.0;
             zdix = zdiy = zdiz = 0.0;
             zdjx = zdjy = zdjz = 0.0;
           }
 
           // term weighted by (1 - factor_coul), built on (erfc - 1)
 
           if (factor_coul < 1.0) {
             d0 = (erfc - 1.0) * rinv;
             d1 = (d0 + pre1*expm2) * r2inv;
             d2 = (3.0*d1 + pre2*expm2) * r2inv;
             d3 = (5.0*d2 + pre3*expm2) * r2inv;
 
             g0d1_g1d2_g2d3 = g0*d1 + g1*d2 + g2*d3;
             fax = delx * g0d1_g1d2_g2d3 -
               d1 * (qtmp*mu[j][0] - q[j]*mu[i][0]) +
               d2 * (pjdotr*mu[i][0] + pidotr*mu[j][0]);
             fay = dely * g0d1_g1d2_g2d3 -
               d1 * (qtmp*mu[j][1] - q[j]*mu[i][1]) +
               d2 * (pjdotr*mu[i][1] + pidotr*mu[j][1]);
             faz = delz * g0d1_g1d2_g2d3 -
               d1 * (qtmp*mu[j][2] - q[j]*mu[i][2]) +
               d2 * (pjdotr*mu[i][2] + pidotr*mu[j][2]);
 
             zaix = delx * (q[j]*d1 + d2*pjdotr) - d1*mu[j][0];
             zaiy = dely * (q[j]*d1 + d2*pjdotr) - d1*mu[j][1];
             zaiz = delz * (q[j]*d1 + d2*pjdotr) - d1*mu[j][2];
             zajx = delx * (-qtmp*d1 + d2*pidotr) - d1*mu[i][0];
             zajy = dely * (-qtmp*d1 + d2*pidotr) - d1*mu[i][1];
             zajz = delz * (-qtmp*d1 + d2*pidotr) - d1*mu[i][2];
 
             if (factor_coul > 0.0) {
               facm1 = 1.0 - factor_coul;
               fax *= facm1;
               fay *= facm1;
               faz *= facm1;
               zaix *= facm1;
               zaiy *= facm1;
               zaiz *= facm1;
               zajx *= facm1;
               zajy *= facm1;
               zajz *= facm1;
             }
           } else {
             // keep d0-d2 defined for the same reason as b0-b2 above
             d0 = d1 = d2 = 0.0;
             fax = fay = faz = 0.0;
             zaix = zaiy = zaiz = 0.0;
             zajx = zajy = zajz = 0.0;
           }
 
           forcecoulx = fdx + fax;
           forcecouly = fdy + fay;
           forcecoulz = fdz + faz;
 
           // torque = mu x (summed z vectors) for each atom of the pair
 
           tixcoul = mu[i][1]*(zdiz + zaiz) - mu[i][2]*(zdiy + zaiy);
           tiycoul = mu[i][2]*(zdix + zaix) - mu[i][0]*(zdiz + zaiz);
           tizcoul = mu[i][0]*(zdiy + zaiy) - mu[i][1]*(zdix + zaix);
           tjxcoul = mu[j][1]*(zdjz + zajz) - mu[j][2]*(zdjy + zajy);
           tjycoul = mu[j][2]*(zdjx + zajx) - mu[j][0]*(zdjz + zajz);
           tjzcoul = mu[j][0]*(zdjy + zajy) - mu[j][1]*(zdjx + zajx);
 
         } else {
           forcecoulx = forcecouly = forcecoulz = 0.0;
           tixcoul = tiycoul = tizcoul = 0.0;
           tjxcoul = tjycoul = tjzcoul = 0.0;
         }
 
         // LJ interaction
 
         if (rsq < cut_ljsq[itype][jtype]) {
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
           fforce = factor_lj * forcelj*r2inv;
         } else fforce = 0.0;
 
         // total force
 
         fx = qqrd2e*forcecoulx + delx*fforce;
         fy = qqrd2e*forcecouly + dely*fforce;
         fz = qqrd2e*forcecoulz + delz*fforce;
 
         // force & torque accumulation
 
         f[i][0] += fx;
         f[i][1] += fy;
         f[i][2] += fz;
         torque[i][0] += qqrd2e*tixcoul;
         torque[i][1] += qqrd2e*tiycoul;
         torque[i][2] += qqrd2e*tizcoul;
 
         if (newton_pair || j < nlocal) {
           f[j][0] -= fx;
           f[j][1] -= fy;
           f[j][2] -= fz;
           torque[j][0] += qqrd2e*tjxcoul;
           torque[j][1] += qqrd2e*tjycoul;
           torque[j][2] += qqrd2e*tjzcoul;
         }
 
         if (eflag) {
           if (rsq < cut_coulsq) {
             ecoul = qqrd2e*(b0*g0 + b1*g1 + b2*g2);
             if (factor_coul < 1.0) {
               ecoul *= factor_coul;
               ecoul += (1-factor_coul) * qqrd2e * (d0*g0 + d1*g1 + d2*g2);
             }
           } else ecoul = 0.0;
 
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
               offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
                                  evdwl,ecoul,fx,fy,fz,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    create the per-type-pair tables, each (ntypes+1) x (ntypes+1);
    only the upper triangle 1 <= i <= j <= ntypes of setflag is cleared here
 ------------------------------------------------------------------------- */
 
 void PairLJCutDipoleLong::allocate()
 {
   allocated = 1;
 
   const int ntypes = atom->ntypes;
   const int dim = ntypes + 1;
 
   // setflag marks which type pairs have had coefficients assigned
 
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   // LJ cutoffs, parameters and the coefficients derived from them
 
   memory->create(cut_lj,dim,dim,"pair:cut_lj");
   memory->create(cut_ljsq,dim,dim,"pair:cut_ljsq");
   memory->create(epsilon,dim,dim,"pair:epsilon");
   memory->create(sigma,dim,dim,"pair:sigma");
   memory->create(lj1,dim,dim,"pair:lj1");
   memory->create(lj2,dim,dim,"pair:lj2");
   memory->create(lj3,dim,dim,"pair:lj3");
   memory->create(lj4,dim,dim,"pair:lj4");
   memory->create(offset,dim,dim,"pair:offset");
 }
 
 /* ----------------------------------------------------------------------
    global settings
    arg[0] = global LJ cutoff
    arg[1] = Coulomb cutoff (optional, defaults to the LJ cutoff)
 ------------------------------------------------------------------------- */
 
 void PairLJCutDipoleLong::settings(int narg, char **arg)
 {
   if (narg < 1 || narg > 2)
     error->all(FLERR,"Incorrect args in pair_style command");
 
   cut_lj_global = force->numeric(FLERR,arg[0]);
   if (narg == 1) cut_coul = cut_lj_global;
   else cut_coul = force->numeric(FLERR,arg[1]);
 
   // reset cutoffs that have been explicitly set
   // the inner loop starts at j = i: coeff() also flags like-type (i,i)
   // pairs, and skipping the diagonal would leave their LJ cutoff at the
   // value from before this call
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut_lj[i][j] = cut_lj_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairLJCutDipoleLong::coeff(int narg, char **arg)
 {
   if (narg < 4 || narg > 5) 
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double epsilon_one = force->numeric(FLERR,arg[2]);
   double sigma_one = force->numeric(FLERR,arg[3]);
 
   double cut_lj_one = cut_lj_global;
   if (narg == 5) cut_lj_one = force->numeric(FLERR,arg[4]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       epsilon[i][j] = epsilon_one;
       sigma[i][j] = sigma_one;
       cut_lj[i][j] = cut_lj_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairLJCutDipoleLong::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) {
     epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j],
 			       sigma[i][i],sigma[j][j]);
     sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
     cut_lj[i][j] = mix_distance(cut_lj[i][i],cut_lj[j][j]);
   }
 
   double cut = MAX(cut_lj[i][j],cut_coul);
   cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
 
   lj1[i][j] = 48.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj2[i][j] = 24.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
   lj3[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj4[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
      
   if (offset_flag) {
     double ratio = sigma[i][j] / cut_lj[i][j];
     offset[i][j] = 4.0 * epsilon[i][j] * (pow(ratio,12.0) - pow(ratio,6.0));
   } else offset[i][j] = 0.0;
   
   cut_ljsq[j][i] = cut_ljsq[i][j];
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   lj4[j][i] = lj4[i][j];
   offset[j][i] = offset[i][j];
 
   return cut;
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style:
    check atom attributes and units, require a KSpace style and copy its
    g_ewald, square the Coulomb cutoff, then pass this pair instance to
    neighbor->request()
 ------------------------------------------------------------------------- */
 
 void PairLJCutDipoleLong::init_style()
 {
   // abort unless charge, dipole and torque attributes are all present
   if (!atom->q_flag || !atom->mu_flag || !atom->torque_flag)
     error->all(FLERR,"Pair dipole/long requires atom attributes q, mu, torque");
 
   if (strcmp(update->unit_style,"electron") == 0)
     error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles");
 
   // ensure use of KSpace long-range solver, set g_ewald
 
   if (force->kspace == NULL) 
     error->all(FLERR,"Pair style requires a KSpace style");
 
   g_ewald = force->kspace->g_ewald;
 
   // compute() compares rsq against this squared cutoff
   cut_coulsq = cut_coul * cut_coul;
 
   // this patch replaces the one-argument request() call with the
   // two-argument form that also passes instance_me
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
    layout: global settings, then for every pair i <= j the setflag value
    followed (only if set) by epsilon, sigma, cut_lj
 ------------------------------------------------------------------------- */
 
 void PairLJCutDipoleLong::write_restart(FILE *fp)
 {
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
 
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
 
       // pairs without explicit coefficients store the flag only
       if (!setflag[itype][jtype]) continue;
 
       fwrite(&epsilon[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigma[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut_lj[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
    consumes the records in the order write_restart() emits them
 ------------------------------------------------------------------------- */
 
 void PairLJCutDipoleLong::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
 
   allocate();
 
   const int ntypes = atom->ntypes;
   const bool reader = (comm->me == 0);
 
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       // rank 0 reads the flag, every rank receives it
       if (reader) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
 
       // unset pairs carry no coefficient record
       if (!setflag[itype][jtype]) continue;
 
       if (reader) {
         fread(&epsilon[itype][jtype],sizeof(double),1,fp);
         fread(&sigma[itype][jtype],sizeof(double),1,fp);
         fread(&cut_lj[itype][jtype],sizeof(double),1,fp);
       }
       MPI_Bcast(&epsilon[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&sigma[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut_lj[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
    emits the global settings in a fixed order: cut_lj_global, cut_coul,
    offset_flag, mix_flag
    read_restart_settings() reads them back in the same order
 ------------------------------------------------------------------------- */
 
 void PairLJCutDipoleLong::write_restart_settings(FILE *fp)
 {
   fwrite(&cut_lj_global,sizeof(double),1,fp);
   fwrite(&cut_coul,sizeof(double),1,fp);
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
    reads cut_lj_global, cut_coul, offset_flag, mix_flag in the order
    write_restart_settings() wrote them, then broadcasts each from rank 0
 ------------------------------------------------------------------------- */
 
 void PairLJCutDipoleLong::read_restart_settings(FILE *fp)
 {
   // NOTE(review): fread() return values are not checked
   if (comm->me == 0) {
     fread(&cut_lj_global,sizeof(double),1,fp);
     fread(&cut_coul,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
   }
   MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
 }
 
 /* ----------------------------------------------------------------------
    return a pointer to an internal value selected by name
    recognised names: "cut_coul", "ewald_order", "ewald_mix"
    dim is set to 0 for every recognised name; an unknown name returns
    NULL and leaves dim untouched
 ------------------------------------------------------------------------- */
 
 void *PairLJCutDipoleLong::extract(const char *str, int &dim)
 {
   if (strcmp(str,"cut_coul") == 0) {
     dim = 0;
     return (void *) &cut_coul;
   }
 
   if (strcmp(str,"ewald_order") == 0) {
     // bits 1 and 3 set
     ewald_order = (1<<1) | (1<<3);
     dim = 0;
     return (void *) &ewald_order;
   }
 
   if (strcmp(str,"ewald_mix") == 0) {
     dim = 0;
     return (void *) &mix_flag;
   }
 
   return NULL;
 }
diff --git a/src/DIPOLE/pair_lj_long_dipole_long.cpp b/src/DIPOLE/pair_lj_long_dipole_long.cpp
index b476cfcee..7253dd3c1 100755
--- a/src/DIPOLE/pair_lj_long_dipole_long.cpp
+++ b/src/DIPOLE/pair_lj_long_dipole_long.cpp
@@ -1,682 +1,682 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Pieter J. in 't Veld and Stan Moore (Sandia)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "math_const.h"
 #include "math_vector.h"
 #include "pair_lj_long_dipole_long.h"
 #include "atom.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "force.h"
 #include "kspace.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 // ----------------------------------------------------------------------
 
 PairLJLongDipoleLong::PairLJLongDipoleLong(LAMMPS *lmp) : Pair(lmp)
 {
   // capability flags switched on for this style
   // (their meaning is defined by the Pair base class)
   dipoleflag = 1;
   ewaldflag = 1;
   dispersionflag = 1;

   // capability flags switched off for this style
   single_enable = 0;
   respa_enable = 0;
 }
 
 // ----------------------------------------------------------------------
 // global settings
 // ----------------------------------------------------------------------
 
 void PairLJLongDipoleLong::options(char **arg, int order)
 {
   const char *option[] = {"long", "cut", "off", NULL};
   int i;
 
   if (!*arg) error->all(FLERR,"Illegal pair_style lj/long/dipole/long command");
   for (i=0; option[i]&&strcmp(arg[0], option[i]); ++i);
   switch (i) {
     default: error->all(FLERR,"Illegal pair_style lj/long/dipole/long command");
     case 0: ewald_order |= 1<<order; break;		// set kspace r^-order
     case 2: ewald_off |= 1<<order;			// turn r^-order off
     case 1: break;
   }
 }
 
 // Parse the pair_style arguments:
 //   arg[0]  keyword for the r^-6 term
 //   arg[1]  keyword shared by the r^-3 and r^-1 terms
 //   arg[2]  global LJ cutoff
 //   arg[3]  optional second cutoff stored in cut_coul
 //           (cut_coul defaults to the LJ cutoff when absent)
 // Resets ewald_order/ewald_off before the keywords are interpreted.
 void PairLJLongDipoleLong::settings(int narg, char **arg)
 {
   if (narg != 3 && narg != 4) error->all(FLERR,"Illegal pair_style command");
 
   ewald_off = 0;
   ewald_order = 0;
   options(arg, 6);
   // the next two calls both look at arg[1]: orders 3 and 1 share one keyword
   options(++arg, 3);
   options(arg, 1);
   if (!comm->me && ewald_order&(1<<6))
     error->warning(FLERR,"Geometric mixing assumed for 1/r^6 coefficients");
   // NOTE(review): bits 1 and 3 are driven by the same keyword above, so
   // ewald_order can apparently never equal exactly (1<<3)|(1<<6); this
   // warning looks unreachable - confirm the intended mask.
   if (!comm->me && ewald_order==((1<<3)|(1<<6)))
     error->warning(FLERR,
                    "Using largest cut-off for lj/long/dipole/long long long");
   if (!*(++arg))
     error->all(FLERR,"Cut-offs missing in pair_style lj/long/dipole/long");
   // bit 3 must be set in exactly one of ewald_order / ewald_off
   if (!((ewald_order^ewald_off)&(1<<3)))
     error->all(FLERR,
                "Coulombic cut not supported in pair_style lj/long/dipole/long");
   cut_lj_global = force->numeric(FLERR,*(arg++));
   // 74 == (1<<1)|(1<<3)|(1<<6), i.e. every order requested as "long"
   if (narg == 4 && (ewald_order==74))
     error->all(FLERR,"Only one cut-off allowed when requesting all long");
   if (narg == 4) cut_coul = force->numeric(FLERR,*(arg++));
   else cut_coul = cut_lj_global;
 
   // NOTE(review): this loop starts at j = i+1, so i == j entries are not
   // reset, and it writes cut_lj, which init_one() reassigns from
   // cut_lj_read / cut_lj_global - verify that the reset has the intended
   // effect.
   if (allocated) {					// reset explicit cuts
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i+1; j <= atom->ntypes; j++)
 	if (setflag[i][j]) cut_lj[i][j] = cut_lj_global;
   }
 }
 
 // ----------------------------------------------------------------------
 // free all arrays
 // ----------------------------------------------------------------------
 
 PairLJLongDipoleLong::~PairLJLongDipoleLong()
 {
   // (table cleanup is disabled:  if (ftable) free_tables();)

   // the per-type arrays only exist once allocate() has run
   if (!allocated) return;

   // bookkeeping arrays
   memory->destroy(setflag);
   memory->destroy(cutsq);

   // user-supplied and derived LJ cutoffs
   memory->destroy(cut_lj_read);
   memory->destroy(cut_lj);
   memory->destroy(cut_ljsq);

   // user-supplied and derived epsilon / sigma
   memory->destroy(epsilon_read);
   memory->destroy(epsilon);
   memory->destroy(sigma_read);
   memory->destroy(sigma);

   // precomputed LJ prefactors and energy offset
   memory->destroy(lj1);
   memory->destroy(lj2);
   memory->destroy(lj3);
   memory->destroy(lj4);
   memory->destroy(offset);
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairLJLongDipoleLong::allocate()
 {
   const int ntypes = atom->ntypes;
   const int dim = ntypes + 1;           // every table is (ntypes+1) x (ntypes+1)

   allocated = 1;

   // setflag: cleared for all type pairs with itype <= jtype
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       setflag[itype][jtype] = 0;
     }
   }

   memory->create(cutsq,dim,dim,"pair:cutsq");

   // values as read from input, plus the working copies derived from them
   memory->create(cut_lj_read,dim,dim,"pair:cut_lj_read");
   memory->create(cut_lj,dim,dim,"pair:cut_lj");
   memory->create(cut_ljsq,dim,dim,"pair:cut_ljsq");
   memory->create(epsilon_read,dim,dim,"pair:epsilon_read");
   memory->create(epsilon,dim,dim,"pair:epsilon");
   memory->create(sigma_read,dim,dim,"pair:sigma_read");
   memory->create(sigma,dim,dim,"pair:sigma");

   // LJ prefactors and energy offset filled in by init_one()
   memory->create(lj1,dim,dim,"pair:lj1");
   memory->create(lj2,dim,dim,"pair:lj2");
   memory->create(lj3,dim,dim,"pair:lj3");
   memory->create(lj4,dim,dim,"pair:lj4");
   memory->create(offset,dim,dim,"pair:offset");
 }
 
 /* ----------------------------------------------------------------------
    extract protected data from object
 ------------------------------------------------------------------------- */
 
 // Look up an internal quantity by name.
 //   id  : name of the requested quantity
 //   dim : set to 2 for the first three names ("B", "sigma", "epsilon"),
 //         to 0 for every other name, including unknown ones
 // Returns the matching pointer, or NULL when the name is unknown.
 void *PairLJLongDipoleLong::extract(const char *id, int &dim)
 {
   // the two tables are parallel: names[k] <-> targets[k]
   const char *names[] = {
     "B", "sigma", "epsilon", "ewald_order", "ewald_cut", "ewald_mix",
     "cut_coul", "cut_vdwl", NULL};
   void *targets[] = {
     lj4, sigma, epsilon, &ewald_order, &cut_coul, &mix_flag, &cut_coul,
     &cut_lj_global, NULL};

   // stop at the first matching name or at the NULL terminator
   int slot = 0;
   while (names[slot] && strcmp(names[slot], id) != 0) ++slot;

   dim = (slot <= 2) ? 2 : 0;
   return targets[slot];
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairLJLongDipoleLong::coeff(int narg, char **arg)
 {
   if (narg < 4 || narg > 5) 
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double epsilon_one = force->numeric(FLERR,arg[2]);
   double sigma_one = force->numeric(FLERR,arg[3]);
 
   double cut_lj_one = cut_lj_global;
   if (narg == 5) cut_lj_one = force->numeric(FLERR,arg[4]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       epsilon_read[i][j] = epsilon_one;
       sigma_read[i][j] = sigma_one;
       cut_lj_read[i][j] = cut_lj_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 // Per-run checks for this pair style: rejects 'electron' units, verifies
 // the atom attributes needed by the requested orders, requests a neighbor
 // list, and checks that the active KSpace style is one of the accepted
 // names before copying its g_ewald.
 void PairLJLongDipoleLong::init_style()
 {
   // NULL-terminated lists of KSpace style names accepted for the
   // bit-3 and bit-6 long-range terms respectively
   const char *style3[] = {"ewald/disp", NULL};
   const char *style6[] = {"ewald/disp", NULL};
   int i;
 
   if (strcmp(update->unit_style,"electron") == 0)
     error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles");
 
   // require an atom style with charge defined
 
   if (!atom->q_flag && (ewald_order&(1<<1)))
     error->all(FLERR,
 	"Invoking coulombic in pair style lj/long/dipole/long requires atom attribute q");
   if (!atom->mu && (ewald_order&(1<<3)))
     error->all(FLERR,"Pair lj/long/dipole/long requires atom attributes mu, torque");
   if (!atom->torque && (ewald_order&(1<<3)))
     error->all(FLERR,"Pair lj/long/dipole/long requires atom attributes mu, torque");
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 
   cut_coulsq = cut_coul * cut_coul;
 
   // ensure use of KSpace long-range solver, set g_ewald
 
   // each loop below stops at the matching name or at the NULL terminator;
   // landing on the terminator means the active KSpace style is not accepted
   if (ewald_order&(1<<3)) {				// r^-1 kspace
     if (force->kspace == NULL) 
       error->all(FLERR,"Pair style is incompatible with KSpace style");
     for (i=0; style3[i]&&strcmp(force->kspace_style, style3[i]); ++i);
     if (!style3[i])
       error->all(FLERR,"Pair style is incompatible with KSpace style");
   }
   if (ewald_order&(1<<6)) {				// r^-6 kspace
     if (force->kspace == NULL) 
       error->all(FLERR,"Pair style is incompatible with KSpace style");
     for (i=0; style6[i]&&strcmp(force->kspace_style, style6[i]); ++i);
     if (!style6[i])
       error->all(FLERR,"Pair style is incompatible with KSpace style");
   }
   if (force->kspace) g_ewald = force->kspace->g_ewald;
 }
 
 /* ----------------------------------------------------------------------
    neighbor callback to inform pair style of neighbor list to use
    regular or rRESPA
 ------------------------------------------------------------------------- */
 
 // Record the neighbor list handed over by the neighbor code.
 //   id  : 0 selects the regular list, 1/2/3 the inner/middle/outer lists
 //   ptr : the list to remember
 // Any non-zero id is reported as an error after the pointer is stored.
 void PairLJLongDipoleLong::init_list(int id, NeighList *ptr)
 {
   switch (id) {
     case 0: list = ptr; break;
     case 1: listinner = ptr; break;
     case 2: listmiddle = ptr; break;
     case 3: listouter = ptr; break;
     default: break;                      // unknown ids store nothing
   }

   if (id != 0)
     error->all(FLERR,"Pair style lj/long/dipole/long does not currently support respa");
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairLJLongDipoleLong::init_one(int i, int j)
 {
   if ((ewald_order&(1<<6))||(setflag[i][j] == 0)) {
     epsilon[i][j] = mix_energy(epsilon_read[i][i],epsilon_read[j][j],
 			       sigma_read[i][i],sigma_read[j][j]);
     sigma[i][j] = mix_distance(sigma_read[i][i],sigma_read[j][j]);
     if (ewald_order&(1<<6))
       cut_lj[i][j] = cut_lj_global;
     else
       cut_lj[i][j] = mix_distance(cut_lj_read[i][i],cut_lj_read[j][j]);
   }
   else {
     sigma[i][j] = sigma_read[i][j];
     epsilon[i][j] = epsilon_read[i][j];
     cut_lj[i][j] = cut_lj_read[i][j];
   }
 
   double cut = MAX(cut_lj[i][j], cut_coul);
   cutsq[i][j] = cut*cut;
   cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
 
   lj1[i][j] = 48.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj2[i][j] = 24.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
   lj3[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj4[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
 
   // check interior rRESPA cutoff
 
   //if (cut_respa && MIN(cut_lj[i][j],cut_coul) < cut_respa[3])
     //error->all(FLERR,"Pair cutoff < Respa interior cutoff");
  
   if (offset_flag) {
     double ratio = sigma[i][j] / cut_lj[i][j];
     offset[i][j] = 4.0 * epsilon[i][j] * (pow(ratio,12.0) - pow(ratio,6.0));
   } else offset[i][j] = 0.0;
 
   cutsq[j][i] = cutsq[i][j];
   cut_ljsq[j][i] = cut_ljsq[i][j];
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   lj4[j][i] = lj4[i][j];
   offset[j][i] = offset[i][j];
 
   return cut;
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJLongDipoleLong::write_restart(FILE *fp)
 {
   // global settings first, then one record group per type pair (i <= j)
   write_restart_settings(fp);

   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);

       // coefficients are only stored for pairs that were explicitly set
       if (!setflag[itype][jtype]) continue;

       fwrite(&epsilon_read[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigma_read[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut_lj_read[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJLongDipoleLong::read_restart(FILE *fp)
 {
   // settings come first in the file; the tables are allocated afterwards
   read_restart_settings(fp);
   allocate();

   const bool is_reader = (comm->me == 0);   // only rank 0 reads from fp
   const int ntypes = atom->ntypes;

   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       if (is_reader) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);

       // coefficients follow only for pairs whose setflag is non-zero
       if (!setflag[itype][jtype]) continue;

       if (is_reader) {
         fread(&epsilon_read[itype][jtype],sizeof(double),1,fp);
         fread(&sigma_read[itype][jtype],sizeof(double),1,fp);
         fread(&cut_lj_read[itype][jtype],sizeof(double),1,fp);
       }
       MPI_Bcast(&epsilon_read[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&sigma_read[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut_lj_read[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJLongDipoleLong::write_restart_settings(FILE *fp)
 {
   // Raw binary records, one value each; read_restart_settings() reads
   // them back in exactly this order.
   const size_t nitems = 1;

   // cutoffs (doubles)
   fwrite(&cut_lj_global, sizeof(double), nitems, fp);
   fwrite(&cut_coul, sizeof(double), nitems, fp);

   // flags and the long-range order mask (ints)
   fwrite(&offset_flag, sizeof(int), nitems, fp);
   fwrite(&mix_flag, sizeof(int), nitems, fp);
   fwrite(&ewald_order, sizeof(int), nitems, fp);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJLongDipoleLong::read_restart_settings(FILE *fp)
 {
   // Rank 0 is the only rank that reads from fp; each value is then
   // broadcast from rank 0 to all ranks.
   const int root = 0;
   const bool is_reader = (comm->me == root);

   if (is_reader) {
     fread(&cut_lj_global, sizeof(double), 1, fp);
     fread(&cut_coul, sizeof(double), 1, fp);
     fread(&offset_flag, sizeof(int), 1, fp);
     fread(&mix_flag, sizeof(int), 1, fp);
     fread(&ewald_order, sizeof(int), 1, fp);
   }

   // collective calls: executed by all ranks, in the same order
   MPI_Bcast(&cut_lj_global, 1, MPI_DOUBLE, root, world);
   MPI_Bcast(&cut_coul, 1, MPI_DOUBLE, root, world);
   MPI_Bcast(&offset_flag, 1, MPI_INT, root, world);
   MPI_Bcast(&mix_flag, 1, MPI_INT, root, world);
   MPI_Bcast(&ewald_order, 1, MPI_INT, root, world);
 }
 
 /* ----------------------------------------------------------------------
    compute pair interactions
 ------------------------------------------------------------------------- */
 
 void PairLJLongDipoleLong::compute(int eflag, int vflag)
 {
   double evdwl,ecoul,fpair;
   evdwl = ecoul = 0.0;
 
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
   
   double **x = atom->x, *x0 = x[0];
   double **mu = atom->mu, *mu0 = mu[0], *imu, *jmu;
   double **tq = atom->torque, *tq0 = tq[0], *tqi;
   double **f = atom->f, *f0 = f[0], *fi = f0, fx, fy, fz;
   double *q = atom->q, qi = 0, qj;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   int i, j;
   int order3 = ewald_order&(1<<3), order6 = ewald_order&(1<<6);
   int *ineigh, *ineighn, *jneigh, *jneighn, typei, typej, ni;
   double *cutsqi, *cut_ljsqi, *lj1i, *lj2i, *lj3i, *lj4i, *offseti;
   double rsq, r2inv, force_coul, force_lj;
   double g2 = g_ewald*g_ewald, g6 = g2*g2*g2, g8 = g6*g2;
   double B0, B1, B2, B3, G0, G1, G2, mudi, mudj, muij;
   vector force_d = VECTOR_NULL, ti = VECTOR_NULL, tj = VECTOR_NULL;
   vector mui, muj, xi, d;
   
   double C1 = 2.0 * g_ewald / MY_PIS;
   double C2 = 2.0 * g2 * C1;
   double C3 = 2.0 * g2 * C2;
 
   ineighn = (ineigh = list->ilist)+list->inum;
 
   for (; ineigh<ineighn; ++ineigh) {			// loop over all neighs
     i = *ineigh; fi = f0+3*i; tqi = tq0+3*i;
     qi = q[i];				// initialize constants
     offseti = offset[typei = type[i]];
     lj1i = lj1[typei]; lj2i = lj2[typei]; lj3i = lj3[typei]; lj4i = lj4[typei];
     cutsqi = cutsq[typei]; cut_ljsqi = cut_ljsq[typei];
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
     memcpy(mui, imu = mu0+(i<<2), sizeof(vector));
     
     jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i];
 
     for (; jneigh<jneighn; ++jneigh) {			// loop over neighbors
       j = *jneigh;
       ni = sbmask(j);					// special index
       j &= NEIGHMASK;
       
       { register double *xj = x0+(j+(j<<1));
 	d[0] = xi[0] - xj[0];				// pair vector
 	d[1] = xi[1] - xj[1];
 	d[2] = xi[2] - xj[2]; }
 
       if ((rsq = vec_dot(d, d)) >= cutsqi[typej = type[j]]) continue;
       r2inv = 1.0/rsq;
 
       if (order3 && (rsq < cut_coulsq)) {		// dipole
 	memcpy(muj, jmu = mu0+(j<<2), sizeof(vector));
 	{						// series real space
 	  register double r = sqrt(rsq);
 	  register double x = g_ewald*r;
 	  register double f = exp(-x*x)*qqrd2e;
 
 	  B0 = 1.0/(1.0+EWALD_P*x);			// eqn 2.8
 	  B0 *= ((((A5*B0+A4)*B0+A3)*B0+A2)*B0+A1)*f/r;
 	  B1 = (B0 + C1 * f) * r2inv;
 	  B2 = (3.0*B1 + C2 * f) * r2inv;
 	  B3 = (5.0*B2 + C3 * f) * r2inv;
 
 	  mudi = mui[0]*d[0]+mui[1]*d[1]+mui[2]*d[2];
 	  mudj = muj[0]*d[0]+muj[1]*d[1]+muj[2]*d[2];
 	  muij = mui[0]*muj[0]+mui[1]*muj[1]+mui[2]*muj[2];
 	  G0 = qi*(qj = q[j]);				// eqn 2.10
 	  G1 = qi*mudj-qj*mudi+muij;
 	  G2 = -mudi*mudj;
 	  force_coul = G0*B1+G1*B2+G2*B3;
 	  
 	  mudi *= B2; mudj *= B2;			// torque contribs
 	  ti[0] = mudj*d[0]+(qj*d[0]-muj[0])*B1;
 	  ti[1] = mudj*d[1]+(qj*d[1]-muj[1])*B1;
 	  ti[2] = mudj*d[2]+(qj*d[2]-muj[2])*B1;
 
 	  if (newton_pair || j < nlocal) {
 	    tj[0] = mudi*d[0]-(qi*d[0]+mui[0])*B1;
 	    tj[1] = mudi*d[1]-(qi*d[1]+mui[1])*B1;
 	    tj[2] = mudi*d[2]-(qi*d[2]+mui[2])*B1;
 	  }
 
 	  if (eflag) ecoul = G0*B0+G1*B1+G2*B2;
 	  if (ni > 0) {					// adj part, eqn 2.13
 	    force_coul -= (f = qqrd2e*(1.0-special_coul[ni])/r)*(
 	       	(3.0*G1+15.0*G2*r2inv)*r2inv+G0)*r2inv;
 	    if (eflag)
 	      ecoul -= f*((G1+3.0*G2*r2inv)*r2inv+G0);
 	    B1 -= f*r2inv;
 	  }
 	  B0 = mudj+qj*B1; B3 = -qi*B1+mudi;		// position independent
       if (ni > 0) B0 -= f*3.0*mudj*r2inv*r2inv/B2;
       if (ni > 0) B3 -= f*3.0*mudi*r2inv*r2inv/B2;
 	  force_d[0] = B0*mui[0]+B3*muj[0];		// force contribs
 	  force_d[1] = B0*mui[1]+B3*muj[1];
 	  force_d[2] = B0*mui[2]+B3*muj[2];
       if (ni > 0) {
 	    ti[0] -= f*(3.0*mudj*r2inv*r2inv*d[0]/B2+(qj*r2inv*d[0]-muj[0]*r2inv));
 	    ti[1] -= f*(3.0*mudj*r2inv*r2inv*d[1]/B2+(qj*r2inv*d[1]-muj[1]*r2inv));
 	    ti[2] -= f*(3.0*mudj*r2inv*r2inv*d[2]/B2+(qj*r2inv*d[2]-muj[2]*r2inv));
 	    if (newton_pair || j < nlocal) {
 	      tj[0] -= f*(3.0*mudi*r2inv*r2inv*d[0]/B2-(qi*r2inv*d[0]+mui[0]*r2inv));
 	      tj[1] -= f*(3.0*mudi*r2inv*r2inv*d[1]/B2-(qi*r2inv*d[1]+mui[1]*r2inv));
 	      tj[2] -= f*(3.0*mudi*r2inv*r2inv*d[2]/B2-(qi*r2inv*d[2]+mui[2]*r2inv));
 	    }
       }
 	}						// table real space
       } else {
 	force_coul = ecoul = 0.0;
 	memset(force_d, 0, 3*sizeof(double));
       }
 
       if (rsq < cut_ljsqi[typej]) {			// lj
        	if (order6) {					// long-range lj
 	  register double rn = r2inv*r2inv*r2inv;
 	  register double x2 = g2*rsq, a2 = 1.0/x2;
 	  x2 = a2*exp(-x2)*lj4i[typej];
 	  if (ni < 0) {
 	    force_lj =
 	      (rn*=rn)*lj1i[typej]-g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq;
 	    if (eflag) evdwl = rn*lj3i[typej]-g6*((a2+1.0)*a2+0.5)*x2;
 	  }
 	  else {					// special case
 	    register double f = special_lj[ni], t = rn*(1.0-f);
 	    force_lj = f*(rn *= rn)*lj1i[typej]-
 	      g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*lj2i[typej];
 	    if (eflag) evdwl = 
 		f*rn*lj3i[typej]-g6*((a2+1.0)*a2+0.5)*x2+t*lj4i[typej];
 	  }
 	}
 	else {						// cut lj
 	  register double rn = r2inv*r2inv*r2inv;
 	  if (ni < 0) {
 	    force_lj = rn*(rn*lj1i[typej]-lj2i[typej]);
 	    if (eflag) evdwl = rn*(rn*lj3i[typej]-lj4i[typej])-offseti[typej];
 	  }
 	  else {					// special case
 	    register double f = special_lj[ni];
 	    force_lj = f*rn*(rn*lj1i[typej]-lj2i[typej]);
 	    if (eflag) evdwl = f*(
 		rn*(rn*lj3i[typej]-lj4i[typej])-offseti[typej]);
 	  }
 	}
 	force_lj *= r2inv;
       }
       else force_lj = evdwl = 0.0;
 
       fpair = force_coul+force_lj;			// force
       if (newton_pair || j < nlocal) {
 	register double *fj = f0+(j+(j<<1));
 	fi[0] += fx = d[0]*fpair+force_d[0]; fj[0] -= fx;
 	fi[1] += fy = d[1]*fpair+force_d[1]; fj[1] -= fy;
 	fi[2] += fz = d[2]*fpair+force_d[2]; fj[2] -= fz;
 	tqi[0] += mui[1]*ti[2]-mui[2]*ti[1];		// torque
 	tqi[1] += mui[2]*ti[0]-mui[0]*ti[2];
 	tqi[2] += mui[0]*ti[1]-mui[1]*ti[0];
 	register double *tqj = tq0+(j+(j<<1));
 	tqj[0] += muj[1]*tj[2]-muj[2]*tj[1];
 	tqj[1] += muj[2]*tj[0]-muj[0]*tj[2];
 	tqj[2] += muj[0]*tj[1]-muj[1]*tj[0];
       }
       else {
 	fi[0] += fx = d[0]*fpair+force_d[0];		// force
 	fi[1] += fy = d[1]*fpair+force_d[1];
 	fi[2] += fz = d[2]*fpair+force_d[2];
 	tqi[0] += mui[1]*ti[2]-mui[2]*ti[1];		// torque
 	tqi[1] += mui[2]*ti[0]-mui[0]*ti[2];
 	tqi[2] += mui[0]*ti[1]-mui[1]*ti[0];
       }
 
       if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
 			   evdwl,ecoul,fx,fy,fz,d[0],d[1],d[2]);
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 /*
 double PairLJLongDipoleLong::single(int i, int j, int itype, int jtype,
 			    double rsq, double factor_coul, double factor_lj,
 			    double &fforce)
 {
   double r6inv, force_coul, force_lj;
   double g2 = g_ewald*g_ewald, g6 = g2*g2*g2, g8 = g6*g2, *q = atom->q;
 
   double eng = 0.0;
   double r2inv = 1.0/rsq;
 
   if ((ewald_order&(1<<3)) && (rsq < cut_coulsq)) {	// coulombic
     double *mui = atom->mu[i], *muj = atom->mu[j];
     double *xi = atom->x[i], *xj = atom->x[j];
     double qi = q[i], qj = q[j];
     double G0, G1, G2, B0, B1, B2, B3, mudi, mudj, muij;
     vector d = {xi[0]-xj[0], xi[1]-xj[1], xi[2]-xj[2]};
     {							// series real space
       register double r = sqrt(rsq);
       register double x = g_ewald*r;
       register double f = exp(-x*x)*qqrd2e;
 
       B0 = 1.0/(1.0+EWALD_P*x);			// eqn 2.8
       B0 *= ((((A5*B0+A4)*B0+A3)*B0+A2)*B0+A1)*f/r;
       B1 = (B0 + C1 * f) * r2inv;
       B2 = (3.0*B1 + C2 * f) * r2inv;
       B3 = (5.0*B2 + C3 * f) * r2inv;
 
       mudi = mui[0]*d[0]+mui[1]*d[1]+mui[2]*d[2];
       mudj = muj[0]*d[0]+muj[1]*d[1]+muj[2]*d[2];
       muij = mui[0]*muj[0]+mui[1]*muj[1]+mui[2]*muj[2];
       G0 = qi*(qj = q[j]);				// eqn 2.10
       G1 = qi*mudj-qj*mudi+muij;
       G2 = -mudi*mudj;
       force_coul = G0*B1+G1*B2+G2*B3;
 	  
       eng += G0*B0+G1*B1+G2*B2;	
       if (factor_coul < 1.0) {			      	// adj part, eqn 2.13
 	force_coul -= (f = force->qqrd2e*(1.0-factor_coul)/r)*(
 	    (3.0*G1+6.0*muij+15.0*G2*r2inv)*r2inv+G0);
 	eng -= f*((G1+3.0*G2*r2inv)*r2inv+G0);
 	B1 -= f*r2inv;
       }
       B0 = mudj*B2-qj*B1; B3 = qi*B1+mudi*B2;		// position independent
       //force_d[0] = B0*mui[0]+B3*muj[0];		// force contributions
       //force_d[1] = B0*mui[1]+B3*muj[1];
       //force_d[2] = B0*mui[2]+B3*muj[2];
     }							// table real space
   }
   else force_coul = 0.0;
 
   if (rsq < cut_ljsq[itype][jtype]) {			// lennard-jones
     r6inv = r2inv*r2inv*r2inv;
     if (ewald_order&0x40) {				// long-range
       register double x2 = g2*rsq, a2 = 1.0/x2, t = r6inv*(1.0-factor_lj);
       x2 = a2*exp(-x2)*lj4[itype][jtype];
       force_lj = factor_lj*(r6inv *= r6inv)*lj1[itype][jtype]-
        	g8*(((6.0*a2+6.0)*a2+3.0)*a2+a2)*x2*rsq+t*lj2[itype][jtype];
       eng += factor_lj*r6inv*lj3[itype][jtype]-
 	g6*((a2+1.0)*a2+0.5)*x2+t*lj4[itype][jtype];
     }
     else {						// cut
       force_lj = factor_lj*r6inv*(lj1[itype][jtype]*r6inv-lj2[itype][jtype]);
       eng += factor_lj*(r6inv*(r6inv*lj3[itype][jtype]-
 	    lj4[itype][jtype])-offset[itype][jtype]);
     }
   } 
   else force_lj = 0.0;
 
   fforce = (force_coul+force_lj)*r2inv;
   return eng;
 }
 */
 
diff --git a/src/FLD/pair_brownian.cpp b/src/FLD/pair_brownian.cpp
index 47281b4c4..3aa96288b 100755
--- a/src/FLD/pair_brownian.cpp
+++ b/src/FLD/pair_brownian.cpp
@@ -1,733 +1,733 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Amit Kumar and Michael Bybee (UIUC)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_brownian.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "domain.h"
 #include "update.h"
 #include "modify.h"
 #include "fix.h"
 #include "fix_deform.h"
 #include "fix_wall.h"
 #include "input.h"
 #include "variable.h"
 #include "random_mars.h"
 #include "math_const.h"
 #include "math_special.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
 // same as fix_wall.cpp
 
 enum{EDGE,CONSTANT,VARIABLE};
 
 /* ---------------------------------------------------------------------- */
 
 PairBrownian::PairBrownian(LAMMPS *lmp) : Pair(lmp)
 {
   // no random number generator yet; the destructor deletes this pointer
   random = NULL;

   // single_enable is cleared for this style
   single_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairBrownian::~PairBrownian()
 {
   // the generator is released whether or not the arrays were allocated
   delete random;

   // per-type arrays exist only when the allocated flag is set
   if (!allocated) return;

   memory->destroy(setflag);
   memory->destroy(cutsq);

   memory->destroy(cut);
   memory->destroy(cut_inner);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Add random (Brownian) forces and torques to the per-atom f and torque
 // arrays.
 //   eflag, vflag : energy / virial request flags, passed to ev_setup()
 // Steps visible below:
 //   1. when flagVF is set and the box or walls may have changed, recompute
 //      R0 and RT0 from the current volume fraction;
 //   2. per atom, when flagfld is set, add an isotropic random force (and a
 //      random torque when flaglog is set);
 //   3. per neighbor pair inside the cutoff, when flagHI is set, add pairwise
 //      random forces (and torques when flaglog is set).
 // The number and order of random->uniform() draws determines the random
 // stream consumed per step, so statement order matters here.
 void PairBrownian::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz,tx,ty,tz;
   double rsq,r,h_sep,radi;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double **torque = atom->torque;
   double *radius = atom->radius;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int newton_pair = force->newton_pair;
 
   double vxmu2f = force->vxmu2f;
   double randr;
   double prethermostat;
   double xl[3],a_sq,a_sh,a_pu,Fbmag;
   double p1[3],p2[3],p3[3];
   int overlaps = 0;
 
   // This section of code adjusts R0/RT0/RS0 if necessary due to changes
   // in the volume fraction as a result of fix deform or moving walls
 
   double dims[3], wallcoord;
   if (flagVF) // Flag for volume fraction corrections
     if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
       // dims = current extent in each direction: the periodic box lengths,
       // or the wall-to-wall distances when walls are present
       if (flagdeform && !flagwall)
         for (j = 0; j < 3; j++)
           dims[j] = domain->prd[j];
       else if (flagwall == 2 || (flagdeform && flagwall == 1)){
         double wallhi[3], walllo[3];
         // start from the full box, then overwrite the sides that have a wall
         for (int j = 0; j < 3; j++){
           wallhi[j] = domain->prd[j];
           walllo[j] = 0;
         }
         for (int m = 0; m < wallfix->nwall; m++){
           // wallwhich encodes dimension (value/2) and side (value%2)
           int dim = wallfix->wallwhich[m] / 2;
           int side = wallfix->wallwhich[m] % 2;
           if (wallfix->xstyle[m] == VARIABLE){
             wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
           }
           else wallcoord = wallfix->coord0[m];
           if (side == 0) walllo[dim] = wallcoord;
           else wallhi[dim] = wallcoord;
         }
         for (int j = 0; j < 3; j++)
           dims[j] = wallhi[j] - walllo[j];
       }
       // volume fraction = vol_P / current total volume
       double vol_T = dims[0]*dims[1]*dims[2];
       double vol_f = vol_P/vol_T;
       if (flaglog == 0) {
         R0  = 6*MY_PI*mu*rad*(1.0 + 2.16*vol_f);
         RT0 = 8*MY_PI*mu*cube(rad);
         //RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3)*(1.0 + 3.33*vol_f + 2.80*vol_f*vol_f);
       } else {
         R0  = 6*MY_PI*mu*rad*(1.0 + 2.725*vol_f - 6.583*vol_f*vol_f);
         RT0 = 8*MY_PI*mu*cube(rad)*(1.0 + 0.749*vol_f - 2.469*vol_f*vol_f);
         //RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3)*(1.0 + 3.64*vol_f - 6.95*vol_f*vol_f);
       }
     }
 
   // scale factor for Brownian moments
 
   prethermostat = sqrt(24.0*force->boltz*t_target/update->dt);
   prethermostat *= sqrt(force->vxmu2f/force->ftm2v/force->mvv2e);
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     radi = radius[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     // FLD contribution to force and torque due to isotropic terms
 
     if (flagfld) {
       f[i][0] += prethermostat*sqrt(R0)*(random->uniform()-0.5);
       f[i][1] += prethermostat*sqrt(R0)*(random->uniform()-0.5);
       f[i][2] += prethermostat*sqrt(R0)*(random->uniform()-0.5);
       if (flaglog) {
         torque[i][0] += prethermostat*sqrt(RT0)*(random->uniform()-0.5);
         torque[i][1] += prethermostat*sqrt(RT0)*(random->uniform()-0.5);
         torque[i][2] += prethermostat*sqrt(RT0)*(random->uniform()-0.5);
       }
     }
 
     // pairwise terms are skipped entirely unless flagHI is set
     if (!flagHI) continue;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r = sqrt(rsq);
 
         // scalar resistances a_sq and a_sh
 
         // gap computed from atom i's radius only (2*radi)
         h_sep = r - 2.0*radi;
 
         // check for overlaps
 
         if (h_sep < 0.0) overlaps++;
 
         // if less than minimum gap, use minimum gap instead
 
         if (r < cut_inner[itype][jtype])
           h_sep = cut_inner[itype][jtype] - 2.0*radi;
 
         // scale h_sep by radi
 
         h_sep = h_sep/radi;
 
         // scalar resistances
 
         // a_sh and a_pu are only assigned (and only used) when flaglog is set
         if (flaglog) {
           a_sq = 6.0*MY_PI*mu*radi*(1.0/4.0/h_sep + 9.0/40.0*log(1.0/h_sep));
           a_sh = 6.0*MY_PI*mu*radi*(1.0/6.0*log(1.0/h_sep));
           a_pu = 8.0*MY_PI*mu*cube(radi)*(3.0/160.0*log(1.0/h_sep));
         } else
           a_sq = 6.0*MY_PI*mu*radi*(1.0/4.0/h_sep);
 
         // generate the Pairwise Brownian Force: a_sq
 
         Fbmag = prethermostat*sqrt(a_sq);
 
         // generate a random number
 
         randr = random->uniform()-0.5;
 
         // contribution due to Brownian motion
 
         fx = Fbmag*randr*delx/r;
         fy = Fbmag*randr*dely/r;
         fz = Fbmag*randr*delz/r;
 
         // add terms due to a_sh
 
         if (flaglog) {
 
           // generate two orthogonal vectors to the line of centers
 
           p1[0] = delx/r; p1[1] = dely/r; p1[2] = delz/r;
           set_3_orthogonal_vectors(p1,p2,p3);
 
           // magnitude
 
           Fbmag = prethermostat*sqrt(a_sh);
 
           // force in each of the two directions
 
           randr = random->uniform()-0.5;
           fx += Fbmag*randr*p2[0];
           fy += Fbmag*randr*p2[1];
           fz += Fbmag*randr*p2[2];
 
           randr = random->uniform()-0.5;
           fx += Fbmag*randr*p3[0];
           fy += Fbmag*randr*p3[1];
           fz += Fbmag*randr*p3[2];
         }
 
         // scale forces to appropriate units
 
         fx = vxmu2f*fx;
         fy = vxmu2f*fy;
         fz = vxmu2f*fz;
 
         // sum to total force
 
         f[i][0] -= fx;
         f[i][1] -= fy;
         f[i][2] -= fz;
 
         // j receives the equal and opposite force (the same fx,fy,fz is
         // reused; the independent re-draw below is commented out)
         if (newton_pair || j < nlocal) {
           //randr = random->uniform()-0.5;
           //fx = Fbmag*randr*delx/r;
           //fy = Fbmag*randr*dely/r;
           //fz = Fbmag*randr*delz/r;
 
           f[j][0] += fx;
           f[j][1] += fy;
           f[j][2] += fz;
         }
 
         // torque due to the Brownian Force
 
         if (flaglog) {
 
           // location of the point of closest approach on I from its center
 
           xl[0] = -delx/r*radi;
           xl[1] = -dely/r*radi;
           xl[2] = -delz/r*radi;
 
           // torque = xl_cross_F
 
           tx = xl[1]*fz - xl[2]*fy;
           ty = xl[2]*fx - xl[0]*fz;
           tz = xl[0]*fy - xl[1]*fx;
 
           // torque is same on both particles
 
           torque[i][0] -= tx;
           torque[i][1] -= ty;
           torque[i][2] -= tz;
 
           if (newton_pair || j < nlocal) {
             torque[j][0] -= tx;
             torque[j][1] -= ty;
             torque[j][2] -= tz;
           }
 
           // torque due to a_pu
 
           Fbmag = prethermostat*sqrt(a_pu);
 
           // force in each direction
 
           randr = random->uniform()-0.5;
           tx = Fbmag*randr*p2[0];
           ty = Fbmag*randr*p2[1];
           tz = Fbmag*randr*p2[2];
 
           randr = random->uniform()-0.5;
           tx += Fbmag*randr*p3[0];
           ty += Fbmag*randr*p3[1];
           tz += Fbmag*randr*p3[2];
 
           // torque has opposite sign on two particles
 
           torque[i][0] -= tx;
           torque[i][1] -= ty;
           torque[i][2] -= tz;
 
           if (newton_pair || j < nlocal) {
             torque[j][0] += tx;
             torque[j][1] += ty;
             torque[j][2] += tz;
           }
         }
 
         // tally with the sign of the force actually applied to atom i (-f)
         if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
                                  0.0,0.0,-fx,-fy,-fz,delx,dely,delz);
       }
     }
   }
 
   // overlap reporting is hard-wired off (print_overlaps is always 0)
   int print_overlaps = 0;
   if (print_overlaps && overlaps)
     printf("Number of overlaps=%d\n",overlaps);
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate the per-type-pair arrays: setflag, cutsq, cut, cut_inner
    every array is created with ntypes+1 entries in each dimension
    only the i <= j part of setflag (types numbered from 1) is zeroed here
 ------------------------------------------------------------------------- */
 
 void PairBrownian::allocate()
 {
   const int ntypes = atom->ntypes;
   const int dim = ntypes + 1;
 
   allocated = 1;
 
   memory->create(setflag,dim,dim,"pair:setflag");
 
   // no type pair has been assigned coefficients yet
 
   for (int itype = 1; itype <= ntypes; itype++) {
     int *flagrow = setflag[itype];
     for (int jtype = itype; jtype <= ntypes; jtype++) flagrow[jtype] = 0;
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
   memory->create(cut,dim,dim,"pair:cut");
   memory->create(cut_inner,dim,dim,"pair:cut_inner");
 }
 
 /* ----------------------------------------------------------------------
    global settings
    args = mu flaglog flagfld cut_inner cut_global t_target seed
           [flagHI flagVF]
    flagHI and flagVF both default to 1 when the two optional args are absent
 ------------------------------------------------------------------------- */
 
 void PairBrownian::settings(int narg, char **arg)
 {
   if (narg != 7 && narg != 9) error->all(FLERR,"Illegal pair_style command");
 
   mu = force->numeric(FLERR,arg[0]);
   flaglog = force->inumeric(FLERR,arg[1]);
   flagfld = force->inumeric(FLERR,arg[2]);
   cut_inner_global = force->numeric(FLERR,arg[3]);
   cut_global = force->numeric(FLERR,arg[4]);
   t_target = force->numeric(FLERR,arg[5]);
   seed = force->inumeric(FLERR,arg[6]);
 
   flagHI = flagVF = 1;
   if (narg == 9) {
     flagHI = force->inumeric(FLERR,arg[7]);
     flagVF = force->inumeric(FLERR,arg[8]);
   }
 
   // log terms are only meaningful together with the 1/r terms,
   // so force flagHI on rather than failing
 
   if (flaglog == 1 && flagHI == 0) {
     error->warning(FLERR,"Cannot include log terms without 1/r terms; "
                    "setting flagHI to 1");
     flagHI = 1;
   }
 
   // initialize Marsaglia RNG with processor-unique seed
   // any previously created generator is released first
 
   delete random;
   random = new RanMars(lmp,seed + comm->me);
 
   // reset cutoffs that have been explicitly set
   // the inner loop starts at j = i so that same-type (diagonal) pairs
   // are reset as well, not only the i < j pairs
 
   if (allocated) {
     for (int i = 1; i <= atom->ntypes; i++)
       for (int j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) {
           cut_inner[i][j] = cut_inner_global;
           cut[i][j] = cut_global;
         }
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairBrownian::coeff(int narg, char **arg)
 {
   if (narg != 2 && narg != 4)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double cut_inner_one = cut_inner_global;
   double cut_one = cut_global;
 
   if (narg == 4) {
     cut_inner_one = force->numeric(FLERR,arg[2]);
     cut_one = force->numeric(FLERR,arg[3]);
   }
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++)
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       cut_inner[i][j] = cut_inner_one;
       cut[i][j] = cut_one;
       setflag[i][j] = 1;
       count++;
     }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
    checks the atom style and particle radii, requests a neighbor list,
    scans the fixes for deform/wall, and sets the isotropic constants
    R0 and RT0 from the particle volume fraction
 ------------------------------------------------------------------------- */
 
 void PairBrownian::init_style()
 {
   if (!atom->sphere_flag)
     error->all(FLERR,"Pair brownian requires atom style sphere");
 
   // if newton off, forces between atoms ij will be double computed
   // using different random numbers
   // only a warning, printed by proc 0
 
   if (force->newton_pair == 0 && comm->me == 0)
     error->warning(FLERR,
                    "Pair brownian needs newton pair on for "
                    "momentum conservation");
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 
   // ensure all particles are finite-size
   // for pair hybrid, should limit test to types using the pair style
 
   double *radius = atom->radius;
   int nlocal = atom->nlocal;
 
   for (int i = 0; i < nlocal; i++)
     if (radius[i] == 0.0)
       error->one(FLERR,"Pair brownian requires extended particles");
 
   // require monodisperse system with same radii for all types
   // rad ends up holding the common radius; the i > 1 guard avoids
   // comparing against rad before it has been assigned in this loop
 
   double radtype;
   for (int i = 1; i <= atom->ntypes; i++) {
     if (!atom->radius_consistency(i,radtype))
       error->all(FLERR,"Pair brownian requires monodisperse particles");
     if (i > 1 && radtype != rad)
       error->all(FLERR,"Pair brownian requires monodisperse particles");
     rad = radtype;
   }
 
   // set the isotropic constants that depend on the volume fraction
   // vol_T = total volume
   // check for fix deform, if exists it must use "remap v"
   // NOTE(review): no remap check is actually performed in this function
   // If box will change volume, set appropriate flag so that volume
   // and v.f. corrections are re-calculated at every step.
   //
   // If available volume is different from box volume
   // due to walls, set volume appropriately; if walls will
   // move, set appropriate flag so that volume and v.f. corrections
   // are re-calculated at every step.
 
   // flagdeform: 1 if a fix with style exactly "deform" exists
   // flagwall: 0 = no wall fix, 1 = wall fix present, 2 = wall fix with xflag set
 
   flagdeform = flagwall = 0;
   for (int i = 0; i < modify->nfix; i++){
     if (strcmp(modify->fix[i]->style,"deform") == 0)
       flagdeform = 1;
     else if (strstr(modify->fix[i]->style,"wall") != NULL) {
       if (flagwall) 
         error->all(FLERR,
                    "Cannot use multiple fix wall commands with pair brownian");
       flagwall = 1; // Walls exist
       wallfix = (FixWall *) modify->fix[i];
       if (wallfix->xflag) flagwall = 2; // Moving walls exist
     }
   }
 
   // set the isotropic constants depending on the volume fraction
   // vol_T = total volume available to the particles:
   //   the full box without a wall fix, else the region between the walls
 
   double vol_T, wallcoord;
   if (!flagwall) vol_T = domain->xprd*domain->yprd*domain->zprd;
   else {
     // start from the box extent in each dimension, then override the
     // lo/hi face of every dimension that has a wall
     double wallhi[3], walllo[3];
     for (int j = 0; j < 3; j++){
       wallhi[j] = domain->prd[j];
       walllo[j] = 0;
     }
     for (int m = 0; m < wallfix->nwall; m++){
       // wallwhich encodes dimension and side: dim = which/2, side = which%2
       int dim = wallfix->wallwhich[m] / 2;
       int side = wallfix->wallwhich[m] % 2;
       if (wallfix->xstyle[m] == VARIABLE){
         wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
         // Since fix->wall->init happens after pair->init_style
         wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
       }
 
       else wallcoord = wallfix->coord0[m];
 
       if (side == 0) walllo[dim] = wallcoord;
       else wallhi[dim] = wallcoord;
     }
     vol_T = (wallhi[0] - walllo[0]) * (wallhi[1] - walllo[1]) *
       (wallhi[2] - walllo[2]);
   }
 
   // vol_P = volume of particles, assuming mono-dispersity
   // vol_f = volume fraction
 
   vol_P = atom->natoms*(4.0/3.0)*MY_PI*cube(rad);
 
   double vol_f = vol_P/vol_T;
 
   // set isotropic constants
   // with flagVF off the volume-fraction corrections are dropped
   if (!flagVF) vol_f = 0;
 
   if (flaglog == 0) {
     R0  = 6*MY_PI*mu*rad*(1.0 + 2.16*vol_f);
     RT0 = 8*MY_PI*mu*cube(rad);  // not actually needed
   } else {
     R0  = 6*MY_PI*mu*rad*(1.0 + 2.725*vol_f - 6.583*vol_f*vol_f);
     RT0 = 8*MY_PI*mu*cube(rad)*(1.0 + 0.749*vol_f - 2.469*vol_f*vol_f);
   }
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
    pairs without explicit coeffs get both cutoffs by mixing the i,i and
    j,j values; returns the outer cutoff for the pair
 ------------------------------------------------------------------------- */
 
 double PairBrownian::init_one(int i, int j)
 {
   const bool coeffs_given = (setflag[i][j] != 0);
 
   if (!coeffs_given) {
     const double mixed_inner = mix_distance(cut_inner[i][i],cut_inner[j][j]);
     cut_inner[i][j] = mixed_inner;
     const double mixed_outer = mix_distance(cut[i][i],cut[j][j]);
     cut[i][j] = mixed_outer;
   }
 
   // mirror the inner cutoff into the j,i entry
 
   double *inner_ji = &cut_inner[j][i];
   *inner_ji = cut_inner[i][j];
 
   return cut[i][j];
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
    layout: global settings, then for every type pair i <= j the setflag
    value followed (only when set) by cut_inner and cut
 ------------------------------------------------------------------------- */
 
 void PairBrownian::write_restart(FILE *fp)
 {
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
 
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
 
       // pairs that were never set carry no cutoff data
 
       if (!setflag[itype][jtype]) continue;
 
       fwrite(&cut_inner[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairBrownian::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   int i,j;
   int me = comm->me;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
       MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
       if (setflag[i][j]) {
         if (me == 0) {
           fread(&cut_inner[i][j],sizeof(double),1,fp);
           fread(&cut[i][j],sizeof(double),1,fp);
         }
         MPI_Bcast(&cut_inner[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&cut[i][j],1,MPI_DOUBLE,0,world);
       }
     }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
    writes the global pair_style settings as raw binary values; the order
    of the fwrite calls below is the on-disk order of the fields
 ------------------------------------------------------------------------- */
 
 void PairBrownian::write_restart_settings(FILE *fp)
 {
   // values parsed by settings(): mu, flaglog, flagfld, the two global
   // cutoffs, t_target and seed
   fwrite(&mu,sizeof(double),1,fp);
   fwrite(&flaglog,sizeof(int),1,fp);
   fwrite(&flagfld,sizeof(int),1,fp);
   fwrite(&cut_inner_global,sizeof(double),1,fp);
   fwrite(&cut_global,sizeof(double),1,fp);
   fwrite(&t_target,sizeof(double),1,fp);
   fwrite(&seed,sizeof(int),1,fp);
   // offset_flag and mix_flag are not assigned in the visible part of this
   // file; presumably inherited from the Pair base class
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
   // the two optional settings() flags are stored last
   fwrite(&flagHI,sizeof(int),1,fp);
   fwrite(&flagVF,sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
    fields are read in the same order write_restart_settings() writes them,
    then each one is broadcast from proc 0 and the RNG is re-created
 ------------------------------------------------------------------------- */
 
 void PairBrownian::read_restart_settings(FILE *fp)
 {
   int me = comm->me;
   // only proc 0 touches the file
   if (me == 0) {
     fread(&mu,sizeof(double),1,fp);
     fread(&flaglog,sizeof(int),1,fp);
     fread(&flagfld,sizeof(int),1,fp);
     fread(&cut_inner_global,sizeof(double),1,fp);
     fread(&cut_global,sizeof(double),1,fp);
     fread(&t_target, sizeof(double),1,fp);
     fread(&seed, sizeof(int),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
     fread(&flagHI,sizeof(int),1,fp);
     fread(&flagVF,sizeof(int),1,fp);
   }
   // every proc takes part in the broadcasts, one per field, root = proc 0
   MPI_Bcast(&mu,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&flaglog,1,MPI_INT,0,world);
   MPI_Bcast(&flagfld,1,MPI_INT,0,world);
   MPI_Bcast(&cut_inner_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&t_target,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&seed,1,MPI_INT,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
   MPI_Bcast(&flagHI,1,MPI_INT,0,world);
   MPI_Bcast(&flagVF,1,MPI_INT,0,world);
 
   // additional setup based on restart parameters
   // the RNG is rebuilt from the restored seed, offset by the proc rank,
   // the same way settings() builds it
 
   delete random;
   random = new RanMars(lmp,seed + comm->me);
 }
 
 /* ----------------------------------------------------------------------
    given p1, fill p2 and p3 so that p2 is orthogonal to p1 and p3 is
    orthogonal to both
    p2 is scaled to unit length, p3 = p1 x p2
    p1 is divided by its largest-magnitude component, so it must not be
    the zero vector
 ------------------------------------------------------------------------- */
 
 void PairBrownian::set_3_orthogonal_vectors(double p1[3],
                                             double p2[3], double p3[3])
 {
   // pivot = index of the component of p1 with the largest magnitude
   // comparisons are strict: on a tie between 0 and 1 index 1 is taken,
   // and index 2 is only taken when strictly larger
 
   int pivot = (fabs(p1[0]) > fabs(p1[1])) ? 0 : 1;
   if (fabs(p1[pivot]) < fabs(p1[2])) pivot = 2;
 
   // the two remaining indices, in cyclic order after the pivot
 
   const int ia = (pivot+1) % 3;
   const int ib = (pivot+2) % 3;
 
   // pick p2 = 1 along both free axes, then solve p1.p2 = 0 for the
   // pivot component
 
   p2[ia] = 1.0;
   p2[ib] = 1.0;
   p2[pivot] = -(p1[ia]*p2[ia] + p1[ib]*p2[ib])/p1[pivot];
 
   // scale p2 to unit length
 
   const double len = sqrt(p2[0]*p2[0] + p2[1]*p2[1] + p2[2]*p2[2]);
   for (int k = 0; k < 3; k++) p2[k] = p2[k]/len;
 
   // third vector from the cross product p1 x p2
 
   p3[0] = p1[1]*p2[2] - p1[2]*p2[1];
   p3[1] = p1[2]*p2[0] - p1[0]*p2[2];
   p3[2] = p1[0]*p2[1] - p1[1]*p2[0];
 }
diff --git a/src/FLD/pair_brownian_poly.cpp b/src/FLD/pair_brownian_poly.cpp
index 6c18f87f0..c0a958ea7 100644
--- a/src/FLD/pair_brownian_poly.cpp
+++ b/src/FLD/pair_brownian_poly.cpp
@@ -1,441 +1,441 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Amit Kumar and Michael Bybee (UIUC)
                          Dave Heine (Corning), polydispersity
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_brownian_poly.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "domain.h"
 #include "update.h"
 #include "modify.h"
 #include "fix.h"
 #include "fix_deform.h"
 #include "fix_wall.h"
 #include "input.h"
 #include "variable.h"
 #include "random_mars.h"
 #include "math_const.h"
 #include "math_special.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
 // same as fix_wall.cpp
 
 enum{EDGE,CONSTANT,VARIABLE};
 
 /* ---------------------------------------------------------------------- */
 
 PairBrownianPoly::PairBrownianPoly(LAMMPS *lmp) : PairBrownian(lmp)
 {
   // compute() in this class tallies per pair via ev_tally_xyz() and never
   // calls virial_fdotr_compute(); presumably this flag tells the base
   // class not to use the f-dot-r virial path - confirm against Pair
   no_virial_fdotr_compute = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBrownianPoly::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz,tx,ty,tz;
   double rsq,r,h_sep,beta0,beta1,radi,radj;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double **torque = atom->torque;
   double *radius = atom->radius;
   int *type = atom->type;
   int nlocal = atom->nlocal;
 
   double vxmu2f = force->vxmu2f;
   int overlaps = 0;
   double randr;
   double prethermostat;
   double xl[3],a_sq,a_sh,a_pu,Fbmag;
   double p1[3],p2[3],p3[3];
 
   // this section of code adjusts R0/RT0/RS0 if necessary due to changes
   // in the volume fraction as a result of fix deform or moving walls
 
   double dims[3], wallcoord;
   if (flagVF) // Flag for volume fraction corrections
     if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
       if (flagdeform && !flagwall)
         for (j = 0; j < 3; j++)
           dims[j] = domain->prd[j];
       else if (flagwall == 2 || (flagdeform && flagwall == 1)){
         double wallhi[3], walllo[3];
         for (j = 0; j < 3; j++){
           wallhi[j] = domain->prd[j];
           walllo[j] = 0;
         }
         for (int m = 0; m < wallfix->nwall; m++){
           int dim = wallfix->wallwhich[m] / 2;
           int side = wallfix->wallwhich[m] % 2;
           if (wallfix->xstyle[m] == VARIABLE){
             wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
           }
           else wallcoord = wallfix->coord0[m];
           if (side == 0) walllo[dim] = wallcoord;
           else wallhi[dim] = wallcoord;
         }
         for (j = 0; j < 3; j++)
           dims[j] = wallhi[j] - walllo[j];
       }
       double vol_T = dims[0]*dims[1]*dims[2];
       double vol_f = vol_P/vol_T;
       if (flaglog == 0) {
         R0  = 6*MY_PI*mu*rad*(1.0 + 2.16*vol_f);
         RT0 = 8*MY_PI*mu*cube(rad);
         //RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3)*(1.0 + 3.33*vol_f + 2.80*vol_f*vol_f);
       } else {
         R0  = 6*MY_PI*mu*rad*(1.0 + 2.725*vol_f - 6.583*vol_f*vol_f);
         RT0 = 8*MY_PI*mu*cube(rad)*(1.0 + 0.749*vol_f - 2.469*vol_f*vol_f);
         //RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3)*(1.0 + 3.64*vol_f - 6.95*vol_f*vol_f);
       }
     }
   // scale factor for Brownian moments
 
   prethermostat = sqrt(24.0*force->boltz*t_target/update->dt);
   prethermostat *= sqrt(force->vxmu2f/force->ftm2v/force->mvv2e);
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     radi = radius[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     // FLD contribution to force and torque due to isotropic terms
 
     if (flagfld) {
       f[i][0] += prethermostat*sqrt(R0*radi)*(random->uniform()-0.5);
       f[i][1] += prethermostat*sqrt(R0*radi)*(random->uniform()-0.5);
       f[i][2] += prethermostat*sqrt(R0*radi)*(random->uniform()-0.5);
       if (flaglog) {
         const double radi3 = radi*radi*radi;
         torque[i][0] += prethermostat*sqrt(RT0*radi3) *
           (random->uniform()-0.5);
         torque[i][1] += prethermostat*sqrt(RT0*radi3) *
           (random->uniform()-0.5);
         torque[i][2] += prethermostat*sqrt(RT0*radi3) *
           (random->uniform()-0.5);
       }
     }
 
     if (!flagHI) continue;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
       radj = radius[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r = sqrt(rsq);
 
         // scalar resistances a_sq and a_sh
 
         h_sep = r - radi-radj;
 
         // check for overlaps
 
         if (h_sep < 0.0) overlaps++;
 
         // if less than minimum gap, use minimum gap instead
 
         if (r < cut_inner[itype][jtype])
           h_sep = cut_inner[itype][jtype] - radi-radj;
 
         // scale h_sep by radi
 
         h_sep = h_sep/radi;
         beta0 = radj/radi;
         beta1 = 1.0 + beta0;
 
         // scalar resistances
 
         if (flaglog) {
           a_sq = beta0*beta0/beta1/beta1/h_sep +
             (1.0+7.0*beta0+beta0*beta0)/5.0/cube(beta1)*log(1.0/h_sep);
           a_sq += (1.0+18.0*beta0-29.0*beta0*beta0+18.0*cube(beta0) +
                    powint(beta0,4))/21.0/powint(beta1,4)*h_sep*log(1.0/h_sep);
           a_sq *= 6.0*MY_PI*mu*radi;
           a_sh = 4.0*beta0*(2.0+beta0+2.0*beta0*beta0)/15.0/cube(beta1) *
             log(1.0/h_sep);
           a_sh += 4.0*(16.0-45.0*beta0+58.0*beta0*beta0-45.0*cube(beta0) +
                        16.0*powint(beta0,4))/375.0/powint(beta1,4) *
             h_sep*log(1.0/h_sep);
           a_sh *= 6.0*MY_PI*mu*radi;
           a_pu = beta0*(4.0+beta0)/10.0/beta1/beta1*log(1.0/h_sep);
           a_pu += (32.0-33.0*beta0+83.0*beta0*beta0+43.0 *
                    cube(beta0))/250.0/cube(beta1)*h_sep*log(1.0/h_sep);
           a_pu *= 8.0*MY_PI*mu*cube(radi);
 
         } else a_sq = 6.0*MY_PI*mu*radi*(beta0*beta0/beta1/beta1/h_sep);
 
         // generate the Pairwise Brownian Force: a_sq
 
         Fbmag = prethermostat*sqrt(a_sq);
 
         // generate a random number
 
         randr = random->uniform()-0.5;
 
         // contribution due to Brownian motion
 
         fx = Fbmag*randr*delx/r;
         fy = Fbmag*randr*dely/r;
         fz = Fbmag*randr*delz/r;
 
         // add terms due to a_sh
 
         if (flaglog) {
 
           // generate two orthogonal vectors to the line of centers
 
           p1[0] = delx/r; p1[1] = dely/r; p1[2] = delz/r;
           set_3_orthogonal_vectors(p1,p2,p3);
 
           // magnitude
 
           Fbmag = prethermostat*sqrt(a_sh);
 
           // force in each of the two directions
 
           randr = random->uniform()-0.5;
           fx += Fbmag*randr*p2[0];
           fy += Fbmag*randr*p2[1];
           fz += Fbmag*randr*p2[2];
 
           randr = random->uniform()-0.5;
           fx += Fbmag*randr*p3[0];
           fy += Fbmag*randr*p3[1];
           fz += Fbmag*randr*p3[2];
         }
 
         // scale forces to appropriate units
 
         fx = vxmu2f*fx;
         fy = vxmu2f*fy;
         fz = vxmu2f*fz;
 
         // sum to total Force
 
         f[i][0] -= fx;
         f[i][1] -= fy;
         f[i][2] -= fz;
 
         // torque due to the Brownian Force
 
         if (flaglog) {
 
           // location of the point of closest approach on I from its center
 
           xl[0] = -delx/r*radi;
           xl[1] = -dely/r*radi;
           xl[2] = -delz/r*radi;
 
           // torque = xl_cross_F
 
           tx = xl[1]*fz - xl[2]*fy;
           ty = xl[2]*fx - xl[0]*fz;
           tz = xl[0]*fy - xl[1]*fx;
 
           // torque is same on both particles
 
           torque[i][0] -= tx;
           torque[i][1] -= ty;
           torque[i][2] -= tz;
 
           // torque due to a_pu
 
           Fbmag = prethermostat*sqrt(a_pu);
 
           // force in each direction
 
           randr = random->uniform()-0.5;
           tx = Fbmag*randr*p2[0];
           ty = Fbmag*randr*p2[1];
           tz = Fbmag*randr*p2[2];
 
           randr = random->uniform()-0.5;
           tx += Fbmag*randr*p3[0];
           ty += Fbmag*randr*p3[1];
           tz += Fbmag*randr*p3[2];
 
           // torque has opposite sign on two particles
 
           torque[i][0] -= tx;
           torque[i][1] -= ty;
           torque[i][2] -= tz;
 
         }
 
         // set j = nlocal so that only I gets tallied
 
         if (evflag) ev_tally_xyz(i,nlocal,nlocal,0,
                                  0.0,0.0,-fx,-fy,-fz,delx,dely,delz);
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
    requires newton pair off and atom style sphere, requests a full
    neighbor list, scans the fixes for deform/wall, and sets the
    radius-free isotropic constants R0 and RT0 from the volume fraction
 ------------------------------------------------------------------------- */
 
 void PairBrownianPoly::init_style()
 {
   if (force->newton_pair == 1)
     error->all(FLERR,"Pair brownian/poly requires newton pair off");
   if (!atom->sphere_flag)
     error->all(FLERR,"Pair brownian/poly requires atom style sphere");
 
   // ensure all particles are finite-size
   // for pair hybrid, should limit test to types using the pair style
 
   double *radius = atom->radius;
   int nlocal = atom->nlocal;
 
   for (int i = 0; i < nlocal; i++)
     if (radius[i] == 0.0)
       error->one(FLERR,"Pair brownian/poly requires extended particles");
 
   // switch the request from a half list to a full list
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
 
   // set the isotropic constants that depend on the volume fraction
   // vol_T = total volume
   // check for fix deform, if exists it must use "remap v"
   // NOTE(review): no remap check is actually performed in this function
   // If box will change volume, set appropriate flag so that volume
   // and v.f. corrections are re-calculated at every step.
   //
   // If available volume is different from box volume
   // due to walls, set volume appropriately; if walls will
   // move, set appropriate flag so that volume and v.f. corrections
   // are re-calculated at every step.
 
   // flagdeform: 1 if a fix with style exactly "deform" exists
   // flagwall: 0 = no wall fix, 1 = wall fix present, 2 = wall fix with xflag set
 
   flagdeform = flagwall = 0;
   for (int i = 0; i < modify->nfix; i++){
     if (strcmp(modify->fix[i]->style,"deform") == 0)
       flagdeform = 1;
     else if (strstr(modify->fix[i]->style,"wall") != NULL) {
       if (flagwall) 
         error->all(FLERR,
                    "Cannot use multiple fix wall commands with pair brownian");
       flagwall = 1; // Walls exist
       wallfix = (FixWall *) modify->fix[i];
       if (wallfix->xflag) flagwall = 2; // Moving walls exist
     }
   }
 
   // set the isotropic constants that depend on the volume fraction
   // vol_T = total volume available to the particles:
   //   the full box without a wall fix, else the region between the walls
 
   double vol_T, wallcoord;
   if (!flagwall) vol_T = domain->xprd*domain->yprd*domain->zprd;
   else {
     // start from the box extent in each dimension, then override the
     // lo/hi face of every dimension that has a wall
     double wallhi[3], walllo[3];
     for (int j = 0; j < 3; j++){
       wallhi[j] = domain->prd[j];
       walllo[j] = 0;
     }
     for (int m = 0; m < wallfix->nwall; m++){
       // wallwhich encodes dimension and side: dim = which/2, side = which%2
       int dim = wallfix->wallwhich[m] / 2;
       int side = wallfix->wallwhich[m] % 2;
       if (wallfix->xstyle[m] == VARIABLE){
         wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
         // Since fix->wall->init happens after pair->init_style
         wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
       }
 
       else wallcoord = wallfix->coord0[m];
 
       if (side == 0) walllo[dim] = wallcoord;
       else wallhi[dim] = wallcoord;
     }
     vol_T = (wallhi[0] - walllo[0]) * (wallhi[1] - walllo[1]) *
       (wallhi[2] - walllo[2]);
   }
 
   // vol_P = total volume of particles: each proc sums the sphere volumes
   //         of its own particles, then the sums are added over all procs
   // vol_f = volume fraction
 
   double volP = 0.0;
   for (int i = 0; i < nlocal; i++)
     volP += (4.0/3.0)*MY_PI*pow(atom->radius[i],3.0);
   MPI_Allreduce(&volP,&vol_P,1,MPI_DOUBLE,MPI_SUM,world);
 
   double vol_f = vol_P/vol_T;
 
   // with flagVF off the volume-fraction corrections are dropped
   if (!flagVF) vol_f = 0;
   // set isotropic constants
   // no radius factor here: compute() multiplies by each particle's own
   // radius (radi for R0, radi^3 for RT0)
 
   if (flaglog == 0) {
     R0  = 6*MY_PI*mu*(1.0 + 2.16*vol_f);
     RT0 = 8*MY_PI*mu;
   } else {
     R0  = 6*MY_PI*mu*(1.0 + 2.725*vol_f - 6.583*vol_f*vol_f);
     RT0 = 8*MY_PI*mu*(1.0 + 0.749*vol_f - 2.469*vol_f*vol_f);
   }
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
    pairs without explicit coeffs get both cutoffs by mixing the i,i and
    j,j values; returns the outer cutoff for the pair
 ------------------------------------------------------------------------- */
 
 double PairBrownianPoly::init_one(int i, int j)
 {
   const bool coeffs_given = (setflag[i][j] != 0);
 
   if (!coeffs_given) {
     const double mixed_inner = mix_distance(cut_inner[i][i],cut_inner[j][j]);
     cut_inner[i][j] = mixed_inner;
     const double mixed_outer = mix_distance(cut[i][i],cut[j][j]);
     cut[i][j] = mixed_outer;
   }
 
   // mirror the inner cutoff into the j,i entry
 
   double *inner_ji = &cut_inner[j][i];
   *inner_ji = cut_inner[i][j];
 
   return cut[i][j];
 }
diff --git a/src/FLD/pair_lubricate.cpp b/src/FLD/pair_lubricate.cpp
index 7feb915e6..79046e445 100755
--- a/src/FLD/pair_lubricate.cpp
+++ b/src/FLD/pair_lubricate.cpp
@@ -1,820 +1,820 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Randy Schunk (SNL)
                          Amit Kumar and Michael Bybee (UIUC)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_lubricate.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "domain.h"
 #include "modify.h"
 #include "fix.h"
 #include "fix_deform.h"
 #include "fix_wall.h"
 #include "input.h"
 #include "variable.h"
 #include "random_mars.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 // same as fix_deform.cpp
 
 enum{NO_REMAP,X_REMAP,V_REMAP};
 
 // same as fix_wall.cpp
 
 enum{EDGE,CONSTANT,VARIABLE};
 
 /* ---------------------------------------------------------------------- */
 
 PairLubricate::PairLubricate(LAMMPS *lmp) : Pair(lmp)
 {
   // forward communication moves 6 values per atom
   // (3 velocity + 3 omega components, see pack_forward_comm)
 
   this->comm_forward = 6;
 
   // single_enable is switched off for this style
 
   this->single_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairLubricate::~PairLubricate()
 {
   // the per-type arrays only exist once allocate() has set the flag
 
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   memory->destroy(cut);
   memory->destroy(cut_inner);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Accumulate lubrication forces and torques (plus the isotropic FLD terms
 // when flagfld is set) into atom->f and atom->torque for every atom in the
 // neighbor list, and tally virial contributions when requested.
 //   eflag, vflag : passed to ev_setup() to select what is tallied
 // When shearing is active the streaming velocity is subtracted from v/omega
 // before the force loop and added back afterwards.
 void PairLubricate::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz,tx,ty,tz;
   double rsq,r,h_sep,radi;
   double vr1,vr2,vr3,vnnr,vn1,vn2,vn3;
   double vt1,vt2,vt3,wt1,wt2,wt3,wdotn;
   double vRS0;
   double vi[3],vj[3],wi[3],wj[3],xl[3];
   double a_sq,a_sh,a_pu;
   int *ilist,*jlist,*numneigh,**firstneigh;
   double lamda[3],vstream[3];
 
   // unit conversion factor applied to every force/torque computed below
 
   double vxmu2f = force->vxmu2f;
 
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **v = atom->v;
   double **f = atom->f;
   double **omega = atom->omega;
   double **torque = atom->torque;
   double *radius = atom->radius;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int newton_pair = force->newton_pair;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // subtract streaming component of velocity, omega, angmom
   // assume fluid streaming velocity = box deformation rate
   // vstream = (ux,uy,uz)
   // ux = h_rate[0]*x + h_rate[5]*y + h_rate[4]*z
   // uy = h_rate[1]*y + h_rate[3]*z
   // uz = h_rate[2]*z
   // omega_new = omega - curl(vstream)/2
   // angmom_new = angmom - I*curl(vstream)/2
   // Ef = (grad(vstream) + (grad(vstream))^T) / 2
 
   if (shearing) {
     double *h_rate = domain->h_rate;
     double *h_ratelo = domain->h_ratelo;
 
     for (ii = 0; ii < inum; ii++) {
       i = ilist[ii];
       // itype and radi are assigned here but not read inside this loop
       itype = type[i];
       radi = radius[i];
 
       // vstream is evaluated from the lamda coords returned by x2lamda()
 
       domain->x2lamda(x[i],lamda);
       vstream[0] = h_rate[0]*lamda[0] + h_rate[5]*lamda[1] +
         h_rate[4]*lamda[2] + h_ratelo[0];
       vstream[1] = h_rate[1]*lamda[1] + h_rate[3]*lamda[2] + h_ratelo[1];
       vstream[2] = h_rate[2]*lamda[2] + h_ratelo[2];
       v[i][0] -= vstream[0];
       v[i][1] -= vstream[1];
       v[i][2] -= vstream[2];
 
       omega[i][0] += 0.5*h_rate[3];
       omega[i][1] -= 0.5*h_rate[4];
       omega[i][2] += 0.5*h_rate[5];
     }
 
     // set Ef from h_rate in strain units
 
     Ef[0][0] = h_rate[0]/domain->xprd;
     Ef[1][1] = h_rate[1]/domain->yprd;
     Ef[2][2] = h_rate[2]/domain->zprd;
     Ef[0][1] = Ef[1][0] = 0.5 * h_rate[5]/domain->yprd;
     Ef[0][2] = Ef[2][0] = 0.5 * h_rate[4]/domain->zprd;
     Ef[1][2] = Ef[2][1] = 0.5 * h_rate[3]/domain->zprd;
 
     // copy updated velocity/omega/angmom to the ghost particles
     // no need to do this if not shearing since comm->ghost_velocity is set
 
     comm->forward_comm_pair(this);
   }
 
   // This section of code adjusts R0/RT0/RS0 if necessary due to changes
   // in the volume fraction as a result of fix deform or moving walls
   // the formulas below mirror the ones used in init_style()
 
   double dims[3], wallcoord;
   if (flagVF) // Flag for volume fraction corrections
     if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
       // no walls: the available volume is the full periodic box
       if (flagdeform && !flagwall)
         for (j = 0; j < 3; j++)
           dims[j] = domain->prd[j];
       // walls present: start from the box extent and overwrite each
       // walled face with the current wall coordinate
       else if (flagwall == 2 || (flagdeform && flagwall == 1)){
          double wallhi[3], walllo[3];
          for (int j = 0; j < 3; j++){
            wallhi[j] = domain->prd[j];
            walllo[j] = 0;
          }
          for (int m = 0; m < wallfix->nwall; m++){
            // wallwhich encodes dimension (value/2) and side (value%2)
            int dim = wallfix->wallwhich[m] / 2;
            int side = wallfix->wallwhich[m] % 2;
            if (wallfix->xstyle[m] == VARIABLE){
              wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
            }
            else wallcoord = wallfix->coord0[m];
            if (side == 0) walllo[dim] = wallcoord;
            else wallhi[dim] = wallcoord;
          }
          for (int j = 0; j < 3; j++)
            dims[j] = wallhi[j] - walllo[j];
       }
       double vol_T = dims[0]*dims[1]*dims[2];
       double vol_f = vol_P/vol_T;
       if (flaglog == 0) {
         R0  = 6*MY_PI*mu*rad*(1.0 + 2.16*vol_f);
         RT0 = 8*MY_PI*mu*pow(rad,3.0);
         RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3.0)*
           (1.0 + 3.33*vol_f + 2.80*vol_f*vol_f);
       } else {
         R0  = 6*MY_PI*mu*rad*(1.0 + 2.725*vol_f - 6.583*vol_f*vol_f);
         RT0 = 8*MY_PI*mu*pow(rad,3.0)*(1.0 + 0.749*vol_f - 2.469*vol_f*vol_f);
         RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3.0)*
           (1.0 + 3.64*vol_f - 6.95*vol_f*vol_f);
       }
     }
 
   // end of R0 adjustment code
 
   // main loop over atoms in the neighbor list and their neighbors
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     radi = radius[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     // angular velocity
 
     wi[0] = omega[i][0];
     wi[1] = omega[i][1];
     wi[2] = omega[i][2];
 
     // FLD contribution to force and torque due to isotropic terms
     // FLD contribution to stress from isotropic RS0
 
     if (flagfld) {
       f[i][0] -= vxmu2f*R0*v[i][0];
       f[i][1] -= vxmu2f*R0*v[i][1];
       f[i][2] -= vxmu2f*R0*v[i][2];
       torque[i][0] -= vxmu2f*RT0*wi[0];
       torque[i][1] -= vxmu2f*RT0*wi[1];
       torque[i][2] -= vxmu2f*RT0*wi[2];
 
       if (shearing && vflag_either) {
         vRS0 = -vxmu2f * RS0;
         v_tally_tensor(i,i,nlocal,newton_pair,
                        vRS0*Ef[0][0],vRS0*Ef[1][1],vRS0*Ef[2][2],
                        vRS0*Ef[0][1],vRS0*Ef[0][2],vRS0*Ef[1][2]);
       }
     }
 
     // pairwise terms are skipped entirely when flagHI is off
 
     if (!flagHI) continue;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r = sqrt(rsq);
 
         // angular velocity of particle j
 
         wj[0] = omega[j][0];
         wj[1] = omega[j][1];
         wj[2] = omega[j][2];
 
         // xl = point of closest approach on particle i from its center
 
         xl[0] = -delx/r*radi;
         xl[1] = -dely/r*radi;
         xl[2] = -delz/r*radi;
 
         // velocity at the point of closest approach on both particles
         // v = v + omega_cross_xl - Ef.xl
 
         // particle i
 
         vi[0] = v[i][0] + (wi[1]*xl[2] - wi[2]*xl[1])
                         - (Ef[0][0]*xl[0] + Ef[0][1]*xl[1] + Ef[0][2]*xl[2]);
 
         vi[1] = v[i][1] + (wi[2]*xl[0] - wi[0]*xl[2])
                         - (Ef[1][0]*xl[0] + Ef[1][1]*xl[1] + Ef[1][2]*xl[2]);
 
         vi[2] = v[i][2] + (wi[0]*xl[1] - wi[1]*xl[0])
                         - (Ef[2][0]*xl[0] + Ef[2][1]*xl[1] + Ef[2][2]*xl[2]);
 
         // particle j
         // same terms with opposite sign, since the contact point on j
         // is at -xl from its center
 
         vj[0] = v[j][0] - (wj[1]*xl[2] - wj[2]*xl[1])
                         + (Ef[0][0]*xl[0] + Ef[0][1]*xl[1] + Ef[0][2]*xl[2]);
 
         vj[1] = v[j][1] - (wj[2]*xl[0] - wj[0]*xl[2])
                         + (Ef[1][0]*xl[0] + Ef[1][1]*xl[1] + Ef[1][2]*xl[2]);
 
         vj[2] = v[j][2] - (wj[0]*xl[1] - wj[1]*xl[0])
                         + (Ef[2][0]*xl[0] + Ef[2][1]*xl[1] + Ef[2][2]*xl[2]);
 
         // scalar resistances XA and YA
 
         // surface gap, using radi for both particles
 
         h_sep = r - 2.0*radi;
 
         // if less than the minimum gap use the minimum gap instead
 
         if (r < cut_inner[itype][jtype])
           h_sep = cut_inner[itype][jtype] - 2.0*radi;
 
         // scale h_sep by radi
 
         h_sep = h_sep/radi;
 
         // scalar resistances
         // a_sh and a_pu are only assigned when flaglog is set;
         // they are only read inside flaglog branches below
 
         if (flaglog) {
           a_sq = 6.0*MY_PI*mu*radi*(1.0/4.0/h_sep + 9.0/40.0*log(1.0/h_sep));
           a_sh = 6.0*MY_PI*mu*radi*(1.0/6.0*log(1.0/h_sep));
           a_pu = 8.0*MY_PI*mu*pow(radi,3.0)*(3.0/160.0*log(1.0/h_sep));
         } else
           a_sq = 6.0*MY_PI*mu*radi*(1.0/4.0/h_sep);
 
         // relative velocity at the point of closest approach
         // includes fluid velocity
 
         vr1 = vi[0] - vj[0];
         vr2 = vi[1] - vj[1];
         vr3 = vi[2] - vj[2];
 
         // normal component (vr.n)n
 
         vnnr = (vr1*delx + vr2*dely + vr3*delz)/r;
         vn1 = vnnr*delx/r;
         vn2 = vnnr*dely/r;
         vn3 = vnnr*delz/r;
 
         // tangential component vr - (vr.n)n
 
         vt1 = vr1 - vn1;
         vt2 = vr2 - vn2;
         vt3 = vr3 - vn3;
 
         // force due to squeeze type motion
 
         fx  = a_sq*vn1;
         fy  = a_sq*vn2;
         fz  = a_sq*vn3;
 
         // force due to all shear kind of motions
 
         if (flaglog) {
           fx = fx + a_sh*vt1;
           fy = fy + a_sh*vt2;
           fz = fz + a_sh*vt3;
         }
 
         // scale forces for appropriate units
 
         fx *= vxmu2f;
         fy *= vxmu2f;
         fz *= vxmu2f;
 
         // add to total force
 
         f[i][0] -= fx;
         f[i][1] -= fy;
         f[i][2] -= fz;
 
         if (newton_pair || j < nlocal) {
           f[j][0] += fx;
           f[j][1] += fy;
           f[j][2] += fz;
         }
 
         // torque due to this force
         // NOTE(review): fx,fy,fz were already multiplied by vxmu2f above,
         // and the torque built from them is multiplied by vxmu2f again
         // below -- confirm this double scaling is intended
 
         if (flaglog) {
           tx = xl[1]*fz - xl[2]*fy;
           ty = xl[2]*fx - xl[0]*fz;
           tz = xl[0]*fy - xl[1]*fx;
 
           torque[i][0] -= vxmu2f*tx;
           torque[i][1] -= vxmu2f*ty;
           torque[i][2] -= vxmu2f*tz;
 
           // same sign on j as on i for this contribution
 
           if (newton_pair || j < nlocal) {
             torque[j][0] -= vxmu2f*tx;
             torque[j][1] -= vxmu2f*ty;
             torque[j][2] -= vxmu2f*tz;
           }
 
           // torque due to a_pu
           // uses the component of (wi - wj) perpendicular to the
           // line of centers
 
           wdotn = ((wi[0]-wj[0])*delx + (wi[1]-wj[1])*dely +
                    (wi[2]-wj[2])*delz)/r;
           wt1 = (wi[0]-wj[0]) - wdotn*delx/r;
           wt2 = (wi[1]-wj[1]) - wdotn*dely/r;
           wt3 = (wi[2]-wj[2]) - wdotn*delz/r;
 
           tx = a_pu*wt1;
           ty = a_pu*wt2;
           tz = a_pu*wt3;
 
           torque[i][0] -= vxmu2f*tx;
           torque[i][1] -= vxmu2f*ty;
           torque[i][2] -= vxmu2f*tz;
 
           if (newton_pair || j < nlocal) {
             torque[j][0] += vxmu2f*tx;
             torque[j][1] += vxmu2f*ty;
             torque[j][2] += vxmu2f*tz;
           }
         }
 
         // tally the pair force with zero energy terms
 
         if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
                                  0.0,0.0,-fx,-fy,-fz,delx,dely,delz);
       }
     }
   }
 
   // restore streaming component of velocity, omega, angmom
   // exact inverse of the subtraction done before the force loop
 
   if (shearing) {
     double *h_rate = domain->h_rate;
     double *h_ratelo = domain->h_ratelo;
 
     for (ii = 0; ii < inum; ii++) {
       i = ilist[ii];
       // itype and radi are assigned here but not read inside this loop
       itype = type[i];
       radi = radius[i];
 
       domain->x2lamda(x[i],lamda);
       vstream[0] = h_rate[0]*lamda[0] + h_rate[5]*lamda[1] +
         h_rate[4]*lamda[2] + h_ratelo[0];
       vstream[1] = h_rate[1]*lamda[1] + h_rate[3]*lamda[2] + h_ratelo[1];
       vstream[2] = h_rate[2]*lamda[2] + h_ratelo[2];
       v[i][0] += vstream[0];
       v[i][1] += vstream[1];
       v[i][2] += vstream[2];
 
       omega[i][0] -= 0.5*h_rate[3];
       omega[i][1] += 0.5*h_rate[4];
       omega[i][2] -= 0.5*h_rate[5];
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    create the per-type-pair arrays, each (ntypes+1) x (ntypes+1)
    setflag is cleared for every pair with i <= j
 ------------------------------------------------------------------------- */
 
 void PairLubricate::allocate()
 {
   const int ntypes = atom->ntypes;
   const int dim = ntypes + 1;
 
   allocated = 1;
 
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   memory->create(cut,dim,dim,"pair:cut");
   memory->create(cut_inner,dim,dim,"pair:cut_inner");
 }
 
 /* ----------------------------------------------------------------------
    global settings
    args = mu flaglog flagfld cut_inner cut_global [flagHI flagVF]
    flagHI and flagVF both default to 1 when only 5 args are given
 ------------------------------------------------------------------------- */
 
 void PairLubricate::settings(int narg, char **arg)
 {
   if (narg != 5 && narg != 7) error->all(FLERR,"Illegal pair_style command");
 
   mu = force->numeric(FLERR,arg[0]);
   flaglog = force->inumeric(FLERR,arg[1]);
   flagfld = force->inumeric(FLERR,arg[2]);
   cut_inner_global = force->numeric(FLERR,arg[3]);
   cut_global = force->numeric(FLERR,arg[4]);
 
   flagHI = flagVF = 1;
   if (narg == 7) {
     flagHI = force->inumeric(FLERR,arg[5]);
     flagVF = force->inumeric(FLERR,arg[6]);
   }
 
   // the log terms are only evaluated inside the pairwise loop,
   // which is skipped when flagHI is 0, so force flagHI on
 
   if (flaglog == 1 && flagHI == 0) {
     error->warning(FLERR,"Cannot include log terms without 1/r terms; "
                    "setting flagHI to 1");
     flagHI = 1;
   }
 
   // reset cutoffs that have been explicitly set
   // the inner loop starts at j = i so that like-type (i,i) pairs,
   // which coeff() also marks in setflag, receive the new global
   // cutoffs as well
 
   if (allocated) {
     for (int i = 1; i <= atom->ntypes; i++)
       for (int j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) {
           cut_inner[i][j] = cut_inner_global;
           cut[i][j] = cut_global;
         }
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
    args = itype-range jtype-range [cut_inner cut]
    without the optional cutoffs the global values are used
 ------------------------------------------------------------------------- */
 
 void PairLubricate::coeff(int narg, char **arg)
 {
   const bool has_cutoffs = (narg == 4);
   if (narg != 2 && !has_cutoffs)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   // per-pair cutoffs, falling back to the global settings
 
   double cut_inner_one,cut_one;
   if (has_cutoffs) {
     cut_inner_one = force->numeric(FLERR,arg[2]);
     cut_one = force->numeric(FLERR,arg[3]);
   } else {
     cut_inner_one = cut_inner_global;
     cut_one = cut_global;
   }
 
   // only entries with i <= j are filled
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     const int jstart = MAX(jlo,i);
     for (int j = jstart; j <= jhi; j++) {
       cut_inner[i][j] = cut_inner_one;
       cut[i][j] = cut_one;
       setflag[i][j] = 1;
       ++count;
     }
   }
 
   // the ranges selected no valid i <= j pair
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
    checks atom style and ghost velocity requirements, requests a
    neighbor list, verifies all particles share one radius, detects
    fix deform / fix wall, and sets the isotropic constants R0, RT0, RS0
 ------------------------------------------------------------------------- */
 
 void PairLubricate::init_style()
 {
   if (!atom->sphere_flag)
     error->all(FLERR,"Pair lubricate requires atom style sphere");
   if (comm->ghost_velocity == 0)
     error->all(FLERR,"Pair lubricate requires ghost atoms store velocity");
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 
   // require that atom radii are identical within each type
   // require monodisperse system with same radii for all types
   // rad is only compared for i > 1, i.e. after it was set by type 1
 
   double radtype;
   for (int i = 1; i <= atom->ntypes; i++) {
     if (!atom->radius_consistency(i,radtype))
       error->all(FLERR,"Pair lubricate requires monodisperse particles");
     if (i > 1 && radtype != rad)
       error->all(FLERR,"Pair lubricate requires monodisperse particles");
     rad = radtype;
   }
 
   // check for fix deform, if exists it must use "remap v"
   // If box will change volume, set appropriate flag so that volume
   // and v.f. corrections are re-calculated at every step.
   //
   // If available volume is different from box volume
   // due to walls, set volume appropriately; if walls will
   // move, set appropriate flag so that volume and v.f. corrections
   // are re-calculated at every step.
 
   shearing = flagdeform = flagwall = 0;
   for (int i = 0; i < modify->nfix; i++){
     if (strcmp(modify->fix[i]->style,"deform") == 0) {
       shearing = flagdeform = 1;
       if (((FixDeform *) modify->fix[i])->remapflag != V_REMAP)
         error->all(FLERR,"Using pair lubricate with inconsistent "
                    "fix deform remap option");
     }
     // NOTE(review): any fix whose style name contains "wall" matches
     // here and is cast to FixWall -- confirm all such styles derive
     // from FixWall
     if (strstr(modify->fix[i]->style,"wall") != NULL) {
       if (flagwall) 
         error->all(FLERR,
                    "Cannot use multiple fix wall commands with pair lubricate");
       flagwall = 1; // Walls exist
       wallfix = (FixWall *) modify->fix[i];
       if (wallfix->xflag) flagwall = 2; // Moving walls exist
     }
   }
 
   // set the isotropic constants that depend on the volume fraction
   // vol_T = total volume
 
   double vol_T;
   double wallcoord;
   if (!flagwall) vol_T = domain->xprd*domain->yprd*domain->zprd;
   else {
     // start from the box extent, then overwrite each walled face
     // with the wall coordinate
     double wallhi[3], walllo[3];
     for (int j = 0; j < 3; j++){
       wallhi[j] = domain->prd[j];
       walllo[j] = 0;
     }
 
     for (int m = 0; m < wallfix->nwall; m++){
       // wallwhich encodes dimension (value/2) and side (value%2)
       int dim = wallfix->wallwhich[m] / 2;
       int side = wallfix->wallwhich[m] % 2;
       if (wallfix->xstyle[m] == VARIABLE){
         wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
         //Since fix->wall->init happens after pair->init_style
         wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
       }
 
       else wallcoord = wallfix->coord0[m];
 
       if (side == 0) walllo[dim] = wallcoord;
       else wallhi[dim] = wallcoord;
     }
     vol_T = (wallhi[0] - walllo[0]) * (wallhi[1] - walllo[1]) *
       (wallhi[2] - walllo[2]);
   }
 
   // vol_P = volume of particles, assuming monodispersity
   // vol_f = volume fraction
 
   vol_P = atom->natoms*(4.0/3.0)*MY_PI*pow(rad,3.0);
   double vol_f = vol_P/vol_T;
 
   // volume fraction corrections disabled: use the dilute-limit constants
 
   if (!flagVF) vol_f = 0;
 
   // set isotropic constants for FLD
 
   if (flaglog == 0) {
     R0  = 6*MY_PI*mu*rad*(1.0 + 2.16*vol_f);
     RT0 = 8*MY_PI*mu*pow(rad,3.0);
     RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3.0)*(1.0 + 3.33*vol_f + 2.80*vol_f*vol_f);
   } else {
     R0  = 6*MY_PI*mu*rad*(1.0 + 2.725*vol_f - 6.583*vol_f*vol_f);
     RT0 = 8*MY_PI*mu*pow(rad,3.0)*(1.0 + 0.749*vol_f - 2.469*vol_f*vol_f);
     RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3.0)*(1.0 + 3.64*vol_f - 6.95*vol_f*vol_f);
   }
 
 
   // set Ef = 0 since used whether shearing or not
 
   Ef[0][0] = Ef[0][1] = Ef[0][2] = 0.0;
   Ef[1][0] = Ef[1][1] = Ef[1][2] = 0.0;
   Ef[2][0] = Ef[2][1] = Ef[2][2] = 0.0;
 }
 
 /* ----------------------------------------------------------------------
    finalize the cutoffs for type pair i,j
    the inner cutoff is also copied to the mirrored j,i entry
    returns the outer cutoff cut[i][j]
 ------------------------------------------------------------------------- */
 
 double PairLubricate::init_one(int i, int j)
 {
   // pair was never set via coeff(): mix both cutoffs from the
   // i,i and j,j entries
 
   if (!setflag[i][j]) {
     cut_inner[i][j] = mix_distance(cut_inner[i][i],cut_inner[j][j]);
     cut[i][j] = mix_distance(cut[i][i],cut[j][j]);
   }
 
   // keep the inner cutoff table symmetric
 
   cut_inner[j][i] = cut_inner[i][j];
 
   return cut[i][j];
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
    global settings first, then for each pair i <= j the setflag value
    followed by cut_inner and cut if the pair is set
 ------------------------------------------------------------------------- */
 
 void PairLubricate::write_restart(FILE *fp)
 {
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
 
       // cutoffs are stored only for pairs that were explicitly set
 
       if (!setflag[itype][jtype]) continue;
       fwrite(&cut_inner[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
    mirrors write_restart(): settings, then per-pair setflag and cutoffs
 ------------------------------------------------------------------------- */
 
 void PairLubricate::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   const bool reader = (comm->me == 0);
   const int ntypes = atom->ntypes;
 
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       if (reader) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
 
       // cutoffs follow in the file only for pairs that were set
 
       if (!setflag[itype][jtype]) continue;
 
       if (reader) {
         fread(&cut_inner[itype][jtype],sizeof(double),1,fp);
         fread(&cut[itype][jtype],sizeof(double),1,fp);
       }
       MPI_Bcast(&cut_inner[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
    writes the global settings as raw binary values; the order here must
    match the read order in read_restart_settings()
 ------------------------------------------------------------------------- */
 
 void PairLubricate::write_restart_settings(FILE *fp)
 {
   // values parsed by settings()
   fwrite(&mu,sizeof(double),1,fp);
   fwrite(&flaglog,sizeof(int),1,fp);
   fwrite(&flagfld,sizeof(int),1,fp);
   fwrite(&cut_inner_global,sizeof(double),1,fp);
   fwrite(&cut_global,sizeof(double),1,fp);
   // offset_flag and mix_flag are not assigned anywhere in this file
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
   // optional settings() flags
   fwrite(&flagHI,sizeof(int),1,fp);
   fwrite(&flagVF,sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
    reads the global settings in the same order that
    write_restart_settings() wrote them
 ------------------------------------------------------------------------- */
 
 void PairLubricate::read_restart_settings(FILE *fp)
 {
   int me = comm->me;
   // only proc 0 touches the file
   if (me == 0) {
     fread(&mu,sizeof(double),1,fp);
     fread(&flaglog,sizeof(int),1,fp);
     fread(&flagfld,sizeof(int),1,fp);
     fread(&cut_inner_global,sizeof(double),1,fp);
     fread(&cut_global,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
     fread(&flagHI,sizeof(int),1,fp);
     fread(&flagVF,sizeof(int),1,fp);
   }
   // distribute every value from proc 0 to all procs
   MPI_Bcast(&mu,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&flaglog,1,MPI_INT,0,world);
   MPI_Bcast(&flagfld,1,MPI_INT,0,world);
   MPI_Bcast(&cut_inner_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
   MPI_Bcast(&flagHI,1,MPI_INT,0,world);
   MPI_Bcast(&flagVF,1,MPI_INT,0,world);
 }
 
 /* ---------------------------------------------------------------------- */
 
 int PairLubricate::pack_forward_comm(int n, int *list, double *buf,
                                      int pbc_flag, int *pbc)
 {
   // pack velocity then omega (3 values each) for the n atoms in list
   // returns the number of doubles written to buf
   // pbc_flag and pbc are not used
 
   double **v = atom->v;
   double **omega = atom->omega;
 
   int nbuf = 0;
   for (int k = 0; k < n; k++) {
     const int iatom = list[k];
     for (int d = 0; d < 3; d++) buf[nbuf++] = v[iatom][d];
     for (int d = 0; d < 3; d++) buf[nbuf++] = omega[iatom][d];
   }
 
   return nbuf;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLubricate::unpack_forward_comm(int n, int first, double *buf)
 {
   // unpack velocity then omega (3 values each) into the n atoms
   // starting at index first, in the order pack_forward_comm wrote them
 
   double **v = atom->v;
   double **omega = atom->omega;
 
   const double *src = buf;
   const int stop = first + n;
   for (int iatom = first; iatom < stop; iatom++) {
     for (int d = 0; d < 3; d++) v[iatom][d] = *src++;
     for (int d = 0; d < 3; d++) omega[iatom][d] = *src++;
   }
 }
 
 /* ----------------------------------------------------------------------
    check if name is recognized, return integer index for that name
    only "mu" is recognized here and maps to index 0
    if name not recognized, return -1
 ------------------------------------------------------------------------- */
 
 int PairLubricate::pre_adapt(char *name, int ilo, int ihi, int jlo, int jhi)
 {
   // the type ranges are not used
 
   const bool is_mu = (strcmp(name,"mu") == 0);
   return is_mu ? 0 : -1;
 }
 
 /* ----------------------------------------------------------------------
    adapt parameter indexed by which
    change all pair variables affected by the reset parameter
    if type pair setting, set I-J and J-I coeffs
 ------------------------------------------------------------------------- */
 
 void PairLubricate::adapt(int which, int ilo, int ihi, int jlo, int jhi,
                           double value)
 {
   mu = value;
 }
diff --git a/src/FLD/pair_lubricateU.cpp b/src/FLD/pair_lubricateU.cpp
index 2a4bbf7fe..66e9ab140 100644
--- a/src/FLD/pair_lubricateU.cpp
+++ b/src/FLD/pair_lubricateU.cpp
@@ -1,2061 +1,2061 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Amit Kumar and Michael Bybee (UIUC)
 ------------------------------------------------------------------------- */
 
 #include "mpi.h"
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_lubricateU.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "domain.h"
 #include "update.h"
 #include "math_const.h"
 #include "modify.h"
 #include "fix.h"
 #include "fix_deform.h"
 #include "fix_wall.h"
 #include "input.h"
 #include "variable.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 #define TOL 1E-4   // tolerance for conjugate gradient
 
 // same as fix_wall.cpp
 
 enum{EDGE,CONSTANT,VARIABLE};
 
 /* ---------------------------------------------------------------------- */
 
 PairLubricateU::PairLubricateU(LAMMPS *lmp) : Pair(lmp)
 {
   single_enable = 0;
 
   // the virial cannot be obtained as F dot r for this style
   // because of how drag forces are applied to atoms
   // the per-atom virial approach is the correct one
 
   no_virial_fdotr_compute = 1;
 
   // per-atom work arrays, sized on demand in compute()
 
   nmax = 0;
   fl = NULL;
   Tl = NULL;
   xl = NULL;
 
   // conjugate gradient work vectors, sized on demand in stage_one()
 
   cgmax = 0;
   bcg = NULL;
   xcg = NULL;
   rcg = NULL;
   rcg1 = NULL;
   pcg = NULL;
   RU = NULL;
 
   // set comm size needed by this Pair
 
   comm_forward = 6;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairLubricateU::~PairLubricateU()
 {
   // per-atom work arrays
 
   memory->destroy(fl);
   memory->destroy(Tl);
   memory->destroy(xl);
 
   // conjugate gradient work vectors
 
   memory->destroy(bcg);
   memory->destroy(xcg);
   memory->destroy(rcg);
   memory->destroy(rcg1);
   memory->destroy(pcg);
   memory->destroy(RU);
 
   // per-type arrays are released only if the allocated flag is set
 
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   memory->destroy(cut);
   memory->destroy(cut_inner);
 }
 
 /* ----------------------------------------------------------------------
    Solve for the particle velocities such that the net force on the
    particles is zero, using a two-stage midpoint scheme.
    NOTE: this has to be the last pair interaction specified in the
    input file, and it assumes that no other kinds of interactions
    (e.g. k-space) are present. After this pair interaction the net
    force on the particles is identically zero.
    ---------------------------------------------------------------------- */
 
 void PairLubricateU::compute(int eflag, int vflag)
 {
   double **x = atom->x;
   double **f = atom->f;
   double **torque = atom->torque;
   const int nall = atom->nlocal + atom->nghost;
 
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   // do nothing when called from integrate::setup()
   // otherwise a restart would update the velocities twice:
   // compute runs on step N (last step of the previous run),
   // again on step N (setup of the restarted run),
   // then on step N+1 (first step of the restarted run)
   // and that extra call leads to bad dynamics
 
   if (update->setupflag) return;
 
   // grow per-atom arrays if necessary
   // they must be atom->nmax in length
 
   if (atom->nmax > nmax) {
     memory->destroy(fl);
     memory->destroy(Tl);
     memory->destroy(xl);
     nmax = atom->nmax;
     memory->create(fl,nmax,3,"pair:fl");
     memory->create(Tl,nmax,3,"pair:Tl");
     memory->create(xl,nmax,3,"pair:xl");
   }
 
   // midpoint integration scheme:
   // save the forces and torques found so far, plus the positions
 
   for (int iatom = 0; iatom < nall; iatom++) {
     for (int d = 0; d < 3; d++) {
       fl[iatom][d] = f[iatom][d];
       Tl[iatom][d] = torque[iatom][d];
       xl[iatom][d] = x[iatom][d];
     }
   }
 
   // stage one of the midpoint method
   // solve for velocities based on initial positions
 
   stage_one();
 
   // find positions at half the timestep and store in xl
 
   intermediates(nall,xl);
 
   // put the saved forces and torques back into the original arrays
 
   for (int iatom = 0; iatom < nall; iatom++) {
     for (int d = 0; d < 3; d++) {
       f[iatom][d] = fl[iatom][d];
       torque[iatom][d] = Tl[iatom][d];
     }
   }
 
   // stage two: this will give the final velocities
 
   stage_two(xl);
 }
 
 /* ------------------------------------------------------------------------
    Stage one of midpoint method
 ------------------------------------------------------------------------- */
 
 void PairLubricateU::stage_one()
 {
   // First stage of the midpoint scheme.  Using the current atom positions,
   // solve  R_FU * U = -(F + R_FE*E)  with conjugate gradient (CG) for the
   // translational/angular velocities relative to the fluid, store them in
   // atom->v / atom->omega, then add the imposed fluid velocity.
   // atom->f and atom->torque are used as scratch space by compute_RU()
   // and do not hold the original forces on return.
 
   int i,j,ii,inum;
 
   double **x = atom->x;
   double **v = atom->v;
   double **f = atom->f;
   double **omega = atom->omega;
   double **torque = atom->torque;
 
   int newton_pair = force->newton_pair;
   int *ilist;
 
   inum = list->inum;
   ilist = list->ilist;
 
   // Grow the CG work vectors if the number of unknowns (6 per atom in the
   // neighbor list) exceeds the current capacity.
   // Each array gets its own label so an allocation error names it correctly.
 
   if (6*inum > cgmax) {
     memory->destroy(bcg);
     memory->destroy(xcg);
     memory->destroy(rcg);
     memory->destroy(rcg1);
     memory->destroy(pcg);
     memory->destroy(RU);
     cgmax = 6*inum;
     memory->create(bcg,cgmax,"pair:bcg");
     memory->create(xcg,cgmax,"pair:xcg");
     memory->create(rcg,cgmax,"pair:rcg");
     memory->create(rcg1,cgmax,"pair:rcg1");
     memory->create(pcg,cgmax,"pair:pcg");
     memory->create(RU,cgmax,"pair:RU");
   }
 
   double alpha,beta;
   double normi,error,normig;
   double send[2],recv[2],rcg_dot_rcg;
 
   // First compute R_FE*E
 
   compute_RE();
 
   // Reverse communication of forces and torques to
   // accumulate the net force on each of the particles
 
   if (newton_pair) comm->reverse_comm();
 
   // CONJUGATE GRADIENT
   // Find the right hand side= -ve of all forces/torques
   // b = 6*Npart in overall size
 
   for(ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     for (j = 0; j < 3; j++) {
       bcg[6*ii+j] = -f[i][j];
       bcg[6*ii+j+3] = -torque[i][j];
     }
   }
 
   // Start solving the equation : F^H = -F^P -F^B - F^H_{Ef}
   // Store initial guess for velocity and angular-velocities/angular momentum
   // NOTE velocities and angular velocities are assumed relative to the fluid
 
   for (ii=0;ii<inum;ii++)
     for (j=0;j<3;j++) {
       xcg[6*ii+j] = 0.0;
       xcg[6*ii+j+3] = 0.0;
     }
 
   // Copy initial guess to the global arrays to be acted upon by R_{FU}
   // and returned by f and torque arrays
 
   copy_vec_uo(inum,xcg,v,omega);
 
   // set velocities for ghost particles
 
   comm->forward_comm_pair(this);
 
   // Find initial residual
 
   compute_RU();
 
   // reverse communication of forces and torques
 
   if (newton_pair) comm->reverse_comm();
 
   copy_uo_vec(inum,f,torque,RU);
 
   for (i=0;i<6*inum;i++)
     rcg[i] = bcg[i] - RU[i];
 
   // Set initial conjugate direction
 
   for (i=0;i<6*inum;i++)
     pcg[i] = rcg[i];
 
   // Find initial norm of the residual or norm of the RHS (either is fine)
 
   normi = dot_vec_vec(6*inum,bcg,bcg);
 
   MPI_Allreduce(&normi,&normig,1,MPI_DOUBLE,MPI_SUM,world);
 
   // Loop until convergence
   // A right-hand side that is exactly zero on all ranks has the exact
   // solution x = 0, which is already the initial guess.  Iterating in that
   // case would evaluate alpha = 0/0 and fill the velocities with NaN, so the
   // loop is skipped.  normig is a global sum, hence every rank takes the
   // same branch and the collective calls inside the loop stay matched.
 
   if (normig != 0.0) {
     do {
       // find R*p
 
       copy_vec_uo(inum,pcg,v,omega);
 
       // set velocities for ghost particles
 
       comm->forward_comm_pair(this);
 
       compute_RU();
 
       // reverse communication of forces and torques
 
       if (newton_pair) comm->reverse_comm();
 
       copy_uo_vec(inum,f,torque,RU);
 
       // Find alpha
 
       send[0] = dot_vec_vec(6*inum,rcg,rcg);
       send[1] = dot_vec_vec(6*inum,RU,pcg);
 
       MPI_Allreduce(send,recv,2,MPI_DOUBLE,MPI_SUM,world);
 
       alpha = recv[0]/recv[1];
       rcg_dot_rcg = recv[0];
 
       // Find new x
 
       for (i=0;i<6*inum;i++)
         xcg[i] = xcg[i] + alpha*pcg[i];
 
       // find new residual
 
       for (i=0;i<6*inum;i++)
         rcg1[i] = rcg[i] - alpha*RU[i];
 
       // find beta
 
       send[0] = dot_vec_vec(6*inum,rcg1,rcg1);
 
       MPI_Allreduce(send,recv,1,MPI_DOUBLE,MPI_SUM,world);
 
       beta = recv[0]/rcg_dot_rcg;
 
       // Find new conjugate direction
 
       for (i=0;i<6*inum;i++)
         pcg[i] = rcg1[i] + beta*pcg[i];
 
       for (i=0;i<6*inum;i++)
         rcg[i] = rcg1[i];
 
       // Find relative error
       // (global norm of the new residual relative to the norm of the RHS)
 
       error = sqrt(recv[0]/normig);
 
     } while (error > TOL);
   }
 
   // update the final converged velocities in respective arrays
 
   copy_vec_uo(inum,xcg,v,omega);
 
   // set velocities for ghost particles
 
   comm->forward_comm_pair(this);
 
   // Find actual particle's velocities from relative velocities
   // Only non-zero component of fluid's vel : vx=gdot*y and wz=-gdot/2
 
   for (ii=0;ii<inum;ii++) {
     i = ilist[ii];
     v[i][0] = v[i][0] + gdot*x[i][1];
     omega[i][2] = omega[i][2] - gdot/2.0;
   }
 }
 
 /*---------------------------------------------------------------
   Finds the position of the particles at half the time step
 ----------------------------------------------------------------*/
 
 void PairLubricateU::intermediates(int nall, double **xl)
 {
   // Advance every one of the nall atoms by half a timestep along its
   // current velocity and write the result to xl; atom->x is left untouched.
 
   double **pos = atom->x;
   double **vel = atom->v;
   const double half_dt = 0.5*update->dt;
 
   for (int n = 0; n < nall; ++n) {
     for (int k = 0; k < 3; ++k) {
       xl[n][k] = pos[n][k] + half_dt*vel[n][k];
     }
   }
 }
 
 /* ------------------------------------------------------------------------
    Stage two of midpoint method
 ------------------------------------------------------------------------- */
 
 void PairLubricateU::stage_two(double **x)
 {
   // Second stage of the midpoint scheme.  Same conjugate gradient (CG)
   // solve as stage_one(), but every resistance product is evaluated at the
   // positions passed in as x instead of atom->x.  On return atom->v and
   // atom->omega hold the final velocities.  If evflag is set, compute_Fh(x)
   // is called to tally the virial before the fluid velocity is added back.
   // The CG work vectors are not resized here; they are sized in stage_one().
 
   int i,j,ii,inum;
   double **v = atom->v;
   double **f = atom->f;
   double **omega = atom->omega;
   double **torque = atom->torque;
 
   int newton_pair = force->newton_pair;
   int *ilist;
 
   inum = list->inum;
   ilist = list->ilist;
 
   double alpha,beta;
   double normi,error,normig;
   double send[2],recv[2],rcg_dot_rcg;
 
   // First compute R_FE*E
 
   compute_RE(x);
 
   // Reverse communication of forces and torques to
   // accumulate the net force on each of the particles
 
   if (newton_pair) comm->reverse_comm();
 
   // CONJUGATE GRADIENT
   // Find the right hand side= -ve of all forces/torques
   // b = 6*Npart in overall size
 
   for(ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     for (j = 0; j < 3; j++) {
       bcg[6*ii+j] = -f[i][j];
       bcg[6*ii+j+3] = -torque[i][j];
     }
   }
 
   // Start solving the equation : F^H = -F^P -F^B - F^H_{Ef}
   // Store initial guess for velocity and angular-velocities/angular momentum
   // NOTE velocities and angular velocities are assumed relative to the fluid
 
   for (ii=0;ii<inum;ii++)
     for (j=0;j<3;j++) {
       xcg[6*ii+j] = 0.0;
       xcg[6*ii+j+3] = 0.0;
     }
 
   // Copy initial guess to the global arrays to be acted upon by R_{FU}
   // and returned by f and torque arrays
 
   copy_vec_uo(inum,xcg,v,omega);
 
   // set velocities for ghost particles
 
   comm->forward_comm_pair(this);
 
   // Find initial residual
 
   compute_RU(x);
 
   // reverse communication of forces and torques
 
   if (newton_pair) comm->reverse_comm();
 
   copy_uo_vec(inum,f,torque,RU);
 
   for (i=0;i<6*inum;i++)
     rcg[i] = bcg[i] - RU[i];
 
   // Set initial conjugate direction
 
   for (i=0;i<6*inum;i++)
     pcg[i] = rcg[i];
 
   // Find initial norm of the residual or norm of the RHS (either is fine)
 
   normi = dot_vec_vec(6*inum,bcg,bcg);
 
   MPI_Allreduce(&normi,&normig,1,MPI_DOUBLE,MPI_SUM,world);
 
   // Loop until convergence
   // A right-hand side that is exactly zero on all ranks has the exact
   // solution x = 0, which is already the initial guess.  Iterating in that
   // case would evaluate alpha = 0/0 and fill the velocities with NaN, so the
   // loop is skipped.  normig is a global sum, hence every rank takes the
   // same branch and the collective calls inside the loop stay matched.
 
   if (normig != 0.0) {
     do {
       // find R*p
 
       copy_vec_uo(inum,pcg,v,omega);
 
       // set velocities for ghost particles
 
       comm->forward_comm_pair(this);
 
       compute_RU(x);
 
       // reverse communication of forces and torques
 
       if (newton_pair) comm->reverse_comm();
 
       copy_uo_vec(inum,f,torque,RU);
 
       // Find alpha
 
       send[0] = dot_vec_vec(6*inum,rcg,rcg);
       send[1] = dot_vec_vec(6*inum,RU,pcg);
 
       MPI_Allreduce(send,recv,2,MPI_DOUBLE,MPI_SUM,world);
 
       alpha = recv[0]/recv[1];
       rcg_dot_rcg = recv[0];
 
       // Find new x
 
       for (i=0;i<6*inum;i++)
         xcg[i] = xcg[i] + alpha*pcg[i];
 
       // find new residual
 
       for (i=0;i<6*inum;i++)
         rcg1[i] = rcg[i] - alpha*RU[i];
 
       // find beta
 
       send[0] = dot_vec_vec(6*inum,rcg1,rcg1);
 
       MPI_Allreduce(send,recv,1,MPI_DOUBLE,MPI_SUM,world);
 
       beta = recv[0]/rcg_dot_rcg;
 
       // Find new conjugate direction
 
       for (i=0;i<6*inum;i++)
         pcg[i] = rcg1[i] + beta*pcg[i];
 
       for (i=0;i<6*inum;i++)
         rcg[i] = rcg1[i];
 
       // Find relative error
       // (global norm of the new residual relative to the norm of the RHS)
 
       error = sqrt(recv[0]/normig);
 
     } while (error > TOL);
   }
 
   // update the final converged velocities in respective arrays
 
   copy_vec_uo(inum,xcg,v,omega);
 
   // set velocities for ghost particles
 
   comm->forward_comm_pair(this);
 
   // Compute the viscosity/pressure
 
   if (evflag) compute_Fh(x);
 
   // Find actual particle's velocities from relative velocities
   // Only non-zero component of fluid's vel : vx=gdot*y and wz=-gdot/2
 
   for (ii=0;ii<inum;ii++) {
     i = ilist[ii];
     v[i][0] = v[i][0] + gdot*x[i][1];
     omega[i][2] = omega[i][2] - gdot/2.0;
   }
 }
 
 /* ------------------------------------------------------------------------
    This function computes the final hydrodynamic force once the
    velocities have converged.
    ------------------------------------------------------------------------- */
 
 void PairLubricateU::compute_Fh(double **x)
 {
   // Tallies the hydrodynamic contribution to the virial through
   // ev_tally_xyz(), using the positions passed in as x and the velocities
   // currently stored in atom->v / atom->omega.
   // Side effects visible here: RS0 may be recomputed from the current
   // volume fraction, and atom->f / atom->torque are reset to zero for all
   // local and ghost atoms.  The pairwise forces computed below are only
   // passed to ev_tally_xyz(); they are not accumulated into atom->f.
 
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz;
   double rsq,r,h_sep;
   double vr1,vr2,vr3,vnnr,vn1,vn2,vn3;
   double vt1,vt2,vt3;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   double **v = atom->v;
   double **f = atom->f;
   double **omega = atom->omega;
   double **torque = atom->torque;
   double *radius = atom->radius;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int nghost = atom->nghost;
   int newton_pair = force->newton_pair;
 
   double radi;
 
   double vxmu2f = force->vxmu2f;
   double vi[3],vj[3],wi[3],wj[3],xl[3],a_sq,a_sh;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // This section of code adjusts R0/RT0/RS0 if necessary due to changes
   // in the volume fraction as a result of fix deform or moving walls
   // (only RS0 is updated in this function; R0 and RT0 are left as they are)
 
   double dims[3], wallcoord;
   if (flagVF) // Flag for volume fraction corrections
     if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
       // no walls: the box lengths are the periodic box dimensions
       if (flagdeform && !flagwall)
         for (j = 0; j < 3; j++)
           dims[j] = domain->prd[j];
       // walls present: start from the box and override each walled face
       // with the current wall coordinate
       else if (flagwall == 2 || (flagdeform && flagwall == 1)){
          double wallhi[3], walllo[3];
          for (int j = 0; j < 3; j++){
            wallhi[j] = domain->prd[j];
            walllo[j] = 0;
          }
          for (int m = 0; m < wallfix->nwall; m++){
            // wallwhich encodes dimension (value/2) and side (value%2)
            int dim = wallfix->wallwhich[m] / 2;
            int side = wallfix->wallwhich[m] % 2;
            if (wallfix->xstyle[m] == VARIABLE){
              wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
            }
            else wallcoord = wallfix->coord0[m];
            if (side == 0) walllo[dim] = wallcoord;
            else wallhi[dim] = wallcoord;
          }
          for (int j = 0; j < 3; j++)
            dims[j] = wallhi[j] - walllo[j];
       }
       // volume fraction = particle volume (vol_P) / enclosing volume
       double vol_T = dims[0]*dims[1]*dims[2];
       double vol_f = vol_P/vol_T;
       if (flaglog == 0) {
         //        R0  = 6*MY_PI*mu*rad*(1.0 + 2.16*vol_f);
         //        RT0 = 8*MY_PI*mu*pow(rad,3);
         RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3.0)*
           (1.0 + 3.33*vol_f + 2.80*vol_f*vol_f);
       } else {
         //        R0  = 6*MY_PI*mu*rad*(1.0 + 2.725*vol_f - 6.583*vol_f*vol_f);
         //        RT0 = 8*MY_PI*mu*pow(rad,3)*(1.0 + 0.749*vol_f - 2.469*vol_f*vol_f);
         RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3.0)*
           (1.0 + 3.64*vol_f - 6.95*vol_f*vol_f);
       }
     }
 
 
   // end of R0 adjustment code
 
   // Set force to zero which is the final value after this pair interaction
   for (i=0;i<nlocal+nghost;i++)
     for (j=0;j<3;j++) {
       f[i][j] = 0.0;
       torque[i][j] = 0.0;
     }
 
   // reverse communication of forces and torques
 
   if (newton_pair) comm->reverse_comm(); // not really needed
 
   // Find additional contribution from the stresslets
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     radi = radius[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     // Find the contribution to stress from isotropic RS0
     // Set pseudo force to obtain the required contribution
     // need to set delx and fy only
 
     fx = 0.0; delx = radi;
     fy = vxmu2f*RS0*gdot/2.0/radi; dely = 0.0;
     fz = 0.0; delz = 0.0;
     if (evflag)
       ev_tally_xyz(i,i,nlocal,newton_pair,0.0,0.0,-fx,-fy,-fz,delx,dely,delz);
 
     // Find angular velocity
 
     wi[0] = omega[i][0];
     wi[1] = omega[i][1];
     wi[2] = omega[i][2];
 
     // without pairwise hydrodynamic interactions only the isotropic
     // term above contributes
 
     if (!flagHI) continue;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // NOTE(review): NEIGHMASK presumably strips extra flag bits packed
       // into the neighbor index - confirm against the neighbor list code
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r = sqrt(rsq);
 
         // angular velocity of particle j, read directly from omega
 
         wj[0] = omega[j][0];
         wj[1] = omega[j][1];
         wj[2] = omega[j][2];
 
         // loc of the point of closest approach on particle i from its center
 
         xl[0] = -delx/r*radi;
         xl[1] = -dely/r*radi;
         xl[2] = -delz/r*radi;
 
         // velocity at the point of closest approach on both particles
         // v = v + omega_cross_xl
 
         // particle i
 
         vi[0] = v[i][0] + (wi[1]*xl[2] - wi[2]*xl[1]);
         vi[1] = v[i][1] + (wi[2]*xl[0] - wi[0]*xl[2]);
         vi[2] = v[i][2] + (wi[0]*xl[1] - wi[1]*xl[0]);
 
         // particle j
         // (the lever arm on j is -xl, hence the minus sign)
 
         vj[0] = v[j][0] - (wj[1]*xl[2] - wj[2]*xl[1]);
         vj[1] = v[j][1] - (wj[2]*xl[0] - wj[0]*xl[2]);
         vj[2] = v[j][2] - (wj[0]*xl[1] - wj[1]*xl[0]);
 
 
         // Relative  velocity at the point of closest approach
         // include contribution from Einf of the fluid
 
         vr1 = vi[0] - vj[0] -
           2.0*(Ef[0][0]*xl[0] + Ef[0][1]*xl[1] + Ef[0][2]*xl[2]);
         vr2 = vi[1] - vj[1] -
           2.0*(Ef[1][0]*xl[0] + Ef[1][1]*xl[1] + Ef[1][2]*xl[2]);
         vr3 = vi[2] - vj[2] -
           2.0*(Ef[2][0]*xl[0] + Ef[2][1]*xl[1] + Ef[2][2]*xl[2]);
 
         // Normal component (vr.n)n
 
         vnnr = (vr1*delx + vr2*dely + vr3*delz)/r;
         vn1 = vnnr*delx/r;
         vn2 = vnnr*dely/r;
         vn3 = vnnr*delz/r;
 
         // Tangential component vr - (vr.n)n
 
         vt1 = vr1 - vn1;
         vt2 = vr2 - vn2;
         vt3 = vr3 - vn3;
 
         // Find the scalar resistances a_sq and a_sh
 
         h_sep = r - 2.0*radi;
 
         // If less than the minimum gap use the minimum gap instead
 
         if (r < cut_inner[itype][jtype])
           h_sep = cut_inner[itype][jtype] - 2.0*radi;
 
         // Scale h_sep by radi
 
         h_sep = h_sep/radi;
 
         // Scalar resistances
         // (a_sh is only assigned, and only used, when flaglog is set)
 
         if (flaglog) {
           a_sq = 6.0*MY_PI*mu*radi*(1.0/4.0/h_sep + 9.0/40.0*log(1.0/h_sep));
           a_sh = 6.0*MY_PI*mu*radi*(1.0/6.0*log(1.0/h_sep));
         } else
           a_sq = 6.0*MY_PI*mu*radi*(1.0/4.0/h_sep);
 
         // Find force due to squeeze type motion
 
         fx  = a_sq*vn1;
         fy  = a_sq*vn2;
         fz  = a_sq*vn3;
 
         // Find force due to all shear kind of motions
 
         if (flaglog) {
           fx = fx + a_sh*vt1;
           fy = fy + a_sh*vt2;
           fz = fz + a_sh*vt3;
         }
 
         // Scale forces to obtain in appropriate units
 
         fx = vxmu2f*fx;
         fy = vxmu2f*fy;
         fz = vxmu2f*fz;
 
         // tally only; the force is not added to atom->f here
 
         if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
                                  0.0,0.0,-fx,-fy,-fz,delx,dely,delz);
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   computes R_FU * U
 ---------------------------------------------------------------------- */
 
 void PairLubricateU::compute_RU()
 {
   // Applies the resistance operator R_FU to the velocities currently held
   // in atom->v / atom->omega, using the positions in atom->x.
   // atom->f and atom->torque are zeroed for all local and ghost atoms and
   // then filled with the resulting forces and torques; the caller performs
   // any reverse communication.  R0 and RT0 may be recomputed from the
   // current volume fraction as a side effect.
 
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz,tx,ty,tz;
   double rsq,r,h_sep,radi;
   double vr1,vr2,vr3,vnnr,vn1,vn2,vn3;
   double vt1,vt2,vt3,wdotn,wt1,wt2,wt3;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   double **x = atom->x;
   double **v = atom->v;
   double **f = atom->f;
   double **omega = atom->omega;
   double **torque = atom->torque;
   double *radius = atom->radius;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int nghost = atom->nghost;
   int newton_pair = force->newton_pair;
 
   double vxmu2f = force->vxmu2f;
   double vi[3],vj[3],wi[3],wj[3],xl[3],a_sq,a_sh,a_pu;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // This section of code adjusts R0/RT0/RS0 if necessary due to changes
   // in the volume fraction as a result of fix deform or moving walls
   // (only R0 and RT0 are updated in this function; RS0 is left as it is)
 
   double dims[3], wallcoord;
   if (flagVF) // Flag for volume fraction corrections
     if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
       // no walls: the box lengths are the periodic box dimensions
       if (flagdeform && !flagwall)
         for (j = 0; j < 3; j++)
           dims[j] = domain->prd[j];
       // walls present: start from the box and override each walled face
       // with the current wall coordinate
       else if (flagwall == 2 || (flagdeform && flagwall == 1)){
          double wallhi[3], walllo[3];
          for (int j = 0; j < 3; j++){
            wallhi[j] = domain->prd[j];
            walllo[j] = 0;
          }
          for (int m = 0; m < wallfix->nwall; m++){
            // wallwhich encodes dimension (value/2) and side (value%2)
            int dim = wallfix->wallwhich[m] / 2;
            int side = wallfix->wallwhich[m] % 2;
            if (wallfix->xstyle[m] == VARIABLE){
              wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
            }
            else wallcoord = wallfix->coord0[m];
            if (side == 0) walllo[dim] = wallcoord;
            else wallhi[dim] = wallcoord;
          }
          for (int j = 0; j < 3; j++)
            dims[j] = wallhi[j] - walllo[j];
       }
       // volume fraction = particle volume (vol_P) / enclosing volume
       double vol_T = dims[0]*dims[1]*dims[2];
       double vol_f = vol_P/vol_T;
       if (flaglog == 0) {
         R0  = 6*MY_PI*mu*rad*(1.0 + 2.16*vol_f);
         RT0 = 8*MY_PI*mu*pow(rad,3.0);
         //        RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3)*(1.0 + 3.33*vol_f + 2.80*vol_f*vol_f);
       } else {
         R0  = 6*MY_PI*mu*rad*(1.0 + 2.725*vol_f - 6.583*vol_f*vol_f);
         RT0 = 8*MY_PI*mu*pow(rad,3.0)*(1.0 + 0.749*vol_f - 2.469*vol_f*vol_f);
         //        RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3)*(1.0 + 3.64*vol_f - 6.95*vol_f*vol_f);
       }
     }
 
   // end of R0 adjustment code
 
   // Initialize f to zero
   // (forces and torques of local and ghost atoms alike)
   for (i=0;i<nlocal+nghost;i++)
     for (j=0;j<3;j++) {
       f[i][j] = 0.0;
       torque[i][j] = 0.0;
     }
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     radi = radius[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     // Find angular velocity
 
     wi[0] = omega[i][0];
     wi[1] = omega[i][1];
     wi[2] = omega[i][2];
 
     // Contribution due to the isotropic terms
 
     f[i][0] += -vxmu2f*R0*v[i][0];
     f[i][1] += -vxmu2f*R0*v[i][1];
     f[i][2] += -vxmu2f*R0*v[i][2];
 
     torque[i][0] += -vxmu2f*RT0*wi[0];
     torque[i][1] += -vxmu2f*RT0*wi[1];
     torque[i][2] += -vxmu2f*RT0*wi[2];
 
     // without pairwise hydrodynamic interactions only the isotropic
     // drag above contributes
 
     if (!flagHI) continue;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // NOTE(review): NEIGHMASK presumably strips extra flag bits packed
       // into the neighbor index - confirm against the neighbor list code
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r = sqrt(rsq);
 
         // angular velocity of particle j, read directly from omega
 
         wj[0] = omega[j][0];
         wj[1] = omega[j][1];
         wj[2] = omega[j][2];
 
         // loc of the point of closest approach on particle i from its center
 
         xl[0] = -delx/r*radi;
         xl[1] = -dely/r*radi;
         xl[2] = -delz/r*radi;
 
         // velocity at the point of closest approach on both particles
         // v = v + omega_cross_xl
 
         // particle i
 
         vi[0] = v[i][0] + (wi[1]*xl[2] - wi[2]*xl[1]);
         vi[1] = v[i][1] + (wi[2]*xl[0] - wi[0]*xl[2]);
         vi[2] = v[i][2] + (wi[0]*xl[1] - wi[1]*xl[0]);
 
         // particle j
         // (the lever arm on j is -xl, hence the minus sign)
 
         vj[0] = v[j][0] - (wj[1]*xl[2] - wj[2]*xl[1]);
         vj[1] = v[j][1] - (wj[2]*xl[0] - wj[0]*xl[2]);
         vj[2] = v[j][2] - (wj[0]*xl[1] - wj[1]*xl[0]);
 
         // Find the scalar resistances a_sq and a_sh
 
         h_sep = r - 2.0*radi;
 
         // If less than the minimum gap use the minimum gap instead
 
         if (r < cut_inner[itype][jtype])
           h_sep = cut_inner[itype][jtype] - 2.0*radi;
 
         // Scale h_sep by radi
 
         h_sep = h_sep/radi;
 
         // Scalar resistances
         // (a_sh and a_pu are only assigned, and only used, when flaglog is set)
 
         if (flaglog) {
           a_sq = 6.0*MY_PI*mu*radi*(1.0/4.0/h_sep + 9.0/40.0*log(1.0/h_sep));
           a_sh = 6.0*MY_PI*mu*radi*(1.0/6.0*log(1.0/h_sep));
           a_pu = 8.0*MY_PI*mu*pow(radi,3.0)*(3.0/160.0*log(1.0/h_sep));
         } else
           a_sq = 6.0*MY_PI*mu*radi*(1.0/4.0/h_sep);
 
         // Relative  velocity at the point of closest approach
 
         vr1 = vi[0] - vj[0];
         vr2 = vi[1] - vj[1];
         vr3 = vi[2] - vj[2];
 
         // Normal component (vr.n)n
 
         vnnr = (vr1*delx + vr2*dely + vr3*delz)/r;
         vn1 = vnnr*delx/r;
         vn2 = vnnr*dely/r;
         vn3 = vnnr*delz/r;
 
         // Tangential component vr - (vr.n)n
 
         vt1 = vr1 - vn1;
         vt2 = vr2 - vn2;
         vt3 = vr3 - vn3;
 
         // Find force due to squeeze type motion
 
         fx  = a_sq*vn1;
         fy  = a_sq*vn2;
         fz  = a_sq*vn3;
 
         // Find force due to all shear kind of motions
 
         if (flaglog) {
           fx = fx + a_sh*vt1;
           fy = fy + a_sh*vt2;
           fz = fz + a_sh*vt3;
         }
 
         // Scale forces to obtain in appropriate units
 
         fx = vxmu2f*fx;
         fy = vxmu2f*fy;
         fz = vxmu2f*fz;
 
         // Add to the total force
 
         f[i][0] -= fx;
         f[i][1] -= fy;
         f[i][2] -= fz;
 
         // equal and opposite force on j; ghost atoms are only updated
         // when newton_pair is on
 
         if (newton_pair || j < nlocal) {
           f[j][0] += fx;
           f[j][1] += fy;
           f[j][2] += fz;
         }
 
         // Find torque due to this force
 
         if (flaglog) {
           tx = xl[1]*fz - xl[2]*fy;
           ty = xl[2]*fx - xl[0]*fz;
           tz = xl[0]*fy - xl[1]*fx;
 
           // Why a scale factor ?
           // NOTE(review): fx,fy,fz were already multiplied by vxmu2f above,
           // so this torque carries the factor twice - confirm intent
 
           torque[i][0] -= vxmu2f*tx;
           torque[i][1] -= vxmu2f*ty;
           torque[i][2] -= vxmu2f*tz;
 
           // same sign on j as on i: both the force and the lever arm flip
 
           if(newton_pair || j < nlocal) {
             torque[j][0] -= vxmu2f*tx;
             torque[j][1] -= vxmu2f*ty;
             torque[j][2] -= vxmu2f*tz;
           }
 
           // Torque due to a_pu
           // acts on the part of the relative angular velocity that is
           // perpendicular to the line of centers
 
           wdotn = ((wi[0]-wj[0])*delx +
                    (wi[1]-wj[1])*dely + (wi[2]-wj[2])*delz)/r;
           wt1 = (wi[0]-wj[0]) - wdotn*delx/r;
           wt2 = (wi[1]-wj[1]) - wdotn*dely/r;
           wt3 = (wi[2]-wj[2]) - wdotn*delz/r;
 
           tx = a_pu*wt1;
           ty = a_pu*wt2;
           tz = a_pu*wt3;
 
           // add to total
 
           torque[i][0] -= vxmu2f*tx;
           torque[i][1] -= vxmu2f*ty;
           torque[i][2] -= vxmu2f*tz;
 
           if (newton_pair || j < nlocal) {
             torque[j][0] += vxmu2f*tx;
             torque[j][1] += vxmu2f*ty;
             torque[j][2] += vxmu2f*tz;
           }
         }
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   computes R_FU * U
 ---------------------------------------------------------------------- */
 
 void PairLubricateU::compute_RU(double **x)
 {
   // Applies the resistance operator R_FU to the velocities currently held
   // in atom->v / atom->omega, using the positions passed in as x rather
   // than atom->x.
   // atom->f and atom->torque are zeroed for all local and ghost atoms and
   // then filled with the resulting forces and torques; the caller performs
   // any reverse communication.  R0 and RT0 may be recomputed from the
   // current volume fraction as a side effect.
 
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz,tx,ty,tz;
   double rsq,r,h_sep,radi;
   double vr1,vr2,vr3,vnnr,vn1,vn2,vn3;
   double vt1,vt2,vt3,wdotn,wt1,wt2,wt3;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   double **v = atom->v;
   double **f = atom->f;
   double **omega = atom->omega;
   double **torque = atom->torque;
   double *radius = atom->radius;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int nghost = atom->nghost;
   int newton_pair = force->newton_pair;
 
   double vxmu2f = force->vxmu2f;
   double vi[3],vj[3],wi[3],wj[3],xl[3],a_sq,a_sh,a_pu;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // This section of code adjusts R0/RT0/RS0 if necessary due to changes
   // in the volume fraction as a result of fix deform or moving walls
   // (only R0 and RT0 are updated in this function; RS0 is left as it is)
 
   double dims[3], wallcoord;
   if (flagVF) // Flag for volume fraction corrections
     if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
       // no walls: the box lengths are the periodic box dimensions
       if (flagdeform && !flagwall)
         for (j = 0; j < 3; j++)
           dims[j] = domain->prd[j];
       // walls present: start from the box and override each walled face
       // with the current wall coordinate
       else if (flagwall == 2 || (flagdeform && flagwall == 1)){
          double wallhi[3], walllo[3];
          for (int j = 0; j < 3; j++){
            wallhi[j] = domain->prd[j];
            walllo[j] = 0;
          }
          for (int m = 0; m < wallfix->nwall; m++){
            // wallwhich encodes dimension (value/2) and side (value%2)
            int dim = wallfix->wallwhich[m] / 2;
            int side = wallfix->wallwhich[m] % 2;
            if (wallfix->xstyle[m] == VARIABLE){
              wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
            }
            else wallcoord = wallfix->coord0[m];
            if (side == 0) walllo[dim] = wallcoord;
            else wallhi[dim] = wallcoord;
          }
          for (int j = 0; j < 3; j++)
            dims[j] = wallhi[j] - walllo[j];
       }
       // volume fraction = particle volume (vol_P) / enclosing volume
       double vol_T = dims[0]*dims[1]*dims[2];
       double vol_f = vol_P/vol_T;
       if (flaglog == 0) {
         R0  = 6*MY_PI*mu*rad*(1.0 + 2.16*vol_f);
         RT0 = 8*MY_PI*mu*pow(rad,3.0);
         //        RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3)*(1.0 + 3.33*vol_f + 2.80*vol_f*vol_f);
       } else {
         R0  = 6*MY_PI*mu*rad*(1.0 + 2.725*vol_f - 6.583*vol_f*vol_f);
         RT0 = 8*MY_PI*mu*pow(rad,3.0)*(1.0 + 0.749*vol_f - 2.469*vol_f*vol_f);
         //        RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3)*(1.0 + 3.64*vol_f - 6.95*vol_f*vol_f);
       }
     }
 
   // end of R0 adjustment code
 
   // Initialize f to zero
   // (forces and torques of local and ghost atoms alike)
   for (i=0;i<nlocal+nghost;i++)
     for (j=0;j<3;j++) {
       f[i][j] = 0.0;
       torque[i][j] = 0.0;
     }
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     radi = radius[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     // Find angular velocity
 
     wi[0] = omega[i][0];
     wi[1] = omega[i][1];
     wi[2] = omega[i][2];
 
     // Contribution due to the isotropic terms
 
     f[i][0] += -vxmu2f*R0*v[i][0];
     f[i][1] += -vxmu2f*R0*v[i][1];
     f[i][2] += -vxmu2f*R0*v[i][2];
 
     torque[i][0] += -vxmu2f*RT0*wi[0];
     torque[i][1] += -vxmu2f*RT0*wi[1];
     torque[i][2] += -vxmu2f*RT0*wi[2];
 
     // without pairwise hydrodynamic interactions only the isotropic
     // drag above contributes
 
     if (!flagHI) continue;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // NOTE(review): NEIGHMASK presumably strips extra flag bits packed
       // into the neighbor index - confirm against the neighbor list code
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r = sqrt(rsq);
 
         // angular velocity of particle j, read directly from omega
 
         wj[0] = omega[j][0];
         wj[1] = omega[j][1];
         wj[2] = omega[j][2];
 
         // loc of the point of closest approach on particle i from its center
 
         xl[0] = -delx/r*radi;
         xl[1] = -dely/r*radi;
         xl[2] = -delz/r*radi;
 
         // velocity at the point of closest approach on both particles
         // v = v + omega_cross_xl
 
         // particle i
 
         vi[0] = v[i][0] + (wi[1]*xl[2] - wi[2]*xl[1]);
         vi[1] = v[i][1] + (wi[2]*xl[0] - wi[0]*xl[2]);
         vi[2] = v[i][2] + (wi[0]*xl[1] - wi[1]*xl[0]);
 
         // particle j
         // (the lever arm on j is -xl, hence the minus sign)
 
         vj[0] = v[j][0] - (wj[1]*xl[2] - wj[2]*xl[1]);
         vj[1] = v[j][1] - (wj[2]*xl[0] - wj[0]*xl[2]);
         vj[2] = v[j][2] - (wj[0]*xl[1] - wj[1]*xl[0]);
 
         // Find the scalar resistances a_sq and a_sh
 
         h_sep = r - 2.0*radi;
 
         // If less than the minimum gap use the minimum gap instead
 
         if (r < cut_inner[itype][jtype])
           h_sep = cut_inner[itype][jtype] - 2.0*radi;
 
         // Scale h_sep by radi
 
         h_sep = h_sep/radi;
 
         // Scalar resistances
         // (a_sh and a_pu are only assigned, and only used, when flaglog is set)
 
         if (flaglog) {
           a_sq = 6.0*MY_PI*mu*radi*(1.0/4.0/h_sep + 9.0/40.0*log(1.0/h_sep));
           a_sh = 6.0*MY_PI*mu*radi*(1.0/6.0*log(1.0/h_sep));
           a_pu = 8.0*MY_PI*mu*pow(radi,3.0)*(3.0/160.0*log(1.0/h_sep));
         } else
           a_sq = 6.0*MY_PI*mu*radi*(1.0/4.0/h_sep);
 
         // Relative  velocity at the point of closest approach
 
         vr1 = vi[0] - vj[0];
         vr2 = vi[1] - vj[1];
         vr3 = vi[2] - vj[2];
 
         // Normal component (vr.n)n
 
         vnnr = (vr1*delx + vr2*dely + vr3*delz)/r;
         vn1 = vnnr*delx/r;
         vn2 = vnnr*dely/r;
         vn3 = vnnr*delz/r;
 
         // Tangential component vr - (vr.n)n
 
         vt1 = vr1 - vn1;
         vt2 = vr2 - vn2;
         vt3 = vr3 - vn3;
 
         // Find force due to squeeze type motion
 
         fx  = a_sq*vn1;
         fy  = a_sq*vn2;
         fz  = a_sq*vn3;
 
         // Find force due to all shear kind of motions
 
         if (flaglog) {
           fx = fx + a_sh*vt1;
           fy = fy + a_sh*vt2;
           fz = fz + a_sh*vt3;
         }
 
         // Scale forces to obtain in appropriate units
 
         fx = vxmu2f*fx;
         fy = vxmu2f*fy;
         fz = vxmu2f*fz;
 
         // Add to the total force
 
         f[i][0] -= fx;
         f[i][1] -= fy;
         f[i][2] -= fz;
 
         // equal and opposite force on j; ghost atoms are only updated
         // when newton_pair is on
 
         if (newton_pair || j < nlocal) {
           f[j][0] += fx;
           f[j][1] += fy;
           f[j][2] += fz;
         }
 
         // Find torque due to this force
 
         if (flaglog) {
           tx = xl[1]*fz - xl[2]*fy;
           ty = xl[2]*fx - xl[0]*fz;
           tz = xl[0]*fy - xl[1]*fx;
 
           // Why a scale factor ?
           // NOTE(review): fx,fy,fz were already multiplied by vxmu2f above,
           // so this torque carries the factor twice - confirm intent
 
           torque[i][0] -= vxmu2f*tx;
           torque[i][1] -= vxmu2f*ty;
           torque[i][2] -= vxmu2f*tz;
 
           // same sign on j as on i: both the force and the lever arm flip
 
           if(newton_pair || j < nlocal) {
             torque[j][0] -= vxmu2f*tx;
             torque[j][1] -= vxmu2f*ty;
             torque[j][2] -= vxmu2f*tz;
           }
 
           // Torque due to a_pu
           // acts on the part of the relative angular velocity that is
           // perpendicular to the line of centers
 
           wdotn = ((wi[0]-wj[0])*delx +
                    (wi[1]-wj[1])*dely + (wi[2]-wj[2])*delz)/r;
           wt1 = (wi[0]-wj[0]) - wdotn*delx/r;
           wt2 = (wi[1]-wj[1]) - wdotn*dely/r;
           wt3 = (wi[2]-wj[2]) - wdotn*delz/r;
 
           tx = a_pu*wt1;
           ty = a_pu*wt2;
           tz = a_pu*wt3;
 
           // add to total
 
           torque[i][0] -= vxmu2f*tx;
           torque[i][1] -= vxmu2f*ty;
           torque[i][2] -= vxmu2f*tz;
 
           if (newton_pair || j < nlocal) {
             torque[j][0] += vxmu2f*tx;
             torque[j][1] += vxmu2f*ty;
             torque[j][2] += vxmu2f*tz;
           }
         }
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    This computes R_{FE}*E , where E is the rate of strain of tensor which is
    known apriori, as it depends only on the known fluid velocity.
    So, this part of the hydrodynamic interaction can be pre computed and
    transferred to the RHS
    ---------------------------------------------------------------------- */
 
 void PairLubricateU::compute_RE()
 {
   // Adds the pairwise lubrication forces and torques produced by the
   // imposed strain-rate tensor Ef to atom->f / atom->torque, using the
   // positions in atom->x.  Unlike compute_RU(), the force and torque
   // arrays are NOT zeroed first: contributions are accumulated on top of
   // whatever they already hold.  Returns without doing anything when
   // flagHI is off.
 
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz,tx,ty,tz;
   double rsq,r,h_sep,radi;
   double vr1,vr2,vr3,vnnr,vn1,vn2,vn3;
   double vt1,vt2,vt3;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   double **x = atom->x;
   double **f = atom->f;
   double **torque = atom->torque;
   double *radius = atom->radius;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int newton_pair = force->newton_pair;
 
   double vxmu2f = force->vxmu2f;
   double xl[3],a_sq,a_sh;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // only pairwise terms are computed here, so nothing to do without
   // hydrodynamic interactions
 
   if (!flagHI) return;
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     radi = radius[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     // No contribution from isotropic terms due to E
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // NOTE(review): NEIGHMASK presumably strips extra flag bits packed
       // into the neighbor index - confirm against the neighbor list code
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r = sqrt(rsq);
 
         // loc of the point of closest approach on particle i from its center
 
         xl[0] = -delx/r*radi;
         xl[1] = -dely/r*radi;
         xl[2] = -delz/r*radi;
 
         // Find the scalar resistances a_sq and a_sh
 
         h_sep = r - 2.0*radi;
 
         // If less than the minimum gap use the minimum gap instead
 
         if (r < cut_inner[itype][jtype])
           h_sep = cut_inner[itype][jtype] - 2.0*radi;
 
         // Scale h_sep by radi
 
         h_sep = h_sep/radi;
 
         // Scalar resistance for Squeeze type motions
 
         if (flaglog)
           a_sq = 6*MY_PI*mu*radi*(1.0/4.0/h_sep + 9.0/40.0*log(1/h_sep));
         else
           a_sq = 6*MY_PI*mu*radi*(1.0/4.0/h_sep);
 
         // Scalar resistance for Shear type motions
         // (a_sh is only assigned, and only used, when flaglog is set)
 
         if (flaglog) {
           a_sh = 6*MY_PI*mu*radi*(1.0/6.0*log(1/h_sep));
         }
 
         // Relative velocity at the point of closest approach due to Ef only
 
         vr1 = -2.0*(Ef[0][0]*xl[0] + Ef[0][1]*xl[1] + Ef[0][2]*xl[2]);
         vr2 = -2.0*(Ef[1][0]*xl[0] + Ef[1][1]*xl[1] + Ef[1][2]*xl[2]);
         vr3 = -2.0*(Ef[2][0]*xl[0] + Ef[2][1]*xl[1] + Ef[2][2]*xl[2]);
 
         // Normal component (vr.n)n
 
         vnnr = (vr1*delx + vr2*dely + vr3*delz)/r;
         vn1 = vnnr*delx/r;
         vn2 = vnnr*dely/r;
         vn3 = vnnr*delz/r;
 
         // Tangential component vr - (vr.n)n
 
         vt1 = vr1 - vn1;
         vt2 = vr2 - vn2;
         vt3 = vr3 - vn3;
 
         // Find force due to squeeze type motion
 
         fx  = a_sq*vn1;
         fy  = a_sq*vn2;
         fz  = a_sq*vn3;
 
         // Find force due to all shear kind of motions
 
         if (flaglog) {
           fx = fx + a_sh*vt1;
           fy = fy + a_sh*vt2;
           fz = fz + a_sh*vt3;
         }
 
         // Scale forces to obtain in appropriate units
 
         fx = vxmu2f*fx;
         fy = vxmu2f*fy;
         fz = vxmu2f*fz;
 
         // Add to the total force
 
         f[i][0] -= fx;
         f[i][1] -= fy;
         f[i][2] -= fz;
 
         // equal and opposite force on j; ghost atoms are only updated
         // when newton_pair is on
 
         if (newton_pair || j < nlocal) {
           f[j][0] += fx;
           f[j][1] += fy;
           f[j][2] += fz;
         }
 
         // Find torque due to this force
 
         if (flaglog) {
           tx = xl[1]*fz - xl[2]*fy;
           ty = xl[2]*fx - xl[0]*fz;
           tz = xl[0]*fy - xl[1]*fx;
 
           // Why a scale factor ?
           // NOTE(review): fx,fy,fz were already multiplied by vxmu2f above,
           // so this torque carries the factor twice - confirm intent
 
           torque[i][0] -= vxmu2f*tx;
           torque[i][1] -= vxmu2f*ty;
           torque[i][2] -= vxmu2f*tz;
 
           // same sign on j as on i: both the force and the lever arm flip
 
           if (newton_pair || j < nlocal) {
             torque[j][0] -= vxmu2f*tx;
             torque[j][1] -= vxmu2f*ty;
             torque[j][2] -= vxmu2f*tz;
           }
         }
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    This computes R_{FE}*E, where E is the rate-of-strain tensor, which is
    known a priori because it depends only on the known fluid velocity.
    So, this part of the hydrodynamic interaction can be precomputed and
    transferred to the RHS
    ---------------------------------------------------------------------- */
 
 void PairLubricateU::compute_RE(double **x)
 {
   // Adds to atom->f and atom->torque the pairwise lubrication force and
   // torque generated by the imposed strain-rate tensor Ef alone, evaluated
   // at the positions passed in x (indexed like the per-atom arrays).
   // Returns without doing anything when flagHI is zero.
 
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz,tx,ty,tz;
   double rsq,r,h_sep,radi;
   double vr1,vr2,vr3,vnnr,vn1,vn2,vn3;
   double vt1,vt2,vt3;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   double **f = atom->f;
   double **torque = atom->torque;
   double *radius = atom->radius;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int newton_pair = force->newton_pair;
 
   double vxmu2f = force->vxmu2f;
   double xl[3],a_sq,a_sh;
 
   if (!flagHI) return;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     radi = radius[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     // No contribution from isotropic terms due to E
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r = sqrt(rsq);
 
         // loc of the point of closest approach on particle i from its center
 
         xl[0] = -delx/r*radi;
         xl[1] = -dely/r*radi;
         xl[2] = -delz/r*radi;
 
         // Find the scalar resistances a_sq and a_sh
         // gap uses 2*radi, i.e. both particles are treated as having radius radi
 
         h_sep = r - 2.0*radi;
 
         // If less than the minimum gap use the minimum gap instead
 
         if (r < cut_inner[itype][jtype])
           h_sep = cut_inner[itype][jtype] - 2.0*radi;
 
         // Scale h_sep by radi
 
         h_sep = h_sep/radi;
 
         // Scalar resistance for Squeeze type motions
 
         if (flaglog)
           a_sq = 6*MY_PI*mu*radi*(1.0/4.0/h_sep + 9.0/40.0*log(1/h_sep));
         else
           a_sq = 6*MY_PI*mu*radi*(1.0/4.0/h_sep);
 
         // Scalar resistance for Shear type motions
         // a_sh is assigned only when flaglog is set; every later read of it
         // is guarded by the same flag
 
         if (flaglog) {
           a_sh = 6*MY_PI*mu*radi*(1.0/6.0*log(1/h_sep));
         }
 
         // Relative velocity at the point of closest approach due to Ef only
 
         vr1 = -2.0*(Ef[0][0]*xl[0] + Ef[0][1]*xl[1] + Ef[0][2]*xl[2]);
         vr2 = -2.0*(Ef[1][0]*xl[0] + Ef[1][1]*xl[1] + Ef[1][2]*xl[2]);
         vr3 = -2.0*(Ef[2][0]*xl[0] + Ef[2][1]*xl[1] + Ef[2][2]*xl[2]);
 
         // Normal component (vr.n)n
 
         vnnr = (vr1*delx + vr2*dely + vr3*delz)/r;
         vn1 = vnnr*delx/r;
         vn2 = vnnr*dely/r;
         vn3 = vnnr*delz/r;
 
         // Tangential component vr - (vr.n)n
 
         vt1 = vr1 - vn1;
         vt2 = vr2 - vn2;
         vt3 = vr3 - vn3;
 
         // Find force due to squeeze type motion
 
         fx  = a_sq*vn1;
         fy  = a_sq*vn2;
         fz  = a_sq*vn3;
 
         // Find force due to all shear kind of motions
 
         if (flaglog) {
           fx = fx + a_sh*vt1;
           fy = fy + a_sh*vt2;
           fz = fz + a_sh*vt3;
         }
 
         // Scale forces to obtain in appropriate units
 
         fx = vxmu2f*fx;
         fy = vxmu2f*fy;
         fz = vxmu2f*fz;
 
         // Add to the total force
 
         f[i][0] -= fx;
         f[i][1] -= fy;
         f[i][2] -= fz;
 
         // the partner is updated only if Newton's 3rd law is on or j is owned
 
         if (newton_pair || j < nlocal) {
           f[j][0] += fx;
           f[j][1] += fy;
           f[j][2] += fz;
         }
 
         // Find torque due to this force
 
         if (flaglog) {
           tx = xl[1]*fz - xl[2]*fy;
           ty = xl[2]*fx - xl[0]*fz;
           tz = xl[0]*fy - xl[1]*fx;
 
           // Why a scale factor ?
           // NOTE(review): fx,fy,fz were already multiplied by vxmu2f above,
           // so the torque carries that factor twice - confirm this is intended
 
           torque[i][0] -= vxmu2f*tx;
           torque[i][1] -= vxmu2f*ty;
           torque[i][2] -= vxmu2f*tz;
 
           // NOTE(review): j receives the same torque, with the same sign and
           // the lever arm xl of particle i - confirm against the derivation
 
           if (newton_pair || j < nlocal) {
             torque[j][0] -= vxmu2f*tx;
             torque[j][1] -= vxmu2f*ty;
             torque[j][2] -= vxmu2f*tz;
           }
         }
       }
     }
   }
 }
 
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairLubricateU::allocate()
 {
   // per-type tables are indexed 1..ntypes, hence one extra row and column
 
   const int ntypes = atom->ntypes;
   const int dim = ntypes + 1;
   allocated = 1;
 
   setflag = memory->create(setflag,dim,dim,"pair:setflag");
   cutsq = memory->create(cutsq,dim,dim,"pair:cutsq");
   memory->create(cut,dim,dim,"pair:cut");
   memory->create(cut_inner,dim,dim,"pair:cut_inner");
 
   // mark every pair in the upper triangle (j >= i) as not yet set
 
   for (int itype = 1; itype <= ntypes; ++itype) {
     int *row = setflag[itype];
     for (int jtype = itype; jtype <= ntypes; ++jtype) row[jtype] = 0;
   }
 }
 
 /*-----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairLubricateU::settings(int narg, char **arg)
 {
   // arguments: mu flaglog cut_inner cut gdot [flagHI flagVF]
   // flagHI and flagVF default to 1 when the two optional values are absent
 
   if (narg != 5 && narg != 7) error->all(FLERR,"Illegal pair_style command");
 
   mu = force->numeric(FLERR,arg[0]);
   flaglog = force->inumeric(FLERR,arg[1]);
   cut_inner_global = force->numeric(FLERR,arg[2]);
   cut_global = force->numeric(FLERR,arg[3]);
   gdot =  force->numeric(FLERR,arg[4]);
 
   flagHI = flagVF = 1;
   if (narg == 7) {
     flagHI = force->inumeric(FLERR,arg[5]);
     flagVF = force->inumeric(FLERR,arg[6]);
   }
 
   // log terms are not allowed without the 1/r terms: force flagHI back on
 
   if (flaglog == 1 && flagHI == 0) {
     error->warning(FLERR,"Cannot include log terms without 1/r terms; "
                    "setting flagHI to 1.");
     flagHI = 1;
   }
 
   // reset cutoffs that have been explicitly set
   // j starts at i so that explicitly set like-type (i,i) pairs are reset
   // too; starting at i+1 left their old cutoffs in place
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) {
           cut_inner[i][j] = cut_inner_global;
           cut[i][j] = cut_global;
         }
   }
 
   // store the rate of strain tensor
   // only the symmetric xy/yx entries are non-zero, each equal to gdot/2
 
   Ef[0][0] = 0.0;
   Ef[0][1] = 0.5*gdot;
   Ef[0][2] = 0.0;
   Ef[1][0] = 0.5*gdot;
   Ef[1][1] = 0.0;
   Ef[1][2] = 0.0;
   Ef[2][0] = 0.0;
   Ef[2][1] = 0.0;
   Ef[2][2] = 0.0;
 }
 
 /*-----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairLubricateU::coeff(int narg, char **arg)
 {
   if (narg != 2 && narg != 4)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double cut_inner_one = cut_inner_global;
   double cut_one = cut_global;
   if (narg == 4) {
     cut_inner_one = force->numeric(FLERR,arg[2]);
     cut_one = force->numeric(FLERR,arg[3]);
   }
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       cut_inner[i][j] = cut_inner_one;
       cut[i][j] = cut_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLubricateU::init_style()
 {
   // Checks the atom-style and ghost-velocity prerequisites, requests a
   // neighbor list, detects fix deform / fix wall, and sets the isotropic
   // resistance constants R0, RT0 and RS0 from the particle volume fraction.
 
   if (!atom->sphere_flag)
     error->all(FLERR,"Pair lubricateU requires atom style sphere");
   if (comm->ghost_velocity == 0)
     error->all(FLERR,"Pair lubricateU requires ghost atoms store velocity");
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 
   // require that atom radii are identical within each type
   // require monodisperse system with same radii for all types
   // NOTE(review): rad is compared here but is not assigned anywhere in this
   // function before the MPI_Allreduce further down - confirm what value it
   // holds the first time this check runs
 
   double radtype;
   for (int i = 1; i <= atom->ntypes; i++) {
     if (!atom->radius_consistency(i,radtype))
       error->all(FLERR,"Pair lubricateU requires monodisperse particles");
     if (i > 1 && radtype != rad)
       error->all(FLERR,"Pair lubricateU requires monodisperse particles");
   }
 
   // check for fix deform, if exists it must use "remap v"
   // If box will change volume, set appropriate flag so that volume
   // and v.f. corrections are re-calculated at every step.
   //
   // If available volume is different from box volume
   // due to walls, set volume appropriately; if walls will
   // move, set appropriate flag so that volume and v.f. corrections
   // are re-calculated at every step.
   //
   // flagwall: 0 = no wall fix, 1 = wall fix present, 2 = wall fix with xflag set
 
   flagdeform = flagwall = 0;
   for (int i = 0; i < modify->nfix; i++){
     if (strcmp(modify->fix[i]->style,"deform") == 0)
       flagdeform = 1;
     else if (strstr(modify->fix[i]->style,"wall") != NULL) {
       if (flagwall) 
         error->all(FLERR,
                    "Cannot use multiple fix wall commands with "
                    "pair lubricateU");
       flagwall = 1; // Walls exist
       wallfix = (FixWall *) modify->fix[i];
       if (wallfix->xflag) flagwall = 2; // Moving walls exist
     }
   }
 
   // set the isotropic constants depending on the volume fraction
   // vol_T = total volume
   double vol_T, wallcoord;
     if (!flagwall) vol_T = domain->xprd*domain->yprd*domain->zprd;
   else {
     // start from the full box extent in each dimension, then replace the
     // low/high bound of every dimension that has a wall
     double wallhi[3], walllo[3];
     for (int j = 0; j < 3; j++){
       wallhi[j] = domain->prd[j];
       walllo[j] = 0;
     }
     for (int m = 0; m < wallfix->nwall; m++){
       // wallwhich encodes dimension (value / 2) and side (value % 2, 0 = low)
       int dim = wallfix->wallwhich[m] / 2;
       int side = wallfix->wallwhich[m] % 2;
       if (wallfix->xstyle[m] == VARIABLE){
         wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
         //Since fix->wall->init happens after pair->init_style
         wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
       }
 
       else wallcoord = wallfix->coord0[m];
 
       if (side == 0) walllo[dim] = wallcoord;
       else wallhi[dim] = wallcoord;
     }
     vol_T = (wallhi[0] - walllo[0]) * (wallhi[1] - walllo[1]) *
       (wallhi[2] - walllo[2]);
   }
 
 
   // assuming monodisperse spheres, vol_P = volume of the particles
   // rad = maximum over all procs of each proc's radius[0]
   // NOTE(review): radius[0] is read whenever the radius array exists -
   // presumably every proc is expected to own at least one atom; confirm
 
   double tmp = 0.0;
   if (atom->radius) tmp = atom->radius[0];
   MPI_Allreduce(&tmp,&rad,1,MPI_DOUBLE,MPI_MAX,world);
 
   vol_P = atom->natoms * (4.0/3.0)*MY_PI*pow(rad,3.0);
 
   // vol_f = volume fraction
 
   double vol_f = vol_P/vol_T;
 
   // flagVF off disables the volume-fraction corrections below
 
   if (!flagVF) vol_f = 0;
 
   // set the isotropic constant
 
   if (flaglog == 0) {
     R0  = 6*MY_PI*mu*rad*(1.0 + 2.16*vol_f);
     RT0 = 8*MY_PI*mu*pow(rad,3.0);  // not actually needed
     RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3.0)*(1.0 + 3.33*vol_f + 2.80*vol_f*vol_f);
   } else {
     R0  = 6*MY_PI*mu*rad*(1.0 + 2.725*vol_f - 6.583*vol_f*vol_f);
     RT0 = 8*MY_PI*mu*pow(rad,3.0)*(1.0 + 0.749*vol_f - 2.469*vol_f*vol_f);
     RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3.0)*(1.0 + 3.64*vol_f - 6.95*vol_f*vol_f);
   }
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairLubricateU::init_one(int i, int j)
 {
   const bool pair_was_set = (setflag[i][j] != 0);
 
   // pairs whose setflag is 0 get both cutoffs mixed from the i,i and j,j values
 
   if (!pair_was_set) {
     const double inner_mixed = mix_distance(cut_inner[i][i],cut_inner[j][j]);
     const double outer_mixed = mix_distance(cut[i][i],cut[j][j]);
     cut_inner[i][j] = inner_mixed;
     cut[i][j] = outer_mixed;
   }
 
   // mirror the inner cutoff into the j,i entry
 
   const double inner = cut_inner[i][j];
   cut_inner[j][i] = inner;
 
   return cut[i][j];
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLubricateU::write_restart(FILE *fp)
 {
   write_restart_settings(fp);
 
   int i,j;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       fwrite(&setflag[i][j],sizeof(int),1,fp);
       if (setflag[i][j]) {
         fwrite(&cut_inner[i][j],sizeof(double),1,fp);
         fwrite(&cut[i][j],sizeof(double),1,fp);
       }
     }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLubricateU::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   int i,j;
   int me = comm->me;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
       MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
       if (setflag[i][j]) {
         if (me == 0) {
           fread(&cut_inner[i][j],sizeof(double),1,fp);
           fread(&cut[i][j],sizeof(double),1,fp);
         }
         MPI_Bcast(&cut_inner[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&cut[i][j],1,MPI_DOUBLE,0,world);
       }
     }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLubricateU::write_restart_settings(FILE *fp)
 {
   // one item per field; the field order defines the on-disk layout
 
   const size_t one = 1;
   const size_t dbytes = sizeof(double);
   const size_t ibytes = sizeof(int);
 
   fwrite(&mu,dbytes,one,fp);
   fwrite(&flaglog,ibytes,one,fp);
   fwrite(&cut_inner_global,dbytes,one,fp);
   fwrite(&cut_global,dbytes,one,fp);
   fwrite(&offset_flag,ibytes,one,fp);
   fwrite(&mix_flag,ibytes,one,fp);
   fwrite(&flagHI,ibytes,one,fp);
   fwrite(&flagVF,ibytes,one,fp);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLubricateU::read_restart_settings(FILE *fp)
 {
   int me = comm->me;
   if (me == 0) {
     fread(&mu,sizeof(double),1,fp);
     fread(&flaglog,sizeof(int),1,fp);
     fread(&cut_inner_global,sizeof(double),1,fp);
     fread(&cut_global,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
     fread(&flagHI,sizeof(int),1,fp);
     fread(&flagVF,sizeof(int),1,fp);
   }
   MPI_Bcast(&mu,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&flaglog,1,MPI_INT,0,world);
   MPI_Bcast(&cut_inner_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
   MPI_Bcast(&flagHI,1,MPI_INT,0,world);
   MPI_Bcast(&flagVF,1,MPI_INT,0,world);
 }
 
 /*---------------------------------------------------------------------------*/
 
 void PairLubricateU::copy_vec_uo(int inum, double *xcg,
                                  double **v, double **omega)
 {
   int i,j,ii;
   int *ilist = list->ilist;
 
   for (ii=0;ii<inum;ii++) {
     i = ilist[ii];
 
     for (j=0;j<3;j++) {
       v[i][j] = xcg[6*ii+j];
       omega[i][j] = xcg[6*ii+j+3];
     }
   }
 }
 
 /*---------------------------------------------------------------------------*/
 
 void PairLubricateU::copy_uo_vec(int inum, double **f, double **torque,
                                  double *RU)
 {
   int i,j,ii;
   int *ilist;
 
   ilist = list->ilist;
 
   for (ii=0;ii<inum;ii++) {
     i = ilist[ii];
     for (j=0;j<3;j++) {
       RU[6*ii+j] = f[i][j];
       RU[6*ii+j+3] = torque[i][j];
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int PairLubricateU::pack_forward_comm(int n, int *list, double *buf,
                                       int pbc_flag, int *pbc)
 {
   // six values per atom go into buf: v[0..2] followed by omega[0..2];
   // pbc_flag and pbc are not used here
 
   double **vel = atom->v;
   double **angvel = atom->omega;
 
   int nbuf = 0;
   for (int k = 0; k < n; ++k) {
     const int iatom = list[k];
     for (int d = 0; d < 3; ++d) buf[nbuf++] = vel[iatom][d];
     for (int d = 0; d < 3; ++d) buf[nbuf++] = angvel[iatom][d];
   }
 
   // number of doubles written
 
   return nbuf;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLubricateU::unpack_forward_comm(int n, int first, double *buf)
 {
   // buf carries six values per atom (v then omega) for the n consecutive
   // atoms starting at index first
 
   double **vel = atom->v;
   double **angvel = atom->omega;
 
   const int stop = first + n;
   int nbuf = 0;
   for (int iatom = first; iatom < stop; ++iatom) {
     for (int d = 0; d < 3; ++d) vel[iatom][d] = buf[nbuf++];
     for (int d = 0; d < 3; ++d) angvel[iatom][d] = buf[nbuf++];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLubricateU::dot_vec_vec(int N, double *x, double *y)
 {
   // sum of x[k]*y[k] for k = 0..N-1, accumulated in index order
 
   double total = 0.0;
   int k = 0;
   while (k < N) {
     total += x[k]*y[k];
     ++k;
   }
   return total;
 }
diff --git a/src/FLD/pair_lubricateU_poly.cpp b/src/FLD/pair_lubricateU_poly.cpp
index f78d14952..02734c058 100644
--- a/src/FLD/pair_lubricateU_poly.cpp
+++ b/src/FLD/pair_lubricateU_poly.cpp
@@ -1,1234 +1,1234 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Amit Kumar and Michael Bybee (UIUC)
                          Pieter in 't Veld (BASF), code restructuring
                          Dave Heine (Corning), polydispersity
 ------------------------------------------------------------------------- */
 
 #include "mpi.h"
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_lubricateU_poly.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "domain.h"
 #include "update.h"
 #include "modify.h"
 #include "fix.h"
 #include "fix_deform.h"
 #include "fix_wall.h"
 #include "input.h"
 #include "variable.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 // the CG loop in iterate() repeats while its relative error exceeds TOL
 #define TOL 1E-3   // tolerance for conjugate gradient
 
 // same as fix_wall.cpp
 // VARIABLE is compared against wallfix->xstyle[m] to decide whether a wall
 // coordinate must be evaluated from an equal-style variable
 
 enum{EDGE,CONSTANT,VARIABLE};
 
 
 /* ---------------------------------------------------------------------- */
 
 // nothing beyond the PairLubricateU base-class construction is done here
 PairLubricateUPoly::PairLubricateUPoly(LAMMPS *lmp) : PairLubricateU(lmp)
 {
 }
 
 /* ----------------------------------------------------------------------
    It first has to solve for the velocity of the particles such that
    the net force on the particles is zero. NOTE: it has to be the last
    type of pair interaction specified in the input file. Also, it
    assumes that no other types of interactions, like k-space, is
    present. As already mentioned, the net force on the particles after
    this pair interaction would be identically zero.
    ---------------------------------------------------------------------- */
 
 void PairLubricateUPoly::compute(int eflag, int vflag)
 {
   // Two-stage midpoint scheme: solve for velocities at the current
   // positions, build intermediate positions in xl, restore the forces and
   // torques saved on entry, then solve again at the intermediate positions.
 
   int i,j;
 
   int nlocal = atom->nlocal;
   int nghost = atom->nghost;
   int nall = nlocal + nghost;
 
   double **x = atom->x;
   double **f = atom->f;
   double **torque = atom->torque;
 
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   // grow per-atom arrays if necessary
   // need to be atom->nmax in length
 
   if (atom->nmax > nmax) {
     memory->destroy(fl);
     memory->destroy(Tl);
     memory->destroy(xl);
     nmax = atom->nmax;
     memory->create(fl,nmax,3,"pair:fl");
     memory->create(Tl,nmax,3,"pair:Tl");
     memory->create(xl,nmax,3,"pair:xl");
   }
 
   // grow the conjugate-gradient work vectors (6 entries per listed atom)
   // each array gets its own label so an allocation failure names the
   // array that actually failed
 
   if (6*list->inum > cgmax) {
     memory->sfree(bcg);
     memory->sfree(xcg);
     memory->sfree(rcg);
     memory->sfree(rcg1);
     memory->sfree(pcg);
     memory->sfree(RU);
     cgmax = 6*list->inum;
     memory->create(bcg,cgmax,"pair:bcg");
     memory->create(xcg,cgmax,"pair:xcg");
     memory->create(rcg,cgmax,"pair:rcg");
     memory->create(rcg1,cgmax,"pair:rcg1");
     memory->create(pcg,cgmax,"pair:pcg");
     memory->create(RU,cgmax,"pair:RU");
   }
 
   // Added to implement midpoint integration scheme
   // Save force, torque found so far. Also save the positions
 
   for (i=0;i<nall;i++) {
     for (j=0;j<3;j++) {
       fl[i][j] = f[i][j];
       Tl[i][j] = torque[i][j];
       xl[i][j] = x[i][j];
     }
   }
 
   // Stage one of Midpoint method
   // Solve for velocities based on initial positions
 
   iterate(atom->x,1);
 
   // Find positions at half the timestep and store in xl
 
   intermediates(nall,xl);
 
   // Store back the saved forces and torques in original arrays
 
   for(i=0;i<nall;i++) {
     for(j=0;j<3;j++) {
       f[i][j] = fl[i][j];
       torque[i][j] = Tl[i][j];
     }
   }
 
   // Stage two: This will give the final velocities
 
   iterate(xl,2);
 }
 
 /* ------------------------------------------------------------------------
    One stage of the midpoint method (called with stage = 1 and stage = 2)
 ------------------------------------------------------------------------- */
 
 void PairLubricateUPoly::iterate(double **x, int stage)
 {
   // Conjugate-gradient solve, at positions x, for the velocity and
   // angular-velocity vector xcg whose image under compute_RU() matches bcg
   // (the negated forces/torques present after compute_RE()).  The result
   // is written to atom->v / atom->omega; for stage 2 with evflag set,
   // compute_Fh() is called as well.
 
   int i,j,ii;
 
   int inum = list->inum;
   int *ilist = list->ilist;
   int newton_pair = force->newton_pair;
 
   // note: the local double named error below hides, inside this function,
   // the error object that is used as error->all() elsewhere in this file
 
   double alpha,beta;
   double normi,error,normig;
   double send[2],recv[2],rcg_dot_rcg;
 
   double **v = atom->v;
   double **f = atom->f;
   double **omega = atom->omega;
   double **torque = atom->torque;
 
   // First compute R_FE*E
 
   compute_RE(x);
 
   // Reverse communication of forces and torques to
   // accumulate the net force on each of the particles
 
   if (newton_pair) comm->reverse_comm();
 
   // CONJUGATE GRADIENT
   // Find the right hand side= -ve of all forces/torques
   // b = 6*Npart in overall size
 
   for(ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     for (j = 0; j < 3; j++) {
       bcg[6*ii+j] = -f[i][j];
       bcg[6*ii+j+3] = -torque[i][j];
     }
   }
 
   // Start solving the equation : F^H = -F^P -F^B - F^H_{Ef}
   // Store initial guess for velocity and angular-velocities/angular momentum
   // NOTE velocities and angular velocities are assumed relative to the fluid
 
   for (ii=0;ii<inum;ii++)
     for (j=0;j<3;j++) {
       xcg[6*ii+j] = 0.0;
       xcg[6*ii+j+3] = 0.0;
     }
 
   // Copy initial guess to the global arrays to be acted upon by R_{FU}
   // and returned by f and torque arrays
 
   copy_vec_uo(inum,xcg,v,omega);
 
   // set velocities for ghost particles
 
   comm->forward_comm_pair(this);
 
   // Find initial residual
 
   compute_RU(x);
 
   // reverse communication of forces and torques
 
   if (newton_pair) comm->reverse_comm();
 
   copy_uo_vec(inum,f,torque,RU);
 
   for (i=0;i<6*inum;i++)
     rcg[i] = bcg[i] - RU[i];
 
   // Set initial conjugate direction
 
   for (i=0;i<6*inum;i++)
     pcg[i] = rcg[i];
 
   // Find initial norm of the residual or norm of the RHS (either is fine)
   // normig = squared norm of bcg summed over all procs
 
   normi = dot_vec_vec(6*inum,bcg,bcg);
 
   MPI_Allreduce(&normi,&normig,1,MPI_DOUBLE,MPI_SUM,world);
 
   // Loop until convergence
   // NOTE(review): there is no iteration cap; the loop ends only when the
   // test error > TOL is false.  With an all-zero right-hand side normig is
   // 0 and the divisions below are 0/0 - confirm callers never hit that case
 
   do {
     // find R*p
 
     copy_vec_uo(inum,pcg,v,omega);
 
     // set velocities for ghost particles
 
     comm->forward_comm_pair(this);
 
     compute_RU(x);
 
     // reverse communication of forces and torques
 
     if (newton_pair) comm->reverse_comm();
 
     copy_uo_vec(inum,f,torque,RU);
 
     // Find alpha
     // alpha = (r.r)/(RU.p), both dot products summed over all procs
 
     send[0] = dot_vec_vec(6*inum,rcg,rcg);
     send[1] = dot_vec_vec(6*inum,RU,pcg);
 
     MPI_Allreduce(send,recv,2,MPI_DOUBLE,MPI_SUM,world);
 
     alpha = recv[0]/recv[1];
     rcg_dot_rcg = recv[0];
 
     // Find new x
 
     for (i=0;i<6*inum;i++)
       xcg[i] = xcg[i] + alpha*pcg[i];
 
     // find new residual
 
     for (i=0;i<6*inum;i++)
       rcg1[i] = rcg[i] - alpha*RU[i];
 
     // find beta
     // beta = (new r.r)/(old r.r); only recv[0] is refreshed by this reduce
 
     send[0] = dot_vec_vec(6*inum,rcg1,rcg1);
 
     MPI_Allreduce(send,recv,1,MPI_DOUBLE,MPI_SUM,world);
 
     beta = recv[0]/rcg_dot_rcg;
 
     // Find new conjugate direction
 
     for (i=0;i<6*inum;i++)
       pcg[i] = rcg1[i] + beta*pcg[i];
 
     for (i=0;i<6*inum;i++)
       rcg[i] = rcg1[i];
 
     // Find relative error
     // sqrt of (new residual norm squared)/(RHS norm squared)
 
     error = sqrt(recv[0]/normig);
 
   } while (error > TOL);
 
 
   // update the final converged velocities in respective arrays
 
   copy_vec_uo(inum,xcg,v,omega);
 
   // set velocities for ghost particles
 
   comm->forward_comm_pair(this);
 
   // compute the viscosity/pressure
 
   if (evflag && stage == 2) compute_Fh(x);
 
   // find actual particle's velocities from relative velocities
   // only non-zero component of fluid's vel : vx=gdot*y and wz=-gdot/2
   // applied to the atoms in ilist only
 
   for (ii=0;ii<inum;ii++) {
     i = ilist[ii];
     v[i][0] = v[i][0] + gdot*x[i][1];
     omega[i][2] = omega[i][2] - gdot/2.0;
   }
 }
 
 /* ------------------------------------------------------------------------
    This function computes the final hydrodynamic force once the
    velocities have converged.
    ------------------------------------------------------------------------- */
 
 void PairLubricateUPoly::compute_Fh(double **x)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int nghost = atom->nghost;
   int newton_pair = force->newton_pair;
 
   double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz;
   double rsq,r,h_sep,radi,radj;
   double vr1,vr2,vr3,vnnr,vn1,vn2,vn3;
   double vt1,vt2,vt3;
   double vi[3],vj[3],wi[3],wj[3],xl[3],jl[3],pre[2];
 
   double **v = atom->v;
   double **f = atom->f;
   double **omega = atom->omega;
   double **torque = atom->torque;
   double *radius = atom->radius;
 
   double beta[2][5];
   double vxmu2f = force->vxmu2f;
   double a_sq = 0.0;
   double a_sh = 0.0;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   beta[0][0] = beta[1][0] = beta[1][4] = 0.0;
 
   // This section of code adjusts R0/RT0/RS0 if necessary due to changes
   // in the volume fraction as a result of fix deform or moving walls
 
   double dims[3], wallcoord;
   if (flagVF) // Flag for volume fraction corrections
     if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
       if (flagdeform && !flagwall)
         for (j = 0; j < 3; j++)
           dims[j] = domain->prd[j];
       else if (flagwall == 2 || (flagdeform && flagwall == 1)){
          double wallhi[3], walllo[3];
          for (int j = 0; j < 3; j++){
            wallhi[j] = domain->prd[j];
            walllo[j] = 0;
          }
          for (int m = 0; m < wallfix->nwall; m++){
            int dim = wallfix->wallwhich[m] / 2;
            int side = wallfix->wallwhich[m] % 2;
            if (wallfix->xstyle[m] == VARIABLE){
              wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
            }
            else wallcoord = wallfix->coord0[m];
            if (side == 0) walllo[dim] = wallcoord;
            else wallhi[dim] = wallcoord;
          }
          for (int j = 0; j < 3; j++)
            dims[j] = wallhi[j] - walllo[j];
       }
       double vol_T = dims[0]*dims[1]*dims[2];
       double vol_f = vol_P/vol_T;
       if (flaglog == 0) {
         //R0  = 6*MY_PI*mu*(1.0 + 2.16*vol_f);
         //RT0 = 8*MY_PI*mu;
         RS0 = 20.0/3.0*MY_PI*mu*(1.0 + 3.33*vol_f + 2.80*vol_f*vol_f);
       } else {
         //R0  = 6*MY_PI*mu*(1.0 + 2.725*vol_f - 6.583*vol_f*vol_f);
         //RT0 = 8*MY_PI*mu*(1.0 + 0.749*vol_f - 2.469*vol_f*vol_f);
         RS0 = 20.0/3.0*MY_PI*mu*(1.0 + 3.64*vol_f - 6.95*vol_f*vol_f);
       }
     }
 
   // end of R0 adjustment code
   // Set force to zero which is the final value after this pair interaction
 
   for (i=0;i<nlocal+nghost;i++)
     for (j=0;j<3;j++) {
       f[i][j] = 0.0;
       torque[i][j] = 0.0;
     }
 
   // reverse communication of forces and torques
 
   if (newton_pair) comm->reverse_comm(); // not really needed
 
   // Find additional contribution from the stresslets
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     radi = radius[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     pre[1] = 8.0*(pre[0] = MY_PI*mu*radi)*radi*radi; // BROKEN?? Should be "+"??
     pre[0] *= 6.0;
 
     // Find the contribution to stress from isotropic RS0
     // Set psuedo force to obtain the required contribution
     // need to set delx  and fy only
 
     fx = 0.0; delx = radi;
     fy = vxmu2f*RS0*pow(radi,3.0)*gdot/2.0/radi; dely = 0.0;
     fz = 0.0; delz = 0.0;
     if (evflag)
       ev_tally_xyz(i,i,nlocal,newton_pair,0.0,0.0,-fx,-fy,-fz,delx,dely,delz);
 
     // Find angular velocity
 
     wi[0] = omega[i][0];
     wi[1] = omega[i][1];
     wi[2] = omega[i][2];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
       radj = radius[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r = sqrt(rsq);
 
         // Use omega directly if it exists, else angmom
         // angular momentum = I*omega = 2/5 * M*R^2 * omega
 
         wj[0] = omega[j][0];
         wj[1] = omega[j][1];
         wj[2] = omega[j][2];
 
         // loc of the point of closest approach on particle i from its cente
         // POC for j is in opposite direction as for i
 
         xl[0] = -delx/r*radi;
         xl[1] = -dely/r*radi;
         xl[2] = -delz/r*radi;
         jl[0] = delx/r*radj;
         jl[1] = dely/r*radj;
         jl[2] = delz/r*radj;
 
         h_sep = r - radi-radj;
 
         // velocity at the point of closest approach on both particles
         // v = v + omega_cross_xl
 
         // particle i
 
         vi[0] = v[i][0] + (wi[1]*xl[2] - wi[2]*xl[1]);
         vi[1] = v[i][1] + (wi[2]*xl[0] - wi[0]*xl[2]);
         vi[2] = v[i][2] + (wi[0]*xl[1] - wi[1]*xl[0]);
 
         // particle j
 
         vj[0] = v[j][0] + (wj[1]*jl[2] - wj[2]*jl[1]);
         vj[1] = v[j][1] + (wj[2]*jl[0] - wj[0]*jl[2]);
         vj[2] = v[j][2] + (wj[0]*jl[1] - wj[1]*jl[0]);
 
 
         // Relative  velocity at the point of closest approach
         // include contribution from Einf of the fluid
 
         vr1 = vi[0] - vj[0] -
           2.0*(Ef[0][0]*xl[0] + Ef[0][1]*xl[1] + Ef[0][2]*xl[2]);
         vr2 = vi[1] - vj[1] -
           2.0*(Ef[1][0]*xl[0] + Ef[1][1]*xl[1] + Ef[1][2]*xl[2]);
         vr3 = vi[2] - vj[2] -
           2.0*(Ef[2][0]*xl[0] + Ef[2][1]*xl[1] + Ef[2][2]*xl[2]);
 
         // Normal component (vr.n)n
 
         vnnr = (vr1*delx + vr2*dely + vr3*delz)/r;
         vn1 = vnnr*delx/r;
         vn2 = vnnr*dely/r;
         vn3 = vnnr*delz/r;
 
         // Tangential component vr - (vr.n)n
 
         vt1 = vr1 - vn1;
         vt2 = vr2 - vn2;
         vt3 = vr3 - vn3;
 
         // Find the scalar resistances a_sq, a_sh and a_pu
 
         // If less than the minimum gap use the minimum gap instead
 
         if (r < cut_inner[itype][jtype])
           h_sep = cut_inner[itype][jtype] - radi-radj;
 
         // Scale h_sep by radi
 
         h_sep = h_sep/radi;
         beta[0][1] = radj/radi;
         beta[1][1] = 1.0 + beta[0][1];
 
         /*beta0 = radj/radi;
         beta1 = 1.0 + beta0;*/
 
         // Scalar resistances
 
         if (flaglog) {
           beta[0][2] = beta[0][1]*beta[0][1];
           beta[0][3] = beta[0][2]*beta[0][1];
           beta[0][4] = beta[0][3]*beta[0][1];
           beta[1][2] = beta[1][1]*beta[1][1];
           beta[1][3] = beta[1][2]*beta[1][1];
           double log_h_sep_beta13 = log(1.0/h_sep)/beta[1][3];
           double h_sep_beta11 = h_sep/beta[1][1];
 
           a_sq = pre[0]*(beta[0][2]/beta[1][2]/h_sep
                 +((0.2+1.4*beta[0][1]+0.2*beta[0][2])
                   +(1.0+18.0*(beta[0][1]+beta[0][3])-29.0*beta[0][2]
                     +beta[0][4])*h_sep_beta11/21.0)*log_h_sep_beta13);
 
           a_sh = pre[0]*((8.0*(beta[0][1]+beta[0][3])+4.0*beta[0][2])/15.0
                 +(64.0-180.0*(beta[0][1]+beta[0][3])+232.0*beta[0][2]
                   +64.0*beta[0][4])*h_sep_beta11/375.0)*log_h_sep_beta13;
 
           /*a_sq = beta0*beta0/beta1/beta1/h_sep
                   +(1.0+7.0*beta0+beta0*beta0)/5.0/pow(beta1,3)*log(1.0/h_sep);
           a_sq += (1.0+18.0*beta0-29.0*beta0*beta0+18.0*pow(beta0,3)
                   +pow(beta0,4))/21.0/pow(beta1,4)*h_sep*log(1.0/h_sep);
           a_sq *= 6.0*MY_PI*mu*radi;
 
           a_sh = 4.0*beta0*(2.0+beta0
                   +2.0*beta0*beta0)/15.0/pow(beta1,3)*log(1.0/h_sep);
           a_sh += 4.0*(16.0-45.0*beta0+58.0*beta0*beta0-45.0*pow(beta0,3)
                   +16.0*pow(beta0,4))/375.0/pow(beta1,4)*h_sep*log(1.0/h_sep);
           a_sh *= 6.0*MY_PI*mu*radi;*/
         } else {
           //a_sq = 6.0*MY_PI*mu*radi*(beta0*beta0/beta1/beta1/h_sep);
           a_sq = pre[0]*(beta[0][1]*beta[0][1]/(beta[1][1]*beta[1][1]*h_sep));
         }
 
         // Find force due to squeeze type motion
 
         fx  = a_sq*vn1;
         fy  = a_sq*vn2;
         fz  = a_sq*vn3;
 
         // Find force due to all shear kind of motions
 
         if (flaglog) {
           fx = fx + a_sh*vt1;
           fy = fy + a_sh*vt2;
           fz = fz + a_sh*vt3;
         }
 
         // Scale forces to obtain in appropriate units
 
         fx = vxmu2f*fx;
         fy = vxmu2f*fy;
         fz = vxmu2f*fz;
 
         // set j = nlocal so that only I gets tallied
 
         if (evflag) ev_tally_xyz(i,nlocal,nlocal,0,
                                  0.0,0.0,-fx,-fy,-fz,delx,dely,delz);
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   computes R_FU * U
 ---------------------------------------------------------------------- */
 
 void PairLubricateUPoly::compute_RU(double **x)
 {
   // Overwrites atom->f and atom->torque (local + ghost) with the
   // velocity-dependent resistance contribution evaluated for the
   // particle positions x supplied by the caller:
   //   - isotropic drag terms proportional to R0 (force) and RT0 (torque)
   //   - if flagHI is set, pairwise squeeze (a_sq), shear (a_sh) and
   //     pump (a_pu) lubrication terms for every neighbor inside the cutoff
   // Only particle i of each (i,j) pair is updated inside the pair loop.

   int i,j,ii,jj,inum,jnum,itype,jtype;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int nghost = atom->nghost;
 
   double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz,tx,ty,tz;
   double rsq,r,radi,radj,h_sep;
   double vr1,vr2,vr3,vnnr,vn1,vn2,vn3;
   double vt1,vt2,vt3,wdotn,wt1,wt2,wt3;
   double vi[3],vj[3],wi[3],wj[3],xl[3],jl[3],pre[2];
 
   double **v = atom->v;
   double **f = atom->f;
   double **omega = atom->omega;
   double **torque = atom->torque;
   double *radius = atom->radius;
 
   // beta[0][k] holds (radj/radi)^k and beta[1][k] holds (1+radj/radi)^k
   // for the powers that are filled in inside the pair loop below

   double beta[2][5];
   double vxmu2f = force->vxmu2f;
   double a_sq = 0.0;
   double a_sh = 0.0;
   double a_pu = 0.0;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // these three entries are never read in this function;
   // presumably zeroed only so that no element is left uninitialized

   beta[0][0] = beta[1][0] = beta[1][4] = 0.0;
 
  // This section of code adjusts R0/RT0 if necessary due to changes
   // in the volume fraction as a result of fix deform or moving walls
   // (the RS0 updates are commented out here, so RS0 is left untouched)
 
   double dims[3], wallcoord;
   if (flagVF) // Flag for volume fraction corrections
     if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
       if (flagdeform && !flagwall)
         for (j = 0; j < 3; j++)
           dims[j] = domain->prd[j];
       else if (flagwall == 2 || (flagdeform && flagwall == 1)){
          // start from the periodic box and override each bounded side
          // with the current wall coordinate
          double wallhi[3], walllo[3];
          for (j = 0; j < 3; j++){
            wallhi[j] = domain->prd[j];
            walllo[j] = 0;
          }
          for (int m = 0; m < wallfix->nwall; m++){
            // wallwhich encodes dimension (value/2) and side (value%2)
            int dim = wallfix->wallwhich[m] / 2;
            int side = wallfix->wallwhich[m] % 2;
            if (wallfix->xstyle[m] == VARIABLE){
              wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
            }
            else wallcoord = wallfix->coord0[m];
            if (side == 0) walllo[dim] = wallcoord;
            else wallhi[dim] = wallcoord;
          }
          for (j = 0; j < 3; j++)
            dims[j] = wallhi[j] - walllo[j];
       }
       double vol_T = dims[0]*dims[1]*dims[2];
       double vol_f = vol_P/vol_T;
       if (flaglog == 0) {
         R0  = 6*MY_PI*mu*(1.0 + 2.16*vol_f);
         RT0 = 8*MY_PI*mu;
         //        RS0 = 20.0/3.0*MY_PI*mu*(1.0 + 3.33*vol_f + 2.80*vol_f*vol_f);
       } else {
         R0  = 6*MY_PI*mu*(1.0 + 2.725*vol_f - 6.583*vol_f*vol_f);
         RT0 = 8*MY_PI*mu*(1.0 + 0.749*vol_f - 2.469*vol_f*vol_f);
         //        RS0 = 20.0/3.0*MY_PI*mu*(1.0 + 3.64*vol_f - 6.95*vol_f*vol_f);
       }
     }
 
   // end of R0 adjustment code
 
   // Initialize f and torque to zero for local and ghost atoms
 
   for (i=0;i<nlocal+nghost;i++)
     for (j=0;j<3;j++) {
       f[i][j] = 0.0;
       torque[i][j] = 0.0;
     }
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     radi = radius[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];

     // the nested assignment first stores pi*mu*radi in pre[0], so that
     // pre[1] = 8*pi*mu*radi^3; pre[0] is then scaled to 6*pi*mu*radi

     pre[1] = 8.0*(pre[0] = MY_PI*mu*radi)*radi*radi;
     pre[0] *= 6.0;
 
     // Find angular velocity
 
     wi[0] = omega[i][0];
     wi[1] = omega[i][1];
     wi[2] = omega[i][2];
 
     // Contribution due to the isotropic terms
 
     f[i][0] += -vxmu2f*R0*radi*v[i][0];
     f[i][1] += -vxmu2f*R0*radi*v[i][1];
     f[i][2] += -vxmu2f*R0*radi*v[i][2];
 
     const double radi3 = radi*radi*radi;
     torque[i][0] += -vxmu2f*RT0*radi3*wi[0];
     torque[i][1] += -vxmu2f*RT0*radi3*wi[1];
     torque[i][2] += -vxmu2f*RT0*radi3*wi[2];
 
     // without hydrodynamic interactions only the isotropic terms apply

     if (!flagHI) continue;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
       radj = radius[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r = sqrt(rsq);
 
         wj[0] = omega[j][0];
         wj[1] = omega[j][1];
         wj[2] = omega[j][2];
 
         // loc of the point of closest approach on particle i from its center
         // jl is the corresponding point on particle j (opposite direction)
 
         xl[0] = -delx/r*radi;
         xl[1] = -dely/r*radi;
         xl[2] = -delz/r*radi;
         jl[0] = delx/r*radj;
         jl[1] = dely/r*radj;
         jl[2] = delz/r*radj;
 
         // velocity at the point of closest approach on both particles
         // v = v + omega_cross_xl
 
         // particle i
 
         vi[0] = v[i][0] + (wi[1]*xl[2] - wi[2]*xl[1]);
         vi[1] = v[i][1] + (wi[2]*xl[0] - wi[0]*xl[2]);
         vi[2] = v[i][2] + (wi[0]*xl[1] - wi[1]*xl[0]);
 
         // particle j
 
         vj[0] = v[j][0] + (wj[1]*jl[2] - wj[2]*jl[1]);
         vj[1] = v[j][1] + (wj[2]*jl[0] - wj[0]*jl[2]);
         vj[2] = v[j][2] + (wj[0]*jl[1] - wj[1]*jl[0]);
 
         // Find the scalar resistances a_sq and a_sh
 
         h_sep = r - radi-radj;
 
         // If less than the minimum gap use the minimum gap instead
 
         if (r < cut_inner[itype][jtype])
           h_sep = cut_inner[itype][jtype] - radi-radj;
 
         // Scale h_sep by radi
 
         h_sep = h_sep/radi;
         beta[0][1] = radj/radi;
         beta[1][1] = 1.0 + beta[0][1];
 
         // Scalar resistances
 
         if (flaglog) {
           // powers of the size ratio and of (1 + size ratio)
           beta[0][2] = beta[0][1]*beta[0][1];
           beta[0][3] = beta[0][2]*beta[0][1];
           beta[0][4] = beta[0][3]*beta[0][1];
           beta[1][2] = beta[1][1]*beta[1][1];
           beta[1][3] = beta[1][2]*beta[1][1];
           // shared factors: log(1/h)/(1+beta)^3 and h/(1+beta)
           double log_h_sep_beta13 = log(1.0/h_sep)/beta[1][3];
           double h_sep_beta11 = h_sep/beta[1][1];
 
           // the three expressions below are factored forms of the
           // commented-out reference formulas that follow them

           a_sq = pre[0]*(beta[0][2]/beta[1][2]/h_sep
                 +((0.2+1.4*beta[0][1]+0.2*beta[0][2])
                   +(1.0+18.0*(beta[0][1]+beta[0][3])-29.0*beta[0][2]
                     +beta[0][4])*h_sep_beta11/21.0)*log_h_sep_beta13);
 
           a_sh = pre[0]*((8.0*(beta[0][1]+beta[0][3])+4.0*beta[0][2])/15.0
                 +(64.0-180.0*(beta[0][1]+beta[0][3])+232.0*beta[0][2]
                   +64.0*beta[0][4])*h_sep_beta11/375.0)*log_h_sep_beta13;
 
           a_pu = pre[1]*((0.4*beta[0][1]+0.1*beta[0][2])*beta[1][1]
                 +(0.128-0.132*beta[0][1]+0.332*beta[0][2]
                   +0.172*beta[0][3])*h_sep)*log_h_sep_beta13;
 
           /*//a_sq = 6*MY_PI*mu*radi*(1.0/4.0/h_sep + 9.0/40.0*log(1/h_sep));
           a_sq = beta0*beta0/beta1/beta1/h_sep
                   +(1.0+7.0*beta0+beta0*beta0)/5.0/pow(beta1,3)*log(1.0/h_sep);
           a_sq += (1.0+18.0*beta0-29.0*beta0*beta0+18.0*pow(beta0,3)
                   +pow(beta0,4))/21.0/pow(beta1,4)*h_sep*log(1.0/h_sep);
           a_sq *= 6.0*MY_PI*mu*radi;
 
           a_sh = 4.0*beta0*(2.0+beta0
                   +2.0*beta0*beta0)/15.0/pow(beta1,3)*log(1.0/h_sep);
           a_sh += 4.0*(16.0-45.0*beta0+58.0*beta0*beta0-45.0*pow(beta0,3)
                   +16.0*pow(beta0,4))/375.0/pow(beta1,4)*h_sep*log(1.0/h_sep);
           a_sh *= 6.0*MY_PI*mu*radi;
 
           a_pu = beta0*(4.0+beta0)/10.0/beta1/beta1*log(1.0/h_sep);
           a_pu += (32.0-33.0*beta0+83.0*beta0*beta0
                   +43.0*pow(beta0,3))/250.0/pow(beta1,3)*h_sep*log(1.0/h_sep);
           a_pu *= 8.0*MY_PI*mu*pow(radi,3);*/
         } else
           // without log terms only the leading 1/h squeeze term is used
           a_sq = pre[0]*(beta[0][1]*beta[0][1]/(beta[1][1]*beta[1][1]*h_sep));
 
         // Relative  velocity at the point of closest approach
 
         vr1 = vi[0] - vj[0];
         vr2 = vi[1] - vj[1];
         vr3 = vi[2] - vj[2];
 
         // Normal component (vr.n)n
 
         vnnr = (vr1*delx + vr2*dely + vr3*delz)/r;
         vn1 = vnnr*delx/r;
         vn2 = vnnr*dely/r;
         vn3 = vnnr*delz/r;
 
         // Tangential component vr - (vr.n)n
 
         vt1 = vr1 - vn1;
         vt2 = vr2 - vn2;
         vt3 = vr3 - vn3;
 
         // Find force due to squeeze type motion
 
         fx  = a_sq*vn1;
         fy  = a_sq*vn2;
         fz  = a_sq*vn3;
 
         // Find force due to all shear kind of motions
 
         if (flaglog) {
           fx = fx + a_sh*vt1;
           fy = fy + a_sh*vt2;
           fz = fz + a_sh*vt3;
         }
 
         // Scale forces to obtain in appropriate units
 
         fx = vxmu2f*fx;
         fy = vxmu2f*fy;
         fz = vxmu2f*fz;
 
         // Add to the total force (particle i only)
 
         f[i][0] -= fx;
         f[i][1] -= fy;
         f[i][2] -= fz;
 
         // Find torque due to this force
 
         if (flaglog) {
           tx = xl[1]*fz - xl[2]*fy;
           ty = xl[2]*fx - xl[0]*fz;
           tz = xl[0]*fy - xl[1]*fx;
 
           // Why a scale factor ?
           // NOTE(review): fx,fy,fz were already multiplied by vxmu2f
           // above, so this torque carries the factor twice - confirm
           // whether that is intended
 
           torque[i][0] -= vxmu2f*tx;
           torque[i][1] -= vxmu2f*ty;
           torque[i][2] -= vxmu2f*tz;
 
           // Torque due to a_pu
           // uses the part of the relative angular velocity (wi - wj)
           // that is perpendicular to the line of centers
 
           wdotn = ((wi[0]-wj[0])*delx +
                    (wi[1]-wj[1])*dely + (wi[2]-wj[2])*delz)/r;
           wt1 = (wi[0]-wj[0]) - wdotn*delx/r;
           wt2 = (wi[1]-wj[1]) - wdotn*dely/r;
           wt3 = (wi[2]-wj[2]) - wdotn*delz/r;
 
           tx = a_pu*wt1;
           ty = a_pu*wt2;
           tz = a_pu*wt3;
 
           // add to total
 
           torque[i][0] -= vxmu2f*tx;
           torque[i][1] -= vxmu2f*ty;
           torque[i][2] -= vxmu2f*tz;
         }
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    This computes R_{FE}*E, where E is the rate-of-strain tensor, which is
    known a priori because it depends only on the known fluid velocity.
    So, this part of the hydrodynamic interaction can be precomputed and
    transferred to the right-hand side (RHS)
    ---------------------------------------------------------------------- */
 
 void PairLubricateUPoly::compute_RE(double **x)
 {
   // Adds to atom->f and atom->torque the pairwise lubrication
   // contribution driven only by the imposed strain-rate tensor Ef,
   // evaluated for the particle positions x supplied by the caller.
   // Nothing is zeroed here: the terms are accumulated onto whatever
   // f and torque already hold. Returns immediately when flagHI is off.

   int i,j,ii,jj,inum,jnum,itype,jtype;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   int *type = atom->type;
 
   double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz,tx,ty,tz;
   double rsq,r,h_sep,radi,radj;
   //double beta0,beta1,lhsep;
   double vr1,vr2,vr3,vnnr,vn1,vn2,vn3;
   double vt1,vt2,vt3;
   double xl[3],pre[2];
 
   double **f = atom->f;
   double **torque = atom->torque;
   double *radius = atom->radius;
 
   // beta[0][k] holds (radj/radi)^k and beta[1][k] holds (1+radj/radi)^k
   // for the powers that are filled in inside the pair loop below

   double beta[2][5];
   double vxmu2f = force->vxmu2f;
   double a_sq = 0.0;
   double a_sh = 0.0;
 
   if (!flagHI) return;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // these three entries are never read in this function;
   // presumably zeroed only so that no element is left uninitialized

   beta[0][0] = beta[1][0] = beta[1][4] = 0.0;
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     radi = radius[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];

     // the nested assignment first stores pi*mu*radi in pre[0], so that
     // pre[1] = 8*pi*mu*radi^3; pre[0] is then scaled to 6*pi*mu*radi
     // (pre[1] is computed but not read anywhere in this function)

     pre[1] = 8.0*(pre[0] = MY_PI*mu*radi)*radi*radi;
     pre[0] *= 6.0;
 
     // No contribution from isotropic terms due to E
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
       radj = radius[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r = sqrt(rsq);
 
         // loc of the point of closest approach on particle i from its center
 
         xl[0] = -delx/r*radi;
         xl[1] = -dely/r*radi;
         xl[2] = -delz/r*radi;
 
         // Find the scalar resistances a_sq and a_sh
 
         h_sep = r - radi-radj;
 
         // If less than the minimum gap use the minimum gap instead
 
         if (r < cut_inner[itype][jtype])
           h_sep = cut_inner[itype][jtype] - radi-radj;
 
         // Scale h_sep by radi
 
         h_sep = h_sep/radi;
         beta[0][1] = radj/radi;
         beta[1][1] = 1.0 + beta[0][1];
 
         /*beta0 = radj/radi;
         beta1 = 1.0 + beta0;
         lhsep = log(1.0/h_sep);*/
 
         // Scalar resistances for squeeze (a_sq) and shear (a_sh) motions
 
 
         if (flaglog) {
           // powers of the size ratio and of (1 + size ratio)
           beta[0][2] = beta[0][1]*beta[0][1];
           beta[0][3] = beta[0][2]*beta[0][1];
           beta[0][4] = beta[0][3]*beta[0][1];
           beta[1][2] = beta[1][1]*beta[1][1];
           beta[1][3] = beta[1][2]*beta[1][1];
           // shared factors: log(1/h)/(1+beta)^3 and h/(1+beta)
           double log_h_sep_beta13 = log(1.0/h_sep)/beta[1][3];
           double h_sep_beta11 = h_sep/beta[1][1];
 
           a_sq = pre[0]*(beta[0][2]/beta[1][2]/h_sep
                 +((0.2+1.4*beta[0][1]+0.2*beta[0][2])
                   +(1.0+18.0*(beta[0][1]+beta[0][3])-29.0*beta[0][2]
                     +beta[0][4])*h_sep_beta11/21.0)*log_h_sep_beta13);
 
           a_sh = pre[0]*((8.0*(beta[0][1]+beta[0][3])+4.0*beta[0][2])/15.0
                 +(64.0-180.0*(beta[0][1]+beta[0][3])+232.0*beta[0][2]
                   +64.0*beta[0][4])*h_sep_beta11/375.0)*log_h_sep_beta13;
         } else
           // without log terms only the leading 1/h squeeze term is used
           a_sq = pre[0]*(beta[0][1]*beta[0][1]/(beta[1][1]*beta[1][1]*h_sep));
 
         // Relative velocity at the point of closest approach due to Ef only
 
         vr1 = -2.0*(Ef[0][0]*xl[0] + Ef[0][1]*xl[1] + Ef[0][2]*xl[2]);
         vr2 = -2.0*(Ef[1][0]*xl[0] + Ef[1][1]*xl[1] + Ef[1][2]*xl[2]);
         vr3 = -2.0*(Ef[2][0]*xl[0] + Ef[2][1]*xl[1] + Ef[2][2]*xl[2]);
 
         // Normal component (vr.n)n
 
         vnnr = (vr1*delx + vr2*dely + vr3*delz)/r;
         vn1 = vnnr*delx/r;
         vn2 = vnnr*dely/r;
         vn3 = vnnr*delz/r;
 
         // Tangential component vr - (vr.n)n
 
         vt1 = vr1 - vn1;
         vt2 = vr2 - vn2;
         vt3 = vr3 - vn3;
 
         // Find force due to squeeze type motion
 
         fx  = a_sq*vn1;
         fy  = a_sq*vn2;
         fz  = a_sq*vn3;
 
         // Find force due to all shear kind of motions
 
         if (flaglog) {
           fx = fx + a_sh*vt1;
           fy = fy + a_sh*vt2;
           fz = fz + a_sh*vt3;
         }
 
         // Scale forces to obtain in appropriate units
 
         fx = vxmu2f*fx;
         fy = vxmu2f*fy;
         fz = vxmu2f*fz;
 
         // Add to the total force (particle i only)
 
         f[i][0] -= fx;
         f[i][1] -= fy;
         f[i][2] -= fz;
 
         // Find torque due to this force
 
         if (flaglog) {
           tx = xl[1]*fz - xl[2]*fy;
           ty = xl[2]*fx - xl[0]*fz;
           tz = xl[0]*fy - xl[1]*fx;
 
           // Why a scale factor ?
           // NOTE(review): fx,fy,fz were already multiplied by vxmu2f
           // above, so this torque carries the factor twice - confirm
           // whether that is intended
 
           torque[i][0] -= vxmu2f*tx;
           torque[i][1] -= vxmu2f*ty;
           torque[i][2] -= vxmu2f*tz;
 
         }
       }
     }
   }
 }
 
 /*-----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairLubricateUPoly::settings(int narg, char **arg)
 {
   // Parses the pair_style arguments (5 required, up to 2 optional):
   //   arg[0] = mu                 (numeric)
   //   arg[1] = flaglog            (integer flag, enables the log terms)
   //   arg[2] = cut_inner_global   (numeric)
   //   arg[3] = cut_global         (numeric)
   //   arg[4] = gdot               (numeric, used to build Ef below)
   //   arg[5] = flagHI             (optional integer flag, default 1)
   //   arg[6] = flagVF             (optional integer flag, default 1)
   // Raises an error via error->all() if the argument count is wrong.

   if (narg < 5 || narg > 7) error->all(FLERR,"Illegal pair_style command");
 
   mu = force->numeric(FLERR,arg[0]);
   flaglog = force->inumeric(FLERR,arg[1]);
   cut_inner_global = force->numeric(FLERR,arg[2]);
   cut_global = force->numeric(FLERR,arg[3]);
   gdot =  force->numeric(FLERR,arg[4]);
 
   flagHI = flagVF = 1;
   if (narg >= 6) flagHI = force->inumeric(FLERR,arg[5]);
   if (narg == 7) flagVF = force->inumeric(FLERR,arg[6]);
 
   // the log terms are corrections to the 1/r terms, so they cannot
   // be used on their own: force flagHI back on and warn

   if (flaglog == 1 && flagHI == 0) {
     error->warning(FLERR,"Cannot include log terms without 1/r terms; "
                    "setting flagHI to 1");
     flagHI = 1;
   }
 
   // reset cutoffs that have been explicitly set
   // the inner loop starts at j = i so that like-type pairs (i,i)
   // are reset as well, not only the unlike pairs with j > i
 
   if (allocated) {
     for (int i = 1; i <= atom->ntypes; i++)
       for (int j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) {
           cut_inner[i][j] = cut_inner_global;
           cut[i][j] = cut_global;
         }
   }
 
   // Store the rate of strain tensor
   // only the symmetric xy component is non-zero: Ef[0][1] = Ef[1][0] = gdot/2
 
   Ef[0][0] = 0.0;
   Ef[0][1] = 0.5*gdot;
   Ef[0][2] = 0.0;
   Ef[1][0] = 0.5*gdot;
   Ef[1][1] = 0.0;
   Ef[1][2] = 0.0;
   Ef[2][0] = 0.0;
   Ef[2][1] = 0.0;
   Ef[2][2] = 0.0;
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLubricateUPoly::init_style()
 {
   // Checks the prerequisites of this pair style, detects fix deform and
   // fix wall instances, computes the particle volume fraction, sets the
   // isotropic constants R0/RT0/RS0 from it and requests a full
   // neighbor list.

   if (force->newton_pair == 1)
     error->all(FLERR,"Pair lubricateU/poly requires newton pair off");
   if (comm->ghost_velocity == 0)
     error->all(FLERR,
                "Pair lubricateU/poly requires ghost atoms store velocity");
   if (!atom->sphere_flag)
     error->all(FLERR,"Pair lubricate/poly requires atom style sphere");
 
   // ensure all particles are finite-size
   // for pair hybrid, should limit test to types using the pair style
 
   double *radius = atom->radius;
   int nlocal = atom->nlocal;
 
   for (int i = 0; i < nlocal; i++)
     if (radius[i] == 0.0)
       error->one(FLERR,"Pair lubricate/poly requires extended particles");
 
   // Set the isotropic constants depending on the volume fraction
 
   // Find the total volume
   // check for fix deform, if exists it must use "remap v"
   // If box will change volume, set appropriate flag so that volume
   // and v.f. corrections are re-calculated at every step.
   //
   // If available volume is different from box volume
   // due to walls, set volume appropriately; if walls will
   // move, set appropriate flag so that volume and v.f. corrections
   // are re-calculated at every step.
   //
   // NOTE(review): no "remap v" check is actually performed in the loop
   // below; it only records that a fix deform exists.
   // NOTE(review): any fix whose style name contains "wall" is cast to
   // FixWall here - confirm that every such fix really is a FixWall.
 
   flagdeform = flagwall = 0;
   for (int i = 0; i < modify->nfix; i++){
     if (strcmp(modify->fix[i]->style,"deform") == 0)
       flagdeform = 1;
     else if (strstr(modify->fix[i]->style,"wall") != NULL){
       if (flagwall) 
         error->all(FLERR,
                    "Cannot use multiple fix wall commands with "
                    "pair lubricateU");
       flagwall = 1; // Walls exist
       wallfix = (FixWall *) modify->fix[i];
       if (wallfix->xflag) flagwall = 2; // Moving walls exist
     }
   }
 
   // set the isotropic constants depending on the volume fraction
   // vol_T = total volume: the periodic box without walls, otherwise
   // the region between the wall coordinates
 
   double vol_T, wallcoord;
     if (!flagwall) vol_T = domain->xprd*domain->yprd*domain->zprd;
   else {
     // start from the periodic box and override each bounded side
     // with the wall coordinate
     double wallhi[3], walllo[3];
     for (int j = 0; j < 3; j++){
       wallhi[j] = domain->prd[j];
       walllo[j] = 0;
     }
     for (int m = 0; m < wallfix->nwall; m++){
       // wallwhich encodes dimension (value/2) and side (value%2)
       int dim = wallfix->wallwhich[m] / 2;
       int side = wallfix->wallwhich[m] % 2;
       if (wallfix->xstyle[m] == VARIABLE){
         wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
         //Since fix->wall->init happens after pair->init_style
         wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
       }
 
       else wallcoord = wallfix->coord0[m];
 
       if (side == 0) walllo[dim] = wallcoord;
       else wallhi[dim] = wallcoord;
     }
     vol_T = (wallhi[0] - walllo[0]) * (wallhi[1] - walllo[1]) *
       (wallhi[2] - walllo[2]);
   }
 
   // Find the volume of the particles: sum the sphere volume of every
   // local atom using its own radius, then sum over all processors
 
   double volP = 0.0;
   for (int i = 0; i < nlocal; i++)
     volP += (4.0/3.0)*MY_PI*pow(atom->radius[i],3.0);
   MPI_Allreduce(&volP,&vol_P,1,MPI_DOUBLE,MPI_SUM,world);
 
   double vol_f = vol_P/vol_T;
 
   //DRH volume fraction needs to be defined manually
   // if excluded volume regions are present
   //  vol_f=0.5;
 
   // volume fraction corrections disabled: use the dilute-limit constants

   if (!flagVF) vol_f = 0;
 
   // report the volumes once, from processor 0 only

   if (!comm->me) {
     if(logfile)
       fprintf(logfile, "lubricateU: vol_f = %g, vol_p = %g, vol_T = %g\n",
           vol_f,vol_P,vol_T);
     if (screen)
       fprintf(screen, "lubricateU: vol_f = %g, vol_p = %g, vol_T = %g\n",
           vol_f,vol_P,vol_T);
   }
 
   // Set the isotropic constant
 
   if (flaglog == 0) {
     R0  = 6*MY_PI*mu*(1.0 + 2.16*vol_f);
     RT0 = 8*MY_PI*mu;  // Not needed actually
     RS0 = 20.0/3.0*MY_PI*mu*(1.0 + 3.33*vol_f + 2.80*vol_f*vol_f);
   } else {
     R0  = 6*MY_PI*mu*(1.0 + 2.725*vol_f - 6.583*vol_f*vol_f);
     RT0 = 8*MY_PI*mu*(1.0 + 0.749*vol_f - 2.469*vol_f*vol_f);
     RS0 = 20.0/3.0*MY_PI*mu*(1.0 + 3.64*vol_f - 6.95*vol_f*vol_f);
   }
 
   // request a full neighbor list (half off, full on)

-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
 }
diff --git a/src/FLD/pair_lubricate_poly.cpp b/src/FLD/pair_lubricate_poly.cpp
index 5d1b5b8da..937c5a315 100644
--- a/src/FLD/pair_lubricate_poly.cpp
+++ b/src/FLD/pair_lubricate_poly.cpp
@@ -1,555 +1,555 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Randy Schunk (SNL)
                          Amit Kumar and Michael Bybee (UIUC)
                          Dave Heine (Corning), polydispersity
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_lubricate_poly.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "domain.h"
 #include "modify.h"
 #include "fix.h"
 #include "fix_deform.h"
 #include "memory.h"
 #include "random_mars.h"
 #include "fix_wall.h"
 #include "input.h"
 #include "variable.h"
 #include "math_const.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 // same as fix_deform.cpp
 // (local copy of the constants; presumably must be kept in the same
 // order as the enum in that file - verify when either one changes)
 
 enum{NO_REMAP,X_REMAP,V_REMAP};
 
 
 // same as fix_wall.cpp
 // VARIABLE is compared against FixWall::xstyle[] in this file, so the
 // values presumably have to match the enum in fix_wall.cpp - verify
 
 enum{EDGE,CONSTANT,VARIABLE};
 
 /* ---------------------------------------------------------------------- */
 
 PairLubricatePoly::PairLubricatePoly(LAMMPS *lmp) : PairLubricate(lmp)
 {
   // All construction work is delegated to PairLubricate; the only
   // addition is setting the no_virial_fdotr_compute flag.
   // (presumably this disables the generic F dot r virial because
   // compute() tallies the virial itself - confirm against Pair)
   this->no_virial_fdotr_compute = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLubricatePoly::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz,tx,ty,tz;
   double rsq,r,h_sep,beta0,beta1,radi,radj;
   double vr1,vr2,vr3,vnnr,vn1,vn2,vn3;
   double vt1,vt2,vt3,wt1,wt2,wt3,wdotn;
   double vRS0;
   double vi[3],vj[3],wi[3],wj[3],xl[3],jl[3];
   double a_sq,a_sh,a_pu;
   int *ilist,*jlist,*numneigh,**firstneigh;
   double lamda[3],vstream[3];
 
   double vxmu2f = force->vxmu2f;
 
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **v = atom->v;
   double **f = atom->f;
   double **omega = atom->omega;
   double **torque = atom->torque;
   double *radius = atom->radius;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int newton_pair = force->newton_pair;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // subtract streaming component of velocity, omega, angmom
   // assume fluid streaming velocity = box deformation rate
   // vstream = (ux,uy,uz)
   // ux = h_rate[0]*x + h_rate[5]*y + h_rate[4]*z
   // uy = h_rate[1]*y + h_rate[3]*z
   // uz = h_rate[2]*z
   // omega_new = omega - curl(vstream)/2
   // angmom_new = angmom - I*curl(vstream)/2
   // Ef = (grad(vstream) + (grad(vstream))^T) / 2
 
   if (shearing) {
     double *h_rate = domain->h_rate;
     double *h_ratelo = domain->h_ratelo;
 
     for (ii = 0; ii < inum; ii++) {
       i = ilist[ii];
       itype = type[i];
       radi = radius[i];
       domain->x2lamda(x[i],lamda);
       vstream[0] = h_rate[0]*lamda[0] + h_rate[5]*lamda[1] +
         h_rate[4]*lamda[2] + h_ratelo[0];
       vstream[1] = h_rate[1]*lamda[1] + h_rate[3]*lamda[2] + h_ratelo[1];
       vstream[2] = h_rate[2]*lamda[2] + h_ratelo[2];
       v[i][0] -= vstream[0];
       v[i][1] -= vstream[1];
       v[i][2] -= vstream[2];
 
       omega[i][0] += 0.5*h_rate[3];
       omega[i][1] -= 0.5*h_rate[4];
       omega[i][2] += 0.5*h_rate[5];
     }
 
     // set Ef from h_rate in strain units
 
     Ef[0][0] = h_rate[0]/domain->xprd;
     Ef[1][1] = h_rate[1]/domain->yprd;
     Ef[2][2] = h_rate[2]/domain->zprd;
     Ef[0][1] = Ef[1][0] = 0.5 * h_rate[5]/domain->yprd;
     Ef[0][2] = Ef[2][0] = 0.5 * h_rate[4]/domain->zprd;
     Ef[1][2] = Ef[2][1] = 0.5 * h_rate[3]/domain->zprd;
 
     // copy updated omega to the ghost particles
     // no need to do this if not shearing since comm->ghost_velocity is set
 
     comm->forward_comm_pair(this);
   }
 
   // This section of code adjusts R0/RT0/RS0 if necessary due to changes
   // in the volume fraction as a result of fix deform or moving walls
 
   double dims[3], wallcoord;
   if (flagVF) // Flag for volume fraction corrections
     if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
       if (flagdeform && !flagwall)
         for (j = 0; j < 3; j++)
           dims[j] = domain->prd[j];
       else if (flagwall == 2 || (flagdeform && flagwall == 1)){
          double wallhi[3], walllo[3];
          for (int j = 0; j < 3; j++){
            wallhi[j] = domain->prd[j];
            walllo[j] = 0;
          }
          for (int m = 0; m < wallfix->nwall; m++){
            int dim = wallfix->wallwhich[m] / 2;
            int side = wallfix->wallwhich[m] % 2;
            if (wallfix->xstyle[m] == VARIABLE){
              wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
            }
            else wallcoord = wallfix->coord0[m];
            if (side == 0) walllo[dim] = wallcoord;
            else wallhi[dim] = wallcoord;
          }
          for (int j = 0; j < 3; j++)
            dims[j] = wallhi[j] - walllo[j];
       }
       double vol_T = dims[0]*dims[1]*dims[2];
       double vol_f = vol_P/vol_T;
       if (flaglog == 0) {
         R0  = 6*MY_PI*mu*(1.0 + 2.16*vol_f);
         RT0 = 8*MY_PI*mu;
         RS0 = 20.0/3.0*MY_PI*mu*(1.0 + 3.33*vol_f + 2.80*vol_f*vol_f);
       } else {
         R0  = 6*MY_PI*mu*(1.0 + 2.725*vol_f - 6.583*vol_f*vol_f);
         RT0 = 8*MY_PI*mu*(1.0 + 0.749*vol_f - 2.469*vol_f*vol_f);
         RS0 = 20.0/3.0*MY_PI*mu*(1.0 + 3.64*vol_f - 6.95*vol_f*vol_f);
       }
     }
 
 
   // end of R0 adjustment code
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     radi = radius[i];
 
     // angular velocity
 
     wi[0] = omega[i][0];
     wi[1] = omega[i][1];
     wi[2] = omega[i][2];
 
     // FLD contribution to force and torque due to isotropic terms
     // FLD contribution to stress from isotropic RS0
 
     if (flagfld) {
       f[i][0] -= vxmu2f*R0*radi*v[i][0];
       f[i][1] -= vxmu2f*R0*radi*v[i][1];
       f[i][2] -= vxmu2f*R0*radi*v[i][2];
       const double radi3 = radi*radi*radi;
       torque[i][0] -= vxmu2f*RT0*radi3*wi[0];
       torque[i][1] -= vxmu2f*RT0*radi3*wi[1];
       torque[i][2] -= vxmu2f*RT0*radi3*wi[2];
 
       if (shearing && vflag_either) {
         vRS0 = -vxmu2f * RS0*radi3;
         v_tally_tensor(i,i,nlocal,newton_pair,
                        vRS0*Ef[0][0],vRS0*Ef[1][1],vRS0*Ef[2][2],
                        vRS0*Ef[0][1],vRS0*Ef[0][2],vRS0*Ef[1][2]);
       }
     }
 
     if (!flagHI) continue;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
       radj = atom->radius[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r = sqrt(rsq);
 
         // angular momentum = I*omega = 2/5 * M*R^2 * omega
 
         wj[0] = omega[j][0];
         wj[1] = omega[j][1];
         wj[2] = omega[j][2];
 
         // xl = point of closest approach on particle i from its center
 
         xl[0] = -delx/r*radi;
         xl[1] = -dely/r*radi;
         xl[2] = -delz/r*radi;
         jl[0] = -delx/r*radj;
         jl[1] = -dely/r*radj;
         jl[2] = -delz/r*radj;
 
         // velocity at the point of closest approach on both particles
         // v = v + omega_cross_xl - Ef.xl
 
         // particle i
 
         vi[0] = v[i][0] + (wi[1]*xl[2] - wi[2]*xl[1])
                         - (Ef[0][0]*xl[0] + Ef[0][1]*xl[1] + Ef[0][2]*xl[2]);
 
         vi[1] = v[i][1] + (wi[2]*xl[0] - wi[0]*xl[2])
                         - (Ef[1][0]*xl[0] + Ef[1][1]*xl[1] + Ef[1][2]*xl[2]);
 
         vi[2] = v[i][2] + (wi[0]*xl[1] - wi[1]*xl[0])
                         - (Ef[2][0]*xl[0] + Ef[2][1]*xl[1] + Ef[2][2]*xl[2]);
 
         // particle j
 
         vj[0] = v[j][0] - (wj[1]*jl[2] - wj[2]*jl[1])
                         + (Ef[0][0]*jl[0] + Ef[0][1]*jl[1] + Ef[0][2]*jl[2]);
 
         vj[1] = v[j][1] - (wj[2]*jl[0] - wj[0]*jl[2])
                         + (Ef[1][0]*jl[0] + Ef[1][1]*jl[1] + Ef[1][2]*jl[2]);
 
         vj[2] = v[j][2] - (wj[0]*jl[1] - wj[1]*jl[0])
                         + (Ef[2][0]*jl[0] + Ef[2][1]*jl[1] + Ef[2][2]*jl[2]);
 
         // scalar resistances XA and YA
 
         h_sep = r - radi-radj;
 
         // if less than the minimum gap use the minimum gap instead
 
         if (r < cut_inner[itype][jtype])
           h_sep = cut_inner[itype][jtype] - radi-radj;
 
         // scale h_sep by radi
 
         h_sep = h_sep/radi;
         beta0 = radj/radi;
         beta1 = 1.0 + beta0;
 
         // scalar resistances
 
         if (flaglog) {
           a_sq = beta0*beta0/beta1/beta1/h_sep +
             (1.0+7.0*beta0+beta0*beta0)/5.0/pow(beta1,3.0)*log(1.0/h_sep);
           a_sq += (1.0+18.0*beta0-29.0*beta0*beta0+18.0 *
                    pow(beta0,3.0)+pow(beta0,4.0))/21.0/pow(beta1,4.0) *
             h_sep*log(1.0/h_sep);
           a_sq *= 6.0*MY_PI*mu*radi;
           a_sh = 4.0*beta0*(2.0+beta0+2.0*beta0*beta0)/15.0/pow(beta1,3.0) *
             log(1.0/h_sep);
           a_sh += 4.0*(16.0-45.0*beta0+58.0*beta0*beta0-45.0*pow(beta0,3.0) +
                        16.0*pow(beta0,4.0))/375.0/pow(beta1,4.0) *
             h_sep*log(1.0/h_sep);
           a_sh *= 6.0*MY_PI*mu*radi;
           a_pu = beta0*(4.0+beta0)/10.0/beta1/beta1*log(1.0/h_sep);
           a_pu += (32.0-33.0*beta0+83.0*beta0*beta0+43.0 *
                    pow(beta0,3.0))/250.0/pow(beta1,3.0)*h_sep*log(1.0/h_sep);
           a_pu *= 8.0*MY_PI*mu*pow(radi,3.0);
         } else a_sq = 6.0*MY_PI*mu*radi*(beta0*beta0/beta1/beta1/h_sep);
 
         // relative velocity at the point of closest approach
         // includes fluid velocity
 
         vr1 = vi[0] - vj[0];
         vr2 = vi[1] - vj[1];
         vr3 = vi[2] - vj[2];
 
         // normal component (vr.n)n
 
         vnnr = (vr1*delx + vr2*dely + vr3*delz)/r;
         vn1 = vnnr*delx/r;
         vn2 = vnnr*dely/r;
         vn3 = vnnr*delz/r;
 
         // tangential component vr - (vr.n)n
 
         vt1 = vr1 - vn1;
         vt2 = vr2 - vn2;
         vt3 = vr3 - vn3;
 
         // force due to squeeze type motion
 
         fx  = a_sq*vn1;
         fy  = a_sq*vn2;
         fz  = a_sq*vn3;
 
         // force due to all shear kind of motions
 
         if (flaglog) {
           fx = fx + a_sh*vt1;
           fy = fy + a_sh*vt2;
           fz = fz + a_sh*vt3;
         }
 
         // scale forces for appropriate units
 
         fx *= vxmu2f;
         fy *= vxmu2f;
         fz *= vxmu2f;
 
         // add to total force
 
         f[i][0] -= fx;
         f[i][1] -= fy;
         f[i][2] -= fz;
 
         // torque due to this force
 
         if (flaglog) {
           tx = xl[1]*fz - xl[2]*fy;
           ty = xl[2]*fx - xl[0]*fz;
           tz = xl[0]*fy - xl[1]*fx;
 
           torque[i][0] -= vxmu2f*tx;
           torque[i][1] -= vxmu2f*ty;
           torque[i][2] -= vxmu2f*tz;
 
           // torque due to a_pu
 
           wdotn = ((wi[0]-wj[0])*delx + (wi[1]-wj[1])*dely +
                    (wi[2]-wj[2])*delz)/r;
           wt1 = (wi[0]-wj[0]) - wdotn*delx/r;
           wt2 = (wi[1]-wj[1]) - wdotn*dely/r;
           wt3 = (wi[2]-wj[2]) - wdotn*delz/r;
 
           tx = a_pu*wt1;
           ty = a_pu*wt2;
           tz = a_pu*wt3;
 
           torque[i][0] -= vxmu2f*tx;
           torque[i][1] -= vxmu2f*ty;
           torque[i][2] -= vxmu2f*tz;
 
         }
 
         // set j = nlocal so that only I gets tallied
 
         if (evflag) ev_tally_xyz(i,nlocal,nlocal,0,
                                  0.0,0.0,-fx,-fy,-fz,delx,dely,delz);
       }
     }
   }
 
   // restore streaming component of velocity, omega, angmom
 
   if (shearing) {
     double *h_rate = domain->h_rate;
     double *h_ratelo = domain->h_ratelo;
 
     for (ii = 0; ii < inum; ii++) {
       i = ilist[ii];
       itype = type[i];
       radi = atom->radius[i];
 
       domain->x2lamda(x[i],lamda);
       vstream[0] = h_rate[0]*lamda[0] + h_rate[5]*lamda[1] +
         h_rate[4]*lamda[2] + h_ratelo[0];
       vstream[1] = h_rate[1]*lamda[1] + h_rate[3]*lamda[2] + h_ratelo[1];
       vstream[2] = h_rate[2]*lamda[2] + h_ratelo[2];
       v[i][0] += vstream[0];
       v[i][1] += vstream[1];
       v[i][2] += vstream[2];
 
       omega[i][0] -= 0.5*h_rate[3];
       omega[i][1] += 0.5*h_rate[4];
       omega[i][2] -= 0.5*h_rate[5];
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLubricatePoly::init_style()
 {
   if (force->newton_pair == 1)
     error->all(FLERR,"Pair lubricate/poly requires newton pair off");
   if (comm->ghost_velocity == 0)
     error->all(FLERR,
                "Pair lubricate/poly requires ghost atoms store velocity");
   if (!atom->sphere_flag)
     error->all(FLERR,"Pair lubricate/poly requires atom style sphere");
 
   // ensure all particles are finite-size
   // for pair hybrid, should limit test to types using the pair style
 
   double *radius = atom->radius;
   int nlocal = atom->nlocal;
 
   for (int i = 0; i < nlocal; i++)
     if (radius[i] == 0.0)
       error->one(FLERR,"Pair lubricate/poly requires extended particles");
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
 
   // set the isotropic constants that depend on the volume fraction
   // vol_T = total volume
 
   // check for fix deform, if exists it must use "remap v"
   // If box will change volume, set appropriate flag so that volume
   // and v.f. corrections are re-calculated at every step.
 
   // if available volume is different from box volume
   // due to walls, set volume appropriately; if walls will
   // move, set appropriate flag so that volume and v.f. corrections
   // are re-calculated at every step.
 
   shearing = flagdeform = flagwall = 0;
   for (int i = 0; i < modify->nfix; i++){
     if (strcmp(modify->fix[i]->style,"deform") == 0) {
       shearing = flagdeform = 1;
       if (((FixDeform *) modify->fix[i])->remapflag != V_REMAP)
         error->all(FLERR,"Using pair lubricate with inconsistent "
                    "fix deform remap option");
     }
     if (strstr(modify->fix[i]->style,"wall") != NULL) {
       if (flagwall) 
         error->all(FLERR,
                    "Cannot use multiple fix wall commands with "
                    "pair lubricate/poly");
       flagwall = 1; // Walls exist
       wallfix = (FixWall *) modify->fix[i];
       if (wallfix->xflag) flagwall = 2; // Moving walls exist
     }
 
     if (strstr(modify->fix[i]->style,"wall") != NULL){
       flagwall = 1; // Walls exist
       if (((FixWall *) modify->fix[i])->xflag ) {
         flagwall = 2; // Moving walls exist
         wallfix = (FixWall *) modify->fix[i];
       }
     }
   }
 
   double vol_T;
   double wallcoord;
   if (!flagwall) vol_T = domain->xprd*domain->yprd*domain->zprd;
   else {
     double wallhi[3], walllo[3];
     for (int j = 0; j < 3; j++){
       wallhi[j] = domain->prd[j];
       walllo[j] = 0;
     }
     for (int m = 0; m < wallfix->nwall; m++){
       int dim = wallfix->wallwhich[m] / 2;
       int side = wallfix->wallwhich[m] % 2;
       if (wallfix->xstyle[m] == VARIABLE){
         wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
         //Since fix->wall->init happens after pair->init_style
         wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
       }
       else wallcoord = wallfix->coord0[m];
 
       if (side == 0) walllo[dim] = wallcoord;
       else wallhi[dim] = wallcoord;
     }
     vol_T = (wallhi[0] - walllo[0]) * (wallhi[1] - walllo[1]) *
       (wallhi[2] - walllo[2]);
   }
 
   double volP = 0.0;
   for (int i = 0; i < nlocal; i++)
     volP += (4.0/3.0)*MY_PI*pow(atom->radius[i],3.0);
   MPI_Allreduce(&volP,&vol_P,1,MPI_DOUBLE,MPI_SUM,world);
 
   double vol_f = vol_P/vol_T;
 
   if (!flagVF) vol_f = 0;
 
   // set isotropic constants
 
   if (flaglog == 0) {
     R0  = 6*MY_PI*mu*(1.0 + 2.16*vol_f);
     RT0 = 8*MY_PI*mu;
     RS0 = 20.0/3.0*MY_PI*mu*(1.0 + 3.33*vol_f + 2.80*vol_f*vol_f);
   } else {
     R0  = 6*MY_PI*mu*(1.0 + 2.725*vol_f - 6.583*vol_f*vol_f);
     RT0 = 8*MY_PI*mu*(1.0 + 0.749*vol_f - 2.469*vol_f*vol_f);
     RS0 = 20.0/3.0*MY_PI*mu*(1.0 + 3.64*vol_f - 6.95*vol_f*vol_f);
   }
 
   // check for fix deform, if exists it must use "remap v"
 
   shearing = 0;
   for (int i = 0; i < modify->nfix; i++)
     if (strcmp(modify->fix[i]->style,"deform") == 0) {
       shearing = 1;
       if (((FixDeform *) modify->fix[i])->remapflag != V_REMAP)
         error->all(FLERR,"Using pair lubricate/poly with inconsistent "
                    "fix deform remap option");
     }
 
   // set Ef = 0 since used whether shearing or not
 
   Ef[0][0] = Ef[0][1] = Ef[0][2] = 0.0;
   Ef[1][0] = Ef[1][1] = Ef[1][2] = 0.0;
   Ef[2][0] = Ef[2][1] = Ef[2][2] = 0.0;
 }
diff --git a/src/GPU/pair_beck_gpu.cpp b/src/GPU/pair_beck_gpu.cpp
index 6f52b3eb8..614c088b0 100644
--- a/src/GPU/pair_beck_gpu.cpp
+++ b/src/GPU/pair_beck_gpu.cpp
@@ -1,244 +1,244 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
    
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
    
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_beck_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 #include "math_special.h"
 
 using namespace LAMMPS_NS;
 using namespace MathSpecial;
 
 // External functions from cuda library for atom decomposition
 
 // setup call made from PairBeckGPU::init_style(); the returned int is
 // passed to GPU_EXTRA::check_flag(), gpu_mode is taken by reference
 int beck_gpu_init(const int ntypes, double **cutsq, double **host_aa,
                   double **alpha, double **beta, double **AA, double **BB,
                   double *special_lj, const int nlocal,
                   const int nall, const int max_nbors, const int maxspecial,
                   const double cell_size, int &gpu_mode, FILE *screen);
 // called from the PairBeckGPU destructor
 void beck_gpu_clear();
 // used by compute() when gpu_mode != GPU_FORCE: the return value is taken
 // as firstneigh, and ilist/jnum/host_start/success are filled by the callee
 int ** beck_gpu_compute_n(const int ago, const int inum,
                           const int nall, double **host_x, int *host_type,
                           double *sublo, double *subhi, tagint *tag, int **nspecial,
                           tagint **special, const bool eflag, const bool vflag,
                           const bool eatom, const bool vatom, int &host_start,
                           int **ilist, int **jnum,
                           const double cpu_time, bool &success);
 // used by compute() when gpu_mode == GPU_FORCE: the caller passes in the
 // ilist/numneigh/firstneigh arrays of its own neighbor list
 void beck_gpu_compute(const int ago, const int inum, const int nall, 
                       double **host_x, int *host_type, int *ilist, int *numj,
                       int **firstneigh, const bool eflag, const bool vflag,
                       const bool eatom, const bool vatom, int &host_start,
                       const double cpu_time, bool &success);
 // added to Pair::memory_usage() in PairBeckGPU::memory_usage()
 double beck_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 // Builds on PairBeck with gpu_mode preset to GPU_FORCE, clears the
 // host-time accumulator and the reinit/respa flags, then runs the
 // GPU_EXTRA::gpu_ready() check.
 PairBeckGPU::PairBeckGPU(LAMMPS *lmp) :
   PairBeck(lmp), gpu_mode(GPU_FORCE)
 {
   cpu_time = 0.0;
   reinitflag = 0;
   respa_enable = 0;
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairBeckGPU::~PairBeckGPU()
 {
   // cleanup is delegated to the external library routine declared above
   beck_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBeckGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
   
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
   if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = beck_gpu_compute_n(neighbor->ago, inum, nall,
                                      atom->x, atom->type, domain->sublo,
                                      domain->subhi, atom->tag, atom->nspecial,
                                      atom->special, eflag, vflag, eflag_atom,
                                      vflag_atom, host_start, 
                                      &ilist, &numneigh, cpu_time, success);
   } else {
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     beck_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                       ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                       vflag_atom, host_start, cpu_time, success);
   }
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   if (host_start<inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairBeckGPU::init_style()
 {
   // this style refuses to run with newton pair on
   if (force->newton_pair) 
     error->all(FLERR,"Cannot use newton pair with beck/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   // maxcut ends up as the largest squared cutoff over all type pairs
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       // a pair counts as set if given explicitly or if both i-i and j-j are set
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   // largest cutoff plus the neighbor skin, handed to the library as cell_size
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   // maxspecial stays 0 unless the system is molecular
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // 300 is the value passed for the max_nbors parameter of beck_gpu_init()
   int success = beck_gpu_init(atom->ntypes+1, cutsq, aa, alpha, beta,
                               AA, BB, force->special_lj, atom->nlocal,
                               atom->nlocal+atom->nghost, 300, maxspecial,
                               cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
   // a full neighbor list is requested only in GPU_FORCE mode
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Sum of the base Pair tally and the byte count reported by the
 // external beck_gpu_bytes() routine.
 double PairBeckGPU::memory_usage()
 {
   const double base_bytes = Pair::memory_usage();
   const double library_bytes = beck_gpu_bytes();
   return base_bytes + library_bytes;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Host-side evaluation of the Beck interaction for the list entries
 // ilist[start] .. ilist[inum-1].  Only f[i] is accumulated for each i-j
 // pair and tallies go through ev_tally_full(); vflag is not referenced
 // in the body.
 void PairBeckGPU::cpu_compute(int start, int inum, int eflag, int vflag,
                                int *ilist, int *numneigh, int **firstneigh) {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r5,force_beck,factor_lj;
   double r,rinv;
   double aaij,alphaij,betaij;
   double term1,term1inv,term2,term3,term4,term5,term6;
   int *jlist;
 
   // evdwl is passed to ev_tally_full() whenever evflag is set, including
   // calls where eflag is 0 and the energy branch below never assigns it;
   // give it a defined value so no indeterminate double is ever read
   evdwl = 0.0;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   double *special_lj = force->special_lj;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       // the special-bond scale factor is looked up from the raw neighbor
       // entry before the entry is masked down to the atom index
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r = sqrt(rsq);
         r5 = rsq*rsq*r;
         aaij = aa[itype][jtype];
         alphaij = alpha[itype][jtype];
         betaij = beta[itype][jtype];
         term1 = aaij*aaij + rsq;
         term2 = powint(term1,-5);
         term3 = 21.672 + 30.0*aaij*aaij + 6.0*rsq;
         term4 = alphaij + r5*betaij;
         term5 = alphaij + 6.0*r5*betaij;
         rinv  = 1.0/r;
         force_beck = AA[itype][jtype]*exp(-1.0*r*term4)*term5;
         force_beck -= BB[itype][jtype]*r*term2*term3;
 
         // fpair multiplies the separation vector, hence the extra 1/r
         fpair = factor_lj*force_beck*rinv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           term6 = powint(term1,-3);
           term1inv = 1.0/term1;
           evdwl = AA[itype][jtype]*exp(-1.0*r*term4);
           evdwl -= BB[itype][jtype]*term6*(1.0+(2.709+3.0*aaij*aaij)*term1inv);
           evdwl *= factor_lj;
         }
 
         if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_born_coul_long_gpu.cpp b/src/GPU/pair_born_coul_long_gpu.cpp
index b774665e9..cabdaeeea 100644
--- a/src/GPU/pair_born_coul_long_gpu.cpp
+++ b/src/GPU/pair_born_coul_long_gpu.cpp
@@ -1,297 +1,297 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
    
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
    
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_born_coul_long_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "kspace.h"
 #include "gpu_extra.h"
 
 // Constants used by the Coulomb part of cpu_compute() below:
 //   t = 1/(1 + EWALD_P*grij)
 //   erfc = t*(A1 + t*(A2 + t*(A3 + t*(A4 + t*A5)))) * exp(-grij*grij)
 // EWALD_F scales the grij*exp(-grij*grij) term of the force
 // (its value is numerically 2/sqrt(pi)).
 // NOTE(review): A1..A5 and EWALD_P look like the usual polynomial
 // approximation to erfc() -- confirm the source of the coefficients.
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 // External functions from cuda library for atom decomposition
 
 // setup call made from PairBornCoulLongGPU::init_style(); the returned int
 // is passed to GPU_EXTRA::check_flag(), gpu_mode is taken by reference
 int borncl_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
                     double **host_born1, double **host_born2, 
                     double **host_born3, double **host_a, 
                     double **host_c, double **host_d, 
                     double **sigma, double **offset, double *special_lj,
                     const int inum, const int nall, const int max_nbors,
                     const int maxspecial, const double cell_size,
                     int &gpu_mode, FILE *screen, double **host_cut_ljsq,
                     double host_cut_coulsq, double *host_special_coul,
                     const double qqrd2e, const double g_ewald);
 // called from the PairBornCoulLongGPU destructor
 void borncl_gpu_clear();
 // used by compute() when gpu_mode != GPU_FORCE: the return value is taken
 // as firstneigh, and ilist/jnum/host_start/success are filled by the callee
 int** borncl_gpu_compute_n(const int ago, const int inum_full, const int nall,
                            double **host_x, int *host_type, double *sublo,
                            double *subhi, tagint *tag, int **nspecial, 
                            tagint **special, const bool eflag, const bool vflag,
                            const bool eatom, const bool vatom, int &host_start,
                            int **ilist, int **jnum,  const double cpu_time,
                            bool &success, double *host_q, double *boxlo,
                            double *prd);
 // used by compute() when gpu_mode == GPU_FORCE: the caller passes in the
 // ilist/numneigh/firstneigh arrays of its own neighbor list
 void borncl_gpu_compute(const int ago, const int inum_full, const int nall,
                         double **host_x, int *host_type, int *ilist, int *numj,
                         int **firstneigh, const bool eflag, const bool vflag,
                         const bool eatom, const bool vatom, int &host_start,
                         const double cpu_time, bool &success, double *host_q,
                         const int nlocal, double *boxlo, double *prd);
 // added to Pair::memory_usage() in PairBornCoulLongGPU::memory_usage()
 double borncl_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 // Builds on PairBornCoulLong with gpu_mode preset to GPU_FORCE, clears the
 // host-time accumulator and the reinit/respa flags, then runs the
 // GPU_EXTRA::gpu_ready() check.
 PairBornCoulLongGPU::PairBornCoulLongGPU(LAMMPS *lmp) :
   PairBornCoulLong(lmp),
   gpu_mode(GPU_FORCE)
 {
   cpu_time = 0.0;
   reinitflag = 0;
   respa_enable = 0;
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairBornCoulLongGPU::~PairBornCoulLongGPU()
 {
   // cleanup is delegated to the external library routine declared above
   borncl_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBornCoulLongGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
   
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;    
   if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = borncl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                       atom->type, domain->sublo, domain->subhi,
                                       atom->tag, atom->nspecial, atom->special,
                                       eflag, vflag, eflag_atom, vflag_atom,
                                       host_start, &ilist, &numneigh, cpu_time,
                                       success, atom->q, domain->boxlo,
                                       domain->prd);
   } else {
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     borncl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                        ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                        vflag_atom, host_start, cpu_time, success, atom->q,
                        atom->nlocal, domain->boxlo, domain->prd);
   }
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   if (host_start<inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairBornCoulLongGPU::init_style()
 {
   // prerequisites: per-atom charge must exist and newton pair must be off
   if (!atom->q_flag)
     error->all(FLERR,
       "Pair style born/coul/long/gpu requires atom attribute q");
   if (force->newton_pair) 
     error->all(FLERR,
        "Cannot use newton pair with born/coul/long/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   // maxcut ends up as the largest squared cutoff over all type pairs
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       // a pair counts as set if given explicitly or if both i-i and j-j are set
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   // largest cutoff plus the neighbor skin, handed to the library as cell_size
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   cut_coulsq = cut_coul * cut_coul;
 
   // insure use of KSpace long-range solver, set g_ewald
 
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style is incompatible with KSpace style");
   g_ewald = force->kspace->g_ewald;
 
   // maxspecial stays 0 unless the system is molecular
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // 300 is the value passed for the max_nbors parameter of borncl_gpu_init()
   int success = borncl_gpu_init(atom->ntypes+1, cutsq,  rhoinv, 
                                 born1, born2, born3, a, c, d, sigma,
                                 offset, force->special_lj, atom->nlocal,
                   	        atom->nlocal+atom->nghost, 300, maxspecial,
                    	        cell_size, gpu_mode, screen, cut_ljsq,
                                 cut_coulsq, force->special_coul, 
                                 force->qqrd2e, g_ewald);
 
   GPU_EXTRA::check_flag(success,error,world);
 
   // a full neighbor list is requested only in GPU_FORCE mode
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Sum of the base Pair tally and the byte count reported by the
 // external borncl_gpu_bytes() routine.
 double PairBornCoulLongGPU::memory_usage()
 {
   const double base_bytes = Pair::memory_usage();
   const double library_bytes = borncl_gpu_bytes();
   return base_bytes + library_bytes;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBornCoulLongGPU::cpu_compute(int start, int inum, int eflag,
                                       int vflag, int *ilist, int *numneigh,
                                       int **firstneigh)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double r,rexp,r2inv,r6inv,forcecoul,forceborn,factor_coul,factor_lj;
   double grij,expm2,prefactor,t,erfc;
   int *jlist;
   double rsq;
 
   evdwl = ecoul = 0.0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   double qqrd2e = force->qqrd2e;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         r = sqrt(rsq);
 
         if (rsq < cut_coulsq) {
           grij = g_ewald * r;
           expm2 = exp(-grij*grij);
           t = 1.0 / (1.0 + EWALD_P*grij);
           erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
           prefactor = qqrd2e * qtmp*q[j]/r;
           forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
           if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
         } else forcecoul = 0.0;
 
         if (rsq < cut_ljsq[itype][jtype]) {
           r6inv = r2inv*r2inv*r2inv;
           rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
           forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv
             + born3[itype][jtype]*r2inv*r6inv;
         } else forceborn = 0.0;
 
         fpair = (forcecoul + factor_lj*forceborn) * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           if (rsq < cut_coulsq) {
             ecoul = prefactor*erfc;
             if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
           } else ecoul = 0.0;
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv 
               + d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_born_coul_wolf_gpu.cpp b/src/GPU/pair_born_coul_wolf_gpu.cpp
index 05debf4ef..5d09b0325 100644
--- a/src/GPU/pair_born_coul_wolf_gpu.cpp
+++ b/src/GPU/pair_born_coul_wolf_gpu.cpp
@@ -1,292 +1,292 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
    
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
    
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Trung Dac Nguyen (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_born_coul_wolf_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 // External functions from cuda library for atom decomposition
 // (only declared here; their definitions are not part of this file)
 
 // called once from init_style(); the returned status is handed to
 // GPU_EXTRA::check_flag() and gpu_mode is passed by reference
 int borncw_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
                     double **host_born1, double **host_born2, 
                     double **host_born3, double **host_a, double **host_c, 
                     double **host_d, double **sigma, double **offset, 
                     double *special_lj, const int inum,
                     const int nall, const int max_nbors, const int maxspecial,
                     const double cell_size, int &gpu_mode, FILE *screen,
                     double **host_cut_ljsq, double host_cut_coulsq,
                     double *host_special_coul, const double qqrd2e,
                     const double alf, const double e_shift, const double f_shift);
 // called from the destructor
 void borncw_gpu_clear();
 // used by compute() when gpu_mode != GPU_FORCE: the return value is used as
 // firstneigh, and ilist/jnum are filled through the pointer arguments
 int ** borncw_gpu_compute_n(const int ago, const int inum_full, const int nall,
                             double **host_x, int *host_type, double *sublo,
                             double *subhi, tagint *tag, int **nspecial, 
                             tagint **special, const bool eflag, const bool vflag,
                             const bool eatom, const bool vatom, int &host_start,
                             int **ilist, int **jnum, const double cpu_time,
                             bool &success, double *host_q, double *boxlo,
                             double *prd);
 // used by compute() when gpu_mode == GPU_FORCE: receives the neighbor list
 // arrays (ilist, numj, firstneigh) taken from the pair style's list
 void borncw_gpu_compute(const int ago, const int inum_full, const int nall,
                         double **host_x, int *host_type, int *ilist, int *numj,
                         int **firstneigh, const bool eflag, const bool vflag,
                         const bool eatom, const bool vatom, int &host_start,
                         const double cpu_time, bool &success, double *host_q,
                         const int nlocal, double *boxlo, double *prd);
 // added to Pair::memory_usage() in memory_usage()
 double borncw_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 // Construct the style on top of PairBornCoulWolf, starting in GPU_FORCE
 // mode, with rRESPA and reinit switched off and the host timer cleared.
 PairBornCoulWolfGPU::PairBornCoulWolfGPU(LAMMPS *lmp)
   : PairBornCoulWolf(lmp), gpu_mode(GPU_FORCE)
 {
   cpu_time = 0.0;
   reinitflag = 0;
   respa_enable = 0;
 
   // hands the modify and error objects to the GPU_EXTRA readiness check
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    destructor: free all arrays by calling borncw_gpu_clear()
    NOTE(review): what exactly is released is decided inside
    borncw_gpu_clear(), which is not visible in this file
 ------------------------------------------------------------------------- */
 
 PairBornCoulWolfGPU::~PairBornCoulWolfGPU()
 {
   borncw_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBornCoulWolfGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
   
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
   if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = borncw_gpu_compute_n(neighbor->ago, inum, nall,
                                       atom->x, atom->type, domain->sublo,
                                       domain->subhi, atom->tag, atom->nspecial,
                                       atom->special, eflag, vflag, eflag_atom,
                                       vflag_atom, host_start, 
                                       &ilist, &numneigh, cpu_time, success, 
                                       atom->q, domain->boxlo, domain->prd);
   } else {
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     borncw_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                        ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                        vflag_atom, host_start, cpu_time, success, atom->q,
                        atom->nlocal, domain->boxlo, domain->prd);
   }
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   if (host_start<inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style:
    rejects newton pair on, fills cutsq for every type pair, computes the
    Wolf shift terms, initializes the GPU library and, in GPU_FORCE mode,
    requests a full neighbor list
 ------------------------------------------------------------------------- */
 
 void PairBornCoulWolfGPU::init_style()
 {
   if (force->newton_pair) 
     error->all(FLERR,
       "Cannot use newton pair with born/coul/wolf/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   // maxcut tracks the largest squared cutoff over all type pairs
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       // pair is usable if set explicitly or if both diagonal entries are set
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   // cell size handed to the library: largest cutoff plus neighbor skin
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   cut_coulsq = cut_coul * cut_coul;
 
   // shift terms evaluated at r = cut_coul; cpu_compute() recomputes the
   // same two expressions for the host-side loop
   double e_shift = erfc(alf*cut_coul)/cut_coul;
   double f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) / 
     cut_coul; 
 
   // maxspecial stays 0 unless the system is molecular
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // NOTE(review): the literal 300 is passed as the max_nbors argument
   int success = borncw_gpu_init(atom->ntypes+1, cutsq, rhoinv, 
                                 born1, born2, born3, a, c, d, sigma, offset, 
                                 force->special_lj, atom->nlocal,
                                 atom->nlocal+atom->nghost, 300, maxspecial,
                                 cell_size, gpu_mode, screen, cut_ljsq,
                                 cut_coulsq, force->special_coul, force->qqrd2e, 
                                 alf, e_shift, f_shift);
   GPU_EXTRA::check_flag(success,error,world);
 
   // a full (not half) neighbor list is requested only in GPU_FORCE mode
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   } 
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Report the base-class memory figure plus what borncw_gpu_bytes() returns.
 double PairBornCoulWolfGPU::memory_usage()
 {
   const double host_bytes = Pair::memory_usage();
   const double device_bytes = borncw_gpu_bytes();
   return host_bytes + device_bytes;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Host-side evaluation of the Born + Wolf-summed Coulomb pair interaction
 // for the list entries ilist[start] .. ilist[inum-1]; compute() calls this
 // when host_start < inum.  Forces are added to atom i only, and pair
 // energies/virials are tallied through ev_tally_full(); each atom's
 // self-energy term is tallied once through ev_tally().
 //   start, inum                  range of ilist entries to process
 //   eflag                        nonzero -> evaluate pair energies
 //   vflag                        not read here (evflag is tested instead)
 //   ilist, numneigh, firstneigh  neighbor list to loop over
 void PairBornCoulWolfGPU::cpu_compute(int start, int inum, int eflag, int vflag,
                                       int *ilist, int *numneigh,
                                       int **firstneigh) {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul,fpair;
   double rsq,r2inv,r6inv,forcecoul,forceborn,factor_coul,factor_lj;
   double erfcc,erfcd,v_sh,dvdrr,e_self,qisq;
   double prefactor;
   double r,rexp;
   int *jlist;
 
   evdwl = ecoul = 0.0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   double qqrd2e = force->qqrd2e;
 
   // shift terms: with these, v_sh and dvdrr below both evaluate to zero
   // at r = cut_coul
   double e_shift = erfc(alf*cut_coul)/cut_coul;
   double f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
     cut_coul;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     // per-atom self term, tallied as Coulomb energy with no force
     qisq = qtmp*qtmp;
     e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e;
     if (evflag) ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0);
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // special-bond scale factors are looked up before j is masked
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         // both branches below need r, so take the square root once
         r = sqrt(rsq);
 
         if (rsq < cut_coulsq) {
           prefactor = qqrd2e*qtmp*q[j]/r;
           erfcc = erfc(alf*r);
           erfcd = exp(-alf*alf*r*r);
           v_sh = (erfcc - e_shift*r) * prefactor;
           dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
           forcecoul = dvdrr*rsq*prefactor;
           // special-bond correction: remove the excluded fraction of the
           // bare Coulomb term
           if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
         } else forcecoul = 0.0;
 
         if (rsq < cut_ljsq[itype][jtype]) {
           r6inv = r2inv*r2inv*r2inv;
           rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
           forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv +
             born3[itype][jtype]*r2inv*r6inv;
         } else forceborn = 0.0;
 
         // forcecoul already carries the factor_coul correction applied
         // above, so it is not scaled by factor_coul a second time; this
         // keeps the force consistent with the ecoul expression below
         fpair = (forcecoul + factor_lj*forceborn) * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           if (rsq < cut_coulsq) {
             ecoul = v_sh;
             if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
           } else ecoul = 0.0;
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv +
               d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_born_gpu.cpp b/src/GPU/pair_born_gpu.cpp
index 4b3353533..4df91fde1 100644
--- a/src/GPU/pair_born_gpu.cpp
+++ b/src/GPU/pair_born_gpu.cpp
@@ -1,246 +1,246 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
    
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
    
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_born_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 // (only declared here; their definitions are not part of this file)
 
 // called once from init_style(); the returned status is handed to
 // GPU_EXTRA::check_flag() and gpu_mode is passed by reference
 int born_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
                   double **host_born1, double **host_born2, double **host_born3, 
                   double **host_a, double **host_c, double **host_d, 
                   double **host_sigma, double **offset, double *special_lj, 
                   const int inum, const int nall, const int max_nbors,
                   const int maxspecial, const double cell_size, 
                   int &gpu_mode, FILE *screen);
 // called from reinit() with the current coefficient tables
 void born_gpu_reinit(const int ntypes, double **host_rhoinv,
                      double **host_born1, double **host_born2, double **host_born3,
                      double **host_a, double **host_c, double **host_d,
                      double **offset);
 // called from the destructor
 void born_gpu_clear();
 // used by compute() when gpu_mode != GPU_FORCE: the return value is used as
 // firstneigh, and ilist/jnum are filled through the pointer arguments
 int ** born_gpu_compute_n(const int ago, const int inum_full, 
                           const int nall, double **host_x, int *host_type, 
                           double *sublo, double *subhi, tagint *tag, int **nspecial,
                           tagint **special, const bool eflag, const bool vflag,
                           const bool eatom, const bool vatom, int &host_start,
                           int **ilist, int **jnum, const double cpu_time,
                           bool &success);
 // used by compute() when gpu_mode == GPU_FORCE: receives the neighbor list
 // arrays (ilist, numj, firstneigh) taken from the pair style's list
 void born_gpu_compute(const int ago, const int inum_full, const int nall,
                       double **host_x, int *host_type, int *ilist, int *numj,
                       int **firstneigh, const bool eflag, const bool vflag,
                       const bool eatom, const bool vatom, int &host_start,
                       const double cpu_time, bool &success);
 // added to Pair::memory_usage() in memory_usage()
 double born_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 // Construct the style on top of PairBorn, starting in GPU_FORCE mode, with
 // rRESPA switched off and the host timer cleared.
 PairBornGPU::PairBornGPU(LAMMPS *lmp)
   : PairBorn(lmp), gpu_mode(GPU_FORCE)
 {
   cpu_time = 0.0;
   respa_enable = 0;
 
   // hands the modify and error objects to the GPU_EXTRA readiness check
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    destructor: free all arrays by calling born_gpu_clear()
    NOTE(review): what exactly is released is decided inside
    born_gpu_clear(), which is not visible in this file
 ------------------------------------------------------------------------- */
 
 PairBornGPU::~PairBornGPU()
 {
   born_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBornGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
   
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
   if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = born_gpu_compute_n(neighbor->ago, inum, nall,
                                     atom->x, atom->type, domain->sublo,
                                     domain->subhi, atom->tag, atom->nspecial,
                                     atom->special, eflag, vflag, eflag_atom,
                                     vflag_atom, host_start, 
                                     &ilist, &numneigh, cpu_time, success);
   } else {
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     born_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                      ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                      vflag_atom, host_start, cpu_time, success);
   }
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   if (host_start<inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style:
    rejects newton pair on, fills cutsq for every type pair, initializes
    the GPU library and, in GPU_FORCE mode, requests a full neighbor list
 ------------------------------------------------------------------------- */
 
 void PairBornGPU::init_style()
 {
   if (force->newton_pair) 
     error->all(FLERR,"Cannot use newton pair with born/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   // maxcut tracks the largest squared cutoff over all type pairs
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       // pair is usable if set explicitly or if both diagonal entries are set
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   // cell size handed to the library: largest cutoff plus neighbor skin
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   // maxspecial stays 0 unless the system is molecular
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // NOTE(review): the literal 300 is passed as the max_nbors argument
   int success = born_gpu_init(atom->ntypes+1, cutsq, rhoinv, 
                               born1, born2, born3, a, c, d, sigma,
                               offset, force->special_lj, atom->nlocal,
                               atom->nlocal+atom->nghost, 300, maxspecial,
 	      cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
   // a full (not half) neighbor list is requested only in GPU_FORCE mode
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Run the base-class reinit, then pass the current coefficient tables to
 // born_gpu_reinit().
 void PairBornGPU::reinit()
 {
   Pair::reinit();
 
   const int ntypes_plus_one = atom->ntypes+1;
   born_gpu_reinit(ntypes_plus_one,
                   rhoinv, born1, born2, born3,
                   a, c, d,
                   offset);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Report the base-class memory figure plus what born_gpu_bytes() returns.
 double PairBornGPU::memory_usage()
 {
   const double host_bytes = Pair::memory_usage();
   const double device_bytes = born_gpu_bytes();
   return host_bytes + device_bytes;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Host-side evaluation of the Born pair interaction for the list entries
 // ilist[start] .. ilist[inum-1]; compute() calls this when
 // host_start < inum.  Forces are added to atom i only, and energies and
 // virials are tallied through ev_tally_full().
 //   start, inum                  range of ilist entries to process
 //   eflag                        nonzero -> evaluate pair energies
 //   vflag                        not read here (evflag is tested instead)
 //   ilist, numneigh, firstneigh  neighbor list to loop over
 void PairBornGPU::cpu_compute(int start, int inum, int eflag, int vflag,
                               int *ilist, int *numneigh, int **firstneigh) {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r2inv,r6inv,forceborn,factor_lj;
   double r,rexp;
   int *jlist;
 
   // evdwl is passed to ev_tally_full() whenever evflag is set, including
   // when eflag is 0 and the energy branch below is skipped, so it needs a
   // defined value
   evdwl = 0.0;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   double *special_lj = force->special_lj;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // special-bond scale factor is looked up before j is masked
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         r6inv = r2inv*r2inv*r2inv;
         r = sqrt(rsq);
         rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
         forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv +
           born3[itype][jtype]*r2inv*r6inv;
         fpair = factor_lj*forceborn*r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv +
             d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
           evdwl *= factor_lj;
         }
 
         if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_buck_coul_cut_gpu.cpp b/src/GPU/pair_buck_coul_cut_gpu.cpp
index 1afba4253..e00fb88c0 100644
--- a/src/GPU/pair_buck_coul_cut_gpu.cpp
+++ b/src/GPU/pair_buck_coul_cut_gpu.cpp
@@ -1,260 +1,260 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Trung Dac Nguyen (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_buck_coul_cut_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 // (only declared here; their definitions are not part of this file)
 
 // called once from init_style(); the returned status is handed to
 // GPU_EXTRA::check_flag() and gpu_mode is passed by reference
 int buckc_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
                    double **host_buck1, double **host_buck2, double **host_a,
                    double **host_c, double **offset, double *special_lj,
                    const int inum, const int nall, const int max_nbors,
                    const int maxspecial, const double cell_size,
                    int &gpu_mode, FILE *screen, double **host_cut_ljsq,
                    double **host_cut_coulsq, double *host_special_coul,
                    const double qqrd2e);
 // called from the destructor
 void buckc_gpu_clear();
 // used by compute() when gpu_mode != GPU_FORCE: the return value is used as
 // firstneigh, and ilist/jnum are filled through the pointer arguments
 int ** buckc_gpu_compute_n(const int ago, const int inum_full, const int nall,
                            double **host_x, int *host_type, double *sublo,
                            double *subhi, tagint *tag, int **nspecial,
                            tagint **special, const bool eflag, const bool vflag,
                            const bool eatom, const bool vatom, int &host_start,
                            int **ilist, int **jnum, const double cpu_time,
                            bool &success, double *host_q, double *boxlo,
                            double *prd);
 // used by compute() when gpu_mode == GPU_FORCE: receives the neighbor list
 // arrays (ilist, numj, firstneigh) taken from the pair style's list
 void buckc_gpu_compute(const int ago, const int inum_full, const int nall,
                        double **host_x, int *host_type, int *ilist, int *numj,
                        int **firstneigh, const bool eflag, const bool vflag,
                        const bool eatom, const bool vatom, int &host_start,
                        const double cpu_time, bool &success, double *host_q,
                        const int nlocal, double *boxlo, double *prd);
 // added to Pair::memory_usage() in memory_usage()
 double buckc_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 // Construct the style on top of PairBuckCoulCut, starting in GPU_FORCE
 // mode, with rRESPA and reinit switched off and the host timer cleared.
 PairBuckCoulCutGPU::PairBuckCoulCutGPU(LAMMPS *lmp)
   : PairBuckCoulCut(lmp), gpu_mode(GPU_FORCE)
 {
   cpu_time = 0.0;
   reinitflag = 0;
   respa_enable = 0;
 
   // hands the modify and error objects to the GPU_EXTRA readiness check
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    destructor: free all arrays by calling buckc_gpu_clear()
    NOTE(review): what exactly is released is decided inside
    buckc_gpu_clear(), which is not visible in this file
 ------------------------------------------------------------------------- */
 
 PairBuckCoulCutGPU::~PairBuckCoulCutGPU()
 {
   buckc_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBuckCoulCutGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
 
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
   if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = buckc_gpu_compute_n(neighbor->ago, inum, nall,
                                      atom->x, atom->type, domain->sublo,
                                      domain->subhi, atom->tag, atom->nspecial,
                                      atom->special, eflag, vflag, eflag_atom,
                                      vflag_atom, host_start,
                                      &ilist, &numneigh, cpu_time, success,
                                      atom->q, domain->boxlo, domain->prd);
   } else {
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     buckc_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                       ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                       vflag_atom, host_start, cpu_time, success, atom->q,
                       atom->nlocal, domain->boxlo, domain->prd);
   }
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   if (host_start<inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style:
    rejects newton pair on, fills cutsq for every type pair, initializes
    the GPU library and, in GPU_FORCE mode, requests a full neighbor list
 ------------------------------------------------------------------------- */
 
 void PairBuckCoulCutGPU::init_style()
 {
   if (force->newton_pair)
     error->all(FLERR,
                "Cannot use newton pair with buck/coul/cut/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   // maxcut tracks the largest squared cutoff over all type pairs
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       // pair is usable if set explicitly or if both diagonal entries are set
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   // cell size handed to the library: largest cutoff plus neighbor skin
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   // maxspecial stays 0 unless the system is molecular
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // NOTE(review): the literal 300 is passed as the max_nbors argument
   int success = buckc_gpu_init(atom->ntypes+1, cutsq, rhoinv, buck1, buck2,
                                a, c, offset, force->special_lj, atom->nlocal,
                                atom->nlocal+atom->nghost, 300, maxspecial,
                                cell_size, gpu_mode, screen, cut_ljsq,
                                cut_coulsq, force->special_coul, force->qqrd2e);
   GPU_EXTRA::check_flag(success,error,world);
 
   // a full (not half) neighbor list is requested only in GPU_FORCE mode
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Report the base-class memory figure plus what buckc_gpu_bytes() returns.
 double PairBuckCoulCutGPU::memory_usage()
 {
   const double host_bytes = Pair::memory_usage();
   const double device_bytes = buckc_gpu_bytes();
   return host_bytes + device_bytes;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Host-side evaluation of the Buckingham + cutoff Coulomb pair interaction
 // for the list entries ilist[start] .. ilist[inum-1]; compute() calls this
 // when host_start < inum.  Forces are added to atom i only, and energies
 // and virials are tallied through ev_tally_full().
 void PairBuckCoulCutGPU::cpu_compute(int start, int inum, int eflag, int vflag,
                                      int *ilist, int *numneigh,
                                      int **firstneigh) {
   // energies keep their last value between pairs; both start at zero
   double evdwl = 0.0;
   double ecoul = 0.0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   const double qqrd2e = force->qqrd2e;
 
   // outer loop: central atoms assigned to the host
   for (int ii = start; ii < inum; ii++) {
     const int i = ilist[ii];
     const double qi = q[i];
     const double xi = x[i][0];
     const double yi = x[i][1];
     const double zi = x[i][2];
     const int itype = type[i];
     const int *neighbors = firstneigh[i];
     const int nneigh = numneigh[i];
 
     // inner loop: every neighbor of atom i
     for (int jj = 0; jj < nneigh; jj++) {
       int j = neighbors[jj];
       // special-bond scale factors are looked up before j is masked
       const double factor_lj = special_lj[sbmask(j)];
       const double factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       const double delx = xi - x[j][0];
       const double dely = yi - x[j][1];
       const double delz = zi - x[j][2];
       const double rsq = delx*delx + dely*dely + delz*delz;
       const int jtype = type[j];
 
       // nothing to do for pairs outside the overall cutoff
       if (!(rsq < cutsq[itype][jtype])) continue;
 
       const double r2inv = 1.0/rsq;
       const double r = sqrt(rsq);
       const bool in_coul = rsq < cut_coulsq[itype][jtype];
       const bool in_lj = rsq < cut_ljsq[itype][jtype];
 
       double forcecoul = 0.0;
       if (in_coul) forcecoul = qqrd2e * qi*q[j]/r;
 
       double r6inv = 0.0;
       double rexp = 0.0;
       double forcebuck = 0.0;
       if (in_lj) {
         r6inv = r2inv*r2inv*r2inv;
         rexp = exp(-r*rhoinv[itype][jtype]);
         forcebuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv;
       }
 
       const double fpair =
         (factor_coul*forcecoul + factor_lj*forcebuck) * r2inv;
 
       f[i][0] += delx*fpair;
       f[i][1] += dely*fpair;
       f[i][2] += delz*fpair;
 
       if (eflag) {
         if (in_coul) ecoul = factor_coul * qqrd2e * qi*q[j]/r;
         else ecoul = 0.0;
 
         if (in_lj) {
           evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv -
             offset[itype][jtype];
           evdwl *= factor_lj;
         } else evdwl = 0.0;
       }
 
       if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
     }
   }
 }
diff --git a/src/GPU/pair_buck_coul_long_gpu.cpp b/src/GPU/pair_buck_coul_long_gpu.cpp
index caa45e653..108596c31 100644
--- a/src/GPU/pair_buck_coul_long_gpu.cpp
+++ b/src/GPU/pair_buck_coul_long_gpu.cpp
@@ -1,290 +1,290 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_buck_coul_long_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "kspace.h"
 #include "gpu_extra.h"
 
 #define EWALD_F   1.12837917    // numerically 2/sqrt(pi); multiplies grij*expm2 in cpu_compute()
 #define EWALD_P   0.3275911     // cpu_compute() forms t = 1/(1 + EWALD_P*grij)
 #define A1        0.254829592   // A1..A5: coefficients of the degree-5 polynomial in t that
 #define A2       -0.284496736   // cpu_compute() multiplies by exp(-grij*grij) and stores in
 #define A3        1.421413741   // its local "erfc".  The values look like the constants of the
 #define A4       -1.453152027   // Abramowitz-Stegun 7.1.26 approximation of erfc(x)
 #define A5        1.061405429   // -- TODO confirm against the reference
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 // (defined outside this file; the notes below only describe how this file calls them)
 int buckcl_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
                     double **host_buck1, double **host_buck2, double **host_a,
                     double **host_c, double **offset, double *special_lj,
                     const int inum, const int nall, const int max_nbors,
                     const int maxspecial, const double cell_size,
                     int &gpu_mode, FILE *screen, double **host_cut_ljsq,
                     double host_cut_coulsq, double *host_special_coul,
                     const double qqrd2e, const double g_ewald);  // called from init_style(); result goes to GPU_EXTRA::check_flag
 void buckcl_gpu_clear();  // called from the destructor
 int** buckcl_gpu_compute_n(const int ago, const int inum_full, const int nall,
                            double **host_x, int *host_type, double *sublo,
                            double *subhi, tagint *tag, int **nspecial,
                            tagint **special, const bool eflag, const bool vflag,
                            const bool eatom, const bool vatom, int &host_start,
                            int **ilist, int **jnum,  const double cpu_time,
                            bool &success, double *host_q, double *boxlo,
                            double *prd);  // used by compute() when gpu_mode != GPU_FORCE; return value becomes firstneigh
 void buckcl_gpu_compute(const int ago, const int inum_full, const int nall,
                         double **host_x, int *host_type, int *ilist, int *numj,
                         int **firstneigh, const bool eflag, const bool vflag,
                         const bool eatom, const bool vatom, int &host_start,
                         const double cpu_time, bool &success, double *host_q,
                         const int nlocal, double *boxlo, double *prd);  // used by compute() when gpu_mode == GPU_FORCE
 double buckcl_gpu_bytes();  // added to Pair::memory_usage() in memory_usage()
 
 /* ---------------------------------------------------------------------- */
 // constructor: builds on PairBuckCoulLong and starts with gpu_mode = GPU_FORCE
 PairBuckCoulLongGPU::PairBuckCoulLongGPU(LAMMPS *lmp) :
   PairBuckCoulLong(lmp), gpu_mode(GPU_FORCE)
 {
   respa_enable = 0;  // cleared for this style (presumably: no rRESPA support -- confirm in Pair)
   reinitflag = 0;    // cleared for this style (presumably: reinit() not supported -- confirm in Pair)
   cpu_time = 0.0;    // nothing timed yet; compute() sets this around each cpu_compute() call
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);  // helper from gpu_extra.h, not visible in this file
 }
 
 /* ----------------------------------------------------------------------
    destructor: free all arrays held by the GPU library via buckcl_gpu_clear()
 ------------------------------------------------------------------------- */
 
 PairBuckCoulLongGPU::~PairBuckCoulLongGPU()
 {
   buckcl_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 // per-step pair computation: call the GPU library, then run cpu_compute() on list entries host_start..inum-1
 void PairBuckCoulLongGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;  // host_start is an output of the library call (passed by reference)
 
   bool success = true;   // the library call receives this by reference and may change it
   int *ilist, *numneigh, **firstneigh;
   if (gpu_mode != GPU_FORCE) {  // library supplies ilist/numneigh through pointers and returns firstneigh
     inum = atom->nlocal;
     firstneigh = buckcl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                       atom->type, domain->sublo, domain->subhi,
                                       atom->tag, atom->nspecial, atom->special,
                                       eflag, vflag, eflag_atom, vflag_atom,
                                       host_start, &ilist, &numneigh, cpu_time,
                                       success, atom->q, domain->boxlo,
                                       domain->prd);
   } else {  // GPU_FORCE: pass this style's own list (init_style() requests a full list in this mode)
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     buckcl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                        ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                        vflag_atom, host_start, cpu_time, success, atom->q,
                        atom->nlocal, domain->boxlo, domain->prd);
   }
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   if (host_start<inum) {  // some list entries remain for the host
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;  // elapsed host time; passed to the library on the next call
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style: check settings, fill cutsq, init GPU library, request neighbor list
 ------------------------------------------------------------------------- */
 
 void PairBuckCoulLongGPU::init_style()
 {
   if (!atom->q_flag)
     error->all(FLERR,
                "Pair style buck/coul/long/gpu requires atom attribute q");
   if (force->newton_pair)
     error->all(FLERR,
                "Cannot use newton pair with buck/coul/long/gpu pair style");
 
   // Fill cutsq here: the regular cutsq calculation is done only after init_style, but buckcl_gpu_init() below needs it
   double maxcut = -1.0;  // largest squared cutoff found in the loop
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;  // store the square of what init_one() returned
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   double cell_size = sqrt(maxcut) + neighbor->skin;  // largest cutoff plus the neighbor skin
 
   cut_coulsq = cut_coul * cut_coul;
 
   // ensure use of KSpace long-range solver, set g_ewald
 
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style is incompatible with KSpace style");
   g_ewald = force->kspace->g_ewald;
 
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   int success = buckcl_gpu_init(atom->ntypes+1, cutsq,  rhoinv, buck1, buck2,
                                 a, c, offset, force->special_lj, atom->nlocal,
                                 atom->nlocal+atom->nghost, 300, maxspecial,
                                 cell_size, gpu_mode, screen, cut_ljsq,
                                 cut_coulsq, force->special_coul, force->qqrd2e,
                                 g_ewald);  // the literal 300 is the max_nbors argument; gpu_mode is passed by reference
   GPU_EXTRA::check_flag(success,error,world);
 
   if (gpu_mode == GPU_FORCE) {  // tested after the init call, which may have changed gpu_mode
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;  // ask for a full list rather than a half list
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 // returns Pair::memory_usage() plus the figure reported by buckcl_gpu_bytes()
 double PairBuckCoulLongGPU::memory_usage()
 {
   double bytes = Pair::memory_usage();
   return bytes + buckcl_gpu_bytes();
 }
 
 /* ---------------------------------------------------------------------- */
 // host-side force/energy loop over list entries start..inum-1; forces are added to f[i] only
 void PairBuckCoulLongGPU::cpu_compute(int start, int inum, int eflag,
                                        int vflag, int *ilist, int *numneigh,
                                        int **firstneigh)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double r,rexp,r2inv,r6inv,forcecoul,forcebuck,factor_coul,factor_lj;
   double grij,expm2,prefactor,t,erfc;  // the local "erfc" hides any function of that name inside this body
   int *jlist;
   double rsq;
 
   evdwl = ecoul = 0.0;  // defined values for the tally call when eflag is 0
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   double qqrd2e = force->qqrd2e;
 
   // loop over neighbors of my atoms (the vflag argument is not referenced below; evflag gates the tally)
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];  // sbmask(j) selects the scaling entry for this neighbor
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;  // keep only the NEIGHMASK bits; j is used as an atom index from here on
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         r = sqrt(rsq);
         if (rsq < cut_coulsq) {  // cut_coulsq is a single value here, not a per-type table
           grij = g_ewald * r;
           expm2 = exp(-grij*grij);
           t = 1.0 / (1.0 + EWALD_P*grij);
           erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;  // polynomial in t times exp(-grij^2)
           prefactor = qqrd2e * qtmp*q[j]/r;
           forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
           if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;  // scaled pairs: subtract (1-factor_coul) of the plain q*q/r term
         } else forcecoul = 0.0;
 
         if (rsq < cut_ljsq[itype][jtype]) {
           r6inv = r2inv*r2inv*r2inv;
           rexp = exp(-r*rhoinv[itype][jtype]);
           forcebuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv;
         } else forcebuck = 0.0;
 
         fpair = (forcecoul + factor_lj*forcebuck) * r2inv;  // factor_coul was already applied inside forcecoul
 
         f[i][0] += delx*fpair;  // nothing is written to f[j] in this loop
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           if (rsq < cut_coulsq) {
             ecoul = prefactor*erfc;  // prefactor and erfc were set above under the same rsq test
             if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
           } else ecoul = 0.0;
 
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv -
               offset[itype][jtype];  // rexp and r6inv were set above under the same rsq test
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_buck_gpu.cpp b/src/GPU/pair_buck_gpu.cpp
index a30164bac..de64bfd90 100644
--- a/src/GPU/pair_buck_gpu.cpp
+++ b/src/GPU/pair_buck_gpu.cpp
@@ -1,242 +1,242 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_buck_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 // (defined outside this file; the notes below only describe how this file calls them)
 int buck_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
                   double **host_buck1, double **host_buck2,
                   double **host_a, double **host_c,
                   double **offset, double *special_lj, const int inum,
                   const int nall, const int max_nbors,  const int maxspecial,
                   const double cell_size, int &gpu_mode, FILE *screen);  // called from init_style(); result goes to GPU_EXTRA::check_flag
 void buck_gpu_reinit(const int ntypes, double **cutsq, double **host_rhoinv,
                   double **host_buck1, double **host_buck2,
                   double **host_a, double **host_c, double **offset);  // called from reinit() after Pair::reinit()
 void buck_gpu_clear();  // called from the destructor
 int ** buck_gpu_compute_n(const int ago, const int inum_full, const int nall,
                           double **host_x, int *host_type, double *sublo,
                           double *subhi, tagint *tag, int **nspecial,
                           tagint **special, const bool eflag, const bool vflag,
                           const bool eatom, const bool vatom, int &host_start,
                           int **ilist, int **jnum, const double cpu_time,
                           bool &success);  // used by compute() when gpu_mode != GPU_FORCE; return value becomes firstneigh
 void buck_gpu_compute(const int ago, const int inum_full, const int nall,
                       double **host_x, int *host_type, int *ilist, int *numj,
                       int **firstneigh, const bool eflag, const bool vflag,
                       const bool eatom, const bool vatom, int &host_start,
                       const double cpu_time, bool &success);  // used by compute() when gpu_mode == GPU_FORCE
 double buck_gpu_bytes();  // added to Pair::memory_usage() in memory_usage()
 
 /* ---------------------------------------------------------------------- */
 // constructor: builds on PairBuck and starts with gpu_mode = GPU_FORCE
 PairBuckGPU::PairBuckGPU(LAMMPS *lmp) : PairBuck(lmp), gpu_mode(GPU_FORCE)
 {
   respa_enable = 0;  // cleared for this style (presumably: no rRESPA support -- confirm in Pair)
   cpu_time = 0.0;    // nothing timed yet; compute() sets this around each cpu_compute() call
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);  // helper from gpu_extra.h, not visible in this file
 }
 
 /* ----------------------------------------------------------------------
    destructor: free all arrays held by the GPU library via buck_gpu_clear()
 ------------------------------------------------------------------------- */
 
 PairBuckGPU::~PairBuckGPU()
 {
   buck_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 // per-step pair computation: call the GPU library, then run cpu_compute() on list entries host_start..inum-1
 void PairBuckGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;  // host_start is an output of the library call (passed by reference)
 
   bool success = true;   // the library call receives this by reference and may change it
   int *ilist, *numneigh, **firstneigh;
   if (gpu_mode != GPU_FORCE) {  // library supplies ilist/numneigh through pointers and returns firstneigh
     inum = atom->nlocal;
     firstneigh = buck_gpu_compute_n(neighbor->ago, inum, nall,
                                     atom->x, atom->type, domain->sublo,
                                     domain->subhi, atom->tag, atom->nspecial,
                                     atom->special, eflag, vflag, eflag_atom,
                                     vflag_atom, host_start,
                                     &ilist, &numneigh, cpu_time, success);
   } else {  // GPU_FORCE: pass this style's own list (init_style() requests a full list in this mode)
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     buck_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                      ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                      vflag_atom, host_start, cpu_time, success);
   }
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   if (host_start<inum) {  // some list entries remain for the host
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;  // elapsed host time; passed to the library on the next call
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style: check settings, fill cutsq, init GPU library, request neighbor list
 ------------------------------------------------------------------------- */
 
 void PairBuckGPU::init_style()
 {
   if (force->newton_pair)
     error->all(FLERR,"Cannot use newton pair with buck/gpu pair style");
 
   // Fill cutsq here: the regular cutsq calculation is done only after init_style, but buck_gpu_init() below needs it
   double maxcut = -1.0;  // largest squared cutoff found in the loop
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;  // store the square of what init_one() returned
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   double cell_size = sqrt(maxcut) + neighbor->skin;  // largest cutoff plus the neighbor skin
 
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   int success = buck_gpu_init(atom->ntypes+1, cutsq, rhoinv, buck1, buck2,
                               a, c, offset, force->special_lj, atom->nlocal,
                               atom->nlocal+atom->nghost, 300, maxspecial,
                               cell_size, gpu_mode, screen);  // the literal 300 is the max_nbors argument; gpu_mode is passed by reference
   GPU_EXTRA::check_flag(success,error,world);
 
   if (gpu_mode == GPU_FORCE) {  // tested after the init call, which may have changed gpu_mode
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;  // ask for a full list rather than a half list
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 // runs the base-class reinit, then passes the current coefficient tables to the GPU library again
 void PairBuckGPU::reinit()
 {
   Pair::reinit();
   
   buck_gpu_reinit(atom->ntypes+1, cutsq, rhoinv, buck1, buck2,
                   a, c, offset);  // same table arguments that init_style() gives to buck_gpu_init()
 }
 
 /* ---------------------------------------------------------------------- */
 // returns Pair::memory_usage() plus the figure reported by buck_gpu_bytes()
 double PairBuckGPU::memory_usage()
 {
   double bytes = Pair::memory_usage();
   return bytes + buck_gpu_bytes();
 }
 
 /* ---------------------------------------------------------------------- */
 // host-side force/energy loop over list entries start..inum-1; forces are added to f[i] only
 void PairBuckGPU::cpu_compute(int start, int inum, int eflag, int vflag,
                                int *ilist, int *numneigh, int **firstneigh) {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;  // evdwl is declared without an initial value
   double rsq,r2inv,r6inv,forcebuck,factor_lj;
   double r,rexp;
   int *jlist;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   double *special_lj = force->special_lj;
 
   // loop over neighbors of my atoms (the vflag argument is not referenced below; evflag gates the tally)
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];  // sbmask(j) selects the scaling entry for this neighbor
       j &= NEIGHMASK;  // keep only the NEIGHMASK bits; j is used as an atom index from here on
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         r6inv = r2inv*r2inv*r2inv;
         r = sqrt(rsq);
         rexp = exp(-r*rhoinv[itype][jtype]);
         forcebuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv;
         fpair = factor_lj*forcebuck*r2inv;
 
         f[i][0] += delx*fpair;  // nothing is written to f[j] in this loop
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv -
             offset[itype][jtype];
           evdwl *= factor_lj;
         }
 
         if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);  // NOTE(review): evdwl is unset here when evflag is set but eflag is 0
       }
     }
   }
 }
diff --git a/src/GPU/pair_colloid_gpu.cpp b/src/GPU/pair_colloid_gpu.cpp
index 77dbdb344..1b164df8f 100644
--- a/src/GPU/pair_colloid_gpu.cpp
+++ b/src/GPU/pair_colloid_gpu.cpp
@@ -1,307 +1,307 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
    
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
    
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_colloid_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 // (defined outside this file; the notes below only describe how this file calls them)
 int colloid_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                      double **host_lj2, double **host_lj3, double **host_lj4, 
                      double **offset, double *special_lj, double **host_a12, 
                      double **host_a1, double **host_a2, double **host_d1, 
                      double **host_d2, double **host_sigma3, double **host_sigma6, 
                      int **host_form, const int nlocal, 
                      const int nall, const int max_nbors, const int maxspecial,
                      const double cell_size, int &gpu_mode, FILE *screen);  // called from init_style(); result goes to GPU_EXTRA::check_flag
 void colloid_gpu_clear();  // called from the destructor
 int ** colloid_gpu_compute_n(const int ago, const int inum,
                              const int nall, double **host_x, int *host_type, 
                              double *sublo, double *subhi, tagint *tag, int **nspecial,
                              tagint **special, const bool eflag, const bool vflag,
                              const bool eatom, const bool vatom, int &host_start,
                              int **ilist, int **jnum,
                              const double cpu_time, bool &success);  // used by compute() when gpu_mode != GPU_FORCE; return value becomes firstneigh
 void colloid_gpu_compute(const int ago, const int inum, const int nall, 
                          double **host_x, int *host_type, int *ilist, int *numj,
                          int **firstneigh, const bool eflag, const bool vflag,
                          const bool eatom, const bool vatom, int &host_start,
                          const double cpu_time, bool &success);  // used by compute() when gpu_mode == GPU_FORCE
 double colloid_gpu_bytes();  // added to Pair::memory_usage() in memory_usage()
 
 /* ---------------------------------------------------------------------- */
 // constructor: builds on PairColloid and starts with gpu_mode = GPU_FORCE
 PairColloidGPU::PairColloidGPU(LAMMPS *lmp) : PairColloid(lmp), gpu_mode(GPU_FORCE)
 {
   respa_enable = 0;  // cleared for this style (presumably: no rRESPA support -- confirm in Pair)
   reinitflag = 0;    // cleared for this style (presumably: reinit() not supported -- confirm in Pair)
   cpu_time = 0.0;    // nothing timed yet; compute() sets this around each cpu_compute() call
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);  // helper from gpu_extra.h, not visible in this file
 }
 
 /* ----------------------------------------------------------------------
    destructor: free all arrays held by the GPU library via colloid_gpu_clear()
 ------------------------------------------------------------------------- */
 
 PairColloidGPU::~PairColloidGPU()
 {
   colloid_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 // per-step pair computation: call the GPU library, then run cpu_compute() on list entries host_start..inum-1
 void PairColloidGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
   
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;  // host_start is an output of the library call (passed by reference)
   
   bool success = true;   // the library call receives this by reference and may change it
   int *ilist, *numneigh, **firstneigh;
   if (gpu_mode != GPU_FORCE) {  // library supplies ilist/numneigh through pointers and returns firstneigh
     inum = atom->nlocal;
     firstneigh = colloid_gpu_compute_n(neighbor->ago, inum, nall,
                                        atom->x, atom->type, domain->sublo,
                                        domain->subhi, atom->tag, atom->nspecial,
                                        atom->special, eflag, vflag, eflag_atom,
                                        vflag_atom, host_start, 
                                        &ilist, &numneigh, cpu_time, success);
   } else {  // GPU_FORCE: pass this style's own list (init_style() requests a full list in this mode)
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     colloid_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                         ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                         vflag_atom, host_start, cpu_time, success);
   }
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   if (host_start<inum) {  // some list entries remain for the host
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;  // elapsed host time; passed to the library on the next call
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style: check settings, fill cutsq, init GPU library, request neighbor list
 ------------------------------------------------------------------------- */
 
 void PairColloidGPU::init_style()
 {
   if (force->newton_pair) 
     error->all(FLERR,"Cannot use newton pair with colloid/gpu pair style");
 
   // Fill cutsq here: the regular cutsq calculation is done only after init_style, but colloid_gpu_init() below needs it
   double maxcut = -1.0;  // largest squared cutoff found in the loop
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;  // store the square of what init_one() returned
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   double cell_size = sqrt(maxcut) + neighbor->skin;  // largest cutoff plus the neighbor skin
 
   int **_form = NULL;  // temporary int table (0/1/2) mirroring form[][], handed to the library below
   int n=atom->ntypes;
   memory->create(_form,n+1,n+1,"colloid/gpu:_form");
   for (int i = 1; i <= n; i++) {  // NOTE(review): row 0 and column 0 of _form are never written
     for (int j = 1; j <= n; j++) {
       if (form[i][j] == SMALL_SMALL) _form[i][j] = 0;
       else if (form[i][j] == SMALL_LARGE) _form[i][j] = 1;
       else if (form[i][j] == LARGE_LARGE) _form[i][j] = 2;  // any other form value leaves _form[i][j] unset
     }
   }
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   int success = colloid_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
                                  offset, force->special_lj, a12, a1, a2, 
                                  d1, d2, sigma3, sigma6, _form, atom->nlocal,
                                  atom->nlocal+atom->nghost, 300, maxspecial,
                                  cell_size, gpu_mode, screen);  // the literal 300 is the max_nbors argument; gpu_mode is passed by reference
   memory->destroy(_form);  // the table is released right after the init call
   GPU_EXTRA::check_flag(success,error,world);
 
   if (gpu_mode == GPU_FORCE) {  // tested after the init call, which may have changed gpu_mode
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;  // ask for a full list rather than a half list
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 // returns Pair::memory_usage() plus the figure reported by colloid_gpu_bytes()
 double PairColloidGPU::memory_usage()
 {
   double bytes = Pair::memory_usage();
   return bytes + colloid_gpu_bytes();
 }
 
 /* ---------------------------------------------------------------------- */
 // host-side force/energy loop over list entries start..inum-1; forces are added to f[i] only
 void PairColloidGPU::cpu_compute(int start, int inum, int eflag, int vflag, 
                                  int *ilist, int *numneigh, int **firstneigh) 
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;  // evdwl and fpair are declared without initial values
   double r,rsq,r2inv,r6inv,forcelj,factor_lj;
   double c1,c2,fR,dUR,dUA;
   double K[9],h[4],g[4];  // scratch arrays reused by the SMALL_LARGE and LARGE_LARGE cases
   int *jlist;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   double *special_lj = force->special_lj;
 
   // loop over neighbors of my atoms (the vflag argument is not referenced below; evflag gates the tally)
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];  // sbmask(j) selects the scaling entry for this neighbor
       j &= NEIGHMASK;  // keep only the NEIGHMASK bits; j is used as an atom index from here on
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq >= cutsq[itype][jtype]) continue;
 
       switch (form[itype][jtype]) {  // no default case: fpair stays unset if form is none of the three values
       case SMALL_SMALL: 
         r2inv = 1.0/rsq;
         r6inv = r2inv*r2inv*r2inv;
         forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
         fpair = factor_lj*forcelj*r2inv;
         if (eflag) 
           evdwl = r6inv*(r6inv*lj3[itype][jtype]-lj4[itype][jtype]) -
             offset[itype][jtype];
         break;
 
       case SMALL_LARGE:
         c2 = a2[itype][jtype];
         K[1] = c2*c2;
         K[2] = rsq;
         K[0] = K[1] - rsq;
         K[4] = rsq*rsq;
         K[3] = K[1] - K[2];
         K[3] *= K[3]*K[3];  // K[3] is now (K[1]-K[2]) cubed
         K[6] = K[3]*K[3];   // sixth power of (K[1]-K[2])
         fR = sigma3[itype][jtype]*a12[itype][jtype]*c2*K[1]/K[3];
         fpair = 4.0/15.0*fR*factor_lj * 
           (2.0*(K[1]+K[2]) * (K[1]*(5.0*K[1]+22.0*K[2])+5.0*K[4]) * 
           sigma6[itype][jtype]/K[6]-5.0) / K[0];
         if (eflag) 
           evdwl = 2.0/9.0*fR * 
             (1.0-(K[1]*(K[1]*(K[1]/3.0+3.0*K[2])+4.2*K[4])+K[2]*K[4]) *
             sigma6[itype][jtype]/K[6]) - offset[itype][jtype];
         if (rsq <= K[1])  // overlap test runs after the arithmetic above (divisions by K[0], K[3], K[6])
           error->one(FLERR,"Overlapping small/large in pair colloid");
         break;
 
       case LARGE_LARGE:
         r = sqrt(rsq);
         c1 = a1[itype][jtype];
         c2 = a2[itype][jtype];
         K[0] = c1*c2;
         K[1] = c1+c2;
         K[2] = c1-c2;
         K[3] = K[1]+r;
         K[4] = K[1]-r;
         K[5] = K[2]+r;
         K[6] = K[2]-r;
         K[7] = 1.0/(K[3]*K[4]);
         K[8] = 1.0/(K[5]*K[6]);
         g[0] = pow(K[3],-7.0);
         g[1] = pow(K[4],-7.0);
         g[2] = pow(K[5],-7.0);
         g[3] = pow(K[6],-7.0);
         h[0] = ((K[3]+5.0*K[1])*K[3]+30.0*K[0])*g[0];  // h[] is built from g[] before g[] is rescaled below
         h[1] = ((K[4]+5.0*K[1])*K[4]+30.0*K[0])*g[1];
         h[2] = ((K[5]+5.0*K[2])*K[5]-30.0*K[0])*g[2];
         h[3] = ((K[6]+5.0*K[2])*K[6]-30.0*K[0])*g[3];
         g[0] *= 42.0*K[0]/K[3]+6.0*K[1]+K[3];
         g[1] *= 42.0*K[0]/K[4]+6.0*K[1]+K[4];
         g[2] *= -42.0*K[0]/K[5]+6.0*K[2]+K[5];
         g[3] *= -42.0*K[0]/K[6]+6.0*K[2]+K[6];
 
         fR = a12[itype][jtype]*sigma6[itype][jtype]/r/37800.0;
         evdwl = fR * (h[0]-h[1]-h[2]+h[3]);  // assigned even when eflag is 0 because dUR on the next line uses it
         dUR = evdwl/r + 5.0*fR*(g[0]+g[1]-g[2]-g[3]);
         dUA = -a12[itype][jtype]/3.0*r*((2.0*K[0]*K[7]+1.0)*K[7] + 
           (2.0*K[0]*K[8]-1.0)*K[8]);
         fpair = factor_lj * (dUR+dUA)/r;
         if (eflag)
           evdwl += a12[itype][jtype]/6.0 * 
             (2.0*K[0]*(K[7]+K[8])-log(K[8]/K[7])) - offset[itype][jtype];
         if (r <= K[1])  // overlap test runs after the arithmetic above
           error->one(FLERR,"Overlapping large/large in pair colloid");
         break;
       }
       
       if (eflag) evdwl *= factor_lj;  // the special-bond factor is applied once here for all three cases
 
       f[i][0] += delx*fpair;  // nothing is written to f[j] in this loop
       f[i][1] += dely*fpair;
       f[i][2] += delz*fpair;
 
       if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);  // NOTE(review): with eflag 0, evdwl is unset in the SMALL_* cases
     }
   }
 }
diff --git a/src/GPU/pair_coul_cut_gpu.cpp b/src/GPU/pair_coul_cut_gpu.cpp
index fb0edafae..04345b920 100644
--- a/src/GPU/pair_coul_cut_gpu.cpp
+++ b/src/GPU/pair_coul_cut_gpu.cpp
@@ -1,247 +1,247 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_coul_cut_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 // One-time setup, called from init_style(). The int result is handed to
 // GPU_EXTRA::check_flag() there; gpu_mode is taken by reference and is
 // tested by the caller afterwards.
 int coul_gpu_init(const int ntypes, double **host_scale, double **cutsq,
                   double *special_coul, const int nlocal,
                   const int nall, const int max_nbors, const int maxspecial,
                   const double cell_size, int &gpu_mode, FILE *screen,
                   const double qqrd2e);
 // Called from reinit() with the current scale table.
 void coul_gpu_reinit(const int ntypes, double **host_scale);
 // Called from the destructor.
 void coul_gpu_clear();
 // Used by compute() when gpu_mode != GPU_FORCE: the returned int** becomes
 // firstneigh there, and ilist, jnum and host_start are filled in through the
 // pointer/reference parameters; success is reported through its reference.
 int ** coul_gpu_compute_n(const int ago, const int inum,
                          const int nall, double **host_x, int *host_type,
                          double *sublo, double *subhi, tagint *tag, int **nspecial,
                          tagint **special, const bool eflag, const bool vflag,
                          const bool eatom, const bool vatom, int &host_start,
                          int **ilist, int **jnum, const double cpu_time,
                          bool &success, double *host_q, double *boxlo,
                          double *prd);
 // Used by compute() when gpu_mode == GPU_FORCE: receives the host neighbor
 // list arrays; host_start and success are outputs read by the caller.
 void coul_gpu_compute(const int ago, const int inum,
                       const int nall, double **host_x, int *host_type,
                      int *ilist, int *numj, int **firstneigh,
                      const bool eflag, const bool vflag, const bool eatom,
                      const bool vatom, int &host_start, const double cpu_time,
                      bool &success, double *host_q, const int nlocal,
                      double *boxlo, double *prd);
 // Value added to Pair::memory_usage() in memory_usage().
 double coul_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 // Construct on top of PairCoulCut, starting in GPU_FORCE mode.
 PairCoulCutGPU::PairCoulCutGPU(LAMMPS *lmp) :
   PairCoulCut(lmp),
   gpu_mode(GPU_FORCE)
 {
   // no host-side time has been measured yet
   cpu_time = 0.0;
   respa_enable = 0;
 
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairCoulCutGPU::~PairCoulCutGPU()
 {
   // the only work done here is the call into the external GPU library
   coul_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairCoulCutGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
 
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
   if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = coul_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                    atom->type, domain->sublo, domain->subhi,
                                    atom->tag, atom->nspecial, atom->special,
                                    eflag, vflag, eflag_atom, vflag_atom,
                                    host_start, &ilist, &numneigh, cpu_time,
                                    success, atom->q, domain->boxlo,
                                    domain->prd);
   } else {
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     coul_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                     vflag_atom, host_start, cpu_time, success, atom->q,
                     atom->nlocal, domain->boxlo, domain->prd);
   }
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   if (host_start<inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 // Validates prerequisites (per-atom charge, newton pair off), fills cutsq,
 // initializes the accelerator library, and in GPU_FORCE mode requests a
 // full neighbor list.
 void PairCoulCutGPU::init_style()
 {
   if (!atom->q_flag)
     error->all(FLERR,"Pair style coul/cut/gpu requires atom attribute q");
 
   if (force->newton_pair)
     error->all(FLERR,"Cannot use newton pair with coul/cut/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   // maxcut tracks the largest squared cutoff seen over all type pairs
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       // a pair counts as set if given explicitly or if both diagonals are set
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   // NOTE(review): if no pair passes the test above, maxcut stays at -1.0 and
   // sqrt() is applied to a negative value -- confirm this cannot happen.
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // the literal 300 lands in the max_nbors parameter of coul_gpu_init()
   int success = coul_gpu_init(atom->ntypes+1, scale, cutsq,
                              force->special_coul, atom->nlocal,
                              atom->nlocal+atom->nghost, 300, maxspecial,
                              cell_size, gpu_mode, screen, force->qqrd2e);
   GPU_EXTRA::check_flag(success,error,world);
 
   // gpu_mode was passed by reference above; only GPU_FORCE needs a host list
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Run the base-class reinit, then pass the scale table to the GPU library.
 void PairCoulCutGPU::reinit()
 {
   Pair::reinit();
 
   const int table_dim = atom->ntypes+1;
   coul_gpu_reinit(table_dim, scale);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Sum of the base-class figure and the value reported by coul_gpu_bytes().
 double PairCoulCutGPU::memory_usage()
 {
   double total = Pair::memory_usage();
   total += coul_gpu_bytes();
   return total;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairCoulCutGPU::cpu_compute(int start, int inum, int eflag, int vflag,
                                       int *ilist, int *numneigh,
                                       int **firstneigh)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
   double rsq,r2inv,forcecoul,factor_coul;
   int *jlist;
 
   ecoul = 0.0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   double *special_coul = force->special_coul;
   double qqrd2e = force->qqrd2e;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
         fpair = factor_coul*forcecoul * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           ecoul = factor_coul * qqrd2e * qtmp*q[j]*sqrt(r2inv);
         }
 
         if (evflag) ev_tally_full(i,0.0,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_coul_debye_gpu.cpp b/src/GPU/pair_coul_debye_gpu.cpp
index e4aa503dc..19ae6b316 100644
--- a/src/GPU/pair_coul_debye_gpu.cpp
+++ b/src/GPU/pair_coul_debye_gpu.cpp
@@ -1,255 +1,255 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
    
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
    
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen (ndtrung@umich.edu)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_coul_debye_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 // One-time setup, called from init_style(). The int result is handed to
 // GPU_EXTRA::check_flag() there; gpu_mode is taken by reference and is
 // tested by the caller afterwards.
 int cdebye_gpu_init(const int ntypes, double **host_scale, double **cutsq,
                     double *special_coul, const int nlocal, const int nall,
                     const int max_nbors, const int maxspecial,
                     const double cell_size, int &gpu_mode, FILE *screen,
                     const double qqrd2e, const double kappa);
 // Called from reinit() with the current scale table.
 void cdebye_gpu_reinit(const int ntypes, double **host_scale);
 // Called from the destructor.
 void cdebye_gpu_clear();
 // Used by compute() when gpu_mode != GPU_FORCE: the returned int** becomes
 // firstneigh there, and ilist, jnum and host_start are filled in through the
 // pointer/reference parameters; success is reported through its reference.
 int ** cdebye_gpu_compute_n(const int ago, const int inum, const int nall, 
                           double **host_x, int *host_type, 
                           double *sublo, double *subhi, tagint *tag, int **nspecial,
                           tagint **special, const bool eflag, const bool vflag,
                           const bool eatom, const bool vatom, int &host_start,
                           int **ilist, int **jnum, const double cpu_time,
                           bool &success, double *host_q, double *boxlo,
                           double *prd);
 // Used by compute() when gpu_mode == GPU_FORCE: receives the host neighbor
 // list arrays; host_start and success are outputs read by the caller.
 void cdebye_gpu_compute(const int ago, const int inum, const int nall, 
                       double **host_x, int *host_type,
                       int *ilist, int *numj, int **firstneigh,
                       const bool eflag, const bool vflag, const bool eatom,
                       const bool vatom, int &host_start, const double cpu_time,
                       bool &success, double *host_q, const int nlocal,
                       double *boxlo, double *prd);
 // Value added to Pair::memory_usage() in memory_usage().
 double cdebye_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 // Construct on top of PairCoulDebye, starting in GPU_FORCE mode.
 PairCoulDebyeGPU::PairCoulDebyeGPU(LAMMPS *lmp) :
   PairCoulDebye(lmp),
   gpu_mode(GPU_FORCE)
 {
   // no host-side time has been measured yet
   cpu_time = 0.0;
   respa_enable = 0;
 
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairCoulDebyeGPU::~PairCoulDebyeGPU()
 {
   // the only work done here is the call into the external GPU library
   cdebye_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairCoulDebyeGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
   
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;  
   if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = cdebye_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                     atom->type, domain->sublo, domain->subhi,
                                     atom->tag, atom->nspecial, atom->special,
                                     eflag, vflag, eflag_atom, vflag_atom,
                                     host_start, &ilist, &numneigh, cpu_time,
                                     success, atom->q, domain->boxlo, 
                                     domain->prd);
   } else {
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     cdebye_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                      ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                      vflag_atom, host_start, cpu_time, success, atom->q,
                      atom->nlocal, domain->boxlo, domain->prd);
   }
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   if (host_start<inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 // Validates prerequisites (per-atom charge, newton pair off), fills cutsq,
 // initializes the accelerator library, and in GPU_FORCE mode requests a
 // full neighbor list.
 void PairCoulDebyeGPU::init_style()
 {
   if (!atom->q_flag)
     error->all(FLERR,"Pair style coul/debye/gpu requires atom attribute q");
 
   if (force->newton_pair) 
     error->all(FLERR,"Cannot use newton pair with coul/debye/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   // maxcut tracks the largest squared cutoff seen over all type pairs
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       // a pair counts as set if given explicitly or if both diagonals are set
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   // NOTE(review): if no pair passes the test above, maxcut stays at -1.0 and
   // sqrt() is applied to a negative value -- confirm this cannot happen.
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // the literal 300 lands in the max_nbors parameter of cdebye_gpu_init()
   int success = cdebye_gpu_init(atom->ntypes+1, scale, cutsq,
                                 force->special_coul, atom->nlocal,
                                 atom->nlocal+atom->nghost, 300, maxspecial,
                                 cell_size, gpu_mode, screen,
                                 force->qqrd2e, kappa);
   GPU_EXTRA::check_flag(success,error,world);
 
   // gpu_mode was passed by reference above; only GPU_FORCE needs a host list
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Run the base-class reinit, then pass the scale table to the GPU library.
 void PairCoulDebyeGPU::reinit()
 {
   Pair::reinit();
 
   const int table_dim = atom->ntypes+1;
   cdebye_gpu_reinit(table_dim, scale);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Sum of the base-class figure and the value reported by cdebye_gpu_bytes().
 double PairCoulDebyeGPU::memory_usage()
 {
   double total = Pair::memory_usage();
   total += cdebye_gpu_bytes();
   return total;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairCoulDebyeGPU::cpu_compute(int start, int inum, int eflag, 
                                         int vflag, int *ilist,
                                         int *numneigh, int **firstneigh)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
   double rsq,r2inv,forcecoul,factor_coul;
   double r,rinv,screening;
   int *jlist;
 
   ecoul = 0.0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   double *special_coul = force->special_coul;
   double qqrd2e = force->qqrd2e;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         r = sqrt(rsq);
         rinv = 1.0/r;
         screening = exp(-kappa*r);
         forcecoul = qqrd2e * scale[itype][jtype] *
           qtmp*q[j] * screening * (kappa + rinv);
         fpair = factor_coul*forcecoul * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           ecoul = factor_coul * qqrd2e * scale[itype][jtype] *
             qtmp*q[j] * rinv * screening;
         }
 
         if (evflag) ev_tally_full(i,0.0,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_coul_dsf_gpu.cpp b/src/GPU/pair_coul_dsf_gpu.cpp
index 3fd13ccad..8b5985e8c 100644
--- a/src/GPU/pair_coul_dsf_gpu.cpp
+++ b/src/GPU/pair_coul_dsf_gpu.cpp
@@ -1,269 +1,269 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_coul_dsf_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 // MY_PIS is numerically sqrt(pi); it is used in the shift terms of
 // init_style() and in the force/self-energy expressions of cpu_compute().
 #define MY_PIS 1.77245385090551602729
 // EWALD_F is numerically 2/sqrt(pi); it is not referenced in the visible
 // part of this file.
 #define EWALD_F   1.12837917
 // EWALD_P and A1..A5 feed the expression
 //   t = 1/(1 + EWALD_P*alpha*r);  t*(A1+t*(A2+t*(A3+t*(A4+t*A5)))) * exp(...)
 // evaluated for erfcc in cpu_compute() -- presumably a polynomial
 // approximation of erfc; confirm against the reference for these constants.
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 // One-time setup, called from init_style() with the squared Coulomb cutoff
 // and the shift terms computed there. The int result is handed to
 // GPU_EXTRA::check_flag(); gpu_mode is taken by reference and is tested by
 // the caller afterwards.
 int cdsf_gpu_init(const int ntypes, const int nlocal, const int nall,
                   const int max_nbors, const int maxspecial,
                   const double cell_size, int &gpu_mode, FILE *screen,
                   const double host_cut_coulsq,
                   double *host_special_coul, const double qqrd2e, 
                   const double e_shift, const double f_shift, 
                   const double alpha);
 // Called from the destructor.
 void cdsf_gpu_clear();
 // Used by compute() when gpu_mode != GPU_FORCE: the returned int** becomes
 // firstneigh there, and ilist, jnum and host_start are filled in through the
 // pointer/reference parameters; success is reported through its reference.
 int ** cdsf_gpu_compute_n(const int ago, const int inum,
                           const int nall, double **host_x, int *host_type,
                           double *sublo, double *subhi, tagint *tag, int **nspecial,
                           tagint **special, const bool eflag, const bool vflag,
                           const bool eatom, const bool vatom, int &host_start,
                           int **ilist, int **jnum, const double cpu_time,
                           bool &success, double *host_q, double *boxlo,
                           double *prd);
 // Used by compute() when gpu_mode == GPU_FORCE: receives the host neighbor
 // list arrays; host_start and success are outputs read by the caller.
 void cdsf_gpu_compute(const int ago, const int inum,
                       const int nall, double **host_x, int *host_type,
                       int *ilist, int *numj, int **firstneigh,
                       const bool eflag, const bool vflag, const bool eatom,
                       const bool vatom, int &host_start, const double cpu_time,
                       bool &success, double *host_q, const int nlocal,
                       double *boxlo, double *prd);
 // Value added to Pair::memory_usage() in memory_usage().
 double cdsf_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 // Construct on top of PairCoulDSF, starting in GPU_FORCE mode.
 PairCoulDSFGPU::PairCoulDSFGPU(LAMMPS *lmp) :
   PairCoulDSF(lmp),
   gpu_mode(GPU_FORCE)
 {
   // no host-side time has been measured yet
   cpu_time = 0.0;
   reinitflag = 0;
   respa_enable = 0;
 
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairCoulDSFGPU::~PairCoulDSFGPU()
 {
   // the only work done here is the call into the external GPU library
   cdsf_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairCoulDSFGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
 
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
   if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = cdsf_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                     atom->type, domain->sublo, domain->subhi,
                                     atom->tag, atom->nspecial, atom->special,
                                     eflag, vflag, eflag_atom, vflag_atom,
                                     host_start, &ilist, &numneigh, cpu_time,
                                     success, atom->q, domain->boxlo,
                                     domain->prd);
   } else {
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     cdsf_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                      ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                      vflag_atom, host_start, cpu_time, success, atom->q,
                      atom->nlocal, domain->boxlo, domain->prd);
   }
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   if (host_start<inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 // Validates prerequisites (per-atom charge, newton pair off), fills cutsq,
 // computes the energy/force shift terms at the Coulomb cutoff, initializes
 // the accelerator library, and in GPU_FORCE mode requests a full neighbor
 // list.
 void PairCoulDSFGPU::init_style()
 {
   if (!atom->q_flag)
     error->all(FLERR,"Pair style coul/dsf/gpu requires atom attribute q");
 
   if (force->newton_pair)
     error->all(FLERR,"Cannot use newton pair with coul/dsf/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   // maxcut tracks the largest squared cutoff seen over all type pairs
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       // a pair counts as set if given explicitly or if both diagonals are set
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   // NOTE(review): if no pair passes the test above, maxcut stays at -1.0 and
   // sqrt() is applied to a negative value -- confirm this cannot happen.
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   // shift terms, all evaluated at r = cut_coul; both are later passed to
   // cdsf_gpu_init() and read by cpu_compute()
   cut_coulsq = cut_coul * cut_coul;
   double erfcc = erfc(alpha*cut_coul); 
   double erfcd = exp(-alpha*alpha*cut_coul*cut_coul);
   f_shift = -(erfcc/cut_coulsq + 2.0/MY_PIS*alpha*erfcd/cut_coul); 
   e_shift = erfcc/cut_coul - f_shift*cut_coul; 
 
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // the literal 300 lands in the max_nbors parameter of cdsf_gpu_init()
   int success = cdsf_gpu_init(atom->ntypes+1, atom->nlocal,
                               atom->nlocal+atom->nghost, 300, maxspecial,
                               cell_size, gpu_mode, screen, cut_coulsq,
                               force->special_coul, force->qqrd2e, e_shift,
                               f_shift, alpha);
   GPU_EXTRA::check_flag(success,error,world);
 
   // gpu_mode was passed by reference above; only GPU_FORCE needs a host list
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Sum of the base-class figure and the value reported by cdsf_gpu_bytes().
 double PairCoulDSFGPU::memory_usage()
 {
   double total = Pair::memory_usage();
   total += cdsf_gpu_bytes();
   return total;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairCoulDSFGPU::cpu_compute(int start, int inum, int eflag, int vflag,
                                  int *ilist, int *numneigh,
                                  int **firstneigh)
 {
   int i,j,ii,jj,jnum;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
   double r,rsq,r2inv,forcecoul,factor_coul;
   double prefactor,erfcc,erfcd,e_self,t;
   int *jlist;
 
   ecoul = 0.0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double qqrd2e = force->qqrd2e;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     if (evflag) {
       e_self = -(e_shift/2.0 + alpha/MY_PIS) * qtmp*qtmp*qqrd2e;
       ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0);
     }
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cut_coulsq) {
         r2inv = 1.0/rsq;
         r = sqrt(rsq);
         prefactor = factor_coul * qqrd2e*qtmp*q[j]/r;
         erfcd = exp(-alpha*alpha*r*r);
         t = 1.0 / (1.0 + EWALD_P*alpha*r);
         erfcc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * erfcd;
         forcecoul = prefactor * (erfcc/r + 2.0*alpha/MY_PIS * erfcd + 
           r*f_shift) * r;
 
         fpair = forcecoul * r2inv;
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           if (rsq < cut_coulsq) {
             ecoul = prefactor * (erfcc - r*e_shift - rsq*f_shift);
           } else ecoul = 0.0;
         }
 
         if (evflag) ev_tally_full(i,0.0,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_coul_long_gpu.cpp b/src/GPU/pair_coul_long_gpu.cpp
index 76ee538da..9bc528607 100644
--- a/src/GPU/pair_coul_long_gpu.cpp
+++ b/src/GPU/pair_coul_long_gpu.cpp
@@ -1,288 +1,288 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_coul_long_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "kspace.h"
 #include "gpu_extra.h"
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 // Called once from init_style(); the returned status is handed to
 // GPU_EXTRA::check_flag().  gpu_mode is taken by reference and is tested
 // by the caller afterwards to decide whether a host neighbor list is needed.
 int cl_gpu_init(const int ntypes, double **scale,
                 const int nlocal, const int nall, const int max_nbors,
                 const int maxspecial, const double cell_size, int &gpu_mode,
                 FILE *screen, double host_cut_coulsq, double *host_special_coul,
                 const double qqrd2e, const double g_ewald);
 // Called from reinit() after Pair::reinit().
 void cl_gpu_reinit(const int ntypes, double **scale);
 // Called from the destructor.
 void cl_gpu_clear();
 // Used by compute() when gpu_mode != GPU_FORCE.  The return value is used
 // as firstneigh; ilist and jnum are filled through the pointer arguments,
 // host_start and success through the reference arguments.
 int ** cl_gpu_compute_n(const int ago, const int inum,
                         const int nall, double **host_x, int *host_type,
                         double *sublo, double *subhi, tagint *tag,
                         int **nspecial, tagint **special, const bool eflag,
                         const bool vflag, const bool eatom, const bool vatom,
                         int &host_start, int **ilist, int **jnum,
                         const double cpu_time, bool &success, double *host_q,
                         double *boxlo, double *prd);
 // Used by compute() when gpu_mode == GPU_FORCE; takes the caller's
 // ilist/numj/firstneigh arrays and reports host_start and success through
 // the reference arguments.
 void cl_gpu_compute(const int ago, const int inum, const int nall,
                     double **host_x, int *host_type, int *ilist, int *numj,
                     int **firstneigh, const bool eflag, const bool vflag,
                     const bool eatom, const bool vatom, int &host_start,
                     const double cpu_time, bool &success, double *host_q,
                     const int nlocal, double *boxlo, double *prd);
 // Its result is added to Pair::memory_usage() in memory_usage().
 double cl_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 // Construct on top of PairCoulLong with gpu_mode starting at GPU_FORCE,
 // clear the host timer and the respa_enable flag, then run the
 // GPU_EXTRA::gpu_ready() check.
 PairCoulLongGPU::PairCoulLongGPU(LAMMPS *lmp)
   : PairCoulLong(lmp),
     gpu_mode(GPU_FORCE)
 {
   // no host-side force time has been measured yet
   cpu_time = 0.0;
   respa_enable = 0;

   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 // Destructor: the only work done here is the call into the accelerator
 // library's cl_gpu_clear().
 PairCoulLongGPU::~PairCoulLongGPU()
 {
   cl_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairCoulLongGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
 
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
   if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = cl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                   atom->type, domain->sublo, domain->subhi,
                                   atom->tag, atom->nspecial, atom->special,
                                   eflag, vflag, eflag_atom, vflag_atom,
                                   host_start, &ilist, &numneigh, cpu_time,
                                   success, atom->q, domain->boxlo,
                                   domain->prd);
   } else {
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     cl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                    ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                    vflag_atom, host_start, cpu_time, success, atom->q,
                    atom->nlocal, domain->boxlo, domain->prd);
   }
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   if (host_start<inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 // Checks prerequisites (atom charges, newton_pair off, a KSpace solver),
 // recomputes the squared Coulomb cutoff, initializes the accelerator
 // library through cl_gpu_init(), and requests a full neighbor list when
 // gpu_mode is GPU_FORCE.
 void PairCoulLongGPU::init_style()
 {
   cut_respa = NULL;
 
   if (!atom->q_flag)
     error->all(FLERR,"Pair style coul/long/gpu requires atom attribute q");
   if (force->newton_pair)
     error->all(FLERR,"Cannot use newton pair with coul/long/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   double cell_size = cut_coul + neighbor->skin;
 
   cut_coulsq = cut_coul * cut_coul;
 
   // ensure use of KSpace long-range solver, set g_ewald
 
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style is incompatible with KSpace style");
   g_ewald = force->kspace->g_ewald;
 
   // setup force tables
 
   if (ncoultablebits) init_tables(cut_coul,cut_respa);
 
   // maxspecial stays 0 unless atom->molecular is set
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // the literal 300 is the max_nbors argument of cl_gpu_init()
   int success = cl_gpu_init(atom->ntypes+1, scale,
                             atom->nlocal, atom->nlocal+atom->nghost, 300,
                             maxspecial, cell_size, gpu_mode, screen, cut_coulsq,
                             force->special_coul, force->qqrd2e, g_ewald);
 
   GPU_EXTRA::check_flag(success,error,world);
 
   // in GPU_FORCE mode ask for a full (half = 0, full = 1) neighbor list
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Runs the base-class Pair::reinit() first, then passes the type count
 // (atom->ntypes+1) and the scale array to cl_gpu_reinit().
 void PairCoulLongGPU::reinit()
 {
   Pair::reinit();
   
   cl_gpu_reinit(atom->ntypes+1, scale);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Total reported memory: the base Pair figure plus whatever the
 // accelerator library reports through cl_gpu_bytes().
 double PairCoulLongGPU::memory_usage()
 {
   const double pair_bytes = Pair::memory_usage();
   const double library_bytes = cl_gpu_bytes();
   return pair_bytes + library_bytes;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairCoulLongGPU::cpu_compute(int start, int inum, int eflag,
                                   int vflag, int *ilist, int *numneigh,
                                   int **firstneigh)
 {
   int i,j,ii,jj,jnum,itable;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
   double fraction,table;
   double r,r2inv,forcecoul,factor_coul;
   double grij,expm2,prefactor,t,erfc;
   int *jlist;
   double rsq;
 
   ecoul = 0.0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   double *special_coul = force->special_coul;
   double qqrd2e = force->qqrd2e;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       r2inv = 1.0/rsq;
 
       if (rsq < cut_coulsq) {
         if (!ncoultablebits || rsq <= tabinnersq) {
           r = sqrt(rsq);
           grij = g_ewald * r;
           expm2 = exp(-grij*grij);
           t = 1.0 / (1.0 + EWALD_P*grij);
           erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
           prefactor = qqrd2e * qtmp*q[j]/r;
           forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
           if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
         } else {
           union_int_float_t rsq_lookup;
           rsq_lookup.f = rsq;
           itable = rsq_lookup.i & ncoulmask;
           itable >>= ncoulshiftbits;
           fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
           table = ftable[itable] + fraction*dftable[itable];
           forcecoul = qtmp*q[j] * table;
           if (factor_coul < 1.0) {
             table = ctable[itable] + fraction*dctable[itable];
             prefactor = qtmp*q[j] * table;
             forcecoul -= (1.0-factor_coul)*prefactor;
           }
         }
 
         fpair = forcecoul * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           if (rsq < cut_coulsq) {
             if (!ncoultablebits || rsq <= tabinnersq)
               ecoul = prefactor*erfc;
             else {
               table = etable[itable] + fraction*detable[itable];
               ecoul = qtmp*q[j] * table;
             }
             if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
           } else ecoul = 0.0;
         }
 
         if (evflag) ev_tally_full(i,0.0,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_dpd_gpu.cpp b/src/GPU/pair_dpd_gpu.cpp
index 552e598f4..dacdaa5e5 100644
--- a/src/GPU/pair_dpd_gpu.cpp
+++ b/src/GPU/pair_dpd_gpu.cpp
@@ -1,410 +1,410 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_dpd_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "random_mars.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 // Called once from init_style(); the returned status is handed to
 // GPU_EXTRA::check_flag().  gpu_mode is taken by reference and is tested
 // by the caller afterwards.
 int dpd_gpu_init(const int ntypes, double **cutsq, double **host_a0,
                  double **host_gamma, double **host_sigma, double **host_cut,
                  double *special_lj, bool tstat_only, const int inum,
                  const int nall, const int max_nbors,  const int maxspecial,
                  const double cell_size, int &gpu_mode, FILE *screen);
 // Called from the destructor.
 void dpd_gpu_clear();
 // Used by compute() when gpu_mode != GPU_FORCE.  The return value is used
 // as firstneigh; ilist and jnum are filled through the pointer arguments,
 // host_start and success through the reference arguments.
 int ** dpd_gpu_compute_n(const int ago, const int inum_full, const int nall,
                          double **host_x, int *host_type, double *sublo,
                          double *subhi, tagint *tag, int **nspecial,
                          tagint **special, const bool eflag, const bool vflag, 
                          const bool eatom, const bool vatom, int &host_start, 
                          int **ilist, int **jnum, const double cpu_time, bool &success,
                          double **host_v, const double dtinvsqrt, 
                          const int seed, const int timestep,
                          double *boxlo, double *prd);
 // Used by compute() when gpu_mode == GPU_FORCE; takes the caller's
 // ilist/numj/firstneigh arrays.  Note that timestep is declared int here
 // while compute() passes update->ntimestep.
 void dpd_gpu_compute(const int ago, const int inum_full, const int nall,
                      double **host_x, int *host_type, int *ilist, int *numj,
                      int **firstneigh, const bool eflag, const bool vflag,
                      const bool eatom, const bool vatom, int &host_start,
                      const double cpu_time, bool &success, tagint *tag,
                      double **host_v, const double dtinvsqrt, 
                      const int seed, const int timestep,
                      const int nlocal, double *boxlo, double *prd);
 // Its result is added to Pair::memory_usage() in memory_usage().
 double dpd_gpu_bytes();
 
 #define EPSILON 1.0e-10
 
 //#define _USE_UNIFORM_SARU_LCG
 //#define _USE_UNIFORM_SARU_TEA8
 //#define _USE_GAUSSIAN_SARU_LCG
 
 #if !defined(_USE_UNIFORM_SARU_LCG) && !defined(_USE_UNIFORM_SARU_TEA8) && !defined(_USE_GAUSSIAN_SARU_LCG)
 #define _USE_UNIFORM_SARU_LCG
 #endif
 
 // References: 
 // 1. Y. Afshar, F. Schmid, A. Pishevar, S. Worley, Comput. Phys. Comm. 184 (2013), 1119–1128.
 // 2. C. L. Phillips, J. A. Anderson, S. C. Glotzer, J. Comput. Phys. 230 (2011), 7191-7201.
 // PRNG period = 3666320093*2^32 ~ 2^64 ~ 10^19
 
 #define LCGA 0x4beb5d59 // Full period 32 bit LCG
 #define LCGC 0x2600e1f7
 #define oWeylPeriod 0xda879add // Prime period 3666320093
 #define oWeylOffset 0x8009d14b
 #define TWO_N32 0.232830643653869628906250e-9f /* 2^-32 */
 
 // saru(seed1, seed2, seed, timestep, randnum) -- uniform variant, LCG.
 // Specifically implemented for steps = 1; high = 1.0; low = -1.0.
 // Produces a uniformly distributed random number u in [-1.0;1.0] using
 // the inherent LCG, then multiplies u by sqrt(3) to "match" a normal
 // random distribution, and stores the result in randnum.
 // Afshar et al. multiply u in [-0.5;0.5] by sqrt(12).
 // seed1 and seed2 are modified by the macro (compound assignments), so
 // they must be modifiable unsigned lvalues; seed and timestep are only read.
 // Curly brackets make the temporaries local to the macro's scope.
 // NOTE(review): TWO_N32 carries an 'f' suffix, so s*TWO_N32 is a float
 // product before it is widened to numtyp -- presumably intentional to
 // mirror the device code; confirm.
 #ifdef _USE_UNIFORM_SARU_LCG
 #define numtyp double
 #define SQRT3 (numtyp)1.7320508075688772935274463
 #define saru(seed1, seed2, seed, timestep, randnum) {                         \
   unsigned int seed3 = seed + timestep;                                       \
   seed3^=(seed1<<7)^(seed2>>6);                                               \
   seed2+=(seed1>>4)^(seed3>>15);                                              \
   seed1^=(seed2<<9)+(seed3<<8);                                               \
   seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1));                               \
   seed2+=0x72BE1579*((seed1<<4)  ^ (seed3>>16));                              \
   seed1^=0x3F38A6ED*((seed3>>5)  ^ (((signed int)seed2)>>22));                \
   seed2+=seed1*seed3;                                                         \
   seed1+=seed3 ^ (seed2>>2);                                                  \
   seed2^=((signed int)seed2)>>17;                                             \
   unsigned int state  = 0x79dedea3*(seed1^(((signed int)seed1)>>14));         \
   unsigned int wstate = (state + seed2) ^ (((signed int)state)>>8);           \
   state  = state + (wstate*(wstate^0xdddf97f5));                              \
   wstate = 0xABCB96F7 + (wstate>>1);                                          \
   state = LCGA*state + LCGC;                                                  \
   wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod);   \
   unsigned int v = (state ^ (state>>26)) + wstate;                            \
   unsigned int s = (signed int)((v^(v>>20))*0x6957f5a7);                      \
   randnum = SQRT3*(s*TWO_N32*(numtyp)2.0-(numtyp)1.0);                        \
 }
 #endif
 
 // saru(seed1, seed2, seed, timestep, randnum) -- uniform variant, TEA8.
 // Specifically implemented for steps = 1; high = 1.0; low = -1.0.
 // Produces a uniformly distributed random number u in [-1.0;1.0] using
 // TEA8 ("rounds" = 8 mixing iterations with keys k0..k3 and "delta"),
 // then multiplies u by sqrt(3) to "match" a normal random distribution,
 // and stores the result in randnum.
 // Afshar et al. multiply u in [-0.5;0.5] by sqrt(12).
 // seed1 and seed2 are modified by the macro, so they must be modifiable
 // unsigned lvalues.  The loop counter inside the macro is named i and
 // shadows any i of the calling scope for the duration of the loop.
 #ifdef _USE_UNIFORM_SARU_TEA8
 #define numtyp double
 #define SQRT3 (numtyp)1.7320508075688772935274463
 #define k0 0xA341316C
 #define k1 0xC8013EA4
 #define k2 0xAD90777D
 #define k3 0x7E95761E
 #define delta 0x9e3779b9
 #define rounds 8
 #define saru(seed1, seed2, seed, timestep, randnum) {                         \
   unsigned int seed3 = seed + timestep;                                       \
   seed3^=(seed1<<7)^(seed2>>6);                                               \
   seed2+=(seed1>>4)^(seed3>>15);                                              \
   seed1^=(seed2<<9)+(seed3<<8);                                               \
   seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1));                               \
   seed2+=0x72BE1579*((seed1<<4)  ^ (seed3>>16));                              \
   seed1^=0x3F38A6ED*((seed3>>5)  ^ (((signed int)seed2)>>22));                \
   seed2+=seed1*seed3;                                                         \
   seed1+=seed3 ^ (seed2>>2);                                                  \
   seed2^=((signed int)seed2)>>17;                                             \
   unsigned int state  = 0x79dedea3*(seed1^(((signed int)seed1)>>14));         \
   unsigned int wstate = (state + seed2) ^ (((signed int)state)>>8);           \
   state  = state + (wstate*(wstate^0xdddf97f5));                              \
   wstate = 0xABCB96F7 + (wstate>>1);                                          \
   unsigned int sum = 0;                                                       \
   for (int i=0; i < rounds; i++) {                                            \
     sum += delta;                                                             \
     state += ((wstate<<4) + k0)^(wstate + sum)^((wstate>>5) + k1);            \
     wstate += ((state<<4) + k2)^(state + sum)^((state>>5) + k3);              \
   }                                                                           \
   unsigned int v = (state ^ (state>>26)) + wstate;                            \
   unsigned int s = (signed int)((v^(v>>20))*0x6957f5a7);                      \
   randnum = SQRT3*(s*TWO_N32*(numtyp)2.0-(numtyp)1.0);                        \
 }
 #endif
 
 // saru(seed1, seed2, seed, timestep, randnum) -- Gaussian variant, LCG.
 // Specifically implemented for steps = 1; high = 1.0; low = -1.0.
 // Draws two uniformly distributed random numbers r1 and r2 in [-1.0;1.0]
 // and uses the polar (Marsaglia's) method to transform them into a normal
 // random value, which is stored in randnum.
 // This is used to compare with CPU DPD using RandMars::gaussian().
 // seed1 and seed2 are modified by the macro, so they must be modifiable
 // unsigned lvalues.  The initial values given to state/wstate at their
 // declaration are overwritten on the following two lines.
 // NOTE(review): the rejection loop exits on rsq < 1.0 only, so rsq == 0.0
 // would reach log(rsq)/rsq -- presumably considered too unlikely to guard
 // against; confirm.
 #ifdef _USE_GAUSSIAN_SARU_LCG
 #define numtyp double
 #define saru(seed1, seed2, seed, timestep, randnum) {                         \
   unsigned int seed3 = seed + timestep;                                       \
   seed3^=(seed1<<7)^(seed2>>6);                                               \
   seed2+=(seed1>>4)^(seed3>>15);                                              \
   seed1^=(seed2<<9)+(seed3<<8);                                               \
   seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1));                               \
   seed2+=0x72BE1579*((seed1<<4)  ^ (seed3>>16));                              \
   seed1^=0x3F38A6ED*((seed3>>5)  ^ (((signed int)seed2)>>22));                \
   seed2+=seed1*seed3;                                                         \
   seed1+=seed3 ^ (seed2>>2);                                                  \
   seed2^=((signed int)seed2)>>17;                                             \
   unsigned int state=0x12345678;                                              \
   unsigned int wstate=12345678;                                               \
   state  = 0x79dedea3*(seed1^(((signed int)seed1)>>14));                      \
   wstate = (state + seed2) ^ (((signed int)state)>>8);                        \
   state  = state + (wstate*(wstate^0xdddf97f5));                              \
   wstate = 0xABCB96F7 + (wstate>>1);                                          \
   unsigned int v, s;                                                          \
   numtyp r1, r2, rsq;                                                         \
   while (1) {                                                                 \
     state = LCGA*state + LCGC;                                                \
     wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \
     v = (state ^ (state>>26)) + wstate;                                       \
     s = (signed int)((v^(v>>20))*0x6957f5a7);                                 \
     r1 = s*TWO_N32*(numtyp)2.0-(numtyp)1.0;                                   \
     state = LCGA*state + LCGC;                                                \
     wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \
     v = (state ^ (state>>26)) + wstate;                                       \
     s = (signed int)((v^(v>>20))*0x6957f5a7);                                 \
     r2 = s*TWO_N32*(numtyp)2.0-(numtyp)1.0;                                   \
     rsq = r1 * r1 + r2 * r2;                                                  \
     if (rsq < (numtyp)1.0) break;                                             \
   }                                                                           \
   numtyp fac = sqrt((numtyp)-2.0*log(rsq)/rsq);                               \
   randnum = r2*fac;                                                           \
 }
 #endif
 
 /* ---------------------------------------------------------------------- */
 
 // Construct on top of PairDPD with gpu_mode starting at GPU_FORCE, clear
 // the host timer and the respa_enable / reinitflag flags, then run the
 // GPU_EXTRA::gpu_ready() check.
 PairDPDGPU::PairDPDGPU(LAMMPS *lmp)
   : PairDPD(lmp),
     gpu_mode(GPU_FORCE)
 {
   // no host-side force time has been measured yet
   cpu_time = 0.0;
   reinitflag = 0;
   respa_enable = 0;

   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 // Destructor: the only work done here is the call into the accelerator
 // library's dpd_gpu_clear().
 PairDPDGPU::~PairDPDGPU()
 {
   dpd_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairDPDGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
 
   double dtinvsqrt = 1.0/sqrt(update->dt);
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
   if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = dpd_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                    atom->type, domain->sublo, domain->subhi,
                                    atom->tag, atom->nspecial, atom->special,
                                    eflag, vflag, eflag_atom, vflag_atom,
                                    host_start, &ilist, &numneigh, cpu_time,
                                    success, atom->v, dtinvsqrt, seed, 
                                    update->ntimestep,
                                    domain->boxlo, domain->prd);
   } else {
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     dpd_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                     vflag_atom, host_start, cpu_time, success, 
                     atom->tag, atom->v, dtinvsqrt, seed, 
                     update->ntimestep,
                     atom->nlocal, domain->boxlo, domain->prd);
   }
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   if (host_start<inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 // Rejects newton_pair, fills cutsq for every type pair while tracking the
 // largest squared cutoff, initializes the accelerator library through
 // dpd_gpu_init(), and requests a full neighbor list when gpu_mode is
 // GPU_FORCE.
 void PairDPDGPU::init_style()
 {
   if (force->newton_pair)
     error->all(FLERR,"Cannot use newton pair with dpd/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   double maxcut = -1.0;
   double mcut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       // a pair counts as set if (i,j) is set or both (i,i) and (j,j) are
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         mcut = init_one(i,j);
         mcut *= mcut;
         if (mcut > maxcut)
           maxcut = mcut;
         cutsq[i][j] = cutsq[j][i] = mcut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   // maxcut holds a squared distance, hence the sqrt
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   // maxspecial stays 0 unless atom->molecular is set
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // tstat_only is passed as false and max_nbors as the literal 300
   int success = dpd_gpu_init(atom->ntypes+1, cutsq, a0, gamma, sigma, 
                              cut, force->special_lj, false, atom->nlocal,
                              atom->nlocal+atom->nghost, 300, maxspecial,
                              cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
   // in GPU_FORCE mode ask for a full (half = 0, full = 1) neighbor list
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Total reported memory: the base Pair figure plus whatever the
 // accelerator library reports through dpd_gpu_bytes().
 double PairDPDGPU::memory_usage()
 {
   const double pair_bytes = Pair::memory_usage();
   const double library_bytes = dpd_gpu_bytes();
   return pair_bytes + library_bytes;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Host-side DPD force loop over list entries ii in [start,inum).
 // Only f[i] is updated for each pair (the j atom is never written), and
 // tallies go through ev_tally_full().  vflag is not read here.
 // The per-pair random number comes from the saru() macro, seeded with the
 // two atom tags in ascending order, the member seed and the timestep.
 void PairDPDGPU::cpu_compute(int start, int inum, int eflag, int vflag,
                                int *ilist, int *numneigh, int **firstneigh) {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double vxtmp,vytmp,vztmp,delvx,delvy,delvz;
   double rsq,r,rinv,dot,wd,randnum,factor_dpd;
   int *jlist;
   tagint itag,jtag;

   // evdwl is handed to ev_tally_full() whenever evflag is set, including
   // when eflag is 0 and the energy branch below never assigns it; start
   // from a defined value instead of an indeterminate one
   evdwl = 0.0;

   double **x = atom->x;
   double **v = atom->v;
   double **f = atom->f;
   int *type = atom->type;
   tagint *tag = atom->tag;
   double *special_lj = force->special_lj;
   double dtinvsqrt = 1.0/sqrt(update->dt);
   // narrowed to int, the type saru() combines with seed
   int timestep = (int)update->ntimestep;

   // loop over neighbors of my atoms

   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     vxtmp = v[i][0];
     vytmp = v[i][1];
     vztmp = v[i][2];
     itype = type[i];
     itag = tag[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];

     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // special-bond scaling is selected via sbmask(j) before masking j
       factor_dpd = special_lj[sbmask(j)];
       j &= NEIGHMASK;

       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
       jtag = tag[j];

       if (rsq < cutsq[itype][jtype]) {
         r = sqrt(rsq);
         if (r < EPSILON) continue;     // r can be 0.0 in DPD systems
         rinv = 1.0/r;
         delvx = vxtmp - v[j][0];
         delvy = vytmp - v[j][1];
         delvz = vztmp - v[j][2];
         dot = delx*delvx + dely*delvy + delz*delvz;
         wd = 1.0 - r/cut[itype][jtype];

         // order the tags so (i,j) and (j,i) feed saru() the same inputs;
         // saru() modifies its first two arguments, hence the local copies
         unsigned int tag1=itag, tag2=jtag;
         if (tag1 > tag2) {
           tag1 = jtag; tag2 = itag;
         }

         randnum = 0.0;
         saru(tag1, tag2, seed, timestep, randnum);

         // conservative force = a0 * wd
         // drag force = -gamma * wd^2 * (delx dot delv) / r
         // random force = sigma * wd * rnd * dtinvsqrt;

         fpair = a0[itype][jtype]*wd;
         fpair -= gamma[itype][jtype]*wd*wd*dot*rinv;
         fpair += sigma[itype][jtype]*wd*randnum*dtinvsqrt;
         fpair *= factor_dpd*rinv;

         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;

         if (eflag) {
           // unshifted eng of conservative term:
           // evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/cut[itype][jtype]);
           // eng shifted to 0.0 at cutoff
           evdwl = 0.5*a0[itype][jtype]*cut[itype][jtype] * wd*wd;
           evdwl *= factor_dpd;
         }

         if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_dpd_tstat_gpu.cpp b/src/GPU/pair_dpd_tstat_gpu.cpp
index 84fdf4366..b5233027f 100644
--- a/src/GPU/pair_dpd_tstat_gpu.cpp
+++ b/src/GPU/pair_dpd_tstat_gpu.cpp
@@ -1,418 +1,418 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_dpd_tstat_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "random_mars.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 int dpd_gpu_init(const int ntypes, double **cutsq, double **host_a0,
                  double **host_gamma, double **host_sigma, double **host_cut,
                  double *special_lj, bool tstat_only, const int inum,
                  const int nall, const int max_nbors,  const int maxspecial,
                  const double cell_size, int &gpu_mode, FILE *screen);
 void dpd_gpu_clear();
 int ** dpd_gpu_compute_n(const int ago, const int inum_full, const int nall,
                          double **host_x, int *host_type, double *sublo,
                          double *subhi, tagint *tag, int **nspecial,
                          tagint **special, const bool eflag, const bool vflag, 
                          const bool eatom, const bool vatom, int &host_start, 
                          int **ilist, int **jnum, const double cpu_time, bool &success,
                          double **host_v, const double dtinvsqrt,
                          const int seed, const int timestep,
                          double *boxlo, double *prd);
 void dpd_gpu_compute(const int ago, const int inum_full, const int nall,
                      double **host_x, int *host_type, int *ilist, int *numj,
                      int **firstneigh, const bool eflag, const bool vflag,
                      const bool eatom, const bool vatom, int &host_start,
                      const double cpu_time, bool &success, tagint *tag,
                      double **host_v, const double dtinvsqrt, 
                      const int seed, const int timestep,
                      const int nlocal, double *boxlo, double *prd);
 void dpd_gpu_update_coeff(int ntypes, double **host_a0, double **host_gamma,
                           double **host_sigma, double **host_cut);
 double dpd_gpu_bytes();
 
 // Pair separations r below EPSILON are skipped in cpu_compute() before
 // 1/r is formed.
 #define EPSILON 1.0e-10
 
 // Selection of the saru() random number macro used by cpu_compute():
 // uncomment one of the three defines below to pick a variant.
 //#define _USE_UNIFORM_SARU_LCG
 //#define _USE_UNIFORM_SARU_TEA8
 //#define _USE_GAUSSIAN_SARU_LCG
 
 // If none of the variants was selected, fall back to the uniform LCG one.
 #if !defined(_USE_UNIFORM_SARU_LCG) && !defined(_USE_UNIFORM_SARU_TEA8) && !defined(_USE_GAUSSIAN_SARU_LCG)
 #define _USE_UNIFORM_SARU_LCG
 #endif
 
 // References: 
 // 1. Y. Afshar, F. Schmid, A. Pishevar, S. Worley, Comput. Phys. Comm. 184 (2013), 1119–1128.
 // 2. C. L. Phillips, J. A. Anderson, S. C. Glotzer, J. Comput. Phys. 230 (2011), 7191-7201.
 // PRNG period = 3666320093*2^32 ~ 2^64 ~ 10^19
 
 // Constants used by the saru() macros below:
 //   LCGA, LCGC               - multiplier and increment of the update
 //                              state = LCGA*state + LCGC
 //   oWeylOffset, oWeylPeriod - terms of the wstate update
 //   TWO_N32                  - scales a 32-bit unsigned value by 2^-32
 #define LCGA 0x4beb5d59 // Full period 32 bit LCG
 #define LCGC 0x2600e1f7
 #define oWeylPeriod 0xda879add // Prime period 3666320093
 #define oWeylOffset 0x8009d14b
 // NOTE(review): the 'f' suffix makes this a single-precision literal, so the
 // product s*TWO_N32 in the saru() macros is evaluated in float before it is
 // widened to numtyp (double) - confirm that this is intended.
 #define TWO_N32 0.232830643653869628906250e-9f /* 2^-32 */
 
 // saru(), uniform variant driven by the LCG/Weyl update.
 // Specifically implemented for steps = 1; high = 1.0; low = -1.0.
 // Generates a uniformly distributed random number u in [-1.0;1.0] and
 // stores SQRT3*u in randnum; the factor sqrt(3) gives the result unit
 // variance so that it "matches" a normal random distribution.
 // (Afshar et al. multiply u in [-0.5;0.5] by sqrt(12).)
 // seed1 and seed2 are modified in place, so the caller must pass modifiable
 // variables; seed and timestep are only read.
 // The body is wrapped in curly brackets to make the helper variables
 // (seed3, state, wstate, v, s) local to the macro's scope; they shadow any
 // variables of the same name at the call site.
 #ifdef _USE_UNIFORM_SARU_LCG
 #define numtyp double
 #define SQRT3 (numtyp)1.7320508075688772935274463
 #define saru(seed1, seed2, seed, timestep, randnum) {                         \
   unsigned int seed3 = seed + timestep;                                       \
   seed3^=(seed1<<7)^(seed2>>6);                                               \
   seed2+=(seed1>>4)^(seed3>>15);                                              \
   seed1^=(seed2<<9)+(seed3<<8);                                               \
   seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1));                               \
   seed2+=0x72BE1579*((seed1<<4)  ^ (seed3>>16));                              \
   seed1^=0x3F38A6ED*((seed3>>5)  ^ (((signed int)seed2)>>22));                \
   seed2+=seed1*seed3;                                                         \
   seed1+=seed3 ^ (seed2>>2);                                                  \
   seed2^=((signed int)seed2)>>17;                                             \
   unsigned int state  = 0x79dedea3*(seed1^(((signed int)seed1)>>14));         \
   unsigned int wstate = (state + seed2) ^ (((signed int)state)>>8);           \
   state  = state + (wstate*(wstate^0xdddf97f5));                              \
   wstate = 0xABCB96F7 + (wstate>>1);                                          \
   state = LCGA*state + LCGC;                                                  \
   wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod);   \
   unsigned int v = (state ^ (state>>26)) + wstate;                            \
   unsigned int s = (signed int)((v^(v>>20))*0x6957f5a7);                      \
   randnum = SQRT3*(s*TWO_N32*(numtyp)2.0-(numtyp)1.0);                        \
 }
 #endif
 
 // saru(), uniform variant using TEA8 (8 mixing rounds) instead of the
 // LCG/Weyl update.
 // Specifically implemented for steps = 1; high = 1.0; low = -1.0.
 // Generates a uniformly distributed random number u in [-1.0;1.0] and
 // stores SQRT3*u in randnum; the factor sqrt(3) gives the result unit
 // variance so that it "matches" a normal random distribution.
 // (Afshar et al. multiply u in [-0.5;0.5] by sqrt(12).)
 // seed1 and seed2 are modified in place, so the caller must pass modifiable
 // variables; seed and timestep are only read.
 // NOTE(review): k0..k3, delta and rounds are plain object-like macros with
 // very generic names; once this variant is enabled they textually replace
 // every later identifier with the same spelling in this translation unit.
 // The loop counter i and the helper variables declared inside the braces
 // shadow same-named variables at the call site.
 #ifdef _USE_UNIFORM_SARU_TEA8
 #define numtyp double
 #define SQRT3 (numtyp)1.7320508075688772935274463
 #define k0 0xA341316C
 #define k1 0xC8013EA4
 #define k2 0xAD90777D
 #define k3 0x7E95761E
 #define delta 0x9e3779b9
 #define rounds 8
 #define saru(seed1, seed2, seed, timestep, randnum) {                         \
   unsigned int seed3 = seed + timestep;                                       \
   seed3^=(seed1<<7)^(seed2>>6);                                               \
   seed2+=(seed1>>4)^(seed3>>15);                                              \
   seed1^=(seed2<<9)+(seed3<<8);                                               \
   seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1));                               \
   seed2+=0x72BE1579*((seed1<<4)  ^ (seed3>>16));                              \
   seed1^=0x3F38A6ED*((seed3>>5)  ^ (((signed int)seed2)>>22));                \
   seed2+=seed1*seed3;                                                         \
   seed1+=seed3 ^ (seed2>>2);                                                  \
   seed2^=((signed int)seed2)>>17;                                             \
   unsigned int state  = 0x79dedea3*(seed1^(((signed int)seed1)>>14));         \
   unsigned int wstate = (state + seed2) ^ (((signed int)state)>>8);           \
   state  = state + (wstate*(wstate^0xdddf97f5));                              \
   wstate = 0xABCB96F7 + (wstate>>1);                                          \
   unsigned int sum = 0;                                                       \
   for (int i=0; i < rounds; i++) {                                            \
     sum += delta;                                                             \
     state += ((wstate<<4) + k0)^(wstate + sum)^((wstate>>5) + k1);            \
     wstate += ((state<<4) + k2)^(state + sum)^((state>>5) + k3);              \
   }                                                                           \
   unsigned int v = (state ^ (state>>26)) + wstate;                            \
   unsigned int s = (signed int)((v^(v>>20))*0x6957f5a7);                      \
   randnum = SQRT3*(s*TWO_N32*(numtyp)2.0-(numtyp)1.0);                        \
 }
 #endif
 
 // saru(), Gaussian variant driven by the LCG/Weyl update.
 // Specifically implemented for steps = 1; high = 1.0; low = -1.0.
 // Repeatedly draws two uniformly distributed random numbers r1 and r2 in
 // [-1.0;1.0] until r1*r1 + r2*r2 < 1.0, then uses the polar (Marsaglia)
 // method to transform them into a normal random value stored in randnum.
 // This is used to compare with CPU DPD using RandMars::gaussian().
 // seed1 and seed2 are modified in place, so the caller must pass modifiable
 // variables; seed and timestep are only read.
 // The initial values given to state and wstate at their declaration are
 // overwritten by the two assignments that follow.
 // The helper variables declared inside the braces (including rsq, v and s)
 // shadow same-named variables at the call site.
 // NOTE(review): the acceptance test is only rsq < 1.0, so rsq == 0.0 is not
 // rejected before log(rsq)/rsq is evaluated - confirm this is acceptable.
 #ifdef _USE_GAUSSIAN_SARU_LCG
 #define numtyp double
 #define saru(seed1, seed2, seed, timestep, randnum) {                         \
   unsigned int seed3 = seed + timestep;                                       \
   seed3^=(seed1<<7)^(seed2>>6);                                               \
   seed2+=(seed1>>4)^(seed3>>15);                                              \
   seed1^=(seed2<<9)+(seed3<<8);                                               \
   seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1));                               \
   seed2+=0x72BE1579*((seed1<<4)  ^ (seed3>>16));                              \
   seed1^=0x3F38A6ED*((seed3>>5)  ^ (((signed int)seed2)>>22));                \
   seed2+=seed1*seed3;                                                         \
   seed1+=seed3 ^ (seed2>>2);                                                  \
   seed2^=((signed int)seed2)>>17;                                             \
   unsigned int state=0x12345678;                                              \
   unsigned int wstate=12345678;                                               \
   state  = 0x79dedea3*(seed1^(((signed int)seed1)>>14));                      \
   wstate = (state + seed2) ^ (((signed int)state)>>8);                        \
   state  = state + (wstate*(wstate^0xdddf97f5));                              \
   wstate = 0xABCB96F7 + (wstate>>1);                                          \
   unsigned int v, s;                                                          \
   numtyp r1, r2, rsq;                                                         \
   while (1) {                                                                 \
     state = LCGA*state + LCGC;                                                \
     wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \
     v = (state ^ (state>>26)) + wstate;                                       \
     s = (signed int)((v^(v>>20))*0x6957f5a7);                                 \
     r1 = s*TWO_N32*(numtyp)2.0-(numtyp)1.0;                                   \
     state = LCGA*state + LCGC;                                                \
     wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \
     v = (state ^ (state>>26)) + wstate;                                       \
     s = (signed int)((v^(v>>20))*0x6957f5a7);                                 \
     r2 = s*TWO_N32*(numtyp)2.0-(numtyp)1.0;                                   \
     rsq = r1 * r1 + r2 * r2;                                                  \
     if (rsq < (numtyp)1.0) break;                                             \
   }                                                                           \
   numtyp fac = sqrt((numtyp)-2.0*log(rsq)/rsq);                               \
   randnum = r2*fac;                                                           \
 }
 #endif
 
 /* ---------------------------------------------------------------------- */
 
 PairDPDTstatGPU::PairDPDTstatGPU(LAMMPS *lmp) :
   PairDPDTstat(lmp), gpu_mode(GPU_FORCE)
 {
   // no host-side time has been measured by compute() yet
   cpu_time = 0.0;
 
   // flags of the pair style that this GPU variant switches off
   reinitflag = 0;
   respa_enable = 0;
 
   // presumably verifies that the GPU package has been set up - see gpu_extra.h
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    destructor: calls dpd_gpu_clear() from the external GPU library
    (presumably releases what dpd_gpu_init() set up - see the library)
 ------------------------------------------------------------------------- */
 
 PairDPDTstatGPU::~PairDPDTstatGPU()
 {
   dpd_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Evaluate the DPD thermostat forces for the current timestep.
 //   eflag, vflag : energy/virial flags, forwarded to ev_setup() and to the
 //                  GPU library calls
 // When the target temperature is ramped (t_start != t_stop), sigma is
 // recomputed and pushed to the GPU library first.  The library reports
 // through host_start the first list index it did not handle; indices
 // [host_start, inum) are evaluated on the host by cpu_compute().
 void PairDPDTstatGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   // adjust sigma if target T is changing
 
   if (t_start != t_stop) {
     // fraction of the run completed so far.  Deliberately not named
     // "delta": the optional _USE_UNIFORM_SARU_TEA8 variant #defines delta
     // as a macro earlier in this file, which would break this declaration.
     double frac = update->ntimestep - update->beginstep;
     if (frac != 0.0) frac /= update->endstep - update->beginstep;
     temperature = t_start + frac * (t_stop-t_start);
     double boltz = force->boltz;
     for (int i = 1; i <= atom->ntypes; i++)
       for (int j = i; j <= atom->ntypes; j++)
         sigma[i][j] = sigma[j][i] = sqrt(2.0*boltz*temperature*gamma[i][j]);
 
     dpd_gpu_update_coeff(atom->ntypes+1, a0, gamma, sigma, cut);
   }
 
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
 
   double dtinvsqrt = 1.0/sqrt(update->dt);
 
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
   if (gpu_mode != GPU_FORCE) {
     // the library call returns the neighbor data (ilist, numneigh,
     // firstneigh) for all local atoms
     inum = atom->nlocal;
     firstneigh = dpd_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                    atom->type, domain->sublo, domain->subhi,
                                    atom->tag, atom->nspecial, atom->special,
                                    eflag, vflag, eflag_atom, vflag_atom,
                                    host_start, &ilist, &numneigh, cpu_time,
                                    success, atom->v, dtinvsqrt, seed,
                                    update->ntimestep,
                                    domain->boxlo, domain->prd);
   } else {
     // GPU_FORCE: hand the neighbor list requested in init_style() to the
     // library
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     dpd_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                     vflag_atom, host_start, cpu_time, success,
                     atom->tag, atom->v, dtinvsqrt, seed,
                     update->ntimestep,
                     atom->nlocal, domain->boxlo, domain->prd);
   }
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   // evaluate the remaining atoms on the host and record the time spent;
   // cpu_time is passed back to the library on the next call
   if (host_start<inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairDPDTstatGPU::init_style()
 {
   // this pair style only works with newton pair off
   if (force->newton_pair)
     error->all(FLERR,"Cannot use newton pair with dpd/tstat/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   // maxcut tracks the largest squared cutoff over all type pairs that have
   // coefficients (explicitly set, or both diagonal entries set)
   // NOTE(review): if no pair qualifies, maxcut stays -1.0 and sqrt(maxcut)
   // below is NaN - confirm callers guarantee at least one pair is set
   double maxcut = -1.0;
   double mcut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         mcut = init_one(i,j);
         mcut *= mcut;
         if (mcut > maxcut)
           maxcut = mcut;
         cutsq[i][j] = cutsq[j][i] = mcut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   // largest cutoff plus the neighbor skin
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   // maxspecial is only taken from the atom class for molecular systems
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // per the dpd_gpu_init() prototype: "true" is tstat_only, 300 is
   // max_nbors, and gpu_mode is passed by reference
   int success = dpd_gpu_init(atom->ntypes+1, cutsq, a0, gamma, sigma, 
                              cut, force->special_lj, true, atom->nlocal,
                              atom->nlocal+atom->nghost, 300, maxspecial,
                              cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
   // in GPU_FORCE mode compute() reads the host neighbor list, so request
   // a full (not half) list
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Sum of the base Pair class usage and the value reported by
 // dpd_gpu_bytes().
 double PairDPDTstatGPU::memory_usage()
 {
   const double host_bytes = Pair::memory_usage();
   const double library_bytes = dpd_gpu_bytes();
   return host_bytes + library_bytes;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairDPDTstatGPU::cpu_compute(int start, int inum, int eflag, int vflag,
                                int *ilist, int *numneigh, int **firstneigh) {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,fpair;
   double vxtmp,vytmp,vztmp,delvx,delvy,delvz;
   double rsq,r,rinv,dot,wd,randnum,factor_dpd;
   int *jlist;
   tagint itag,jtag;
 
   double **x = atom->x;
   double **v = atom->v;
   double **f = atom->f;
   int *type = atom->type;
   tagint *tag = atom->tag;
   double *special_lj = force->special_lj;
   double dtinvsqrt = 1.0/sqrt(update->dt);
   int timestep = (int)update->ntimestep;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     vxtmp = v[i][0];
     vytmp = v[i][1];
     vztmp = v[i][2];
     itype = type[i];
     itag = tag[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_dpd = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
       jtag = tag[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r = sqrt(rsq);
         if (r < EPSILON) continue;     // r can be 0.0 in DPD systems
         rinv = 1.0/r;
         delvx = vxtmp - v[j][0];
         delvy = vytmp - v[j][1];
         delvz = vztmp - v[j][2];
         dot = delx*delvx + dely*delvy + delz*delvz;
         wd = 1.0 - r/cut[itype][jtype];
 
         unsigned int tag1=itag, tag2=jtag;
         if (tag1 > tag2) {
           tag1 = jtag; tag2 = itag;
         }
 
         randnum = 0.0;
         saru(tag1, tag2, seed, timestep, randnum);
 
         // conservative force = a0 * wd
         // drag force = -gamma * wd^2 * (delx dot delv) / r
         // random force = sigma * wd * rnd * dtinvsqrt;
 
         fpair = -gamma[itype][jtype]*wd*wd*dot*rinv;
         fpair += sigma[itype][jtype]*wd*randnum*dtinvsqrt;
         fpair *= factor_dpd*rinv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         
         if (evflag) ev_tally_full(i,0.0,0.0,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_eam_gpu.cpp b/src/GPU/pair_eam_gpu.cpp
index adb859003..c9aba74c2 100644
--- a/src/GPU/pair_eam_gpu.cpp
+++ b/src/GPU/pair_eam_gpu.cpp
@@ -1,279 +1,279 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Trung Dac Nguyen (ORNL), W. Michael Brown (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_eam_gpu.h"
 #include "atom.h"
 #include "force.h"
 #include "comm.h"
 #include "domain.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "gpu_extra.h"
 
 #define MAXLINE 1024
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 int eam_gpu_init(const int ntypes, double host_cutforcesq,
                  int **host_type2rhor, int **host_type2z2r,
                  int *host_type2frho, double ***host_rhor_spline,
                  double ***host_z2r_spline, double ***host_frho_spline,
                  double rdr, double rdrho, double rhomax, 
                  int nrhor, int nrho, int nz2r, int nfrho, int nr, 
                  const int nlocal, const int nall, const int max_nbors, 
                  const int maxspecial, const double cell_size, int &gpu_mode, 
                  FILE *screen, int &fp_size);
 void eam_gpu_clear();
 int** eam_gpu_compute_n(const int ago, const int inum_full, const int nall,
                         double **host_x, int *host_type, double *sublo,
                         double *subhi, tagint *tag, int **nspecial, tagint **special,
                         const bool eflag, const bool vflag, const bool eatom,
                         const bool vatom, int &host_start, int **ilist,
                         int **jnum,  const double cpu_time, bool &success,
                         int &inum, void **fp_ptr);
 void eam_gpu_compute(const int ago, const int inum_full, const int nlocal,
                      const int nall,double **host_x, int *host_type,
                      int *ilist, int *numj, int **firstneigh,
                      const bool eflag, const bool vflag,
                      const bool eatom, const bool vatom, int &host_start,
                      const double cpu_time, bool &success, void **fp_ptr);
 void eam_gpu_compute_force(int *ilist, const bool eflag, const bool vflag,
                            const bool eatom, const bool vatom);
 double eam_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 PairEAMGPU::PairEAMGPU(LAMMPS *lmp) :
   PairEAM(lmp), gpu_mode(GPU_FORCE)
 {
   // no host-side time has been measured yet
   cpu_time = 0.0;
 
   // flags of the pair style that this GPU variant switches off
   reinitflag = 0;
   respa_enable = 0;
 
   // presumably verifies that the GPU package has been set up - see gpu_extra.h
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    destructor: calls eam_gpu_clear() from the external GPU library
    unconditionally; no allocation check is performed here
    (presumably releases what eam_gpu_init() set up - see the library)
 ------------------------------------------------------------------------- */
 
 PairEAMGPU::~PairEAMGPU()
 {
   eam_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Sum of the base Pair class usage and the value reported by
 // eam_gpu_bytes().
 double PairEAMGPU::memory_usage()
 {
   const double host_bytes = Pair::memory_usage();
   const double library_bytes = eam_gpu_bytes();
   return host_bytes + library_bytes;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Two-stage EAM evaluation through the GPU library: a first library call
 // per timestep (eam_gpu_compute_n or eam_gpu_compute, depending on
 // gpu_mode), then a forward communication of the per-atom fp values,
 // then eam_gpu_compute_force() for the forces.
 void PairEAMGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
   } else {
     eflag_atom = 0;
     eflag_global = 0;
     vflag_fdotr = 0;
     evflag = 0;
   }
 
   // compute density on each atom on GPU
 
   const int nlocal = atom->nlocal;
   const int nall = nlocal + atom->nghost;
   int inum, host_start, inum_dev;
   int *ilist, *numneigh, **firstneigh;
   bool success = true;
 
   if (gpu_mode == GPU_FORCE) {
     // hand the neighbor list requested in init_style() to the library
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     eam_gpu_compute(neighbor->ago, inum, nlocal, nall, atom->x, atom->type,
                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                     vflag_atom, host_start, cpu_time, success, &fp_pinned);
   } else {
     // the library call returns the neighbor data for all local atoms
     inum = atom->nlocal;
     firstneigh = eam_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                    atom->type, domain->sublo, domain->subhi,
                                    atom->tag, atom->nspecial, atom->special,
                                    eflag, vflag, eflag_atom, vflag_atom,
                                    host_start, &ilist, &numneigh, cpu_time,
                                    success, inum_dev, &fp_pinned);
   }
 
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   // communicate derivative of embedding function
 
   comm->forward_comm_pair(this);
 
   // compute forces on each atom on GPU
   // the atom list is only handed over in GPU_FORCE mode, NULL otherwise
 
   int *force_ilist = NULL;
   if (gpu_mode == GPU_FORCE) force_ilist = ilist;
   eam_gpu_compute_force(force_ilist, eflag, vflag, eflag_atom, vflag_atom);
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairEAMGPU::init_style()
 {
   // this pair style only works with newton pair off
   if (force->newton_pair)
     error->all(FLERR,"Cannot use newton pair with eam/gpu pair style");
 
   // convert read-in file(s) to arrays and spline them
 
   file2array();
   array2spline();
 
   // Repeat cutsq calculation because done after call to init_style
   // maxcut tracks the largest squared cutoff over all type pairs that have
   // coefficients (explicitly set, or both diagonal entries set)
   // NOTE(review): if no pair qualifies, maxcut stays -1.0 and sqrt(maxcut)
   // below is NaN - confirm callers guarantee at least one pair is set
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   // largest cutoff plus the neighbor skin
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   // maxspecial is only taken from the atom class for molecular systems
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // per the eam_gpu_init() prototype: 300 is max_nbors; gpu_mode and
   // fp_size are passed by reference and fp_size is read afterwards
   int fp_size;
   int success = eam_gpu_init(atom->ntypes+1, cutforcesq, type2rhor, type2z2r,
                              type2frho, rhor_spline, z2r_spline, frho_spline,
                              rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr,
                              atom->nlocal, atom->nlocal+atom->nghost, 300,
                              maxspecial, cell_size, gpu_mode, screen, fp_size);
   GPU_EXTRA::check_flag(success,error,world);
 
   // in GPU_FORCE mode compute() reads the host neighbor list, so request
   // a full (not half) list
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 
   // fp_single records whether the values behind fp_pinned must be read as
   // float (true) or as double (false)
   if (fp_size == sizeof(double))
     fp_single = false;
   else
     fp_single = true;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Pair term for atoms i and j at squared distance rsq.
 // Returns phi = z2/r and stores -psip/r in fforce, where psip combines the
 // fp values of both atoms (read from fp_pinned) with the spline results.
 // factor_coul and factor_lj are not used.
 double PairEAMGPU::single(int i, int j, int itype, int jtype,
                        double rsq, double factor_coul, double factor_lj,
                        double &fforce)
 {
   const double r = sqrt(rsq);
   const double recip = 1.0/r;
 
   // spline interval m (capped at nr-1) and offset p within it (capped at 1)
   double p = r*rdr + 1.0;
   int m = static_cast<int> (p);
   m = MIN(m,nr-1);
   p -= m;
   p = MIN(p,1.0);
 
   // quadratic spline pieces built from coefficients 0..2
   const double *c_ij = rhor_spline[type2rhor[itype][jtype]][m];
   const double rhoip = (c_ij[0]*p + c_ij[1])*p + c_ij[2];
 
   const double *c_ji = rhor_spline[type2rhor[jtype][itype]][m];
   const double rhojp = (c_ji[0]*p + c_ji[1])*p + c_ji[2];
 
   // z2p from coefficients 0..2, z2 (cubic) from coefficients 3..6
   const double *c_z = z2r_spline[type2z2r[itype][jtype]][m];
   const double z2p = (c_z[0]*p + c_z[1])*p + c_z[2];
   const double z2 = ((c_z[3]*p + c_z[4])*p + c_z[5])*p + c_z[6];
 
   // fp values are stored as float or double depending on fp_single
   double fp_i,fp_j;
   if (fp_single) {
     const float *fp = (float*)fp_pinned;
     fp_i = fp[i];
     fp_j = fp[j];
   } else {
     const double *fp = (double*)fp_pinned;
     fp_i = fp[i];
     fp_j = fp[j];
   }
 
   const double phi = z2*recip;
   const double phip = z2p*recip - phi*recip;
   const double psip = fp_i*rhojp + fp_j*rhoip + phip;
   fforce = -psip*recip;
 
   return phi;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Copy the fp values of the n atoms indexed by list into buf as doubles.
 // Returns the number of values written.  pbc_flag and pbc are not used.
 int PairEAMGPU::pack_forward_comm(int n, int *list, double *buf, 
                                   int pbc_flag,int *pbc)
 {
   int count = 0;
 
   if (!fp_single) {
     // fp values are stored as double
     const double *fp = (double *)fp_pinned;
     while (count < n) {
       buf[count] = fp[list[count]];
       count++;
     }
   } else {
     // fp values are stored as float and widened to double
     const float *fp = (float *)fp_pinned;
     while (count < n) {
       buf[count] = static_cast<double>(fp[list[count]]);
       count++;
     }
   }
 
   return count;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Store the n values of buf into the fp storage of atoms
 // first .. first+n-1, narrowing to float when fp_single is set.
 void PairEAMGPU::unpack_forward_comm(int n, int first, double *buf)
 {
   if (!fp_single) {
     double *fp = (double *)fp_pinned;
     for (int k = 0; k < n; k++) fp[first + k] = buf[k];
   } else {
     float *fp = (float *)fp_pinned;
     for (int k = 0; k < n; k++) fp[first + k] = buf[k];
   }
 }
diff --git a/src/GPU/pair_gauss_gpu.cpp b/src/GPU/pair_gauss_gpu.cpp
index 2065c927f..c7b47131a 100644
--- a/src/GPU/pair_gauss_gpu.cpp
+++ b/src/GPU/pair_gauss_gpu.cpp
@@ -1,235 +1,235 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
    
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
    
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_gauss_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 int gauss_gpu_init(const int ntypes, double **cutsq, double **host_a,
                    double **b, double **offset, double *special_lj, const int nlocal, 
                    const int nall, const int max_nbors, const int maxspecial,
                    const double cell_size, int &gpu_mode, FILE *screen);
 int gauss_gpu_reinit(const int ntypes, double **cutsq, double **host_a,
                    double **b, double **offset);
 void gauss_gpu_clear();
 int ** gauss_gpu_compute_n(const int ago, const int inum,
                            const int nall, double **host_x, int *host_type, 
                            double *sublo, double *subhi, tagint *tag, int **nspecial,
                            tagint **special, const bool eflag, const bool vflag,
                            const bool eatom, const bool vatom, int &host_start,
                            int **ilist, int **jnum,
                            const double cpu_time, bool &success);
 void gauss_gpu_compute(const int ago, const int inum, const int nall, 
                        double **host_x, int *host_type, int *ilist, int *numj,
                        int **firstneigh, const bool eflag, const bool vflag,
                        const bool eatom, const bool vatom, int &host_start,
                        const double cpu_time, bool &success);
 double gauss_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 PairGaussGPU::PairGaussGPU(LAMMPS *lmp) : PairGauss(lmp), gpu_mode(GPU_FORCE)
 {
   // compute() passes cpu_time to the GPU library, so give it a defined
   // value before the first host-side timing has been taken
   cpu_time = 0.0;
   respa_enable = 0;
 
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairGaussGPU::~PairGaussGPU()
 {
   // gauss_gpu_clear() lives in the external GPU library; presumably it
   // releases the data set up by gauss_gpu_init() -- confirm in the library
   gauss_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairGaussGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
   } else {
     evflag = vflag_fdotr = 0;
   }
 
   const int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   int *ilist, *numneigh, **firstneigh;
   bool success = true;
 
   if (gpu_mode == GPU_FORCE) {
     // neighbor list built on the host is handed to the library
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     gauss_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                       ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                       vflag_atom, host_start, cpu_time, success);
   } else {
     // the library call supplies ilist/numneigh through its output
     // arguments and firstneigh through its return value
     inum = atom->nlocal;
     firstneigh = gauss_gpu_compute_n(neighbor->ago, inum, nall,
                                      atom->x, atom->type, domain->sublo,
                                      domain->subhi, atom->tag, atom->nspecial,
                                      atom->special, eflag, vflag, eflag_atom,
                                      vflag_atom, host_start,
                                      &ilist, &numneigh, cpu_time, success);
   }
 
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   // entries host_start..inum-1 are evaluated on the host; the wall time
   // spent there is kept in cpu_time, which is passed to the library above
   if (host_start < inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairGaussGPU::init_style()
 {
   // rejects newton pair on, fills cutsq for all type pairs, initializes the
   // GPU library and, when gpu_mode is GPU_FORCE, requests a full neighbor list
 
   if (force->newton_pair) 
     error->all(FLERR,"Cannot use newton pair with gauss/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       // a pair is treated as set if it was set explicitly or if both of its
       // diagonal entries were set
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         // from here on cut holds the squared cutoff
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   // largest cutoff plus the neighbor skin
   // NOTE(review): maxcut stays -1.0 if no pair is set, which makes
   // sqrt(maxcut) a NaN -- confirm that case cannot reach this point
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   // maxspecial stays 0 unless atom->molecular is set
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // NOTE(review): 300 is passed as max_nbors; presumably an initial
   // per-atom neighbor estimate -- confirm against the GPU library
   int success = gauss_gpu_init(atom->ntypes+1, cutsq, a, b,
                                offset, force->special_lj, atom->nlocal,
                                atom->nlocal+atom->nghost, 300, maxspecial,
                                cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
   // gpu_mode is passed by reference to gauss_gpu_init() above, so the value
   // tested here may differ from the one set in the constructor
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairGaussGPU::reinit()
 {
   // base class reinit runs first; afterwards the current per-type tables
   // are handed to the GPU library again
   Pair::reinit();
 
   const int ntypes_plus_one = atom->ntypes+1;
   gauss_gpu_reinit(ntypes_plus_one, cutsq, a, b, offset);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairGaussGPU::memory_usage()
 {
   // base class total plus the amount reported by the GPU library
   const double host_bytes = Pair::memory_usage();
   const double gpu_bytes = gauss_gpu_bytes();
   return host_bytes + gpu_bytes;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairGaussGPU::cpu_compute(int start, int inum, int eflag, int vflag, 
                                int *ilist, int *numneigh, int **firstneigh) {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r2inv,forcelj,factor_lj;
   int *jlist;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   double *special_lj = force->special_lj;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         forcelj = - 2.0*a[itype][jtype]*b[itype][jtype] * rsq * 
           exp(-b[itype][jtype]*rsq); 
         fpair = factor_lj*forcelj*r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           evdwl = -(a[itype][jtype]*exp(-b[itype][jtype]*rsq) -
             offset[itype][jtype]);
           evdwl *= factor_lj;
         }
 
         if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_gayberne_gpu.cpp b/src/GPU/pair_gayberne_gpu.cpp
index d69603334..75191f931 100644
--- a/src/GPU/pair_gayberne_gpu.cpp
+++ b/src/GPU/pair_gayberne_gpu.cpp
@@ -1,342 +1,342 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Mike Brown (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_gayberne_gpu.h"
 #include "math_extra.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "atom_vec_ellipsoid.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "domain.h"
 #include "update.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 int gb_gpu_init(const int ntypes, const double gamma, const double upsilon,
                 const double mu, double **shape, double **well, double **cutsq,
                 double **sigma, double **epsilon, double *host_lshape,
                 int **form, double **host_lj1, double **host_lj2,
                 double **host_lj3, double **host_lj4, double **offset,
                 double *special_lj, const int nlocal, const int nall,
                 const int max_nbors, const int maxspecial,
                 const double cell_size,        int &gpu_mode, FILE *screen);
 void gb_gpu_clear();
 int ** gb_gpu_compute_n(const int ago, const int inum, const int nall,
                         double **host_x, int *host_type, double *sublo,
                         double *subhi, tagint *tag, int **nspecial, tagint **special,
                         const bool eflag, const bool vflag,
                         const bool eatom, const bool vatom, int &host_start,
                         int **ilist, int **jnum, const double cpu_time,
                         bool &success, double **host_quat);
 int * gb_gpu_compute(const int ago, const int inum, const int nall,
                      double **host_x, int *host_type, int *ilist, int *numj,
                      int **firstneigh, const bool eflag, const bool vflag,
                      const bool eatom, const bool vatom, int &host_start,
                      const double cpu_time, bool &success, double **host_quat);
 double gb_gpu_bytes();
 
 // pair categories; cpu_compute() compares these against form[itype][jtype]
 enum{SPHERE_SPHERE,SPHERE_ELLIPSE,ELLIPSE_SPHERE,ELLIPSE_ELLIPSE};
 
 /* ---------------------------------------------------------------------- */
 
 PairGayBerneGPU::PairGayBerneGPU(LAMMPS *lmp) : PairGayBerne(lmp),
                                                 gpu_mode(GPU_FORCE)
 {
   // quat is allocated in init_style() and grown on demand in compute()
   quat = NULL;
   quat_nmax = 0;
   reinitflag = 0;
 
   // compute() passes cpu_time to the GPU library before any host-side
   // timing has been taken, so it needs a defined starting value
   cpu_time = 0.0;
 
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairGayBerneGPU::~PairGayBerneGPU()
 {
   // gb_gpu_clear() lives in the external GPU library; presumably it
   // releases the data set up by gb_gpu_init() -- confirm in the library
   gb_gpu_clear();
 
   // host-side quaternion buffer grown in init_style() and compute()
   memory->destroy(quat);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairGayBerneGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
   } else {
     evflag = vflag_fdotr = 0;
   }
 
   const int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   int *ilist, *numneigh, **firstneigh;
   bool success = true;
 
   // keep the quat buffer large enough for all local + ghost atoms,
   // over-allocating by 10% whenever it has to grow
   if (quat_nmax < nall) {
     quat_nmax = static_cast<int>(1.1 * nall);
     memory->grow(quat, quat_nmax, 4, "pair:quat");
   }
 
   // copy the quaternion of every atom that has ellipsoid data;
   // rows of atoms with ellipsoid[i] < 0 are left untouched
   AtomVecEllipsoid::Bonus *bonus = avec->bonus;
   int *ellipsoid = atom->ellipsoid;
   for (int i = 0; i < nall; i++) {
     const int qi = ellipsoid[i];
     if (qi < 0) continue;
     for (int k = 0; k < 4; k++) quat[i][k] = bonus[qi].quat[k];
   }
 
   if (gpu_mode == GPU_FORCE) {
     // host neighbor list is handed to the library; the ilist used for the
     // host-side part below is the pointer returned by the library call
     inum = list->inum;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     ilist = gb_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                            list->ilist, numneigh, firstneigh, eflag, vflag,
                            eflag_atom, vflag_atom, host_start,
                            cpu_time, success, quat);
   } else {
     // the library call supplies ilist/numneigh through its output
     // arguments and firstneigh through its return value
     inum = atom->nlocal;
     firstneigh = gb_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                   atom->type, domain->sublo, domain->subhi,
                                   atom->tag, atom->nspecial, atom->special,
                                   eflag, vflag, eflag_atom, vflag_atom,
                                   host_start, &ilist, &numneigh, cpu_time,
                                   success, quat);
   }
 
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   // entries host_start..inum-1 are evaluated on the host; the wall time
   // spent there is kept in cpu_time, which is passed to the library above
   if (host_start < inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairGayBerneGPU::init_style()
 {
   // checks atom style and newton setting, precomputes per-type shape data,
   // fills cutsq, initializes the GPU library, requests a full neighbor list
   // when gpu_mode is GPU_FORCE and sizes the quat buffer
 
   avec = (AtomVecEllipsoid *) atom->style_match("ellipsoid");
   if (!avec)
     error->all(FLERR,"Pair gayberne/gpu requires atom style ellipsoid");
   if (force->newton_pair)
     error->all(FLERR,"Cannot use newton pair with gayberne/gpu pair style");
   if (!atom->ellipsoid_flag)
     error->all(FLERR,"Pair gayberne/gpu requires atom style ellipsoid");
 
   // per-type shape precalculations
   // require that atom shapes are identical within each type
   // if shape = 0 for point particle, set shape = 1 as required by Gay-Berne
 
   for (int i = 1; i <= atom->ntypes; i++) {
     if (!atom->shape_consistency(i,shape1[i][0],shape1[i][1],shape1[i][2]))
       error->all(FLERR,"Pair gayberne/gpu requires atoms with same type have same shape");
     if (shape1[i][0] == 0.0)
       shape1[i][0] = shape1[i][1] = shape1[i][2] = 1.0;
     // shape2 holds the squares of the shape1 components
     shape2[i][0] = shape1[i][0]*shape1[i][0];
     shape2[i][1] = shape1[i][1]*shape1[i][1];
     shape2[i][2] = shape1[i][2]*shape1[i][2];
     lshape[i] = (shape1[i][0]*shape1[i][1]+shape1[i][2]*shape1[i][2]) *
       sqrt(shape1[i][0]*shape1[i][1]);
   }
 
   // Repeat cutsq calculation because done after call to init_style
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       // a pair is treated as set if it was set explicitly or if both of its
       // diagonal entries were set
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         // from here on cut holds the squared cutoff
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
 
   // largest cutoff plus the neighbor skin
   // NOTE(review): maxcut stays -1.0 if no pair is set, which makes
   // sqrt(maxcut) a NaN -- confirm that case cannot reach this point
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   // maxspecial stays 0 unless atom->molecular is set
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // the squared shapes (shape2) are what is passed as the shape argument
   // NOTE(review): 300 is passed as max_nbors; presumably an initial
   // per-atom neighbor estimate -- confirm against the GPU library
   int success = gb_gpu_init(atom->ntypes+1, gamma, upsilon, mu,
                             shape2, well, cutsq, sigma, epsilon, lshape, form,
                             lj1, lj2, lj3, lj4, offset, force->special_lj,
                             atom->nlocal, atom->nlocal+atom->nghost, 300,
                             maxspecial, cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
   // gpu_mode is passed by reference to gb_gpu_init() above, so the value
   // tested here may differ from the one set in the constructor
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
   // size the quaternion buffer for the current local + ghost count with
   // 10% headroom; compute() grows it again when needed
   quat_nmax = static_cast<int>(1.1 * (atom->nlocal + atom->nghost));
   memory->grow(quat, quat_nmax, 4, "pair:quat");
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairGayBerneGPU::memory_usage()
 {
   // base class total, then the host quat buffer, then the amount reported
   // by the GPU library
   double total = Pair::memory_usage();
   total += memory->usage(quat,quat_nmax);
   total += gb_gpu_bytes();
   return total;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairGayBerneGPU::cpu_compute(int start, int inum, int eflag, int vflag,
                                   int *ilist, int *numneigh, int **firstneigh)
 {
   // host-side evaluation for the atoms ilist[start..inum-1]
   // only force and torque on atom i are accumulated for each pair;
   // energy/virial go through ev_tally_xyz_full() when evflag is set
   // vflag is not read in this function
 
   int i,j,ii,jj,jnum,itype,jtype;
   double one_eng,rsq,r2inv,r6inv,forcelj,factor_lj;
   double fforce[3],ttor[3],rtor[3],r12[3];
   double a1[3][3],b1[3][3],g1[3][3],a2[3][3],b2[3][3],g2[3][3],temp[3][3];
   int *jlist;
   double *iquat,*jquat;
 
   // evdwl is handed to ev_tally_xyz_full() whenever evflag is set, but it
   // is only computed when eflag is set; start from zero so that it is never
   // passed with an indeterminate value
   double evdwl = 0.0;
 
   AtomVecEllipsoid::Bonus *bonus = avec->bonus;
   int *ellipsoid = atom->ellipsoid;
   double **x = atom->x;
   double **f = atom->f;
   double **tor = atom->torque;
   int *type = atom->type;
   double *special_lj = force->special_lj;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     itype = type[i];
 
     // a1, b1, g1 depend only on atom i, so they are built once per i
     // and only when type i is an ellipsoid
 
     if (form[itype][itype] == ELLIPSE_ELLIPSE) {
       iquat = bonus[ellipsoid[i]].quat;
       MathExtra::quat_to_mat_trans(iquat,a1);
       MathExtra::diag_times3(well[itype],a1,temp);
       MathExtra::transpose_times3(a1,temp,b1);
       MathExtra::diag_times3(shape2[itype],a1,temp);
       MathExtra::transpose_times3(a1,temp,g1);
     }
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // the special-bond scaling is selected from the raw neighbor entry
       // before it is masked down to the atom index
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       // r12 = center to center vector
 
       r12[0] = x[j][0]-x[i][0];
       r12[1] = x[j][1]-x[i][1];
       r12[2] = x[j][2]-x[i][2];
       rsq = MathExtra::dot3(r12,r12);
       jtype = type[j];
 
       // compute if less than cutoff
 
       if (rsq < cutsq[itype][jtype]) {
 
         switch (form[itype][jtype]) {
         case SPHERE_SPHERE:
           r2inv = 1.0/rsq;
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
           // negative sign because r12 points from i to j
           forcelj *= -r2inv;
           if (eflag) one_eng =
             r6inv*(r6inv*lj3[itype][jtype]-lj4[itype][jtype]) -
             offset[itype][jtype];
           fforce[0] = r12[0]*forcelj;
           fforce[1] = r12[1]*forcelj;
           fforce[2] = r12[2]*forcelj;
           ttor[0] = ttor[1] = ttor[2] = 0.0;
           rtor[0] = rtor[1] = rtor[2] = 0.0;
           break;
 
         case SPHERE_ELLIPSE:
           jquat = bonus[ellipsoid[j]].quat;
           MathExtra::quat_to_mat_trans(jquat,a2);
           MathExtra::diag_times3(well[jtype],a2,temp);
           MathExtra::transpose_times3(a2,temp,b2);
           MathExtra::diag_times3(shape2[jtype],a2,temp);
           MathExtra::transpose_times3(a2,temp,g2);
           // ellipsoid j plays the first-argument role here; its torque
           // lands in rtor, which is not applied below
           one_eng = gayberne_lj(j,i,a2,b2,g2,r12,rsq,fforce,rtor);
           ttor[0] = ttor[1] = ttor[2] = 0.0;
           break;
 
         case ELLIPSE_SPHERE:
           one_eng = gayberne_lj(i,j,a1,b1,g1,r12,rsq,fforce,ttor);
           rtor[0] = rtor[1] = rtor[2] = 0.0;
           break;
 
         default:
           jquat = bonus[ellipsoid[j]].quat;
           MathExtra::quat_to_mat_trans(jquat,a2);
           MathExtra::diag_times3(well[jtype],a2,temp);
           MathExtra::transpose_times3(a2,temp,b2);
           MathExtra::diag_times3(shape2[jtype],a2,temp);
           MathExtra::transpose_times3(a2,temp,g2);
           one_eng = gayberne_analytic(i,j,a1,a2,b1,b2,g1,g2,r12,rsq,
                                       fforce,ttor,rtor);
           break;
         }
 
         fforce[0] *= factor_lj;
         fforce[1] *= factor_lj;
         fforce[2] *= factor_lj;
         ttor[0] *= factor_lj;
         ttor[1] *= factor_lj;
         ttor[2] *= factor_lj;
 
         f[i][0] += fforce[0];
         f[i][1] += fforce[1];
         f[i][2] += fforce[2];
         tor[i][0] += ttor[0];
         tor[i][1] += ttor[1];
         tor[i][2] += ttor[2];
 
         if (eflag) evdwl = factor_lj*one_eng;
 
         // the tally gets the separation with its sign flipped (-r12)
         if (evflag) ev_tally_xyz_full(i,evdwl,0.0,fforce[0],fforce[1],fforce[2],
                                       -r12[0],-r12[1],-r12[2]);
       }
     }
   }
 }
diff --git a/src/GPU/pair_lj96_cut_gpu.cpp b/src/GPU/pair_lj96_cut_gpu.cpp
index e870643fb..7dacbf68b 100644
--- a/src/GPU/pair_lj96_cut_gpu.cpp
+++ b/src/GPU/pair_lj96_cut_gpu.cpp
@@ -1,230 +1,230 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Mike Brown (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_lj96_cut_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 int lj96_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                   double **host_lj2, double **host_lj3, double **host_lj4,
                   double **offset, double *special_lj, const int nlocal,
                   const int nall, const int max_nbors, const int maxspecial,
                   const double cell_size, int &gpu_mode, FILE *screen);
 void lj96_gpu_clear();
 int ** lj96_gpu_compute_n(const int ago, const int inum, const int nall,
                           double **host_x, int *host_type, double *sublo,
                           double *subhi, tagint *tag, int **nspecial,
                           tagint **special, const bool eflag, const bool vflag,
                           const bool eatom, const bool vatom, int &host_start,
                           int **ilist, int **jnum,
                           const double cpu_time, bool &success);
 void lj96_gpu_compute(const int ago, const int inum, const int nall,
                       double **host_x, int *host_type, int *ilist, int *numj,
                       int **firstneigh, const bool eflag, const bool vflag,
                       const bool eatom, const bool vatom, int &host_start,
                       const double cpu_time, bool &success);
 double lj96_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 PairLJ96CutGPU::PairLJ96CutGPU(LAMMPS *lmp) : PairLJ96Cut(lmp), gpu_mode(GPU_FORCE)
 {
   // compute() passes cpu_time to the GPU library, so give it a defined
   // value before the first host-side timing has been taken
   cpu_time = 0.0;
 
   reinitflag = 0;
   respa_enable = 0;
 
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairLJ96CutGPU::~PairLJ96CutGPU()
 {
   // lj96_gpu_clear() lives in the external GPU library; presumably it
   // releases the data set up by lj96_gpu_init() -- confirm in the library
   lj96_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJ96CutGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
   } else {
     evflag = vflag_fdotr = 0;
   }
 
   const int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   int *ilist, *numneigh, **firstneigh;
   bool success = true;
 
   if (gpu_mode == GPU_FORCE) {
     // neighbor list built on the host is handed to the library
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     lj96_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                      ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                      vflag_atom, host_start, cpu_time, success);
   } else {
     // the library call supplies ilist/numneigh through its output
     // arguments and firstneigh through its return value
     inum = atom->nlocal;
     firstneigh = lj96_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                     atom->type, domain->sublo, domain->subhi,
                                     atom->tag, atom->nspecial, atom->special,
                                     eflag, vflag, eflag_atom, vflag_atom,
                                     host_start, &ilist, &numneigh, cpu_time,
                                     success);
   }
 
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   // entries host_start..inum-1 are evaluated on the host; the wall time
   // spent there is kept in cpu_time, which is passed to the library above
   if (host_start < inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLJ96CutGPU::init_style()
 {
   // clears cut_respa, rejects newton pair on, fills cutsq for all type
   // pairs, initializes the GPU library and, when gpu_mode is GPU_FORCE,
   // requests a full neighbor list
 
   cut_respa = NULL;
 
   if (force->newton_pair)
     error->all(FLERR,"Cannot use newton pair with lj96/cut/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       // a pair is treated as set if it was set explicitly or if both of its
       // diagonal entries were set
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         // from here on cut holds the squared cutoff
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   // largest cutoff plus the neighbor skin
   // NOTE(review): maxcut stays -1.0 if no pair is set, which makes
   // sqrt(maxcut) a NaN -- confirm that case cannot reach this point
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   // maxspecial stays 0 unless atom->molecular is set
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // NOTE(review): 300 is passed as max_nbors; presumably an initial
   // per-atom neighbor estimate -- confirm against the GPU library
   int success = lj96_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
                               offset, force->special_lj, atom->nlocal,
                               atom->nlocal+atom->nghost, 300, maxspecial,
                               cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
   // gpu_mode is passed by reference to lj96_gpu_init() above, so the value
   // tested here may differ from the one set in the constructor
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLJ96CutGPU::memory_usage()
 {
   double bytes = Pair::memory_usage();
   return bytes + lj96_gpu_bytes();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJ96CutGPU::cpu_compute(int start, int inum, int eflag, int vflag,
                                  int *ilist, int *numneigh, int **firstneigh)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r2inv,r3inv,r6inv,forcelj,factor_lj;
   int *jlist;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   double *special_lj = force->special_lj;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         r6inv = r2inv*r2inv*r2inv;
         r3inv = sqrt(r6inv);
         forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
         fpair = factor_lj*forcelj*r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
             offset[itype][jtype];
           evdwl *= factor_lj;
         }
 
         if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_lj_charmm_coul_long_gpu.cpp b/src/GPU/pair_lj_charmm_coul_long_gpu.cpp
index 52266d840..da9974e7f 100644
--- a/src/GPU/pair_lj_charmm_coul_long_gpu.cpp
+++ b/src/GPU/pair_lj_charmm_coul_long_gpu.cpp
@@ -1,341 +1,341 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Mike Brown (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_lj_charmm_coul_long_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "kspace.h"
 #include "gpu_extra.h"
 
 // Constants for the erfc() approximation evaluated in cpu_compute():
 //   t = 1/(1 + EWALD_P*x),  erfc(x) ~= t*(A1+t*(A2+t*(A3+t*(A4+t*A5))))*exp(-x*x)
 // EWALD_P and A1..A5 match Abramowitz & Stegun formula 7.1.26.
 // EWALD_F is numerically 2/sqrt(pi).
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 // (declared here only; no definitions appear in this file)
 
 // Called once from init_style(); the returned int is handed to
 // GPU_EXTRA::check_flag(). gpu_mode is passed by non-const reference.
 int crml_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1,
                   double **host_lj2, double **host_lj3, double **host_lj4,
                   double **offset, double *special_lj, const int nlocal,
                   const int nall, const int max_nbors, const int maxspecial,
                   const double cell_size, int &gpu_mode, FILE *screen,
                   double host_cut_ljsq, double host_cut_coulsq,
                   double *host_special_coul, const double qqrd2e,
                   const double g_ewald, const double cut_lj_innersq,
                   const double denom_lj, double **epsilon, double **sigma,
                   const bool mix_arithmetic);
 // Called from the destructor.
 void crml_gpu_clear();
 // Called from compute() when gpu_mode != GPU_FORCE. The returned int** is
 // used there as firstneigh; ilist and jnum are expected to be set through
 // the pointer arguments, since compute() reads them afterwards.
 int ** crml_gpu_compute_n(const int ago, const int inum,
                           const int nall, double **host_x, int *host_type,
                           double *sublo, double *subhi, tagint *tag,
                           int **nspecial, tagint **special, const bool eflag,
                           const bool vflag, const bool eatom, const bool vatom,
                           int &host_start, int **ilist, int **jnum,
                           const double cpu_time, bool &success, double *host_q,
                           double *boxlo, double *prd);
 // Called from compute() when gpu_mode == GPU_FORCE, with the neighbor
 // arrays taken from the pair style's own list.
 void crml_gpu_compute(const int ago, const int inum, const int nall,
                       double **host_x, int *host_type, int *ilist, int *numj,
                       int **firstneigh, const bool eflag, const bool vflag,
                       const bool eatom, const bool vatom, int &host_start,
                       const double cpu_time, bool &success, double *host_q,
                       const int nlocal, double *boxlo, double *prd);
 // Its result is added to Pair::memory_usage() in memory_usage().
 double crml_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCharmmCoulLongGPU::PairLJCharmmCoulLongGPU(LAMMPS *lmp)
   : PairLJCharmmCoulLong(lmp),
     gpu_mode(GPU_FORCE)
 {
   // independent resets of the host timer and two flags
   cpu_time = 0.0;
   reinitflag = 0;
   respa_enable = 0;
 
   // performed last, after the members above are set
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
    the only action taken here is the call to crml_gpu_clear()
 ------------------------------------------------------------------------- */
 
 PairLJCharmmCoulLongGPU::~PairLJCharmmCoulLongGPU()
 {
   // NOTE(review): presumably releases what crml_gpu_init() set up -- confirm
   // against the GPU library
   crml_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLongGPU::compute(int eflag, int vflag)
 {
   // energy/virial bookkeeping is set up only when one of the flags is set
   if (!(eflag || vflag)) {
     evflag = vflag_fdotr = 0;
   } else {
     ev_setup(eflag,vflag);
   }
 
   const int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   int *ilist, *numneigh, **firstneigh;
   bool success = true;
 
   if (gpu_mode == GPU_FORCE) {
     // neighbor arrays come from this pair style's list
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     crml_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                      ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                      vflag_atom, host_start, cpu_time, success, atom->q,
                      atom->nlocal, domain->boxlo, domain->prd);
   } else {
     // neighbor arrays come back from the library call
     inum = atom->nlocal;
     firstneigh = crml_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                     atom->type, domain->sublo, domain->subhi,
                                     atom->tag, atom->nspecial, atom->special,
                                     eflag, vflag, eflag_atom, vflag_atom,
                                     host_start, &ilist, &numneigh, cpu_time,
                                     success, atom->q, domain->boxlo,
                                     domain->prd);
   }
 
   if (!success) {
     error->one(FLERR,"Insufficient memory on accelerator");
   }
 
   // entries host_start .. inum-1 are processed by cpu_compute(); the wall
   // time of that call is stored in cpu_time
   if (host_start < inum) {
     const double t_begin = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - t_begin;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLongGPU::init_style()
 {
   // cut_respa is cleared here and later passed to init_tables()
   cut_respa = NULL;
 
   // preconditions: per-atom charge must exist, newton pair must be off
   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/charmm/coul/long/gpu requires atom attribute q");
   if (force->newton_pair)
     error->all(FLERR,"Cannot use newton pair with lj/charmm/coul/long/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   // (the value returned by init_one() is stored in cut but not read again
   // in this function)
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0))
         cut = init_one(i,j);
     }
   }
 
   // squared cutoffs; cut_bothsq is the larger of the LJ and Coulomb ones
   cut_lj_innersq = cut_lj_inner * cut_lj_inner;
   cut_ljsq = cut_lj * cut_lj;
   cut_coulsq = cut_coul * cut_coul;
   cut_bothsq = MAX(cut_ljsq,cut_coulsq);
 
   // (cut_ljsq - cut_lj_innersq)^3, the denominator used with switch1 and
   // switch2 in cpu_compute()
   denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) *
     (cut_ljsq-cut_lj_innersq);
 
   double cell_size = sqrt(cut_bothsq) + neighbor->skin;
 
   // ensure use of KSpace long-range solver, set g_ewald
 
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style is incompatible with KSpace style");
   g_ewald = force->kspace->g_ewald;
 
   // setup force tables
 
   if (ncoultablebits) init_tables(cut_coul,cut_respa);
 
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
 
   // arithmetic stays true only if, for every i<j, epsilon[i][j] equals the
   // geometric mean of the diagonal values and sigma[i][j] equals their
   // arithmetic mean; both tests are exact floating-point comparisons
   bool arithmetic = true;
   for (int i = 1; i < atom->ntypes + 1; i++)
     for (int j = i + 1; j < atom->ntypes + 1; j++) {
       if (epsilon[i][j] != sqrt(epsilon[i][i] * epsilon[j][j]))
         arithmetic = false;
       if (sigma[i][j] != 0.5 * (sigma[i][i] + sigma[j][j]))
         arithmetic = false;
     }
 
   // the literal 300 lands in the max_nbors slot of the crml_gpu_init()
   // prototype; gpu_mode is passed by reference and tested right after
   int success = crml_gpu_init(atom->ntypes+1, cut_bothsq, lj1, lj2, lj3, lj4,
                               offset, force->special_lj, atom->nlocal,
                               atom->nlocal+atom->nghost, 300, maxspecial,
                               cell_size, gpu_mode, screen, cut_ljsq,
                               cut_coulsq, force->special_coul, force->qqrd2e,
                               g_ewald, cut_lj_innersq,denom_lj,epsilon,sigma,
                               arithmetic);
   GPU_EXTRA::check_flag(success,error,world);
 
   // a full (not half) neighbor list is requested only in GPU_FORCE mode
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLJCharmmCoulLongGPU::memory_usage()
 {
   double bytes = Pair::memory_usage();
   return bytes + crml_gpu_bytes();
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Host-side pair loop for entries ilist[start] .. ilist[inum-1].
 // Evaluates the Coulomb term (analytic erfc approximation or table lookup)
 // and the switched 12-6 LJ term, adds the force to f[i] only, and tallies
 // through ev_tally_full() when evflag is set. vflag is not read here.
 void PairLJCharmmCoulLongGPU::cpu_compute(int start, int inum, int eflag,
                                           int vflag, int *ilist,
                                           int *numneigh, int **firstneigh)
 {
   int i,j,ii,jj,jnum,itype,jtype,itable;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double fraction,table;
   double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double grij,expm2,prefactor,t,erfc;
   double philj,switch1,switch2;
   int *jlist;
   double rsq;
 
   // zeroed so ev_tally_full() gets defined values even when eflag is 0
   evdwl = ecoul = 0.0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   double qqrd2e = force->qqrd2e;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // both scaling factors are looked up from the raw neighbor entry
       // before it is masked down to the index used for x[], q[] and type[]
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cut_bothsq) {
         r2inv = 1.0/rsq;
 
         if (rsq < cut_coulsq) {
           // analytic path when tables are disabled or rsq is at/below
           // tabinnersq; otherwise interpolate in the precomputed tables
           if (!ncoultablebits || rsq <= tabinnersq) {
             r = sqrt(rsq);
             grij = g_ewald * r;
             expm2 = exp(-grij*grij);
             t = 1.0 / (1.0 + EWALD_P*grij);
             erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
             prefactor = qqrd2e * qtmp*q[j]/r;
             forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
             if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
           } else {
             // table index is taken from the bit pattern of rsq stored as
             // a float (masked, then shifted)
             union_int_float_t rsq_lookup;
             rsq_lookup.f = rsq;
             itable = rsq_lookup.i & ncoulmask;
             itable >>= ncoulshiftbits;
             fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
             table = ftable[itable] + fraction*dftable[itable];
             forcecoul = qtmp*q[j] * table;
             // prefactor is set on this path only when factor_coul < 1.0;
             // the energy section reads it under the same condition
             if (factor_coul < 1.0) {
               table = ctable[itable] + fraction*dctable[itable];
               prefactor = qtmp*q[j] * table;
               forcecoul -= (1.0-factor_coul)*prefactor;
             }
           }
         } else forcecoul = 0.0;
 
         if (rsq < cut_ljsq) {
           r6inv = r2inv*r2inv*r2inv;
           jtype = type[j];
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
           // between the inner and outer LJ cutoffs the force is blended
           // using switch1/switch2 (denom_lj is set in init_style())
           if (rsq > cut_lj_innersq) {
             switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
               (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
             switch2 = 12.0*rsq * (cut_ljsq-rsq) *
               (rsq-cut_lj_innersq) / denom_lj;
             philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
             forcelj = forcelj*switch1 + philj*switch2;
           }
         } else forcelj = 0.0;
 
         // factor_lj scales only the LJ part; the Coulomb scaling was
         // already applied to forcecoul above
         fpair = (forcecoul + factor_lj*forcelj) * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           // reuses prefactor/erfc or itable/fraction from the force
           // section above, chosen by the same conditions
           if (rsq < cut_coulsq) {
             if (!ncoultablebits || rsq <= tabinnersq)
               ecoul = prefactor*erfc;
             else {
               table = etable[itable] + fraction*detable[itable];
               ecoul = qtmp*q[j] * table;
             }
             if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
           } else ecoul = 0.0;
 
           if (rsq < cut_ljsq) {
             evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
             if (rsq > cut_lj_innersq) {
               switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
                 (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
               evdwl *= switch1;
             }
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_lj_class2_coul_long_gpu.cpp b/src/GPU/pair_lj_class2_coul_long_gpu.cpp
index 1bff03886..1ec4f5aa8 100644
--- a/src/GPU/pair_lj_class2_coul_long_gpu.cpp
+++ b/src/GPU/pair_lj_class2_coul_long_gpu.cpp
@@ -1,287 +1,287 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Mike Brown (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_lj_class2_coul_long_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "kspace.h"
 #include "gpu_extra.h"
 
 // Constants for the erfc() approximation evaluated in cpu_compute():
 //   t = 1/(1 + EWALD_P*x),  erfc(x) ~= t*(A1+t*(A2+t*(A3+t*(A4+t*A5))))*exp(-x*x)
 // EWALD_P and A1..A5 match Abramowitz & Stegun formula 7.1.26.
 // EWALD_F is numerically 2/sqrt(pi).
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 // (declared here only; no definitions appear in this file)
 
 // Called once from init_style(); the returned int is handed to
 // GPU_EXTRA::check_flag(). gpu_mode is passed by non-const reference.
 int c2cl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                   double **host_lj2, double **host_lj3, double **host_lj4,
                   double **offset, double *special_lj, const int nlocal,
                   const int nall, const int max_nbors, const int maxspecial,
                   const double cell_size, int &gpu_mode, FILE *screen,
                   double **host_cut_ljsq, double host_cut_coulsq,
                   double *host_special_coul, const double qqrd2e,
                   const double g_ewald);
 // Called from the destructor.
 void c2cl_gpu_clear();
 // Called from compute() when gpu_mode != GPU_FORCE. The returned int** is
 // used there as firstneigh; ilist and jnum are expected to be set through
 // the pointer arguments, since compute() reads them afterwards.
 int ** c2cl_gpu_compute_n(const int ago, const int inum,
                           const int nall, double **host_x, int *host_type,
                           double *sublo, double *subhi, tagint *tag,
                           int **nspecial, tagint **special, const bool eflag,
                           const bool vflag, const bool eatom, const bool vatom,
                           int &host_start, int **ilist, int **jnum,
                           const double cpu_time, bool &success, double *host_q,
                           double *boxlo, double *prd);
 // Called from compute() when gpu_mode == GPU_FORCE, with the neighbor
 // arrays taken from the pair style's own list.
 void c2cl_gpu_compute(const int ago, const int inum, const int nall,
                       double **host_x, int *host_type, int *ilist, int *numj,
                       int **firstneigh, const bool eflag, const bool vflag,
                       const bool eatom, const bool vatom, int &host_start,
                       const double cpu_time, bool &success, double *host_q,
                       const int nlocal, double *boxlo, double *prd);
 // Its result is added to Pair::memory_usage() in memory_usage().
 double c2cl_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 PairLJClass2CoulLongGPU::PairLJClass2CoulLongGPU(LAMMPS *lmp)
   : PairLJClass2CoulLong(lmp),
     gpu_mode(GPU_FORCE)
 {
   // independent resets of one flag and the host timer
   reinitflag = 0;
   cpu_time = 0.0;
 
   // performed last, after the members above are set
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
    the only action taken here is the call to c2cl_gpu_clear()
 ------------------------------------------------------------------------- */
 
 PairLJClass2CoulLongGPU::~PairLJClass2CoulLongGPU()
 {
   // NOTE(review): presumably releases what c2cl_gpu_init() set up -- confirm
   // against the GPU library
   c2cl_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJClass2CoulLongGPU::compute(int eflag, int vflag)
 {
   // energy/virial bookkeeping is set up only when one of the flags is set
   if (!(eflag || vflag)) {
     evflag = vflag_fdotr = 0;
   } else {
     ev_setup(eflag,vflag);
   }
 
   const int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   int *ilist, *numneigh, **firstneigh;
   bool success = true;
 
   if (gpu_mode == GPU_FORCE) {
     // neighbor arrays come from this pair style's list
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     c2cl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                      ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                      vflag_atom, host_start, cpu_time, success, atom->q,
                      atom->nlocal, domain->boxlo, domain->prd);
   } else {
     // neighbor arrays come back from the library call
     inum = atom->nlocal;
     firstneigh = c2cl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                     atom->type, domain->sublo, domain->subhi,
                                     atom->tag, atom->nspecial, atom->special,
                                     eflag, vflag, eflag_atom, vflag_atom,
                                     host_start, &ilist, &numneigh, cpu_time,
                                     success, atom->q, domain->boxlo,
                                     domain->prd);
   }
 
   if (!success) {
     error->one(FLERR,"Insufficient memory on accelerator");
   }
 
   // entries host_start .. inum-1 are processed by cpu_compute(); the wall
   // time of that call is stored in cpu_time
   if (host_start < inum) {
     const double t_begin = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - t_begin;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLJClass2CoulLongGPU::init_style()
 {
   // preconditions: per-atom charge must exist, newton pair must be off
   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/class2/coul/long/gpu requires atom attribute q");
   if (force->newton_pair)
     error->all(FLERR,"Cannot use newton pair with lj/class2/coul/long/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   // cutsq is filled symmetrically; pairs that are not set get 0.0.
   // maxcut tracks the largest squared cutoff seen.
   // NOTE(review): maxcut stays at -1.0 if no pair passes the setflag test,
   // which would make sqrt(maxcut) below NaN -- confirm this cannot happen
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   cut_coulsq = cut_coul * cut_coul;
 
   // ensure use of KSpace long-range solver, set g_ewald
 
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style is incompatible with KSpace style");
   g_ewald = force->kspace->g_ewald;
 
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // the literal 300 lands in the max_nbors slot of the c2cl_gpu_init()
   // prototype; gpu_mode is passed by reference and tested right after
   int success = c2cl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
                               offset, force->special_lj, atom->nlocal,
                               atom->nlocal+atom->nghost, 300, maxspecial,
                               cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
                               force->special_coul, force->qqrd2e, g_ewald);
   GPU_EXTRA::check_flag(success,error,world);
 
   // a full (not half) neighbor list is requested only in GPU_FORCE mode
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLJClass2CoulLongGPU::memory_usage()
 {
   double bytes = Pair::memory_usage();
   return bytes + c2cl_gpu_bytes();
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Host-side pair loop for entries ilist[start] .. ilist[inum-1].
 // Evaluates the Coulomb term with the analytic erfc approximation and the
 // 9-6 LJ term, adds the force to f[i] only, and tallies through
 // ev_tally_full() when evflag is set. vflag is not read here.
 void PairLJClass2CoulLongGPU::cpu_compute(int start, int inum, int eflag,
                                        int vflag, int *ilist, int *numneigh,
                                        int **firstneigh)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double rsq,r,rinv,r2inv,r3inv,r6inv,forcecoul,forcelj;
   double grij,expm2,prefactor,t,erfc;
   double factor_coul,factor_lj;
   int *jlist;
 
   // zeroed so ev_tally_full() gets defined values even when eflag is 0
   evdwl = ecoul = 0.0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   double qqrd2e = force->qqrd2e;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // both scaling factors are looked up from the raw neighbor entry
       // before it is masked down to the index used for x[], q[] and type[]
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
 
         if (rsq < cut_coulsq) {
           r = sqrt(rsq);
           grij = g_ewald * r;
           expm2 = exp(-grij*grij);
           t = 1.0 / (1.0 + EWALD_P*grij);
           erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
           prefactor = qqrd2e * qtmp*q[j]/r;
           forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
           if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
         } else forcecoul = 0.0;
 
         // the LJ cutoff is per type pair (cut_ljsq is indexed by type)
         if (rsq < cut_ljsq[itype][jtype]) {
           rinv = sqrt(r2inv);
           r3inv = r2inv*rinv;
           r6inv = r3inv*r3inv;
           forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
         } else forcelj = 0.0;
 
         // factor_lj scales only the LJ part; the Coulomb scaling was
         // already applied to forcecoul above
         fpair = (forcecoul + factor_lj*forcelj) * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           // prefactor, erfc, r3inv and r6inv are reused from the force
           // section, guarded by the same cutoff tests
           if (rsq < cut_coulsq) {
             ecoul = prefactor*erfc;
             if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
           } else ecoul = 0.0;
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
               offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_lj_class2_gpu.cpp b/src/GPU/pair_lj_class2_gpu.cpp
index 9b97abfd6..b34fc6070 100644
--- a/src/GPU/pair_lj_class2_gpu.cpp
+++ b/src/GPU/pair_lj_class2_gpu.cpp
@@ -1,227 +1,227 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Mike Brown (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_lj_class2_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 // (declared here only; no definitions appear in this file). This pair
 // style calls the lj96_* entry points.
 
 // Called once from init_style(); the returned int is handed to
 // GPU_EXTRA::check_flag(). gpu_mode is passed by non-const reference.
 int lj96_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                   double **host_lj2, double **host_lj3, double **host_lj4,
                   double **offset, double *special_lj, const int nlocal,
                   const int nall, const int max_nbors, const int maxspecial,
                   const double cell_size, int &gpu_mode, FILE *screen);
 // Called from the destructor.
 void lj96_gpu_clear();
 // Called from compute() when gpu_mode != GPU_FORCE. The returned int** is
 // used there as firstneigh; ilist and jnum are expected to be set through
 // the pointer arguments, since compute() reads them afterwards.
 int ** lj96_gpu_compute_n(const int ago, const int inum, const int nall,
                           double **host_x, int *host_type, double *sublo,
                           double *subhi, tagint *tag, int **nspecial,
                           tagint **special, const bool eflag, const bool vflag,
                           const bool eatom, const bool vatom, int &host_start,
                           int **ilist, int **jnum,
                           const double cpu_time, bool &success);
 // Called from compute() when gpu_mode == GPU_FORCE, with the neighbor
 // arrays taken from the pair style's own list.
 void lj96_gpu_compute(const int ago, const int inum, const int nall,
                       double **host_x, int *host_type, int *ilist, int *numj,
                       int **firstneigh, const bool eflag, const bool vflag,
                       const bool eatom, const bool vatom, int &host_start,
                       const double cpu_time, bool &success);
 // Its result is added to Pair::memory_usage() in memory_usage().
 double lj96_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 PairLJClass2GPU::PairLJClass2GPU(LAMMPS *lmp)
   : PairLJClass2(lmp),
     gpu_mode(GPU_FORCE)
 {
   // independent resets of the host timer and one flag
   cpu_time = 0.0;
   reinitflag = 0;
 
   // performed last, after the members above are set
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
    the only action taken here is the call to lj96_gpu_clear()
 ------------------------------------------------------------------------- */
 
 PairLJClass2GPU::~PairLJClass2GPU()
 {
   // NOTE(review): presumably releases what lj96_gpu_init() set up -- confirm
   // against the GPU library
   lj96_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJClass2GPU::compute(int eflag, int vflag)
 {
   // energy/virial bookkeeping is set up only when one of the flags is set
   if (!(eflag || vflag)) {
     evflag = vflag_fdotr = 0;
   } else {
     ev_setup(eflag,vflag);
   }
 
   const int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   int *ilist, *numneigh, **firstneigh;
   bool success = true;
 
   if (gpu_mode == GPU_FORCE) {
     // neighbor arrays come from this pair style's list
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     lj96_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                      ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                      vflag_atom, host_start, cpu_time, success);
   } else {
     // neighbor arrays come back from the library call
     inum = atom->nlocal;
     firstneigh = lj96_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                     atom->type, domain->sublo, domain->subhi,
                                     atom->tag, atom->nspecial, atom->special,
                                     eflag, vflag, eflag_atom, vflag_atom,
                                     host_start, &ilist, &numneigh, cpu_time,
                                     success);
   }
 
   if (!success) {
     error->one(FLERR,"Insufficient memory on accelerator");
   }
 
   // entries host_start .. inum-1 are processed by cpu_compute(); the wall
   // time of that call is stored in cpu_time
   if (host_start < inum) {
     const double t_begin = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - t_begin;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLJClass2GPU::init_style()
 {
   // precondition: newton pair must be off
   if (force->newton_pair)
     error->all(FLERR,"Cannot use newton pair with lj/class2/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   // cutsq is filled symmetrically; pairs that are not set get 0.0.
   // maxcut tracks the largest squared cutoff seen.
   // NOTE(review): maxcut stays at -1.0 if no pair passes the setflag test,
   // which would make sqrt(maxcut) below NaN -- confirm this cannot happen
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // the literal 300 lands in the max_nbors slot of the lj96_gpu_init()
   // prototype; gpu_mode is passed by reference and tested right after
   int success = lj96_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
                               offset, force->special_lj, atom->nlocal,
                               atom->nlocal+atom->nghost, 300, maxspecial,
                               cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
   // a full (not half) neighbor list is requested only in GPU_FORCE mode
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLJClass2GPU::memory_usage()
 {
   double bytes = Pair::memory_usage();
   return bytes + lj96_gpu_bytes();
 }
 
 /* ---------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    pair loop executed on the host for entries ilist[start] .. ilist[inum-1]
    start      = first index into ilist handled here
    inum       = one past the last index into ilist handled here
    eflag      = nonzero to evaluate the pair energy
    vflag      = not read in this routine
    ilist      = atom indices to process
    numneigh   = neighbor count, indexed by atom index
    firstneigh = neighbor arrays, indexed by atom index
    only f[i] is updated; f[j] is never written
 ------------------------------------------------------------------------- */
 void PairLJClass2GPU::cpu_compute(int start, int inum, int eflag, int vflag,
                                  int *ilist, int *numneigh, int **firstneigh)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r2inv,r3inv,r6inv,forcelj,factor_lj;
   int *jlist;
 
   // evdwl is passed to ev_tally_full() whenever evflag is set, including
   // the case eflag == 0 where the energy branch below is skipped; zero it
   // so an indeterminate value is never read
   evdwl = 0.0;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   double *special_lj = force->special_lj;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // scaling factor is looked up from the raw neighbor entry before the
       // entry is masked down to the index used for x[] and type[]
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         r6inv = r2inv*r2inv*r2inv;
         r3inv = sqrt(r6inv);
         // 9-6 form: the repulsive term carries r6inv*r3inv
         forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
         fpair = factor_lj*forcelj*r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
             offset[itype][jtype];
           evdwl *= factor_lj;
         }
 
         if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_lj_cut_coul_cut_gpu.cpp b/src/GPU/pair_lj_cut_coul_cut_gpu.cpp
index 6c79c6611..9a964e366 100644
--- a/src/GPU/pair_lj_cut_coul_cut_gpu.cpp
+++ b/src/GPU/pair_lj_cut_coul_cut_gpu.cpp
@@ -1,259 +1,259 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Mike Brown (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_lj_cut_coul_cut_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 // one-time library setup for this style, called from init_style(); the int
 // return value is handed to GPU_EXTRA::check_flag() there.  gpu_mode is
 // taken by reference.
 int ljc_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                  double **host_lj2, double **host_lj3, double **host_lj4,
                  double **offset, double *special_lj, const int nlocal,
                  const int nall, const int max_nbors, const int maxspecial,
                  const double cell_size, int &gpu_mode, FILE *screen,
                  double **host_cut_ljsq, double **host_cut_coulsq,
                  double *host_special_coul, const double qqrd2e);
 // called from the destructor of PairLJCutCoulCutGPU
 void ljc_gpu_clear();
 // force computation used by compute() when gpu_mode != GPU_FORCE; the return
 // value is used as firstneigh, and ilist, jnum, host_start and success are
 // handed back through the pointer/reference arguments
 int ** ljc_gpu_compute_n(const int ago, const int inum,
                          const int nall, double **host_x, int *host_type,
                          double *sublo, double *subhi, tagint *tag, int **nspecial,
                          tagint **special, const bool eflag, const bool vflag,
                          const bool eatom, const bool vatom, int &host_start,
                          int **ilist, int **jnum, const double cpu_time,
                          bool &success, double *host_q, double *boxlo,
                          double *prd);
 // force computation used by compute() when gpu_mode == GPU_FORCE; the caller
 // supplies the neighbor list (ilist, numj, firstneigh), and host_start and
 // success are handed back through the reference arguments
 void ljc_gpu_compute(const int ago, const int inum,
                       const int nall, double **host_x, int *host_type,
                      int *ilist, int *numj, int **firstneigh,
                      const bool eflag, const bool vflag, const bool eatom,
                      const bool vatom, int &host_start, const double cpu_time,
                      bool &success, double *host_q, const int nlocal,
                      double *boxlo, double *prd);
 // value added to Pair::memory_usage() in memory_usage()
 double ljc_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCutCoulCutGPU::PairLJCutCoulCutGPU(LAMMPS *lmp)
   : PairLJCutCoulCut(lmp), gpu_mode(GPU_FORCE)
 {
   // switch off the respa_enable and reinitflag settings for this style
   respa_enable = 0;
   reinitflag = 0;
 
   // cpu_time is overwritten in compute() with the elapsed time of
   // cpu_compute(); start it at zero
   cpu_time = 0.0;
 
   // NOTE(review): presumably verifies that the GPU package is set up;
   // gpu_ready() is defined outside this file
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairLJCutCoulCutGPU::~PairLJCutCoulCutGPU()
 {
   // ljc_gpu_clear() is an external library routine (prototype near the top
   // of this file); presumably it releases the library-side data for this
   // style
   ljc_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCoulCutGPU::compute(int eflag, int vflag)
 {
   // energy/virial bookkeeping: run ev_setup() if anything was requested,
   // otherwise just clear the two flags
 
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
   } else {
     evflag = vflag_fdotr = 0;
   }
 
   const int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   int *ilist, *numneigh, **firstneigh;
   bool success = true;
 
   if (gpu_mode == GPU_FORCE) {
     // the neighbor list of this pair style is handed to the library
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     ljc_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                     vflag_atom, host_start, cpu_time, success, atom->q,
                     atom->nlocal, domain->boxlo, domain->prd);
   } else {
     // the library hands back the neighbor arrays (ilist, numneigh and
     // firstneigh) for all local atoms
     inum = atom->nlocal;
     firstneigh = ljc_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                    atom->type, domain->sublo, domain->subhi,
                                    atom->tag, atom->nspecial, atom->special,
                                    eflag, vflag, eflag_atom, vflag_atom,
                                    host_start, &ilist, &numneigh, cpu_time,
                                    success, atom->q, domain->boxlo,
                                    domain->prd);
   }
 
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   // atoms ilist[host_start..inum-1] are left for the host; nothing to do
   // if that range is empty
 
   if (host_start >= inum) return;
 
   // time the host part; cpu_time is passed to the library on the next call
 
   cpu_time = MPI_Wtime();
   cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
   cpu_time = MPI_Wtime() - cpu_time;
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulCutGPU::init_style()
 {
   // this style needs per-atom charge and refuses to run with newton pair on
 
   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/cut/coul/cut/gpu requires atom attribute q");
 
   if (force->newton_pair)
     error->all(FLERR,"Cannot use newton pair with lj/cut/coul/cut/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   // cutsq is filled symmetrically from init_one(); a type pair is skipped
   // (set to 0.0) unless it was set explicitly or both of its diagonal
   // entries were set.  maxcut tracks the largest squared cutoff.
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   // largest cutoff plus the neighbor skin
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   // maxspecial is only taken from the atom class for molecular systems
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // the literal 300 lands in the max_nbors parameter of ljc_gpu_init();
   // gpu_mode is passed by reference, so the call may change it
   int success = ljc_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
                              offset, force->special_lj, atom->nlocal,
                              atom->nlocal+atom->nghost, 300, maxspecial,
                              cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
                              force->special_coul, force->qqrd2e);
   GPU_EXTRA::check_flag(success,error,world);
 
   // in GPU_FORCE mode compute() reads this style's own neighbor list, so
   // request one and mark it as full instead of half
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLJCutCoulCutGPU::memory_usage()
 {
   // total reported by the base Pair class ...
   const double base_bytes = Pair::memory_usage();
   // ... plus the value returned by the external ljc_gpu_bytes() routine
   const double lib_bytes = ljc_gpu_bytes();
   return base_bytes + lib_bytes;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCoulCutGPU::cpu_compute(int start, int inum, int eflag, int vflag,
                                       int *ilist, int *numneigh,
                                       int **firstneigh)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   int *jlist;
 
   evdwl = ecoul = 0.0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   double qqrd2e = force->qqrd2e;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
 
         if (rsq < cut_coulsq[itype][jtype])
           forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
         else forcecoul = 0.0;
 
         if (rsq < cut_ljsq[itype][jtype]) {
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
         } else forcelj = 0.0;
 
         fpair = (factor_coul*forcecoul + factor_lj*forcelj) * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           if (rsq < cut_coulsq[itype][jtype])
             ecoul = factor_coul * qqrd2e * qtmp*q[j]*sqrt(r2inv);
           else ecoul = 0.0;
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
               offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_lj_cut_coul_debye_gpu.cpp b/src/GPU/pair_lj_cut_coul_debye_gpu.cpp
index e5e6f087d..724d0d9d0 100644
--- a/src/GPU/pair_lj_cut_coul_debye_gpu.cpp
+++ b/src/GPU/pair_lj_cut_coul_debye_gpu.cpp
@@ -1,266 +1,266 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
    
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
    
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_lj_cut_coul_debye_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 // one-time library setup for this style, called from init_style(); the int
 // return value is handed to GPU_EXTRA::check_flag() there.  gpu_mode is
 // taken by reference.
 int ljcd_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                  double **host_lj2, double **host_lj3, double **host_lj4, 
                  double **offset, double *special_lj, const int nlocal, 
                  const int nall, const int max_nbors, const int maxspecial,
                  const double cell_size, int &gpu_mode, FILE *screen,
                  double **host_cut_ljsq, double **host_cut_coulsq,
                  double *host_special_coul, const double qqrd2e,
                  const double kappa);
 // called from the destructor of PairLJCutCoulDebyeGPU
 void ljcd_gpu_clear();
 // force computation used by compute() when gpu_mode != GPU_FORCE; the return
 // value is used as firstneigh, and ilist, jnum, host_start and success are
 // handed back through the pointer/reference arguments
 int ** ljcd_gpu_compute_n(const int ago, const int inum, const int nall, 
                           double **host_x, int *host_type, 
                           double *sublo, double *subhi, tagint *tag, int **nspecial,
                           tagint **special, const bool eflag, const bool vflag,
                           const bool eatom, const bool vatom, int &host_start,
                           int **ilist, int **jnum, const double cpu_time,
                           bool &success, double *host_q, double *boxlo,
                           double *prd);
 // force computation used by compute() when gpu_mode == GPU_FORCE; the caller
 // supplies the neighbor list (ilist, numj, firstneigh), and host_start and
 // success are handed back through the reference arguments
 void ljcd_gpu_compute(const int ago, const int inum, const int nall, 
                       double **host_x, int *host_type,
                       int *ilist, int *numj, int **firstneigh,
                       const bool eflag, const bool vflag, const bool eatom,
                       const bool vatom, int &host_start, const double cpu_time,
                       bool &success, double *host_q, const int nlocal,
                       double *boxlo, double *prd);
 // value added to Pair::memory_usage() in memory_usage()
 double ljcd_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCutCoulDebyeGPU::PairLJCutCoulDebyeGPU(LAMMPS *lmp)
   : PairLJCutCoulDebye(lmp), gpu_mode(GPU_FORCE)
 {
   // switch off the respa_enable and reinitflag settings for this style
   respa_enable = 0;
   reinitflag = 0;
 
   // cpu_time is overwritten in compute() with the elapsed time of
   // cpu_compute(); start it at zero
   cpu_time = 0.0;
 
   // NOTE(review): presumably verifies that the GPU package is set up;
   // gpu_ready() is defined outside this file
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairLJCutCoulDebyeGPU::~PairLJCutCoulDebyeGPU()
 {
   // ljcd_gpu_clear() is an external library routine (prototype near the top
   // of this file); presumably it releases the library-side data for this
   // style
   ljcd_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCoulDebyeGPU::compute(int eflag, int vflag)
 {
   // energy/virial bookkeeping: run ev_setup() if anything was requested,
   // otherwise just clear the two flags
 
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
   } else {
     evflag = vflag_fdotr = 0;
   }
 
   const int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   int *ilist, *numneigh, **firstneigh;
   bool success = true;
 
   if (gpu_mode == GPU_FORCE) {
     // the neighbor list of this pair style is handed to the library
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     ljcd_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                      ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                      vflag_atom, host_start, cpu_time, success, atom->q,
                      atom->nlocal, domain->boxlo, domain->prd);
   } else {
     // the library hands back the neighbor arrays (ilist, numneigh and
     // firstneigh) for all local atoms
     inum = atom->nlocal;
     firstneigh = ljcd_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                     atom->type, domain->sublo, domain->subhi,
                                     atom->tag, atom->nspecial, atom->special,
                                     eflag, vflag, eflag_atom, vflag_atom,
                                     host_start, &ilist, &numneigh, cpu_time,
                                     success, atom->q, domain->boxlo,
                                     domain->prd);
   }
 
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   // atoms ilist[host_start..inum-1] are left for the host; nothing to do
   // if that range is empty
 
   if (host_start >= inum) return;
 
   // time the host part; cpu_time is passed to the library on the next call
 
   cpu_time = MPI_Wtime();
   cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
   cpu_time = MPI_Wtime() - cpu_time;
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulDebyeGPU::init_style()
 {
   // this style needs per-atom charge and refuses to run with newton pair on
 
   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/cut/coul/debye/gpu requires atom attribute q");
 
   if (force->newton_pair) 
     error->all(FLERR,"Cannot use newton pair with lj/cut/coul/debye/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   // cutsq is filled symmetrically from init_one(); a type pair is skipped
   // (set to 0.0) unless it was set explicitly or both of its diagonal
   // entries were set.  maxcut tracks the largest squared cutoff.
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   // largest cutoff plus the neighbor skin
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   // maxspecial is only taken from the atom class for molecular systems
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // the literal 300 lands in the max_nbors parameter of ljcd_gpu_init();
   // gpu_mode is passed by reference, so the call may change it
   int success = ljcd_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
                               offset, force->special_lj, atom->nlocal,
                               atom->nlocal+atom->nghost, 300, maxspecial,
                               cell_size, gpu_mode, screen, cut_ljsq, 
                               cut_coulsq, force->special_coul, 
                               force->qqrd2e, kappa);
   GPU_EXTRA::check_flag(success,error,world);
 
   // in GPU_FORCE mode compute() reads this style's own neighbor list, so
   // request one and mark it as full instead of half
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLJCutCoulDebyeGPU::memory_usage()
 {
   // total reported by the base Pair class ...
   const double base_bytes = Pair::memory_usage();
   // ... plus the value returned by the external ljcd_gpu_bytes() routine
   const double lib_bytes = ljcd_gpu_bytes();
   return base_bytes + lib_bytes;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCoulDebyeGPU::cpu_compute(int start, int inum, int eflag, 
                                         int vflag, int *ilist,
                                         int *numneigh, int **firstneigh)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double r,rinv,screening;
   int *jlist;
 
   evdwl = ecoul = 0.0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   double qqrd2e = force->qqrd2e;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
 
         if (rsq < cut_coulsq[itype][jtype]) {
           r = sqrt(rsq);
           rinv = 1.0/r;
           screening = exp(-kappa*r);
           forcecoul = qqrd2e * qtmp*q[j] * screening * (kappa + rinv);
         } else forcecoul = 0.0;
 
         if (rsq < cut_ljsq[itype][jtype]) {
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
         } else forcelj = 0.0;
 
         fpair = (factor_coul*forcecoul + factor_lj*forcelj) * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           if (rsq < cut_coulsq[itype][jtype])
             ecoul = factor_coul * qqrd2e * qtmp*q[j] * rinv * screening;
           else ecoul = 0.0;
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
             offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_lj_cut_coul_dsf_gpu.cpp b/src/GPU/pair_lj_cut_coul_dsf_gpu.cpp
index 22de6bd18..954b89d77 100644
--- a/src/GPU/pair_lj_cut_coul_dsf_gpu.cpp
+++ b/src/GPU/pair_lj_cut_coul_dsf_gpu.cpp
@@ -1,290 +1,290 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Mike Brown (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_lj_cut_coul_dsf_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 // sqrt(pi)
 #define MY_PIS 1.77245385090551602729
 // 2/sqrt(pi) to the digits given; not referenced in the part of this file
 // that was reviewed
 #define EWALD_F   1.12837917
 // constants of the polynomial erfc() approximation evaluated in cpu_compute():
 //   t = 1/(1 + EWALD_P*alpha*r)
 //   erfc(alpha*r) ~ t*(A1+t*(A2+t*(A3+t*(A4+t*A5)))) * exp(-alpha*alpha*r*r)
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 // one-time library setup for this style, called from init_style(); the int
 // return value is handed to GPU_EXTRA::check_flag() there.  gpu_mode is
 // taken by reference.  Unlike host_cut_ljsq, host_cut_coulsq is a single
 // value here, not a per-type-pair table.
 int ljd_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                  double **host_lj2, double **host_lj3, double **host_lj4,
                  double **offset, double *special_lj, const int nlocal,
                  const int nall, const int max_nbors, const int maxspecial,
                  const double cell_size, int &gpu_mode, FILE *screen,
                  double **host_cut_ljsq, const double host_cut_coulsq,
                  double *host_special_coul, const double qqrd2e, 
                  const double e_shift, const double f_shift, 
                  const double alpha);
 // called from the destructor of PairLJCutCoulDSFGPU
 void ljd_gpu_clear();
 // force computation used by compute() when gpu_mode != GPU_FORCE; the return
 // value is used as firstneigh, and ilist, jnum, host_start and success are
 // handed back through the pointer/reference arguments
 int ** ljd_gpu_compute_n(const int ago, const int inum,
                          const int nall, double **host_x, int *host_type,
                          double *sublo, double *subhi, tagint *tag, int **nspecial,
                          tagint **special, const bool eflag, const bool vflag,
                          const bool eatom, const bool vatom, int &host_start,
                          int **ilist, int **jnum, const double cpu_time,
                          bool &success, double *host_q, double *boxlo,
                          double *prd);
 // force computation used by compute() when gpu_mode == GPU_FORCE; the caller
 // supplies the neighbor list (ilist, numj, firstneigh), and host_start and
 // success are handed back through the reference arguments
 void ljd_gpu_compute(const int ago, const int inum,
                      const int nall, double **host_x, int *host_type,
                      int *ilist, int *numj, int **firstneigh,
                      const bool eflag, const bool vflag, const bool eatom,
                      const bool vatom, int &host_start, const double cpu_time,
                      bool &success, double *host_q, const int nlocal,
                      double *boxlo, double *prd);
 // value added to Pair::memory_usage() in memory_usage()
 double ljd_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCutCoulDSFGPU::PairLJCutCoulDSFGPU(LAMMPS *lmp)
   : PairLJCutCoulDSF(lmp), gpu_mode(GPU_FORCE)
 {
   // switch off the respa_enable and reinitflag settings for this style
   respa_enable = 0;
   reinitflag = 0;
 
   // cpu_time is overwritten in compute() with the elapsed time of
   // cpu_compute(); start it at zero
   cpu_time = 0.0;
 
   // NOTE(review): presumably verifies that the GPU package is set up;
   // gpu_ready() is defined outside this file
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairLJCutCoulDSFGPU::~PairLJCutCoulDSFGPU()
 {
   // ljd_gpu_clear() is an external library routine (prototype near the top
   // of this file); presumably it releases the library-side data for this
   // style
   ljd_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCoulDSFGPU::compute(int eflag, int vflag)
 {
   // energy/virial bookkeeping: run ev_setup() if anything was requested,
   // otherwise just clear the two flags
 
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
   } else {
     evflag = vflag_fdotr = 0;
   }
 
   const int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   int *ilist, *numneigh, **firstneigh;
   bool success = true;
 
   if (gpu_mode == GPU_FORCE) {
     // the neighbor list of this pair style is handed to the library
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     ljd_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                     vflag_atom, host_start, cpu_time, success, atom->q,
                     atom->nlocal, domain->boxlo, domain->prd);
   } else {
     // the library hands back the neighbor arrays (ilist, numneigh and
     // firstneigh) for all local atoms
     inum = atom->nlocal;
     firstneigh = ljd_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                    atom->type, domain->sublo, domain->subhi,
                                    atom->tag, atom->nspecial, atom->special,
                                    eflag, vflag, eflag_atom, vflag_atom,
                                    host_start, &ilist, &numneigh, cpu_time,
                                    success, atom->q, domain->boxlo,
                                    domain->prd);
   }
 
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   // atoms ilist[host_start..inum-1] are left for the host; nothing to do
   // if that range is empty
 
   if (host_start >= inum) return;
 
   // time the host part; cpu_time is passed to the library on the next call
 
   cpu_time = MPI_Wtime();
   cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
   cpu_time = MPI_Wtime() - cpu_time;
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulDSFGPU::init_style()
 {
   // this style needs per-atom charge and refuses to run with newton pair on
 
   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/cut/coul/dsf/gpu requires atom attribute q");
 
   if (force->newton_pair)
     error->all(FLERR,"Cannot use newton pair with lj/cut/coul/dsf/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   // cutsq is filled symmetrically from init_one(); a type pair is skipped
   // (set to 0.0) unless it was set explicitly or both of its diagonal
   // entries were set.  maxcut tracks the largest squared cutoff.
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   // largest cutoff plus the neighbor skin
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   // shift constants evaluated at the Coulomb cutoff; f_shift and e_shift
   // are read again in cpu_compute() and are passed to ljd_gpu_init() below
   cut_coulsq = cut_coul * cut_coul;
   double erfcc = erfc(alpha*cut_coul); 
   double erfcd = exp(-alpha*alpha*cut_coul*cut_coul);
   f_shift = -(erfcc/cut_coulsq + 2.0/MY_PIS*alpha*erfcd/cut_coul); 
   e_shift = erfcc/cut_coul - f_shift*cut_coul; 
 
   // maxspecial is only taken from the atom class for molecular systems
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // the literal 300 lands in the max_nbors parameter of ljd_gpu_init();
   // gpu_mode is passed by reference, so the call may change it
   int success = ljd_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
                              offset, force->special_lj, atom->nlocal,
                              atom->nlocal+atom->nghost, 300, maxspecial,
                              cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
                              force->special_coul, force->qqrd2e, e_shift,
                              f_shift, alpha);
   GPU_EXTRA::check_flag(success,error,world);
 
   // in GPU_FORCE mode compute() reads this style's own neighbor list, so
   // request one and mark it as full instead of half
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLJCutCoulDSFGPU::memory_usage()
 {
   // total reported by the base Pair class ...
   const double base_bytes = Pair::memory_usage();
   // ... plus the value returned by the external ljd_gpu_bytes() routine
   const double lib_bytes = ljd_gpu_bytes();
   return base_bytes + lib_bytes;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCoulDSFGPU::cpu_compute(int start, int inum, int eflag, int vflag,
                                       int *ilist, int *numneigh,
                                       int **firstneigh)
 {
   // host-side LJ + damped-shifted Coulomb evaluation for the atoms
   // ilist[start] .. ilist[inum-1]
   //   start      = first index into ilist handled here
   //   inum       = one past the last index into ilist handled here
   //   eflag      = non-zero if pair energies are to be evaluated
   //   vflag      = not read in this function (tallying is keyed on evflag)
   //   ilist      = indices of the atoms to process
   //   numneigh   = number of neighbors, indexed by atom
   //   firstneigh = neighbor index arrays, indexed by atom
   // only f[i] is accumulated (nothing is added to f[j]); pair tallies go
   // through ev_tally_full() and the per-atom self term through ev_tally()
 
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double r,rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double prefactor,erfcc,erfcd,e_self,t;
   int *jlist;
 
   evdwl = ecoul = 0.0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   double qqrd2e = force->qqrd2e;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     // self-energy term of atom i, tallied once per atom
 
     if (evflag) {
       e_self = -(e_shift/2.0 + alpha/MY_PIS) * qtmp*qtmp*qqrd2e;
       ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0);
     }
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // scaling factors are looked up from the raw neighbor entry before
       // it is masked down to the atom index
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
 
         if (rsq < cut_ljsq[itype][jtype]) {
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
         } else forcelj = 0.0;
 
         // erfcc is the polynomial erfc(alpha*r) approximation built from
         // EWALD_P and A1..A5.  Beyond the Coulomb cutoff the force must be
         // zeroed explicitly: without the else branch forcecoul would keep
         // the value of an earlier pair (or be uninitialized) and leak into
         // fpair for pairs that are only inside the LJ cutoff.
 
         if (rsq < cut_coulsq) {
           r = sqrt(rsq);
           prefactor = factor_coul * qqrd2e*qtmp*q[j]/r;
           erfcd = exp(-alpha*alpha*r*r);
           t = 1.0 / (1.0 + EWALD_P*alpha*r);
           erfcc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * erfcd;
           forcecoul = prefactor * (erfcc/r + 2.0*alpha/MY_PIS * erfcd + 
             r*f_shift) * r;
         } else forcecoul = 0.0;
 
         fpair = (forcecoul + factor_lj*forcelj) * r2inv;
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
                     offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
           
           // prefactor, erfcc and r were set under the same cutoff test above
           if (rsq < cut_coulsq) {
             ecoul = prefactor * (erfcc - r*e_shift - rsq*f_shift);
           } else ecoul = 0.0;
         }
 
         if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_lj_cut_coul_long_gpu.cpp b/src/GPU/pair_lj_cut_coul_long_gpu.cpp
index 166649f6d..d800759d7 100644
--- a/src/GPU/pair_lj_cut_coul_long_gpu.cpp
+++ b/src/GPU/pair_lj_cut_coul_long_gpu.cpp
@@ -1,325 +1,325 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Mike Brown (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_lj_cut_coul_long_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "kspace.h"
 #include "gpu_extra.h"
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 int ljcl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                   double **host_lj2, double **host_lj3, double **host_lj4,
                   double **offset, double *special_lj, const int nlocal,
                   const int nall, const int max_nbors, const int maxspecial,
                   const double cell_size, int &gpu_mode, FILE *screen,
                   double **host_cut_ljsq, double host_cut_coulsq,
                   double *host_special_coul, const double qqrd2e,
                   const double g_ewald);
 int ljcl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
                     double **host_lj2, double **host_lj3, double **host_lj4,
                     double **offset, double **host_lj_cutsq);
 void ljcl_gpu_clear();
 int ** ljcl_gpu_compute_n(const int ago, const int inum,
                           const int nall, double **host_x, int *host_type,
                           double *sublo, double *subhi, tagint *tag,
                           int **nspecial, tagint **special, const bool eflag,
                           const bool vflag, const bool eatom, const bool vatom,
                           int &host_start, int **ilist, int **jnum,
                           const double cpu_time, bool &success, double *host_q,
                           double *boxlo, double *prd);
 void ljcl_gpu_compute(const int ago, const int inum, const int nall,
                       double **host_x, int *host_type, int *ilist, int *numj,
                       int **firstneigh, const bool eflag, const bool vflag,
                       const bool eatom, const bool vatom, int &host_start,
                       const double cpu_time, bool &success, double *host_q,
                       const int nlocal, double *boxlo, double *prd);
 double ljcl_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCutCoulLongGPU::PairLJCutCoulLongGPU(LAMMPS *lmp) :
   PairLJCutCoulLong(lmp), gpu_mode(GPU_FORCE)
 {
   respa_enable = 0;
   cpu_time = 0.0;
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairLJCutCoulLongGPU::~PairLJCutCoulLongGPU()
 {
   ljcl_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCoulLongGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
 
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
   if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = ljcl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                     atom->type, domain->sublo, domain->subhi,
                                     atom->tag, atom->nspecial, atom->special,
                                     eflag, vflag, eflag_atom, vflag_atom,
                                     host_start, &ilist, &numneigh, cpu_time,
                                     success, atom->q, domain->boxlo,
                                     domain->prd);
   } else {
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     ljcl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                      ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                      vflag_atom, host_start, cpu_time, success, atom->q,
                      atom->nlocal, domain->boxlo, domain->prd);
   }
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   if (host_start<inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulLongGPU::init_style()
 {
   cut_respa = NULL;
 
   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/cut/coul/long/gpu requires atom attribute q");
   if (force->newton_pair)
     error->all(FLERR,"Cannot use newton pair with lj/cut/coul/long/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   cut_coulsq = cut_coul * cut_coul;
 
   // insure use of KSpace long-range solver, set g_ewald
 
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style is incompatible with KSpace style");
   g_ewald = force->kspace->g_ewald;
 
   // setup force tables
 
   if (ncoultablebits) init_tables(cut_coul,cut_respa);
 
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   int success = ljcl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
                               offset, force->special_lj, atom->nlocal,
                               atom->nlocal+atom->nghost, 300, maxspecial,
                               cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
                               force->special_coul, force->qqrd2e, g_ewald);
   GPU_EXTRA::check_flag(success,error,world);
 
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCoulLongGPU::reinit()
 {
   Pair::reinit();
   
   ljcl_gpu_reinit(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, offset, cut_ljsq);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLJCutCoulLongGPU::memory_usage()
 {
   double bytes = Pair::memory_usage();
   return bytes + ljcl_gpu_bytes();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCoulLongGPU::cpu_compute(int start, int inum, int eflag,
                                        int vflag, int *ilist, int *numneigh,
                                        int **firstneigh)
 {
   int i,j,ii,jj,jnum,itype,jtype,itable;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double fraction,table;
   double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double grij,expm2,prefactor,t,erfc;
   int *jlist;
   double rsq;
 
   evdwl = ecoul = 0.0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   double qqrd2e = force->qqrd2e;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
 
         if (rsq < cut_coulsq) {
           if (!ncoultablebits || rsq <= tabinnersq) {
             r = sqrt(rsq);
             grij = g_ewald * r;
             expm2 = exp(-grij*grij);
             t = 1.0 / (1.0 + EWALD_P*grij);
             erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
             prefactor = qqrd2e * qtmp*q[j]/r;
             forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
             if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
           } else {
             union_int_float_t rsq_lookup;
             rsq_lookup.f = rsq;
             itable = rsq_lookup.i & ncoulmask;
             itable >>= ncoulshiftbits;
             fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
             table = ftable[itable] + fraction*dftable[itable];
             forcecoul = qtmp*q[j] * table;
             if (factor_coul < 1.0) {
               table = ctable[itable] + fraction*dctable[itable];
               prefactor = qtmp*q[j] * table;
               forcecoul -= (1.0-factor_coul)*prefactor;
             }
           }
         } else forcecoul = 0.0;
 
         if (rsq < cut_ljsq[itype][jtype]) {
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
         } else forcelj = 0.0;
 
         fpair = (forcecoul + factor_lj*forcelj) * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           if (rsq < cut_coulsq) {
             if (!ncoultablebits || rsq <= tabinnersq)
               ecoul = prefactor*erfc;
             else {
               table = etable[itable] + fraction*detable[itable];
               ecoul = qtmp*q[j] * table;
             }
             if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
           } else ecoul = 0.0;
 
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
               offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_lj_cut_coul_msm_gpu.cpp b/src/GPU/pair_lj_cut_coul_msm_gpu.cpp
index 4918d1eee..4972ce57b 100644
--- a/src/GPU/pair_lj_cut_coul_msm_gpu.cpp
+++ b/src/GPU/pair_lj_cut_coul_msm_gpu.cpp
@@ -1,297 +1,297 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
    
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
    
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_lj_cut_coul_msm_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "kspace.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 int ljcm_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                   double **host_lj2, double **host_lj3, double **host_lj4,
                   double **host_gcons, double **host_dgcons,
                   double **offset, double *special_lj, const int inum,
                   const int nall, const int max_nbors, const int maxspecial,
                   const double cell_size, int &gpu_mode, FILE *screen,
                   double **host_cut_ljsq, double host_cut_coulsq,
                   double *host_special_coul, const int order, const double qqrd2e);
 void ljcm_gpu_clear();
 int ** ljcm_gpu_compute_n(const int ago, const int inum,
                           const int nall, double **host_x, int *host_type,
                           double *sublo, double *subhi, tagint *tag, int **nspecial,
                           tagint **special, const bool eflag, const bool vflag,
                           const bool eatom, const bool vatom, int &host_start,
                           int **ilist, int **jnum, const double cpu_time,
                           bool &success, double *host_q, double *boxlo, double *prd);
 void ljcm_gpu_compute(const int ago, const int inum, const int nall, 
                       double **host_x, int *host_type, int *ilist, int *numj,
                       int **firstneigh, const bool eflag, const bool vflag,
                       const bool eatom, const bool vatom, int &host_start,
                       const double cpu_time, bool &success, double *host_q,
                       const int nlocal, double *boxlo, double *prd);
 double ljcm_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCutCoulMSMGPU::PairLJCutCoulMSMGPU(LAMMPS *lmp) :
   PairLJCutCoulMSM(lmp), gpu_mode(GPU_FORCE)
 {
   respa_enable = 0;
   reinitflag = 0;
   cpu_time = 0.0;
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); 
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairLJCutCoulMSMGPU::~PairLJCutCoulMSMGPU()
 {
   ljcm_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCoulMSMGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
   
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
   if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = ljcm_gpu_compute_n(neighbor->ago, inum, nall,
                                     atom->x, atom->type, domain->sublo,
                                     domain->subhi, atom->tag, atom->nspecial,
                                     atom->special, eflag, vflag, eflag_atom,
                                     vflag_atom, host_start,
                                     &ilist, &numneigh, cpu_time, success,
                                     atom->q, domain->boxlo, domain->prd);
   } else {
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     ljcm_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                      ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                      vflag_atom, host_start, cpu_time, success, atom->q,
                      atom->nlocal, domain->boxlo, domain->prd);
   }
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   if (host_start<inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulMSMGPU::init_style()
 {
   cut_respa = NULL;
   
   if (force->newton_pair) 
     error->all(FLERR,"Cannot use newton pair with lj/cut/coul/msm/gpu pair style");
 
   if (force->kspace->scalar_pressure_flag)
     error->all(FLERR,"Must use 'kspace_modify pressure/scalar no' with GPU MSM Pair styles");
 
   // Repeat cutsq calculation because done after call to init_style
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   cut_coulsq = cut_coul * cut_coul;
 
   // setup force tables
 
   if (ncoultablebits) init_tables(cut_coul,cut_respa);
   
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   int success = ljcm_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
                               force->kspace->get_gcons(),
                               force->kspace->get_dgcons(),
                               offset, force->special_lj,
                               atom->nlocal, atom->nlocal+atom->nghost,
                               300, maxspecial, cell_size, gpu_mode, screen,
                               cut_ljsq, cut_coulsq, force->special_coul,
                               force->kspace->order, force->qqrd2e);
   GPU_EXTRA::check_flag(success,error,world);
 
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLJCutCoulMSMGPU::memory_usage()
 {
   double bytes = Pair::memory_usage();
   return bytes + ljcm_gpu_bytes();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCoulMSMGPU::cpu_compute(int start, int inum, int eflag, int vflag, 
                                int *ilist, int *numneigh, int **firstneigh) {
   int i,j,ii,jj,jnum,itype,jtype,itable;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double fraction,table;
   double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double egamma,fgamma,prefactor;
   int *jlist;
   double rsq;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   double qqrd2e = force->qqrd2e;
   
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
 
         if (rsq < cut_coulsq) {
           if (!ncoultablebits || rsq <= tabinnersq) {
             r = sqrt(rsq);
             prefactor = qqrd2e * qtmp*q[j]/r;
             egamma = 1.0 - (r/cut_coul)*force->kspace->gamma(r/cut_coul);
             fgamma = 1.0 + (rsq/cut_coulsq)*force->kspace->dgamma(r/cut_coul);
             forcecoul = prefactor * fgamma;
             if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
           } else {
             union_int_float_t rsq_lookup;
             rsq_lookup.f = rsq;
             itable = rsq_lookup.i & ncoulmask;
             itable >>= ncoulshiftbits;
             fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
             table = ftable[itable] + fraction*dftable[itable];
             forcecoul = qtmp*q[j] * table;
             if (factor_coul < 1.0) {
               table = ctable[itable] + fraction*dctable[itable];
               prefactor = qtmp*q[j] * table;
               forcecoul -= (1.0-factor_coul)*prefactor;
             }
           }
         } else forcecoul = 0.0;
 
         if (rsq < cut_ljsq[itype][jtype]) {
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
         } else forcelj = 0.0;
 
         fpair = (forcecoul + forcelj) * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           if (rsq < cut_coulsq) {
             if (!ncoultablebits || rsq <= tabinnersq)
               ecoul = prefactor*egamma;
             else {
               table = etable[itable] + fraction*detable[itable];
               ecoul = qtmp*q[j] * table;
             }
             if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
           } else ecoul = 0.0;
 
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
               offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
         
         if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_lj_cut_dipole_cut_gpu.cpp b/src/GPU/pair_lj_cut_dipole_cut_gpu.cpp
index 351873e4e..348fe44b2 100755
--- a/src/GPU/pair_lj_cut_dipole_cut_gpu.cpp
+++ b/src/GPU/pair_lj_cut_dipole_cut_gpu.cpp
@@ -1,370 +1,370 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
    
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
    
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_lj_cut_dipole_cut_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 int dpl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                  double **host_lj2, double **host_lj3, double **host_lj4, 
                  double **offset, double *special_lj, const int nlocal, 
                  const int nall, const int max_nbors, const int maxspecial,
                  const double cell_size, int &gpu_mode, FILE *screen,
                  double **host_cut_ljsq, double **host_cut_coulsq,
                  double *host_special_coul, const double qqrd2e);
 void dpl_gpu_clear();
 int ** dpl_gpu_compute_n(const int ago, const int inum,
                          const int nall, double **host_x, int *host_type, 
                          double *sublo, double *subhi, tagint *tag, 
                          int **nspecial, tagint **special, const bool eflag, 
                          const bool vflag, const bool eatom, const bool vatom, 
                          int &host_start, int **ilist, int **jnum, 
                          const double cpu_time, bool &success, 
                          double *host_q, double **host_mu, 
                          double *boxlo, double *prd);
 void dpl_gpu_compute(const int ago, const int inum,
                      const int nall, double **host_x, int *host_type,
                      int *ilist, int *numj, int **firstneigh,
                      const bool eflag, const bool vflag, const bool eatom,
                      const bool vatom, int &host_start, const double cpu_time,
                      bool &success, double *host_q, double **host_mu, 
                      const int nlocal, double *boxlo, double *prd);
 double dpl_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCutDipoleCutGPU::PairLJCutDipoleCutGPU(LAMMPS *lmp) : PairLJCutDipoleCut(lmp), 
   gpu_mode(GPU_FORCE)
 {
   respa_enable = 0;
   reinitflag = 0;
   cpu_time = 0.0;
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); 
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairLJCutDipoleCutGPU::~PairLJCutDipoleCutGPU()
 {
   dpl_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutDipoleCutGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
   
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;  
   if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = dpl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                    atom->type, domain->sublo, domain->subhi,
                                    atom->tag, atom->nspecial, atom->special,
                                    eflag, vflag, eflag_atom, vflag_atom,
                                    host_start, &ilist, &numneigh, cpu_time,
                                    success, atom->q, atom->mu, domain->boxlo, 
                                    domain->prd);
   } else {
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     dpl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                     vflag_atom, host_start, cpu_time, success, atom->q,
                     atom->mu, atom->nlocal, domain->boxlo, domain->prd);
   }
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   if (host_start<inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLJCutDipoleCutGPU::init_style()
 {
   if (!atom->q_flag || !atom->mu_flag || !atom->torque_flag)
     error->all(FLERR,"Pair dipole/cut/gpu requires atom attributes q, mu, torque");
   
   if (force->newton_pair) 
     error->all(FLERR,"Cannot use newton pair with dipole/cut/gpu pair style");
 
   if (strcmp(update->unit_style,"electron") == 0)
     error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles");
 
   // Repeat cutsq calculation because done after call to init_style
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   int success = dpl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
                              offset, force->special_lj, atom->nlocal,
                              atom->nlocal+atom->nghost, 300, maxspecial,
                              cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
                              force->special_coul, force->qqrd2e);
   GPU_EXTRA::check_flag(success,error,world);
 
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLJCutDipoleCutGPU::memory_usage()
 {
   double bytes = Pair::memory_usage();
   return bytes + dpl_gpu_bytes();
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Host (CPU) evaluation of the lj/cut/dipole/cut interaction for the atoms
 // at list positions [start,inum) of ilist.
 //
 //   start, inum    first and one-past-last position in ilist to process
 //   eflag, vflag   energy / virial request flags, forwarded to ev_setup()
 //   ilist          atom index for each list position
 //   numneigh       neighbor count, indexed by atom index
 //   firstneigh     neighbor index array, indexed by atom index
 //
 // Only f[i] and torque[i] of the list atom i are accumulated; nothing is
 // written for the neighbor j.  The torque a pair exerts on j used to be
 // computed here as well but was never stored or read, so that dead
 // arithmetic has been removed.
 void PairLJCutDipoleCutGPU::cpu_compute(int start, int inum, int eflag, int vflag,
                                         int *ilist, int *numneigh,
                                         int **firstneigh)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fx,fy,fz;
   double rsq,rinv,r2inv,r6inv,r3inv,r5inv,r7inv;
   double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz;
   double tixcoul,tiycoul,tizcoul;
   double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4;
   double forcelj,factor_coul,factor_lj;
   int *jlist;
 
   // defined values for the tally call even when no energy is requested
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   double **mu = atom->mu;
   double **torque = atom->torque;
   int *type = atom->type;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   double qqrd2e = force->qqrd2e;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // the special-bond scaling factors are looked up from the raw list
       // entry via sbmask() before NEIGHMASK reduces it to the atom index
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         rinv = sqrt(r2inv);
 
         // atom can have both a charge and dipole
         // i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole
 
         forcecoulx = forcecouly = forcecoulz = 0.0;
         tixcoul = tiycoul = tizcoul = 0.0;
 
         if (rsq < cut_coulsq[itype][jtype]) {
 
           // charge-charge
 
           if (qtmp != 0.0 && q[j] != 0.0) {
             r3inv = r2inv*rinv;
             pre1 = qtmp*q[j]*r3inv;
 
             forcecoulx += pre1*delx;
             forcecouly += pre1*dely;
             forcecoulz += pre1*delz;
           }
 
           // dipole-dipole (mu[.][3] is tested as the "has a dipole" switch)
 
           if (mu[i][3] > 0.0 && mu[j][3] > 0.0) {
             r3inv = r2inv*rinv;
             r5inv = r3inv*r2inv;
             r7inv = r5inv*r2inv;
 
             pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2];
             pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz;
             pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
 
             pre1 = 3.0*r5inv*pdotp - 15.0*r7inv*pidotr*pjdotr;
             pre2 = 3.0*r5inv*pjdotr;
             pre3 = 3.0*r5inv*pidotr;
             pre4 = -1.0*r3inv;
 
             forcecoulx += pre1*delx + pre2*mu[i][0] + pre3*mu[j][0];
             forcecouly += pre1*dely + pre2*mu[i][1] + pre3*mu[j][1];
             forcecoulz += pre1*delz + pre2*mu[i][2] + pre3*mu[j][2];
 
             crossx = pre4 * (mu[i][1]*mu[j][2] - mu[i][2]*mu[j][1]);
             crossy = pre4 * (mu[i][2]*mu[j][0] - mu[i][0]*mu[j][2]);
             crossz = pre4 * (mu[i][0]*mu[j][1] - mu[i][1]*mu[j][0]);
 
             tixcoul += crossx + pre2 * (mu[i][1]*delz - mu[i][2]*dely);
             tiycoul += crossy + pre2 * (mu[i][2]*delx - mu[i][0]*delz);
             tizcoul += crossz + pre2 * (mu[i][0]*dely - mu[i][1]*delx);
           }
 
           // dipole on i, charge on j
 
           if (mu[i][3] > 0.0 && q[j] != 0.0) {
             r3inv = r2inv*rinv;
             r5inv = r3inv*r2inv;
             pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz;
             pre1 = 3.0*q[j]*r5inv * pidotr;
             pre2 = q[j]*r3inv;
 
             forcecoulx += pre2*mu[i][0] - pre1*delx;
             forcecouly += pre2*mu[i][1] - pre1*dely;
             forcecoulz += pre2*mu[i][2] - pre1*delz;
             tixcoul += pre2 * (mu[i][1]*delz - mu[i][2]*dely);
             tiycoul += pre2 * (mu[i][2]*delx - mu[i][0]*delz);
             tizcoul += pre2 * (mu[i][0]*dely - mu[i][1]*delx);
           }
 
           // charge on i, dipole on j: contributes force on i but no torque on i
 
           if (mu[j][3] > 0.0 && qtmp != 0.0) {
             r3inv = r2inv*rinv;
             r5inv = r3inv*r2inv;
             pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
             pre1 = 3.0*qtmp*r5inv * pjdotr;
             pre2 = qtmp*r3inv;
 
             forcecoulx += pre1*delx - pre2*mu[j][0];
             forcecouly += pre1*dely - pre2*mu[j][1];
             forcecoulz += pre1*delz - pre2*mu[j][2];
           }
         }
 
         // LJ interaction
 
         if (rsq < cut_ljsq[itype][jtype]) {
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
           forcelj *= factor_lj * r2inv;
         } else forcelj = 0.0;
 
         // total force
 
         fq = factor_coul*qqrd2e;
         fx = fq*forcecoulx + delx*forcelj;
         fy = fq*forcecouly + dely*forcelj;
         fz = fq*forcecoulz + delz*forcelj;
 
         // force & torque accumulation
 
         f[i][0] += fx;
         f[i][1] += fy;
         f[i][2] += fz;
         torque[i][0] += fq*tixcoul;
         torque[i][1] += fq*tiycoul;
         torque[i][2] += fq*tizcoul;
 
         if (eflag) {
           // r3inv, r5inv, pdotp, pidotr and pjdotr were assigned above under
           // the same conditions that guard each of their uses here
           if (rsq < cut_coulsq[itype][jtype]) {
             ecoul = qtmp*q[j]*rinv;
             if (mu[i][3] > 0.0 && mu[j][3] > 0.0)
               ecoul += r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr;
             if (mu[i][3] > 0.0 && q[j] != 0.0)
               ecoul += -q[j]*r3inv*pidotr;
             if (mu[j][3] > 0.0 && qtmp != 0.0)
               ecoul += qtmp*r3inv*pjdotr;
             ecoul *= factor_coul*qqrd2e;
           } else ecoul = 0.0;
 
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
               offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally_xyz_full(i,evdwl,ecoul,fx,fy,fz,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_lj_cut_gpu.cpp b/src/GPU/pair_lj_cut_gpu.cpp
index 6d0556bb3..39c95df5b 100644
--- a/src/GPU/pair_lj_cut_gpu.cpp
+++ b/src/GPU/pair_lj_cut_gpu.cpp
@@ -1,241 +1,241 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Mike Brown (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_lj_cut_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 // Called from init_style(); the int it returns is handed to
 // GPU_EXTRA::check_flag() and gpu_mode is passed by reference.
 int ljl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                  double **host_lj2, double **host_lj3, double **host_lj4,
                  double **offset, double *special_lj, const int nlocal,
                  const int nall, const int max_nbors, const int maxspecial,
                  const double cell_size, int &gpu_mode, FILE *screen);
 
 // Called from reinit() with the current cutsq / lj1..lj4 / offset tables.
 int ljl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
                    double **host_lj2, double **host_lj3, double **host_lj4,
                    double **offset);
 
 // Called from the destructor.
 void ljl_gpu_clear();
 // Used by compute() when gpu_mode != GPU_FORCE: the return value is taken as
 // firstneigh, and ilist / jnum / host_start / success are filled through the
 // pointer and reference arguments.
 int ** ljl_gpu_compute_n(const int ago, const int inum,
                          const int nall, double **host_x, int *host_type,
                          double *sublo, double *subhi, tagint *tag, int **nspecial,
                          tagint **special, const bool eflag, const bool vflag,
                          const bool eatom, const bool vatom, int &host_start,
                          int **ilist, int **jnum,
                          const double cpu_time, bool &success);
 // Used by compute() when gpu_mode == GPU_FORCE: receives the neighbor list
 // arrays held by the pair style (ilist, numj, firstneigh).
 void ljl_gpu_compute(const int ago, const int inum, const int nall,
                      double **host_x, int *host_type, int *ilist, int *numj,
                      int **firstneigh, const bool eflag, const bool vflag,
                      const bool eatom, const bool vatom, int &host_start,
                      const double cpu_time, bool &success);
 // Added to Pair::memory_usage() in memory_usage().
 double ljl_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 // Builds on PairLJCut and starts out in GPU_FORCE mode.
 PairLJCutGPU::PairLJCutGPU(LAMMPS *lmp)
   : PairLJCut(lmp),
     gpu_mode(GPU_FORCE)
 {
   // no host-side timing has been recorded yet
   cpu_time = 0.0;
   // respa_enable is cleared for this style
   respa_enable = 0;
   // NOTE(review): presumably verifies that the GPU package is active;
   // confirm against gpu_extra.h
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairLJCutGPU::~PairLJCutGPU()
 {
   // tear-down is delegated entirely to the external lj/cut GPU library call
   ljl_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Pair evaluation entry point: hands the work to the lj/cut GPU library and
 // then evaluates on the host whatever list positions the library left over.
 void PairLJCutGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
   } else {
     evflag = vflag_fdotr = 0;
   }
 
   const int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   int *ilist, *numneigh, **firstneigh;
   bool success = true;
 
   if (gpu_mode == GPU_FORCE) {
     // forward the neighbor list held by this pair style
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     ljl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                     vflag_atom, host_start, cpu_time, success);
   } else {
     // the library hands back the neighbor arrays to use on the host
     inum = atom->nlocal;
     firstneigh = ljl_gpu_compute_n(neighbor->ago, inum, nall,
                                    atom->x, atom->type, domain->sublo,
                                    domain->subhi, atom->tag, atom->nspecial,
                                    atom->special, eflag, vflag, eflag_atom,
                                    vflag_atom, host_start,
                                    &ilist, &numneigh, cpu_time, success);
   }
 
   if (!success) {
     error->one(FLERR,"Insufficient memory on accelerator");
   }
 
   // list positions [host_start,inum) are evaluated on the CPU; the elapsed
   // wall time is kept in cpu_time, which is passed to the library calls above
   if (host_start < inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 // Style-specific initialization: rejects newton pair, rebuilds the cutsq
 // table, initializes the lj/cut GPU library and, in GPU_FORCE mode, requests
 // a full neighbor list.
 void PairLJCutGPU::init_style()
 {
   cut_respa = NULL;
 
   if (force->newton_pair)
     error->all(FLERR,"Cannot use newton pair with lj/cut/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   // maxcut tracks the largest squared cutoff returned by init_one()
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       // a pair is usable if set explicitly or if both diagonal entries are set
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   // NOTE(review): maxcut stays at -1.0 if no pair passes the test above,
   // which would make sqrt() return NaN -- confirm that case is rejected earlier
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // the literal 300 lands in the max_nbors slot of the ljl_gpu_init prototype
   int success = ljl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
                              offset, force->special_lj, atom->nlocal,
                              atom->nlocal+atom->nghost, 300, maxspecial,
                              cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
   // only GPU_FORCE mode asks the host for a neighbor list, and it is a
   // full (not half) list
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Re-initialization hook: the base-class reinit runs first, then the current
 // coefficient tables are passed to the GPU library.
 void PairLJCutGPU::reinit()
 {
   Pair::reinit();
   // the int returned by the library call is not inspected here
   ljl_gpu_reinit(atom->ntypes+1,
                  cutsq,
                  lj1, lj2, lj3, lj4,
                  offset);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Memory footprint of this pair style: the base-class Pair figure plus the
 // byte count reported by ljl_gpu_bytes().
 double PairLJCutGPU::memory_usage()
 {
   const double base_bytes = Pair::memory_usage();
   const double gpu_lib_bytes = ljl_gpu_bytes();
   return base_bytes + gpu_lib_bytes;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Host (CPU) evaluation of the lj/cut interaction for the atoms at list
 // positions [start,inum) of ilist.
 //
 //   start, inum    first and one-past-last position in ilist to process
 //   eflag          non-zero: compute the pair energy evdwl
 //   vflag          not read here; tallying is governed by evflag
 //   ilist          atom index for each list position
 //   numneigh       neighbor count, indexed by atom index
 //   firstneigh     neighbor index array, indexed by atom index
 //
 // Only f[i] of the list atom i is updated; nothing is written for j.
 void PairLJCutGPU::cpu_compute(int start, int inum, int eflag, int vflag,
                                int *ilist, int *numneigh, int **firstneigh) {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r2inv,r6inv,forcelj,factor_lj;
   int *jlist;
 
   // evdwl is passed to ev_tally_full() whenever evflag is set, including
   // when eflag is 0 and the energy branch below never assigns it, so it
   // must hold a defined value from the start
   evdwl = 0.0;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   double *special_lj = force->special_lj;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // the special-bond scaling factor is looked up from the raw list
       // entry via sbmask() before NEIGHMASK reduces it to the atom index
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         r6inv = r2inv*r2inv*r2inv;
         forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
         fpair = factor_lj*forcelj*r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
             offset[itype][jtype];
           evdwl *= factor_lj;
         }
 
         if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_lj_expand_gpu.cpp b/src/GPU/pair_lj_expand_gpu.cpp
index 20185905e..1bafd362b 100644
--- a/src/GPU/pair_lj_expand_gpu.cpp
+++ b/src/GPU/pair_lj_expand_gpu.cpp
@@ -1,243 +1,243 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Inderaj Bains (NVIDIA), ibains@nvidia.com
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_lj_expand_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 // Called from init_style(); the int it returns is handed to
 // GPU_EXTRA::check_flag() and gpu_mode is passed by reference.
 int lje_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                  double **host_lj2, double **host_lj3, double **host_lj4,
                  double **offset, double **shift, double *special_lj,
                  const int nlocal, const int nall, const int max_nbors,
                  const int maxspecial, const double cell_size, int &gpu_mode,
                  FILE *screen);
 // Called from reinit() with the current cutsq / lj1..lj4 / offset / shift tables.
 int lje_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
                    double **host_lj2, double **host_lj3, double **host_lj4,
                    double **offset, double **shift);
 // Called from the destructor.
 void lje_gpu_clear();
 // Used by compute() when gpu_mode != GPU_FORCE: the return value is taken as
 // firstneigh, and ilist / jnum / host_start / success are filled through the
 // pointer and reference arguments.
 int ** lje_gpu_compute_n(const int ago, const int inum, const int nall,
                          double **host_x, int *host_type, double *sublo,
                          double *subhi, tagint *tag, int **nspecial,
                          tagint **special, const bool eflag, const bool vflag,
                          const bool eatom, const bool vatom, int &host_start,
                          int **ilist, int **jnum,
                          const double cpu_time, bool &success);
 // Used by compute() when gpu_mode == GPU_FORCE: receives the neighbor list
 // arrays held by the pair style (ilist, numj, firstneigh).
 void lje_gpu_compute(const int ago, const int inum, const int nall,
                      double **host_x, int *host_type, int *ilist, int *numj,
                      int **firstneigh, const bool eflag, const bool vflag,
                      const bool eatom, const bool vatom, int &host_start,
                      const double cpu_time, bool &success);
 // Added to Pair::memory_usage() in memory_usage().
 double lje_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 // Builds on PairLJExpand and starts out in GPU_FORCE mode.
 PairLJExpandGPU::PairLJExpandGPU(LAMMPS *lmp)
   : PairLJExpand(lmp),
     gpu_mode(GPU_FORCE)
 {
   // no host-side timing has been recorded yet
   cpu_time = 0.0;
   // respa_enable is cleared for this style
   respa_enable = 0;
   // NOTE(review): presumably verifies that the GPU package is active;
   // confirm against gpu_extra.h
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairLJExpandGPU::~PairLJExpandGPU()
 {
   // tear-down is delegated entirely to the external lj/expand GPU library call
   lje_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Pair evaluation entry point: hands the work to the lj/expand GPU library
 // and then evaluates on the host whatever list positions the library left over.
 void PairLJExpandGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
   } else {
     evflag = vflag_fdotr = 0;
   }
 
   const int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   int *ilist, *numneigh, **firstneigh;
   bool success = true;
 
   if (gpu_mode == GPU_FORCE) {
     // forward the neighbor list held by this pair style
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     lje_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                     vflag_atom, host_start, cpu_time, success);
   } else {
     // the library hands back the neighbor arrays to use on the host
     inum = atom->nlocal;
     firstneigh = lje_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                    atom->type, domain->sublo, domain->subhi,
                                    atom->tag, atom->nspecial, atom->special,
                                    eflag, vflag, eflag_atom, vflag_atom,
                                    host_start, &ilist, &numneigh, cpu_time,
                                    success);
   }
 
   if (!success) {
     error->one(FLERR,"Insufficient memory on accelerator");
   }
 
   // list positions [host_start,inum) are evaluated on the CPU; the elapsed
   // wall time is kept in cpu_time, which is passed to the library calls above
   if (host_start < inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 // Style-specific initialization: rejects newton pair, rebuilds the cutsq
 // table, initializes the lj/expand GPU library and, in GPU_FORCE mode,
 // requests a full neighbor list.
 void PairLJExpandGPU::init_style()
 {
   if (force->newton_pair)
     error->all(FLERR,"Cannot use newton pair with lj/expand/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   // maxcut tracks the largest squared cutoff returned by init_one()
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       // a pair is usable if set explicitly or if both diagonal entries are set
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   // NOTE(review): maxcut stays at -1.0 if no pair passes the test above,
   // which would make sqrt() return NaN -- confirm that case is rejected earlier
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // the literal 300 lands in the max_nbors slot of the lje_gpu_init prototype
   int success = lje_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
                              offset, shift, force->special_lj, atom->nlocal,
                              atom->nlocal+atom->nghost, 300, maxspecial,
                              cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
   // only GPU_FORCE mode asks the host for a neighbor list, and it is a
   // full (not half) list
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Re-initialization hook: the base-class reinit runs first, then the current
 // coefficient tables are passed to the GPU library.
 void PairLJExpandGPU::reinit()
 {
   Pair::reinit();
   // the int returned by the library call is not inspected here
   lje_gpu_reinit(atom->ntypes+1,
                  cutsq,
                  lj1, lj2, lj3, lj4,
                  offset,
                  shift);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Memory footprint of this pair style: the base-class Pair figure plus the
 // byte count reported by lje_gpu_bytes().
 double PairLJExpandGPU::memory_usage()
 {
   const double base_bytes = Pair::memory_usage();
   const double gpu_lib_bytes = lje_gpu_bytes();
   return base_bytes + gpu_lib_bytes;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Host (CPU) evaluation of the lj/expand interaction for the atoms at list
 // positions [start,inum) of ilist.
 //
 //   start, inum    first and one-past-last position in ilist to process
 //   eflag          non-zero: compute the pair energy evdwl
 //   vflag          not read here; tallying is governed by evflag
 //   ilist          atom index for each list position
 //   numneigh       neighbor count, indexed by atom index
 //   firstneigh     neighbor index array, indexed by atom index
 //
 // Only f[i] of the list atom i is updated; nothing is written for j.
 void PairLJExpandGPU::cpu_compute(int start, int inum, int eflag, int vflag,
                                   int *ilist, int *numneigh, int **firstneigh)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r2inv,r6inv,forcelj,factor_lj;
   double r,rshift,rshiftsq;
   int *jlist;
 
   // evdwl is passed to ev_tally_full() whenever evflag is set, including
   // when eflag is 0 and the energy branch below never assigns it, so it
   // must hold a defined value from the start
   evdwl = 0.0;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   double *special_lj = force->special_lj;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // the special-bond scaling factor is looked up from the raw list
       // entry via sbmask() before NEIGHMASK reduces it to the atom index
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         // the LJ terms use the distance reduced by shift[itype][jtype];
         // the force is still projected along the unshifted separation r
         r = sqrt(rsq);
         rshift = r - shift[itype][jtype];
         rshiftsq = rshift*rshift;
         r2inv = 1.0/rshiftsq;
         r6inv = r2inv*r2inv*r2inv;
         forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
         fpair = factor_lj*forcelj/rshift/r;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
             offset[itype][jtype];
           evdwl *= factor_lj;
         }
 
         if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_lj_gromacs_gpu.cpp b/src/GPU/pair_lj_gromacs_gpu.cpp
index 1b0a52a4c..ca71a4931 100644
--- a/src/GPU/pair_lj_gromacs_gpu.cpp
+++ b/src/GPU/pair_lj_gromacs_gpu.cpp
@@ -1,246 +1,246 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
    
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
    
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_lj_gromacs_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "kspace.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 // Called from init_style(); the int it returns is handed to
 // GPU_EXTRA::check_flag() and gpu_mode is passed by reference.
 int ljgrm_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                    double **host_lj2, double **host_lj3, double **host_lj4,
                    double *special_lj, const int inum,
                    const int nall, const int max_nbors, const int maxspecial,
                    const double cell_size, int &gpu_mode, FILE *screen,
                    double **host_ljsw1, double **host_ljsw2, double **host_ljsw3,
                    double **host_ljsw4, double **host_ljsw5, 
                    double **cut_inner, double **cut_innersq);
 // Called from the destructor.
 void ljgrm_gpu_clear();
 // Used by compute() when gpu_mode != GPU_FORCE: the return value is taken as
 // firstneigh, and ilist / jnum / host_start / success are filled through the
 // pointer and reference arguments.
 int ** ljgrm_gpu_compute_n(const int ago, const int inum_full,
                            const int nall, double **host_x, int *host_type,
                            double *sublo, double *subhi, tagint *tag, int **nspecial,
                            tagint **special, const bool eflag, const bool vflag,
                            const bool eatom, const bool vatom, int &host_start,
                            int **ilist, int **jnum, const double cpu_time,
                            bool &success);
 // Used by compute() when gpu_mode == GPU_FORCE: receives the neighbor list
 // arrays held by the pair style (ilist, numj, firstneigh).
 void ljgrm_gpu_compute(const int ago, const int inum_full, const int nall,
                        double **host_x, int *host_type, int *ilist, int *numj,
                        int **firstneigh, const bool eflag, const bool vflag,
                        const bool eatom, const bool vatom, int &host_start,
                        const double cpu_time, bool &success);
 // Added to Pair::memory_usage() in memory_usage().
 double ljgrm_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 // Builds on PairLJGromacs and starts out in GPU_FORCE mode.
 PairLJGromacsGPU::PairLJGromacsGPU(LAMMPS *lmp)
   : PairLJGromacs(lmp),
     gpu_mode(GPU_FORCE)
 {
   // no host-side timing has been recorded yet
   cpu_time = 0.0;
   // both respa_enable and reinitflag are cleared for this style
   reinitflag = 0;
   respa_enable = 0;
   // NOTE(review): presumably verifies that the GPU package is active;
   // confirm against gpu_extra.h
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairLJGromacsGPU::~PairLJGromacsGPU()
 {
   // tear-down is delegated entirely to the external lj/gromacs GPU library call
   ljgrm_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Pair evaluation entry point: hands the work to the lj/gromacs GPU library
 // and then evaluates on the host whatever list positions the library left over.
 void PairLJGromacsGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
   } else {
     evflag = vflag_fdotr = 0;
   }
 
   const int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   int *ilist, *numneigh, **firstneigh;
   bool success = true;
 
   if (gpu_mode == GPU_FORCE) {
     // forward the neighbor list held by this pair style
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     ljgrm_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                       ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                       vflag_atom, host_start, cpu_time, success);
   } else {
     // the library hands back the neighbor arrays to use on the host
     inum = atom->nlocal;
     firstneigh = ljgrm_gpu_compute_n(neighbor->ago, inum, nall,
                                      atom->x, atom->type, domain->sublo,
                                      domain->subhi, atom->tag, atom->nspecial,
                                      atom->special, eflag, vflag, eflag_atom,
                                      vflag_atom, host_start, &ilist,
                                      &numneigh, cpu_time, success);
   }
 
   if (!success) {
     error->one(FLERR,"Insufficient memory on accelerator");
   }
 
   // list positions [host_start,inum) are evaluated on the CPU; the elapsed
   // wall time is kept in cpu_time, which is passed to the library calls above
   if (host_start < inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 // Style-specific initialization: rejects newton pair, rebuilds the cutsq
 // table, initializes the lj/gromacs GPU library and, in GPU_FORCE mode,
 // requests a full neighbor list.
 void PairLJGromacsGPU::init_style()
 {
   if (force->newton_pair) 
     error->all(FLERR,"Cannot use newton pair with lj/gromacs/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   // maxcut tracks the largest squared cutoff returned by init_one()
   double maxcut = -1.0;
   double mcut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       // a pair is usable if set explicitly or if both diagonal entries are set
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         mcut = init_one(i,j);
         mcut *= mcut;
         if (mcut > maxcut)
           maxcut = mcut;
         cutsq[i][j] = cutsq[j][i] = mcut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   // NOTE(review): maxcut stays at -1.0 if no pair passes the test above,
   // which would make sqrt() return NaN -- confirm that case is rejected earlier
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
 
   // the literal 300 lands in the max_nbors slot of the ljgrm_gpu_init prototype
   int success = ljgrm_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
 			                         force->special_lj, atom->nlocal,
                                atom->nlocal+atom->nghost, 300, maxspecial,
                                cell_size, gpu_mode, screen, ljsw1, ljsw2, 
                                ljsw3, ljsw4, ljsw5, cut_inner, cut_inner_sq);
   GPU_EXTRA::check_flag(success,error,world);
 
   // only GPU_FORCE mode asks the host for a neighbor list, and it is a
   // full (not half) list
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Memory footprint of this pair style: the base-class Pair figure plus the
 // byte count reported by ljgrm_gpu_bytes().
 double PairLJGromacsGPU::memory_usage()
 {
   const double base_bytes = Pair::memory_usage();
   const double gpu_lib_bytes = ljgrm_gpu_bytes();
   return base_bytes + gpu_lib_bytes;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Host (CPU) evaluation of the lj/gromacs interaction for the atoms at list
 // positions [start,inum) of ilist.
 //
 //   start, inum    first and one-past-last position in ilist to process
 //   eflag          non-zero: compute the pair energy evdwl
 //   vflag          not read here; tallying is governed by evflag
 //   ilist          atom index for each list position
 //   numneigh       neighbor count, indexed by atom index
 //   firstneigh     neighbor index array, indexed by atom index
 //
 // Only f[i] of the list atom i is updated; nothing is written for j.
 void PairLJGromacsGPU::cpu_compute(int start, int inum, int eflag,
                                    int vflag, int *ilist,
                                    int *numneigh, int **firstneigh)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r2inv,r6inv,forcelj,factor_lj;
   double r,t,fswitch,eswitch;
   int *jlist;
 
   // evdwl is passed to ev_tally_full() whenever evflag is set, including
   // when eflag is 0 and the energy branch below never assigns it, so it
   // must hold a defined value from the start
   evdwl = 0.0;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   double *special_lj = force->special_lj;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // the special-bond scaling factor is looked up from the raw list
       // entry via sbmask() before NEIGHMASK reduces it to the atom index
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         r6inv = r2inv*r2inv*r2inv;
         forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
         // beyond the inner cutoff a switching term in t = r - cut_inner
         // is added to the force
         if (rsq > cut_inner_sq[itype][jtype]) {
           r = sqrt(rsq);
           t = r - cut_inner[itype][jtype];
           fswitch = r*t*t*(ljsw1[itype][jtype] + ljsw2[itype][jtype]*t);
           forcelj += fswitch;
         }
         fpair = factor_lj*forcelj * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           evdwl = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
           evdwl += ljsw5[itype][jtype];
           // t is read only under the same condition that assigned it above
           if (rsq > cut_inner_sq[itype][jtype]) {
             eswitch = t*t*t*(ljsw3[itype][jtype] + ljsw4[itype][jtype]*t);
             evdwl += eswitch;
           }
           evdwl *= factor_lj;
         }
 
         if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_lj_sdk_coul_long_gpu.cpp b/src/GPU/pair_lj_sdk_coul_long_gpu.cpp
index 1d35384a7..426b5dfae 100644
--- a/src/GPU/pair_lj_sdk_coul_long_gpu.cpp
+++ b/src/GPU/pair_lj_sdk_coul_long_gpu.cpp
@@ -1,352 +1,352 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Mike Brown (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_lj_sdk_coul_long_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "kspace.h"
 #include "gpu_extra.h"
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 int cmml_gpu_init(const int ntypes, double **cutsq, int **lj_type,
                   double **host_lj1, double **host_lj2, double **host_lj3,
                   double **host_lj4, double **offset, double *special_lj,
                   const int nlocal, const int nall, const int max_nbors,
                   const int maxspecial, const double cell_size, int &gpu_mode,
                   FILE *screen, double **host_cut_ljsq, double host_cut_coulsq,
                   double *host_special_coul, const double qqrd2e,
                   const double g_ewald);
 void cmml_gpu_clear();
 int ** cmml_gpu_compute_n(const int ago, const int inum, const int nall,
                           double **host_x, int *host_type, double *sublo,
                           double *subhi, tagint *tag, int **nspecial,
                           tagint **special, const bool eflag, const bool vflag,
                           const bool eatom, const bool vatom, int &host_start,
                           int **ilist, int **jnum, const double cpu_time,
                           bool &success, double *host_q, double *boxlo,
                           double *prd);
 void cmml_gpu_compute(const int ago, const int inum, const int nall,
                       double **host_x, int *host_type, int *ilist, int *numj,
                       int **firstneigh, const bool eflag, const bool vflag,
                       const bool eatom, const bool vatom, int &host_start,
                       const double cpu_time, bool &success, double *host_q,
                       const int nlocal, double *boxlo, double *prd);
 double cmml_gpu_bytes();
 
 #include "lj_sdk_common.h"
 
 
 using namespace LJSDKParms;
 
 /* ----------------------------------------------------------------------
    constructor: builds on PairLJSDKCoulLong and starts with gpu_mode set
    to GPU_FORCE; init_style() later hands gpu_mode to cmml_gpu_init() by
    non-const reference, so the library is able to change it
 ------------------------------------------------------------------------- */
 
 PairLJSDKCoulLongGPU::PairLJSDKCoulLongGPU(LAMMPS *lmp) :
   PairLJSDKCoulLong(lmp), gpu_mode(GPU_FORCE)
 {
   // flags declared outside this file; presumably "no rRESPA support" and
   // "no re-initialization support" -- TODO confirm against the Pair class
   respa_enable = 0;
   reinitflag = 0;
   // no host-side time measured yet; compute() measures and updates it
   cpu_time = 0.0;
   // NOTE(review): presumably verifies that the GPU package is set up
   // before this style is used -- confirm in gpu_extra.h
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    destructor: cleanup is delegated to the GPU library via
    cmml_gpu_clear() (presumably frees the arrays the library allocated
    -- confirm in the library source)
 ------------------------------------------------------------------------- */
 
 PairLJSDKCoulLongGPU::~PairLJSDKCoulLongGPU()
 {
   cmml_gpu_clear();
 }
 
 /* ----------------------------------------------------------------------
    compute forces (and energy/virial when requested) for one call
    the GPU library is invoked first and sets host_start; neighbor list
    entries host_start..inum-1 are then evaluated on the host by
    cpu_compute(), and the time that takes is stored in cpu_time
 ------------------------------------------------------------------------- */
 
 void PairLJSDKCoulLongGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
 
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
   if (gpu_mode != GPU_FORCE) {
     // the library call returns firstneigh and fills ilist/numneigh itself
     // (presumably it builds the neighbor list in this mode -- confirm)
     inum = atom->nlocal;
     firstneigh = cmml_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                     atom->type, domain->sublo, domain->subhi,
                                     atom->tag, atom->nspecial, atom->special,
                                     eflag, vflag, eflag_atom, vflag_atom,
                                     host_start, &ilist, &numneigh, cpu_time,
                                     success, atom->q, domain->boxlo,
                                     domain->prd);
   } else {
     // GPU_FORCE: hand the neighbor list stored in "list" to the library
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     cmml_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                      ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                      vflag_atom, host_start, cpu_time, success, atom->q,
                      atom->nlocal, domain->boxlo, domain->prd);
   }
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   // host handles the remaining entries; template arguments are
   // <EVFLAG,EFLAG>, selected from evflag and eflag
   if (host_start<inum) {
     cpu_time = MPI_Wtime();
     if (evflag) {
       if (eflag) cpu_compute<1,1>(host_start, inum, ilist, numneigh, firstneigh);
       else cpu_compute<1,0>(host_start, inum, ilist, numneigh, firstneigh);
     } else cpu_compute<0,0>(host_start, inum, ilist, numneigh, firstneigh);
     // elapsed host time; passed to the library on the next call
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
    checks prerequisites (charges, newton pair off, KSpace solver),
    recomputes cutsq, initializes the GPU library and, in GPU_FORCE mode,
    requests a full neighbor list
 ------------------------------------------------------------------------- */
 
 void PairLJSDKCoulLongGPU::init_style()
 {
   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/sdk/coul/long/gpu requires atom attribute q");
   if (force->newton_pair)
     error->all(FLERR,"Cannot use newton pair with lj/sdk/coul/long/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   // maxcut tracks the largest squared cutoff over all type pairs
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   // NOTE(review): if no pair was set, maxcut stays -1.0 and sqrt() of it
   // is NaN -- confirm this cannot happen by the time init_style() runs
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   cut_coulsq = cut_coul * cut_coul;
 
   // ensure use of KSpace long-range solver, set g_ewald
 
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style is incompatible with KSpace style");
   g_ewald = force->kspace->g_ewald;
 
   // setup force tables
 
   if (ncoultablebits) init_tables(cut_coul,NULL);
 
   // maxspecial is only taken from the atom class for molecular systems
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // 300 is passed as the max_nbors argument; gpu_mode is passed by
   // non-const reference
   int success = cmml_gpu_init(atom->ntypes+1, cutsq, lj_type, lj1, lj2, lj3,
                               lj4, offset, force->special_lj, atom->nlocal,
                               atom->nlocal+atom->nghost, 300, maxspecial,
                               cell_size, gpu_mode, screen, cut_ljsq,
                               cut_coulsq, force->special_coul,
                               force->qqrd2e, g_ewald);
   GPU_EXTRA::check_flag(success,error,world);
 
   // in GPU_FORCE mode request a full (not half) neighbor list
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage: sum of Pair::memory_usage() and the value reported by
    the GPU library through cmml_gpu_bytes()
 ------------------------------------------------------------------------- */
 
 double PairLJSDKCoulLongGPU::memory_usage()
 {
   double bytes = Pair::memory_usage();
   return bytes + cmml_gpu_bytes();
 }
 
 /* ----------------------------------------------------------------------
    host-side force loop over neighbor list entries start..inum-1
    EVFLAG != 0 : tally energy/virial through ev_tally_full()
    EFLAG  != 0 : also evaluate evdwl and ecoul
    only f[i] is updated; f[j] is never written in this routine
 ------------------------------------------------------------------------- */
 template <int EVFLAG, int EFLAG>
 void PairLJSDKCoulLongGPU::cpu_compute(int start, int inum, int *ilist,
                                        int *numneigh, int **firstneigh)
 {
   int i,j,ii,jj;
   double qtmp,xtmp,ytmp,ztmp;
   double r2inv,forcecoul,forcelj,factor_coul,factor_lj;
 
   const double * const * const x = atom->x;
   double * const * const f = atom->f;
   const double * const q = atom->q;
   const int * const type = atom->type;
   const double * const special_coul = force->special_coul;
   const double * const special_lj = force->special_lj;
   const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     // force on atom i is accumulated locally and added to f[i] once
     fxtmp=fytmp=fztmp=0.0;
 
     const int itype = type[i];
     const int * const jlist = firstneigh[i];
     const int jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       // sbmask(j) selects the special_lj/special_coul scaling factors;
       // NEIGHMASK then strips those bits to leave the atom index
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       const double delx = xtmp - x[j][0];
       const double dely = ytmp - x[j][1];
       const double delz = ztmp - x[j][2];
       const double rsq = delx*delx + dely*dely + delz*delz;
       const int jtype = type[j];
 
       double evdwl = 0.0;
       double ecoul = 0.0;
       double fpair = 0.0;
 
       if (rsq < cutsq[itype][jtype]) {
           r2inv = 1.0/rsq;
         const int ljt = lj_type[itype][jtype];
 
         if (rsq < cut_coulsq) {
           if (!ncoultablebits || rsq <= tabinnersq) {
             // analytic branch: "erfc" below is a local variable holding a
             // polynomial in t (coefficients A1..A5) times exp(-grij^2)
             const double r = sqrt(rsq);
             const double grij = g_ewald * r;
             const double expm2 = exp(-grij*grij);
             const double t = 1.0 / (1.0 + EWALD_P*grij);
             const double erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
             const double prefactor = qqrd2e * qtmp*q[j]/r;
             forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
             if (EFLAG) ecoul = prefactor*erfc;
             // special pairs: subtract the (1-factor_coul) share
             if (factor_coul < 1.0) {
               forcecoul -= (1.0-factor_coul)*prefactor;
               if (EFLAG) ecoul -= (1.0-factor_coul)*prefactor;
             }
           } else {
             // tabulated branch: the table index is taken from the integer
             // view of rsq stored in the union (mask, then shift), followed
             // by linear interpolation within the table interval
             union_int_float_t rsq_lookup;
             rsq_lookup.f = rsq;
             int itable = rsq_lookup.i & ncoulmask;
             itable >>= ncoulshiftbits;
             const double fraction = (rsq_lookup.f - rtable[itable]) *
                                      drtable[itable];
             const double table = ftable[itable] + fraction*dftable[itable];
             forcecoul = qtmp*q[j] * table;
             if (EFLAG) {
               const double table2 = etable[itable] + fraction*detable[itable];
               ecoul = qtmp*q[j] * table2;
             }
             if (factor_coul < 1.0) {
               const double table2 = ctable[itable] + fraction*dctable[itable];
               const double prefactor = qtmp*q[j] * table2;
               forcecoul -= (1.0-factor_coul)*prefactor;
               if (EFLAG) ecoul -= (1.0-factor_coul)*prefactor;
             }
           }
         } else {
           forcecoul = 0.0;
           ecoul = 0.0;
         }
 
 
         if (rsq < cut_ljsq[itype][jtype]) {
 
           // NOTE(review): if ljt matches none of the three cases below,
           // forcelj is not assigned in this branch before it is used in
           // fpair -- confirm lj_type can only hold these values
           if (ljt == LJ12_4) {
             const double r4inv=r2inv*r2inv;
             forcelj = r4inv*(lj1[itype][jtype]*r4inv*r4inv
                              - lj2[itype][jtype]);
 
             if (EFLAG)
               evdwl = r4inv*(lj3[itype][jtype]*r4inv*r4inv
                              - lj4[itype][jtype]) - offset[itype][jtype];
 
           } else if (ljt == LJ9_6) {
             const double r3inv = r2inv*sqrt(r2inv);
             const double r6inv = r3inv*r3inv;
             forcelj = r6inv*(lj1[itype][jtype]*r3inv
                              - lj2[itype][jtype]);
             if (EFLAG)
               evdwl = r6inv*(lj3[itype][jtype]*r3inv
                              - lj4[itype][jtype]) - offset[itype][jtype];
 
           } else if (ljt == LJ12_6) {
             const double r6inv = r2inv*r2inv*r2inv;
             forcelj = r6inv*(lj1[itype][jtype]*r6inv
                              - lj2[itype][jtype]);
             if (EFLAG)
               evdwl = r6inv*(lj3[itype][jtype]*r6inv
                              - lj4[itype][jtype]) - offset[itype][jtype];
           }
 
           if (EFLAG) evdwl *= factor_lj;
 
         } else {
           forcelj=0.0;
           evdwl = 0.0;
         }
 
         // forcecoul already includes the factor_coul correction;
         // the LJ part is scaled by factor_lj here
         fpair = (forcecoul + factor_lj*forcelj) * r2inv;
 
         fxtmp += delx*fpair;
         fytmp += dely*fpair;
         fztmp += delz*fpair;
 
         if (EVFLAG) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
diff --git a/src/GPU/pair_lj_sdk_gpu.cpp b/src/GPU/pair_lj_sdk_gpu.cpp
index fab768254..e19860d60 100644
--- a/src/GPU/pair_lj_sdk_gpu.cpp
+++ b/src/GPU/pair_lj_sdk_gpu.cpp
@@ -1,262 +1,262 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Mike Brown (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_lj_sdk_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 int cmm_gpu_init(const int ntypes, double **cutsq, int **cg_types,
                  double **host_lj1, double **host_lj2, double **host_lj3,
                  double **host_lj4, double **offset, double *special_lj,
                  const int nlocal, const int nall, const int max_nbors,
                  const int maxspecial, const double cell_size, int &gpu_mode,
                  FILE *screen);
 void cmm_gpu_clear();
 int ** cmm_gpu_compute_n(const int ago, const int inum, const int nall,
                          double **host_x, int *host_type, double *sublo,
                          double *subhi, tagint *tag, int **nspecial,
                          tagint **special, const bool eflag, const bool vflag,
                          const bool eatom, const bool vatom, int &host_start,
                          int **ilist, int **jnum,
                          const double cpu_time, bool &success);
 void cmm_gpu_compute(const int ago, const int inum, const int nall,
                      double **host_x, int *host_type, int *ilist, int *numj,
                      int **firstneigh, const bool eflag, const bool vflag,
                      const bool eatom, const bool vatom, int &host_start,
                      const double cpu_time, bool &success);
 double cmm_gpu_bytes();
 
 #include "lj_sdk_common.h"
 
 using namespace LJSDKParms;
 
 /* ----------------------------------------------------------------------
    constructor: builds on PairLJSDK and starts with gpu_mode set to
    GPU_FORCE; init_style() later hands gpu_mode to cmm_gpu_init() by
    non-const reference, so the library is able to change it
 ------------------------------------------------------------------------- */
 
 PairLJSDKGPU::PairLJSDKGPU(LAMMPS *lmp) : PairLJSDK(lmp), gpu_mode(GPU_FORCE)
 {
   // flags declared outside this file; presumably "no rRESPA support" and
   // "no re-initialization support" -- TODO confirm against the Pair class
   respa_enable = 0;
   reinitflag = 0;
   // no host-side time measured yet; compute() measures and updates it
   cpu_time = 0.0;
   // NOTE(review): presumably verifies that the GPU package is set up
   // before this style is used -- confirm in gpu_extra.h
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    destructor: cleanup is delegated to the GPU library via
    cmm_gpu_clear() (presumably frees the arrays the library allocated
    -- confirm in the library source)
 ------------------------------------------------------------------------- */
 
 PairLJSDKGPU::~PairLJSDKGPU()
 {
   cmm_gpu_clear();
 }
 
 /* ----------------------------------------------------------------------
    compute forces (and energy/virial when requested) for one call
    the GPU library is invoked first and sets host_start; neighbor list
    entries host_start..inum-1 are then evaluated on the host by
    cpu_compute(), and the time that takes is stored in cpu_time
 ------------------------------------------------------------------------- */
 
 void PairLJSDKGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
 
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
   if (gpu_mode != GPU_FORCE) {
     // the library call returns firstneigh and fills ilist/numneigh itself
     // (presumably it builds the neighbor list in this mode -- confirm)
     inum = atom->nlocal;
     firstneigh = cmm_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                    atom->type, domain->sublo, domain->subhi,
                                    atom->tag, atom->nspecial, atom->special,
                                    eflag, vflag, eflag_atom, vflag_atom,
                                    host_start, &ilist, &numneigh, cpu_time,
                                    success);
   } else {
     // GPU_FORCE: hand the neighbor list stored in "list" to the library
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     cmm_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                     vflag_atom, host_start, cpu_time, success);
   }
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   // host handles the remaining entries; template arguments are
   // <EVFLAG,EFLAG>, selected from evflag and eflag
   if (host_start<inum) {
     cpu_time = MPI_Wtime();
     if (evflag) {
       if (eflag) cpu_compute<1,1>(host_start, inum, ilist, numneigh, firstneigh);
       else cpu_compute<1,0>(host_start, inum, ilist, numneigh, firstneigh);
     } else cpu_compute<0,0>(host_start, inum, ilist, numneigh, firstneigh);
     // elapsed host time; passed to the library on the next call
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
    requires newton pair off, recomputes cutsq, initializes the GPU
    library and, in GPU_FORCE mode, requests a full neighbor list
 ------------------------------------------------------------------------- */
 
 void PairLJSDKGPU::init_style()
 {
   if (force->newton_pair)
     error->all(FLERR,"Cannot use newton pair with lj/sdk/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   // maxcut tracks the largest squared cutoff over all type pairs
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   // NOTE(review): if no pair was set, maxcut stays -1.0 and sqrt() of it
   // is NaN -- confirm this cannot happen by the time init_style() runs
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   // maxspecial is only taken from the atom class for molecular systems
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // 300 is passed as the max_nbors argument; gpu_mode is passed by
   // non-const reference
   int success = cmm_gpu_init(atom->ntypes+1,cutsq,lj_type,lj1,lj2,lj3,lj4,
                              offset, force->special_lj, atom->nlocal,
                              atom->nlocal+atom->nghost, 300, maxspecial,
                              cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
   // in GPU_FORCE mode request a full (not half) neighbor list
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage: sum of Pair::memory_usage() and the value reported by
    the GPU library through cmm_gpu_bytes()
 ------------------------------------------------------------------------- */
 
 double PairLJSDKGPU::memory_usage()
 {
   double bytes = Pair::memory_usage();
   return bytes + cmm_gpu_bytes();
 }
 
 /* ----------------------------------------------------------------------
    host-side force loop over neighbor list entries start..inum-1
    EVFLAG != 0 : tally energy/virial through ev_tally_full()
    EFLAG  != 0 : also evaluate evdwl
    only f[i] is updated; f[j] is never written in this routine
 ------------------------------------------------------------------------- */
 template <int EVFLAG, int EFLAG>
 void PairLJSDKGPU::cpu_compute(int start, int inum, int *ilist,
                                int *numneigh, int **firstneigh)
 {
   int i,j,ii,jj,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r2inv,forcelj,factor_lj;
 
   const double * const * const x = atom->x;
   double * const * const f = atom->f;
   const int * const type = atom->type;
   const double * const special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
   // set once here; evdwl is only reassigned below when EFLAG is nonzero
   evdwl=0.0;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     // force on atom i is accumulated locally and added to f[i] once
     fxtmp=fytmp=fztmp=0.0;
 
     const int itype = type[i];
     const int * const jlist = firstneigh[i];
     const int jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       // sbmask(j) selects the special_lj scaling factor;
       // NEIGHMASK then strips those bits to leave the atom index
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         const int ljt = lj_type[itype][jtype];
 
         if (ljt == LJ12_4) {
           const double r4inv=r2inv*r2inv;
           forcelj = r4inv*(lj1[itype][jtype]*r4inv*r4inv
                            - lj2[itype][jtype]);
 
           if (EFLAG)
             evdwl = r4inv*(lj3[itype][jtype]*r4inv*r4inv
                            - lj4[itype][jtype]) - offset[itype][jtype];
 
         } else if (ljt == LJ9_6) {
           const double r3inv = r2inv*sqrt(r2inv);
           const double r6inv = r3inv*r3inv;
           forcelj = r6inv*(lj1[itype][jtype]*r3inv
                            - lj2[itype][jtype]);
           if (EFLAG)
             evdwl = r6inv*(lj3[itype][jtype]*r3inv
                            - lj4[itype][jtype]) - offset[itype][jtype];
 
         } else if (ljt == LJ12_6) {
           const double r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv*(lj1[itype][jtype]*r6inv
                           - lj2[itype][jtype]);
           if (EFLAG)
             evdwl = r6inv*(lj3[itype][jtype]*r6inv
                            - lj4[itype][jtype]) - offset[itype][jtype];
         // any other lj_type value: skip this pair entirely
         } else continue;
 
         // NOTE(review): fpair is scaled by factor_lj but evdwl is not, so
         // pairs with factor_lj != 1 are tallied with the unscaled energy;
         // the lj/sdk/coul/long/gpu host loop does scale evdwl -- confirm
         // whether "if (EFLAG) evdwl *= factor_lj;" is missing here
         fpair = factor_lj*forcelj*r2inv;
 
         fxtmp += delx*fpair;
         fytmp += dely*fpair;
         fztmp += delz*fpair;
 
         if (EVFLAG) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
diff --git a/src/GPU/pair_lj_sf_dipole_sf_gpu.cpp b/src/GPU/pair_lj_sf_dipole_sf_gpu.cpp
index c5db42665..81ec6c1d0 100755
--- a/src/GPU/pair_lj_sf_dipole_sf_gpu.cpp
+++ b/src/GPU/pair_lj_sf_dipole_sf_gpu.cpp
@@ -1,400 +1,400 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
    
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
    
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_lj_sf_dipole_sf_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 int dplsf_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                    double **host_lj2, double **host_lj3, double **host_lj4, 
                    double *special_lj, const int nlocal, 
                    const int nall, const int max_nbors, const int maxspecial,
                    const double cell_size, int &gpu_mode, FILE *screen,
                    double **host_cut_ljsq, double **host_cut_coulsq,
                    double *host_special_coul, const double qqrd2e);
 void dplsf_gpu_clear();
 int ** dplsf_gpu_compute_n(const int ago, const int inum,
                            const int nall, double **host_x, int *host_type, 
                            double *sublo, double *subhi, tagint *tag, int **nspecial,
                            tagint **special, const bool eflag, const bool vflag,
                            const bool eatom, const bool vatom, int &host_start,
                            int **ilist, int **jnum, const double cpu_time,
                            bool &success, double *host_q, double **host_mu, 
                            double *boxlo, double *prd);
 void dplsf_gpu_compute(const int ago, const int inum,
                        const int nall, double **host_x, int *host_type,
                        int *ilist, int *numj, int **firstneigh,
                        const bool eflag, const bool vflag, const bool eatom,
                        const bool vatom, int &host_start, const double cpu_time,
                        bool &success, double *host_q, double **host_mu, const int nlocal,
                        double *boxlo, double *prd);
 double dplsf_gpu_bytes();
 
 /* ----------------------------------------------------------------------
    constructor: builds on PairLJSFDipoleSF and starts with gpu_mode set
    to GPU_FORCE; init_style() later hands gpu_mode to dplsf_gpu_init() by
    non-const reference, so the library is able to change it
 ------------------------------------------------------------------------- */
 
 PairLJSFDipoleSFGPU::PairLJSFDipoleSFGPU(LAMMPS *lmp) : PairLJSFDipoleSF(lmp), 
   gpu_mode(GPU_FORCE)
 {
   // flags declared outside this file; presumably "no rRESPA support" and
   // "no re-initialization support" -- TODO confirm against the Pair class
   respa_enable = 0;
   reinitflag = 0;
   // no host-side time measured yet; compute() measures and updates it
   cpu_time = 0.0;
   // NOTE(review): presumably verifies that the GPU package is set up
   // before this style is used -- confirm in gpu_extra.h
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); 
 }
 
 /* ----------------------------------------------------------------------
    destructor: cleanup is delegated to the GPU library via
    dplsf_gpu_clear() (presumably frees the arrays the library allocated
    -- confirm in the library source)
 ------------------------------------------------------------------------- */
 
 PairLJSFDipoleSFGPU::~PairLJSFDipoleSFGPU()
 {
   dplsf_gpu_clear();
 }
 
 /* ----------------------------------------------------------------------
    compute forces (and energy/virial when requested) for one call
    the GPU library is invoked first and sets host_start; neighbor list
    entries host_start..inum-1 are then evaluated on the host by
    cpu_compute(), and the time that takes is stored in cpu_time
 ------------------------------------------------------------------------- */
 
 void PairLJSFDipoleSFGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
   
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;  
   if (gpu_mode != GPU_FORCE) {
     // the library call returns firstneigh and fills ilist/numneigh itself
     // (presumably it builds the neighbor list in this mode -- confirm)
     inum = atom->nlocal;
     firstneigh = dplsf_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                      atom->type, domain->sublo, domain->subhi,
                                      atom->tag, atom->nspecial, atom->special,
                                      eflag, vflag, eflag_atom, vflag_atom,
                                      host_start, &ilist, &numneigh, cpu_time,
                                      success, atom->q, atom->mu, domain->boxlo, 
                                      domain->prd);
   } else {
     // GPU_FORCE: hand the neighbor list stored in "list" to the library
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     dplsf_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                       ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                       vflag_atom, host_start, cpu_time, success, atom->q,
                       atom->mu, atom->nlocal, domain->boxlo, domain->prd);
   }
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   // host handles the remaining entries; eflag/vflag are forwarded as
   // run-time arguments (cpu_compute is not a template in this style)
   if (host_start<inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     // elapsed host time; passed to the library on the next call
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
    checks prerequisites (q, mu, torque attributes, newton pair off, no
    'electron' units), recomputes cutsq, initializes the GPU library and,
    in GPU_FORCE mode, requests a full neighbor list
 ------------------------------------------------------------------------- */
 
 void PairLJSFDipoleSFGPU::init_style()
 {
   if (!atom->q_flag || !atom->mu_flag || !atom->torque_flag)
     error->all(FLERR,"Pair dipole/sf/gpu requires atom attributes q, mu, torque");
   
   if (force->newton_pair) 
     error->all(FLERR,"Cannot use newton pair with dipole/sf/gpu pair style");
 
   if (strcmp(update->unit_style,"electron") == 0)
     error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles");
 
   // Repeat cutsq calculation because done after call to init_style
   // maxcut tracks the largest squared cutoff over all type pairs
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   // NOTE(review): if no pair was set, maxcut stays -1.0 and sqrt() of it
   // is NaN -- confirm this cannot happen by the time init_style() runs
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   // maxspecial is only taken from the atom class for molecular systems
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // 300 is passed as the max_nbors argument; gpu_mode is passed by
   // non-const reference
   int success = dplsf_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
                                force->special_lj, atom->nlocal,
                                atom->nlocal+atom->nghost, 300, maxspecial,
                                cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
                                force->special_coul, force->qqrd2e);
   GPU_EXTRA::check_flag(success,error,world);
 
   // in GPU_FORCE mode request a full (not half) neighbor list
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage: sum of Pair::memory_usage() and the value reported by
    the GPU library through dplsf_gpu_bytes()
 ------------------------------------------------------------------------- */
 
 double PairLJSFDipoleSFGPU::memory_usage()
 {
   double bytes = Pair::memory_usage();
   return bytes + dplsf_gpu_bytes();
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Host-side pair loop for the atoms ilist[start] .. ilist[inum-1].
 // For every neighbor j inside cutsq it sums charge-charge, dipole-dipole,
 // dipole-charge and charge-dipole terms (inside cut_coulsq) plus an LJ term
 // (inside cut_ljsq); each term carries a cutoff-dependent correction.
 // Only f[i] and torque[i] are written here; nothing is accumulated on atom j.
 void PairLJSFDipoleSFGPU::cpu_compute(int start, int inum, int eflag, int vflag,
                                   int *ilist, int *numneigh,
                                   int **firstneigh)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fx,fy,fz;
   double rsq,rinv,r2inv,r6inv,r3inv,r5inv;
   double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz;
   double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul;
   double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4;
   double forcelj,factor_coul,factor_lj;
   double presf,afac,bfac,pqfac,qpfac,forceljcut,forceljsf;
   double aforcecoulx,aforcecouly,aforcecoulz;
   double bforcecoulx,bforcecouly,bforcecoulz;
   double rcutlj2inv, rcutcoul2inv,rcutlj6inv;
   int *jlist;
 
   // energies start at zero so the tally call below never sees stale values
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   double **mu = atom->mu;
   double **torque = atom->torque;
   int *type = atom->type;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   double qqrd2e = force->qqrd2e;
 
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // sbmask(j) selects the special_lj/special_coul entry; j is then masked
       // with NEIGHMASK before it is used as an atom index
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         rinv = sqrt(r2inv);
 
         // atom can have both a charge and dipole
         // i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole
 
         forcecoulx = forcecouly = forcecoulz = 0.0;
         tixcoul = tiycoul = tizcoul = 0.0;
         tjxcoul = tjycoul = tjzcoul = 0.0;
 
         if (rsq < cut_coulsq[itype][jtype]) {
 
           // charge-charge: 1/r^3 prefactor reduced by 1/(r * cut_coulsq)
           if (qtmp != 0.0 && q[j] != 0.0) {
             pre1 = qtmp*q[j]*rinv*(r2inv-1.0/cut_coulsq[itype][jtype]);
 
             forcecoulx += pre1*delx;
             forcecouly += pre1*dely;
             forcecoulz += pre1*delz;
           }
 
           // dipole-dipole: mu[.][3] > 0 is the test for "has a dipole"
           if (mu[i][3] > 0.0 && mu[j][3] > 0.0) { 
             r3inv = r2inv*rinv;
             r5inv = r3inv*r2inv;
             rcutcoul2inv=1.0/cut_coulsq[itype][jtype];
 
             pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2];
             pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz;
             pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
       
             // afac = 1 - (r/rc)^4
             afac = 1.0 - rsq*rsq * rcutcoul2inv*rcutcoul2inv;
             pre1 = afac * ( pdotp - 3.0 * r2inv * pidotr * pjdotr );
             aforcecoulx = pre1*delx;
             aforcecouly = pre1*dely;
             aforcecoulz = pre1*delz;
 
             // bfac = 1 - 4 (r/rc)^3 + 3 (r/rc)^4; reused by the energy below
             bfac = 1.0 - 4.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv) +
               3.0*rsq*rsq*rcutcoul2inv*rcutcoul2inv;
             presf = 2.0 * r2inv * pidotr * pjdotr;
             bforcecoulx = bfac * (pjdotr*mu[i][0]+pidotr*mu[j][0]-presf*delx);
             bforcecouly = bfac * (pjdotr*mu[i][1]+pidotr*mu[j][1]-presf*dely);
             bforcecoulz = bfac * (pjdotr*mu[i][2]+pidotr*mu[j][2]-presf*delz);
 	    
             forcecoulx += 3.0 * r5inv * ( aforcecoulx + bforcecoulx );
             forcecouly += 3.0 * r5inv * ( aforcecouly + bforcecouly );
             forcecoulz += 3.0 * r5inv * ( aforcecoulz + bforcecoulz );
 	    
             pre2 = 3.0 * bfac * r5inv * pjdotr;
             pre3 = 3.0 * bfac * r5inv * pidotr;
             pre4 = -bfac * r3inv;
 	    
             // crossx/y/z = pre4 * (mu_i x mu_j)
             crossx = pre4 * (mu[i][1]*mu[j][2] - mu[i][2]*mu[j][1]);
             crossy = pre4 * (mu[i][2]*mu[j][0] - mu[i][0]*mu[j][2]);
             crossz = pre4 * (mu[i][0]*mu[j][1] - mu[i][1]*mu[j][0]);
 
             tixcoul += crossx + pre2 * (mu[i][1]*delz - mu[i][2]*dely);
             tiycoul += crossy + pre2 * (mu[i][2]*delx - mu[i][0]*delz);
             tizcoul += crossz + pre2 * (mu[i][0]*dely - mu[i][1]*delx);
             // the tj* sums are accumulated but never applied in this function
             tjxcoul += -crossx + pre3 * (mu[j][1]*delz - mu[j][2]*dely);
             tjycoul += -crossy + pre3 * (mu[j][2]*delx - mu[j][0]*delz);
             tjzcoul += -crossz + pre3 * (mu[j][0]*dely - mu[j][1]*delx);
           }
 
           // dipole on i, charge on j
           if (mu[i][3] > 0.0 && q[j] != 0.0) { 
             r3inv = r2inv*rinv;
             r5inv = r3inv*r2inv;
             pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz; 
             rcutcoul2inv=1.0/cut_coulsq[itype][jtype];
             pre1 = 3.0 * q[j] * r5inv * pidotr * (1-rsq*rcutcoul2inv);
             // pqfac = 1 - 3 (r/rc)^2 + 2 (r/rc)^3; reused by the energy below
             pqfac = 1.0 - 3.0*rsq*rcutcoul2inv + 
               2.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv);
             pre2 = q[j] * r3inv * pqfac;
 
             forcecoulx += pre2*mu[i][0] - pre1*delx;
             forcecouly += pre2*mu[i][1] - pre1*dely;
             forcecoulz += pre2*mu[i][2] - pre1*delz;
             tixcoul += pre2 * (mu[i][1]*delz - mu[i][2]*dely);
             tiycoul += pre2 * (mu[i][2]*delx - mu[i][0]*delz);
             tizcoul += pre2 * (mu[i][0]*dely - mu[i][1]*delx);
           }
 
           // charge on i, dipole on j
           if (mu[j][3] > 0.0 && qtmp != 0.0) { 
             r3inv = r2inv*rinv;
             r5inv = r3inv*r2inv;
             pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
             rcutcoul2inv=1.0/cut_coulsq[itype][jtype];
             pre1 = 3.0 * qtmp * r5inv * pjdotr * (1-rsq*rcutcoul2inv);
             // qpfac uses the same polynomial as pqfac above
             qpfac = 1.0 - 3.0*rsq*rcutcoul2inv +
               2.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv);
             pre2 = qtmp * r3inv * qpfac;
 
             forcecoulx += pre1*delx - pre2*mu[j][0];
             forcecouly += pre1*dely - pre2*mu[j][1];
             forcecoulz += pre1*delz - pre2*mu[j][2];
             tjxcoul += -pre2 * (mu[j][1]*delz - mu[j][2]*dely);
             tjycoul += -pre2 * (mu[j][2]*delx - mu[j][0]*delz);
             tjzcoul += -pre2 * (mu[j][0]*dely - mu[j][1]*delx);
           } 
         }
 
         // LJ interaction
 
         if (rsq < cut_ljsq[itype][jtype]) {
           r6inv = r2inv*r2inv*r2inv;
           forceljcut = r6inv*(lj1[itype][jtype]*r6inv-lj2[itype][jtype])*r2inv;
 	  
           // forceljsf is the same expression evaluated at rsq = cut_ljsq
           rcutlj2inv = 1.0 / cut_ljsq[itype][jtype];
           rcutlj6inv = rcutlj2inv * rcutlj2inv * rcutlj2inv;
           forceljsf = (lj1[itype][jtype]*rcutlj6inv - lj2[itype][jtype]) * 
           rcutlj6inv * rcutlj2inv;
 
           forcelj = factor_lj * (forceljcut - forceljsf);
         } else forcelj = 0.0;
 	  
         // total force
 
         fq = factor_coul*qqrd2e;
         fx = fq*forcecoulx + delx*forcelj;
         fy = fq*forcecouly + dely*forcelj;
         fz = fq*forcecoulz + delz*forcelj;
 	
         // force & torque accumulation
 
         f[i][0] += fx;
         f[i][1] += fy;
         f[i][2] += fz;
         torque[i][0] += fq*tixcoul;
         torque[i][1] += fq*tiycoul;
         torque[i][2] += fq*tizcoul;
 
         // energies reuse r3inv, r5inv, the dot products and bfac/pqfac/qpfac
         // set in the force section; each use is guarded by the same condition
         // that assigned them
         if (eflag) {
           if (rsq < cut_coulsq[itype][jtype]) {
             ecoul = qtmp*q[j]*rinv*
               pow((1.0-sqrt(rsq)/sqrt(cut_coulsq[itype][jtype])),2);
             if (mu[i][3] > 0.0 && mu[j][3] > 0.0)
               ecoul += bfac * (r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr);
             if (mu[i][3] > 0.0 && q[j] != 0.0) 
               ecoul += -q[j]*r3inv * pqfac * pidotr;
             if (mu[j][3] > 0.0 && qtmp != 0.0)
               ecoul += qtmp*r3inv * qpfac * pjdotr;
             ecoul *= factor_coul*qqrd2e;
           } else ecoul = 0.0;
 
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) +
               rcutlj6inv*(6*lj3[itype][jtype]*rcutlj6inv-3*lj4[itype][jtype])*
               rsq*rcutlj2inv +
               rcutlj6inv*(-7*lj3[itype][jtype]*rcutlj6inv+4*lj4[itype][jtype]);
             evdwl *= factor_lj;
           } else evdwl = 0.0; 
         } 
         
         // tally is made for atom i only, with the full force components
         if (evflag) ev_tally_xyz_full(i,evdwl,ecoul,
                                       fx,fy,fz,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_mie_cut_gpu.cpp b/src/GPU/pair_mie_cut_gpu.cpp
index 1b0be6cbf..3a6334f85 100644
--- a/src/GPU/pair_mie_cut_gpu.cpp
+++ b/src/GPU/pair_mie_cut_gpu.cpp
@@ -1,230 +1,230 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
    
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
    
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_mie_cut_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 // (only declared here; the calls are made from the PairMIECutGPU methods)
 
 // called once from init_style(); the result goes to GPU_EXTRA::check_flag()
 int mie_gpu_init(const int ntypes, double **cutsq, double **host_mie1,
                  double **host_mie2, double **host_mie3, double **host_mie4,
                  double **host_gamA, double **host_gamR, double **offset,
                  double *special_lj, const int nlocal,
                  const int nall, const int max_nbors, const int maxspecial,
                  const double cell_size, int &gpu_mode, FILE *screen);
 // called from the destructor
 void mie_gpu_clear();
 // used by compute() when gpu_mode != GPU_FORCE: the return value is used as
 // firstneigh, and ilist/jnum are passed by address and read afterwards
 int ** mie_gpu_compute_n(const int ago, const int inum,
                          const int nall, double **host_x, int *host_type, 
                          double *sublo, double *subhi, tagint *tag, int **nspecial,
                          tagint **special, const bool eflag, const bool vflag,
                          const bool eatom, const bool vatom, int &host_start,
                          int **ilist, int **jnum,
                          const double cpu_time, bool &success);
 // used by compute() when gpu_mode == GPU_FORCE: takes the host neighbor list
 void mie_gpu_compute(const int ago, const int inum, const int nall, 
                      double **host_x, int *host_type, int *ilist, int *numj,
                      int **firstneigh, const bool eflag, const bool vflag,
                      const bool eatom, const bool vatom, int &host_start,
                      const double cpu_time, bool &success);
 // added to Pair::memory_usage() in memory_usage()
 double mie_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 // Builds the pair style on top of PairMIECut with gpu_mode preset to
 // GPU_FORCE, clears the host timing value and the respa/reinit flags, then
 // lets GPU_EXTRA::gpu_ready() inspect the modify and error objects.
 PairMIECutGPU::PairMIECutGPU(LAMMPS *lmp)
   : PairMIECut(lmp), gpu_mode(GPU_FORCE)
 {
   // no host-side time has been measured yet
   cpu_time = 0.0;
   reinitflag = 0;
   respa_enable = 0;
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    destructor: hands cleanup to the GPU library via mie_gpu_clear()
 ------------------------------------------------------------------------- */
 
 PairMIECutGPU::~PairMIECutGPU()
 {
   // the only action taken here is the library-side clear call
   mie_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Per-step force evaluation: dispatches to one of the two mie_gpu_compute*
 // entry points depending on gpu_mode, then runs cpu_compute() on the atoms
 // from host_start up to inum and records how long that took in cpu_time.
 void PairMIECutGPU::compute(int eflag, int vflag)
 {
   // tally setup only when energy or virial output is requested
   if (!eflag && !vflag) evflag = vflag_fdotr = 0;
   else ev_setup(eflag,vflag);
 
   const int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   int *ilist, *numneigh, **firstneigh;
   bool success = true;
 
   if (gpu_mode == GPU_FORCE) {
     // the neighbor list held by this pair style is passed to the library
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     mie_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                     vflag_atom, host_start, cpu_time, success);
   } else {
     // the neighbor arrays come back from the library call
     inum = atom->nlocal;
     firstneigh = mie_gpu_compute_n(neighbor->ago, inum, nall,
                                    atom->x, atom->type, domain->sublo,
                                    domain->subhi, atom->tag, atom->nspecial,
                                    atom->special, eflag, vflag, eflag_atom,
                                    vflag_atom, host_start,
                                    &ilist, &numneigh, cpu_time, success);
   }
 
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   // nothing left for the host when host_start has reached inum
   if (host_start >= inum) return;
 
   const double tstart = MPI_Wtime();
   cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
   cpu_time = MPI_Wtime() - tstart;
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 // Rejects newton_pair, rebuilds cutsq from init_one(), derives the cell size
 // from the largest cutoff plus the neighbor skin, initializes the GPU library
 // and, in GPU_FORCE mode, requests a full neighbor list.
 void PairMIECutGPU::init_style()
 {
   // NOTE(review): presumably drops an rRESPA cutoff set up by the base class
   // (the constructor also clears respa_enable) - confirm
   cut_respa = NULL;
   
   if (force->newton_pair) 
     error->all(FLERR,"Cannot use newton pair with mie/cut/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       // a pair counts as set if it was set explicitly or both diagonal
       // entries were set
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         // from here on cut and maxcut hold squared cutoffs
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // 300 is passed in the max_nbors position of mie_gpu_init()
   int success = mie_gpu_init(atom->ntypes+1, cutsq, mie1, mie2, mie3, mie4,
                              gamA, gamR, offset, force->special_lj, atom->nlocal,
                              atom->nlocal+atom->nghost, 300, maxspecial,
                              cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
   // only GPU_FORCE mode asks the neighbor class for a list, and that list
   // is flagged full rather than half
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Returns the base-class Pair::memory_usage() figure plus whatever
 // mie_gpu_bytes() reports.
 double PairMIECutGPU::memory_usage()
 {
   const double host_bytes = Pair::memory_usage();
   const double library_bytes = mie_gpu_bytes();
   return host_bytes + library_bytes;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Host-side Mie force loop for the atoms ilist[start] .. ilist[inum-1].
 //   start, inum          : half-open range of positions in ilist to process
 //   eflag, vflag         : eflag gates the energy evaluation; vflag is not
 //                          read here (tallying is gated by the evflag member)
 //   ilist                : atom indices to process
 //   numneigh, firstneigh : per-atom neighbor count and neighbor array
 // Only f[i] is updated; nothing is accumulated on the neighbor atom j.
 void PairMIECutGPU::cpu_compute(int start, int inum, int eflag, int vflag, 
                                int *ilist, int *numneigh, int **firstneigh) {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r2inv,rgamR,rgamA,forcemie,factor_mie;
   int *jlist;
 
   // evdwl is passed to ev_tally_full() whenever evflag is set, even when
   // eflag is zero and the energy branch below is skipped, so it must start
   // from a defined value
   evdwl = 0.0;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   double *special_mie = force->special_lj;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // sbmask(j) selects the scaling factor; NEIGHMASK then leaves the
       // plain atom index
       factor_mie = special_mie[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         // (1/r^2)^(gam/2) = r^(-gam) for the attractive and repulsive exponents
         rgamA = pow(r2inv,(gamA[itype][jtype]/2.0));
         rgamR = pow(r2inv,(gamR[itype][jtype]/2.0));
         forcemie =  (mie1[itype][jtype]*rgamR - mie2[itype][jtype]*rgamA);
         fpair = factor_mie*forcemie*r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           evdwl = (mie3[itype][jtype]*rgamR - mie4[itype][jtype]*rgamA) -
             offset[itype][jtype];
           evdwl *= factor_mie;
         }
 
         if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_morse_gpu.cpp b/src/GPU/pair_morse_gpu.cpp
index feacd1d25..ad2b3b0b3 100644
--- a/src/GPU/pair_morse_gpu.cpp
+++ b/src/GPU/pair_morse_gpu.cpp
@@ -1,226 +1,226 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Mike Brown (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_morse_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 // (only declared here; the calls are made from the PairMorseGPU methods)
 
 // called once from init_style(); the result goes to GPU_EXTRA::check_flag()
 int mor_gpu_init(const int ntypes, double **cutsq, double **host_morse1,
                  double **host_r0, double **host_alpha, double **host_d0,
                  double **offset, double *special_lj, const int nlocal,
                  const int nall, const int max_nbors, const int maxspecial,
                  const double cell_size, int &gpu_mode, FILE *screen);
 // called from the destructor
 void mor_gpu_clear();
 // used by compute() when gpu_mode != GPU_FORCE: the return value is used as
 // firstneigh, and ilist/jnum are passed by address and read afterwards
 int ** mor_gpu_compute_n(const int ago, const int inum,
                          const int nall, double **host_x, int *host_type,
                          double *sublo, double *subhi, tagint *tag, int **nspecial,
                          tagint **special, const bool eflag, const bool vflag,
                          const bool eatom, const bool vatom, int &host_start,
                          int **ilist, int **jnum,
                          const double cpu_time, bool &success);
 // used by compute() when gpu_mode == GPU_FORCE: takes the host neighbor list
 void mor_gpu_compute(const int ago, const int inum, const int nall,
                      double **host_x, int *host_type, int *ilist, int *numj,
                      int **firstneigh, const bool eflag, const bool vflag,
                      const bool eatom, const bool vatom, int &host_start,
                      const double cpu_time, bool &success);
 // added to Pair::memory_usage() in memory_usage()
 double mor_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 // Builds the pair style on top of PairMorse with gpu_mode preset to
 // GPU_FORCE, clears the host timing value and reinitflag, then lets
 // GPU_EXTRA::gpu_ready() inspect the modify and error objects.
 PairMorseGPU::PairMorseGPU(LAMMPS *lmp)
   : PairMorse(lmp), gpu_mode(GPU_FORCE)
 {
   // no host-side time has been measured yet
   cpu_time = 0.0;
   reinitflag = 0;
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    destructor: hands cleanup to the GPU library via mor_gpu_clear()
 ------------------------------------------------------------------------- */
 
 PairMorseGPU::~PairMorseGPU()
 {
   // the only action taken here is the library-side clear call
   mor_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Per-step force evaluation: dispatches to one of the two mor_gpu_compute*
 // entry points depending on gpu_mode, then runs cpu_compute() on the atoms
 // from host_start up to inum and records how long that took in cpu_time.
 void PairMorseGPU::compute(int eflag, int vflag)
 {
   // tally setup only when energy or virial output is requested
   if (!eflag && !vflag) evflag = vflag_fdotr = 0;
   else ev_setup(eflag,vflag);
 
   const int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   int *ilist, *numneigh, **firstneigh;
   bool success = true;
 
   if (gpu_mode == GPU_FORCE) {
     // the neighbor list held by this pair style is passed to the library
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     mor_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                     ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                     vflag_atom, host_start, cpu_time, success);
   } else {
     // the neighbor arrays come back from the library call
     inum = atom->nlocal;
     firstneigh = mor_gpu_compute_n(neighbor->ago, inum, nall,
                                    atom->x, atom->type, domain->sublo,
                                    domain->subhi, atom->tag, atom->nspecial,
                                    atom->special, eflag, vflag, eflag_atom,
                                    vflag_atom, host_start, &ilist, &numneigh,
                                    cpu_time, success);
   }
 
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   // nothing left for the host when host_start has reached inum
   if (host_start >= inum) return;
 
   const double tstart = MPI_Wtime();
   cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
   cpu_time = MPI_Wtime() - tstart;
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 // Rejects newton_pair, rebuilds cutsq from init_one(), derives the cell size
 // from the largest cutoff plus the neighbor skin, initializes the GPU library
 // and, in GPU_FORCE mode, requests a full neighbor list.
 void PairMorseGPU::init_style()
 {
   if (force->newton_pair)
     error->all(FLERR,"Cannot use newton pair with morse/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       // a pair counts as set if it was set explicitly or both diagonal
       // entries were set
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         // from here on cut and maxcut hold squared cutoffs
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // 300 is passed in the max_nbors position of mor_gpu_init()
   int success = mor_gpu_init(atom->ntypes+1, cutsq, morse1, r0, alpha, d0,
                              offset, force->special_lj, atom->nlocal,
                              atom->nlocal+atom->nghost, 300, maxspecial,
                              cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
   // only GPU_FORCE mode asks the neighbor class for a list, and that list
   // is flagged full rather than half
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Returns the base-class Pair::memory_usage() figure plus whatever
 // mor_gpu_bytes() reports.
 double PairMorseGPU::memory_usage()
 {
   const double host_bytes = Pair::memory_usage();
   const double library_bytes = mor_gpu_bytes();
   return host_bytes + library_bytes;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Host-side Morse force loop for the atoms ilist[start] .. ilist[inum-1].
 //   start, inum          : half-open range of positions in ilist to process
 //   eflag, vflag         : eflag gates the energy evaluation; vflag is not
 //                          read here (tallying is gated by the evflag member)
 //   ilist                : atom indices to process
 //   numneigh, firstneigh : per-atom neighbor count and neighbor array
 // Only f[i] is updated; nothing is accumulated on the neighbor atom j.
 void PairMorseGPU::cpu_compute(int start, int inum, int eflag, int vflag,
                                int *ilist, int *numneigh, int **firstneigh)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r,dr,dexp,factor_lj;
   int *jlist;
 
   // evdwl is passed to ev_tally_full() whenever evflag is set, even when
   // eflag is zero and the energy branch below is skipped, so it must start
   // from a defined value
   evdwl = 0.0;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   double *special_lj = force->special_lj;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // sbmask(j) selects the scaling factor; NEIGHMASK then leaves the
       // plain atom index
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r = sqrt(rsq);
         dr = r - r0[itype][jtype];
         // dexp = exp(-alpha (r - r0)) is shared by the force and the energy
         dexp = exp(-alpha[itype][jtype] * dr);
         fpair = factor_lj * morse1[itype][jtype] * (dexp*dexp - dexp) / r;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           evdwl = d0[itype][jtype] * (dexp*dexp - 2.0*dexp) -
             offset[itype][jtype];
           evdwl *= factor_lj;
         }
 
         if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_resquared_gpu.cpp b/src/GPU/pair_resquared_gpu.cpp
index e500785d4..f26883376 100644
--- a/src/GPU/pair_resquared_gpu.cpp
+++ b/src/GPU/pair_resquared_gpu.cpp
@@ -1,317 +1,317 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Mike Brown (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_resquared_gpu.h"
 #include "math_extra.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "atom_vec_ellipsoid.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "domain.h"
 #include "update.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 // (only declared here; the calls are made from the PairRESquaredGPU methods)
 
 // called once from init_style(); the result goes to GPU_EXTRA::check_flag()
 int re_gpu_init(const int ntypes, double **shape, double **well,
                 double **cutsq, double **sigma, double **epsilon,
                 int **form, double **host_lj1,
                 double **host_lj2, double **host_lj3, double **host_lj4,
                 double **offset, double *special_lj, const int nlocal,
                 const int nall,        const int max_nbors, const int maxspecial,
                 const double cell_size,        int &gpu_mode, FILE *screen);
 // called from the destructor
 void re_gpu_clear();
 // used by compute() when gpu_mode != GPU_FORCE: the return value is used as
 // firstneigh, and ilist/jnum are passed by address and read afterwards
 int ** re_gpu_compute_n(const int ago, const int inum, const int nall,
                         double **host_x, int *host_type, double *sublo,
                         double *subhi, tagint *tag, int **nspecial, tagint **special,
                         const bool eflag, const bool vflag,
                         const bool eatom, const bool vatom, int &host_start,
                         int **ilist, int **jnum, const double cpu_time,
                         bool &success, double **host_quat);
 // used by compute() when gpu_mode == GPU_FORCE: takes the host neighbor
 // list; its return value replaces ilist for the later cpu_compute() call
 int * re_gpu_compute(const int ago, const int inum, const int nall,
                      double **host_x, int *host_type, int *ilist, int *numj,
                      int **firstneigh, const bool eflag, const bool vflag,
                      const bool eatom, const bool vatom, int &host_start,
                      const double cpu_time, bool &success, double **host_quat);
 // added to the total in memory_usage()
 double re_gpu_bytes();
 
 // values of form[itype][jtype] that cpu_compute() switches on
 enum{SPHERE_SPHERE,SPHERE_ELLIPSE,ELLIPSE_SPHERE,ELLIPSE_ELLIPSE};
 
 /* ---------------------------------------------------------------------- */
 
 // Builds the pair style on top of PairRESquared with gpu_mode preset to
 // GPU_FORCE, requires an "ellipsoid" atom style (errors out otherwise),
 // starts with an empty quaternion buffer, and lets GPU_EXTRA::gpu_ready()
 // inspect the modify and error objects.
 PairRESquaredGPU::PairRESquaredGPU(LAMMPS *lmp) : PairRESquared(lmp),
                                                 gpu_mode(GPU_FORCE)
 {
   reinitflag = 0;
   // compute() passes cpu_time to the GPU library before any host timing has
   // been stored in it, so it needs a defined starting value
   cpu_time = 0.0;
   avec = (AtomVecEllipsoid *) atom->style_match("ellipsoid");
   if (!avec)
     error->all(FLERR,"Pair resquared/gpu requires atom style ellipsoid");
   // quat is allocated later, in init_style() and compute()
   quat_nmax = 0;
   quat = NULL;
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    destructor: resets the timing value, clears the GPU library state
    and releases the quaternion buffer
 ------------------------------------------------------------------------- */
 
 PairRESquaredGPU::~PairRESquaredGPU()
 {
   cpu_time = 0.0;
   re_gpu_clear();
   // quat was obtained through memory->grow(), so it goes back the same way
   memory->destroy(quat);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Per-step force evaluation: copies each ellipsoid's quaternion into the
 // per-atom quat buffer, dispatches to one of the two re_gpu_compute* entry
 // points depending on gpu_mode, then runs cpu_compute() on the atoms from
 // host_start up to inum and records how long that took in cpu_time.
 void PairRESquaredGPU::compute(int eflag, int vflag)
 {
   // tally setup only when energy or virial output is requested
   if (!eflag && !vflag) evflag = vflag_fdotr = 0;
   else ev_setup(eflag,vflag);
 
   const int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   int *ilist, *numneigh, **firstneigh;
   bool success = true;
 
   // enlarge the buffer with 10% headroom once nall outgrows it
   if (quat_nmax < nall) {
     quat_nmax = static_cast<int>(1.1 * nall);
     memory->grow(quat, quat_nmax, 4, "pair:quat");
   }
 
   // atoms whose ellipsoid index is negative keep whatever quat[i] held
   AtomVecEllipsoid::Bonus *bonus = avec->bonus;
   int *ellipsoid = atom->ellipsoid;
   for (int i = 0; i < nall; ++i) {
     const int qi = ellipsoid[i];
     if (qi < 0) continue;
     for (int k = 0; k < 4; ++k)
       quat[i][k] = bonus[qi].quat[k];
   }
 
   if (gpu_mode == GPU_FORCE) {
     // host neighbor list goes in; the library hands back the ilist to use
     inum = list->inum;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     ilist = re_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                            list->ilist, numneigh, firstneigh, eflag, vflag,
                            eflag_atom, vflag_atom, host_start,
                            cpu_time, success, quat);
   } else {
     // the neighbor arrays come back from the library call
     inum = atom->nlocal;
     firstneigh = re_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                   atom->type, domain->sublo, domain->subhi,
                                   atom->tag, atom->nspecial, atom->special,
                                   eflag, vflag, eflag_atom, vflag_atom,
                                   host_start, &ilist, &numneigh, cpu_time,
                                   success, quat);
   }
 
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   // nothing left for the host when host_start has reached inum
   if (host_start >= inum) return;
 
   const double tstart = MPI_Wtime();
   cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
   cpu_time = MPI_Wtime() - tstart;
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 // Rejects newton_pair and non-ellipsoid atom styles, fills the per-type shape
 // tables, rebuilds cutsq from init_one(), initializes the GPU library,
 // requests a full neighbor list in GPU_FORCE mode and sizes the quat buffer.
 void PairRESquaredGPU::init_style()
 {
   if (force->newton_pair)
     error->all(FLERR,"Cannot use newton pair with resquared/gpu pair style");
   if (!atom->ellipsoid_flag)
     error->all(FLERR,"Pair resquared/gpu requires atom style ellipsoid");
 
   // per-type shape precalculations
   // require that atom shapes are identical within each type
   // if shape = 0 for point particle, set shape = 1 as required by Gay-Berne
 
   for (int i = 1; i <= atom->ntypes; i++) {
     if (!atom->shape_consistency(i,shape1[i][0],shape1[i][1],shape1[i][2]))
       error->all(FLERR,"Pair resquared/gpu requires atoms with same type have same shape");
     if (setwell[i]) {
       // shape2 holds the squared shape1 components, lshape their product
       shape2[i][0] = shape1[i][0]*shape1[i][0];
       shape2[i][1] = shape1[i][1]*shape1[i][1];
       shape2[i][2] = shape1[i][2]*shape1[i][2];
       lshape[i] = shape1[i][0]*shape1[i][1]*shape1[i][2];
     }
   }
 
   // Repeat cutsq calculation because done after call to init_style
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       // a pair counts as set if it was set explicitly or both diagonal
       // entries were set
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         // from here on cut and maxcut hold squared cutoffs
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
 
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // 300 is passed in the max_nbors position of re_gpu_init()
   int success = re_gpu_init(atom->ntypes+1, shape1, well, cutsq, sigma,
                             epsilon, form, lj1, lj2, lj3, lj4, offset,
                             force->special_lj, atom->nlocal,
                             atom->nlocal+atom->nghost, 300, maxspecial,
                             cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
   // only GPU_FORCE mode asks the neighbor class for a list, and that list
   // is flagged full rather than half
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
   // size the quaternion buffer for the current atom count plus 10% headroom
   quat_nmax = static_cast<int>(1.1 * (atom->nlocal + atom->nghost));
   memory->grow(quat, quat_nmax, 4, "pair:quat");
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Returns the base-class Pair::memory_usage() figure plus the quat buffer
 // usage reported by memory->usage() plus whatever re_gpu_bytes() reports.
 double PairRESquaredGPU::memory_usage()
 {
   double total = Pair::memory_usage();
   total += memory->usage(quat,quat_nmax);
   total += re_gpu_bytes();
   return total;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Host-side RE-squared force loop for the atoms ilist[start] .. ilist[inum-1].
 //   start, inum          : half-open range of positions in ilist to process
 //   eflag, vflag         : eflag gates the energy bookkeeping; vflag is not
 //                          read here (tallying is gated by the evflag member)
 //   ilist                : atom indices to process
 //   numneigh, firstneigh : per-atom neighbor count and neighbor array
 // The interaction used for a pair is picked by form[itype][jtype].
 // Only f[i] and tor[i] are updated; nothing is accumulated on atom j.
 void PairRESquaredGPU::cpu_compute(int start, int inum, int eflag, int vflag,
                                   int *ilist, int *numneigh, int **firstneigh)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   // evdwl is passed to ev_tally_xyz_full() whenever evflag is set, even when
   // eflag is zero, and one_eng is skipped by the sphere-sphere branch in that
   // case; both therefore start from a defined value
   double evdwl = 0.0, one_eng = 0.0;
   double rsq,r2inv,r6inv,forcelj,factor_lj;
   double fforce[3],ttor[3],rtor[3],r12[3];
   int *jlist;
   RE2Vars wi,wj;
 
   double **x = atom->x;
   double **f = atom->f;
   double **tor = atom->torque;
   int *type = atom->type;
   double *special_lj = force->special_lj;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     itype = type[i];
 
     // not a LJ sphere
 
     if (lshape[itype] != 0.0) precompute_i(i,wi);
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // sbmask(j) selects the scaling factor; NEIGHMASK then leaves the
       // plain atom index
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       // r12 = center to center vector
 
       r12[0] = x[j][0]-x[i][0];
       r12[1] = x[j][1]-x[i][1];
       r12[2] = x[j][2]-x[i][2];
       rsq = MathExtra::dot3(r12,r12);
       jtype = type[j];
 
       // compute if less than cutoff
 
       if (rsq < cutsq[itype][jtype]) {
         switch (form[itype][jtype]) {
 
          case SPHERE_SPHERE:
           r2inv = 1.0/rsq;
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
           // r12 points from i to j, hence the sign flip for the force on i
           forcelj *= -r2inv;
           if (eflag) one_eng =
               r6inv*(r6inv*lj3[itype][jtype]-lj4[itype][jtype]) -
               offset[itype][jtype];
           fforce[0] = r12[0]*forcelj;
           fforce[1] = r12[1]*forcelj;
           fforce[2] = r12[2]*forcelj;
           break;
 
          case SPHERE_ELLIPSE:
           // roles of i and j are swapped in the call; the torque it returns
           // (rtor) is not applied to atom i
           precompute_i(j,wj);
           one_eng = resquared_lj(j,i,wj,r12,rsq,fforce,rtor,false);
           break;
 
          case ELLIPSE_SPHERE:
           one_eng = resquared_lj(i,j,wi,r12,rsq,fforce,ttor,true);
           tor[i][0] += ttor[0]*factor_lj;
           tor[i][1] += ttor[1]*factor_lj;
           tor[i][2] += ttor[2]*factor_lj;
           break;
 
          default:
           // remaining form value: both particles treated as ellipsoids
           precompute_i(j,wj);
           one_eng = resquared_analytic(i,j,wi,wj,r12,rsq,fforce,ttor,rtor);
           tor[i][0] += ttor[0]*factor_lj;
           tor[i][1] += ttor[1]*factor_lj;
           tor[i][2] += ttor[2]*factor_lj;
 
          break;
         }
 
         fforce[0] *= factor_lj;
         fforce[1] *= factor_lj;
         fforce[2] *= factor_lj;
         f[i][0] += fforce[0];
         f[i][1] += fforce[1];
         f[i][2] += fforce[2];
 
         if (eflag) evdwl = factor_lj*one_eng;
 
         // the tally takes the separation as (i minus j), i.e. -r12
         if (evflag) ev_tally_xyz_full(i,evdwl,0.0,fforce[0],fforce[1],
                                       fforce[2],-r12[0],-r12[1],-r12[2]);
       }
     }
   }
 }
diff --git a/src/GPU/pair_soft_gpu.cpp b/src/GPU/pair_soft_gpu.cpp
index 71729cf3d..b36d5d27b 100644
--- a/src/GPU/pair_soft_gpu.cpp
+++ b/src/GPU/pair_soft_gpu.cpp
@@ -1,237 +1,237 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
    
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
    
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_soft_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 #include "math_const.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 int soft_gpu_init(const int ntypes, double **cutsq, double **prefactor,
                    double **cut, double *special_lj, const int nlocal,
                    const int nall, const int max_nbors, const int maxspecial,
                    const double cell_size, int &gpu_mode, FILE *screen);
 void soft_gpu_reinit(const int ntypes, double **cutsq, double **host_prefactor,
                      double **host_cut);
 void soft_gpu_clear();
 int ** soft_gpu_compute_n(const int ago, const int inum,
                            const int nall, double **host_x, int *host_type, 
                            double *sublo, double *subhi, tagint *tag, int **nspecial,
                            tagint **special, const bool eflag, const bool vflag,
                            const bool eatom, const bool vatom, int &host_start,
                            int **ilist, int **jnum,
                            const double cpu_time, bool &success);
 void soft_gpu_compute(const int ago, const int inum, const int nall, 
                        double **host_x, int *host_type, int *ilist, int *numj,
                        int **firstneigh, const bool eflag, const bool vflag,
                        const bool eatom, const bool vatom, int &host_start,
                        const double cpu_time, bool &success);
 double soft_gpu_bytes();
 
 
 using namespace MathConst;
 
 /* ---------------------------------------------------------------------- */
 
 PairSoftGPU::PairSoftGPU(LAMMPS *lmp) : PairSoft(lmp), gpu_mode(GPU_FORCE)
 {
   respa_enable = 0;
   cpu_time = 0.0;
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); 
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairSoftGPU::~PairSoftGPU()
 {
   soft_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairSoftGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
   
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
   if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = soft_gpu_compute_n(neighbor->ago, inum, nall,
                                      atom->x, atom->type, domain->sublo,
                                      domain->subhi, atom->tag, atom->nspecial,
                                      atom->special, eflag, vflag, eflag_atom,
                                      vflag_atom, host_start, 
                                      &ilist, &numneigh, cpu_time, success);
   } else {
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     soft_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                       ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                       vflag_atom, host_start, cpu_time, success);
   }
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   if (host_start<inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairSoftGPU::init_style()
 {
   if (force->newton_pair) 
     error->all(FLERR,"Cannot use newton pair with soft/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   double maxcut = -1.0;
   double mcut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         mcut = init_one(i,j);
         mcut *= mcut;
         if (mcut > maxcut)
           maxcut = mcut;
         cutsq[i][j] = cutsq[j][i] = mcut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   int success = soft_gpu_init(atom->ntypes+1, cutsq, prefactor, cut,
                               force->special_lj, atom->nlocal,
                               atom->nlocal+atom->nghost, 300, maxspecial,
                               cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairSoftGPU::reinit()
 {
   Pair::reinit();
   
   soft_gpu_reinit(atom->ntypes+1, cutsq, prefactor, cut);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairSoftGPU::memory_usage()
 {
   double bytes = Pair::memory_usage();
   return bytes + soft_gpu_bytes();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairSoftGPU::cpu_compute(int start, int inum, int eflag, int vflag, 
                                int *ilist, int *numneigh, int **firstneigh) {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double r,rsq,arg,factor_lj;
   int *jlist;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   double *special_lj = force->special_lj;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r = sqrt(rsq);
         arg = MY_PI*r/cut[itype][jtype];
         if (r > 0.0) fpair = factor_lj * prefactor[itype][jtype] *
                        sin(arg) * MY_PI/cut[itype][jtype]/r;
         else fpair = 0.0;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag)
           evdwl = factor_lj * prefactor[itype][jtype] * (1.0+cos(arg));
 
         if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_sw_gpu.cpp b/src/GPU/pair_sw_gpu.cpp
index 1a395f287..70e271122 100644
--- a/src/GPU/pair_sw_gpu.cpp
+++ b/src/GPU/pair_sw_gpu.cpp
@@ -1,226 +1,226 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Mike Brown (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_sw_gpu.h"
 #include "atom.h"
 #include "neighbor.h"
 #include "neigh_request.h"
 #include "force.h"
 #include "comm.h"
 #include "memory.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "memory.h"
 #include "error.h"
 #include "domain.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 int sw_gpu_init(const int ntypes, const int inum, const int nall, const int max_nbors, 
                 const double cell_size, int &gpu_mode, FILE *screen,
                 int* host_map, const int nelements, int*** host_elem2param, const int nparams,
                 const double* sw_epsilon, const double* sw_sigma,
                 const double* sw_lambda, const double* sw_gamma,
                 const double* sw_costheta, const double* sw_biga,
                 const double* sw_bigb, const double* sw_powerp,
                 const double* sw_powerq, const double* sw_cut, 
                 const double* sw_cutsq);
 void sw_gpu_clear();
 int ** sw_gpu_compute_n(const int ago, const int inum,
                         const int nall, double **host_x, int *host_type,
                         double *sublo, double *subhi, tagint *tag, int **nspecial,
                         tagint **special, const bool eflag, const bool vflag,
                         const bool eatom, const bool vatom, int &host_start,
                         int **ilist, int **jnum,
                         const double cpu_time, bool &success);
 void sw_gpu_compute(const int ago, const int nloc, const int nall, const int ln,
                     double **host_x, int *host_type, int *ilist, int *numj,
                     int **firstneigh, const bool eflag, const bool vflag,
                     const bool eatom, const bool vatom, int &host_start,
                     const double cpu_time, bool &success);
 double sw_gpu_bytes();
 extern double lmp_gpu_forces(double **f, double **tor, double *eatom,
                              double **vatom, double *virial, double &ecoul);
 
 #define MAXLINE 1024
 #define DELTA 4
 
 /* ---------------------------------------------------------------------- */
 
 PairSWGPU::PairSWGPU(LAMMPS *lmp) : PairSW(lmp), gpu_mode(GPU_FORCE)
 {
   cpu_time = 0.0;
   reinitflag = 0;
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 
   cutghost = NULL;
   ghostneigh = 1;
 }
 
 /* ----------------------------------------------------------------------
    check if allocated, since class can be destructed when incomplete
 ------------------------------------------------------------------------- */
 
 PairSWGPU::~PairSWGPU()
 {
   sw_gpu_clear();
   if (allocated)
     memory->destroy(cutghost);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairSWGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
 
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
   if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = sw_gpu_compute_n(neighbor->ago, inum, nall,
                                    atom->x, atom->type, domain->sublo,
                                    domain->subhi, atom->tag, atom->nspecial,
                                    atom->special, eflag, vflag, eflag_atom,
                                    vflag_atom, host_start,
                                    &ilist, &numneigh, cpu_time, success);
   } else {
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
 
     sw_gpu_compute(neighbor->ago, atom->nlocal, nall, inum+list->gnum,
                    atom->x, atom->type, ilist, numneigh, firstneigh, eflag, 
                    vflag, eflag_atom, vflag_atom, host_start, cpu_time, 
                    success);
   }
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairSWGPU::allocate()
 {
   PairSW::allocate();
   int n = atom->ntypes;
 
   memory->create(cutghost,n+1,n+1,"pair:cutghost");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairSWGPU::init_style()
 {
   double cell_size = cutmax + neighbor->skin;
 
   if (atom->tag_enable == 0)
     error->all(FLERR,"Pair style sw/gpu requires atom IDs");
   if (force->newton_pair != 0)
     error->all(FLERR,"Pair style sw/gpu requires newton pair off");
 
   double *epsilon, *sigma, *lambda, *gamma;
   double *biga, *bigb, *powerp, *powerq;
   double *_cut, *_cutsq, *costheta;
   epsilon = sigma = lambda = gamma = NULL;
   biga = bigb = powerp = powerq = NULL;
   _cut = _cutsq = costheta = NULL;
 
   memory->create(epsilon,nparams,"pair:epsilon");
   memory->create(sigma,nparams,"pair:sigma");
   memory->create(lambda,nparams,"pair:lambda");
   memory->create(gamma,nparams,"pair:gamma");
   memory->create(biga,nparams,"pair:biga");
   memory->create(bigb,nparams,"pair:bigb");
   memory->create(powerp,nparams,"pair:powerp");
   memory->create(powerq,nparams,"pair:powerq");
   memory->create(_cut,nparams,"pair:_cut");
   memory->create(_cutsq,nparams,"pair:_cutsq");
   memory->create(costheta,nparams,"pair:costheta");
 
   for (int i = 0; i < nparams; i++) {
     epsilon[i] = params[i].epsilon;
     sigma[i] = params[i].sigma;
     lambda[i] = params[i].lambda;
     gamma[i] = params[i].gamma;
     biga[i] = params[i].biga;
     bigb[i] = params[i].bigb;
     powerp[i] = params[i].powerp;
     powerq[i] = params[i].powerq;
     _cut[i] = params[i].cut;
     _cutsq[i] = params[i].cutsq;
     costheta[i] = params[i].costheta;
   }
 
   int success = sw_gpu_init(atom->ntypes+1, atom->nlocal, atom->nlocal+atom->nghost, 300, 
                             cell_size, gpu_mode, screen, map, nelements, 
                             elem2param, nparams, epsilon, 
                             sigma, lambda, gamma, costheta, biga, bigb, 
                             powerp, powerq, _cut, _cutsq);
 
   memory->destroy(epsilon);
   memory->destroy(sigma);
   memory->destroy(lambda);
   memory->destroy(gamma);
   memory->destroy(biga);
   memory->destroy(bigb);
   memory->destroy(powerp);
   memory->destroy(powerq);
   memory->destroy(_cut);
   memory->destroy(_cutsq);
   memory->destroy(costheta);
 
   GPU_EXTRA::check_flag(success,error,world);
 
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
     neighbor->requests[irequest]->ghost = 1;
   }
 
   if (comm->cutghostuser < (2.0*cutmax + neighbor->skin) )
     comm->cutghostuser=2.0*cutmax + neighbor->skin;
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairSWGPU::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
   cutghost[i][j] = cutmax;
   cutghost[j][i] = cutmax;
 
   return cutmax;
 }
 
diff --git a/src/GPU/pair_table_gpu.cpp b/src/GPU/pair_table_gpu.cpp
index 18772c82d..6b1ca065c 100644
--- a/src/GPU/pair_table_gpu.cpp
+++ b/src/GPU/pair_table_gpu.cpp
@@ -1,343 +1,343 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Trung Dac Nguyen (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_table_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 #define LOOKUP 0
 #define LINEAR 1
 #define SPLINE 2
 #define BITMAP 3
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 int table_gpu_init(const int ntypes, double **cutsq,
                    double ***host_table_coeffs, double **host_table_data,
                    double *special_lj, const int nlocal, const int nall,
                    const int max_nbors, const int maxspecial,
                    const double cell_size, int &gpu_mode, FILE *screen,
                    int tabstyle, int ntables, int tablength);
 void table_gpu_clear();
 int ** table_gpu_compute_n(const int ago, const int inum, const int nall,
                            double **host_x, int *host_type, double *sublo,
                            double *subhi, tagint *tag, int **nspecial,
                            tagint **special, const bool eflag, const bool vflag,
                            const bool eatom, const bool vatom, int &host_start,
                            int **ilist, int **jnum, const double cpu_time,
                            bool &success);
 void table_gpu_compute(const int ago, const int inum, const int nall,
                        double **host_x, int *host_type, int *ilist, int *numj,
                        int **firstneigh, const bool eflag, const bool vflag,
                        const bool eatom, const bool vatom, int &host_start,
                        const double cpu_time, bool &success);
 double table_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 PairTableGPU::PairTableGPU(LAMMPS *lmp) : PairTable(lmp),
                                           gpu_mode(GPU_FORCE)
 {
   respa_enable = 0;
   reinitflag = 0;
   cpu_time = 0.0;
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairTableGPU::~PairTableGPU()
 {
   table_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairTableGPU::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
 
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
   if (gpu_mode != GPU_FORCE) {
     inum = atom->nlocal;
     firstneigh = table_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
                                      atom->type, domain->sublo, domain->subhi,
                                      atom->tag, atom->nspecial, atom->special,
                                      eflag, vflag, eflag_atom, vflag_atom,
                                      host_start, &ilist, &numneigh, cpu_time,
                                      success);
   } else {
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     table_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                       ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                       vflag_atom, host_start, cpu_time, success);
   }
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   if (host_start<inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairTableGPU::init_style()
 {
   if (force->newton_pair)
     error->all(FLERR,"Cannot use newton pair with table/gpu pair style");
 
   int ntypes = atom->ntypes;
 
   // Repeat cutsq calculation because done after call to init_style
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   // pack tables and send them to device
   double ***table_coeffs = NULL;
   double **table_data = NULL;
   memory->create(table_coeffs, ntypes+1, ntypes+1, 6, "table:coeffs");
 
   Table *tb;
   for (int i = 1; i <= atom->ntypes; i++)
     for (int j = 1; j <= atom->ntypes; j++) {
       int n = tabindex[i][j];
       tb = &tables[n];
       table_coeffs[i][j][0] = n;
       table_coeffs[i][j][1] = tb->nshiftbits;
       table_coeffs[i][j][2] = tb->nmask;
       table_coeffs[i][j][3] = tb->innersq;
       table_coeffs[i][j][4] = tb->invdelta;
       table_coeffs[i][j][5] = tb->deltasq6;
     }
 
   if (tabstyle != BITMAP) {
     memory->create(table_data, ntables, 6*tablength, "table:data");
     for (int n = 0; n < ntables; n++) {
       tb = &tables[n];
       if (tabstyle == LOOKUP) {
         for (int k = 0; k<tablength-1; k++) {
           table_data[n][6*k+1] = tb->e[k];
           table_data[n][6*k+2] = tb->f[k];
         }
       } else if (tabstyle == LINEAR) {
         for (int k = 0; k<tablength; k++) {
           table_data[n][6*k+0] = tb->rsq[k];
           table_data[n][6*k+1] = tb->e[k];
           table_data[n][6*k+2] = tb->f[k];
           if (k<tablength-1) {
             table_data[n][6*k+3] = tb->de[k];
             table_data[n][6*k+4] = tb->df[k];
           }
        }
       } else if (tabstyle == SPLINE) {
         for (int k = 0; k<tablength; k++) {
           table_data[n][6*k+0] = tb->rsq[k];
           table_data[n][6*k+1] = tb->e[k];
           table_data[n][6*k+2] = tb->f[k];
           table_data[n][6*k+3] = tb->e2[k];
           table_data[n][6*k+4] = tb->f2[k];
         }
       }
     }
   } else {
     int ntable = 1 << tablength;
     memory->create(table_data, ntables, 6*ntable, "table:data");
 
     for (int n = 0; n < ntables; n++) {
       tb = &tables[n];
       for (int k = 0; k<ntable; k++) {
         table_data[n][6*k+0] = tb->rsq[k];
         table_data[n][6*k+1] = tb->e[k];
         table_data[n][6*k+2] = tb->f[k];
         table_data[n][6*k+3] = tb->de[k];
         table_data[n][6*k+4] = tb->df[k];
         table_data[n][6*k+5] = tb->drsq[k];
       }
     }
   }
 
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   int success = table_gpu_init(atom->ntypes+1, cutsq, table_coeffs, table_data,
                                force->special_lj, atom->nlocal,
                                atom->nlocal+atom->nghost, 300, maxspecial,
                                cell_size, gpu_mode, screen, tabstyle, ntables,
                                tablength);
   GPU_EXTRA::check_flag(success,error,world);
 
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 
   memory->destroy(table_coeffs);
   memory->destroy(table_data);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairTableGPU::memory_usage()
 {
   double bytes = Pair::memory_usage();
   return bytes + table_gpu_bytes();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairTableGPU::cpu_compute(int start, int inum, int eflag, int vflag,
                                int *ilist, int *numneigh, int **firstneigh) {
   int i,j,ii,jj,jnum,itype,jtype,itable;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,factor_lj,fraction,value,a,b;
   int *jlist;
   Table *tb;
 
   union_int_float_t rsq_lookup;
   int tlm1 = tablength - 1;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   double *special_lj = force->special_lj;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         tb = &tables[tabindex[itype][jtype]];
         if (rsq < tb->innersq)
           error->one(FLERR,"Pair distance < table inner cutoff");
 
         if (tabstyle == LOOKUP) {
           itable = static_cast<int> ((rsq - tb->innersq) * tb->invdelta);
           if (itable >= tlm1)
             error->one(FLERR,"Pair distance > table outer cutoff");
           fpair = factor_lj * tb->f[itable];
         } else if (tabstyle == LINEAR) {
           itable = static_cast<int> ((rsq - tb->innersq) * tb->invdelta);
           if (itable >= tlm1)
             error->one(FLERR,"Pair distance > table outer cutoff");
           fraction = (rsq - tb->rsq[itable]) * tb->invdelta;
           value = tb->f[itable] + fraction*tb->df[itable];
           fpair = factor_lj * value;
         } else if (tabstyle == SPLINE) {
           itable = static_cast<int> ((rsq - tb->innersq) * tb->invdelta);
           if (itable >= tlm1)
             error->one(FLERR,"Pair distance > table outer cutoff");
           b = (rsq - tb->rsq[itable]) * tb->invdelta;
           a = 1.0 - b;
           value = a * tb->f[itable] + b * tb->f[itable+1] +
             ((a*a*a-a)*tb->f2[itable] + (b*b*b-b)*tb->f2[itable+1]) *
             tb->deltasq6;
           fpair = factor_lj * value;
         } else {
           rsq_lookup.f = rsq;
           itable = rsq_lookup.i & tb->nmask;
           itable >>= tb->nshiftbits;
           fraction = (rsq_lookup.f - tb->rsq[itable]) * tb->drsq[itable];
           value = tb->f[itable] + fraction*tb->df[itable];
           fpair = factor_lj * value;
         }
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           if (tabstyle == LOOKUP)
             evdwl = tb->e[itable];
           else if (tabstyle == LINEAR || tabstyle == BITMAP)
             evdwl = tb->e[itable] + fraction*tb->de[itable];
           else
             evdwl = a * tb->e[itable] + b * tb->e[itable+1] +
               ((a*a*a-a)*tb->e2[itable] + (b*b*b-b)*tb->e2[itable+1]) *
               tb->deltasq6;
           evdwl *= factor_lj;
         }
 
         if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_yukawa_colloid_gpu.cpp b/src/GPU/pair_yukawa_colloid_gpu.cpp
index 7c1d8dd55..febdc3304 100644
--- a/src/GPU/pair_yukawa_colloid_gpu.cpp
+++ b/src/GPU/pair_yukawa_colloid_gpu.cpp
@@ -1,240 +1,240 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
    
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
    
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_yukawa_colloid_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 // returns a status value that init_style() hands to GPU_EXTRA::check_flag();
 // gpu_mode is passed by non-const reference and tested by init_style()
 // right after the call
 int ykcolloid_gpu_init(const int ntypes, double **cutsq, double **host_a, 
                  double **host_offset, double *special_lj, const int inum,
                  const int nall, const int max_nbors,  const int maxspecial,
                  const double cell_size, int &gpu_mode, FILE *screen, 
                  const double kappa);
 // called from the destructor
 void ykcolloid_gpu_clear();
 // used by compute() when gpu_mode != GPU_FORCE: the return value is stored
 // as firstneigh, ilist/jnum are received through pointer arguments, and
 // host_start/success are reference arguments that compute() reads afterwards
 int ** ykcolloid_gpu_compute_n(const int ago, const int inum_full,
                         const int nall, double **host_x, int *host_type,
                         double *sublo, double *subhi, tagint *tag, int **nspecial,
                         tagint **special, const bool eflag, const bool vflag,
                         const bool eatom, const bool vatom, int &host_start,
                         int **ilist, int **jnum, const double cpu_time,
                         bool &success, double *host_rad);
 // used by compute() when gpu_mode == GPU_FORCE: the neighbor list arrays
 // are supplied by the caller instead of being returned
 void ykcolloid_gpu_compute(const int ago, const int inum_full, 
                      const int nall, double **host_x, int *host_type, 
                      int *ilist, int *numj, int **firstneigh, 
                      const bool eflag, const bool vflag,
                      const bool eatom, const bool vatom, int &host_start,
                      const double cpu_time, bool &success, double *host_rad);
 // value that memory_usage() adds to Pair::memory_usage()
 double ykcolloid_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 PairYukawaColloidGPU::PairYukawaColloidGPU(LAMMPS *lmp) : PairYukawaColloid(lmp), 
   gpu_mode(GPU_FORCE)
 {
   // builds on PairYukawaColloid; gpu_mode starts as GPU_FORCE and is later
   // passed by reference to ykcolloid_gpu_init() in init_style()
   respa_enable = 0;
   reinitflag = 0;
   // cpu_time is overwritten in compute() with the duration of the host pass
   cpu_time = 0.0;
   // NOTE(review): presumably verifies that GPU support was set up before
   // this style is created - confirm in gpu_extra.h
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); 
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairYukawaColloidGPU::~PairYukawaColloidGPU()
 {
   // cleanup is delegated entirely to the external library routine
   ykcolloid_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairYukawaColloidGPU::compute(int eflag, int vflag)
 {
   // dispatch the force computation to the external ykcolloid_gpu_* routines
   // and run cpu_compute() for any list entries they leave to the host

   // set up energy/virial tallying when requested, otherwise clear the flags
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
   
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
   
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
   if (gpu_mode != GPU_FORCE) {
     // library variant that also receives the sub-domain bounds, atom tags
     // and special lists; it returns firstneigh and hands back ilist and
     // numneigh through the pointer arguments
     inum = atom->nlocal;
     firstneigh = ykcolloid_gpu_compute_n(neighbor->ago, inum, nall,
                                          atom->x, atom->type, 
                                          domain->sublo,
                                          domain->subhi, atom->tag, 
                                          atom->nspecial, atom->special, 
                                          eflag, vflag, eflag_atom,
                                          vflag_atom, host_start, &ilist, 
                                          &numneigh, cpu_time, 
                                          success, atom->radius);
   } else {
     // GPU_FORCE: pass the neighbor list owned by this pair style
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     ykcolloid_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, 
                           ilist, numneigh, firstneigh, eflag, vflag, 
                           eflag_atom, vflag_atom, host_start, cpu_time, 
                           success, atom->radius);
   }
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   // list entries host_start..inum-1 are evaluated on the host; the elapsed
   // wall time is kept in cpu_time, which is passed to the next library call
   if (host_start<inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairYukawaColloidGPU::init_style()
 {
   // reject atom styles without sphere_flag and runs with newton_pair enabled
   if (!atom->sphere_flag)
     error->all(FLERR,"Pair yukawa/colloid/gpu requires atom style sphere");
     
   if (force->newton_pair) 
     error->all(FLERR,"Cannot use newton pair with yukawa/colloid/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   // a pair (i,j) is treated as set if it was set explicitly or if both
   // (i,i) and (j,j) were; all other pairs get a zero squared cutoff;
   // maxcut tracks the largest squared cutoff seen
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   // NOTE(review): maxcut is still -1.0 here if no type pair was set, which
   // makes sqrt(maxcut) NaN - confirm that case cannot reach this point
   double cell_size = sqrt(maxcut) + neighbor->skin;
   
   // maxspecial stays 0 unless atom->molecular is set
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // the literal 300 lands in the max_nbors parameter of ykcolloid_gpu_init()
   int success = ykcolloid_gpu_init(atom->ntypes+1, cutsq, a, 
                                    offset, force->special_lj, atom->nlocal,
                                    atom->nlocal+atom->nghost, 300, maxspecial,
                                    cell_size, gpu_mode, screen, kappa);
   GPU_EXTRA::check_flag(success,error,world);
 
   // a full (not half) neighbor list is requested only in GPU_FORCE mode,
   // the mode in which compute() reads this style's own list
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairYukawaColloidGPU::memory_usage()
 {
   // base-class figure plus whatever the library reports
   double bytes = Pair::memory_usage();
   return bytes + ykcolloid_gpu_bytes();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairYukawaColloidGPU::cpu_compute(int start, int inum, int eflag, 
                                        int vflag, int *ilist, int *numneigh, 
                                        int **firstneigh) {
   // host-side evaluation of list entries start..inum-1; compute() calls
   // this when host_start < inum.  vflag is not read in this body.
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair,radi,radj;
   double r,rsq,rinv,screening,forceyukawa,factor;
   int *jlist;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   double *radius = atom->radius;
   double *special_lj = force->special_lj;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     radi = radius[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // pick the special_lj weight selected by sbmask(j), then reduce j
       // with NEIGHMASK before it is used as an atom index
       factor = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
       radj = radius[j];
       
       if (rsq < cutsq[itype][jtype]) {
         r = sqrt(rsq);
         rinv = 1.0/r;
         // exponent uses the center distance minus the sum of both radii
         screening = exp(-kappa*(r-(radi+radj)));
         forceyukawa = a[itype][jtype] * screening;
 
         fpair = factor*forceyukawa * rinv;
 	
         // only atom i receives the force; f[j] is not touched in this loop
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           evdwl = a[itype][jtype]/kappa * screening - offset[itype][jtype];
           evdwl *= factor;
         }
 
         // NOTE(review): evdwl is assigned only when eflag is nonzero but is
         // passed here whenever evflag is set - confirm ev_tally_full()
         // ignores it in that case
         if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GPU/pair_yukawa_gpu.cpp b/src/GPU/pair_yukawa_gpu.cpp
index df384e620..5e4a0dc77 100644
--- a/src/GPU/pair_yukawa_gpu.cpp
+++ b/src/GPU/pair_yukawa_gpu.cpp
@@ -1,229 +1,229 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen (ORNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_yukawa_gpu.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 #include "neigh_request.h"
 #include "universe.h"
 #include "update.h"
 #include "domain.h"
 #include "string.h"
 #include "gpu_extra.h"
 
 using namespace LAMMPS_NS;
 
 // External functions from cuda library for atom decomposition
 
 // returns a status value that init_style() hands to GPU_EXTRA::check_flag();
 // gpu_mode is passed by non-const reference and tested by init_style()
 // right after the call
 int yukawa_gpu_init(const int ntypes, double **cutsq, double kappa,
                     double **host_a, double **offset, double *special_lj,
                     const int inum, const int nall, const int max_nbors,
                     const int maxspecial, const double cell_size,
                     int &gpu_mode, FILE *screen);
 // called from the destructor
 void yukawa_gpu_clear();
 // used by compute() when gpu_mode != GPU_FORCE: the return value is stored
 // as firstneigh, ilist/jnum are received through pointer arguments, and
 // host_start/success are reference arguments that compute() reads afterwards
 int ** yukawa_gpu_compute_n(const int ago, const int inum_full, const int nall,
                             double **host_x, int *host_type, double *sublo,
                             double *subhi, tagint *tag, int **nspecial,
                             tagint **special, const bool eflag, const bool vflag,
                             const bool eatom, const bool vatom,
                             int &host_start, int **ilist, int **jnum,
                             const double cpu_time, bool &success);
 // used by compute() when gpu_mode == GPU_FORCE: the neighbor list arrays
 // are supplied by the caller instead of being returned
 void yukawa_gpu_compute(const int ago, const int inum_full, const int nall,
                         double **host_x, int *host_type, int *ilist, int *numj,
                         int **firstneigh, const bool eflag, const bool vflag,
                         const bool eatom, const bool vatom, int &host_start,
                         const double cpu_time, bool &success);
 // value that memory_usage() adds to Pair::memory_usage()
 double yukawa_gpu_bytes();
 
 /* ---------------------------------------------------------------------- */
 
 PairYukawaGPU::PairYukawaGPU(LAMMPS *lmp) : PairYukawa(lmp),
                                             gpu_mode(GPU_FORCE)
 {
   // builds on PairYukawa; gpu_mode starts as GPU_FORCE and is later passed
   // by reference to yukawa_gpu_init() in init_style()
   respa_enable = 0;
   reinitflag = 0;
   // cpu_time is overwritten in compute() with the duration of the host pass
   cpu_time = 0.0;
   // NOTE(review): presumably verifies that GPU support was set up before
   // this style is created - confirm in gpu_extra.h
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairYukawaGPU::~PairYukawaGPU()
 {
   // cleanup is delegated entirely to the external library routine
   yukawa_gpu_clear();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairYukawaGPU::compute(int eflag, int vflag)
 {
   // dispatch the force computation to the external yukawa_gpu_* routines
   // and run cpu_compute() for any list entries they leave to the host

   // set up energy/virial tallying when requested, otherwise clear the flags
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   int nall = atom->nlocal + atom->nghost;
   int inum, host_start;
 
   bool success = true;
   int *ilist, *numneigh, **firstneigh;
   if (gpu_mode != GPU_FORCE) {
     // library variant that also receives the sub-domain bounds, atom tags
     // and special lists; it returns firstneigh and hands back ilist and
     // numneigh through the pointer arguments
     inum = atom->nlocal;
     firstneigh = yukawa_gpu_compute_n(neighbor->ago, inum, nall,
                                       atom->x, atom->type, domain->sublo,
                                       domain->subhi, atom->tag, atom->nspecial,
                                       atom->special, eflag, vflag, eflag_atom,
                                       vflag_atom, host_start,
                                       &ilist, &numneigh, cpu_time, success);
   } else {
     // GPU_FORCE: pass the neighbor list owned by this pair style
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
     yukawa_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
                        ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
                        vflag_atom, host_start, cpu_time, success);
   }
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");
 
   // list entries host_start..inum-1 are evaluated on the host; the elapsed
   // wall time is kept in cpu_time, which is passed to the next library call
   if (host_start<inum) {
     cpu_time = MPI_Wtime();
     cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
     cpu_time = MPI_Wtime() - cpu_time;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairYukawaGPU::init_style()
 {
   // this style refuses to run with newton_pair enabled
   if (force->newton_pair)
     error->all(FLERR,"Cannot use newton pair with yukawa/gpu pair style");
 
   // Repeat cutsq calculation because done after call to init_style
   // a pair (i,j) is treated as set if it was set explicitly or if both
   // (i,i) and (j,j) were; all other pairs get a zero squared cutoff;
   // maxcut tracks the largest squared cutoff seen
   double maxcut = -1.0;
   double cut;
   for (int i = 1; i <= atom->ntypes; i++) {
     for (int j = i; j <= atom->ntypes; j++) {
       if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
         cut = init_one(i,j);
         cut *= cut;
         if (cut > maxcut)
           maxcut = cut;
         cutsq[i][j] = cutsq[j][i] = cut;
       } else
         cutsq[i][j] = cutsq[j][i] = 0.0;
     }
   }
   // NOTE(review): maxcut is still -1.0 here if no type pair was set, which
   // makes sqrt(maxcut) NaN - confirm that case cannot reach this point
   double cell_size = sqrt(maxcut) + neighbor->skin;
 
   // maxspecial stays 0 unless atom->molecular is set
   int maxspecial=0;
   if (atom->molecular)
     maxspecial=atom->maxspecial;
   // the literal 300 lands in the max_nbors parameter of yukawa_gpu_init()
   int success = yukawa_gpu_init(atom->ntypes+1, cutsq, kappa, a,
                                 offset, force->special_lj, atom->nlocal,
                                 atom->nlocal+atom->nghost, 300, maxspecial,
                                 cell_size, gpu_mode, screen);
   GPU_EXTRA::check_flag(success,error,world);
 
   // a full (not half) neighbor list is requested only in GPU_FORCE mode,
   // the mode in which compute() reads this style's own list
   if (gpu_mode == GPU_FORCE) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairYukawaGPU::memory_usage()
 {
   // base-class figure plus whatever the library reports
   double bytes = Pair::memory_usage();
   return bytes + yukawa_gpu_bytes();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairYukawaGPU::cpu_compute(int start, int inum, int eflag, int vflag,
                                int *ilist, int *numneigh, int **firstneigh) {
   // host-side evaluation of list entries start..inum-1; compute() calls
   // this when host_start < inum.  vflag is not read in this body.
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r2inv,r,rinv,screening,forceyukawa,factor;
   int *jlist;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   double *special_lj = force->special_lj;
 
   // loop over neighbors of my atoms
 
   for (ii = start; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // pick the special_lj weight selected by sbmask(j), then reduce j
       // with NEIGHMASK before it is used as an atom index
       factor = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         r = sqrt(rsq);
         rinv = 1.0/r;
         screening = exp(-kappa*r);
         forceyukawa = a[itype][jtype] * screening * (kappa + rinv);
 
         // fpair multiplies the separation vector, hence the 1/rsq factor
         fpair = factor*forceyukawa * r2inv;
 
         // only atom i receives the force; f[j] is not touched in this loop
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         if (eflag) {
           evdwl = a[itype][jtype] * screening * rinv - offset[itype][jtype];
           evdwl *= factor;
         }
 
         // NOTE(review): evdwl is assigned only when eflag is nonzero but is
         // passed here whenever evflag is set - confirm ev_tally_full()
         // ignores it in that case
         if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 }
diff --git a/src/GRANULAR/pair_gran_hooke_history.cpp b/src/GRANULAR/pair_gran_hooke_history.cpp
index fddba13a9..9c09dcddc 100644
--- a/src/GRANULAR/pair_gran_hooke_history.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history.cpp
@@ -1,806 +1,806 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Leo Silbert (SNL), Gary Grest (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_gran_hooke_history.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "domain.h"
 #include "force.h"
 #include "update.h"
 #include "modify.h"
 #include "fix.h"
 #include "fix_shear_history.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairGranHookeHistory::PairGranHookeHistory(LAMMPS *lmp) : Pair(lmp)
 {
   single_enable = 1;
   no_virial_fdotr_compute = 1;
   // history = 1 makes init_style() request the shear-history neighbor list
   // and create the SHEAR_HISTORY fix on first use
   history = 1;
   fix_history = NULL;
 
   // svector holds four doubles, matching single_extra; freed in destructor
   single_extra = 4;
   svector = new double[4];
 
   computeflag = 0;
   neighprev = 0;
 
   // mass_rigid is created in compute() once atom->nmax exceeds nmax
   nmax = 0;
   mass_rigid = NULL;
 
   // set comm size needed by this Pair if used with fix rigid
 
   comm_forward = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairGranHookeHistory::~PairGranHookeHistory()
 {
   delete [] svector;
   // remove the fix that init_style() added under the ID "SHEAR_HISTORY"
   if (fix_history) modify->delete_fix("SHEAR_HISTORY");
 
   // these arrays exist only after allocate() has run
   if (allocated) {
     memory->destroy(setflag);
     memory->destroy(cutsq);
 
     delete [] onerad_dynamic;
     delete [] onerad_frozen;
     delete [] maxrad_dynamic;
     delete [] maxrad_frozen;
   }
 
   // released regardless of allocated; still NULL if compute() never
   // created it
   memory->destroy(mass_rigid);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairGranHookeHistory::compute(int eflag, int vflag)
 {
   // evaluate contact forces and torques for every overlapping pair in the
   // neighbor list, keeping the per-pair shear displacement stored in
   // listgranhistory up to date
   int i,j,ii,jj,inum,jnum;
   double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz;
   double radi,radj,radsum,rsq,r,rinv,rsqinv;
   double vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3;
   double wr1,wr2,wr3;
   double vtr1,vtr2,vtr3,vrel;
   double mi,mj,meff,damp,ccel,tor1,tor2,tor3;
   double fn,fs,fs1,fs2,fs3;
   double shrmag,rsht;
   int *ilist,*jlist,*numneigh,**firstneigh;
   int *touch,**firsttouch;
   double *shear,*allshear,**firstshear;
 
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   computeflag = 1;
   // stored shear displacements are left unmodified while update->setupflag
   // is set
   int shearupdate = 1;
   if (update->setupflag) shearupdate = 0;
 
   // update rigid body info for owned & ghost atoms if using FixRigid masses
   // body[i] = which body atom I is in, -1 if none
   // mass_body = mass of each rigid body
 
   if (fix_rigid && neighbor->ago == 0) {
     int tmp;
     int *body = (int *) fix_rigid->extract("body",tmp);
     double *mass_body = (double *) fix_rigid->extract("masstotal",tmp);
     // grow mass_rigid whenever the per-atom array size has increased
     if (atom->nmax > nmax) {
       memory->destroy(mass_rigid);
       nmax = atom->nmax;
       memory->create(mass_rigid,nmax,"pair:mass_rigid");
     }
     // 0.0 marks "not in a rigid body"; the meff code below tests for > 0.0
     int nlocal = atom->nlocal;
     for (i = 0; i < nlocal; i++)
       if (body[i] >= 0) mass_rigid[i] = mass_body[body[i]];
       else mass_rigid[i] = 0.0;
     comm->forward_comm_pair(this);
   }
 
   double **x = atom->x;
   double **v = atom->v;
   double **f = atom->f;
   double **omega = atom->omega;
   double **torque = atom->torque;
   double *radius = atom->radius;
   double *rmass = atom->rmass;
   double *mass = atom->mass;
   int *type = atom->type;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
   // history list: one touch flag and three shear components per neighbor
   // (the shear array is indexed with 3*jj below)
   firsttouch = listgranhistory->firstneigh;
   firstshear = listgranhistory->firstdouble;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     radi = radius[i];
     touch = firsttouch[i];
     allshear = firstshear[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       radj = radius[j];
       radsum = radi + radj;
 
       // no overlap when the center distance is at least the sum of radii
       if (rsq >= radsum*radsum) {
 
         // unset non-touching neighbors
 
         touch[jj] = 0;
         shear = &allshear[3*jj];
         shear[0] = 0.0;
         shear[1] = 0.0;
         shear[2] = 0.0;
 
       } else {
         r = sqrt(rsq);
         rinv = 1.0/r;
         rsqinv = 1.0/rsq;
 
         // relative translational velocity
 
         vr1 = v[i][0] - v[j][0];
         vr2 = v[i][1] - v[j][1];
         vr3 = v[i][2] - v[j][2];
 
         // normal component
 
         vnnr = vr1*delx + vr2*dely + vr3*delz;
         vn1 = delx*vnnr * rsqinv;
         vn2 = dely*vnnr * rsqinv;
         vn3 = delz*vnnr * rsqinv;
 
         // tangential component
 
         vt1 = vr1 - vn1;
         vt2 = vr2 - vn2;
         vt3 = vr3 - vn3;
 
         // relative rotational velocity
 
         wr1 = (radi*omega[i][0] + radj*omega[j][0]) * rinv;
         wr2 = (radi*omega[i][1] + radj*omega[j][1]) * rinv;
         wr3 = (radi*omega[i][2] + radj*omega[j][2]) * rinv;
 
         // meff = effective mass of pair of particles
         // if I or J part of rigid body, use body mass
         // if I or J is frozen, meff is other particle
 
         // per-atom masses take precedence over per-type masses when present
         if (rmass) {
           mi = rmass[i];
           mj = rmass[j];
         } else {
           mi = mass[type[i]];
           mj = mass[type[j]];
         }
         if (fix_rigid) {
           if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
           if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
         }
 
         meff = mi*mj / (mi+mj);
         if (mask[i] & freeze_group_bit) meff = mj;
         if (mask[j] & freeze_group_bit) meff = mi;
 
         // normal forces = Hookian contact + normal velocity damping
 
         // ccel multiplies the separation vector (delx,dely,delz) below
         damp = meff*gamman*vnnr*rsqinv;
         ccel = kn*(radsum-r)*rinv - damp;
 
         // relative velocities
 
         vtr1 = vt1 - (delz*wr2-dely*wr3);
         vtr2 = vt2 - (delx*wr3-delz*wr1);
         vtr3 = vt3 - (dely*wr1-delx*wr2);
         vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3;
         vrel = sqrt(vrel);
 
         // shear history effects
 
         touch[jj] = 1;
         shear = &allshear[3*jj];
 
         if (shearupdate) {
           shear[0] += vtr1*dt;
           shear[1] += vtr2*dt;
           shear[2] += vtr3*dt;
         }
         // magnitude is taken before the projection step below
         shrmag = sqrt(shear[0]*shear[0] + shear[1]*shear[1] +
                       shear[2]*shear[2]);
 
         // rotate shear displacements
 
         // subtract the component of shear along the separation vector
         rsht = shear[0]*delx + shear[1]*dely + shear[2]*delz;
         rsht *= rsqinv;
         if (shearupdate) {
           shear[0] -= rsht*delx;
           shear[1] -= rsht*dely;
           shear[2] -= rsht*delz;
         }
 
         // tangential forces = shear + tangential velocity damping
 
         fs1 = - (kt*shear[0] + meff*gammat*vtr1);
         fs2 = - (kt*shear[1] + meff*gammat*vtr2);
         fs3 = - (kt*shear[2] + meff*gammat*vtr3);
 
         // rescale frictional displacements and forces if needed
 
         // cap: tangential magnitude fs may not exceed fn = xmu*|ccel*r|
         fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3);
         fn = xmu * fabs(ccel*r);
 
         if (fs > fn) {
           if (shrmag != 0.0) {
             // stored shear is rescaled along with the force components
             shear[0] = (fn/fs) * (shear[0] + meff*gammat*vtr1/kt) -
               meff*gammat*vtr1/kt;
             shear[1] = (fn/fs) * (shear[1] + meff*gammat*vtr2/kt) -
               meff*gammat*vtr2/kt;
             shear[2] = (fn/fs) * (shear[2] + meff*gammat*vtr3/kt) -
               meff*gammat*vtr3/kt;
             fs1 *= fn/fs;
             fs2 *= fn/fs;
             fs3 *= fn/fs;
           } else fs1 = fs2 = fs3 = 0.0;
         }
 
         // forces & torques
 
         fx = delx*ccel + fs1;
         fy = dely*ccel + fs2;
         fz = delz*ccel + fs3;
         f[i][0] += fx;
         f[i][1] += fy;
         f[i][2] += fz;
 
         tor1 = rinv * (dely*fs3 - delz*fs2);
         tor2 = rinv * (delz*fs1 - delx*fs3);
         tor3 = rinv * (delx*fs2 - dely*fs1);
         torque[i][0] -= radi*tor1;
         torque[i][1] -= radi*tor2;
         torque[i][2] -= radi*tor3;
 
         // the opposite force and the torque on j are applied only when j
         // is an owned atom
         if (j < nlocal) {
           f[j][0] -= fx;
           f[j][1] -= fy;
           f[j][2] -= fz;
           torque[j][0] -= radj*tor1;
           torque[j][1] -= radj*tor2;
           torque[j][2] -= radj*tor3;
         }
 
         // energies are passed as 0.0; only the force components are tallied
         if (evflag) ev_tally_xyz(i,j,nlocal,0,
                                  0.0,0.0,fx,fy,fz,delx,dely,delz);
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairGranHookeHistory::allocate()
 {
   // create the per-type arrays, all sized ntypes+1 so they can be indexed
   // by type number starting at 1
   allocated = 1;
   int n = atom->ntypes;
 
   // only entries with j >= i are cleared here
   memory->create(setflag,n+1,n+1,"pair:setflag");
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       setflag[i][j] = 0;
 
   memory->create(cutsq,n+1,n+1,"pair:cutsq");
 
   // radius tables filled in init_style(); released in the destructor
   onerad_dynamic = new double[n+1];
   onerad_frozen = new double[n+1];
   maxrad_dynamic = new double[n+1];
   maxrad_frozen = new double[n+1];
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairGranHookeHistory::settings(int narg, char **arg)
 {
   // exactly six arguments, read in order: kn kt gamman gammat xmu dampflag
   if (narg != 6) error->all(FLERR,"Illegal pair_style command");
 
   // the literal "NULL" for kt selects kt = 2/7 * kn
   kn = force->numeric(FLERR,arg[0]);
   if (strcmp(arg[1],"NULL") == 0) kt = kn * 2.0/7.0;
   else kt = force->numeric(FLERR,arg[1]);
 
   // the literal "NULL" for gammat selects gammat = 1/2 * gamman
   gamman = force->numeric(FLERR,arg[2]);
   if (strcmp(arg[3],"NULL") == 0) gammat = 0.5 * gamman;
   else gammat = force->numeric(FLERR,arg[3]);
 
   // dampflag = 0 overrides gammat with zero, whatever was given above
   xmu = force->numeric(FLERR,arg[4]);
   dampflag = force->inumeric(FLERR,arg[5]);
   if (dampflag == 0) gammat = 0.0;
 
   // range checks: non-negative coefficients, xmu at most 10000,
   // dampflag either 0 or 1
   if (kn < 0.0 || kt < 0.0 || gamman < 0.0 || gammat < 0.0 ||
       xmu < 0.0 || xmu > 10000.0 || dampflag < 0 || dampflag > 1)
     error->all(FLERR,"Illegal pair_style command");
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairGranHookeHistory::coeff(int narg, char **arg)
 {
   // mark type pairs as set; this style takes no per-pair coefficients
   // beyond the two type ranges
   // NOTE(review): only narg > 2 is rejected, yet arg[0] and arg[1] are read
   // below - presumably the caller guarantees at least two arguments; confirm
   if (narg > 2) error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   // only pairs with j >= i are flagged
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       setflag[i][j] = 1;
       count++;
     }
   }
 
   // an argument combination that selects no pair at all is an error
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairGranHookeHistory::init_style()
 {
   // validate prerequisites, request neighbor lists, create the shear
   // history fix on first use, locate related fixes, and compute the
   // per-type maximum radii that init_one() turns into cutoffs
   int i;
 
   // error and warning checks
 
   if (!atom->sphere_flag)
     error->all(FLERR,"Pair granular requires atom style sphere");
   if (comm->ghost_velocity == 0)
     error->all(FLERR,"Pair granular requires ghost atoms store velocity");
 
   // need a granular neigh list and optionally a granular history neigh list
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->gran = 1;
   // second request carries id 1 so init_list() can tell the lists apart;
   // dnum = 3 matches the three shear components per neighbor in compute()
   if (history) {
-    irequest = neighbor->request(this);
+    irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->id = 1;
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->granhistory = 1;
     neighbor->requests[irequest]->dnum = 3;
   }
 
   dt = update->dt;
 
   // if shear history is stored:
   // check if newton flag is valid
   // if first init, create Fix needed for storing shear history
 
   if (history && force->newton_pair == 1)
     error->all(FLERR,
                "Pair granular with shear history requires newton pair off");
 
   // the new fix is picked up as the last entry of modify->fix and is
   // given a back-pointer to this pair style
   if (history && fix_history == NULL) {
     char **fixarg = new char*[3];
     fixarg[0] = (char *) "SHEAR_HISTORY";
     fixarg[1] = (char *) "all";
     fixarg[2] = (char *) "SHEAR_HISTORY";
     modify->add_fix(3,fixarg,1);
     delete [] fixarg;
     fix_history = (FixShearHistory *) modify->fix[modify->nfix-1];
     fix_history->pair = this;
   }
 
   // check for FixFreeze and set freeze_group_bit
 
   // first fix with style "freeze" wins; 0 means no atom tests as frozen
   for (i = 0; i < modify->nfix; i++)
     if (strcmp(modify->fix[i]->style,"freeze") == 0) break;
   if (i < modify->nfix) freeze_group_bit = modify->fix[i]->groupbit;
   else freeze_group_bit = 0;
 
   // check for FixRigid so can extract rigid body masses
 
   // first fix with rigid_flag set is used; NULL when there is none
   fix_rigid = NULL;
   for (i = 0; i < modify->nfix; i++)
     if (modify->fix[i]->rigid_flag) break;
   if (i < modify->nfix) fix_rigid = modify->fix[i];
 
   // check for FixPour and FixDeposit so can extract particle radii
 
   // index of the first matching fix, or -1 when none is defined
   int ipour;
   for (ipour = 0; ipour < modify->nfix; ipour++)
     if (strcmp(modify->fix[ipour]->style,"pour") == 0) break;
   if (ipour == modify->nfix) ipour = -1;
 
   int idep;
   for (idep = 0; idep < modify->nfix; idep++)
     if (strcmp(modify->fix[idep]->style,"deposit") == 0) break;
   if (idep == modify->nfix) idep = -1;
 
   // set maxrad_dynamic and maxrad_frozen for each type
   // include future FixPour and FixDeposit particles as dynamic
 
   // NOTE(review): when both a pour and a deposit fix exist, the deposit
   // value overwrites the pour value instead of taking the larger of the
   // two - confirm this is intended
   int itype;
   for (i = 1; i <= atom->ntypes; i++) {
     onerad_dynamic[i] = onerad_frozen[i] = 0.0;
     if (ipour >= 0) {
       itype = i;
       onerad_dynamic[i] = 
         *((double *) modify->fix[ipour]->extract("radius",itype));
     }
     if (idep >= 0) {
       itype = i;
       onerad_dynamic[i] = 
         *((double *) modify->fix[idep]->extract("radius",itype));
     }
   }
 
   double *radius = atom->radius;
   int *mask = atom->mask;
   int *type = atom->type;
   int nlocal = atom->nlocal;
 
   // per-type largest radius among owned atoms, split by frozen/dynamic
   for (i = 0; i < nlocal; i++)
     if (mask[i] & freeze_group_bit)
       onerad_frozen[type[i]] = MAX(onerad_frozen[type[i]],radius[i]);
     else
       onerad_dynamic[type[i]] = MAX(onerad_dynamic[type[i]],radius[i]);
 
   // reduce to the maximum over all processes; entries 1..ntypes only
   MPI_Allreduce(&onerad_dynamic[1],&maxrad_dynamic[1],atom->ntypes,
                 MPI_DOUBLE,MPI_MAX,world);
   MPI_Allreduce(&onerad_frozen[1],&maxrad_frozen[1],atom->ntypes,
                 MPI_DOUBLE,MPI_MAX,world);
 }
 
 /* ----------------------------------------------------------------------
    neighbor callback to inform pair style of neighbor list to use
    optional granular history list
 ------------------------------------------------------------------------- */
 
 void PairGranHookeHistory::init_list(int id, NeighList *ptr)
 {
   // id 0 selects the regular neighbor list, id 1 the granular history list;
   // any other id leaves both pointers untouched
   switch (id) {
   case 0:
     list = ptr;
     break;
   case 1:
     listgranhistory = ptr;
     break;
   default:
     break;
   }
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairGranHookeHistory::init_one(int i, int j)
 {
   if (!allocated) allocate();
 
   // candidate cutoffs = sum of the max radii of types I and J for
   // dynamic/dynamic, frozen/dynamic and dynamic/frozen pairings;
   // frozen/frozen is deliberately not considered
 
   const double dyn_dyn = maxrad_dynamic[i]+maxrad_dynamic[j];
   const double frz_dyn = maxrad_frozen[i]+maxrad_dynamic[j];
   const double dyn_frz = maxrad_dynamic[i]+maxrad_frozen[j];
 
   // the largest of the three is the pair cutoff
 
   return MAX(MAX(dyn_dyn,frz_dyn),dyn_frz);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairGranHookeHistory::write_restart(FILE *fp)
 {
   // global settings come first, then one setflag entry
   // for every type pair with j >= i
 
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairGranHookeHistory::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   int i,j;
   int me = comm->me;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
       MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
     }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairGranHookeHistory::write_restart_settings(FILE *fp)
 {
   // record layout: kn, kt, gamman, gammat, xmu (doubles), then dampflag (int)
   // read_restart_settings() reads the fields back in exactly this order
 
   fwrite(&kn,sizeof(double),1,fp);
   fwrite(&kt,sizeof(double),1,fp);
   fwrite(&gamman,sizeof(double),1,fp);
   fwrite(&gammat,sizeof(double),1,fp);
   fwrite(&xmu,sizeof(double),1,fp);
   fwrite(&dampflag,sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairGranHookeHistory::read_restart_settings(FILE *fp)
 {
   // only proc 0 touches the file; field order must match
   // write_restart_settings(): kn, kt, gamman, gammat, xmu, dampflag
   // NOTE: fread return values are not checked here
 
   if (comm->me == 0) {
     fread(&kn,sizeof(double),1,fp);
     fread(&kt,sizeof(double),1,fp);
     fread(&gamman,sizeof(double),1,fp);
     fread(&gammat,sizeof(double),1,fp);
     fread(&xmu,sizeof(double),1,fp);
     fread(&dampflag,sizeof(int),1,fp);
   }
 
   // distribute the values read by proc 0 to every proc
 
   MPI_Bcast(&kn,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&kt,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&gamman,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&gammat,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&xmu,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&dampflag,1,MPI_INT,0,world);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairGranHookeHistory::reset_dt()
 {
   // refresh the locally cached timestep from the Update class
   dt = update->dt;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Evaluate the granular Hooke/history interaction for one pair I,J.
 //   i,j         = atom indices of the pair
 //   itype,jtype = atom types (not used in this function)
 //   rsq         = squared distance between I and J
 //   factor_coul,factor_lj = not used in this function
 //   fforce      = set to ccel (normal force coefficient), 0.0 if no contact
 // svector[0..2] receive the tangential force components and svector[3]
 // its magnitude; all four are zeroed if the particles do not overlap.
 // Always returns 0.0 since no energy is computed.
 
 double PairGranHookeHistory::single(int i, int j, int itype, int jtype,
                                     double rsq,
                                     double factor_coul, double factor_lj,
                                     double &fforce)
 {
   double radi,radj,radsum;
   double r,rinv,rsqinv,delx,dely,delz;
   double vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3,wr1,wr2,wr3;
   double mi,mj,meff,damp,ccel;
   double vtr1,vtr2,vtr3,vrel,shrmag,rsht;
   double fs1,fs2,fs3,fs,fn;
 
   double *radius = atom->radius;
   radi = radius[i];
   radj = radius[j];
   radsum = radi + radj;
 
   // no contact if separation is at least the sum of the radii
 
   if (rsq >= radsum*radsum) {
     fforce = 0.0;
     svector[0] = svector[1] = svector[2] = svector[3] = 0.0;
     return 0.0;
   }
 
   r = sqrt(rsq);
   rinv = 1.0/r;
   rsqinv = 1.0/rsq;
 
   // relative translational velocity
 
   double **v = atom->v;
   vr1 = v[i][0] - v[j][0];
   vr2 = v[i][1] - v[j][1];
   vr3 = v[i][2] - v[j][2];
 
   // normal component
 
   double **x = atom->x;
   delx = x[i][0] - x[j][0];
   dely = x[i][1] - x[j][1];
   delz = x[i][2] - x[j][2];
 
   vnnr = vr1*delx + vr2*dely + vr3*delz;
   vn1 = delx*vnnr * rsqinv;
   vn2 = dely*vnnr * rsqinv;
   vn3 = delz*vnnr * rsqinv;
 
   // tangential component
 
   vt1 = vr1 - vn1;
   vt2 = vr2 - vn2;
   vt3 = vr3 - vn3;
 
   // relative rotational velocity
 
   double **omega = atom->omega;
   wr1 = (radi*omega[i][0] + radj*omega[j][0]) * rinv;
   wr2 = (radi*omega[i][1] + radj*omega[j][1]) * rinv;
   wr3 = (radi*omega[i][2] + radj*omega[j][2]) * rinv;
 
   // meff = effective mass of pair of particles
   // if I or J part of rigid body, use body mass
   // if I or J is frozen, meff is other particle
 
   double *rmass = atom->rmass;
   double *mass = atom->mass;
   int *type = atom->type;
   int *mask = atom->mask;
 
   // per-atom mass if available, else per-type mass
 
   if (rmass) {
     mi = rmass[i];
     mj = rmass[j];
   } else {
     mi = mass[type[i]];
     mj = mass[type[j]];
   }
   if (fix_rigid) {
     // NOTE: insure mass_rigid is current for owned+ghost atoms?
     if (mass_rigid[i] > 0.0) mi = mass_rigid[i];
     if (mass_rigid[j] > 0.0) mj = mass_rigid[j];
   }
 
   meff = mi*mj / (mi+mj);
   if (mask[i] & freeze_group_bit) meff = mj;
   if (mask[j] & freeze_group_bit) meff = mi;
 
   // normal forces = Hookian contact + normal velocity damping
 
   damp = meff*gamman*vnnr*rsqinv;
   ccel = kn*(radsum-r)*rinv - damp;
 
   // relative velocities
 
   vtr1 = vt1 - (delz*wr2-dely*wr3);
   vtr2 = vt2 - (delx*wr3-delz*wr1);
   vtr3 = vt3 - (dely*wr1-delx*wr2);
   vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3;
   vrel = sqrt(vrel);
 
   // shear history effects
   // neighprev = index of found neigh on previous call
   // search entire jnum list of neighbors of I for neighbor J
   // start from neighprev, since will typically be next neighbor
   // reset neighprev to 0 as necessary
 
   int jnum = list->numneigh[i];
   int *jlist = list->firstneigh[i];
   double *allshear = list->listgranhistory->firstdouble[i];
 
   // NOTE: if J is not in I's list the loop ends without a match and
   // neighprev keeps whatever index it reached
 
   for (int jj = 0; jj < jnum; jj++) {
     neighprev++;
     if (neighprev >= jnum) neighprev = 0;
     if (jlist[neighprev] == j) break;
   }
 
   // 3 shear values are stored per neighbor
 
   double *shear = &allshear[3*neighprev];
   shrmag = sqrt(shear[0]*shear[0] + shear[1]*shear[1] +
                 shear[2]*shear[2]);
 
   // rotate shear displacements
   // NOTE: rsht is computed but not used further in this function
 
   rsht = shear[0]*delx + shear[1]*dely + shear[2]*delz;
   rsht *= rsqinv;
 
   // tangential forces = shear + tangential velocity damping
 
   fs1 = - (kt*shear[0] + meff*gammat*vtr1);
   fs2 = - (kt*shear[1] + meff*gammat*vtr2);
   fs3 = - (kt*shear[2] + meff*gammat*vtr3);
 
   // rescale frictional displacements and forces if needed
   // fn = xmu * |normal force| is the upper limit on the tangential force
 
   fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3);
   fn = xmu * fabs(ccel*r);
 
   if (fs > fn) {
     if (shrmag != 0.0) {
       fs1 *= fn/fs;
       fs2 *= fn/fs;
       fs3 *= fn/fs;
       fs *= fn/fs;
     } else fs1 = fs2 = fs3 = fs = 0.0;
   }
 
   // set all forces and return no energy
 
   fforce = ccel;
   svector[0] = fs1;
   svector[1] = fs2;
   svector[2] = fs3;
   svector[3] = fs;
   return 0.0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int PairGranHookeHistory::pack_forward_comm(int n, int *list, double *buf,
                                             int pbc_flag, int *pbc)
 {
   // one value per listed atom: its rigid-body mass
   // pbc_flag and pbc are not needed for this quantity
 
   int npacked = 0;
   for (int k = 0; k < n; k++)
     buf[npacked++] = mass_rigid[list[k]];
   return npacked;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairGranHookeHistory::unpack_forward_comm(int n, int first, double *buf)
 {
   // store the n received rigid-body masses into atoms first..first+n-1
 
   for (int k = 0; k < n; k++)
     mass_rigid[first+k] = buf[k];
 }
 
 /* ---------------------------------------------------------------------- */
 
 void *PairGranHookeHistory::extract(const char *str, int &dim)
 {
   // only "computeflag" is exposed; dim is reported as 0 in every case
 
   dim = 0;
   if (strcmp(str,"computeflag") != 0) return NULL;
   return (void *) &computeflag;
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
 ------------------------------------------------------------------------- */
 
 double PairGranHookeHistory::memory_usage()
 {
   // one double per atom slot, nmax slots
   return nmax * sizeof(double);
 }
diff --git a/src/KIM/pair_kim.cpp b/src/KIM/pair_kim.cpp
index 5319276d1..082e1ee4b 100644
--- a/src/KIM/pair_kim.cpp
+++ b/src/KIM/pair_kim.cpp
@@ -1,1200 +1,1200 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Ryan S. Elliott,
                          Valeriu Smirichinski,
                          Ellad Tadmor
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Designed for use with the openkim-api-v1.5.0 package and for use with
    the kim-api-v1.6.0 (and newer) package
 ------------------------------------------------------------------------- */
 
 #include <cstring>
 #include <cstdlib>
 
 // includes from LAMMPS
 #include "pair_kim.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "update.h"
 #include "memory.h"
 #include "domain.h"
 #include "error.h"
 
 // includes from KIM
 #include "KIM_API.h"
 #include "KIM_API_status.h"
 
 #ifndef KIM_API_VERSION_MAJOR
 // support v1.5.0
 #define KIM_API_VERSION_MAJOR 1
 #define KIM_API_VERSION_MINOR 5
 #define KIM_API_VERSION_PATHC 0
 #endif
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairKIM::PairKIM(LAMMPS *lmp) :
    Pair(lmp),
    kim_modelname(0),
    lmps_map_species_to_unique(0),
    lmps_unique_elements(0),
    lmps_num_unique_elements(0),
    lmps_units(METAL),
    pkim(0),
    kim_ind_coordinates(-1),
    kim_ind_numberOfParticles(-1),
    kim_ind_numberContributingParticles(-1),
    kim_ind_numberOfSpecies(-1),
    kim_ind_particleSpecies(-1),
    kim_ind_get_neigh(-1),
    kim_ind_neighObject(-1),
    kim_ind_cutoff(-1),
    kim_ind_energy(-1),
    kim_ind_particleEnergy(-1),
    kim_ind_forces(-1),
    kim_ind_virial(-1),
    kim_ind_particleVirial(-1),
    kim_particle_codes(0),
    lmps_local_tot_num_atoms(0),
    kim_global_cutoff(0.0),
    lmps_maxalloc(0),
    kim_particleSpecies(0),
    lmps_force_tmp(0),
    lmps_stripped_neigh_list(0),
    kim_iterator_position(0),
    Rij(0)
 {
    // Initialize Pair data members to appropriate values
    single_enable = 0;  // We do not provide the Single() function
    restartinfo = 0;    // We do not write any restart info
    one_coeff = 1;      // We only allow one coeff * * call
 
    // BEGIN: initial values that determine the KIM state
    // (used by kim_free(), etc.)
    kim_model_init_ok = false;
    kim_init_ok = false;
    // kim_free() also tests this flag, and it is called from settings()
    // and the destructor before anything else can set it, so it must
    // start in a defined state
    kim_particle_codes_ok = false;
    // END
 
    return;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairKIM::~PairKIM()
 {
    // model name string (deleting a null pointer is a no-op)
    delete [] kim_modelname;
 
    // per-element name strings first, then the array holding them
    if (lmps_unique_elements) {
       for (int ielem = 0; ielem < lmps_num_unique_elements; ielem++)
          delete [] lmps_unique_elements[ielem];
    }
    delete [] lmps_unique_elements;
 
    // local arrays that support the KIM interface
    memory->destroy(kim_particleSpecies);
    memory->destroy(lmps_force_tmp);
    memory->destroy(lmps_stripped_neigh_list);
 
    // standard Pair class arrays, plus lmps_map_species_to_unique,
    // which is also created in allocate()
    if (allocated) {
       memory->destroy(setflag);
       memory->destroy(cutsq);
       delete [] lmps_map_species_to_unique;
    }
 
    // Rij array
    memory->destroy(Rij);
 
    // finally release the KIM interface (if necessary)
    kim_free();
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Compute the pair interaction by delegating to the KIM model:
 // set up energy/virial flags, (re)size and fill the per-atom species
 // array, hand current pointers to KIM, run the model, then post-process
 // forces (hybrid mode) and virial (sign/order convention).
 
 void PairKIM::compute(int eflag , int vflag)
 {
    int kimerror;
 
    if (eflag || vflag)
       ev_setup(eflag,vflag);
    else
       ev_unset();
 
    // grow kim_particleSpecies array if necessary
    // needs to be atom->nmax in length
    // lmps_force_tmp is resized together with it
    if (atom->nmax > lmps_maxalloc) {
       memory->destroy(kim_particleSpecies);
       memory->destroy(lmps_force_tmp);
 
       lmps_maxalloc = atom->nmax;
       memory->create(kim_particleSpecies,lmps_maxalloc,"pair:kim_particleSpecies");
       memory->create(lmps_force_tmp,lmps_maxalloc,3,"pair:lmps_force_tmp");
    }
 
    // kim_particleSpecies = KIM atom species for each LAMMPS atom
    // set ielement to valid 0 if lmps_map_species_to_unique[] stores an un-used -1
 
    int *species = atom->type;
    int nall = atom->nlocal + atom->nghost;
    int ielement;
 
    for (int i = 0; i < nall; i++) {
       ielement = lmps_map_species_to_unique[species[i]];
       ielement = MAX(ielement,0);
       // @@ this (above line) provides bogus info
       // @@ (when lmps_map_species_to_unique[species[i]]==-1) to KIM, but
       // @@ I guess this only happens when lmps_hybrid==true,
       // @@ and we are sure that iterator mode will
       // @@ not use these atoms.... (?)
       kim_particleSpecies[i] = kim_particle_codes[ielement];
    }
 
    // pass current atom pointers to KIM
    set_volatiles();
 
    // tell KIM which optional quantities to compute on this call;
    // arguments come in (index, requested flag, availability flag) triples,
    // 3 triples = 3*3 values
    pkim->setm_compute_by_index(&kimerror,3*3,
                                kim_ind_particleEnergy, eflag_atom,
                                 (int) kim_model_has_particleEnergy,
                                kim_ind_particleVirial, vflag_atom,
                                 (int) kim_model_has_particleVirial,
                                kim_ind_virial, vflag_global!=0,
                                 no_virial_fdotr_compute);
    kim_error(__LINE__,"setm_compute_by_index",kimerror);
 
    // compute via KIM model
    kimerror = pkim->model_compute();
    kim_error(__LINE__,"PairKIM::pkim->model_compute() error",kimerror);
    // assemble force and particleVirial if needed
    if (!lmps_using_newton) comm->reverse_comm_pair(this);
 
    // sum lmps_force_tmp to f if running in hybrid mode
    if (lmps_hybrid) {
       double **f = atom->f;
       for (int i = 0; i < nall; i++) {
          f[i][0] += lmps_force_tmp[i][0];
          f[i][1] += lmps_force_tmp[i][1];
          f[i][2] += lmps_force_tmp[i][2];
       }
    }
 
    if ((no_virial_fdotr_compute == 1) && (vflag_global))
    {  // flip sign and order of virial if KIM is computing it
       // components 0-2 are negated in place; 3 and 5 are swapped and negated
       for (int i = 0; i < 3; ++i) virial[i] = -1.0*virial[i];
       double tmp = virial[3];
       virial[3] = -virial[5];
       virial[4] = -virial[4];
       virial[5] = -tmp;
    }
    else
    {  // compute virial via LAMMPS fdotr mechanism
       if (vflag_fdotr) virial_fdotr_compute();
    }
 
    if ((kim_model_has_particleVirial) && (vflag_atom))
    {  // flip sign and order of virial if KIM is computing it
       // same transformation as for the global virial, applied per atom
       double tmp;
       for (int i = 0; i < nall; ++i)
       {
          for (int j = 0; j < 3; ++j) vatom[i][j] = -1.0*vatom[i][j];
          tmp = vatom[i][3];
          vatom[i][3] = -vatom[i][5];
          vatom[i][4] = -vatom[i][4];
          vatom[i][5] = -tmp;
       }
    }
 
    return;
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairKIM::allocate()
 {
    // all arrays are indexed by atom type 1..ntypes, hence ntypes+1 entries
    const int nslots = atom->ntypes + 1;
 
    // standard Pair class arrays
    memory->create(setflag,nslots,nslots,"pair:setflag");
    memory->create(cutsq,nslots,nslots,"pair:cutsq");
 
    // atom type -> unique element mapping
    lmps_map_species_to_unique = new int[nslots];
 
    allocated = 1;
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairKIM::settings(int narg, char **arg)
 {
    // Invoked for every "pair_style kim ..." line of the input script,
    // so repeated calls must be handled.
    //   arg[0] = virial handling option: "LAMMPSvirial" or "KIMvirial"
    //   arg[1] = KIM Model name
 
    if (narg != 2) error->all(FLERR,"Illegal pair_style command");
 
    // discard KIM state left from an earlier call
    // (nothing to do on the first call)
    kim_free();
 
    // make sure things are allocated
    if (allocated != 1) allocate();
 
    // reset setflag so that coeff() has to be called after settings()
    const int ntypes = atom->ntypes;
    for (int itype = 1; itype <= ntypes; itype++)
       for (int jtype = itype; jtype <= ntypes; jtype++)
          setflag[itype][jtype] = 0;
 
    // virial handling
    const bool want_lammps_virial = (strcmp(arg[0],"LAMMPSvirial") == 0);
    if (want_lammps_virial)
       no_virial_fdotr_compute = 0;
    else if (strcmp(arg[0],"KIMvirial") == 0)
       no_virial_fdotr_compute = 1;
    else
       error->all(FLERR,"Unrecognized virial argument in pair_style command");
 
    // keep a private copy of the KIM Model name
    const int nmlen = strlen(arg[1]);
    if (kim_modelname != 0) {
       delete [] kim_modelname;
       kim_modelname = 0;
    }
    kim_modelname = new char[nmlen+1];
    strcpy(kim_modelname, arg[1]);
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 // Process "pair_coeff * * elem_1 ... elem_ntypes":
 // build the atom-type -> unique-element map and the list of unique
 // element names, then set setflag for every type pair whose two types
 // are both mapped to an element.
 //   narg = must equal 2 + atom->ntypes
 //   arg  = arg[0], arg[1] must be "*"; arg[2..] are element names or "NULL"
 
 void PairKIM::coeff(int narg, char **arg)
 {
    // This is called when "pair_coeff ..." is read from input
    // may be called multiple times
 
    int i,j,n;
 
    if (!allocated) allocate();
 
    if (narg != 2 + atom->ntypes)
       error->all(FLERR,"Incorrect args for pair coefficients");
 
    // ensure I,J args are * *
 
    if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
       error->all(FLERR,"Incorrect args for pair coefficients");
 
    // read args that map atom species to KIM elements
    // lmps_map_species_to_unique[i] =
    // which element the Ith atom type is, -1 if NULL
    // lmps_num_unique_elements = # of unique elements
    // lmps_unique_elements = list of element names
 
    // if called multiple times: update lmps_unique_elements
    if (lmps_unique_elements) {
       for (i = 0; i < lmps_num_unique_elements; i++)
         delete [] lmps_unique_elements[i];
       delete [] lmps_unique_elements;
    }
    lmps_unique_elements = new char*[atom->ntypes];
    for (i = 0; i < atom->ntypes; i++) lmps_unique_elements[i] = 0;
 
    // arg[i] describes atom type i-1 (types start at 1, element args at 2)
    // NOTE(review): lmps_hybrid is read below; in the code visible here it
    // is only described as being set by set_lmps_flags() from init_style()
    // -- confirm it already holds a valid value when coeff() runs
    lmps_num_unique_elements = 0;
    for (i = 2; i < narg; i++) {
       if (strcmp(arg[i],"NULL") == 0) {
          if (!lmps_hybrid)
            error->all(FLERR,"Invalid args for non-hybrid pair coefficients");
          lmps_map_species_to_unique[i-1] = -1;
          continue;
       }
       // look for the name among the elements seen so far;
       // j == lmps_num_unique_elements afterwards means it is new
       for (j = 0; j < lmps_num_unique_elements; j++)
          if (strcmp(arg[i],lmps_unique_elements[j]) == 0) break;
       lmps_map_species_to_unique[i-1] = j;
       if (j == lmps_num_unique_elements) {
          n = strlen(arg[i]) + 1;
          lmps_unique_elements[j] = new char[n];
          strcpy(lmps_unique_elements[j],arg[i]);
          lmps_num_unique_elements++;
       }
    }
 
    // clear setflag since coeff() called once with I,J = * *
    // (the loop variables below shadow the i,j declared above)
    n = atom->ntypes;
    for (int i = 1; i <= n; i++)
       for (int j = i; j <= n; j++)
          setflag[i][j] = 0;
 
    // set setflag i,j for type pairs where both are mapped to elements
    int count = 0;
    for (int i = 1; i <= n; i++)
       for (int j = i; j <= n; j++)
          if (lmps_map_species_to_unique[i] >= 0 &&
              lmps_map_species_to_unique[j] >= 0) {
             setflag[i][j] = 1;
             count++;
          }
    // at least one type pair must have been enabled
    if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 
    return;
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 // Per-run initialization: check dimensionality, refresh LAMMPS flags,
 // initialize KIM and the model once, then request the kind of neighbor
 // list (none, half, or full) that the KIM model requires.
 
 void PairKIM::init_style()
 {
    // This is called for each "run ...", "minimize ...", etc. read from input
 
    if (domain->dimension != 3)
       error->all(FLERR,"PairKIM only works with 3D problems");
 
    // set lmps_* bool flags
    set_lmps_flags();
 
    int kimerror;
    // KIM and Model initialization (only once)
    // also sets kim_ind_* and kim_* bool flags
    if (!kim_init_ok)
    {
       kim_init();
       kimerror = pkim->model_init();
       if (kimerror != KIM_STATUS_OK)
          kim_error(__LINE__, "KIM API:model_init() failed", kimerror);
       else
       {
          kim_model_init_ok = true;
 
          // allocate enough memory to ensure we are safe
          // (by using neighbor->oneatom)
          // Rij holds 3 components per neighbor
          if (kim_model_using_Rij)
            memory->create(Rij,3*(neighbor->oneatom),"pair:Rij");
       }
    }
 
    // request none, half, or full neighbor list
    // depending on KIM model requirement
 
-   int irequest = neighbor->request(this);
+   int irequest = neighbor->request(this,instance_me);
    if (kim_model_using_cluster)
    {
       // cluster mode: neither a half nor a full list is requested
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->full = 0;
    }
    else
    {
       // make sure comm_reverse expects (at most) 9 values when newton is off
       // (3 force components + 6 per-atom virial components,
       //  see pack_reverse_comm)
       if (!lmps_using_newton) comm_reverse_off = 9;
 
       if (kim_model_using_half)
       {
          neighbor->requests[irequest]->half = 1;
          neighbor->requests[irequest]->full = 0;
          // make sure half lists also include local-ghost pairs
          if (lmps_using_newton) neighbor->requests[irequest]->newton = 2;
       }
       else
       {
          neighbor->requests[irequest]->half = 0;
          neighbor->requests[irequest]->full = 1;
          // make sure full lists also include local-ghost pairs
          if (lmps_using_newton) neighbor->requests[irequest]->newton = 0;
       }
    }
 
    return;
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairKIM::init_one(int i, int j)
 {
    // This is called once of each (unordered) i,j pair for each
    // "run ...", "minimize ...", etc. read from input
 
    if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
    return kim_global_cutoff;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int PairKIM::pack_reverse_comm(int n, int first, double *buf)
 {
    // forces come from the temporary array in hybrid mode, else from atom->f
    double *fp = lmps_hybrid ? &(lmps_force_tmp[0][0]) : &(atom->f[0][0]);
 
    // per-atom virial is sent only if requested and provided by the model
    const bool send_virial =
       (vflag_atom == 1) && (kim_model_has_particleVirial);
 
    // forces are sent either on their own or ahead of the per-atom virial
    const bool send_forces = (kim_model_has_forces) &&
       ((vflag_atom == 0) || (!kim_model_has_particleVirial) || send_virial);
 
    // nothing to send
    if (!send_forces && !send_virial) return 0;
 
    // vatom is only touched when the virial is actually sent
    double *va = send_virial ? &(vatom[0][0]) : 0;
 
    // per atom: 3 force components (if any), then 6 virial components (if any)
    const int last = first + n;
    int m = 0;
    for (int iatom = first; iatom < last; iatom++)
    {
       if (send_forces)
       {
          for (int k = 0; k < 3; k++) buf[m++] = fp[3*iatom+k];
       }
       if (send_virial)
       {
          for (int k = 0; k < 6; k++) buf[m++] = va[6*iatom+k];
       }
    }
    return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairKIM::unpack_reverse_comm(int n, int *list, double *buf)
 {
    // forces go to the temporary array in hybrid mode, else to atom->f
    double *fp = lmps_hybrid ? &(lmps_force_tmp[0][0]) : &(atom->f[0][0]);
 
    // per-atom virial was sent only if requested and provided by the model
    const bool recv_virial =
       (vflag_atom == 1) && (kim_model_has_particleVirial);
 
    // forces were sent either on their own or ahead of the per-atom virial
    const bool recv_forces = (kim_model_has_forces) &&
       ((vflag_atom == 0) || (!kim_model_has_particleVirial) || recv_virial);
 
    // nothing was sent, nothing to do
    if (!recv_forces && !recv_virial) return;
 
    // vatom is only touched when the virial is actually received
    double *va = recv_virial ? &(vatom[0][0]) : 0;
 
    // per atom: accumulate 3 force components (if any),
    // then 6 virial components (if any)
    int m = 0;
    for (int k = 0; k < n; k++)
    {
       const int j = list[k];
       if (recv_forces)
       {
          for (int c = 0; c < 3; c++) fp[3*j+c] += buf[m++];
       }
       if (recv_virial)
       {
          for (int c = 0; c < 6; c++) va[j*6+c] += buf[m++];
       }
    }
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
 ------------------------------------------------------------------------- */
 
 double PairKIM::memory_usage()
 {
    // one int per allocated atom slot
    return lmps_maxalloc * sizeof(int);
 }
 
 /* ----------------------------------------------------------------------
    KIM-specific interface
 ------------------------------------------------------------------------- */
 
 void PairKIM::kim_error(int ln, const char* msg, int errcode)
 {
    // only a non-OK status is reported to KIM and raised as a LAMMPS error;
    // ln is the source line of the failing call
    if (errcode != KIM_STATUS_OK) {
       KIM_API_model::report_error(ln,(char *) __FILE__, (char *) msg,errcode);
       error->all(__FILE__,ln,"Internal KIM error");
    }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Neighbor-list access routine working on a KIM_API_model handle.
 //   kimmdl   = pointer to the KIM_API_model pointer
 //   mode     = 0 for iterator mode, 1 for locator mode
 //   request  = iterator mode: 0 restarts the iterator, 1 advances it
 //              locator mode: index of the atom whose neighbors are wanted
 //   atom     = out: index of the atom the returned list belongs to
 //   numnei   = out: number of neighbors
 //   nei1atom = out: pointer to the neighbor indices
 //   pRij     = out: pointer to the Rij buffer if the model uses Rij, else 0
 // Returns a KIM status code; -16 if no branch produced a status.
 
 int PairKIM::get_neigh(void **kimmdl,int *mode,int *request,
                        int *atom, int *numnei, int **nei1atom, double **pRij)
 {
    KIM_API_model *pkim = (KIM_API_model *) *kimmdl;
 
    // recover the PairKIM instance from the KIM sim buffer
    int kimerror;
    PairKIM *self = (PairKIM *) pkim->get_sim_buffer(&kimerror);
 
    if (self->kim_model_using_Rij) {
       *pRij = &(self->Rij[0]);
    } else {
       *pRij = 0;
    }
 
 
    // subvert KIM api by using direct access to self->list
    //
    // get neighObj from KIM API obj
    // NeighList * neiobj = (NeighList * )
    // (*pkim).get_data_by_index(self->kim_ind_neighObject, &kimerror);
    NeighList * neiobj = self->list;
 
    // subvert KIM api by using direct access to self->lmps_local_tot_num_atoms
    //
    //int * pnAtoms = (int *)
    // (*pkim).get_data_by_index(self->kim_ind_numberOfParticles, &kimerror);
    //int nAtoms = *pnAtoms;
    int nAtoms = self->lmps_local_tot_num_atoms;
 
    int j, jj, inum, *ilist, *numneigh, **firstneigh;
    inum = neiobj->inum;             //# of I atoms neighbors are stored for
    ilist = neiobj->ilist;           //local indices of I atoms
    numneigh = neiobj->numneigh;     // # of J neighbors for each I atom
    firstneigh = neiobj->firstneigh; // ptr to 1st J int value of each I atom
 
    if (*mode==0){ //iterator mode
       if (*request==1) { //increment iterator
          if (self->kim_iterator_position < inum) {
             *atom = ilist[self->kim_iterator_position];
             *numnei = numneigh[*atom];
 
             // strip off neighbor mask for molecular systems
             // (masked copy is written to lmps_stripped_neigh_list)
             if (!self->lmps_using_molecular)
                *nei1atom = firstneigh[*atom];
             else
             {
                int n = *numnei;
                int *ptr = firstneigh[*atom];
                int *lmps_stripped_neigh_list = self->lmps_stripped_neigh_list;
                for (int i = 0; i < n; i++)
                   lmps_stripped_neigh_list[i] = *(ptr++) & NEIGHMASK;
                *nei1atom = lmps_stripped_neigh_list;
             }
 
             // set Rij if needed
             // Rij = x[j] - x[i] for each neighbor j of atom i
             if (self->kim_model_using_Rij) {
                double* x = (double *)
                  (*pkim).get_data_by_index(self->kim_ind_coordinates,
                                            &kimerror);
                for (jj=0; jj < *numnei; jj++) {
                   int i = *atom;
                   j = (*nei1atom)[jj];
                   self->Rij[jj*3 +0] = -x[i*3+0] + x[j*3+0];
                   self->Rij[jj*3 +1] = -x[i*3+1] + x[j*3+1];
                   self->Rij[jj*3 +2] = -x[i*3+2] + x[j*3+2];
                }
             }
 
             // increment iterator
             self->kim_iterator_position++;
 
             return KIM_STATUS_OK; //successful increment
          } else if (self->kim_iterator_position == inum) {
             *numnei = 0;
             return KIM_STATUS_NEIGH_ITER_PAST_END; //reached end by iterator
          } else if (self->kim_iterator_position > inum || inum < 0){
             self->error->one(FLERR, "KIM neighbor iterator exceeded range");
          }
       } else if (*request == 0){ //restart iterator
          self->kim_iterator_position = 0;
          *numnei = 0;
          return KIM_STATUS_NEIGH_ITER_INIT_OK; //successful restart
       }
    } else if (*mode == 1){//locator mode
       //...
       // *request is used directly as the atom index
       if (*request < inum) {
          *atom = *request;
          *numnei = numneigh[*atom];
 
          // strip off neighbor mask for molecular systems
          if (!self->lmps_using_molecular)
             *nei1atom = firstneigh[*atom];
          else
          {
             int n = *numnei;
             int *ptr = firstneigh[*atom];
             int *lmps_stripped_neigh_list = self->lmps_stripped_neigh_list;
             for (int i = 0; i < n; i++)
                lmps_stripped_neigh_list[i] = *(ptr++) & NEIGHMASK;
             *nei1atom = lmps_stripped_neigh_list;
          }
 
          // set Rij if needed
          if (self->kim_model_using_Rij){
             double* x = (double *)
               (*pkim).get_data_by_index(self->kim_ind_coordinates, &kimerror);
             for(int jj=0; jj < *numnei; jj++){
                int i = *atom;
                int j = (*nei1atom)[jj];
                self->Rij[jj*3 +0] = -x[i*3+0] + x[j*3+0];
                self->Rij[jj*3 +1] = -x[i*3+1] + x[j*3+1];
                self->Rij[jj*3 +2] = -x[i*3+2] + x[j*3+2];
             }
          }
          return KIM_STATUS_OK; //successful end
       }
       else if (*request >= nAtoms || inum < 0)
          return KIM_STATUS_NEIGH_INVALID_REQUEST;
       else if (*request >= inum) {
          *atom = *request;
          *numnei = 0;
          return KIM_STATUS_OK; //successful but no neighbors in the list
       }
    } else return KIM_STATUS_NEIGH_INVALID_MODE; //invalid mode
 
    return -16; //should not get here: unspecified error
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairKIM::kim_free()
 {
    // Tear down whatever KIM state is currently flagged as set up, clearing
    // each flag / pointer as it goes so a later call finds nothing to do.
    int status;
 
    // let the KIM Model destroy itself, if it was initialized
    if (kim_model_init_ok) {
       status = pkim->model_destroy();
       kim_model_init_ok = false;
    }
 
    // release the KIM API object's contents, if it was initialized
    if (kim_init_ok) {
       pkim->free(&status);
       kim_init_ok = false;
    }
 
    // destroy the KIM API object itself (delete of a null pointer is a no-op)
    delete pkim;
    pkim = 0;
 
    // drop the table of KIM species codes
    if (kim_particle_codes_ok) {
       delete [] kim_particle_codes;
       kim_particle_codes = 0;
       kim_particle_codes_ok = false;
    }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairKIM::kim_init()
 {
    // Create and initialize the KIM API object (pkim) for kim_modelname,
    // record which neighbor-list conventions the Model selected, look up the
    // KIM indices of all arguments this pair style exchanges with the Model,
    // build the LAMMPS-element -> KIM-species-code table, and finally hand
    // the run-constant pointers to KIM via set_statics().
    int kimerror;
 
    // determine KIM Model capabilities (used in this function below)
    set_kim_model_has_flags();
 
    // create appropriate KIM descriptor file
    char* test_descriptor_string = 0;
    // allocate memory for test_descriptor_string and write descriptor file
    write_descriptor(&test_descriptor_string);
 
    // initialize KIM model
    pkim = new KIM_API_model();
    kimerror = pkim->string_init(test_descriptor_string, kim_modelname);
    if (kimerror != KIM_STATUS_OK)
       kim_error(__LINE__,"KIM initialization failed", kimerror);
    else
    {
       kim_init_ok = true;
       // the descriptor text is no longer needed once string_init succeeded
       delete [] test_descriptor_string;
       test_descriptor_string = 0;
    }
 
    // determine kim_model_using_* true/false values
    //
    // check for half or full list
    kim_model_using_half = (pkim->is_half_neighbors(&kimerror));
    //
    const char* NBC_method;
    kimerror = pkim->get_NBC_method(&NBC_method);
    kim_error(__LINE__,"NBC method not set",kimerror);
    // check for CLUSTER mode
    kim_model_using_cluster = (strcmp(NBC_method,"CLUSTER")==0);
    // check if Rij needed for get_neigh
    kim_model_using_Rij = ((strcmp(NBC_method,"NEIGH_RVEC_H")==0) ||
                           (strcmp(NBC_method,"NEIGH_RVEC_F")==0));
 
    // get correct index of each variable in kim_api object
    // (3*13 = three variadic arguments -- name, index pointer, flag -- for
    //  each of the 13 entries listed below)
    //
    // The species argument names depend on the KIM API version.  The version
    // test previously used the misspelled macro KIM_API_VERSON_MINOR; an
    // identifier that is not a defined macro evaluates to 0 inside #if, so
    // the 1.5 branch could never be selected.
    pkim->getm_index(&kimerror, 3*13,
     "coordinates", &kim_ind_coordinates, 1,
     "cutoff", &kim_ind_cutoff, 1,
     "numberOfParticles", &kim_ind_numberOfParticles, 1,
 #if KIM_API_VERSION_MAJOR == 1 && KIM_API_VERSION_MINOR == 5
     "numberParticleTypes", &kim_ind_numberOfSpecies, 1,
     "particleTypes", &kim_ind_particleSpecies, 1,
 #else
     "numberOfSpecies", &kim_ind_numberOfSpecies, 1,
     "particleSpecies", &kim_ind_particleSpecies, 1,
 #endif
     "numberContributingParticles", &kim_ind_numberContributingParticles,
                                    kim_model_using_half,
     "particleEnergy", &kim_ind_particleEnergy,
                       (int) kim_model_has_particleEnergy,
     "energy", &kim_ind_energy, (int) kim_model_has_energy,
     "forces", &kim_ind_forces, (int) kim_model_has_forces,
     "neighObject", &kim_ind_neighObject, (int) !kim_model_using_cluster,
     "get_neigh", &kim_ind_get_neigh, (int) !kim_model_using_cluster,
     "particleVirial", &kim_ind_particleVirial,
                       (int) kim_model_has_particleVirial,
     "virial", &kim_ind_virial, no_virial_fdotr_compute);
    kim_error(__LINE__,"getm_index",kimerror);
 
    // setup mapping between LAMMPS unique elements and KIM species codes
    kim_particle_codes = new int[lmps_num_unique_elements];
    kim_particle_codes_ok = true;
    for(int i = 0; i < lmps_num_unique_elements; i++){
       kim_particle_codes[i]
          = pkim->get_species_code(lmps_unique_elements[i], &kimerror);
       kim_error(__LINE__, "create_kim_particle_codes: symbol not found ",
                 kimerror);
    }
 
    // set pointer values in KIM API object that will not change during run
    set_statics();
 
    return;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairKIM::set_statics()
 {
    // set total number of atoms
    lmps_local_tot_num_atoms = (int) (atom->nghost + atom->nlocal);
 
    int kimerror;
    pkim->setm_data_by_index(&kimerror, 4*6,
     kim_ind_numberOfSpecies, 1, (void *) &(atom->ntypes), 1,
     kim_ind_cutoff, 1, (void *) &(kim_global_cutoff), 1,
     kim_ind_numberOfParticles, 1, (void *) &lmps_local_tot_num_atoms,  1,
     kim_ind_numberContributingParticles, 1, (void *) &(atom->nlocal),
                                          (int) kim_model_using_half,
     kim_ind_energy, 1, (void *) &(eng_vdwl), (int) kim_model_has_energy,
     kim_ind_virial, 1, (void *) &(virial[0]), no_virial_fdotr_compute);
    kim_error(__LINE__, "setm_data_by_index", kimerror);
    if (!kim_model_using_cluster)
    {
       kimerror = pkim->set_method_by_index(kim_ind_get_neigh, 1,
                                            (func_ptr) &get_neigh);
       kim_error(__LINE__, "set_method_by_index", kimerror);
    }
 
    pkim->set_sim_buffer((void *)this, &kimerror);
    kim_error(__LINE__, "set_sim_buffer", kimerror);
 
    return;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairKIM::set_volatiles()
 {
    // Hand the KIM API object the per-atom array pointers and the compute
    // flags that depend on the current atom count and on the energy/virial
    // flags of the current call.
    int kimerror;
    lmps_local_tot_num_atoms = (int) (atom->nghost + atom->nlocal);
    intptr_t nall = (intptr_t) lmps_local_tot_num_atoms;
 
    // 4*2 = four variadic arguments (index, size, pointer, flag) for each of
    // the two entries that follow
    pkim->setm_data_by_index(&kimerror, 4*2,
     kim_ind_coordinates, 3*nall, (void*) &(atom->x[0][0]), 1,
     kim_ind_particleSpecies, nall, (void*) kim_particleSpecies, 1);
    kim_error(__LINE__, "setm_data_by_index", kimerror);
 
    if (kim_model_has_particleEnergy && (eflag_atom == 1))
    {
       kimerror = pkim->set_data_by_index(kim_ind_particleEnergy, nall,
                                          (void*) eatom);
       kim_error(__LINE__, "set_data_by_index", kimerror);
    }
 
    if (kim_model_has_particleVirial && (vflag_atom == 1))
    {
       kimerror = pkim->set_data_by_index(kim_ind_particleVirial, 6*nall,
                                          (void*) &(vatom[0][0]));
       kim_error(__LINE__, "set_data_by_index", kimerror);
    }
 
    if (kim_model_has_forces)
    {
       // with pair hybrid the Model writes into a scratch force array
       // instead of directly into atom->f
       if (lmps_hybrid)
          kimerror = pkim->set_data_by_index(kim_ind_forces, nall*3,
                                             (void*) &(lmps_force_tmp[0][0]));
       else
          kimerror = pkim->set_data_by_index(kim_ind_forces, nall*3,
                                             (void*) &(atom->f[0][0]));
       // report the function that was actually called (the message used to
       // name setm_data_by_index)
       kim_error(__LINE__, "set_data_by_index", kimerror);
    }
 
    // subvert the KIM api by direct access to this->list in get_neigh
    //
    //if (!kim_model_using_cluster)
    //   kimerror = pkim->set_data_by_index(kim_ind_neighObject, 1,
    //                                      (void*) this->list);
 
    // only ask for the per-atom virial when this call requested it
    if (kim_model_has_particleVirial)
    {
       pkim->set_compute_by_index(kim_ind_particleVirial,
        ((vflag_atom != 1) ? KIM_COMPUTE_FALSE : KIM_COMPUTE_TRUE),
        &kimerror);
    }
 
    // only ask for the global virial when this call requested it
    if (no_virial_fdotr_compute == 1)
    {
       pkim->set_compute_by_index(kim_ind_virial,
        ((vflag_global != 1) ? KIM_COMPUTE_FALSE : KIM_COMPUTE_TRUE),
        &kimerror);
    }
 
    return;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairKIM::set_lmps_flags()
 {
    // Cache the LAMMPS settings the KIM interface depends on: newton flag,
    // molecular systems, pair hybrid, CLUSTER eligibility and unit system.
 
    // determine if newton is on or off
    lmps_using_newton = (force->newton_pair == 1);
 
    // molecular systems need a scratch buffer holding the neighbors of one
    // atom; it is (re)allocated with room for neighbor->oneatom entries
    lmps_using_molecular = (atom->molecular > 0);
    if (lmps_using_molecular) {
       memory->destroy(lmps_stripped_neigh_list);
       memory->create(lmps_stripped_neigh_list,neighbor->oneatom,
                      "pair:lmps_stripped_neigh_list");
    }
 
    // determine if running with pair hybrid
    lmps_hybrid = (force->pair_match("hybrid",0));
 
    // cluster mode is only offered for a fully non-periodic box on one proc
    const bool box_is_nonperiodic = (domain->xperiodic == 0) &&
                                    (domain->yperiodic == 0) &&
                                    (domain->zperiodic == 0);
    const bool single_proc = (comm->nprocs == 1);
    lmps_support_cluster = (box_is_nonperiodic && single_proc);
 
    // translate the LAMMPS unit_style string into the lmps_units flag
    const char *style = update->unit_style;
    if (strcmp(style,"real") == 0) {
       lmps_units = REAL;
    } else if (strcmp(style,"metal") == 0) {
       lmps_units = METAL;
    } else if (strcmp(style,"si") == 0) {
       lmps_units = SI;
    } else if (strcmp(style,"cgs") == 0) {
       lmps_units = CGS;
    } else if (strcmp(style,"electron") == 0) {
       lmps_units = ELECTRON;
    } else if (strcmp(style,"lj") == 0) {
       error->all(FLERR,"LAMMPS unit_style lj not supported by KIM models");
    } else {
       error->all(FLERR,"Unknown unit_style");
    }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairKIM::set_kim_model_has_flags()
 {
    KIM_API_model mdl;
 
    int kimerror;
 
    // get KIM API object representing the KIM Model only
    kimerror = mdl.model_info(kim_modelname);
    kim_error(__LINE__,"KIM initialization failed", kimerror);
 
    // determine if the KIM Model can compute the total energy
    mdl.get_index((char*) "energy", &kimerror);
    kim_model_has_energy = (kimerror == KIM_STATUS_OK);
    if (!kim_model_has_energy)
      error->warning(FLERR,"KIM Model does not provide `energy'; "
                     "Potential energy will be zero");
 
    // determine if the KIM Model can compute the forces
    mdl.get_index((char*) "forces", &kimerror);
    kim_model_has_forces = (kimerror == KIM_STATUS_OK);
    if (!kim_model_has_forces)
      error->warning(FLERR,"KIM Model does not provide `forces'; "
                     "Forces will be zero");
 
    // determine if the KIM Model can compute the particleEnergy
    mdl.get_index((char*) "particleEnergy", &kimerror);
    kim_model_has_particleEnergy = (kimerror == KIM_STATUS_OK);
    if (!kim_model_has_particleEnergy)
      error->warning(FLERR,"KIM Model does not provide `particleEnergy'; "
                     "energy per atom will be zero");
 
    // determine if the KIM Model can compute the particleVerial
    mdl.get_index((char*) "particleVirial", &kimerror);
    kim_model_has_particleVirial = (kimerror == KIM_STATUS_OK);
    mdl.get_index((char*) "process_dEdr", &kimerror);
    kim_model_has_particleVirial = kim_model_has_particleVirial ||
      (kimerror == KIM_STATUS_OK);
    if (!kim_model_has_particleVirial)
      error->warning(FLERR,"KIM Model does not provide `particleVirial'; "
                     "virial per atom will be zero");
 
    // tear down KIM API object
    mdl.free(&kimerror);
    // now destructor will do the remaining tear down for mdl
 
    return;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairKIM::write_descriptor(char** test_descriptor_string)
 {
    // Build, in memory, the KIM descriptor text that describes LAMMPS as a
    // KIM "Test": units, species, neighbor-list conventions and the
    // input/output arguments LAMMPS can exchange with a Model.
    //
    // test_descriptor_string: in/out.  Must point to a null pointer on entry
    //   (otherwise error->all() is called).  On return it points to a
    //   new[]-allocated, NUL-terminated string that the caller must
    //   delete [].
    //
    // Relies on lmps_units, lmps_hybrid, lmps_support_cluster,
    // lmps_unique_elements and the kim_model_has_* flags being set already.
 
    // allocate memory
    if (*test_descriptor_string != 0)
      error->all(FLERR, "Test_descriptor_string already allocated");
    // assuming 75 lines at 100 characters each (should be plenty)
    // NOTE(review): the buffer size is fixed and nothing below checks it;
    // a very large number of unique elements would overflow it.
    *test_descriptor_string = new char[100*75];
    // initialize
    strcpy(*test_descriptor_string, "");
 
    // Write Test name and units
    //
    // The version tests in this function previously used the misspelled
    // macro KIM_API_VERSON_MINOR; an identifier that is not a defined macro
    // evaluates to 0 inside #if, so the 1.5 branches could never be taken.
    strcat(*test_descriptor_string,
       "# This file is automatically generated from LAMMPS pair_style "
       "PairKIM command\n"
       "\n"
 #if KIM_API_VERSION_MAJOR == 1 && KIM_API_VERSION_MINOR == 5
 #else
       "KIM_API_Version := 1.6.0\n\n"
 #endif
       "# Base units\n");
    switch (lmps_units)
    {
       case REAL:
          strcat(*test_descriptor_string,
       "Unit_length      := A\n"
       "Unit_energy      := kcal/mol\n"
       "Unit_charge      := e\n"
       "Unit_temperature := K\n"
       "Unit_time        := fs\n\n");
       break;
       case METAL:
          strcat(*test_descriptor_string,
       "Unit_length      := A\n"
       "Unit_energy      := eV\n"
       "Unit_charge      := e\n"
       "Unit_temperature := K\n"
       "Unit_time        := ps\n\n");
       break;
       case SI:
          strcat(*test_descriptor_string,
       "Unit_length      := m\n"
       "Unit_energy      := J\n"
       "Unit_charge      := C\n"
       "Unit_temperature := K\n"
       "Unit_time        := s\n\n");
       break;
       case CGS:
          strcat(*test_descriptor_string,
       "Unit_length      := cm\n"
       "Unit_energy      := erg\n"
       "Unit_charge      := statC\n"
       "Unit_temperature := K\n"
       "Unit_time        := s\n\n");
       break;
       case ELECTRON:
          strcat(*test_descriptor_string,
       "Unit_length      := Bohr\n"
       "Unit_energy      := Hartree\n"
       "Unit_charge      := e\n"
       "Unit_temperature := K\n"
       "Unit_time        := fs\n\n");
       break;
    }
 
    // Write Supported species section
    strcat(*test_descriptor_string,
       "\n"
 #if KIM_API_VERSION_MAJOR == 1 && KIM_API_VERSION_MINOR == 5
       "SUPPORTED_ATOM/PARTICLES_TYPES:\n"
 #else
       "PARTICLE_SPECIES:\n"
 #endif
       "# Symbol/name           Type            code\n\n");
    // one line per unique LAMMPS element, numbered consecutively from 1
    int code=1;
    char tmp_line[100];
    for (int i=0; i < lmps_num_unique_elements; i++){
       sprintf(tmp_line, "%-24s%-16s%-3i\n", lmps_unique_elements[i],
               "spec", code++);
       strcat(*test_descriptor_string, tmp_line);
    }
    strcat(*test_descriptor_string, "\n");
 
    // Write conventions section
    strcat(*test_descriptor_string,
       "\n"
       "CONVENTIONS:\n"
       "# Name                  Type\n\n"
       "ZeroBasedLists          flag\n");
    // can use iterator or locator neighbor mode, unless in hybrid mode
    if (lmps_hybrid)
       strcat(*test_descriptor_string,
       "Neigh_IterAccess        flag\n");
    else
       strcat(*test_descriptor_string,
       "Neigh_BothAccess        flag\n\n");
 
    strcat(*test_descriptor_string,
       "NEIGH_PURE_H            flag\n"
       "NEIGH_PURE_F            flag\n"
       "NEIGH_RVEC_H            flag\n"
       "NEIGH_RVEC_F            flag\n");
    // @@ add code for MI_OPBC_? support ????
    if (lmps_support_cluster)
    {
       strcat(*test_descriptor_string,
       "CLUSTER                 flag\n\n");
    }
    else
    {
       strcat(*test_descriptor_string, "\n");
    }
 
    // Write input section
    strcat(*test_descriptor_string,
       "\n"
       "MODEL_INPUT:\n"
       "# Name                         Type         Unit    Shape\n\n"
       "numberOfParticles              integer      none    []\n\n"
       "numberContributingParticles    integer      none    []\n\n"
 #if KIM_API_VERSION_MAJOR == 1 && KIM_API_VERSION_MINOR == 5
       "numberParticleTypes            integer      none    []\n\n"
       "particleTypes                  integer      none    "
 #else
       "numberOfSpecies                integer      none    []\n\n"
       "particleSpecies                integer      none    "
 #endif
       "[numberOfParticles]\n\n"
       "coordinates                    double       length  "
       "[numberOfParticles,3]\n\n"
       "neighObject                    pointer      none    []\n\n"
       "get_neigh                      method       none    []\n\n");
 
    // Write output section
    // NOTE(review): the section header below is spelled "MODEL_OUPUT:",
    // which looks like a typo for "MODEL_OUTPUT:".  It is left unchanged
    // because the text is consumed by the KIM API parser -- confirm against
    // the KIM descriptor file format before altering it.
    strcat(*test_descriptor_string,
       "\n"
       "MODEL_OUPUT:\n"
       "# Name                         Type         Unit    Shape\n\n"
       "compute                        method       none    []\n\n"
       "destroy                        method       none    []\n\n"
       "cutoff                         double       length  []\n\n");
    // optional outputs are only listed when the Model provides them
    if (kim_model_has_energy) strcat(*test_descriptor_string,
       "energy                         double       energy  []\n\n");
    if (kim_model_has_forces) strcat(*test_descriptor_string,
       "forces                         double       force   "
        "[numberOfParticles,3]\n\n");
    if (kim_model_has_particleEnergy) strcat(*test_descriptor_string,
       "particleEnergy                 double       energy  "
        "[numberOfParticles]\n\n");
    if (no_virial_fdotr_compute == 1) strcat(*test_descriptor_string,
       "virial                         double       energy  [6] \n\n");
    if (kim_model_has_particleVirial) strcat(*test_descriptor_string,
       "particleVirial                 double       energy  "
        "[numberOfParticles,6] \n\n");
 
    return;
 }
diff --git a/src/KOKKOS/pair_table_kokkos.cpp b/src/KOKKOS/pair_table_kokkos.cpp
index c8c2f5090..dfd6787c9 100644
--- a/src/KOKKOS/pair_table_kokkos.cpp
+++ b/src/KOKKOS/pair_table_kokkos.cpp
@@ -1,1382 +1,1382 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Paul Crozier (SNL)
 ------------------------------------------------------------------------- */
 
 #include "mpi.h"
 #include "math.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_table_kokkos.h"
 #include "kokkos.h"
 #include "atom.h"
 #include "force.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "memory.h"
 #include "error.h"
 #include "atom_masks.h"
 
 using namespace LAMMPS_NS;
 
 // NOTE(review): presumably the table-file parameter styles; these
 // enumerators are not referenced in the part of the file reviewed here --
 // confirm.
 enum{NONE,RLINEAR,RSQ,BMP};
 // neighbor-list flavors that compute_style() compares neighflag against
 enum{FULL,HALFTHREAD,HALF};
 
 // NOTE(review): presumably the maximum length of a line read from a table
 // file -- confirm.
 #define MAXLINE 1024
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 PairTableKokkos<DeviceType>::PairTableKokkos(LAMMPS *lmp) : Pair(lmp)
 {
   update_table = 0;
   atomKK = (AtomKokkos *) atom;
   ntables = 0;
   tables = NULL;
   execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
   datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK;
   datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
   h_table = new TableHost();
   d_table = new TableDevice();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 PairTableKokkos<DeviceType>::~PairTableKokkos()
 {
   // Only the host/device table containers are released here.
   //
   // NOTE(review): the following cleanup is present but disabled in this
   // destructor; whether it happens elsewhere is not visible from here --
   // confirm before re-enabling or deleting it:
   //
   //   for (int m = 0; m < ntables; m++) free_table(&tables[m]);
   //   memory->sfree(tables);
   //
   //   if (allocated) {
   //     memory->destroy(setflag);
   //     memory->destroy(cutsq);
   //     memory->destroy(tabindex);
   //   }
   delete h_table;
   delete d_table;
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 void PairTableKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
 {
   // rebuild the Kokkos copies of the tables first when flagged
   if (update_table) create_kokkos_tables();
 
   // forward to the kernel specialised for the active table style;
   // any other value of tabstyle does nothing
   switch (tabstyle) {
     case LOOKUP:
       compute_style<LOOKUP>(eflag_in,vflag_in);
       break;
     case LINEAR:
       compute_style<LINEAR>(eflag_in,vflag_in);
       break;
     case SPLINE:
       compute_style<SPLINE>(eflag_in,vflag_in);
       break;
     case BITMAP:
       compute_style<BITMAP>(eflag_in,vflag_in);
       break;
     default:
       break;
   }
 }
 
 // Force/energy/virial evaluation for one table style (TABSTYLE).
 //
 // Stores the energy/virial flags, syncs the atom data to this execution
 // space, caches the atom views and a few scalars in members, then launches
 // a PairComputeFunctor chosen by neighflag.  The functor's boolean template
 // argument is false when atom->ntypes exceeds MAX_TYPES_STACKPARAMS and
 // true otherwise.  A parallel_reduce is used when energy or virial is
 // requested (the result is accumulated into eng_vdwl / virial afterwards);
 // otherwise a plain parallel_for is launched.
 template<class DeviceType>
 template<int TABSTYLE>
 void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
 {
   eflag = eflag_in;
   vflag = vflag_in;
 
   // full neighbor lists: flag that the f-dot-r virial must not be used
   if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
 
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   // make the data this kernel reads current in this execution space and
   // mark what it is going to modify
   atomKK->sync(execution_space,datamask_read);
   //k_cutsq.template sync<DeviceType>();
   //k_params.template sync<DeviceType>();
   if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
   else atomKK->modified(execution_space,F_MASK);
 
   // cache views and scalars in members for use by the functor
   x = c_x = atomKK->k_x.view<DeviceType>();
   f = atomKK->k_f.view<DeviceType>();
   type = atomKK->k_type.view<DeviceType>();
   nlocal = atom->nlocal;
   nall = atom->nlocal + atom->nghost;
   special_lj[0] = force->special_lj[0];
   special_lj[1] = force->special_lj[1];
   special_lj[2] = force->special_lj[2];
   special_lj[3] = force->special_lj[3];
   newton_pair = force->newton_pair;
   d_cutsq = d_table->cutsq;
   // loop over neighbors of my atoms
 
   // receives the reduced energy/virial when a parallel_reduce is launched
   EV_FLOAT ev;
   if(atom->ntypes > MAX_TYPES_STACKPARAMS) {
     // functor instantiated with its boolean template argument set to false
     if (neighflag == FULL) {
       PairComputeFunctor<PairTableKokkos<DeviceType>,FULL,false,S_TableCompute<DeviceType,TABSTYLE> >
         ff(this,(NeighListKokkos<DeviceType>*) list);
       if (eflag || vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
       else Kokkos::parallel_for(list->inum,ff);
     } else if (neighflag == HALFTHREAD) {
       PairComputeFunctor<PairTableKokkos<DeviceType>,HALFTHREAD,false,S_TableCompute<DeviceType,TABSTYLE> >
         ff(this,(NeighListKokkos<DeviceType>*) list);
       if (eflag || vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
       else Kokkos::parallel_for(list->inum,ff);
     } else if (neighflag == HALF) {
       PairComputeFunctor<PairTableKokkos<DeviceType>,HALF,false,S_TableCompute<DeviceType,TABSTYLE> >
         f(this,(NeighListKokkos<DeviceType>*) list);
       if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev);
       else Kokkos::parallel_for(list->inum,f);
     } else if (neighflag == N2) {
       // N2 iterates over nlocal rather than list->inum
       PairComputeFunctor<PairTableKokkos<DeviceType>,N2,false,S_TableCompute<DeviceType,TABSTYLE> >
         f(this,(NeighListKokkos<DeviceType>*) list);
       if (eflag || vflag) Kokkos::parallel_reduce(nlocal,f,ev);
       else Kokkos::parallel_for(nlocal,f);
     } else if (neighflag == FULLCLUSTER) {
       // FULLCLUSTER is launched with a team policy: team size 256 when the
       // functor's device type is Kokkos::Cuda, otherwise 1
       typedef PairComputeFunctor<PairTableKokkos<DeviceType>,FULLCLUSTER,false,S_TableCompute<DeviceType,TABSTYLE> >
         f_type;
       f_type f(this,(NeighListKokkos<DeviceType>*) list);
       #ifdef KOKKOS_HAVE_CUDA
         const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1;
       #else
         const int teamsize = 1;
       #endif
       // number of teams = ceil(inum * vectorization increment / teamsize)
       const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
       Kokkos::TeamPolicy<DeviceType> config(nteams,teamsize);
       if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev);
       else Kokkos::parallel_for(config,f);
     }
   } else {
     // same dispatch with the functor's boolean template argument set to true
     if (neighflag == FULL) {
       PairComputeFunctor<PairTableKokkos<DeviceType>,FULL,true,S_TableCompute<DeviceType,TABSTYLE> >
         f(this,(NeighListKokkos<DeviceType>*) list);
       if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev);
       else Kokkos::parallel_for(list->inum,f);
     } else if (neighflag == HALFTHREAD) {
       PairComputeFunctor<PairTableKokkos<DeviceType>,HALFTHREAD,true,S_TableCompute<DeviceType,TABSTYLE> >
         f(this,(NeighListKokkos<DeviceType>*) list);
       if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev);
       else Kokkos::parallel_for(list->inum,f);
     } else if (neighflag == HALF) {
       PairComputeFunctor<PairTableKokkos<DeviceType>,HALF,true,S_TableCompute<DeviceType,TABSTYLE> >
         f(this,(NeighListKokkos<DeviceType>*) list);
       if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev);
       else Kokkos::parallel_for(list->inum,f);
     } else if (neighflag == N2) {
       PairComputeFunctor<PairTableKokkos<DeviceType>,N2,true,S_TableCompute<DeviceType,TABSTYLE> >
         f(this,(NeighListKokkos<DeviceType>*) list);
       if (eflag || vflag) Kokkos::parallel_reduce(nlocal,f,ev);
       else Kokkos::parallel_for(nlocal,f);
     } else if (neighflag == FULLCLUSTER) {
       typedef PairComputeFunctor<PairTableKokkos<DeviceType>,FULLCLUSTER,true,S_TableCompute<DeviceType,TABSTYLE> >
         f_type;
       f_type f(this,(NeighListKokkos<DeviceType>*) list);
       #ifdef KOKKOS_HAVE_CUDA
         const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1;
       #else
         const int teamsize = 1;
       #endif
       const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
       Kokkos::TeamPolicy<DeviceType> config(nteams,teamsize);
       if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev);
       else Kokkos::parallel_for(config,f);
     }
   }
   // wait for the kernel before reading the reduction result
   DeviceType::fence();
 
   // fold the reduced energy / virial into the pair-style accumulators
   if (eflag) eng_vdwl += ev.evdwl;
   if (vflag_global) {
     virial[0] += ev.v[0];
     virial[1] += ev.v[1];
     virial[2] += ev.v[2];
     virial[3] += ev.v[3];
     virial[4] += ev.v[4];
     virial[5] += ev.v[5];
   }
 
   if (vflag_fdotr) pair_virial_fdotr_compute(this);
 }
 
 // Tabulated force value for squared distance rsq between types itype and
 // jtype, evaluated according to Specialisation::TabStyle.  The atom indices
 // i and j are accepted but not used.
 template<class DeviceType>
 template<bool STACKPARAMS, class Specialisation>
 KOKKOS_INLINE_FUNCTION
 F_FLOAT PairTableKokkos<DeviceType>::
 compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
   (void) i;
   (void) j;
 
   // table that serves this pair of types
   const int tidx = d_table_const.tabindex(itype,jtype);
 
   // NOTE: no range checks are performed here; the tests for
   // "Pair distance < table inner cutoff" and
   // "Pair distance > table outer cutoff" are disabled in this kernel.
 
   if (Specialisation::TabStyle == LOOKUP) {
     // value stored for the bin that contains rsq
     const int bin = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
     const double value = d_table_const.f(tidx,bin);
     return value;
   }
 
   if (Specialisation::TabStyle == LINEAR) {
     // linear interpolation inside the bin that contains rsq
     const int bin = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
     const double frac = (rsq - d_table_const.rsq(tidx,bin)) * d_table_const.invdelta(tidx);
     const double value = d_table_const.f(tidx,bin) + frac*d_table_const.df(tidx,bin);
     return value;
   }
 
   if (Specialisation::TabStyle == SPLINE) {
     // cubic spline between the two table points that bracket rsq
     const int bin = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
     const double b = (rsq - d_table_const.rsq(tidx,bin)) * d_table_const.invdelta(tidx);
     const double a = 1.0 - b;
     const double value = a * d_table_const.f(tidx,bin) + b * d_table_const.f(tidx,bin+1) +
       ((a*a*a-a)*d_table_const.f2(tidx,bin) + (b*b*b-b)*d_table_const.f2(tidx,bin+1)) *
       d_table_const.deltasq6(tidx);
     return value;
   }
 
   // remaining style: the bin index is taken from masked and shifted bits of
   // rsq stored through the float member of the union
   union_int_float_t rsq_bits;
   rsq_bits.f = rsq;
   int bin = rsq_bits.i & d_table_const.nmask(tidx);
   bin >>= d_table_const.nshiftbits(tidx);
   const double frac = (rsq_bits.f - d_table_const.rsq(tidx,bin)) * d_table_const.drsq(tidx,bin);
   const double value = d_table_const.f(tidx,bin) + frac*d_table_const.df(tidx,bin);
   return value;
 }
 
 // Tabulated energy value for squared distance rsq between types itype and
 // jtype, evaluated according to Specialisation::TabStyle.  The atom indices
 // i and j are accepted but not used.
 template<class DeviceType>
 template<bool STACKPARAMS, class Specialisation>
 KOKKOS_INLINE_FUNCTION
 F_FLOAT PairTableKokkos<DeviceType>::
 compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const int& itype, const int& jtype) const {
   (void) i;
   (void) j;
 
   // table that serves this pair of types
   const int tidx = d_table_const.tabindex(itype,jtype);
 
   // NOTE: no range checks are performed here; the tests for
   // "Pair distance < table inner cutoff" and
   // "Pair distance > table outer cutoff" are disabled in this kernel.
 
   if (Specialisation::TabStyle == LOOKUP) {
     // value stored for the bin that contains rsq
     const int bin = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
     const double value = d_table_const.e(tidx,bin);
     return value;
   }
 
   if (Specialisation::TabStyle == LINEAR) {
     // linear interpolation inside the bin that contains rsq
     const int bin = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
     const double frac = (rsq - d_table_const.rsq(tidx,bin)) * d_table_const.invdelta(tidx);
     const double value = d_table_const.e(tidx,bin) + frac*d_table_const.de(tidx,bin);
     return value;
   }
 
   if (Specialisation::TabStyle == SPLINE) {
     // cubic spline between the two table points that bracket rsq
     const int bin = static_cast<int> ((rsq - d_table_const.innersq(tidx)) * d_table_const.invdelta(tidx));
     const double b = (rsq - d_table_const.rsq(tidx,bin)) * d_table_const.invdelta(tidx);
     const double a = 1.0 - b;
     const double value = a * d_table_const.e(tidx,bin) + b * d_table_const.e(tidx,bin+1) +
         ((a*a*a-a)*d_table_const.e2(tidx,bin) + (b*b*b-b)*d_table_const.e2(tidx,bin+1)) *
         d_table_const.deltasq6(tidx);
     return value;
   }
 
   // remaining style: the bin index is taken from masked and shifted bits of
   // rsq stored through the float member of the union
   union_int_float_t rsq_bits;
   rsq_bits.f = rsq;
   int bin = rsq_bits.i & d_table_const.nmask(tidx);
   bin >>= d_table_const.nshiftbits(tidx);
   const double frac = (rsq_bits.f - d_table_const.rsq(tidx,bin)) * d_table_const.drsq(tidx,bin);
   const double value = d_table_const.e(tidx,bin) + frac*d_table_const.de(tidx,bin);
   return value;
 }
 
 template<class DeviceType>
 void PairTableKokkos<DeviceType>::create_kokkos_tables()
 {
   const int tlm1 = tablength-1;
 
   memory->create_kokkos(d_table->nshiftbits,h_table->nshiftbits,ntables,"Table::nshiftbits");
   memory->create_kokkos(d_table->nmask,h_table->nmask,ntables,"Table::nmask");
   memory->create_kokkos(d_table->innersq,h_table->innersq,ntables,"Table::innersq");
   memory->create_kokkos(d_table->invdelta,h_table->invdelta,ntables,"Table::invdelta");
   memory->create_kokkos(d_table->deltasq6,h_table->deltasq6,ntables,"Table::deltasq6");
 
   if(tabstyle == LOOKUP) {
     memory->create_kokkos(d_table->e,h_table->e,ntables,tlm1,"Table::e");
     memory->create_kokkos(d_table->f,h_table->f,ntables,tlm1,"Table::f");
   }
 
   if(tabstyle == LINEAR) {
     memory->create_kokkos(d_table->rsq,h_table->rsq,ntables,tablength,"Table::rsq");
     memory->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e");
     memory->create_kokkos(d_table->f,h_table->f,ntables,tablength,"Table::f");
     memory->create_kokkos(d_table->de,h_table->de,ntables,tlm1,"Table::de");
     memory->create_kokkos(d_table->df,h_table->df,ntables,tlm1,"Table::df");
   }
 
   if(tabstyle == SPLINE) {
     memory->create_kokkos(d_table->rsq,h_table->rsq,ntables,tablength,"Table::rsq");
     memory->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e");
     memory->create_kokkos(d_table->f,h_table->f,ntables,tablength,"Table::f");
     memory->create_kokkos(d_table->e2,h_table->e2,ntables,tablength,"Table::e2");
     memory->create_kokkos(d_table->f2,h_table->f2,ntables,tablength,"Table::f2");
   }
 
   if(tabstyle == BITMAP) {
     int ntable = 1 << tablength;
     memory->create_kokkos(d_table->rsq,h_table->rsq,ntables,ntable,"Table::rsq");
     memory->create_kokkos(d_table->e,h_table->e,ntables,ntable,"Table::e");
     memory->create_kokkos(d_table->f,h_table->f,ntables,ntable,"Table::f");
     memory->create_kokkos(d_table->de,h_table->de,ntables,ntable,"Table::de");
     memory->create_kokkos(d_table->df,h_table->df,ntables,ntable,"Table::df");
     memory->create_kokkos(d_table->drsq,h_table->drsq,ntables,ntable,"Table::drsq");
   }
 
 
 
   for(int i=0; i < ntables; i++) {
     Table* tb = &tables[i];
 
     h_table->nshiftbits[i] = tb->nshiftbits;
     h_table->nmask[i] = tb->nmask;
     h_table->innersq[i] = tb->innersq;
     h_table->invdelta[i] = tb->invdelta;
     h_table->deltasq6[i] = tb->deltasq6;
 
     for(int j = 0; j<h_table->rsq.dimension_1(); j++)
       h_table->rsq(i,j) = tb->rsq[j];
     for(int j = 0; j<h_table->drsq.dimension_1(); j++)
       h_table->drsq(i,j) = tb->drsq[j];
     for(int j = 0; j<h_table->e.dimension_1(); j++)
       h_table->e(i,j) = tb->e[j];
     for(int j = 0; j<h_table->de.dimension_1(); j++)
       h_table->de(i,j) = tb->de[j];
     for(int j = 0; j<h_table->f.dimension_1(); j++)
       h_table->f(i,j) = tb->f[j];
     for(int j = 0; j<h_table->df.dimension_1(); j++)
       h_table->df(i,j) = tb->df[j];
     for(int j = 0; j<h_table->e2.dimension_1(); j++)
       h_table->e2(i,j) = tb->e2[j];
     for(int j = 0; j<h_table->f2.dimension_1(); j++)
       h_table->f2(i,j) = tb->f2[j];
   }
 
   
   Kokkos::deep_copy(d_table->nshiftbits,h_table->nshiftbits);
   Kokkos::deep_copy(d_table->nmask,h_table->nmask);
   Kokkos::deep_copy(d_table->innersq,h_table->innersq);
   Kokkos::deep_copy(d_table->invdelta,h_table->invdelta);
   Kokkos::deep_copy(d_table->deltasq6,h_table->deltasq6);
   Kokkos::deep_copy(d_table->rsq,h_table->rsq);
   Kokkos::deep_copy(d_table->drsq,h_table->drsq);
   Kokkos::deep_copy(d_table->e,h_table->e);
   Kokkos::deep_copy(d_table->de,h_table->de);
   Kokkos::deep_copy(d_table->f,h_table->f);
   Kokkos::deep_copy(d_table->df,h_table->df);
   Kokkos::deep_copy(d_table->e2,h_table->e2);
   Kokkos::deep_copy(d_table->f2,h_table->f2);
   Kokkos::deep_copy(d_table->tabindex,h_table->tabindex);
 
   d_table_const.nshiftbits = d_table->nshiftbits;
   d_table_const.nmask = d_table->nmask;
   d_table_const.innersq = d_table->innersq;
   d_table_const.invdelta = d_table->invdelta;
   d_table_const.deltasq6 = d_table->deltasq6;
   d_table_const.rsq = d_table->rsq;
   d_table_const.drsq = d_table->drsq;
   d_table_const.e = d_table->e;
   d_table_const.de = d_table->de;
   d_table_const.f = d_table->f;
   d_table_const.df = d_table->df;
   d_table_const.e2 = d_table->e2;
   d_table_const.f2 = d_table->f2;
 
 
   Kokkos::deep_copy(d_table->cutsq,h_table->cutsq);
   update_table = 0;
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 template<class DeviceType>
 void PairTableKokkos<DeviceType>::allocate()
 {
   // one extra row/column beyond ntypes
   // (presumably because atom types are 1-based - confirm against callers)
   const int ntp1 = atom->ntypes + 1;
   const int npairs = ntp1*ntp1;
 
   // setflag: plain 2d array, cleared right after creation
   memory->create(setflag,ntp1,ntp1,"pair:setflag");
   memset(&setflag[0][0],0,npairs*sizeof(int));
 
   // cutsq: created through the Kokkos-aware allocator, then published
   // to the const table handle and cleared
   memory->create_kokkos(d_table->cutsq,h_table->cutsq,cutsq,ntp1,ntp1,"pair:cutsq");
   d_table_const.cutsq = d_table->cutsq;
   memset(&cutsq[0][0],0,npairs*sizeof(double));
 
   // tabindex: same treatment as cutsq
   memory->create_kokkos(d_table->tabindex,h_table->tabindex,tabindex,ntp1,ntp1,"pair:tabindex");
   d_table_const.tabindex = d_table->tabindex;
   memset(&tabindex[0][0],0,npairs*sizeof(int));
 
   allocated = 1;
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 template<class DeviceType>
 void PairTableKokkos<DeviceType>::settings(int narg, char **arg)
 {
   if (narg < 2) error->all(FLERR,"Illegal pair_style command");
 
   // arg[0] selects the tabulation style
 
   const char *style = arg[0];
   if (!strcmp(style,"lookup")) tabstyle = LOOKUP;
   else if (!strcmp(style,"linear")) tabstyle = LINEAR;
   else if (!strcmp(style,"spline")) tabstyle = SPLINE;
   else if (!strcmp(style,"bitmap")) tabstyle = BITMAP;
   else error->all(FLERR,"Unknown table style in pair_style command");
 
   // arg[1] is the requested table length; at least 2 entries are required
 
   tablength = force->inumeric(FLERR,arg[1]);
   if (tablength < 2) error->all(FLERR,"Illegal number of pair table entries");
 
   // remaining args are optional keywords, each one declaring that the
   // tabulation is compatible with a specific long-range solver
 
   for (int iarg = 2; iarg < narg; iarg++) {
     const char *keyword = arg[iarg];
     if (!strcmp(keyword,"ewald")) ewaldflag = 1;
     else if (!strcmp(keyword,"pppm")) pppmflag = 1;
     else if (!strcmp(keyword,"msm")) msmflag = 1;
     else if (!strcmp(keyword,"dispersion")) dispersionflag = 1;
     else if (!strcmp(keyword,"tip4p")) tip4pflag = 1;
     else error->all(FLERR,"Illegal pair_style command");
   }
 
   // existing tables cannot be adapted to new settings, so drop them all
 
   for (int m = 0; m < ntables; m++) free_table(&tables[m]);
   memory->sfree(tables);
   tables = NULL;
   ntables = 0;
 
   // release the per-type-pair arrays: setflag is destroyed explicitly,
   // the Kokkos views are replaced by default-constructed (empty) views
 
   if (allocated) {
     memory->destroy(setflag);
 
     d_table->tabindex = typename ArrayTypes<DeviceType>::t_int_2d();
     d_table_const.tabindex = d_table->tabindex;
     h_table->tabindex = typename ArrayTypes<LMPHostType>::t_int_2d();
 
     d_table->cutsq = typename ArrayTypes<DeviceType>::t_ffloat_2d();
     d_table_const.cutsq = d_table->cutsq;
     h_table->cutsq = typename ArrayTypes<LMPHostType>::t_ffloat_2d();
   }
   allocated = 0;
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 template<class DeviceType>
 void PairTableKokkos<DeviceType>::coeff(int narg, char **arg)
 {
   // args: itype-range jtype-range file keyword [cutoff]
 
   if (narg != 4 && narg != 5) error->all(FLERR,"Illegal pair_coeff command");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   // grow the table list by one entry
   // proc 0 reads the file section, then every proc receives a copy
 
   int me;
   MPI_Comm_rank(world,&me);
   tables = (Table *)
     memory->srealloc(tables,(ntables+1)*sizeof(Table),"pair:tables");
   Table *tb = &tables[ntables];
   null_table(tb);
   if (me == 0) read_table(tb,arg[2],arg[3]);
   bcast_table(tb);
 
   // validate the table length before anything indexes rfile[ninput-1]
 
   if (tb->ninput <= 1) error->one(FLERR,"Invalid pair table length");
 
   // set table cutoff
   // explicit 5th arg wins, else upper bound from the parameter line,
   // else last r value stored for the table
 
   if (narg == 5) tb->cut = force->numeric(FLERR,arg[4]);
   else if (tb->rflag) tb->cut = tb->rhi;
   else tb->cut = tb->rfile[tb->ninput-1];
 
   // error check on table parameters
   // insure cutoff is within table
   // for BITMAP tables, file values can be in non-ascending order
 
   double rlo,rhi;
   if (tb->rflag == 0) {
     rlo = tb->rfile[0];
     rhi = tb->rfile[tb->ninput-1];
   } else {
     rlo = tb->rlo;
     rhi = tb->rhi;
   }
   if (tb->cut <= rlo || tb->cut > rhi)
     error->all(FLERR,"Invalid pair table cutoff");
   if (rlo <= 0.0) error->all(FLERR,"Invalid pair table cutoff");
 
   // match = 1 if don't need to spline read-in tables
   // this is only the case if r values needed by final tables
   //   exactly match r values read from file
   // for tabstyle SPLINE, always need to build spline tables
 
   tb->match = 0;
   if (tabstyle == LINEAR && tb->ninput == tablength &&
       tb->rflag == RSQ && tb->rhi == tb->cut) tb->match = 1;
   if (tabstyle == BITMAP && tb->ninput == 1 << tablength &&
       tb->rflag == BMP && tb->rhi == tb->cut) tb->match = 1;
   if (tb->rflag == BMP && tb->match == 0)
     error->all(FLERR,"Bitmapped table in file does not match requested table");
 
   // spline read-in values and compute r,e,f vectors within table
 
   if (tb->match == 0) spline_table(tb);
   compute_table(tb);
 
   // store ptr to table in tabindex
   // only pairs with j >= i are assigned here
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       tabindex[i][j] = ntables;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   // the new table only becomes part of the list if some pair uses it
 
   if (count == 0) error->all(FLERR,"Illegal pair_coeff command");
   ntables++;
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 template<class DeviceType>
 double PairTableKokkos<DeviceType>::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   // copy the table assignment of (i,j) to the mirrored entry (j,i)
   const int itab = tabindex[i][j];
   tabindex[j][i] = itab;
 
   const double cut = tables[itab].cut;
 
   // the fixed-size m_cutsq array is only filled for type indices
   // up to MAX_TYPES_STACKPARAMS
   if (i <= MAX_TYPES_STACKPARAMS && j <= MAX_TYPES_STACKPARAMS) {
     m_cutsq[i][j] = cut*cut;
     m_cutsq[j][i] = m_cutsq[i][j];
   }
 
   // the return value is the cutoff distance of the pair's table
   return cut;
 }
 
 /* ----------------------------------------------------------------------
    read a table section from a tabulated potential file
    only called by proc 0
    this function sets these values in Table:
      ninput,rfile,efile,ffile,rflag,rlo,rhi,fpflag,fplo,fphi,ntablebits
 ------------------------------------------------------------------------- */
 
 template<class DeviceType>
 void PairTableKokkos<DeviceType>::read_table(Table *tb, char *file, char *keyword)
 {
   char line[MAXLINE];
 
   // open file
   // snprintf bounds the message so a long file name cannot overflow str
 
   FILE *fp = force->open_potential(file);
   if (fp == NULL) {
     char str[128];
     snprintf(str,sizeof(str),"Cannot open file %s",file);
     error->one(FLERR,str);
   }
 
   // loop until section found with matching keyword
   // hitting end-of-file anywhere in this loop means the keyword is absent
 
   while (1) {
     if (fgets(line,MAXLINE,fp) == NULL)
       error->one(FLERR,"Did not find keyword in table file");
     if (strspn(line," \t\n\r") == strlen(line)) continue;  // blank line
     if (line[0] == '#') continue;                          // comment
     char *word = strtok(line," \t\n\r");
     if (strcmp(word,keyword) == 0) break;           // matching keyword
 
     // no match, skip section: parameter line, separator line, N data lines
 
     if (fgets(line,MAXLINE,fp) == NULL)
       error->one(FLERR,"Did not find keyword in table file");
     param_extract(tb,line);
     if (fgets(line,MAXLINE,fp) == NULL)
       error->one(FLERR,"Did not find keyword in table file");
     for (int i = 0; i < tb->ninput; i++)
       if (fgets(line,MAXLINE,fp) == NULL)
         error->one(FLERR,"Did not find keyword in table file");
   }
 
   // read args on 2nd line of section
   // allocate table arrays for file values
 
   if (fgets(line,MAXLINE,fp) == NULL)
     error->one(FLERR,"Premature end of file in pair table");
   param_extract(tb,line);
   memory->create(tb->rfile,tb->ninput,"pair:rfile");
   memory->create(tb->efile,tb->ninput,"pair:efile");
   memory->create(tb->ffile,tb->ninput,"pair:ffile");
 
   // setup bitmap parameters for table to read in
   // masklo,maskhi,nmask,nshiftbits are only set (and only used) for BMP
 
   tb->ntablebits = 0;
   int masklo,maskhi,nmask,nshiftbits;
   if (tb->rflag == BMP) {
     while (1 << tb->ntablebits < tb->ninput) tb->ntablebits++;
     if (1 << tb->ntablebits != tb->ninput)
       error->one(FLERR,"Bitmapped table is incorrect length in table file");
     init_bitmap(tb->rlo,tb->rhi,tb->ntablebits,masklo,maskhi,nmask,nshiftbits);
   }
 
   // read r,e,f table values from file
   // if rflag set, compute r
   // if rflag not set, use r from file
   // NOTE(review): the sscanf() result is still not checked, so a malformed
   //   data line leaves the previous/indeterminate values in place
 
   int itmp;
   double rtmp;
   union_int_float_t rsq_lookup;
 
   // skip the line between the parameter line and the data lines
 
   if (fgets(line,MAXLINE,fp) == NULL)
     error->one(FLERR,"Premature end of file in pair table");
 
   for (int i = 0; i < tb->ninput; i++) {
     if (fgets(line,MAXLINE,fp) == NULL)
       error->one(FLERR,"Premature end of file in pair table");
     sscanf(line,"%d %lg %lg %lg",&itmp,&rtmp,&tb->efile[i],&tb->ffile[i]);
 
     if (tb->rflag == RLINEAR)
       rtmp = tb->rlo + (tb->rhi - tb->rlo)*i/(tb->ninput-1);
     else if (tb->rflag == RSQ) {
       rtmp = tb->rlo*tb->rlo +
         (tb->rhi*tb->rhi - tb->rlo*tb->rlo)*i/(tb->ninput-1);
       rtmp = sqrt(rtmp);
     } else if (tb->rflag == BMP) {
       // rebuild rsq from the table index via the float bit pattern
       rsq_lookup.i = i << nshiftbits;
       rsq_lookup.i |= masklo;
       if (rsq_lookup.f < tb->rlo*tb->rlo) {
         rsq_lookup.i = i << nshiftbits;
         rsq_lookup.i |= maskhi;
       }
       rtmp = sqrtf(rsq_lookup.f);
     }
 
     tb->rfile[i] = rtmp;
   }
 
   // close file
 
   fclose(fp);
 }
 
 /* ----------------------------------------------------------------------
    broadcast read-in table info from proc 0 to other procs
    this function communicates these values in Table:
      ninput,rfile,efile,ffile,rflag,rlo,rhi,fpflag,fplo,fphi
 ------------------------------------------------------------------------- */
 
 template<class DeviceType>
 void PairTableKokkos<DeviceType>::bcast_table(Table *tb)
 {
   // the table length travels first so receivers can size their arrays
 
   MPI_Bcast(&tb->ninput,1,MPI_INT,0,world);
   const int npts = tb->ninput;
 
   int rank;
   MPI_Comm_rank(world,&rank);
 
   // every proc except the root still has to allocate the file arrays
 
   if (rank != 0) {
     memory->create(tb->rfile,npts,"pair:rfile");
     memory->create(tb->efile,npts,"pair:efile");
     memory->create(tb->ffile,npts,"pair:ffile");
   }
 
   // tabulated r, energy and force values
 
   MPI_Bcast(tb->rfile,npts,MPI_DOUBLE,0,world);
   MPI_Bcast(tb->efile,npts,MPI_DOUBLE,0,world);
   MPI_Bcast(tb->ffile,npts,MPI_DOUBLE,0,world);
 
   // r-spacing flag; lo/hi bounds are only sent when the flag is non-zero
 
   MPI_Bcast(&tb->rflag,1,MPI_INT,0,world);
   if (tb->rflag != 0) {
     MPI_Bcast(&tb->rlo,1,MPI_DOUBLE,0,world);
     MPI_Bcast(&tb->rhi,1,MPI_DOUBLE,0,world);
   }
 
   // FP flag; the two end-point derivatives are only sent when it is set
 
   MPI_Bcast(&tb->fpflag,1,MPI_INT,0,world);
   if (tb->fpflag != 0) {
     MPI_Bcast(&tb->fplo,1,MPI_DOUBLE,0,world);
     MPI_Bcast(&tb->fphi,1,MPI_DOUBLE,0,world);
   }
 }
 
 /* ----------------------------------------------------------------------
    build spline representation of e,f over entire range of read-in table
    this function sets these values in Table: e2file,f2file
 ------------------------------------------------------------------------- */
 
 template<class DeviceType>
 void PairTableKokkos<DeviceType>::spline_table(Table *tb)
 {
   const int n = tb->ninput;
   const int last = n - 1;
   double *r = tb->rfile;
   double *f = tb->ffile;
 
   memory->create(tb->e2file,n,"pair:e2file");
   memory->create(tb->f2file,n,"pair:f2file");
 
   // energy spline: end-point slopes are the negated first/last force values
 
   spline(r,tb->efile,n,-f[0],-f[last],tb->e2file);
 
   // force spline: if the file gave no FP values, estimate the end-point
   // slopes by finite differences over the first and last intervals
 
   if (!tb->fpflag) {
     tb->fplo = (f[1] - f[0]) / (r[1] - r[0]);
     tb->fphi = (f[last] - f[last-1]) / (r[last] - r[last-1]);
   }
 
   spline(r,f,n,tb->fplo,tb->fphi,tb->f2file);
 }
 
 /* ----------------------------------------------------------------------
    extract attributes from parameter line in table section
    format of line: N value R/RSQ/BITMAP lo hi FP fplo fphi
    N is required, other params are optional
 ------------------------------------------------------------------------- */
 
 template<class DeviceType>
 void PairTableKokkos<DeviceType>::param_extract(Table *tb, char *line)
 {
   // defaults: no length, r values taken from file, no FP derivatives
 
   tb->ninput = 0;
   tb->rflag = NONE;
   tb->fpflag = 0;
 
   // line is tokenized in place by strtok()
   // every keyword needs its value(s); strtok() returns NULL when the line
   //   ends early, which must be caught before calling atoi()/atof()
 
   const char *sep = " \t\n\r\f";
   char *word = strtok(line,sep);
   while (word) {
     if (strcmp(word,"N") == 0) {
       char *value = strtok(NULL,sep);
       if (value == NULL)
         error->one(FLERR,"Missing argument in pair table parameters");
       tb->ninput = atoi(value);
     } else if (strcmp(word,"R") == 0 || strcmp(word,"RSQ") == 0 ||
                strcmp(word,"BITMAP") == 0) {
       if (strcmp(word,"R") == 0) tb->rflag = RLINEAR;
       else if (strcmp(word,"RSQ") == 0) tb->rflag = RSQ;
       else if (strcmp(word,"BITMAP") == 0) tb->rflag = BMP;
       char *lo = strtok(NULL,sep);
       char *hi = strtok(NULL,sep);
       if (lo == NULL || hi == NULL)
         error->one(FLERR,"Missing argument in pair table parameters");
       tb->rlo = atof(lo);
       tb->rhi = atof(hi);
     } else if (strcmp(word,"FP") == 0) {
       tb->fpflag = 1;
       char *lo = strtok(NULL,sep);
       char *hi = strtok(NULL,sep);
       if (lo == NULL || hi == NULL)
         error->one(FLERR,"Missing argument in pair table parameters");
       tb->fplo = atof(lo);
       tb->fphi = atof(hi);
     } else {
       error->one(FLERR,"Invalid keyword in pair table parameters");
     }
     word = strtok(NULL,sep);
   }
 
   // N is the only required parameter
 
   if (tb->ninput == 0) error->one(FLERR,"Pair table parameters did not set N");
 }
 
 /* ----------------------------------------------------------------------
    compute r,e,f vectors from splined values
 ------------------------------------------------------------------------- */
 
 template<class DeviceType>
 void PairTableKokkos<DeviceType>::compute_table(Table *tb)
 {
   // build the run-time arrays of one table (tb->rsq,e,f,de,df,e2,f2,drsq,
   //   depending on tabstyle) from the values read from file
   // tb->cut must already be set; tb->e2file/f2file are needed by every
   //   branch that calls splint(), i.e. whenever tb->match is 0
 
   // flag that table data changed; update_table is reset to 0 elsewhere in
   //   this file after the host tables have been copied to the device
 
   update_table = 1;
   int tlm1 = tablength-1;
 
   // inner = inner table bound
   // cut = outer table bound
   // delta = table spacing in rsq for N-1 bins
 
   double inner;
   if (tb->rflag) inner = tb->rlo;
   else inner = tb->rfile[0];
   tb->innersq = inner*inner;
   tb->delta = (tb->cut*tb->cut - tb->innersq) / tlm1;
   tb->invdelta = 1.0/tb->delta;
 
   // direct lookup tables
   // N-1 evenly spaced bins in rsq from inner to cut
   // e,f = value at midpt of bin
   // e,f are N-1 in length since store 1 value at bin midpt
   // f is converted to f/r when stored in f[i]
   // e,f are never a match to read-in values, always computed via spline interp
 
   if (tabstyle == LOOKUP) {
     memory->create(tb->e,tlm1,"pair:e");
     memory->create(tb->f,tlm1,"pair:f");
 
     double r,rsq;
     for (int i = 0; i < tlm1; i++) {
       rsq = tb->innersq + (i+0.5)*tb->delta;
       r = sqrt(rsq);
       tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r);
       tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r;
     }
   }
 
   // linear tables
   // N-1 evenly spaced bins in rsq from inner to cut
   // rsq,e,f = value at lower edge of bin
   // de,df values = delta from lower edge to upper edge of bin
   // rsq,e,f are N in length so de,df arrays can compute difference
   // f is converted to f/r when stored in f[i]
   // e,f can match read-in values, else compute via spline interp
 
   if (tabstyle == LINEAR) {
     memory->create(tb->rsq,tablength,"pair:rsq");
     memory->create(tb->e,tablength,"pair:e");
     memory->create(tb->f,tablength,"pair:f");
     memory->create(tb->de,tlm1,"pair:de");
     memory->create(tb->df,tlm1,"pair:df");
 
     double r,rsq;
     for (int i = 0; i < tablength; i++) {
       rsq = tb->innersq + i*tb->delta;
       r = sqrt(rsq);
       tb->rsq[i] = rsq;
       if (tb->match) {
         tb->e[i] = tb->efile[i];
         tb->f[i] = tb->ffile[i]/r;
       } else {
         tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r);
         tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r;
       }
     }
 
     // forward differences across each of the N-1 bins
 
     for (int i = 0; i < tlm1; i++) {
       tb->de[i] = tb->e[i+1] - tb->e[i];
       tb->df[i] = tb->f[i+1] - tb->f[i];
     }
   }
 
   // cubic spline tables
   // N-1 evenly spaced bins in rsq from inner to cut
   // rsq,e,f = value at lower edge of bin
   // e2,f2 = spline coefficient for each bin
   // rsq,e,f,e2,f2 are N in length so have N-1 spline bins
   // f is converted to f/r after e is splined
   // e,f can match read-in values, else compute via spline interp
 
   if (tabstyle == SPLINE) {
     memory->create(tb->rsq,tablength,"pair:rsq");
     memory->create(tb->e,tablength,"pair:e");
     memory->create(tb->f,tablength,"pair:f");
     memory->create(tb->e2,tablength,"pair:e2");
     memory->create(tb->f2,tablength,"pair:f2");
 
     tb->deltasq6 = tb->delta*tb->delta / 6.0;
 
     // note: in the non-match branch f[i] is stored as plain f here,
     //   the division by r happens further down, just before f is splined
 
     double r,rsq;
     for (int i = 0; i < tablength; i++) {
       rsq = tb->innersq + i*tb->delta;
       r = sqrt(rsq);
       tb->rsq[i] = rsq;
       if (tb->match) {
         tb->e[i] = tb->efile[i];
         tb->f[i] = tb->ffile[i]/r;
       } else {
         tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r);
         tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r);
       }
     }
 
     // ep0,epn = dh/dg at inner and at cut
     // h(r) = e(r) and g(r) = r^2
     // dh/dg = (de/dr) / 2r = -f/2r
 
     double ep0 = - tb->f[0] / (2.0 * sqrt(tb->innersq));
     double epn = - tb->f[tlm1] / (2.0 * tb->cut);
     spline(tb->rsq,tb->e,tablength,ep0,epn,tb->e2);
 
     // fp0,fpn = dh/dg at inner and at cut
     // h(r) = f(r)/r and g(r) = r^2
     // dh/dg = (1/r df/dr - f/r^2) / 2r
     // dh/dg in secant approx = (f(r2)/r2 - f(r1)/r1) / (g(r2) - g(r1))
 
     // secant_factor = fraction of one rsq bin used as secant step
 
     double fp0,fpn;
     double secant_factor = 0.1;
     if (tb->fpflag) fp0 = (tb->fplo/sqrt(tb->innersq) - tb->f[0]/tb->innersq) /
       (2.0 * sqrt(tb->innersq));
     else {
       double rsq1 = tb->innersq;
       double rsq2 = rsq1 + secant_factor*tb->delta;
       fp0 = (splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,sqrt(rsq2)) /
              sqrt(rsq2) - tb->f[0] / sqrt(rsq1)) / (secant_factor*tb->delta);
     }
 
     // analytic form at the outer end is only used if the cutoff equals
     //   the last r value stored in rfile, else fall back to the secant
 
     if (tb->fpflag && tb->cut == tb->rfile[tb->ninput-1]) fpn =
       (tb->fphi/tb->cut - tb->f[tlm1]/(tb->cut*tb->cut)) / (2.0 * tb->cut);
     else {
       double rsq2 = tb->cut * tb->cut;
       double rsq1 = rsq2 - secant_factor*tb->delta;
       fpn = (tb->f[tlm1] / sqrt(rsq2) -
              splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,sqrt(rsq1)) /
              sqrt(rsq1)) / (secant_factor*tb->delta);
     }
 
     // divide every f[i] by r, then spline the result in rsq
 
     for (int i = 0; i < tablength; i++) tb->f[i] /= sqrt(tb->rsq[i]);
     spline(tb->rsq,tb->f,tablength,fp0,fpn,tb->f2);
   }
 
   // bitmapped linear tables
   // 2^N bins from inner to cut, spaced in bitmapped manner
   // f is converted to f/r when stored in f[i]
   // e,f can match read-in values, else compute via spline interp
 
   if (tabstyle == BITMAP) {
     double r;
     union_int_float_t rsq_lookup;
     int masklo,maskhi;
 
     // linear lookup tables of length ntable = 2^n
     // stored value = value at lower edge of bin
 
     init_bitmap(inner,tb->cut,tablength,masklo,maskhi,tb->nmask,tb->nshiftbits);
     int ntable = 1 << tablength;
     int ntablem1 = ntable - 1;
 
     memory->create(tb->rsq,ntable,"pair:rsq");
     memory->create(tb->e,ntable,"pair:e");
     memory->create(tb->f,ntable,"pair:f");
     memory->create(tb->de,ntable,"pair:de");
     memory->create(tb->df,ntable,"pair:df");
     memory->create(tb->drsq,ntable,"pair:drsq");
 
     // minrsq_lookup tracks the smallest rsq of any table entry
     // it starts from the bit pattern of index 0 combined with maskhi
 
     union_int_float_t minrsq_lookup;
     minrsq_lookup.i = 0 << tb->nshiftbits;
     minrsq_lookup.i |= maskhi;
 
     // rsq of entry i = float whose bit pattern is (i << nshiftbits) | masklo
     // if that lies below innersq, the maskhi pattern is used instead
 
     for (int i = 0; i < ntable; i++) {
       rsq_lookup.i = i << tb->nshiftbits;
       rsq_lookup.i |= masklo;
       if (rsq_lookup.f < tb->innersq) {
         rsq_lookup.i = i << tb->nshiftbits;
         rsq_lookup.i |= maskhi;
       }
       r = sqrtf(rsq_lookup.f);
       tb->rsq[i] = rsq_lookup.f;
       if (tb->match) {
         tb->e[i] = tb->efile[i];
         tb->f[i] = tb->ffile[i]/r;
       } else {
         tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r);
         tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r;
       }
       minrsq_lookup.f = MIN(minrsq_lookup.f,rsq_lookup.f);
     }
 
     // inner bound becomes the smallest rsq actually present in the table
 
     tb->innersq = minrsq_lookup.f;
 
     // differences to the next entry; drsq stores the inverse rsq spacing
 
     for (int i = 0; i < ntablem1; i++) {
       tb->de[i] = tb->e[i+1] - tb->e[i];
       tb->df[i] = tb->f[i+1] - tb->f[i];
       tb->drsq[i] = 1.0/(tb->rsq[i+1] - tb->rsq[i]);
     }
 
     // get the delta values for the last table entries
     // tables are connected periodically between 0 and ntablem1
 
     tb->de[ntablem1] = tb->e[0] - tb->e[ntablem1];
     tb->df[ntablem1] = tb->f[0] - tb->f[ntablem1];
     tb->drsq[ntablem1] = 1.0/(tb->rsq[0] - tb->rsq[ntablem1]);
 
     // get the correct delta values at itablemax
     // smallest r is in bin itablemin
     // largest r is in bin itablemax, which is itablemin-1,
     //   or ntablem1 if itablemin=0
 
     // deltas at itablemax only needed if corresponding rsq < cut*cut
     // if so, compute deltas between rsq and cut*cut
     //   if tb->match, data at cut*cut is unavailable, so we'll take
     //   deltas at itablemax-1 as a good approximation
 
     double e_tmp,f_tmp;
     int itablemin = minrsq_lookup.i & tb->nmask;
     itablemin >>= tb->nshiftbits;
     int itablemax = itablemin - 1;
     if (itablemin == 0) itablemax = ntablem1;
     int itablemaxm1 = itablemax - 1;
     if (itablemax == 0) itablemaxm1 = ntablem1;
     rsq_lookup.i = itablemax << tb->nshiftbits;
     rsq_lookup.i |= maskhi;
     if (rsq_lookup.f < tb->cut*tb->cut) {
       if (tb->match) {
         tb->de[itablemax] = tb->de[itablemaxm1];
         tb->df[itablemax] = tb->df[itablemaxm1];
         tb->drsq[itablemax] = tb->drsq[itablemaxm1];
       } else {
             rsq_lookup.f = tb->cut*tb->cut;
         r = sqrtf(rsq_lookup.f);
         e_tmp = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r);
         f_tmp = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r;
         tb->de[itablemax] = e_tmp - tb->e[itablemax];
         tb->df[itablemax] = f_tmp - tb->f[itablemax];
         tb->drsq[itablemax] = 1.0/(rsq_lookup.f - tb->rsq[itablemax]);
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    set all ptrs in a table to NULL, so can be freed safely
 ------------------------------------------------------------------------- */
 
 template<class DeviceType>
 void PairTableKokkos<DeviceType>::null_table(Table *tb)
 {
   // values read from the table file
   tb->rfile = NULL;
   tb->efile = NULL;
   tb->ffile = NULL;
 
   // spline coefficients of the file values
   tb->e2file = NULL;
   tb->f2file = NULL;
 
   // arrays built by compute_table()
   tb->rsq = NULL;
   tb->drsq = NULL;
   tb->e = NULL;
   tb->de = NULL;
   tb->f = NULL;
   tb->df = NULL;
   tb->e2 = NULL;
   tb->f2 = NULL;
 }
 
 /* ----------------------------------------------------------------------
    free all arrays in a table
 ------------------------------------------------------------------------- */
 
 template<class DeviceType>
 void PairTableKokkos<DeviceType>::free_table(Table *tb)
 {
   // addresses of every array member of the table, released in this order:
   // first the file values and their spline coefficients,
   // then the arrays built by compute_table()
   double **arrays[] = {
     &tb->rfile, &tb->efile, &tb->ffile, &tb->e2file, &tb->f2file,
     &tb->rsq, &tb->drsq, &tb->e, &tb->de,
     &tb->f, &tb->df, &tb->e2, &tb->f2
   };
   const int narrays = sizeof(arrays)/sizeof(arrays[0]);
 
   for (int k = 0; k < narrays; k++) memory->destroy(*arrays[k]);
 }
 
 /* ----------------------------------------------------------------------
    spline and splint routines modified from Numerical Recipes
 ------------------------------------------------------------------------- */
 
 template<class DeviceType>
 void PairTableKokkos<DeviceType>::spline(double *x, double *y, int n,
                        double yp1, double ypn, double *y2)
 {
   // compute second derivatives y2[0..n-1] of the cubic spline through
   // the points (x[i],y[i]); yp1,ypn are the first derivatives at the two
   // ends, a value above 0.99e30 selects a zero second derivative there
 
   const int last = n - 1;
   double *work = new double[n];     // scratch for the forward sweep
 
   // lower boundary condition
 
   if (yp1 > 0.99e30) {
     y2[0] = 0.0;
     work[0] = 0.0;
   } else {
     const double dx = x[1]-x[0];
     y2[0] = -0.5;
     work[0] = (3.0/dx) * ((y[1]-y[0]) / dx - yp1);
   }
 
   // forward sweep over the interior points
 
   for (int i = 1; i < last; i++) {
     const double dxlo = x[i]-x[i-1];
     const double dxhi = x[i+1]-x[i];
     const double span = x[i+1]-x[i-1];
     const double sig = dxlo / span;
     const double p = sig*y2[i-1] + 2.0;
     const double dslope = (y[i+1]-y[i]) / dxhi - (y[i]-y[i-1]) / dxlo;
     y2[i] = (sig-1.0) / p;
     work[i] = (6.0*dslope / span - sig*work[i-1]) / p;
   }
 
   // upper boundary condition
 
   double qn = 0.0;
   double un = 0.0;
   if (!(ypn > 0.99e30)) {
     const double dx = x[last]-x[last-1];
     qn = 0.5;
     un = (3.0/dx) * (ypn - (y[last]-y[last-1]) / dx);
   }
   y2[last] = (un-qn*work[last-1]) / (qn*y2[last-1] + 1.0);
 
   // back-substitution
 
   for (int k = last-1; k >= 0; k--) y2[k] = y2[k]*y2[k+1] + work[k];
 
   delete [] work;
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 double PairTableKokkos<DeviceType>::splint(double *xa, double *ya, double *y2a, int n, double x)
 {
   // evaluate at x the cubic spline defined by the points (xa[i],ya[i])
   // and the second derivatives y2a[i]
 
   // bisection: narrow [lo,hi] down to two adjacent indices
 
   int lo = 0;
   int hi = n-1;
   while (hi-lo > 1) {
     const int mid = (hi+lo) / 2;
     if (xa[mid] > x) hi = mid;
     else lo = mid;
   }
 
   // a,b = weights of the lower and upper end of the interval
 
   const double h = xa[hi]-xa[lo];
   const double a = (xa[hi]-x) / h;
   const double b = (x-xa[lo]) / h;
 
   return a*ya[lo] + b*ya[hi] +
     ((a*a*a-a)*y2a[lo] + (b*b*b-b)*y2a[hi]) * (h*h)/6.0;
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 template<class DeviceType>
 void PairTableKokkos<DeviceType>::write_restart(FILE *fp)
 {
   // only the global settings are written by this function
   this->write_restart_settings(fp);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 template<class DeviceType>
 void PairTableKokkos<DeviceType>::read_restart(FILE *fp)
 {
   // restore the global settings, then create the per-type-pair arrays
   this->read_restart_settings(fp);
   this->allocate();
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 template<class DeviceType>
 void PairTableKokkos<DeviceType>::write_restart_settings(FILE *fp)
 {
   // settings are written as raw ints, in exactly this order;
   // read_restart_settings() must read them back in the same order
   const int *fields[] = {&tabstyle,&tablength,&ewaldflag,&pppmflag,
                          &msmflag,&dispersionflag,&tip4pflag};
   const int nfields = sizeof(fields)/sizeof(fields[0]);
 
   for (int k = 0; k < nfields; k++) fwrite(fields[k],sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 template<class DeviceType>
 void PairTableKokkos<DeviceType>::read_restart_settings(FILE *fp)
 {
   // same fields and same order as in write_restart_settings()
   int *fields[] = {&tabstyle,&tablength,&ewaldflag,&pppmflag,
                    &msmflag,&dispersionflag,&tip4pflag};
   const int nfields = sizeof(fields)/sizeof(fields[0]);
 
   // only proc 0 touches the file
   if (comm->me == 0)
     for (int k = 0; k < nfields; k++) fread(fields[k],sizeof(int),1,fp);
 
   // every value is then broadcast from proc 0, one int at a time
   for (int k = 0; k < nfields; k++) MPI_Bcast(fields[k],1,MPI_INT,0,world);
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 double PairTableKokkos<DeviceType>::single(int i, int j, int itype, int jtype, double rsq,
                          double factor_coul, double factor_lj,
                          double &fforce)
 {
   // evaluate the tabulated interaction of one pair at squared distance rsq
   // fforce receives factor_lj * tabulated f value (tables store f/r)
   // return value is factor_lj * tabulated energy
   // i, j and factor_coul are not used
 
   const Table *tb = &tables[tabindex[itype][jtype]];
   if (rsq < tb->innersq) error->one(FLERR,"Pair distance < table inner cutoff");
 
   double fval,phi;
 
   if (tabstyle == LOOKUP || tabstyle == LINEAR || tabstyle == SPLINE) {
 
     // the three evenly spaced styles share the bin index and range check
 
     const int bin = static_cast<int> ((rsq-tb->innersq) * tb->invdelta);
     if (bin >= tablength - 1)
       error->one(FLERR,"Pair distance > table outer cutoff");
 
     if (tabstyle == LOOKUP) {
       fval = tb->f[bin];
       phi = tb->e[bin];
     } else if (tabstyle == LINEAR) {
       const double frac = (rsq - tb->rsq[bin]) * tb->invdelta;
       fval = tb->f[bin] + frac*tb->df[bin];
       phi = tb->e[bin] + frac*tb->de[bin];
     } else {
       const double b = (rsq - tb->rsq[bin]) * tb->invdelta;
       const double a = 1.0 - b;
       fval = a * tb->f[bin] + b * tb->f[bin+1] +
         ((a*a*a-a)*tb->f2[bin] + (b*b*b-b)*tb->f2[bin+1]) *
         tb->deltasq6;
       phi = a * tb->e[bin] + b * tb->e[bin+1] +
         ((a*a*a-a)*tb->e2[bin] + (b*b*b-b)*tb->e2[bin+1]) * tb->deltasq6;
     }
 
   } else {
 
     // bitmapped tables: the bin index is cut out of the bit pattern of rsq
 
     union_int_float_t lookup;
     lookup.f = rsq;
     const int bin = (lookup.i & tb->nmask) >> tb->nshiftbits;
     const double frac = (lookup.f - tb->rsq[bin]) * tb->drsq[bin];
     fval = tb->f[bin] + frac*tb->df[bin];
     phi = tb->e[bin] + frac*tb->de[bin];
   }
 
   fforce = factor_lj * fval;
   return factor_lj*phi;
 }
 
 /* ----------------------------------------------------------------------
    return the Coulomb cutoff for tabled potentials
    called by KSpace solvers which require that all pairwise cutoffs be the same
    loop over all tables not just those indexed by tabindex[i][j] since
      no way to know which tables are active since pair::init() not yet called
 ------------------------------------------------------------------------- */
 
 template<class DeviceType>
 void *PairTableKokkos<DeviceType>::extract(const char *str, int &dim)
 {
   // "cut_coul" is the only quantity that can be extracted
   if (strcmp(str,"cut_coul")) return NULL;
   if (ntables == 0) error->all(FLERR,"All pair coeffs are not set");
 
   // all tables must share the cutoff of the first one
   Table *first = tables;
   for (Table *t = tables+1; t < tables+ntables; ++t) {
     if (t->cut != first->cut)
       error->all(FLERR,
                  "Pair table cutoffs must all be equal to use with KSpace");
   }
 
   // dim = 0: the returned pointer refers to a single value
   dim = 0;
   return &first->cut;
 }
 
 template<class DeviceType>
 void PairTableKokkos<DeviceType>::init_style()
 {
   // request a neighbor list and configure the request that was just added
   // (the code takes the last entry of neighbor->requests to be that request)
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
   neighflag = lmp->kokkos->neighflag;
   int irequest = neighbor->nrequest - 1;
 
   // kokkos_host is set only if DeviceType is the host type and differs
   //   from the device type; kokkos_device if DeviceType is the device type
 
   neighbor->requests[irequest]->
     kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value &&
     !Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
   neighbor->requests[irequest]->
     kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
 
   // translate the global Kokkos neighbor flag into request flags
 
   if (neighflag == FULL) {
     neighbor->requests[irequest]->full = 1;
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full_cluster = 0;
   } else if (neighflag == HALF || neighflag == HALFTHREAD) {
     neighbor->requests[irequest]->full = 0;
     neighbor->requests[irequest]->half = 1;
     neighbor->requests[irequest]->full_cluster = 0;
   } else if (neighflag == N2) {
     neighbor->requests[irequest]->full = 0;
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->full_cluster = 0;
   } else if (neighflag == FULLCLUSTER) {
     neighbor->requests[irequest]->full_cluster = 1;
     neighbor->requests[irequest]->full = 1;
     neighbor->requests[irequest]->half = 0;
   } else {
     // message names this pair style (it used to say lj/cut/kk)
     error->all(FLERR,"Cannot use chosen neighbor list style with table/kk");
   }
 }
 
 /*
 template <class DeviceType> template<int NEIGHFLAG>
 KOKKOS_INLINE_FUNCTION
 void PairTableKokkos<DeviceType>::
 ev_tally(EV_FLOAT &ev, const int &i, const int &j, const F_FLOAT &fpair,
          const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const
 {
   const int EFLAG = eflag;
   const int NEWTON_PAIR = newton_pair;
   const int VFLAG = vflag_either;
 
   if (EFLAG) {
     if (eflag_atom) {
       E_FLOAT epairhalf = 0.5 * (ev.evdwl + ev.ecoul);
       if (NEWTON_PAIR || i < nlocal) eatom[i] += epairhalf;
       if (NEWTON_PAIR || j < nlocal) eatom[j] += epairhalf;
     }
   }
 
   if (VFLAG) {
     const E_FLOAT v0 = delx*delx*fpair;
     const E_FLOAT v1 = dely*dely*fpair;
     const E_FLOAT v2 = delz*delz*fpair;
     const E_FLOAT v3 = delx*dely*fpair;
     const E_FLOAT v4 = delx*delz*fpair;
     const E_FLOAT v5 = dely*delz*fpair;
 
     if (vflag_global) {
       if (NEIGHFLAG) {
         if (NEWTON_PAIR) {
           ev.v[0] += v0;
           ev.v[1] += v1;
           ev.v[2] += v2;
           ev.v[3] += v3;
           ev.v[4] += v4;
           ev.v[5] += v5;
         } else {
           if (i < nlocal) {
             ev.v[0] += 0.5*v0;
             ev.v[1] += 0.5*v1;
             ev.v[2] += 0.5*v2;
             ev.v[3] += 0.5*v3;
             ev.v[4] += 0.5*v4;
             ev.v[5] += 0.5*v5;
           }
           if (j < nlocal) {
             ev.v[0] += 0.5*v0;
             ev.v[1] += 0.5*v1;
             ev.v[2] += 0.5*v2;
             ev.v[3] += 0.5*v3;
             ev.v[4] += 0.5*v4;
             ev.v[5] += 0.5*v5;
           }
         }
       } else {
         ev.v[0] += 0.5*v0;
         ev.v[1] += 0.5*v1;
         ev.v[2] += 0.5*v2;
         ev.v[3] += 0.5*v3;
         ev.v[4] += 0.5*v4;
         ev.v[5] += 0.5*v5;
       }
     }
 
     if (vflag_atom) {
       if (NEWTON_PAIR || i < nlocal) {
         d_vatom(i,0) += 0.5*v0;
         d_vatom(i,1) += 0.5*v1;
         d_vatom(i,2) += 0.5*v2;
         d_vatom(i,3) += 0.5*v3;
         d_vatom(i,4) += 0.5*v4;
         d_vatom(i,5) += 0.5*v5;
       }
       if (NEWTON_PAIR || (NEIGHFLAG && j < nlocal)) {
         d_vatom(j,0) += 0.5*v0;
         d_vatom(j,1) += 0.5*v1;
         d_vatom(j,2) += 0.5*v2;
         d_vatom(j,3) += 0.5*v3;
         d_vatom(j,4) += 0.5*v4;
         d_vatom(j,5) += 0.5*v5;
       }
     }
   }
 }
 */
 template<class DeviceType>
 void PairTableKokkos<DeviceType>::cleanup_copy() {
   // WHY needed: this prevents parent copy from deallocating any arrays
   // all array handles of this object are dropped without freeing anything
   h_table = NULL;
   d_table = NULL;
   vatom = NULL;
   eatom = NULL;
   cutsq = NULL;
   allocated = 0;
 }
 
 // explicit instantiations of the class template
 // the device-type version is always built; the host-type version is only
 //   built when KOKKOS_HAVE_CUDA is defined (presumably because the two
 //   types coincide otherwise - confirm against the Kokkos type definitions)
 template class PairTableKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class PairTableKokkos<LMPHostType>;
 #endif
 
diff --git a/src/KSPACE/pair_born_coul_long.cpp b/src/KSPACE/pair_born_coul_long.cpp
index 1c83e0a4b..35b8b517e 100644
--- a/src/KSPACE/pair_born_coul_long.cpp
+++ b/src/KSPACE/pair_born_coul_long.cpp
@@ -1,582 +1,582 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Ahmed Ismail (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_born_coul_long.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 /* ---------------------------------------------------------------------- */
 
 PairBornCoulLong::PairBornCoulLong(LAMMPS *lmp) : Pair(lmp)
 {
   // no Coulomb table yet; the destructor tests ftable before freeing
   ftable = NULL;
   // style flags (presumably consumed by the Pair base class -- confirm)
   writedata = 1;
   pppmflag = 1;
   ewaldflag = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairBornCoulLong::~PairBornCoulLong()
 {
   // the per-type arrays exist only after allocate() has set the flag
   if (allocated) {
     // coefficients stored by coeff()
     memory->destroy(a);
     memory->destroy(rho);
     memory->destroy(sigma);
     memory->destroy(c);
     memory->destroy(d);
     memory->destroy(cut_lj);
     // quantities filled in by init_one()
     memory->destroy(cut_ljsq);
     memory->destroy(rhoinv);
     memory->destroy(born1);
     memory->destroy(born2);
     memory->destroy(born3);
     memory->destroy(offset);
     // remaining arrays created by allocate()
     memory->destroy(cutsq);
     memory->destroy(setflag);
   }
   // Coulomb lookup tables are tracked separately through ftable
   if (ftable) free_tables();
 }
 
 /* ---------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    accumulate Born + real-space Coulomb pair forces into atom->f for every
    pair in the neighbor list, and tally energy/virial when requested
    eflag, vflag = energy / virial request flags, passed to ev_setup()
 ------------------------------------------------------------------------- */
 void PairBornCoulLong::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itable,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double fraction,table;
   double rsq,r2inv,r6inv,forcecoul,forceborn,factor_coul,factor_lj;
   double grij,expm2,prefactor,t,erfc;
   double r,rexp;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   // local aliases for per-atom data and force-field constants
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       // sbmask(j) picks the special_lj/special_coul weight for this pair;
       // masking with NEIGHMASK leaves the atom index used below
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
 
         if (rsq < cut_coulsq) {
           if (!ncoultablebits || rsq <= tabinnersq) {
             // analytic branch: erfc holds a polynomial-in-t times
             // exp(-grij^2) expression built from EWALD_P and A1..A5
             r = sqrt(rsq);
             grij = g_ewald * r;
             expm2 = exp(-grij*grij);
             t = 1.0 / (1.0 + EWALD_P*grij);
             erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
             prefactor = qqrd2e * qtmp*q[j]/r;
             forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
             if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
           } else {
             // tabulated branch: the float bit pattern of rsq, masked and
             // shifted, is the table index; fraction interpolates linearly
             union_int_float_t rsq_lookup;
             rsq_lookup.f = rsq;
             itable = rsq_lookup.i & ncoulmask;
             itable >>= ncoulshiftbits;
             fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
             table = ftable[itable] + fraction*dftable[itable];
             forcecoul = qtmp*q[j] * table;
             if (factor_coul < 1.0) {
               // prefactor is assigned in this branch only when it is needed
               table = ctable[itable] + fraction*dctable[itable];
               prefactor = qtmp*q[j] * table;
               forcecoul -= (1.0-factor_coul)*prefactor;
             }
           }
         } else forcecoul = 0.0;
 
         // Born term: exponential repulsion plus r^-6 and r^-8 contributions
         if (rsq < cut_ljsq[itype][jtype]) {
           r = sqrt(rsq);
           r6inv = r2inv*r2inv*r2inv;
           rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
           forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv
             + born3[itype][jtype]*r2inv*r6inv;
         } else forceborn = 0.0;
 
         // fpair multiplies the separation vector (del*), hence the r2inv
         fpair = (forcecoul + factor_lj*forceborn) * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         // the reaction on j is applied when newton_pair is set or j is local
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         // energies reuse erfc/prefactor or itable/fraction from the
         // force branch taken above (same conditions are re-tested)
         if (eflag) {
           if (rsq < cut_coulsq) {
             if (!ncoultablebits || rsq <= tabinnersq)
               ecoul = prefactor*erfc;
             else {
               table = etable[itable] + fraction*detable[itable];
               ecoul = qtmp*q[j] * table;
             }
             if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
           } else ecoul = 0.0;
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv
               + d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairBornCoulLong::allocate()
 {
   // every per-type array is (ntypes+1) x (ntypes+1)
   const int dim = atom->ntypes + 1;
   allocated = 1;
 
   // no coefficients set yet: clear the upper triangle of setflag
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype < dim; itype++) {
     for (int jtype = itype; jtype < dim; jtype++) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   // cutoffs
   memory->create(cut_lj,dim,dim,"pair:cut_lj");
   memory->create(cut_ljsq,dim,dim,"pair:cut_ljsq");
   // coefficients stored by coeff()
   memory->create(a,dim,dim,"pair:a");
   memory->create(rho,dim,dim,"pair:rho");
   memory->create(sigma,dim,dim,"pair:sigma");
   memory->create(c,dim,dim,"pair:c");
   memory->create(d,dim,dim,"pair:d");
   // quantities filled in by init_one()
   memory->create(rhoinv,dim,dim,"pair:rhoinv");
   memory->create(born1,dim,dim,"pair:born1");
   memory->create(born2,dim,dim,"pair:born2");
   memory->create(born3,dim,dim,"pair:born3");
   memory->create(offset,dim,dim,"pair:offset");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairBornCoulLong::settings(int narg, char **arg)
 {
   if (narg < 1 || narg > 2) error->all(FLERR,"Illegal pair_style command");
 
   // arg[0] = global Born cutoff
   // arg[1] = Coulomb cutoff (optional, defaults to the Born cutoff)
 
   cut_lj_global = force->numeric(FLERR,arg[0]);
   if (narg == 1) cut_coul = cut_lj_global;
   else cut_coul = force->numeric(FLERR,arg[1]);
 
   // reset cutoffs that have been explicitly set
   // the inner loop starts at j = i so like-type (i,i) pairs are reset too
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut_lj[i][j] = cut_lj_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairBornCoulLong::coeff(int narg, char **arg)
 {
   if (narg < 7 || narg > 8) error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double a_one = force->numeric(FLERR,arg[2]);
   double rho_one = force->numeric(FLERR,arg[3]);
   double sigma_one = force->numeric(FLERR,arg[4]);
   if (rho_one <= 0) error->all(FLERR,"Incorrect args for pair coefficients");
   double c_one = force->numeric(FLERR,arg[5]);
   double d_one = force->numeric(FLERR,arg[6]);
 
   double cut_lj_one = cut_lj_global;
   if (narg == 8) cut_lj_one = force->numeric(FLERR,arg[7]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       a[i][j] = a_one;
       rho[i][j] = rho_one;
       sigma[i][j] = sigma_one;
       c[i][j] = c_one;
       d[i][j] = d_one;
       cut_lj[i][j] = cut_lj_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    fill the derived per-type quantities for pair (i,j), mirror them into
    (j,i), optionally set etail_ij/ptail_ij, and return the larger of the
    pair's Born cutoff and the Coulomb cutoff
    errors out if coefficients for (i,j) were never set
 ------------------------------------------------------------------------- */
 double PairBornCoulLong::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   double cut = MAX(cut_lj[i][j],cut_coul);
   cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
 
   // prefactors used by the force expression in compute() and single()
   rhoinv[i][j] = 1.0/rho[i][j];
   born1[i][j] = a[i][j]/rho[i][j];
   born2[i][j] = 6.0*c[i][j];
   born3[i][j] = 8.0*d[i][j];
 
   // offset = Born energy evaluated at r = cut_lj; it is subtracted from
   // the pair energy in compute() and single()
   if (offset_flag) {
     double rexp = exp((sigma[i][j]-cut_lj[i][j])*rhoinv[i][j]);
     offset[i][j] = a[i][j]*rexp - c[i][j]/pow(cut_lj[i][j],6.0) +
       d[i][j]/pow(cut_lj[i][j],8.0);
   } else offset[i][j] = 0.0;
 
   // copy into the (j,i) entries so lookups work in either type order
   // (rho and cut_lj themselves are not mirrored here)
   cut_ljsq[j][i] = cut_ljsq[i][j];
   a[j][i] = a[i][j];
   c[j][i] = c[i][j];
   d[j][i] = d[i][j];
   rhoinv[j][i] = rhoinv[i][j];
   sigma[j][i] = sigma[i][j];
   born1[j][i] = born1[i][j];
   born2[j][i] = born2[i][j];
   born3[j][i] = born3[i][j];
   offset[j][i] = offset[i][j];
 
   // compute I,J contribution to long-range tail correction
   // count total # of atoms of type I and J via Allreduce
 
   if (tail_flag) {
      int *type = atom->type;
      int nlocal = atom->nlocal;
 
      // count[0]/count[1] = local atoms of type i / type j,
      // summed over all procs into all[]
      double count[2],all[2];
      count[0] = count[1] = 0.0;
      for (int k = 0; k < nlocal; k++) {
        if (type[k] == i) count[0] += 1.0;
        if (type[k] == j) count[1] += 1.0;
      }
      MPI_Allreduce(count,all,2,MPI_DOUBLE,MPI_SUM,world);
 
      // powers of rho and of the Born cutoff used in the closed-form tails
      double rho1 = rho[i][j];
      double rho2 = rho1*rho1;
      double rho3 = rho2*rho1;
      double rc = cut_lj[i][j];
      double rc2 = rc*rc;
      double rc3 = rc2*rc;
      double rc5 = rc3*rc2;
      etail_ij = 2.0*MY_PI*all[0]*all[1] *
        (a[i][j]*exp((sigma[i][j]-rc)/rho1)*rho1*
         (rc2 + 2.0*rho1*rc + 2.0*rho2) -
         c[i][j]/(3.0*rc3) + d[i][j]/(5.0*rc5));
      ptail_ij = (-1/3.0)*2.0*MY_PI*all[0]*all[1] *
        (-a[i][j]*exp((sigma[i][j]-rc)/rho1) *
         (rc3 + 3.0*rho1*rc2 + 6.0*rho2*rc + 6.0*rho3) +
         2.0*c[i][j]/rc3 - 8.0*d[i][j]/(5.0*rc5));
    }
 
   return cut;
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    check prerequisites (per-atom charge, a KSpace style), cache the squared
    Coulomb cutoff and g_ewald, issue the neighbor request, and build the
    Coulomb tables when ncoultablebits is nonzero
 ------------------------------------------------------------------------- */
 void PairBornCoulLong::init_style()
 {
   // compute() and single() read atom->q
   if (!atom->q_flag)
     error->all(FLERR,"Pair style born/coul/long requires atom attribute q");
 
   // compared against rsq in compute() and single()
   cut_coulsq = cut_coul * cut_coul;
 
   // ensure use of KSpace long-range solver, set g_ewald
 
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style requires a KSpace style");
   g_ewald = force->kspace->g_ewald;
 
   // register this pair style with the neighbor object
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
   
   // setup force tables
 
   if (ncoultablebits) init_tables(cut_coul,NULL);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairBornCoulLong::write_restart(FILE *fp)
 {
   // global settings first, then one record per (i,j) with j >= i
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       // the flag is always written; coefficients follow only if it is set
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
       if (!setflag[itype][jtype]) continue;
 
       fwrite(&a[itype][jtype],sizeof(double),1,fp);
       fwrite(&rho[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigma[itype][jtype],sizeof(double),1,fp);
       fwrite(&c[itype][jtype],sizeof(double),1,fp);
       fwrite(&d[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut_lj[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairBornCoulLong::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   // only proc 0 reads the file; every value is then broadcast from it
   const bool reader = (comm->me == 0);
   const int ntypes = atom->ntypes;
 
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       if (reader) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
 
       // coefficients are stored only for pairs whose flag is set
       if (!setflag[itype][jtype]) continue;
 
       if (reader) {
         fread(&a[itype][jtype],sizeof(double),1,fp);
         fread(&rho[itype][jtype],sizeof(double),1,fp);
         fread(&sigma[itype][jtype],sizeof(double),1,fp);
         fread(&c[itype][jtype],sizeof(double),1,fp);
         fread(&d[itype][jtype],sizeof(double),1,fp);
         fread(&cut_lj[itype][jtype],sizeof(double),1,fp);
       }
       MPI_Bcast(&a[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&rho[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&sigma[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&c[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&d[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut_lj[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairBornCoulLong::write_restart_settings(FILE *fp)
 {
   // field order must match read_restart_settings()
   // the two cutoffs (doubles)
   fwrite(&cut_lj_global,sizeof(cut_lj_global),1,fp);
   fwrite(&cut_coul,sizeof(cut_coul),1,fp);
   // integer flags
   fwrite(&offset_flag,sizeof(offset_flag),1,fp);
   fwrite(&mix_flag,sizeof(mix_flag),1,fp);
   fwrite(&tail_flag,sizeof(tail_flag),1,fp);
   // Coulomb table settings (int, then double)
   fwrite(&ncoultablebits,sizeof(ncoultablebits),1,fp);
   fwrite(&tabinner,sizeof(tabinner),1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairBornCoulLong::read_restart_settings(FILE *fp)
 {
   // proc 0 reads the fields in the order write_restart_settings() wrote them
   const bool reader = (comm->me == 0);
   if (reader) {
     fread(&cut_lj_global,sizeof(cut_lj_global),1,fp);
     fread(&cut_coul,sizeof(cut_coul),1,fp);
     fread(&offset_flag,sizeof(offset_flag),1,fp);
     fread(&mix_flag,sizeof(mix_flag),1,fp);
     fread(&tail_flag,sizeof(tail_flag),1,fp);
     fread(&ncoultablebits,sizeof(ncoultablebits),1,fp);
     fread(&tabinner,sizeof(tabinner),1,fp);
   }
 
   // every proc then receives the values from proc 0
   MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
   MPI_Bcast(&tail_flag,1,MPI_INT,0,world);
   MPI_Bcast(&ncoultablebits,1,MPI_INT,0,world);
   MPI_Bcast(&tabinner,1,MPI_DOUBLE,0,world);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to data file
 ------------------------------------------------------------------------- */
 
 void PairBornCoulLong::write_data(FILE *fp)
 {
   // one line per type: index followed by the like-type (t,t) coefficients
   const int ntypes = atom->ntypes;
   for (int t = 1; t <= ntypes; t++) {
     fprintf(fp,"%d %g %g %g %g %g\n",
             t,a[t][t],rho[t][t],sigma[t][t],c[t][t],d[t][t]);
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes all pairs to data file
 ------------------------------------------------------------------------- */
 
 void PairBornCoulLong::write_data_all(FILE *fp)
 {
   // one line per (i,j) with j >= i: both indices, coefficients, Born cutoff
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       fprintf(fp,"%d %d %g %g %g %g %g %g\n",itype,jtype,
               a[itype][jtype],rho[itype][jtype],sigma[itype][jtype],
               c[itype][jtype],d[itype][jtype],cut_lj[itype][jtype]);
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    energy and force factor for one pair of atoms i,j
    rsq = squared separation, itype/jtype = their types
    factor_coul, factor_lj = weights applied to the Coulomb and Born parts
    fforce (output) = (forcecoul + factor_lj*forceborn) / rsq
    returns the Coulomb energy plus factor_lj times the Born energy,
    each included only inside its own cutoff
    note: rsq is not compared against cutsq here
 ------------------------------------------------------------------------- */
 double PairBornCoulLong::single(int i, int j, int itype, int jtype,
                                 double rsq,
                                 double factor_coul, double factor_lj,
                                 double &fforce)
 {
   double r2inv,r6inv,r,rexp,grij,expm2,t,erfc,prefactor;
   double fraction,table,forcecoul,forceborn,phicoul,phiborn;
   int itable;
 
   r2inv = 1.0/rsq;
   if (rsq < cut_coulsq) {
     if (!ncoultablebits || rsq <= tabinnersq) {
       // analytic branch, same expressions as in compute()
       r = sqrt(rsq);
       grij = g_ewald * r;
       expm2 = exp(-grij*grij);
       t = 1.0 / (1.0 + EWALD_P*grij);
       erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
       prefactor = force->qqrd2e * atom->q[i]*atom->q[j]/r;
       forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
       if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
     } else {
       // tabulated branch: index from the float bit pattern of rsq
       union_int_float_t rsq_lookup;
       rsq_lookup.f = rsq;
       itable = rsq_lookup.i & ncoulmask;
       itable >>= ncoulshiftbits;
       fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
       table = ftable[itable] + fraction*dftable[itable];
       forcecoul = atom->q[i]*atom->q[j] * table;
       if (factor_coul < 1.0) {
         // prefactor is assigned in this branch only when it is needed
         table = ctable[itable] + fraction*dctable[itable];
         prefactor = atom->q[i]*atom->q[j] * table;
         forcecoul -= (1.0-factor_coul)*prefactor;
       }
     }
   } else forcecoul = 0.0;
   if (rsq < cut_ljsq[itype][jtype]) {
     r6inv = r2inv*r2inv*r2inv;
     r = sqrt(rsq);
     rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
     forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv +
       born3[itype][jtype]*r2inv*r6inv;
   } else forceborn = 0.0;
   fforce = (forcecoul + factor_lj*forceborn) * r2inv;
 
   // energy: reuses erfc/prefactor or itable/fraction from the force
   // branch taken above (the same conditions are re-tested)
   double eng = 0.0;
   if (rsq < cut_coulsq) {
     if (!ncoultablebits || rsq <= tabinnersq)
       phicoul = prefactor*erfc;
     else {
       table = etable[itable] + fraction*detable[itable];
       phicoul = atom->q[i]*atom->q[j] * table;
     }
     if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*prefactor;
     eng += phicoul;
   }
   if (rsq < cut_ljsq[itype][jtype]) {
     phiborn = a[itype][jtype]*rexp - c[itype][jtype]*r6inv +
       d[itype][jtype]*r2inv*r6inv - offset[itype][jtype];
     eng += factor_lj*phiborn;
   }
   return eng;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void *PairBornCoulLong::extract(const char *str, int &dim)
 {
   // dim is always reported as 0; "cut_coul" is the only name recognized
   dim = 0;
   const bool wants_cut_coul = (strcmp(str,"cut_coul") == 0);
   if (!wants_cut_coul) return NULL;
   return (void *) &cut_coul;
 }
diff --git a/src/KSPACE/pair_buck_coul_long.cpp b/src/KSPACE/pair_buck_coul_long.cpp
index d49da2dea..7db721190 100644
--- a/src/KSPACE/pair_buck_coul_long.cpp
+++ b/src/KSPACE/pair_buck_coul_long.cpp
@@ -1,551 +1,551 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_buck_coul_long.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 /* ---------------------------------------------------------------------- */
 
 PairBuckCoulLong::PairBuckCoulLong(LAMMPS *lmp) : Pair(lmp)
 {
   // no Coulomb table yet; the destructor tests ftable before freeing
   ftable = NULL;
   // style flags (presumably consumed by the Pair base class -- confirm)
   writedata = 1;
   pppmflag = 1;
   ewaldflag = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairBuckCoulLong::~PairBuckCoulLong()
 {
   // the per-type arrays exist only after allocate() has set the flag
   if (allocated) {
     // coefficients stored by coeff()
     memory->destroy(a);
     memory->destroy(rho);
     memory->destroy(c);
     memory->destroy(cut_lj);
     // quantities filled in by init_one()
     memory->destroy(cut_ljsq);
     memory->destroy(rhoinv);
     memory->destroy(buck1);
     memory->destroy(buck2);
     memory->destroy(offset);
     // remaining arrays created by allocate()
     memory->destroy(cutsq);
     memory->destroy(setflag);
   }
   // Coulomb lookup tables are tracked separately through ftable
   if (ftable) free_tables();
 }
 
 /* ---------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    accumulate Buckingham + real-space Coulomb pair forces into atom->f for
    every pair in the neighbor list, and tally energy/virial when requested
    eflag, vflag = energy / virial request flags, passed to ev_setup()
 ------------------------------------------------------------------------- */
 void PairBuckCoulLong::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itable,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double fraction,table;
   double rsq,r2inv,r6inv,forcecoul,forcebuck,factor_coul,factor_lj;
   double grij,expm2,prefactor,t,erfc;
   double r,rexp;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   // local aliases for per-atom data and force-field constants
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       // sbmask(j) picks the special_lj/special_coul weight for this pair;
       // masking with NEIGHMASK leaves the atom index used below
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;          
         if (rsq < cut_coulsq) {
           if (!ncoultablebits || rsq <= tabinnersq) {
             // analytic branch: erfc holds a polynomial-in-t times
             // exp(-grij^2) expression built from EWALD_P and A1..A5
             r = sqrt(rsq);
             grij = g_ewald * r;
             expm2 = exp(-grij*grij);
             t = 1.0 / (1.0 + EWALD_P*grij);
             erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
             prefactor = qqrd2e * qtmp*q[j]/r;
             forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
             if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
           } else {
             // tabulated branch: the float bit pattern of rsq, masked and
             // shifted, is the table index; fraction interpolates linearly
             union_int_float_t rsq_lookup;
             rsq_lookup.f = rsq;
             itable = rsq_lookup.i & ncoulmask;
             itable >>= ncoulshiftbits;
             fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
             table = ftable[itable] + fraction*dftable[itable];
             forcecoul = qtmp*q[j] * table;
             if (factor_coul < 1.0) {
               // prefactor is assigned in this branch only when it is needed
               table = ctable[itable] + fraction*dctable[itable];
               prefactor = qtmp*q[j] * table;
               forcecoul -= (1.0-factor_coul)*prefactor;
             }
           }
         } else forcecoul = 0.0;
 
         // Buckingham term: exponential repulsion minus an r^-6 contribution
         if (rsq < cut_ljsq[itype][jtype]) {
           r = sqrt(rsq);
           r6inv = r2inv*r2inv*r2inv;
           rexp = exp(-r*rhoinv[itype][jtype]);
           forcebuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv;
         } else forcebuck = 0.0;
 
         // fpair multiplies the separation vector (del*), hence the r2inv
         fpair = (forcecoul + factor_lj*forcebuck) * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         // the reaction on j is applied when newton_pair is set or j is local
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         // energies reuse erfc/prefactor or itable/fraction from the
         // force branch taken above (same conditions are re-tested)
         if (eflag) {
           if (rsq < cut_coulsq) {
             if (!ncoultablebits || rsq <= tabinnersq)
               ecoul = prefactor*erfc;
             else {
               table = etable[itable] + fraction*detable[itable];
               ecoul = qtmp*q[j] * table;
             }
             if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
           } else ecoul = 0.0;
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv -
               offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairBuckCoulLong::allocate()
 {
   // every per-type array is (ntypes+1) x (ntypes+1)
   const int dim = atom->ntypes + 1;
   allocated = 1;
 
   // no coefficients set yet: clear the upper triangle of setflag
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype < dim; itype++) {
     for (int jtype = itype; jtype < dim; jtype++) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   // cutoffs
   memory->create(cut_lj,dim,dim,"pair:cut_lj");
   memory->create(cut_ljsq,dim,dim,"pair:cut_ljsq");
   // coefficients stored by coeff()
   memory->create(a,dim,dim,"pair:a");
   memory->create(rho,dim,dim,"pair:rho");
   memory->create(c,dim,dim,"pair:c");
   // quantities filled in by init_one()
   memory->create(rhoinv,dim,dim,"pair:rhoinv");
   memory->create(buck1,dim,dim,"pair:buck1");
   memory->create(buck2,dim,dim,"pair:buck2");
   memory->create(offset,dim,dim,"pair:offset");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairBuckCoulLong::settings(int narg, char **arg)
 {
   if (narg < 1 || narg > 2) error->all(FLERR,"Illegal pair_style command");
 
   // arg[0] = global Buckingham cutoff
   // arg[1] = Coulomb cutoff (optional, defaults to the Buckingham cutoff)
 
   cut_lj_global = force->numeric(FLERR,arg[0]);
   if (narg == 1) cut_coul = cut_lj_global;
   else cut_coul = force->numeric(FLERR,arg[1]);
 
   // reset cutoffs that have been explicitly set
   // the inner loop starts at j = i so like-type (i,i) pairs are reset too
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut_lj[i][j] = cut_lj_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairBuckCoulLong::coeff(int narg, char **arg)
 {
   if (narg < 5 || narg > 6) 
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double a_one = force->numeric(FLERR,arg[2]);
   double rho_one = force->numeric(FLERR,arg[3]);
   if (rho_one <= 0) error->all(FLERR,"Incorrect args for pair coefficients");
   double c_one = force->numeric(FLERR,arg[4]);
 
   double cut_lj_one = cut_lj_global;
   if (narg == 6) cut_lj_one = force->numeric(FLERR,arg[5]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       a[i][j] = a_one;
       rho[i][j] = rho_one;
       c[i][j] = c_one;
       cut_lj[i][j] = cut_lj_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    fill the derived per-type quantities for pair (i,j), mirror them into
    (j,i), optionally set etail_ij/ptail_ij, and return the larger of the
    pair's Buckingham cutoff and the Coulomb cutoff
    errors out if coefficients for (i,j) were never set
 ------------------------------------------------------------------------- */
 double PairBuckCoulLong::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   double cut = MAX(cut_lj[i][j],cut_coul);
   cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
 
   // prefactors used by the force expression in compute()
   rhoinv[i][j] = 1.0/rho[i][j];
   buck1[i][j] = a[i][j]/rho[i][j];
   buck2[i][j] = 6.0*c[i][j];
 
   // offset = Buckingham energy evaluated at r = cut_lj; it is subtracted
   // from the pair energy in compute()
   if (offset_flag) {
     double rexp = exp(-cut_lj[i][j]/rho[i][j]);
     offset[i][j] = a[i][j]*rexp - c[i][j]/pow(cut_lj[i][j],6.0);
   } else offset[i][j] = 0.0;
 
   // copy into the (j,i) entries so lookups work in either type order
   // (rho and cut_lj themselves are not mirrored here)
   cut_ljsq[j][i] = cut_ljsq[i][j];
   a[j][i] = a[i][j];
   c[j][i] = c[i][j];
   rhoinv[j][i] = rhoinv[i][j];
   buck1[j][i] = buck1[i][j];
   buck2[j][i] = buck2[i][j];
   offset[j][i] = offset[i][j];
 
   // compute I,J contribution to long-range tail correction
   // count total # of atoms of type I and J via Allreduce
 
   if (tail_flag) {
     int *type = atom->type;
     int nlocal = atom->nlocal;
 
     // count[0]/count[1] = local atoms of type i / type j,
     // summed over all procs into all[]
     double count[2],all[2];
     count[0] = count[1] = 0.0;
     for (int k = 0; k < nlocal; k++) {
       if (type[k] == i) count[0] += 1.0;
       if (type[k] == j) count[1] += 1.0;
     }
     MPI_Allreduce(count,all,2,MPI_DOUBLE,MPI_SUM,world);
 
     // powers of rho and of the cutoff used in the closed-form tails
     double rho1 = rho[i][j];
     double rho2 = rho1*rho1;
     double rho3 = rho2*rho1;
     double rc = cut_lj[i][j];
     double rc2 = rc*rc;
     double rc3 = rc2*rc;
     etail_ij = 2.0*MY_PI*all[0]*all[1]*
       (a[i][j]*exp(-rc/rho1)*rho1*(rc2 + 2.0*rho1*rc + 2.0*rho2) -
        c[i][j]/(3.0*rc3));
     ptail_ij = (-1/3.0)*2.0*MY_PI*all[0]*all[1]*
       (-a[i][j]*exp(-rc/rho1)*
        (rc3 + 3.0*rho1*rc2 + 6.0*rho2*rc + 6.0*rho3) + 2.0*c[i][j]/rc3);
   }
 
   return cut;
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    check prerequisites (per-atom charge, a KSpace style), cache the squared
    Coulomb cutoff and g_ewald, issue the neighbor request, and build the
    Coulomb tables when ncoultablebits is nonzero
 ------------------------------------------------------------------------- */
 void PairBuckCoulLong::init_style()
 {
   // compute() reads atom->q
   if (!atom->q_flag)
     error->all(FLERR,"Pair style buck/coul/long requires atom attribute q");
 
   // compared against rsq in compute()
   cut_coulsq = cut_coul * cut_coul;
 
   // ensure use of KSpace long-range solver, set g_ewald
 
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style requires a KSpace style");
   g_ewald = force->kspace->g_ewald;
 
   // register this pair style with the neighbor object
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
   
   // setup force tables
 
   if (ncoultablebits) init_tables(cut_coul,NULL);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairBuckCoulLong::write_restart(FILE *fp)
 {
   // global settings first, then one record per (i,j) with j >= i
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       // the flag is always written; coefficients follow only if it is set
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
       if (!setflag[itype][jtype]) continue;
 
       fwrite(&a[itype][jtype],sizeof(double),1,fp);
       fwrite(&rho[itype][jtype],sizeof(double),1,fp);
       fwrite(&c[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut_lj[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairBuckCoulLong::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   // only proc 0 reads the file; every value is then broadcast from it
   const bool reader = (comm->me == 0);
   const int ntypes = atom->ntypes;
 
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       if (reader) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
 
       // coefficients are stored only for pairs whose flag is set
       if (!setflag[itype][jtype]) continue;
 
       if (reader) {
         fread(&a[itype][jtype],sizeof(double),1,fp);
         fread(&rho[itype][jtype],sizeof(double),1,fp);
         fread(&c[itype][jtype],sizeof(double),1,fp);
         fread(&cut_lj[itype][jtype],sizeof(double),1,fp);
       }
       MPI_Bcast(&a[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&rho[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&c[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut_lj[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairBuckCoulLong::write_restart_settings(FILE *fp)
 {
   // the record order here must mirror read_restart_settings()
 
   // cutoffs
   fwrite(&cut_lj_global,sizeof(cut_lj_global),1,fp);
   fwrite(&cut_coul,sizeof(cut_coul),1,fp);
 
   // integer flags
   fwrite(&offset_flag,sizeof(offset_flag),1,fp);
   fwrite(&mix_flag,sizeof(mix_flag),1,fp);
   fwrite(&tail_flag,sizeof(tail_flag),1,fp);
 
   // Coulomb table settings
   fwrite(&ncoultablebits,sizeof(ncoultablebits),1,fp);
   fwrite(&tabinner,sizeof(tabinner),1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairBuckCoulLong::read_restart_settings(FILE *fp)
 {
   // rank 0 pulls the values from the file, in the order they were written
   const bool reader = (comm->me == 0);
   if (reader) {
     fread(&cut_lj_global,sizeof(cut_lj_global),1,fp);
     fread(&cut_coul,sizeof(cut_coul),1,fp);
     fread(&offset_flag,sizeof(offset_flag),1,fp);
     fread(&mix_flag,sizeof(mix_flag),1,fp);
     fread(&tail_flag,sizeof(tail_flag),1,fp);
     fread(&ncoultablebits,sizeof(ncoultablebits),1,fp);
     fread(&tabinner,sizeof(tabinner),1,fp);
   }
 
   // all ranks then receive the values from rank 0
   MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
   MPI_Bcast(&tail_flag,1,MPI_INT,0,world);
   MPI_Bcast(&ncoultablebits,1,MPI_INT,0,world);
   MPI_Bcast(&tabinner,1,MPI_DOUBLE,0,world);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to data file
 ------------------------------------------------------------------------- */
 
 void PairBuckCoulLong::write_data(FILE *fp)
 {
   // one line per atom type: type index followed by the like-like A, rho, C
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     const double a_ii = a[itype][itype];
     const double rho_ii = rho[itype][itype];
     const double c_ii = c[itype][itype];
     fprintf(fp,"%d %g %g %g\n",itype,a_ii,rho_ii,c_ii);
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes all pairs to data file
 ------------------------------------------------------------------------- */
 
 void PairBuckCoulLong::write_data_all(FILE *fp)
 {
   // one line per unordered type pair (jtype >= itype):
   // both type indices, then A, rho, C and the pair cutoff
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fprintf(fp,"%d %d %g %g %g %g\n",itype,jtype,
               a[itype][jtype],rho[itype][jtype],c[itype][jtype],
               cut_lj[itype][jtype]);
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairBuckCoulLong::single(int i, int j, int itype, int jtype,
                                 double rsq,
                                 double factor_coul, double factor_lj,
                                 double &fforce)
 {
   double r2inv,r6inv,r,rexp,grij,expm2,t,erfc,prefactor;
   double fraction,table,forcecoul,forcebuck,phicoul,phibuck;
   int itable;
 
   r2inv = 1.0/rsq;
   if (rsq < cut_coulsq) {
     if (!ncoultablebits || rsq <= tabinnersq) {
       r = sqrt(rsq);
       grij = g_ewald * r;
       expm2 = exp(-grij*grij);
       t = 1.0 / (1.0 + EWALD_P*grij);
       erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
       prefactor = force->qqrd2e * atom->q[i]*atom->q[j]/r;
       forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
       if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
     } else {
       union_int_float_t rsq_lookup;
       rsq_lookup.f = rsq;
       itable = rsq_lookup.i & ncoulmask;
       itable >>= ncoulshiftbits;
       fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
       table = ftable[itable] + fraction*dftable[itable];
       forcecoul = atom->q[i]*atom->q[j] * table;
       if (factor_coul < 1.0) {
         table = ctable[itable] + fraction*dctable[itable];
         prefactor = atom->q[i]*atom->q[j] * table;
         forcecoul -= (1.0-factor_coul)*prefactor;
       }
     }
   } else forcecoul = 0.0;
   if (rsq < cut_ljsq[itype][jtype]) {
     r6inv = r2inv*r2inv*r2inv;
     r = sqrt(rsq);
     rexp = exp(-r*rhoinv[itype][jtype]);
     forcebuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv;
   } else forcebuck = 0.0;
   fforce = (forcecoul + factor_lj*forcebuck) * r2inv;
 
   double eng = 0.0;
   if (rsq < cut_coulsq) {
     if (!ncoultablebits || rsq <= tabinnersq)
       phicoul = prefactor*erfc;
     else {
       table = etable[itable] + fraction*detable[itable];
       phicoul = atom->q[i]*atom->q[j] * table;
     }
     if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*prefactor;
     eng += phicoul;
   }
   if (rsq < cut_ljsq[itype][jtype]) {
     phibuck = a[itype][jtype]*rexp - c[itype][jtype]*r6inv -
       offset[itype][jtype];
     eng += factor_lj*phibuck;
   }
   return eng;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void *PairBuckCoulLong::extract(const char *str, int &dim)
 {
   // only "cut_coul" is exposed; dim is always reported as 0
   dim = 0;
   if (strcmp(str,"cut_coul") != 0) return NULL;
   return (void *) &cut_coul;
 }
diff --git a/src/KSPACE/pair_buck_long_coul_long.cpp b/src/KSPACE/pair_buck_long_coul_long.cpp
index 342a6c5ed..3b8692185 100644
--- a/src/KSPACE/pair_buck_long_coul_long.cpp
+++ b/src/KSPACE/pair_buck_long_coul_long.cpp
@@ -1,1053 +1,1053 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Pieter J. in 't Veld (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "math_vector.h"
 #include "pair_buck_long_coul_long.h"
 #include "atom.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "force.h"
 #include "kspace.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 /* ---------------------------------------------------------------------- */
 
 PairBuckLongCoulLong::PairBuckLongCoulLong(LAMMPS *lmp) : Pair(lmp)
 {
   // lookup tables do not exist yet; the destructor tests ftable
   ftable = NULL;
   fdisptable = NULL;
 
   // capability flags
   dispersionflag = 1;
   ewaldflag = 1;
   pppmflag = 1;
   respa_enable = 1;
   writedata = 1;
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::options(char **arg, int order)
 {
   const char *option[] = {"long", "cut", "off", NULL};
   int i;
 
   if (!*arg) error->all(FLERR,"Illegal pair_style buck/long/coul/long command");
   for (i=0; option[i]&&strcmp(arg[0], option[i]); ++i);
   switch (i) {
     default: error->all(FLERR,"Illegal pair_style buck/long/coul/long command");
     case 0: ewald_order |= 1<<order; break;
     case 2: ewald_off |= 1<<order;
     case 1: break;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::settings(int narg, char **arg)
 {
   if (narg != 3 && narg != 4) error->all(FLERR,"Illegal pair_style command");
 
   ewald_order = 0;
   ewald_off = 0;
 
   options(arg,6);
   options(++arg,1);
 
   if (!comm->me && ewald_order == ((1<<1) | (1<<6))) 
     error->warning(FLERR,"Using largest cutoff for buck/long/coul/long");
   if (!*(++arg)) 
     error->all(FLERR,"Cutoffs missing in pair_style buck/long/coul/long");
   if (ewald_off & (1<<6)) 
     error->all(FLERR,"LJ6 off not supported in pair_style buck/long/coul/long");
   if (!((ewald_order^ewald_off) & (1<<1))) 
     error->all(FLERR,
                "Coulomb cut not supported in pair_style buck/long/coul/coul");
   cut_buck_global = force->numeric(FLERR,*(arg++));
   if (narg == 4 && ((ewald_order & 0x42) == 0x42)) 
     error->all(FLERR,"Only one cutoff allowed when requesting all long");
   if (narg == 4) cut_coul = force->numeric(FLERR,*arg);
   else cut_coul = cut_buck_global;
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i+1; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut_buck[i][j] = cut_buck_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairBuckLongCoulLong::~PairBuckLongCoulLong()
 {
   // the per-type arrays below are created together in allocate(), which
   // also sets `allocated`; release them only if that has happened
   if (allocated) {
     memory->destroy(setflag);
     memory->destroy(cutsq);
 
     // user-supplied (*_read) and working copies of the Buckingham parameters
     memory->destroy(cut_buck_read);
     memory->destroy(cut_buck);
     memory->destroy(cut_bucksq);
     memory->destroy(buck_a_read);
     memory->destroy(buck_a);
     memory->destroy(buck_c_read);
     memory->destroy(buck_c);
     memory->destroy(buck_rho_read);
     memory->destroy(buck_rho);
     // quantities derived in init_one()
     memory->destroy(buck1);
     memory->destroy(buck2);
     memory->destroy(rhoinv);
     memory->destroy(offset);
   }
   // ftable is NULL after construction, so this runs only once it has been set
   if (ftable) free_tables();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::allocate()
 {
   allocated = 1;
 
   // arrays are indexed 1..ntypes, hence ntypes+1 entries per dimension
   const int ntypes = atom->ntypes;
   const int dim = ntypes + 1;
 
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int row = 1; row <= ntypes; ++row) {
     for (int col = row; col <= ntypes; ++col) {
       setflag[row][col] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   memory->create(cut_buck_read,dim,dim,"pair:cut_buck_read");
   memory->create(cut_buck,dim,dim,"pair:cut_buck");
   memory->create(cut_bucksq,dim,dim,"pair:cut_bucksq");
   memory->create(buck_a_read,dim,dim,"pair:buck_a_read");
   memory->create(buck_a,dim,dim,"pair:buck_a");
   memory->create(buck_c_read,dim,dim,"pair:buck_c_read");
   memory->create(buck_c,dim,dim,"pair:buck_c");
   memory->create(buck_rho_read,dim,dim,"pair:buck_rho_read");
   memory->create(buck_rho,dim,dim,"pair:buck_rho");
   memory->create(buck1,dim,dim,"pair:buck1");
   memory->create(buck2,dim,dim,"pair:buck2");
   memory->create(rhoinv,dim,dim,"pair:rhoinv");
   memory->create(offset,dim,dim,"pair:offset");
 }
 
 /* ----------------------------------------------------------------------
    extract protected data from object
 ------------------------------------------------------------------------- */
 
 void *PairBuckLongCoulLong::extract(const char *id, int &dim)
 {
   // "B" is the only array-valued entry (dim 2); everything else is a scalar
   if (strcmp(id,"B") == 0) {
     dim = 2;
     return buck_c;
   }
 
   dim = 0;
   if (strcmp(id,"ewald_order") == 0) return &ewald_order;
   if (strcmp(id,"ewald_cut") == 0) return &cut_coul;
   if (strcmp(id,"ewald_mix") == 0) return &mix_flag;
   if (strcmp(id,"cut_coul") == 0) return &cut_coul;
   if (strcmp(id,"cut_LJ") == 0) return &cut_buck_global;
 
   // unknown name
   return NULL;
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::coeff(int narg, char **arg)
 {
   if (narg < 5 || narg > 6) 
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(*(arg++),atom->ntypes,ilo,ihi);
   force->bounds(*(arg++),atom->ntypes,jlo,jhi);
 
   double buck_a_one = force->numeric(FLERR,*(arg++));
   double buck_rho_one = force->numeric(FLERR,*(arg++));
   double buck_c_one = force->numeric(FLERR,*(arg++));
 
   double cut_buck_one = cut_buck_global;
   if (narg == 6) cut_buck_one = force->numeric(FLERR,*(arg++));
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       buck_a_read[i][j] = buck_a_one;
       buck_c_read[i][j] = buck_c_one;
       buck_rho_read[i][j] = buck_rho_one;
       cut_buck_read[i][j] = cut_buck_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::init_style()
 {
   // require an atom style with charge defined
 
   if (!atom->q_flag && (ewald_order&(1<<1)))
     error->all(FLERR,"Pair style buck/long/coul/long requires atom attribute q");
 
   // request regular or rRESPA neighbor lists if neighrequest_flag != 0
 
   if (force->kspace->neighrequest_flag) {
     int irequest;
 
     if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
       int respa = 0;
       if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
       if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
 
-      if (respa == 0) irequest = neighbor->request(this);
+      if (respa == 0) irequest = neighbor->request(this,instance_me);
       else if (respa == 1) {
-        irequest = neighbor->request(this);
+        irequest = neighbor->request(this,instance_me);
         neighbor->requests[irequest]->id = 1;
         neighbor->requests[irequest]->half = 0;
         neighbor->requests[irequest]->respainner = 1;
-        irequest = neighbor->request(this);
+        irequest = neighbor->request(this,instance_me);
         neighbor->requests[irequest]->id = 3;
         neighbor->requests[irequest]->half = 0;
         neighbor->requests[irequest]->respaouter = 1;
       } else {
-        irequest = neighbor->request(this);
+        irequest = neighbor->request(this,instance_me);
         neighbor->requests[irequest]->id = 1;
         neighbor->requests[irequest]->half = 0;
         neighbor->requests[irequest]->respainner = 1;
-        irequest = neighbor->request(this);
+        irequest = neighbor->request(this,instance_me);
         neighbor->requests[irequest]->id = 2;
         neighbor->requests[irequest]->half = 0;
         neighbor->requests[irequest]->respamiddle = 1;
-        irequest = neighbor->request(this);
+        irequest = neighbor->request(this,instance_me);
         neighbor->requests[irequest]->id = 3;
         neighbor->requests[irequest]->half = 0;
         neighbor->requests[irequest]->respaouter = 1;
       }
 
-    } else irequest = neighbor->request(this);
+    } else irequest = neighbor->request(this,instance_me);
   }
 
   cut_coulsq = cut_coul * cut_coul;
 
   // set rRESPA cutoffs
 
   if (strstr(update->integrate_style,"respa") &&
       ((Respa *) update->integrate)->level_inner >= 0)
     cut_respa = ((Respa *) update->integrate)->cutoff;
   else cut_respa = NULL;
 
   // ensure use of KSpace long-range solver, set two g_ewalds
 
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style requires a KSpace style");
   if (ewald_order&(1<<1)) g_ewald = force->kspace->g_ewald;
   if (ewald_order&(1<<6)) g_ewald_6 = force->kspace->g_ewald_6;
   // setup force tables
 
   if (ncoultablebits) init_tables(cut_coul,cut_respa);
   if (ndisptablebits) init_tables_disp(cut_buck_global);
 }
 
 /* ----------------------------------------------------------------------
    neighbor callback to inform pair style of neighbor list to use
    regular or rRESPA
 ------------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::init_list(int id, NeighList *ptr)
 {
   // ids match those assigned to the requests in init_style():
   // 0 = regular, 1 = rRESPA inner, 2 = rRESPA middle, 3 = rRESPA outer
   switch (id) {
     case 0: list = ptr; break;
     case 1: listinner = ptr; break;
     case 2: listmiddle = ptr; break;
     case 3: listouter = ptr; break;
     default: break;                // any other id is ignored
   }
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairBuckLongCoulLong::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   // working copies of the user input; with long-range dispersion (bit 6)
   // the global Buckingham cutoff replaces the per-pair one
   cut_buck[i][j] =
     (ewald_order&(1<<6)) ? cut_buck_global : cut_buck_read[i][j];
   buck_a[i][j] = buck_a_read[i][j];
   buck_c[i][j] = buck_c_read[i][j];
   buck_rho[i][j] = buck_rho_read[i][j];
 
   const double cutb = cut_buck[i][j];
   const double rho_ij = buck_rho[i][j];
 
   // the pair cutoff returned to the caller is the larger of the two
   const double cut = MAX(cutb,cut_coul);
   cutsq[i][j] = cut*cut;
   cut_bucksq[i][j] = cutb * cutb;
 
   // prefactors used by the force kernels
   buck1[i][j] = buck_a[i][j]/rho_ij;
   buck2[i][j] = 6.0*buck_c[i][j];
   rhoinv[i][j] = 1.0/rho_ij;
 
   // check interior rRESPA cutoff
 
   if (cut_respa && MIN(cutb,cut_coul) < cut_respa[3])
     error->all(FLERR,"Pair cutoff < Respa interior cutoff");
 
   // energy shift: value of the Buckingham potential at its cutoff
   if (!offset_flag) {
     offset[i][j] = 0.0;
   } else {
     const double rexp = exp(-cutb/rho_ij);
     offset[i][j] = buck_a[i][j]*rexp - buck_c[i][j]/pow(cutb,6.0);
   }
 
   // mirror everything the kernels read into the (j,i) entries
   offset[j][i] = offset[i][j];
   buck2[j][i] = buck2[i][j];
   buck1[j][i] = buck1[i][j];
   rhoinv[j][i] = rhoinv[i][j];
   buck_c[j][i] = buck_c[i][j];
   buck_a[j][i] = buck_a[i][j];
   cut_bucksq[j][i] = cut_bucksq[i][j];
   cutsq[j][i] = cutsq[i][j];
 
   return cut;
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::write_restart(FILE *fp)
 {
   // global settings come first, the per-pair records follow
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
 
       // the raw user input (*_read arrays) is stored, and only for set pairs
       if (!setflag[itype][jtype]) continue;
 
       fwrite(&buck_a_read[itype][jtype],sizeof(double),1,fp);
       fwrite(&buck_rho_read[itype][jtype],sizeof(double),1,fp);
       fwrite(&buck_c_read[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut_buck_read[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   // only rank 0 reads the file; every value is then broadcast from rank 0
   const bool reader = (comm->me == 0);
   const int ntypes = atom->ntypes;
 
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       if (reader) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
 
       // coefficients are present in the file only for pairs that were set
       if (!setflag[itype][jtype]) continue;
 
       if (reader) {
         fread(&buck_a_read[itype][jtype],sizeof(double),1,fp);
         fread(&buck_rho_read[itype][jtype],sizeof(double),1,fp);
         fread(&buck_c_read[itype][jtype],sizeof(double),1,fp);
         fread(&cut_buck_read[itype][jtype],sizeof(double),1,fp);
       }
       MPI_Bcast(&buck_a_read[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&buck_rho_read[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&buck_c_read[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut_buck_read[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::write_restart_settings(FILE *fp)
 {
   // the record order here must mirror read_restart_settings()
 
   // cutoffs
   fwrite(&cut_buck_global,sizeof(cut_buck_global),1,fp);
   fwrite(&cut_coul,sizeof(cut_coul),1,fp);
 
   // integer flags
   fwrite(&offset_flag,sizeof(offset_flag),1,fp);
   fwrite(&mix_flag,sizeof(mix_flag),1,fp);
 
   // Coulomb table settings
   fwrite(&ncoultablebits,sizeof(ncoultablebits),1,fp);
   fwrite(&tabinner,sizeof(tabinner),1,fp);
 
   // which terms are treated as long-range
   fwrite(&ewald_order,sizeof(ewald_order),1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::read_restart_settings(FILE *fp)
 {
   // rank 0 pulls the values from the file, in the order they were written
   const bool reader = (comm->me == 0);
   if (reader) {
     fread(&cut_buck_global,sizeof(cut_buck_global),1,fp);
     fread(&cut_coul,sizeof(cut_coul),1,fp);
     fread(&offset_flag,sizeof(offset_flag),1,fp);
     fread(&mix_flag,sizeof(mix_flag),1,fp);
     fread(&ncoultablebits,sizeof(ncoultablebits),1,fp);
     fread(&tabinner,sizeof(tabinner),1,fp);
     fread(&ewald_order,sizeof(ewald_order),1,fp);
   }
 
   // all ranks then receive the values from rank 0
   MPI_Bcast(&cut_buck_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
   MPI_Bcast(&ncoultablebits,1,MPI_INT,0,world);
   MPI_Bcast(&tabinner,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&ewald_order,1,MPI_INT,0,world);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to data file
 ------------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::write_data(FILE *fp)
 {
   // one line per atom type: type index, then the like-like A, rho, C
   // exactly as they were supplied (the *_read arrays)
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     const double a_ii = buck_a_read[itype][itype];
     const double rho_ii = buck_rho_read[itype][itype];
     const double c_ii = buck_c_read[itype][itype];
     fprintf(fp,"%d %g %g %g\n",itype,a_ii,rho_ii,c_ii);
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes all pairs to data file
 ------------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::write_data_all(FILE *fp)
 {
   // one line per unordered type pair (jtype >= itype):
   // both type indices, then A, rho, C as supplied (the *_read arrays)
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       const double a_ij = buck_a_read[itype][jtype];
       const double rho_ij = buck_rho_read[itype][jtype];
       const double c_ij = buck_c_read[itype][jtype];
       fprintf(fp,"%d %d %g %g %g\n",itype,jtype,a_ij,rho_ij,c_ij);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    compute pair interactions
 ------------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::compute(int eflag, int vflag)
 {
   // Loops over the neighbor list `list`, accumulating the Coulomb and
   // Buckingham pair forces into atom->f.  Pair energies are formed only
   // when eflag is set and are passed, together with the force, to
   // ev_tally() when evflag is set.
 
   double evdwl,ecoul,fpair;
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   // x0/f0 point at the first element of the coordinate/force arrays, which
   // are then addressed as flat arrays with three doubles per atom
   double **x = atom->x, *x0 = x[0];
   double **f = atom->f, *f0 = f[0], *fi = f0;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   // order1/order6 are nonzero when bit 1 (Coulomb) / bit 6 (dispersion)
   // of ewald_order is set, i.e. that term is treated as long-range
   int i, j, order1 = ewald_order&(1<<1), order6 = ewald_order&(1<<6);
   int *ineigh, *ineighn, *jneigh, *jneighn, typei, typej, ni;
   double qi = 0.0, qri = 0.0, *cutsqi, *cut_bucksqi,
          *buck1i, *buck2i, *buckai, *buckci, *rhoinvi, *offseti;
   double r, rsq, r2inv, force_coul, force_buck;
   // powers of the dispersion Ewald parameter: g2 = g^2, g6 = g^6, g8 = g^8
   double g2 = g_ewald_6*g_ewald_6, g6 = g2*g2*g2, g8 = g6*g2;
   vector xi, d;
 
   ineighn = (ineigh = list->ilist)+list->inum;
 
   for (; ineigh<ineighn; ++ineigh) {                        // loop over my atoms
     i = *ineigh; fi = f0+3*i;
     if (order1) qri = (qi = q[i])*qqrd2e;                // initialize constants
     // row pointers of the per-type tables for the type of atom i
     offseti = offset[typei = type[i]];
     buck1i = buck1[typei]; buck2i = buck2[typei];
     buckai = buck_a[typei]; buckci = buck_c[typei], rhoinvi = rhoinv[typei];
     cutsqi = cutsq[typei]; cut_bucksqi = cut_bucksq[typei];
     // i+(i<<1) == 3*i: offset of atom i in the flat coordinate array
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
     jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i];
 
     for (; jneigh<jneighn; ++jneigh) {                        // loop over neighbors
       j = *jneigh;
       // ni indexes special_coul/special_lj below; ni == 0 is handled as the
       // ordinary (unscaled) case.  The mask then leaves the atom index in j.
       ni = sbmask(j);
       j &= NEIGHMASK;
 
       { register double *xj = x0+(j+(j<<1));
         d[0] = xi[0] - xj[0];                                // pair vector
         d[1] = xi[1] - xj[1];
         d[2] = xi[2] - xj[2]; }
 
       // skip pairs beyond the overall pair cutoff
       if ((rsq = vec_dot(d, d)) >= cutsqi[typej = type[j]]) continue;
       r2inv = 1.0/rsq;
       r = sqrt(rsq);
 
       if (order1 && (rsq < cut_coulsq)) {                // coulombic
         if (!ncoultablebits || rsq <= tabinnersq) {        // series real space
           // x = g_ewald*r; t is the argument of the A1..A5 polynomial
           register double x = g_ewald*r;
           register double s = qri*q[j], t = 1.0/(1.0+EWALD_P*x);
           if (ni == 0) {
             s *= g_ewald*exp(-x*x);
             // t is overwritten with the energy term and reused for ecoul
             force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s;
             if (eflag) ecoul = t;
           }
           else {                                        // special case
             // f removes the (1-special_coul[ni]) fraction of the bare
             // Coulomb term from both force and energy
             register double f = s*(1.0-special_coul[ni])/r;
             s *= g_ewald*exp(-x*x);
             force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s-f;
             if (eflag) ecoul = t-f;
           }
         }                                                // table real space
         else {
           // the table index k is taken from the bit pattern of rsq
           register union_int_float_t t;
           t.f = rsq;
           register const int k = (t.i & ncoulmask) >> ncoulshiftbits;
           // f is the interpolation fraction within table bin k
           register double f = (rsq-rtable[k])*drtable[k], qiqj = qi*q[j];
           if (ni == 0) {
             force_coul = qiqj*(ftable[k]+f*dftable[k]);
             if (eflag) ecoul = qiqj*(etable[k]+f*detable[k]);
           }
           else {                                        // special case
             // NOTE(review): the correction is stored in t.f, presumably the
             // float member of the union, so it is held in reduced precision
             // -- confirm against the union_int_float_t declaration
             t.f = (1.0-special_coul[ni])*(ctable[k]+f*dctable[k]);
             force_coul = qiqj*(ftable[k]+f*dftable[k]-t.f);
             if (eflag) ecoul = qiqj*(etable[k]+f*detable[k]-t.f);
           }
         }
       }
       else force_coul = ecoul = 0.0;
 
       if (rsq < cut_bucksqi[typej]) {                        // buckingham
         // rn = r^-6, expr = exp(-r/rho)
         register double rn = r2inv*r2inv*r2inv,
                         expr = exp(-r*rhoinvi[typej]);
         if (order6) {                                        // long-range
           if (!ndisptablebits || rsq <= tabinnerdispsq) {
             // x2 starts as (g*r)^2 and is then reused for
             // exp(-(g*r)^2)/(g*r)^2 * C
             register double x2 = g2*rsq, a2 = 1.0/x2;
             x2 = a2*exp(-x2)*buckci[typej];
             if (ni == 0) {
               force_buck =
                 r*expr*buck1i[typej]-g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq;
               if (eflag) evdwl = expr*buckai[typej]-g6*((a2+1.0)*a2+0.5)*x2;
             }
             else {                                        // special case
               // f scales the repulsive term; t = (1-f)*r^-6 weights the
               // added buck2/buck_c dispersion term
               register double f = special_lj[ni], t = rn*(1.0-f);
               force_buck = f*r*expr*buck1i[typej]-
                 g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*buck2i[typej];
               if (eflag) evdwl = f*expr*buckai[typej] -
                            g6*((a2+1.0)*a2+0.5)*x2+t*buckci[typej];
             }
           }
           else {                                              //table real space
             // same bit-pattern lookup as for the Coulomb table, using the
             // dispersion tables
             register union_int_float_t disp_t;
             disp_t.f = rsq;
             register const int disp_k = (disp_t.i & ndispmask)>>ndispshiftbits;
             register double f_disp = (rsq-rdisptable[disp_k])*drdisptable[disp_k];
             if (ni == 0) {
               force_buck = r*expr*buck1i[typej]-(fdisptable[disp_k]+f_disp*dfdisptable[disp_k])*buckci[typej];
               if (eflag) evdwl = expr*buckai[typej]-(edisptable[disp_k]+f_disp*dedisptable[disp_k])*buckci[typej];
             }
             else {                                             //special case
               register double f = special_lj[ni], t = rn*(1.0-f);
               force_buck = f*r*expr*buck1i[typej] -(fdisptable[disp_k]+f_disp*dfdisptable[disp_k])*buckci[typej] +t*buck2i[typej];
               if (eflag) evdwl = f*expr*buckai[typej] -(edisptable[disp_k]+f_disp*dedisptable[disp_k])*buckci[typej]+t*buckci[typej];
             }
           }
         }
         else {                                                // cut
           // plain cutoff Buckingham; only this branch subtracts the offset
           if (ni == 0) {
             force_buck = r*expr*buck1i[typej]-rn*buck2i[typej];
             if (eflag) evdwl = expr*buckai[typej] -
                          rn*buckci[typej]-offseti[typej];
           }
           else {                                        // special case
             register double f = special_lj[ni];
             force_buck = f*(r*expr*buck1i[typej]-rn*buck2i[typej]);
             if (eflag)
               evdwl = f*(expr*buckai[typej]-rn*buckci[typej]-offseti[typej]);
           }
         }
       }
       else force_buck = evdwl = 0.0;
 
       // fpair multiplies the pair vector d to give the force on atom i
       fpair = (force_coul+force_buck)*r2inv;
 
       // atom j receives the opposite force only when newton_pair is set
       // or j < nlocal
       if (newton_pair || j < nlocal) {
         register double *fj = f0+(j+(j<<1)), f;
         fi[0] += f = d[0]*fpair; fj[0] -= f;
         fi[1] += f = d[1]*fpair; fj[1] -= f;
         fi[2] += f = d[2]*fpair; fj[2] -= f;
       }
       else {
         fi[0] += d[0]*fpair;
         fi[1] += d[1]*fpair;
         fi[2] += d[2]*fpair;
       }
 
       if (evflag) ev_tally(i,j,nlocal,newton_pair,
                            evdwl,ecoul,fpair,d[0],d[1],d[2]);
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::compute_inner()
 {
   // Force-only pass over the neighbor list `listinner`: bare q_i*q_j/r
   // Coulomb plus cutoff Buckingham for pairs with r below cut_respa[1],
   // scaled by a smooth switching factor for r between cut_respa[0] and
   // cut_respa[1].  No energy or virial is tallied here.
 
   double r, rsq, r2inv, force_coul = 0.0, force_buck, fpair;
 
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *x0 = atom->x[0], *f0 = atom->f[0], *fi = f0, *q = atom->q;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   // switching region: the factor is 1 up to cut_out_on and reaches 0 at
   // cut_out_off
   double cut_out_on = cut_respa[0];
   double cut_out_off = cut_respa[1];
 
   double cut_out_diff = cut_out_off - cut_out_on;
   double cut_out_on_sq = cut_out_on*cut_out_on;
   double cut_out_off_sq = cut_out_off*cut_out_off;
 
   int *ineigh, *ineighn, *jneigh, *jneighn, typei, typej, ni;
   // order1 is nonzero when bit 1 is set in ewald_order or clear in ewald_off
   // (ewald_off^-1 is the bitwise complement of ewald_off)
   int i, j, order1 = (ewald_order|(ewald_off^-1))&(1<<1);
   double qri, *cut_bucksqi, *buck1i, *buck2i, *rhoinvi;
   vector xi, d;
 
   ineighn = (ineigh = listinner->ilist) + listinner->inum;
   for (; ineigh<ineighn; ++ineigh) {                        // loop over my atoms
     i = *ineigh; fi = f0+3*i;
     if (order1) qri = qqrd2e*q[i];
     // i+(i<<1) == 3*i: offset of atom i in the flat coordinate array
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
     cut_bucksqi = cut_bucksq[typei = type[i]];
     buck1i = buck1[typei]; buck2i = buck2[typei]; rhoinvi = rhoinv[typei];
     jneighn = (jneigh = listinner->firstneigh[i])+listinner->numneigh[i];
 
     for (; jneigh<jneighn; ++jneigh) {                        // loop over neighbors
       j = *jneigh;
       // ni indexes special_coul/special_lj below; the mask then leaves the
       // atom index in j
       ni = sbmask(j);
       j &= NEIGHMASK;
 
       { register double *xj = x0+(j+(j<<1));
         d[0] = xi[0] - xj[0];                                // pair vector
         d[1] = xi[1] - xj[1];
         d[2] = xi[2] - xj[2]; }
 
       // only pairs inside the outer edge of the switching region contribute
       if ((rsq = vec_dot(d, d)) >= cut_out_off_sq) continue;
       r2inv = 1.0/rsq;
       r = sqrt(rsq);
 
       // NOTE(review): there is no else branch, so when this condition is
       // false force_coul keeps its previous value (0.0 initially, otherwise
       // that of the last pair that passed the test) -- confirm the cutoffs
       // guarantee rsq < cut_coulsq for every pair reaching this point
       if (order1 && (rsq < cut_coulsq))                        // coulombic
         force_coul = ni == 0 ?
           qri*q[j]/r : qri*q[j]/r*special_coul[ni];
 
       if (rsq < cut_bucksqi[typej = type[j]]) {                // buckingham
         // rn = r^-6, expr = exp(-r/rho)
         register double rn = r2inv*r2inv*r2inv,
                         expr = exp(-r*rhoinvi[typej]);
         force_buck = ni == 0 ?
           (r*expr*buck1i[typej]-rn*buck2i[typej]) :
           (r*expr*buck1i[typej]-rn*buck2i[typej])*special_lj[ni];
       }
       else force_buck = 0.0;
 
       fpair = (force_coul + force_buck) * r2inv;
 
       if (rsq > cut_out_on_sq) {                        // switching
         // rsw runs from 0 at cut_out_on to 1 at cut_out_off; the factor
         // 1 - 3*rsw^2 + 2*rsw^3 goes from 1 to 0 over that range
         register double rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff;
         fpair  *= 1.0 + rsw*rsw*(2.0*rsw-3.0);
       }
 
       // atom j receives the opposite force only when newton_pair is set
       // or j < nlocal
       if (newton_pair || j < nlocal) {                        // force update
         register double *fj = f0+(j+(j<<1)), f;
         fi[0] += f = d[0]*fpair; fj[0] -= f;
         fi[1] += f = d[1]*fpair; fj[1] -= f;
         fi[2] += f = d[2]*fpair; fj[2] -= f;
       }
       else {
         fi[0] += d[0]*fpair;
         fi[1] += d[1]*fpair;
         fi[2] += d[2]*fpair;
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // rRESPA middle-level force kernel.
 //
 // For every pair in listmiddle with cut_respa[0]^2 < rsq < cut_respa[3]^2
 // this adds the plain 1/r Coulomb force (when order1 is set and
 // rsq < cut_coulsq) and the cut Buckingham force (when
 // rsq < cut_bucksq[itype][jtype]) to atom->f.  The pair force is ramped in
 // between cut_respa[0] and cut_respa[1] and ramped out between cut_respa[2]
 // and cut_respa[3] with cubic switching polynomials.  No energy or virial
 // is tallied here.
 void PairBuckLongCoulLong::compute_middle()
 {
   double r, rsq, r2inv, force_coul = 0.0, force_buck, fpair;
 
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *x0 = atom->x[0], *f0 = atom->f[0], *fi = f0, *q = atom->q;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   // inner window [cut_in_off, cut_in_on]: force is switched on
   // outer window [cut_out_on, cut_out_off]: force is switched off
   double cut_in_off = cut_respa[0];
   double cut_in_on = cut_respa[1];
   double cut_out_on = cut_respa[2];
   double cut_out_off = cut_respa[3];
 
   double cut_in_diff = cut_in_on - cut_in_off;
   double cut_out_diff = cut_out_off - cut_out_on;
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
   double cut_out_on_sq = cut_out_on*cut_out_on;
   double cut_out_off_sq = cut_out_off*cut_out_off;
 
   int *ineigh, *ineighn, *jneigh, *jneighn, typei, typej, ni;
   // non-zero when bit 1 is set in ewald_order or cleared in ewald_off
   int i, j, order1 = (ewald_order|(ewald_off^-1))&(1<<1);
   double qri = 0.0, *cut_bucksqi, *buck1i, *buck2i, *rhoinvi;
   vector xi, d;
 
   ineighn = (ineigh = listmiddle->ilist)+listmiddle->inum;
 
   for (; ineigh<ineighn; ++ineigh) {                        // loop over my atoms
     i = *ineigh; fi = f0+3*i;
     if (order1) qri = qqrd2e*q[i];
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
     cut_bucksqi = cut_bucksq[typei = type[i]];
     buck1i = buck1[typei]; buck2i = buck2[typei]; rhoinvi = rhoinv[typei];
     jneighn = (jneigh = listmiddle->firstneigh[i])+listmiddle->numneigh[i];
 
     for (; jneigh<jneighn; ++jneigh) {                        // loop over neighbors
       j = *jneigh;
       ni = sbmask(j);                                        // special-bond index
       j &= NEIGHMASK;
 
       { double *xj = x0+(j+(j<<1));
         d[0] = xi[0] - xj[0];                                // pair vector
         d[1] = xi[1] - xj[1];
         d[2] = xi[2] - xj[2]; }
 
       // only pairs strictly inside (cut_in_off, cut_out_off) contribute
       if ((rsq = vec_dot(d, d)) >= cut_out_off_sq) continue;
       if (rsq <= cut_in_off_sq) continue;
       r2inv = 1.0/rsq;
       r = sqrt(rsq);
 
       if (order1 && (rsq < cut_coulsq))                        // coulombic
         force_coul = ni == 0 ?
           qri*q[j]/r : qri*q[j]/r*special_coul[ni];
       else force_coul = 0.0;        // reset so a pair outside cut_coulsq does
                                     // not inherit the previous pair's value
 
       if (rsq < cut_bucksqi[typej = type[j]]) {                // buckingham
         double rn = r2inv*r2inv*r2inv,
                expr = exp(-r*rhoinvi[typej]);
         force_buck = ni == 0 ?
           (r*expr*buck1i[typej]-rn*buck2i[typej]) :
           (r*expr*buck1i[typej]-rn*buck2i[typej])*special_lj[ni];
       }
       else force_buck = 0.0;
 
       fpair = (force_coul + force_buck) * r2inv;
 
       if (rsq < cut_in_on_sq) {                                // switching
         double rsw = (r - cut_in_off)/cut_in_diff;
         fpair  *= rsw*rsw*(3.0 - 2.0*rsw);
       }
       if (rsq > cut_out_on_sq) {
         double rsw = (r - cut_out_on)/cut_out_diff;
         fpair  *= 1.0 + rsw*rsw*(2.0*rsw-3.0);
       }
 
       if (newton_pair || j < nlocal) {                        // force update
         double *fj = f0+(j+(j<<1)), f;
         fi[0] += f = d[0]*fpair; fj[0] -= f;
         fi[1] += f = d[1]*fpair; fj[1] -= f;
         fi[2] += f = d[2]*fpair; fj[2] -= f;
       }
       else {                        // j is not updated: only i gets the force
         fi[0] += d[0]*fpair;
         fi[1] += d[1]*fpair;
         fi[2] += d[2]*fpair;
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBuckLongCoulLong::compute_outer(int eflag, int vflag)
 {
   double evdwl,ecoul,fpair,fvirial;
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = 0;
 
   double **x = atom->x, *x0 = x[0];
   double **f = atom->f, *f0 = f[0], *fi = f0;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   int i, j, order1 = ewald_order&(1<<1), order6 = ewald_order&(1<<6);
   int *ineigh, *ineighn, *jneigh, *jneighn, typei, typej, ni, respa_flag;
   double qi = 0.0, qri = 0.0, *cutsqi, *cut_bucksqi,
          *buck1i, *buck2i, *buckai, *buckci, *rhoinvi, *offseti;
   double r, rsq, r2inv, force_coul, force_buck;
   double g2 = g_ewald_6*g_ewald_6, g6 = g2*g2*g2, g8 = g6*g2;
   double respa_buck = 0.0, respa_coul = 0.0, frespa = 0.0;
   vector xi, d;
 
   double cut_in_off = cut_respa[2];
   double cut_in_on = cut_respa[3];
 
   double cut_in_diff = cut_in_on - cut_in_off;
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
 
   ineighn = (ineigh = listouter->ilist)+listouter->inum;
 
   for (; ineigh<ineighn; ++ineigh) {                        // loop over my atoms
     i = *ineigh; fi = f0+3*i;
     if (order1) qri = (qi = q[i])*qqrd2e;                // initialize constants
     offseti = offset[typei = type[i]];
     buck1i = buck1[typei]; buck2i = buck2[typei];
     buckai = buck_a[typei]; buckci = buck_c[typei]; rhoinvi = rhoinv[typei];
     cutsqi = cutsq[typei]; cut_bucksqi = cut_bucksq[typei];
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
     jneighn = (jneigh = listouter->firstneigh[i])+listouter->numneigh[i];
 
     for (; jneigh<jneighn; ++jneigh) {                        // loop over neighbors
       j = *jneigh;
       ni = sbmask(j);
       j &= NEIGHMASK;
 
       { register double *xj = x0+(j+(j<<1));
         d[0] = xi[0] - xj[0];                                // pair vector
         d[1] = xi[1] - xj[1];
         d[2] = xi[2] - xj[2]; }
 
       if ((rsq = vec_dot(d, d)) >= cutsqi[typej = type[j]]) continue;
       r2inv = 1.0/rsq;
       r = sqrt(rsq);
 
       frespa = 1.0;      //check whether and how to compute respa corrections
       respa_coul = 0.0;
       respa_buck = 0.0;
       respa_flag = rsq < cut_in_on_sq ? 1 : 0;
       if (respa_flag && (rsq > cut_in_off_sq)) {
         register double rsw = (r-cut_in_off)/cut_in_diff;
         frespa = 1-rsw*rsw*(3.0-2.0*rsw);
       }
 
       if (order1 && (rsq < cut_coulsq)) {                // coulombic
         if (!ncoultablebits || rsq <= tabinnersq) {        // series real space
           register double s = qri*q[j];
           if (respa_flag)                                // correct for respa
             respa_coul = ni == 0 ? frespa*s/r : frespa*s/r*special_coul[ni];
           register double x = g_ewald*r, t = 1.0/(1.0+EWALD_P*x);
           if (ni == 0) {
             s *= g_ewald*exp(-x*x);
             force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s-respa_coul;
             if (eflag) ecoul = t;
           }
           else {                                        // correct for special
             register double ri = s*(1.0-special_coul[ni])/r; s *= g_ewald*exp(-x*x);
             force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s-ri-respa_coul;
             if (eflag) ecoul = t-ri;
           }
         }                                                // table real space
         else {
           if (respa_flag) {
             register double s = qri*q[j];
             respa_coul = ni == 0 ? frespa*s/r : frespa*s/r*special_coul[ni];
           }
           register union_int_float_t t;
           t.f = rsq;
           register const int k = (t.i & ncoulmask) >> ncoulshiftbits;
           register double f = (rsq-rtable[k])*drtable[k], qiqj = qi*q[j];
           if (ni == 0) {
             force_coul = qiqj*(ftable[k]+f*dftable[k]);
             if (eflag) ecoul = qiqj*(etable[k]+f*detable[k]);
           }
           else {                                        // correct for special
             t.f = (1.0-special_coul[ni])*(ctable[k]+f*dctable[k]);
             force_coul = qiqj*(ftable[k]+f*dftable[k]-t.f);
             if (eflag) {
               t.f = (1.0-special_coul[ni])*(ptable[k]+f*dptable[k]);
               ecoul = qiqj*(etable[k]+f*detable[k]-t.f);
             }
           }
         }
       }
       else force_coul = respa_coul = ecoul = 0.0;
 
       if (rsq < cut_bucksqi[typej]) {                        // buckingham
         register double rn = r2inv*r2inv*r2inv,
                         expr = exp(-r*rhoinvi[typej]);
         if (respa_flag) respa_buck = ni == 0 ?                 // correct for respa
             frespa*(r*expr*buck1i[typej]-rn*buck2i[typej]) :
             frespa*(r*expr*buck1i[typej]-rn*buck2i[typej])*special_lj[ni];
         if (order6) {                                        // long-range form
           if (!ndisptablebits || rsq <= tabinnerdispsq) {
             register double x2 = g2*rsq, a2 = 1.0/x2;
             x2 = a2*exp(-x2)*buckci[typej];
             if (ni == 0) {
               force_buck =
                 r*expr*buck1i[typej]-g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq-respa_buck;
               if (eflag) evdwl = expr*buckai[typej]-g6*((a2+1.0)*a2+0.5)*x2;
 	    }
             else {                                        // correct for special
               register double f = special_lj[ni], t = rn*(1.0-f);
               force_buck = f*r*expr*buck1i[typej]-
                 g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*buck2i[typej]-respa_buck;
               if (eflag) evdwl = f*expr*buckai[typej] -
                            g6*((a2+1.0)*a2+0.5)*x2+t*buckci[typej];
             }
           }
           else {          // table real space
             register union_int_float_t disp_t;
             disp_t.f = rsq;
             register const int disp_k = (disp_t.i & ndispmask)>>ndispshiftbits;
             register double f_disp = (rsq-rdisptable[disp_k])*drdisptable[disp_k];
             register double rn = r2inv*r2inv*r2inv;
             if (ni == 0) {
               force_buck = r*expr*buck1i[typej]-(fdisptable[disp_k]+f_disp*dfdisptable[disp_k])*buckci[typej]-respa_buck;
               if (eflag) evdwl =  expr*buckai[typej]-(edisptable[disp_k]+f_disp*dedisptable[disp_k])*buckci[typej];
             }
             else {                             //special case
               register double f = special_lj[ni], t = rn*(1.0-f);
               force_buck = f*r*expr*buck1i[typej]-(fdisptable[disp_k]+f_disp*dfdisptable[disp_k])*buckci[typej]+t*buck2i[typej]-respa_buck;
               if (eflag) evdwl = f*expr*buckai[typej]-(edisptable[disp_k]+f_disp*dedisptable[disp_k])*buckci[typej]+t*buckci[typej];
             }
           }
         }
         else {                                                // cut form
           if (ni == 0) {
             force_buck = r*expr*buck1i[typej]-rn*buck2i[typej]-respa_buck;
             if (eflag)
               evdwl = expr*buckai[typej]-rn*buckci[typej]-offseti[typej];
           }
           else {                                        // correct for special
             register double f = special_lj[ni];
             force_buck = f*(r*expr*buck1i[typej]-rn*buck2i[typej])-respa_buck;
             if (eflag)
               evdwl = f*(expr*buckai[typej]-rn*buckci[typej]-offseti[typej]);
           }
         }
       }
       else force_buck = respa_buck = evdwl = 0.0;
 
       fpair = (force_coul+force_buck)*r2inv;
 
       if (newton_pair || j < nlocal) {
         register double *fj = f0+(j+(j<<1)), f;
         fi[0] += f = d[0]*fpair; fj[0] -= f;
         fi[1] += f = d[1]*fpair; fj[1] -= f;
         fi[2] += f = d[2]*fpair; fj[2] -= f;
       }
       else {
         fi[0] += d[0]*fpair;
         fi[1] += d[1]*fpair;
         fi[2] += d[2]*fpair;
       }
 
       if (evflag) {
         fvirial = (force_coul + force_buck + respa_coul + respa_buck)*r2inv;
         ev_tally(i,j,nlocal,newton_pair,
                  evdwl,ecoul,fvirial,d[0],d[1],d[2]);
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairBuckLongCoulLong::single(int i, int j, int itype, int jtype,
                             double rsq, double factor_coul, double factor_buck,
                             double &fforce)
 {
   double f, r, r2inv, r6inv, force_coul, force_buck;
   double g2 = g_ewald_6*g_ewald_6, g6 = g2*g2*g2, g8 = g6*g2, *q = atom->q;
 
   r = sqrt(rsq);
   r2inv = 1.0/rsq;
   double eng = 0.0;
 
   if ((ewald_order&2) && (rsq < cut_coulsq)) {                // coulombic
     if (!ncoultablebits || rsq <= tabinnersq) {                // series real space
       register double x = g_ewald*r;
       register double s = force->qqrd2e*q[i]*q[j], t = 1.0/(1.0+EWALD_P*x);
       f = s*(1.0-factor_coul)/r; s *= g_ewald*exp(-x*x);
       force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s-f;
       eng += t-f;
     }
     else {                                                // table real space
       register union_int_float_t t;
       t.f = rsq;
       register const int k = (t.i & ncoulmask) >> ncoulshiftbits;
       register double f = (rsq-rtable[k])*drtable[k], qiqj = q[i]*q[j];
       t.f = (1.0-factor_coul)*(ctable[k]+f*dctable[k]);
       force_coul = qiqj*(ftable[k]+f*dftable[k]-t.f);
       eng += qiqj*(etable[k]+f*detable[k]-t.f);
     }
   } else force_coul = 0.0;
 
   if (rsq < cut_bucksq[itype][jtype]) {                        // buckingham
     register double expr = factor_buck*exp(-sqrt(rsq)*rhoinv[itype][jtype]);
     r6inv = r2inv*r2inv*r2inv;
     if (ewald_order&64) {                                // long-range
       register double x2 = g2*rsq, a2 = 1.0/x2, t = r6inv*(1.0-factor_buck);
       x2 = a2*exp(-x2)*buck_c[itype][jtype];
       force_buck = buck1[itype][jtype]*r*expr-
                g8*(((6.0*a2+6.0)*a2+3.0)*a2+a2)*x2*rsq+t*buck2[itype][jtype];
       eng += buck_a[itype][jtype]*expr-
         g6*((a2+1.0)*a2+0.5)*x2+t*buck_c[itype][jtype];
     }
     else {                                                // cut
       force_buck =
         buck1[itype][jtype]*r*expr-factor_buck*buck_c[itype][jtype]*r6inv;
       eng += buck_a[itype][jtype]*expr-
         factor_buck*(buck_c[itype][jtype]*r6inv-offset[itype][jtype]);
     }
   } else force_buck = 0.0;
 
   fforce = (force_coul+force_buck)*r2inv;
   return eng;
 }
diff --git a/src/KSPACE/pair_coul_long.cpp b/src/KSPACE/pair_coul_long.cpp
index a0f7d3ee2..84c4854de 100644
--- a/src/KSPACE/pair_coul_long.cpp
+++ b/src/KSPACE/pair_coul_long.cpp
@@ -1,379 +1,379 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Paul Crozier (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_coul_long.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 /* ---------------------------------------------------------------------- */
 
 // Constructor: sets the ewaldflag/pppmflag markers to 1, starts without
 // Coulomb tables (ftable == NULL) and with a zero qdist.
 PairCoulLong::PairCoulLong(LAMMPS *lmp) : Pair(lmp)
 {
   qdist = 0.0;
   ftable = NULL;
   pppmflag = 1;
   ewaldflag = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Destructor: releases the per-type arrays created by allocate() and, when
 // present, the Coulomb lookup tables.
 PairCoulLong::~PairCoulLong()
 {
   if (allocated) {
     // per-type-pair arrays
     memory->destroy(setflag);
     memory->destroy(cutsq);
     memory->destroy(scale);
   }
 
   // tables exist only if ftable was set
   if (ftable) {
     free_tables();
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairCoulLong::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itable,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
   double fraction,table;
   double r,r2inv,forcecoul,factor_coul;
   double grij,expm2,prefactor,t,erfc;
   int *ilist,*jlist,*numneigh,**firstneigh;
   double rsq;
 
   ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cut_coulsq) {
         r2inv = 1.0/rsq;
         if (!ncoultablebits || rsq <= tabinnersq) {
           r = sqrt(rsq);
           grij = g_ewald * r;
           expm2 = exp(-grij*grij);
           t = 1.0 / (1.0 + EWALD_P*grij);
           erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
           prefactor = qqrd2e * scale[itype][jtype] * qtmp*q[j]/r;
           forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
           if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
         } else {
           union_int_float_t rsq_lookup;
           rsq_lookup.f = rsq;
           itable = rsq_lookup.i & ncoulmask;
           itable >>= ncoulshiftbits;
           fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
           table = ftable[itable] + fraction*dftable[itable];
           forcecoul = scale[itype][jtype] * qtmp*q[j] * table;
           if (factor_coul < 1.0) {
             table = ctable[itable] + fraction*dctable[itable];
             prefactor = scale[itype][jtype] * qtmp*q[j] * table;
             forcecoul -= (1.0-factor_coul)*prefactor;
           }
         }
 
         fpair = forcecoul * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
           if (!ncoultablebits || rsq <= tabinnersq)
             ecoul = prefactor*erfc;
           else {
             table = etable[itable] + fraction*detable[itable];
             ecoul = scale[itype][jtype] * qtmp*q[j] * table;
           }
           if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              0.0,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 // Create the (ntypes+1) x (ntypes+1) arrays setflag, cutsq and scale and
 // clear the upper triangle (j >= i, types 1..ntypes) of setflag.
 void PairCoulLong::allocate()
 {
   const int ntypes = atom->ntypes;
   const int dim = ntypes+1;
 
   allocated = 1;
 
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   memory->create(scale,dim,dim,"pair:scale");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairCoulLong::settings(int narg, char **arg)
 {
   if (narg != 1) error->all(FLERR,"Illegal pair_style command");
 
   cut_coul = force->numeric(FLERR,arg[0]);
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairCoulLong::coeff(int narg, char **arg)
 {
   if (narg != 2) error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       scale[i][j] = 1.0;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 // Initialisation specific to this pair style: requires the atom charge
 // attribute, issues a neighbor-list request, squares the Coulomb cutoff,
 // copies g_ewald from the KSpace solver and, when ncoultablebits is
 // non-zero, builds the Coulomb tables via init_tables().
 void PairCoulLong::init_style()
 {
   // NOTE(review): the message names lj/cut/coul/long although this is
   // PairCoulLong - looks like a copy/paste leftover; confirm before changing
   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/cut/coul/long requires atom attribute q");
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 
   cut_coulsq = cut_coul * cut_coul;
 
   // ensure use of KSpace long-range solver, set g_ewald
 
  if (force->kspace == NULL)
     error->all(FLERR,"Pair style requires a KSpace style");
   g_ewald = force->kspace->g_ewald;
 
   // setup force tables
 
   if (ncoultablebits) init_tables(cut_coul,NULL);
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairCoulLong::init_one(int i, int j)
 {
   scale[j][i] = scale[i][j];
   return cut_coul+2.0*qdist;
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairCoulLong::write_restart(FILE *fp)
 {
   write_restart_settings(fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairCoulLong::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
 
   allocate();
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 // Write the global settings to fp in binary form.  The field order here
 // must match the order used by read_restart_settings().
 void PairCoulLong::write_restart_settings(FILE *fp)
 {
   const size_t one = 1;
 
   fwrite(&cut_coul,sizeof(double),one,fp);           // Coulomb cutoff
   fwrite(&offset_flag,sizeof(int),one,fp);
   fwrite(&mix_flag,sizeof(int),one,fp);
   fwrite(&ncoultablebits,sizeof(int),one,fp);
   fwrite(&tabinner,sizeof(double),one,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 // Read the global settings from fp on the process with comm->me == 0 and
 // broadcast each value from rank 0 to all processes in world.
 // NOTE(review): the fread() return values are not checked.
 void PairCoulLong::read_restart_settings(FILE *fp)
 {
   const bool reads_file = (comm->me == 0);
 
   if (reads_file) {
     // same field order as write_restart_settings()
     fread(&cut_coul,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
     fread(&ncoultablebits,sizeof(int),1,fp);
     fread(&tabinner,sizeof(double),1,fp);
   }
 
   MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
   MPI_Bcast(&ncoultablebits,1,MPI_INT,0,world);
   MPI_Bcast(&tabinner,1,MPI_DOUBLE,0,world);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Coulomb energy and force of one i-j pair at squared distance rsq.
 //
 // Returns the pair energy and stores forcecoul/rsq in fforce.  itype,
 // jtype and factor_lj are not used.  No cutoff test is made here.
 // NOTE(review): unlike compute(), scale[itype][jtype] is not applied in
 // this function - confirm whether that is intended.
 double PairCoulLong::single(int i, int j, int itype, int jtype,
                             double rsq,
                             double factor_coul, double factor_lj,
                             double &fforce)
 {
   double *q = atom->q;
   const double r2inv = 1.0/rsq;
   const bool series = !ncoultablebits || rsq <= tabinnersq;
 
   double prefactor = 0.0;
   double forcecoul, phicoul;
 
   if (series) {
     // polynomial erfc() approximation
     const double r = sqrt(rsq);
     const double grij = g_ewald * r;
     const double expm2 = exp(-grij*grij);
     const double t = 1.0 / (1.0 + EWALD_P*grij);
     const double erfc_approx = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
     prefactor = force->qqrd2e * q[i]*q[j]/r;
     forcecoul = prefactor * (erfc_approx + EWALD_F*grij*expm2);
     if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
     phicoul = prefactor*erfc_approx;
   } else {
     // linear interpolation in the Coulomb tables
     union_int_float_t rsq_lookup;
     rsq_lookup.f = rsq;
     const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits;
     const double fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
     const double ftab = ftable[itable] + fraction*dftable[itable];
     forcecoul = q[i]*q[j] * ftab;
     if (factor_coul < 1.0) {
       const double ctab = ctable[itable] + fraction*dctable[itable];
       prefactor = q[i]*q[j] * ctab;
       forcecoul -= (1.0-factor_coul)*prefactor;
     }
     const double etab = etable[itable] + fraction*detable[itable];
     phicoul = q[i]*q[j] * etab;
   }
 
   // special-bond correction of the energy
   if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*prefactor;
 
   fforce = forcecoul * r2inv;
   return phicoul;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Look up an internal quantity by name.  "cut_coul" yields a pointer to
 // the cutoff with dim = 0, "scale" yields the scale array with dim = 2;
 // any other name returns NULL and leaves dim untouched.
 void *PairCoulLong::extract(const char *str, int &dim)
 {
   void *result = NULL;
 
   if (!strcmp(str,"cut_coul")) {
     dim = 0;
     result = (void *) &cut_coul;
   } else if (!strcmp(str,"scale")) {
     dim = 2;
     result = (void *) scale;
   }
 
   return result;
 }
diff --git a/src/KSPACE/pair_lj_charmm_coul_long.cpp b/src/KSPACE/pair_lj_charmm_coul_long.cpp
index 96f9cfd89..a92d175cc 100644
--- a/src/KSPACE/pair_lj_charmm_coul_long.cpp
+++ b/src/KSPACE/pair_lj_charmm_coul_long.cpp
@@ -1,1039 +1,1039 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Paul Crozier (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_lj_charmm_coul_long.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCharmmCoulLong::PairLJCharmmCoulLong(LAMMPS *lmp) : Pair(lmp)
 {
   // no Coulomb table yet; the destructor frees tables only if ftable is set
 
   ftable = NULL;
 
   // flags switched on by this style
 
   ewaldflag = 1;
   pppmflag = 1;
   respa_enable = 1;
   writedata = 1;
 
   // style defaults
 
   implicit = 0;
   mix_flag = ARITHMETIC;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCharmmCoulLong::~PairLJCharmmCoulLong()
 {
   // per-type arrays exist only once allocate() has set the allocated flag
 
   if (allocated) {
 
     // precomputed LJ coefficients, regular and 1-4
 
     memory->destroy(lj14_4);
     memory->destroy(lj14_3);
     memory->destroy(lj14_2);
     memory->destroy(lj14_1);
     memory->destroy(lj4);
     memory->destroy(lj3);
     memory->destroy(lj2);
     memory->destroy(lj1);
 
     // user-supplied parameters, regular and 1-4
 
     memory->destroy(sigma14);
     memory->destroy(eps14);
     memory->destroy(sigma);
     memory->destroy(epsilon);
 
     // bookkeeping arrays
 
     memory->destroy(cutsq);
     memory->destroy(setflag);
   }
 
   // Coulomb tables are released separately, only if they were built
 
   if (ftable) free_tables();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::compute(int eflag, int vflag)
 {
   // pairwise force/energy evaluation over the regular neighbor list (list)
   //   Coulomb: erfc-screened term using g_ewald, analytic or tabulated
   //   LJ: 12-6 form, switched between cut_lj_inner and cut_lj
   // eflag/vflag are handed to ev_setup() and control what is tallied
 
   int i,j,ii,jj,inum,jnum,itype,jtype,itable;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double fraction,table;
   double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double grij,expm2,prefactor,t,erfc;
   double philj,switch1,switch2;
   int *ilist,*jlist,*numneigh,**firstneigh;
   double rsq;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       // sbmask(j) selects the special-bond weights for this pair,
       // NEIGHMASK then strips those bits to recover the atom index
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cut_bothsq) {
         r2inv = 1.0/rsq;
 
         if (rsq < cut_coulsq) {
           if (!ncoultablebits || rsq <= tabinnersq) {
             // analytic branch: erfc is a polynomial approximation in t,
             // prefactor is the bare Coulomb term qqrd2e*qi*qj/r
             r = sqrt(rsq);
             grij = g_ewald * r;
             expm2 = exp(-grij*grij);
             t = 1.0 / (1.0 + EWALD_P*grij);
             erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
             prefactor = qqrd2e * qtmp*q[j]/r;
             forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
             // remove the excluded fraction of the bare Coulomb term
             if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
           } else {
             // tabulated branch: the table index is taken from bits of the
             // float representation of rsq, then linearly interpolated
             union_int_float_t rsq_lookup;
             rsq_lookup.f = rsq;
             itable = rsq_lookup.i & ncoulmask;
             itable >>= ncoulshiftbits;
             fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
             table = ftable[itable] + fraction*dftable[itable];
             forcecoul = qtmp*q[j] * table;
             if (factor_coul < 1.0) {
               // prefactor is also reused by the energy section below
               table = ctable[itable] + fraction*dctable[itable];
               prefactor = qtmp*q[j] * table;
               forcecoul -= (1.0-factor_coul)*prefactor;
             }
           }
         } else forcecoul = 0.0;
 
         if (rsq < cut_ljsq) {
           r6inv = r2inv*r2inv*r2inv;
           jtype = type[j];
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
           if (rsq > cut_lj_innersq) {
             // switched region: switch1 scales the force, switch2 multiplies
             // the energy term philj to account for the switch derivative
             switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
               (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
             switch2 = 12.0*rsq * (cut_ljsq-rsq) *
               (rsq-cut_lj_innersq) / denom_lj;
             philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
             forcelj = forcelj*switch1 + philj*switch2;
           }
         } else forcelj = 0.0;
 
         // fpair is force divided by r, so del*fpair gives the components
         fpair = (forcecoul + factor_lj*forcelj) * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
           // energy section reuses prefactor, erfc, itable, fraction and
           // r6inv computed by the matching force branch above
           if (rsq < cut_coulsq) {
             if (!ncoultablebits || rsq <= tabinnersq)
               ecoul = prefactor*erfc;
             else {
               table = etable[itable] + fraction*detable[itable];
               ecoul = qtmp*q[j] * table;
             }
             if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
           } else ecoul = 0.0;
 
           if (rsq < cut_ljsq) {
             evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
             if (rsq > cut_lj_innersq) {
               switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
                 (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
               evdwl *= switch1;
             }
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::compute_inner()
 {
   // inner rRESPA force evaluation over listinner
   // uses the bare (unscreened) Coulomb term and unswitched 12-6 LJ,
   // smoothly turned off between cut_respa[0] and cut_respa[1]
   // forces only: no energy or virial is tallied here
 
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,fpair;
   double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double rsw;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = listinner->inum;
   ilist = listinner->ilist;
   numneigh = listinner->numneigh;
   firstneigh = listinner->firstneigh;
 
   // distance range over which the inner force is ramped from full to zero
 
   double cut_out_on = cut_respa[0];
   double cut_out_off = cut_respa[1];
 
   double cut_out_diff = cut_out_off - cut_out_on;
   double cut_out_on_sq = cut_out_on*cut_out_on;
   double cut_out_off_sq = cut_out_off*cut_out_off;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       // special-bond weights come from the high bits of the neighbor entry
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cut_out_off_sq) {
         r2inv = 1.0/rsq;
         // bare Coulomb term qqrd2e*qi*qj/r, then scaled by factor_coul
         forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
         if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*forcecoul;
 
         r6inv = r2inv*r2inv*r2inv;
         jtype = type[j];
         forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
 
         fpair = (forcecoul + factor_lj*forcelj) * r2inv;
 
         if (rsq > cut_out_on_sq) {
           // cubic ramp: factor is 1 at cut_out_on and 0 at cut_out_off
           rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff;
           fpair *= 1.0 + rsw*rsw*(2.0*rsw-3.0);
         }
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::compute_middle()
 {
   // middle rRESPA force evaluation over listmiddle
   // bare Coulomb plus switched 12-6 LJ, ramped on between cut_respa[0]
   // and cut_respa[1] and ramped off between cut_respa[2] and cut_respa[3]
   // forces only: no energy or virial is tallied here
 
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,fpair;
   double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double philj,switch1,switch2;
   double rsw;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = listmiddle->inum;
   ilist = listmiddle->ilist;
   numneigh = listmiddle->numneigh;
   firstneigh = listmiddle->firstneigh;
 
   // lower (in) and upper (out) ramp boundaries of the middle region
 
   double cut_in_off = cut_respa[0];
   double cut_in_on = cut_respa[1];
   double cut_out_on = cut_respa[2];
   double cut_out_off = cut_respa[3];
 
   double cut_in_diff = cut_in_on - cut_in_off;
   double cut_out_diff = cut_out_off - cut_out_on;
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
   double cut_out_on_sq = cut_out_on*cut_out_on;
   double cut_out_off_sq = cut_out_off*cut_out_off;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       // special-bond weights come from the high bits of the neighbor entry
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cut_out_off_sq && rsq > cut_in_off_sq) {
         r2inv = 1.0/rsq;
         // bare Coulomb term qqrd2e*qi*qj/r, then scaled by factor_coul
         forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
         if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*forcecoul;
 
         r6inv = r2inv*r2inv*r2inv;
         jtype = type[j];
         forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
         if (rsq > cut_lj_innersq) {
           // LJ switching region between cut_lj_inner and cut_lj
           switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
             (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
           switch2 = 12.0*rsq * (cut_ljsq-rsq) *
             (rsq-cut_lj_innersq) / denom_lj;
           philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
           forcelj = forcelj*switch1 + philj*switch2;
         }
 
         fpair = (forcecoul + factor_lj*forcelj) * r2inv;
         if (rsq < cut_in_on_sq) {
           // ramp on: factor is 0 at cut_in_off and 1 at cut_in_on
           rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff;
           fpair *= rsw*rsw*(3.0 - 2.0*rsw);
         }
         if (rsq > cut_out_on_sq) {
           // ramp off: factor is 1 at cut_out_on and 0 at cut_out_off
           rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff;
           fpair *= 1.0 + rsw*rsw*(2.0*rsw - 3.0);
         }
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::compute_outer(int eflag, int vflag)
 {
   // outer rRESPA force evaluation over listouter
   //   Coulomb: erfc-screened term minus the bare term already applied by
   //            the faster levels, analytic or tabulated
   //   LJ: switched 12-6 form, ramped on between cut_respa[2], cut_respa[3]
   // energy (eflag) and virial (vflag) are tallied here using the full,
   // un-ramped interaction, so fpair is recomputed when vflag is set
 
   int i,j,ii,jj,inum,jnum,itype,jtype,itable;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double fraction,table;
   double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double grij,expm2,prefactor,t,erfc;
   double philj,switch1,switch2;
   double rsw;
   int *ilist,*jlist,*numneigh,**firstneigh;
   double rsq;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = listouter->inum;
   ilist = listouter->ilist;
   numneigh = listouter->numneigh;
   firstneigh = listouter->firstneigh;
 
   // distance range over which the outer force is ramped from zero to full
 
   double cut_in_off = cut_respa[2];
   double cut_in_on = cut_respa[3];
 
   double cut_in_diff = cut_in_on - cut_in_off;
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       // special-bond weights come from the high bits of the neighbor entry
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cut_bothsq) {
         r2inv = 1.0/rsq;
 
         if (rsq < cut_coulsq) {
           if (!ncoultablebits || rsq <= tabinnersq) {
             // analytic branch: the "- 1.0" removes the bare Coulomb term,
             // which is then added back weighted by the ramp-on factor
             r = sqrt(rsq);
             grij = g_ewald * r;
             expm2 = exp(-grij*grij);
             t = 1.0 / (1.0 + EWALD_P*grij);
             erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
             prefactor = qqrd2e * qtmp*q[j]/r;
             forcecoul = prefactor * (erfc + EWALD_F*grij*expm2 - 1.0);
             if (rsq > cut_in_off_sq) {
               if (rsq < cut_in_on_sq) {
                 rsw = (r - cut_in_off)/cut_in_diff;
                 forcecoul += prefactor*rsw*rsw*(3.0 - 2.0*rsw);
                 if (factor_coul < 1.0)
                   forcecoul -=
                     (1.0-factor_coul)*prefactor*rsw*rsw*(3.0 - 2.0*rsw);
               } else {
                 forcecoul += prefactor;
                 if (factor_coul < 1.0)
                   forcecoul -= (1.0-factor_coul)*prefactor;
               }
             }
           } else {
             // tabulated branch: the table index is taken from bits of the
             // float representation of rsq, then linearly interpolated
             union_int_float_t rsq_lookup;
             rsq_lookup.f = rsq;
             itable = rsq_lookup.i & ncoulmask;
             itable >>= ncoulshiftbits;
             fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
             table = ftable[itable] + fraction*dftable[itable];
             forcecoul = qtmp*q[j] * table;
             if (factor_coul < 1.0) {
               table = ctable[itable] + fraction*dctable[itable];
               prefactor = qtmp*q[j] * table;
               forcecoul -= (1.0-factor_coul)*prefactor;
             }
           }
         } else forcecoul = 0.0;
 
         if (rsq < cut_ljsq && rsq > cut_in_off_sq) {
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
           if (rsq > cut_lj_innersq) {
             // LJ switching region between cut_lj_inner and cut_lj
             switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
               (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
             switch2 = 12.0*rsq * (cut_ljsq-rsq) *
               (rsq-cut_lj_innersq) / denom_lj;
             philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
             forcelj = forcelj*switch1 + philj*switch2;
           }
           if (rsq < cut_in_on_sq) {
             // ramp on: factor is 0 at cut_in_off and 1 at cut_in_on
             rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff;
             forcelj *= rsw*rsw*(3.0 - 2.0*rsw);
           }
         } else forcelj = 0.0;
 
         // the special-bond LJ weight must scale the outer force exactly as
         // it does in compute(), compute_inner() and compute_middle();
         // it is applied here, not folded into forcelj, because the virial
         // section below may reuse forcelj and applies factor_lj itself
         fpair = (forcecoul + factor_lj*forcelj) * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
           // full (un-ramped) pair energy
           if (rsq < cut_coulsq) {
             if (!ncoultablebits || rsq <= tabinnersq) {
               ecoul = prefactor*erfc;
               if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
             } else {
               table = etable[itable] + fraction*detable[itable];
               ecoul = qtmp*q[j] * table;
               if (factor_coul < 1.0) {
                 table = ptable[itable] + fraction*dptable[itable];
                 prefactor = qtmp*q[j] * table;
                 ecoul -= (1.0-factor_coul)*prefactor;
               }
             }
           } else ecoul = 0.0;
 
           if (rsq < cut_ljsq) {
             r6inv = r2inv*r2inv*r2inv;
             evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
             if (rsq > cut_lj_innersq) {
               switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
                 (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
               evdwl *= switch1;
             }
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (vflag) {
           // rebuild the full (un-ramped) pair force for the virial tally
           if (rsq < cut_coulsq) {
             if (!ncoultablebits || rsq <= tabinnersq) {
               forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
               if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
             } else {
               table = vtable[itable] + fraction*dvtable[itable];
               forcecoul = qtmp*q[j] * table;
               if (factor_coul < 1.0) {
                 table = ptable[itable] + fraction*dptable[itable];
                 prefactor = qtmp*q[j] * table;
                 forcecoul -= (1.0-factor_coul)*prefactor;
               }
             }
           } else forcecoul = 0.0;
 
           // inside the ramp LJ is recomputed without the ramp factor;
           // beyond cut_in_on the forcelj from the force section is
           // already the full value and is kept
           if (rsq <= cut_in_off_sq) {
             r6inv = r2inv*r2inv*r2inv;
             forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
             if (rsq > cut_lj_innersq) {
               switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
                 (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
               switch2 = 12.0*rsq * (cut_ljsq-rsq) *
                 (rsq-cut_lj_innersq) / denom_lj;
               philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
               forcelj = forcelj*switch1 + philj*switch2;
             }
           } else if (rsq <= cut_in_on_sq) {
             forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
             if (rsq > cut_lj_innersq) {
               switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
                 (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
               switch2 = 12.0*rsq * (cut_ljsq-rsq) *
                 (rsq-cut_lj_innersq) / denom_lj;
               philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
               forcelj = forcelj*switch1 + philj*switch2;
             }
           }
 
           fpair = (forcecoul + factor_lj*forcelj) * r2inv;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::allocate()
 {
   // every per-type array is (ntypes+1) x (ntypes+1)
 
   const int np1 = atom->ntypes + 1;
   allocated = 1;
 
   // setflag: clear the upper triangle (including the diagonal) from type 1 on
 
   memory->create(setflag,np1,np1,"pair:setflag");
   for (int itype = 1; itype < np1; ++itype) {
     for (int jtype = itype; jtype < np1; ++jtype) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,np1,np1,"pair:cutsq");
 
   // user-supplied parameters, regular and 1-4
 
   memory->create(epsilon,np1,np1,"pair:epsilon");
   memory->create(sigma,np1,np1,"pair:sigma");
   memory->create(eps14,np1,np1,"pair:eps14");
   memory->create(sigma14,np1,np1,"pair:sigma14");
 
   // precomputed LJ coefficients, regular and 1-4
 
   memory->create(lj1,np1,np1,"pair:lj1");
   memory->create(lj2,np1,np1,"pair:lj2");
   memory->create(lj3,np1,np1,"pair:lj3");
   memory->create(lj4,np1,np1,"pair:lj4");
   memory->create(lj14_1,np1,np1,"pair:lj14_1");
   memory->create(lj14_2,np1,np1,"pair:lj14_2");
   memory->create(lj14_3,np1,np1,"pair:lj14_3");
   memory->create(lj14_4,np1,np1,"pair:lj14_4");
 }
 
 /* ----------------------------------------------------------------------
    global settings
    unlike other pair styles,
      there are no individual pair settings that these override
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::settings(int narg, char **arg)
 {
   if (narg != 2 && narg != 3) error->all(FLERR,"Illegal pair_style command");
 
   cut_lj_inner = force->numeric(FLERR,arg[0]);
   cut_lj = force->numeric(FLERR,arg[1]);
   if (narg == 2) cut_coul = cut_lj;
   else cut_coul = force->numeric(FLERR,arg[2]);
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::coeff(int narg, char **arg)
 {
   if (narg != 4 && narg != 6) error->all(FLERR,"Illegal pair_coeff command");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double epsilon_one = force->numeric(FLERR,arg[2]);
   double sigma_one = force->numeric(FLERR,arg[3]);
   double eps14_one = epsilon_one;
   double sigma14_one = sigma_one;
   if (narg == 6) {
     eps14_one = force->numeric(FLERR,arg[4]);
     sigma14_one = force->numeric(FLERR,arg[5]);
   }
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       epsilon[i][j] = epsilon_one;
       sigma[i][j] = sigma_one;
       eps14[i][j] = eps14_one;
       sigma14[i][j] = sigma14_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::init_style()
 {
   // this style reads per-atom charges, so the atom style must provide q
 
   if (!atom->q_flag)
     error->all(FLERR,
                "Pair style lj/charmm/coul/long requires atom attribute q");
 
   // request regular or rRESPA neighbor lists
 
   int irequest;
 
   if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
     // respa = 0: no inner/middle level, 1: inner only, 2: inner and middle
     int respa = 0;
     if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
     if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
 
     // request ids match the ids dispatched on in init_list():
     // 1 = inner, 2 = middle, 3 = outer
-    if (respa == 0) irequest = neighbor->request(this);
+    if (respa == 0) irequest = neighbor->request(this,instance_me);
     else if (respa == 1) {
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 1;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respainner = 1;
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 3;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respaouter = 1;
     } else {
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 1;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respainner = 1;
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 2;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respamiddle = 1;
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 3;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respaouter = 1;
     }
 
-  } else irequest = neighbor->request(this);
+  } else irequest = neighbor->request(this,instance_me);
 
   // require cut_lj_inner < cut_lj
 
   if (cut_lj_inner >= cut_lj)
     error->all(FLERR,"Pair inner cutoff >= Pair outer cutoff");
 
   // squared cutoffs and the switching-function denominator
   // (cut_ljsq - cut_lj_innersq)^3 used by the compute routines
 
   cut_lj_innersq = cut_lj_inner * cut_lj_inner;
   cut_ljsq = cut_lj * cut_lj;
   cut_coulsq = cut_coul * cut_coul;
   cut_bothsq = MAX(cut_ljsq,cut_coulsq);
 
   denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) *
     (cut_ljsq-cut_lj_innersq);
 
   // set & error check interior rRESPA cutoffs
 
   if (strstr(update->integrate_style,"respa") &&
       ((Respa *) update->integrate)->level_inner >= 0) {
     cut_respa = ((Respa *) update->integrate)->cutoff;
     if (MIN(cut_lj,cut_coul) < cut_respa[3])
       error->all(FLERR,"Pair cutoff < Respa interior cutoff");
     if (cut_lj_inner < cut_respa[1])
       error->all(FLERR,"Pair inner cutoff < Respa interior cutoff");
   } else cut_respa = NULL;
 
   // ensure use of KSpace long-range solver, set g_ewald
 
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style requires a KSpace style");
   g_ewald = force->kspace->g_ewald;
 
   // setup force tables
 
   if (ncoultablebits) init_tables(cut_coul,cut_respa);
 }
 
 /* ----------------------------------------------------------------------
    neighbor callback to inform pair style of neighbor list to use
    regular or rRESPA
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::init_list(int id, NeighList *ptr)
 {
   // id selects which list pointer receives ptr; unknown ids are ignored
 
   switch (id) {
   case 0:
     list = ptr;
     break;
   case 1:
     listinner = ptr;
     break;
   case 2:
     listmiddle = ptr;
     break;
   case 3:
     listouter = ptr;
     break;
   default:
     break;
   }
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairLJCharmmCoulLong::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) {
     epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j],
                                sigma[i][i],sigma[j][j]);
     sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
     eps14[i][j] = mix_energy(eps14[i][i],eps14[j][j],
                                sigma14[i][i],sigma14[j][j]);
     sigma14[i][j] = mix_distance(sigma14[i][i],sigma14[j][j]);
   }
 
   double cut = MAX(cut_lj,cut_coul);
 
   lj1[i][j] = 48.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj2[i][j] = 24.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
   lj3[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj4[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
   lj14_1[i][j] = 48.0 * eps14[i][j] * pow(sigma14[i][j],12.0);
   lj14_2[i][j] = 24.0 * eps14[i][j] * pow(sigma14[i][j],6.0);
   lj14_3[i][j] = 4.0 * eps14[i][j] * pow(sigma14[i][j],12.0);
   lj14_4[i][j] = 4.0 * eps14[i][j] * pow(sigma14[i][j],6.0);
 
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   lj4[j][i] = lj4[i][j];
   lj14_1[j][i] = lj14_1[i][j];
   lj14_2[j][i] = lj14_2[i][j];
   lj14_3[j][i] = lj14_3[i][j];
   lj14_4[j][i] = lj14_4[i][j];
 
   return cut;
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::write_restart(FILE *fp)
 {
   // global settings first, then one record per type pair with j >= i
 
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
 
       // parameters follow the flag only for pairs that were set
 
       if (!setflag[itype][jtype]) continue;
       fwrite(&epsilon[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigma[itype][jtype],sizeof(double),1,fp);
       fwrite(&eps14[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigma14[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::read_restart(FILE *fp)
 {
   // global settings first, then the per-type arrays can be allocated
 
   read_restart_settings(fp);
   allocate();
 
   const int ntypes = atom->ntypes;
   const bool reader = (comm->me == 0);
 
   // same record layout as write_restart(): flag, then parameters if set
   // only proc 0 touches the file; every value is then broadcast
 
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       if (reader) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
       if (!setflag[itype][jtype]) continue;
 
       if (reader) {
         fread(&epsilon[itype][jtype],sizeof(double),1,fp);
         fread(&sigma[itype][jtype],sizeof(double),1,fp);
         fread(&eps14[itype][jtype],sizeof(double),1,fp);
         fread(&sigma14[itype][jtype],sizeof(double),1,fp);
       }
       MPI_Bcast(&epsilon[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&sigma[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&eps14[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&sigma14[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::write_restart_settings(FILE *fp)
 {
   // field order and sizes here must match read_restart_settings()
 
   fwrite(&cut_lj_inner,sizeof(double),1,fp);
   fwrite(&cut_lj,sizeof(double),1,fp);
   fwrite(&cut_coul,sizeof(double),1,fp);
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
   fwrite(&ncoultablebits,sizeof(int),1,fp);
   fwrite(&tabinner,sizeof(double),1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::read_restart_settings(FILE *fp)
 {
   // only proc 0 reads the file; field order and sizes must match
   // write_restart_settings()
   // NOTE(review): fread return values are not checked, so a truncated
   // restart file is not detected here
 
   if (comm->me == 0) {
     fread(&cut_lj_inner,sizeof(double),1,fp);
     fread(&cut_lj,sizeof(double),1,fp);
     fread(&cut_coul,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
     fread(&ncoultablebits,sizeof(int),1,fp);
     fread(&tabinner,sizeof(double),1,fp);
   }
 
   // distribute every setting from proc 0 to all other procs
 
   MPI_Bcast(&cut_lj_inner,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_lj,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
   MPI_Bcast(&ncoultablebits,1,MPI_INT,0,world);
   MPI_Bcast(&tabinner,1,MPI_DOUBLE,0,world);
 }
 
 
 /* ----------------------------------------------------------------------
    proc 0 writes to data file
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::write_data(FILE *fp)
 {
   // one line per atom type: the type index followed by the diagonal
   // epsilon, sigma, eps14 and sigma14 entries
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     fprintf(fp,"%d %g %g %g %g\n",itype,
             epsilon[itype][itype],sigma[itype][itype],
             eps14[itype][itype],sigma14[itype][itype]);
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes all pairs to data file
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLong::write_data_all(FILE *fp)
 {
   // one line per type pair with j >= i: both type indices followed by
   // epsilon, sigma, eps14 and sigma14 for that pair
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fprintf(fp,"%d %d %g %g %g %g\n",itype,jtype,
               epsilon[itype][jtype],sigma[itype][jtype],
               eps14[itype][jtype],sigma14[itype][jtype]);
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLJCharmmCoulLong::single(int i, int j, int itype, int jtype,
                                     double rsq,
                                     double factor_coul, double factor_lj,
                                     double &fforce)
 {
   // energy and force for one pair of atoms i,j at squared distance rsq
   //   itype,jtype = atom types used to index the LJ coefficient arrays
   //   factor_coul,factor_lj = weights applied to the Coulomb and LJ terms
   //   fforce = output, total pair force divided by r
   //   return value = Coulomb plus weighted LJ energy of the pair
   // same formulas as the pair loop in compute()
 
   double r2inv,r6inv,r,grij,expm2,t,erfc,prefactor;
   double switch1,switch2,fraction,table,forcecoul,forcelj,phicoul,philj;
   int itable;
 
   r2inv = 1.0/rsq;
   if (rsq < cut_coulsq) {
     if (!ncoultablebits || rsq <= tabinnersq) {
       // analytic branch: erfc is a polynomial approximation in t,
       // prefactor is the bare Coulomb term qqrd2e*qi*qj/r
       r = sqrt(rsq);
       grij = g_ewald * r;
       expm2 = exp(-grij*grij);
       t = 1.0 / (1.0 + EWALD_P*grij);
       erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
       prefactor = force->qqrd2e * atom->q[i]*atom->q[j]/r;
       forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
       if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
     } else {
       // tabulated branch: the table index is taken from bits of the
       // float representation of rsq, then linearly interpolated
       union_int_float_t rsq_lookup;
       rsq_lookup.f = rsq;
       itable = rsq_lookup.i & ncoulmask;
       itable >>= ncoulshiftbits;
       fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
       table = ftable[itable] + fraction*dftable[itable];
       forcecoul = atom->q[i]*atom->q[j] * table;
       if (factor_coul < 1.0) {
         // prefactor is also reused by the energy section below
         table = ctable[itable] + fraction*dctable[itable];
         prefactor = atom->q[i]*atom->q[j] * table;
         forcecoul -= (1.0-factor_coul)*prefactor;
       }
     }
   } else forcecoul = 0.0;
   if (rsq < cut_ljsq) {
     r6inv = r2inv*r2inv*r2inv;
     forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
     if (rsq > cut_lj_innersq) {
       // LJ switching region between cut_lj_inner and cut_lj
       switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
         (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
       switch2 = 12.0*rsq * (cut_ljsq-rsq) *
         (rsq-cut_lj_innersq) / denom_lj;
       philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
       forcelj = forcelj*switch1 + philj*switch2;
     }
   } else forcelj = 0.0;
   fforce = (forcecoul + factor_lj*forcelj) * r2inv;
 
   // energy section reuses prefactor, erfc, itable, fraction and r6inv
   // left behind by the matching force branches above
 
   double eng = 0.0;
   if (rsq < cut_coulsq) {
     if (!ncoultablebits || rsq <= tabinnersq)
       phicoul = prefactor*erfc;
     else {
       table = etable[itable] + fraction*detable[itable];
       phicoul = atom->q[i]*atom->q[j] * table;
     }
     if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*prefactor;
     eng += phicoul;
   }
 
   if (rsq < cut_ljsq) {
     philj = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
     if (rsq > cut_lj_innersq) {
       switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
         (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
       philj *= switch1;
     }
     eng += factor_lj*philj;
   }
 
   return eng;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void *PairLJCharmmCoulLong::extract(const char *str, int &dim)
 {
   // dim ends up 2 for the lj14 coefficient arrays and 0 for the scalar
   // settings; an unknown name returns NULL with dim set to 0
 
   void *found = NULL;
   dim = 0;
 
   if (strcmp(str,"lj14_1") == 0) {
     dim = 2;
     found = (void *) lj14_1;
   } else if (strcmp(str,"lj14_2") == 0) {
     dim = 2;
     found = (void *) lj14_2;
   } else if (strcmp(str,"lj14_3") == 0) {
     dim = 2;
     found = (void *) lj14_3;
   } else if (strcmp(str,"lj14_4") == 0) {
     dim = 2;
     found = (void *) lj14_4;
   } else if (strcmp(str,"implicit") == 0) {
     found = (void *) &implicit;
   } else if (strcmp(str,"cut_coul") == 0) {
     found = (void *) &cut_coul;
   }
 
   return found;
 }
diff --git a/src/KSPACE/pair_lj_cut_coul_long.cpp b/src/KSPACE/pair_lj_cut_coul_long.cpp
index 427012fa0..87a9c78b1 100644
--- a/src/KSPACE/pair_lj_cut_coul_long.cpp
+++ b/src/KSPACE/pair_lj_cut_coul_long.cpp
@@ -1,982 +1,982 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Paul Crozier (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_lj_cut_coul_long.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 /* ---------------------------------------------------------------------- */
 
 // Constructor: sets the capability flags and defaults of this pair style.
 PairLJCutCoulLong::PairLJCutCoulLong(LAMMPS *lmp) : Pair(lmp)
 {
   // capability flags
   ewaldflag = 1;
   pppmflag = 1;
   respa_enable = 1;
   writedata = 1;
 
   // no Coulomb tables yet; the destructor only frees them when ftable is set
   ftable = NULL;
 
   // qdist enters the cutoff computed in init_one(); zero here
   qdist = 0.0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Destructor: releases the per-type arrays created in allocate() and the
 // Coulomb tables, if any were built.
 PairLJCutCoulLong::~PairLJCutCoulLong()
 {
   if (allocated) {
     // bookkeeping arrays
     memory->destroy(setflag);
     memory->destroy(cutsq);
 
     // per-type-pair LJ parameters and derived coefficients
     memory->destroy(cut_lj);
     memory->destroy(cut_ljsq);
     memory->destroy(epsilon);
     memory->destroy(sigma);
     memory->destroy(lj1);
     memory->destroy(lj2);
     memory->destroy(lj3);
     memory->destroy(lj4);
     memory->destroy(offset);
   }
 
   if (ftable != NULL) free_tables();
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Pairwise kernel over the neighbor list "list": cut LJ plus an
 // erfc-damped Coulomb term (g_ewald is taken from the KSpace solver in
 // init_style()).
 //   eflag : when nonzero, per-pair evdwl/ecoul are evaluated
 //   vflag : forwarded to ev_setup(); virial_fdotr_compute() is called at
 //           the end when vflag_fdotr is set
 // Forces are accumulated into atom->f; energies/virial go through ev_tally().
 void PairLJCutCoulLong::compute(int eflag, int vflag)
 {
   int i,ii,j,jj,inum,jnum,itype,jtype,itable;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double fraction,table;
   double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double grij,expm2,prefactor,t,erfc;
   int *ilist,*jlist,*numneigh,**firstneigh;
   double rsq;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // sbmask(j) picks the special_lj/special_coul scaling entry for this
       // pair; masking with NEIGHMASK then leaves the plain atom index
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
 
         if (rsq < cut_coulsq) {
           // analytic branch: tables disabled or pair inside the table's
           // inner bound
           if (!ncoultablebits || rsq <= tabinnersq) {
             r = sqrt(rsq);
             grij = g_ewald * r;
             expm2 = exp(-grij*grij);
             // erfc is a polynomial in t (coefficients A1..A5) times expm2
             t = 1.0 / (1.0 + EWALD_P*grij);
             erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
             prefactor = qqrd2e * qtmp*q[j]/r;
             forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
             // special pairs: subtract the excluded share of the plain
             // prefactor term
             if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
           } else {
             // tabulated branch: the table index is taken from the bit
             // pattern of rsq stored as a float, then linearly interpolated
             union_int_float_t rsq_lookup;
             rsq_lookup.f = rsq;
             itable = rsq_lookup.i & ncoulmask;
             itable >>= ncoulshiftbits;
             fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
             table = ftable[itable] + fraction*dftable[itable];
             forcecoul = qtmp*q[j] * table;
             if (factor_coul < 1.0) {
               // prefactor is only defined on this path when
               // factor_coul < 1.0; the energy code below relies on that
               table = ctable[itable] + fraction*dctable[itable];
               prefactor = qtmp*q[j] * table;
               forcecoul -= (1.0-factor_coul)*prefactor;
             }
           }
         } else forcecoul = 0.0;
 
         if (rsq < cut_ljsq[itype][jtype]) {
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
         } else forcelj = 0.0;
 
         fpair = (forcecoul + factor_lj*forcelj) * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         // atom j gets the opposite force only when newton_pair is on or
         // j < nlocal
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
           // reuses prefactor/erfc/itable/fraction from the force branch
           // above, which tested the same conditions
           if (rsq < cut_coulsq) {
             if (!ncoultablebits || rsq <= tabinnersq)
               ecoul = prefactor*erfc;
             else {
               table = etable[itable] + fraction*detable[itable];
               ecoul = qtmp*q[j] * table;
             }
             if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
           } else ecoul = 0.0;
 
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
               offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 // rRESPA inner-level force kernel over "listinner".
 // Applies LJ plus an undamped qqrd2e*qi*qj/r Coulomb term to pairs with
 // rsq < cut_respa[1]^2; between cut_respa[0] and cut_respa[1] the force is
 // switched off smoothly. Only forces are accumulated: this routine makes
 // no energy or virial tally.
 void PairLJCutCoulLong::compute_inner()
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,fpair;
   double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double rsw;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = listinner->inum;
   ilist = listinner->ilist;
   numneigh = listinner->numneigh;
   firstneigh = listinner->firstneigh;
 
   // switching region of this level: full force below cut_out_on,
   // zero at and beyond cut_out_off
   double cut_out_on = cut_respa[0];
   double cut_out_off = cut_respa[1];
 
   double cut_out_diff = cut_out_off - cut_out_on;
   double cut_out_on_sq = cut_out_on*cut_out_on;
   double cut_out_off_sq = cut_out_off*cut_out_off;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cut_out_off_sq) {
         r2inv = 1.0/rsq;
         forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
         // algebraically the same as scaling forcecoul by factor_coul
         if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*forcecoul;
 
         jtype = type[j];
         if (rsq < cut_ljsq[itype][jtype]) {
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
         } else forcelj = 0.0;
 
         fpair = (forcecoul + factor_lj*forcelj) * r2inv;
         if (rsq > cut_out_on_sq) {
           // cubic switch: factor is 1 at rsw = 0 and 0 at rsw = 1
           rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff;
           fpair  *= 1.0 + rsw*rsw*(2.0*rsw-3.0);
         }
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // rRESPA middle-level force kernel over "listmiddle".
 // Handles pairs with cut_respa[0]^2 < rsq < cut_respa[3]^2: the force is
 // switched on between cut_respa[0] and cut_respa[1] and switched off
 // between cut_respa[2] and cut_respa[3]. Coulomb is the undamped
 // qqrd2e*qi*qj/r term. Only forces are accumulated: this routine makes no
 // energy or virial tally.
 void PairLJCutCoulLong::compute_middle()
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,fpair;
   double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double rsw;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = listmiddle->inum;
   ilist = listmiddle->ilist;
   numneigh = listmiddle->numneigh;
   firstneigh = listmiddle->firstneigh;
 
   // inner switching region (force ramps up) and outer switching region
   // (force ramps down) of this level
   double cut_in_off = cut_respa[0];
   double cut_in_on = cut_respa[1];
   double cut_out_on = cut_respa[2];
   double cut_out_off = cut_respa[3];
 
   double cut_in_diff = cut_in_on - cut_in_off;
   double cut_out_diff = cut_out_off - cut_out_on;
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
   double cut_out_on_sq = cut_out_on*cut_out_on;
   double cut_out_off_sq = cut_out_off*cut_out_off;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cut_out_off_sq && rsq > cut_in_off_sq) {
         r2inv = 1.0/rsq;
         forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
         // algebraically the same as scaling forcecoul by factor_coul
         if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*forcecoul;
 
         jtype = type[j];
         if (rsq < cut_ljsq[itype][jtype]) {
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
         } else forcelj = 0.0;
 
         fpair = (forcecoul + factor_lj*forcelj) * r2inv;
         if (rsq < cut_in_on_sq) {
           // switch-on factor: 0 at rsw = 0, 1 at rsw = 1
           rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff;
           fpair *= rsw*rsw*(3.0 - 2.0*rsw);
         }
         if (rsq > cut_out_on_sq) {
           // switch-off factor: 1 at rsw = 0, 0 at rsw = 1
           rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff;
           fpair *= 1.0 + rsw*rsw*(2.0*rsw - 3.0);
         }
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // rRESPA outer-level kernel over "listouter".
 //   eflag : when nonzero, per-pair evdwl/ecoul are evaluated
 //   vflag : when nonzero, fpair is recomputed from the full (unswitched)
 //           pair force before it is handed to ev_tally()
 // The force applied to atoms is the part of the interaction not covered by
 // the inner/middle levels: it is switched on between cut_respa[2] and
 // cut_respa[3]. Energies are the full pair energies.
 void PairLJCutCoulLong::compute_outer(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype,itable;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double fraction,table;
   double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double grij,expm2,prefactor,t,erfc;
   double rsw;
   int *ilist,*jlist,*numneigh,**firstneigh;
   double rsq;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = listouter->inum;
   ilist = listouter->ilist;
   numneigh = listouter->numneigh;
   firstneigh = listouter->firstneigh;
 
   // switching region: no outer short-range force below cut_in_off,
   // full force at and beyond cut_in_on
   double cut_in_off = cut_respa[2];
   double cut_in_on = cut_respa[3];
 
   double cut_in_diff = cut_in_on - cut_in_off;
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
 
         if (rsq < cut_coulsq) {
           if (!ncoultablebits || rsq <= tabinnersq) {
             r = sqrt(rsq);
             grij = g_ewald * r;
             expm2 = exp(-grij*grij);
             t = 1.0 / (1.0 + EWALD_P*grij);
             erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
             prefactor = qqrd2e * qtmp*q[j]/r;
             // the "- 1.0" removes the undamped prefactor term, which is
             // added back below scaled by the switch-on factor
             forcecoul = prefactor * (erfc + EWALD_F*grij*expm2 - 1.0);
             if (rsq > cut_in_off_sq) {
               if (rsq < cut_in_on_sq) {
                 rsw = (r - cut_in_off)/cut_in_diff;
                 forcecoul += prefactor*rsw*rsw*(3.0 - 2.0*rsw);
                 if (factor_coul < 1.0)
                   forcecoul -=
                     (1.0-factor_coul)*prefactor*rsw*rsw*(3.0 - 2.0*rsw);
               } else {
                 forcecoul += prefactor;
                 if (factor_coul < 1.0)
                   forcecoul -= (1.0-factor_coul)*prefactor;
               }
             }
           } else {
             // tabulated branch: the table index is taken from the bit
             // pattern of rsq stored as a float, then linearly interpolated
             union_int_float_t rsq_lookup;
             rsq_lookup.f = rsq;
             itable = rsq_lookup.i & ncoulmask;
             itable >>= ncoulshiftbits;
             fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
             table = ftable[itable] + fraction*dftable[itable];
             forcecoul = qtmp*q[j] * table;
             if (factor_coul < 1.0) {
               table = ctable[itable] + fraction*dctable[itable];
               prefactor = qtmp*q[j] * table;
               forcecoul -= (1.0-factor_coul)*prefactor;
             }
           }
         } else forcecoul = 0.0;
 
         if (rsq < cut_ljsq[itype][jtype] && rsq > cut_in_off_sq) {
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
           if (rsq < cut_in_on_sq) {
             rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff;
             forcelj *= rsw*rsw*(3.0 - 2.0*rsw);
           }
         } else forcelj = 0.0;
 
         // scale LJ by factor_lj exactly as compute_inner()/compute_middle()
         // and the virial recomputation below do; without it the per-level
         // forces of a special pair would not sum to the force compute() gives
         fpair = (forcecoul + factor_lj*forcelj) * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
           if (rsq < cut_coulsq) {
             if (!ncoultablebits || rsq <= tabinnersq) {
               ecoul = prefactor*erfc;
               if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
             } else {
               table = etable[itable] + fraction*detable[itable];
               ecoul = qtmp*q[j] * table;
               if (factor_coul < 1.0) {
                 table = ptable[itable] + fraction*dptable[itable];
                 prefactor = qtmp*q[j] * table;
                 ecoul -= (1.0-factor_coul)*prefactor;
               }
             }
           } else ecoul = 0.0;
 
           if (rsq < cut_ljsq[itype][jtype]) {
             r6inv = r2inv*r2inv*r2inv;
             evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
               offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         // for the virial, rebuild fpair from the full pair force (no rRESPA
         // switching); this value is used only by ev_tally() below
         if (vflag) {
           if (rsq < cut_coulsq) {
             if (!ncoultablebits || rsq <= tabinnersq) {
               forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
               if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
             } else {
               table = vtable[itable] + fraction*dvtable[itable];
               forcecoul = qtmp*q[j] * table;
               if (factor_coul < 1.0) {
                 table = ptable[itable] + fraction*dptable[itable];
                 prefactor = qtmp*q[j] * table;
                 forcecoul -= (1.0-factor_coul)*prefactor;
               }
             }
           } else forcecoul = 0.0;
 
           // beyond cut_in_on, forcelj from the force section is already
           // the unswitched value and is reused as is
           if (rsq <= cut_in_off_sq) {
             r6inv = r2inv*r2inv*r2inv;
             forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
           } else if (rsq <= cut_in_on_sq)
             forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
 
           fpair = (forcecoul + factor_lj*forcelj) * r2inv;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 // Create all per-type-pair arrays, each sized (ntypes+1) x (ntypes+1), and
 // clear setflag for every i <= j pair with types numbered from 1.
 void PairLJCutCoulLong::allocate()
 {
   const int np1 = atom->ntypes + 1;
   allocated = 1;
 
   memory->create(setflag,np1,np1,"pair:setflag");
   for (int itype = 1; itype < np1; itype++) {
     for (int jtype = itype; jtype < np1; jtype++) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,np1,np1,"pair:cutsq");
 
   // LJ parameters and the coefficients derived from them
   memory->create(cut_lj,np1,np1,"pair:cut_lj");
   memory->create(cut_ljsq,np1,np1,"pair:cut_ljsq");
   memory->create(epsilon,np1,np1,"pair:epsilon");
   memory->create(sigma,np1,np1,"pair:sigma");
   memory->create(lj1,np1,np1,"pair:lj1");
   memory->create(lj2,np1,np1,"pair:lj2");
   memory->create(lj3,np1,np1,"pair:lj3");
   memory->create(lj4,np1,np1,"pair:lj4");
   memory->create(offset,np1,np1,"pair:offset");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 // Parse the pair_style arguments.
 //   arg[0] : global LJ cutoff
 //   arg[1] : optional Coulomb cutoff; defaults to the LJ cutoff
 // Any other argument count raises "Illegal pair_style command".
 void PairLJCutCoulLong::settings(int narg, char **arg)
 {
   if (narg < 1 || narg > 2) error->all(FLERR,"Illegal pair_style command");
 
   cut_lj_global = force->numeric(FLERR,arg[0]);
   if (narg == 1) cut_coul = cut_lj_global;
   else cut_coul = force->numeric(FLERR,arg[1]);
 
   // reset cutoffs that have been explicitly set
   // j starts at i so that like-type (i,i) pairs are reset as well;
   // coeff() stores its entries for every j >= i
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut_lj[i][j] = cut_lj_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulLong::coeff(int narg, char **arg)
 {
   if (narg < 4 || narg > 5)
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double epsilon_one = force->numeric(FLERR,arg[2]);
   double sigma_one = force->numeric(FLERR,arg[3]);
 
   double cut_lj_one = cut_lj_global;
   if (narg == 5) cut_lj_one = force->numeric(FLERR,arg[4]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       epsilon[i][j] = epsilon_one;
       sigma[i][j] = sigma_one;
       cut_lj[i][j] = cut_lj_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 // Style-specific initialization: checks that atoms carry charge, requests
 // the neighbor list(s) needed for the regular or rRESPA integrator, caches
 // cut_coulsq, the rRESPA cutoffs and g_ewald, and builds the Coulomb tables
 // when ncoultablebits is nonzero.
 void PairLJCutCoulLong::init_style()
 {
   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/cut/coul/long requires atom attribute q");
 
   // request regular or rRESPA neighbor lists
   // the request ids (1 = inner, 2 = middle, 3 = outer) are the ids that
   // init_list() maps to listinner/listmiddle/listouter
 
   int irequest;
 
   if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
     // respa = 0: no inner level, 1: inner+outer, 2: inner+middle+outer
     int respa = 0;
     if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
     if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
 
-    if (respa == 0) irequest = neighbor->request(this);
+    if (respa == 0) irequest = neighbor->request(this,instance_me);
     else if (respa == 1) {
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 1;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respainner = 1;
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 3;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respaouter = 1;
     } else {
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 1;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respainner = 1;
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 2;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respamiddle = 1;
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 3;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respaouter = 1;
     }
 
-  } else irequest = neighbor->request(this);
+  } else irequest = neighbor->request(this,instance_me);
 
   cut_coulsq = cut_coul * cut_coul;
 
   // set rRESPA cutoffs
   // cut_respa stays NULL unless rRESPA is in use with an inner level;
   // init_one() and init_tables() receive/test this pointer
 
   if (strstr(update->integrate_style,"respa") &&
       ((Respa *) update->integrate)->level_inner >= 0)
     cut_respa = ((Respa *) update->integrate)->cutoff;
   else cut_respa = NULL;
 
   // ensure use of KSpace long-range solver, set g_ewald
 
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style requires a KSpace style");
   g_ewald = force->kspace->g_ewald;
 
   // setup force tables
 
   if (ncoultablebits) init_tables(cut_coul,cut_respa);
 }
 
 /* ----------------------------------------------------------------------
    neighbor callback to inform pair style of neighbor list to use
    regular or rRESPA
 ------------------------------------------------------------------------- */
 
 // Store the neighbor list pointer that belongs to request id:
 // 0 = regular list, 1 = inner, 2 = middle, 3 = outer. Other ids are ignored.
 void PairLJCutCoulLong::init_list(int id, NeighList *ptr)
 {
   switch (id) {
   case 0:
     list = ptr;
     break;
   case 1:
     listinner = ptr;
     break;
   case 2:
     listmiddle = ptr;
     break;
   case 3:
     listouter = ptr;
     break;
   default:
     break;
   }
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairLJCutCoulLong::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) {
     epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j],
                                sigma[i][i],sigma[j][j]);
     sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
     cut_lj[i][j] = mix_distance(cut_lj[i][i],cut_lj[j][j]);
   }
 
   // include TIP4P qdist in full cutoff, qdist = 0.0 if not TIP4P
 
   double cut = MAX(cut_lj[i][j],cut_coul+2.0*qdist);
   cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
 
   lj1[i][j] = 48.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj2[i][j] = 24.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
   lj3[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj4[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
 
   if (offset_flag) {
     double ratio = sigma[i][j] / cut_lj[i][j];
     offset[i][j] = 4.0 * epsilon[i][j] * (pow(ratio,12.0) - pow(ratio,6.0));
   } else offset[i][j] = 0.0;
 
   cut_ljsq[j][i] = cut_ljsq[i][j];
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   lj4[j][i] = lj4[i][j];
   offset[j][i] = offset[i][j];
 
   // check interior rRESPA cutoff
 
   if (cut_respa && MIN(cut_lj[i][j],cut_coul) < cut_respa[3])
     error->all(FLERR,"Pair cutoff < Respa interior cutoff");
 
   // compute I,J contribution to long-range tail correction
   // count total # of atoms of type I and J via Allreduce
 
   if (tail_flag) {
     int *type = atom->type;
     int nlocal = atom->nlocal;
 
     double count[2],all[2];
     count[0] = count[1] = 0.0;
     for (int k = 0; k < nlocal; k++) {
       if (type[k] == i) count[0] += 1.0;
       if (type[k] == j) count[1] += 1.0;
     }
     MPI_Allreduce(count,all,2,MPI_DOUBLE,MPI_SUM,world);
 
     double sig2 = sigma[i][j]*sigma[i][j];
     double sig6 = sig2*sig2*sig2;
     double rc3 = cut_lj[i][j]*cut_lj[i][j]*cut_lj[i][j];
     double rc6 = rc3*rc3;
     double rc9 = rc3*rc6;
     etail_ij = 8.0*MY_PI*all[0]*all[1]*epsilon[i][j] *
       sig6 * (sig6 - 3.0*rc6) / (9.0*rc9);
     ptail_ij = 16.0*MY_PI*all[0]*all[1]*epsilon[i][j] *
       sig6 * (2.0*sig6 - 3.0*rc6) / (9.0*rc9);
   }
 
   return cut;
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 // Write the global settings, then for every i <= j pair its setflag and,
 // when set, epsilon, sigma and cut_lj as raw binary values.
 void PairLJCutCoulLong::write_restart(FILE *fp)
 {
   write_restart_settings(fp);
 
   for (int itype = 1; itype <= atom->ntypes; itype++) {
     for (int jtype = itype; jtype <= atom->ntypes; jtype++) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
       if (!setflag[itype][jtype]) continue;
 
       // coefficients follow only for pairs that were explicitly set
       fwrite(&epsilon[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigma[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut_lj[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 // Read back what write_restart() wrote: proc 0 reads each value from fp
 // and every value is then broadcast from proc 0.
 void PairLJCutCoulLong::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
 
   allocate();
 
   const int me = comm->me;
   for (int itype = 1; itype <= atom->ntypes; itype++) {
     for (int jtype = itype; jtype <= atom->ntypes; jtype++) {
       if (me == 0) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
       if (!setflag[itype][jtype]) continue;
 
       // coefficients are present only for pairs flagged as set
       if (me == 0) {
         fread(&epsilon[itype][jtype],sizeof(double),1,fp);
         fread(&sigma[itype][jtype],sizeof(double),1,fp);
         fread(&cut_lj[itype][jtype],sizeof(double),1,fp);
       }
       MPI_Bcast(&epsilon[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&sigma[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut_lj[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 // Write the global settings as raw binary values.
 // The field order here is the on-disk layout: read_restart_settings()
 // reads the same fields in the same order.
 void PairLJCutCoulLong::write_restart_settings(FILE *fp)
 {
   fwrite(&cut_lj_global,sizeof(double),1,fp);
   fwrite(&cut_coul,sizeof(double),1,fp);
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
   fwrite(&tail_flag,sizeof(int),1,fp);
   fwrite(&ncoultablebits,sizeof(int),1,fp);
   fwrite(&tabinner,sizeof(double),1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 // Read the global settings on proc 0, in the order written by
 // write_restart_settings(), then broadcast each one from proc 0.
 void PairLJCutCoulLong::read_restart_settings(FILE *fp)
 {
   // only proc 0 touches the file; fread return values are not checked
   if (comm->me == 0) {
     fread(&cut_lj_global,sizeof(double),1,fp);
     fread(&cut_coul,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
     fread(&tail_flag,sizeof(int),1,fp);
     fread(&ncoultablebits,sizeof(int),1,fp);
     fread(&tabinner,sizeof(double),1,fp);
   }
   MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
   MPI_Bcast(&tail_flag,1,MPI_INT,0,world);
   MPI_Bcast(&ncoultablebits,1,MPI_INT,0,world);
   MPI_Bcast(&tabinner,1,MPI_DOUBLE,0,world);
 }
 
 
 /* ----------------------------------------------------------------------
    proc 0 writes to data file
 ------------------------------------------------------------------------- */
 
 // Write one line per atom type: the type index followed by the like-type
 // epsilon and sigma.
 void PairLJCutCoulLong::write_data(FILE *fp)
 {
   int itype = 1;
   while (itype <= atom->ntypes) {
     fprintf(fp,"%d %g %g\n",itype,epsilon[itype][itype],sigma[itype][itype]);
     itype++;
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes all pairs to data file
 ------------------------------------------------------------------------- */
 
 // Write one line per i <= j type pair: both type indices followed by
 // epsilon, sigma and the LJ cutoff of that pair.
 void PairLJCutCoulLong::write_data_all(FILE *fp)
 {
   for (int itype = 1; itype <= atom->ntypes; itype++) {
     for (int jtype = itype; jtype <= atom->ntypes; jtype++) {
       fprintf(fp,"%d %d %g %g %g\n",itype,jtype,
               epsilon[itype][jtype],sigma[itype][jtype],cut_lj[itype][jtype]);
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Energy and force factor for a single pair.
 //   i, j           : atom indices, used to read atom->q
 //   itype, jtype   : atom types, used to index the coefficient arrays
 //   rsq            : squared separation of the pair
 //   factor_coul/lj : scaling factors for the Coulomb and LJ parts
 //   fforce         : output, (forcecoul + factor_lj*forcelj) / rsq
 // Returns the Coulomb plus LJ energy of the pair; each part contributes
 // only inside its own cutoff.
 double PairLJCutCoulLong::single(int i, int j, int itype, int jtype,
                                  double rsq,
                                  double factor_coul, double factor_lj,
                                  double &fforce)
 {
   double r2inv,r6inv,r,grij,expm2,t,erfc,prefactor;
   double fraction,table,forcecoul,forcelj,phicoul,philj;
   int itable;
 
   r2inv = 1.0/rsq;
   if (rsq < cut_coulsq) {
     // analytic branch: tables disabled or pair inside the table's inner bound
     if (!ncoultablebits || rsq <= tabinnersq) {
       r = sqrt(rsq);
       grij = g_ewald * r;
       expm2 = exp(-grij*grij);
       // erfc is a polynomial in t (coefficients A1..A5) times expm2
       t = 1.0 / (1.0 + EWALD_P*grij);
       erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
       prefactor = force->qqrd2e * atom->q[i]*atom->q[j]/r;
       forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
       if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
     } else {
       // tabulated branch: the table index is taken from the bit pattern of
       // rsq stored as a float, then linearly interpolated
       union_int_float_t rsq_lookup_single;
       rsq_lookup_single.f = rsq;
       itable = rsq_lookup_single.i & ncoulmask;
       itable >>= ncoulshiftbits;
       fraction = (rsq_lookup_single.f - rtable[itable]) * drtable[itable];
       table = ftable[itable] + fraction*dftable[itable];
       forcecoul = atom->q[i]*atom->q[j] * table;
       if (factor_coul < 1.0) {
         // prefactor is only assigned on this path when factor_coul < 1.0,
         // which is also the only case in which it is read below
         table = ctable[itable] + fraction*dctable[itable];
         prefactor = atom->q[i]*atom->q[j] * table;
         forcecoul -= (1.0-factor_coul)*prefactor;
       }
     }
   } else forcecoul = 0.0;
 
   if (rsq < cut_ljsq[itype][jtype]) {
     r6inv = r2inv*r2inv*r2inv;
     forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
   } else forcelj = 0.0;
 
   fforce = (forcecoul + factor_lj*forcelj) * r2inv;
 
   // energy: reuses prefactor/erfc/itable/fraction from the force section,
   // which tested the same conditions
   double eng = 0.0;
   if (rsq < cut_coulsq) {
     if (!ncoultablebits || rsq <= tabinnersq)
       phicoul = prefactor*erfc;
     else {
       table = etable[itable] + fraction*detable[itable];
       phicoul = atom->q[i]*atom->q[j] * table;
     }
     if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*prefactor;
     eng += phicoul;
   }
 
   if (rsq < cut_ljsq[itype][jtype]) {
     philj = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
       offset[itype][jtype];
     eng += factor_lj*philj;
   }
 
   return eng;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Look up an internal quantity by keyword.
 //   str : keyword to look up
 //   dim : set to 0 for "cut_coul", 2 for every other keyword
 // Returns the address of cut_coul, the epsilon or sigma member, or NULL
 // when the keyword is not recognized (dim is then 2).
 void *PairLJCutCoulLong::extract(const char *str, int &dim)
 {
   // cut_coul is returned by address with dim = 0
 
   dim = 0;
   if (!strcmp(str,"cut_coul")) return (void *) &cut_coul;
 
   // epsilon and sigma are reported with dim = 2
 
   dim = 2;
   if (!strcmp(str,"epsilon")) return (void *) epsilon;
   else if (!strcmp(str,"sigma")) return (void *) sigma;
 
   return NULL;
 }
diff --git a/src/KSPACE/pair_lj_long_coul_long.cpp b/src/KSPACE/pair_lj_long_coul_long.cpp
index 32d962581..06e77482d 100644
--- a/src/KSPACE/pair_lj_long_coul_long.cpp
+++ b/src/KSPACE/pair_lj_long_coul_long.cpp
@@ -1,1041 +1,1041 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Pieter J. in 't Veld (SNL)
    Tabulation for long-range dispersion added by Wayne Mitchell (Loyola 
    University New Orleans)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "math_vector.h"
 #include "pair_lj_long_coul_long.h"
 #include "atom.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "force.h"
 #include "kspace.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 // constants used by the "series real space" Coulomb branches in this file:
 //   t = 1/(1+EWALD_P*x), then ((((A5*t+A4)*t+A3)*t+A2)*t+A1)*t scaled by exp(-x*x)
 // NOTE(review): A1..A5 and EWALD_P match the Abramowitz & Stegun 7.1.26
 // polynomial fit of erfc(x), and EWALD_F matches 2/sqrt(pi) -- confirm
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 /* ---------------------------------------------------------------------- */
 
 PairLJLongCoulLong::PairLJLongCoulLong(LAMMPS *lmp) : Pair(lmp)
 {
   // flags that are switched on for this style
 
   pppmflag = 1;
   ewaldflag = 1;
   dispersionflag = 1;
   respa_enable = 1;
   writedata = 1;
 
   // no force tables yet; the destructor frees them only if ftable is set
 
   ftable = NULL;
   fdisptable = NULL;
 
   qdist = 0.0;
 }
  
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::options(char **arg, int order)
 {
   const char *option[] = {"long", "cut", "off", NULL};
   int i;
 
   if (!*arg) error->all(FLERR,"Illegal pair_style lj/long/coul/long command");
   for (i=0; option[i]&&strcmp(arg[0], option[i]); ++i);
   switch (i) {
     default: error->all(FLERR,"Illegal pair_style lj/long/coul/long command");
     case 0: ewald_order |= 1<<order; break;
     case 2: ewald_off |= 1<<order;
     case 1: break;
   }
 }
 
 void PairLJLongCoulLong::settings(int narg, char **arg)
 {
   if (narg != 3 && narg != 4) error->all(FLERR,"Illegal pair_style command");
 
   ewald_off = 0;
   ewald_order = 0;
   options(arg, 6);
   options(++arg, 1);
   if (!comm->me && ewald_order == ((1<<1) | (1<<6)))
     error->warning(FLERR,"Using largest cutoff for lj/long/coul/long");
   if (!*(++arg)) 
     error->all(FLERR,"Cutoffs missing in pair_style lj/long/coul/long");
   if (!((ewald_order^ewald_off) & (1<<1))) 
     error->all(FLERR,
                "Coulomb cut not supported in pair_style lj/long/coul/long");
   cut_lj_global = force->numeric(FLERR,*(arg++));
   if (narg == 4 && ((ewald_order & 0x42) == 0x42)) 
     error->all(FLERR,"Only one cutoff allowed when requesting all long");
   if (narg == 4) cut_coul = force->numeric(FLERR,*arg);
   else cut_coul = cut_lj_global;
 
   if (allocated) {    
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i+1; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut_lj[i][j] = cut_lj_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
 ------------------------------------------------------------------------- */
 
 PairLJLongCoulLong::~PairLJLongCoulLong()
 {
   // the per-type arrays exist only after allocate() has set "allocated"
 
   if (allocated) {
     memory->destroy(setflag);
     memory->destroy(cutsq);
 
     // coefficients as stored by coeff() / read_restart()
 
     memory->destroy(epsilon_read);
     memory->destroy(sigma_read);
     memory->destroy(cut_lj_read);
 
     // coefficients and prefactors filled in by init_one()
 
     memory->destroy(epsilon);
     memory->destroy(sigma);
     memory->destroy(cut_lj);
     memory->destroy(cut_ljsq);
     memory->destroy(lj1);
     memory->destroy(lj2);
     memory->destroy(lj3);
     memory->destroy(lj4);
     memory->destroy(offset);
   }
 
   if (ftable != NULL) free_tables();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::allocate()
 {
   // all arrays are (ntypes+1) x (ntypes+1) so they can be indexed from 1
 
   const int np1 = atom->ntypes + 1;
   allocated = 1;
 
   // setflag: clear the upper triangle, diagonal included
 
   memory->create(setflag,np1,np1,"pair:setflag");
   for (int itype = 1; itype < np1; ++itype) {
     for (int jtype = itype; jtype < np1; ++jtype) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,np1,np1,"pair:cutsq");
 
   // coefficients as read, and the values derived from them
 
   memory->create(cut_lj_read,np1,np1,"pair:cut_lj_read");
   memory->create(cut_lj,np1,np1,"pair:cut_lj");
   memory->create(cut_ljsq,np1,np1,"pair:cut_ljsq");
   memory->create(epsilon_read,np1,np1,"pair:epsilon_read");
   memory->create(epsilon,np1,np1,"pair:epsilon");
   memory->create(sigma_read,np1,np1,"pair:sigma_read");
   memory->create(sigma,np1,np1,"pair:sigma");
 
   // force/energy prefactors and energy offset
 
   memory->create(lj1,np1,np1,"pair:lj1");
   memory->create(lj2,np1,np1,"pair:lj2");
   memory->create(lj3,np1,np1,"pair:lj3");
   memory->create(lj4,np1,np1,"pair:lj4");
   memory->create(offset,np1,np1,"pair:offset");
 }
 
 /* ----------------------------------------------------------------------
    extract protected data from object
 ------------------------------------------------------------------------- */
 
 void *PairLJLongCoulLong::extract(const char *id, int &dim)
 {
   // names and the data they refer to, matched index for index;
   // both lists end with a NULL entry
 
   const char *names[] = {
     "B", "sigma", "epsilon", "ewald_order", "ewald_cut", "ewald_mix",
     "cut_coul", "cut_LJ", NULL};
   void *targets[] = {
     lj4, sigma, epsilon, &ewald_order, &cut_coul, &mix_flag,
     &cut_coul, &cut_lj_global, NULL};
 
   // stop at the first match, or on the NULL terminator if there is none
 
   int slot = 0;
   while (names[slot] != NULL && strcmp(names[slot], id) != 0) ++slot;
 
   // the first three entries get dim = 2; everything else, including
   // "not found" (which returns the NULL terminator), gets dim = 0
 
   dim = (slot <= 2) ? 2 : 0;
   return targets[slot];
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::coeff(int narg, char **arg)
 {
   if (narg < 4 || narg > 5) error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double epsilon_one = force->numeric(FLERR,arg[2]);
   double sigma_one = force->numeric(FLERR,arg[3]);
 
   double cut_lj_one = cut_lj_global;
   if (narg == 5) cut_lj_one = force->numeric(FLERR,arg[4]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       epsilon_read[i][j] = epsilon_one;
       sigma_read[i][j] = sigma_one;
       cut_lj_read[i][j] = cut_lj_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::init_style()
 {
   // require an atom style with charge defined
   // (only enforced when bit 1, the Coulomb bit, is set in ewald_order)
 
   if (!atom->q_flag && (ewald_order&(1<<1)))
     error->all(FLERR,
         "Invoking coulombic in pair style lj/coul requires atom attribute q");
 
   // request regular or rRESPA neighbor lists if neighrequest_flag != 0
   // NOTE(review): force->kspace is dereferenced here, before the NULL check
   // further down in this function -- confirm it can never be NULL at this point
 
   if (force->kspace->neighrequest_flag) {
     int irequest;
 
     if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
       // respa = 0: neither inner nor middle level is defined
       // respa = 1: inner level only, respa = 2: middle level is defined
       int respa = 0;
       if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
       if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
 
       // request ids are the ones init_list() dispatches on:
       // 1 -> listinner, 2 -> listmiddle, 3 -> listouter
 
-      if (respa == 0) irequest = neighbor->request(this);
+      if (respa == 0) irequest = neighbor->request(this,instance_me);
       else if (respa == 1) {
-        irequest = neighbor->request(this);
+        irequest = neighbor->request(this,instance_me);
         neighbor->requests[irequest]->id = 1;
         neighbor->requests[irequest]->half = 0;
         neighbor->requests[irequest]->respainner = 1;
-        irequest = neighbor->request(this);
+        irequest = neighbor->request(this,instance_me);
         neighbor->requests[irequest]->id = 3;
         neighbor->requests[irequest]->half = 0;
         neighbor->requests[irequest]->respaouter = 1;
       } else {
-        irequest = neighbor->request(this);
+        irequest = neighbor->request(this,instance_me);
         neighbor->requests[irequest]->id = 1;
         neighbor->requests[irequest]->half = 0;
         neighbor->requests[irequest]->respainner = 1;
-        irequest = neighbor->request(this);
+        irequest = neighbor->request(this,instance_me);
         neighbor->requests[irequest]->id = 2;
         neighbor->requests[irequest]->half = 0;
         neighbor->requests[irequest]->respamiddle = 1;
-        irequest = neighbor->request(this);
+        irequest = neighbor->request(this,instance_me);
         neighbor->requests[irequest]->id = 3;
         neighbor->requests[irequest]->half = 0;
         neighbor->requests[irequest]->respaouter = 1;
       }
 
-    } else irequest = neighbor->request(this);
+    } else irequest = neighbor->request(this,instance_me);
   }
   cut_coulsq = cut_coul * cut_coul;
 
   // set rRESPA cutoffs
   // cut_respa == NULL later disables the interior cutoff check in init_one()
 
   if (strstr(update->integrate_style,"respa") &&
       ((Respa *) update->integrate)->level_inner >= 0)
     cut_respa = ((Respa *) update->integrate)->cutoff;
   else cut_respa = NULL;
 
   // ensure use of KSpace long-range solver, set g_ewald
 
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style requires a KSpace style");
   if (force->kspace) g_ewald = force->kspace->g_ewald;
   if (force->kspace) g_ewald_6 = force->kspace->g_ewald_6;
   
   // setup force tables
   // Coulomb and dispersion tables are each built only if their bit count is nonzero
   
   if (ncoultablebits) init_tables(cut_coul,cut_respa);
   if (ndisptablebits) init_tables_disp(cut_lj_global);
   
 }
 
 /* ----------------------------------------------------------------------
    neighbor callback to inform pair style of neighbor list to use
    regular or rRESPA
 ------------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::init_list(int id, NeighList *ptr)
 {
   // store the list under the slot selected by the request id;
   // any other id is ignored
 
   switch (id) {
     case 0:
       list = ptr;
       break;
     case 1:
       listinner = ptr;
       break;
     case 2:
       listmiddle = ptr;
       break;
     case 3:
       listouter = ptr;
       break;
     default:
       break;
   }
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairLJLongCoulLong::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) {
     epsilon[i][j] = mix_energy(epsilon_read[i][i],epsilon_read[j][j],
                                sigma_read[i][i],sigma_read[j][j]);
     sigma[i][j] = mix_distance(sigma_read[i][i],sigma_read[j][j]);
     if (ewald_order&(1<<6))
       cut_lj[i][j] = cut_lj_global;
     else
       cut_lj[i][j] = mix_distance(cut_lj_read[i][i],cut_lj_read[j][j]);
   }
   else {
     sigma[i][j] = sigma_read[i][j];
     epsilon[i][j] = epsilon_read[i][j];
     cut_lj[i][j] = cut_lj_read[i][j];
   }
 
   double cut = MAX(cut_lj[i][j], cut_coul + 2.0*qdist);
   cutsq[i][j] = cut*cut;
   cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
 
   lj1[i][j] = 48.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj2[i][j] = 24.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
   lj3[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj4[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
 
   // check interior rRESPA cutoff
 
   if (cut_respa && MIN(cut_lj[i][j],cut_coul) < cut_respa[3])
     error->all(FLERR,"Pair cutoff < Respa interior cutoff");
 
   if (offset_flag) {
     double ratio = sigma[i][j] / cut_lj[i][j];
     offset[i][j] = 4.0 * epsilon[i][j] * (pow(ratio,12.0) - pow(ratio,6.0));
   } else offset[i][j] = 0.0;
 
   cutsq[j][i] = cutsq[i][j];
   cut_ljsq[j][i] = cut_ljsq[i][j];
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   lj4[j][i] = lj4[i][j];
   offset[j][i] = offset[i][j];
 
   return cut;
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::write_restart(FILE *fp)
 {
   // global settings come first, then one record per pair with j >= i
 
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
 
       // coefficients follow only for pairs that were explicitly set
 
       if (!setflag[itype][jtype]) continue;
       fwrite(&epsilon_read[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigma_read[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut_lj_read[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   const int me = comm->me;
   const int ntypes = atom->ntypes;
 
   // same record order as write_restart(): flag first, then the three
   // coefficients if the flag is set; proc 0 reads, everyone receives
 
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       if (me == 0) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
       if (!setflag[itype][jtype]) continue;
 
       if (me == 0) {
         fread(&epsilon_read[itype][jtype],sizeof(double),1,fp);
         fread(&sigma_read[itype][jtype],sizeof(double),1,fp);
         fread(&cut_lj_read[itype][jtype],sizeof(double),1,fp);
       }
       MPI_Bcast(&epsilon_read[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&sigma_read[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut_lj_read[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::write_restart_settings(FILE *fp)
 {
   // write the global settings as raw binary values
   // the order and types here must stay in step with read_restart_settings()
   // NOTE(review): ewald_off and the dispersion table settings
   // (ndisptablebits) are not written here -- confirm that is intended
 
   fwrite(&cut_lj_global,sizeof(double),1,fp);
   fwrite(&cut_coul,sizeof(double),1,fp);
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
   fwrite(&ncoultablebits,sizeof(int),1,fp);
   fwrite(&tabinner,sizeof(double),1,fp);
   fwrite(&ewald_order,sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::read_restart_settings(FILE *fp)
 {
   // proc 0 reads the values in the order write_restart_settings() wrote them
   // NOTE(review): fread() return values are not checked, so a truncated
   // file is not detected here
 
   if (comm->me == 0) {
     fread(&cut_lj_global,sizeof(double),1,fp);
     fread(&cut_coul,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
     fread(&ncoultablebits,sizeof(int),1,fp);
     fread(&tabinner,sizeof(double),1,fp);
     fread(&ewald_order,sizeof(int),1,fp);
   }
 
   // broadcast every value from proc 0 to all procs in "world"
 
   MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
   MPI_Bcast(&ncoultablebits,1,MPI_INT,0,world);
   MPI_Bcast(&tabinner,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&ewald_order,1,MPI_INT,0,world);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to data file
 ------------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::write_data(FILE *fp)
 {
   // one line per atom type: type index, then the i,i epsilon and sigma
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     const double eps = epsilon_read[itype][itype];
     const double sig = sigma_read[itype][itype];
     fprintf(fp,"%d %g %g\n",itype,eps,sig);
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes all pairs to data file
 ------------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::write_data_all(FILE *fp)
 {
   // one line per type pair with j >= i: both indices, epsilon, sigma, LJ cutoff
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       const double eps = epsilon_read[itype][jtype];
       const double sig = sigma_read[itype][jtype];
       const double rc = cut_lj_read[itype][jtype];
       fprintf(fp,"%d %d %g %g %g\n",itype,jtype,eps,sig,rc);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    compute pair interactions
 ------------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::compute(int eflag, int vflag)
 {
   // Pair forces over the neighbor list "list".
   // For every pair inside its cutoff this adds a Coulomb and an LJ
   // contribution to atom->f and, when evflag is set, passes the pair
   // energies and force to ev_tally().
   //   eflag, vflag: forwarded to ev_setup(); eflag also gates the energy terms
 
   double evdwl,ecoul,fpair;
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   // x0 and f0 point at the first atom's data; atom k is addressed as x0+3*k
   // assumes the rows of atom->x and atom->f are stored contiguously -- TODO confirm
 
   double **x = atom->x, *x0 = x[0];
   double **f = atom->f, *f0 = f[0], *fi = f0;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   // order1 / order6: nonzero when bit 1 (Coulomb) / bit 6 (LJ) is set in ewald_order
 
   int i, j, order1 = ewald_order&(1<<1), order6 = ewald_order&(1<<6);
   int *ineigh, *ineighn, *jneigh, *jneighn, typei, typej, ni;
   double qi = 0.0, qri = 0.0;
   double *cutsqi, *cut_ljsqi, *lj1i, *lj2i, *lj3i, *lj4i, *offseti;
   double rsq, r2inv, force_coul, force_lj;
   double g2 = g_ewald_6*g_ewald_6, g6 = g2*g2*g2, g8 = g6*g2;
   vector xi, d;
 
   ineighn = (ineigh = list->ilist)+list->inum;
 
   for (; ineigh<ineighn; ++ineigh) {                        // loop over my atoms
     i = *ineigh; fi = f0+3*i;
     if (order1) qri = (qi = q[i])*qqrd2e;                // initialize constants
     // cache the per-type rows for atom i so the inner loop indexes by typej only
     offseti = offset[typei = type[i]];
     lj1i = lj1[typei]; lj2i = lj2[typei]; lj3i = lj3[typei]; lj4i = lj4[typei];
     cutsqi = cutsq[typei]; cut_ljsqi = cut_ljsq[typei];
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
     jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i];
 
     for (; jneigh<jneighn; ++jneigh) {                        // loop over neighbors
       j = *jneigh;
       // ni indexes special_coul/special_lj; ni == 0 takes the unscaled branch
       ni = sbmask(j);
       j &= NEIGHMASK;
 
       { register double *xj = x0+(j+(j<<1));
         d[0] = xi[0] - xj[0];                                // pair vector
         d[1] = xi[1] - xj[1];
         d[2] = xi[2] - xj[2]; }
 
       if ((rsq = vec_dot(d, d)) >= cutsqi[typej = type[j]]) continue;
       r2inv = 1.0/rsq;
 
       if (order1 && (rsq < cut_coulsq)) {                // coulombic
         if (!ncoultablebits || rsq <= tabinnersq) {        // series real space
           // x = g_ewald*r; t is updated in place and ends up holding the
           // polynomial term that is also used as the energy
           register double r = sqrt(rsq), x = g_ewald*r;
           register double s = qri*q[j], t = 1.0/(1.0+EWALD_P*x);
           if (ni == 0) {
             s *= g_ewald*exp(-x*x);
             force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s;
             if (eflag) ecoul = t;
           }
           else {                                        // special case
             // r is reused to hold the excluded fraction of the plain Coulomb term
             r = s*(1.0-special_coul[ni])/r; s *= g_ewald*exp(-x*x);
             force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s-r;
             if (eflag) ecoul = t-r;
           }
         }                                                // table real space
         else {
           // table index k is taken from the bit pattern of rsq viewed through
           // the union; f is the linear interpolation fraction within the bin
           register union_int_float_t t;
           t.f = rsq;
           register const int k = (t.i & ncoulmask)>>ncoulshiftbits;
           register double f = (rsq-rtable[k])*drtable[k], qiqj = qi*q[j];
           if (ni == 0) {
             force_coul = qiqj*(ftable[k]+f*dftable[k]);
             if (eflag) ecoul = qiqj*(etable[k]+f*detable[k]);
           }
           else {                                        // special case
             // t.f is reused as scratch storage for the correction term
             t.f = (1.0-special_coul[ni])*(ctable[k]+f*dctable[k]);
             force_coul = qiqj*(ftable[k]+f*dftable[k]-t.f);
             if (eflag) ecoul = qiqj*(etable[k]+f*detable[k]-t.f);
           }
         }
       }
       else force_coul = ecoul = 0.0;
 
       if (rsq < cut_ljsqi[typej]) {                        // lj
         if (order6) {                                        // long-range lj
           if(!ndisptablebits || rsq <= tabinnerdispsq) {                                // series real space
             // rn starts as r^-6 and is squared in place to r^-12 below;
             // x2 is overwritten with a2*exp(-g2*rsq)*lj4
             register double rn = r2inv*r2inv*r2inv;
             register double x2 = g2*rsq, a2 = 1.0/x2;
             x2 = a2*exp(-x2)*lj4i[typej];
             if (ni == 0) {
               force_lj =
               (rn*=rn)*lj1i[typej]-g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq;
               if (eflag)
                 evdwl = rn*lj3i[typej]-g6*((a2+1.0)*a2+0.5)*x2;
             }
             else {                                        // special case
               // t is computed from r^-6, before rn is squared
               register double f = special_lj[ni], t = rn*(1.0-f);
               force_lj = f*(rn *= rn)*lj1i[typej]-
               g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*lj2i[typej];
               if (eflag)
                 evdwl = f*rn*lj3i[typej]-g6*((a2+1.0)*a2+0.5)*x2+t*lj4i[typej];
             }
           }
           else {                                                // table real space
             // same bit-pattern lookup as the Coulomb table, on the dispersion tables
             register union_int_float_t disp_t;
             disp_t.f = rsq;
             register const int disp_k = (disp_t.i & ndispmask)>>ndispshiftbits;
             register double f_disp = (rsq-rdisptable[disp_k])*drdisptable[disp_k];
             register double rn = r2inv*r2inv*r2inv;
             if (ni == 0) {
               force_lj = (rn*=rn)*lj1i[typej]-(fdisptable[disp_k]+f_disp*dfdisptable[disp_k])*lj4i[typej];
               if (eflag) evdwl = rn*lj3i[typej]-(edisptable[disp_k]+f_disp*dedisptable[disp_k])*lj4i[typej];
             }
             else {                                        // special case
               register double f = special_lj[ni], t = rn*(1.0-f);
               force_lj = f*(rn *= rn)*lj1i[typej]-(fdisptable[disp_k]+f_disp*dfdisptable[disp_k])*lj4i[typej]+t*lj2i[typej];
               if (eflag) evdwl = f*rn*lj3i[typej]-(edisptable[disp_k]+f_disp*dedisptable[disp_k])*lj4i[typej]+t*lj4i[typej];
             }
           }
         }
         else {                                                // cut lj
           register double rn = r2inv*r2inv*r2inv;
           if (ni == 0) {
             force_lj = rn*(rn*lj1i[typej]-lj2i[typej]);
             if (eflag) evdwl = rn*(rn*lj3i[typej]-lj4i[typej])-offseti[typej];
           }
           else {                                        // special case
             register double f = special_lj[ni];
             force_lj = f*rn*(rn*lj1i[typej]-lj2i[typej]);
             if (eflag)
               evdwl = f * (rn*(rn*lj3i[typej]-lj4i[typej])-offseti[typej]);
           }
         }
       }
       
       else force_lj = evdwl = 0.0;
 
       fpair = (force_coul+force_lj)*r2inv;
 
       // atom j is updated only if newton_pair is on or j < nlocal
 
       if (newton_pair || j < nlocal) {
         register double *fj = f0+(j+(j<<1)), f;
         fi[0] += f = d[0]*fpair; fj[0] -= f;
         fi[1] += f = d[1]*fpair; fj[1] -= f;
         fi[2] += f = d[2]*fpair; fj[2] -= f;
       }
       else {
         fi[0] += d[0]*fpair;
         fi[1] += d[1]*fpair;
         fi[2] += d[2]*fpair;
       }
 
       if (evflag) ev_tally(i,j,nlocal,newton_pair,
                            evdwl,ecoul,fpair,d[0],d[1],d[2]);
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::compute_inner()
 {
   // Pair forces over the neighbor list "listinner".
   // Uses the plain Coulomb term qqrd2e*qi*qj/r and the cut LJ term for
   // pairs with r < cut_respa[1], scaled smoothly from 1 to 0 between
   // cut_respa[0] and cut_respa[1].  Only atom->f is updated; no energy
   // or virial is tallied here.
 
   // NOTE(review): force_coul is initialized once and only reassigned inside
   // the Coulomb branch below, so a pair that skips that branch would reuse
   // the previous pair's value -- confirm rsq < cut_coulsq always holds here
   double rsq, r2inv, force_coul = 0.0, force_lj, fpair;
 
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *x0 = atom->x[0], *f0 = atom->f[0], *fi = f0, *q = atom->q;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
 
   double cut_out_on = cut_respa[0];
   double cut_out_off = cut_respa[1];
 
 
   double cut_out_diff = cut_out_off - cut_out_on;
   double cut_out_on_sq = cut_out_on*cut_out_on;
   double cut_out_off_sq = cut_out_off*cut_out_off;
 
   int *ineigh, *ineighn, *jneigh, *jneighn, typei, typej, ni;
   // order1 is nonzero if bit 1 is set in ewald_order or clear in ewald_off
   int i, j, order1 = (ewald_order|(ewald_off^-1))&(1<<1);
   double qri, *cut_ljsqi, *lj1i, *lj2i;
   vector xi, d;
 
   ineighn = (ineigh = listinner->ilist)+listinner->inum;
   for (; ineigh<ineighn; ++ineigh) {                        // loop over my atoms
     i = *ineigh; fi = f0+3*i;
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
     cut_ljsqi = cut_ljsq[typei = type[i]];
     lj1i = lj1[typei]; lj2i = lj2[typei];
     jneighn = (jneigh = listinner->firstneigh[i])+listinner->numneigh[i];
     for (; jneigh<jneighn; ++jneigh) {                        // loop over neighbors
       j = *jneigh;
       // ni indexes special_coul/special_lj; ni == 0 means no scaling
       ni = sbmask(j);
       j &= NEIGHMASK;
 
       { register double *xj = x0+(j+(j<<1));
         d[0] = xi[0] - xj[0];                                // pair vector
         d[1] = xi[1] - xj[1];
         d[2] = xi[2] - xj[2]; }
 
       if ((rsq = vec_dot(d, d)) >= cut_out_off_sq) continue;
       r2inv = 1.0/rsq;
 
       if (order1 && (rsq < cut_coulsq)) {                       // coulombic
         qri = qqrd2e*q[i];
         force_coul = ni == 0 ?
           qri*q[j]*sqrt(r2inv) : qri*q[j]*sqrt(r2inv)*special_coul[ni];
       }
 
       if (rsq < cut_ljsqi[typej = type[j]]) {                // lennard-jones
         register double rn = r2inv*r2inv*r2inv;
         force_lj = ni == 0 ?
           rn*(rn*lj1i[typej]-lj2i[typej]) :
           rn*(rn*lj1i[typej]-lj2i[typej])*special_lj[ni];
       }
       else force_lj = 0.0;
 
       fpair = (force_coul + force_lj) * r2inv;
 
       if (rsq > cut_out_on_sq) {                        // switching
         // factor is 1 at rsw = 0 (cut_out_on) and 0 at rsw = 1 (cut_out_off)
         register double rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff;
         fpair  *= 1.0 + rsw*rsw*(2.0*rsw-3.0);
       }
 
       if (newton_pair || j < nlocal) {                        // force update
         register double *fj = f0+(j+(j<<1)), f;
         fi[0] += f = d[0]*fpair; fj[0] -= f;
         fi[1] += f = d[1]*fpair; fj[1] -= f;
         fi[2] += f = d[2]*fpair; fj[2] -= f;
       }
       else {
         fi[0] += d[0]*fpair;
         fi[1] += d[1]*fpair;
         fi[2] += d[2]*fpair;
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::compute_middle()
 {
   // Pair forces over the neighbor list "listmiddle".
   // Uses the plain Coulomb term qqrd2e*qi*qj/r and the cut LJ term for
   // pairs with cut_respa[0] < r < cut_respa[3].  The force is scaled
   // smoothly from 0 to 1 between cut_respa[0] and cut_respa[1], and from
   // 1 to 0 between cut_respa[2] and cut_respa[3].  Only atom->f is
   // updated; no energy or virial is tallied here.
 
   // NOTE(review): force_coul is initialized once and only reassigned inside
   // the Coulomb branch below, so a pair that skips that branch would reuse
   // the previous pair's value -- confirm rsq < cut_coulsq always holds here
   double rsq, r2inv, force_coul = 0.0, force_lj, fpair;
 
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *x0 = atom->x[0], *f0 = atom->f[0], *fi = f0, *q = atom->q;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   double cut_in_off = cut_respa[0];
   double cut_in_on = cut_respa[1];
   double cut_out_on = cut_respa[2];
   double cut_out_off = cut_respa[3];
 
   double cut_in_diff = cut_in_on - cut_in_off;
   double cut_out_diff = cut_out_off - cut_out_on;
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
   double cut_out_on_sq = cut_out_on*cut_out_on;
   double cut_out_off_sq = cut_out_off*cut_out_off;
 
   int *ineigh, *ineighn, *jneigh, *jneighn, typei, typej, ni;
   // order1 is nonzero if bit 1 is set in ewald_order or clear in ewald_off
   int i, j, order1 = (ewald_order|(ewald_off^-1))&(1<<1);
   double qri, *cut_ljsqi, *lj1i, *lj2i;
   vector xi, d;
 
   ineighn = (ineigh = listmiddle->ilist)+listmiddle->inum;
 
   for (; ineigh<ineighn; ++ineigh) {                        // loop over my atoms
     i = *ineigh; fi = f0+3*i;
     // qri is set only when order1 is on, and is read only under the same condition
     if (order1) qri = qqrd2e*q[i];
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
     cut_ljsqi = cut_ljsq[typei = type[i]];
     lj1i = lj1[typei]; lj2i = lj2[typei];
     jneighn = (jneigh = listmiddle->firstneigh[i])+listmiddle->numneigh[i];
 
     for (; jneigh<jneighn; ++jneigh) {
       j = *jneigh;
       // ni indexes special_coul/special_lj; ni == 0 means no scaling
       ni = sbmask(j);
       j &= NEIGHMASK;
 
       { register double *xj = x0+(j+(j<<1));
         d[0] = xi[0] - xj[0];                                // pair vector
         d[1] = xi[1] - xj[1];
         d[2] = xi[2] - xj[2]; }
 
       // keep only pairs strictly between the inner-off and outer-off radii
       if ((rsq = vec_dot(d, d)) >= cut_out_off_sq) continue;
       if (rsq <= cut_in_off_sq) continue;
       r2inv = 1.0/rsq;
 
       if (order1 && (rsq < cut_coulsq))                        // coulombic
         force_coul = ni == 0 ?
           qri*q[j]*sqrt(r2inv) : qri*q[j]*sqrt(r2inv)*special_coul[ni];
 
       if (rsq < cut_ljsqi[typej = type[j]]) {                // lennard-jones
         register double rn = r2inv*r2inv*r2inv;
         force_lj = ni == 0 ?
           rn*(rn*lj1i[typej]-lj2i[typej]) :
           rn*(rn*lj1i[typej]-lj2i[typej])*special_lj[ni];
       }
       else force_lj = 0.0;
 
       fpair = (force_coul + force_lj) * r2inv;
 
       if (rsq < cut_in_on_sq) {                                // switching
         // factor is 0 at rsw = 0 (cut_in_off) and 1 at rsw = 1 (cut_in_on)
         register double rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff;
         fpair  *= rsw*rsw*(3.0 - 2.0*rsw);
       }
       if (rsq > cut_out_on_sq) {
         // factor is 1 at rsw = 0 (cut_out_on) and 0 at rsw = 1 (cut_out_off)
         register double rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff;
         fpair  *= 1.0 + rsw*rsw*(2.0*rsw-3.0);
       }
 
       if (newton_pair || j < nlocal) {                        // force update
         register double *fj = f0+(j+(j<<1)), f;
         fi[0] += f = d[0]*fpair; fj[0] -= f;
         fi[1] += f = d[1]*fpair; fj[1] -= f;
         fi[2] += f = d[2]*fpair; fj[2] -= f;
       }
       else {
         fi[0] += d[0]*fpair;
         fi[1] += d[1]*fpair;
         fi[2] += d[2]*fpair;
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJLongCoulLong::compute_outer(int eflag, int vflag)
 {
   double evdwl,ecoul,fvirial,fpair;
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = 0;
   
   double **x = atom->x, *x0 = x[0];
   double **f = atom->f, *f0 = f[0], *fi = f0;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
   
   int i, j, order1 = ewald_order&(1<<1), order6 = ewald_order&(1<<6);
   int *ineigh, *ineighn, *jneigh, *jneighn, typei, typej, ni, respa_flag;
   double qi = 0.0, qri = 0.0;
   double *cutsqi, *cut_ljsqi, *lj1i, *lj2i, *lj3i, *lj4i, *offseti;
   double rsq, r2inv, force_coul, force_lj;
   double g2 = g_ewald_6*g_ewald_6, g6 = g2*g2*g2, g8 = g6*g2;
   double respa_lj = 0.0, respa_coul = 0.0, frespa = 0.0;
   vector xi, d;
   
   double cut_in_off = cut_respa[2];
   double cut_in_on = cut_respa[3];
   
   double cut_in_diff = cut_in_on - cut_in_off;
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
   
   ineighn = (ineigh = listouter->ilist)+listouter->inum;
   
   for (; ineigh<ineighn; ++ineigh) {                        // loop over my atoms
     i = *ineigh; fi = f0+3*i;
     if (order1) qri = (qi = q[i])*qqrd2e;                // initialize constants
     offseti = offset[typei = type[i]];
     lj1i = lj1[typei]; lj2i = lj2[typei]; lj3i = lj3[typei]; lj4i = lj4[typei];
     cutsqi = cutsq[typei]; cut_ljsqi = cut_ljsq[typei];
     memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
     jneighn = (jneigh = listouter->firstneigh[i])+listouter->numneigh[i];
     
     for (; jneigh<jneighn; ++jneigh) {                        // loop over neighbors
       j = *jneigh;
       ni = sbmask(j);
       j &= NEIGHMASK;
       
       { register double *xj = x0+(j+(j<<1));
         d[0] = xi[0] - xj[0];                                // pair vector
         d[1] = xi[1] - xj[1];
         d[2] = xi[2] - xj[2]; }
       
       if ((rsq = vec_dot(d, d)) >= cutsqi[typej = type[j]]) continue;
       r2inv = 1.0/rsq;
       
       frespa = 1.0;                                       // check whether and how to compute respa corrections
       respa_coul = 0;
       respa_lj = 0;
       respa_flag = rsq < cut_in_on_sq ? 1 : 0;
       if (respa_flag && (rsq > cut_in_off_sq)) {
         register double rsw = (sqrt(rsq)-cut_in_off)/cut_in_diff;
         frespa = 1-rsw*rsw*(3.0-2.0*rsw);
       }
       
       if (order1 && (rsq < cut_coulsq)) {                // coulombic
         if (!ncoultablebits || rsq <= tabinnersq) {        // series real space
           register double r = sqrt(rsq), s = qri*q[j];
           if (respa_flag)                                // correct for respa
             respa_coul = ni == 0 ? frespa*s/r : frespa*s/r*special_coul[ni];
           register double x = g_ewald*r, t = 1.0/(1.0+EWALD_P*x);
           if (ni == 0) {
             s *= g_ewald*exp(-x*x);
             force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s-respa_coul;
             if (eflag) ecoul = t;
           }
           else {                                        // correct for special
             r = s*(1.0-special_coul[ni])/r; s *= g_ewald*exp(-x*x);
             force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s-r-respa_coul;
             if (eflag) ecoul = t-r;
           }
         }                                                // table real space
         else {
           if (respa_flag) {
             register double r = sqrt(rsq), s = qri*q[j];
             respa_coul = ni == 0 ? frespa*s/r : frespa*s/r*special_coul[ni];
           }
           register union_int_float_t t;
           t.f = rsq;
           register const int k = (t.i & ncoulmask) >> ncoulshiftbits;
           register double f = (rsq-rtable[k])*drtable[k], qiqj = qi*q[j];
           if (ni == 0) {
             force_coul = qiqj*(ftable[k]+f*dftable[k]);
             if (eflag) ecoul = qiqj*(etable[k]+f*detable[k]);
           }
           else {                                        // correct for special
             t.f = (1.0-special_coul[ni])*(ctable[k]+f*dctable[k]);
             force_coul = qiqj*(ftable[k]+f*dftable[k]-t.f);
             if (eflag) {
               t.f = (1.0-special_coul[ni])*(ptable[k]+f*dptable[k]);
               ecoul = qiqj*(etable[k]+f*detable[k]-t.f);
             }
           }
         }
       }
        
       else force_coul = respa_coul = ecoul = 0.0;
 
       if (rsq < cut_ljsqi[typej]) {                        // lennard-jones
         register double rn = r2inv*r2inv*r2inv;
         if (respa_flag) respa_lj = ni == 0 ?                 // correct for respa
             frespa*rn*(rn*lj1i[typej]-lj2i[typej]) :
             frespa*rn*(rn*lj1i[typej]-lj2i[typej])*special_lj[ni];
         if (order6) {                                        // long-range form
           if (!ndisptablebits || rsq <= tabinnerdispsq) {
             register double x2 = g2*rsq, a2 = 1.0/x2;
             x2 = a2*exp(-x2)*lj4i[typej];
             if (ni == 0) {
               force_lj =
                 (rn*=rn)*lj1i[typej]-g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq-respa_lj;
               if (eflag) evdwl = rn*lj3i[typej]-g6*((a2+1.0)*a2+0.5)*x2;
             }
             else {                                        // correct for special
               register double f = special_lj[ni], t = rn*(1.0-f);
               force_lj = f*(rn *= rn)*lj1i[typej]-
                 g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*lj2i[typej]-respa_lj;
               if (eflag)
                 evdwl = f*rn*lj3i[typej]-g6*((a2+1.0)*a2+0.5)*x2+t*lj4i[typej];
             }
           }
           else {						// table real space
             register union_int_float_t disp_t;
             disp_t.f = rsq;
             register const int disp_k = (disp_t.i & ndispmask)>>ndispshiftbits;
             register double f_disp = (rsq-rdisptable[disp_k])*drdisptable[disp_k];
             register double rn = r2inv*r2inv*r2inv;
             if (ni == 0) {
               force_lj = (rn*=rn)*lj1i[typej]-(fdisptable[disp_k]+f_disp*dfdisptable[disp_k])*lj4i[typej]-respa_lj;
               if (eflag) evdwl = rn*lj3i[typej]-(edisptable[disp_k]+f_disp*dedisptable[disp_k])*lj4i[typej];
             }
             else {					// special case
               register double f = special_lj[ni], t = rn*(1.0-f);
               force_lj = f*(rn *= rn)*lj1i[typej]-(fdisptable[disp_k]+f_disp*dfdisptable[disp_k])*lj4i[typej]+t*lj2i[typej]-respa_lj;
               if (eflag) evdwl = f*rn*lj3i[typej]-(edisptable[disp_k]+f_disp*dedisptable[disp_k])*lj4i[typej]+t*lj4i[typej];
             }
           }
         }
         else {                                                // cut form
           if (ni == 0) {
             force_lj = rn*(rn*lj1i[typej]-lj2i[typej])-respa_lj;
             if (eflag) evdwl = rn*(rn*lj3i[typej]-lj4i[typej])-offseti[typej];
           }
           else {                                        // correct for special
             register double f = special_lj[ni];
             force_lj = f*rn*(rn*lj1i[typej]-lj2i[typej])-respa_lj;
             if (eflag)
               evdwl = f*(rn*(rn*lj3i[typej]-lj4i[typej])-offseti[typej]);
           }
         }
       }
       else force_lj = respa_lj = evdwl = 0.0;
       
       fpair = (force_coul+force_lj)*r2inv;
       
       if (newton_pair || j < nlocal) {
         register double *fj = f0+(j+(j<<1)), f;
         fi[0] += f = d[0]*fpair; fj[0] -= f;
         fi[1] += f = d[1]*fpair; fj[1] -= f;
         fi[2] += f = d[2]*fpair; fj[2] -= f;
       }
       else {
         fi[0] += d[0]*fpair;
         fi[1] += d[1]*fpair;
         fi[2] += d[2]*fpair;
       }
       
       if (evflag) {
         fvirial = (force_coul + force_lj + respa_coul + respa_lj)*r2inv;
         ev_tally(i,j,nlocal,newton_pair,
                  evdwl,ecoul,fvirial,d[0],d[1],d[2]);
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLJLongCoulLong::single(int i, int j, int itype, int jtype,
                           double rsq, double factor_coul, double factor_lj,
                           double &fforce)
 {
   // Energy and force of one pair (i,j) with types (itype,jtype) at squared
   // separation rsq.
   //   factor_coul, factor_lj : scale factors for the Coulomb and LJ parts
   //                            (a factor of 1.0 leaves the term unscaled)
   //   fforce                 : output, set to (force_coul+force_lj)/rsq
   //   return value           : sum of the Coulomb and LJ energy terms
   // The (ewald_order&2) flag enables the Coulomb term and (ewald_order&64)
   // selects the long-range form of the dispersion term.
 
   double r2inv, r6inv, force_coul, force_lj;
   double g2 = g_ewald_6*g_ewald_6, g6 = g2*g2*g2, g8 = g6*g2, *q = atom->q;
 
   double eng = 0.0;
 
   r2inv = 1.0/rsq;
   if ((ewald_order&2) && (rsq < cut_coulsq)) {                // coulombic
     if (!ncoultablebits || rsq <= tabinnersq) {                // series real space
       register double r = sqrt(rsq), x = g_ewald*r;
       register double s = force->qqrd2e*q[i]*q[j], t = 1.0/(1.0+EWALD_P*x);
       // r is reused to hold the part removed by factor_coul
       r = s*(1.0-factor_coul)/r; s *= g_ewald*exp(-x*x);
       force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s-r;
       eng += t-r;
     }
     else {                                                // table real space
       // table index is taken from the bit pattern of rsq stored as a float
       register union_int_float_t t;
       t.f = rsq;
       register const int k = (t.i & ncoulmask) >> ncoulshiftbits;
       register double f = (rsq-rtable[k])*drtable[k], qiqj = q[i]*q[j];
       t.f = (1.0-factor_coul)*(ctable[k]+f*dctable[k]);
       force_coul = qiqj*(ftable[k]+f*dftable[k]-t.f);
       eng += qiqj*(etable[k]+f*detable[k]-t.f);
     }
   } else force_coul = 0.0;
 
   if (rsq < cut_ljsq[itype][jtype]) {                        // lennard-jones
     r6inv = r2inv*r2inv*r2inv;
     if (ewald_order&64) {                                // long-range
       register double x2 = g2*rsq, a2 = 1.0/x2, t = r6inv*(1.0-factor_lj);
       x2 = a2*exp(-x2)*lj4[itype][jtype];
       // The dispersion energy term below is g6*((a2+1.0)*a2+0.5)*x2.
       // Taking -r*d/dr of it gives g8*(((6*a2+6)*a2+3)*a2+1.0)*x2*rsq,
       // i.e. the constant term of the polynomial is 1.0 (not a2), which
       // also matches the other force kernels in this file.
       force_lj = factor_lj*(r6inv *= r6inv)*lj1[itype][jtype]-
                g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*lj2[itype][jtype];
       eng += factor_lj*r6inv*lj3[itype][jtype]-
         g6*((a2+1.0)*a2+0.5)*x2+t*lj4[itype][jtype];
     }
     else {                                                // cut
       force_lj = factor_lj*r6inv*(lj1[itype][jtype]*r6inv-lj2[itype][jtype]);
       eng += factor_lj*(r6inv*(r6inv*lj3[itype][jtype]-
                                lj4[itype][jtype])-offset[itype][jtype]);
     }
   } else force_lj = 0.0;
 
   fforce = (force_coul+force_lj)*r2inv;
   return eng;
 }
diff --git a/src/MANYBODY/pair_adp.cpp b/src/MANYBODY/pair_adp.cpp
index d19ffd1ff..369bcc8c9 100644
--- a/src/MANYBODY/pair_adp.cpp
+++ b/src/MANYBODY/pair_adp.cpp
@@ -1,1034 +1,1034 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Christopher Weinberger (SNL), Stephen Foiles (SNL),
                          Chandra Veer Singh (Cornell)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_adp.h"
 #include "atom.h"
 #include "force.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 #define MAXLINE 1024
 
 /* ---------------------------------------------------------------------- */
 
 PairADP::PairADP(LAMMPS *lmp) : Pair(lmp)
 {
   // style flags
 
   restartinfo = 0;
   single_enable = 0;
   one_coeff = 1;
   manybody_flag = 1;
 
   // number of values exchanged per atom in forward and reverse communication
 
   comm_forward = 10;
   comm_reverse = 10;
 
   // per-atom work arrays start out empty and are sized in compute()
 
   nmax = 0;
   rho = NULL;    fp = NULL;
   mu = NULL;     lambda = NULL;
 
   // nothing has been read from a potential file yet
 
   setfl = NULL;
 
   // tabulated functions and the spline coefficients derived from them
 
   frho = NULL;   frho_spline = NULL;
   rhor = NULL;   rhor_spline = NULL;
   z2r = NULL;    z2r_spline = NULL;
   u2r = NULL;    u2r_spline = NULL;
   w2r = NULL;    w2r_spline = NULL;
 }
 
 /* ----------------------------------------------------------------------
    check if allocated, since class can be destructed when incomplete
 ------------------------------------------------------------------------- */
 
 PairADP::~PairADP()
 {
   // per-atom work arrays
 
   memory->destroy(rho);
   memory->destroy(fp);
   memory->destroy(mu);
   memory->destroy(lambda);
 
   // per-type tables are only created by allocate()
 
   if (allocated) {
     memory->destroy(setflag);
     memory->destroy(cutsq);
     delete [] map;
     delete [] type2frho;
     memory->destroy(type2rhor);
     memory->destroy(type2z2r);
     memory->destroy(type2u2r);
     memory->destroy(type2w2r);
   }
 
   // contents of the potential file, if one was read
 
   if (setfl != NULL) {
     for (int ielem = setfl->nelements-1; ielem >= 0; --ielem)
       delete [] setfl->elements[ielem];
     delete [] setfl->elements;
     delete [] setfl->mass;
     memory->destroy(setfl->frho);
     memory->destroy(setfl->rhor);
     memory->destroy(setfl->z2r);
     memory->destroy(setfl->u2r);
     memory->destroy(setfl->w2r);
     delete setfl;
   }
 
   // tabulated functions, each followed by its spline coefficients
 
   memory->destroy(frho);
   memory->destroy(frho_spline);
   memory->destroy(rhor);
   memory->destroy(rhor_spline);
   memory->destroy(z2r);
   memory->destroy(z2r_spline);
   memory->destroy(u2r);
   memory->destroy(u2r_spline);
   memory->destroy(w2r);
   memory->destroy(w2r_spline);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairADP::compute(int eflag, int vflag)
 {
   // Force/energy evaluation, done in three passes:
   //   1. accumulate rho, mu[3] and lambda[6] on each atom from all neighbor
   //      pairs with rsq < cutforcesq
   //   2. for each atom in ilist, evaluate fp from the frho spline and,
   //      if eflag is set, the per-atom energy phi
   //   3. for each neighbor pair, combine spline values and derivatives into
   //      a force, apply it and tally energy/virial
   // eflag/vflag are passed to ev_setup(); when both are zero, evflag and
   // vflag_fdotr are cleared instead.
   // lambda component order as used below: 0=xx 1=yy 2=zz 3=yz 4=xz 5=xy
 
   int i,j,ii,jj,m,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r,p,rhoip,rhojp,z2,z2p,recip,phip,psip,phi;
   double u2,u2p,w2,w2p,nu;
   double *coeff;
   int *ilist,*jlist,*numneigh,**firstneigh;
   double delmux,delmuy,delmuz,trdelmu,tradellam;
   double adpx,adpy,adpz,fx,fy,fz;
   double sumlamxx,sumlamyy,sumlamzz,sumlamyz,sumlamxz,sumlamxy;
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   // grow local arrays if necessary
   // need to be atom->nmax in length
 
   if (atom->nmax > nmax) {
     memory->destroy(rho);
     memory->destroy(fp);
     memory->destroy(mu);
     memory->destroy(lambda);
     nmax = atom->nmax;
     memory->create(rho,nmax,"pair:rho");
     memory->create(fp,nmax,"pair:fp");
     memory->create(mu,nmax,3,"pair:mu");
     memory->create(lambda,nmax,6,"pair:lambda");
   }
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int newton_pair = force->newton_pair;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // zero out density
   // with newton_pair on, entries nlocal..nlocal+nghost-1 are also
   // accumulated into below, so they are cleared as well
 
   if (newton_pair) {
     m = nlocal + atom->nghost;
     for (i = 0; i < m; i++) {
       rho[i] = 0.0;
       mu[i][0] = 0.0; mu[i][1] = 0.0; mu[i][2] = 0.0;
       lambda[i][0] = 0.0; lambda[i][1] = 0.0; lambda[i][2] = 0.0;
       lambda[i][3] = 0.0; lambda[i][4] = 0.0; lambda[i][5] = 0.0;
     }
   } else {
     for (i = 0; i < nlocal; i++) {
       rho[i] = 0.0;
       mu[i][0] = 0.0; mu[i][1] = 0.0; mu[i][2] = 0.0;
       lambda[i][0] = 0.0; lambda[i][1] = 0.0; lambda[i][2] = 0.0;
       lambda[i][3] = 0.0; lambda[i][4] = 0.0; lambda[i][5] = 0.0;
     }
   }
 
   // rho = density at each atom
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cutforcesq) {
         jtype = type[j];
         // m = spline interval index (capped at nr-1)
         // p = offset within that interval (capped at 1.0)
         p = sqrt(rsq)*rdr + 1.0;
         m = static_cast<int> (p);
         m = MIN(m,nr-1);
         p -= m;
         p = MIN(p,1.0);
         // coeff[3..6] are the cubic coefficients of the tabulated function
         // contributions of atom j to atom i use the [jtype][itype] tables
         coeff = rhor_spline[type2rhor[jtype][itype]][m];
         rho[i] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
         coeff = u2r_spline[type2u2r[jtype][itype]][m];
         u2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
         mu[i][0] += u2*delx;
         mu[i][1] += u2*dely;
         mu[i][2] += u2*delz;
         coeff = w2r_spline[type2w2r[jtype][itype]][m];
         w2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
         lambda[i][0] += w2*delx*delx;
         lambda[i][1] += w2*dely*dely;
         lambda[i][2] += w2*delz*delz;
         lambda[i][3] += w2*dely*delz;
         lambda[i][4] += w2*delx*delz;
         lambda[i][5] += w2*delx*dely;
 
         // contributions of atom i to atom j use the [itype][jtype] tables;
         // mu changes sign (odd in del), lambda does not (even in del)
         if (newton_pair || j < nlocal) {
           // verify sign difference for mu and lambda
           coeff = rhor_spline[type2rhor[itype][jtype]][m];
           rho[j] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
           coeff = u2r_spline[type2u2r[itype][jtype]][m];
           u2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
           mu[j][0] -= u2*delx;
           mu[j][1] -= u2*dely;
           mu[j][2] -= u2*delz;
           coeff = w2r_spline[type2w2r[itype][jtype]][m];
           w2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
           lambda[j][0] += w2*delx*delx;
           lambda[j][1] += w2*dely*dely;
           lambda[j][2] += w2*delz*delz;
           lambda[j][3] += w2*dely*delz;
           lambda[j][4] += w2*delx*delz;
           lambda[j][5] += w2*delx*dely;
         }
       }
     }
   }
 
   // communicate and sum densities
 
   if (newton_pair) comm->reverse_comm_pair(this);
 
   // fp = derivative of embedding energy at each atom
   // phi = embedding energy at each atom
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     // spline interval m is kept within [1,nrho-1], offset p capped at 1.0
     p = rho[i]*rdrho + 1.0;
     m = static_cast<int> (p);
     m = MAX(1,MIN(m,nrho-1));
     p -= m;
     p = MIN(p,1.0);
     coeff = frho_spline[type2frho[type[i]]][m];
     fp[i] = (coeff[0]*p + coeff[1])*p + coeff[2];
     if (eflag) {
       // phi = F(rho) + 0.5*|mu|^2 + 0.5*(sum of squared diagonal lambda)
       //       + (sum of squared off-diagonal lambda) - (trace lambda)^2/6
       phi = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
       phi += 0.5*(mu[i][0]*mu[i][0]+mu[i][1]*mu[i][1]+mu[i][2]*mu[i][2]);
       phi += 0.5*(lambda[i][0]*lambda[i][0]+lambda[i][1]*
                   lambda[i][1]+lambda[i][2]*lambda[i][2]);
       phi += 1.0*(lambda[i][3]*lambda[i][3]+lambda[i][4]*
                   lambda[i][4]+lambda[i][5]*lambda[i][5]);
       phi -= 1.0/6.0*(lambda[i][0]+lambda[i][1]+lambda[i][2])*
         (lambda[i][0]+lambda[i][1]+lambda[i][2]);
       if (eflag_global) eng_vdwl += phi;
       if (eflag_atom) eatom[i] += phi;
     }
   }
 
   // communicate derivative of embedding function
 
   comm->forward_comm_pair(this);
 
   // compute forces on each atom
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cutforcesq) {
         jtype = type[j];
         r = sqrt(rsq);
         p = r*rdr + 1.0;
         m = static_cast<int> (p);
         m = MIN(m,nr-1);
         p -= m;
         p = MIN(p,1.0);
 
         // rhoip = derivative of (density at atom j due to atom i)
         // rhojp = derivative of (density at atom i due to atom j)
         // phi = pair potential energy
         // phip = phi'
         // z2 = phi * r
         // z2p = (phi * r)' = (phi' r) + phi
         // u2 = u
         // u2p = u'
         // w2 = w
         // w2p = w'
         // psip needs both fp[i] and fp[j] terms since r_ij appears in two
         //   terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji)
         //   hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip
         // coeff[0..2] give the derivative, coeff[3..6] the value
 
         coeff = rhor_spline[type2rhor[itype][jtype]][m];
         rhoip = (coeff[0]*p + coeff[1])*p + coeff[2];
         coeff = rhor_spline[type2rhor[jtype][itype]][m];
         rhojp = (coeff[0]*p + coeff[1])*p + coeff[2];
         coeff = z2r_spline[type2z2r[itype][jtype]][m];
         z2p = (coeff[0]*p + coeff[1])*p + coeff[2];
         z2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
         coeff = u2r_spline[type2u2r[itype][jtype]][m];
         u2p = (coeff[0]*p + coeff[1])*p + coeff[2];
         u2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
         coeff = w2r_spline[type2w2r[itype][jtype]][m];
         w2p = (coeff[0]*p + coeff[1])*p + coeff[2];
         w2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
 
         // fpair = radial (rho and pair potential) part of the force,
         // already divided by r so it multiplies del directly
         recip = 1.0/r;
         phi = z2*recip;
         phip = z2p*recip - phi*recip;
         psip = fp[i]*rhojp + fp[j]*rhoip + phip;
         fpair = -psip*recip;
 
         // trdelmu = (mu_i - mu_j) . del
         // tradellam = del . (lambda_i + lambda_j) . del
         // nu = trace of (lambda_i + lambda_j)
         delmux = mu[i][0]-mu[j][0];
         delmuy = mu[i][1]-mu[j][1];
         delmuz = mu[i][2]-mu[j][2];
         trdelmu = delmux*delx+delmuy*dely+delmuz*delz;
         sumlamxx = lambda[i][0]+lambda[j][0];
         sumlamyy = lambda[i][1]+lambda[j][1];
         sumlamzz = lambda[i][2]+lambda[j][2];
         sumlamyz = lambda[i][3]+lambda[j][3];
         sumlamxz = lambda[i][4]+lambda[j][4];
         sumlamxy = lambda[i][5]+lambda[j][5];
         tradellam = sumlamxx*delx*delx+sumlamyy*dely*dely+
           sumlamzz*delz*delz+2.0*sumlamxy*delx*dely+
           2.0*sumlamxz*delx*delz+2.0*sumlamyz*dely*delz;
         nu = sumlamxx+sumlamyy+sumlamzz;
 
         // adpx,adpy,adpz = contribution of the mu (u2) and lambda (w2)
         // terms; computed with positive sign, then negated before use
         adpx = delmux*u2 + trdelmu*u2p*delx*recip +
           2.0*w2*(sumlamxx*delx+sumlamxy*dely+sumlamxz*delz) +
           w2p*delx*recip*tradellam - 1.0/3.0*nu*(w2p*r+2.0*w2)*delx;
         adpy = delmuy*u2 + trdelmu*u2p*dely*recip +
           2.0*w2*(sumlamxy*delx+sumlamyy*dely+sumlamyz*delz) +
           w2p*dely*recip*tradellam - 1.0/3.0*nu*(w2p*r+2.0*w2)*dely;
         adpz = delmuz*u2 + trdelmu*u2p*delz*recip +
           2.0*w2*(sumlamxz*delx+sumlamyz*dely+sumlamzz*delz) +
           w2p*delz*recip*tradellam - 1.0/3.0*nu*(w2p*r+2.0*w2)*delz;
         adpx*=-1.0; adpy*=-1.0; adpz*=-1.0;
 
         fx = delx*fpair+adpx;
         fy = dely*fpair+adpy;
         fz = delz*fpair+adpz;
 
         f[i][0] += fx;
         f[i][1] += fy;
         f[i][2] += fz;
         if (newton_pair || j < nlocal) {
           f[j][0] -= fx;
           f[j][1] -= fy;
           f[j][2] -= fz;
         }
 
         // only the pair term phi is tallied here; the per-atom energy
         // was already added in the fp loop above
         if (eflag) evdwl = phi;
         if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,evdwl,0.0,
                                  fx,fy,fz,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairADP::allocate()
 {
   // all per-type tables are indexed 1..ntypes, hence ntypes+1 entries
 
   const int ntypes = atom->ntypes;
   const int dim = ntypes + 1;
 
   allocated = 1;
 
   // setflag: clear the upper triangle (row <= col)
 
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int row = 1; row < dim; ++row) {
     for (int col = row; col < dim; ++col) {
       setflag[row][col] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   // map: -1 marks a type with no element assigned
 
   map = new int[dim];
   for (int itype = 1; itype < dim; ++itype) {
     map[itype] = -1;
   }
 
   // type -> table index lookups, filled in later
 
   type2frho = new int[dim];
   memory->create(type2rhor,dim,dim,"pair:type2rhor");
   memory->create(type2z2r,dim,dim,"pair:type2z2r");
   memory->create(type2u2r,dim,dim,"pair:type2u2r");
   memory->create(type2w2r,dim,dim,"pair:type2w2r");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairADP::settings(int narg, char **arg)
 {
   if (narg > 0) error->all(FLERR,"Illegal pair_style command");
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
    read concatenated *.plt file
 ------------------------------------------------------------------------- */
 
 void PairADP::coeff(int narg, char **arg)
 {
   int i,j;
 
   if (!allocated) allocate();
 
   if (narg != 3 + atom->ntypes)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // insure I,J args are * *
 
   if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // read ADP parameter file
 
   if (setfl) {
     for (i = 0; i < setfl->nelements; i++) delete [] setfl->elements[i];
     delete [] setfl->elements;
     delete [] setfl->mass;
     memory->destroy(setfl->frho);
     memory->destroy(setfl->rhor);
     memory->destroy(setfl->z2r);
     memory->destroy(setfl->u2r);
     memory->destroy(setfl->w2r);
     delete setfl;
   }
   setfl = new Setfl();
   read_file(arg[2]);
 
   // read args that map atom types to elements in potential file
   // map[i] = which element the Ith atom type is, -1 if NULL
 
   for (i = 3; i < narg; i++) {
     if (strcmp(arg[i],"NULL") == 0) {
       map[i-2] = -1;
       continue;
     }
     for (j = 0; j < setfl->nelements; j++)
       if (strcmp(arg[i],setfl->elements[j]) == 0) break;
     if (j < setfl->nelements) map[i-2] = j;
     else error->all(FLERR,"No matching element in ADP potential file");
   }
 
   // clear setflag since coeff() called once with I,J = * *
 
   int n = atom->ntypes;
   for (i = 1; i <= n; i++)
     for (j = i; j <= n; j++)
       setflag[i][j] = 0;
 
   // set setflag i,j for type pairs where both are mapped to elements
   // set mass of atom type if i = j
 
   int count = 0;
   for (i = 1; i <= n; i++) {
     for (j = i; j <= n; j++) {
       if (map[i] >= 0 && map[j] >= 0) {
         setflag[i][j] = 1;
         if (i == j) atom->set_mass(i,setfl->mass[map[i]]);
         count++;
       }
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairADP::init_style()
 {
   // convert read-in file(s) to arrays and spline them
 
   file2array();
   array2spline();
 
   // hand this pair style to the neighbor class via request()
   // NOTE(review): presumably this asks for a neighbor list to be built for
   //   this style -- confirm against Neighbor::request
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairADP::init_one(int i, int j)
 {
   // The same cutoff is returned for every type pair (i and j are unused).
   // It is taken from the potential file when one has been read; otherwise
   // the current value of cutmax is kept. cutforcesq caches its square.
 
   if (setfl != NULL) {
     cutmax = setfl->cut;
   }
 
   cutforcesq = cutmax*cutmax;
   return cutmax;
 }
 
 /* ----------------------------------------------------------------------
    read potential values from a multi-element (setfl-style) ADP potential file
 ------------------------------------------------------------------------- */
 
 void PairADP::read_file(char *filename)
 {
   // Read the potential file `filename` into the Setfl record that setfl
   // points to. Only rank 0 (comm->me == 0) touches the file; every value
   // is then broadcast from rank 0 with MPI_Bcast.
   // Layout as consumed below:
   //   lines 1-3 : read and discarded
   //   line 4    : nelements followed by that many element names
   //   line 5    : nrho drho nr dr cut
   //   then, per element: one line holding an integer and the mass,
   //     nrho frho values and nr rhor values
   //   then z2r, u2r and w2r blocks of nr values each, for every element
   //     pair (i,j) with j <= i
   // All tables are stored starting at index 1.
   // NOTE(review): grab(fp,n,ptr) is assumed to read n values from fp into
   //   ptr -- confirm against its definition
 
   Setfl *file = setfl;
 
   // open potential file
 
   int me = comm->me;
   FILE *fp = NULL;
   char line[MAXLINE];
 
   if (me == 0) {
     fp = force->open_potential(filename);
     if (fp == NULL) {
       // bounded write: filename can be longer than the message buffer
       char str[128];
       snprintf(str,sizeof(str),"Cannot open ADP potential file %s",filename);
       error->one(FLERR,str);
     }
   }
 
   // read and broadcast header
   // extract element names from nelements line
 
   int n;
   if (me == 0) {
     fgets(line,MAXLINE,fp);
     fgets(line,MAXLINE,fp);
     fgets(line,MAXLINE,fp);
     fgets(line,MAXLINE,fp);
     n = strlen(line) + 1;
   }
   MPI_Bcast(&n,1,MPI_INT,0,world);
   MPI_Bcast(line,n,MPI_CHAR,0,world);
 
   sscanf(line,"%d",&file->nelements);
   int nwords = atom->count_words(line);
   if (nwords != file->nelements + 1)
     error->all(FLERR,"Incorrect element names in ADP potential file");
 
   // words[] gets one slot per element name plus one for the NULL that
   // terminates the strtok() loop; the first token (the count) is skipped
 
   char **words = new char*[file->nelements+1];
   nwords = 0;
   strtok(line," \t\n\r\f");
   while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue;
 
   file->elements = new char*[file->nelements];
   for (int i = 0; i < file->nelements; i++) {
     n = strlen(words[i]) + 1;
     file->elements[i] = new char[n];
     strcpy(file->elements[i],words[i]);
   }
   delete [] words;
 
   if (me == 0) {
     fgets(line,MAXLINE,fp);
     sscanf(line,"%d %lg %d %lg %lg",
            &file->nrho,&file->drho,&file->nr,&file->dr,&file->cut);
   }
 
   MPI_Bcast(&file->nrho,1,MPI_INT,0,world);
   MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&file->nr,1,MPI_INT,0,world);
   MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world);
 
   file->mass = new double[file->nelements];
   memory->create(file->frho,file->nelements,file->nrho+1,"pair:frho");
   memory->create(file->rhor,file->nelements,file->nr+1,"pair:rhor");
   memory->create(file->z2r,file->nelements,file->nelements,file->nr+1,
                  "pair:z2r");
   memory->create(file->u2r,file->nelements,file->nelements,file->nr+1,
                  "pair:u2r");
   memory->create(file->w2r,file->nelements,file->nelements,file->nr+1,
                  "pair:w2r");
 
   // per-element section: mass line, then frho and rhor tables
 
   int i,j,tmp;
   for (i = 0; i < file->nelements; i++) {
     if (me == 0) {
       fgets(line,MAXLINE,fp);
       sscanf(line,"%d %lg",&tmp,&file->mass[i]);
     }
     MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world);
 
     if (me == 0) grab(fp,file->nrho,&file->frho[i][1]);
     MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world);
     if (me == 0) grab(fp,file->nr,&file->rhor[i][1]);
     MPI_Bcast(&file->rhor[i][1],file->nr,MPI_DOUBLE,0,world);
   }
 
   // pairwise sections: only entries with j <= i are read from the file
 
   for (i = 0; i < file->nelements; i++)
     for (j = 0; j <= i; j++) {
       if (me == 0) grab(fp,file->nr,&file->z2r[i][j][1]);
       MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world);
     }
 
   for (i = 0; i < file->nelements; i++)
     for (j = 0; j <= i; j++) {
       if (me == 0) grab(fp,file->nr,&file->u2r[i][j][1]);
       MPI_Bcast(&file->u2r[i][j][1],file->nr,MPI_DOUBLE,0,world);
     }
 
   for (i = 0; i < file->nelements; i++)
     for (j = 0; j <= i; j++) {
       if (me == 0) grab(fp,file->nr,&file->w2r[i][j][1]);
       MPI_Bcast(&file->w2r[i][j][1],file->nr,MPI_DOUBLE,0,world);
     }
 
   // close the potential file
 
   if (me == 0) fclose(fp);
 }
 
 /* ----------------------------------------------------------------------
    convert read-in funcfl potential(s) to standard array format
    interpolate all file values to a single grid and cutoff
 ------------------------------------------------------------------------- */
 
 void PairADP::file2array()
 {
   int i,j,m,n;
   int ntypes = atom->ntypes;
 
   // set function params directly from setfl file
 
   nrho = setfl->nrho;
   nr = setfl->nr;
   drho = setfl->drho;
   dr = setfl->dr;
 
   // ------------------------------------------------------------------
   // setup frho arrays
   // ------------------------------------------------------------------
 
   // allocate frho arrays
   // nfrho = # of setfl elements + 1 for zero array
 
   nfrho = setfl->nelements + 1;
   memory->destroy(frho);
   memory->create(frho,nfrho,nrho+1,"pair:frho");
 
   // copy each element's frho to global frho
 
   for (i = 0; i < setfl->nelements; i++)
     for (m = 1; m <= nrho; m++) frho[i][m] = setfl->frho[i][m];
 
   // add extra frho of zeroes for non-ADP types to point to (pair hybrid)
   // this is necessary b/c fp is still computed for non-ADP atoms
 
   for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0;
 
   // type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to
   // if atom type doesn't point to element (non-ADP atom in pair hybrid)
   // then map it to last frho array of zeroes
 
   for (i = 1; i <= ntypes; i++)
     if (map[i] >= 0) type2frho[i] = map[i];
     else type2frho[i] = nfrho-1;
 
   // ------------------------------------------------------------------
   // setup rhor arrays
   // ------------------------------------------------------------------
 
   // allocate rhor arrays
   // nrhor = # of setfl elements
 
   nrhor = setfl->nelements;
   memory->destroy(rhor);
   memory->create(rhor,nrhor,nr+1,"pair:rhor");
 
   // copy each element's rhor to global rhor
 
   for (i = 0; i < setfl->nelements; i++)
     for (m = 1; m <= nr; m++) rhor[i][m] = setfl->rhor[i][m];
 
   // type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to
   // for setfl files, I,J mapping only depends on I
   // OK if map = -1 (non-APD atom in pair hybrid) b/c type2rhor not used
 
   for (i = 1; i <= ntypes; i++)
     for (j = 1; j <= ntypes; j++)
       type2rhor[i][j] = map[i];
 
   // ------------------------------------------------------------------
   // setup z2r arrays
   // ------------------------------------------------------------------
 
   // allocate z2r arrays
   // nz2r = N*(N+1)/2 where N = # of setfl elements
 
   nz2r = setfl->nelements * (setfl->nelements+1) / 2;
   memory->destroy(z2r);
   memory->create(z2r,nz2r,nr+1,"pair:z2r");
 
   // copy each element pair z2r to global z2r, only for I >= J
 
   n = 0;
   for (i = 0; i < setfl->nelements; i++)
     for (j = 0; j <= i; j++) {
       for (m = 1; m <= nr; m++) z2r[n][m] = setfl->z2r[i][j][m];
       n++;
     }
 
   // type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to
   // set of z2r arrays only fill lower triangular Nelement matrix
   // value = n = sum over rows of lower-triangular matrix until reach irow,icol
   // swap indices when irow < icol to stay lower triangular
   // OK if map = -1 (non-ADP atom in pair hybrid) b/c type2z2r not used
 
   int irow,icol;
   for (i = 1; i <= ntypes; i++) {
     for (j = 1; j <= ntypes; j++) {
       irow = map[i];
       icol = map[j];
       if (irow == -1 || icol == -1) continue;
       if (irow < icol) {
         irow = map[j];
         icol = map[i];
       }
       n = 0;
       for (m = 0; m < irow; m++) n += m + 1;
       n += icol;
       type2z2r[i][j] = n;
     }
   }
 
   // ------------------------------------------------------------------
   // setup u2r arrays
   // ------------------------------------------------------------------
 
   // allocate u2r arrays
   // nu2r = N*(N+1)/2 where N = # of setfl elements
 
   nu2r = setfl->nelements * (setfl->nelements+1) / 2;
   memory->destroy(u2r);
   memory->create(u2r,nu2r,nr+1,"pair:u2r");
 
   // copy each element pair z2r to global z2r, only for I >= J
 
   n = 0;
   for (i = 0; i < setfl->nelements; i++)
     for (j = 0; j <= i; j++) {
       for (m = 1; m <= nr; m++) u2r[n][m] = setfl->u2r[i][j][m];
       n++;
     }
 
   // type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to
   // set of z2r arrays only fill lower triangular Nelement matrix
   // value = n = sum over rows of lower-triangular matrix until reach irow,icol
   // swap indices when irow < icol to stay lower triangular
   // OK if map = -1 (non-ADP atom in pair hybrid) b/c type2z2r not used
 
   for (i = 1; i <= ntypes; i++) {
     for (j = 1; j <= ntypes; j++) {
       irow = map[i];
       icol = map[j];
       if (irow == -1 || icol == -1) continue;
       if (irow < icol) {
         irow = map[j];
         icol = map[i];
       }
       n = 0;
       for (m = 0; m < irow; m++) n += m + 1;
       n += icol;
       type2u2r[i][j] = n;
     }
   }
 
   // ------------------------------------------------------------------
   // setup w2r arrays
   // ------------------------------------------------------------------
 
   // allocate w2r arrays
   // nw2r = N*(N+1)/2 where N = # of setfl elements
 
   nw2r = setfl->nelements * (setfl->nelements+1) / 2;
   memory->destroy(w2r);
   memory->create(w2r,nw2r,nr+1,"pair:w2r");
 
   // copy each element pair z2r to global z2r, only for I >= J
 
   n = 0;
   for (i = 0; i < setfl->nelements; i++)
     for (j = 0; j <= i; j++) {
       for (m = 1; m <= nr; m++) w2r[n][m] = setfl->w2r[i][j][m];
       n++;
     }
 
   // type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to
   // set of z2r arrays only fill lower triangular Nelement matrix
   // value = n = sum over rows of lower-triangular matrix until reach irow,icol
   // swap indices when irow < icol to stay lower triangular
   // OK if map = -1 (non-ADP atom in pair hybrid) b/c type2z2r not used
 
   for (i = 1; i <= ntypes; i++) {
     for (j = 1; j <= ntypes; j++) {
       irow = map[i];
       icol = map[j];
       if (irow == -1 || icol == -1) continue;
       if (irow < icol) {
         irow = map[j];
         icol = map[i];
       }
       n = 0;
       for (m = 0; m < irow; m++) n += m + 1;
       n += icol;
       type2w2r[i][j] = n;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    build spline coefficient tables from the tabulated potential arrays
    frho,rhor,z2r,u2r,w2r -> *_spline with 7 coefficients per grid point
    old spline tables are freed before the new ones are allocated
 ------------------------------------------------------------------------- */
 
 void PairADP::array2spline()
 {
   // inverse grid spacings in r and rho
 
   rdr = 1.0/dr;
   rdrho = 1.0/drho;
 
   memory->destroy(frho_spline);
   memory->destroy(rhor_spline);
   memory->destroy(z2r_spline);
   memory->destroy(u2r_spline);
   memory->destroy(w2r_spline);
 
   // size every table by its own array count
   // u2r/w2r use nu2r/nw2r, the same bounds as their fill loops below
 
   memory->create(frho_spline,nfrho,nrho+1,7,"pair:frho");
   memory->create(rhor_spline,nrhor,nr+1,7,"pair:rhor");
   memory->create(z2r_spline,nz2r,nr+1,7,"pair:z2r");
   memory->create(u2r_spline,nu2r,nr+1,7,"pair:u2r");
   memory->create(w2r_spline,nw2r,nr+1,7,"pair:w2r");
 
   // frho is tabulated on the rho grid, all other functions on the r grid
 
   for (int i = 0; i < nfrho; i++)
     interpolate(nrho,drho,frho[i],frho_spline[i]);
 
   for (int i = 0; i < nrhor; i++)
     interpolate(nr,dr,rhor[i],rhor_spline[i]);
 
   for (int i = 0; i < nz2r; i++)
     interpolate(nr,dr,z2r[i],z2r_spline[i]);
 
   for (int i = 0; i < nu2r; i++)
     interpolate(nr,dr,u2r[i],u2r_spline[i]);
 
   for (int i = 0; i < nw2r; i++)
     interpolate(nr,dr,w2r[i],w2r_spline[i]);
 }
 
 /* ----------------------------------------------------------------------
    fill spline[m][0..6] for m = 1..n from the tabulated values f[1..n]
    [6] = tabulated value
    [5] = finite-difference slope estimate built from the [6] entries
    [4],[3] = coefficients derived from neighboring values and slopes
    [2],[1],[0] = [5], 2*[4], 3*[3], each divided by the grid spacing delta
 ------------------------------------------------------------------------- */
 
 void PairADP::interpolate(int n, double delta, double *f, double **spline)
 {
   // copy the tabulated values into column 6
 
   for (int k = 1; k <= n; k++) {
     double *row = spline[k];
     row[6] = f[k];
   }
 
   // slopes at the two points nearest each end use short stencils
   // assignment order is kept since the rows can coincide for small n
 
   spline[1][5] = spline[2][6] - spline[1][6];
   spline[2][5] = 0.5 * (spline[3][6]-spline[1][6]);
   spline[n-1][5] = 0.5 * (spline[n][6]-spline[n-2][6]);
   spline[n][5] = spline[n][6] - spline[n-1][6];
 
   // interior slopes use a five-point stencil
 
   const int lastinner = n-2;
   for (int k = 3; k <= lastinner; k++) {
     spline[k][5] = ((spline[k-2][6]-spline[k+2][6]) +
                     8.0*(spline[k+1][6]-spline[k-1][6])) / 12.0;
   }
 
   // coefficients 4 and 3 on every interval [k,k+1]
 
   for (int k = 1; k < n; k++) {
     double *cur = spline[k];
     double *nxt = spline[k+1];
     cur[4] = 3.0*(nxt[6]-cur[6]) - 2.0*cur[5] - nxt[5];
     cur[3] = cur[5] + nxt[5] - 2.0*(nxt[6]-cur[6]);
   }
 
   // no interval follows the last point
 
   spline[n][3] = 0.0;
   spline[n][4] = 0.0;
 
   // scaled copies of coefficients 5,4,3 stored in columns 2,1,0
 
   for (int k = 1; k <= n; k++) {
     double *row = spline[k];
     row[2] = row[5]/delta;
     row[1] = 2.0*row[4]/delta;
     row[0] = 3.0*row[3]/delta;
   }
 }
 
 /* ----------------------------------------------------------------------
    grab n values from file fp and put them in list
    values can be several to a line, blank lines are skipped
    never writes more than n entries, surplus values on a line are ignored
    if the file ends or a read fails before n values were found,
      the remaining entries of list are set to 0.0
    only called by proc 0
 ------------------------------------------------------------------------- */
 
 void PairADP::grab(FILE *fp, int n, double *list)
 {
   char line[MAXLINE];
   const char *sep = " \t\n\r\f";
 
   int i = 0;
   while (i < n) {
 
     // stop on EOF or read error instead of re-parsing a stale buffer
     // NOTE(review): a truncated file is not reported here, callers that
     //   need a hard failure should validate the file length themselves
 
     if (fgets(line,MAXLINE,fp) == NULL) {
       while (i < n) list[i++] = 0.0;
       return;
     }
 
     // strtok returns NULL right away for a blank line, so the loop
     // body is skipped rather than passing NULL to atof
     // the i < n test keeps extra tokens from overrunning list
 
     for (char *ptr = strtok(line,sep); ptr != NULL && i < n;
          ptr = strtok(NULL,sep))
       list[i++] = atof(ptr);
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int PairADP::pack_forward_comm(int n, int *list, double *buf, 
                                int pbc_flag, int *pbc)
 {
   int i,j,m;
 
   m = 0;
   for (i = 0; i < n; i++) {
     j = list[i];
     buf[m++] = fp[j];
     buf[m++] = mu[j][0];
     buf[m++] = mu[j][1];
     buf[m++] = mu[j][2];
     buf[m++] = lambda[j][0];
     buf[m++] = lambda[j][1];
     buf[m++] = lambda[j][2];
     buf[m++] = lambda[j][3];
     buf[m++] = lambda[j][4];
     buf[m++] = lambda[j][5];
   }
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairADP::unpack_forward_comm(int n, int first, double *buf)
 {
   int i,m,last;
 
   m = 0;
   last = first + n;
   for (i = first; i < last; i++) {
     fp[i] = buf[m++];
     mu[i][0] = buf[m++];
     mu[i][1] = buf[m++];
     mu[i][2] = buf[m++];
     lambda[i][0] = buf[m++];
     lambda[i][1] = buf[m++];
     lambda[i][2] = buf[m++];
     lambda[i][3] = buf[m++];
     lambda[i][4] = buf[m++];
     lambda[i][5] = buf[m++];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int PairADP::pack_reverse_comm(int n, int first, double *buf)
 {
   int i,m,last;
 
   m = 0;
   last = first + n;
   for (i = first; i < last; i++) {
   buf[m++] = rho[i];
   buf[m++] = mu[i][0];
   buf[m++] = mu[i][1];
   buf[m++] = mu[i][2];
   buf[m++] = lambda[i][0];
   buf[m++] = lambda[i][1];
   buf[m++] = lambda[i][2];
   buf[m++] = lambda[i][3];
   buf[m++] = lambda[i][4];
   buf[m++] = lambda[i][5];
   }
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairADP::unpack_reverse_comm(int n, int *list, double *buf)
 {
   int i,j,m;
 
   m = 0;
   for (i = 0; i < n; i++) {
     j = list[i];
     rho[j] += buf[m++];
     mu[j][0] += buf[m++];
     mu[j][1] += buf[m++];
     mu[j][2] += buf[m++];
     lambda[j][0] += buf[m++];
     lambda[j][1] += buf[m++];
     lambda[j][2] += buf[m++];
     lambda[j][3] += buf[m++];
     lambda[j][4] += buf[m++];
     lambda[j][5] += buf[m++];
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
    adds 21 doubles per atom slot (nmax) to the base class estimate
 ------------------------------------------------------------------------- */
 
 double PairADP::memory_usage()
 {
   double bytes = Pair::memory_usage();
 
   // do the product in double so that 21*nmax cannot overflow int
 
   bytes += 21.0 * static_cast<double>(nmax) * sizeof(double);
   return bytes;
 }
diff --git a/src/MANYBODY/pair_airebo.cpp b/src/MANYBODY/pair_airebo.cpp
index 98db58ec4..c643bd3b9 100644
--- a/src/MANYBODY/pair_airebo.cpp
+++ b/src/MANYBODY/pair_airebo.cpp
@@ -1,4200 +1,4200 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Ase Henry (MIT)
    Bugfixes and optimizations:
      Marcel Fallet & Steve Stuart (Clemson), Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "mpi.h"
 #include "pair_airebo.h"
 #include "atom.h"
 #include "neighbor.h"
 #include "force.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "my_page.h"
 #include "math_const.h"
 #include "math_special.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
 #define MAXLINE 1024
 #define TOL 1.0e-9
 #define PGDELTA 1
 
 /* ----------------------------------------------------------------------
    constructor: set pair style flags and mark all REBO storage as empty
 ------------------------------------------------------------------------- */
 
 PairAIREBO::PairAIREBO(LAMMPS *lmp) : Pair(lmp)
 {
   // pair style flags
 
   single_enable = 0;
   one_coeff = 1;
   ghostneigh = 1;
   manybody_flag = 1;
 
   // REBO neighbor list storage, nothing allocated yet
 
   maxlocal = 0;
   REBO_numneigh = NULL;
   REBO_firstneigh = NULL;
   ipage = NULL;
   oneatom = 0;
   pgsize = 0;
 
   // per-atom nC,nH arrays, allocated in REBO_neigh()
 
   nH = NULL;
   nC = NULL;
 }
 
 /* ----------------------------------------------------------------------
    free REBO neighbor storage unconditionally
    free per-type arrays only if allocate() was called,
      since class can be destructed when incomplete
 ------------------------------------------------------------------------- */
 
 PairAIREBO::~PairAIREBO()
 {
   // REBO neighbor list and per-atom nC,nH arrays
 
   memory->destroy(REBO_numneigh);
   memory->sfree(REBO_firstneigh);
   delete [] ipage;
   memory->destroy(nC);
   memory->destroy(nH);
 
   // everything below only exists after allocate()
 
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
   memory->destroy(cutghost);
 
   memory->destroy(cutljsq);
   memory->destroy(lj1);
   memory->destroy(lj2);
   memory->destroy(lj3);
   memory->destroy(lj4);
 
   delete [] map;
 }
 
 /* ----------------------------------------------------------------------
    rebuild the REBO neighbor list, then evaluate the REBO term and,
    if enabled via ljflag and torflag, the LJ and torsion terms
 ------------------------------------------------------------------------- */
 
 void PairAIREBO::compute(int eflag, int vflag)
 {
   // set up energy/virial tallying, or switch all tally flags off
 
   if (!(eflag || vflag)) {
     vflag_atom = 0;
     vflag_fdotr = 0;
     evflag = 0;
   } else {
     ev_setup(eflag,vflag);
   }
 
   REBO_neigh();
 
   FREBO(eflag,vflag);
   if (ljflag) FLJ(eflag,vflag);
   if (torflag) TORSION(eflag,vflag);
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all per-type arrays
    setflag,cutsq,cutghost,map are sized by number of atom types + 1
    LJ tables are 2x2 since they are indexed by element (C,H)
 ------------------------------------------------------------------------- */
 
 void PairAIREBO::allocate()
 {
   const int ntypes = atom->ntypes;
   const int np1 = ntypes + 1;
 
   allocated = 1;
 
   // setflag: only the upper triangle (j >= i) is zeroed here
 
   memory->create(setflag,np1,np1,"pair:setflag");
   for (int i = 1; i <= ntypes; i++) {
     int *row = setflag[i];
     for (int j = i; j <= ntypes; j++) row[j] = 0;
   }
 
   memory->create(cutsq,np1,np1,"pair:cutsq");
   memory->create(cutghost,np1,np1,"pair:cutghost");
 
   // only sized by C,H = 2 types
 
   memory->create(cutljsq,2,2,"pair:cutljsq");
   memory->create(lj1,2,2,"pair:lj1");
   memory->create(lj2,2,2,"pair:lj2");
   memory->create(lj3,2,2,"pair:lj3");
   memory->create(lj4,2,2,"pair:lj4");
 
   // atom type -> element map, filled in coeff()
 
   map = new int[np1];
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairAIREBO::settings(int narg, char **arg)
 {
   if (narg != 1 && narg != 3) error->all(FLERR,"Illegal pair_style command");
 
   cutlj = force->numeric(FLERR,arg[0]);
 
   ljflag = torflag = 1;
   if (narg == 3) {
     ljflag = force->inumeric(FLERR,arg[1]);
     torflag = force->inumeric(FLERR,arg[2]);
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairAIREBO::coeff(int narg, char **arg)
 {
   if (!allocated) allocate();
 
   if (narg != 3 + atom->ntypes)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // insure I,J args are * *
 
   if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // read args that map atom types to C and H
   // map[i] = which element (0,1) the Ith atom type is, -1 if NULL
 
   for (int i = 3; i < narg; i++) {
     if (strcmp(arg[i],"NULL") == 0) {
       map[i-2] = -1;
       continue;
     } else if (strcmp(arg[i],"C") == 0) {
       map[i-2] = 0;
     } else if (strcmp(arg[i],"H") == 0) {
       map[i-2] = 1;
     } else error->all(FLERR,"Incorrect args for pair coefficients");
   }
 
   // read potential file and initialize fitting splines
 
   read_file(arg[2]);
   spline_init();
 
   // clear setflag since coeff() called once with I,J = * *
 
   int n = atom->ntypes;
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       setflag[i][j] = 0;
 
   // set setflag i,j for type pairs where both are mapped to elements
 
   int count = 0;
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       if (map[i] >= 0 && map[j] >= 0) {
         setflag[i][j] = 1;
         count++;
       }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
    checks that atom IDs and newton pair are enabled
    requests a full neighbor list that includes neighbors of ghost atoms
    (re)creates the MyPage allocators used for the local REBO neighbor list
 ------------------------------------------------------------------------- */
 
 void PairAIREBO::init_style()
 {
   // both settings are hard requirements, abort otherwise
 
   if (atom->tag_enable == 0)
     error->all(FLERR,"Pair style AIREBO requires atom IDs");
   if (force->newton_pair == 0)
     error->all(FLERR,"Pair style AIREBO requires newton pair on");
 
   // need a full neighbor list, including neighbors of ghosts
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
   neighbor->requests[irequest]->ghost = 1;
 
   // local REBO neighbor list
   // create pages if first time or if neighbor pgsize/oneatom has changed
 
   int create = 0;
   if (ipage == NULL) create = 1;
   if (pgsize != neighbor->pgsize) create = 1;
   if (oneatom != neighbor->oneatom) create = 1;
 
   if (create) {
     // discard the old pages, then cache the sizes they are rebuilt with
     delete [] ipage;
     pgsize = neighbor->pgsize;
     oneatom = neighbor->oneatom;
 
     // one MyPage allocator per thread reported by comm->nthreads
     int nmypage= comm->nthreads;
     ipage = new MyPage<int>[nmypage];
     for (int i = 0; i < nmypage; i++)
       ipage[i].init(oneatom,pgsize,PGDELTA);
   }
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairAIREBO::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   // convert to C,H types
 
   int ii = map[i];
   int jj = map[j];
 
   // use C-C values for these cutoffs since C atoms are biggest
 
   // cut3rebo = 3 REBO distances
 
   cut3rebo = 3.0 * rcmax[0][0];
 
   // cutljrebosq = furthest distance from an owned atom a ghost atom can be
   //               to need its REBO neighs computed
   // interaction = M-K-I-J-L-N with I = owned and J = ghost
   //   this insures N is in the REBO neigh list of L
   //   since I-J < rcLJmax and J-L < rmax
 
   double cutljrebo = rcLJmax[0][0] + rcmax[0][0];
   cutljrebosq = cutljrebo * cutljrebo;
 
   // cutmax = furthest distance from an owned atom
   //          at which another atom will feel force, i.e. the ghost cutoff
   // for REBO term in potential:
   //   interaction = M-K-I-J-L-N with I = owned and J = ghost
   //   I to N is max distance = 3 REBO distances
   // for LJ term in potential:
   //   short interaction = M-K-I-J-L-N with I = owned, J = ghost, I-J < rcLJmax
   //   rcLJmax + 2*rcmax, since I-J < rcLJmax and J-L,L-N = REBO distances
   //   long interaction = I-J with I = owned and J = ghost
   //   cutlj*sigma, since I-J < LJ cutoff
   // cutghost = REBO cutoff used in REBO_neigh() for neighbors of ghosts
 
   double cutmax = cut3rebo;
   if (ljflag) {
     cutmax = MAX(cutmax,rcLJmax[0][0] + 2.0*rcmax[0][0]);
     cutmax = MAX(cutmax,cutlj*sigma[0][0]);
   }
 
   cutghost[i][j] = rcmax[ii][jj];
   cutljsq[ii][jj] = cutlj*sigma[ii][jj] * cutlj*sigma[ii][jj];
   lj1[ii][jj] = 48.0 * epsilon[ii][jj] * pow(sigma[ii][jj],12.0);
   lj2[ii][jj] = 24.0 * epsilon[ii][jj] * pow(sigma[ii][jj],6.0);
   lj3[ii][jj] = 4.0 * epsilon[ii][jj] * pow(sigma[ii][jj],12.0);
   lj4[ii][jj] = 4.0 * epsilon[ii][jj] * pow(sigma[ii][jj],6.0);
 
   cutghost[j][i] = cutghost[i][j];
   cutljsq[jj][ii] = cutljsq[ii][jj];
   lj1[jj][ii] = lj1[ii][jj];
   lj2[jj][ii] = lj2[ii][jj];
   lj3[jj][ii] = lj3[ii][jj];
   lj4[jj][ii] = lj4[ii][jj];
 
   return cutmax;
 }
 
 /* ----------------------------------------------------------------------
    create REBO neighbor list from main neighbor list
    REBO neighbor list stores neighbors of ghost atoms
 ------------------------------------------------------------------------- */
 
 void PairAIREBO::REBO_neigh()
 {
   int i,j,ii,jj,n,allnum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq,dS;
   int *ilist,*jlist,*numneigh,**firstneigh;
   int *neighptr;
 
   double **x = atom->x;
   int *type = atom->type;
 
   if (atom->nmax > maxlocal) {
     maxlocal = atom->nmax;
     memory->destroy(REBO_numneigh);
     memory->sfree(REBO_firstneigh);
     memory->destroy(nC);
     memory->destroy(nH);
     memory->create(REBO_numneigh,maxlocal,"AIREBO:numneigh");
     REBO_firstneigh = (int **) memory->smalloc(maxlocal*sizeof(int *),
                                                "AIREBO:firstneigh");
     memory->create(nC,maxlocal,"AIREBO:nC");
     memory->create(nH,maxlocal,"AIREBO:nH");
   }
 
   allnum = list->inum + list->gnum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // store all REBO neighs of owned and ghost atoms
   // scan full neighbor list of I
 
   ipage->reset();
 
   for (ii = 0; ii < allnum; ii++) {
     i = ilist[ii];
 
     n = 0;
     neighptr = ipage->vget();
 
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = map[type[i]];
     nC[i] = nH[i] = 0.0;
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
       jtype = map[type[j]];
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < rcmaxsq[itype][jtype]) {
         neighptr[n++] = j;
         if (jtype == 0)
           nC[i] += Sp(sqrt(rsq),rcmin[itype][jtype],rcmax[itype][jtype],dS);
         else
           nH[i] += Sp(sqrt(rsq),rcmin[itype][jtype],rcmax[itype][jtype],dS);
       }
     }
 
     REBO_firstneigh[i] = neighptr;
     REBO_numneigh[i] = n;
     ipage->vgot(n);
     if (ipage->status())
       error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
   }
 }
 
 /* ----------------------------------------------------------------------
    REBO forces and energy
    loops over owned atoms and their REBO neighbors, handling each
      pair once, adds the pair force to f and tallies energy/virial
    eflag = tally energy if non-zero
    vflag is not read here, vflag_atom is what gets passed to bondorder()
 ------------------------------------------------------------------------- */
 
 void PairAIREBO::FREBO(int eflag, int vflag)
 {
   int i,j,k,m,ii,inum,itype,jtype;
   tagint itag,jtag;
   double delx,dely,delz,evdwl,fpair,xtmp,ytmp,ztmp;
   double rsq,rij,wij;
   double Qij,Aij,alphaij,VR,pre,dVRdi,VA,term,bij,dVAdi,dVA;
   double dwij,del[3];
   int *ilist,*REBO_neighs;
 
   evdwl = 0.0;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   tagint *tag = atom->tag;
   int nlocal = atom->nlocal;
   int newton_pair = force->newton_pair;
 
   inum = list->inum;
   ilist = list->ilist;
 
   // two-body interactions from REBO neighbor list, skip half of them
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     itag = tag[i];
     itype = map[type[i]];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     REBO_neighs = REBO_firstneigh[i];
 
     for (k = 0; k < REBO_numneigh[i]; k++) {
       j = REBO_neighs[k];
       jtag = tag[j];
 
       // choose which atom of the pair does the work:
       //   different tags: decided by tag order and parity of the tag sum
       //   equal tags: decided by comparing z, then y, then x coordinates
 
       if (itag > jtag) {
         if ((itag+jtag) % 2 == 0) continue;
       } else if (itag < jtag) {
         if ((itag+jtag) % 2 == 1) continue;
       } else {
         if (x[j][2] < ztmp) continue;
         if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
         if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
       }
 
       jtype = map[type[j]];
 
       delx = x[i][0] - x[j][0];
       dely = x[i][1] - x[j][1];
       delz = x[i][2] - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       rij = sqrt(rsq);
 
       // wij = Sp() weight of the pair, dwij is filled in by Sp()
       // pairs with a weight of at most TOL are skipped
 
       wij = Sp(rij,rcmin[itype][jtype],rcmax[itype][jtype],dwij);
       if (wij <= TOL) continue;
 
       Qij = Q[itype][jtype];
       Aij = A[itype][jtype];
       alphaij = alpha[itype][jtype];
 
       // VR and its derivative dVRdi with respect to rij
       // (presumably the repulsive pair term, going by the name)
       // the last line adds the contribution of the weight derivative dwij
 
       VR = wij*(1.0+(Qij/rij)) * Aij*exp(-alphaij*rij);
       pre = wij*Aij * exp(-alphaij*rij);
       dVRdi = pre * ((-alphaij)-(Qij/rsq)-(Qij*alphaij/rij));
       dVRdi += VR/wij * dwij;
 
       // VA = sum of three exponential terms, dVA = its derivative
       // (presumably the attractive pair term, going by the name)
 
       VA = dVA = 0.0;
       for (m = 0; m < 3; m++) {
         term = -wij * BIJc[itype][jtype][m] * exp(-Beta[itype][jtype][m]*rij);
         VA += term;
         dVA += -Beta[itype][jtype][m] * term;
       }
       dVA += VA/wij * dwij;
 
       // bondorder() returns the factor bij that scales VA
       // it is handed the force array f, so it must run before the
       // pair force below is added
 
       del[0] = delx;
       del[1] = dely;
       del[2] = delz;
       bij = bondorder(i,j,del,rij,VA,f,vflag_atom);
       dVAdi = bij*dVA;
 
       // equal and opposite pair force along the i-j vector
 
       fpair = -(dVRdi+dVAdi) / rij;
       f[i][0] += delx*fpair;
       f[i][1] += dely*fpair;
       f[i][2] += delz*fpair;
       f[j][0] -= delx*fpair;
       f[j][1] -= dely*fpair;
       f[j][2] -= delz*fpair;
 
       if (eflag) evdwl = VR + bij*VA;
       if (evflag) ev_tally(i,j,nlocal,newton_pair,
                            evdwl,0.0,fpair,delx,dely,delz);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    compute LJ forces and energy
    find 3- and 4-step paths between atoms I,J via REBO neighbor lists
 ------------------------------------------------------------------------- */
 
 void PairAIREBO::FLJ(int eflag, int vflag)
 {
   int i,j,k,m,ii,jj,kk,mm,inum,jnum,itype,jtype,ktype,mtype;
   int atomi,atomj,atomk,atomm;
   int testpath,npath,done;
   tagint itag,jtag;
   double evdwl,fpair,xtmp,ytmp,ztmp;
   double rsq,best,wik,wkm,cij,rij,dwij,dwik,dwkj,dwkm,dwmj;
   double delij[3],rijsq,delik[3],rik,deljk[3];
   double rkj,wkj,dC,VLJ,dVLJ,VA,Str,dStr,Stb;
   double vdw,slw,dvdw,dslw,drij,swidth,tee,tee2;
   double rljmin,rljmax,sigcut,sigmin,sigwid;
   double delkm[3],rkm,deljm[3],rmj,wmj,r2inv,r6inv,scale,delscale[3];
   int *ilist,*jlist,*numneigh,**firstneigh;
   int *REBO_neighs_i,*REBO_neighs_k;
   double delikS[3],deljkS[3],delkmS[3],deljmS[3],delimS[3];
   double rikS,rkjS,rkmS,rmjS,wikS,dwikS;
   double wkjS,dwkjS,wkmS,dwkmS,wmjS,dwmjS;
   double fpair1,fpair2,fpair3;
   double fi[3],fj[3],fk[3],fm[3];
 
   // I-J interaction from full neighbor list
   // skip 1/2 of interactions since only consider each pair once
 
   evdwl = 0.0;
   rljmin = 0.0;
   rljmax = 0.0;
   sigcut = 0.0;
   sigmin = 0.0;
   sigwid = 0.0;
 
 
   double **x = atom->x;
   double **f = atom->f;
   tagint *tag = atom->tag;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int newton_pair = force->newton_pair;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     itag = tag[i];
     itype = map[type[i]];
     atomi = i;
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
       jtag = tag[j];
 
       if (itag > jtag) {
         if ((itag+jtag) % 2 == 0) continue;
       } else if (itag < jtag) {
         if ((itag+jtag) % 2 == 1) continue;
       } else {
         if (x[j][2] < ztmp) continue;
         if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
         if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
       }
 
       jtype = map[type[j]];
       atomj = j;
 
       delij[0] = xtmp - x[j][0];
       delij[1] = ytmp - x[j][1];
       delij[2] = ztmp - x[j][2];
       rijsq = delij[0]*delij[0] + delij[1]*delij[1] + delij[2]*delij[2];
 
       // if outside of LJ cutoff, skip
       // if outside of 4-path cutoff, best = 0.0, no need to test paths
       // if outside of 2-path cutoff but inside 4-path cutoff,
       //   best = 0.0, test 3-,4-paths
       // if inside 2-path cutoff, best = wij, only test 3-,4-paths if best < 1
 
       if (rijsq >= cutljsq[itype][jtype]) continue;
       rij = sqrt(rijsq);
       if (rij >= cut3rebo) {
         best = 0.0;
         testpath = 0;
       } else if (rij >= rcmax[itype][jtype]) {
         best = 0.0;
         testpath = 1;
       } else {
         best = Sp(rij,rcmin[itype][jtype],rcmax[itype][jtype],dwij);
         npath = 2;
         if (best < 1.0) testpath = 1;
         else testpath = 0;
       }
 
       done = 0;
       if (testpath) {
 
         // test all 3-body paths = I-K-J
         // I-K interactions come from atom I's REBO neighbors
         // if wik > current best, compute wkj
         // if best = 1.0, done
 
         REBO_neighs_i = REBO_firstneigh[i];
         for (kk = 0; kk < REBO_numneigh[i] && done==0; kk++) {
           k = REBO_neighs_i[kk];
           if (k == j) continue;
           ktype = map[type[k]];
 
           delik[0] = x[i][0] - x[k][0];
           delik[1] = x[i][1] - x[k][1];
           delik[2] = x[i][2] - x[k][2];
           rsq = delik[0]*delik[0] + delik[1]*delik[1] + delik[2]*delik[2];
           if (rsq < rcmaxsq[itype][ktype]) {
             rik = sqrt(rsq);
             wik = Sp(rik,rcmin[itype][ktype],rcmax[itype][ktype],dwik);
           } else wik = 0.0;
 
           if (wik > best) {
             deljk[0] = x[j][0] - x[k][0];
             deljk[1] = x[j][1] - x[k][1];
             deljk[2] = x[j][2] - x[k][2];
             rsq = deljk[0]*deljk[0] + deljk[1]*deljk[1] + deljk[2]*deljk[2];
             if (rsq < rcmaxsq[ktype][jtype]) {
               rkj = sqrt(rsq);
               wkj = Sp(rkj,rcmin[ktype][jtype],rcmax[ktype][jtype],dwkj);
               if (wik*wkj > best) {
                 best = wik*wkj;
                 npath = 3;
                  atomk = k;
                     delikS[0] = delik[0];
                     delikS[1] = delik[1];
                     delikS[2] = delik[2];
                     rikS = rik;
                     wikS = wik;
                     dwikS = dwik;
                     deljkS[0] = deljk[0];
                     deljkS[1] = deljk[1];
                     deljkS[2] = deljk[2];
                     rkjS = rkj;
                     wkjS = wkj;
                     dwkjS = dwkj;
                 if (best == 1.0) {
                   done = 1;
                   break;
                 }
               }
             }
 
             // test all 4-body paths = I-K-M-J
             // K-M interactions come from atom K's REBO neighbors
             // if wik*wkm > current best, compute wmj
             // if best = 1.0, done
 
             REBO_neighs_k = REBO_firstneigh[k];
             for (mm = 0; mm < REBO_numneigh[k] && done==0; mm++) {
               m = REBO_neighs_k[mm];
               if (m == i || m == j) continue;
               mtype = map[type[m]];
               delkm[0] = x[k][0] - x[m][0];
               delkm[1] = x[k][1] - x[m][1];
               delkm[2] = x[k][2] - x[m][2];
               rsq = delkm[0]*delkm[0] + delkm[1]*delkm[1] + delkm[2]*delkm[2];
               if (rsq < rcmaxsq[ktype][mtype]) {
                 rkm = sqrt(rsq);
                 wkm = Sp(rkm,rcmin[ktype][mtype],rcmax[ktype][mtype],dwkm);
               } else wkm = 0.0;
 
               if (wik*wkm > best) {
                 deljm[0] = x[j][0] - x[m][0];
                 deljm[1] = x[j][1] - x[m][1];
                 deljm[2] = x[j][2] - x[m][2];
                 rsq = deljm[0]*deljm[0] + deljm[1]*deljm[1] +
                   deljm[2]*deljm[2];
                 if (rsq < rcmaxsq[mtype][jtype]) {
                   rmj = sqrt(rsq);
                   wmj = Sp(rmj,rcmin[mtype][jtype],rcmax[mtype][jtype],dwmj);
                   if (wik*wkm*wmj > best) {
                     best = wik*wkm*wmj;
                     npath = 4;
                     atomk = k;
                     delikS[0] = delik[0];
                     delikS[1] = delik[1];
                     delikS[2] = delik[2];
                     rikS = rik;
                     wikS = wik;
                     dwikS = dwik;
                     atomm = m;
                     delkmS[0] = delkm[0];
                     delkmS[1] = delkm[1];
                         delkmS[2] = delkm[2];
                     rkmS = rkm;
                     wkmS = wkm;
                     dwkmS = dwkm;
                     deljmS[0] = deljm[0];
                     deljmS[1] = deljm[1];
                        deljmS[2] = deljm[2];
                     rmjS = rmj;
                     wmjS = wmj;
                     dwmjS = dwmj;
                     if (best == 1.0) {
                       done = 1;
                       break;
                     }
                   }
                 }
               }
             }
           }
         }
       }
 
       cij = 1.0 - best;
       if (cij == 0.0) continue;
 
       // compute LJ forces and energy
 
       sigwid = 0.84;
       sigcut = 3.0;
       sigmin = sigcut - sigwid;
 
       rljmin = sigma[itype][jtype];
       rljmax = sigcut * rljmin;
       rljmin = sigmin * rljmin;
 
       if (rij > rljmax) {
         slw = 0.0;
         dslw = 0.0;
       } else if (rij > rljmin) {
         drij = rij - rljmin;
         swidth = rljmax - rljmin;
         tee = drij / swidth;
         tee2 = tee*tee;
         slw = 1.0 - tee2 * (3.0 - 2.0 * tee);
         dslw = 6.0 * tee * (1.0 - tee) / rij / swidth;
       } else {
         slw = 1.0;
         dslw = 0.0;
       }
 
       r2inv = 1.0/rijsq;
       r6inv = r2inv*r2inv*r2inv;
 
       vdw = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
       dvdw = -r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]) / rij;
 
       // VLJ now becomes vdw * slw, derivaties, etc.
 
       VLJ = vdw * slw;
       dVLJ = dvdw * slw + vdw * dslw;
 
       Str = Sp2(rij,rcLJmin[itype][jtype],rcLJmax[itype][jtype],dStr);
       VA = Str*cij*VLJ;
       if (Str > 0.0) {
         scale = rcmin[itype][jtype] / rij;
         delscale[0] = scale * delij[0];
         delscale[1] = scale * delij[1];
         delscale[2] = scale * delij[2];
         Stb = bondorderLJ(i,j,delscale,rcmin[itype][jtype],VA,
                           delij,rij,f,vflag_atom);
       } else Stb = 0.0;
 
       fpair = -(dStr * (Stb*cij*VLJ - cij*VLJ) +
                 dVLJ * (Str*Stb*cij + cij - Str*cij)) / rij;
 
       f[i][0] += delij[0]*fpair;
       f[i][1] += delij[1]*fpair;
       f[i][2] += delij[2]*fpair;
       f[j][0] -= delij[0]*fpair;
       f[j][1] -= delij[1]*fpair;
       f[j][2] -= delij[2]*fpair;
 
       if (eflag) evdwl = VA*Stb + (1.0-Str)*cij*VLJ;
       if (evflag) ev_tally(i,j,nlocal,newton_pair,
                            evdwl,0.0,fpair,delij[0],delij[1],delij[2]);
 
       if (cij < 1.0) {
         dC = Str*Stb*VLJ + (1.0-Str)*VLJ;
         if (npath == 2) {
           fpair = dC*dwij / rij;
           f[atomi][0] += delij[0]*fpair;
           f[atomi][1] += delij[1]*fpair;
           f[atomi][2] += delij[2]*fpair;
           f[atomj][0] -= delij[0]*fpair;
           f[atomj][1] -= delij[1]*fpair;
           f[atomj][2] -= delij[2]*fpair;
 
           if (vflag_atom) v_tally2(atomi,atomj,fpair,delij);
 
         } else if (npath == 3) {
           fpair1 = dC*dwikS*wkjS / rikS;
           fi[0] = delikS[0]*fpair1;
           fi[1] = delikS[1]*fpair1;
           fi[2] = delikS[2]*fpair1;
           fpair2 = dC*wikS*dwkjS / rkjS;
           fj[0] = deljkS[0]*fpair2;
           fj[1] = deljkS[1]*fpair2;
           fj[2] = deljkS[2]*fpair2;
 
           f[atomi][0] += fi[0];
           f[atomi][1] += fi[1];
           f[atomi][2] += fi[2];
           f[atomj][0] += fj[0];
           f[atomj][1] += fj[1];
           f[atomj][2] += fj[2];
           f[atomk][0] -= fi[0] + fj[0];
           f[atomk][1] -= fi[1] + fj[1];
           f[atomk][2] -= fi[2] + fj[2];
 
           if (vflag_atom)
             v_tally3(atomi,atomj,atomk,fi,fj,delikS,deljkS);
 
         } else {
           fpair1 = dC*dwikS*wkmS*wmjS / rikS;
           fi[0] = delikS[0]*fpair1;
           fi[1] = delikS[1]*fpair1;
           fi[2] = delikS[2]*fpair1;
 
           fpair2 = dC*wikS*dwkmS*wmjS / rkmS;
           fk[0] = delkmS[0]*fpair2 - fi[0];
           fk[1] = delkmS[1]*fpair2 - fi[1];
           fk[2] = delkmS[2]*fpair2 - fi[2];
 
           fpair3 = dC*wikS*wkmS*dwmjS / rmjS;
           fj[0] = deljmS[0]*fpair3;
           fj[1] = deljmS[1]*fpair3;
           fj[2] = deljmS[2]*fpair3;
 
           fm[0] = -delkmS[0]*fpair2 - fj[0];
           fm[1] = -delkmS[1]*fpair2 - fj[1];
           fm[2] = -delkmS[2]*fpair2 - fj[2];
 
           f[atomi][0] += fi[0];
           f[atomi][1] += fi[1];
           f[atomi][2] += fi[2];
           f[atomj][0] += fj[0];
           f[atomj][1] += fj[1];
           f[atomj][2] += fj[2];
           f[atomk][0] += fk[0];
           f[atomk][1] += fk[1];
           f[atomk][2] += fk[2];
           f[atomm][0] += fm[0];
           f[atomm][1] += fm[1];
           f[atomm][2] += fm[2];
 
           if (vflag_atom) {
             delimS[0] = delikS[0] + delkmS[0];
             delimS[1] = delikS[1] + delkmS[1];
             delimS[2] = delikS[2] + delkmS[2];
             v_tally4(atomi,atomj,atomk,atomm,fi,fj,fk,delimS,deljmS,delkmS);
           }
         }
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    torsional forces and energy
 
    For every bond i-j between two atoms of mapped type 0, visit each
    neighbor k of i (k != j) and each neighbor l of j (l != i, l != k)
    and add the dihedral term for the chain k-i-j-l:
 
      Vtors = Ec * ((1 - cw)/2)^5 - ekijl/10,   Ec = 256*ekijl/405
 
    where cw is the cosine of the angle between the normals of the
    k-i-j and i-j-l planes and ekijl = epsilonT[ktype][ltype].
    The term is weighted by w21*w23*w34 (values returned by Sp() for the
    three bond lengths) and by (1-tspjik)*(1-tspijl) (values returned by
    Sp2() for the cosines of the two bond angles, using thmin/thmax).
 
    eflag : if nonzero, the weighted energy of each term is computed
    vflag : not referenced inside this routine; tallying is gated on the
            member evflag instead
 
    Forces are accumulated directly into atom->f for i, j, k and l;
    energy/virial are handed to ev_tally4() when evflag is set.
 ------------------------------------------------------------------------- */
 
 void PairAIREBO::TORSION(int eflag, int vflag)
 {
   int i,j,k,l,ii,inum;
   tagint itag,jtag;
   double evdwl,fpair,xtmp,ytmp,ztmp;
   double cos321;
   double w21,dw21,cos234,w34,dw34;
   double cross321[3],cross321mag,cross234[3],cross234mag;
   double w23,dw23,cw2,ekijl,Ec;
   double cw,cwnum,cwnom;
   double rij,rij2,rik,rjl,tspjik,dtsjik,tspijl,dtsijl,costmp,fcpc;
   double sin321,sin234,rjk2,rik2,ril2,rjl2;
   double rjk,ril;
   double Vtors;
   double dndij[3],tmpvec[3],dndik[3],dndjl[3];
   double dcidij,dcidik,dcidjk,dcjdji,dcjdjl,dcjdil;
   double dsidij,dsidik,dsidjk,dsjdji,dsjdjl,dsjdil;
   double dxidij,dxidik,dxidjk,dxjdji,dxjdjl,dxjdil;
   double ddndij,ddndik,ddndjk,ddndjl,ddndil,dcwddn,dcwdn,dvpdcw,Ftmp[3];
   double del32[3],rsq,r32,del23[3],del21[3],r21;
   double deljk[3],del34[3],delil[3],delkl[3],r23,r34;
   double fi[3],fj[3],fk[3],fl[3];
   int itype,jtype,ktype,ltype,kk,ll,jj;
   int *ilist,*REBO_neighs_i,*REBO_neighs_j;
 
   // evdwl is only assigned when eflag is set, yet it is passed to
   // ev_tally4() whenever evflag is set; give it a defined value so an
   // indeterminate double is never read
 
   evdwl = 0.0;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   tagint *tag = atom->tag;
 
   inum = list->inum;
   ilist = list->ilist;
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     itag = tag[i];
     itype = map[type[i]];
 
     // only atoms of mapped type 0 take part in torsion terms
 
     if (itype != 0) continue;
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     REBO_neighs_i = REBO_firstneigh[i];
 
     for (jj = 0; jj < REBO_numneigh[i]; jj++) {
       j = REBO_neighs_i[jj];
       jtag = tag[j];
 
       // the i-j bond shows up as (i,j) and as (j,i); the parity of the
       // tag sum keeps exactly one of the two orderings
       // equal tags fall back to a coordinate comparison
       // (presumably i and j are then images of the same atom)
 
       if (itag > jtag) {
         if ((itag+jtag) % 2 == 0) continue;
       } else if (itag < jtag) {
         if ((itag+jtag) % 2 == 1) continue;
       } else {
         if (x[j][2] < ztmp) continue;
         if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
         if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
       }
 
       jtype = map[type[j]];
       if (jtype != 0) continue;
 
       // atom numbering used below: 1 = k, 2 = i, 3 = j, 4 = l
       // delAB = x[A] - x[B], e.g. del32 = x[j] - x[i]
 
       del32[0] = x[j][0]-x[i][0];
       del32[1] = x[j][1]-x[i][1];
       del32[2] = x[j][2]-x[i][2];
       rsq = del32[0]*del32[0] + del32[1]*del32[1] + del32[2]*del32[2];
       r32 = sqrt(rsq);
       del23[0] = -del32[0];
       del23[1] = -del32[1];
       del23[2] = -del32[2];
       r23 = r32;
       w23 = Sp(r23,rcmin[itype][jtype],rcmax[itype][jtype],dw23);
 
       for (kk = 0; kk < REBO_numneigh[i]; kk++) {
         k = REBO_neighs_i[kk];
         ktype = map[type[k]];
         if (k == j) continue;
         del21[0] = x[i][0]-x[k][0];
         del21[1] = x[i][1]-x[k][1];
         del21[2] = x[i][2]-x[k][2];
         rsq = del21[0]*del21[0] + del21[1]*del21[1] + del21[2]*del21[2];
         r21 = sqrt(rsq);
 
         // cosine of the k-i-j angle, clamped to [-1,1]
 
         cos321 = - ((del21[0]*del32[0]) + (del21[1]*del32[1]) +
                     (del21[2]*del32[2])) / (r21*r32);
         cos321 = MIN(cos321,1.0);
         cos321 = MAX(cos321,-1.0);
         sin321 = sqrt(1.0 - cos321*cos321);
 
         // (near-)collinear k-i-j: sin321 is divided by further down
 
         if (sin321 < TOL) continue;
 
         deljk[0] = del21[0]-del23[0];
         deljk[1] = del21[1]-del23[1];
         deljk[2] = del21[2]-del23[2];
         rjk2 = deljk[0]*deljk[0] + deljk[1]*deljk[1] + deljk[2]*deljk[2];
         rjk=sqrt(rjk2);
         w21 = Sp(r21,rcmin[itype][ktype],rcmax[itype][ktype],dw21);
 
         // bond-angle cutoff term for j-i-k, from the law of cosines
 
         rij = r32;
         rik = r21;
         rij2 = r32*r32;
         rik2 = r21*r21;
         costmp = 0.5*(rij2+rik2-rjk2)/rij/rik;
         tspjik = Sp2(costmp,thmin,thmax,dtsjik);
         dtsjik = -dtsjik;
 
         REBO_neighs_j = REBO_firstneigh[j];
         for (ll = 0; ll < REBO_numneigh[j]; ll++) {
           l = REBO_neighs_j[ll];
           ltype = map[type[l]];
           if (l == i || l == k) continue;
           del34[0] = x[j][0]-x[l][0];
           del34[1] = x[j][1]-x[l][1];
           del34[2] = x[j][2]-x[l][2];
           rsq = del34[0]*del34[0] + del34[1]*del34[1] + del34[2]*del34[2];
           r34 = sqrt(rsq);
 
           // cosine of the i-j-l angle, clamped to [-1,1]
 
           cos234 = (del32[0]*del34[0] + del32[1]*del34[1] +
                     del32[2]*del34[2]) / (r32*r34);
           cos234 = MIN(cos234,1.0);
           cos234 = MAX(cos234,-1.0);
           sin234 = sqrt(1.0 - cos234*cos234);
           if (sin234 < TOL) continue;
           w34 = Sp(r34,rcmin[jtype][ltype],rcmax[jtype][ltype],dw34);
           delil[0] = del23[0] + del34[0];
           delil[1] = del23[1] + del34[1];
           delil[2] = del23[2] + del34[2];
           ril2 = delil[0]*delil[0] + delil[1]*delil[1] + delil[2]*delil[2];
           ril=sqrt(ril2);
 
           // bond-angle cutoff term for i-j-l, from the law of cosines
 
           rjl = r34;
           rjl2 = r34*r34;
           costmp = 0.5*(rij2+rjl2-ril2)/rij/rjl;
           tspijl = Sp2(costmp,thmin,thmax,dtsijl);
           dtsijl = -dtsijl; //need minus sign
 
           // normals of the k-i-j and i-j-l planes and their magnitudes
 
           cross321[0] = (del32[1]*del21[2])-(del32[2]*del21[1]);
           cross321[1] = (del32[2]*del21[0])-(del32[0]*del21[2]);
           cross321[2] = (del32[0]*del21[1])-(del32[1]*del21[0]);
           cross321mag = sqrt(cross321[0]*cross321[0]+
                              cross321[1]*cross321[1]+
                              cross321[2]*cross321[2]);
           cross234[0] = (del23[1]*del34[2])-(del23[2]*del34[1]);
           cross234[1] = (del23[2]*del34[0])-(del23[0]*del34[2]);
           cross234[2] = (del23[0]*del34[1])-(del23[1]*del34[0]);
           cross234mag = sqrt(cross234[0]*cross234[0]+
                              cross234[1]*cross234[1]+
                              cross234[2]*cross234[2]);
 
           // cw = cosine of the angle between the two plane normals
 
           cwnum = (cross321[0]*cross234[0]) +
             (cross321[1]*cross234[1])+(cross321[2]*cross234[2]);
           cwnom = r21*r34*r32*r32*sin321*sin234;
           cw = cwnum/cwnom;
 
           // unweighted torsion energy for this k-i-j-l chain
 
           cw2 = (.5*(1.0-cw));
           ekijl = epsilonT[ktype][ltype];
           Ec = 256.0*ekijl/405.0;
           Vtors = (Ec*(powint(cw2,5)))-(ekijl/10.0);
 
           if (eflag) evdwl = Vtors*w21*w23*w34*(1.0-tspjik)*(1.0-tspijl);
 
           // derivatives of the numerator cwnum w.r.t. the bond vectors
 
           dndij[0] = (cross234[1]*del21[2])-(cross234[2]*del21[1]);
           dndij[1] = (cross234[2]*del21[0])-(cross234[0]*del21[2]);
           dndij[2] = (cross234[0]*del21[1])-(cross234[1]*del21[0]);
 
           tmpvec[0] = (del34[1]*cross321[2])-(del34[2]*cross321[1]);
           tmpvec[1] = (del34[2]*cross321[0])-(del34[0]*cross321[2]);
           tmpvec[2] = (del34[0]*cross321[1])-(del34[1]*cross321[0]);
 
           dndij[0] = dndij[0]+tmpvec[0];
           dndij[1] = dndij[1]+tmpvec[1];
           dndij[2] = dndij[2]+tmpvec[2];
 
           dndik[0] = (del23[1]*cross234[2])-(del23[2]*cross234[1]);
           dndik[1] = (del23[2]*cross234[0])-(del23[0]*cross234[2]);
           dndik[2] = (del23[0]*cross234[1])-(del23[1]*cross234[0]);
 
           dndjl[0] = (cross321[1]*del23[2])-(cross321[2]*del23[1]);
           dndjl[1] = (cross321[2]*del23[0])-(cross321[0]*del23[2]);
           dndjl[2] = (cross321[0]*del23[1])-(cross321[1]*del23[0]);
 
           // derivatives of the bond-angle cosines w.r.t. the distances
 
           dcidij = ((r23*r23)-(r21*r21)+(rjk*rjk))/(2.0*r23*r23*r21);
           dcidik = ((r21*r21)-(r23*r23)+(rjk*rjk))/(2.0*r23*r21*r21);
           dcidjk = (-rjk)/(r23*r21);
           dcjdji = ((r23*r23)-(r34*r34)+(ril*ril))/(2.0*r23*r23*r34);
           dcjdjl = ((r34*r34)-(r23*r23)+(ril*ril))/(2.0*r23*r34*r34);
           dcjdil = (-ril)/(r23*r34);
 
           // matching derivatives of the sines: ds = -(cos/sin) * dc
 
           dsidij = (-cos321/sin321)*dcidij;
           dsidik = (-cos321/sin321)*dcidik;
           dsidjk = (-cos321/sin321)*dcidjk;
 
           dsjdji = (-cos234/sin234)*dcjdji;
           dsjdjl = (-cos234/sin234)*dcjdjl;
           dsjdil = (-cos234/sin234)*dcjdil;
 
           // derivatives of r23*r21*sin321 and r23*r34*sin234
 
           dxidij = (r21*sin321)+(r23*r21*dsidij);
           dxidik = (r23*sin321)+(r23*r21*dsidik);
           dxidjk = (r23*r21*dsidjk);
 
           dxjdji = (r34*sin234)+(r23*r34*dsjdji);
           dxjdjl = (r23*sin234)+(r23*r34*dsjdjl);
           dxjdil = (r23*r34*dsjdil);
 
           // derivatives of the denominator cwnom, then of cw = cwnum/cwnom
 
           ddndij = (dxidij*cross234mag)+(cross321mag*dxjdji);
           ddndik = dxidik*cross234mag;
           ddndjk = dxidjk*cross234mag;
           ddndjl = cross321mag*dxjdjl;
           ddndil = cross321mag*dxjdil;
           dcwddn = -cwnum/(cwnom*cwnom);
           dcwdn = 1.0/cwnom;
 
           // prefactor: weights times Ec*(.5)*5*cw2^4
 
           dvpdcw = (-1.0)*Ec*(-.5)*5.0*powint(cw2,4) *
             w23*w21*w34*(1.0-tspjik)*(1.0-tspijl);
 
           // i-j contribution
 
           Ftmp[0] = dvpdcw*((dcwdn*dndij[0])+(dcwddn*ddndij*del23[0]/r23));
           Ftmp[1] = dvpdcw*((dcwdn*dndij[1])+(dcwddn*ddndij*del23[1]/r23));
           Ftmp[2] = dvpdcw*((dcwdn*dndij[2])+(dcwddn*ddndij*del23[2]/r23));
           fi[0] = Ftmp[0];
           fi[1] = Ftmp[1];
           fi[2] = Ftmp[2];
           fj[0] = -Ftmp[0];
           fj[1] = -Ftmp[1];
           fj[2] = -Ftmp[2];
 
           // i-k contribution
 
           Ftmp[0] = dvpdcw*((dcwdn*dndik[0])+(dcwddn*ddndik*del21[0]/r21));
           Ftmp[1] = dvpdcw*((dcwdn*dndik[1])+(dcwddn*ddndik*del21[1]/r21));
           Ftmp[2] = dvpdcw*((dcwdn*dndik[2])+(dcwddn*ddndik*del21[2]/r21));
           fi[0] += Ftmp[0];
           fi[1] += Ftmp[1];
           fi[2] += Ftmp[2];
           fk[0] = -Ftmp[0];
           fk[1] = -Ftmp[1];
           fk[2] = -Ftmp[2];
 
           // j-k contribution
 
           Ftmp[0] = (dvpdcw*dcwddn*ddndjk*deljk[0])/rjk;
           Ftmp[1] = (dvpdcw*dcwddn*ddndjk*deljk[1])/rjk;
           Ftmp[2] = (dvpdcw*dcwddn*ddndjk*deljk[2])/rjk;
           fj[0] += Ftmp[0];
           fj[1] += Ftmp[1];
           fj[2] += Ftmp[2];
           fk[0] -= Ftmp[0];
           fk[1] -= Ftmp[1];
           fk[2] -= Ftmp[2];
 
           // j-l contribution
 
           Ftmp[0] = dvpdcw*((dcwdn*dndjl[0])+(dcwddn*ddndjl*del34[0]/r34));
           Ftmp[1] = dvpdcw*((dcwdn*dndjl[1])+(dcwddn*ddndjl*del34[1]/r34));
           Ftmp[2] = dvpdcw*((dcwdn*dndjl[2])+(dcwddn*ddndjl*del34[2]/r34));
           fj[0] += Ftmp[0];
           fj[1] += Ftmp[1];
           fj[2] += Ftmp[2];
           fl[0] = -Ftmp[0];
           fl[1] = -Ftmp[1];
           fl[2] = -Ftmp[2];
 
           // i-l contribution
 
           Ftmp[0] = (dvpdcw*dcwddn*ddndil*delil[0])/ril;
           Ftmp[1] = (dvpdcw*dcwddn*ddndil*delil[1])/ril;
           Ftmp[2] = (dvpdcw*dcwddn*ddndil*delil[2])/ril;
           fi[0] += Ftmp[0];
           fi[1] += Ftmp[1];
           fi[2] += Ftmp[2];
           fl[0] -= Ftmp[0];
           fl[1] -= Ftmp[1];
           fl[2] -= Ftmp[2];
 
           // coordination forces
           // (from the derivatives dw21, dw23, dw34 of the bond weights)
 
           fpair = Vtors*dw21*w23*w34*(1.0-tspjik)*(1.0-tspijl) / r21;
           fi[0] -= del21[0]*fpair;
           fi[1] -= del21[1]*fpair;
           fi[2] -= del21[2]*fpair;
           fk[0] += del21[0]*fpair;
           fk[1] += del21[1]*fpair;
           fk[2] += del21[2]*fpair;
 
           fpair = Vtors*w21*dw23*w34*(1.0-tspjik)*(1.0-tspijl) / r23;
           fi[0] -= del23[0]*fpair;
           fi[1] -= del23[1]*fpair;
           fi[2] -= del23[2]*fpair;
           fj[0] += del23[0]*fpair;
           fj[1] += del23[1]*fpair;
           fj[2] += del23[2]*fpair;
 
           fpair = Vtors*w21*w23*dw34*(1.0-tspjik)*(1.0-tspijl) / r34;
           fj[0] -= del34[0]*fpair;
           fj[1] -= del34[1]*fpair;
           fj[2] -= del34[2]*fpair;
           fl[0] += del34[0]*fpair;
           fl[1] += del34[1]*fpair;
           fl[2] += del34[2]*fpair;
 
           // additional cut off function forces
           // (from the derivatives dtsjik, dtsijl of the angle cutoffs)
 
           fcpc = -Vtors*w21*w23*w34*dtsjik*(1.0-tspijl);
           fpair = fcpc*dcidij/rij;
           fi[0] += fpair*del23[0];
           fi[1] += fpair*del23[1];
           fi[2] += fpair*del23[2];
           fj[0] -= fpair*del23[0];
           fj[1] -= fpair*del23[1];
           fj[2] -= fpair*del23[2];
 
           fpair = fcpc*dcidik/rik;
           fi[0] += fpair*del21[0];
           fi[1] += fpair*del21[1];
           fi[2] += fpair*del21[2];
           fk[0] -= fpair*del21[0];
           fk[1] -= fpair*del21[1];
           fk[2] -= fpair*del21[2];
 
           fpair = fcpc*dcidjk/rjk;
           fj[0] += fpair*deljk[0];
           fj[1] += fpair*deljk[1];
           fj[2] += fpair*deljk[2];
           fk[0] -= fpair*deljk[0];
           fk[1] -= fpair*deljk[1];
           fk[2] -= fpair*deljk[2];
 
           fcpc = -Vtors*w21*w23*w34*(1.0-tspjik)*dtsijl;
           fpair = fcpc*dcjdji/rij;
           fi[0] += fpair*del23[0];
           fi[1] += fpair*del23[1];
           fi[2] += fpair*del23[2];
           fj[0] -= fpair*del23[0];
           fj[1] -= fpair*del23[1];
           fj[2] -= fpair*del23[2];
 
           fpair = fcpc*dcjdjl/rjl;
           fj[0] += fpair*del34[0];
           fj[1] += fpair*del34[1];
           fj[2] += fpair*del34[2];
           fl[0] -= fpair*del34[0];
           fl[1] -= fpair*del34[1];
           fl[2] -= fpair*del34[2];
 
           fpair = fcpc*dcjdil/ril;
           fi[0] += fpair*delil[0];
           fi[1] += fpair*delil[1];
           fi[2] += fpair*delil[2];
           fl[0] -= fpair*delil[0];
           fl[1] -= fpair*delil[1];
           fl[2] -= fpair*delil[2];
 
           // sum per-atom forces into atom force array
 
           f[i][0] += fi[0]; f[i][1] += fi[1]; f[i][2] += fi[2];
           f[j][0] += fj[0]; f[j][1] += fj[1]; f[j][2] += fj[2];
           f[k][0] += fk[0]; f[k][1] += fk[1]; f[k][2] += fk[2];
           f[l][0] += fl[0]; f[l][1] += fl[1]; f[l][2] += fl[2];
 
           // tally energy/virial; delkl = delil - del21 = x[k] - x[l]
 
           if (evflag) {
             delkl[0] = delil[0] - del21[0];
             delkl[1] = delil[1] - del21[1];
             delkl[2] = delil[2] - del21[2];
             ev_tally4(i,j,k,l,evdwl,fi,fj,fk,delil,del34,delkl);
           }
         }
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    Bij function
 ------------------------------------------------------------------------- */
 
 double PairAIREBO::bondorder(int i, int j, double rij[3],
                              double rijmag, double VA,
                              double **f, int vflag_atom)
 {
   int atomi,atomj,k,n,l,atomk,atoml,atomn,atom1,atom2,atom3,atom4;
   int itype,jtype,ktype,ltype,ntype;
   double rik[3],rjl[3],rkn[3],rji[3],rki[3],rlj[3],rknmag,dNki,dwjl,bij;
   double NijC,NijH,NjiC,NjiH,wik,dwik,dwkn,wjl;
   double rikmag,rjlmag,cosjik,cosijl,g,tmp2,tmp3;
   double Etmp,pij,tmp,wij,dwij,NconjtmpI,NconjtmpJ,Nki,Nlj,dS;
   double lamdajik,lamdaijl,dgdc,dgdN,pji,Nijconj,piRC;
   double dcosjikdri[3],dcosijldri[3],dcosjikdrk[3];
   double dN2[2],dN3[3];
   double dcosjikdrj[3],dcosijldrj[3],dcosijldrl[3];
   double Tij;
   double r32[3],r32mag,cos321,r43[3],r13[3];
   double dNlj;
   double om1234,rln[3];
   double rlnmag,dwln,r23[3],r23mag,r21[3],r21mag;
   double w21,dw21,r34[3],r34mag,cos234,w34,dw34;
   double cross321[3],cross234[3],prefactor,SpN;
   double fcijpc,fcikpc,fcjlpc,fcjkpc,fcilpc;
   double dt2dik[3],dt2djl[3],dt2dij[3],aa,aaa1,aaa2,at2,cw,cwnum,cwnom;
   double sin321,sin234,rr,rijrik,rijrjl,rjk2,rik2,ril2,rjl2;
   double dctik,dctjk,dctjl,dctij,dctji,dctil,rik2i,rjl2i,sink2i,sinl2i;
   double rjk[3],ril[3],dt1dik,dt1djk,dt1djl,dt1dil,dt1dij;
   double F23[3],F12[3],F34[3],F31[3],F24[3],fi[3],fj[3],fk[3],fl[3];
   double f1[3],f2[3],f3[3],f4[4];
   double dcut321,PijS,PjiS;
   double rij2,tspjik,dtsjik,tspijl,dtsijl,costmp;
   int *REBO_neighs,*REBO_neighs_i,*REBO_neighs_j,*REBO_neighs_k,*REBO_neighs_l;
 
   double **x = atom->x;
   int *type = atom->type;
 
   atomi = i;
   atomj = j;
   itype = map[type[i]];
   jtype = map[type[j]];
   wij = Sp(rijmag,rcmin[itype][jtype],rcmax[itype][jtype],dwij);
   NijC = nC[i]-(wij*kronecker(jtype,0));
   NijH = nH[i]-(wij*kronecker(jtype,1));
   NjiC = nC[j]-(wij*kronecker(itype,0));
   NjiH = nH[j]-(wij*kronecker(itype,1));
   bij = 0.0;
   tmp = 0.0;
   tmp2 = 0.0;
   tmp3 = 0.0;
   dgdc = 0.0;
   dgdN = 0.0;
   NconjtmpI = 0.0;
   NconjtmpJ = 0.0;
   Etmp = 0.0;
 
   REBO_neighs = REBO_firstneigh[i];
   for (k = 0; k < REBO_numneigh[i]; k++) {
     atomk = REBO_neighs[k];
     if (atomk != atomj) {
       ktype = map[type[atomk]];
       rik[0] = x[atomi][0]-x[atomk][0];
       rik[1] = x[atomi][1]-x[atomk][1];
       rik[2] = x[atomi][2]-x[atomk][2];
       rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2]));
       lamdajik = 4.0*kronecker(itype,1) *
         ((rho[ktype][1]-rikmag)-(rho[jtype][1]-rijmag));
       wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dS);
       Nki = nC[atomk]-(wik*kronecker(itype,0))+nH[atomk] -
         (wik*kronecker(itype,1));
       cosjik = ((rij[0]*rik[0])+(rij[1]*rik[1])+(rij[2]*rik[2])) /
         (rijmag*rikmag);
       cosjik = MIN(cosjik,1.0);
       cosjik = MAX(cosjik,-1.0);
 
       // evaluate splines g and derivatives dg
 
       g = gSpline(cosjik,(NijC+NijH),itype,&dgdc,&dgdN);
       Etmp = Etmp+(wik*g*exp(lamdajik));
       tmp3 = tmp3+(wik*dgdN*exp(lamdajik));
       NconjtmpI = NconjtmpI+(kronecker(ktype,0)*wik*Sp(Nki,Nmin,Nmax,dS));
     }
   }
 
   PijS = 0.0;
   dN2[0] = 0.0;
   dN2[1] = 0.0;
   PijS = PijSpline(NijC,NijH,itype,jtype,dN2);
   pij = pow(1.0+Etmp+PijS,-0.5);
   tmp = -0.5*cube(pij);
 
   // pij forces
 
   REBO_neighs = REBO_firstneigh[i];
   for (k = 0; k < REBO_numneigh[i]; k++) {
     atomk = REBO_neighs[k];
     if (atomk != atomj) {
       ktype = map[type[atomk]];
       rik[0] = x[atomi][0]-x[atomk][0];
       rik[1] = x[atomi][1]-x[atomk][1];
       rik[2] = x[atomi][2]-x[atomk][2];
       rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2]));
       lamdajik = 4.0*kronecker(itype,1) *
         ((rho[ktype][1]-rikmag)-(rho[jtype][1]-rijmag));
       wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dwik);
       cosjik = (rij[0]*rik[0] + rij[1]*rik[1] + rij[2]*rik[2]) /
         (rijmag*rikmag);
       cosjik = MIN(cosjik,1.0);
       cosjik = MAX(cosjik,-1.0);
 
       dcosjikdri[0] = ((rij[0]+rik[0])/(rijmag*rikmag)) -
         (cosjik*((rij[0]/(rijmag*rijmag))+(rik[0]/(rikmag*rikmag))));
       dcosjikdri[1] = ((rij[1]+rik[1])/(rijmag*rikmag)) -
         (cosjik*((rij[1]/(rijmag*rijmag))+(rik[1]/(rikmag*rikmag))));
       dcosjikdri[2] = ((rij[2]+rik[2])/(rijmag*rikmag)) -
         (cosjik*((rij[2]/(rijmag*rijmag))+(rik[2]/(rikmag*rikmag))));
       dcosjikdrk[0] = (-rij[0]/(rijmag*rikmag)) +
         (cosjik*(rik[0]/(rikmag*rikmag)));
       dcosjikdrk[1] = (-rij[1]/(rijmag*rikmag)) +
         (cosjik*(rik[1]/(rikmag*rikmag)));
       dcosjikdrk[2] = (-rij[2]/(rijmag*rikmag)) +
         (cosjik*(rik[2]/(rikmag*rikmag)));
       dcosjikdrj[0] = (-rik[0]/(rijmag*rikmag)) +
         (cosjik*(rij[0]/(rijmag*rijmag)));
       dcosjikdrj[1] = (-rik[1]/(rijmag*rikmag)) +
         (cosjik*(rij[1]/(rijmag*rijmag)));
       dcosjikdrj[2] = (-rik[2]/(rijmag*rikmag)) +
         (cosjik*(rij[2]/(rijmag*rijmag)));
 
       g = gSpline(cosjik,(NijC+NijH),itype,&dgdc,&dgdN);
       tmp2 = VA*.5*(tmp*wik*dgdc*exp(lamdajik));
       fj[0] = -tmp2*dcosjikdrj[0];
       fj[1] = -tmp2*dcosjikdrj[1];
       fj[2] = -tmp2*dcosjikdrj[2];
       fi[0] = -tmp2*dcosjikdri[0];
       fi[1] = -tmp2*dcosjikdri[1];
       fi[2] = -tmp2*dcosjikdri[2];
       fk[0] = -tmp2*dcosjikdrk[0];
       fk[1] = -tmp2*dcosjikdrk[1];
       fk[2] = -tmp2*dcosjikdrk[2];
 
       tmp2 = VA*.5*(tmp*wik*g*exp(lamdajik)*4.0*kronecker(itype,1));
       fj[0] -= tmp2*(-rij[0]/rijmag);
       fj[1] -= tmp2*(-rij[1]/rijmag);
       fj[2] -= tmp2*(-rij[2]/rijmag);
       fi[0] -= tmp2*((-rik[0]/rikmag)+(rij[0]/rijmag));
       fi[1] -= tmp2*((-rik[1]/rikmag)+(rij[1]/rijmag));
       fi[2] -= tmp2*((-rik[2]/rikmag)+(rij[2]/rijmag));
       fk[0] -= tmp2*(rik[0]/rikmag);
       fk[1] -= tmp2*(rik[1]/rikmag);
       fk[2] -= tmp2*(rik[2]/rikmag);
 
       // coordination forces
 
       // dwik forces
 
       tmp2 = VA*.5*(tmp*dwik*g*exp(lamdajik))/rikmag;
       fi[0] -= tmp2*rik[0];
       fi[1] -= tmp2*rik[1];
       fi[2] -= tmp2*rik[2];
       fk[0] += tmp2*rik[0];
       fk[1] += tmp2*rik[1];
       fk[2] += tmp2*rik[2];
 
       // PIJ forces
 
       tmp2 = VA*.5*(tmp*dN2[ktype]*dwik)/rikmag;
       fi[0] -= tmp2*rik[0];
       fi[1] -= tmp2*rik[1];
       fi[2] -= tmp2*rik[2];
       fk[0] += tmp2*rik[0];
       fk[1] += tmp2*rik[1];
       fk[2] += tmp2*rik[2];
 
       // dgdN forces
 
       tmp2 = VA*.5*(tmp*tmp3*dwik)/rikmag;
       fi[0] -= tmp2*rik[0];
       fi[1] -= tmp2*rik[1];
       fi[2] -= tmp2*rik[2];
       fk[0] += tmp2*rik[0];
       fk[1] += tmp2*rik[1];
       fk[2] += tmp2*rik[2];
 
       f[atomi][0] += fi[0]; f[atomi][1] += fi[1]; f[atomi][2] += fi[2];
       f[atomj][0] += fj[0]; f[atomj][1] += fj[1]; f[atomj][2] += fj[2];
       f[atomk][0] += fk[0]; f[atomk][1] += fk[1]; f[atomk][2] += fk[2];
 
       if (vflag_atom) {
         rji[0] = -rij[0]; rji[1] = -rij[1]; rji[2] = -rij[2];
         rki[0] = -rik[0]; rki[1] = -rik[1]; rki[2] = -rik[2];
         v_tally3(atomi,atomj,atomk,fj,fk,rji,rki);
       }
     }
   }
 
   tmp = 0.0;
   tmp2 = 0.0;
   tmp3 = 0.0;
   Etmp = 0.0;
 
   REBO_neighs = REBO_firstneigh[j];
   for (l = 0; l < REBO_numneigh[j]; l++) {
     atoml = REBO_neighs[l];
     if (atoml != atomi) {
       ltype = map[type[atoml]];
       rjl[0] = x[atomj][0]-x[atoml][0];
       rjl[1] = x[atomj][1]-x[atoml][1];
       rjl[2] = x[atomj][2]-x[atoml][2];
       rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2]));
       lamdaijl = 4.0*kronecker(jtype,1) *
         ((rho[ltype][1]-rjlmag)-(rho[itype][1]-rijmag));
       wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dS);
       Nlj = nC[atoml]-(wjl*kronecker(jtype,0)) +
         nH[atoml]-(wjl*kronecker(jtype,1));
       cosijl = -1.0*((rij[0]*rjl[0])+(rij[1]*rjl[1])+(rij[2]*rjl[2])) /
         (rijmag*rjlmag);
       cosijl = MIN(cosijl,1.0);
       cosijl = MAX(cosijl,-1.0);
 
       // evaluate splines g and derivatives dg
 
       g = gSpline(cosijl,NjiC+NjiH,jtype,&dgdc,&dgdN);
       Etmp = Etmp+(wjl*g*exp(lamdaijl));
       tmp3 = tmp3+(wjl*dgdN*exp(lamdaijl));
       NconjtmpJ = NconjtmpJ+(kronecker(ltype,0)*wjl*Sp(Nlj,Nmin,Nmax,dS));
     }
   }
 
   PjiS = 0.0;
   dN2[0] = 0.0;
   dN2[1] = 0.0;
   PjiS = PijSpline(NjiC,NjiH,jtype,itype,dN2);
   pji = pow(1.0+Etmp+PjiS,-0.5);
   tmp = -0.5*cube(pji);
 
   REBO_neighs = REBO_firstneigh[j];
   for (l = 0; l < REBO_numneigh[j]; l++) {
     atoml = REBO_neighs[l];
     if (atoml != atomi) {
       ltype = map[type[atoml]];
       rjl[0] = x[atomj][0]-x[atoml][0];
       rjl[1] = x[atomj][1]-x[atoml][1];
       rjl[2] = x[atomj][2]-x[atoml][2];
       rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2]));
       lamdaijl = 4.0*kronecker(jtype,1) *
         ((rho[ltype][1]-rjlmag)-(rho[itype][1]-rijmag));
       wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dwjl);
       cosijl = (-1.0*((rij[0]*rjl[0])+(rij[1]*rjl[1])+(rij[2]*rjl[2]))) /
         (rijmag*rjlmag);
       cosijl = MIN(cosijl,1.0);
       cosijl = MAX(cosijl,-1.0);
 
       dcosijldri[0] = (-rjl[0]/(rijmag*rjlmag)) -
         (cosijl*rij[0]/(rijmag*rijmag));
       dcosijldri[1] = (-rjl[1]/(rijmag*rjlmag)) -
         (cosijl*rij[1]/(rijmag*rijmag));
       dcosijldri[2] = (-rjl[2]/(rijmag*rjlmag)) -
         (cosijl*rij[2]/(rijmag*rijmag));
       dcosijldrj[0] = ((-rij[0]+rjl[0])/(rijmag*rjlmag)) +
         (cosijl*((rij[0]/square(rijmag))-(rjl[0]/(rjlmag*rjlmag))));
       dcosijldrj[1] = ((-rij[1]+rjl[1])/(rijmag*rjlmag)) +
         (cosijl*((rij[1]/square(rijmag))-(rjl[1]/(rjlmag*rjlmag))));
       dcosijldrj[2] = ((-rij[2]+rjl[2])/(rijmag*rjlmag)) +
         (cosijl*((rij[2]/square(rijmag))-(rjl[2]/(rjlmag*rjlmag))));
       dcosijldrl[0] = (rij[0]/(rijmag*rjlmag))+(cosijl*rjl[0]/(rjlmag*rjlmag));
       dcosijldrl[1] = (rij[1]/(rijmag*rjlmag))+(cosijl*rjl[1]/(rjlmag*rjlmag));
       dcosijldrl[2] = (rij[2]/(rijmag*rjlmag))+(cosijl*rjl[2]/(rjlmag*rjlmag));
 
       // evaluate splines g and derivatives dg
 
       g = gSpline(cosijl,NjiC+NjiH,jtype,&dgdc,&dgdN);
       tmp2 = VA*.5*(tmp*wjl*dgdc*exp(lamdaijl));
       fi[0] = -tmp2*dcosijldri[0];
       fi[1] = -tmp2*dcosijldri[1];
       fi[2] = -tmp2*dcosijldri[2];
       fj[0] = -tmp2*dcosijldrj[0];
       fj[1] = -tmp2*dcosijldrj[1];
       fj[2] = -tmp2*dcosijldrj[2];
       fl[0] = -tmp2*dcosijldrl[0];
       fl[1] = -tmp2*dcosijldrl[1];
       fl[2] = -tmp2*dcosijldrl[2];
 
       tmp2 = VA*.5*(tmp*wjl*g*exp(lamdaijl)*4.0*kronecker(jtype,1));
       fi[0] -= tmp2*(rij[0]/rijmag);
       fi[1] -= tmp2*(rij[1]/rijmag);
       fi[2] -= tmp2*(rij[2]/rijmag);
       fj[0] -= tmp2*((-rjl[0]/rjlmag)-(rij[0]/rijmag));
       fj[1] -= tmp2*((-rjl[1]/rjlmag)-(rij[1]/rijmag));
       fj[2] -= tmp2*((-rjl[2]/rjlmag)-(rij[2]/rijmag));
       fl[0] -= tmp2*(rjl[0]/rjlmag);
       fl[1] -= tmp2*(rjl[1]/rjlmag);
       fl[2] -= tmp2*(rjl[2]/rjlmag);
 
       // coordination forces
 
       // dwik forces
 
       tmp2 = VA*.5*(tmp*dwjl*g*exp(lamdaijl))/rjlmag;
       fj[0] -= tmp2*rjl[0];
       fj[1] -= tmp2*rjl[1];
       fj[2] -= tmp2*rjl[2];
       fl[0] += tmp2*rjl[0];
       fl[1] += tmp2*rjl[1];
       fl[2] += tmp2*rjl[2];
 
       // PIJ forces
 
       tmp2 = VA*.5*(tmp*dN2[ltype]*dwjl)/rjlmag;
       fj[0] -= tmp2*rjl[0];
       fj[1] -= tmp2*rjl[1];
       fj[2] -= tmp2*rjl[2];
       fl[0] += tmp2*rjl[0];
       fl[1] += tmp2*rjl[1];
       fl[2] += tmp2*rjl[2];
 
       // dgdN forces
 
       tmp2 = VA*.5*(tmp*tmp3*dwjl)/rjlmag;
       fj[0] -= tmp2*rjl[0];
       fj[1] -= tmp2*rjl[1];
       fj[2] -= tmp2*rjl[2];
       fl[0] += tmp2*rjl[0];
       fl[1] += tmp2*rjl[1];
       fl[2] += tmp2*rjl[2];
 
       f[atomi][0] += fi[0]; f[atomi][1] += fi[1]; f[atomi][2] += fi[2];
       f[atomj][0] += fj[0]; f[atomj][1] += fj[1]; f[atomj][2] += fj[2];
       f[atoml][0] += fl[0]; f[atoml][1] += fl[1]; f[atoml][2] += fl[2];
 
       if (vflag_atom) {
         rlj[0] = -rjl[0]; rlj[1] = -rjl[1]; rlj[2] = -rjl[2];
         v_tally3(atomi,atomj,atoml,fi,fl,rij,rlj);
       }
     }
   }
 
   // evaluate Nij conj
 
   Nijconj = 1.0+(NconjtmpI*NconjtmpI)+(NconjtmpJ*NconjtmpJ);
   piRC = piRCSpline(NijC+NijH,NjiC+NjiH,Nijconj,itype,jtype,dN3);
 
   // piRC forces
 
   REBO_neighs_i = REBO_firstneigh[i];
   for (k = 0; k < REBO_numneigh[i]; k++) {
     atomk = REBO_neighs_i[k];
     if (atomk !=atomj) {
       ktype = map[type[atomk]];
       rik[0] = x[atomi][0]-x[atomk][0];
       rik[1] = x[atomi][1]-x[atomk][1];
       rik[2] = x[atomi][2]-x[atomk][2];
       rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2]));
       wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dwik);
       Nki = nC[atomk]-(wik*kronecker(itype,0))+nH[atomk] -
         (wik*kronecker(itype,1));
       SpN = Sp(Nki,Nmin,Nmax,dNki);
 
       tmp2 = VA*dN3[0]*dwik/rikmag;
       f[atomi][0] -= tmp2*rik[0];
       f[atomi][1] -= tmp2*rik[1];
       f[atomi][2] -= tmp2*rik[2];
       f[atomk][0] += tmp2*rik[0];
       f[atomk][1] += tmp2*rik[1];
       f[atomk][2] += tmp2*rik[2];
 
       if (vflag_atom) v_tally2(atomi,atomk,-tmp2,rik);
 
       tmp2 = VA*dN3[2]*(2.0*NconjtmpI*dwik*SpN)/rikmag;
       f[atomi][0] -= tmp2*rik[0];
       f[atomi][1] -= tmp2*rik[1];
       f[atomi][2] -= tmp2*rik[2];
       f[atomk][0] += tmp2*rik[0];
       f[atomk][1] += tmp2*rik[1];
       f[atomk][2] += tmp2*rik[2];
 
       if (vflag_atom) v_tally2(atomi,atomk,-tmp2,rik);
 
       if (fabs(dNki) > TOL) {
         REBO_neighs_k = REBO_firstneigh[atomk];
         for (n = 0; n < REBO_numneigh[atomk]; n++) {
           atomn = REBO_neighs_k[n];
           if (atomn != atomi) {
             ntype = map[type[atomn]];
             rkn[0] = x[atomk][0]-x[atomn][0];
             rkn[1] = x[atomk][1]-x[atomn][1];
             rkn[2] = x[atomk][2]-x[atomn][2];
             rknmag = sqrt((rkn[0]*rkn[0])+(rkn[1]*rkn[1])+(rkn[2]*rkn[2]));
             Sp(rknmag,rcmin[ktype][ntype],rcmax[ktype][ntype],dwkn);
 
             tmp2 = VA*dN3[2]*(2.0*NconjtmpI*wik*dNki*dwkn)/rknmag;
             f[atomk][0] -= tmp2*rkn[0];
             f[atomk][1] -= tmp2*rkn[1];
             f[atomk][2] -= tmp2*rkn[2];
             f[atomn][0] += tmp2*rkn[0];
             f[atomn][1] += tmp2*rkn[1];
             f[atomn][2] += tmp2*rkn[2];
 
             if (vflag_atom) v_tally2(atomk,atomn,-tmp2,rkn);
           }
         }
       }
     }
   }
 
   // piRC forces
 
   REBO_neighs = REBO_firstneigh[atomj];
   for (l = 0; l < REBO_numneigh[atomj]; l++) {
     atoml = REBO_neighs[l];
     if (atoml !=atomi) {
       ltype = map[type[atoml]];
       rjl[0] = x[atomj][0]-x[atoml][0];
       rjl[1] = x[atomj][1]-x[atoml][1];
       rjl[2] = x[atomj][2]-x[atoml][2];
       rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2]));
       wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dwjl);
       Nlj = nC[atoml]-(wjl*kronecker(jtype,0))+nH[atoml] -
         (wjl*kronecker(jtype,1));
       SpN = Sp(Nlj,Nmin,Nmax,dNlj);
 
       tmp2 = VA*dN3[1]*dwjl/rjlmag;
       f[atomj][0] -= tmp2*rjl[0];
       f[atomj][1] -= tmp2*rjl[1];
       f[atomj][2] -= tmp2*rjl[2];
       f[atoml][0] += tmp2*rjl[0];
       f[atoml][1] += tmp2*rjl[1];
       f[atoml][2] += tmp2*rjl[2];
 
       if (vflag_atom) v_tally2(atomj,atoml,-tmp2,rjl);
 
       tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*dwjl*SpN)/rjlmag;
       f[atomj][0] -= tmp2*rjl[0];
       f[atomj][1] -= tmp2*rjl[1];
       f[atomj][2] -= tmp2*rjl[2];
       f[atoml][0] += tmp2*rjl[0];
       f[atoml][1] += tmp2*rjl[1];
       f[atoml][2] += tmp2*rjl[2];
 
       if (vflag_atom) v_tally2(atomj,atoml,-tmp2,rjl);
 
       if (fabs(dNlj) > TOL) {
         REBO_neighs_l = REBO_firstneigh[atoml];
         for (n = 0; n < REBO_numneigh[atoml]; n++) {
           atomn = REBO_neighs_l[n];
           if (atomn != atomj) {
             ntype = map[type[atomn]];
             rln[0] = x[atoml][0]-x[atomn][0];
             rln[1] = x[atoml][1]-x[atomn][1];
             rln[2] = x[atoml][2]-x[atomn][2];
             rlnmag = sqrt((rln[0]*rln[0])+(rln[1]*rln[1])+(rln[2]*rln[2]));
             Sp(rlnmag,rcmin[ltype][ntype],rcmax[ltype][ntype],dwln);
 
             tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*wjl*dNlj*dwln)/rlnmag;
             f[atoml][0] -= tmp2*rln[0];
             f[atoml][1] -= tmp2*rln[1];
             f[atoml][2] -= tmp2*rln[2];
             f[atomn][0] += tmp2*rln[0];
             f[atomn][1] += tmp2*rln[1];
             f[atomn][2] += tmp2*rln[2];
 
             if (vflag_atom) v_tally2(atoml,atomn,-tmp2,rln);
           }
         }
       }
     }
   }
 
   Tij = 0.0;
   dN3[0] = 0.0;
   dN3[1] = 0.0;
   dN3[2] = 0.0;
   if (itype == 0 && jtype == 0)
     Tij=TijSpline((NijC+NijH),(NjiC+NjiH),Nijconj,dN3);
   Etmp = 0.0;
 
   if (fabs(Tij) > TOL) {
     atom2 = atomi;
     atom3 = atomj;
     r32[0] = x[atom3][0]-x[atom2][0];
     r32[1] = x[atom3][1]-x[atom2][1];
     r32[2] = x[atom3][2]-x[atom2][2];
     r32mag = sqrt((r32[0]*r32[0])+(r32[1]*r32[1])+(r32[2]*r32[2]));
     r23[0] = -r32[0];
     r23[1] = -r32[1];
     r23[2] = -r32[2];
     r23mag = r32mag;
     REBO_neighs_i = REBO_firstneigh[i];
     for (k = 0; k < REBO_numneigh[i]; k++) {
       atomk = REBO_neighs_i[k];
       atom1 = atomk;
       ktype = map[type[atomk]];
       if (atomk != atomj) {
         r21[0] = x[atom2][0]-x[atom1][0];
         r21[1] = x[atom2][1]-x[atom1][1];
         r21[2] = x[atom2][2]-x[atom1][2];
         r21mag = sqrt(r21[0]*r21[0] + r21[1]*r21[1] + r21[2]*r21[2]);
         cos321 = -1.0*((r21[0]*r32[0])+(r21[1]*r32[1])+(r21[2]*r32[2])) /
           (r21mag*r32mag);
         cos321 = MIN(cos321,1.0);
         cos321 = MAX(cos321,-1.0);
         Sp2(cos321,thmin,thmax,dcut321);
         sin321 = sqrt(1.0 - cos321*cos321);
         sink2i = 1.0/(sin321*sin321);
         rik2i = 1.0/(r21mag*r21mag);
         if (sin321 != 0.0) {
           rr = (r23mag*r23mag)-(r21mag*r21mag);
           rjk[0] = r21[0]-r23[0];
           rjk[1] = r21[1]-r23[1];
           rjk[2] = r21[2]-r23[2];
           rjk2 = (rjk[0]*rjk[0])+(rjk[1]*rjk[1])+(rjk[2]*rjk[2]);
           rijrik = 2.0*r23mag*r21mag;
           rik2 = r21mag*r21mag;
           dctik = (-rr+rjk2)/(rijrik*rik2);
           dctij = (rr+rjk2)/(rijrik*r23mag*r23mag);
           dctjk = -2.0/rijrik;
           w21 = Sp(r21mag,rcmin[itype][ktype],rcmaxp[itype][ktype],dw21);
           rijmag = r32mag;
           rikmag = r21mag;
           rij2 = r32mag*r32mag;
           rik2 = r21mag*r21mag;
           costmp = 0.5*(rij2+rik2-rjk2)/rijmag/rikmag;
           tspjik = Sp2(costmp,thmin,thmax,dtsjik);
           dtsjik = -dtsjik;
 
           REBO_neighs_j = REBO_firstneigh[j];
           for (l = 0; l < REBO_numneigh[j]; l++) {
             atoml = REBO_neighs_j[l];
             atom4 = atoml;
             ltype = map[type[atoml]];
             if (!(atoml == atomi || atoml == atomk)) {
               r34[0] = x[atom3][0]-x[atom4][0];
               r34[1] = x[atom3][1]-x[atom4][1];
               r34[2] = x[atom3][2]-x[atom4][2];
               r34mag = sqrt((r34[0]*r34[0])+(r34[1]*r34[1])+(r34[2]*r34[2]));
               cos234 = (r32[0]*r34[0] + r32[1]*r34[1] + r32[2]*r34[2]) /
                 (r32mag*r34mag);
               cos234 = MIN(cos234,1.0);
               cos234 = MAX(cos234,-1.0);
               sin234 = sqrt(1.0 - cos234*cos234);
               sinl2i = 1.0/(sin234*sin234);
               rjl2i = 1.0/(r34mag*r34mag);
 
               if (sin234 != 0.0) {
                 w34 = Sp(r34mag,rcmin[jtype][ltype],rcmaxp[jtype][ltype],dw34);
                 rr = (r23mag*r23mag)-(r34mag*r34mag);
                 ril[0] = r23[0]+r34[0];
                 ril[1] = r23[1]+r34[1];
                 ril[2] = r23[2]+r34[2];
                 ril2 = (ril[0]*ril[0])+(ril[1]*ril[1])+(ril[2]*ril[2]);
                 rijrjl = 2.0*r23mag*r34mag;
                 rjl2 = r34mag*r34mag;
                 dctjl = (-rr+ril2)/(rijrjl*rjl2);
                 dctji = (rr+ril2)/(rijrjl*r23mag*r23mag);
                 dctil = -2.0/rijrjl;
                 rjlmag = r34mag;
                 rjl2 = r34mag*r34mag;
                 costmp = 0.5*(rij2+rjl2-ril2)/rijmag/rjlmag;
                 tspijl = Sp2(costmp,thmin,thmax,dtsijl);
                 dtsijl = -dtsijl;
                 prefactor = VA*Tij;
 
                 cross321[0] = (r32[1]*r21[2])-(r32[2]*r21[1]);
                 cross321[1] = (r32[2]*r21[0])-(r32[0]*r21[2]);
                 cross321[2] = (r32[0]*r21[1])-(r32[1]*r21[0]);
                 cross234[0] = (r23[1]*r34[2])-(r23[2]*r34[1]);
                 cross234[1] = (r23[2]*r34[0])-(r23[0]*r34[2]);
                 cross234[2] = (r23[0]*r34[1])-(r23[1]*r34[0]);
 
                 cwnum = (cross321[0]*cross234[0]) +
                   (cross321[1]*cross234[1]) + (cross321[2]*cross234[2]);
                 cwnom = r21mag*r34mag*r23mag*r23mag*sin321*sin234;
                 om1234 = cwnum/cwnom;
                 cw = om1234;
                 Etmp += ((1.0-square(om1234))*w21*w34) *
                   (1.0-tspjik)*(1.0-tspijl);
 
                 dt1dik = (rik2i)-(dctik*sink2i*cos321);
                 dt1djk = (-dctjk*sink2i*cos321);
                 dt1djl = (rjl2i)-(dctjl*sinl2i*cos234);
                 dt1dil = (-dctil*sinl2i*cos234);
                 dt1dij = (2.0/(r23mag*r23mag))-(dctij*sink2i*cos321) -
                   (dctji*sinl2i*cos234);
 
                 dt2dik[0] = (-r23[2]*cross234[1])+(r23[1]*cross234[2]);
                 dt2dik[1] = (-r23[0]*cross234[2])+(r23[2]*cross234[0]);
                 dt2dik[2] = (-r23[1]*cross234[0])+(r23[0]*cross234[1]);
 
                 dt2djl[0] = (-r23[1]*cross321[2])+(r23[2]*cross321[1]);
                 dt2djl[1] = (-r23[2]*cross321[0])+(r23[0]*cross321[2]);
                 dt2djl[2] = (-r23[0]*cross321[1])+(r23[1]*cross321[0]);
 
                 dt2dij[0] = (r21[2]*cross234[1])-(r34[2]*cross321[1]) -
                   (r21[1]*cross234[2])+(r34[1]*cross321[2]);
                 dt2dij[1] = (r21[0]*cross234[2])-(r34[0]*cross321[2]) -
                   (r21[2]*cross234[0])+(r34[2]*cross321[0]);
                 dt2dij[2] = (r21[1]*cross234[0])-(r34[1]*cross321[0]) -
                   (r21[0]*cross234[1])+(r34[0]*cross321[1]);
 
                 aa = (prefactor*2.0*cw/cwnom)*w21*w34 *
                   (1.0-tspjik)*(1.0-tspijl);
                 aaa1 = -prefactor*(1.0-square(om1234)) *
                   (1.0-tspjik)*(1.0-tspijl);
                 aaa2 = aaa1*w21*w34;
                 at2 = aa*cwnum;
 
                 fcijpc = (-dt1dij*at2)+(aaa2*dtsjik*dctij*(1.0-tspijl)) +
                   (aaa2*dtsijl*dctji*(1.0-tspjik));
                 fcikpc = (-dt1dik*at2)+(aaa2*dtsjik*dctik*(1.0-tspijl));
                 fcjlpc = (-dt1djl*at2)+(aaa2*dtsijl*dctjl*(1.0-tspjik));
                 fcjkpc = (-dt1djk*at2)+(aaa2*dtsjik*dctjk*(1.0-tspijl));
                 fcilpc = (-dt1dil*at2)+(aaa2*dtsijl*dctil*(1.0-tspjik));
 
                 F23[0] = (fcijpc*r23[0])+(aa*dt2dij[0]);
                 F23[1] = (fcijpc*r23[1])+(aa*dt2dij[1]);
                 F23[2] = (fcijpc*r23[2])+(aa*dt2dij[2]);
 
                 F12[0] = (fcikpc*r21[0])+(aa*dt2dik[0]);
                 F12[1] = (fcikpc*r21[1])+(aa*dt2dik[1]);
                 F12[2] = (fcikpc*r21[2])+(aa*dt2dik[2]);
 
                 F34[0] = (fcjlpc*r34[0])+(aa*dt2djl[0]);
                 F34[1] = (fcjlpc*r34[1])+(aa*dt2djl[1]);
                 F34[2] = (fcjlpc*r34[2])+(aa*dt2djl[2]);
 
                 F31[0] = (fcjkpc*rjk[0]);
                 F31[1] = (fcjkpc*rjk[1]);
                 F31[2] = (fcjkpc*rjk[2]);
 
                 F24[0] = (fcilpc*ril[0]);
                 F24[1] = (fcilpc*ril[1]);
                 F24[2] = (fcilpc*ril[2]);
 
                 f1[0] = -F12[0]-F31[0];
                 f1[1] = -F12[1]-F31[1];
                 f1[2] = -F12[2]-F31[2];
                 f2[0] = F23[0]+F12[0]+F24[0];
                 f2[1] = F23[1]+F12[1]+F24[1];
                 f2[2] = F23[2]+F12[2]+F24[2];
                 f3[0] = -F23[0]+F34[0]+F31[0];
                 f3[1] = -F23[1]+F34[1]+F31[1];
                 f3[2] = -F23[2]+F34[2]+F31[2];
                 f4[0] = -F34[0]-F24[0];
                 f4[1] = -F34[1]-F24[1];
                 f4[2] = -F34[2]-F24[2];
 
                 // coordination forces
 
                 tmp2 = VA*Tij*((1.0-(om1234*om1234))) *
                   (1.0-tspjik)*(1.0-tspijl)*dw21*w34/r21mag;
                 f2[0] -= tmp2*r21[0];
                 f2[1] -= tmp2*r21[1];
                 f2[2] -= tmp2*r21[2];
                 f1[0] += tmp2*r21[0];
                 f1[1] += tmp2*r21[1];
                 f1[2] += tmp2*r21[2];
 
                 tmp2 = VA*Tij*((1.0-(om1234*om1234))) *
                   (1.0-tspjik)*(1.0-tspijl)*w21*dw34/r34mag;
                 f3[0] -= tmp2*r34[0];
                 f3[1] -= tmp2*r34[1];
                 f3[2] -= tmp2*r34[2];
                 f4[0] += tmp2*r34[0];
                 f4[1] += tmp2*r34[1];
                 f4[2] += tmp2*r34[2];
 
                 f[atom1][0] += f1[0]; f[atom1][1] += f1[1];
                 f[atom1][2] += f1[2];
                 f[atom2][0] += f2[0]; f[atom2][1] += f2[1];
                 f[atom2][2] += f2[2];
                 f[atom3][0] += f3[0]; f[atom3][1] += f3[1];
                 f[atom3][2] += f3[2];
                 f[atom4][0] += f4[0]; f[atom4][1] += f4[1];
                 f[atom4][2] += f4[2];
 
                 if (vflag_atom) {
                   r13[0] = -rjk[0]; r13[1] = -rjk[1]; r13[2] = -rjk[2];
                   r43[0] = -r34[0]; r43[1] = -r34[1]; r43[2] = -r34[2];
                   v_tally4(atom1,atom2,atom3,atom4,f1,f2,f4,r13,r23,r43);
                 }
               }
             }
           }
         }
       }
     }
 
     // Tij forces now that we have Etmp
 
     REBO_neighs = REBO_firstneigh[i];
     for (k = 0; k < REBO_numneigh[i]; k++) {
       atomk = REBO_neighs[k];
       if (atomk != atomj) {
         ktype = map[type[atomk]];
         rik[0] = x[atomi][0]-x[atomk][0];
         rik[1] = x[atomi][1]-x[atomk][1];
         rik[2] = x[atomi][2]-x[atomk][2];
         rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2]));
         wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dwik);
         Nki = nC[atomk]-(wik*kronecker(itype,0))+nH[atomk] -
           (wik*kronecker(itype,1));
         SpN = Sp(Nki,Nmin,Nmax,dNki);
 
         tmp2 = VA*dN3[0]*dwik*Etmp/rikmag;
         f[atomi][0] -= tmp2*rik[0];
         f[atomi][1] -= tmp2*rik[1];
         f[atomi][2] -= tmp2*rik[2];
         f[atomk][0] += tmp2*rik[0];
         f[atomk][1] += tmp2*rik[1];
         f[atomk][2] += tmp2*rik[2];
 
         if (vflag_atom) v_tally2(atomi,atomk,-tmp2,rik);
 
         tmp2 = VA*dN3[2]*(2.0*NconjtmpI*dwik*SpN)*Etmp/rikmag;
         f[atomi][0] -= tmp2*rik[0];
         f[atomi][1] -= tmp2*rik[1];
         f[atomi][2] -= tmp2*rik[2];
         f[atomk][0] += tmp2*rik[0];
         f[atomk][1] += tmp2*rik[1];
         f[atomk][2] += tmp2*rik[2];
 
         if (vflag_atom) v_tally2(atomi,atomk,-tmp2,rik);
 
         if (fabs(dNki) > TOL) {
           REBO_neighs_k = REBO_firstneigh[atomk];
           for (n = 0; n < REBO_numneigh[atomk]; n++) {
             atomn = REBO_neighs_k[n];
             ntype = map[type[atomn]];
             if (atomn != atomi) {
               rkn[0] = x[atomk][0]-x[atomn][0];
               rkn[1] = x[atomk][1]-x[atomn][1];
               rkn[2] = x[atomk][2]-x[atomn][2];
               rknmag = sqrt((rkn[0]*rkn[0])+(rkn[1]*rkn[1])+(rkn[2]*rkn[2]));
               Sp(rknmag,rcmin[ktype][ntype],rcmax[ktype][ntype],dwkn);
 
               tmp2 = VA*dN3[2]*(2.0*NconjtmpI*wik*dNki*dwkn)*Etmp/rknmag;
               f[atomk][0] -= tmp2*rkn[0];
               f[atomk][1] -= tmp2*rkn[1];
               f[atomk][2] -= tmp2*rkn[2];
               f[atomn][0] += tmp2*rkn[0];
               f[atomn][1] += tmp2*rkn[1];
               f[atomn][2] += tmp2*rkn[2];
 
               if (vflag_atom) v_tally2(atomk,atomn,-tmp2,rkn);
             }
           }
         }
       }
     }
 
     // Tij forces
 
     REBO_neighs = REBO_firstneigh[j];
     for (l = 0; l < REBO_numneigh[j]; l++) {
       atoml = REBO_neighs[l];
       if (atoml != atomi) {
         ltype = map[type[atoml]];
         rjl[0] = x[atomj][0]-x[atoml][0];
         rjl[1] = x[atomj][1]-x[atoml][1];
         rjl[2] = x[atomj][2]-x[atoml][2];
         rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2]));
         wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dwjl);
         Nlj = nC[atoml]-(wjl*kronecker(jtype,0))+nH[atoml] -
           (wjl*kronecker(jtype,1));
         SpN = Sp(Nlj,Nmin,Nmax,dNlj);
 
         tmp2 = VA*dN3[1]*dwjl*Etmp/rjlmag;
         f[atomj][0] -= tmp2*rjl[0];
         f[atomj][1] -= tmp2*rjl[1];
         f[atomj][2] -= tmp2*rjl[2];
         f[atoml][0] += tmp2*rjl[0];
         f[atoml][1] += tmp2*rjl[1];
         f[atoml][2] += tmp2*rjl[2];
 
         if (vflag_atom) v_tally2(atomj,atoml,-tmp2,rjl);
 
         tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*dwjl*SpN)*Etmp/rjlmag;
         f[atomj][0] -= tmp2*rjl[0];
         f[atomj][1] -= tmp2*rjl[1];
         f[atomj][2] -= tmp2*rjl[2];
         f[atoml][0] += tmp2*rjl[0];
         f[atoml][1] += tmp2*rjl[1];
         f[atoml][2] += tmp2*rjl[2];
 
         if (vflag_atom) v_tally2(atomj,atoml,-tmp2,rjl);
 
         if (fabs(dNlj) > TOL) {
           REBO_neighs_l = REBO_firstneigh[atoml];
           for (n = 0; n < REBO_numneigh[atoml]; n++) {
             atomn = REBO_neighs_l[n];
             ntype = map[type[atomn]];
             if (atomn !=atomj) {
               rln[0] = x[atoml][0]-x[atomn][0];
               rln[1] = x[atoml][1]-x[atomn][1];
               rln[2] = x[atoml][2]-x[atomn][2];
               rlnmag = sqrt((rln[0]*rln[0])+(rln[1]*rln[1])+(rln[2]*rln[2]));
               Sp(rlnmag,rcmin[ltype][ntype],rcmax[ltype][ntype],dwln);
 
               tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*wjl*dNlj*dwln)*Etmp/rlnmag;
               f[atoml][0] -= tmp2*rln[0];
               f[atoml][1] -= tmp2*rln[1];
               f[atoml][2] -= tmp2*rln[2];
               f[atomn][0] += tmp2*rln[0];
               f[atomn][1] += tmp2*rln[1];
               f[atomn][2] += tmp2*rln[2];
 
               if (vflag_atom) v_tally2(atoml,atomn,-tmp2,rln);
             }
           }
         }
       }
     }
   }
 
   bij = (0.5*(pij+pji))+piRC+(Tij*Etmp);
   return bij;
 }
 
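 /* ----------------------------------------------------------------------
    Bij* function (bondorderLJ): bond order bij for the i-j pair
    the i-j switching weight wij is evaluated at rij0mag, while the
    angular and torsional terms are evaluated with rij and rijmag
    bij is passed through Sp2 between bLJmin and bLJmax; VA is scaled by
    the resulting derivative dStb, and the pij/pji and piRC force loops
    are entered only when dStb is nonzero
 ------------------------------------------------------------------------- */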
 
 double PairAIREBO::bondorderLJ(int i, int j, double rij[3], double rijmag,
                                double VA, double rij0[3], double rij0mag,
                                double **f, int vflag_atom)
 {
   int k,n,l,atomk,atoml,atomn,atom1,atom2,atom3,atom4;
   int atomi,atomj,itype,jtype,ktype,ltype,ntype;
   double rik[3], rjl[3], rkn[3],rknmag,dNki;
   double NijC,NijH,NjiC,NjiH,wik,dwik,dwkn,wjl;
   double rikmag,rjlmag,cosjik,cosijl,g,tmp2,tmp3;
   double Etmp,pij,tmp,wij,dwij,NconjtmpI,NconjtmpJ;
   double Nki,Nlj,dS,lamdajik,lamdaijl,dgdc,dgdN,pji,Nijconj,piRC;
   double dcosjikdri[3],dcosijldri[3],dcosjikdrk[3];
   double dN2[2],dN3[3];
   double dcosijldrj[3],dcosijldrl[3],dcosjikdrj[3],dwjl;
   double Tij,crosskij[3],crosskijmag;
   double crossijl[3],crossijlmag,omkijl;
   double tmppij,tmppji,dN2PIJ[2],dN2PJI[2],dN3piRC[3],dN3Tij[3];
   double bij,tmp3pij,tmp3pji,Stb,dStb;
   double r32[3],r32mag,cos321;
   double om1234,rln[3];
   double rlnmag,dwln,r23[3],r23mag,r21[3],r21mag;
   double w21,dw21,r34[3],r34mag,cos234,w34,dw34;
   double cross321[3],cross234[3],prefactor,SpN;
   double fcijpc,fcikpc,fcjlpc,fcjkpc,fcilpc;
   double dt2dik[3],dt2djl[3],dt2dij[3],aa,aaa1,aaa2,at2,cw,cwnum,cwnom;
   double sin321,sin234,rr,rijrik,rijrjl,rjk2,rik2,ril2,rjl2;
   double dctik,dctjk,dctjl,dctij,dctji,dctil,rik2i,rjl2i,sink2i,sinl2i;
   double rjk[3],ril[3],dt1dik,dt1djk,dt1djl,dt1dil,dt1dij;
   double dNlj;
   double PijS,PjiS;
   double rij2,tspjik,dtsjik,tspijl,dtsijl,costmp;
   int *REBO_neighs,*REBO_neighs_i,*REBO_neighs_j,*REBO_neighs_k,*REBO_neighs_l;
   double F12[3],F23[3],F34[3],F31[3],F24[3];
   double fi[3],fj[3],fk[3],fl[3],f1[3],f2[3],f3[3],f4[4];
   double rji[3],rki[3],rlj[3],r13[3],r43[3];
 
   double **x = atom->x;
   int *type = atom->type;
 
   atomi = i;
   atomj = j;
   itype = map[type[atomi]];
   jtype = map[type[atomj]];
   wij = Sp(rij0mag,rcmin[itype][jtype],rcmax[itype][jtype],dwij);
   NijC = nC[atomi]-(wij*kronecker(jtype,0));
   NijH = nH[atomi]-(wij*kronecker(jtype,1));
   NjiC = nC[atomj]-(wij*kronecker(itype,0));
   NjiH = nH[atomj]-(wij*kronecker(itype,1));
 
   bij = 0.0;
   tmp = 0.0;
   tmp2 = 0.0;
   tmp3 = 0.0;
   dgdc = 0.0;
   dgdN = 0.0;
   NconjtmpI = 0.0;
   NconjtmpJ = 0.0;
   Etmp = 0.0;
   Stb = 0.0;
   dStb = 0.0;
 
   REBO_neighs = REBO_firstneigh[i];
   for (k = 0; k < REBO_numneigh[i]; k++) {
     atomk = REBO_neighs[k];
     if (atomk != atomj) {
       ktype = map[type[atomk]];
       rik[0] = x[atomi][0]-x[atomk][0];
       rik[1] = x[atomi][1]-x[atomk][1];
       rik[2] = x[atomi][2]-x[atomk][2];
       rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2]));
       lamdajik = 4.0*kronecker(itype,1) *
         ((rho[ktype][1]-rikmag)-(rho[jtype][1]-rijmag));
       wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dS);
       Nki = nC[atomk]-(wik*kronecker(itype,0)) +
         nH[atomk]-(wik*kronecker(itype,1));
       cosjik = ((rij[0]*rik[0])+(rij[1]*rik[1])+(rij[2]*rik[2])) /
         (rijmag*rikmag);
       cosjik = MIN(cosjik,1.0);
       cosjik = MAX(cosjik,-1.0);
 
       // evaluate splines g and derivatives dg
 
       g = gSpline(cosjik,(NijC+NijH),itype,&dgdc,&dgdN);
       Etmp += (wik*g*exp(lamdajik));
       tmp3 += (wik*dgdN*exp(lamdajik));
       NconjtmpI = NconjtmpI+(kronecker(ktype,0)*wik*Sp(Nki,Nmin,Nmax,dS));
     }
   }
 
   PijS = 0.0;
   dN2PIJ[0] = 0.0;
   dN2PIJ[1] = 0.0;
   PijS = PijSpline(NijC,NijH,itype,jtype,dN2PIJ);
   pij = pow(1.0+Etmp+PijS,-0.5);
   tmppij = -.5*cube(pij);
   tmp3pij = tmp3;
   tmp = 0.0;
   tmp2 = 0.0;
   tmp3 = 0.0;
   Etmp = 0.0;
 
   REBO_neighs = REBO_firstneigh[j];
   for (l = 0; l < REBO_numneigh[j]; l++) {
     atoml = REBO_neighs[l];
     if (atoml != atomi) {
       ltype = map[type[atoml]];
       rjl[0] = x[atomj][0]-x[atoml][0];
       rjl[1] = x[atomj][1]-x[atoml][1];
       rjl[2] = x[atomj][2]-x[atoml][2];
       rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2]));
       lamdaijl = 4.0*kronecker(jtype,1) *
         ((rho[ltype][1]-rjlmag)-(rho[itype][1]-rijmag));
       wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dS);
       Nlj = nC[atoml]-(wjl*kronecker(jtype,0))+nH[atoml] -
         (wjl*kronecker(jtype,1));
       cosijl = -1.0*((rij[0]*rjl[0])+(rij[1]*rjl[1])+(rij[2]*rjl[2])) /
         (rijmag*rjlmag);
       cosijl = MIN(cosijl,1.0);
       cosijl = MAX(cosijl,-1.0);
 
       // evaluate splines g and derivatives dg
 
       g = gSpline(cosijl,NjiC+NjiH,jtype,&dgdc,&dgdN);
       Etmp += (wjl*g*exp(lamdaijl));
       tmp3 += (wjl*dgdN*exp(lamdaijl));
       NconjtmpJ = NconjtmpJ+(kronecker(ltype,0)*wjl*Sp(Nlj,Nmin,Nmax,dS));
     }
   }
 
   PjiS = 0.0;
   dN2PJI[0] = 0.0;
   dN2PJI[1] = 0.0;
   PjiS = PijSpline(NjiC,NjiH,jtype,itype,dN2PJI);
   pji = pow(1.0+Etmp+PjiS,-0.5);
   tmppji = -.5*cube(pji);
   tmp3pji = tmp3;
 
   // evaluate Nij conj
 
   Nijconj = 1.0+(NconjtmpI*NconjtmpI)+(NconjtmpJ*NconjtmpJ);
   piRC = piRCSpline(NijC+NijH,NjiC+NjiH,Nijconj,itype,jtype,dN3piRC);
   Tij = 0.0;
   dN3Tij[0] = 0.0;
   dN3Tij[1] = 0.0;
   dN3Tij[2] = 0.0;
   if (itype == 0 && jtype == 0)
     Tij=TijSpline((NijC+NijH),(NjiC+NjiH),Nijconj,dN3Tij);
 
   Etmp = 0.0;
   if (fabs(Tij) > TOL) {
     REBO_neighs_i = REBO_firstneigh[i];
     for (k = 0; k < REBO_numneigh[i]; k++) {
       atomk = REBO_neighs_i[k];
       ktype = map[type[atomk]];
       if (atomk != atomj) {
         rik[0] = x[atomi][0]-x[atomk][0];
         rik[1] = x[atomi][1]-x[atomk][1];
         rik[2] = x[atomi][2]-x[atomk][2];
         rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2]));
         cos321 = ((rij[0]*rik[0])+(rij[1]*rik[1])+(rij[2]*rik[2])) /
           (rijmag*rikmag);
         cos321 = MIN(cos321,1.0);
         cos321 = MAX(cos321,-1.0);
 
         rjk[0] = rik[0]-rij[0];
         rjk[1] = rik[1]-rij[1];
         rjk[2] = rik[2]-rij[2];
         rjk2 = (rjk[0]*rjk[0])+(rjk[1]*rjk[1])+(rjk[2]*rjk[2]);
         rij2 = rijmag*rijmag;
         rik2 = rikmag*rikmag;
         costmp = 0.5*(rij2+rik2-rjk2)/rijmag/rikmag;
         tspjik = Sp2(costmp,thmin,thmax,dtsjik);
 
         if (sqrt(1.0 - cos321*cos321) > sqrt(TOL)) {
           wik = Sp(rikmag,rcmin[itype][ktype],rcmaxp[itype][ktype],dwik);
           REBO_neighs_j = REBO_firstneigh[j];
           for (l = 0; l < REBO_numneigh[j]; l++) {
             atoml = REBO_neighs_j[l];
             ltype = map[type[atoml]];
             if (!(atoml == atomi || atoml == atomk)) {
               rjl[0] = x[atomj][0]-x[atoml][0];
               rjl[1] = x[atomj][1]-x[atoml][1];
               rjl[2] = x[atomj][2]-x[atoml][2];
               rjlmag = sqrt(rjl[0]*rjl[0] + rjl[1]*rjl[1] + rjl[2]*rjl[2]);
               cos234 = -((rij[0]*rjl[0])+(rij[1]*rjl[1])+(rij[2]*rjl[2])) /
                 (rijmag*rjlmag);
               cos234 = MIN(cos234,1.0);
               cos234 = MAX(cos234,-1.0);
 
               ril[0] = rij[0]+rjl[0];
               ril[1] = rij[1]+rjl[1];
               ril[2] = rij[2]+rjl[2];
               ril2 = (ril[0]*ril[0])+(ril[1]*ril[1])+(ril[2]*ril[2]);
               rijrjl = 2.0*rijmag*rjlmag;
               rjl2 = rjlmag*rjlmag;
               costmp = 0.5*(rij2+rjl2-ril2)/rijmag/rjlmag;
               tspijl = Sp2(costmp,thmin,thmax,dtsijl);
 
               if (sqrt(1.0 - cos234*cos234) > sqrt(TOL)) {
                 wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmaxp[jtype][ltype],dS);
                 crosskij[0] = (rij[1]*rik[2]-rij[2]*rik[1]);
                 crosskij[1] = (rij[2]*rik[0]-rij[0]*rik[2]);
                 crosskij[2] = (rij[0]*rik[1]-rij[1]*rik[0]);
                 crosskijmag = sqrt(crosskij[0]*crosskij[0] +
                                    crosskij[1]*crosskij[1] +
                                    crosskij[2]*crosskij[2]);
                 crossijl[0] = (rij[1]*rjl[2]-rij[2]*rjl[1]);
                 crossijl[1] = (rij[2]*rjl[0]-rij[0]*rjl[2]);
                 crossijl[2] = (rij[0]*rjl[1]-rij[1]*rjl[0]);
                 crossijlmag = sqrt(crossijl[0]*crossijl[0] +
                                    crossijl[1]*crossijl[1] +
                                    crossijl[2]*crossijl[2]);
                 omkijl = -1.0*(((crosskij[0]*crossijl[0]) +
                                 (crosskij[1]*crossijl[1]) +
                                 (crosskij[2]*crossijl[2])) /
                                (crosskijmag*crossijlmag));
                 Etmp += ((1.0-square(omkijl))*wik*wjl) *
                   (1.0-tspjik)*(1.0-tspijl);
               }
             }
           }
         }
       }
     }
   }
 
   bij = (.5*(pij+pji))+piRC+(Tij*Etmp);
   Stb = Sp2(bij,bLJmin[itype][jtype],bLJmax[itype][jtype],dStb);
   VA = VA*dStb;
 
   if (dStb != 0.0) {
     tmp = tmppij;
     dN2[0] = dN2PIJ[0];
     dN2[1] = dN2PIJ[1];
     tmp3 = tmp3pij;
 
     // pij forces
 
     REBO_neighs_i = REBO_firstneigh[i];
     for (k = 0; k < REBO_numneigh[i]; k++) {
       atomk = REBO_neighs_i[k];
       if (atomk != atomj) {
         lamdajik = 0.0;
         rik[0] = x[atomi][0]-x[atomk][0];
         rik[1] = x[atomi][1]-x[atomk][1];
         rik[2] = x[atomi][2]-x[atomk][2];
         rikmag = sqrt(rik[0]*rik[0] + rik[1]*rik[1] + rik[2]*rik[2]);
         lamdajik = 4.0*kronecker(itype,1) *
           ((rho[ktype][1]-rikmag)-(rho[jtype][1]-rijmag));
         wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dwik);
         cosjik = ((rij[0]*rik[0])+(rij[1]*rik[1])+(rij[2]*rik[2])) /
           (rijmag*rikmag);
         cosjik = MIN(cosjik,1.0);
         cosjik = MAX(cosjik,-1.0);
 
         dcosjikdri[0] = ((rij[0]+rik[0])/(rijmag*rikmag)) -
           (cosjik*((rij[0]/(rijmag*rijmag))+(rik[0]/(rikmag*rikmag))));
         dcosjikdri[1] = ((rij[1]+rik[1])/(rijmag*rikmag)) -
           (cosjik*((rij[1]/(rijmag*rijmag))+(rik[1]/(rikmag*rikmag))));
         dcosjikdri[2] = ((rij[2]+rik[2])/(rijmag*rikmag)) -
           (cosjik*((rij[2]/(rijmag*rijmag))+(rik[2]/(rikmag*rikmag))));
         dcosjikdrk[0] = (-rij[0]/(rijmag*rikmag)) +
           (cosjik*(rik[0]/(rikmag*rikmag)));
         dcosjikdrk[1] = (-rij[1]/(rijmag*rikmag)) +
           (cosjik*(rik[1]/(rikmag*rikmag)));
         dcosjikdrk[2] = (-rij[2]/(rijmag*rikmag)) +
           (cosjik*(rik[2]/(rikmag*rikmag)));
         dcosjikdrj[0] = (-rik[0]/(rijmag*rikmag)) +
           (cosjik*(rij[0]/(rijmag*rijmag)));
         dcosjikdrj[1] = (-rik[1]/(rijmag*rikmag)) +
           (cosjik*(rij[1]/(rijmag*rijmag)));
         dcosjikdrj[2] = (-rik[2]/(rijmag*rikmag)) +
           (cosjik*(rij[2]/(rijmag*rijmag)));
 
         g = gSpline(cosjik,(NijC+NijH),itype,&dgdc,&dgdN);
 
         tmp2 = VA*.5*(tmp*wik*dgdc*exp(lamdajik));
         fj[0] = -tmp2*dcosjikdrj[0];
         fj[1] = -tmp2*dcosjikdrj[1];
         fj[2] = -tmp2*dcosjikdrj[2];
         fi[0] = -tmp2*dcosjikdri[0];
         fi[1] = -tmp2*dcosjikdri[1];
         fi[2] = -tmp2*dcosjikdri[2];
         fk[0] = -tmp2*dcosjikdrk[0];
         fk[1] = -tmp2*dcosjikdrk[1];
         fk[2] = -tmp2*dcosjikdrk[2];
 
         tmp2 = VA*.5*(tmp*wik*g*exp(lamdajik)*4.0*kronecker(itype,1));
         fj[0] -= tmp2*(-rij[0]/rijmag);
         fj[1] -= tmp2*(-rij[1]/rijmag);
         fj[2] -= tmp2*(-rij[2]/rijmag);
         fi[0] -= tmp2*((-rik[0]/rikmag)+(rij[0]/rijmag));
         fi[1] -= tmp2*((-rik[1]/rikmag)+(rij[1]/rijmag));
         fi[2] -= tmp2*((-rik[2]/rikmag)+(rij[2]/rijmag));
         fk[0] -= tmp2*(rik[0]/rikmag);
         fk[1] -= tmp2*(rik[1]/rikmag);
         fk[2] -= tmp2*(rik[2]/rikmag);
 
         // coordination forces
 
         // dwik forces
 
         tmp2 = VA*.5*(tmp*dwik*g*exp(lamdajik))/rikmag;
         fi[0] -= tmp2*rik[0];
         fi[1] -= tmp2*rik[1];
         fi[2] -= tmp2*rik[2];
         fk[0] += tmp2*rik[0];
         fk[1] += tmp2*rik[1];
         fk[2] += tmp2*rik[2];
 
         // PIJ forces
 
         tmp2 = VA*.5*(tmp*dN2[ktype]*dwik)/rikmag;
         fi[0] -= tmp2*rik[0];
         fi[1] -= tmp2*rik[1];
         fi[2] -= tmp2*rik[2];
         fk[0] += tmp2*rik[0];
         fk[1] += tmp2*rik[1];
         fk[2] += tmp2*rik[2];
 
         // dgdN forces
 
         tmp2 = VA*.5*(tmp*tmp3*dwik)/rikmag;
         fi[0] -= tmp2*rik[0];
         fi[1] -= tmp2*rik[1];
         fi[2] -= tmp2*rik[2];
         fk[0] += tmp2*rik[0];
         fk[1] += tmp2*rik[1];
         fk[2] += tmp2*rik[2];
 
         f[atomi][0] += fi[0]; f[atomi][1] += fi[1]; f[atomi][2] += fi[2];
         f[atomj][0] += fj[0]; f[atomj][1] += fj[1]; f[atomj][2] += fj[2];
         f[atomk][0] += fk[0]; f[atomk][1] += fk[1]; f[atomk][2] += fk[2];
 
         if (vflag_atom) {
           rji[0] = -rij[0]; rji[1] = -rij[1]; rji[2] = -rij[2];
           rki[0] = -rik[0]; rki[1] = -rik[1]; rki[2] = -rik[2];
           v_tally3(atomi,atomj,atomk,fj,fk,rji,rki);
         }
       }
     }
 
     tmp = tmppji;
     tmp3 = tmp3pji;
     dN2[0] = dN2PJI[0];
     dN2[1] = dN2PJI[1];
     REBO_neighs  =  REBO_firstneigh[j];
     for (l = 0; l < REBO_numneigh[j]; l++) {
       atoml = REBO_neighs[l];
       if (atoml !=atomi) {
         ltype = map[type[atoml]];
         rjl[0] = x[atomj][0]-x[atoml][0];
         rjl[1] = x[atomj][1]-x[atoml][1];
         rjl[2] = x[atomj][2]-x[atoml][2];
         rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2]));
         lamdaijl = 4.0*kronecker(jtype,1) *
           ((rho[ltype][1]-rjlmag)-(rho[itype][1]-rijmag));
         wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dwjl);
         cosijl = (-1.0*((rij[0]*rjl[0])+(rij[1]*rjl[1])+(rij[2]*rjl[2]))) /
           (rijmag*rjlmag);
         cosijl = MIN(cosijl,1.0);
         cosijl = MAX(cosijl,-1.0);
 
         dcosijldri[0] = (-rjl[0]/(rijmag*rjlmag)) -
           (cosijl*rij[0]/(rijmag*rijmag));
         dcosijldri[1] = (-rjl[1]/(rijmag*rjlmag)) -
           (cosijl*rij[1]/(rijmag*rijmag));
         dcosijldri[2] = (-rjl[2]/(rijmag*rjlmag)) -
           (cosijl*rij[2]/(rijmag*rijmag));
         dcosijldrj[0] = ((-rij[0]+rjl[0])/(rijmag*rjlmag)) +
           (cosijl*((rij[0]/square(rijmag))-(rjl[0]/(rjlmag*rjlmag))));
         dcosijldrj[1] = ((-rij[1]+rjl[1])/(rijmag*rjlmag)) +
           (cosijl*((rij[1]/square(rijmag))-(rjl[1]/(rjlmag*rjlmag))));
         dcosijldrj[2] = ((-rij[2]+rjl[2])/(rijmag*rjlmag)) +
           (cosijl*((rij[2]/square(rijmag))-(rjl[2]/(rjlmag*rjlmag))));
         dcosijldrl[0] = (rij[0]/(rijmag*rjlmag)) +
           (cosijl*rjl[0]/(rjlmag*rjlmag));
         dcosijldrl[1] = (rij[1]/(rijmag*rjlmag)) +
           (cosijl*rjl[1]/(rjlmag*rjlmag));
         dcosijldrl[2] = (rij[2]/(rijmag*rjlmag)) +
           (cosijl*rjl[2]/(rjlmag*rjlmag));
 
         // evaluate splines g and derivatives dg
 
         g = gSpline(cosijl,NjiC+NjiH,jtype,&dgdc,&dgdN);
         tmp2 = VA*.5*(tmp*wjl*dgdc*exp(lamdaijl));
         fi[0] = -tmp2*dcosijldri[0];
         fi[1] = -tmp2*dcosijldri[1];
         fi[2] = -tmp2*dcosijldri[2];
         fj[0] = -tmp2*dcosijldrj[0];
         fj[1] = -tmp2*dcosijldrj[1];
         fj[2] = -tmp2*dcosijldrj[2];
         fl[0] = -tmp2*dcosijldrl[0];
         fl[1] = -tmp2*dcosijldrl[1];
         fl[2] = -tmp2*dcosijldrl[2];
 
         tmp2 = VA*.5*(tmp*wjl*g*exp(lamdaijl)*4.0*kronecker(jtype,1));
         fi[0] -= tmp2*(rij[0]/rijmag);
         fi[1] -= tmp2*(rij[1]/rijmag);
         fi[2] -= tmp2*(rij[2]/rijmag);
         fj[0] -= tmp2*((-rjl[0]/rjlmag)-(rij[0]/rijmag));
         fj[1] -= tmp2*((-rjl[1]/rjlmag)-(rij[1]/rijmag));
         fj[2] -= tmp2*((-rjl[2]/rjlmag)-(rij[2]/rijmag));
         fl[0] -= tmp2*(rjl[0]/rjlmag);
         fl[1] -= tmp2*(rjl[1]/rjlmag);
         fl[2] -= tmp2*(rjl[2]/rjlmag);
 
          // coordination forces
         // dwik forces
 
         tmp2 = VA*.5*(tmp*dwjl*g*exp(lamdaijl))/rjlmag;
         fj[0] -= tmp2*rjl[0];
         fj[1] -= tmp2*rjl[1];
         fj[2] -= tmp2*rjl[2];
         fl[0] += tmp2*rjl[0];
         fl[1] += tmp2*rjl[1];
         fl[2] += tmp2*rjl[2];
 
         // PIJ forces
 
         tmp2 = VA*.5*(tmp*dN2[ltype]*dwjl)/rjlmag;
         fj[0] -= tmp2*rjl[0];
         fj[1] -= tmp2*rjl[1];
         fj[2] -= tmp2*rjl[2];
         fl[0] += tmp2*rjl[0];
         fl[1] += tmp2*rjl[1];
         fl[2] += tmp2*rjl[2];
 
         // dgdN forces
 
         tmp2=VA*.5*(tmp*tmp3*dwjl)/rjlmag;
         fj[0] -= tmp2*rjl[0];
         fj[1] -= tmp2*rjl[1];
         fj[2] -= tmp2*rjl[2];
         fl[0] += tmp2*rjl[0];
         fl[1] += tmp2*rjl[1];
         fl[2] += tmp2*rjl[2];
 
         f[atomi][0] += fi[0]; f[atomi][1] += fi[1]; f[atomi][2] += fi[2];
         f[atomj][0] += fj[0]; f[atomj][1] += fj[1]; f[atomj][2] += fj[2];
         f[atoml][0] += fl[0]; f[atoml][1] += fl[1]; f[atoml][2] += fl[2];
 
         if (vflag_atom) {
           rlj[0] = -rjl[0]; rlj[1] = -rjl[1]; rlj[2] = -rjl[2];
           v_tally3(atomi,atomj,atoml,fi,fl,rij,rlj);
         }
       }
     }
 
     // piRC forces
 
     dN3[0] = dN3piRC[0];
     dN3[1] = dN3piRC[1];
     dN3[2] = dN3piRC[2];
 
     REBO_neighs_i = REBO_firstneigh[i];
     for (k = 0; k < REBO_numneigh[i]; k++) {
       atomk = REBO_neighs_i[k];
       if (atomk != atomj) {
         ktype = map[type[atomk]];
         rik[0] = x[atomi][0]-x[atomk][0];
         rik[1] = x[atomi][1]-x[atomk][1];
         rik[2] = x[atomi][2]-x[atomk][2];
         rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2]));
         wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dwik);
         Nki = nC[atomk]-(wik*kronecker(itype,0))+nH[atomk] -
           (wik*kronecker(itype,1));
         SpN = Sp(Nki,Nmin,Nmax,dNki);
 
         tmp2 = VA*dN3[0]*dwik/rikmag;
         f[atomi][0] -= tmp2*rik[0];
         f[atomi][1] -= tmp2*rik[1];
         f[atomi][2] -= tmp2*rik[2];
         f[atomk][0] += tmp2*rik[0];
         f[atomk][1] += tmp2*rik[1];
         f[atomk][2] += tmp2*rik[2];
 
         if (vflag_atom) v_tally2(atomi,atomk,-tmp2,rik);
 
         tmp2 = VA*dN3[2]*(2.0*NconjtmpI*dwik*SpN)/rikmag;
         f[atomi][0] -= tmp2*rik[0];
         f[atomi][1] -= tmp2*rik[1];
         f[atomi][2] -= tmp2*rik[2];
         f[atomk][0] += tmp2*rik[0];
         f[atomk][1] += tmp2*rik[1];
         f[atomk][2] += tmp2*rik[2];
 
         if (vflag_atom) v_tally2(atomi,atomk,-tmp2,rik);
 
         if (fabs(dNki) > TOL) {
           REBO_neighs_k = REBO_firstneigh[atomk];
           for (n = 0; n < REBO_numneigh[atomk]; n++) {
             atomn = REBO_neighs_k[n];
             if (atomn != atomi) {
               ntype = map[type[atomn]];
               rkn[0] = x[atomk][0]-x[atomn][0];
               rkn[1] = x[atomk][1]-x[atomn][1];
               rkn[2] = x[atomk][2]-x[atomn][2];
               rknmag = sqrt((rkn[0]*rkn[0])+(rkn[1]*rkn[1])+(rkn[2]*rkn[2]));
               Sp(rknmag,rcmin[ktype][ntype],rcmax[ktype][ntype],dwkn);
 
               tmp2 = VA*dN3[2]*(2.0*NconjtmpI*wik*dNki*dwkn)/rknmag;
               f[atomk][0] -= tmp2*rkn[0];
               f[atomk][1] -= tmp2*rkn[1];
               f[atomk][2] -= tmp2*rkn[2];
               f[atomn][0] += tmp2*rkn[0];
               f[atomn][1] += tmp2*rkn[1];
               f[atomn][2] += tmp2*rkn[2];
 
               if (vflag_atom) v_tally2(atomk,atomn,-tmp2,rkn);
             }
           }
         }
       }
     }
 
     // piRC forces to J side
 
     REBO_neighs = REBO_firstneigh[j];
     for (l = 0; l < REBO_numneigh[j]; l++) {
       atoml = REBO_neighs[l];
       if (atoml != atomi) {
         ltype = map[type[atoml]];
         rjl[0] = x[atomj][0]-x[atoml][0];
         rjl[1] = x[atomj][1]-x[atoml][1];
         rjl[2] = x[atomj][2]-x[atoml][2];
         rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2]));
         wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dwjl);
         Nlj = nC[atoml]-(wjl*kronecker(jtype,0))+nH[atoml] -
           (wjl*kronecker(jtype,1));
         SpN = Sp(Nlj,Nmin,Nmax,dNlj);
 
         tmp2 = VA*dN3[1]*dwjl/rjlmag;
         f[atomj][0] -= tmp2*rjl[0];
         f[atomj][1] -= tmp2*rjl[1];
         f[atomj][2] -= tmp2*rjl[2];
         f[atoml][0] += tmp2*rjl[0];
         f[atoml][1] += tmp2*rjl[1];
         f[atoml][2] += tmp2*rjl[2];
 
         if (vflag_atom) v_tally2(atomj,atoml,-tmp2,rjl);
 
         tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*dwjl*SpN)/rjlmag;
         f[atomj][0] -= tmp2*rjl[0];
         f[atomj][1] -= tmp2*rjl[1];
         f[atomj][2] -= tmp2*rjl[2];
         f[atoml][0] += tmp2*rjl[0];
         f[atoml][1] += tmp2*rjl[1];
         f[atoml][2] += tmp2*rjl[2];
 
         if (vflag_atom) v_tally2(atomj,atoml,-tmp2,rjl);
 
         if (fabs(dNlj) > TOL) {
           REBO_neighs_l = REBO_firstneigh[atoml];
           for (n = 0; n < REBO_numneigh[atoml]; n++) {
             atomn = REBO_neighs_l[n];
             if (atomn != atomj) {
               ntype = map[type[atomn]];
               rln[0] = x[atoml][0]-x[atomn][0];
               rln[1] = x[atoml][1]-x[atomn][1];
               rln[2] = x[atoml][2]-x[atomn][2];
               rlnmag = sqrt((rln[0]*rln[0])+(rln[1]*rln[1])+(rln[2]*rln[2]));
               Sp(rlnmag,rcmin[ltype][ntype],rcmax[ltype][ntype],dwln);
 
               tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*wjl*dNlj*dwln)/rlnmag;
               f[atoml][0] -= tmp2*rln[0];
               f[atoml][1] -= tmp2*rln[1];
               f[atoml][2] -= tmp2*rln[2];
               f[atomn][0] += tmp2*rln[0];
               f[atomn][1] += tmp2*rln[1];
               f[atomn][2] += tmp2*rln[2];
 
               if (vflag_atom) v_tally2(atoml,atomn,-tmp2,rln);
             }
           }
         }
       }
     }
 
     if (fabs(Tij) > TOL) {
       dN3[0] = dN3Tij[0];
       dN3[1] = dN3Tij[1];
       dN3[2] = dN3Tij[2];
       atom2 = atomi;
       atom3 = atomj;
       r32[0] = x[atom3][0]-x[atom2][0];
       r32[1] = x[atom3][1]-x[atom2][1];
       r32[2] = x[atom3][2]-x[atom2][2];
       r32mag = sqrt((r32[0]*r32[0])+(r32[1]*r32[1])+(r32[2]*r32[2]));
       r23[0] = -r32[0];
       r23[1] = -r32[1];
       r23[2] = -r32[2];
       r23mag = r32mag;
 
       REBO_neighs_i = REBO_firstneigh[i];
       for (k = 0; k < REBO_numneigh[i]; k++) {
         atomk = REBO_neighs_i[k];
         atom1 = atomk;
         ktype = map[type[atomk]];
         if (atomk != atomj) {
           r21[0] = x[atom2][0]-x[atom1][0];
           r21[1] = x[atom2][1]-x[atom1][1];
           r21[2] = x[atom2][2]-x[atom1][2];
           r21mag = sqrt(r21[0]*r21[0] + r21[1]*r21[1] + r21[2]*r21[2]);
           cos321 = ((r21[0]*rij[0])+(r21[1]*rij[1])+(r21[2]*rij[2])) /
             (r21mag*rijmag);
           cos321 = MIN(cos321,1.0);
           cos321 = MAX(cos321,-1.0);
           sin321 = sqrt(1.0 - cos321*cos321);
           sink2i = 1.0/(sin321*sin321);
           rik2i = 1.0/(r21mag*r21mag);
 
           if (sin321 != 0.0) {
             rr = (rijmag*rijmag)-(r21mag*r21mag);
             rjk[0] = r21[0]-rij[0];
             rjk[1] = r21[1]-rij[1];
             rjk[2] = r21[2]-rij[2];
             rjk2 = (rjk[0]*rjk[0])+(rjk[1]*rjk[1])+(rjk[2]*rjk[2]);
             rijrik = 2.0*rijmag*r21mag;
             rik2 = r21mag*r21mag;
             dctik = (-rr+rjk2)/(rijrik*rik2);
             dctij = (rr+rjk2)/(rijrik*rijmag*rijmag);
             dctjk = -2.0/rijrik;
             w21 = Sp(r21mag,rcmin[itype][ktype],rcmaxp[itype][ktype],dw21);
             rikmag = r21mag;
             rij2 = r32mag*r32mag;
             rik2 = r21mag*r21mag;
             costmp = 0.5*(rij2+rik2-rjk2)/rijmag/rikmag;
             tspjik = Sp2(costmp,thmin,thmax,dtsjik);
             dtsjik = -dtsjik;
 
             REBO_neighs_j = REBO_firstneigh[j];
             for (l = 0; l < REBO_numneigh[j]; l++) {
               atoml = REBO_neighs_j[l];
               atom4 = atoml;
               ltype = map[type[atoml]];
               if (!(atoml == atomi || atoml == atomk)) {
                 r34[0] = x[atom3][0]-x[atom4][0];
                 r34[1] = x[atom3][1]-x[atom4][1];
                 r34[2] = x[atom3][2]-x[atom4][2];
                 r34mag = sqrt(r34[0]*r34[0] + r34[1]*r34[1] + r34[2]*r34[2]);
                 cos234 = -1.0*((rij[0]*r34[0])+(rij[1]*r34[1]) +
                                (rij[2]*r34[2]))/(rijmag*r34mag);
                 cos234 = MIN(cos234,1.0);
                 cos234 = MAX(cos234,-1.0);
                 sin234 = sqrt(1.0 - cos234*cos234);
                 sinl2i = 1.0/(sin234*sin234);
                 rjl2i = 1.0/(r34mag*r34mag);
 
                 if (sin234 != 0.0) {
                   w34 = Sp(r34mag,rcmin[jtype][ltype],
                            rcmaxp[jtype][ltype],dw34);
                   rr = (r23mag*r23mag)-(r34mag*r34mag);
                   ril[0] = r23[0]+r34[0];
                   ril[1] = r23[1]+r34[1];
                   ril[2] = r23[2]+r34[2];
                   ril2 = (ril[0]*ril[0])+(ril[1]*ril[1])+(ril[2]*ril[2]);
                   rijrjl = 2.0*r23mag*r34mag;
                   rjl2 = r34mag*r34mag;
                   dctjl = (-rr+ril2)/(rijrjl*rjl2);
                   dctji = (rr+ril2)/(rijrjl*r23mag*r23mag);
                   dctil = -2.0/rijrjl;
                   rjlmag = r34mag;
                   rjl2 = r34mag*r34mag;
                   costmp = 0.5*(rij2+rjl2-ril2)/rijmag/rjlmag;
                   tspijl = Sp2(costmp,thmin,thmax,dtsijl);
                   dtsijl = -dtsijl; //need minus sign
                   prefactor = VA*Tij;
 
                   cross321[0] = (r32[1]*r21[2])-(r32[2]*r21[1]);
                   cross321[1] = (r32[2]*r21[0])-(r32[0]*r21[2]);
                   cross321[2] = (r32[0]*r21[1])-(r32[1]*r21[0]);
                   cross234[0] = (r23[1]*r34[2])-(r23[2]*r34[1]);
                   cross234[1] = (r23[2]*r34[0])-(r23[0]*r34[2]);
                   cross234[2] = (r23[0]*r34[1])-(r23[1]*r34[0]);
 
                   cwnum = (cross321[0]*cross234[0]) +
                     (cross321[1]*cross234[1])+(cross321[2]*cross234[2]);
                   cwnom = r21mag*r34mag*r23mag*r23mag*sin321*sin234;
                   om1234 = cwnum/cwnom;
                   cw = om1234;
                   Etmp += ((1.0-square(om1234))*w21*w34) *
                     (1.0-tspjik)*(1.0-tspijl);
 
                   dt1dik = (rik2i)-(dctik*sink2i*cos321);
                   dt1djk = (-dctjk*sink2i*cos321);
                   dt1djl = (rjl2i)-(dctjl*sinl2i*cos234);
                   dt1dil = (-dctil*sinl2i*cos234);
                   dt1dij = (2.0/(r23mag*r23mag)) -
                     (dctij*sink2i*cos321)-(dctji*sinl2i*cos234);
 
                   dt2dik[0] = (-r23[2]*cross234[1])+(r23[1]*cross234[2]);
                   dt2dik[1] = (-r23[0]*cross234[2])+(r23[2]*cross234[0]);
                   dt2dik[2] = (-r23[1]*cross234[0])+(r23[0]*cross234[1]);
 
                   dt2djl[0] = (-r23[1]*cross321[2])+(r23[2]*cross321[1]);
                   dt2djl[1] = (-r23[2]*cross321[0])+(r23[0]*cross321[2]);
                   dt2djl[2] = (-r23[0]*cross321[1])+(r23[1]*cross321[0]);
 
                   dt2dij[0] = (r21[2]*cross234[1]) -
                     (r34[2]*cross321[1])-(r21[1]*cross234[2]) +
                     (r34[1]*cross321[2]);
                   dt2dij[1] = (r21[0]*cross234[2]) -
                     (r34[0]*cross321[2])-(r21[2]*cross234[0]) +
                     (r34[2]*cross321[0]);
                   dt2dij[2] = (r21[1]*cross234[0]) -
                     (r34[1]*cross321[0])-(r21[0]*cross234[1]) +
                     (r34[0]*cross321[1]);
 
                   aa = (prefactor*2.0*cw/cwnom)*w21*w34 *
                     (1.0-tspjik)*(1.0-tspijl);
                   aaa1 = -prefactor*(1.0-square(om1234)) *
                     (1.0-tspjik)*(1.0-tspijl);
                   aaa2 = aaa1*w21*w34;
                   at2 = aa*cwnum;
 
                   fcijpc = (-dt1dij*at2)+(aaa2*dtsjik*dctij*(1.0-tspijl)) +
                     (aaa2*dtsijl*dctji*(1.0-tspjik));
                   fcikpc = (-dt1dik*at2)+(aaa2*dtsjik*dctik*(1.0-tspijl));
                   fcjlpc = (-dt1djl*at2)+(aaa2*dtsijl*dctjl*(1.0-tspjik));
                   fcjkpc = (-dt1djk*at2)+(aaa2*dtsjik*dctjk*(1.0-tspijl));
                   fcilpc = (-dt1dil*at2)+(aaa2*dtsijl*dctil*(1.0-tspjik));
 
                   F23[0] = (fcijpc*r23[0])+(aa*dt2dij[0]);
                   F23[1] = (fcijpc*r23[1])+(aa*dt2dij[1]);
                   F23[2] = (fcijpc*r23[2])+(aa*dt2dij[2]);
 
                   F12[0] = (fcikpc*r21[0])+(aa*dt2dik[0]);
                   F12[1] = (fcikpc*r21[1])+(aa*dt2dik[1]);
                   F12[2] = (fcikpc*r21[2])+(aa*dt2dik[2]);
 
                   F34[0] = (fcjlpc*r34[0])+(aa*dt2djl[0]);
                   F34[1] = (fcjlpc*r34[1])+(aa*dt2djl[1]);
                   F34[2] = (fcjlpc*r34[2])+(aa*dt2djl[2]);
 
                   F31[0] = (fcjkpc*rjk[0]);
                   F31[1] = (fcjkpc*rjk[1]);
                   F31[2] = (fcjkpc*rjk[2]);
 
                   F24[0] = (fcilpc*ril[0]);
                   F24[1] = (fcilpc*ril[1]);
                   F24[2] = (fcilpc*ril[2]);
 
                   f1[0] = -F12[0]-F31[0];
                   f1[1] = -F12[1]-F31[1];
                   f1[2] = -F12[2]-F31[2];
                   f2[0] = F23[0]+F12[0]+F24[0];
                   f2[1] = F23[1]+F12[1]+F24[1];
                   f2[2] = F23[2]+F12[2]+F24[2];
                   f3[0] = -F23[0]+F34[0]+F31[0];
                   f3[1] = -F23[1]+F34[1]+F31[1];
                   f3[2] = -F23[2]+F34[2]+F31[2];
                   f4[0] = -F34[0]-F24[0];
                   f4[1] = -F34[1]-F24[1];
                   f4[2] = -F34[2]-F24[2];
 
                   // coordination forces
 
                   tmp2 = VA*Tij*((1.0-(om1234*om1234))) *
                     (1.0-tspjik)*(1.0-tspijl)*dw21*w34/r21mag;
                   f2[0] -= tmp2*r21[0];
                   f2[1] -= tmp2*r21[1];
                   f2[2] -= tmp2*r21[2];
                   f1[0] += tmp2*r21[0];
                   f1[1] += tmp2*r21[1];
                   f1[2] += tmp2*r21[2];
 
                   tmp2 = VA*Tij*((1.0-(om1234*om1234))) *
                     (1.0-tspjik)*(1.0-tspijl)*w21*dw34/r34mag;
                   f3[0] -= tmp2*r34[0];
                   f3[1] -= tmp2*r34[1];
                   f3[2] -= tmp2*r34[2];
                   f4[0] += tmp2*r34[0];
                   f4[1] += tmp2*r34[1];
                   f4[2] += tmp2*r34[2];
 
                   f[atom1][0] += f1[0]; f[atom1][1] += f1[1];
                   f[atom1][2] += f1[2];
                   f[atom2][0] += f2[0]; f[atom2][1] += f2[1];
                   f[atom2][2] += f2[2];
                   f[atom3][0] += f3[0]; f[atom3][1] += f3[1];
                   f[atom3][2] += f3[2];
                   f[atom4][0] += f4[0]; f[atom4][1] += f4[1];
                   f[atom4][2] += f4[2];
 
                   if (vflag_atom) {
                     r13[0] = -rjk[0]; r13[1] = -rjk[1]; r13[2] = -rjk[2];
                     r43[0] = -r34[0]; r43[1] = -r34[1]; r43[2] = -r34[2];
                     v_tally4(atom1,atom2,atom3,atom4,f1,f2,f4,r13,r23,r43);
                   }
                 }
               }
             }
           }
         }
       }
 
       REBO_neighs = REBO_firstneigh[i];
       for (k = 0; k < REBO_numneigh[i]; k++) {
         atomk = REBO_neighs[k];
         if (atomk != atomj) {
           ktype = map[type[atomk]];
           rik[0] = x[atomi][0]-x[atomk][0];
           rik[1] = x[atomi][1]-x[atomk][1];
           rik[2] = x[atomi][2]-x[atomk][2];
           rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2]));
           wik = Sp(rikmag,rcmin[itype][ktype],rcmax[itype][ktype],dwik);
           Nki = nC[atomk]-(wik*kronecker(itype,0))+nH[atomk] -
             (wik*kronecker(itype,1));
           SpN = Sp(Nki,Nmin,Nmax,dNki);
 
           tmp2 = VA*dN3[0]*dwik*Etmp/rikmag;
           f[atomi][0] -= tmp2*rik[0];
           f[atomi][1] -= tmp2*rik[1];
           f[atomi][2] -= tmp2*rik[2];
           f[atomk][0] += tmp2*rik[0];
           f[atomk][1] += tmp2*rik[1];
           f[atomk][2] += tmp2*rik[2];
 
           if (vflag_atom) v_tally2(atomi,atomk,-tmp2,rik);
 
           tmp2 = VA*dN3[2]*(2.0*NconjtmpI*dwik*SpN)*Etmp/rikmag;
           f[atomi][0] -= tmp2*rik[0];
           f[atomi][1] -= tmp2*rik[1];
           f[atomi][2] -= tmp2*rik[2];
           f[atomk][0] += tmp2*rik[0];
           f[atomk][1] += tmp2*rik[1];
           f[atomk][2] += tmp2*rik[2];
 
           if (vflag_atom) v_tally2(atomi,atomk,-tmp2,rik);
 
           if (fabs(dNki) > TOL) {
             REBO_neighs_k = REBO_firstneigh[atomk];
             for (n = 0; n < REBO_numneigh[atomk]; n++) {
               atomn = REBO_neighs_k[n];
               ntype = map[type[atomn]];
               if (atomn !=atomi) {
                 rkn[0] = x[atomk][0]-x[atomn][0];
                 rkn[1] = x[atomk][1]-x[atomn][1];
                 rkn[2] = x[atomk][2]-x[atomn][2];
                 rknmag = sqrt((rkn[0]*rkn[0])+(rkn[1]*rkn[1])+(rkn[2]*rkn[2]));
                 Sp(rknmag,rcmin[ktype][ntype],rcmax[ktype][ntype],dwkn);
 
                 tmp2 = VA*dN3[2]*(2.0*NconjtmpI*wik*dNki*dwkn)*Etmp/rknmag;
                 f[atomk][0] -= tmp2*rkn[0];
                 f[atomk][1] -= tmp2*rkn[1];
                 f[atomk][2] -= tmp2*rkn[2];
                 f[atomn][0] += tmp2*rkn[0];
                 f[atomn][1] += tmp2*rkn[1];
                 f[atomn][2] += tmp2*rkn[2];
 
                 if (vflag_atom) v_tally2(atomk,atomn,-tmp2,rkn);
               }
             }
           }
         }
       }
 
       // Tij forces
 
       REBO_neighs = REBO_firstneigh[j];
       for (l = 0; l < REBO_numneigh[j]; l++) {
         atoml = REBO_neighs[l];
         if (atoml != atomi) {
           ltype = map[type[atoml]];
           rjl[0] = x[atomj][0]-x[atoml][0];
           rjl[1] = x[atomj][1]-x[atoml][1];
           rjl[2] = x[atomj][2]-x[atoml][2];
           rjlmag = sqrt((rjl[0]*rjl[0])+(rjl[1]*rjl[1])+(rjl[2]*rjl[2]));
           wjl = Sp(rjlmag,rcmin[jtype][ltype],rcmax[jtype][ltype],dwjl);
           Nlj = nC[atoml]-(wjl*kronecker(jtype,0))+nH[atoml] -
             (wjl*kronecker(jtype,1));
           SpN = Sp(Nlj,Nmin,Nmax,dNlj);
 
           tmp2 = VA*dN3[1]*dwjl*Etmp/rjlmag;
           f[atomj][0] -= tmp2*rjl[0];
           f[atomj][1] -= tmp2*rjl[1];
           f[atomj][2] -= tmp2*rjl[2];
           f[atoml][0] += tmp2*rjl[0];
           f[atoml][1] += tmp2*rjl[1];
           f[atoml][2] += tmp2*rjl[2];
 
           if (vflag_atom) v_tally2(atomj,atoml,-tmp2,rjl);
 
           tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*dwjl*SpN)*Etmp/rjlmag;
           f[atomj][0] -= tmp2*rjl[0];
           f[atomj][1] -= tmp2*rjl[1];
           f[atomj][2] -= tmp2*rjl[2];
           f[atoml][0] += tmp2*rjl[0];
           f[atoml][1] += tmp2*rjl[1];
           f[atoml][2] += tmp2*rjl[2];
 
           if (vflag_atom) v_tally2(atomj,atoml,-tmp2,rjl);
 
           if (fabs(dNlj) > TOL) {
             REBO_neighs_l = REBO_firstneigh[atoml];
             for (n = 0; n < REBO_numneigh[atoml]; n++) {
               atomn = REBO_neighs_l[n];
               ntype = map[type[atomn]];
               if (atomn != atomj) {
                 rln[0] = x[atoml][0]-x[atomn][0];
                 rln[1] = x[atoml][1]-x[atomn][1];
                 rln[2] = x[atoml][2]-x[atomn][2];
                 rlnmag = sqrt((rln[0]*rln[0])+(rln[1]*rln[1])+(rln[2]*rln[2]));
                 Sp(rlnmag,rcmin[ltype][ntype],rcmax[ltype][ntype],dwln);
 
                 tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*wjl*dNlj*dwln)*Etmp/rlnmag;
                 f[atoml][0] -= tmp2*rln[0];
                 f[atoml][1] -= tmp2*rln[1];
                 f[atoml][2] -= tmp2*rln[2];
                 f[atomn][0] += tmp2*rln[0];
                 f[atomn][1] += tmp2*rln[1];
                 f[atomn][2] += tmp2*rln[2];
 
                 if (vflag_atom) v_tally2(atoml,atomn,-tmp2,rln);
               }
             }
           }
         }
       }
     }
   }
 
   return Stb;
 }
 
 /* ----------------------------------------------------------------------
    G spline
 ------------------------------------------------------------------------- */
 
 double PairAIREBO::gSpline(double costh, double Nij, int typei,
                            double *dgdc, double *dgdN)
 {
   double coeffs[6],dS,g1,g2,dg1,dg2,cut,g;
   int i,j;
 
   i = 0;
   j = 0;
   g = 0.0;
   cut = 0.0;
   dS = 0.0;
   dg1 = 0.0;
   dg2 = 0.0;
   *dgdc = 0.0;
   *dgdN = 0.0;
 
   // central atom is Carbon
 
   if (typei == 0) {
     if (costh < gCdom[0]) costh = gCdom[0];
     if (costh > gCdom[4]) costh = gCdom[4];
     if (Nij >= NCmax) {
       for (i = 0; i < 4; i++) {
         if (costh >= gCdom[i] && costh <= gCdom[i+1]) {
           for (j = 0; j < 6; j++) coeffs[j] = gC2[i][j];
         }
       }
       g2 = Sp5th(costh,coeffs,&dg2);
       g = g2;
       *dgdc = dg2;
       *dgdN = 0.0;
     }
     if (Nij <= NCmin) {
       for (i = 0; i < 4; i++) {
         if (costh >= gCdom[i] && costh <= gCdom[i+1]) {
           for (j = 0; j < 6; j++) coeffs[j] = gC1[i][j];
         }
       }
       g1 = Sp5th(costh,coeffs,&dg1);
       g = g1;
       *dgdc = dg1;
       *dgdN = 0.0;
     }
     if (Nij > NCmin && Nij < NCmax) {
       for (i = 0; i < 4; i++) {
         if (costh >= gCdom[i] && costh <= gCdom[i+1]) {
           for (j = 0; j < 6; j++) coeffs[j] = gC1[i][j];
         }
       }
       g1 = Sp5th(costh,coeffs,&dg1);
       for (i = 0; i < 4; i++) {
         if (costh >= gCdom[i] && costh <= gCdom[i+1]) {
           for (j = 0; j < 6; j++) coeffs[j] = gC2[i][j];
         }
       }
       g2 = Sp5th(costh,coeffs,&dg2);
       cut = Sp(Nij,NCmin,NCmax,dS);
       g = g2+cut*(g1-g2);
       *dgdc = dg2+(cut*(dg1-dg2));
       *dgdN = dS*(g1-g2);
     }
   }
 
   // central atom is Hydrogen
 
   if (typei == 1) {
     if (costh < gHdom[0]) costh = gHdom[0];
     if (costh > gHdom[3]) costh = gHdom[3];
     for (i = 0; i < 3; i++) {
       if (costh >= gHdom[i] && costh <= gHdom[i+1]) {
         for (j = 0; j < 6; j++) coeffs[j] = gH[i][j];
       }
     }
     g = Sp5th(costh,coeffs,&dg1);
     *dgdN = 0.0;
     *dgdc = dg1;
   }
 
   return g;
 }
 
 /* ----------------------------------------------------------------------
    Pij spline
    NijC,NijH = the two spline coordinates (by their names, the carbon
      and hydrogen neighbor counts); both are clamped to the pCCdom
      (typej = 0) or pCHdom (typej = 1) bounds before use
    typei,typej = 0 for carbon, 1 for hydrogen
    returns Pij and stores the tabulated/interpolated derivatives with
      respect to NijC and NijH in dN2[0] and dN2[1]
    when both coordinates lie within TOL above an integer the knot tables
      (PCCf/PCHf and their dfdx/dfdy companions) are used directly,
      otherwise the bicubic patch of the enclosing cell is evaluated
    only typei = 0 gives a non-zero result; every other type pair
      returns Pij = 0.0 with zero derivatives
 ------------------------------------------------------------------------- */
 
 double PairAIREBO::PijSpline(double NijC, double NijH, int typei, int typej,
                              double dN2[2])
 {
   int x,y,i;
   double Pij,coeffs[16];
 
   // defaults cover hydrogen-centered bonds and unrecognized type pairs,
   // so the return value is never left uninitialized
 
   Pij = 0.0;
   dN2[0] = 0.0;
   dN2[1] = 0.0;
 
   if (typei != 0) return Pij;
 
   if (typej == 0) {
 
     // if inputs are out of bounds set them back to a point in bounds
 
     if (NijC < pCCdom[0][0]) NijC=pCCdom[0][0];
     if (NijC > pCCdom[0][1]) NijC=pCCdom[0][1];
     if (NijH < pCCdom[1][0]) NijH=pCCdom[1][0];
     if (NijH > pCCdom[1][1]) NijH=pCCdom[1][1];
 
     if (fabs(NijC-floor(NijC)) < TOL && fabs(NijH-floor(NijH)) < TOL) {
       x = (int) NijC;
       y = (int) NijH;
       Pij = PCCf[x][y];
       dN2[0] = PCCdfdx[x][y];
       dN2[1] = PCCdfdy[x][y];
     } else {
       x = (int) (floor(NijC));
       y = (int) (floor(NijH));
 
       // a coordinate sitting exactly on the upper bound belongs to the
       // last cell, not to a cell that would start at the bound; step
       // back so the patch index stays inside the tabulated range
 
       if (x > 0 && (double) x >= pCCdom[0][1]) --x;
       if (y > 0 && (double) y >= pCCdom[1][1]) --y;
 
       for (i = 0; i < 16; i++) coeffs[i] = pCC[x][y][i];
       Pij = Spbicubic(NijC,NijH,coeffs,dN2);
     }
 
   } else if (typej == 1) {
 
     // if inputs are out of bounds set them back to a point in bounds
 
     if (NijC < pCHdom[0][0]) NijC=pCHdom[0][0];
     if (NijC > pCHdom[0][1]) NijC=pCHdom[0][1];
     if (NijH < pCHdom[1][0]) NijH=pCHdom[1][0];
     if (NijH > pCHdom[1][1]) NijH=pCHdom[1][1];
 
     if (fabs(NijC-floor(NijC)) < TOL && fabs(NijH-floor(NijH)) < TOL) {
       x = (int) NijC;
       y = (int) NijH;
       Pij = PCHf[x][y];
       dN2[0] = PCHdfdx[x][y];
       dN2[1] = PCHdfdy[x][y];
     } else {
       x = (int) (floor(NijC));
       y = (int) (floor(NijH));
 
       // same upper-edge handling as the C-C case above
 
       if (x > 0 && (double) x >= pCHdom[0][1]) --x;
       if (y > 0 && (double) y >= pCHdom[1][1]) --y;
 
       for (i = 0; i < 16; i++) coeffs[i] = pCH[x][y][i];
       Pij = Spbicubic(NijC,NijH,coeffs,dN2);
     }
   }
 
   return Pij;
 }
 
 /* ----------------------------------------------------------------------
    PiRC spline
    Nij,Nji,Nijconj = the three spline coordinates
    typei,typej = 0 for carbon, 1 for hydrogen
    C-C uses the piCC tables, C-H and H-C both use piCH, H-H uses piHH
    C-C and C-H clamp each coordinate to its piCCdom/piCHdom bounds;
      H-H instead resets all three coordinates to 0.0 as soon as any one
      of them is outside piHHdom
    when all three coordinates lie within TOL above an integer the knot
      tables (f, dfdx, dfdy, dfdz) are used directly, otherwise the
      tricubic patch of the enclosing cell is evaluated by Sptricubic()
    returns piRC and stores the three partial derivatives in dN3[0..2]
    a type pair outside the cases above returns 0.0 and leaves dN3 as is
 ------------------------------------------------------------------------- */
 
 double PairAIREBO::piRCSpline(double Nij, double Nji, double Nijconj,
                               int typei, int typej, double dN3[3])
 {
   int x,y,z,i;
   double piRC,coeffs[64];
 
   x = 0;
   y = 0;
   z = 0;
 
   // defined result even if no type pair below matches
 
   piRC = 0.0;
 
   if (typei==0 && typej==0) {
 
     // CC interaction
     // if the inputs are out of bounds set them back to a point in bounds
 
     if (Nij<piCCdom[0][0]) Nij=piCCdom[0][0];
     if (Nij>piCCdom[0][1]) Nij=piCCdom[0][1];
     if (Nji<piCCdom[1][0]) Nji=piCCdom[1][0];
     if (Nji>piCCdom[1][1]) Nji=piCCdom[1][1];
     if (Nijconj<piCCdom[2][0]) Nijconj=piCCdom[2][0];
     if (Nijconj>piCCdom[2][1]) Nijconj=piCCdom[2][1];
 
     if (fabs(Nij-floor(Nij))<TOL && fabs(Nji-floor(Nji))<TOL &&
         fabs(Nijconj-floor(Nijconj))<TOL) {
       x = (int) Nij;
       y = (int) Nji;
       z = (int) Nijconj;
       piRC=piCCf[x][y][z];
       dN3[0]=piCCdfdx[x][y][z];
       dN3[1]=piCCdfdy[x][y][z];
       dN3[2]=piCCdfdz[x][y][z];
     } else {
 
       // cell search: the highest cell [i,i+1] containing the coordinate,
       // with i kept below the upper domain bound
 
       for (i=0; i<piCCdom[0][1]; i++)
         if (Nij>=i && Nij<=i+1) x=i;
       for (i=0; i<piCCdom[1][1]; i++)
         if (Nji>=i && Nji<=i+1) y=i;
       for (i=0; i<piCCdom[2][1]; i++)
         if (Nijconj>=i && Nijconj<=i+1) z=i;
 
       for (i=0; i<64; i++) coeffs[i]=piCC[x][y][z][i];
       piRC=Sptricubic(Nij,Nji,Nijconj,coeffs,dN3);
     }
 
   } else if ((typei==0 && typej==1) || (typei==1 && typej==0)) {
 
     // CH interaction
     // if the inputs are out of bounds set them back to a point in bounds
 
     if (Nij<piCHdom[0][0]) Nij=piCHdom[0][0];
     if (Nij>piCHdom[0][1]) Nij=piCHdom[0][1];
     if (Nji<piCHdom[1][0]) Nji=piCHdom[1][0];
     if (Nji>piCHdom[1][1]) Nji=piCHdom[1][1];
     if (Nijconj<piCHdom[2][0]) Nijconj=piCHdom[2][0];
     if (Nijconj>piCHdom[2][1]) Nijconj=piCHdom[2][1];
 
     if (fabs(Nij-floor(Nij))<TOL && fabs(Nji-floor(Nji))<TOL &&
         fabs(Nijconj-floor(Nijconj))<TOL) {
       x = (int) Nij;
       y = (int) Nji;
       z = (int) Nijconj;
       piRC=piCHf[x][y][z];
       dN3[0]=piCHdfdx[x][y][z];
       dN3[1]=piCHdfdy[x][y][z];
       dN3[2]=piCHdfdz[x][y][z];
     } else {
       for (i=0; i<piCHdom[0][1]; i++)
         if (Nij>=i && Nij<=i+1) x=i;
       for (i=0; i<piCHdom[1][1]; i++)
         if (Nji>=i && Nji<=i+1) y=i;
       for (i=0; i<piCHdom[2][1]; i++)
         if (Nijconj>=i && Nijconj<=i+1) z=i;
 
       for (i=0; i<64; i++) coeffs[i]=piCH[x][y][z][i];
       piRC=Sptricubic(Nij,Nji,Nijconj,coeffs,dN3);
     }
 
   } else if (typei==1 && typej==1) {
 
     // HH interaction
     // any out-of-bounds input sends all three coordinates to the origin
 
     if (Nij<piHHdom[0][0] || Nij>piHHdom[0][1] ||
         Nji<piHHdom[1][0] || Nji>piHHdom[1][1] ||
         Nijconj<piHHdom[2][0] || Nijconj>piHHdom[2][1]) {
       Nij=0.0;
       Nji=0.0;
       Nijconj=0.0;
     }
 
     if (fabs(Nij-floor(Nij))<TOL && fabs(Nji-floor(Nji))<TOL &&
         fabs(Nijconj-floor(Nijconj))<TOL) {
       x = (int) Nij;
       y = (int) Nji;
       z = (int) Nijconj;
       piRC=piHHf[x][y][z];
       dN3[0]=piHHdfdx[x][y][z];
       dN3[1]=piHHdfdy[x][y][z];
       dN3[2]=piHHdfdz[x][y][z];
     } else {
       for (i=0; i<piHHdom[0][1]; i++)
         if (Nij>=i && Nij<=i+1) x=i;
       for (i=0; i<piHHdom[1][1]; i++)
         if (Nji>=i && Nji<=i+1) y=i;
       for (i=0; i<piHHdom[2][1]; i++)
         if (Nijconj>=i && Nijconj<=i+1) z=i;
 
       for (i=0; i<64; i++) coeffs[i]=piHH[x][y][z][i];
       piRC=Sptricubic(Nij,Nji,Nijconj,coeffs,dN3);
     }
   }
 
   return piRC;
 }
 
 /* ----------------------------------------------------------------------
    Tij spline
 ------------------------------------------------------------------------- */
 
 double PairAIREBO::TijSpline(double Nij, double Nji,
                              double Nijconj, double dN3[3])
 {
   int x,y,z,i,done;
   double Tijf,coeffs[64];
 
   x=0;
   y=0;
   z=0;
   i=0;
   Tijf=0.0;
   done=0;
   for (i=0; i<64; i++) coeffs[i]=0.0;
 
   //if the inputs are out of bounds set them back to a point in bounds
 
   if (Nij<Tijdom[0][0]) Nij=Tijdom[0][0];
   if (Nij>Tijdom[0][1]) Nij=Tijdom[0][1];
   if (Nji<Tijdom[1][0]) Nji=Tijdom[1][0];
   if (Nji>Tijdom[1][1]) Nji=Tijdom[1][1];
   if (Nijconj<Tijdom[2][0]) Nijconj=Tijdom[2][0];
   if (Nijconj>Tijdom[2][1]) Nijconj=Tijdom[2][1];
 
   if (fabs(Nij-floor(Nij))<TOL && fabs(Nji-floor(Nji))<TOL &&
       fabs(Nijconj-floor(Nijconj))<TOL) {
     Tijf=Tf[(int) Nij][(int) Nji][(int) Nijconj];
     dN3[0]=Tdfdx[(int) Nij][(int) Nji][(int) Nijconj];
     dN3[1]=Tdfdy[(int) Nij][(int) Nji][(int) Nijconj];
     dN3[2]=Tdfdz[(int) Nij][(int) Nji][(int) Nijconj];
     done=1;
   }
 
   if (done==0) {
     for (i=0; i<Tijdom[0][1]; i++)
       if (Nij>=i && Nij<=i+1) x=i;
     for (i=0; i<Tijdom[1][1]; i++)
       if (Nji>=i && Nji<=i+1) y=i;
     for (i=0; i<Tijdom[2][1]; i++)
       if (Nijconj>=i && Nijconj<=i+1) z=i;
 
     for (i=0; i<64; i++) coeffs[i]=Tijc[x][y][z][i];
     Tijf=Sptricubic(Nij,Nji,Nijconj,coeffs,dN3);
   }
 
   return Tijf;
 }
 
 /* ----------------------------------------------------------------------
    read AIREBO potential file
 ------------------------------------------------------------------------- */
 
 void PairAIREBO::read_file(char *filename)
 {
   int i,j,k,l,limit;
   char s[MAXLINE];
 
   // REBO Parameters (AIREBO)
 
   double rcmin_CC,rcmin_CH,rcmin_HH,rcmax_CC,rcmax_CH,
     rcmax_HH,rcmaxp_CC,rcmaxp_CH,rcmaxp_HH;
   double Q_CC,Q_CH,Q_HH,alpha_CC,alpha_CH,alpha_HH,A_CC,A_CH,A_HH;
   double BIJc_CC1,BIJc_CC2,BIJc_CC3,BIJc_CH1,BIJc_CH2,BIJc_CH3,
     BIJc_HH1,BIJc_HH2,BIJc_HH3;
   double Beta_CC1,Beta_CC2,Beta_CC3,Beta_CH1,Beta_CH2,Beta_CH3,
     Beta_HH1,Beta_HH2,Beta_HH3;
   double rho_CC,rho_CH,rho_HH;
 
   // LJ Parameters (AIREBO)
 
   double rcLJmin_CC,rcLJmin_CH,rcLJmin_HH,rcLJmax_CC,rcLJmax_CH,
     rcLJmax_HH,bLJmin_CC;
   double bLJmin_CH,bLJmin_HH,bLJmax_CC,bLJmax_CH,bLJmax_HH,
     epsilon_CC,epsilon_CH,epsilon_HH;
   double sigma_CC,sigma_CH,sigma_HH,epsilonT_CCCC,epsilonT_CCCH,epsilonT_HCCH;
 
   MPI_Comm_rank(world,&me);
 
   // read file on proc 0
 
   if (me == 0) {
     FILE *fp = force->open_potential(filename);
     if (fp == NULL) {
       char str[128];
       sprintf(str,"Cannot open AIREBO potential file %s",filename);
       error->one(FLERR,str);
     }
 
     // skip initial comment lines
 
     while (1) {
       fgets(s,MAXLINE,fp);
       if (s[0] != '#') break;
     }
 
     // read parameters
 
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&rcmin_CC);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&rcmin_CH);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&rcmin_HH);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&rcmax_CC);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&rcmax_CH);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&rcmax_HH);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&rcmaxp_CC);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&rcmaxp_CH);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&rcmaxp_HH);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&smin);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&Nmin);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&Nmax);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&NCmin);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&NCmax);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&Q_CC);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&Q_CH);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&Q_HH);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&alpha_CC);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&alpha_CH);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&alpha_HH);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&A_CC);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&A_CH);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&A_HH);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&BIJc_CC1);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&BIJc_CC2);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&BIJc_CC3);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&BIJc_CH1);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&BIJc_CH2);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&BIJc_CH3);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&BIJc_HH1);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&BIJc_HH2);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&BIJc_HH3);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&Beta_CC1);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&Beta_CC2);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&Beta_CC3);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&Beta_CH1);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&Beta_CH2);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&Beta_CH3);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&Beta_HH1);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&Beta_HH2);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&Beta_HH3);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&rho_CC);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&rho_CH);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&rho_HH);
 
     // LJ parameters
 
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&rcLJmin_CC);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&rcLJmin_CH);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&rcLJmin_HH);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&rcLJmax_CC);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&rcLJmax_CH);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&rcLJmax_HH);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&bLJmin_CC);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&bLJmin_CH);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&bLJmin_HH);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&bLJmax_CC);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&bLJmax_CH);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&bLJmax_HH);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&epsilon_CC);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&epsilon_CH);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&epsilon_HH);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&sigma_CC);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&sigma_CH);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&sigma_HH);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&epsilonT_CCCC);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&epsilonT_CCCH);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lg",&epsilonT_HCCH);
 
     // gC spline
 
     fgets(s,MAXLINE,fp);
     fgets(s,MAXLINE,fp);
     fgets(s,MAXLINE,fp);
 
     // number-1 = # of domains for the spline
 
     fgets(s,MAXLINE,fp);
     sscanf(s,"%d",&limit);
 
     for (i = 0; i < limit; i++) {
       fgets(s,MAXLINE,fp);
       sscanf(s,"%lg",&gCdom[i]);
     }
     fgets(s,MAXLINE,fp);
     for (i = 0; i < limit-1; i++) {
       for (j = 0; j < 6; j++) {
         fgets(s,MAXLINE,fp);
         sscanf(s,"%lg",&gC1[i][j]);
       }
     }
     fgets(s,MAXLINE,fp);
     for (i = 0; i < limit-1; i++) {
       for (j = 0; j < 6; j++) {
         fgets(s,MAXLINE,fp);
         sscanf(s,"%lg",&gC2[i][j]);
       }
     }
 
     // gH spline
 
     fgets(s,MAXLINE,fp);
     fgets(s,MAXLINE,fp);
     fgets(s,MAXLINE,fp);
 
     fgets(s,MAXLINE,fp);
     sscanf(s,"%d",&limit);
 
     for (i = 0; i < limit; i++) {
       fgets(s,MAXLINE,fp);
       sscanf(s,"%lg",&gHdom[i]);
     }
 
     fgets(s,MAXLINE,fp);
 
     for (i = 0; i < limit-1; i++) {
       for (j = 0; j < 6; j++) {
         fgets(s,MAXLINE,fp);
         sscanf(s,"%lg",&gH[i][j]);
       }
     }
 
     // pCC spline
 
     fgets(s,MAXLINE,fp);
     fgets(s,MAXLINE,fp);
     fgets(s,MAXLINE,fp);
 
     fgets(s,MAXLINE,fp);
     sscanf(s,"%d",&limit);
 
     for (i = 0; i < limit/2; i++) {
       for (j = 0; j < limit/2; j++) {
         fgets(s,MAXLINE,fp);
         sscanf(s,"%lg",&pCCdom[i][j]);
       }
     }
     fgets(s,MAXLINE,fp);
 
     for (i = 0; i < (int) pCCdom[0][1]; i++) {
       for (j = 0; j < (int) pCCdom[1][1]; j++) {
         for (k = 0; k < 16; k++) {
           fgets(s,MAXLINE,fp);
           sscanf(s,"%lg",&pCC[i][j][k]);
         }
       }
     }
 
     // pCH spline
 
     fgets(s,MAXLINE,fp);
     fgets(s,MAXLINE,fp);
     fgets(s,MAXLINE,fp);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%d",&limit);
 
     for (i = 0; i < limit/2; i++) {
       for (j = 0; j < limit/2; j++) {
         fgets(s,MAXLINE,fp);
         sscanf(s,"%lg",&pCHdom[i][j]);
       }
     }
     fgets(s,MAXLINE,fp);
 
     for (i = 0; i < (int) pCHdom[0][1]; i++) {
       for (j = 0; j < (int) pCHdom[1][1]; j++) {
         for (k = 0; k < 16; k++) {
           fgets(s,MAXLINE,fp);
           sscanf(s,"%lg",&pCH[i][j][k]);
         }
       }
     }
 
     // piCC spline
 
     fgets(s,MAXLINE,fp);
     fgets(s,MAXLINE,fp);
     fgets(s,MAXLINE,fp);
 
     fgets(s,MAXLINE,fp);
     sscanf(s,"%d",&limit);
 
     for (i = 0; i < limit/2; i++) {
       for (j = 0; j < limit/3; j++) {
         fgets(s,MAXLINE,fp);
         sscanf(s,"%lg",&piCCdom[i][j]);
       }
     }
     fgets(s,MAXLINE,fp);
 
     for (i = 0; i < (int) piCCdom[0][1]; i++) {
       for (j = 0; j < (int) piCCdom[1][1]; j++) {
         for (k = 0; k < (int) piCCdom[2][1]; k++) {
           for (l = 0; l < 64; l = l+1) {
             fgets(s,MAXLINE,fp);
             sscanf(s,"%lg",&piCC[i][j][k][l]);
           }
         }
       }
     }
 
     // piCH spline
 
     fgets(s,MAXLINE,fp);
     fgets(s,MAXLINE,fp);
     fgets(s,MAXLINE,fp);
 
     fgets(s,MAXLINE,fp);
     sscanf(s,"%d",&limit);
 
     for (i = 0; i < limit/2; i++) {
       for (j = 0; j < limit/3; j++) {
         fgets(s,MAXLINE,fp);
         sscanf(s,"%lg",&piCHdom[i][j]);
       }
     }
     fgets(s,MAXLINE,fp);
 
     for (i = 0; i < (int) piCHdom[0][1]; i++) {
       for (j = 0; j < (int) piCHdom[1][1]; j++) {
         for (k = 0; k < (int) piCHdom[2][1]; k++) {
           for (l = 0; l < 64; l = l+1) {
             fgets(s,MAXLINE,fp);
             sscanf(s,"%lg",&piCH[i][j][k][l]);
           }
         }
       }
     }
 
     // piHH spline
 
     fgets(s,MAXLINE,fp);
     fgets(s,MAXLINE,fp);
     fgets(s,MAXLINE,fp);
 
     fgets(s,MAXLINE,fp);
     sscanf(s,"%d",&limit);
 
     for (i = 0; i < limit/2; i++) {
       for (j = 0; j < limit/3; j++) {
         fgets(s,MAXLINE,fp);
         sscanf(s,"%lg",&piHHdom[i][j]);
       }
     }
     fgets(s,MAXLINE,fp);
 
     for (i = 0; i < (int) piHHdom[0][1]; i++) {
       for (j = 0; j < (int) piHHdom[1][1]; j++) {
         for (k = 0; k < (int) piHHdom[2][1]; k++) {
           for (l = 0; l < 64; l = l+1) {
             fgets(s,MAXLINE,fp);
             sscanf(s,"%lg",&piHH[i][j][k][l]);
           }
         }
       }
     }
 
     // Tij spline
 
     fgets(s,MAXLINE,fp);
     fgets(s,MAXLINE,fp);
     fgets(s,MAXLINE,fp);
 
     fgets(s,MAXLINE,fp);
     sscanf(s,"%d",&limit);
 
     for (i = 0; i < limit/2; i++) {
       for (j = 0; j < limit/3; j++) {
         fgets(s,MAXLINE,fp);
         sscanf(s,"%lg",&Tijdom[i][j]);
       }
     }
     fgets(s,MAXLINE,fp);
 
     for (i = 0; i < (int) Tijdom[0][1]; i++) {
       for (j = 0; j < (int) Tijdom[1][1]; j++) {
         for (k = 0; k < (int) Tijdom[2][1]; k++) {
           for (l = 0; l < 64; l = l+1) {
             fgets(s,MAXLINE,fp);
             sscanf(s,"%lg",&Tijc[i][j][k][l]);
           }
         }
       }
     }
 
     fclose(fp);
   }
 
   // store read-in values in arrays
 
   if (me == 0) {
 
     // REBO
 
     rcmin[0][0] = rcmin_CC;
     rcmin[0][1] = rcmin_CH;
     rcmin[1][0] = rcmin[0][1];
     rcmin[1][1] = rcmin_HH;
 
     rcmax[0][0] = rcmax_CC;
     rcmax[0][1] = rcmax_CH;
     rcmax[1][0] = rcmax[0][1];
     rcmax[1][1] = rcmax_HH;
 
     rcmaxsq[0][0] = rcmax[0][0]*rcmax[0][0];
     rcmaxsq[1][0] = rcmax[1][0]*rcmax[1][0];
     rcmaxsq[0][1] = rcmax[0][1]*rcmax[0][1];
     rcmaxsq[1][1] = rcmax[1][1]*rcmax[1][1];
 
     rcmaxp[0][0] = rcmaxp_CC;
     rcmaxp[0][1] = rcmaxp_CH;
     rcmaxp[1][0] = rcmaxp[0][1];
     rcmaxp[1][1] = rcmaxp_HH;
 
     Q[0][0] = Q_CC;
     Q[0][1] = Q_CH;
     Q[1][0] = Q[0][1];
     Q[1][1] = Q_HH;
 
     alpha[0][0] = alpha_CC;
     alpha[0][1] = alpha_CH;
     alpha[1][0] = alpha[0][1];
     alpha[1][1] = alpha_HH;
 
     A[0][0] = A_CC;
     A[0][1] = A_CH;
     A[1][0] = A[0][1];
     A[1][1] = A_HH;
 
     rho[0][0] = rho_CC;
     rho[0][1] = rho_CH;
     rho[1][0] = rho[0][1];
     rho[1][1] = rho_HH;
 
     BIJc[0][0][0] = BIJc_CC1;
     BIJc[0][0][1] = BIJc_CC2;
     BIJc[0][0][2] = BIJc_CC3;
     BIJc[0][1][0] = BIJc_CH1;
     BIJc[0][1][1] = BIJc_CH2;
     BIJc[0][1][2] = BIJc_CH3;
     BIJc[1][0][0] = BIJc_CH1;
     BIJc[1][0][1] = BIJc_CH2;
     BIJc[1][0][2] = BIJc_CH3;
     BIJc[1][1][0] = BIJc_HH1;
     BIJc[1][1][1] = BIJc_HH2;
     BIJc[1][1][2] = BIJc_HH3;
 
     Beta[0][0][0] = Beta_CC1;
     Beta[0][0][1] = Beta_CC2;
     Beta[0][0][2] = Beta_CC3;
     Beta[0][1][0] = Beta_CH1;
     Beta[0][1][1] = Beta_CH2;
     Beta[0][1][2] = Beta_CH3;
     Beta[1][0][0] = Beta_CH1;
     Beta[1][0][1] = Beta_CH2;
     Beta[1][0][2] = Beta_CH3;
     Beta[1][1][0] = Beta_HH1;
     Beta[1][1][1] = Beta_HH2;
     Beta[1][1][2] = Beta_HH3;
 
     // LJ
 
     rcLJmin[0][0] = rcLJmin_CC;
     rcLJmin[0][1] = rcLJmin_CH;
     rcLJmin[1][0] = rcLJmin[0][1];
     rcLJmin[1][1] = rcLJmin_HH;
 
     rcLJmax[0][0] = rcLJmax_CC;
     rcLJmax[0][1] = rcLJmax_CH;
     rcLJmax[1][0] = rcLJmax[0][1];
     rcLJmax[1][1] = rcLJmax_HH;
 
     rcLJmaxsq[0][0] = rcLJmax[0][0]*rcLJmax[0][0];
     rcLJmaxsq[1][0] = rcLJmax[1][0]*rcLJmax[1][0];
     rcLJmaxsq[0][1] = rcLJmax[0][1]*rcLJmax[0][1];
     rcLJmaxsq[1][1] = rcLJmax[1][1]*rcLJmax[1][1];
 
     bLJmin[0][0] = bLJmin_CC;
     bLJmin[0][1] = bLJmin_CH;
     bLJmin[1][0] = bLJmin[0][1];
     bLJmin[1][1] = bLJmin_HH;
 
     bLJmax[0][0] = bLJmax_CC;
     bLJmax[0][1] = bLJmax_CH;
     bLJmax[1][0] = bLJmax[0][1];
     bLJmax[1][1] = bLJmax_HH;
 
     epsilon[0][0] = epsilon_CC;
     epsilon[0][1] = epsilon_CH;
     epsilon[1][0] = epsilon[0][1];
     epsilon[1][1] = epsilon_HH;
 
     sigma[0][0] = sigma_CC;
     sigma[0][1] = sigma_CH;
     sigma[1][0] = sigma[0][1];
     sigma[1][1] = sigma_HH;
 
     // torsional
 
     thmin = -1.0;
     thmax = -0.995;
     epsilonT[0][0] = epsilonT_CCCC;
     epsilonT[0][1] = epsilonT_CCCH;
     epsilonT[1][0] = epsilonT[0][1];
     epsilonT[1][1] = epsilonT_HCCH;
   }
 
   // broadcast read-in and setup values
 
   MPI_Bcast(&thmin,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&thmax,1,MPI_DOUBLE,0,world);
 
   MPI_Bcast(&smin,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&Nmin,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&Nmax,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&NCmin,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&NCmax,1,MPI_DOUBLE,0,world);
 
 
   MPI_Bcast(&rcmin[0][0],4,MPI_DOUBLE,0,world);
   MPI_Bcast(&rcmax[0][0],4,MPI_DOUBLE,0,world);
   MPI_Bcast(&rcmaxsq[0][0],4,MPI_DOUBLE,0,world);
   MPI_Bcast(&rcmaxp[0][0],4,MPI_DOUBLE,0,world);
 
   MPI_Bcast(&Q[0][0],4,MPI_DOUBLE,0,world);
   MPI_Bcast(&alpha[0][0],4,MPI_DOUBLE,0,world);
   MPI_Bcast(&A[0][0],4,MPI_DOUBLE,0,world);
   MPI_Bcast(&rho[0][0],4,MPI_DOUBLE,0,world);
   MPI_Bcast(&BIJc[0][0][0],12,MPI_DOUBLE,0,world);
   MPI_Bcast(&Beta[0][0][0],12,MPI_DOUBLE,0,world);
 
   MPI_Bcast(&rcLJmin[0][0],4,MPI_DOUBLE,0,world);
   MPI_Bcast(&rcLJmax[0][0],4,MPI_DOUBLE,0,world);
   MPI_Bcast(&rcLJmaxsq[0][0],4,MPI_DOUBLE,0,world);
   MPI_Bcast(&rcLJmin[0][0],4,MPI_DOUBLE,0,world);
   MPI_Bcast(&rcLJmin[0][0],4,MPI_DOUBLE,0,world);
 
   MPI_Bcast(&rcLJmin[0][0],4,MPI_DOUBLE,0,world);
   MPI_Bcast(&rcLJmax[0][0],4,MPI_DOUBLE,0,world);
   MPI_Bcast(&bLJmin[0][0],4,MPI_DOUBLE,0,world);
   MPI_Bcast(&bLJmax[0][0],4,MPI_DOUBLE,0,world);
 
   MPI_Bcast(&epsilon[0][0],4,MPI_DOUBLE,0,world);
   MPI_Bcast(&sigma[0][0],4,MPI_DOUBLE,0,world);
   MPI_Bcast(&epsilonT[0][0],4,MPI_DOUBLE,0,world);
 
   MPI_Bcast(&gCdom[0],5,MPI_DOUBLE,0,world);
   MPI_Bcast(&gC1[0][0],24,MPI_DOUBLE,0,world);
   MPI_Bcast(&gC2[0][0],24,MPI_DOUBLE,0,world);
   MPI_Bcast(&gHdom[0],4,MPI_DOUBLE,0,world);
   MPI_Bcast(&gH[0][0],18,MPI_DOUBLE,0,world);
 
   MPI_Bcast(&pCCdom[0][0],4,MPI_DOUBLE,0,world);
   MPI_Bcast(&pCHdom[0][0],4,MPI_DOUBLE,0,world);
   MPI_Bcast(&pCC[0][0][0],256,MPI_DOUBLE,0,world);
   MPI_Bcast(&pCH[0][0][0],256,MPI_DOUBLE,0,world);
 
   MPI_Bcast(&piCCdom[0][0],6,MPI_DOUBLE,0,world);
   MPI_Bcast(&piCHdom[0][0],6,MPI_DOUBLE,0,world);
   MPI_Bcast(&piHHdom[0][0],6,MPI_DOUBLE,0,world);
   MPI_Bcast(&piCC[0][0][0][0],9216,MPI_DOUBLE,0,world);
   MPI_Bcast(&piCH[0][0][0][0],9216,MPI_DOUBLE,0,world);
   MPI_Bcast(&piHH[0][0][0][0],9216,MPI_DOUBLE,0,world);
 
   MPI_Bcast(&Tijdom[0][0],6,MPI_DOUBLE,0,world);
   MPI_Bcast(&Tijc[0][0][0][0],9216,MPI_DOUBLE,0,world);
 }
 
 // ----------------------------------------------------------------------
 // generic Spline functions
 // ----------------------------------------------------------------------
 
 /* ----------------------------------------------------------------------
    fifth order spline evaluation
    evaluates the polynomial sum over n = 0..5 of coeffs[n] * x^n
    returns the polynomial value and stores its first derivative in *df
 ------------------------------------------------------------------------- */
 
 double PairAIREBO::Sp5th(double x, double coeffs[6], double *df)
 {
   const double xsq = x*x;
   const double xcube = xsq*x;
 
   // polynomial value, accumulated from the constant term upward
 
   double value = coeffs[0];
   value += coeffs[1]*x;
   value += coeffs[2]*xsq;
   value += coeffs[3]*xcube;
   value += coeffs[4]*xsq*xsq;
   value += coeffs[5]*xsq*xcube;
 
   // first derivative, accumulated in the same term order
 
   double slope = coeffs[1];
   slope += 2.0*coeffs[2]*x;
   slope += 3.0*coeffs[3]*xsq;
   slope += 4.0*coeffs[4]*xcube;
   slope += 5.0*coeffs[5]*xsq*xsq;
 
   *df = slope;
   return value;
 }
 
 /* ----------------------------------------------------------------------
    bicubic spline evaluation
    coeffs[4*i+j] multiplies x^i * y^j for i,j = 0..3
    returns the polynomial value
    df[0] receives the partial derivative in x, df[1] the one in y
 ------------------------------------------------------------------------- */
 
 double PairAIREBO::Spbicubic(double x, double y,
                              double coeffs[16], double df[2])
 {
   double value = 0.0;
   df[0] = 0.0;
   df[1] = 0.0;
 
   // xpow holds x^row; xprev holds x^(row-1) and is only read once row > 0
 
   double xpow = 1.0;
   double xprev = 0.0;
 
   for (int row = 0; row < 4; ++row) {
     const double *crow = &coeffs[4*row];
 
     // ypow holds y^col; yprev holds y^(col-1) and is only read once col > 0
 
     double ypow = 1.0;
     double yprev = 0.0;
 
     for (int col = 0; col < 4; ++col) {
       const double c = crow[col];
 
       value += c*xpow*ypow;
       if (row != 0) df[0] += c * static_cast<double>(row) * xprev * ypow;
       if (col != 0) df[1] += c * static_cast<double>(col) * xpow * yprev;
 
       yprev = ypow;
       ypow *= y;
     }
 
     xprev = xpow;
     xpow *= x;
   }
 
   return value;
 }
 
 /* ----------------------------------------------------------------------
    tricubic spline evaluation
    coeffs[16*i+4*j+k] multiplies x^i * y^j * z^k for i,j,k = 0..3
    returns the polynomial value
    df[0], df[1], df[2] receive the partial derivatives in x, y, z
 ------------------------------------------------------------------------- */
 
 double PairAIREBO::Sptricubic(double x, double y, double z,
                               double coeffs[64], double df[3])
 {
   double value = 0.0;
   df[0] = 0.0;
   df[1] = 0.0;
   df[2] = 0.0;
 
   // each *pow variable holds the current power of its coordinate,
   // each *prev variable the next lower power (read only for index > 0)
 
   double xpow = 1.0;
   double xprev = 0.0;
 
   for (int ix = 0; ix < 4; ++ix) {
     const double xfac = static_cast<double>(ix);
     double ypow = 1.0;
     double yprev = 0.0;
 
     for (int iy = 0; iy < 4; ++iy) {
       const double yfac = static_cast<double>(iy);
       const double *cz = &coeffs[16*ix+4*iy];
       double zpow = 1.0;
       double zprev = 0.0;
 
       for (int iz = 0; iz < 4; ++iz) {
         const double zfac = static_cast<double>(iz);
         const double c = cz[iz];
 
         value += c*xpow*ypow*zpow;
         if (ix != 0) df[0] += c * xfac * xprev * ypow * zpow;
         if (iy != 0) df[1] += c * yfac * xpow * yprev * zpow;
         if (iz != 0) df[2] += c * zfac * xpow * ypow * zprev;
 
         zprev = zpow;
         zpow *= z;
       }
 
       yprev = ypow;
       ypow *= y;
     }
 
     xprev = xpow;
     xpow *= x;
   }
 
   return value;
 }
 
 /* ----------------------------------------------------------------------
    initialize spline knot values
    zeroes the two-index tables (PCC*, PCH*) and the three-index tables
    (piCC*, piCH*, piHH*, T*), then assigns the hard-coded non-zero entries
    the *dfdx/*dfdy/*dfdz arrays presumably hold derivative values at the
    knots -- TODO confirm against where they are consumed
    statement order matters: the piCC/piCH tables are first filled, then
    extended and mirrored by the copy loops further down
 ------------------------------------------------------------------------- */
 
 void PairAIREBO::spline_init()
 {
   int i,j,k;
 
   // clear all 5x5 entries of the PCC and PCH tables
 
   for (i = 0; i < 5; i++) {
     for (j = 0; j < 5; j++) {
       PCCf[i][j] = 0.0;
       PCCdfdx[i][j] = 0.0;
       PCCdfdy[i][j] = 0.0;
       PCHf[i][j] = 0.0;
       PCHdfdx[i][j] = 0.0;
       PCHdfdy[i][j] = 0.0;
     }
   }
 
   // non-zero PCC values
 
   PCCf[0][2] = -0.00050;
   PCCf[0][3] = 0.0161253646;
   PCCf[1][1] = -0.010960;
   PCCf[1][2] = 0.00632624824;
   PCCf[2][0] = -0.0276030;
   PCCf[2][1] = 0.00317953083;
 
   // non-zero PCH values
 
   PCHf[0][1] = 0.209336733;
   PCHf[0][2] = -0.0644496154;
   PCHf[0][3] = -0.303927546;
   PCHf[1][0] = 0.010;
   PCHf[1][1] = -0.125123401;
   PCHf[1][2] = -0.298905246;
   PCHf[2][0] = -0.122042146;
   PCHf[2][1] = -0.300529172;
   PCHf[3][0] = -0.307584705;
 
   // clear the piCC, piCH, piHH and T tables for third index 0..9
   // NOTE(review): only k < 10 is cleared here, while the piCC/piCH copy
   // loops below read and write k = 10; the array declarations are not
   // visible in this file section -- confirm the last dimension is >= 11
 
   for (i = 0; i < 5; i++) {
     for (j = 0; j < 5; j++) {
       for (k = 0; k < 10; k++) {
         piCCf[i][j][k] = 0.0;
         piCCdfdx[i][j][k] = 0.0;
         piCCdfdy[i][j][k] = 0.0;
         piCCdfdz[i][j][k] = 0.0;
         piCHf[i][j][k] = 0.0;
         piCHdfdx[i][j][k] = 0.0;
         piCHdfdy[i][j][k] = 0.0;
         piCHdfdz[i][j][k] = 0.0;
         piHHf[i][j][k] = 0.0;
         piHHdfdx[i][j][k] = 0.0;
         piHHdfdy[i][j][k] = 0.0;
         piHHdfdz[i][j][k] = 0.0;
         Tf[i][j][k] = 0.0;
         Tdfdx[i][j][k] = 0.0;
         Tdfdy[i][j][k] = 0.0;
         Tdfdz[i][j][k] = 0.0;
       }
     }
   }
 
   // non-zero piCC values
   // most [a][b] entries are paired with an identical [b][a] entry
   // in the loops below, i is reused as the third (k) index
 
   for (i = 3; i < 10; i++) piCCf[0][0][i] = 0.0049586079;
   piCCf[1][0][1] = 0.021693495;
   piCCf[0][1][1] = 0.021693495;
   for (i = 2; i < 10; i++) piCCf[1][0][i] = 0.0049586079;
   for (i = 2; i < 10; i++) piCCf[0][1][i] = 0.0049586079;
   piCCf[1][1][1] = 0.05250;
   piCCf[1][1][2] = -0.002088750;
   for (i = 3; i < 10; i++) piCCf[1][1][i] = -0.00804280;
   piCCf[2][0][1] = 0.024698831850;
   piCCf[0][2][1] = 0.024698831850;
   piCCf[2][0][2] = -0.00597133450;
   piCCf[0][2][2] = -0.00597133450;
   for (i = 3; i < 10; i++) piCCf[2][0][i] = 0.0049586079;
   for (i = 3; i < 10; i++) piCCf[0][2][i] = 0.0049586079;
   piCCf[2][1][1] = 0.00482478490;
   piCCf[1][2][1] = 0.00482478490;
   piCCf[2][1][2] = 0.0150;
   piCCf[1][2][2] = 0.0150;
   piCCf[2][1][3] = -0.010;
   piCCf[1][2][3] = -0.010;
   piCCf[2][1][4] = -0.01168893870;
   piCCf[1][2][4] = -0.01168893870;
   piCCf[2][1][5] = -0.013377877400;
   piCCf[1][2][5] = -0.013377877400;
   piCCf[2][1][6] = -0.015066816000;
   piCCf[1][2][6] = -0.015066816000;
   for (i = 7; i < 10; i++) piCCf[2][1][i] = -0.015066816000;
   for (i = 7; i < 10; i++) piCCf[1][2][i] = -0.015066816000;
   piCCf[2][2][1] = 0.0472247850;
   piCCf[2][2][2] = 0.0110;
   piCCf[2][2][3] = 0.0198529350;
   piCCf[2][2][4] = 0.01654411250;
   piCCf[2][2][5] = 0.013235290;
   piCCf[2][2][6] = 0.00992646749999 ;
   piCCf[2][2][7] = 0.006617644999;
   piCCf[2][2][8] = 0.00330882250;
   piCCf[3][0][1] = -0.05989946750;
   piCCf[0][3][1] = -0.05989946750;
   piCCf[3][0][2] = -0.05989946750;
   piCCf[0][3][2] = -0.05989946750;
   for (i = 3; i < 10; i++) piCCf[3][0][i] = 0.0049586079;
   for (i = 3; i < 10; i++) piCCf[0][3][i] = 0.0049586079;
   piCCf[3][1][2] = -0.0624183760;
   piCCf[1][3][2] = -0.0624183760;
   for (i = 3; i < 10; i++) piCCf[3][1][i] = -0.0624183760;
   for (i = 3; i < 10; i++) piCCf[1][3][i] = -0.0624183760;
   piCCf[3][2][1] = -0.02235469150;
   piCCf[2][3][1] = -0.02235469150;
   for (i = 2; i < 10; i++) piCCf[3][2][i] = -0.02235469150;
   for (i = 2; i < 10; i++) piCCf[2][3][i] = -0.02235469150;
 
   // non-zero piCCdfdx values
 
   piCCdfdx[2][1][1] = -0.026250;
   piCCdfdx[2][1][5] = -0.0271880;
   piCCdfdx[2][1][6] = -0.0271880;
   for (i = 7; i < 10; i++) piCCdfdx[2][1][i] = -0.0271880;
   piCCdfdx[1][3][2] = 0.0187723882;
   for (i = 2; i < 10; i++) piCCdfdx[2][3][i] = 0.031209;
 
   // non-zero piCCdfdy values
   // same numbers as piCCdfdx with the first two indices swapped
 
   piCCdfdy[1][2][1] = -0.026250;
   piCCdfdy[1][2][5] = -0.0271880;
   piCCdfdy[1][2][6] = -0.0271880;
   for (i = 7; i < 10; i++) piCCdfdy[1][2][i] = -0.0271880;
   piCCdfdy[3][1][2] = 0.0187723882;
   for (i = 2; i < 10; i++) piCCdfdy[3][2][i] = 0.031209;
 
   // non-zero piCCdfdz values
 
   piCCdfdz[1][1][2] = -0.0302715;
   piCCdfdz[2][1][4] = -0.0100220;
   piCCdfdz[1][2][4] = -0.0100220;
   piCCdfdz[2][1][5] = -0.0100220;
   piCCdfdz[1][2][5] = -0.0100220;
   for (i = 4; i < 9; i++) piCCdfdz[2][2][i] = -0.0033090;
 
   // make top end of piCC flat instead of zero
   // step 1: copy row i = 3 into row i = 4 for j = 0..3
   i = 4;
   for (j = 0; j < 4; j++){
       for (k = 1; k < 11; k++){
           piCCf[i][j][k] = piCCf[i-1][j][k];
       }
   }
   // step 2: for every i < j overwrite [i][j] with [j][i]
   for (i = 0; i < 4; i++){ // also enforces some symmetry
       for (j = i+1; j < 5; j++){
           for (k = 1; k < 11; k++){
               piCCf[i][j][k] = piCCf[j][i][k];
           }
       }
   }
   // step 3: fill the [4][4] corner from [3][4]
   for (k = 1; k < 11; k++) piCCf[4][4][k] = piCCf[3][4][k];
   // step 4: set k = 10 from k = 9 for every i,j
   // this overwrites whatever the k = 10 copies in steps 1-3 produced
   k = 10;
   for (i = 0; i < 5; i++){
       for (j = 0; j < 5; j++){
       piCCf[i][j][k] = piCCf[i][j][k-1];
       }
   }
 
   // non-zero piCH values
 
   piCHf[1][1][1] = -0.050;
   piCHf[1][1][2] = -0.050;
   piCHf[1][1][3] = -0.30;
   for (i = 4; i < 10; i++) piCHf[1][1][i] = -0.050;
   for (i = 5; i < 10; i++) piCHf[2][0][i] = -0.004523893758064;
   for (i = 5; i < 10; i++) piCHf[0][2][i] = -0.004523893758064;
   piCHf[2][1][2] = -0.250;
   piCHf[1][2][2] = -0.250;
   piCHf[2][1][3] = -0.250;
   piCHf[1][2][3] = -0.250;
   piCHf[3][1][1] = -0.10;
   piCHf[1][3][1] = -0.10;
   piCHf[3][1][2] = -0.125;
   piCHf[1][3][2] = -0.125;
   piCHf[3][1][3] = -0.125;
   piCHf[1][3][3] = -0.125;
   for (i = 4; i < 10; i++) piCHf[3][1][i] = -0.10;
   for (i = 4; i < 10; i++) piCHf[1][3][i] = -0.10;
 
   // make top end of piCH flat instead of zero
   // also enforces some symmetry
   // same four steps as for piCC: copy row 3 into row 4, overwrite
   // [i][j] with [j][i] for i < j, fill [4][4], then set k = 10 from k = 9
 
   i = 4;
   for (j = 0; j < 4; j++){
       for (k = 1; k < 11; k++){
           piCHf[i][j][k] = piCHf[i-1][j][k];
       }
   }
   for (i = 0; i < 4; i++){
       for (j = i+1; j < 5; j++){
           for (k = 1; k < 11; k++){
               piCHf[i][j][k] = piCHf[j][i][k];
           }
       }
   }
   for (k = 1; k < 11; k++) piCHf[4][4][k] = piCHf[3][4][k];
   k = 10;
   for (i = 0; i < 5; i++){
       for (j = 0; j < 5; j++){
       piCHf[i][j][k] = piCHf[i][j][k-1];
       }
   }
 
   // piHH has a single non-zero value
 
   piHHf[1][1][1] = 0.124915958;
 
   // T is non-zero only for first two indices [2][2]
 
   Tf[2][2][1] = -0.035140;
   for (i = 2; i < 10; i++) Tf[2][2][i] = -0.0040480;
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
    returns a byte count built from maxlocal-sized storage plus the
    size reported by each per-thread ipage entry
 ------------------------------------------------------------------------- */
 
 double PairAIREBO::memory_usage()
 {
   // one int and one int pointer per slot
 
   double total = 0.0;
   total += maxlocal * sizeof(int);
   total += maxlocal * sizeof(int *);
 
   // whatever each per-thread page reports via size()
 
   for (int tid = 0; tid < comm->nthreads; ++tid) {
     total += ipage[tid].size();
   }
 
   // two doubles per slot
 
   total += 2*maxlocal * sizeof(double);
   return total;
 }
diff --git a/src/MANYBODY/pair_bop.cpp b/src/MANYBODY/pair_bop.cpp
index c6f5f4bfc..1017e58b4 100644
--- a/src/MANYBODY/pair_bop.cpp
+++ b/src/MANYBODY/pair_bop.cpp
@@ -1,9363 +1,9363 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: D.K. Ward (donward@sandia.gov) and X.W. Zhou (Sandia)
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    The formulation for this work follows (a) D.G. Pettifor, et al., Mat.
    Sci. and Eng. A365, 2-13, (2004);(b) D.A. Murdick, et al., Phys.
    Rev. B 73, 045206 (2006);(c) D.G. Pettifor and I.I. Oleinik., Phys
    Rev. Lett. 84, 4124 (2000); (d) D.K. Ward, et al., Phys. Rev. B 85,
    115206 (2012).
 
    Copyright (2012) Sandia Corporation.  Under the terms of Contract DE-
    AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    rights in this software.
 
    pairbop v 1.0 comes with no warranty of any kind.  pairbop v 1.0 is a
    copyrighted code that is distributed free-of-charge, under the terms
    of the GNU Public License (GPL).  See "Open-Source
    Rules"_http://lammps.sandia.gov/open_source.html
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "mpi.h"
 #include "pair_bop.h"
 #include "atom.h"
 #include "neighbor.h"
 #include "neigh_request.h"
 #include "force.h"
 #include "timer.h"
 #include "comm.h"
 #include "domain.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "memory.h"
 #include "error.h"
 #include "math_special.h"
 
 using namespace LAMMPS_NS;
 using namespace MathSpecial;
 
 #define MAXLINE 1024
 #define EPSILON 1.0e-6
 
 /* ----------------------------------------------------------------------
    constructor
    sets the pair-style flags and puts every pointer and allocation flag
    that the destructor inspects into a known empty state, so that an
    instance can be destroyed safely before any setup has run
 ------------------------------------------------------------------------- */
 
 PairBOP::PairBOP(LAMMPS *lmp) : Pair(lmp)
 {
   single_enable = 0;
   one_coeff = 1;
   manybody_flag = 1;
 
   // the destructor tests these two flags unconditionally
   // without this, destroying the object before they are assigned
   // elsewhere would branch on indeterminate values
 
   allocate_sigma = 0;
   allocate_pi = 0;
 
   map = NULL;
   pi_a = NULL;
   pro_delta = NULL;
   pi_delta = NULL;
   pi_p = NULL;
   pi_c = NULL;
   sigma_r0 = NULL;
   pi_r0 = NULL;
   phi_r0 = NULL;
   sigma_rc = NULL;
   pi_rc = NULL;
   phi_rc = NULL;
   r1 = NULL;
   sigma_beta0 = NULL;
   pi_beta0 = NULL;
   phi0 = NULL;
   sigma_n = NULL;
   pi_n = NULL;
   phi_m = NULL;
   sigma_nc = NULL;
   pi_nc = NULL;
   phi_nc = NULL;
   pro = NULL;
   sigma_delta = NULL;
   sigma_c = NULL;
   sigma_a = NULL;
   sigma_g0 = NULL;
   sigma_g1 = NULL;
   sigma_g2 = NULL;
   sigma_g3 = NULL;
   sigma_g4 = NULL;
   sigma_f = NULL;
   sigma_k = NULL;
   small3 = NULL;
   rcut = NULL;
   dr = NULL;
   rdr = NULL;
   disij = NULL;
   rij = NULL;
   cosAng = NULL;
   betaS = NULL;
   dBetaS = NULL;
   betaP = NULL;
   dBetaP = NULL;
   repul = NULL;
   dRepul = NULL;
   itypeSigBk = NULL;
   nSigBk = NULL;
   sigB = NULL;
   sigB1 = NULL;
   itypePiBk = NULL;
   nPiBk = NULL;
   piB = NULL;
   pBetaS = NULL;
   pBetaS1 = NULL;
   pBetaS2 = NULL;
   pBetaS3 = NULL;
   pBetaS4 = NULL;
   pBetaS5 = NULL;
   pBetaS6 = NULL;
   pBetaP = NULL;
   pBetaP1 = NULL;
   pBetaP2 = NULL;
   pBetaP3 = NULL;
   pBetaP4 = NULL;
   pBetaP5 = NULL;
   pBetaP6 = NULL;
   pRepul = NULL;
   pRepul1 = NULL;
   pRepul2 = NULL;
   pRepul3 = NULL;
   pRepul4 = NULL;
   pRepul5 = NULL;
   pRepul6 = NULL;
   FsigBO = NULL;
   FsigBO1 = NULL;
   FsigBO2 = NULL;
   FsigBO3 = NULL;
   FsigBO4 = NULL;
   FsigBO5 = NULL;
   FsigBO6 = NULL;
   rcmin = NULL;
   rcmax = NULL;
   rcmaxp = NULL;
   setflag = NULL;
   cutsq = NULL;
   cutghost = NULL;
 
   ghostneigh = 1;
   bt_sg=NULL;
   bt_pi=NULL;
 }
 
 /* ----------------------------------------------------------------------
    destructor
    every release is guarded by its allocation flag,
    since class can be destructed when incomplete
 ------------------------------------------------------------------------- */
 
 PairBOP::~PairBOP()
 {
   if (allocated) {
     memory_theta_destroy();
     if (otfly == 0) memory->destroy(cos_index);
     delete [] map;
 
     // index, cutoff and flag arrays
 
     memory->destroy(BOP_index);
     memory->destroy(rcut);
     memory->destroy(dr);
     memory->destroy(rdr);
     memory->destroy(setflag);
     memory->destroy(cutsq);
     memory->destroy(cutghost);
 
     // pBetaS, pBetaP, pRepul and FsigBO arrays with their 1-6 variants
 
     memory->destroy(pBetaS);
     memory->destroy(pBetaS1);
     memory->destroy(pBetaS2);
     memory->destroy(pBetaS3);
     memory->destroy(pBetaS4);
     memory->destroy(pBetaS5);
     memory->destroy(pBetaS6);
     memory->destroy(pBetaP);
     memory->destroy(pBetaP1);
     memory->destroy(pBetaP2);
     memory->destroy(pBetaP3);
     memory->destroy(pBetaP4);
     memory->destroy(pBetaP5);
     memory->destroy(pBetaP6);
     memory->destroy(pRepul);
     memory->destroy(pRepul1);
     memory->destroy(pRepul2);
     memory->destroy(pRepul3);
     memory->destroy(pRepul4);
     memory->destroy(pRepul5);
     memory->destroy(pRepul6);
     memory->destroy(FsigBO);
     memory->destroy(FsigBO1);
     memory->destroy(FsigBO2);
     memory->destroy(FsigBO3);
     memory->destroy(FsigBO4);
     memory->destroy(FsigBO5);
     memory->destroy(FsigBO6);
 
     // parameter arrays released for every value of table
 
     memory->destroy(pi_a);
     memory->destroy(pro_delta);
     memory->destroy(pi_delta);
     memory->destroy(pi_p);
     memory->destroy(pi_c);
     memory->destroy(r1);
     memory->destroy(pro);
     memory->destroy(sigma_delta);
     memory->destroy(sigma_c);
     memory->destroy(sigma_a);
     memory->destroy(sigma_g0);
     memory->destroy(sigma_g1);
     memory->destroy(sigma_g2);
     memory->destroy(sigma_f);
     memory->destroy(sigma_k);
     memory->destroy(small3);
 
     // parameter arrays released only when table is 0
 
     if (table == 0) {
       memory->destroy(sigma_r0);
       memory->destroy(pi_r0);
       memory->destroy(phi_r0);
       memory->destroy(sigma_rc);
       memory->destroy(pi_rc);
       memory->destroy(phi_rc);
       memory->destroy(sigma_beta0);
       memory->destroy(pi_beta0);
       memory->destroy(phi0);
       memory->destroy(sigma_n);
       memory->destroy(pi_n);
       memory->destroy(phi_m);
       memory->destroy(sigma_nc);
       memory->destroy(pi_nc);
       memory->destroy(phi_nc);
       memory->destroy(sigma_g3);
       memory->destroy(sigma_g4);
     }
   }
 
   if (allocate_sigma) destroy_sigma();
   if (allocate_pi) destroy_pi();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBOP::compute(int eflag, int vflag)
 {
   int ago,delay,every;
   int i,j,ii,jj,iij;
   int n,inum,temp_ij,ks;
   int itype,jtype;
   tagint i_tag,j_tag;
   int *ilist,*iilist,*numneigh;
   int **firstneigh;
   double dpr1,ps;
   double ftmp1,ftmp2,ftmp3,dE;
   double dis_ij[3],rsq_ij,r_ij;
   double betaS_ij,dBetaS_ij;
   double betaP_ij,dBetaP_ij;
   double repul_ij,dRepul_ij;
   double totE;
 
   double **f = atom->f;
   double **x = atom->x;
   int *type = atom->type;
   tagint *tag = atom->tag;
   int newton_pair = force->newton_pair;
   int nlocal = atom->nlocal;
   int nall = nlocal + atom->nghost;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
   ago=neighbor->ago;
   delay=neighbor->delay;
   every=neighbor->every;
 
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   // BOP Neighbor lists must be updated every time
   // atoms are moved between processors
 
   if ((ago ==0)||bop_step==0||(ago>=delay&&(ago%every)==0)||(nall>maxnall))
     gneigh();
 
   // For non on the fly calculations cos and derivatives
   // are calculated in advance and stored
 
   if(otfly==0) theta();
   else theta_mod();
 
   // Calculate Sigma Bond-Order
 
   if(a_flag==1) {
     if (otfly==0) sigmaBo_noa();
     else sigmaBo_noa_otf();
   }
   else {
     if (otfly==0) sigmaBo();
     else sigmaBo_otf();
   }
 
   // Calculate Pi Bond-Order
 
   if (otfly==0) PiBo();
   else PiBo_otf();
 
   n=0;
   totE=0;
   for (ii = 0; ii < inum; ii++) {
     i=ilist[ii];
     i_tag=tag[i];
     itype=map[type[i]]+1;
     iilist=firstneigh[i];
     for(jj=0;jj<numneigh[i];jj++) {
       temp_ij=BOP_index[i]+jj;
       j=iilist[jj];
       j_tag=tag[j];
       jtype=map[type[j]]+1;
       if(j_tag>=i_tag) {
         if(otfly==0) {
           if(neigh_flag[temp_ij]) {
             dpr1=(dRepul[temp_ij]-2.0*dBetaS[temp_ij]*sigB[n]
                 -2.0*dBetaP[temp_ij]*piB[n])/rij[temp_ij];
             ftmp1=dpr1*disij[0][temp_ij];
             ftmp2=dpr1*disij[1][temp_ij];
             ftmp3=dpr1*disij[2][temp_ij];
             f[i][0]=f[i][0]+ftmp1;
             f[i][1]=f[i][1]+ftmp2;
             f[i][2]=f[i][2]+ftmp3;
             f[j][0]=f[j][0]-ftmp1;
             f[j][1]=f[j][1]-ftmp2;
             f[j][2]=f[j][2]-ftmp3;
 
             // add repulsive and bond order components to total energy
             // (d) Eq.1
 
             dE=-2.0*betaS[temp_ij]*sigB[n]-2.0*betaP[temp_ij]*piB[n];
             totE+=dE+repul[temp_ij];
             if(evflag) {
               ev_tally_full(i,repul[temp_ij],dE,0.0,0.0,0.0,0.0);
               ev_tally_full(j,repul[temp_ij],dE,0.0,0.0,0.0,0.0);
               ev_tally_xyz(i,j,nlocal,newton_pair,0.0,0.0,-ftmp1,-ftmp2,-ftmp3,
                   disij[0][temp_ij],disij[1][temp_ij],disij[2][temp_ij]);
             }
             n++;
           }
         }
         else {
           if(itype==jtype)
             iij=itype-1;
           else if(itype<jtype)
             iij=itype*bop_types-itype*(itype+1)/2+jtype-1;
           else
             iij=jtype*bop_types-jtype*(jtype+1)/2+itype-1;
           dis_ij[0]=x[j][0]-x[i][0];
           dis_ij[1]=x[j][1]-x[i][1];
           dis_ij[2]=x[j][2]-x[i][2];
           rsq_ij=dis_ij[0]*dis_ij[0]
               +dis_ij[1]*dis_ij[1]
               +dis_ij[2]*dis_ij[2];
           r_ij=sqrt(rsq_ij);
           if(r_ij<=rcut[iij]) {
             ps=r_ij*rdr[iij]+1.0;
             ks=(int)ps;
             if(nr-1<ks)
               ks=nr-1;
             ps=ps-ks;
             if(ps>1.0)
               ps=1.0;
             betaS_ij=((pBetaS3[iij][ks-1]*ps+pBetaS2[iij][ks-1])*ps
                 +pBetaS1[iij][ks-1])*ps+pBetaS[iij][ks-1];
             dBetaS_ij=(pBetaS6[iij][ks-1]*ps+pBetaS5[iij][ks-1])*ps
                 +pBetaS4[iij][ks-1];
             betaP_ij=((pBetaP3[iij][ks-1]*ps+pBetaP2[iij][ks-1])*ps
                 +pBetaP1[iij][ks-1])*ps+pBetaP[iij][ks-1];
             dBetaP_ij=(pBetaP6[iij][ks-1]*ps+pBetaP5[iij][ks-1])*ps
                 +pBetaP4[iij][ks-1];
             repul_ij=((pRepul3[iij][ks-1]*ps+pRepul2[iij][ks-1])*ps
                 +pRepul1[iij][ks-1])*ps+pRepul[iij][ks-1];
             dRepul_ij=(pRepul6[iij][ks-1]*ps+pRepul5[iij][ks-1])*ps
                 +pRepul4[iij][ks-1];
             dpr1=(dRepul_ij-2.0*dBetaS_ij*sigB[n]
                 -2.0*dBetaP_ij*piB[n])/r_ij;
             ftmp1=dpr1*dis_ij[0];
             ftmp2=dpr1*dis_ij[1];
             ftmp3=dpr1*dis_ij[2];
             f[i][0]=f[i][0]+ftmp1;
             f[i][1]=f[i][1]+ftmp2;
             f[i][2]=f[i][2]+ftmp3;
             f[j][0]=f[j][0]-ftmp1;
             f[j][1]=f[j][1]-ftmp2;
             f[j][2]=f[j][2]-ftmp3;
 
             // add repulsive and bond order components to total energy
             // (d) Eq. 1
 
             dE=-2.0*betaS_ij*sigB[n]-2.0*betaP_ij*piB[n];
             totE+=dE+repul_ij;
             if(evflag) {
               ev_tally_full(i,repul_ij,dE,0.0,0.0,0.0,0.0);
               ev_tally_full(j,repul_ij,dE,0.0,0.0,0.0,0.0);
               ev_tally_xyz(i,j,nlocal,newton_pair,0.0,0.0,-ftmp1,-ftmp2,-ftmp3,
                   dis_ij[0],dis_ij[1],dis_ij[2]);
             }
             n++;
           }
         }
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
   bop_step = 1;
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairBOP::allocate()
 {
   // Allocates the arrays whose sizes are fixed once the potential
   // dimensions are known:
   //   - per type-pair vectors of length npairs (rcut, dr, rdr)
   //   - (ntypes+1) x (ntypes+1) matrices indexed by LAMMPS atom type
   //   - npairs x nr tables of spline coefficients for the BetaS, BetaP
   //     and Repul pair functions (base array plus coefficient sets 1-6)
   //   - npairs x nBOt tables of spline coefficients for FsigBO
   // Sets allocated = 1 so the arrays are known to exist.
 
   allocated = 1;
   int n = atom->ntypes;
 
   memory->create(rcut,npairs,"BOP:rcut");
   memory->create(dr,npairs,"BOP:dr");
   // rdr gets its own label; it previously reused "BOP:dr"
   memory->create(rdr,npairs,"BOP:rdr");
   memory->create(setflag,n+1,n+1,"pair:setflag");
   memory->create(cutsq,n+1,n+1,"pair:cutsq");
   memory->create(cutghost,n+1,n+1,"pair:cutghost");
 
   // radial tables: nr entries per type pair
 
   memory->create(pBetaS,npairs,nr,"BOP:pBetaS");
   memory->create(pBetaS1,npairs,nr,"BOP:pBetaS1");
   memory->create(pBetaS2,npairs,nr,"BOP:pBetaS2");
   memory->create(pBetaS3,npairs,nr,"BOP:pBetaS3");
   memory->create(pBetaS4,npairs,nr,"BOP:pBetaS4");
   memory->create(pBetaS5,npairs,nr,"BOP:pBetaS5");
   memory->create(pBetaS6,npairs,nr,"BOP:pBetaS6");
   memory->create(pBetaP,npairs,nr,"BOP:pBetaP");
   memory->create(pBetaP1,npairs,nr,"BOP:pBetaP1");
   memory->create(pBetaP2,npairs,nr,"BOP:pBetaP2");
   memory->create(pBetaP3,npairs,nr,"BOP:pBetaP3");
   memory->create(pBetaP4,npairs,nr,"BOP:pBetaP4");
   memory->create(pBetaP5,npairs,nr,"BOP:pBetaP5");
   memory->create(pBetaP6,npairs,nr,"BOP:pBetaP6");
   memory->create(pRepul,npairs,nr,"BOP:pRepul");
   memory->create(pRepul1,npairs,nr,"BOP:pRepul1");
   memory->create(pRepul2,npairs,nr,"BOP:pRepul2");
   memory->create(pRepul3,npairs,nr,"BOP:pRepul3");
   memory->create(pRepul4,npairs,nr,"BOP:pRepul4");
   memory->create(pRepul5,npairs,nr,"BOP:pRepul5");
   memory->create(pRepul6,npairs,nr,"BOP:pRepul6");
 
   // FsigBO tables: nBOt entries per type pair
 
   memory->create(FsigBO,npairs,nBOt,"BOP:FsigBO");
   memory->create(FsigBO1,npairs,nBOt,"BOP:FsigBO1");
   memory->create(FsigBO2,npairs,nBOt,"BOP:FsigBO2");
   memory->create(FsigBO3,npairs,nBOt,"BOP:FsigBO3");
   memory->create(FsigBO4,npairs,nBOt,"BOP:FsigBO4");
   memory->create(FsigBO5,npairs,nBOt,"BOP:FsigBO5");
   memory->create(FsigBO6,npairs,nBOt,"BOP:FsigBO6");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairBOP::settings(int narg, char **arg)
 {
   table = 0;
   otfly = 1;
   a_flag = 0;
 
   int iarg = 0;
   while (iarg < narg) {
     if (strcmp(arg[iarg],"table") == 0) {
       table = 1;
       iarg++;
     } else if (strcmp(arg[iarg],"save") == 0) {
       otfly = 0;
       iarg++;
     } else if (strcmp(arg[iarg],"sigmaoff") == 0) {
       a_flag = 1;
       iarg++;
     } else error->all(FLERR,"Illegal pair_style command");
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs(Updated: D.K. Ward 05/06/10)
 ------------------------------------------------------------------------- */
 
 void PairBOP::coeff(int narg, char **arg)
 {
   int i,j,n;
   MPI_Comm_rank(world,&me);
   map = new int[atom->ntypes+1];
 
   if (narg < 3 + atom->ntypes)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // ensure I,J args are * *
 
   if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // read the potential file
 
   nr=2000;
   nBOt=2000;
   bop_step=0;
   nb_pi=0;
   nb_sg=0;
   allocate_sigma=0;
   allocate_pi=0;
   allocate_neigh=0;
   update_list=0;
 
   if (table == 0) read_file(arg[2]);
   else read_table(arg[2]);
 
   if (table == 0) {
     setPbetaS();
     setPbetaP();
     setPrepul();
     setSign();
   }
 
   // match element names to BOP word types
 
   if (me == 0) {
     for (i = 3; i < narg; i++) {
       if (strcmp(arg[i],"NULL") == 0) {
         map[i-2] = -1;
         continue;
       }
       for (j = 0; j < bop_types; j++)
         if (strcmp(arg[i],words[j]) == 0) break;
       map[i-2] = j;
     }
   }
 
   MPI_Bcast(&map[1],atom->ntypes,MPI_INT,0,world);
 
   if (me == 0) {
     if (words) {
       for (i = 0; i < bop_types; i++) delete [] words[i];
       delete [] words;
     }
   }
 
   // clear setflag since coeff() called once with I,J = * *
 
   n = atom->ntypes;
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       setflag[i][j] = 0;
 
   // set setflag i,j for type pairs where both are mapped to elements
 
   int count = 0;
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       if (map[i] >= 0 && map[j] >= 0) {
         setflag[i][j] = 1;
         count++;
       }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairBOP::init_style()
 {
   // Verifies the run-time prerequisites of this pair style and requests
   // the neighbor list it needs.
 
   // abort unless atom IDs are enabled and newton_pair is on
 
   if (atom->tag_enable == 0)
     error->all(FLERR,"Pair style BOP requires atom IDs");
   if (force->newton_pair == 0)
     error->all(FLERR,"Pair style BOP requires newton pair on");
 
   // check that user sets comm->cutghostuser to 3x the max BOP cutoff
   // (EPSILON is subtracted as a tolerance on the comparison)
 
   if (comm->cutghostuser < 3.0*cutmax - EPSILON) {
     char str[128];
     sprintf(str,"Pair style bop requires comm ghost cutoff "
             "at least 3x larger than %g",cutmax);
     error->all(FLERR,str);
   }
 
   // need a full neighbor list and neighbors of ghosts
   // (half = 0 and full = 1 select the full list, ghost = 1 the ghost neighbors)
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
   neighbor->requests[irequest]->ghost = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairBOP::init_one(int i, int j)
 {
   // Returns the cutoff for LAMMPS type pair (i,j) and stores it, and its
   // square, symmetrically in cutghost and cutsq.
 
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   // 1-based BOP element indices of the two types, ordered so lo <= hi
 
   const int ei = map[i]+1;
   const int ej = map[j]+1;
   const int lo = (ei < ej) ? ei : ej;
   const int hi = (ei < ej) ? ej : ei;
 
   // like pairs use slots 0..bop_types-1 of the pair tables,
   // unlike pairs are packed after them
 
   int ij = lo-1;
   if (lo != hi) ij = lo*bop_types-lo*(lo+1)/2+hi-1;
 
   cutghost[i][j] = cutghost[j][i] = rcut[ij];
   cutsq[i][j] = cutsq[j][i] = rcut[ij]*rcut[ij];
   return rcut[ij];
 }
 
 /* ----------------------------------------------------------------------
    create BOP neighbor list from main neighbor list
    BOP neighbor list stores neighbors of ghost atoms
    BOP requires neighbors of k if k is a neighbor of
    j and j is a neighbor of i
 ------------------------------------------------------------------------- */
 
 void PairBOP::gneigh()
 {
   int i,ii;
   int *ilist,*numneigh;
   int nlocal = atom->nlocal;
   int nall = nlocal + atom->nghost;
 
   if(allocate_neigh==0) {
     memory->create (BOP_index,nall,"BOP_index");
     if (otfly==0) memory->create (cos_index,nall,"cos_index");
     allocate_neigh=1;
   }
   else {
     memory->grow (BOP_index,nall,"BOP_index");
     if (otfly==0) memory->grow (cos_index,nall,"cos_index");
     allocate_neigh=1;
   }
   ilist = list->ilist;
   numneigh = list->numneigh;
   if(bop_step==0) {
     maxneigh=0;
     maxnall=0;
   }
   neigh_total=0;
   cos_total=0;
   for (ii = 0; ii < nall; ii++) {
     if (ii < nlocal) {
       i=ilist[ii];
       if(numneigh[i]>maxneigh) maxneigh=numneigh[i];
     } else {
       i=ii;
       if(numneigh[i]>maxneigh) maxneigh=numneigh[i];
     }
     BOP_index[i]=neigh_total;
     neigh_total+=numneigh[i];
     if(otfly==0) {
       cos_index[i]=cos_total;
       cos_total+=numneigh[i]*(numneigh[i]-1)/2;
     }
   }
   maxnall=nall;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBOP::theta()
 {
   // Precomputes and stores per-pair and per-angle quantities for every
   // atom (local and ghost):
   //   pass 1: separation vector disij, distance rij, cutoff flag
   //           neigh_flag, and the interpolated pair functions
   //           betaS/dBetaS, betaP/dBetaP, repul/dRepul for each i-j pair
   //   pass 2: cosAng and dcAng for each unordered pair (j,k) of
   //           neighbors of i
   // Pair entries are addressed through BOP_index, angle entries through
   // cos_index.
 
   int i,j,ii,jj,kk;
   int itype,jtype,i12;
   int temp_ij,temp_ik,temp_ijk;
   int n,nlocal,nall,ks;
   int *ilist,*numneigh;
   int *iilist;
   int **firstneigh;
   double rj2,rk2,rsq,ps;
   double rj1k1,rj2k2;
   double **x = atom->x;
   int *type = atom->type;
 
   nlocal = atom->nlocal;
   nall = nlocal+atom->nghost;
   ilist = list->ilist;
   firstneigh = list->firstneigh;
   numneigh = list->numneigh;
 
   // update_list != 0: resize the existing work arrays, otherwise create them
 
   if(update_list!=0)
     memory_theta_grow();
   else
     memory_theta_create();
 
   // pass 1: pair quantities
 
   for (ii = 0; ii < nall; ii++) {
 
     // local atoms are taken from ilist, ghost atoms by their own index
 
     if(ii<nlocal)
       i= ilist[ii];
     else
       i=ii;
     itype = map[type[i]]+1;
 
     iilist=firstneigh[i];
     for(jj=0;jj<numneigh[i];jj++) {
       j=iilist[jj];
       temp_ij=BOP_index[i]+jj;
       jtype = map[type[j]]+1;
 
       // i12 = index of the (itype,jtype) element pair in the pair tables
 
       if(itype==jtype)
         i12=itype-1;
       else if(itype<jtype)
         i12=itype*bop_types-itype*(itype+1)/2+jtype-1;
       else
         i12=jtype*bop_types-jtype*(jtype+1)/2+itype-1;
       if(i12>=npairs) {
         error->one(FLERR,"Too many atom pairs for pair bop");
       }
 
       // separation vector points from i to j
 
       disij[0][temp_ij]=x[j][0]-x[i][0];
       disij[1][temp_ij]=x[j][1]-x[i][1];
       disij[2][temp_ij]=x[j][2]-x[i][2];
       rsq=disij[0][temp_ij]*disij[0][temp_ij]
           +disij[1][temp_ij]*disij[1][temp_ij]
           +disij[2][temp_ij]*disij[2][temp_ij];
       rij[temp_ij]=sqrt(rsq);
 
       // neigh_flag marks pairs that lie within the pair cutoff
 
       if(rij[temp_ij]<=rcut[i12])
         neigh_flag[temp_ij]=1;
       else
         neigh_flag[temp_ij]=0;
 
       // ks = 1-based table interval (capped at nr-1),
       // ps = fractional position within it (capped at 1.0)
 
       ps=rij[temp_ij]*rdr[i12]+1.0;
       ks=(int)ps;
 
       if(nr-1<ks)
         ks=nr-1;
       ps=ps-ks;
       if(ps>1.0)
         ps=1.0;
 
       // values: cubic in ps from coefficient sets 3,2,1 and the base table
       // d-terms: quadratic in ps from coefficient sets 6,5,4
       // (presumably the radial derivatives -- confirm against the table setup)
 
       betaS[temp_ij]=((pBetaS3[i12][ks-1]*ps+pBetaS2[i12][ks-1])*ps
           +pBetaS1[i12][ks-1])*ps+pBetaS[i12][ks-1];
       dBetaS[temp_ij]=(pBetaS6[i12][ks-1]*ps+pBetaS5[i12][ks-1])*ps
           +pBetaS4[i12][ks-1];
       betaP[temp_ij]=((pBetaP3[i12][ks-1]*ps+pBetaP2[i12][ks-1])*ps
           +pBetaP1[i12][ks-1])*ps+pBetaP[i12][ks-1];
       dBetaP[temp_ij]=(pBetaP6[i12][ks-1]*ps+pBetaP5[i12][ks-1])*ps
           +pBetaP4[i12][ks-1];
       repul[temp_ij]=((pRepul3[i12][ks-1]*ps+pRepul2[i12][ks-1])*ps
           +pRepul1[i12][ks-1])*ps+pRepul[i12][ks-1];
       dRepul[temp_ij]=(pRepul6[i12][ks-1]*ps+pRepul5[i12][ks-1])*ps
           +pRepul4[i12][ks-1];
     }
   }
 
   // pass 2: angle quantities for every neighbor pair jj < kk of atom i
 
   for (ii = 0; ii < nall; ii++) {
 
     // n counts the (jj,kk) pairs of this atom in loop order
 
     n=0;
     if(ii<nlocal)
       i= ilist[ii];
     else
       i=ii;
     iilist=firstneigh[i];
     for(jj=0;jj<numneigh[i];jj++) {
       j=iilist[jj];
       temp_ij=BOP_index[i]+jj;
       rj2=rij[temp_ij]*rij[temp_ij];
       for(kk=jj+1;kk<numneigh[i];kk++) {
 
         // guard against writing past the angle arrays (the same bound is
         // re-checked twice more below)
 
         if(cos_index[i]+n>=cos_total) {
           error->one(FLERR,"Too many atom triplets for pair bop");
         }
         temp_ik=BOP_index[i]+kk;
         temp_ijk=cos_index[i]+n;
         if(temp_ijk>=cos_total) {
           error->one(FLERR,"Too many atom triplets for pair bop");
         }
         rk2=rij[temp_ik]*rij[temp_ik];
         rj1k1=rij[temp_ij]*rij[temp_ik];
         rj2k2=rj1k1*rj1k1;
         if(temp_ijk>=cos_total) {
           error->one(FLERR,"Too many atom triplets for pair bop");
         }
 
         // cosine of the j-i-k angle: dot(r_ij,r_ik)/(|r_ij||r_ik|)
 
         cosAng[temp_ijk]=(disij[0][temp_ij]*disij[0][temp_ik]+disij[1][temp_ij]
             *disij[1][temp_ik]+disij[2][temp_ij]*disij[2][temp_ik])/rj1k1;
 
         // dcAng[..][c][0]: derivative of the cosine w.r.t. component c of r_ij
 
         dcAng[temp_ijk][0][0]=(disij[0][temp_ik]*rj1k1-cosAng[temp_ijk]
               *disij[0][temp_ij]*rk2)/(rj2k2);
         dcAng[temp_ijk][1][0]=(disij[1][temp_ik]*rj1k1-cosAng[temp_ijk]
             *disij[1][temp_ij]*rk2)/(rj2k2);
         dcAng[temp_ijk][2][0]=(disij[2][temp_ik]*rj1k1-cosAng[temp_ijk]
             *disij[2][temp_ij]*rk2)/(rj2k2);
 
         // dcAng[..][c][1]: derivative of the cosine w.r.t. component c of r_ik
 
         dcAng[temp_ijk][0][1]=(disij[0][temp_ij]*rj1k1-cosAng[temp_ijk]
             *disij[0][temp_ik]*rj2)/(rj2k2);
         dcAng[temp_ijk][1][1]=(disij[1][temp_ij]*rj1k1-cosAng[temp_ijk]
             *disij[1][temp_ik]*rj2)/(rj2k2);
         dcAng[temp_ijk][2][1]=(disij[2][temp_ij]*rj1k1-cosAng[temp_ijk]
             *disij[2][temp_ik]*rj2)/(rj2k2);
         n++;
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBOP::theta_mod()
 {
   // Only prepares the theta work arrays:
   // update_list == 0 creates them, any other value resizes them.
 
   if (update_list == 0) {
     memory_theta_create();
     return;
   }
   memory_theta_grow();
 }
 
 /* ---------------------------------------------------------------------- */
 
 /*  The formulation differs slightly to avoid negative square roots
     in the calculation of Sigma^(1/2) of (a) Eq. 6 and (b) Eq. 11 */
 
 void PairBOP::sigmaBo()
 {
   int nb_t,new_n_tot;
   int n,i,j,k,kp,m,pp,kkp;
   int iij,ji,ki;
   int itmp,jtmp,ktmp,ltmp,mtmp;
   tagint i_tag,j_tag;
   int ngi,ngj,ngk,nglkp,ngli,nglj,ngl;
   int ngji,ngjk,nikj,ngki,ngkj,ngjkp;
   int ngkpk,ngkpj,ngkkp,nglk;
   int njik,nijk,nikkp,nkp,nijkp;
   int nkikp,njikp,nk0;
   int njkpk,nkjkp,njkkp;
   int jNeik,kNeii,kNeij,kNeikp;
   int kpNeij,kpNeik;
   int new1,new2,nlocal;
   int inum,*ilist,*iilist,*jlist,*klist,*kplist;
   int **firstneigh,*numneigh;
   int temp_ji,temp_ikp,temp_kkp;
   int temp_ij,temp_ik,temp_jkp,temp_kk,temp_jk;
   int ang_ijkp,ang_ikkp,ang_jkpk,ang_kjkp;
   int ang_ijk,ang_ikj,ang_jikp,ang_jkkp;
   int ang_jik,ang_kikp;
   int nb_ij,nb_ik,nb_ikp;
   int nb_jk,nb_jkp,nb_kkp;
   int nsearch;
   int sig_flag,setting,ncmp,ks;
   int itype,jtype,ktype,kptype;
   int bt_i,bt_j,bt_ij;
   int kp_index,same_ikp,same_jkp;
   int same_kkp;
   double AA,BB,CC,DD,EE,EE1,FF;
   double AAC,BBC,CCC,DDC,EEC,FFC,GGC;
   double AACFF,UT,bndtmp,UTcom;
   double amean,gmean0,gmean1,gmean2,ps;
   double gfactor1,gprime1,gsqprime;
   double gfactorsq,gfactor2,gprime2;
   double gfactorsq2,gsqprime2;
   double gfactor3,gprime3,gfactor,rfactor;
   double drfactor,gfactor4,gprime4,agpdpr3;
   double rfactor0,rfactorrt,rfactor1rt,rfactor1;
   double rcm1,rcm2,gcm1,gcm2,gcm3;
   double agpdpr1,agpdpr2,app1,app2,app3,app4;
   double dsigB1,dsigB2;
   double part0,part1,part2,part3,part4;
   double psign,bndtmp0,pp1;
   double bndtmp1,bndtmp2,bndtmp3,bndtmp4,bndtmp5;
   double ftmp[3];
   double **x = atom->x;
   double **f = atom->f;
   tagint *tag = atom->tag;
   int newton_pair = force->newton_pair;
   int *type = atom->type;
 
   nlocal = atom->nlocal;
   firstneigh = list->firstneigh;
   numneigh = list->numneigh;
   inum = list->inum;
   ilist = list->ilist;
   n=0;
 
 //loop over all local atoms
 
   if(nb_sg>16) {
     nb_sg=16;
   }
   if(nb_sg==0) {
     nb_sg=(maxneigh)*(maxneigh/2);
   }
   if(allocate_sigma) {
     destroy_sigma();
   }
   create_sigma(nb_sg);
   for(itmp=0;itmp<inum;itmp++) {
     i = ilist[itmp];
     i_tag=tag[i];
     itype = map[type[i]]+1;
 
 //j is loop over all neighbors of i
 
     for(jtmp=0;jtmp<numneigh[i];jtmp++) {
       temp_ij=BOP_index[i]+jtmp;
       if(neigh_flag[temp_ij]) {
         for(m=0;m<nb_sg;m++) {
           for(pp=0;pp<3;pp++) {
             bt_sg[m].dAA[pp]=0.0;
             bt_sg[m].dBB[pp]=0.0;
             bt_sg[m].dCC[pp]=0.0;
             bt_sg[m].dDD[pp]=0.0;
             bt_sg[m].dEE[pp]=0.0;
             bt_sg[m].dEE1[pp]=0.0;
             bt_sg[m].dFF[pp]=0.0;
             bt_sg[m].dAAC[pp]=0.0;
             bt_sg[m].dBBC[pp]=0.0;
             bt_sg[m].dCCC[pp]=0.0;
             bt_sg[m].dDDC[pp]=0.0;
             bt_sg[m].dEEC[pp]=0.0;
             bt_sg[m].dFFC[pp]=0.0;
             bt_sg[m].dGGC[pp]=0.0;
             bt_sg[m].dUT[pp]=0.0;
             bt_sg[m].dSigB1[pp]=0.0;
             bt_sg[m].dSigB[pp]=0.0;
           }
           bt_sg[m].i=-1;
           bt_sg[m].j=-1;
           bt_sg[m].temp=-1;
         }
         nb_t=0;
         iilist=firstneigh[i];
         j=iilist[jtmp];
         jlist=firstneigh[j];
         for(ki=0;ki<numneigh[j];ki++) {
           if(x[jlist[ki]][0]==x[i][0]) {
             if(x[jlist[ki]][1]==x[i][1]) {
               if(x[jlist[ki]][2]==x[i][2]) {
                 break;
               }
             }
           }
         }
         j_tag=tag[j];
         jtype = map[type[j]]+1;
         nb_ij=nb_t;
         nb_t++;
         if(nb_t>nb_sg) {
           new_n_tot=nb_sg+maxneigh;
           grow_sigma(nb_sg,new_n_tot);
           nb_sg=new_n_tot;
         }
         bt_sg[nb_ij].temp=temp_ij;
         bt_sg[nb_ij].i=i;
         bt_sg[nb_ij].j=j;
         if(j_tag>=i_tag) {
           if(itype==jtype)
             iij=itype-1;
           else if(itype<jtype)
             iij=itype*bop_types-itype*(itype+1)/2+jtype-1;
           else
             iij=jtype*bop_types-jtype*(jtype+1)/2+itype-1;
           for(ji=0;ji<numneigh[j];ji++) {
             temp_ji=BOP_index[j]+ji;
             if(x[jlist[ji]][0]==x[i][0]) {
               if(x[jlist[ji]][1]==x[i][1]) {
                 if(x[jlist[ji]][2]==x[i][2]) {
                   break;
                 }
               }
             }
           }
           nSigBk[n]=0;
 
 //AA-EE1 are the components making up Eq. 30 (a)
 
           AA=0.0;
           BB=0.0;
           CC=0.0;
           DD=0.0;
           EE=0.0;
           EE1=0.0;
 
 //FF is the Beta_sigma^2 term
 
           FF=betaS[temp_ij]*betaS[temp_ij];
 
 //agpdpr1 is derivative of FF w.r.t. r_ij
 
           agpdpr1=2.0*betaS[temp_ij]*dBetaS[temp_ij]/rij[temp_ij];
 
 //dXX derivatives are taken with respect to all pairs contributing to the energy
 //nb_ij is derivative w.r.t. ij pair
 
           bt_sg[nb_ij].dFF[0]=agpdpr1*disij[0][temp_ij];
           bt_sg[nb_ij].dFF[1]=agpdpr1*disij[1][temp_ij];
           bt_sg[nb_ij].dFF[2]=agpdpr1*disij[2][temp_ij];
 
 //k is loop over all neighbors of i again with j neighbor of i
 
           for(ktmp=0;ktmp<numneigh[i];ktmp++) {
             temp_ik=BOP_index[i]+ktmp;
             if(neigh_flag[temp_ik]) {
               if(ktmp!=jtmp) {
                 if(jtmp<ktmp) {
                   njik=jtmp*(2*numneigh[i]-jtmp-1)/2+(ktmp-jtmp)-1;
                   ngj=0;
                   ngk=1;
                 }
                 else {
                   njik=ktmp*(2*numneigh[i]-ktmp-1)/2+(jtmp-ktmp)-1;
                   ngj=1;
                   ngk=0;
                 }
                 k=iilist[ktmp];
                 ktype = map[type[k]]+1;
 
 //find neighbor of k that is equal to i
 
                 klist=firstneigh[k];
                 for(kNeii=0;kNeii<numneigh[k];kNeii++) {
                   if(x[klist[kNeii]][0]==x[i][0]) {
                     if(x[klist[kNeii]][1]==x[i][1]) {
                       if(x[klist[kNeii]][2]==x[i][2]) {
                         break;
                       }
                     }
                   }
                 }
 
 //find neighbor of i that is equal to k
 
                 for(jNeik=0;jNeik<numneigh[j];jNeik++) {
                   temp_jk=BOP_index[j]+jNeik;
                   if(x[jlist[jNeik]][0]==x[k][0]) {
                     if(x[jlist[jNeik]][1]==x[k][1]) {
                       if(x[jlist[jNeik]][2]==x[k][2]) {
                         break;
                       }
                     }
                   }
                 }
 
 //find neighbor of k that is equal to j
 
                 for(kNeij=0;kNeij<numneigh[k];kNeij++) {
                   if(x[klist[kNeij]][0]==x[j][0]) {
                     if(x[klist[kNeij]][1]==x[j][1]) {
                       if(x[klist[kNeij]][2]==x[j][2]) {
                         break;
                       }
                     }
                   }
                 }
                 sig_flag=0;
                 for(nsearch=0;nsearch<nSigBk[n];nsearch++) {
                   ncmp=itypeSigBk[n][nsearch];
                   if(x[ncmp][0]==x[k][0]) {
                     if(x[ncmp][1]==x[k][1]) {
                       if(x[ncmp][2]==x[k][2]) {
                         nk0=nsearch;
                         sig_flag=1;
                         break;
                       }
                     }
                   }
                 }
                 if(sig_flag==0) {
                   nSigBk[n]=nSigBk[n]+1;
                   nk0=nSigBk[n]-1;
                   itypeSigBk[n][nk0]=k;
                 }
                 nb_ik=nb_t;
                 nb_t++;
                 if(nb_t>nb_sg) {
                   new_n_tot=nb_sg+maxneigh;
                   grow_sigma(nb_sg,new_n_tot);
                   nb_sg=new_n_tot;
                 }
                 bt_sg[nb_ik].temp=temp_ik;
                 bt_sg[nb_ik].i=i;
                 bt_sg[nb_ik].j=k;
                 nb_jk=nb_t;
                 nb_t++;
                 if(nb_t>nb_sg) {
                   new_n_tot=nb_sg+maxneigh;
                   grow_sigma(nb_sg,new_n_tot);
                   nb_sg=new_n_tot;
                 }
                 bt_sg[nb_jk].temp=temp_jk;
                 bt_sg[nb_jk].i=j;
                 bt_sg[nb_jk].j=k;
                 ang_jik=cos_index[i]+njik;
                 gmean0=sigma_g0[jtype-1][itype-1][ktype-1];
                 gmean1=sigma_g1[jtype-1][itype-1][ktype-1];
                 gmean2=sigma_g2[jtype-1][itype-1][ktype-1];
                 amean=cosAng[ang_jik];
                 gfactor1=gmean0+gmean1*amean
                     +gmean2*amean*amean;
                 gfactorsq=gfactor1*gfactor1;
                 gprime1=gmean1+2.0*gmean2*amean;
                 gsqprime=2.0*gfactor1*gprime1;
 
 //AA is Eq. 34 (a) or Eq. 10 (c) for the i atom
 //1st CC is Eq. 11 (c) for i atom where j & k=neighbor of i
 
                 AA=AA+gfactorsq*betaS[temp_ik]*betaS[temp_ik];
                 CC=CC+gfactorsq*betaS[temp_ik]*betaS[temp_ik]*betaS[temp_ik]*betaS[temp_ik];
 
 //agpdpr1 is derivative of AA w.r.t. Beta(rik)
 //agpdpr2 is derivative of CC 1st term w.r.t. Beta(rik)
 //app1 is derivative of AA w.r.t. cos(theta_jik)
 //app2 is derivative of CC 1st term w.r.t. cos(theta_jik)
 
                 agpdpr1=2.0*gfactorsq*betaS[temp_ik]*dBetaS[temp_ik]/rij[temp_ik];
                 agpdpr1=2.0*betaS[temp_ik]*betaS[temp_ik]*agpdpr1;
                 app1=betaS[temp_ik]*betaS[temp_ik]*gsqprime;
                 app1=betaS[temp_ik]*betaS[temp_ik]*app1;
                 bt_sg[nb_ij].dAA[0]+=
                     app1*dcAng[ang_jik][0][ngj];
                 bt_sg[nb_ij].dAA[1]+=
                     app1*dcAng[ang_jik][1][ngj];
                 bt_sg[nb_ij].dAA[2]+=
                     app1*dcAng[ang_jik][2][ngj];
                 bt_sg[nb_ij].dCC[0]+=
                     app2*dcAng[ang_jik][0][ngj];
                 bt_sg[nb_ij].dCC[1]+=
                     app2*dcAng[ang_jik][1][ngj];
                 bt_sg[nb_ij].dCC[2]+=
                     app2*dcAng[ang_jik][2][ngj];
                 bt_sg[nb_ik].dAA[0]+=
                     app1*dcAng[ang_jik][0][ngk]
                     +agpdpr1*disij[0][temp_ik];
                 bt_sg[nb_ik].dAA[1]+=
                     app1*dcAng[ang_jik][1][ngk]
                     +agpdpr1*disij[1][temp_ik];
                 bt_sg[nb_ik].dAA[2]+=
                     app1*dcAng[ang_jik][2][ngk]
                     +agpdpr1*disij[2][temp_ik];
                 bt_sg[nb_ik].dCC[0]+=
                     app2*dcAng[ang_jik][0][ngk]
                     +agpdpr2*disij[0][temp_ik];
                 bt_sg[nb_ik].dCC[1]+=
                     app2*dcAng[ang_jik][1][ngk]
                     +agpdpr2*disij[1][temp_ik];
                 bt_sg[nb_ik].dCC[2]+=
                     app2*dcAng[ang_jik][2][ngk]
                     +agpdpr2*disij[2][temp_ik];
 
 //k' is loop over neighbors all neighbors of j with k a neighbor
 //of i and j a neighbor of i and determine which k' is k
 
                 kp_index=0;
                 for(ltmp=0;ltmp<numneigh[j];ltmp++) {
                   temp_jkp=BOP_index[j]+ltmp;
                   kp=jlist[ltmp];
                   if(x[kp][0]==x[k][0]) {
                     if(x[kp][1]==x[k][1]) {
                       if(x[kp][2]==x[k][2]) {
                         kp_index=1;
                         break;
                       }
                     }
                   }
                 }
                 if(kp_index) {
 
 //loop over neighbors of k
 
                   for(mtmp=0;mtmp<numneigh[k];mtmp++) {
                     kp=klist[mtmp];
                     if(x[kp][0]==x[j][0]) {
                       if(x[kp][1]==x[j][1]) {
                         if(x[kp][2]==x[j][2]) {
                           break;
                         }
                       }
                     }
                   }
                   if(ki<ltmp) {
                     nijk=ki*(2*numneigh[j]-ki-1)/2+(ltmp-ki)-1;
                     ngji=0;
                     ngjk=1;
                   }
                   else {
                     nijk=ltmp*(2*numneigh[j]-ltmp-1)/2+(ki-ltmp)-1;
                     ngji=1;
                     ngjk=0;
                   }
                   if(kNeii<mtmp) {
                     nikj=kNeii*(2*numneigh[k]-kNeii-1)/2+(mtmp-kNeii)-1;
                     ngki=0;
                     ngkj=1;
                   }
                   else {
                     nikj=mtmp*(2*numneigh[k]-mtmp-1)/2+(kNeii-mtmp)-1;
                     ngki=1;
                     ngkj=0;
                   }
                   ang_ijk=cos_index[j]+nijk;
                   gmean0=sigma_g0[itype-1][jtype-1][ktype-1];
                   gmean1=sigma_g1[itype-1][jtype-1][ktype-1];
                   gmean2=sigma_g2[itype-1][jtype-1][ktype-1];
                   amean=cosAng[ang_ijk];
                   gfactor2=gmean0+gmean1*amean
                       +gmean2*amean*amean;
                   gprime2=gmean1+2.0*gmean2*amean;
                   gmean0=sigma_g0[itype-1][ktype-1][jtype-1];
                   gmean1=sigma_g1[itype-1][ktype-1][jtype-1];
                   gmean2=sigma_g2[itype-1][ktype-1][jtype-1];
                   ang_ikj=cos_index[k]+nikj;
                   amean=cosAng[ang_ikj];
                   gfactor3=gmean0+gmean1*amean
                       +gmean2*amean*amean;
                   gprime3=gmean1+2.0*gmean2*amean;
                   gfactor=gfactor1*gfactor2*gfactor3;
                   rfactor=betaS[temp_ik]*betaS[temp_jkp];
 
 //EE1 is (b) Eq. 12
 
                   EE1=EE1+gfactor*rfactor;
 
 //rcm2 is derivative of EE1 w.r.t Beta(r_jk')
 //gcm1 is derivative of EE1 w.r.t cos(theta_jik)
 //gcm2 is derivative of EE1 w.r.t cos(theta_ijk)
 //gcm3 is derivative of EE1 w.r.t cos(theta_ikj)
 
                   rcm1=gfactor*betaS[temp_jkp]*dBetaS[temp_ik]/rij[temp_ik];
                   rcm2=gfactor*betaS[temp_ik]*dBetaS[temp_jkp]/rij[temp_jkp];
                   gcm1=rfactor*gprime1*gfactor2*gfactor3;
                   gcm2=rfactor*gfactor1*gprime2*gfactor3;
                   gcm3=rfactor*gfactor1*gfactor2*gprime3;
                   bt_sg[nb_ij].dEE1[0]+=
                       gcm1*dcAng[ang_jik][0][ngj]
                       -gcm2*dcAng[ang_ijk][0][ngji];
                   bt_sg[nb_ij].dEE1[1]+=
                       gcm1*dcAng[ang_jik][1][ngj]
                       -gcm2*dcAng[ang_ijk][1][ngji];
                   bt_sg[nb_ij].dEE1[2]+=
                       gcm1*dcAng[ang_jik][2][ngj]
                       -gcm2*dcAng[ang_ijk][2][ngji];
                   bt_sg[nb_ik].dEE1[0]+=
                       gcm1*dcAng[ang_jik][0][ngk]
                       +rcm1*disij[0][temp_ik]
                       -gcm3*dcAng[ang_ikj][0][ngki];
                   bt_sg[nb_ik].dEE1[1]+=
                       gcm1*dcAng[ang_jik][1][ngk]
                       +rcm1*disij[1][temp_ik]
                       -gcm3*dcAng[ang_ikj][1][ngki];
                   bt_sg[nb_ik].dEE1[2]+=
                       gcm1*dcAng[ang_jik][2][ngk]
                       +rcm1*disij[2][temp_ik]
                       -gcm3*dcAng[ang_ikj][2][ngki];
                   bt_sg[nb_jk].dEE1[0]+=
                       gcm2*dcAng[ang_ijk][0][ngjk]
                       +rcm2*disij[0][temp_jkp]
                       -gcm3*dcAng[ang_ikj][0][ngkj];
                   bt_sg[nb_jk].dEE1[1]+=
                       gcm2*dcAng[ang_ijk][1][ngjk]
                       +rcm2*disij[1][temp_jkp]
                       -gcm3*dcAng[ang_ikj][1][ngkj];
                   bt_sg[nb_jk].dEE1[2]+=
                       gcm2*dcAng[ang_ijk][2][ngjk]
                       +rcm2*disij[2][temp_jkp]
                       -gcm3*dcAng[ang_ikj][2][ngkj];
                 }
 
 // k and k' and j are all different neighbors of i
 
                 for(ltmp=0;ltmp<ktmp;ltmp++) {
                   if(ltmp!=jtmp) {
                     temp_ikp=BOP_index[i]+ltmp;
                     if(neigh_flag[temp_ikp]) {
                       kp=iilist[ltmp];
                       kptype = map[type[kp]]+1;
                       for(nsearch=0;nsearch<nSigBk[n];nsearch++) {
                         ncmp=itypeSigBk[n][nsearch];
                         if(x[ncmp][0]==x[kp][0]) {
                           if(x[ncmp][1]==x[kp][1]) {
                             if(x[ncmp][2]==x[kp][2]) {
                               break;
                             }
                           }
                         }
                       }
                       if(jtmp<ltmp) {
                         njikp=jtmp*(2*numneigh[i]-jtmp-1)/2+(ltmp-jtmp)-1;
                         nglj=0;
                         ngl=1;
                       }
                       else {
                         njikp=ltmp*(2*numneigh[i]-ltmp-1)/2+(jtmp-ltmp)-1;
                         nglj=1;
                         ngl=0;
                       }
                       if(ktmp<ltmp) {
                         nkikp=ktmp*(2*numneigh[i]-ktmp-1)/2+(ltmp-ktmp)-1;
                         nglk=0;
                         nglkp=1;
                       }
                       else {
                         nkikp=ltmp*(2*numneigh[i]-ltmp-1)/2+(ktmp-ltmp)-1;
                         nglk=1;
                         nglkp=0;
                       }
                       ang_jikp=cos_index[i]+njikp;
                       nb_ikp=nb_t;
                       nb_t++;
                       if(nb_t>nb_sg) {
                         new_n_tot=nb_sg+maxneigh;
                         grow_sigma(nb_sg,new_n_tot);
                         nb_sg=new_n_tot;
                       }
                       bt_sg[nb_ikp].temp=temp_ikp;
                       bt_sg[nb_ikp].i=i;
                       bt_sg[nb_ikp].j=kp;
                       gmean0=sigma_g0[jtype-1][itype-1][kptype-1];
                       gmean1=sigma_g1[jtype-1][itype-1][kptype-1];
                       gmean2=sigma_g2[jtype-1][itype-1][kptype-1];
                       amean=cosAng[ang_jikp];
                       gfactor2=gmean0+gmean1*amean
                           +gmean2*amean*amean;
                       gprime2=gmean1+2.0*gmean2*amean;
                       gmean0=sigma_g0[ktype-1][itype-1][kptype-1];
                       gmean1=sigma_g1[ktype-1][itype-1][kptype-1];
                       gmean2=sigma_g2[ktype-1][itype-1][kptype-1];
                       ang_kikp=cos_index[i]+nkikp;
                       amean=cosAng[ang_kikp];
                       gfactor3=gmean0+gmean1*amean
                           +gmean2*amean*amean;
                       gprime3=gmean1+2.0*gmean2*amean;
                       gfactor=gfactor1*gfactor2*gfactor3;
                       rfactorrt=betaS[temp_ik]*betaS[temp_ikp];
                       rfactor=rfactorrt*rfactorrt;
 
 //2nd CC is second term of Eq. 11 (c) for i atom where j , k & k' =neighbor of i
 
                       CC=CC+2.0*gfactor*rfactor;
 
 //agpdpr1 is derivative of CC 2nd term w.r.t. Beta(r_ik)
 //agpdpr2 is derivative of CC 2nd term w.r.t. Beta(r_ik')
 //app1 is derivative of CC 2nd term w.r.t. cos(theta_jik)
 //app2 is derivative of CC 2nd term w.r.t. cos(theta_jik')
 //app3 is derivative of CC 2nd term w.r.t. cos(theta_kik')
 
                       agpdpr1=4.0*gfactor*rfactorrt*betaS[temp_ikp]
                           *dBetaS[temp_ik]/rij[temp_ik];
                       agpdpr2=4.0*gfactor*rfactorrt*betaS[temp_ik]
                           *dBetaS[temp_ikp]/rij[temp_ikp];
                       app1=2.0*rfactor*gfactor2*gfactor3*gprime1;
                       app2=2.0*rfactor*gfactor1*gfactor3*gprime2;
                       app3=2.0*rfactor*gfactor1*gfactor2*gprime3;
                       bt_sg[nb_ij].dCC[0]+=
                           app1*dcAng[ang_jik][0][ngj]
                           +app2*dcAng[ang_jikp][0][nglj];
                       bt_sg[nb_ij].dCC[1]+=
                           app1*dcAng[ang_jik][1][ngj]
                           +app2*dcAng[ang_jikp][1][nglj];
                       bt_sg[nb_ij].dCC[2]+=
                           app1*dcAng[ang_jik][2][ngj]
                           +app2*dcAng[ang_jikp][2][nglj];
                       bt_sg[nb_ik].dCC[0]+=
                           app1*dcAng[ang_jik][0][ngk]
                           +app3*dcAng[ang_kikp][0][nglk]
                           +agpdpr1*disij[0][temp_ik];
                       bt_sg[nb_ik].dCC[1]+=
                           app1*dcAng[ang_jik][1][ngk]
                           +app3*dcAng[ang_kikp][1][nglk]
                           +agpdpr1*disij[1][temp_ik];
                       bt_sg[nb_ik].dCC[2]+=
                           app1*dcAng[ang_jik][2][ngk]
                           +app3*dcAng[ang_kikp][2][nglk]
                           +agpdpr1*disij[2][temp_ik];
                       bt_sg[nb_ikp].dCC[0]+=
                           app2*dcAng[ang_jikp][0][ngl]
                           +app3*dcAng[ang_kikp][0][nglkp]
                           +agpdpr2*disij[0][temp_ikp];
                       bt_sg[nb_ikp].dCC[1]+=
                           app2*dcAng[ang_jikp][1][ngl]
                           +app3*dcAng[ang_kikp][1][nglkp]
                           +agpdpr2*disij[1][temp_ikp];
                       bt_sg[nb_ikp].dCC[2]+=
                           app2*dcAng[ang_jikp][2][ngl]
                           +app3*dcAng[ang_kikp][2][nglkp]
                           +agpdpr2*disij[2][temp_ikp];
                     }
                   }
                 }
 
 // j and k are different neighbors of i and k' is a neighbor of k not equal to i
 
                 for(ltmp=0;ltmp<numneigh[k];ltmp++) {
                   temp_kkp=BOP_index[k]+ltmp;
                   if(neigh_flag[temp_kkp]) {
                     kp=klist[ltmp];;
                     kptype = map[type[kp]]+1;
                     same_ikp=0;
                     same_jkp=0;
                     if(x[i][0]==x[kp][0]) {
                       if(x[i][1]==x[kp][1]) {
                         if(x[i][2]==x[kp][2]) {
                           same_ikp=1;
                         }
                       }
                     }
                     if(x[j][0]==x[kp][0]) {
                       if(x[j][1]==x[kp][1]) {
                         if(x[j][2]==x[kp][2]) {
                           same_jkp=1;
                         }
                       }
                     }
                     if(!same_ikp&&!same_jkp) {
                       if(kNeii<ltmp) {
                         nikkp=kNeii*(2*numneigh[k]-kNeii-1)/2+(ltmp-kNeii)-1;
                         nglkp=1;
                         ngli=0;
                       }
                       else {
                         nikkp=ltmp*(2*numneigh[k]-ltmp-1)/2+(kNeii-ltmp)-1;
                         nglkp=0;
                         ngli=1;
                       }
                       sig_flag=0;
                       for(nsearch=0;nsearch<nSigBk[n];nsearch++) {
                         ncmp=itypeSigBk[n][nsearch];
                         if(x[ncmp][0]==x[kp][0]) {
                           if(x[ncmp][1]==x[kp][1]) {
                             if(x[ncmp][2]==x[kp][2]) {
                               sig_flag=1;
                               nkp=nsearch;
                               break;
                             }
                           }
                         }
                       }
                       if(sig_flag==0) {
                         nSigBk[n]=nSigBk[n]+1;
                         nkp=nSigBk[n]-1;
                         itypeSigBk[n][nkp]=kp;
                       }
                       ang_ikkp=cos_index[k]+nikkp;
                       nb_kkp=nb_t;
                       nb_t++;
                       if(nb_t>nb_sg) {
                         new_n_tot=nb_sg+maxneigh;
                         grow_sigma(nb_sg,new_n_tot);
                         nb_sg=new_n_tot;
                       }
                       bt_sg[nb_kkp].temp=temp_kkp;
                       bt_sg[nb_kkp].i=k;
                       bt_sg[nb_kkp].j=kp;
                       gmean0=sigma_g0[itype-1][ktype-1][kptype-1];
                       gmean1=sigma_g1[itype-1][ktype-1][kptype-1];
                       gmean2=sigma_g2[itype-1][ktype-1][kptype-1];
                       amean=cosAng[ang_ikkp];
                       gfactor2=gmean0+gmean1*amean
                           +gmean2*amean*amean;
                       gprime2=gmean1+2.0*gmean2*amean;
                       gfactorsq2=gfactor2*gfactor2;
                       gsqprime2=2.0*gfactor2*gprime2;
                       gfactor=gfactorsq*gfactorsq2;
                       rfactorrt=betaS[temp_ik]*betaS[temp_kkp];
                       rfactor=rfactorrt*rfactorrt;
 
 //3rd CC is third term of Eq. 11 (c) for i atom
 //where j , k =neighbor of i & k' =neighbor of k
 
                       CC=CC+gfactor*rfactor;
                       agpdpr1=2.0*gfactor*rfactorrt*betaS[temp_kkp]
                           *dBetaS[temp_ik]/rij[temp_ik];
                       agpdpr2=2.0*gfactor*rfactorrt*betaS[temp_ik]
                           *dBetaS[temp_kkp]/rij[temp_kkp];
                       app1=rfactor*gfactorsq2*gsqprime;
                       app2=rfactor*gfactorsq*gsqprime2;
                       bt_sg[nb_ij].dCC[0]+=
                           app1*dcAng[ang_jik][0][ngj];
                       bt_sg[nb_ij].dCC[1]+=
                           app1*dcAng[ang_jik][1][ngj];
                       bt_sg[nb_ij].dCC[2]+=
                           app1*dcAng[ang_jik][2][ngj];
                       bt_sg[nb_ik].dCC[0]+=
                           app1*dcAng[ang_jik][0][ngk]
                           +agpdpr1*disij[0][temp_ik]
                           -app2*dcAng[ang_ikkp][0][ngli];
                       bt_sg[nb_ik].dCC[1]+=
                           app1*dcAng[ang_jik][1][ngk]
                           +agpdpr1*disij[1][temp_ik]
                           -app2*dcAng[ang_ikkp][1][ngli];
                       bt_sg[nb_ik].dCC[2]+=
                           app1*dcAng[ang_jik][2][ngk]
                           +agpdpr1*disij[2][temp_ik]
                           -app2*dcAng[ang_ikkp][2][ngli];
                       bt_sg[nb_kkp].dCC[0]+=
                           app2*dcAng[ang_ikkp][0][nglkp]
                           +agpdpr2*disij[0][temp_kkp];
                       bt_sg[nb_kkp].dCC[1]+=
                           app2*dcAng[ang_ikkp][1][nglkp]
                           +agpdpr2*disij[1][temp_kkp];
                       bt_sg[nb_kkp].dCC[2]+=
                           app2*dcAng[ang_ikkp][2][nglkp]
                           +agpdpr2*disij[2][temp_kkp];
 
                     }
                   }
                 }
 
        //j and k are different neighbors of i and k' is a neighbor of j not equal to k
 
                 kplist=firstneigh[kp];
                 for(ltmp=0;ltmp<numneigh[j];ltmp++) {
                   sig_flag=0;
                   temp_jkp=BOP_index[j]+ltmp;
                   if(neigh_flag[temp_jkp]) {
                     kp=jlist[ltmp];
                     kptype = map[type[kp]]+1;
                     same_jkp=0;
                     same_kkp=0;
                     for(kpNeij=0;kpNeij<numneigh[kp];kpNeij++) {
                       if(x[j][0]==x[kp][0]) {
                         if(x[j][1]==x[kp][1]) {
                           if(x[j][2]==x[kp][2]) {
                             same_jkp=1;
                             break;
                           }
                         }
                       }
                     }
                     for(kpNeik=0;kpNeik<numneigh[kp];kpNeik++) {
                       if(x[k][0]==x[kp][0]) {
                         if(x[k][1]==x[kp][1]) {
                           if(x[k][2]==x[kp][2]) {
                             same_kkp=1;
                             break;
                           }
                         }
                       }
                     }
                     if(!same_kkp&&!same_jkp) {
                       for(kNeikp=0;kNeikp<numneigh[k];kNeikp++) {
                         temp_kkp=BOP_index[k]+kNeikp;
                         kkp=klist[kNeikp];
                         if(x[kkp][0]==x[kp][0]) {
                           if(x[kkp][1]==x[kp][1]) {
                             if(x[kkp][2]==x[kp][2]) {
                               sig_flag=1;
                               break;
                             }
                           }
                         }
                       }
                       if(sig_flag==1) {
                         for(nsearch=0;nsearch<numneigh[kp];nsearch++) {
                           ncmp=kplist[nsearch];
                           if(x[ncmp][0]==x[j][0]) {
                             if(x[ncmp][1]==x[j][1]) {
                               if(x[ncmp][2]==x[j][2]) {
                                 kpNeij=nsearch;
                               }
                             }
                           }
                           if(x[ncmp][0]==x[k][0]) {
                             if(x[ncmp][1]==x[k][1]) {
                               if(x[ncmp][2]==x[k][2]) {
                                 kpNeik=nsearch;
                               }
                             }
                           }
                         }
                         if(ji<ltmp) {
                           nijkp=(ji)*numneigh[j]-(ji+1)*(ji+2)/2+ltmp;
                           ngji=0;
                           ngjkp=1;
                         }
                         else {
                           nijkp=(ltmp)*numneigh[j]-(ltmp+1)*(ltmp+2)/2+ji;
                           ngji=1;
                           ngjkp=0;
                         }
                         if(kNeii<kNeikp) {
                           nikkp=(kNeii)*numneigh[k]-(kNeii+1)*(kNeii+2)/2+kNeikp;
                           ngki=0;
                           ngkkp=1;
                         }
                         else {
                           nikkp=(kNeikp)*numneigh[k]-(kNeikp+1)*(kNeikp+2)/2+kNeii;
                           ngki=1;
                           ngkkp=0;
                         }
                         if(kpNeij<kpNeik) {
                           njkpk=(kpNeij)*numneigh[kp]-(kpNeij+1)*(kpNeij+2)/2+kpNeik;
                           ngkpj=0;
                           ngkpk=1;
                         }
                         else {
                           njkpk=(kpNeik)*numneigh[kp]-(kpNeik+1)*(kpNeik+2)/2+kpNeij;
                           ngkpj=1;
                           ngkpk=0;
                         }
                         sig_flag=0;
                         for(nsearch=0;nsearch<nSigBk[n];nsearch++) {
                           ncmp=itypeSigBk[n][nsearch];
                           if(x[ncmp][0]==x[kp][0]) {
                             if(x[ncmp][1]==x[kp][1]) {
                               if(x[ncmp][2]==x[kp][2]) {
                                 nkp=nsearch;
                                 sig_flag=1;
                                 break;
                               }
                             }
                           }
                         }
                         if(sig_flag==0) {
                           nSigBk[n]=nSigBk[n]+1;
                           nkp=nSigBk[n]-1;
                           itypeSigBk[n][nkp]=kp;
                         }
                         ang_ijkp=cos_index[j]+nijkp;
                         ang_ikkp=cos_index[k]+nikkp;
                         ang_jkpk=cos_index[kp]+njkpk;
                         nb_jkp=nb_t;
                         nb_t++;
                         if(nb_t>nb_sg) {
                           new_n_tot=nb_sg+maxneigh;
                           grow_sigma(nb_sg,new_n_tot);
                           nb_sg=new_n_tot;
                         }
                         bt_sg[nb_jkp].temp=temp_jkp;
                         bt_sg[nb_jkp].i=j;
                         bt_sg[nb_jkp].j=kp;
                         nb_kkp=nb_t;
                         nb_t++;
                         if(nb_t>nb_sg) {
                           new_n_tot=nb_sg+maxneigh;
                           grow_sigma(nb_sg,new_n_tot);
                           nb_sg=new_n_tot;
                         }
                         bt_sg[nb_kkp].temp=temp_kkp;
                         bt_sg[nb_kkp].i=k;
                         bt_sg[nb_kkp].j=kp;
                         gmean0=sigma_g0[itype-1][jtype-1][kptype-1];
                         gmean1=sigma_g1[itype-1][jtype-1][kptype-1];
                         gmean2=sigma_g2[itype-1][jtype-1][kptype-1];
                         amean=cosAng[ang_ijkp];
                         gfactor2=gmean0+gmean1*amean
                             +gmean2*amean*amean;
                         gprime2=gmean1+2.0*gmean2*amean;
                         gmean0=sigma_g0[itype-1][ktype-1][kptype-1];
                         gmean1=sigma_g1[itype-1][ktype-1][kptype-1];
                         gmean2=sigma_g2[itype-1][ktype-1][kptype-1];
                         amean=cosAng[ang_ikkp];
                         gfactor3=gmean0+gmean1*amean
                             +gmean2*amean*amean;
                         gprime3=gmean1+2.0*gmean2*amean;
                         gmean0=sigma_g0[jtype-1][kptype-1][ktype-1];
                         gmean1=sigma_g1[jtype-1][kptype-1][ktype-1];
                         gmean2=sigma_g2[jtype-1][kptype-1][ktype-1];
                         amean=cosAng[ang_jkpk];
                         gfactor4=gmean0+gmean1*amean
                             +gmean2*amean*amean;
                         gprime4=gmean1+2.0*gmean2*amean;
                         gfactor=gfactor1*gfactor2*gfactor3*gfactor4;
                         rfactor0=(betaS[temp_ik]+small2)*(betaS[temp_jkp]+small2)
                             *(betaS[temp_kkp]+small2);
                         rfactor=pow(rfactor0,2.0/3.0);
                         drfactor=2.0/3.0*pow(rfactor0,-1.0/3.0);
 
 //EE is Eq. 25(notes)
 
                         EE=EE+gfactor*rfactor;
 
 //agpdpr1 is derivative of EE w.r.t. Beta(r_ik)
 //agpdpr2 is derivative of EE w.r.t. Beta(r_jk')
 //agpdpr3 is derivative of EE w.r.t. Beta(r_kk')
 //app1 is derivative of EE w.r.t. cos(theta_jik)
 //app2 is derivative of EE w.r.t. cos(theta_ijk')
 //app3 is derivative of EE w.r.t. cos(theta_ikk')
 //app4 is derivative of EE w.r.t. cos(theta_jk'k)
 
                         agpdpr1=gfactor*drfactor*(betaS[temp_jkp]+small2)*(betaS[temp_kkp]
                             +small2)*dBetaS[temp_ik]/rij[temp_ik];
                         agpdpr2=gfactor*drfactor*(betaS[temp_ik]+small2)*(betaS[temp_kkp]
                             +small2)*dBetaS[temp_jkp]/rij[temp_jkp];
                         agpdpr3=gfactor*drfactor*(betaS[temp_ik]+small2)*(betaS[temp_jkp]
                             +small2)*dBetaS[temp_kkp]/rij[temp_kkp];
                         app1=rfactor*gfactor2*gfactor3*gfactor4*gprime1;
                         app2=rfactor*gfactor1*gfactor3*gfactor4*gprime2;
                         app3=rfactor*gfactor1*gfactor2*gfactor4*gprime3;
                         app4=rfactor*gfactor1*gfactor2*gfactor3*gprime4;
                         bt_sg[nb_ij].dEE[0]+=
                             app1*dcAng[ang_jik][0][ngj]
                             -app2*dcAng[ang_ijkp][0][ngji];
                         bt_sg[nb_ij].dEE[1]+=
                             app1*dcAng[ang_jik][1][ngj]
                             -app2*dcAng[ang_ijkp][1][ngji];
                         bt_sg[nb_ij].dEE[2]+=
                             app1*dcAng[ang_jik][2][ngj]
                             -app2*dcAng[ang_ijkp][2][ngji];
                         bt_sg[nb_ik].dEE[0]+=
                             app1*dcAng[ang_jik][0][ngk]
                             +agpdpr1*disij[0][temp_ik]
                             -app3*dcAng[ang_ikkp][0][ngki];
                         bt_sg[nb_ik].dEE[1]+=
                             app1*dcAng[ang_jik][1][ngk]
                             +agpdpr1*disij[1][temp_ik]
                             -app3*dcAng[ang_ikkp][1][ngki];
                         bt_sg[nb_ik].dEE[2]+=
                             app1*dcAng[ang_jik][2][ngk]
                             +agpdpr1*disij[2][temp_ik]
                             -app3*dcAng[ang_ikkp][2][ngki];
                         bt_sg[nb_jkp].dEE[0]+=
                             app2*dcAng[ang_ijkp][0][ngjkp]
                             +agpdpr2*disij[0][temp_jkp]
                             -app4*dcAng[ang_jkpk][0][ngkpj];
                         bt_sg[nb_jkp].dEE[1]+=
                             app2*dcAng[ang_ijkp][1][ngjkp]
                             +agpdpr2*disij[1][temp_jkp]
                             -app4*dcAng[ang_jkpk][1][ngkpj];
                         bt_sg[nb_jkp].dEE[2]+=
                             app2*dcAng[ang_ijkp][2][ngjkp]
                             +agpdpr2*disij[2][temp_jkp]
                             -app4*dcAng[ang_jkpk][2][ngkpj];
                         bt_sg[nb_kkp].dEE[0]+=
                             app3*dcAng[ang_ikkp][0][ngkkp]
                             +agpdpr3*disij[0][temp_kkp]
                             -app4*dcAng[ang_jkpk][0][ngkpk];
                         bt_sg[nb_kkp].dEE[1]+=
                             app3*dcAng[ang_ikkp][1][ngkkp]
                             +agpdpr3*disij[1][temp_kkp]
                             -app4*dcAng[ang_jkpk][1][ngkpk];
                         bt_sg[nb_kkp].dEE[2]+=
                             app3*dcAng[ang_ikkp][2][ngkkp]
                             +agpdpr3*disij[2][temp_kkp]
                             -app4*dcAng[ang_jkpk][2][ngkpk];
                       }
                     }
                   }
                 }
               }
             }
           }
 
 //j is a neighbor of i and k is a neighbor of j not equal to i
 
           for(ktmp=0;ktmp<numneigh[j];ktmp++) {
             if(ktmp!=ji) {
               if(ktmp<ji) {
                 njik=ktmp*(2*numneigh[j]-ktmp-1)/2+(ji-ktmp)-1;
                 ngi=1;
                 ngk=0;
               }
               else {
                 njik=ji*(2*numneigh[j]-ji-1)/2+(ktmp-ji)-1;
                 ngi=0;
                 ngk=1;
               }
               temp_jk=BOP_index[j]+ktmp;
               if(neigh_flag[temp_jk]) {
                 k=jlist[ktmp];
                 ktype=map[type[k]]+1;
                 klist=firstneigh[k];
 
                 for(kNeij=0;kNeij<numneigh[k];kNeij++) {
                   if(x[klist[kNeij]][0]==x[j][0]) {
                     if(x[klist[kNeij]][1]==x[j][1]) {
                       if(x[klist[kNeij]][2]==x[j][2]) {
                         break;
                       }
                     }
                   }
                 }
                 sig_flag=0;
                 for(nsearch=0;nsearch<nSigBk[n];nsearch++) {
                   ncmp=itypeSigBk[n][nsearch];
                   if(x[ncmp][0]==x[k][0]) {
                     if(x[ncmp][1]==x[k][1]) {
                       if(x[ncmp][2]==x[k][2]) {
                         new1=nsearch;
                         sig_flag=1;
                         break;
                       }
                     }
                   }
                 }
                 if(sig_flag==0) {
                   nSigBk[n]=nSigBk[n]+1;
                   new1=nSigBk[n]-1;
                   itypeSigBk[n][new1]=k;
                 }
                 ang_ijk=cos_index[j]+njik;
                 nb_jk=nb_t;
                 nb_t++;
                 if(nb_t>nb_sg) {
                   new_n_tot=nb_sg+maxneigh;
                   grow_sigma(nb_sg,new_n_tot);
                   nb_sg=new_n_tot;
                 }
                 bt_sg[nb_jk].temp=temp_jk;
                 bt_sg[nb_jk].i=j;
                 bt_sg[nb_jk].j=k;
                 gmean0=sigma_g0[itype-1][jtype-1][ktype-1];
                 gmean1=sigma_g1[itype-1][jtype-1][ktype-1];
                 gmean2=sigma_g2[itype-1][jtype-1][ktype-1];
                 amean=cosAng[ang_ijk];
                 gfactor1=gmean0+gmean1*amean
                     +gmean2*amean*amean;
                 gprime1=gmean1+2.0*gmean2*amean;
                 gfactorsq=gfactor1*gfactor1;
                 gsqprime=2.0*gfactor1*gprime1;
                 rfactor1rt=betaS[temp_jk]*betaS[temp_jk];
                 rfactor1=rfactor1rt*rfactor1rt;
 
 //BB is Eq. 34 (a) or Eq. 10 (c) for the j atom
 //1st DD is Eq. 11 (c) for j atom where i & k=neighbor of j
 
                 BB=BB+gfactorsq*rfactor1rt;
                 DD=DD+gfactorsq*rfactor1;
 
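 //agpdpr1 is derivative of BB w.r.t. Beta(r_jk)
 //app1 is derivative of BB w.r.t. cos(theta_ijk)
 //NOTE(review): agpdpr2 and app2, which the dDD updates below read, are not
 //assigned in this block before use - confirm they are meant to carry over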
 
                 agpdpr1=2.0*gfactorsq*betaS[temp_jk]*dBetaS[temp_jk]/rij[temp_jk];
                 app1=rfactor1rt*gsqprime;
                 bt_sg[nb_ij].dBB[0]-=
                     app1*dcAng[ang_ijk][0][ngi];
                 bt_sg[nb_ij].dBB[1]-=
                     app1*dcAng[ang_ijk][1][ngi];
                 bt_sg[nb_ij].dBB[2]-=
                     app1*dcAng[ang_ijk][2][ngi];
                 bt_sg[nb_ij].dDD[0]-=
                     app2*dcAng[ang_ijk][0][ngi];
                 bt_sg[nb_ij].dDD[1]-=
                     app2*dcAng[ang_ijk][1][ngi];
                 bt_sg[nb_ij].dDD[2]-=
                     app2*dcAng[ang_ijk][2][ngi];
                 bt_sg[nb_jk].dBB[0]+=
                     app1*dcAng[ang_ijk][0][ngk]
                     +agpdpr1*disij[0][temp_jk];
                 bt_sg[nb_jk].dBB[1]+=
                     app1*dcAng[ang_ijk][1][ngk]
                     +agpdpr1*disij[1][temp_jk];
                 bt_sg[nb_jk].dBB[2]+=
                     app1*dcAng[ang_ijk][2][ngk]
                     +agpdpr1*disij[2][temp_jk];
                 bt_sg[nb_jk].dDD[0]+=
                     app2*dcAng[ang_ijk][0][ngk]
                     +agpdpr2*disij[0][temp_jk];
                 bt_sg[nb_jk].dDD[1]+=
                     app2*dcAng[ang_ijk][1][ngk]
                     +agpdpr2*disij[1][temp_jk];
                 bt_sg[nb_jk].dDD[2]+=
                     app2*dcAng[ang_ijk][2][ngk]
                     +agpdpr2*disij[2][temp_jk];
 
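 //j is a neighbor of i, k and k' are different neighbors of j not equal to i
 //NOTE(review): nb_jkp is not assigned inside this loop (the i-atom loop sets
 //nb_ikp from nb_t) yet bt_sg[nb_jkp].dDD is updated below - confirm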
 
                 for(ltmp=0;ltmp<ktmp;ltmp++) {
                   if(ltmp!=ji) {
                     temp_jkp=BOP_index[j]+ltmp;
                     if(neigh_flag[temp_jkp]) {
                       kp=jlist[ltmp];
                       kptype=map[type[kp]]+1;
                       for(nsearch=0;nsearch<nSigBk[n];nsearch++) {
                         ncmp=itypeSigBk[n][nsearch];
                         if(x[ncmp][0]==x[kp][0]) {
                           if(x[ncmp][1]==x[kp][1]) {
                             if(x[ncmp][2]==x[kp][2]) {
                               new2=nsearch;
                               break;
                             }
                           }
                         }
                       }
                       if(ji<ltmp) {
                         nijkp=ji*(2*numneigh[j]-ji-1)/2+(ltmp-ji)-1;
                         ngli=0;
                         ngl=1;
                       }
                       else {
                         nijkp=ltmp*(2*numneigh[j]-ltmp-1)/2+(ji-ltmp)-1;
                         ngli=1;
                         ngl=0;
                       }
                       if(ktmp<ltmp) {
                         nkjkp=ktmp*(2*numneigh[j]-ktmp-1)/2+(ltmp-ktmp)-1;
                         ngjk=0;
                         ngjkp=1;
                       }
                       else {
                         nkjkp=ltmp*(2*numneigh[j]-ltmp-1)/2+(ktmp-ltmp)-1;
                         ngjk=1;
                         ngjkp=0;
                       }
                       ang_ijkp=cos_index[j]+nijkp;
                       ang_kjkp=cos_index[j]+nkjkp;
                       gmean0=sigma_g0[itype-1][jtype-1][kptype-1];
                       gmean1=sigma_g1[itype-1][jtype-1][kptype-1];
                       gmean2=sigma_g2[itype-1][jtype-1][kptype-1];
                       amean=cosAng[ang_ijkp];
                       gfactor2=gmean0+gmean1*amean
                         +gmean2*amean*amean;
                       gprime2=gmean1+2.0*gmean2*amean;
                       gmean0=sigma_g0[ktype-1][jtype-1][kptype-1];
                       gmean1=sigma_g1[ktype-1][jtype-1][kptype-1];
                       gmean2=sigma_g2[ktype-1][jtype-1][kptype-1];
                       amean=cosAng[ang_kjkp];
                       gfactor3=gmean0+gmean1*amean
                         +gmean2*amean*amean;
                       gprime3=gmean1+2.0*gmean2*amean;
                       gfactor=gfactor1*gfactor2*gfactor3;
                       rfactorrt=betaS[temp_jk]*betaS[temp_jkp];
                       rfactor=rfactorrt*rfactorrt;
 
 //2nd DD is Eq. 11 (c) for j atom where i , k & k'=neighbor of j
 
                       DD=DD+2.0*gfactor*rfactor;
 
 //agpdpr1 is derivative of DD  w.r.t. Beta(r_jk)
 //agpdpr2 is derivative of DD  w.r.t. Beta(r_jk')
 //app1 is derivative of DD  w.r.t. cos(theta_ijk)
 //app2 is derivative of DD  w.r.t. cos(theta_ijkp)
 //app3 is derivative of DD  w.r.t. cos(theta_kjkp)
 
                       agpdpr1=4.0*gfactor*rfactorrt*betaS[temp_jkp]
                           *dBetaS[temp_jk]/rij[temp_jk];
                           agpdpr2=4.0*gfactor*rfactorrt*betaS[temp_jk]
                           *dBetaS[temp_jkp]/rij[temp_jkp];
                       app1=2.0*rfactor*gfactor2*gfactor3*gprime1;
                       app2=2.0*rfactor*gfactor1*gfactor3*gprime2;
                       app3=2.0*rfactor*gfactor1*gfactor2*gprime3;
                       bt_sg[nb_ij].dDD[0]-=
                           app1*dcAng[ang_ijk][0][ngi]
                           +app2*dcAng[ang_ijkp][0][ngli];
                       bt_sg[nb_ij].dDD[1]-=
                           app1*dcAng[ang_ijk][1][ngi]
                           +app2*dcAng[ang_ijkp][1][ngli];
                       bt_sg[nb_ij].dDD[2]-=
                           app1*dcAng[ang_ijk][2][ngi]
                           +app2*dcAng[ang_ijkp][2][ngli];
                       bt_sg[nb_jk].dDD[0]+=
                           app1*dcAng[ang_ijk][0][ngk]
                           +app3*dcAng[ang_kjkp][0][ngjk]
                           +agpdpr1*disij[0][temp_jk];
                       bt_sg[nb_jk].dDD[1]+=
                           app1*dcAng[ang_ijk][1][ngk]
                           +app3*dcAng[ang_kjkp][1][ngjk]
                           +agpdpr1*disij[1][temp_jk];
                       bt_sg[nb_jk].dDD[2]+=
                           app1*dcAng[ang_ijk][2][ngk]
                           +app3*dcAng[ang_kjkp][2][ngjk]
                           +agpdpr1*disij[2][temp_jk];
                       bt_sg[nb_jkp].dDD[0]+=
                           app2*dcAng[ang_ijkp][0][ngl]
                           +app3*dcAng[ang_kjkp][0][ngjkp]
                           +agpdpr2*disij[0][temp_jkp];
                       bt_sg[nb_jkp].dDD[1]+=
                           app2*dcAng[ang_ijkp][1][ngl]
                           +app3*dcAng[ang_kjkp][1][ngjkp]
                           +agpdpr2*disij[1][temp_jkp];
                       bt_sg[nb_jkp].dDD[2]+=
                           app2*dcAng[ang_ijkp][2][ngl]
                           +app3*dcAng[ang_kjkp][2][ngjkp]
                           +agpdpr2*disij[2][temp_jkp];
                     }
                   }
                 }
 
 //j is a neighbor of i, k is a neighbor of j not equal to i and k'
 //is a neighbor of k not equal to j or i
 
                 for(ltmp=0;ltmp<numneigh[k];ltmp++) {
                   temp_kkp=BOP_index[k]+ltmp;
                   if(neigh_flag[temp_kkp]) {
                     kp=klist[ltmp];
                     kptype=map[type[kp]]+1;
                     same_ikp=0;
                     same_jkp=0;
                     if(x[i][0]==x[kp][0]) {
                       if(x[i][1]==x[kp][1]) {
                         if(x[i][2]==x[kp][2]) {
                           same_ikp=1;
                         }
                       }
                     }
                     if(x[j][0]==x[kp][0]) {
                       if(x[j][1]==x[kp][1]) {
                         if(x[j][2]==x[kp][2]) {
                           same_jkp=1;
                         }
                       }
                     }
                     if(!same_ikp&&!same_jkp) {
                       for(kNeij=0;kNeij<numneigh[k];kNeij++) {
                         if(x[klist[kNeij]][0]==x[j][0]) {
                           if(x[klist[kNeij]][1]==x[j][1]) {
                             if(x[klist[kNeij]][2]==x[j][2]) {
                               break;
                             }
                           }
                         }
                       }
                       if(kNeij<ltmp) {
                         njkkp=kNeij*(2*numneigh[k]-kNeij-1)/2+(ltmp-kNeij)-1;
                         nglkp=1;
                         nglj=0;
                       }
                       else {
                         njkkp=ltmp*(2*numneigh[k]-ltmp-1)/2+(kNeij-ltmp)-1;
                         nglkp=0;
                         nglj=1;
                       }
                       sig_flag=0;
                       for(nsearch=0;nsearch<nSigBk[n];nsearch++) {
                         ncmp=itypeSigBk[n][nsearch];
                         if(x[ncmp][0]==x[kp][0]) {
                           if(x[ncmp][1]==x[kp][1]) {
                             if(x[ncmp][2]==x[kp][2]) {
                               new2=nsearch;
                               sig_flag=1;
                               break;
                             }
                           }
                         }
                       }
                       if(sig_flag==0) {
                         nSigBk[n]=nSigBk[n]+1;
                         new2=nSigBk[n]-1;
                         itypeSigBk[n][new2]=kp;
                       }
                       ang_jkkp=cos_index[k]+njkkp;
                       nb_kkp=nb_t;
                       nb_t++;
                       if(nb_t>nb_sg) {
                         new_n_tot=nb_sg+maxneigh;
                         grow_sigma(nb_sg,new_n_tot);
                         nb_sg=new_n_tot;
                       }
                       bt_sg[nb_kkp].temp=temp_kkp;
                       bt_sg[nb_kkp].i=k;
                       bt_sg[nb_kkp].j=kp;
                       gmean0=sigma_g0[jtype-1][ktype-1][kptype-1];
                       gmean1=sigma_g1[jtype-1][ktype-1][kptype-1];
                       gmean2=sigma_g2[jtype-1][ktype-1][kptype-1];
                       amean=cosAng[ang_jkkp];
                       gfactor2=gmean0+gmean1*amean
                           +gmean2*amean*amean;
                       gprime2=gmean1+2.0*gmean2*amean;
                       gfactorsq2=gfactor2*gfactor2;
                       gsqprime2=2.0*gfactor2*gprime2;
                       gfactor=gfactorsq*gfactorsq2;
                       rfactorrt=betaS[temp_jk]*betaS[temp_kkp];
                       rfactor=rfactorrt*rfactorrt;
 
 //3rd DD is Eq. 11 (c) for j atom where i & k=neighbor of j & k'=neighbor of k
 
                       DD=DD+gfactor*rfactor;
 
 //agpdpr1 is derivative of DD  3rd term w.r.t. Beta(r_jk)
 //agpdpr2 is derivative of DD  3rd term w.r.t. Beta(r_kk')
 //app1 is derivative of DD  3rd term w.r.t. cos(theta_ijk)
 //app2 is derivative of DD  3rd term w.r.t. cos(theta_jkkp)
 
                       agpdpr1=2.0*gfactor*rfactorrt*betaS[temp_kkp]
                           *dBetaS[temp_jk]/rij[temp_jk];
                       agpdpr2=2.0*gfactor*rfactorrt*betaS[temp_jk]
                           *dBetaS[temp_kkp]/rij[temp_kkp];
                       app1=rfactor*gfactorsq2*gsqprime;
                       app2=rfactor*gfactorsq*gsqprime2;
                       bt_sg[nb_ij].dDD[0]-=
                           app1*dcAng[ang_ijk][0][ngi];
                       bt_sg[nb_ij].dDD[1]-=
                           app1*dcAng[ang_ijk][1][ngi];
                       bt_sg[nb_ij].dDD[2]-=
                           app1*dcAng[ang_ijk][2][ngi];
                       bt_sg[nb_jk].dDD[0]+=
                           app1*dcAng[ang_ijk][0][ngk]
                           +agpdpr1*disij[0][temp_jk]
                           -app2*dcAng[ang_jkkp][0][nglj];
                       bt_sg[nb_jk].dDD[1]+=
                           app1*dcAng[ang_ijk][1][ngk]
                           +agpdpr1*disij[1][temp_jk]
                           -app2*dcAng[ang_jkkp][1][nglj];
                       bt_sg[nb_jk].dDD[2]+=
                           app1*dcAng[ang_ijk][2][ngk]
                           +agpdpr1*disij[2][temp_jk]
                           -app2*dcAng[ang_jkkp][2][nglj];
                       bt_sg[nb_kkp].dDD[0]+=
                           app2*dcAng[ang_jkkp][0][nglkp]
                           +agpdpr2*disij[0][temp_kkp];
                       bt_sg[nb_kkp].dDD[1]+=
                           app2*dcAng[ang_jkkp][1][nglkp]
                           +agpdpr2*disij[1][temp_kkp];
                       bt_sg[nb_kkp].dDD[2]+=
                           app2*dcAng[ang_jkkp][2][nglkp]
                           +agpdpr2*disij[2][temp_kkp];
                     }
                   }
                 }
               }
             }
           }
 
           sig_flag=0;
           if(FF<=0.000001) {
             sigB[n]=0.0;
             sig_flag=1;
           }
           if(sig_flag==0) {
             if(AA<0.0)
               AA=0.0;
             if(BB<0.0)
               BB=0.0;
             if(CC<0.0)
               CC=0.0;
             if(DD<0.0)
               DD=0.0;
 
 // AA and BB are the representations of (a) Eq. 34 and (b) Eq. 9
 // for atoms i and j respectively
 
             AAC=AA+BB;
             BBC=AA*BB;
             CCC=AA*AA+BB*BB;
             DDC=CC+DD;
 
 //EEC is a modified form of (a) Eq. 33
 
             EEC=(DDC-CCC)/(AAC+2.0*small1);
             AACFF=1.0/(AAC+2.0*small1);
             for(m=0;m<nb_t;m++) {
               if((bt_sg[m].i>-1)&&(bt_sg[m].j>-1)) {
                 bt_i=bt_sg[m].i;
                 bt_j=bt_sg[m].j;
                 bt_sg[m].dAAC[0]=bt_sg[m].dAA[0]
                     +bt_sg[m].dBB[0];
                 bt_sg[m].dAAC[1]=bt_sg[m].dAA[1]
                     +bt_sg[m].dBB[1];
                 bt_sg[m].dAAC[2]=bt_sg[m].dAA[2]
                     +bt_sg[m].dBB[2];
                 bt_sg[m].dBBC[0]=bt_sg[m].dAA[0]*BB
                     +AA*bt_sg[m].dBB[0];
                 bt_sg[m].dBBC[1]=bt_sg[m].dAA[1]*BB
                     +AA*bt_sg[m].dBB[1];
                 bt_sg[m].dBBC[2]=bt_sg[m].dAA[2]*BB
                     +AA*bt_sg[m].dBB[2];
                 bt_sg[m].dCCC[0]=2.0*AA*bt_sg[m].dAA[0]
                     +2.0*BB*bt_sg[m].dBB[0];
                 bt_sg[m].dCCC[1]=2.0*AA*bt_sg[m].dAA[1]
                     +2.0*BB*bt_sg[m].dBB[1];
                 bt_sg[m].dCCC[2]=2.0*AA*bt_sg[m].dAA[2]
                     +2.0*BB*bt_sg[m].dBB[2];
                 bt_sg[m].dDDC[0]=bt_sg[m].dCC[0]
                     +bt_sg[m].dDD[0];
                 bt_sg[m].dDDC[1]=bt_sg[m].dCC[1]
                     +bt_sg[m].dDD[1];
                 bt_sg[m].dDDC[2]=bt_sg[m].dCC[2]
                     +bt_sg[m].dDD[2];
                 bt_sg[m].dEEC[0]=(bt_sg[m].dDDC[0]
                     -bt_sg[m].dCCC[0]
                     -EEC*bt_sg[m].dAAC[0])*AACFF;
                 bt_sg[m].dEEC[1]=(bt_sg[m].dDDC[1]
                     -bt_sg[m].dCCC[1]
                     -EEC*bt_sg[m].dAAC[1])*AACFF;
                 bt_sg[m].dEEC[2]=(bt_sg[m].dDDC[2]
                     -bt_sg[m].dCCC[2]
                     -EEC*bt_sg[m].dAAC[2])*AACFF;
               }
             }
             UT=EEC*FF+BBC+small3[iij];
             UT=1.0/sqrt(UT);
 
 // FFC is slightly modified form of (a) Eq. 31
 // GGC is slightly modified form of (a) Eq. 32
 // bndtmp is a slightly modified form of (a) Eq. 30 and (b) Eq. 8
 
             FFC=BBC*UT;
             GGC=EEC*UT;
             bndtmp=(FF+sigma_delta[iij]*sigma_delta[iij])
                 +sigma_c[iij]*AAC+small4;
             UTcom=-0.5*UT*UT*UT;
             for(m=0;m<nb_t;m++) {
               if((bt_sg[m].i>-1)&&(bt_sg[m].j>-1)) {
                 bt_sg[m].dUT[0]=UTcom*(bt_sg[m].dEEC[0]*FF
                     +EEC*bt_sg[m].dFF[0]+bt_sg[m].dBBC[0]);
                 bt_sg[m].dUT[1]=UTcom*(bt_sg[m].dEEC[1]*FF
                     +EEC*bt_sg[m].dFF[1]+bt_sg[m].dBBC[1]);
                 bt_sg[m].dUT[2]=UTcom*(bt_sg[m].dEEC[2]*FF
                     +EEC*bt_sg[m].dFF[2]+bt_sg[m].dBBC[2]);
                 bt_sg[m].dFFC[0]=bt_sg[m].dBBC[0]*UT
                     +BBC*bt_sg[m].dUT[0];
                 bt_sg[m].dFFC[1]=bt_sg[m].dBBC[1]*UT
                     +BBC*bt_sg[m].dUT[1];
                 bt_sg[m].dFFC[2]=bt_sg[m].dBBC[2]*UT
                     +BBC*bt_sg[m].dUT[2];
                 bt_sg[m].dGGC[0]=bt_sg[m].dEEC[0]*UT
                     +EEC*bt_sg[m].dUT[0];
                 bt_sg[m].dGGC[1]=bt_sg[m].dEEC[1]*UT
                     +EEC*bt_sg[m].dUT[1];
                 bt_sg[m].dGGC[2]=bt_sg[m].dEEC[2]*UT
                     +EEC*bt_sg[m].dUT[2];
               }
             }
             psign=1.0;
             if(1.0+sigma_a[iij]*GGC<0.0)
               psign=-1.0;
             bndtmp0=1.0/sqrt(bndtmp);
             sigB1[n]=psign*betaS[temp_ij]*(1.0+sigma_a[iij]*GGC)*bndtmp0;
             bndtmp=-0.5*bndtmp0*bndtmp0*bndtmp0;
             bndtmp1=psign*(1.0+sigma_a[iij]*GGC)*bndtmp0+psign*betaS[temp_ij]
                 *(1.0+sigma_a[iij]*GGC)*bndtmp*2.0*betaS[temp_ij]*(1.0
                 +sigma_a[iij]*GGC)*(1.0+sigma_a[iij]*GGC);
             bndtmp1=bndtmp1*dBetaS[temp_ij]/rij[temp_ij];
             bndtmp2=psign*betaS[temp_ij]*(1.0+sigma_a[iij]*GGC)*bndtmp*sigma_c[iij];
             bndtmp3=psign*betaS[temp_ij]*(1.0+sigma_a[iij]*GGC)
                 *bndtmp*sigma_c[iij]*sigma_a[iij];
             bndtmp4=psign*betaS[temp_ij]*(1.0+sigma_a[iij]*GGC)
                 *bndtmp*sigma_c[iij]*sigma_a[iij]*(2.0+GGC);
             bndtmp5=sigma_a[iij]*psign*betaS[temp_ij]*bndtmp0
                 +psign*betaS[temp_ij]*(1.0+sigma_a[iij]*GGC)*bndtmp
                 *(2.0*(FF+sigma_delta[iij]*sigma_delta[iij])*(1.0
                 +sigma_a[iij]*GGC)*sigma_a[iij]+sigma_c[iij]*sigma_a[iij]*FFC);
             setting=0;
             for(m=0;m<nb_t;m++) {
               if((bt_sg[m].i>-1)&&(bt_sg[m].j>-1)) {
                 temp_kk=bt_sg[m].temp;
                 if(temp_kk==temp_ij&&setting==0) {
                   bt_sg[m].dSigB1[0]=bndtmp1*disij[0][temp_ij]
                       +(bndtmp2*bt_sg[m].dAAC[0]
                       +bndtmp3*bt_sg[m].dEE[0]
                       +bndtmp4*bt_sg[m].dFFC[0]
                       +bndtmp5*bt_sg[m].dGGC[0]);
                   bt_sg[m].dSigB1[1]=bndtmp1*disij[1][temp_ij]
                       +(bndtmp2*bt_sg[m].dAAC[1]
                       +bndtmp3*bt_sg[m].dEE[1]
                       +bndtmp4*bt_sg[m].dFFC[1]
                       +bndtmp5*bt_sg[m].dGGC[1]);
                   bt_sg[m].dSigB1[2]=bndtmp1*disij[2][temp_ij]
                       +(bndtmp2*bt_sg[m].dAAC[2]
                       +bndtmp3*bt_sg[m].dEE[2]
                       +bndtmp4*bt_sg[m].dFFC[2]
                       +bndtmp5*bt_sg[m].dGGC[2]);
                   setting=1;
                 }
                 else if(temp_kk==temp_ji&&setting==0) {
                   bt_sg[m].dSigB1[0]=-bndtmp1*disij[0][temp_ij]
                       +(bndtmp2*bt_sg[m].dAAC[0]
                       +bndtmp3*bt_sg[m].dEE[0]
                       +bndtmp4*bt_sg[m].dFFC[0]
                       +bndtmp5*bt_sg[m].dGGC[0]);
                   bt_sg[m].dSigB1[1]=-bndtmp1*disij[1][temp_ij]
                       +(bndtmp2*bt_sg[m].dAAC[1]
                       +bndtmp3*bt_sg[m].dEE[1]
                       +bndtmp4*bt_sg[m].dFFC[1]
                       +bndtmp5*bt_sg[m].dGGC[1]);
                   bt_sg[m].dSigB1[2]=-bndtmp1*disij[2][temp_ij]
                       +(bndtmp2*bt_sg[m].dAAC[2]
                       +bndtmp3*bt_sg[m].dEE[2]
                       +bndtmp4*bt_sg[m].dFFC[2]
                       +bndtmp5*bt_sg[m].dGGC[2]);
                   setting=1;
                 }
                 else {
                   bt_sg[m].dSigB1[0]=(bndtmp2*bt_sg[m].dAAC[0]
                       +bndtmp3*bt_sg[m].dEE[0]
                       +bndtmp4*bt_sg[m].dFFC[0]
                       +bndtmp5*bt_sg[m].dGGC[0]);
                   bt_sg[m].dSigB1[1]=(bndtmp2*bt_sg[m].dAAC[1]
                       +bndtmp3*bt_sg[m].dEE[1]
                       +bndtmp4*bt_sg[m].dFFC[1]
                       +bndtmp5*bt_sg[m].dGGC[1]);
                   bt_sg[m].dSigB1[2]=(bndtmp2*bt_sg[m].dAAC[2]
                       +bndtmp3*bt_sg[m].dEE[2]
                       +bndtmp4*bt_sg[m].dFFC[2]
                       +bndtmp5*bt_sg[m].dGGC[2]);
                 }
               }
             }
 
 //This branch ensures there is no error for atoms with no neighbors (deposition)
 
             if(nb_t==0) {
               if(j>i) {
                 bt_sg[0].dSigB1[0]=bndtmp1*disij[0][temp_ij];
                 bt_sg[0].dSigB1[1]=bndtmp1*disij[1][temp_ij];
                 bt_sg[0].dSigB1[2]=bndtmp1*disij[2][temp_ij];
               }
               else {
                 bt_sg[0].dSigB1[0]=-bndtmp1*disij[0][temp_ij];
                 bt_sg[0].dSigB1[1]=-bndtmp1*disij[1][temp_ij];
                 bt_sg[0].dSigB1[2]=-bndtmp1*disij[2][temp_ij];
               }
               for(pp=0;pp<3;pp++) {
                 bt_sg[0].dAA[pp]=0.0;
                 bt_sg[0].dBB[pp]=0.0;
                 bt_sg[0].dCC[pp]=0.0;
                 bt_sg[0].dDD[pp]=0.0;
                 bt_sg[0].dEE[pp]=0.0;
                 bt_sg[0].dEE1[pp]=0.0;
                 bt_sg[0].dFF[pp]=0.0;
                 bt_sg[0].dAAC[pp]=0.0;
                 bt_sg[0].dBBC[pp]=0.0;
                 bt_sg[0].dCCC[pp]=0.0;
                 bt_sg[0].dDDC[pp]=0.0;
                 bt_sg[0].dEEC[pp]=0.0;
                 bt_sg[0].dFFC[pp]=0.0;
                 bt_sg[0].dGGC[pp]=0.0;
                 bt_sg[0].dUT[pp]=0.0;
                 bt_sg[0].dSigB1[pp]=0.0;
                 bt_sg[0].dSigB[pp]=0.0;
               }
               bt_sg[0].i=i;
               bt_sg[0].j=j;
               bt_sg[0].temp=temp_ij;
               nb_t++;
               if(nb_t>nb_sg) {
                 new_n_tot=nb_sg+maxneigh;
                 grow_sigma(nb_sg,new_n_tot);
                 nb_sg=new_n_tot;
               }
             }
             ps=sigB1[n]*rdBO+1.0;
             ks=(int)ps;
             if(nBOt-1<ks)
               ks=nBOt-1;
             ps=ps-ks;
             if(ps>1.0)
               ps=1.0;
             dsigB1=((FsigBO3[iij][ks-1]*ps+FsigBO2[iij][ks-1])*ps
                 +FsigBO1[iij][ks-1])*ps+FsigBO[iij][ks-1];
             dsigB2=(FsigBO6[iij][ks-1]*ps+FsigBO5[iij][ks-1])*ps+FsigBO4[iij][ks-1];
             part0=(FF+0.5*AAC+small5);
             part1=(sigma_f[iij]-0.5)*sigma_k[iij];
             part2=1.0-part1*EE1/part0;
             part3=dsigB1*part1/part0;
             part4=part3/part0*EE1;
 
 // sigB is the final expression for (a) Eq. 6 and (b) Eq. 11
 
             sigB[n]=dsigB1*part2;
             pp1=2.0*betaS[temp_ij];
             for(m=0;m<nb_t;m++) {
               if((bt_sg[m].i>-1)&&(bt_sg[m].j>-1)) {
                 temp_kk=bt_sg[m].temp;
                 bt_ij=bt_sg[m].temp;
                 bt_i=bt_sg[m].i;
                 bt_j=bt_sg[m].j;
                 for(pp=0;pp<3;pp++) {
                   bt_sg[m].dSigB[pp]=dsigB2*part2*bt_sg[m].dSigB1[pp]
                       -part3*bt_sg[m].dEE1[pp]
                       +part4*(bt_sg[m].dFF[pp]
                       +0.5*bt_sg[m].dAAC[pp]);
                 }
                 for(pp=0;pp<3;pp++) {
                   ftmp[pp]=pp1*bt_sg[m].dSigB[pp];
                   f[bt_i][pp]-=ftmp[pp];
                   f[bt_j][pp]+=ftmp[pp];
                 }
                 if(evflag) {
                   ev_tally_xyz(bt_i,bt_j,nlocal,newton_pair,0.0,0.0,ftmp[0],ftmp[1]
                       ,ftmp[2],disij[0][bt_ij],disij[1][bt_ij],disij[2][bt_ij]);
                 }
               }
             }
           }
           n++;
         }
       }
     }
   }
   if(allocate_sigma)
     destroy_sigma();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBOP::sigmaBo_noa()
 {
   int nb_t,new_n_tot;
   int n,i,j,k,kp,m,pp;
   int iij,ji,ki;
   int itmp,jtmp,ktmp,ltmp,mtmp;
   tagint i_tag,j_tag;
   int ngi,ngj,ngk;
   int ngji,ngjk,nikj,ngki,ngkj;
   int njik,nijk,nikkp,nkp,nijkp;
   int nkikp,njikp,nk0,nkjkp,njkkp;
   int jNeik,kNeii,kNeij;
   int new1,new2,nlocal,nsearch;
   int inum,*ilist,*iilist,*jlist,*klist;
   int **firstneigh,*numneigh;
   int temp_ji,temp_ikp,temp_kkp;
   int temp_ij,temp_ik,temp_jkp,temp_kk,temp_jk;
   int ang_ijkp,ang_ikkp,ang_kjkp;
   int ang_ijk,ang_ikj,ang_jikp,ang_jkkp;
   int ang_jik,ang_kikp;
   int nb_ij,nb_ik,nb_jk;
   int sig_flag,setting,ncmp,ks;
   int itype,jtype,ktype,kptype;
   int bt_i,bt_j,bt_ij;
   int kp_index,same_ikp,same_jkp;
   double AA,BB,CC,DD,EE1,FF;
   double AAC,BBC,CCC,DDC,EEC;
   double UT,bndtmp;
   double amean,gmean0,gmean1,gmean2,ps;
   double gfactor1,gprime1,gsqprime;
   double gfactorsq,gfactor2,gprime2;
   double gfactorsq2;
   double gfactor3,gprime3,gfactor,rfactor;
   double rfactorrt,rfactor1rt,rfactor1;
   double rcm1,rcm2,gcm1,gcm2,gcm3;
   double agpdpr1,app1;
   double dsigB1,dsigB2;
   double part0,part1,part2,part3,part4;
   double psign,bndtmp0,pp1,bndtmp1,bndtmp2;
   double ftmp[3];
   double **x = atom->x;
   double **f = atom->f;
   tagint *tag = atom->tag;
   int newton_pair = force->newton_pair;
   int *type = atom->type;
 
   nlocal = atom->nlocal;
   firstneigh = list->firstneigh;
   numneigh = list->numneigh;
   inum = list->inum;
   ilist = list->ilist;
   n=0;
 
 //loop over all local atoms
 
   if(nb_sg>16) {
     nb_sg=16;
   }
   if(nb_sg==0) {
     nb_sg=(maxneigh)*(maxneigh/2);
   }
   if(allocate_sigma) {
     destroy_sigma();
   }
   create_sigma(nb_sg);
   for(itmp=0;itmp<inum;itmp++) {
     i = ilist[itmp];
     i_tag=tag[i];
     itype = map[type[i]]+1;
 
 //j is loop over all neighbors of i
 
     for(jtmp=0;jtmp<numneigh[i];jtmp++) {
       temp_ij=BOP_index[i]+jtmp;
       if(neigh_flag[temp_ij]) {
         for(m=0;m<nb_sg;m++) {
           for(pp=0;pp<3;pp++) {
             bt_sg[m].dAA[pp]=0.0;
             bt_sg[m].dBB[pp]=0.0;
             bt_sg[m].dEE1[pp]=0.0;
             bt_sg[m].dFF[pp]=0.0;
             bt_sg[m].dAAC[pp]=0.0;
             bt_sg[m].dSigB1[pp]=0.0;
             bt_sg[m].dSigB[pp]=0.0;
           }
           bt_sg[m].i=-1;
           bt_sg[m].j=-1;
         }
         nb_t=0;
         iilist=firstneigh[i];
         j=iilist[jtmp];
         jlist=firstneigh[j];
         for(ki=0;ki<numneigh[j];ki++) {
           if(x[jlist[ki]][0]==x[i][0]) {
             if(x[jlist[ki]][1]==x[i][1]) {
               if(x[jlist[ki]][2]==x[i][2]) {
                 break;
               }
             }
           }
         }
         j_tag=tag[j];
         jtype = map[type[j]]+1;
         nb_ij=nb_t;
         nb_t++;
         if(nb_t>nb_sg) {
           new_n_tot=nb_sg+maxneigh;
           grow_sigma(nb_sg,new_n_tot);
           nb_sg=new_n_tot;
         }
         bt_sg[nb_ij].temp=temp_ij;
         bt_sg[nb_ij].i=i;
         bt_sg[nb_ij].j=j;
         if(j_tag>=i_tag) {
           if(itype==jtype)
             iij=itype-1;
           else if(itype<jtype)
             iij=itype*bop_types-itype*(itype+1)/2+jtype-1;
           else
             iij=jtype*bop_types-jtype*(jtype+1)/2+itype-1;
           for(ji=0;ji<numneigh[j];ji++) {
             temp_ji=BOP_index[j]+ji;
             if(x[jlist[ji]][0]==x[i][0]) {
               if(x[jlist[ji]][1]==x[i][1]) {
                 if(x[jlist[ji]][2]==x[i][2]) {
                   break;
                 }
               }
             }
           }
           nSigBk[n]=0;
 
 //AA-EE1 are the components making up Eq. 30 (a)
 
           AA=0.0;
           BB=0.0;
           CC=0.0;
           DD=0.0;
           EE1=0.0;
 
 //FF is the Beta_sigma^2 term
 
           FF=betaS[temp_ij]*betaS[temp_ij];
 
 //agpdpr1 is derivative of FF w.r.t. r_ij
 
           agpdpr1=2.0*betaS[temp_ij]*dBetaS[temp_ij]/rij[temp_ij];
 
 //dXX derivatives are taken with respect to all pairs contributing to the energy
 //nb_ij is derivative w.r.t. ij pair
 
           bt_sg[nb_ij].dFF[0]=agpdpr1*disij[0][temp_ij];
           bt_sg[nb_ij].dFF[1]=agpdpr1*disij[1][temp_ij];
           bt_sg[nb_ij].dFF[2]=agpdpr1*disij[2][temp_ij];
 
 //k loops over all neighbors of i again (j is also a neighbor of i; k equal to j is skipped)
           for(ktmp=0;ktmp<numneigh[i];ktmp++) {
             temp_ik=BOP_index[i]+ktmp;
             if(neigh_flag[temp_ik]) {
               if(ktmp!=jtmp) {
                 if(jtmp<ktmp) {
                   njik=jtmp*(2*numneigh[i]-jtmp-1)/2+(ktmp-jtmp)-1;
                   ngj=0;
                   ngk=1;
                 }
                 else {
                   njik=ktmp*(2*numneigh[i]-ktmp-1)/2+(jtmp-ktmp)-1;
                   ngj=1;
                   ngk=0;
                 }
                 k=iilist[ktmp];
                 ktype = map[type[k]]+1;
 
 //find neighbor of k that is equal to i
 
                 klist=firstneigh[k];
                 for(kNeii=0;kNeii<numneigh[k];kNeii++) {
                   if(x[klist[kNeii]][0]==x[i][0]) {
                     if(x[klist[kNeii]][1]==x[i][1]) {
                       if(x[klist[kNeii]][2]==x[i][2]) {
                         break;
                       }
                     }
                   }
                 }
 
 //find neighbor of j that is equal to k
 
                 for(jNeik=0;jNeik<numneigh[j];jNeik++) {
                   temp_jk=BOP_index[j]+jNeik;
                   if(x[jlist[jNeik]][0]==x[k][0]) {
                     if(x[jlist[jNeik]][1]==x[k][1]) {
                       if(x[jlist[jNeik]][2]==x[k][2]) {
                         break;
                       }
                     }
                   }
                 }
 
 //find neighbor of k that is equal to j
 
                 for(kNeij=0;kNeij<numneigh[k];kNeij++) {
                   if(x[klist[kNeij]][0]==x[j][0]) {
                     if(x[klist[kNeij]][1]==x[j][1]) {
                       if(x[klist[kNeij]][2]==x[j][2]) {
                         break;
                       }
                     }
                   }
                 }
                 sig_flag=0;
                 for(nsearch=0;nsearch<nSigBk[n];nsearch++) {
                   ncmp=itypeSigBk[n][nsearch];
                   if(x[ncmp][0]==x[k][0]) {
                     if(x[ncmp][1]==x[k][1]) {
                       if(x[ncmp][2]==x[k][2]) {
                         nk0=nsearch;
                         sig_flag=1;
                         break;
                       }
                     }
                   }
                 }
                 if(sig_flag==0) {
                   nSigBk[n]=nSigBk[n]+1;
                   nk0=nSigBk[n]-1;
                   itypeSigBk[n][nk0]=k;
                 }
                 nb_ik=nb_t;
                 nb_t++;
                 if(nb_t>nb_sg) {
                   new_n_tot=nb_sg+maxneigh;
                   grow_sigma(nb_sg,new_n_tot);
                   nb_sg=new_n_tot;
                 }
                 bt_sg[nb_ik].temp=temp_ik;
                 bt_sg[nb_ik].i=i;
                 bt_sg[nb_ik].j=k;
                 nb_jk=nb_t;
                 nb_t++;
                 if(nb_t>nb_sg) {
                   new_n_tot=nb_sg+maxneigh;
                   grow_sigma(nb_sg,new_n_tot);
                   nb_sg=new_n_tot;
                 }
                 bt_sg[nb_jk].temp=temp_jk;
                 bt_sg[nb_jk].i=j;
                 bt_sg[nb_jk].j=k;
                 ang_jik=cos_index[i]+njik;
                 if(ang_jik>=cos_total) {
                   error->one(FLERR,"Too many atom triplets for pair bop");
                 }
                 gmean0=sigma_g0[jtype-1][itype-1][ktype-1];
                 gmean1=sigma_g1[jtype-1][itype-1][ktype-1];
                 gmean2=sigma_g2[jtype-1][itype-1][ktype-1];
                 amean=cosAng[ang_jik];
                 gfactor1=gmean0+gmean1*amean
                     +gmean2*amean*amean;
                 gfactorsq=gfactor1*gfactor1;
                 gprime1=gmean1+2.0*gmean2*amean;
                 gsqprime=2.0*gfactor1*gprime1;
 
 //AA is Eq. 34 (a) or Eq. 10 (c) for the i atom
 //1st CC is Eq. 11 (c) for i atom where j & k=neighbor of i
 
                 AA=AA+gfactorsq*betaS[temp_ik]*betaS[temp_ik];
                 CC=CC+gfactorsq*betaS[temp_ik]*betaS[temp_ik]*betaS[temp_ik]*betaS[temp_ik];
 //agpdpr1 is derivative of AA w.r.t. Beta(rik)
 //agpdpr2 is derivative of CC 1st term w.r.t. Beta(rik)
 //app1 is derivative of AA w.r.t. cos(theta_jik)
 //app2 is derivative of CC 1st term w.r.t. cos(theta_jik)
 
                 agpdpr1=2.0*gfactorsq*betaS[temp_ik]*dBetaS[temp_ik]/rij[temp_ik];
                 app1=betaS[temp_ik]*betaS[temp_ik]*gsqprime;
                 bt_sg[nb_ij].dAA[0]+=
                     app1*dcAng[ang_jik][0][ngj];
                 bt_sg[nb_ij].dAA[1]+=
                     app1*dcAng[ang_jik][1][ngj];
                 bt_sg[nb_ij].dAA[2]+=
                     app1*dcAng[ang_jik][2][ngj];
                 bt_sg[nb_ik].dAA[0]+=
                     app1*dcAng[ang_jik][0][ngk]
                     +agpdpr1*disij[0][temp_ik];
                 bt_sg[nb_ik].dAA[1]+=
                     app1*dcAng[ang_jik][1][ngk]
                     +agpdpr1*disij[1][temp_ik];
                 bt_sg[nb_ik].dAA[2]+=
                     app1*dcAng[ang_jik][2][ngk]
                     +agpdpr1*disij[2][temp_ik];
 
 //k' loops over all neighbors of j (with j and k both neighbors of i)
 //to determine which k' is k
 
                 kp_index=0;
                 for(ltmp=0;ltmp<numneigh[j];ltmp++) {
                   temp_jkp=BOP_index[j]+ltmp;
                   kp=jlist[ltmp];
                   if(x[kp][0]==x[k][0]) {
                     if(x[kp][1]==x[k][1]) {
                       if(x[kp][2]==x[k][2]) {
                         kp_index=1;
                         break;
                       }
                     }
                   }
                 }
                 if(kp_index) {
 
 //loop over neighbors of k
 
                   for(mtmp=0;mtmp<numneigh[k];mtmp++) {
                     kp=klist[mtmp];
                     if(x[kp][0]==x[j][0]) {
                       if(x[kp][1]==x[j][1]) {
                         if(x[kp][2]==x[j][2]) {
                           break;
                         }
                       }
                     }
                   }
                   if(ki<ltmp) {
                     nijk=ki*(2*numneigh[j]-ki-1)/2+(ltmp-ki)-1;
                     ngji=0;
                     ngjk=1;
                   }
                   else {
                     nijk=ltmp*(2*numneigh[j]-ltmp-1)/2+(ki-ltmp)-1;
                     ngji=1;
                     ngjk=0;
                   }
                   if(kNeii<mtmp) {
                     nikj=kNeii*(2*numneigh[k]-kNeii-1)/2+(mtmp-kNeii)-1;
                     ngki=0;
                     ngkj=1;
                   }
                   else {
                     nikj=mtmp*(2*numneigh[k]-mtmp-1)/2+(kNeii-mtmp)-1;
                     ngki=1;
                     ngkj=0;
                   }
                   ang_ijk=cos_index[j]+nijk;
                   if(ang_ijk>=cos_total) {
                     error->one(FLERR,"Too many atom triplets for pair bop");
                   }
                   gmean0=sigma_g0[itype-1][jtype-1][ktype-1];
                   gmean1=sigma_g1[itype-1][jtype-1][ktype-1];
                   gmean2=sigma_g2[itype-1][jtype-1][ktype-1];
                   amean=cosAng[ang_ijk];
                   gfactor2=gmean0+gmean1*amean
                       +gmean2*amean*amean;
                   gprime2=gmean1+2.0*gmean2*amean;
                   gmean0=sigma_g0[itype-1][ktype-1][jtype-1];
                   gmean1=sigma_g1[itype-1][ktype-1][jtype-1];
                   gmean2=sigma_g2[itype-1][ktype-1][jtype-1];
                   ang_ikj=cos_index[k]+nikj;
                   if(ang_ikj>=cos_total) {
                     error->one(FLERR,"Too many atom triplets for pair bop");
                   }
                   amean=cosAng[ang_ikj];
                   gfactor3=gmean0+gmean1*amean
                       +gmean2*amean*amean;
                   gprime3=gmean1+2.0*gmean2*amean;
                   gfactor=gfactor1*gfactor2*gfactor3;
                   rfactor=betaS[temp_ik]*betaS[temp_jkp];
 
 //EE1 is (b) Eq. 12
 
                   EE1=EE1+gfactor*rfactor;
 
 //rcm1 is derivative of EE1 w.r.t Beta(r_ik)
 //rcm2 is derivative of EE1 w.r.t Beta(r_jk')
 //gcm1 is derivative of EE1 w.r.t cos(theta_jik)
 //gcm2 is derivative of EE1 w.r.t cos(theta_ijk)
 //gcm3 is derivative of EE1 w.r.t cos(theta_ikj)
 
                   rcm1=gfactor*betaS[temp_jkp]*dBetaS[temp_ik]/rij[temp_ik];
                   rcm2=gfactor*betaS[temp_ik]*dBetaS[temp_jkp]/rij[temp_jkp];
                   gcm1=rfactor*gprime1*gfactor2*gfactor3;
                   gcm2=rfactor*gfactor1*gprime2*gfactor3;
                   gcm3=rfactor*gfactor1*gfactor2*gprime3;
                   bt_sg[nb_ij].dEE1[0]+=
                       gcm1*dcAng[ang_jik][0][ngj]
                       -gcm2*dcAng[ang_ijk][0][ngji];
                   bt_sg[nb_ij].dEE1[1]+=
                       gcm1*dcAng[ang_jik][1][ngj]
                       -gcm2*dcAng[ang_ijk][1][ngji];
                   bt_sg[nb_ij].dEE1[2]+=
                       gcm1*dcAng[ang_jik][2][ngj]
                       -gcm2*dcAng[ang_ijk][2][ngji];
                   bt_sg[nb_ik].dEE1[0]+=
                       gcm1*dcAng[ang_jik][0][ngk]
                       +rcm1*disij[0][temp_ik]
                       -gcm3*dcAng[ang_ikj][0][ngki];
                   bt_sg[nb_ik].dEE1[1]+=
                       gcm1*dcAng[ang_jik][1][ngk]
                       +rcm1*disij[1][temp_ik]
                       -gcm3*dcAng[ang_ikj][1][ngki];
                   bt_sg[nb_ik].dEE1[2]+=
                       gcm1*dcAng[ang_jik][2][ngk]
                       +rcm1*disij[2][temp_ik]
                       -gcm3*dcAng[ang_ikj][2][ngki];
                   bt_sg[nb_jk].dEE1[0]+=
                       gcm2*dcAng[ang_ijk][0][ngjk]
                       +rcm2*disij[0][temp_jkp]
                       -gcm3*dcAng[ang_ikj][0][ngkj];
                   bt_sg[nb_jk].dEE1[1]+=
                       gcm2*dcAng[ang_ijk][1][ngjk]
                       +rcm2*disij[1][temp_jkp]
                       -gcm3*dcAng[ang_ikj][1][ngkj];
                   bt_sg[nb_jk].dEE1[2]+=
                       gcm2*dcAng[ang_ijk][2][ngjk]
                       +rcm2*disij[2][temp_jkp]
                       -gcm3*dcAng[ang_ikj][2][ngkj];
                 }
 
 // k and k' and j are all different neighbors of i
 
                 for(ltmp=0;ltmp<ktmp;ltmp++) {
                   if(ltmp!=jtmp) {
                     temp_ikp=BOP_index[i]+ltmp;
                     if(neigh_flag[temp_ikp]) {
                       kp=iilist[ltmp];
                       kptype = map[type[kp]]+1;
                       for(nsearch=0;nsearch<nSigBk[n];nsearch++) {
                         ncmp=itypeSigBk[n][nsearch];
                         if(x[ncmp][0]==x[kp][0]) {
                           if(x[ncmp][1]==x[kp][1]) {
                             if(x[ncmp][2]==x[kp][2]) {
                               break;
                             }
                           }
                         }
                       }
                       if(jtmp<ltmp) {
                         njikp=jtmp*(2*numneigh[i]-jtmp-1)/2+(ltmp-jtmp)-1;
                       } else {
                         njikp=ltmp*(2*numneigh[i]-ltmp-1)/2+(jtmp-ltmp)-1;
                       }
                       if(ktmp<ltmp) {
                         nkikp=ktmp*(2*numneigh[i]-ktmp-1)/2+(ltmp-ktmp)-1;
                       } else {
                         nkikp=ltmp*(2*numneigh[i]-ltmp-1)/2+(ktmp-ltmp)-1;
                       }
                       ang_jikp=cos_index[i]+njikp;
                       if(ang_jikp>=cos_total) {
                         error->one(FLERR,"Too many atom triplets for pair bop");
                       }
                       gmean0=sigma_g0[jtype-1][itype-1][kptype-1];
                       gmean1=sigma_g1[jtype-1][itype-1][kptype-1];
                       gmean2=sigma_g2[jtype-1][itype-1][kptype-1];
                       amean=cosAng[ang_jikp];
                       gfactor2=gmean0+gmean1*amean
                           +gmean2*amean*amean;
                       gprime2=gmean1+2.0*gmean2*amean;
                       gmean0=sigma_g0[ktype-1][itype-1][kptype-1];
                       gmean1=sigma_g1[ktype-1][itype-1][kptype-1];
                       gmean2=sigma_g2[ktype-1][itype-1][kptype-1];
                       ang_kikp=cos_index[i]+nkikp;
                       if(ang_kikp>=cos_total) {
                         error->one(FLERR,"Too many atom triplets for pair bop");
                       }
                       amean=cosAng[ang_kikp];
                       gfactor3=gmean0+gmean1*amean
                           +gmean2*amean*amean;
                       gprime3=gmean1+2.0*gmean2*amean;
                       gfactor=gfactor1*gfactor2*gfactor3;
                       rfactorrt=betaS[temp_ik]*betaS[temp_ikp];
                       rfactor=rfactorrt*rfactorrt;
 
 //2nd CC is second term of Eq. 11 (c) for i atom where j , k & k' =neighbor of i
 
                       CC=CC+2.0*gfactor*rfactor;
                     }
                   }
                 }
 
 // j and k are different neighbors of i and k' is a neighbor k not equal to i
 
                 for(ltmp=0;ltmp<numneigh[k];ltmp++) {
                   temp_kkp=BOP_index[k]+ltmp;
                   if(neigh_flag[temp_kkp]) {
                     kp=klist[ltmp];;
                     kptype = map[type[kp]]+1;
                     same_ikp=0;
                     same_jkp=0;
                     if(x[i][0]==x[kp][0]) {
                       if(x[i][1]==x[kp][1]) {
                         if(x[i][2]==x[kp][2]) {
                           same_ikp=1;
                         }
                       }
                     }
                     if(x[j][0]==x[kp][0]) {
                       if(x[j][1]==x[kp][1]) {
                         if(x[j][2]==x[kp][2]) {
                           same_jkp=1;
                         }
                       }
                     }
                     if(!same_ikp&&!same_jkp) {
                       if(kNeii<ltmp) {
                         nikkp=kNeii*(2*numneigh[k]-kNeii-1)/2+(ltmp-kNeii)-1;
                       } else {
                         nikkp=ltmp*(2*numneigh[k]-ltmp-1)/2+(kNeii-ltmp)-1;
                       }
                       sig_flag=0;
                       for(nsearch=0;nsearch<nSigBk[n];nsearch++) {
                         ncmp=itypeSigBk[n][nsearch];
                         if(x[ncmp][0]==x[kp][0]) {
                           if(x[ncmp][1]==x[kp][1]) {
                             if(x[ncmp][2]==x[kp][2]) {
                               sig_flag=1;
                               nkp=nsearch;
                               break;
                             }
                           }
                         }
                       }
                       if(sig_flag==0) {
                         nSigBk[n]=nSigBk[n]+1;
                         nkp=nSigBk[n]-1;
                         itypeSigBk[n][nkp]=kp;
                       }
                       ang_ikkp=cos_index[k]+nikkp;
                       if(ang_ikkp>=cos_total) {
                         error->one(FLERR,"Too many atom triplets for pair bop");
                       }
                       gmean0=sigma_g0[itype-1][ktype-1][kptype-1];
                       gmean1=sigma_g1[itype-1][ktype-1][kptype-1];
                       gmean2=sigma_g2[itype-1][ktype-1][kptype-1];
                       amean=cosAng[ang_ikkp];
                       gfactor2=gmean0+gmean1*amean
                           +gmean2*amean*amean;
                       gprime2=gmean1+2.0*gmean2*amean;
                       gfactorsq2=gfactor2*gfactor2;
                       gfactor=gfactorsq*gfactorsq2;
                       rfactorrt=betaS[temp_ik]*betaS[temp_kkp];
                       rfactor=rfactorrt*rfactorrt;
 
 //3rd CC is third term of Eq. 11 (c) for i atom
 //where j , k =neighbor of i & k' =neighbor of k
 
                       CC=CC+gfactor*rfactor;
                     }
                   }
                 }
               }
             }
           }
 
 //j is a neighbor of i and k is a neighbor of j not equal to i
 
           for(ktmp=0;ktmp<numneigh[j];ktmp++) {
             if(ktmp!=ji) {
               if(ktmp<ji) {
                 njik=ktmp*(2*numneigh[j]-ktmp-1)/2+(ji-ktmp)-1;
                 ngi=1;
                 ngk=0;
               }
               else {
                 njik=ji*(2*numneigh[j]-ji-1)/2+(ktmp-ji)-1;
                 ngi=0;
                 ngk=1;
               }
               temp_jk=BOP_index[j]+ktmp;
               if(neigh_flag[temp_jk]) {
                 k=jlist[ktmp];
                 ktype=map[type[k]]+1;
                 klist=firstneigh[k];
 
                 for(kNeij=0;kNeij<numneigh[k];kNeij++) {
                   if(x[klist[kNeij]][0]==x[j][0]) {
                     if(x[klist[kNeij]][1]==x[j][1]) {
                       if(x[klist[kNeij]][2]==x[j][2]) {
                         break;
                       }
                     }
                   }
                 }
                 sig_flag=0;
                 for(nsearch=0;nsearch<nSigBk[n];nsearch++) {
                   ncmp=itypeSigBk[n][nsearch];
                   if(x[ncmp][0]==x[k][0]) {
                     if(x[ncmp][1]==x[k][1]) {
                       if(x[ncmp][2]==x[k][2]) {
                         new1=nsearch;
                         sig_flag=1;
                         break;
                       }
                     }
                   }
                 }
                 if(sig_flag==0) {
                   nSigBk[n]=nSigBk[n]+1;
                   new1=nSigBk[n]-1;
                   itypeSigBk[n][new1]=k;
                 }
                 ang_ijk=cos_index[j]+njik;
                 if(ang_ijk>=cos_total) {
                   error->one(FLERR,"Too many atom triplets for pair bop");
                 }
                 nb_jk=nb_t;
                 nb_t++;
                 if(nb_t>nb_sg) {
                   new_n_tot=nb_sg+maxneigh;
                   grow_sigma(nb_sg,new_n_tot);
                   nb_sg=new_n_tot;
                 }
                 bt_sg[nb_jk].temp=temp_jk;
                 bt_sg[nb_jk].i=j;
                 bt_sg[nb_jk].j=k;
                 gmean0=sigma_g0[itype-1][jtype-1][ktype-1];
                 gmean1=sigma_g1[itype-1][jtype-1][ktype-1];
                 gmean2=sigma_g2[itype-1][jtype-1][ktype-1];
                 amean=cosAng[ang_ijk];
                 gfactor1=gmean0+gmean1*amean
                     +gmean2*amean*amean;
                 gprime1=gmean1+2.0*gmean2*amean;
                 gfactorsq=gfactor1*gfactor1;
                 gsqprime=2.0*gfactor1*gprime1;
                 rfactor1rt=betaS[temp_jk]*betaS[temp_jk];
                 rfactor1=rfactor1rt*rfactor1rt;
 
 //BB is Eq. 34 (a) or Eq. 10 (c) for the j atom
 //1st DD is Eq. 11 (c) for j atom where i & k=neighbor of j
                 BB=BB+gfactorsq*rfactor1rt;
                 DD=DD+gfactorsq*rfactor1;
 
 //agpdpr1 is derivative of BB  w.r.t. Beta(r_jk)
 //app1 is derivative of BB w.r.t. cos(theta_ijk)
 
                 agpdpr1=2.0*gfactorsq*betaS[temp_jk]*dBetaS[temp_jk]/rij[temp_jk];
                 app1=rfactor1rt*gsqprime;
                 bt_sg[nb_ij].dBB[0]-=
                     app1*dcAng[ang_ijk][0][ngi];
                 bt_sg[nb_ij].dBB[1]-=
                     app1*dcAng[ang_ijk][1][ngi];
                 bt_sg[nb_ij].dBB[2]-=
                     app1*dcAng[ang_ijk][2][ngi];
                 bt_sg[nb_jk].dBB[0]+=
                     app1*dcAng[ang_ijk][0][ngk]
                     +agpdpr1*disij[0][temp_jk];
                 bt_sg[nb_jk].dBB[1]+=
                     app1*dcAng[ang_ijk][1][ngk]
                     +agpdpr1*disij[1][temp_jk];
                 bt_sg[nb_jk].dBB[2]+=
                     app1*dcAng[ang_ijk][2][ngk]
                     +agpdpr1*disij[2][temp_jk];
 
 //j is a neighbor of i, k and k' prime different neighbors of j not equal to i
 
                 for(ltmp=0;ltmp<ktmp;ltmp++) {
                   if(ltmp!=ji) {
                     temp_jkp=BOP_index[j]+ltmp;
                     if(neigh_flag[temp_jkp]) {
                       kp=jlist[ltmp];
                       kptype=map[type[kp]]+1;
                       for(nsearch=0;nsearch<nSigBk[n];nsearch++) {
                         ncmp=itypeSigBk[n][nsearch];
                         if(x[ncmp][0]==x[kp][0]) {
                           if(x[ncmp][1]==x[kp][1]) {
                             if(x[ncmp][2]==x[kp][2]) {
                               new2=nsearch;
                               break;
                             }
                           }
                         }
                       }
                       if(ji<ltmp) {
                         nijkp=ji*(2*numneigh[j]-ji-1)/2+(ltmp-ji)-1;
                       } else {
                         nijkp=ltmp*(2*numneigh[j]-ltmp-1)/2+(ji-ltmp)-1;
                       }
                       if(ktmp<ltmp) {
                         nkjkp=ktmp*(2*numneigh[j]-ktmp-1)/2+(ltmp-ktmp)-1;
                         ngjk=0;
                       }
                       else {
                         nkjkp=ltmp*(2*numneigh[j]-ltmp-1)/2+(ktmp-ltmp)-1;
                         ngjk=1;
                       }
                       ang_ijkp=cos_index[j]+nijkp;
                       if(ang_ijkp>=cos_total) {
                         error->one(FLERR,"Too many atom triplets for pair bop");
                       }
                       ang_kjkp=cos_index[j]+nkjkp;
                       if(ang_kjkp>=cos_total) {
                         error->one(FLERR,"Too many atom triplets for pair bop");
                       }
                       gmean0=sigma_g0[itype-1][jtype-1][kptype-1];
                       gmean1=sigma_g1[itype-1][jtype-1][kptype-1];
                       gmean2=sigma_g2[itype-1][jtype-1][kptype-1];
                       amean=cosAng[ang_ijkp];
                       gfactor2=gmean0+gmean1*amean
                         +gmean2*amean*amean;
                       gprime2=gmean1+2.0*gmean2*amean;
                       gmean0=sigma_g0[ktype-1][jtype-1][kptype-1];
                       gmean1=sigma_g1[ktype-1][jtype-1][kptype-1];
                       gmean2=sigma_g2[ktype-1][jtype-1][kptype-1];
                       amean=cosAng[ang_kjkp];
                       gfactor3=gmean0+gmean1*amean
                         +gmean2*amean*amean;
                       gprime3=gmean1+2.0*gmean2*amean;
                       gfactor=gfactor1*gfactor2*gfactor3;
                       rfactorrt=betaS[temp_jk]*betaS[temp_jkp];
                       rfactor=rfactorrt*rfactorrt;
 
 //2nd DD is Eq. 11 (c) for j atom where i , k & k'=neighbor of j
 
                       DD=DD+2.0*gfactor*rfactor;
                     }
                   }
                 }
 
 //j is a neighbor of i, k is a neighbor of j not equal to i and k'
 //is a neighbor of k not equal to j or i
 
                 for(ltmp=0;ltmp<numneigh[k];ltmp++) {
                   temp_kkp=BOP_index[k]+ltmp;
                   if(neigh_flag[temp_kkp]) {
                     kp=klist[ltmp];
                     kptype=map[type[kp]]+1;
                     same_ikp=0;
                     same_jkp=0;
                     if(x[i][0]==x[kp][0]) {
                       if(x[i][1]==x[kp][1]) {
                         if(x[i][2]==x[kp][2]) {
                           same_ikp=1;
                         }
                       }
                     }
                     if(x[j][0]==x[kp][0]) {
                       if(x[j][1]==x[kp][1]) {
                         if(x[j][2]==x[kp][2]) {
                           same_jkp=1;
                         }
                       }
                     }
                     if(!same_ikp&&!same_jkp) {
                       for(kNeij=0;kNeij<numneigh[k];kNeij++) {
                         if(x[klist[kNeij]][0]==x[j][0]) {
                           if(x[klist[kNeij]][1]==x[j][1]) {
                             if(x[klist[kNeij]][2]==x[j][2]) {
                               break;
                             }
                           }
                         }
                       }
                       if(kNeij<ltmp) {
                         njkkp=kNeij*(2*numneigh[k]-kNeij-1)/2+(ltmp-kNeij)-1;
                       } else {
                         njkkp=ltmp*(2*numneigh[k]-ltmp-1)/2+(kNeij-ltmp)-1;
                       }
                       sig_flag=0;
                       for(nsearch=0;nsearch<nSigBk[n];nsearch++) {
                         ncmp=itypeSigBk[n][nsearch];
                         if(x[ncmp][0]==x[kp][0]) {
                           if(x[ncmp][1]==x[kp][1]) {
                             if(x[ncmp][2]==x[kp][2]) {
                               new2=nsearch;
                               sig_flag=1;
                               break;
                             }
                           }
                         }
                       }
                       if(sig_flag==0) {
                         nSigBk[n]=nSigBk[n]+1;
                         new2=nSigBk[n]-1;
                         itypeSigBk[n][new2]=kp;
                       }
                       ang_jkkp=cos_index[k]+njkkp;
                       if(ang_jkkp>=cos_total) {
                         error->one(FLERR,"Too many atom triplets for pair bop");
                       }
                       gmean0=sigma_g0[jtype-1][ktype-1][kptype-1];
                       gmean1=sigma_g1[jtype-1][ktype-1][kptype-1];
                       gmean2=sigma_g2[jtype-1][ktype-1][kptype-1];
                       amean=cosAng[ang_jkkp];
                       gfactor2=gmean0+gmean1*amean
                           +gmean2*amean*amean;
                       gprime2=gmean1+2.0*gmean2*amean;
                       gfactorsq2=gfactor2*gfactor2;
                       gfactor=gfactorsq*gfactorsq2;
                       rfactorrt=betaS[temp_jk]*betaS[temp_kkp];
                       rfactor=rfactorrt*rfactorrt;
 
 //3rd DD is Eq. 11 (c) for j atom where i & k=neighbor of j & k'=neighbor of k
 
                       DD=DD+gfactor*rfactor;
                     }
                   }
                 }
               }
             }
           }
 
           sig_flag=0;
           if(sig_flag==0) {
 
 // AA and BB are the representations of (a) Eq. 34 and (b) Eq. 9
 // for atoms i and j respectively
 
             AAC=AA+BB;
             BBC=AA*BB;
             CCC=AA*AA+BB*BB;
             DDC=CC+DD;
 
 //EEC is a modified form of (a) Eq. 33
 
             EEC=(DDC-CCC)/(AAC+2.0*small1);
             for(m=0;m<nb_t;m++) {
               if((bt_sg[m].i>-1)&&(bt_sg[m].j>-1)) {
                 bt_i=bt_sg[m].i;
                 bt_j=bt_sg[m].j;
                 bt_sg[m].dAAC[0]=bt_sg[m].dAA[0]
                     +bt_sg[m].dBB[0];
                 bt_sg[m].dAAC[1]=bt_sg[m].dAA[1]
                     +bt_sg[m].dBB[1];
                 bt_sg[m].dAAC[2]=bt_sg[m].dAA[2]
                     +bt_sg[m].dBB[2];
               }
             }
             UT=EEC*FF+BBC+small3[iij];
             UT=1.0/sqrt(UT);
 
 // FFC is slightly modified form of (a) Eq. 31
 // GGC is slightly modified form of (a) Eq. 32
 // bndtmp is a slightly modified form of (a) Eq. 30 and (b) Eq. 8
 
             bndtmp=(FF+sigma_delta[iij]*sigma_delta[iij])
                 +sigma_c[iij]*AAC+small4;
             psign=1.0;
             bndtmp0=1.0/sqrt(bndtmp);
             sigB1[n]=psign*betaS[temp_ij]*bndtmp0;
             bndtmp=-0.5*bndtmp0*bndtmp0*bndtmp0;
             bndtmp1=psign*bndtmp0+psign*betaS[temp_ij]
                 *bndtmp*2.0*betaS[temp_ij];
             bndtmp1=bndtmp1*dBetaS[temp_ij]/rij[temp_ij];
             bndtmp2=psign*betaS[temp_ij]*bndtmp*sigma_c[iij];
             setting=0;
             for(m=0;m<nb_t;m++) {
               if((bt_sg[m].i>-1)&&(bt_sg[m].j>-1)) {
                 temp_kk=bt_sg[m].temp;
                 if(temp_kk==temp_ij&&setting==0) {
                   bt_sg[m].dSigB1[0]=bndtmp1*disij[0][temp_ij]
                       +(bndtmp2*bt_sg[m].dAAC[0]);
                   bt_sg[m].dSigB1[1]=bndtmp1*disij[1][temp_ij]
                       +(bndtmp2*bt_sg[m].dAAC[1]);
                   bt_sg[m].dSigB1[2]=bndtmp1*disij[2][temp_ij]
                       +(bndtmp2*bt_sg[m].dAAC[2]);
                   setting=1;
                 }
                 else if(temp_kk==temp_ji&&setting==0) {
                   bt_sg[m].dSigB1[0]=-bndtmp1*disij[0][temp_ij]
                       +(bndtmp2*bt_sg[m].dAAC[0]);
                   bt_sg[m].dSigB1[1]=-bndtmp1*disij[1][temp_ij]
                       +(bndtmp2*bt_sg[m].dAAC[1]);
                   bt_sg[m].dSigB1[2]=-bndtmp1*disij[2][temp_ij]
                       +(bndtmp2*bt_sg[m].dAAC[2]);
                   setting=1;
                 }
                 else {
                   bt_sg[m].dSigB1[0]=(bndtmp2*bt_sg[m].dAAC[0]);
                   bt_sg[m].dSigB1[1]=(bndtmp2*bt_sg[m].dAAC[1]);
                   bt_sg[m].dSigB1[2]=(bndtmp2*bt_sg[m].dAAC[2]);
                 }
               }
             }
 
 //This loop is to ensure there is not an error for atoms with no neighbors (deposition)
 
             if(nb_t==0) {
               if(j>i) {
                 bt_sg[0].dSigB1[0]=bndtmp1*disij[0][temp_ij];
                 bt_sg[0].dSigB1[1]=bndtmp1*disij[1][temp_ij];
                 bt_sg[0].dSigB1[2]=bndtmp1*disij[2][temp_ij];
               }
               else {
                 bt_sg[0].dSigB1[0]=-bndtmp1*disij[0][temp_ij];
                 bt_sg[0].dSigB1[1]=-bndtmp1*disij[1][temp_ij];
                 bt_sg[0].dSigB1[2]=-bndtmp1*disij[2][temp_ij];
               }
               for(pp=0;pp<3;pp++) {
                 bt_sg[0].dAA[pp]=0.0;
                 bt_sg[0].dBB[pp]=0.0;
                 bt_sg[0].dEE1[pp]=0.0;
                 bt_sg[0].dFF[pp]=0.0;
                 bt_sg[0].dAAC[pp]=0.0;
                 bt_sg[0].dSigB[pp]=0.0;
               }
               bt_sg[0].i=i;
               bt_sg[0].j=j;
               bt_sg[0].temp=temp_ij;
               nb_t++;
               if(nb_t>nb_sg) {
                 new_n_tot=nb_sg+maxneigh;
                 grow_sigma(nb_sg,new_n_tot);
                 nb_sg=new_n_tot;
               }
             }
             ps=sigB1[n]*rdBO+1.0;
             ks=(int)ps;
             if(nBOt-1<ks)
               ks=nBOt-1;
             ps=ps-ks;
             if(ps>1.0)
               ps=1.0;
             dsigB1=((FsigBO3[iij][ks-1]*ps+FsigBO2[iij][ks-1])*ps
                 +FsigBO1[iij][ks-1])*ps+FsigBO[iij][ks-1];
             dsigB2=(FsigBO6[iij][ks-1]*ps+FsigBO5[iij][ks-1])*ps+FsigBO4[iij][ks-1];
             part0=(FF+0.5*AAC+small5);
             part1=(sigma_f[iij]-0.5)*sigma_k[iij];
             part2=1.0-part1*EE1/part0;
             part3=dsigB1*part1/part0;
             part4=part3/part0*EE1;
 
 // sigB is the final expression for (a) Eq. 6 and (b) Eq. 11
 
             sigB[n]=dsigB1*part2;
             pp1=2.0*betaS[temp_ij];
             for(m=0;m<nb_t;m++) {
               if((bt_sg[m].i>-1)&&(bt_sg[m].j>-1)) {
                 temp_kk=bt_sg[m].temp;
                 bt_ij=bt_sg[m].temp;
                 bt_i=bt_sg[m].i;
                 bt_j=bt_sg[m].j;
                 for(pp=0;pp<3;pp++) {
                   bt_sg[m].dSigB[pp]=dsigB2*part2*bt_sg[m].dSigB1[pp]
                       -part3*bt_sg[m].dEE1[pp]
                       +part4*(bt_sg[m].dFF[pp]
                       +0.5*bt_sg[m].dAAC[pp]);
                 }
                 for(pp=0;pp<3;pp++) {
                   ftmp[pp]=pp1*bt_sg[m].dSigB[pp];
                   f[bt_i][pp]-=ftmp[pp];
                   f[bt_j][pp]+=ftmp[pp];
                 }
                 if(evflag) {
                   ev_tally_xyz(bt_i,bt_j,nlocal,newton_pair,0.0,0.0,ftmp[0],ftmp[1]
                       ,ftmp[2],disij[0][bt_ij],disij[1][bt_ij],disij[2][bt_ij]);
                 }
               }
             }
           }
           n++;
         }
       }
     }
   }
   destroy_sigma();
 }
 
 /* ---------------------------------------------------------------------- */
 
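 /*  The formulation differs slightly from the referenced equations in order
     to avoid taking the square root of a negative number in the calculation
     of Theta_pi,ij of (a) Eq. 36 and (b) Eq. 18.
     NOTE(review): this comment sits directly above sigmaBo_otf(); confirm
     that the Theta_pi,ij reference is the intended one for that routine. */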
 
 void PairBOP::sigmaBo_otf()
 {
   int nb_t,new_n_tot;
   int n,i,j,k,kp,m,pp,kpj,kpk,kkp;
   int itmp,jtmp,ktmp,ltmp,mtmp;
   tagint i_tag,j_tag;
   int kp1,kp2,kp1type;
   int iij,iik,ijk,ikkp,ji,iikp,ijkp;
   int nkp;
   int nk0;
   int jNeik,kNeii,kNeij,kNeikp;
   int kpNeij,kpNeik;
   int new1,new2,nlocal;
   int inum,*ilist,*iilist,*jlist,*klist,*kplist;
   int **firstneigh,*numneigh;
   int temp_ij,temp_ik,temp_jkp,temp_kk,temp_jk;
   int temp_ji,temp_kkp;
   int temp_ikp;
   int nb_ij,nb_ik,nb_ikp;
   int nb_jk,nb_jkp,nb_kkp;
   int nsearch;
   int sig_flag,setting,ncmp,ks;
   int itype,jtype,ktype,kptype;
   int bt_i,bt_j;
   int same_ikp,same_jkp,same_kpk;
   int same_jkpj,same_kkpk;
   double AA,BB,CC,DD,EE,EE1,FF;
   double AAC,BBC,CCC,DDC,EEC,FFC,GGC;
   double AACFF,UT,bndtmp,UTcom;
   double amean,gmean0,gmean1,gmean2,ps;
   double gfactor1,gprime1,gsqprime;
   double gfactorsq,gfactor2,gprime2;
   double gfactorsq2,gsqprime2;
   double gfactor3,gprime3,gfactor,rfactor;
   double drfactor,gfactor4,gprime4,agpdpr3;
   double rfactor0,rfactorrt,rfactor1rt,rfactor1;
   double rcm1,rcm2,gcm1,gcm2,gcm3;
   double agpdpr1,agpdpr2,app1,app2,app3,app4;
   double dsigB1,dsigB2;
   double part0,part1,part2,part3,part4;
   double psign,bndtmp0,pp1;
   double bndtmp1,bndtmp2,bndtmp3,bndtmp4,bndtmp5;
   double dis_ij[3],rsq_ij,r_ij;
   double betaS_ij,dBetaS_ij;
   double dis_ik[3],rsq_ik,r_ik;
   double betaS_ik,dBetaS_ik;
   double dis_ikp[3],rsq_ikp,r_ikp;
   double betaS_ikp,dBetaS_ikp;
   double dis_jk[3],rsq_jk,r_jk;
   double betaS_jk,dBetaS_jk;
   double dis_jkp[3],rsq_jkp,r_jkp;
   double betaS_jkp,dBetaS_jkp;
   double dis_kkp[3],rsq_kkp,r_kkp;
   double betaS_kkp,dBetaS_kkp;
   double cosAng_jik,dcA_jik[3][2];
   double cosAng_jikp,dcA_jikp[3][2];
   double cosAng_kikp,dcA_kikp[3][2];
   double cosAng_ijk,dcA_ijk[3][2];
   double cosAng_ijkp,dcA_ijkp[3][2];
   double cosAng_kjkp,dcA_kjkp[3][2];
   double cosAng_ikj,dcA_ikj[3][2];
   double cosAng_ikkp,dcA_ikkp[3][2];
   double cosAng_jkkp,dcA_jkkp[3][2];
   double cosAng_jkpk,dcA_jkpk[3][2];
 
   double ftmp[3],xtmp[3];
   double **x = atom->x;
   double **f = atom->f;
   tagint *tag = atom->tag;
   int newton_pair = force->newton_pair;
   int *type = atom->type;
 
   nlocal = atom->nlocal;
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   n=0;
   if(nb_sg==0) {
     nb_sg=(maxneigh)*(maxneigh/2);
   }
   if(allocate_sigma) {
     destroy_sigma();
   }
 
   create_sigma(nb_sg);
 
   for(itmp=0;itmp<inum;itmp++) {
     i = ilist[itmp];
     i_tag=tag[i];
     itype = map[type[i]]+1;
 
 //j is loop over all neighbors of i
 
     for(jtmp=0;jtmp<numneigh[i];jtmp++) {
       for(m=0;m<nb_sg;m++) {
         for(pp=0;pp<3;pp++) {
           bt_sg[m].dAA[pp]=0.0;
           bt_sg[m].dBB[pp]=0.0;
           bt_sg[m].dCC[pp]=0.0;
           bt_sg[m].dDD[pp]=0.0;
           bt_sg[m].dEE[pp]=0.0;
           bt_sg[m].dEE1[pp]=0.0;
           bt_sg[m].dFF[pp]=0.0;
           bt_sg[m].dAAC[pp]=0.0;
           bt_sg[m].dBBC[pp]=0.0;
           bt_sg[m].dCCC[pp]=0.0;
           bt_sg[m].dDDC[pp]=0.0;
           bt_sg[m].dEEC[pp]=0.0;
           bt_sg[m].dFFC[pp]=0.0;
           bt_sg[m].dGGC[pp]=0.0;
           bt_sg[m].dUT[pp]=0.0;
           bt_sg[m].dSigB1[pp]=0.0;
           bt_sg[m].dSigB[pp]=0.0;
         }
         bt_sg[m].i=-1;
         bt_sg[m].j=-1;
         bt_sg[m].temp=-1;
       }
       nb_t=0;
       iilist=firstneigh[i];
       temp_ij=BOP_index[i]+jtmp;
       j=iilist[jtmp];
       jlist=firstneigh[j];
       j_tag=tag[j];
       jtype = map[type[j]]+1;
       nb_ij=nb_t;
       nb_t++;
       if(nb_t>nb_sg) {
         new_n_tot=nb_sg+maxneigh;
         grow_sigma(nb_sg,new_n_tot);
         nb_sg=new_n_tot;
       }
       bt_sg[nb_ij].temp=temp_ij;
       bt_sg[nb_ij].i=i;
       bt_sg[nb_ij].j=j;
       if(j_tag>=i_tag) {
         if(itype==jtype)
           iij=itype-1;
         else if(itype<jtype)
           iij=itype*bop_types-itype*(itype+1)/2+jtype-1;
         else
           iij=jtype*bop_types-jtype*(jtype+1)/2+itype-1;
         for(ji=0;ji<numneigh[j];ji++) {
           temp_ji=BOP_index[j]+ji;
           if(x[jlist[ji]][0]==x[i][0]) {
             if(x[jlist[ji]][1]==x[i][1]) {
               if(x[jlist[ji]][2]==x[i][2]) {
                 break;
               }
             }
           }
         }
         dis_ij[0]=x[j][0]-x[i][0];
         dis_ij[1]=x[j][1]-x[i][1];
         dis_ij[2]=x[j][2]-x[i][2];
         rsq_ij=dis_ij[0]*dis_ij[0]
             +dis_ij[1]*dis_ij[1]
             +dis_ij[2]*dis_ij[2];
         r_ij=sqrt(rsq_ij);
 
         if(r_ij<rcut[iij]) {
           ps=r_ij*rdr[iij]+1.0;
           ks=(int)ps;
           if(nr-1<ks)
             ks=nr-1;
           ps=ps-ks;
           if(ps>1.0)
             ps=1.0;
           betaS_ij=((pBetaS3[iij][ks-1]*ps+pBetaS2[iij][ks-1])*ps
               +pBetaS1[iij][ks-1])*ps+pBetaS[iij][ks-1];
           dBetaS_ij=(pBetaS6[iij][ks-1]*ps+pBetaS5[iij][ks-1])*ps
               +pBetaS4[iij][ks-1];
           nSigBk[n]=0;
 
 //AA-EE1 are the components making up Eq. 30 (a)
 
           AA=0.0;
           BB=0.0;
           CC=0.0;
           DD=0.0;
           EE=0.0;
           EE1=0.0;
 
 //FF is the Beta_sigma^2 term
 
           FF=betaS_ij*betaS_ij;
 
 //agpdpr1 is derivative of FF w.r.t. r_ij
 
           agpdpr1=2.0*betaS_ij*dBetaS_ij/r_ij;
 
 //dXX derivatives are taken with respect to all pairs contributing to the energy
 //nb_ij is derivative w.r.t. ij pair
 
           bt_sg[nb_ij].dFF[0]=agpdpr1*dis_ij[0];
           bt_sg[nb_ij].dFF[1]=agpdpr1*dis_ij[1];
           bt_sg[nb_ij].dFF[2]=agpdpr1*dis_ij[2];
 
 //k is loop over all neighbors of i again with j neighbor of i
 
           for(ktmp=0;ktmp<numneigh[i];ktmp++) {
             temp_ik=BOP_index[i]+ktmp;
             if(ktmp!=jtmp) {
               k=iilist[ktmp];
               klist=firstneigh[k];
               ktype = map[type[k]]+1;
               if(itype==ktype)
                 iik=itype-1;
               else if(itype<ktype)
                 iik=itype*bop_types-itype*(itype+1)/2+ktype-1;
               else
                 iik=ktype*bop_types-ktype*(ktype+1)/2+itype-1;
 
 //find neighbor of k that is equal to i
 
               for(kNeii=0;kNeii<numneigh[k];kNeii++) {
                 if(x[klist[kNeii]][0]==x[i][0]) {
                   if(x[klist[kNeii]][1]==x[i][1]) {
                     if(x[klist[kNeii]][2]==x[i][2]) {
                       break;
                     }
                   }
                 }
               }
               dis_ik[0]=x[k][0]-x[i][0];
               dis_ik[1]=x[k][1]-x[i][1];
               dis_ik[2]=x[k][2]-x[i][2];
               rsq_ik=dis_ik[0]*dis_ik[0]
                   +dis_ik[1]*dis_ik[1]
                   +dis_ik[2]*dis_ik[2];
               r_ik=sqrt(rsq_ik);
               if(r_ik<=rcut[iik]) {
                 ps=r_ik*rdr[iik]+1.0;
                 ks=(int)ps;
                 if(nr-1<ks)
                   ks=nr-1;
                 ps=ps-ks;
                 if(ps>1.0)
                   ps=1.0;
                 betaS_ik=((pBetaS3[iik][ks-1]*ps+pBetaS2[iik][ks-1])*ps
                     +pBetaS1[iik][ks-1])*ps+pBetaS[iik][ks-1];
                 dBetaS_ik=(pBetaS6[iik][ks-1]*ps+pBetaS5[iik][ks-1])*ps
                     +pBetaS4[iik][ks-1];
 
 //find neighbor of i that is equal to k
 
                 for(jNeik=0;jNeik<numneigh[j];jNeik++) {
                   temp_jk=BOP_index[j]+jNeik;
                   if(x[jlist[jNeik]][0]==x[k][0]) {
                     if(x[jlist[jNeik]][1]==x[k][1]) {
                       if(x[jlist[jNeik]][2]==x[k][2]) {
                         break;
                       }
                     }
                   }
                 }
 
 //find neighbor of k that is equal to j
 
                 for(kNeij=0;kNeij<numneigh[k];kNeij++) {
                   if(x[klist[kNeij]][0]==x[j][0]) {
                     if(x[klist[kNeij]][1]==x[j][1]) {
                       if(x[klist[kNeij]][2]==x[j][2]) {
                         break;
                       }
                     }
                   }
                 }
                 dis_jk[0]=x[k][0]-x[j][0];
                 dis_jk[1]=x[k][1]-x[j][1];
                 dis_jk[2]=x[k][2]-x[j][2];
                 rsq_jk=dis_jk[0]*dis_jk[0]
                     +dis_jk[1]*dis_jk[1]
                     +dis_jk[2]*dis_jk[2];
                 r_jk=sqrt(rsq_jk);
 
                 sig_flag=0;
                 for(nsearch=0;nsearch<nSigBk[n];nsearch++) {
                   ncmp=itypeSigBk[n][nsearch];
                   if(x[ncmp][0]==x[k][0]) {
                     if(x[ncmp][1]==x[k][1]) {
                       if(x[ncmp][2]==x[k][2]) {
                         nk0=nsearch;
                         sig_flag=1;
                         break;
                       }
                     }
                   }
                 }
                 if(sig_flag==0) {
                   nSigBk[n]=nSigBk[n]+1;
                   nk0=nSigBk[n]-1;
                   itypeSigBk[n][nk0]=k;
                 }
                 nb_ik=nb_t;
                 nb_t++;
                 if(nb_t>nb_sg) {
                   new_n_tot=nb_sg+maxneigh;
                   grow_sigma(nb_sg,new_n_tot);
                   nb_sg=new_n_tot;
                 }
                 bt_sg[nb_ik].temp=temp_ik;
                 bt_sg[nb_ik].i=i;
                 bt_sg[nb_ik].j=k;
                 nb_jk=nb_t;
                 nb_t++;
                 if(nb_t>nb_sg) {
                   new_n_tot=nb_sg+maxneigh;
                   grow_sigma(nb_sg,new_n_tot);
                   nb_sg=new_n_tot;
                 }
                 bt_sg[nb_jk].temp=temp_jk;
                 bt_sg[nb_jk].i=j;
                 bt_sg[nb_jk].j=k;
                 cosAng_jik=(dis_ij[0]*dis_ik[0]+dis_ij[1]*dis_ik[1]
                     +dis_ij[2]*dis_ik[2])/(r_ij*r_ik);
                 dcA_jik[0][0]=(dis_ik[0]*r_ij*r_ik-cosAng_jik
                     *dis_ij[0]*r_ik*r_ik)/(r_ij*r_ij*r_ik*r_ik);
                 dcA_jik[1][0]=(dis_ik[1]*r_ij*r_ik-cosAng_jik
                     *dis_ij[1]*r_ik*r_ik)/(r_ij*r_ij*r_ik*r_ik);
                 dcA_jik[2][0]=(dis_ik[2]*r_ij*r_ik-cosAng_jik
                     *dis_ij[2]*r_ik*r_ik)/(r_ij*r_ij*r_ik*r_ik);
                 dcA_jik[0][1]=(dis_ij[0]*r_ij*r_ik-cosAng_jik
                     *dis_ik[0]*r_ij*r_ij)/(r_ij*r_ij*r_ik*r_ik);
                 dcA_jik[1][1]=(dis_ij[1]*r_ij*r_ik-cosAng_jik
                     *dis_ik[1]*r_ij*r_ij)/(r_ij*r_ij*r_ik*r_ik);
                 dcA_jik[2][1]=(dis_ij[2]*r_ij*r_ik-cosAng_jik
                     *dis_ik[2]*r_ij*r_ij)/(r_ij*r_ij*r_ik*r_ik);
                 gmean0=sigma_g0[jtype-1][itype-1][ktype-1];
                 gmean1=sigma_g1[jtype-1][itype-1][ktype-1];
                 gmean2=sigma_g2[jtype-1][itype-1][ktype-1];
                 amean=cosAng_jik;
                 gfactor1=gmean0+gmean1*amean
                     +gmean2*amean*amean;
                 gfactorsq=gfactor1*gfactor1;
                 gprime1=gmean1+2.0*gmean2*amean;
                 gsqprime=2.0*gfactor1*gprime1;
 
 //AA is Eq. 34 (a) or Eq. 10 (c) for the i atom
 //1st CC is Eq. 11 (c) for i atom where j & k=neighbor of i
 
                 AA=AA+gfactorsq*betaS_ik*betaS_ik;
                 CC=CC+gfactorsq*betaS_ik*betaS_ik*betaS_ik*betaS_ik;
 
 //agpdpr1 is derivative of AA w.r.t. Beta(rik)
 //app1 is derivative of AA w.r.t. cos(theta_jik)
 
                 agpdpr1=2.0*gfactorsq*betaS_ik*dBetaS_ik/r_ik;
                 app1=betaS_ik*betaS_ik*gsqprime;
                 bt_sg[nb_ij].dAA[0]+=
                     app1*dcA_jik[0][0];
                 bt_sg[nb_ij].dAA[1]+=
                     app1*dcA_jik[1][0];
                 bt_sg[nb_ij].dAA[2]+=
                     app1*dcA_jik[2][0];
                 bt_sg[nb_ij].dCC[0]+=
                     app2*dcA_jik[0][0];
                 bt_sg[nb_ij].dCC[1]+=
                     app2*dcA_jik[1][0];
                 bt_sg[nb_ij].dCC[2]+=
                     app2*dcA_jik[2][0];
                 bt_sg[nb_ik].dAA[0]+=
                     app1*dcA_jik[0][1]
                     +agpdpr1*dis_ik[0];
                 bt_sg[nb_ik].dAA[1]+=
                     app1*dcA_jik[1][1]
                     +agpdpr1*dis_ik[1];
                 bt_sg[nb_ik].dAA[2]+=
                     app1*dcA_jik[2][1]
                     +agpdpr1*dis_ik[2];
                 bt_sg[nb_ik].dCC[0]+=
                     app2*dcA_jik[0][1]
                     +agpdpr2*dis_ik[0];
                 bt_sg[nb_ik].dCC[1]+=
                     app2*dcA_jik[1][1]
                     +agpdpr2*dis_ik[1];
                 bt_sg[nb_ik].dCC[2]+=
                     app2*dcA_jik[2][1]
                     +agpdpr2*dis_ik[2];
 
 //k' is loop over neighbors all neighbors of j with k a neighbor
 //of i and j a neighbor of i and determine which k' is k
 
                 same_kpk=0;
                 for(ltmp=0;ltmp<numneigh[j];ltmp++) {
                   temp_jkp=BOP_index[j]+ltmp;
                   kp1=jlist[ltmp];
                   kp1type=map[type[kp1]]+1;
                   if(x[kp1][0]==x[k][0]) {
                     if(x[kp1][1]==x[k][1]) {
                       if(x[kp1][2]==x[k][2]) {
                         same_kpk=1;
                         break;
                       }
                     }
                   }
                 }
                 if(same_kpk){
 
 //loop over neighbors of k
 
                   for(mtmp=0;mtmp<numneigh[k];mtmp++) {
                     kp2=klist[mtmp];
                     if(x[kp2][0]==x[k][0]) {
                       if(x[kp2][1]==x[k][1]) {
                         if(x[kp2][2]==x[k][2]) {
                           break;
                         }
                       }
                     }
                   }
                   if(jtype==ktype)
                     ijk=jtype-1;
                   else if(jtype < ktype)
                     ijk=jtype*bop_types-jtype*(jtype+1)/2+ktype-1;
                   else
                     ijk=ktype*bop_types-ktype*(ktype+1)/2+jtype-1;
                   if(jtype==kp1type)
                     ijkp=jtype-1;
                   else if(jtype<kp1type)
                     ijkp=jtype*bop_types-jtype*(jtype+1)/2+kp1type-1;
                   else
                     ijkp=kp1type*bop_types-kp1type*(kp1type+1)/2+jtype-1;
 
                   dis_jkp[0]=x[kp1][0]-x[j][0];
                   dis_jkp[1]=x[kp1][1]-x[j][1];
                   dis_jkp[2]=x[kp1][2]-x[j][2];
                   rsq_jkp=dis_jkp[0]*dis_jkp[0]
                       +dis_jkp[1]*dis_jkp[1]
                       +dis_jkp[2]*dis_jkp[2];
                   r_jkp=sqrt(rsq_jkp);
                   if(r_jkp<=rcut[ijkp]) {
                     ps=r_jkp*rdr[ijkp]+1.0;
                     ks=(int)ps;
                     if(nr-1<ks)
                       ks=nr-1;
                     ps=ps-ks;
                     if(ps>1.0)
                       ps=1.0;
                     betaS_jkp=((pBetaS3[ijkp][ks-1]*ps+pBetaS2[ijkp][ks-1])*ps
                         +pBetaS1[ijkp][ks-1])*ps+pBetaS[ijkp][ks-1];
                     dBetaS_jkp=(pBetaS6[ijkp][ks-1]*ps+pBetaS5[ijkp][ks-1])*ps
                         +pBetaS4[ijkp][ks-1];
                     cosAng_ijk=(-dis_ij[0]*dis_jk[0]-dis_ij[1]*dis_jk[1]
                         -dis_ij[2]*dis_jk[2])/(r_ij*r_jk);
                     dcA_ijk[0][0]=(dis_jk[0]*r_ij*r_jk-cosAng_ijk
                         *-dis_ij[0]*r_jk*r_jk)/(r_ij*r_ij*r_jk*r_jk);
                     dcA_ijk[1][0]=(dis_jk[1]*r_ij*r_jk-cosAng_ijk
                         *-dis_ij[1]*r_jk*r_jk)/(r_ij*r_ij*r_jk*r_jk);
                     dcA_ijk[2][0]=(dis_jk[2]*r_ij*r_jk-cosAng_ijk
                         *-dis_ij[2]*r_jk*r_jk)/(r_ij*r_ij*r_jk*r_jk);
                     dcA_ijk[0][1]=(-dis_ij[0]*r_ij*r_jk-cosAng_ijk
                         *dis_jk[0]*r_ij*r_ij)/(r_ij*r_ij*r_jk*r_jk);
                     dcA_ijk[1][1]=(-dis_ij[1]*r_ij*r_jk-cosAng_ijk
                         *dis_jk[1]*r_ij*r_ij)/(r_ij*r_ij*r_jk*r_jk);
                     dcA_ijk[2][1]=(-dis_ij[2]*r_ij*r_jk-cosAng_ijk
                         *dis_jk[2]*r_ij*r_ij)/(r_ij*r_ij*r_jk*r_jk);
                     gmean0=sigma_g0[itype-1][jtype-1][ktype-1];
                     gmean1=sigma_g1[itype-1][jtype-1][ktype-1];
                     gmean2=sigma_g2[itype-1][jtype-1][ktype-1];
                     amean=cosAng_ijk;
                     gfactor2=gmean0+gmean1*amean
                         +gmean2*amean*amean;
                     gprime2=gmean1+2.0*gmean2*amean;
                     gmean0=sigma_g0[itype-1][ktype-1][jtype-1];
                     gmean1=sigma_g1[itype-1][ktype-1][jtype-1];
                     gmean2=sigma_g2[itype-1][ktype-1][jtype-1];
                     cosAng_ikj=(dis_ik[0]*dis_jk[0]+dis_ik[1]*dis_jk[1]
                         +dis_ik[2]*dis_jk[2])/(r_ik*r_jk);
                     dcA_ikj[0][0]=(-dis_jk[0]*r_ik*r_jk-cosAng_ikj
                         *-dis_ik[0]*r_jk*r_jk)/(r_ik*r_ik*r_jk*r_jk);
                     dcA_ikj[1][0]=(-dis_jk[1]*r_ik*r_jk-cosAng_ikj
                         *-dis_ik[1]*r_jk*r_jk)/(r_ik*r_ik*r_jk*r_jk);
                     dcA_ikj[2][0]=(-dis_jk[2]*r_ik*r_jk-cosAng_ikj
                         *-dis_ik[2]*r_jk*r_jk)/(r_ik*r_ik*r_jk*r_jk);
                     dcA_ikj[0][1]=(-dis_ik[0]*r_ik*r_jk-cosAng_ikj
                         *-dis_jk[0]*r_ik*r_ik)/(r_ik*r_ik*r_jk*r_jk);
                     dcA_ikj[1][1]=(-dis_ik[1]*r_ik*r_jk-cosAng_ikj
                         *-dis_jk[1]*r_ik*r_ik)/(r_ik*r_ik*r_jk*r_jk);
                     dcA_ikj[2][1]=(-dis_ik[2]*r_ik*r_jk-cosAng_ikj
                         *-dis_jk[2]*r_ik*r_ik)/(r_ik*r_ik*r_jk*r_jk);
                     amean=cosAng_ikj;
                     gfactor3=gmean0+gmean1*amean
                         +gmean2*amean*amean;
                     gprime3=gmean1+2.0*gmean2*amean;
                     gfactor=gfactor1*gfactor2*gfactor3;
                     rfactor=betaS_ik*betaS_jkp;
 
 //EE1 is (b) Eq. 12
 
                     EE1=EE1+gfactor*rfactor;
 
 //rcm1 is derivative of EE1 w.r.t Beta(r_ik)
 //rcm2 is derivative of EE1 w.r.t Beta(r_jk')
 //gcm1 is derivative of EE1 w.r.t cos(theta_jik)
 //gcm2 is derivative of EE1 w.r.t cos(theta_ijk)
 //gcm3 is derivative of EE1 w.r.t cos(theta_ikj)
 
                     rcm1=gfactor*betaS_jkp*dBetaS_ik/r_ik;
                     rcm2=gfactor*betaS_ik*dBetaS_jkp/r_jkp;
                     gcm1=rfactor*gprime1*gfactor2*gfactor3;
                     gcm2=rfactor*gfactor1*gprime2*gfactor3;
                     gcm3=rfactor*gfactor1*gfactor2*gprime3;
                     bt_sg[nb_ij].dEE1[0]+=
                         gcm1*dcA_jik[0][0]
                         -gcm2*dcA_ijk[0][0];
                     bt_sg[nb_ij].dEE1[1]+=
                         gcm1*dcA_jik[1][0]
                         -gcm2*dcA_ijk[1][0];
                     bt_sg[nb_ij].dEE1[2]+=
                         gcm1*dcA_jik[2][0]
                         -gcm2*dcA_ijk[2][0];
                     bt_sg[nb_ik].dEE1[0]+=
                         gcm1*dcA_jik[0][1]
                         +rcm1*dis_ik[0]
                         -gcm3*dcA_ikj[0][0];
                     bt_sg[nb_ik].dEE1[1]+=
                         gcm1*dcA_jik[1][1]
                         +rcm1*dis_ik[1]
                         -gcm3*dcA_ikj[1][0];
                     bt_sg[nb_ik].dEE1[2]+=
                         gcm1*dcA_jik[2][1]
                         +rcm1*dis_ik[2]
                         -gcm3*dcA_ikj[2][0];
                     bt_sg[nb_jk].dEE1[0]+=
                         gcm2*dcA_ijk[0][1]
                         +rcm2*dis_jkp[0]
                         -gcm3*dcA_ikj[0][1];
                     bt_sg[nb_jk].dEE1[1]+=
                         gcm2*dcA_ijk[1][1]
                         +rcm2*dis_jkp[1]
                         -gcm3*dcA_ikj[1][1];
                     bt_sg[nb_jk].dEE1[2]+=
                         gcm2*dcA_ijk[2][1]
                         +rcm2*dis_jkp[2]
                         -gcm3*dcA_ikj[2][1];
                   }
                 }
 
 // k and k' and j are all different neighbors of i
 
                 for(ltmp=0;ltmp<ktmp;ltmp++) {
                   if(ltmp!=jtmp) {
                     temp_ikp=BOP_index[i]+ltmp;
                     kp=iilist[ltmp];;
                     kptype = map[type[kp]]+1;
                     if(itype==kptype)
                       iikp=itype-1;
                     else if(itype<kptype)
                       iikp=itype*bop_types-itype*(itype+1)/2+kptype-1;
                     else
                       iikp=kptype*bop_types-kptype*(kptype+1)/2+itype-1;
                     for(nsearch=0;nsearch<nSigBk[n];nsearch++) {
                       ncmp=itypeSigBk[n][nsearch];
                       if(x[ncmp][0]==x[kp][0]) {
                         if(x[ncmp][1]==x[kp][1]) {
                           if(x[ncmp][2]==x[kp][2]) {
                             break;
                           }
                         }
                       }
                     }
                     dis_ikp[0]=x[kp][0]-x[i][0];
                     dis_ikp[1]=x[kp][1]-x[i][1];
                     dis_ikp[2]=x[kp][2]-x[i][2];
                     rsq_ikp=dis_ikp[0]*dis_ikp[0]
                         +dis_ikp[1]*dis_ikp[1]
                         +dis_ikp[2]*dis_ikp[2];
                     r_ikp=sqrt(rsq_ikp);
                     if(r_ikp<=rcut[iikp]) {
                       ps=r_ikp*rdr[iikp]+1.0;
                       ks=(int)ps;
                       if(nr-1<ks)
                         ks=nr-1;
                       ps=ps-ks;
                       if(ps>1.0)
                         ps=1.0;
                       betaS_ikp=((pBetaS3[iikp][ks-1]*ps+pBetaS2[iikp][ks-1])*ps
                           +pBetaS1[iikp][ks-1])*ps+pBetaS[iikp][ks-1];
                       dBetaS_ikp=(pBetaS6[iikp][ks-1]*ps+pBetaS5[iikp][ks-1])*ps
                           +pBetaS4[iikp][ks-1];
                       nb_ikp=nb_t;
                       nb_t++;
                       if(nb_t>nb_sg) {
                         new_n_tot=nb_sg+maxneigh;
                         grow_sigma(nb_sg,new_n_tot);
                         nb_sg=new_n_tot;
                       }
                       bt_sg[nb_ikp].temp=temp_ikp;
                       bt_sg[nb_ikp].i=i;
                       bt_sg[nb_ikp].j=kp;
                       gmean0=sigma_g0[jtype-1][itype-1][kptype-1];
                       gmean1=sigma_g1[jtype-1][itype-1][kptype-1];
                       gmean2=sigma_g2[jtype-1][itype-1][kptype-1];
                       cosAng_jikp=(dis_ij[0]*dis_ikp[0]+dis_ij[1]*dis_ikp[1]
                           +dis_ij[2]*dis_ikp[2])/(r_ij*r_ikp);
                       dcA_jikp[0][0]=(dis_ikp[0]*r_ij*r_ikp-cosAng_jikp
                           *dis_ij[0]*r_ikp*r_ikp)/(r_ij*r_ij*r_ikp*r_ikp);
                       dcA_jikp[1][0]=(dis_ikp[1]*r_ij*r_ikp-cosAng_jikp
                           *dis_ij[1]*r_ikp*r_ikp)/(r_ij*r_ij*r_ikp*r_ikp);
                       dcA_jikp[2][0]=(dis_ikp[2]*r_ij*r_ikp-cosAng_jikp
                           *dis_ij[2]*r_ikp*r_ikp)/(r_ij*r_ij*r_ikp*r_ikp);
                       dcA_jikp[0][1]=(dis_ij[0]*r_ij*r_ikp-cosAng_jikp
                           *dis_ikp[0]*r_ij*r_ij)/(r_ij*r_ij*r_ikp*r_ikp);
                       dcA_jikp[1][1]=(dis_ij[1]*r_ij*r_ikp-cosAng_jikp
                           *dis_ikp[1]*r_ij*r_ij)/(r_ij*r_ij*r_ikp*r_ikp);
                       dcA_jikp[2][1]=(dis_ij[2]*r_ij*r_ikp-cosAng_jikp
                           *dis_ikp[2]*r_ij*r_ij)/(r_ij*r_ij*r_ikp*r_ikp);
                       cosAng_kikp=(dis_ik[0]*dis_ikp[0]+dis_ik[1]*dis_ikp[1]
                           +dis_ik[2]*dis_ikp[2])/(r_ik*r_ikp);
                       dcA_kikp[0][0]=(dis_ikp[0]*r_ik*r_ikp-cosAng_kikp
                           *dis_ik[0]*r_ikp*r_ikp)/(r_ik*r_ik*r_ikp*r_ikp);
                       dcA_kikp[1][0]=(dis_ikp[1]*r_ik*r_ikp-cosAng_kikp
                           *dis_ik[1]*r_ikp*r_ikp)/(r_ik*r_ik*r_ikp*r_ikp);
                       dcA_kikp[2][0]=(dis_ikp[2]*r_ik*r_ikp-cosAng_kikp
                           *dis_ik[2]*r_ikp*r_ikp)/(r_ik*r_ik*r_ikp*r_ikp);
                       dcA_kikp[0][1]=(dis_ik[0]*r_ik*r_ikp-cosAng_kikp
                           *dis_ikp[0]*r_ik*r_ik)/(r_ik*r_ik*r_ikp*r_ikp);
                       dcA_kikp[1][1]=(dis_ik[1]*r_ik*r_ikp-cosAng_kikp
                           *dis_ikp[1]*r_ik*r_ik)/(r_ik*r_ik*r_ikp*r_ikp);
                       dcA_kikp[2][1]=(dis_ik[2]*r_ik*r_ikp-cosAng_kikp
                           *dis_ikp[2]*r_ik*r_ik)/(r_ik*r_ik*r_ikp*r_ikp);
                       amean=cosAng_jikp;
                       gfactor2=gmean0+gmean1*amean
                           +gmean2*amean*amean;
                       gprime2=gmean1+2.0*gmean2*amean;
                       gmean0=sigma_g0[ktype-1][itype-1][kptype-1];
                       gmean1=sigma_g1[ktype-1][itype-1][kptype-1];
                       gmean2=sigma_g2[ktype-1][itype-1][kptype-1];
                       amean=cosAng_kikp;
                       gfactor3=gmean0+gmean1*amean
                           +gmean2*amean*amean;
                       gprime3=gmean1+2.0*gmean2*amean;
                       gfactor=gfactor1*gfactor2*gfactor3;
                       rfactorrt=betaS_ik*betaS_ikp;
                       rfactor=rfactorrt*rfactorrt;
 
 //2nd CC is second term of Eq. 11 (c) for i atom where j , k & k' =neighbor of i
 
                       CC=CC+2.0*gfactor*rfactor;
 
 //agpdpr1 is derivative of CC 2nd term w.r.t. Beta(r_ik)
 //agpdpr2 is derivative of CC 2nd term w.r.t. Beta(r_ik')
 //app1 is derivative of CC 2nd term w.r.t. cos(theta_jik)
 //app2 is derivative of CC 2nd term w.r.t. cos(theta_jik')
 //app3 is derivative of CC 2nd term w.r.t. cos(theta_kik')
 
                       agpdpr1=4.0*gfactor*rfactorrt*betaS_ikp
                           *dBetaS_ik/r_ik;
                       agpdpr2=4.0*gfactor*rfactorrt*betaS_ik
                           *dBetaS_ikp/r_ikp;
                       app1=2.0*rfactor*gfactor2*gfactor3*gprime1;
                       app2=2.0*rfactor*gfactor1*gfactor3*gprime2;
                       app3=2.0*rfactor*gfactor1*gfactor2*gprime3;
                       bt_sg[nb_ij].dCC[0]+=
                           app1*dcA_jik[0][0]
                           +app2*dcA_jikp[0][0];
                       bt_sg[nb_ij].dCC[1]+=
                           app1*dcA_jik[1][0]
                           +app2*dcA_jikp[1][0];
                       bt_sg[nb_ij].dCC[2]+=
                           app1*dcA_jik[2][0]
                           +app2*dcA_jikp[2][0];
                       bt_sg[nb_ik].dCC[0]+=
                           app1*dcA_jik[0][1]
                           +app3*dcA_kikp[0][0]
                           +agpdpr1*dis_ik[0];
                       bt_sg[nb_ik].dCC[1]+=
                           app1*dcA_jik[1][1]
                           +app3*dcA_kikp[1][0]
                           +agpdpr1*dis_ik[1];
                       bt_sg[nb_ik].dCC[2]+=
                           app1*dcA_jik[2][1]
                           +app3*dcA_kikp[2][0]
                           +agpdpr1*dis_ik[2];
                       bt_sg[nb_ikp].dCC[0]=
                           app2*dcA_jikp[0][1]
                           +app3*dcA_kikp[0][1]
                           +agpdpr2*dis_ikp[0];
                       bt_sg[nb_ikp].dCC[1]=
                           app2*dcA_jikp[1][1]
                           +app3*dcA_kikp[1][1]
                           +agpdpr2*dis_ikp[1];
                       bt_sg[nb_ikp].dCC[2]=
                           app2*dcA_jikp[2][1]
                           +app3*dcA_kikp[2][1]
                           +agpdpr2*dis_ikp[2];
                     }
                   }
                 }
 
 // j and k are different neighbors of i and k' is a neighbor k not equal to i
 
                 for(ltmp=0;ltmp<numneigh[k];ltmp++) {
                   temp_kkp=BOP_index[k]+ltmp;
                   kp=klist[ltmp];;
                   kptype = map[type[kp]]+1;
                   same_ikp=0;
                   same_jkp=0;
                   if(x[i][0]==x[kp][0]) {
                     if(x[i][1]==x[kp][1]) {
                       if(x[i][2]==x[kp][2]) {
                         same_ikp=1;
                       }
                     }
                   }
                   if(x[j][0]==x[kp][0]) {
                     if(x[j][1]==x[kp][1]) {
                       if(x[j][2]==x[kp][2]) {
                         same_jkp=1;
                       }
                     }
                   }
                   if(!same_ikp&&!same_jkp) {
                     if(ktype==kptype)
                       ikkp=ktype-1;
                     else if(ktype<kptype)
                       ikkp=ktype*bop_types-ktype*(ktype+1)/2+kptype-1;
                     else
                       ikkp=kptype*bop_types-kptype*(kptype+1)/2+ktype-1;
                     dis_kkp[0]=x[kp][0]-x[k][0];
                     dis_kkp[1]=x[kp][1]-x[k][1];
                     dis_kkp[2]=x[kp][2]-x[k][2];
                     rsq_kkp=dis_kkp[0]*dis_kkp[0]
                         +dis_kkp[1]*dis_kkp[1]
                         +dis_kkp[2]*dis_kkp[2];
                     r_kkp=sqrt(rsq_kkp);
                     if(r_kkp<=rcut[ikkp]) {
                       ps=r_kkp*rdr[ikkp]+1.0;
                       ks=(int)ps;
                       if(nr-1<ks)
                         ks=nr-1;
                       ps=ps-ks;
                       if(ps>1.0)
                         ps=1.0;
                       betaS_kkp=((pBetaS3[ikkp][ks-1]*ps+pBetaS2[ikkp][ks-1])*ps
                           +pBetaS1[ikkp][ks-1])*ps+pBetaS[ikkp][ks-1];
                       dBetaS_kkp=(pBetaS6[ikkp][ks-1]*ps+pBetaS5[ikkp][ks-1])*ps
                           +pBetaS4[ikkp][ks-1];
                       sig_flag=0;
                       for(nsearch=0;nsearch<nSigBk[n];nsearch++) {
                         ncmp=itypeSigBk[n][nsearch];
                         if(x[ncmp][0]==x[kp][0]) {
                           if(x[ncmp][1]==x[kp][1]) {
                             if(x[ncmp][2]==x[kp][2]) {
                               sig_flag=1;
                               nkp=nsearch;
                               break;
                             }
                           }
                         }
                       }
                       if(sig_flag==0) {
                         nSigBk[n]=nSigBk[n]+1;
                         nkp=nSigBk[n]-1;
                         itypeSigBk[n][nkp]=kp;
                       }
                       cosAng_ikkp=(-dis_ik[0]*dis_kkp[0]-dis_ik[1]*dis_kkp[1]
                           -dis_ik[2]*dis_kkp[2])/(r_ik*r_kkp);
                       dcA_ikkp[0][0]=(dis_kkp[0]*r_ik*r_kkp-cosAng_ikkp
                           *-dis_ik[0]*r_kkp*r_kkp)/(r_ik*r_ik*r_kkp*r_kkp);
                       dcA_ikkp[1][0]=(dis_kkp[1]*r_ik*r_kkp-cosAng_ikkp
                           *-dis_ik[1]*r_kkp*r_kkp)/(r_ik*r_ik*r_kkp*r_kkp);
                       dcA_ikkp[2][0]=(dis_kkp[2]*r_ik*r_kkp-cosAng_ikkp
                           *-dis_ik[2]*r_kkp*r_kkp)/(r_ik*r_ik*r_kkp*r_kkp);
                       dcA_ikkp[0][1]=(-dis_ik[0]*r_ik*r_kkp-cosAng_ikkp
                           *dis_kkp[0]*r_ik*r_ik)/(r_ik*r_ik*r_kkp*r_kkp);
                       dcA_ikkp[1][1]=(-dis_ik[1]*r_ik*r_kkp-cosAng_ikkp
                           *dis_kkp[1]*r_ik*r_ik)/(r_ik*r_ik*r_kkp*r_kkp);
                       dcA_ikkp[2][1]=(-dis_ik[2]*r_ik*r_kkp-cosAng_ikkp
                           *dis_kkp[2]*r_ik*r_ik)/(r_ik*r_ik*r_kkp*r_kkp);
                       nb_kkp=nb_t;
                       nb_t++;
                       if(nb_t>nb_sg) {
                         new_n_tot=nb_sg+maxneigh;
                         grow_sigma(nb_sg,new_n_tot);
                         nb_sg=new_n_tot;
                       }
                       bt_sg[nb_kkp].temp=temp_kkp;
                       bt_sg[nb_kkp].i=k;
                       bt_sg[nb_kkp].j=kp;
                       gmean0=sigma_g0[itype-1][ktype-1][kptype-1];
                       gmean1=sigma_g1[itype-1][ktype-1][kptype-1];
                       gmean2=sigma_g2[itype-1][ktype-1][kptype-1];
                       amean=cosAng_ikkp;
                       gfactor2=gmean0+gmean1*amean
                           +gmean2*amean*amean;
                       gprime2=gmean1+2.0*gmean2*amean;
                       gfactorsq2=gfactor2*gfactor2;
                       gsqprime2=2.0*gfactor2*gprime2;
                       gfactor=gfactorsq*gfactorsq2;
                       rfactorrt=betaS_ik*betaS_kkp;
                       rfactor=rfactorrt*rfactorrt;
 
 //3rd CC is third term of Eq. 11 (c) for i atom
 //where j , k =neighbor of i & k' =neighbor of k
 
                       CC=CC+gfactor*rfactor;
 
 //agpdpr1 is derivative of CC 3rd term w.r.t. Beta(r_ik)
 //agpdpr2 is derivative of CC 3rd term w.r.t. Beta(r_kk')
 //app1 is derivative of CC 3rd term w.r.t. cos(theta_jik)
 //app2 is derivative of CC 3rd term w.r.t. cos(theta_ikk')
 
                       agpdpr1=2.0*gfactor*rfactorrt*betaS_kkp
                           *dBetaS_ik/r_ik;
                       agpdpr2=2.0*gfactor*rfactorrt*betaS_ik
                           *dBetaS_kkp/r_kkp;
                       app1=rfactor*gfactorsq2*gsqprime;
                       app2=rfactor*gfactorsq*gsqprime2;
                       bt_sg[nb_ij].dCC[0]+=
                           app1*dcA_jik[0][0];
                       bt_sg[nb_ij].dCC[1]+=
                           app1*dcA_jik[1][0];
                       bt_sg[nb_ij].dCC[2]+=
                           app1*dcA_jik[2][0];
                       bt_sg[nb_ik].dCC[0]+=
                           app1*dcA_jik[0][1]
                           +agpdpr1*dis_ik[0]
                           -app2*dcA_ikkp[0][0];
                       bt_sg[nb_ik].dCC[1]+=
                           app1*dcA_jik[1][1]
                           +agpdpr1*dis_ik[1]
                           -app2*dcA_ikkp[1][0];
                       bt_sg[nb_ik].dCC[2]+=
                           app1*dcA_jik[2][1]
                           +agpdpr1*dis_ik[2]
                           -app2*dcA_ikkp[2][0];
                       bt_sg[nb_kkp].dCC[0]+=
                           app2*dcA_ikkp[0][1]
                           +agpdpr2*dis_kkp[0];
                       bt_sg[nb_kkp].dCC[1]+=
                           app2*dcA_ikkp[1][1]
                           +agpdpr2*dis_kkp[1];
                       bt_sg[nb_kkp].dCC[2]+=
                           app2*dcA_ikkp[2][1]
                           +agpdpr2*dis_kkp[2];
                     }
                   }
                 }
 
 //j and k are different neighbors of i and k' is a neighbor j not equal to k
 
                 for(ltmp=0;ltmp<numneigh[j];ltmp++) {
                   sig_flag=0;
                   temp_jkp=BOP_index[j]+ltmp;
                   kp=jlist[ltmp];
                   kptype = map[type[kp]]+1;
                   kplist=firstneigh[kp];
 
                   same_kkpk=0;
                   same_jkpj=0;
 
                   for(kpNeij=0;kpNeij<numneigh[kp];kpNeij++) {
                     kpj=kplist[kpNeij];
                     if(x[j][0]==x[kpj][0]) {
                       if(x[j][1]==x[kpj][1]) {
                         if(x[j][2]==x[kpj][2]) {
                           same_jkpj=1;
                           break;
                         }
                       }
                     }
                   }
                   for(kpNeik=0;kpNeik<numneigh[kp];kpNeik++) {
                     kpk=kplist[kpNeik];
                     if(x[k][0]==x[kpk][0]) {
                       if(x[k][1]==x[kpk][1]) {
                         if(x[k][2]==x[kpk][2]) {
                           same_kkpk=1;
                           break;
                         }
                       }
                     }
                   }
                   if(!same_jkpj&&!same_kkpk) {
                     same_kkpk=0;
                     for(kNeikp=0;kNeikp<numneigh[k];kNeikp++) {
                       temp_kkp=BOP_index[k]+kNeikp;
                       kkp=kplist[kNeikp];
                       if(x[kp][0]==x[kkp][0]) {
                         if(x[kp][1]==x[kkp][1]) {
                           if(x[kp][2]==x[kkp][2]) {
                             sig_flag=1;
                             break;
                           }
                         }
                       }
                     }
                     if(sig_flag==1) {
                       for(nsearch=0;nsearch<numneigh[kp];nsearch++) {
                         ncmp=kplist[nsearch];
                         if(x[ncmp][0]==x[j][0]) {
                           if(x[ncmp][1]==x[j][1]) {
                             if(x[ncmp][2]==x[j][2]) {
                               kpNeij=nsearch;
                             }
                           }
                         }
                         if(x[ncmp][0]==x[k][0]) {
                           if(x[ncmp][1]==x[k][1]) {
                             if(x[ncmp][2]==x[k][2]) {
                               kpNeik=nsearch;
                             }
                           }
                         }
                       }
                       if(jtype==kptype)
                         ijkp=jtype-1;
                       else if(jtype<kptype)
                         ijkp=jtype*bop_types-jtype*(jtype+1)/2+kptype-1;
                       else
                         ijkp=kptype*bop_types-kptype*(kptype+1)/2+jtype-1;
                       if(ktype==kptype)
                         ikkp=ktype-1;
                       else if(ktype<kptype)
                         ikkp=ktype*bop_types-ktype*(ktype+1)/2+kptype-1;
                       else
                         ikkp=kptype*bop_types-kptype*(kptype+1)/2+ktype-1;
 
                       dis_jkp[0]=x[kp][0]-x[j][0];
                       dis_jkp[1]=x[kp][1]-x[j][1];
                       dis_jkp[2]=x[kp][2]-x[j][2];
                       rsq_jkp=dis_jkp[0]*dis_jkp[0]
                           +dis_jkp[1]*dis_jkp[1]
                           +dis_jkp[2]*dis_jkp[2];
                       r_jkp=sqrt(rsq_jkp);
                       ps=r_jkp*rdr[ijkp]+1.0;
                       ks=(int)ps;
                       if(nr-1<ks)
                         ks=nr-1;
                       ps=ps-ks;
                       if(ps>1.0)
                         ps=1.0;
                       betaS_jkp=((pBetaS3[ijkp][ks-1]*ps+pBetaS2[ijkp][ks-1])*ps
                           +pBetaS1[ijkp][ks-1])*ps+pBetaS[ijkp][ks-1];
                       dBetaS_jkp=(pBetaS6[ijkp][ks-1]*ps+pBetaS5[ijkp][ks-1])*ps
                           +pBetaS4[ijkp][ks-1];
                       dis_kkp[0]=x[kp][0]-x[k][0];
                       dis_kkp[1]=x[kp][1]-x[k][1];
                       dis_kkp[2]=x[kp][2]-x[k][2];
                       rsq_kkp=dis_kkp[0]*dis_kkp[0]
                           +dis_kkp[1]*dis_kkp[1]
                           +dis_kkp[2]*dis_kkp[2];
                       r_kkp=sqrt(rsq_kkp);
                       ps=r_kkp*rdr[ikkp]+1.0;
                       ks=(int)ps;
                       if(nr-1<ks)
                         ks=nr-1;
                       ps=ps-ks;
                       if(ps>1.0)
                         ps=1.0;
                       betaS_kkp=((pBetaS3[ikkp][ks-1]*ps+pBetaS2[ikkp][ks-1])*ps
                           +pBetaS1[ikkp][ks-1])*ps+pBetaS[ikkp][ks-1];
                       dBetaS_kkp=(pBetaS6[ikkp][ks-1]*ps+pBetaS5[ikkp][ks-1])*ps
                           +pBetaS4[ikkp][ks-1];
                       cosAng_ijkp=(-dis_ij[0]*dis_jkp[0]-dis_ij[1]*dis_jkp[1]
                           -dis_ij[2]*dis_jkp[2])/(r_ij*r_jkp);
                       dcA_ijkp[0][0]=(dis_jkp[0]*r_ij*r_jkp-cosAng_ijkp
                           *-dis_ij[0]*r_jkp*r_jkp)/(r_ij*r_ij*r_jkp*r_jkp);
                       dcA_ijkp[1][0]=(dis_jkp[1]*r_ij*r_jkp-cosAng_ijkp
                           *-dis_ij[1]*r_jkp*r_jkp)/(r_ij*r_ij*r_jkp*r_jkp);
                       dcA_ijkp[2][0]=(dis_jkp[2]*r_ij*r_jkp-cosAng_ijkp
                           *-dis_ij[2]*r_jkp*r_jkp)/(r_ij*r_ij*r_jkp*r_jkp);
                       dcA_ijkp[0][1]=(-dis_ij[0]*r_ij*r_jkp-cosAng_ijkp
                           *dis_jkp[0]*r_ij*r_ij)/(r_ij*r_ij*r_jkp*r_jkp);
                       dcA_ijkp[1][1]=(-dis_ij[1]*r_ij*r_jkp-cosAng_ijkp
                           *dis_jkp[1]*r_ij*r_ij)/(r_ij*r_ij*r_jkp*r_jkp);
                       dcA_ijkp[2][1]=(-dis_ij[2]*r_ij*r_jkp-cosAng_ijkp
                           *dis_jkp[2]*r_ij*r_ij)/(r_ij*r_ij*r_jkp*r_jkp);
                       cosAng_ikkp=(-dis_ik[0]*dis_kkp[0]-dis_ik[1]*dis_kkp[1]
                           -dis_ik[2]*dis_kkp[2])/(r_ik*r_kkp);
                       dcA_ikkp[0][0]=(dis_kkp[0]*r_ik*r_kkp-cosAng_ikkp
                           *-dis_ik[0]*r_kkp*r_kkp)/(r_ik*r_ik*r_kkp*r_kkp);
                       dcA_ikkp[1][0]=(dis_kkp[1]*r_ik*r_kkp-cosAng_ikkp
                           *-dis_ik[1]*r_kkp*r_kkp)/(r_ik*r_ik*r_kkp*r_kkp);
                       dcA_ikkp[2][0]=(dis_kkp[2]*r_ik*r_kkp-cosAng_ikkp
                           *-dis_ik[2]*r_kkp*r_kkp)/(r_ik*r_ik*r_kkp*r_kkp);
                       dcA_ikkp[0][1]=(-dis_ik[0]*r_ik*r_kkp-cosAng_ikkp
                           *dis_kkp[0]*r_ik*r_ik)/(r_ik*r_ik*r_kkp*r_kkp);
                       dcA_ikkp[1][1]=(-dis_ik[1]*r_ik*r_kkp-cosAng_ikkp
                           *dis_kkp[1]*r_ik*r_ik)/(r_ik*r_ik*r_kkp*r_kkp);
                       dcA_ikkp[2][1]=(-dis_ik[2]*r_ik*r_kkp-cosAng_ikkp
                           *dis_kkp[2]*r_ik*r_ik)/(r_ik*r_ik*r_kkp*r_kkp);
                       cosAng_jkpk=(dis_jkp[0]*dis_kkp[0]+dis_jkp[1]*dis_kkp[1]
                           +dis_jkp[2]*dis_kkp[2])/(r_jkp*r_kkp);
                       dcA_jkpk[0][0]=(-dis_kkp[0]*r_jkp*r_kkp-cosAng_jkpk
                           *-dis_jkp[0]*r_kkp*r_kkp)/(r_jkp*r_jkp*r_kkp*r_kkp);
                       dcA_jkpk[1][0]=(-dis_kkp[1]*r_jkp*r_kkp-cosAng_jkpk
                           *-dis_jkp[1]*r_kkp*r_kkp)/(r_jkp*r_jkp*r_kkp*r_kkp);
                       dcA_jkpk[2][0]=(-dis_kkp[2]*r_jkp*r_kkp-cosAng_jkpk
                           *-dis_jkp[2]*r_kkp*r_kkp)/(r_jkp*r_jkp*r_kkp*r_kkp);
                       dcA_jkpk[0][1]=(-dis_jkp[0]*r_jkp*r_kkp-cosAng_jkpk
                           *-dis_kkp[0]*r_jkp*r_jkp)/(r_jkp*r_jkp*r_kkp*r_kkp);
                       dcA_jkpk[1][1]=(-dis_jkp[1]*r_jkp*r_kkp-cosAng_jkpk
                           *-dis_kkp[1]*r_jkp*r_jkp)/(r_jkp*r_jkp*r_kkp*r_kkp);
                       dcA_jkpk[2][1]=(-dis_jkp[2]*r_jkp*r_kkp-cosAng_jkpk
                           *-dis_kkp[2]*r_jkp*r_jkp)/(r_jkp*r_jkp*r_kkp*r_kkp);
                       sig_flag=0;
                       for(nsearch=0;nsearch<nSigBk[n];nsearch++) {
                         ncmp=itypeSigBk[n][nsearch];
                         if(x[ncmp][0]==x[kp][0]) {
                           if(x[ncmp][1]==x[kp][1]) {
                             if(x[ncmp][2]==x[kp][2]) {
                               nkp=nsearch;
                               sig_flag=1;
                               break;
                             }
                           }
                         }
                       }
                       if(sig_flag==0) {
                         nSigBk[n]=nSigBk[n]+1;
                         nkp=nSigBk[n]-1;
                         itypeSigBk[n][nkp]=kp;
                       }
                       nb_jkp=nb_t;
                       nb_t++;
                       if(nb_t>nb_sg) {
                         new_n_tot=nb_sg+maxneigh;
                         grow_sigma(nb_sg,new_n_tot);
                         nb_sg=new_n_tot;
                       }
                       bt_sg[nb_jkp].temp=temp_jkp;
                       bt_sg[nb_jkp].i=j;
                       bt_sg[nb_jkp].j=kp;
                       nb_kkp=nb_t;
                       nb_t++;
                       if(nb_t>nb_sg) {
                         new_n_tot=nb_sg+maxneigh;
                         grow_sigma(nb_sg,new_n_tot);
                         nb_sg=new_n_tot;
                       }
                       bt_sg[nb_kkp].temp=temp_kkp;
                       bt_sg[nb_kkp].i=k;
                       bt_sg[nb_kkp].j=kp;
                       gmean0=sigma_g0[itype-1][jtype-1][kptype-1];
                       gmean1=sigma_g1[itype-1][jtype-1][kptype-1];
                       gmean2=sigma_g2[itype-1][jtype-1][kptype-1];
                       amean=cosAng_ijkp;
                       gfactor2=gmean0+gmean1*amean
                           +gmean2*amean*amean;
                       gprime2=gmean1+2.0*gmean2*amean;
                       gmean0=sigma_g0[itype-1][ktype-1][kptype-1];
                       gmean1=sigma_g1[itype-1][ktype-1][kptype-1];
                       gmean2=sigma_g2[itype-1][ktype-1][kptype-1];
                       amean=cosAng_ikkp;
                       gfactor3=gmean0+gmean1*amean
                           +gmean2*amean*amean;
                       gprime3=gmean1+2.0*gmean2*amean;
                       gmean0=sigma_g0[jtype-1][kptype-1][ktype-1];
                       gmean1=sigma_g1[jtype-1][kptype-1][ktype-1];
                       gmean2=sigma_g2[jtype-1][kptype-1][ktype-1];
                       amean=cosAng_jkpk;
                       gfactor4=gmean0+gmean1*amean
                           +gmean2*amean*amean;
                       gprime4=gmean1+2.0*gmean2*amean;
                       gfactor=gfactor1*gfactor2*gfactor3*gfactor4;
                       rfactor0=(betaS_ik+small2)*(betaS_jkp+small2)
                           *(betaS_kkp+small2);
                       rfactor=pow(rfactor0,2.0/3.0);
                       drfactor=2.0/3.0*pow(rfactor0,-1.0/3.0);
 
 //EE is Eq. 25 (notes)
 
                       EE=EE+gfactor*rfactor;
 
 //agpdpr1 is derivative of EE w.r.t. Beta(r_ik)
 //agpdpr2 is derivative of EE w.r.t. Beta(r_jk')
 //agpdpr3 is derivative of EE w.r.t. Beta(r_kk')
 //app1 is derivative of EE w.r.t. cos(theta_jik)
 //app2 is derivative of EE w.r.t. cos(theta_ijk')
 //app3 is derivative of EE w.r.t. cos(theta_ikk')
 //app4 is derivative of EE w.r.t. cos(theta_jk'k)
 
                       agpdpr1=gfactor*drfactor*(betaS_jkp+small2)*(betaS_kkp
                           +small2)*dBetaS_ik/r_ik;
                       agpdpr2=gfactor*drfactor*(betaS_ik+small2)*(betaS_kkp
                           +small2)*dBetaS_jkp/r_jkp;
                       agpdpr3=gfactor*drfactor*(betaS_ik+small2)*(betaS_jkp
                           +small2)*dBetaS_kkp/r_kkp;
                       app1=rfactor*gfactor2*gfactor3*gfactor4*gprime1;
                       app2=rfactor*gfactor1*gfactor3*gfactor4*gprime2;
                       app3=rfactor*gfactor1*gfactor2*gfactor4*gprime3;
                       app4=rfactor*gfactor1*gfactor2*gfactor3*gprime4;
                       bt_sg[nb_ij].dEE[0]+=
                           app1*dcA_jik[0][0]
                           -app2*dcA_ijkp[0][0];
                       bt_sg[nb_ij].dEE[1]+=
                           app1*dcA_jik[1][0]
                           -app2*dcA_ijkp[1][0];
                       bt_sg[nb_ij].dEE[2]+=
                           app1*dcA_jik[2][0]
                           -app2*dcA_ijkp[2][0];
                       bt_sg[nb_ik].dEE[0]+=
                           app1*dcA_jik[0][1]
                           +agpdpr1*dis_ik[0]
                           -app3*dcA_ikkp[0][0];
                       bt_sg[nb_ik].dEE[1]+=
                           app1*dcA_jik[1][1]
                           +agpdpr1*dis_ik[1]
                           -app3*dcA_ikkp[1][0];
                       bt_sg[nb_ik].dEE[2]+=
                           app1*dcA_jik[2][1]
                           +agpdpr1*dis_ik[2]
                           -app3*dcA_ikkp[2][0];
                       bt_sg[nb_jkp].dEE[0]+=
                           app2*dcA_ijkp[0][1]
                           +agpdpr2*dis_jkp[0]
                           -app4*dcA_jkpk[0][0];
                       bt_sg[nb_jkp].dEE[1]+=
                           app2*dcA_ijkp[1][1]
                           +agpdpr2*dis_jkp[1]
                           -app4*dcA_jkpk[1][0];
                       bt_sg[nb_jkp].dEE[2]+=
                           app2*dcA_ijkp[2][1]
                           +agpdpr2*dis_jkp[2]
                           -app4*dcA_jkpk[2][0];
                       bt_sg[nb_kkp].dEE[0]+=
                           app3*dcA_ikkp[0][1]
                           +agpdpr3*dis_kkp[0]
                           -app4*dcA_jkpk[0][1];
                       bt_sg[nb_kkp].dEE[1]+=
                           app3*dcA_ikkp[1][1]
                           +agpdpr3*dis_kkp[1]
                           -app4*dcA_jkpk[1][1];
                       bt_sg[nb_kkp].dEE[2]+=
                           app3*dcA_ikkp[2][1]
                           +agpdpr3*dis_kkp[2]
                           -app4*dcA_jkpk[2][1];
                     }
                   }
                 }
               }
             }
           }
 
 //j is a neighbor of i and k is a neighbor of j not equal to i
 
           for(ktmp=0;ktmp<numneigh[j];ktmp++) {
             if(ktmp!=ji) {
               temp_jk=BOP_index[j]+ktmp;
               k=jlist[ktmp];
               klist=firstneigh[k];
               ktype=map[type[k]]+1;
               for(kNeij=0;kNeij<numneigh[k];kNeij++) {
                 if(x[klist[kNeij]][0]==x[j][0]) {
                   if(x[klist[kNeij]][1]==x[j][1]) {
                     if(x[klist[kNeij]][2]==x[j][2]) {
                       break;
                     }
                   }
                 }
               }
               if(jtype==ktype)
                 ijk=jtype-1;
               else if(jtype<ktype)
                 ijk=jtype*bop_types-jtype*(jtype+1)/2+ktype-1;
               else
                 ijk=ktype*bop_types-ktype*(ktype+1)/2+jtype-1;
               sig_flag=0;
               for(nsearch=0;nsearch<nSigBk[n];nsearch++) {
                 ncmp=itypeSigBk[n][nsearch];
                 if(x[ncmp][0]==x[k][0]) {
                   if(x[ncmp][1]==x[k][1]) {
                     if(x[ncmp][2]==x[k][2]) {
                       new1=nsearch;
                       sig_flag=1;
                       break;
                     }
                   }
                 }
               }
               if(sig_flag==0) {
                 nSigBk[n]=nSigBk[n]+1;
                 new1=nSigBk[n]-1;
                 itypeSigBk[n][new1]=k;
               }
               dis_jk[0]=x[k][0]-x[j][0];
               dis_jk[1]=x[k][1]-x[j][1];
               dis_jk[2]=x[k][2]-x[j][2];
               rsq_jk=dis_jk[0]*dis_jk[0]
                   +dis_jk[1]*dis_jk[1]
                   +dis_jk[2]*dis_jk[2];
               r_jk=sqrt(rsq_jk);
               if(r_jk<=rcut[ijk]) {
                 ps=r_jk*rdr[ijk]+1.0;
                 ks=(int)ps;
                 if(nr-1<ks)
                   ks=nr-1;
                 ps=ps-ks;
                 if(ps>1.0)
                   ps=1.0;
                 betaS_jk=((pBetaS3[ijk][ks-1]*ps+pBetaS2[ijk][ks-1])*ps
                     +pBetaS1[ijk][ks-1])*ps+pBetaS[ijk][ks-1];
                 dBetaS_jk=(pBetaS6[ijk][ks-1]*ps+pBetaS5[ijk][ks-1])*ps
                     +pBetaS4[ijk][ks-1];
                 cosAng_ijk=(-dis_ij[0]*dis_jk[0]-dis_ij[1]*dis_jk[1]
                     -dis_ij[2]*dis_jk[2])/(r_ij*r_jk);
                 dcA_ijk[0][0]=(dis_jk[0]*r_ij*r_jk-cosAng_ijk
                     *-dis_ij[0]*r_jk*r_jk)/(r_ij*r_ij*r_jk*r_jk);
                 dcA_ijk[1][0]=(dis_jk[1]*r_ij*r_jk-cosAng_ijk
                     *-dis_ij[1]*r_jk*r_jk)/(r_ij*r_ij*r_jk*r_jk);
                 dcA_ijk[2][0]=(dis_jk[2]*r_ij*r_jk-cosAng_ijk
                     *-dis_ij[2]*r_jk*r_jk)/(r_ij*r_ij*r_jk*r_jk);
                 dcA_ijk[0][1]=(-dis_ij[0]*r_ij*r_jk-cosAng_ijk
                     *dis_jk[0]*r_ij*r_ij)/(r_ij*r_ij*r_jk*r_jk);
                 dcA_ijk[1][1]=(-dis_ij[1]*r_ij*r_jk-cosAng_ijk
                     *dis_jk[1]*r_ij*r_ij)/(r_ij*r_ij*r_jk*r_jk);
                 dcA_ijk[2][1]=(-dis_ij[2]*r_ij*r_jk-cosAng_ijk
                     *dis_jk[2]*r_ij*r_ij)/(r_ij*r_ij*r_jk*r_jk);
                 nb_jk=nb_t;
                 nb_t++;
                 if(nb_t>nb_sg) {
                   new_n_tot=nb_sg+maxneigh;
                   grow_sigma(nb_sg,new_n_tot);
                   nb_sg=new_n_tot;
                 }
                 bt_sg[nb_jk].temp=temp_jk;
                 bt_sg[nb_jk].i=j;
                 bt_sg[nb_jk].j=k;
                 gmean0=sigma_g0[itype-1][jtype-1][ktype-1];
                 gmean1=sigma_g1[itype-1][jtype-1][ktype-1];
                 gmean2=sigma_g2[itype-1][jtype-1][ktype-1];
                 amean=cosAng_ijk;
                 gfactor1=gmean0+gmean1*amean
                     +gmean2*amean*amean;
                 gprime1=gmean1+2.0*gmean2*amean;
                 gfactorsq=gfactor1*gfactor1;
                 gsqprime=2.0*gfactor1*gprime1;
                 rfactor1rt=betaS_jk*betaS_jk;
                 rfactor1=rfactor1rt*rfactor1rt;
 
 //BB is Eq. 34 (a) or Eq. 10 (c) for the j atom
 //1st DD is Eq. 11 (c) for j atom where i & k=neighbor of j
 
                 BB=BB+gfactorsq*rfactor1rt;
                 DD=DD+gfactorsq*rfactor1;
 
 //agpdpr1 is derivative of BB  w.r.t. Beta(r_jk)
 //app1 is derivative of BB w.r.t. cos(theta_ijk)
 
                 agpdpr1=2.0*gfactorsq*betaS_jk*dBetaS_jk/r_jk;
                 agpdpr2=2.0*rfactor1rt*agpdpr1;
                 app1=rfactor1rt*gsqprime;
                 app2=rfactor1rt*app1;
                 bt_sg[nb_ij].dBB[0]-=
                     app1*dcA_ijk[0][0];
                 bt_sg[nb_ij].dBB[1]-=
                     app1*dcA_ijk[1][0];
                 bt_sg[nb_ij].dBB[2]-=
                     app1*dcA_ijk[2][0];
                 bt_sg[nb_ij].dDD[0]-=
                     app2*dcA_ijk[0][0];
                 bt_sg[nb_ij].dDD[1]-=
                     app2*dcA_ijk[1][0];
                 bt_sg[nb_ij].dDD[2]-=
                     app2*dcA_ijk[2][0];
                 bt_sg[nb_jk].dBB[0]+=
                     app1*dcA_ijk[0][1]
                     +agpdpr1*dis_jk[0];
                 bt_sg[nb_jk].dBB[1]+=
                     app1*dcA_ijk[1][1]
                     +agpdpr1*dis_jk[1];
                 bt_sg[nb_jk].dBB[2]+=
                     app1*dcA_ijk[2][1]
                     +agpdpr1*dis_jk[2];
                 bt_sg[nb_jk].dDD[0]+=
                     app2*dcA_ijk[0][1]
                     +agpdpr2*dis_jk[0];
                 bt_sg[nb_jk].dDD[1]+=
                     app2*dcA_ijk[1][1]
                     +agpdpr2*dis_jk[1];
                 bt_sg[nb_jk].dDD[2]+=
                     app2*dcA_ijk[2][1]
                     +agpdpr2*dis_jk[2];
 
 //j is a neighbor of i, k and k' are different neighbors of j not equal to i
 
                 for(ltmp=0;ltmp<ktmp;ltmp++) {
                   if(ltmp!=ji) {
                     temp_jkp=BOP_index[j]+ltmp;
                     kp=jlist[ltmp];
                     kptype=map[type[kp]]+1;
                     if(jtype==kptype)
                       ijkp=jtype-1;
                     else if(jtype<kptype)
                       ijkp=jtype*bop_types-jtype*(jtype+1)/2+kptype-1;
                     else
                       ijkp=kptype*bop_types-kptype*(kptype+1)/2+jtype-1;
                     for(nsearch=0;nsearch<nSigBk[n];nsearch++) {
                       ncmp=itypeSigBk[n][nsearch];
                       if(x[ncmp][0]==x[kp][0]) {
                         if(x[ncmp][1]==x[kp][1]) {
                           if(x[ncmp][2]==x[kp][2]) {
                             new2=nsearch;
                             break;
                           }
                         }
                       }
                     }
                     dis_jkp[0]=x[kp][0]-x[j][0];
                     dis_jkp[1]=x[kp][1]-x[j][1];
                     dis_jkp[2]=x[kp][2]-x[j][2];
                     rsq_jkp=dis_jkp[0]*dis_jkp[0]
                         +dis_jkp[1]*dis_jkp[1]
                         +dis_jkp[2]*dis_jkp[2];
                     r_jkp=sqrt(rsq_jkp);
                     if(r_jkp<=rcut[ijkp]) {
                       ps=r_jkp*rdr[ijkp]+1.0;
                       ks=(int)ps;
                       if(nr-1<ks)
                         ks=nr-1;
                       ps=ps-ks;
                       if(ps>1.0)
                         ps=1.0;
                       betaS_jkp=((pBetaS3[ijkp][ks-1]*ps+pBetaS2[ijkp][ks-1])*ps
                         +pBetaS1[ijkp][ks-1])*ps+pBetaS[ijkp][ks-1];
                       dBetaS_jkp=(pBetaS6[ijkp][ks-1]*ps+pBetaS5[ijkp][ks-1])*ps
                         +pBetaS4[ijkp][ks-1];
                       cosAng_ijkp=(-dis_ij[0]*dis_jkp[0]-dis_ij[1]*dis_jkp[1]
                         -dis_ij[2]*dis_jkp[2])/(r_ij*r_jkp);
                       dcA_ijkp[0][0]=(dis_jkp[0]*r_ij*r_jkp-cosAng_ijkp
                         *-dis_ij[0]*r_jkp*r_jkp)/(r_ij*r_ij*r_jkp*r_jkp);
                       dcA_ijkp[1][0]=(dis_jkp[1]*r_ij*r_jkp-cosAng_ijkp
                         *-dis_ij[1]*r_jkp*r_jkp)/(r_ij*r_ij*r_jkp*r_jkp);
                       dcA_ijkp[2][0]=(dis_jkp[2]*r_ij*r_jkp-cosAng_ijkp
                         *-dis_ij[2]*r_jkp*r_jkp)/(r_ij*r_ij*r_jkp*r_jkp);
                       dcA_ijkp[0][1]=(-dis_ij[0]*r_ij*r_jkp-cosAng_ijkp
                         *dis_jkp[0]*r_ij*r_ij)/(r_ij*r_ij*r_jkp*r_jkp);
                       dcA_ijkp[1][1]=(-dis_ij[1]*r_ij*r_jkp-cosAng_ijkp
                         *dis_jkp[1]*r_ij*r_ij)/(r_ij*r_ij*r_jkp*r_jkp);
                       dcA_ijkp[2][1]=(-dis_ij[2]*r_ij*r_jkp-cosAng_ijkp
                         *dis_jkp[2]*r_ij*r_ij)/(r_ij*r_ij*r_jkp*r_jkp);
                       cosAng_kjkp=(dis_jk[0]*dis_jkp[0]+dis_jk[1]*dis_jkp[1]
                         +dis_jk[2]*dis_jkp[2])/(r_jk*r_jkp);
                       dcA_kjkp[0][0]=(dis_jkp[0]*r_jk*r_jkp-cosAng_kjkp
                         *dis_jk[0]*r_jkp*r_jkp)/(r_jk*r_jk*r_jkp*r_jkp);
                       dcA_kjkp[1][0]=(dis_jkp[1]*r_jk*r_jkp-cosAng_kjkp
                         *dis_jk[1]*r_jkp*r_jkp)/(r_jk*r_jk*r_jkp*r_jkp);
                       dcA_kjkp[2][0]=(dis_jkp[2]*r_jk*r_jkp-cosAng_kjkp
                         *dis_jk[2]*r_jkp*r_jkp)/(r_jk*r_jk*r_jkp*r_jkp);
                       dcA_kjkp[0][1]=(dis_jk[0]*r_jk*r_jkp-cosAng_kjkp
                         *dis_jkp[0]*r_jk*r_jk)/(r_jk*r_jk*r_jkp*r_jkp);
                       dcA_kjkp[1][1]=(dis_jk[1]*r_jk*r_jkp-cosAng_kjkp
                         *dis_jkp[1]*r_jk*r_jk)/(r_jk*r_jk*r_jkp*r_jkp);
                       dcA_kjkp[2][1]=(dis_jk[2]*r_jk*r_jkp-cosAng_kjkp
                         *dis_jkp[2]*r_jk*r_jk)/(r_jk*r_jk*r_jkp*r_jkp);
                       nb_jkp=nb_t;
                       nb_t++;
                       if(nb_t>nb_sg) {
                         new_n_tot=nb_sg+maxneigh;
                         grow_sigma(nb_sg,new_n_tot);
                         nb_sg=new_n_tot;
                       }
                       bt_sg[nb_jkp].temp=temp_jkp;
                       bt_sg[nb_jkp].i=j;
                       bt_sg[nb_jkp].j=kp;
                       gmean0=sigma_g0[itype-1][jtype-1][kptype-1];
                       gmean1=sigma_g1[itype-1][jtype-1][kptype-1];
                       gmean2=sigma_g2[itype-1][jtype-1][kptype-1];
                       amean=cosAng_ijkp;
                       gfactor2=gmean0+gmean1*amean
                           +gmean2*amean*amean;
                       gprime2=gmean1+2.0*gmean2*amean;
                       gmean0=sigma_g0[ktype-1][jtype-1][kptype-1];
                       gmean1=sigma_g1[ktype-1][jtype-1][kptype-1];
                       gmean2=sigma_g2[ktype-1][jtype-1][kptype-1];
                       amean=cosAng_kjkp;
                       gfactor3=gmean0+gmean1*amean
                           +gmean2*amean*amean;
                       gprime3=gmean1+2.0*gmean2*amean;
                       gfactor=gfactor1*gfactor2*gfactor3;
                       rfactorrt=betaS_jk*betaS_jkp;
                       rfactor=rfactorrt*rfactorrt;
 
 //2nd DD is Eq. 11 (c) for j atom where i , k & k'=neighbor of j
 
                       DD=DD+2.0*gfactor*rfactor;
 
 //agpdpr1 is derivative of DD  w.r.t. Beta(r_jk)
 //agpdpr2 is derivative of DD  w.r.t. Beta(r_jk')
 //app1 is derivative of DD  w.r.t. cos(theta_ijk)
 //app2 is derivative of DD  w.r.t. cos(theta_ijkp)
 //app3 is derivative of DD  w.r.t. cos(theta_kjkp)
 
                       agpdpr1=4.0*gfactor*rfactorrt*betaS_jkp
                           *dBetaS_jk/r_jk;
                       agpdpr2=4.0*gfactor*rfactorrt*betaS_jk
                           *dBetaS_jkp/r_jkp;
                       app1=2.0*rfactor*gfactor2*gfactor3*gprime1;
                       app2=2.0*rfactor*gfactor1*gfactor3*gprime2;
                       app3=2.0*rfactor*gfactor1*gfactor2*gprime3;
                       bt_sg[nb_ij].dDD[0]-=
                           app1*dcA_ijk[0][0]
                           +app2*dcA_ijkp[0][0];
                       bt_sg[nb_ij].dDD[1]-=
                           app1*dcA_ijk[1][0]
                           +app2*dcA_ijkp[1][0];
                       bt_sg[nb_ij].dDD[2]-=
                           app1*dcA_ijk[2][0]
                           +app2*dcA_ijkp[2][0];
                       bt_sg[nb_jk].dDD[0]+=
                           app1*dcA_ijk[0][1]
                           +app3*dcA_kjkp[0][0]
                           +agpdpr1*dis_jk[0];
                       bt_sg[nb_jk].dDD[1]+=
                           app1*dcA_ijk[1][1]
                           +app3*dcA_kjkp[1][0]
                           +agpdpr1*dis_jk[1];
                       bt_sg[nb_jk].dDD[2]+=
                           app1*dcA_ijk[2][1]
                           +app3*dcA_kjkp[2][0]
                           +agpdpr1*dis_jk[2];
                       bt_sg[nb_jkp].dDD[0]+=
                           app2*dcA_ijkp[0][1]
                           +app3*dcA_kjkp[0][1]
                           +agpdpr2*dis_jkp[0];
                       bt_sg[nb_jkp].dDD[1]+=
                           app2*dcA_ijkp[1][1]
                           +app3*dcA_kjkp[1][1]
                           +agpdpr2*dis_jkp[1];
                       bt_sg[nb_jkp].dDD[2]+=
                           app2*dcA_ijkp[2][1]
                           +app3*dcA_kjkp[2][1]
                           +agpdpr2*dis_jkp[2];
 
                     }
                   }
                 }
 
 //j is a neighbor of i, k is a neighbor of j not equal to i and k'
 //is a neighbor of k not equal to j or i
 
                 for(ltmp=0;ltmp<numneigh[k];ltmp++) {
                   temp_kkp=BOP_index[k]+ltmp;
                   kp=klist[ltmp];
                   kptype=map[type[kp]]+1;
                   same_ikp=0;
                   same_jkp=0;
                   if(x[i][0]==x[kp][0]) {
                     if(x[i][1]==x[kp][1]) {
                       if(x[i][2]==x[kp][2]) {
                         same_ikp=1;
                       }
                     }
                   }
                   if(x[j][0]==x[kp][0]) {
                     if(x[j][1]==x[kp][1]) {
                       if(x[j][2]==x[kp][2]) {
                         same_jkp=1;
                       }
                     }
                   }
                   if(!same_ikp&&!same_jkp) {
                     if(ktype==kptype)
                       ikkp=ktype-1;
                     else if(ktype<kptype)
                       ikkp=ktype*bop_types-ktype*(ktype+1)/2+kptype-1;
                     else
                       ikkp=kptype*bop_types-kptype*(kptype+1)/2+ktype-1;
                     for(kNeij=0;kNeij<numneigh[k];kNeij++) {
                       if(x[klist[kNeij]][0]==x[j][0]) {
                         if(x[klist[kNeij]][1]==x[j][1]) {
                           if(x[klist[kNeij]][2]==x[j][2]) {
                             break;
                           }
                         }
                       }
                     }
                     sig_flag=0;
                     for(nsearch=0;nsearch<nSigBk[n];nsearch++) {
                       ncmp=itypeSigBk[n][nsearch];
                       if(x[ncmp][0]==x[kp][0]) {
                         if(x[ncmp][1]==x[kp][1]) {
                           if(x[ncmp][2]==x[kp][2]) {
                             new2=nsearch;
                             sig_flag=1;
                             break;
                           }
                         }
                       }
                     }
                     if(sig_flag==0) {
                       nSigBk[n]=nSigBk[n]+1;
                       new2=nSigBk[n]-1;
                       itypeSigBk[n][new2]=kp;
                     }
                     dis_kkp[0]=x[kp][0]-x[k][0];
                     dis_kkp[1]=x[kp][1]-x[k][1];
                     dis_kkp[2]=x[kp][2]-x[k][2];
                     rsq_kkp=dis_kkp[0]*dis_kkp[0]
                         +dis_kkp[1]*dis_kkp[1]
                         +dis_kkp[2]*dis_kkp[2];
                     r_kkp=sqrt(rsq_kkp);
                     if(r_kkp<=rcut[ikkp]) {
                       ps=r_kkp*rdr[ikkp]+1.0;
                       ks=(int)ps;
                       if(nr-1<ks)
                         ks=nr-1;
                       ps=ps-ks;
                       if(ps>1.0)
                         ps=1.0;
                       betaS_kkp=((pBetaS3[ikkp][ks-1]*ps+pBetaS2[ikkp][ks-1])*ps
                           +pBetaS1[ikkp][ks-1])*ps+pBetaS[ikkp][ks-1];
                       dBetaS_kkp=(pBetaS6[ikkp][ks-1]*ps+pBetaS5[ikkp][ks-1])*ps
                           +pBetaS4[ikkp][ks-1];
                       cosAng_jkkp=(-dis_jk[0]*dis_kkp[0]-dis_jk[1]*dis_kkp[1]
                           -dis_jk[2]*dis_kkp[2])/(r_jk*r_kkp);
                       dcA_jkkp[0][0]=(dis_kkp[0]*r_jk*r_kkp-cosAng_jkkp
                           *-dis_jk[0]*r_kkp*r_kkp)/(r_jk*r_jk*r_kkp*r_kkp);
                       dcA_jkkp[1][0]=(dis_kkp[1]*r_jk*r_kkp-cosAng_jkkp
                           *-dis_jk[1]*r_kkp*r_kkp)/(r_jk*r_jk*r_kkp*r_kkp);
                       dcA_jkkp[2][0]=(dis_kkp[2]*r_jk*r_kkp-cosAng_jkkp
                           *-dis_jk[2]*r_kkp*r_kkp)/(r_jk*r_jk*r_kkp*r_kkp);
                       dcA_jkkp[0][1]=(-dis_jk[0]*r_jk*r_kkp-cosAng_jkkp
                           *dis_kkp[0]*r_jk*r_jk)/(r_jk*r_jk*r_kkp*r_kkp);
                       dcA_jkkp[1][1]=(-dis_jk[1]*r_jk*r_kkp-cosAng_jkkp
                           *dis_kkp[1]*r_jk*r_jk)/(r_jk*r_jk*r_kkp*r_kkp);
                       dcA_jkkp[2][1]=(-dis_jk[2]*r_jk*r_kkp-cosAng_jkkp
                           *dis_kkp[2]*r_jk*r_jk)/(r_jk*r_jk*r_kkp*r_kkp);
                       nb_kkp=nb_t;
                       nb_t++;
                       if(nb_t>nb_sg) {
                         new_n_tot=nb_sg+maxneigh;
                         grow_sigma(nb_sg,new_n_tot);
                         nb_sg=new_n_tot;
                       }
                       bt_sg[nb_kkp].temp=temp_kkp;
                       bt_sg[nb_kkp].i=k;
                       bt_sg[nb_kkp].j=kp;
                       gmean0=sigma_g0[jtype-1][ktype-1][kptype-1];
                       gmean1=sigma_g1[jtype-1][ktype-1][kptype-1];
                       gmean2=sigma_g2[jtype-1][ktype-1][kptype-1];
                       amean=cosAng_jkkp;
                       gfactor2=gmean0+gmean1*amean
                           +gmean2*amean*amean;
                       gprime2=gmean1+2.0*gmean2*amean;
                       gfactorsq2=gfactor2*gfactor2;
                       gsqprime2=2.0*gfactor2*gprime2;
                       gfactor=gfactorsq*gfactorsq2;
                       rfactorrt=betaS_jk*betaS_kkp;
                       rfactor=rfactorrt*rfactorrt;
 
 //3rd DD is Eq. 11 (c) for j atom where i & k=neighbor of j & k'=neighbor of k
 
                       DD=DD+gfactor*rfactor;
 
 //agpdpr1 is derivative of DD  3rd term w.r.t. Beta(r_jk)
 //agpdpr2 is derivative of DD  3rd term w.r.t. Beta(r_kk')
 //app1 is derivative of DD  3rd term w.r.t. cos(theta_ijk)
 //app2 is derivative of DD  3rd term w.r.t. cos(theta_jkkp)
 
                       agpdpr1=2.0*gfactor*rfactorrt*betaS_kkp
                           *dBetaS_jk/r_jk;
                       agpdpr2=2.0*gfactor*rfactorrt*betaS_jk
                           *dBetaS_kkp/r_kkp;
                       app1=rfactor*gfactorsq2*gsqprime;
                       app2=rfactor*gfactorsq*gsqprime2;
                       bt_sg[nb_ij].dDD[0]-=
                           app1*dcA_ijk[0][0];
                       bt_sg[nb_ij].dDD[1]-=
                           app1*dcA_ijk[1][0];
                       bt_sg[nb_ij].dDD[2]-=
                           app1*dcA_ijk[2][0];
                       bt_sg[nb_jk].dDD[0]+=
                           app1*dcA_ijk[0][1]
                           +agpdpr1*dis_jk[0]
                           -app2*dcA_jkkp[0][0];
                       bt_sg[nb_jk].dDD[1]+=
                           app1*dcA_ijk[1][1]
                           +agpdpr1*dis_jk[1]
                           -app2*dcA_jkkp[1][0];
                       bt_sg[nb_jk].dDD[2]+=
                           app1*dcA_ijk[2][1]
                           +agpdpr1*dis_jk[2]
                           -app2*dcA_jkkp[2][0];
                       bt_sg[nb_kkp].dDD[0]+=
                           app2*dcA_jkkp[0][1]
                           +agpdpr2*dis_kkp[0];
                       bt_sg[nb_kkp].dDD[1]+=
                           app2*dcA_jkkp[1][1]
                           +agpdpr2*dis_kkp[1];
                       bt_sg[nb_kkp].dDD[2]+=
                           app2*dcA_jkkp[2][1]
                           +agpdpr2*dis_kkp[2];
 
                     }
                   }
                 }
               }
             }
           }
 
           sig_flag=0;
           if(FF<=0.000001) {
             sigB[n]=0.0;
             sig_flag=1;
           }
           if(sig_flag==0) {
             if(AA<0.0)
               AA=0.0;
             if(BB<0.0)
               BB=0.0;
             if(CC<0.0)
               CC=0.0;
             if(DD<0.0)
               DD=0.0;
 
 // AA and BB are the representations of (a) Eq. 34 and (b) Eq. 9
 // for atoms i and j respectively
 
             AAC=AA+BB;
             BBC=AA*BB;
             CCC=AA*AA+BB*BB;
             DDC=CC+DD;
 
 //EEC is a modified form of (a) Eq. 33
 
             EEC=(DDC-CCC)/(AAC+2.0*small1);
             AACFF=1.0/(AAC+2.0*small1);
             for(m=0;m<nb_t;m++) {
               if((bt_sg[m].i>-1)&&(bt_sg[m].j>-1)) {
                 bt_sg[m].dAAC[0]=bt_sg[m].dAA[0]
                     +bt_sg[m].dBB[0];
                 bt_sg[m].dAAC[1]=bt_sg[m].dAA[1]
                     +bt_sg[m].dBB[1];
                 bt_sg[m].dAAC[2]=bt_sg[m].dAA[2]
                     +bt_sg[m].dBB[2];
                 bt_sg[m].dBBC[0]=bt_sg[m].dAA[0]*BB
                     +AA*bt_sg[m].dBB[0];
                 bt_sg[m].dBBC[1]=bt_sg[m].dAA[1]*BB
                     +AA*bt_sg[m].dBB[1];
                 bt_sg[m].dBBC[2]=bt_sg[m].dAA[2]*BB
                     +AA*bt_sg[m].dBB[2];
                 bt_sg[m].dCCC[0]=2.0*AA*bt_sg[m].dAA[0]
                     +2.0*BB*bt_sg[m].dBB[0];
                 bt_sg[m].dCCC[1]=2.0*AA*bt_sg[m].dAA[1]
                     +2.0*BB*bt_sg[m].dBB[1];
                 bt_sg[m].dCCC[2]=2.0*AA*bt_sg[m].dAA[2]
                     +2.0*BB*bt_sg[m].dBB[2];
                 bt_sg[m].dDDC[0]=bt_sg[m].dCC[0]
                     +bt_sg[m].dDD[0];
                 bt_sg[m].dDDC[1]=bt_sg[m].dCC[1]
                     +bt_sg[m].dDD[1];
                 bt_sg[m].dDDC[2]=bt_sg[m].dCC[2]
                     +bt_sg[m].dDD[2];
                 bt_sg[m].dEEC[0]=(bt_sg[m].dDDC[0]
                     -bt_sg[m].dCCC[0]
                     -EEC*bt_sg[m].dAAC[0])*AACFF;
                 bt_sg[m].dEEC[1]=(bt_sg[m].dDDC[1]
                     -bt_sg[m].dCCC[1]
                     -EEC*bt_sg[m].dAAC[1])*AACFF;
                 bt_sg[m].dEEC[2]=(bt_sg[m].dDDC[2]
                     -bt_sg[m].dCCC[2]
                     -EEC*bt_sg[m].dAAC[2])*AACFF;
               }
             }
             UT=EEC*FF+BBC+small3[iij];
             UT=1.0/sqrt(UT);
 
 // FFC is slightly modified form of (a) Eq. 31
 // GGC is slightly modified form of (a) Eq. 32
 // bndtmp is a slightly modified form of (a) Eq. 30 and (b) Eq. 8
 
             FFC=BBC*UT;
             GGC=EEC*UT;
             bndtmp=(FF+sigma_delta[iij]*sigma_delta[iij])*(1.0+sigma_a[iij]*GGC)
                 *(1.0+sigma_a[iij]*GGC)+sigma_c[iij]*(AAC+sigma_a[iij]*EE
                 +sigma_a[iij]*FFC*(2.0+GGC))+small4;
             UTcom=-0.5*UT*UT*UT;
             for(m=0;m<nb_t;m++) {
               if((bt_sg[m].i>-1)&&(bt_sg[m].j>-1)) {
                 bt_sg[m].dUT[0]=UTcom*(bt_sg[m].dEEC[0]*FF
                     +EEC*bt_sg[m].dFF[0]+bt_sg[m].dBBC[0]);
                 bt_sg[m].dUT[1]=UTcom*(bt_sg[m].dEEC[1]*FF
                     +EEC*bt_sg[m].dFF[1]+bt_sg[m].dBBC[1]);
                 bt_sg[m].dUT[2]=UTcom*(bt_sg[m].dEEC[2]*FF
                     +EEC*bt_sg[m].dFF[2]+bt_sg[m].dBBC[2]);
                 bt_sg[m].dFFC[0]=bt_sg[m].dBBC[0]*UT
                     +BBC*bt_sg[m].dUT[0];
                 bt_sg[m].dFFC[1]=bt_sg[m].dBBC[1]*UT
                     +BBC*bt_sg[m].dUT[1];
                 bt_sg[m].dFFC[2]=bt_sg[m].dBBC[2]*UT
                     +BBC*bt_sg[m].dUT[2];
                 bt_sg[m].dGGC[0]=bt_sg[m].dEEC[0]*UT
                     +EEC*bt_sg[m].dUT[0];
                 bt_sg[m].dGGC[1]=bt_sg[m].dEEC[1]*UT
                     +EEC*bt_sg[m].dUT[1];
                 bt_sg[m].dGGC[2]=bt_sg[m].dEEC[2]*UT
                     +EEC*bt_sg[m].dUT[2];
               }
             }
             psign=1.0;
             if(1.0+sigma_a[iij]*GGC<0.0)
               psign=-1.0;
             bndtmp0=1.0/sqrt(bndtmp);
             sigB1[n]=psign*betaS_ij*(1.0+sigma_a[iij]*GGC)*bndtmp0;
             bndtmp=-0.5*bndtmp0*bndtmp0*bndtmp0;
             bndtmp1=psign*(1.0+sigma_a[iij]*GGC)*bndtmp0+psign*betaS_ij
                 *(1.0+sigma_a[iij]*GGC)*bndtmp*2.0*betaS_ij*(1.0
                 +sigma_a[iij]*GGC)*(1.0+sigma_a[iij]*GGC);
             bndtmp1=bndtmp1*dBetaS_ij/r_ij;
             bndtmp2=psign*betaS_ij*(1.0+sigma_a[iij]*GGC)*bndtmp*sigma_c[iij];
             bndtmp3=psign*betaS_ij*(1.0+sigma_a[iij]*GGC)
                 *bndtmp*sigma_c[iij]*sigma_a[iij];
             bndtmp4=psign*betaS_ij*(1.0+sigma_a[iij]*GGC)
                 *bndtmp*sigma_c[iij]*sigma_a[iij]*(2.0+GGC);
             bndtmp5=sigma_a[iij]*psign*betaS_ij*bndtmp0
                 +psign*betaS_ij*(1.0+sigma_a[iij]*GGC)*bndtmp
                 *(2.0*(FF+sigma_delta[iij]*sigma_delta[iij])*(1.0
                 +sigma_a[iij]*GGC)*sigma_a[iij]+sigma_c[iij]*sigma_a[iij]*FFC);
             setting=0;
             for(m=0;m<nb_t;m++) {
               if((bt_sg[m].i>-1)&&(bt_sg[m].j>-1)) {
                 temp_kk=bt_sg[m].temp;
                 if(temp_kk==temp_ij&&setting==0) {
                   bt_sg[m].dSigB1[0]=bndtmp1*dis_ij[0]
                       +(bndtmp2*bt_sg[m].dAAC[0]
                       +bndtmp3*bt_sg[m].dEE[0]
                       +bndtmp4*bt_sg[m].dFFC[0]
                       +bndtmp5*bt_sg[m].dGGC[0]);
                   bt_sg[m].dSigB1[1]=bndtmp1*dis_ij[1]
                       +(bndtmp2*bt_sg[m].dAAC[1]
                       +bndtmp3*bt_sg[m].dEE[1]
                       +bndtmp4*bt_sg[m].dFFC[1]
                       +bndtmp5*bt_sg[m].dGGC[1]);
                   bt_sg[m].dSigB1[2]=bndtmp1*dis_ij[2]
                       +(bndtmp2*bt_sg[m].dAAC[2]
                       +bndtmp3*bt_sg[m].dEE[2]
                       +bndtmp4*bt_sg[m].dFFC[2]
                       +bndtmp5*bt_sg[m].dGGC[2]);
                   setting=1;
                 }
                 else if(temp_kk==temp_ji&&setting==0) {
                   bt_sg[m].dSigB1[0]=-bndtmp1*dis_ij[0]
                       +(bndtmp2*bt_sg[m].dAAC[0]
                       +bndtmp3*bt_sg[m].dEE[0]
                       +bndtmp4*bt_sg[m].dFFC[0]
                       +bndtmp5*bt_sg[m].dGGC[0]);
                   bt_sg[m].dSigB1[1]=-bndtmp1*dis_ij[1]
                       +(bndtmp2*bt_sg[m].dAAC[1]
                       +bndtmp3*bt_sg[m].dEE[1]
                       +bndtmp4*bt_sg[m].dFFC[1]
                       +bndtmp5*bt_sg[m].dGGC[1]);
                   bt_sg[m].dSigB1[2]=-bndtmp1*dis_ij[2]
                       +(bndtmp2*bt_sg[m].dAAC[2]
                       +bndtmp3*bt_sg[m].dEE[2]
                       +bndtmp4*bt_sg[m].dFFC[2]
                       +bndtmp5*bt_sg[m].dGGC[2]);
                   setting=1;
                 }
                 else {
                   bt_sg[m].dSigB1[0]=(bndtmp2*bt_sg[m].dAAC[0]
                       +bndtmp3*bt_sg[m].dEE[0]
                       +bndtmp4*bt_sg[m].dFFC[0]
                       +bndtmp5*bt_sg[m].dGGC[0]);
                   bt_sg[m].dSigB1[1]=(bndtmp2*bt_sg[m].dAAC[1]
                       +bndtmp3*bt_sg[m].dEE[1]
                       +bndtmp4*bt_sg[m].dFFC[1]
                       +bndtmp5*bt_sg[m].dGGC[1]);
                   bt_sg[m].dSigB1[2]=(bndtmp2*bt_sg[m].dAAC[2]
                       +bndtmp3*bt_sg[m].dEE[2]
                       +bndtmp4*bt_sg[m].dFFC[2]
                       +bndtmp5*bt_sg[m].dGGC[2]);
                 }
               }
             }
 
 //This branch ensures there is no error for atoms with no neighbors (deposition)
 
             if(nb_t==0) {
               if(j>i) {
                 bt_sg[0].dSigB1[0]=bndtmp1*dis_ij[0];
                 bt_sg[0].dSigB1[1]=bndtmp1*dis_ij[1];
                 bt_sg[0].dSigB1[2]=bndtmp1*dis_ij[2];
               }
               else {
                 bt_sg[0].dSigB1[0]=-bndtmp1*dis_ij[0];
                 bt_sg[0].dSigB1[1]=-bndtmp1*dis_ij[1];
                 bt_sg[0].dSigB1[2]=-bndtmp1*dis_ij[2];
               }
               for(pp=0;pp<3;pp++) {
                 bt_sg[0].dAA[pp]=0.0;
                 bt_sg[0].dBB[pp]=0.0;
                 bt_sg[0].dCC[pp]=0.0;
                 bt_sg[0].dDD[pp]=0.0;
                 bt_sg[0].dEE[pp]=0.0;
                 bt_sg[0].dEE1[pp]=0.0;
                 bt_sg[0].dFF[pp]=0.0;
                 bt_sg[0].dAAC[pp]=0.0;
                 bt_sg[0].dBBC[pp]=0.0;
                 bt_sg[0].dCCC[pp]=0.0;
                 bt_sg[0].dDDC[pp]=0.0;
                 bt_sg[0].dEEC[pp]=0.0;
                 bt_sg[0].dFFC[pp]=0.0;
                 bt_sg[0].dGGC[pp]=0.0;
                 bt_sg[0].dUT[pp]=0.0;
                 bt_sg[0].dSigB1[pp]=0.0;
                 bt_sg[0].dSigB[pp]=0.0;
               }
               bt_sg[0].i=i;
               bt_sg[0].j=j;
               bt_sg[0].temp=temp_ij;
               nb_t++;
               if(nb_t>nb_sg) {
                 new_n_tot=nb_sg+maxneigh;
                 grow_sigma(nb_sg,new_n_tot);
                 nb_sg=new_n_tot;
               }
             }
             ps=sigB1[n]*rdBO+1.0;
             ks=(int)ps;
             if(nBOt-1<ks)
               ks=nBOt-1;
             ps=ps-ks;
             if(ps>1.0)
               ps=1.0;
             dsigB1=((FsigBO3[iij][ks-1]*ps+FsigBO2[iij][ks-1])*ps
                 +FsigBO1[iij][ks-1])*ps+FsigBO[iij][ks-1];
             dsigB2=(FsigBO6[iij][ks-1]*ps+FsigBO5[iij][ks-1])*ps+FsigBO4[iij][ks-1];
             part0=(FF+0.5*AAC+small5);
             part1=(sigma_f[iij]-0.5)*sigma_k[iij];
             part2=1.0-part1*EE1/part0;
             part3=dsigB1*part1/part0;
             part4=part3/part0*EE1;
 
 // sigB is the final expression for (a) Eq. 6 and (b) Eq. 11
 
             sigB[n]=dsigB1*part2;
             pp1=2.0*betaS_ij;
             for(m=0;m<nb_t;m++) {
               if((bt_sg[m].i>-1)&&(bt_sg[m].j>-1)) {
                 temp_kk=bt_sg[m].temp;
                 bt_i=bt_sg[m].i;
                 bt_j=bt_sg[m].j;
                 xtmp[0]=x[bt_j][0]-x[bt_i][0];
                 xtmp[1]=x[bt_j][1]-x[bt_i][1];
                 xtmp[2]=x[bt_j][2]-x[bt_i][2];
                 for(pp=0;pp<3;pp++) {
                   bt_sg[m].dSigB[pp]=dsigB2*part2*bt_sg[m].dSigB1[pp]
                       -part3*bt_sg[m].dEE1[pp]
                       +part4*(bt_sg[m].dFF[pp]
                       +0.5*bt_sg[m].dAAC[pp]);
                 }
                 for(pp=0;pp<3;pp++) {
                   ftmp[pp]=pp1*bt_sg[m].dSigB[pp];
                   f[bt_i][pp]-=ftmp[pp];
                   f[bt_j][pp]+=ftmp[pp];
                 }
                 if(evflag) {
                   ev_tally_xyz(bt_i,bt_j,nlocal,newton_pair,0.0,0.0,ftmp[0],ftmp[1]
                       ,ftmp[2],xtmp[0],xtmp[1],xtmp[2]);
                 }
               }
             }
           }
           n++;
         }
       }
     }
   }
   destroy_sigma();
 }
 
 /* ---------------------------------------------------------------------- */
 
 /*  The formulation differs slightly to avoid negative square roots
     in the calculation of Theta_pi,ij of (a) Eq. 36 and (b) Eq. 18
     see (d) */
 
 void PairBOP::sigmaBo_noa_otf()
 {
   int nb_t,new_n_tot;
   int n,i,j,k,kp,m,pp;
   int itmp,jtmp,ktmp,ltmp,mtmp;
   tagint i_tag,j_tag;
   int kp1,kp2,kp1type;
   int iij,iik,ijk,ikkp,ji,iikp,ijkp;
   int nkp;
   int nk0;
   int jNeik,kNeii,kNeij;
   int new1,new2,nlocal;
   int inum,*ilist,*iilist,*jlist,*klist;
   int **firstneigh,*numneigh;
   int temp_ij,temp_ik,temp_jkp,temp_kk,temp_jk;
   int temp_ji,temp_kkp;
   int nb_ij,nb_ik;
   int nb_jk,nb_jkp,nb_kkp;
   int nsearch;
   int sig_flag,setting,ncmp,ks;
   int itype,jtype,ktype,kptype;
   int bt_i,bt_j;
   int same_ikp,same_jkp,same_kpk;
   double AA,BB,CC,DD,EE1,FF;
   double AAC,BBC,CCC,DDC,EEC;
   double UT,bndtmp;
   double amean,gmean0,gmean1,gmean2,ps;
   double gfactor1,gprime1,gsqprime;
   double gfactorsq,gfactor2,gprime2;
   double gfactorsq2;
   double gfactor3,gprime3,gfactor,rfactor;
   double rfactorrt,rfactor1rt,rfactor1;
   double rcm1,rcm2,gcm1,gcm2,gcm3;
   double agpdpr1,app1;
   double dsigB1,dsigB2;
   double part0,part1,part2,part3,part4;
   double psign,bndtmp0,pp1;
   double bndtmp1,bndtmp2;
   double dis_ij[3],rsq_ij,r_ij;
   double betaS_ij,dBetaS_ij;
   double dis_ik[3],rsq_ik,r_ik;
   double betaS_ik,dBetaS_ik;
   double dis_ikp[3],rsq_ikp,r_ikp;
   double betaS_ikp;
   double dis_jk[3],rsq_jk,r_jk;
   double betaS_jk,dBetaS_jk;
   double dis_jkp[3],rsq_jkp,r_jkp;
   double betaS_jkp,dBetaS_jkp;
   double dis_kkp[3],rsq_kkp,r_kkp;
   double betaS_kkp;
   double cosAng_jik,dcA_jik[3][2];
   double cosAng_jikp;
   double cosAng_kikp;
   double cosAng_ijk,dcA_ijk[3][2];
   double cosAng_ijkp;
   double cosAng_kjkp;
   double cosAng_ikj,dcA_ikj[3][2];
   double cosAng_ikkp;
   double cosAng_jkkp;
 
 
   double ftmp[3],xtmp[3];
   double **x = atom->x;
   double **f = atom->f;
   tagint *tag = atom->tag;
   int newton_pair = force->newton_pair;
   int *type = atom->type;
 
   nlocal = atom->nlocal;
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   n=0;
   if(nb_sg==0) {
     nb_sg=4;
   }
   if(allocate_sigma) {
     destroy_sigma();
   }
     create_sigma(nb_sg);
   for(itmp=0;itmp<inum;itmp++) {
 
     i = ilist[itmp];
     i_tag=tag[i];
     itype = map[type[i]]+1;
 
 //j is loop over all neighbors of i
 
     for(jtmp=0;jtmp<numneigh[i];jtmp++) {
       for(m=0;m<nb_sg;m++) {
         for(pp=0;pp<3;pp++) {
           bt_sg[m].dAA[pp]=0.0;
           bt_sg[m].dBB[pp]=0.0;
           bt_sg[m].dEE1[pp]=0.0;
           bt_sg[m].dFF[pp]=0.0;
           bt_sg[m].dAAC[pp]=0.0;
           bt_sg[m].dSigB1[pp]=0.0;
           bt_sg[m].dSigB[pp]=0.0;
         }
         bt_sg[m].i=-1;
         bt_sg[m].j=-1;
       }
       nb_t=0;
       iilist=firstneigh[i];
       temp_ij=BOP_index[i]+jtmp;
       j=iilist[jtmp];
       jlist=firstneigh[j];
       j_tag=tag[j];
       jtype = map[type[j]]+1;
       nb_ij=nb_t;
       nb_t++;
       if(nb_t>nb_sg) {
         new_n_tot=nb_sg+maxneigh;
         grow_sigma(nb_sg,new_n_tot);
         nb_sg=new_n_tot;
       }
       bt_sg[nb_ij].temp=temp_ij;
       bt_sg[nb_ij].i=i;
       bt_sg[nb_ij].j=j;
       if(j_tag>=i_tag) {
         if(itype==jtype)
           iij=itype-1;
         else if(itype<jtype)
           iij=itype*bop_types-itype*(itype+1)/2+jtype-1;
         else
           iij=jtype*bop_types-jtype*(jtype+1)/2+itype-1;
         for(ji=0;ji<numneigh[j];ji++) {
           temp_ji=BOP_index[j]+ji;
           if(x[jlist[ji]][0]==x[i][0]) {
             if(x[jlist[ji]][1]==x[i][1]) {
               if(x[jlist[ji]][2]==x[i][2]) {
                 break;
               }
             }
           }
         }
         dis_ij[0]=x[j][0]-x[i][0];
         dis_ij[1]=x[j][1]-x[i][1];
         dis_ij[2]=x[j][2]-x[i][2];
         rsq_ij=dis_ij[0]*dis_ij[0]
             +dis_ij[1]*dis_ij[1]
             +dis_ij[2]*dis_ij[2];
         r_ij=sqrt(rsq_ij);
 
         if(r_ij<rcut[iij]) {
 
           ps=r_ij*rdr[iij]+1.0;
           ks=(int)ps;
           if(nr-1<ks)
             ks=nr-1;
           ps=ps-ks;
           if(ps>1.0)
             ps=1.0;
           betaS_ij=((pBetaS3[iij][ks-1]*ps+pBetaS2[iij][ks-1])*ps
               +pBetaS1[iij][ks-1])*ps+pBetaS[iij][ks-1];
           dBetaS_ij=(pBetaS6[iij][ks-1]*ps+pBetaS5[iij][ks-1])*ps
               +pBetaS4[iij][ks-1];
           nSigBk[n]=0;
 
 //AA-EE1 are the components making up Eq. 30 (a)
 
           AA=0.0;
           BB=0.0;
           CC=0.0;
           DD=0.0;
           EE1=0.0;
 
 //FF is the Beta_sigma^2 term
 
           FF=betaS_ij*betaS_ij;
 
 //agpdpr1 is derivative of FF w.r.t. r_ij
 
           agpdpr1=2.0*betaS_ij*dBetaS_ij/r_ij;
 
 //dXX derivatives are taken with respect to all pairs contributing to the energy
 //nb_ij is derivative w.r.t. ij pair
 
           bt_sg[nb_ij].dFF[0]=agpdpr1*dis_ij[0];
           bt_sg[nb_ij].dFF[1]=agpdpr1*dis_ij[1];
           bt_sg[nb_ij].dFF[2]=agpdpr1*dis_ij[2];
 
 //k is loop over all neighbors of i again with j neighbor of i
 
           for(ktmp=0;ktmp<numneigh[i];ktmp++) {
             temp_ik=BOP_index[i]+ktmp;
             if(ktmp!=jtmp) {
               k=iilist[ktmp];
               klist=firstneigh[k];
               ktype = map[type[k]]+1;
               if(itype==ktype)
                 iik=itype-1;
               else if(itype<ktype)
                 iik=itype*bop_types-itype*(itype+1)/2+ktype-1;
               else
                 iik=ktype*bop_types-ktype*(ktype+1)/2+itype-1;
 
 //find neighbor of k that is equal to i
 
               for(kNeii=0;kNeii<numneigh[k];kNeii++) {
                 if(x[klist[kNeii]][0]==x[i][0]) {
                   if(x[klist[kNeii]][1]==x[i][1]) {
                     if(x[klist[kNeii]][2]==x[i][2]) {
                       break;
                     }
                   }
                 }
               }
               dis_ik[0]=x[k][0]-x[i][0];
               dis_ik[1]=x[k][1]-x[i][1];
               dis_ik[2]=x[k][2]-x[i][2];
               rsq_ik=dis_ik[0]*dis_ik[0]
                   +dis_ik[1]*dis_ik[1]
                   +dis_ik[2]*dis_ik[2];
               r_ik=sqrt(rsq_ik);
               if(r_ik<=rcut[iik]) {
                 ps=r_ik*rdr[iik]+1.0;
                 ks=(int)ps;
                 if(nr-1<ks)
                   ks=nr-1;
                 ps=ps-ks;
                 if(ps>1.0)
                   ps=1.0;
                 betaS_ik=((pBetaS3[iik][ks-1]*ps+pBetaS2[iik][ks-1])*ps
                     +pBetaS1[iik][ks-1])*ps+pBetaS[iik][ks-1];
                 dBetaS_ik=(pBetaS6[iik][ks-1]*ps+pBetaS5[iik][ks-1])*ps
                     +pBetaS4[iik][ks-1];
 
 //find neighbor of j that is equal to k
 
                 for(jNeik=0;jNeik<numneigh[j];jNeik++) {
                   temp_jk=BOP_index[j]+jNeik;
                   if(x[jlist[jNeik]][0]==x[k][0]) {
                     if(x[jlist[jNeik]][1]==x[k][1]) {
                       if(x[jlist[jNeik]][2]==x[k][2]) {
                         break;
                       }
                     }
                   }
                 }
 
 //find neighbor of k that is equal to j
 
                 for(kNeij=0;kNeij<numneigh[k];kNeij++) {
                   if(x[klist[kNeij]][0]==x[j][0]) {
                     if(x[klist[kNeij]][1]==x[j][1]) {
                       if(x[klist[kNeij]][2]==x[j][2]) {
                         break;
                       }
                     }
                   }
                 }
                 dis_jk[0]=x[k][0]-x[j][0];
                 dis_jk[1]=x[k][1]-x[j][1];
                 dis_jk[2]=x[k][2]-x[j][2];
                 rsq_jk=dis_jk[0]*dis_jk[0]
                     +dis_jk[1]*dis_jk[1]
                     +dis_jk[2]*dis_jk[2];
                 r_jk=sqrt(rsq_jk);
 
                 sig_flag=0;
                 for(nsearch=0;nsearch<nSigBk[n];nsearch++) {
                   ncmp=itypeSigBk[n][nsearch];
                   if(x[ncmp][0]==x[k][0]) {
                     if(x[ncmp][1]==x[k][1]) {
                       if(x[ncmp][2]==x[k][2]) {
                         nk0=nsearch;
                         sig_flag=1;
                         break;
                       }
                     }
                   }
                 }
                 if(sig_flag==0) {
                   nSigBk[n]=nSigBk[n]+1;
                   nk0=nSigBk[n]-1;
                   itypeSigBk[n][nk0]=k;
                 }
                 nb_ik=nb_t;
                 nb_t++;
                 if(nb_t>nb_sg) {
                   new_n_tot=nb_sg+maxneigh;
                   grow_sigma(nb_sg,new_n_tot);
                   nb_sg=new_n_tot;
                 }
                 bt_sg[nb_ik].temp=temp_ik;
                 bt_sg[nb_ik].i=i;
                 bt_sg[nb_ik].j=k;
                 nb_jk=nb_t;
                 nb_t++;
                 if(nb_t>nb_sg) {
                   new_n_tot=nb_sg+maxneigh;
                   grow_sigma(nb_sg,new_n_tot);
                   nb_sg=new_n_tot;
                 }
                 bt_sg[nb_jk].temp=temp_jk;
                 bt_sg[nb_jk].i=j;
                 bt_sg[nb_jk].j=k;
                 cosAng_jik=(dis_ij[0]*dis_ik[0]+dis_ij[1]*dis_ik[1]
                     +dis_ij[2]*dis_ik[2])/(r_ij*r_ik);
                 dcA_jik[0][0]=(dis_ik[0]*r_ij*r_ik-cosAng_jik
                     *dis_ij[0]*r_ik*r_ik)/(r_ij*r_ij*r_ik*r_ik);
                 dcA_jik[1][0]=(dis_ik[1]*r_ij*r_ik-cosAng_jik
                     *dis_ij[1]*r_ik*r_ik)/(r_ij*r_ij*r_ik*r_ik);
                 dcA_jik[2][0]=(dis_ik[2]*r_ij*r_ik-cosAng_jik
                     *dis_ij[2]*r_ik*r_ik)/(r_ij*r_ij*r_ik*r_ik);
                 dcA_jik[0][1]=(dis_ij[0]*r_ij*r_ik-cosAng_jik
                     *dis_ik[0]*r_ij*r_ij)/(r_ij*r_ij*r_ik*r_ik);
                 dcA_jik[1][1]=(dis_ij[1]*r_ij*r_ik-cosAng_jik
                     *dis_ik[1]*r_ij*r_ij)/(r_ij*r_ij*r_ik*r_ik);
                 dcA_jik[2][1]=(dis_ij[2]*r_ij*r_ik-cosAng_jik
                     *dis_ik[2]*r_ij*r_ij)/(r_ij*r_ij*r_ik*r_ik);
                 gmean0=sigma_g0[jtype-1][itype-1][ktype-1];
                 gmean1=sigma_g1[jtype-1][itype-1][ktype-1];
                 gmean2=sigma_g2[jtype-1][itype-1][ktype-1];
                 amean=cosAng_jik;
                 gfactor1=gmean0+gmean1*amean
                     +gmean2*amean*amean;
                 gfactorsq=gfactor1*gfactor1;
                 gprime1=gmean1+2.0*gmean2*amean;
                 gsqprime=2.0*gfactor1*gprime1;
 
 //AA is Eq. 34 (a) or Eq. 10 (c) for the i atom
 //1st CC is Eq. 11 (c) for i atom where j & k=neighbor of i
 
                 AA=AA+gfactorsq*betaS_ik*betaS_ik;
                 CC=CC+gfactorsq*betaS_ik*betaS_ik*betaS_ik*betaS_ik;
 
 //agpdpr1 is derivative of AA w.r.t. Beta(rik)
 //app1 is derivative of AA w.r.t. cos(theta_jik)
 
                 agpdpr1=2.0*gfactorsq*betaS_ik*dBetaS_ik/r_ik;
                 app1=betaS_ik*betaS_ik*gsqprime;
                 bt_sg[nb_ij].dAA[0]+=
                     app1*dcA_jik[0][0];
                 bt_sg[nb_ij].dAA[1]+=
                     app1*dcA_jik[1][0];
                 bt_sg[nb_ij].dAA[2]+=
                     app1*dcA_jik[2][0];
                 bt_sg[nb_ik].dAA[0]+=
                     app1*dcA_jik[0][1]
                     +agpdpr1*dis_ik[0];
                 bt_sg[nb_ik].dAA[1]+=
                     app1*dcA_jik[1][1]
                     +agpdpr1*dis_ik[1];
                 bt_sg[nb_ik].dAA[2]+=
                     app1*dcA_jik[2][1]
                     +agpdpr1*dis_ik[2];
 
 //k' is loop over all neighbors of j, with k a neighbor of i and
 //j a neighbor of i, to determine which k' is k
 
                 same_kpk=0;
                 for(ltmp=0;ltmp<numneigh[j];ltmp++) {
                   temp_jkp=BOP_index[j]+ltmp;
                   kp1=jlist[ltmp];
                   kp1type=map[type[kp1]]+1;
                   if(x[kp1][0]==x[k][0]) {
                     if(x[kp1][1]==x[k][1]) {
                       if(x[kp1][2]==x[k][2]) {
                         same_kpk=1;
                         break;
                       }
                     }
                   }
                 }
                 if(same_kpk){
 
 //loop over neighbors of k
 
                   for(mtmp=0;mtmp<numneigh[k];mtmp++) {
                     kp2=klist[mtmp];
                     if(x[kp2][0]==x[k][0]) {
                       if(x[kp2][1]==x[k][1]) {
                         if(x[kp2][2]==x[k][2]) {
                           break;
                         }
                       }
                     }
                   }
                   if(jtype==ktype)
                     ijk=jtype-1;
                   else if(jtype < ktype)
                     ijk=jtype*bop_types-jtype*(jtype+1)/2+ktype-1;
                   else
                     ijk=ktype*bop_types-ktype*(ktype+1)/2+jtype-1;
                   if(jtype==kp1type)
                     ijkp=jtype-1;
                   else if(jtype<kp1type)
                     ijkp=jtype*bop_types-jtype*(jtype+1)/2+kp1type-1;
                   else
                     ijkp=kp1type*bop_types-kp1type*(kp1type+1)/2+jtype-1;
 
                   dis_jkp[0]=x[kp1][0]-x[j][0];
                   dis_jkp[1]=x[kp1][1]-x[j][1];
                   dis_jkp[2]=x[kp1][2]-x[j][2];
                   rsq_jkp=dis_jkp[0]*dis_jkp[0]
                       +dis_jkp[1]*dis_jkp[1]
                       +dis_jkp[2]*dis_jkp[2];
                   r_jkp=sqrt(rsq_jkp);
                   if(r_jkp<=rcut[ijkp]) {
                     ps=r_jkp*rdr[ijkp]+1.0;
                     ks=(int)ps;
                     if(nr-1<ks)
                       ks=nr-1;
                     ps=ps-ks;
                     if(ps>1.0)
                       ps=1.0;
                     betaS_jkp=((pBetaS3[ijkp][ks-1]*ps+pBetaS2[ijkp][ks-1])*ps
                         +pBetaS1[ijkp][ks-1])*ps+pBetaS[ijkp][ks-1];
                     dBetaS_jkp=(pBetaS6[ijkp][ks-1]*ps+pBetaS5[ijkp][ks-1])*ps
                         +pBetaS4[ijkp][ks-1];
                     cosAng_ijk=(-dis_ij[0]*dis_jk[0]-dis_ij[1]*dis_jk[1]
                         -dis_ij[2]*dis_jk[2])/(r_ij*r_jk);
                     dcA_ijk[0][0]=(dis_jk[0]*r_ij*r_jk-cosAng_ijk
                         *-dis_ij[0]*r_jk*r_jk)/(r_ij*r_ij*r_jk*r_jk);
                     dcA_ijk[1][0]=(dis_jk[1]*r_ij*r_jk-cosAng_ijk
                         *-dis_ij[1]*r_jk*r_jk)/(r_ij*r_ij*r_jk*r_jk);
                     dcA_ijk[2][0]=(dis_jk[2]*r_ij*r_jk-cosAng_ijk
                         *-dis_ij[2]*r_jk*r_jk)/(r_ij*r_ij*r_jk*r_jk);
                     dcA_ijk[0][1]=(-dis_ij[0]*r_ij*r_jk-cosAng_ijk
                         *dis_jk[0]*r_ij*r_ij)/(r_ij*r_ij*r_jk*r_jk);
                     dcA_ijk[1][1]=(-dis_ij[1]*r_ij*r_jk-cosAng_ijk
                         *dis_jk[1]*r_ij*r_ij)/(r_ij*r_ij*r_jk*r_jk);
                     dcA_ijk[2][1]=(-dis_ij[2]*r_ij*r_jk-cosAng_ijk
                         *dis_jk[2]*r_ij*r_ij)/(r_ij*r_ij*r_jk*r_jk);
                     gmean0=sigma_g0[itype-1][jtype-1][ktype-1];
                     gmean1=sigma_g1[itype-1][jtype-1][ktype-1];
                     gmean2=sigma_g2[itype-1][jtype-1][ktype-1];
                     amean=cosAng_ijk;
                     gfactor2=gmean0+gmean1*amean
                         +gmean2*amean*amean;
                     gprime2=gmean1+2.0*gmean2*amean;
                     gmean0=sigma_g0[itype-1][ktype-1][jtype-1];
                     gmean1=sigma_g1[itype-1][ktype-1][jtype-1];
                     gmean2=sigma_g2[itype-1][ktype-1][jtype-1];
                     cosAng_ikj=(dis_ik[0]*dis_jk[0]+dis_ik[1]*dis_jk[1]
                         +dis_ik[2]*dis_jk[2])/(r_ik*r_jk);
                     dcA_ikj[0][0]=(-dis_jk[0]*r_ik*r_jk-cosAng_ikj
                         *-dis_ik[0]*r_jk*r_jk)/(r_ik*r_ik*r_jk*r_jk);
                     dcA_ikj[1][0]=(-dis_jk[1]*r_ik*r_jk-cosAng_ikj
                         *-dis_ik[1]*r_jk*r_jk)/(r_ik*r_ik*r_jk*r_jk);
                     dcA_ikj[2][0]=(-dis_jk[2]*r_ik*r_jk-cosAng_ikj
                         *-dis_ik[2]*r_jk*r_jk)/(r_ik*r_ik*r_jk*r_jk);
                     dcA_ikj[0][1]=(-dis_ik[0]*r_ik*r_jk-cosAng_ikj
                         *-dis_jk[0]*r_ik*r_ik)/(r_ik*r_ik*r_jk*r_jk);
                     dcA_ikj[1][1]=(-dis_ik[1]*r_ik*r_jk-cosAng_ikj
                         *-dis_jk[1]*r_ik*r_ik)/(r_ik*r_ik*r_jk*r_jk);
                     dcA_ikj[2][1]=(-dis_ik[2]*r_ik*r_jk-cosAng_ikj
                         *-dis_jk[2]*r_ik*r_ik)/(r_ik*r_ik*r_jk*r_jk);
                     amean=cosAng_ikj;
                     gfactor3=gmean0+gmean1*amean
                         +gmean2*amean*amean;
                     gprime3=gmean1+2.0*gmean2*amean;
                     gfactor=gfactor1*gfactor2*gfactor3;
                     rfactor=betaS_ik*betaS_jkp;
 
 //EE1 is (b) Eq. 12
 
                     EE1=EE1+gfactor*rfactor;
 
 //rcm1 is derivative of EE1 w.r.t Beta(r_ik)
 //rcm2 is derivative of EE1 w.r.t Beta(r_jk')
 //gcm1 is derivative of EE1 w.r.t cos(theta_jik)
 //gcm2 is derivative of EE1 w.r.t cos(theta_ijk)
 //gcm3 is derivative of EE1 w.r.t cos(theta_ikj)
 
                     rcm1=gfactor*betaS_jkp*dBetaS_ik/r_ik;
                     rcm2=gfactor*betaS_ik*dBetaS_jkp/r_jkp;
                     gcm1=rfactor*gprime1*gfactor2*gfactor3;
                     gcm2=rfactor*gfactor1*gprime2*gfactor3;
                     gcm3=rfactor*gfactor1*gfactor2*gprime3;
                     bt_sg[nb_ij].dEE1[0]+=
                         gcm1*dcA_jik[0][0]
                         -gcm2*dcA_ijk[0][0];
                     bt_sg[nb_ij].dEE1[1]+=
                         gcm1*dcA_jik[1][0]
                         -gcm2*dcA_ijk[1][0];
                     bt_sg[nb_ij].dEE1[2]+=
                         gcm1*dcA_jik[2][0]
                         -gcm2*dcA_ijk[2][0];
                     bt_sg[nb_ik].dEE1[0]+=
                         gcm1*dcA_jik[0][1]
                         +rcm1*dis_ik[0]
                         -gcm3*dcA_ikj[0][0];
                     bt_sg[nb_ik].dEE1[1]+=
                         gcm1*dcA_jik[1][1]
                         +rcm1*dis_ik[1]
                         -gcm3*dcA_ikj[1][0];
                     bt_sg[nb_ik].dEE1[2]+=
                         gcm1*dcA_jik[2][1]
                         +rcm1*dis_ik[2]
                         -gcm3*dcA_ikj[2][0];
                     bt_sg[nb_jk].dEE1[0]+=
                         gcm2*dcA_ijk[0][1]
                         +rcm2*dis_jkp[0]
                         -gcm3*dcA_ikj[0][1];
                     bt_sg[nb_jk].dEE1[1]+=
                         gcm2*dcA_ijk[1][1]
                         +rcm2*dis_jkp[1]
                         -gcm3*dcA_ikj[1][1];
                     bt_sg[nb_jk].dEE1[2]+=
                         gcm2*dcA_ijk[2][1]
                         +rcm2*dis_jkp[2]
                         -gcm3*dcA_ikj[2][1];
                   }
                 }
 
 // k and k' and j are all different neighbors of i
 
                 for(ltmp=0;ltmp<ktmp;ltmp++) {
                   if(ltmp!=jtmp) {
                     kp=iilist[ltmp];;
                     kptype = map[type[kp]]+1;
                     if(itype==kptype)
                       iikp=itype-1;
                     else if(itype<kptype)
                       iikp=itype*bop_types-itype*(itype+1)/2+kptype-1;
                     else
                       iikp=kptype*bop_types-kptype*(kptype+1)/2+itype-1;
                     for(nsearch=0;nsearch<nSigBk[n];nsearch++) {
                       ncmp=itypeSigBk[n][nsearch];
                       if(x[ncmp][0]==x[kp][0]) {
                         if(x[ncmp][1]==x[kp][1]) {
                           if(x[ncmp][2]==x[kp][2]) {
                             break;
                           }
                         }
                       }
                     }
                     dis_ikp[0]=x[kp][0]-x[i][0];
                     dis_ikp[1]=x[kp][1]-x[i][1];
                     dis_ikp[2]=x[kp][2]-x[i][2];
                     rsq_ikp=dis_ikp[0]*dis_ikp[0]
                         +dis_ikp[1]*dis_ikp[1]
                         +dis_ikp[2]*dis_ikp[2];
                     r_ikp=sqrt(rsq_ikp);
                     if(r_ikp<=rcut[iikp]) {
                       ps=r_ikp*rdr[iikp]+1.0;
                       ks=(int)ps;
                       if(nr-1<ks)
                         ks=nr-1;
                       ps=ps-ks;
                       if(ps>1.0)
                         ps=1.0;
                       betaS_ikp=((pBetaS3[iikp][ks-1]*ps+pBetaS2[iikp][ks-1])*ps
                           +pBetaS1[iikp][ks-1])*ps+pBetaS[iikp][ks-1];
                       gmean0=sigma_g0[jtype-1][itype-1][kptype-1];
                       gmean1=sigma_g1[jtype-1][itype-1][kptype-1];
                       gmean2=sigma_g2[jtype-1][itype-1][kptype-1];
                       cosAng_jikp=(dis_ij[0]*dis_ikp[0]+dis_ij[1]*dis_ikp[1]
                           +dis_ij[2]*dis_ikp[2])/(r_ij*r_ikp);
                       cosAng_kikp=(dis_ik[0]*dis_ikp[0]+dis_ik[1]*dis_ikp[1]
                           +dis_ik[2]*dis_ikp[2])/(r_ik*r_ikp);
                       amean=cosAng_jikp;
                       gfactor2=gmean0+gmean1*amean
                           +gmean2*amean*amean;
                       gprime2=gmean1+2.0*gmean2*amean;
                       gmean0=sigma_g0[ktype-1][itype-1][kptype-1];
                       gmean1=sigma_g1[ktype-1][itype-1][kptype-1];
                       gmean2=sigma_g2[ktype-1][itype-1][kptype-1];
                       amean=cosAng_kikp;
                       gfactor3=gmean0+gmean1*amean
                           +gmean2*amean*amean;
                       gprime3=gmean1+2.0*gmean2*amean;
                       gfactor=gfactor1*gfactor2*gfactor3;
                       rfactorrt=betaS_ik*betaS_ikp;
                       rfactor=rfactorrt*rfactorrt;
 
 //2nd CC is second term of Eq. 11 (c) for i atom where j , k & k' =neighbor of i
 
                       CC=CC+2.0*gfactor*rfactor;
                     }
                   }
                 }
 
 // j and k are different neighbors of i and k' is a neighbor of k not equal to i or j
 
                 for(ltmp=0;ltmp<numneigh[k];ltmp++) {
                   temp_kkp=BOP_index[k]+ltmp;
                   kp=klist[ltmp];;
                   kptype = map[type[kp]]+1;
                   same_ikp=0;
                   same_jkp=0;
                   if(x[i][0]==x[kp][0]) {
                     if(x[i][1]==x[kp][1]) {
                       if(x[i][2]==x[kp][2]) {
                         same_ikp=1;
                       }
                     }
                   }
                   if(x[j][0]==x[kp][0]) {
                     if(x[j][1]==x[kp][1]) {
                       if(x[j][2]==x[kp][2]) {
                         same_jkp=1;
                       }
                     }
                   }
                   if(!same_ikp&&!same_jkp) {
                     if(ktype==kptype)
                       ikkp=ktype-1;
                     else if(ktype<kptype)
                       ikkp=ktype*bop_types-ktype*(ktype+1)/2+kptype-1;
                     else
                       ikkp=kptype*bop_types-kptype*(kptype+1)/2+ktype-1;
                     dis_kkp[0]=x[kp][0]-x[k][0];
                     dis_kkp[1]=x[kp][1]-x[k][1];
                     dis_kkp[2]=x[kp][2]-x[k][2];
                     rsq_kkp=dis_kkp[0]*dis_kkp[0]
                         +dis_kkp[1]*dis_kkp[1]
                         +dis_kkp[2]*dis_kkp[2];
                     r_kkp=sqrt(rsq_kkp);
                     if(r_kkp<=rcut[ikkp]) {
                       ps=r_kkp*rdr[ikkp]+1.0;
                       ks=(int)ps;
                       if(nr-1<ks)
                         ks=nr-1;
                       ps=ps-ks;
                       if(ps>1.0)
                         ps=1.0;
                       betaS_kkp=((pBetaS3[ikkp][ks-1]*ps+pBetaS2[ikkp][ks-1])*ps
                           +pBetaS1[ikkp][ks-1])*ps+pBetaS[ikkp][ks-1];
                       sig_flag=0;
                       for(nsearch=0;nsearch<nSigBk[n];nsearch++) {
                         ncmp=itypeSigBk[n][nsearch];
                         if(x[ncmp][0]==x[kp][0]) {
                           if(x[ncmp][1]==x[kp][1]) {
                             if(x[ncmp][2]==x[kp][2]) {
                               sig_flag=1;
                               nkp=nsearch;
                               break;
                             }
                           }
                         }
                       }
                       if(sig_flag==0) {
                         nSigBk[n]=nSigBk[n]+1;
                         nkp=nSigBk[n]-1;
                         itypeSigBk[n][nkp]=kp;
                       }
                       cosAng_ikkp=(-dis_ik[0]*dis_kkp[0]-dis_ik[1]*dis_kkp[1]
                           -dis_ik[2]*dis_kkp[2])/(r_ik*r_kkp);
                       gmean0=sigma_g0[itype-1][ktype-1][kptype-1];
                       gmean1=sigma_g1[itype-1][ktype-1][kptype-1];
                       gmean2=sigma_g2[itype-1][ktype-1][kptype-1];
                       amean=cosAng_ikkp;
                       gfactor2=gmean0+gmean1*amean
                           +gmean2*amean*amean;
                       gprime2=gmean1+2.0*gmean2*amean;
                       gfactorsq2=gfactor2*gfactor2;
                       gfactor=gfactorsq*gfactorsq2;
                       rfactorrt=betaS_ik*betaS_kkp;
                       rfactor=rfactorrt*rfactorrt;
 
 //3rd CC is third term of Eq. 11 (c) for i atom
 //where j , k =neighbor of i & k' =neighbor of k
 
                       CC=CC+gfactor*rfactor;
                     }
                   }
                 }
               }
             }
           }
 
 //j is a neighbor of i and k is a neighbor of j not equal to i
 
           for(ktmp=0;ktmp<numneigh[j];ktmp++) {
             if(ktmp!=ji) {
               temp_jk=BOP_index[j]+ktmp;
               k=jlist[ktmp];
               klist=firstneigh[k];
               ktype=map[type[k]]+1;
               for(kNeij=0;kNeij<numneigh[k];kNeij++) {
                 if(x[klist[kNeij]][0]==x[j][0]) {
                   if(x[klist[kNeij]][1]==x[j][1]) {
                     if(x[klist[kNeij]][2]==x[j][2]) {
                       break;
                     }
                   }
                 }
               }
               if(jtype==ktype)
                 ijk=jtype-1;
               else if(jtype<ktype)
                 ijk=jtype*bop_types-jtype*(jtype+1)/2+ktype-1;
               else
                 ijk=ktype*bop_types-ktype*(ktype+1)/2+jtype-1;
               sig_flag=0;
               for(nsearch=0;nsearch<nSigBk[n];nsearch++) {
                 ncmp=itypeSigBk[n][nsearch];
                 if(x[ncmp][0]==x[k][0]) {
                   if(x[ncmp][1]==x[k][1]) {
                     if(x[ncmp][2]==x[k][2]) {
                       new1=nsearch;
                       sig_flag=1;
                       break;
                     }
                   }
                 }
               }
               if(sig_flag==0) {
                 nSigBk[n]=nSigBk[n]+1;
                 new1=nSigBk[n]-1;
                 itypeSigBk[n][new1]=k;
               }
               dis_jk[0]=x[k][0]-x[j][0];
               dis_jk[1]=x[k][1]-x[j][1];
               dis_jk[2]=x[k][2]-x[j][2];
               rsq_jk=dis_jk[0]*dis_jk[0]
                   +dis_jk[1]*dis_jk[1]
                   +dis_jk[2]*dis_jk[2];
               r_jk=sqrt(rsq_jk);
               if(r_jk<=rcut[ijk]) {
                 ps=r_jk*rdr[ijk]+1.0;
                 ks=(int)ps;
                 if(nr-1<ks)
                   ks=nr-1;
                 ps=ps-ks;
                 if(ps>1.0)
                   ps=1.0;
                 betaS_jk=((pBetaS3[ijk][ks-1]*ps+pBetaS2[ijk][ks-1])*ps
                     +pBetaS1[ijk][ks-1])*ps+pBetaS[ijk][ks-1];
                 dBetaS_jk=(pBetaS6[ijk][ks-1]*ps+pBetaS5[ijk][ks-1])*ps
                     +pBetaS4[ijk][ks-1];
                 cosAng_ijk=(-dis_ij[0]*dis_jk[0]-dis_ij[1]*dis_jk[1]
                     -dis_ij[2]*dis_jk[2])/(r_ij*r_jk);
                 dcA_ijk[0][0]=(dis_jk[0]*r_ij*r_jk-cosAng_ijk
                     *-dis_ij[0]*r_jk*r_jk)/(r_ij*r_ij*r_jk*r_jk);
                 dcA_ijk[1][0]=(dis_jk[1]*r_ij*r_jk-cosAng_ijk
                     *-dis_ij[1]*r_jk*r_jk)/(r_ij*r_ij*r_jk*r_jk);
                 dcA_ijk[2][0]=(dis_jk[2]*r_ij*r_jk-cosAng_ijk
                     *-dis_ij[2]*r_jk*r_jk)/(r_ij*r_ij*r_jk*r_jk);
                 dcA_ijk[0][1]=(-dis_ij[0]*r_ij*r_jk-cosAng_ijk
                     *dis_jk[0]*r_ij*r_ij)/(r_ij*r_ij*r_jk*r_jk);
                 dcA_ijk[1][1]=(-dis_ij[1]*r_ij*r_jk-cosAng_ijk
                     *dis_jk[1]*r_ij*r_ij)/(r_ij*r_ij*r_jk*r_jk);
                 dcA_ijk[2][1]=(-dis_ij[2]*r_ij*r_jk-cosAng_ijk
                     *dis_jk[2]*r_ij*r_ij)/(r_ij*r_ij*r_jk*r_jk);
                 nb_jk=nb_t;
                 nb_t++;
                 if(nb_t>nb_sg) {
                   new_n_tot=nb_sg+maxneigh;
                   grow_sigma(nb_sg,new_n_tot);
                   nb_sg=new_n_tot;
                 }
                 bt_sg[nb_jk].temp=temp_jk;
                 bt_sg[nb_jk].i=j;
                 bt_sg[nb_jk].j=k;
                 gmean0=sigma_g0[itype-1][jtype-1][ktype-1];
                 gmean1=sigma_g1[itype-1][jtype-1][ktype-1];
                 gmean2=sigma_g2[itype-1][jtype-1][ktype-1];
                 amean=cosAng_ijk;
                 gfactor1=gmean0+gmean1*amean
                     +gmean2*amean*amean;
                 gprime1=gmean1+2.0*gmean2*amean;
                 gfactorsq=gfactor1*gfactor1;
                 gsqprime=2.0*gfactor1*gprime1;
                 rfactor1rt=betaS_jk*betaS_jk;
                 rfactor1=rfactor1rt*rfactor1rt;
 
 //BB is Eq. 34 (a) or Eq. 10 (c) for the j atom
 //1st DD is Eq. 11 (c) for j atom where i & k=neighbor of j
 
                 BB=BB+gfactorsq*rfactor1rt;
                 DD=DD+gfactorsq*rfactor1;
 
 //agpdpr1 is derivative of BB  w.r.t. Beta(r_jk)
 //app1 is derivative of BB w.r.t. cos(theta_ijk)
 
                 agpdpr1=2.0*gfactorsq*betaS_jk*dBetaS_jk/r_jk;
                 app1=rfactor1rt*gsqprime;
                 bt_sg[nb_ij].dBB[0]-=
                     app1*dcA_ijk[0][0];
                 bt_sg[nb_ij].dBB[1]-=
                     app1*dcA_ijk[1][0];
                 bt_sg[nb_ij].dBB[2]-=
                     app1*dcA_ijk[2][0];
                 bt_sg[nb_jk].dBB[0]+=
                     app1*dcA_ijk[0][1]
                     +agpdpr1*dis_jk[0];
                 bt_sg[nb_jk].dBB[1]+=
                     app1*dcA_ijk[1][1]
                     +agpdpr1*dis_jk[1];
                 bt_sg[nb_jk].dBB[2]+=
                     app1*dcA_ijk[2][1]
                     +agpdpr1*dis_jk[2];
 
 //j is a neighbor of i; k and k' are different neighbors of j not equal to i
 
                 for(ltmp=0;ltmp<ktmp;ltmp++) {
                   if(ltmp!=ji) {
                     temp_jkp=BOP_index[j]+ltmp;
                     kp=jlist[ltmp];
                     kptype=map[type[kp]]+1;
                     if(jtype==kptype)
                       ijkp=jtype-1;
                     else if(jtype<kptype)
                       ijkp=jtype*bop_types-jtype*(jtype+1)/2+kptype-1;
                     else
                       ijkp=kptype*bop_types-kptype*(kptype+1)/2+jtype-1;
                     for(nsearch=0;nsearch<nSigBk[n];nsearch++) {
                       ncmp=itypeSigBk[n][nsearch];
                       if(x[ncmp][0]==x[kp][0]) {
                         if(x[ncmp][1]==x[kp][1]) {
                           if(x[ncmp][2]==x[kp][2]) {
                             new2=nsearch;
                             break;
                           }
                         }
                       }
                     }
                     dis_jkp[0]=x[kp][0]-x[j][0];
                     dis_jkp[1]=x[kp][1]-x[j][1];
                     dis_jkp[2]=x[kp][2]-x[j][2];
                     rsq_jkp=dis_jkp[0]*dis_jkp[0]
                         +dis_jkp[1]*dis_jkp[1]
                         +dis_jkp[2]*dis_jkp[2];
                     r_jkp=sqrt(rsq_jkp);
                     if(r_jkp<=rcut[ijkp]) {
                       ps=r_jkp*rdr[ijkp]+1.0;
                       ks=(int)ps;
                       if(nr-1<ks)
                         ks=nr-1;
                       ps=ps-ks;
                       if(ps>1.0)
                         ps=1.0;
                       betaS_jkp=((pBetaS3[ijkp][ks-1]*ps+pBetaS2[ijkp][ks-1])*ps
                         +pBetaS1[ijkp][ks-1])*ps+pBetaS[ijkp][ks-1];
                       dBetaS_jkp=(pBetaS6[ijkp][ks-1]*ps+pBetaS5[ijkp][ks-1])*ps
                         +pBetaS4[ijkp][ks-1];
                       cosAng_ijkp=(-dis_ij[0]*dis_jkp[0]-dis_ij[1]*dis_jkp[1]
                         -dis_ij[2]*dis_jkp[2])/(r_ij*r_jkp);
                       cosAng_kjkp=(dis_jk[0]*dis_jkp[0]+dis_jk[1]*dis_jkp[1]
                         +dis_jk[2]*dis_jkp[2])/(r_jk*r_jkp);
                       nb_jkp=nb_t;
                       nb_t++;
                       if(nb_t>nb_sg) {
                         new_n_tot=nb_sg+maxneigh;
                         grow_sigma(nb_sg,new_n_tot);
                         nb_sg=new_n_tot;
                       }
                       bt_sg[nb_jkp].temp=temp_jkp;
                       bt_sg[nb_jkp].i=j;
                       bt_sg[nb_jkp].j=kp;
                       gmean0=sigma_g0[itype-1][jtype-1][kptype-1];
                       gmean1=sigma_g1[itype-1][jtype-1][kptype-1];
                       gmean2=sigma_g2[itype-1][jtype-1][kptype-1];
                       amean=cosAng_ijkp;
                       gfactor2=gmean0+gmean1*amean
                           +gmean2*amean*amean;
                       gprime2=gmean1+2.0*gmean2*amean;
                       gmean0=sigma_g0[ktype-1][jtype-1][kptype-1];
                       gmean1=sigma_g1[ktype-1][jtype-1][kptype-1];
                       gmean2=sigma_g2[ktype-1][jtype-1][kptype-1];
                       amean=cosAng_kjkp;
                       gfactor3=gmean0+gmean1*amean
                           +gmean2*amean*amean;
                       gprime3=gmean1+2.0*gmean2*amean;
                       gfactor=gfactor1*gfactor2*gfactor3;
                       rfactorrt=betaS_jk*betaS_jkp;
                       rfactor=rfactorrt*rfactorrt;
 
 //2nd DD is Eq. 11 (c) for j atom where i , k & k'=neighbor of j
 
                       DD=DD+2.0*gfactor*rfactor;
                     }
                   }
                 }
 
 //j is a neighbor of i, k is a neighbor of j not equal to i and k'
 //is a neighbor of k not equal to j or i
 
                 for(ltmp=0;ltmp<numneigh[k];ltmp++) {
                   temp_kkp=BOP_index[k]+ltmp;
                   kp=klist[ltmp];
                   kptype=map[type[kp]]+1;
                   same_ikp=0;
                   same_jkp=0;
                   if(x[i][0]==x[kp][0]) {
                     if(x[i][1]==x[kp][1]) {
                       if(x[i][2]==x[kp][2]) {
                         same_ikp=1;
                       }
                     }
                   }
                   if(x[j][0]==x[kp][0]) {
                     if(x[j][1]==x[kp][1]) {
                       if(x[j][2]==x[kp][2]) {
                         same_jkp=1;
                       }
                     }
                   }
                   if(!same_ikp&&!same_jkp) {
                     if(ktype==kptype)
                       ikkp=ktype-1;
                     else if(ktype<kptype)
                       ikkp=ktype*bop_types-ktype*(ktype+1)/2+kptype-1;
                     else
                       ikkp=kptype*bop_types-kptype*(kptype+1)/2+ktype-1;
                     for(kNeij=0;kNeij<numneigh[k];kNeij++) {
                       if(x[klist[kNeij]][0]==x[j][0]) {
                         if(x[klist[kNeij]][1]==x[j][1]) {
                           if(x[klist[kNeij]][2]==x[j][2]) {
                             break;
                           }
                         }
                       }
                     }
                     sig_flag=0;
                     for(nsearch=0;nsearch<nSigBk[n];nsearch++) {
                       ncmp=itypeSigBk[n][nsearch];
                       if(x[ncmp][0]==x[kp][0]) {
                         if(x[ncmp][1]==x[kp][1]) {
                           if(x[ncmp][2]==x[kp][2]) {
                             new2=nsearch;
                             sig_flag=1;
                             break;
                           }
                         }
                       }
                     }
                     if(sig_flag==0) {
                       nSigBk[n]=nSigBk[n]+1;
                       new2=nSigBk[n]-1;
                       itypeSigBk[n][new2]=kp;
                     }
                     dis_kkp[0]=x[kp][0]-x[k][0];
                     dis_kkp[1]=x[kp][1]-x[k][1];
                     dis_kkp[2]=x[kp][2]-x[k][2];
                     rsq_kkp=dis_kkp[0]*dis_kkp[0]
                         +dis_kkp[1]*dis_kkp[1]
                         +dis_kkp[2]*dis_kkp[2];
                     r_kkp=sqrt(rsq_kkp);
                     if(r_kkp<=rcut[ikkp]) {
                       ps=r_kkp*rdr[ikkp]+1.0;
                       ks=(int)ps;
                       if(nr-1<ks)
                         ks=nr-1;
                       ps=ps-ks;
                       if(ps>1.0)
                         ps=1.0;
                       betaS_kkp=((pBetaS3[ikkp][ks-1]*ps+pBetaS2[ikkp][ks-1])*ps
                           +pBetaS1[ikkp][ks-1])*ps+pBetaS[ikkp][ks-1];
                       cosAng_jkkp=(-dis_jk[0]*dis_kkp[0]-dis_jk[1]*dis_kkp[1]
                           -dis_jk[2]*dis_kkp[2])/(r_jk*r_kkp);
                       nb_kkp=nb_t;
                       nb_t++;
                       if(nb_t>nb_sg) {
                         new_n_tot=nb_sg+maxneigh;
                         grow_sigma(nb_sg,new_n_tot);
                         nb_sg=new_n_tot;
                       }
                       bt_sg[nb_kkp].temp=temp_kkp;
                       bt_sg[nb_kkp].i=k;
                       bt_sg[nb_kkp].j=kp;
                       gmean0=sigma_g0[jtype-1][ktype-1][kptype-1];
                       gmean1=sigma_g1[jtype-1][ktype-1][kptype-1];
                       gmean2=sigma_g2[jtype-1][ktype-1][kptype-1];
                       amean=cosAng_jkkp;
                       gfactor2=gmean0+gmean1*amean
                           +gmean2*amean*amean;
                       gprime2=gmean1+2.0*gmean2*amean;
                       gfactorsq2=gfactor2*gfactor2;
                       gfactor=gfactorsq*gfactorsq2;
                       rfactorrt=betaS_jk*betaS_kkp;
                       rfactor=rfactorrt*rfactorrt;
 
 //3rd DD is Eq. 11 (c) for j atom where i & k=neighbor of j & k'=neighbor of k
 
                       DD=DD+gfactor*rfactor;
                     }
                   }
                 }
               }
             }
           }
 
           sig_flag=0;
           if(FF<=0.000001) {
             sigB[n]=0.0;
             sig_flag=1;
           }
           if(sig_flag==0) {
             if(AA<0.0)
               AA=0.0;
             if(BB<0.0)
               BB=0.0;
             if(CC<0.0)
               CC=0.0;
             if(DD<0.0)
               DD=0.0;
 
 // AA and BB are the representations of (a) Eq. 34 and (b) Eq. 9
 // for atoms i and j respectively
 
             AAC=AA+BB;
             BBC=AA*BB;
             CCC=AA*AA+BB*BB;
             DDC=CC+DD;
 
 //EEC is a modified form of (a) Eq. 33
 
             EEC=(DDC-CCC)/(AAC+2.0*small1);
             for(m=0;m<nb_t;m++) {
               if((bt_sg[m].i>-1)&&(bt_sg[m].j>-1)) {
                 bt_sg[m].dAAC[0]=bt_sg[m].dAA[0]
                     +bt_sg[m].dBB[0];
                 bt_sg[m].dAAC[1]=bt_sg[m].dAA[1]
                     +bt_sg[m].dBB[1];
                 bt_sg[m].dAAC[2]=bt_sg[m].dAA[2]
                     +bt_sg[m].dBB[2];
               }
             }
             UT=EEC*FF+BBC+small3[iij];
             UT=1.0/sqrt(UT);
 
 // bndtmp is a slightly modified form of (a) Eq. 30 and (b) Eq. 8
 
             bndtmp=(FF+sigma_delta[iij]*sigma_delta[iij])
                 +sigma_c[iij]*AAC+small4;
             psign=1.0;
             bndtmp0=1.0/sqrt(bndtmp);
             sigB1[n]=psign*betaS_ij*bndtmp0;
             bndtmp=-0.5*bndtmp0*bndtmp0*bndtmp0;
             bndtmp1=psign*bndtmp0+psign*betaS_ij
                 *bndtmp*2.0*betaS_ij;
             bndtmp1=bndtmp1*dBetaS_ij/r_ij;
             bndtmp2=psign*betaS_ij*bndtmp*sigma_c[iij];
             setting=0;
             for(m=0;m<nb_t;m++) {
               if((bt_sg[m].i>-1)&&(bt_sg[m].j>-1)) {
                 temp_kk=bt_sg[m].temp;
                 if(temp_kk==temp_ij&&setting==0) {
                   bt_sg[m].dSigB1[0]=bndtmp1*dis_ij[0]
                       +(bndtmp2*bt_sg[m].dAAC[0]);
                   bt_sg[m].dSigB1[1]=bndtmp1*dis_ij[1]
                       +(bndtmp2*bt_sg[m].dAAC[1]);
                   bt_sg[m].dSigB1[2]=bndtmp1*dis_ij[2]
                       +(bndtmp2*bt_sg[m].dAAC[2]);
                   setting=1;
                 }
                 else if(temp_kk==temp_ji&&setting==0) {
                   bt_sg[m].dSigB1[0]=-bndtmp1*dis_ij[0]
                       +(bndtmp2*bt_sg[m].dAAC[0]);
                   bt_sg[m].dSigB1[1]=-bndtmp1*dis_ij[1]
                       +(bndtmp2*bt_sg[m].dAAC[1]);
                   bt_sg[m].dSigB1[2]=-bndtmp1*dis_ij[2]
                       +(bndtmp2*bt_sg[m].dAAC[2]);
                   setting=1;
                 }
                 else {
                   bt_sg[m].dSigB1[0]=(bndtmp2*bt_sg[m].dAAC[0]);
                   bt_sg[m].dSigB1[1]=(bndtmp2*bt_sg[m].dAAC[1]);
                   bt_sg[m].dSigB1[2]=(bndtmp2*bt_sg[m].dAAC[2]);
                 }
               }
             }
 
 //This branch ensures there is no error for atoms with no neighbors (deposition)
 
             if(nb_t==0) {
               if(j>i) {
                 bt_sg[0].dSigB1[0]=bndtmp1*dis_ij[0];
                 bt_sg[0].dSigB1[1]=bndtmp1*dis_ij[1];
                 bt_sg[0].dSigB1[2]=bndtmp1*dis_ij[2];
               }
               else {
                 bt_sg[0].dSigB1[0]=-bndtmp1*dis_ij[0];
                 bt_sg[0].dSigB1[1]=-bndtmp1*dis_ij[1];
                 bt_sg[0].dSigB1[2]=-bndtmp1*dis_ij[2];
               }
               for(pp=0;pp<3;pp++) {
                 bt_sg[0].dAA[pp]=0.0;
                 bt_sg[0].dBB[pp]=0.0;
                 bt_sg[0].dEE1[pp]=0.0;
                 bt_sg[0].dFF[pp]=0.0;
                 bt_sg[0].dAAC[pp]=0.0;
                 bt_sg[0].dSigB[pp]=0.0;
               }
               bt_sg[0].i=i;
               bt_sg[0].j=j;
               bt_sg[0].temp=temp_ij;
               nb_t++;
               if(nb_t>nb_sg) {
                 new_n_tot=nb_sg+maxneigh;
                 grow_sigma(nb_sg,new_n_tot);
                 nb_sg=new_n_tot;
               }
             }
             ps=sigB1[n]*rdBO+1.0;
             ks=(int)ps;
             if(nBOt-1<ks)
               ks=nBOt-1;
             ps=ps-ks;
             if(ps>1.0)
               ps=1.0;
             dsigB1=((FsigBO3[iij][ks-1]*ps+FsigBO2[iij][ks-1])*ps
                 +FsigBO1[iij][ks-1])*ps+FsigBO[iij][ks-1];
             dsigB2=(FsigBO6[iij][ks-1]*ps+FsigBO5[iij][ks-1])*ps+FsigBO4[iij][ks-1];
             part0=(FF+0.5*AAC+small5);
             part1=(sigma_f[iij]-0.5)*sigma_k[iij];
             part2=1.0-part1*EE1/part0;
             part3=dsigB1*part1/part0;
             part4=part3/part0*EE1;
 
 // sigB is the final expression for (a) Eq. 6 and (b) Eq. 11
 
             sigB[n]=dsigB1*part2;
             pp1=2.0*betaS_ij;
             for(m=0;m<nb_t;m++) {
               if((bt_sg[m].i>-1)&&(bt_sg[m].j>-1)) {
                 temp_kk=bt_sg[m].temp;
                 bt_i=bt_sg[m].i;
                 bt_j=bt_sg[m].j;
                 xtmp[0]=x[bt_j][0]-x[bt_i][0];
                 xtmp[1]=x[bt_j][1]-x[bt_i][1];
                 xtmp[2]=x[bt_j][2]-x[bt_i][2];
                 for(pp=0;pp<3;pp++) {
                   bt_sg[m].dSigB[pp]=dsigB2*part2*bt_sg[m].dSigB1[pp]
                       -part3*bt_sg[m].dEE1[pp]
                       +part4*(bt_sg[m].dFF[pp]
                       +0.5*bt_sg[m].dAAC[pp]);
                 }
                 for(pp=0;pp<3;pp++) {
                   ftmp[pp]=pp1*bt_sg[m].dSigB[pp];
                   f[bt_i][pp]-=ftmp[pp];
                   f[bt_j][pp]+=ftmp[pp];
                 }
                 if(evflag) {
                   ev_tally_xyz(bt_i,bt_j,nlocal,newton_pair,0.0,0.0,ftmp[0],ftmp[1]
                       ,ftmp[2],xtmp[0],xtmp[1],xtmp[2]);
                 }
               }
             }
           }
           n++;
         }
       }
     }
   }
   destroy_sigma();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBOP::PiBo()
 {
   int new_n_tot;
   int i,j,k,kp,m,n,pp,nb_t;
   int iij,ji,ki;
   int nsearch,ncmp;
   tagint i_tag,j_tag;
   int njik,ngj,ngk,nglj,ngl,ngi;
   int nkjkp,nijkp,ngli,nkikp,njikp;
   int itmp,ltmp,jtmp,ktmp;
   int nlocal,pi_flag;
   int inum,*ilist,*iilist,*jlist;
   int **firstneigh,*numneigh;
   int itype,jtype;
   int temp_ij,temp_ik,temp_ikp;
   int temp_jk,temp_jkp;
   int ang_jikp,ang_kikp,ang_ijk;
   int ang_ijkp,ang_kjkp,ang_jik;
   int nb_ij,nb_ik,nb_jk,nb_ikp,nb_jkp;
   int bt_ij,bt_i,bt_j;
   double AA,BB,CC;
   double cosSq,sinFactor,cosFactor;
   double cosSq1,dotV,BBrt,AB1,AB2;
   double BBrtR,ABrtR1,ABrtR2;
   double angFactor,angFactor1,angFactor2;
   double angFactor3,angFactor4,angRfactor;
   double dAngR1,dAngR2,agpdpr3;
   double agpdpr1,agpdpr2,app1,app2,app3;
   double betaCapSq1,dbetaCapSq1;
   double betaCapSq2,dbetaCapSq2;
   double betaCapSum,ftmp[3];
   double dPiB1,dPiB2,dPiB3,pp2;
   double **f = atom->f;
   double **x = atom->x;
   int *type = atom->type;
   tagint *tag = atom->tag;
   int newton_pair = force->newton_pair;
 
   nlocal = atom->nlocal;
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   n=0;
 
 // Loop over all local atoms for i
 
   if(nb_pi>16) {
     nb_pi=16;
   }
   if(nb_pi==0) {
     nb_pi=(maxneigh)*(maxneigh/2);
   }
   if(allocate_pi) {
     destroy_pi();
   }
   create_pi(nb_pi);
   for(itmp=0;itmp<inum;itmp++) {
     nb_t=0;
     i = ilist[itmp];
     itype = map[type[i]]+1;
     i_tag=tag[i];
 
 // j is a loop over all neighbors of i
 
     iilist=firstneigh[i];
     for(jtmp=0;jtmp<numneigh[i];jtmp++) {
       temp_ij=BOP_index[i]+jtmp;
       if(neigh_flag[temp_ij]) {
         for(m=0;m<nb_pi;m++) {
           for(pp=0;pp<3;pp++) {
             bt_pi[m].dAA[pp]=0.0;
             bt_pi[m].dBB[pp]=0.0;
             bt_pi[m].dPiB[pp]=0.0;
           }
           bt_pi[m].i=-1;
           bt_pi[m].j=-1;
         }
         j=iilist[jtmp];
         jlist=firstneigh[j];
         jtype=map[type[j]]+1;
         j_tag=tag[j];
         nb_t=0;
         ftmp[0]=0.0;
         ftmp[1]=0.0;
         ftmp[2]=0.0;
         nb_ij=nb_t;
         nb_t++;
         if(nb_t>nb_pi) {
           new_n_tot=nb_pi+maxneigh;
           grow_pi(nb_pi,new_n_tot);
           nb_pi=new_n_tot;
         }
         bt_pi[nb_ij].i=i;
         bt_pi[nb_ij].j=j;
         bt_pi[nb_ij].temp=temp_ij;
         if(j_tag>=i_tag) {
           if(itype==jtype)
             iij=itype-1;
           else if(itype<jtype)
             iij=itype*bop_types-itype*(itype+1)/2+jtype-1;
           else
             iij=jtype*bop_types-jtype*(jtype+1)/2+itype-1;
           AA=0.0;
           BB=0.0;
           nPiBk[n]=0;
           for(ji=0;ji<numneigh[j];ji++) {
             if(x[jlist[ji]][0]==x[i][0]) {
               if(x[jlist[ji]][1]==x[i][1]) {
                 if(x[jlist[ji]][2]==x[i][2]) {
                   break;
                 }
               }
             }
           }
 
 // j and k are different neighbors of i
 
           for(ktmp=0;ktmp<numneigh[i];ktmp++) {
             if(ktmp!=jtmp) {
               temp_ik=BOP_index[i]+ktmp;
               if(neigh_flag[temp_ik]) {
                 k=iilist[ktmp];
                 if(jtmp<ktmp) {
                   njik=jtmp*(2*numneigh[i]-jtmp-1)/2+(ktmp-jtmp)-1;
                   ngj=0;
                   ngk=1;
                 }
                 else {
                   njik=ktmp*(2*numneigh[i]-ktmp-1)/2+(jtmp-ktmp)-1;
                   ngj=1;
                   ngk=0;
                 }
                 ang_jik=cos_index[i]+njik;
                 if(ang_jik>=cos_total) {
                   error->one(FLERR,"Too many atom triplets for pair bop");
                 }
                 nb_ik=nb_t;
                 nb_t++;
                 if(nb_t>nb_pi) {
                   new_n_tot=nb_pi+maxneigh;
                   grow_pi(nb_pi,new_n_tot);
                   nb_pi=new_n_tot;
                 }
                 bt_pi[nb_ik].i=i;
                 bt_pi[nb_ik].j=k;
                 bt_pi[nb_ik].temp=temp_ik;
                 cosSq=cosAng[ang_jik]*cosAng[ang_jik];
                 sinFactor=.5*(1.0-cosSq)*pi_p[itype-1]*betaS[temp_ik];
                 cosFactor=.5*(1.0+cosSq)*betaP[temp_ik];
                 betaCapSq1=pi_p[itype-1]*betaS[temp_ik]*betaS[temp_ik]-betaP[temp_ik]
                     *betaP[temp_ik];
                 dbetaCapSq1=2.0*pi_p[itype-1]*betaS[temp_ik]*dBetaS[temp_ik]
                     -2.0*betaP[temp_ik]*dBetaP[temp_ik];
 
 //AA is Eq. 37 (a) and Eq. 19 (b) for i atoms
 //1st BB is first term of Eq. 38 (a) where j and k =neighbors i
 
                 AA=AA+sinFactor*betaS[temp_ik]+cosFactor*betaP[temp_ik];
                 BB=BB+.25*(1.0-cosSq)*(1.0-cosSq)*betaCapSq1*betaCapSq1;
 
 //agpdpr1 is derivative of AA for atom i w.r.t. Beta(r_ik)
 //agpdpr2 is derivative of BB for atom i w.r.t. Beta(r_ik)
 //app1 is derivative of AA for atom i w.r.t. cos(theta_jik)
 //app2 is derivative of BB for atom i w.r.t. cos(theta_jik)
 
                 agpdpr1=(2.0*sinFactor*dBetaS[temp_ik]+2.0*cosFactor
                     *dBetaP[temp_ik])/rij[temp_ik];
                 app1=cosAng[ang_jik]*(-pi_p[itype-1]*betaS[temp_ik]*betaS[temp_ik]
                     +betaP[temp_ik]*betaP[temp_ik]);
                 app2=-(1.0-cosSq)*cosAng[ang_jik]*betaCapSq1*betaCapSq1;
                 agpdpr2=.5*(1.0-cosSq)*(1.0-cosSq)*betaCapSq1*dbetaCapSq1/rij[temp_ik];
                 itypePiBk[n][nPiBk[n]]=k;
                 bt_pi[nb_ij].dAA[0]+=
                     app1*dcAng[ang_jik][0][ngj];
                 bt_pi[nb_ij].dAA[1]+=
                     app1*dcAng[ang_jik][1][ngj];
                 bt_pi[nb_ij].dAA[2]+=
                     app1*dcAng[ang_jik][2][ngj];
                 bt_pi[nb_ij].dBB[0]+=
                     app2*dcAng[ang_jik][0][ngj];
                 bt_pi[nb_ij].dBB[1]+=
                     app2*dcAng[ang_jik][1][ngj];
                 bt_pi[nb_ij].dBB[2]+=
                     app2*dcAng[ang_jik][2][ngj];
                 bt_pi[nb_ik].dAA[0]+=
                     agpdpr1*disij[0][temp_ik]
                     +app1*dcAng[ang_jik][0][ngk];
                 bt_pi[nb_ik].dAA[1]+=
                     agpdpr1*disij[1][temp_ik]
                     +app1*dcAng[ang_jik][1][ngk];
                 bt_pi[nb_ik].dAA[2]+=
                     agpdpr1*disij[2][temp_ik]
                     +app1*dcAng[ang_jik][2][ngk];
                 bt_pi[nb_ik].dBB[0]+=
                     app2*dcAng[ang_jik][0][ngk]
                     +agpdpr2*disij[0][temp_ik];
                 bt_pi[nb_ik].dBB[1]+=
                     app2*dcAng[ang_jik][1][ngk]
                     +agpdpr2*disij[1][temp_ik];
                 bt_pi[nb_ik].dBB[2]+=
                     app2*dcAng[ang_jik][2][ngk]
                     +agpdpr2*disij[2][temp_ik];
 
 // j and k and k' are different neighbors of i
 
                 for(ltmp=0;ltmp<ktmp;ltmp++) {
                   if(ltmp!=jtmp) {
                     temp_ikp=BOP_index[i]+ltmp;
                     if(neigh_flag[temp_ikp]) {
                       kp=iilist[ltmp];
                       for(nsearch=0;nsearch<nPiBk[n];nsearch++) {
                         ncmp=itypePiBk[n][nsearch];
                         if(x[ncmp][0]==x[kp][0]) {
                           if(x[ncmp][1]==x[kp][1]) {
                             if(x[ncmp][2]==x[kp][2]) {
                               break;
                             }
                           }
                         }
                       }
                       nkikp=ltmp*(2*numneigh[i]-ltmp-1)/2+(ktmp-ltmp)-1;
                       if(jtmp<ltmp) {
                         njikp=jtmp*(2*numneigh[i]-jtmp-1)/2+(ltmp-jtmp)-1;
                         nglj=0;
                         ngl=1;
                       }
                       else {
                         njikp=ltmp*(2*numneigh[i]-ltmp-1)/2+(jtmp-ltmp)-1;
                         nglj=1;
                         ngl=0;
                       }
                       ang_jikp=cos_index[i]+njikp;
                       if(ang_jikp>=cos_total) {
                         error->one(FLERR,"Too many atom triplets for pair bop");
                       }
                       nb_ikp=nb_t;
                       nb_t++;
                       if(nb_t>nb_pi) {
                         new_n_tot=nb_pi+maxneigh;
                         grow_pi(nb_pi,new_n_tot);
                         nb_pi=new_n_tot;
                       }
                       bt_pi[nb_ikp].i=i;
                       bt_pi[nb_ikp].j=kp;
                       bt_pi[nb_ikp].temp=temp_ikp;
                       ang_kikp=cos_index[i]+nkikp;
                       if(ang_kikp>=cos_total) {
                         error->one(FLERR,"Too many atom triplets for pair bop");
                       }
                       betaCapSq2=pi_p[itype-1]*betaS[temp_ikp]*betaS[temp_ikp]
                           -betaP[temp_ikp]*betaP[temp_ikp];
                       dbetaCapSq2=2.0*pi_p[itype-1]*betaS[temp_ikp]*dBetaS[temp_ikp]
                           -2.0*betaP[temp_ikp]*dBetaP[temp_ikp];
                       cosSq1=cosAng[ang_jikp]*cosAng[ang_jikp];
                       angFactor=cosAng[ang_kikp]-cosAng[ang_jikp]*cosAng[ang_jik];
                       angFactor1=4.0*angFactor;
                       angFactor2=-angFactor1*cosAng[ang_jikp]
                           +2.0*cosAng[ang_jik]*(1.0-cosSq1);
                       angFactor3=-angFactor1*cosAng[ang_jik]
                           +2.0*cosAng[ang_jikp]*(1.0-cosSq);
                       angFactor4=2.0*angFactor*angFactor-(1.0-cosSq)*(1.0-cosSq1);
                       betaCapSum=.5*betaCapSq1*betaCapSq2;
 
 //2nd BB is third term of Eq. 38 (a) where j , k and k'=neighbors i
 
                       BB=BB+betaCapSum*angFactor4;
 
 //agpdpr1 is derivative of BB for atom i w.r.t. Beta(r_ik)
 //agpdpr2 is derivative of BB for atom i w.r.t. Beta(r_ik')
 //app1 is derivative of BB 3rd term w.r.t. cos(theta_kik')
 //app2 is derivative of BB 3rd term w.r.t. cos(theta_jik)
 //app3 is derivative of BB 3rd term w.r.t. cos(theta_jik')
 
                       app1=betaCapSum*angFactor1;
                       app2=betaCapSum*angFactor2;
                       app3=betaCapSum*angFactor3;
                       agpdpr1=.5*angFactor4*dbetaCapSq1*betaCapSq2/rij[temp_ik];
                       agpdpr2=.5*angFactor4*betaCapSq1*dbetaCapSq2/rij[temp_ikp];
 
                       bt_pi[nb_ij].dBB[0]+=
                           app2*dcAng[ang_jik][0][ngj]
                           +app3*dcAng[ang_jikp][0][nglj];
                       bt_pi[nb_ij].dBB[1]+=
                           app2*dcAng[ang_jik][1][ngj]
                           +app3*dcAng[ang_jikp][1][nglj];
                       bt_pi[nb_ij].dBB[2]+=
                           app2*dcAng[ang_jik][2][ngj]
                           +app3*dcAng[ang_jikp][2][nglj];
                       bt_pi[nb_ik].dBB[0]+=
                           agpdpr1*disij[0][temp_ik]
                           +app1*dcAng[ang_kikp][0][1]
                           +app2*dcAng[ang_jik][0][ngk];
                       bt_pi[nb_ik].dBB[1]+=
                           agpdpr1*disij[1][temp_ik]
                           +app1*dcAng[ang_kikp][1][1]
                           +app2*dcAng[ang_jik][1][ngk];
                       bt_pi[nb_ik].dBB[2]+=
                           agpdpr1*disij[2][temp_ik]
                           +app1*dcAng[ang_kikp][2][1]
                           +app2*dcAng[ang_jik][2][ngk];
                       bt_pi[nb_ikp].dBB[0]+=
                           agpdpr2*disij[0][temp_ikp]
                           +app1*dcAng[ang_kikp][0][0]
                           +app3*dcAng[ang_jikp][0][ngl];
                       bt_pi[nb_ikp].dBB[1]+=
                           agpdpr2*disij[1][temp_ikp]
                           +app1*dcAng[ang_kikp][1][0]
                           +app3*dcAng[ang_jikp][1][ngl];
                       bt_pi[nb_ikp].dBB[2]+=
                           agpdpr2*disij[2][temp_ikp]
                           +app1*dcAng[ang_kikp][2][0]
                           +app3*dcAng[ang_jikp][2][ngl];
                     }
                   }
                 }
                 nPiBk[n]=nPiBk[n]+1;
               }
             }
           }
 
 //j is a neighbor of i and k is a neighbor of j and equal to i
 
           for(ki=0;ki<numneigh[j];ki++) {
             k=jlist[ki];
             if(x[k][0]==x[i][0]) {
               if(x[k][1]==x[i][1]) {
                 if(x[k][2]==x[i][2]) {
                   break;
                 }
               }
             }
           }
 
 //j is a neighbor of i and k is a neighbor of j not equal to i
 
           for(ktmp=0;ktmp<numneigh[j];ktmp++) {
             if(ktmp!=ki) {
               temp_jk=BOP_index[j]+ktmp;
               if(neigh_flag[temp_jk]) {
                 k=jlist[ktmp];
                 pi_flag=0;
                 for(nsearch=0;nsearch<nPiBk[n];nsearch++) {
                   ncmp=itypePiBk[n][nsearch];
                   if(x[ncmp][0]==x[k][0]) {
                     if(x[ncmp][1]==x[k][1]) {
                       if(x[ncmp][2]==x[k][2]) {
                         pi_flag=1;
                         break;
                       }
                     }
                   }
                 }
                 if(pi_flag==0) {
                   itypePiBk[n][nPiBk[n]]=k;
                 }
                 if(ktmp<ki) {
                   njik=ktmp*(2*numneigh[j]-ktmp-1)/2+(ki-ktmp)-1;
                   ngi=1;
                   ngk=0;
                 }
                 else {
                   njik=ki*(2*numneigh[j]-ki-1)/2+(ktmp-ki)-1;
                   ngi=0;
                   ngk=1;
                 }
                 ang_ijk=cos_index[j]+njik;
                 if(ang_ijk>=cos_total) {
                   error->one(FLERR,"Too many atom triplets for pair bop");
                 }
                 nb_jk=nb_t;
                 nb_t++;
                 if(nb_t>nb_pi) {
                   new_n_tot=nb_pi+maxneigh;
                   grow_pi(nb_pi,new_n_tot);
                   nb_pi=new_n_tot;
                 }
                 bt_pi[nb_jk].i=j;
                 bt_pi[nb_jk].j=k;
                 bt_pi[nb_jk].temp=temp_jk;
                 cosSq=cosAng[ang_ijk]*cosAng[ang_ijk];
                 sinFactor=.5*(1.0-cosSq)*pi_p[jtype-1]*betaS[temp_jk];
                 cosFactor=.5*(1.0+cosSq)*betaP[temp_jk];
                 betaCapSq1=pi_p[jtype-1]*betaS[temp_jk]*betaS[temp_jk]
                     -betaP[temp_jk]*betaP[temp_jk];
                 dbetaCapSq1=2.0*pi_p[jtype-1]*betaS[temp_jk]*dBetaS[temp_jk]
                     -2.0*betaP[temp_jk]*dBetaP[temp_jk];
 
 //AA is Eq. 37 (a) and Eq. 19 (b) for j atoms
 //3rd BB is 2nd term of Eq. 38 (a) where i and k =neighbors j
 
                 AA=AA+sinFactor*betaS[temp_jk]+cosFactor*betaP[temp_jk];
                 BB=BB+.25*(1.0-cosSq)*(1.0-cosSq)*betaCapSq1*betaCapSq1;
 
 //agpdpr1 is derivative of AA for atom j w.r.t. Beta(r_jk)
 //agpdpr2 is derivative of BB for atom j w.r.t. Beta(r_jk)
 //app1 is derivative of AA for j atom w.r.t. cos(theta_ijk)
 //app2 is derivative of BB 2nd term w.r.t. cos(theta_ijk)
 
                 agpdpr1=(2.0*sinFactor*dBetaS[temp_jk]+2.0*cosFactor
                     *dBetaP[temp_jk])/rij[temp_jk];
                 agpdpr2=.5*(1.0-cosSq)*(1.0-cosSq)*betaCapSq1*dbetaCapSq1/rij[temp_jk];
                 app1=cosAng[ang_ijk]*(-pi_p[jtype-1]*betaS[temp_jk]*betaS[temp_jk]
                     +betaP[temp_jk]*betaP[temp_jk]);
                 app2=-(1.0-cosSq)*cosAng[ang_ijk]*betaCapSq1*betaCapSq1;
                 bt_pi[nb_ij].dAA[0]-=
                     app1*dcAng[ang_ijk][0][ngi];
                 bt_pi[nb_ij].dAA[1]-=
                     app1*dcAng[ang_ijk][1][ngi];
                 bt_pi[nb_ij].dAA[2]-=
                     app1*dcAng[ang_ijk][2][ngi];
                 bt_pi[nb_ij].dBB[0]-=
                     app2*dcAng[ang_ijk][0][ngi];
                 bt_pi[nb_ij].dBB[1]-=
                     app2*dcAng[ang_ijk][1][ngi];
                 bt_pi[nb_ij].dBB[2]-=
                     app2*dcAng[ang_ijk][2][ngi];
                 bt_pi[nb_jk].dAA[0]+=
                     agpdpr1*disij[0][temp_jk]
                     +app1*dcAng[ang_ijk][0][ngk];
                 bt_pi[nb_jk].dAA[1]+=
                     agpdpr1*disij[1][temp_jk]
                     +app1*dcAng[ang_ijk][1][ngk];
                 bt_pi[nb_jk].dAA[2]+=
                     agpdpr1*disij[2][temp_jk]
                     +app1*dcAng[ang_ijk][2][ngk];
                 bt_pi[nb_jk].dBB[0]+=
                     app2*dcAng[ang_ijk][0][ngk]
                     +agpdpr2*disij[0][temp_jk];
                 bt_pi[nb_jk].dBB[1]+=
                     app2*dcAng[ang_ijk][1][ngk]
                     +agpdpr2*disij[1][temp_jk];
                 bt_pi[nb_jk].dBB[2]+=
                     app2*dcAng[ang_ijk][2][ngk]
                     +agpdpr2*disij[2][temp_jk];
 
 //j is a neighbor of i and k and k' are different neighbors of j not equal to i
 
                 for(ltmp=0;ltmp<ktmp;ltmp++) {
                   if(ltmp!=ki) {
                     temp_jkp=BOP_index[j]+ltmp;
                     if(neigh_flag[temp_jkp]) {
                       kp=jlist[ltmp];
                       for(nsearch=0;nsearch<nPiBk[n];nsearch++) {
                         ncmp=itypePiBk[n][nsearch];
                         if(x[ncmp][0]==x[kp][0]) {
                           if(x[ncmp][1]==x[kp][1]) {
                             if(x[ncmp][2]==x[kp][2]) {
                               break;
                             }
                           }
                         }
                       }
                       nkjkp=ltmp*(2*numneigh[j]-ltmp-1)/2+(ktmp-ltmp)-1;
                       if(ki<ltmp) {
                         nijkp=ki*(2*numneigh[j]-ki-1)/2+(ltmp-ki)-1;
                         ngli=0;
                         ngl=1;
                       }
                       else {
                         nijkp=ltmp*(2*numneigh[j]-ltmp-1)/2+(ki-ltmp)-1;
                         ngli=1;
                         ngl=0;
                       }
                       ang_ijkp=cos_index[j]+nijkp;
                       if(ang_ijkp>=cos_total) {
                         error->one(FLERR,"Too many atom triplets for pair bop");
                       }
                       ang_kjkp=cos_index[j]+nkjkp;
                       if(ang_kjkp>=cos_total) {
                         error->one(FLERR,"Too many atom triplets for pair bop");
                       }
                       nb_jkp=nb_t;
                       nb_t++;
                       if(nb_t>nb_pi) {
                         new_n_tot=nb_pi+maxneigh;
                         grow_pi(nb_pi,new_n_tot);
                         nb_pi=new_n_tot;
                       }
                       bt_pi[nb_jkp].i=j;
                       bt_pi[nb_jkp].j=kp;
                       bt_pi[nb_jkp].temp=temp_jkp;
                       betaCapSq2=pi_p[jtype-1]*betaS[temp_jkp]*betaS[temp_jkp]
                           -betaP[temp_jkp]*betaP[temp_jkp];
                       dbetaCapSq2=2.0*pi_p[jtype-1]*betaS[temp_jkp]*dBetaS[temp_jkp]
                           -2.0*betaP[temp_jkp]*dBetaP[temp_jkp];
                       cosSq1=cosAng[ang_ijkp]*cosAng[ang_ijkp];
                       angFactor=cosAng[ang_kjkp]-cosAng[ang_ijkp]*cosAng[ang_ijk];
                       angFactor1=4.0*angFactor;
                       angFactor2=-angFactor1*cosAng[ang_ijkp]
                           +2.0*cosAng[ang_ijk]*(1.0-cosSq1);
                       angFactor3=-angFactor1*cosAng[ang_ijk]
                           +2.0*cosAng[ang_ijkp]*(1.0-cosSq);
                       angFactor4=2.0*angFactor*angFactor-(1.0-cosSq)*(1.0-cosSq1);
                       betaCapSum=.5*betaCapSq1*betaCapSq2;
 
 //4th BB is 4th term of Eq. 38 (a) where i , k and k' =neighbors j
 
                       BB=BB+betaCapSum*angFactor4;
 
 //app1 is derivative of BB 4th term w.r.t. cos(theta_kjk')
 //app2 is derivative of BB 4th term w.r.t. cos(theta_ijk)
 //app3 is derivative of BB 4th term w.r.t. cos(theta_ijk')
 //agpdpr1 is derivative of BB 4th term for atom j w.r.t. Beta(r_jk)
 //agpdpr2 is derivative of BB 4th term for atom j w.r.t. Beta(r_jk')
 
                       app1=betaCapSum*angFactor1;
                       app2=betaCapSum*angFactor2;
                       app3=betaCapSum*angFactor3;
                       agpdpr1=.5*angFactor4*dbetaCapSq1*betaCapSq2/rij[temp_jk];
                       agpdpr2=.5*angFactor4*betaCapSq1*dbetaCapSq2/rij[temp_jkp];
 
                       bt_pi[nb_ij].dBB[0]-=
                           app3*dcAng[ang_ijkp][0][ngli]
                           +app2*dcAng[ang_ijk][0][ngi];
                       bt_pi[nb_ij].dBB[1]-=
                           app3*dcAng[ang_ijkp][1][ngli]
                           +app2*dcAng[ang_ijk][1][ngi];
                       bt_pi[nb_ij].dBB[2]-=
                           app3*dcAng[ang_ijkp][2][ngli]
                           +app2*dcAng[ang_ijk][2][ngi];
                       bt_pi[nb_jk].dBB[0]+=
                           agpdpr1*disij[0][temp_jk]
                           +app1*dcAng[ang_kjkp][0][1]
                           +app2*dcAng[ang_ijk][0][ngk];
                       bt_pi[nb_jk].dBB[1]+=
                           agpdpr1*disij[1][temp_jk]
                           +app1*dcAng[ang_kjkp][1][1]
                           +app2*dcAng[ang_ijk][1][ngk];
                       bt_pi[nb_jk].dBB[2]+=
                           agpdpr1*disij[2][temp_jk]
                           +app1*dcAng[ang_kjkp][2][1]
                           +app2*dcAng[ang_ijk][2][ngk];
                       bt_pi[nb_jkp].dBB[0]+=
                           agpdpr2*disij[0][temp_jkp]
                           +app1*dcAng[ang_kjkp][0][0]
                           +app3*dcAng[ang_ijkp][0][ngl];
                       bt_pi[nb_jkp].dBB[1]+=
                           agpdpr2*disij[1][temp_jkp]
                           +app1*dcAng[ang_kjkp][1][0]
                           +app3*dcAng[ang_ijkp][1][ngl];
                       bt_pi[nb_jkp].dBB[2]+=
                           agpdpr2*disij[2][temp_jkp]
                           +app1*dcAng[ang_kjkp][2][0]
                           +app3*dcAng[ang_ijkp][2][ngl];
                     }
                   }
                 }
 
 //j and k' are different neighbors of i and k is a neighbor of j not equal to i
 
                 for(ltmp=0;ltmp<numneigh[i];ltmp++) {
                   if(ltmp!=jtmp) {
                     temp_ikp=BOP_index[i]+ltmp;
                     if(neigh_flag[temp_ikp]) {
                       kp=iilist[ltmp];
                       for(nsearch=0;nsearch<nPiBk[n];nsearch++) {
                         ncmp=itypePiBk[n][nsearch];
                         if(x[ncmp][0]==x[kp][0]) {
                           if(x[ncmp][1]==x[kp][1]) {
                             if(x[ncmp][2]==x[kp][2]) {
                               break;
                             }
                           }
                         }
                       }
                       if(ltmp<jtmp) {
                         njikp=ltmp*(2*numneigh[i]-ltmp-1)/2+(jtmp-ltmp)-1;
                         ngl=1;
                         nglj=0;
                       }
                       else {
                         njikp=jtmp*(2*numneigh[i]-jtmp-1)/2+(ltmp-jtmp)-1;
                         ngl=0;
                         nglj=1;
                       }
                       ang_jikp=cos_index[i]+njikp;
                       if(ang_jikp>=cos_total) {
                         error->one(FLERR,"Too many atom triplets for pair bop");
                       }
                       nb_ikp=nb_t;
                       nb_t++;
                       if(nb_t>nb_pi) {
                         new_n_tot=nb_pi+maxneigh;
                         grow_pi(nb_pi,new_n_tot);
                         nb_pi=new_n_tot;
                       }
                       bt_pi[nb_ikp].i=i;
                       bt_pi[nb_ikp].j=kp;
                       bt_pi[nb_ikp].temp=temp_ikp;
                       betaCapSq2=pi_p[itype-1]*betaS[temp_ikp]*betaS[temp_ikp]
                           -betaP[temp_ikp]*betaP[temp_ikp];
                       dbetaCapSq2=2.0*pi_p[itype-1]*betaS[temp_ikp]*dBetaS[temp_ikp]
                           -2.0*betaP[temp_ikp]*dBetaP[temp_ikp];
                       dotV=(disij[0][temp_jk]*disij[0][temp_ikp]+disij[1][temp_jk]
                           *disij[1][temp_ikp]+disij[2][temp_jk]*disij[2][temp_ikp])
                           /(rij[temp_jk]*rij[temp_ikp]);
                       cosSq1=cosAng[ang_jikp]*cosAng[ang_jikp];
                       angFactor=dotV+cosAng[ang_jikp]*cosAng[ang_ijk];
                       angRfactor=4.0*angFactor*dotV;
                       dAngR1=-angRfactor/rij[temp_jk];
                       dAngR2=-angRfactor/rij[temp_ikp];
                       angFactor1=4.0*angFactor*cosAng[ang_jikp]
                           +2.0*cosAng[ang_ijk]*(1.0-cosSq1);
                       angFactor2=4.0*angFactor*cosAng[ang_ijk]
                           +2.0*cosAng[ang_jikp]*(1.0-cosSq);
                       angFactor3=2.0*angFactor*angFactor-(1.0-cosSq)*(1.0-cosSq1);
                       betaCapSum=.5*betaCapSq1*betaCapSq2;
 
 //5th BB is 5th term of Eq. 38 (a) Eq. 21 (b) where i , k and k' =neighbors j
 
                       BB=BB+betaCapSum*angFactor3;
 
 //app1 is derivative of BB 5th term w.r.t. cos(theta_ijk)
 //app2 is derivative of BB 5th term w.r.t. cos(theta_jik')
 //agpdpr1 is derivative of BB 5th term for atom j w.r.t. Beta(r_jk)
 //agpdpr2 is derivative of BB 5th term for atom j w.r.t. Beta(r_ik')
 //agpdpr3 is derivative of BB 5th term for atom j w.r.t. dot(r_ik',r_ij)
 
                       app1=betaCapSum*angFactor1;
                       app2=betaCapSum*angFactor2;
                       agpdpr1=(.5*angFactor3*dbetaCapSq1*betaCapSq2
                           +betaCapSum*dAngR1)/rij[temp_jk];
                       agpdpr2=(.5*angFactor3*betaCapSq1*dbetaCapSq2
                           +betaCapSum*dAngR2)/rij[temp_ikp];
                       agpdpr3=4.0*betaCapSum*angFactor/(rij[temp_ikp]*rij[temp_jk]);
 
                       bt_pi[nb_ij].dBB[0]+=
                           +app2*dcAng[ang_jikp][0][ngl]
                           -app1*dcAng[ang_ijk][0][ngi];
                       bt_pi[nb_ij].dBB[1]+=
                           +app2*dcAng[ang_jikp][1][ngl]
                           -app1*dcAng[ang_ijk][1][ngi];
                       bt_pi[nb_ij].dBB[2]+=
                           +app2*dcAng[ang_jikp][2][ngl]
                           -app1*dcAng[ang_ijk][2][ngi];
                       bt_pi[nb_ikp].dBB[0]+=
                           agpdpr2*disij[0][temp_ikp]
                           +agpdpr3*disij[0][temp_jk]
                           +app2*dcAng[ang_jikp][0][nglj];
                       bt_pi[nb_ikp].dBB[1]+=
                           agpdpr2*disij[1][temp_ikp]
                           +agpdpr3*disij[1][temp_jk]
                           +app2*dcAng[ang_jikp][1][nglj];
                       bt_pi[nb_ikp].dBB[2]+=
                           agpdpr2*disij[2][temp_ikp]
                           +agpdpr3*disij[2][temp_jk]
                           +app2*dcAng[ang_jikp][2][nglj];
                       bt_pi[nb_jk].dBB[0]+=
                           agpdpr1*disij[0][temp_jk]
                           +agpdpr3*disij[0][temp_ikp]
                           +app1*dcAng[ang_ijk][0][ngk];
                       bt_pi[nb_jk].dBB[1]+=
                           agpdpr1*disij[1][temp_jk]
                           +agpdpr3*disij[1][temp_ikp]
                           +app1*dcAng[ang_ijk][1][ngk];
                       bt_pi[nb_jk].dBB[2]+=
                           agpdpr1*disij[2][temp_jk]
                           +agpdpr3*disij[2][temp_ikp]
                           +app1*dcAng[ang_ijk][2][ngk];
                     }
                   }
                 }
                 if(pi_flag==0)
                   nPiBk[n]=nPiBk[n]+1;
               }
             }
           }
           CC=betaP[temp_ij]*betaP[temp_ij]+pi_delta[iij]*pi_delta[iij];
           BBrt=sqrt(BB+small6);
           AB1=CC+pi_c[iij]*(AA+BBrt)+small7;
           AB2=CC+pi_c[iij]*(AA-BBrt+sqrt(small6))+small7;
           BBrtR=1.0/BBrt;
           ABrtR1=1.0/sqrt(AB1);
           ABrtR2=1.0/sqrt(AB2);
 
 // piB is a similar formulation to (a) Eq. 36 and (b) Eq. 18
 
           piB[n]=(ABrtR1+ABrtR2)*pi_a[iij]*betaP[temp_ij];
           dPiB1=-.5*(cube(ABrtR1)+cube(ABrtR2))*pi_c[iij]*pi_a[iij]*betaP[temp_ij];
           dPiB2=.25*BBrtR*(cube(ABrtR2)-cube(ABrtR1))*pi_c[iij]*pi_a[iij]*betaP[temp_ij];
           dPiB3=((ABrtR1+ABrtR2)*pi_a[iij]-(cube(ABrtR1)+cube(ABrtR2))*pi_a[iij]
               *betaP[temp_ij]*betaP[temp_ij])*dBetaP[temp_ij]/rij[temp_ij];
           n++;
           pp2=2.0*betaP[temp_ij];
           for(m=0;m<nb_t;m++) {
             bt_ij=bt_pi[m].temp;
             bt_i=bt_pi[m].i;
             bt_j=bt_pi[m].j;
             for(pp=0;pp<3;pp++) {
               bt_pi[m].dPiB[pp]=
                   +dPiB1*bt_pi[m].dAA[pp]
                   +dPiB2*bt_pi[m].dBB[pp];
               ftmp[pp]=pp2*bt_pi[m].dPiB[pp];
               f[bt_i][pp]-=ftmp[pp];
               f[bt_j][pp]+=ftmp[pp];
 
             }
             if(evflag) {
               ev_tally_xyz(bt_i,bt_j,nlocal,newton_pair,0.0,0.0,ftmp[0],ftmp[1]
                   ,ftmp[2],disij[0][bt_ij],disij[1][bt_ij],disij[2][bt_ij]);
             }
           }
           for(pp=0;pp<3;pp++) {
             ftmp[pp]=pp2*dPiB3*disij[pp][temp_ij];
             f[i][pp]-=ftmp[pp];
             f[j][pp]+=ftmp[pp];
           }
           if(evflag) {
             ev_tally_xyz(i,j,nlocal,newton_pair,0.0,0.0,ftmp[0],ftmp[1]
                 ,ftmp[2],disij[0][temp_ij],disij[1][temp_ij],disij[2][temp_ij]);
           }
         }
       }
     }
   }
   destroy_pi();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBOP::PiBo_otf()
 {
   int new_n_tot;
   int i,j,k,kp,m,n,pp,nb_t;
   int iij,iik,iikp,ji,ki,ijkp,ijk;
   int nsearch,ncmp;
   tagint i_tag,j_tag;
   int itmp,ltmp,jtmp,ktmp;
   int pi_flag,ks;
   int nlocal;
   int inum,*ilist,*iilist,*jlist;
   int **firstneigh,*numneigh;
   int itype,jtype,ktype,kptype;
   int temp_ij,temp_ik,temp_ikp;
   int temp_jk,temp_jkp;
   int nb_ij,nb_ik,nb_jk,nb_ikp,nb_jkp;
   int bt_i,bt_j;
   double AA,BB,CC;
   double cosSq,sinFactor,cosFactor;
   double cosSq1,dotV,BBrt,AB1,AB2;
   double BBrtR,ABrtR1,ABrtR2;
   double angFactor,angFactor1,angFactor2;
   double angFactor3,angFactor4,angRfactor;
   double dAngR1,dAngR2,agpdpr3;
   double agpdpr1,agpdpr2,app1,app2,app3;
   double betaCapSq1,dbetaCapSq1;
   double betaCapSq2,dbetaCapSq2;
   double betaCapSum,ps;
   double ftmp[3],xtmp[3];
   double dPiB1,dPiB2,dPiB3,pp2;
 
   double dis_ij[3],rsq_ij,r_ij;
   double betaP_ij,dBetaP_ij;
   double dis_ik[3],rsq_ik,r_ik;
   double betaS_ik,dBetaS_ik;
   double betaP_ik,dBetaP_ik;
   double dis_ikp[3],rsq_ikp,r_ikp;
   double betaS_ikp,dBetaS_ikp;
   double betaP_ikp,dBetaP_ikp;
   double dis_jk[3],rsq_jk,r_jk;
   double betaS_jk,dBetaS_jk;
   double betaP_jk,dBetaP_jk;
   double dis_jkp[3],rsq_jkp,r_jkp;
   double betaS_jkp,dBetaS_jkp;
   double betaP_jkp,dBetaP_jkp;
 
   double cosAng_jik,dcA_jik[3][2];
   double cosAng_jikp,dcA_jikp[3][2];
   double cosAng_kikp,dcA_kikp[3][2];
   double cosAng_ijk,dcA_ijk[3][2];
   double cosAng_ijkp,dcA_ijkp[3][2];
   double cosAng_kjkp,dcA_kjkp[3][2];
 
   int newton_pair = force->newton_pair;
 
   double **f = atom->f;
   double **x = atom->x;
   int *type = atom->type;
   tagint *tag = atom->tag;
 
   nlocal = atom->nlocal;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
   inum = list->inum;
   ilist = list->ilist;
   n=0;
   if(nb_pi>16) {
     nb_pi=16;
   }
   if(nb_pi==0) {
     nb_pi=(maxneigh)*(maxneigh/2);
   }
 
 // Loop over all local atoms for i
 
   if(allocate_pi) {
     destroy_pi();
   }
   create_pi(nb_pi);
 
   for(itmp=0;itmp<inum;itmp++) {
     nb_t=0;
     i = ilist[itmp];
     itype = map[type[i]]+1;
     i_tag=tag[i];
 
 // j is a loop over all neighbors of i
 
     iilist=firstneigh[i];
     for(jtmp=0;jtmp<numneigh[i];jtmp++) {
       for(m=0;m<nb_pi;m++) {
         for(pp=0;pp<3;pp++) {
           bt_pi[m].dAA[pp]=0.0;
           bt_pi[m].dBB[pp]=0.0;
           bt_pi[m].dPiB[pp]=0.0;
         }
         bt_pi[m].i=-1;
         bt_pi[m].j=-1;
       }
       temp_ij=BOP_index[i]+jtmp;
       j=iilist[jtmp];
       jlist=firstneigh[j];
       jtype=map[type[j]]+1;
       j_tag=tag[j];
       nb_t=0;
       ftmp[0]=0.0;
       ftmp[1]=0.0;
       ftmp[2]=0.0;
       if(j_tag>=i_tag) {
         if(itype==jtype)
           iij=itype-1;
         else if(itype<jtype)
           iij=itype*bop_types-itype*(itype+1)/2+jtype-1;
         else
           iij=jtype*bop_types-jtype*(jtype+1)/2+itype-1;
         AA=0.0;
         BB=0.0;
         nPiBk[n]=0;
         for(ji=0;ji<numneigh[j];ji++) {
           if(x[jlist[ji]][0]==x[i][0]) {
             if(x[jlist[ji]][1]==x[i][1]) {
               if(x[jlist[ji]][2]==x[i][2]) {
                   break;
               }
             }
           }
         }
         nb_ij=nb_t;
         nb_t++;
         if(nb_t>nb_pi) {
           new_n_tot=nb_pi+maxneigh;
           grow_pi(nb_pi,new_n_tot);
           nb_pi=new_n_tot;
         }
         bt_pi[nb_ij].i=i;
         bt_pi[nb_ij].j=j;
         bt_pi[nb_ij].temp=temp_ij;
         dis_ij[0]=x[j][0]-x[i][0];
         dis_ij[1]=x[j][1]-x[i][1];
         dis_ij[2]=x[j][2]-x[i][2];
         rsq_ij=dis_ij[0]*dis_ij[0]
             +dis_ij[1]*dis_ij[1]
             +dis_ij[2]*dis_ij[2];
         r_ij=sqrt(rsq_ij);
         if(r_ij<=rcut[iij]) {
           ps=r_ij*rdr[iij]+1.0;
           ks=(int)ps;
           if(nr-1<ks)
             ks=nr-1;
           ps=ps-ks;
           if(ps>1.0)
             ps=1.0;
           betaP_ij=((pBetaP3[iij][ks-1]*ps+pBetaP2[iij][ks-1])*ps
               +pBetaP1[iij][ks-1])*ps+pBetaP[iij][ks-1];
           dBetaP_ij=(pBetaP6[iij][ks-1]*ps+pBetaP5[iij][ks-1])*ps
               +pBetaP4[iij][ks-1];
 
 // j and k are different neighbors of i
 
           for(ktmp=0;ktmp<numneigh[i];ktmp++) {
             if(ktmp!=jtmp) {
               temp_ik=BOP_index[i]+ktmp;
               k=iilist[ktmp];
               ktype=map[type[k]]+1;
               if(itype==ktype)
                 iik=itype-1;
               else if(itype<ktype)
                 iik=itype*bop_types-itype*(itype+1)/2+ktype-1;
               else
                 iik=ktype*bop_types-ktype*(ktype+1)/2+itype-1;
               dis_ik[0]=x[k][0]-x[i][0];
               dis_ik[1]=x[k][1]-x[i][1];
               dis_ik[2]=x[k][2]-x[i][2];
               rsq_ik=dis_ik[0]*dis_ik[0]
                   +dis_ik[1]*dis_ik[1]
                   +dis_ik[2]*dis_ik[2];
               r_ik=sqrt(rsq_ik);
               if(r_ik<=rcut[iik]) {
                 ps=r_ik*rdr[iik]+1.0;
                 ks=(int)ps;
                 if(nr-1<ks)
                   ks=nr-1;
                 ps=ps-ks;
                 if(ps>1.0)
                   ps=1.0;
                 betaS_ik=((pBetaS3[iik][ks-1]*ps+pBetaS2[iik][ks-1])*ps
                     +pBetaS1[iik][ks-1])*ps+pBetaS[iik][ks-1];
                 dBetaS_ik=(pBetaS6[iik][ks-1]*ps+pBetaS5[iik][ks-1])*ps
                     +pBetaS4[iik][ks-1];
                 betaP_ik=((pBetaP3[iik][ks-1]*ps+pBetaP2[iik][ks-1])*ps
                     +pBetaP1[iik][ks-1])*ps+pBetaP[iik][ks-1];
                 dBetaP_ik=(pBetaP6[iik][ks-1]*ps+pBetaP5[iik][ks-1])*ps
                     +pBetaP4[iik][ks-1];
                 cosAng_jik=(dis_ij[0]*dis_ik[0]+dis_ij[1]*dis_ik[1]
                     +dis_ij[2]*dis_ik[2])/(r_ij*r_ik);
                 dcA_jik[0][0]=(dis_ik[0]*r_ij*r_ik-cosAng_jik
                     *dis_ij[0]*r_ik*r_ik)/(r_ij*r_ij*r_ik*r_ik);
                 dcA_jik[1][0]=(dis_ik[1]*r_ij*r_ik-cosAng_jik
                     *dis_ij[1]*r_ik*r_ik)/(r_ij*r_ij*r_ik*r_ik);
                 dcA_jik[2][0]=(dis_ik[2]*r_ij*r_ik-cosAng_jik
                     *dis_ij[2]*r_ik*r_ik)/(r_ij*r_ij*r_ik*r_ik);
                 dcA_jik[0][1]=(dis_ij[0]*r_ij*r_ik-cosAng_jik
                     *dis_ik[0]*r_ij*r_ij)/(r_ij*r_ij*r_ik*r_ik);
                 dcA_jik[1][1]=(dis_ij[1]*r_ij*r_ik-cosAng_jik
                     *dis_ik[1]*r_ij*r_ij)/(r_ij*r_ij*r_ik*r_ik);
                 dcA_jik[2][1]=(dis_ij[2]*r_ij*r_ik-cosAng_jik
                     *dis_ik[2]*r_ij*r_ij)/(r_ij*r_ij*r_ik*r_ik);
                 nb_ik=nb_t;
                 nb_t++;
                 if(nb_t>nb_pi) {
                   new_n_tot=nb_pi+maxneigh;
                   grow_pi(nb_pi,new_n_tot);
                   nb_pi=new_n_tot;
                 }
                 bt_pi[nb_ik].i=i;
                 bt_pi[nb_ik].j=k;
                 bt_pi[nb_ik].temp=temp_ik;
                 cosSq=cosAng_jik*cosAng_jik;
                 sinFactor=.5*(1.0-cosSq)*pi_p[itype-1]*betaS_ik;
                 cosFactor=.5*(1.0+cosSq)*betaP_ik;
                 betaCapSq1=pi_p[itype-1]*betaS_ik*betaS_ik-betaP_ik
                     *betaP_ik;
                 dbetaCapSq1=2.0*pi_p[itype-1]*betaS_ik*dBetaS_ik
                     -2.0*betaP_ik*dBetaP_ik;
 
 //AA is Eq. 37 (a) and Eq. 19 (b) for i atoms
 //1st BB is first term of Eq. 38 (a) where j and k =neighbors i
 
                 AA=AA+sinFactor*betaS_ik+cosFactor*betaP_ik;
                 BB=BB+.25*(1.0-cosSq)*(1.0-cosSq)*betaCapSq1*betaCapSq1;
 
 //agpdpr1 is derivative of AA for atom i w.r.t. Beta(r_ik)
 //agpdpr2 is derivative of BB for atom i w.r.t. Beta(r_ik)
 //app1 is derivative of AA for atom i w.r.t. cos(theta_jik)
 //app2 is derivative of BB for atom i w.r.t. cos(theta_jik)
 
                 agpdpr1=(2.0*sinFactor*dBetaS_ik+2.0*cosFactor
                     *dBetaP_ik)/r_ik;
                 app1=cosAng_jik*(-pi_p[itype-1]*betaS_ik*betaS_ik
                     +betaP_ik*betaP_ik);
                 app2=-(1.0-cosSq)*cosAng_jik*betaCapSq1*betaCapSq1;
                 agpdpr2=.5*(1.0-cosSq)*(1.0-cosSq)*betaCapSq1*dbetaCapSq1/r_ik;
                 itypePiBk[n][nPiBk[n]]=k;
                 bt_pi[nb_ij].dAA[0]+=
                     app1*dcA_jik[0][0];
                 bt_pi[nb_ij].dAA[1]+=
                     app1*dcA_jik[1][0];
                 bt_pi[nb_ij].dAA[2]+=
                     app1*dcA_jik[2][0];
                 bt_pi[nb_ij].dBB[0]+=
                     app2*dcA_jik[0][0];
                 bt_pi[nb_ij].dBB[1]+=
                     app2*dcA_jik[1][0];
                 bt_pi[nb_ij].dBB[2]+=
                     app2*dcA_jik[2][0];
                 bt_pi[nb_ik].dAA[0]+=
                     agpdpr1*dis_ik[0]
                     +app1*dcA_jik[0][1];
                 bt_pi[nb_ik].dAA[1]+=
                     agpdpr1*dis_ik[1]
                     +app1*dcA_jik[1][1];
                 bt_pi[nb_ik].dAA[2]+=
                     agpdpr1*dis_ik[2]
                     +app1*dcA_jik[2][1];
                 bt_pi[nb_ik].dBB[0]+=
                     app2*dcA_jik[0][1]
                     +agpdpr2*dis_ik[0];
                 bt_pi[nb_ik].dBB[1]+=
                     app2*dcA_jik[1][1]
                     +agpdpr2*dis_ik[1];
                 bt_pi[nb_ik].dBB[2]+=
                     app2*dcA_jik[2][1]
                     +agpdpr2*dis_ik[2];
 
 // j and k and k' are different neighbors of i
 
                 for(ltmp=0;ltmp<ktmp;ltmp++) {
                   if(ltmp!=jtmp) {
                     temp_ikp=BOP_index[i]+ltmp;
                     kp=iilist[ltmp];
                     kptype=map[type[kp]]+1;
                     for(nsearch=0;nsearch<nPiBk[n];nsearch++) {
                       ncmp=itypePiBk[n][nsearch];
                       if(x[ncmp][0]==x[kp][0]) {
                         if(x[ncmp][1]==x[kp][1]) {
                           if(x[ncmp][2]==x[kp][2]) {
                             break;
                           }
                         }
                       }
                     }
                     if(itype==kptype)
                       iikp=itype-1;
                     else if(itype<kptype)
                       iikp=itype*bop_types-itype*(itype+1)/2+kptype-1;
                     else
                       iikp=kptype*bop_types-kptype*(kptype+1)/2+itype-1;
                     dis_ikp[0]=x[kp][0]-x[i][0];
                     dis_ikp[1]=x[kp][1]-x[i][1];
                     dis_ikp[2]=x[kp][2]-x[i][2];
                     rsq_ikp=dis_ikp[0]*dis_ikp[0]
                         +dis_ikp[1]*dis_ikp[1]
                         +dis_ikp[2]*dis_ikp[2];
                     r_ikp=sqrt(rsq_ikp);
                     if(r_ikp<=rcut[iikp]) {
                       ps=r_ikp*rdr[iikp]+1.0;
                       ks=(int)ps;
                       if(nr-1<ks)
                         ks=nr-1;
                       ps=ps-ks;
                       if(ps>1.0)
                         ps=1.0;
                       betaS_ikp=((pBetaS3[iikp][ks-1]*ps+pBetaS2[iikp][ks-1])*ps
                           +pBetaS1[iikp][ks-1])*ps+pBetaS[iikp][ks-1];
                       dBetaS_ikp=(pBetaS6[iikp][ks-1]*ps+pBetaS5[iikp][ks-1])*ps
                           +pBetaS4[iikp][ks-1];
                       betaP_ikp=((pBetaP3[iikp][ks-1]*ps+pBetaP2[iikp][ks-1])*ps
                           +pBetaP1[iikp][ks-1])*ps+pBetaP[iikp][ks-1];
                       dBetaP_ikp=(pBetaP6[iikp][ks-1]*ps+pBetaP5[iikp][ks-1])*ps
                           +pBetaP4[iikp][ks-1];
                       cosAng_jikp=(dis_ij[0]*dis_ikp[0]+dis_ij[1]*dis_ikp[1]
                           +dis_ij[2]*dis_ikp[2])/(r_ij*r_ikp);
                       dcA_jikp[0][0]=(dis_ikp[0]*r_ij*r_ikp-cosAng_jikp
                           *dis_ij[0]*r_ikp*r_ikp)/(r_ij*r_ij*r_ikp*r_ikp);
                       dcA_jikp[1][0]=(dis_ikp[1]*r_ij*r_ikp-cosAng_jikp
                           *dis_ij[1]*r_ikp*r_ikp)/(r_ij*r_ij*r_ikp*r_ikp);
                       dcA_jikp[2][0]=(dis_ikp[2]*r_ij*r_ikp-cosAng_jikp
                           *dis_ij[2]*r_ikp*r_ikp)/(r_ij*r_ij*r_ikp*r_ikp);
                       dcA_jikp[0][1]=(dis_ij[0]*r_ij*r_ikp-cosAng_jikp
                           *dis_ikp[0]*r_ij*r_ij)/(r_ij*r_ij*r_ikp*r_ikp);
                       dcA_jikp[1][1]=(dis_ij[1]*r_ij*r_ikp-cosAng_jikp
                           *dis_ikp[1]*r_ij*r_ij)/(r_ij*r_ij*r_ikp*r_ikp);
                       dcA_jikp[2][1]=(dis_ij[2]*r_ij*r_ikp-cosAng_jikp
                           *dis_ikp[2]*r_ij*r_ij)/(r_ij*r_ij*r_ikp*r_ikp);
                       cosAng_kikp=(dis_ik[0]*dis_ikp[0]+dis_ik[1]*dis_ikp[1]
                           +dis_ik[2]*dis_ikp[2])/(r_ik*r_ikp);
                       dcA_kikp[0][0]=(dis_ikp[0]*r_ik*r_ikp-cosAng_kikp
                           *dis_ik[0]*r_ikp*r_ikp)/(r_ik*r_ik*r_ikp*r_ikp);
                       dcA_kikp[1][0]=(dis_ikp[1]*r_ik*r_ikp-cosAng_kikp
                           *dis_ik[1]*r_ikp*r_ikp)/(r_ik*r_ik*r_ikp*r_ikp);
                       dcA_kikp[2][0]=(dis_ikp[2]*r_ik*r_ikp-cosAng_kikp
                           *dis_ik[2]*r_ikp*r_ikp)/(r_ik*r_ik*r_ikp*r_ikp);
                       dcA_kikp[0][1]=(dis_ik[0]*r_ik*r_ikp-cosAng_kikp
                           *dis_ikp[0]*r_ik*r_ik)/(r_ik*r_ik*r_ikp*r_ikp);
                       dcA_kikp[1][1]=(dis_ik[1]*r_ik*r_ikp-cosAng_kikp
                           *dis_ikp[1]*r_ik*r_ik)/(r_ik*r_ik*r_ikp*r_ikp);
                       dcA_kikp[2][1]=(dis_ik[2]*r_ik*r_ikp-cosAng_kikp
                           *dis_ikp[2]*r_ik*r_ik)/(r_ik*r_ik*r_ikp*r_ikp);
                       nb_ikp=nb_t;
                       nb_t++;
                       if(nb_t>nb_pi) {
                         new_n_tot=nb_pi+maxneigh;
                         grow_pi(nb_pi,new_n_tot);
                         nb_pi=new_n_tot;
                       }
                       bt_pi[nb_ikp].i=i;
                       bt_pi[nb_ikp].j=kp;
                       bt_pi[nb_ikp].temp=temp_ikp;
                       betaCapSq2=pi_p[itype-1]*betaS_ikp*betaS_ikp
                           -betaP_ikp*betaP_ikp;
                       dbetaCapSq2=2.0*pi_p[itype-1]*betaS_ikp*dBetaS_ikp
                           -2.0*betaP_ikp*dBetaP_ikp;
                       cosSq1=cosAng_jikp*cosAng_jikp;
                       angFactor=cosAng_kikp-cosAng_jikp*cosAng_jik;
                       angFactor1=4.0*angFactor;
                       angFactor2=-angFactor1*cosAng_jikp
                           +2.0*cosAng_jik*(1.0-cosSq1);
                       angFactor3=-angFactor1*cosAng_jik
                           +2.0*cosAng_jikp*(1.0-cosSq);
                       angFactor4=2.0*angFactor*angFactor-(1.0-cosSq)*(1.0-cosSq1);
                       betaCapSum=.5*betaCapSq1*betaCapSq2;
 
 //2nd BB is third term of Eq. 38 (a) where j , k and k'=neighbors i
 
                       BB=BB+betaCapSum*angFactor4;
 
 //agpdpr1 is derivative of BB 3rd term for atom i w.r.t. Beta(r_ik)
 //agpdpr2 is derivative of BB 3rd term for atom i w.r.t. Beta(r_ik')
 //app1 is derivative of BB 3rd term w.r.t. cos(theta_kik')
 //app2 is derivative of BB 3rd term w.r.t. cos(theta_jik)
 //app3 is derivative of BB 3rd term w.r.t. cos(theta_jik')
 
                       app1=betaCapSum*angFactor1;
                       app2=betaCapSum*angFactor2;
                       app3=betaCapSum*angFactor3;
                       agpdpr1=.5*angFactor4*dbetaCapSq1*betaCapSq2/r_ik;
                       agpdpr2=.5*angFactor4*betaCapSq1*dbetaCapSq2/r_ikp;
                       bt_pi[nb_ij].dBB[0]+=
                           app2*dcA_jik[0][0]
                           +app3*dcA_jikp[0][0];
                       bt_pi[nb_ij].dBB[1]+=
                           app2*dcA_jik[1][0]
                           +app3*dcA_jikp[1][0];
                       bt_pi[nb_ij].dBB[2]+=
                           app2*dcA_jik[2][0]
                           +app3*dcA_jikp[2][0];
                       bt_pi[nb_ik].dBB[0]+=
                           agpdpr1*dis_ik[0]
                           +app1*dcA_kikp[0][0]
                           +app2*dcA_jik[0][1];
                       bt_pi[nb_ik].dBB[1]+=
                           agpdpr1*dis_ik[1]
                           +app1*dcA_kikp[1][0]
                           +app2*dcA_jik[1][1];
                       bt_pi[nb_ik].dBB[2]+=
                           agpdpr1*dis_ik[2]
                           +app1*dcA_kikp[2][0]
                           +app2*dcA_jik[2][1];
                       bt_pi[nb_ikp].dBB[0]+=
                           agpdpr2*dis_ikp[0]
                           +app1*dcA_kikp[0][1]
                           +app3*dcA_jikp[0][1];
                       bt_pi[nb_ikp].dBB[1]+=
                           agpdpr2*dis_ikp[1]
                           +app1*dcA_kikp[1][1]
                           +app3*dcA_jikp[1][1];
                       bt_pi[nb_ikp].dBB[2]+=
                           agpdpr2*dis_ikp[2]
                           +app1*dcA_kikp[2][1]
                           +app3*dcA_jikp[2][1];
                       }
                     }
                   }
                 nPiBk[n]=nPiBk[n]+1;
                 }
               }
             }
 
 //j is a neighbor of i and k is a neighbor of j and equal to i
 
           for(ki=0;ki<numneigh[j];ki++) {
             k=jlist[ki];
             if(x[k][0]==x[i][0]) {
               if(x[k][1]==x[i][1]) {
                 if(x[k][2]==x[i][2]) {
                   break;
                 }
               }
             }
           }
 
 //j is a neighbor of i and k is a neighbor of j not equal to i
 
           for(ktmp=0;ktmp<numneigh[j];ktmp++) {
             if(ktmp!=ki) {
               temp_jk=BOP_index[j]+ktmp;
               k=jlist[ktmp];
               ktype=map[type[k]]+1;
               pi_flag=0;
               for(nsearch=0;nsearch<nPiBk[n];nsearch++) {
                 ncmp=itypePiBk[n][nsearch];
                 if(x[ncmp][0]==x[k][0]) {
                   if(x[ncmp][1]==x[k][1]) {
                     if(x[ncmp][2]==x[k][2]) {
                       pi_flag=1;
                       break;
                     }
                   }
                 }
               }
               if(pi_flag==0) {
                 itypePiBk[n][nPiBk[n]]=k;
               }
               if(jtype==ktype)
                 ijk=jtype-1;
               else if(jtype<ktype)
                 ijk=jtype*bop_types-jtype*(jtype+1)/2+ktype-1;
               else
                 ijk=ktype*bop_types-ktype*(ktype+1)/2+jtype-1;
               dis_jk[0]=x[k][0]-x[j][0];
               dis_jk[1]=x[k][1]-x[j][1];
               dis_jk[2]=x[k][2]-x[j][2];
               rsq_jk=dis_jk[0]*dis_jk[0]
                   +dis_jk[1]*dis_jk[1]
                   +dis_jk[2]*dis_jk[2];
               r_jk=sqrt(rsq_jk);
               if(r_jk<=rcut[ijk]) {
                 ps=r_jk*rdr[ijk]+1.0;
                 ks=(int)ps;
                 if(nr-1<ks)
                   ks=nr-1;
                 ps=ps-ks;
                 if(ps>1.0)
                   ps=1.0;
                 betaS_jk=((pBetaS3[ijk][ks-1]*ps+pBetaS2[ijk][ks-1])*ps
                     +pBetaS1[ijk][ks-1])*ps+pBetaS[ijk][ks-1];
                 dBetaS_jk=(pBetaS6[ijk][ks-1]*ps+pBetaS5[ijk][ks-1])*ps
                     +pBetaS4[ijk][ks-1];
                 betaP_jk=((pBetaP3[ijk][ks-1]*ps+pBetaP2[ijk][ks-1])*ps
                     +pBetaP1[ijk][ks-1])*ps+pBetaP[ijk][ks-1];
                 dBetaP_jk=(pBetaP6[ijk][ks-1]*ps+pBetaP5[ijk][ks-1])*ps
                     +pBetaP4[ijk][ks-1];
                 cosAng_ijk=(-dis_ij[0]*dis_jk[0]-dis_ij[1]*dis_jk[1]
                     -dis_ij[2]*dis_jk[2])/(r_ij*r_jk);
                 dcA_ijk[0][0]=(dis_jk[0]*r_ij*r_jk-cosAng_ijk
                     *-dis_ij[0]*r_jk*r_jk)/(r_ij*r_ij*r_jk*r_jk);
                 dcA_ijk[1][0]=(dis_jk[1]*r_ij*r_jk-cosAng_ijk
                     *-dis_ij[1]*r_jk*r_jk)/(r_ij*r_ij*r_jk*r_jk);
                 dcA_ijk[2][0]=(dis_jk[2]*r_ij*r_jk-cosAng_ijk
                     *-dis_ij[2]*r_jk*r_jk)/(r_ij*r_ij*r_jk*r_jk);
                 dcA_ijk[0][1]=(-dis_ij[0]*r_ij*r_jk-cosAng_ijk
                     *dis_jk[0]*r_ij*r_ij)/(r_ij*r_ij*r_jk*r_jk);
                 dcA_ijk[1][1]=(-dis_ij[1]*r_ij*r_jk-cosAng_ijk
                     *dis_jk[1]*r_ij*r_ij)/(r_ij*r_ij*r_jk*r_jk);
                 dcA_ijk[2][1]=(-dis_ij[2]*r_ij*r_jk-cosAng_ijk
                     *dis_jk[2]*r_ij*r_ij)/(r_ij*r_ij*r_jk*r_jk);
                 nb_jk=nb_t;
                 nb_t++;
                 if(nb_t>nb_pi) {
                   new_n_tot=nb_pi+maxneigh;
                   grow_pi(nb_pi,new_n_tot);
                   nb_pi=new_n_tot;
                 }
                 bt_pi[nb_jk].i=j;
                 bt_pi[nb_jk].j=k;
                 bt_pi[nb_jk].temp=temp_jk;
                 cosSq=cosAng_ijk*cosAng_ijk;
                 sinFactor=.5*(1.0-cosSq)*pi_p[jtype-1]*betaS_jk;
                 cosFactor=.5*(1.0+cosSq)*betaP_jk;
                 betaCapSq1=pi_p[jtype-1]*betaS_jk*betaS_jk
                     -betaP_jk*betaP_jk;
                 dbetaCapSq1=2.0*pi_p[jtype-1]*betaS_jk*dBetaS_jk
                     -2.0*betaP_jk*dBetaP_jk;
 
 //AA is Eq. 37 (a) and Eq. 19 (b) for j atoms
 //3rd BB is 2nd term of Eq. 38 (a) where i and k =neighbors j
 
                 AA=AA+sinFactor*betaS_jk+cosFactor*betaP_jk;
                 BB=BB+.25*(1.0-cosSq)*(1.0-cosSq)*betaCapSq1*betaCapSq1;
 
                 agpdpr1=(2.0*sinFactor*dBetaS_jk+2.0*cosFactor
                     *dBetaP_jk)/r_jk;
 
 //agpdpr1 is derivative of AA for atom j w.r.t. Beta(r_jk)
 //agpdpr2 is derivative of BB for atom j w.r.t. Beta(r_jk)
 //app1 is derivative of AA for j atom w.r.t. cos(theta_ijk)
 //app2 is derivative of BB 2nd term w.r.t. cos(theta_ijk)
 
                 agpdpr2=.5*(1.0-cosSq)*(1.0-cosSq)*betaCapSq1*dbetaCapSq1/r_jk;
                 app1=cosAng_ijk*(-pi_p[jtype-1]*betaS_jk*betaS_jk
                     +betaP_jk*betaP_jk);
                 app2=-(1.0-cosSq)*cosAng_ijk*betaCapSq1*betaCapSq1;
                 bt_pi[nb_ij].dAA[0]-=
                     app1*dcA_ijk[0][0];
                 bt_pi[nb_ij].dAA[1]-=
                     app1*dcA_ijk[1][0];
                 bt_pi[nb_ij].dAA[2]-=
                     app1*dcA_ijk[2][0];
                 bt_pi[nb_ij].dBB[0]-=
                     app2*dcA_ijk[0][0];
                 bt_pi[nb_ij].dBB[1]-=
                     app2*dcA_ijk[1][0];
                 bt_pi[nb_ij].dBB[2]-=
                     app2*dcA_ijk[2][0];
                 bt_pi[nb_jk].dAA[0]+=
                     agpdpr1*dis_jk[0]
                     +app1*dcA_ijk[0][1];
                 bt_pi[nb_jk].dAA[1]+=
                     agpdpr1*dis_jk[1]
                     +app1*dcA_ijk[1][1];
                 bt_pi[nb_jk].dAA[2]+=
                     agpdpr1*dis_jk[2]
                     +app1*dcA_ijk[2][1];
                 bt_pi[nb_jk].dBB[0]+=
                     app2*dcA_ijk[0][1]
                     +agpdpr2*dis_jk[0];
                 bt_pi[nb_jk].dBB[1]+=
                     app2*dcA_ijk[1][1]
                     +agpdpr2*dis_jk[1];
                 bt_pi[nb_jk].dBB[2]+=
                     app2*dcA_ijk[2][1]
                     +agpdpr2*dis_jk[2];
 
 //j is a neighbor of i and k and k' are different neighbors of j not equal to i
 
                 for(ltmp=0;ltmp<ktmp;ltmp++) {
                   if(ltmp!=ki) {
                     temp_jkp=BOP_index[j]+ltmp;
                     kp=jlist[ltmp];
                     kptype=map[type[kp]]+1;
                     for(nsearch=0;nsearch<nPiBk[n];nsearch++) {
                       ncmp=itypePiBk[n][nsearch];
                       if(x[ncmp][0]==x[kp][0]) {
                         if(x[ncmp][1]==x[kp][1]) {
                           if(x[ncmp][2]==x[kp][2]) {
                             break;
                           }
                         }
                       }
                     }
                     if(jtype==kptype)
                       ijkp=jtype-1;
                     else if(jtype<kptype)
                       ijkp=jtype*bop_types-jtype*(jtype+1)/2+kptype-1;
                     else
                       ijkp=kptype*bop_types-kptype*(kptype+1)/2+jtype-1;
                     dis_jkp[0]=x[kp][0]-x[j][0];
                     dis_jkp[1]=x[kp][1]-x[j][1];
                     dis_jkp[2]=x[kp][2]-x[j][2];
                     rsq_jkp=dis_jkp[0]*dis_jkp[0]
                         +dis_jkp[1]*dis_jkp[1]
                         +dis_jkp[2]*dis_jkp[2];
                     r_jkp=sqrt(rsq_jkp);
                     if(r_jkp<=rcut[ijkp]) {
                       ps=r_jkp*rdr[ijkp]+1.0;
                       ks=(int)ps;
                       if(nr-1<ks)
                         ks=nr-1;
                       ps=ps-ks;
                       if(ps>1.0)
                         ps=1.0;
                       betaS_jkp=((pBetaS3[ijkp][ks-1]*ps+pBetaS2[ijkp][ks-1])*ps
                           +pBetaS1[ijkp][ks-1])*ps+pBetaS[ijkp][ks-1];
                       dBetaS_jkp=(pBetaS6[ijkp][ks-1]*ps+pBetaS5[ijkp][ks-1])*ps
                           +pBetaS4[ijkp][ks-1];
                       betaP_jkp=((pBetaP3[ijkp][ks-1]*ps+pBetaP2[ijkp][ks-1])*ps
                           +pBetaP1[ijkp][ks-1])*ps+pBetaP[ijkp][ks-1];
                       dBetaP_jkp=(pBetaP6[ijkp][ks-1]*ps+pBetaP5[ijkp][ks-1])*ps
                           +pBetaP4[ijkp][ks-1];
                       cosAng_ijkp=(-dis_ij[0]*dis_jkp[0]-dis_ij[1]*dis_jkp[1]
                           -dis_ij[2]*dis_jkp[2])/(r_ij*r_jkp);
                       dcA_ijkp[0][0]=(dis_jkp[0]*r_ij*r_jkp-cosAng_ijkp
                           *-dis_ij[0]*r_jkp*r_jkp)/(r_ij*r_ij*r_jkp*r_jkp);
                       dcA_ijkp[1][0]=(dis_jkp[1]*r_ij*r_jkp-cosAng_ijkp
                           *-dis_ij[1]*r_jkp*r_jkp)/(r_ij*r_ij*r_jkp*r_jkp);
                       dcA_ijkp[2][0]=(dis_jkp[2]*r_ij*r_jkp-cosAng_ijkp
                           *-dis_ij[2]*r_jkp*r_jkp)/(r_ij*r_ij*r_jkp*r_jkp);
                       dcA_ijkp[0][1]=(-dis_ij[0]*r_ij*r_jkp-cosAng_ijkp
                           *dis_jkp[0]*r_ij*r_ij)/(r_ij*r_ij*r_jkp*r_jkp);
                       dcA_ijkp[1][1]=(-dis_ij[1]*r_ij*r_jkp-cosAng_ijkp
                           *dis_jkp[1]*r_ij*r_ij)/(r_ij*r_ij*r_jkp*r_jkp);
                       dcA_ijkp[2][1]=(-dis_ij[2]*r_ij*r_jkp-cosAng_ijkp
                           *dis_jkp[2]*r_ij*r_ij)/(r_ij*r_ij*r_jkp*r_jkp);
                       cosAng_kjkp=(dis_jk[0]*dis_jkp[0]+dis_jk[1]*dis_jkp[1]
                           +dis_jk[2]*dis_jkp[2])/(r_jk*r_jkp);
                       dcA_kjkp[0][0]=(dis_jkp[0]*r_jk*r_jkp-cosAng_kjkp
                           *dis_jk[0]*r_jkp*r_jkp)/(r_jk*r_jk*r_jkp*r_jkp);
                       dcA_kjkp[1][0]=(dis_jkp[1]*r_jk*r_jkp-cosAng_kjkp
                           *dis_jk[1]*r_jkp*r_jkp)/(r_jk*r_jk*r_jkp*r_jkp);
                       dcA_kjkp[2][0]=(dis_jkp[2]*r_jk*r_jkp-cosAng_kjkp
                           *dis_jk[2]*r_jkp*r_jkp)/(r_jk*r_jk*r_jkp*r_jkp);
                       dcA_kjkp[0][1]=(dis_jk[0]*r_jk*r_jkp-cosAng_kjkp
                           *dis_jkp[0]*r_jk*r_jk)/(r_jk*r_jk*r_jkp*r_jkp);
                       dcA_kjkp[1][1]=(dis_jk[1]*r_jk*r_jkp-cosAng_kjkp
                           *dis_jkp[1]*r_jk*r_jk)/(r_jk*r_jk*r_jkp*r_jkp);
                       dcA_kjkp[2][1]=(dis_jk[2]*r_jk*r_jkp-cosAng_kjkp
                           *dis_jkp[2]*r_jk*r_jk)/(r_jk*r_jk*r_jkp*r_jkp);
                       nb_jkp=nb_t;
                       nb_t++;
                       if(nb_t>nb_pi) {
                         new_n_tot=nb_pi+maxneigh;
                         grow_pi(nb_pi,new_n_tot);
                         nb_pi=new_n_tot;
                       }
                     bt_pi[nb_jkp].i=j;
                     bt_pi[nb_jkp].j=kp;
                     bt_pi[nb_jkp].temp=temp_jkp;
                     betaCapSq2=pi_p[jtype-1]*betaS_jkp*betaS_jkp
                       -betaP_jkp*betaP_jkp;
                     dbetaCapSq2=2.0*pi_p[jtype-1]*betaS_jkp*dBetaS_jkp
                       -2.0*betaP_jkp*dBetaP_jkp;
                     cosSq1=cosAng_ijkp*cosAng_ijkp;
                     angFactor=cosAng_kjkp-cosAng_ijkp*cosAng_ijk;
                     angFactor1=4.0*angFactor;
                     angFactor2=-angFactor1*cosAng_ijkp
                       +2.0*cosAng_ijk*(1.0-cosSq1);
                     angFactor3=-angFactor1*cosAng_ijk
                       +2.0*cosAng_ijkp*(1.0-cosSq);
                     angFactor4=2.0*angFactor*angFactor-(1.0-cosSq)*(1.0-cosSq1);
                       betaCapSum=.5*betaCapSq1*betaCapSq2;
 
 //4th BB is 4th term of Eq. 38 (a) where i , k and k' =neighbors j
 
                     BB=BB+betaCapSum*angFactor4;
 
 //app1 is derivative of BB 4th term w.r.t. cos(theta_kjk')
 //app2 is derivative of BB 4th term w.r.t. cos(theta_ijk)
 //app3 is derivative of BB 4th term w.r.t. cos(theta_ijk')
 //agpdpr1 is derivative of BB 4th term for atom j w.r.t. Beta(r_jk)
 //agpdpr2 is derivative of BB 4th term for atom j w.r.t. Beta(r_jk')
 
                     app1=betaCapSum*angFactor1;
                     app2=betaCapSum*angFactor2;
                     app3=betaCapSum*angFactor3;
                     agpdpr1=.5*angFactor4*dbetaCapSq1*betaCapSq2/r_jk;
                     agpdpr2=.5*angFactor4*betaCapSq1*dbetaCapSq2/r_jkp;
                     bt_pi[nb_ij].dBB[0]-=
                       app3*dcA_ijkp[0][0]
                       +app2*dcA_ijk[0][0];
                     bt_pi[nb_ij].dBB[1]-=
                       app3*dcA_ijkp[1][0]
                       +app2*dcA_ijk[1][0];
                     bt_pi[nb_ij].dBB[2]-=
                       app3*dcA_ijkp[2][0]
                       +app2*dcA_ijk[2][0];
                     bt_pi[nb_jk].dBB[0]+=
                       agpdpr1*dis_jk[0]
                       +app1*dcA_kjkp[0][0]
                       +app2*dcA_ijk[0][1];
                     bt_pi[nb_jk].dBB[1]+=
                       agpdpr1*dis_jk[1]
                       +app1*dcA_kjkp[1][0]
                       +app2*dcA_ijk[1][1];
                     bt_pi[nb_jk].dBB[2]+=
                       agpdpr1*dis_jk[2]
                       +app1*dcA_kjkp[2][0]
                       +app2*dcA_ijk[2][1];
                     bt_pi[nb_jkp].dBB[0]+=
                       agpdpr2*dis_jkp[0]
                       +app1*dcA_kjkp[0][1]
                       +app3*dcA_ijkp[0][1];
                     bt_pi[nb_jkp].dBB[1]+=
                       agpdpr2*dis_jkp[1]
                       +app1*dcA_kjkp[1][1]
                       +app3*dcA_ijkp[1][1];
                     bt_pi[nb_jkp].dBB[2]+=
                       agpdpr2*dis_jkp[2]
                       +app1*dcA_kjkp[2][1]
                       +app3*dcA_ijkp[2][1];
                     }
                   }
                 }
 
 //j and k' are different neighbors of i and k is a neighbor of j not equal to i
 
                 for(ltmp=0;ltmp<numneigh[i];ltmp++) {
                   if(ltmp!=jtmp) {
                     temp_ikp=BOP_index[i]+ltmp;
                     kp=iilist[ltmp];
                     kptype=map[type[kp]]+1;
                     for(nsearch=0;nsearch<nPiBk[n];nsearch++) {
                       ncmp=itypePiBk[n][nsearch];
                       if(x[ncmp][0]==x[kp][0]) {
                         if(x[ncmp][1]==x[kp][1]) {
                           if(x[ncmp][2]==x[kp][2]) {
                             break;
                           }
                         }
                       }
                     }
                     if(itype==kptype)
                       iikp=itype-1;
                     else if(itype<kptype)
                       iikp=itype*bop_types-itype*(itype+1)/2+kptype-1;
                     else
                       iikp=kptype*bop_types-kptype*(kptype+1)/2+itype-1;
                     dis_ikp[0]=x[kp][0]-x[i][0];
                     dis_ikp[1]=x[kp][1]-x[i][1];
                     dis_ikp[2]=x[kp][2]-x[i][2];
                     rsq_ikp=dis_ikp[0]*dis_ikp[0]
                         +dis_ikp[1]*dis_ikp[1]
                         +dis_ikp[2]*dis_ikp[2];
                     r_ikp=sqrt(rsq_ikp);
                     if(r_ikp<=rcut[iikp]) {
                       ps=r_ikp*rdr[iikp]+1.0;
                       ks=(int)ps;
                       if(nr-1<ks)
                         ks=nr-1;
                       ps=ps-ks;
                       if(ps>1.0)
                         ps=1.0;
                       betaS_ikp=((pBetaS3[iikp][ks-1]*ps+pBetaS2[iikp][ks-1])*ps
                           +pBetaS1[iikp][ks-1])*ps+pBetaS[iikp][ks-1];
                       dBetaS_ikp=(pBetaS6[iikp][ks-1]*ps+pBetaS5[iikp][ks-1])*ps
                           +pBetaS4[iikp][ks-1];
                       betaP_ikp=((pBetaP3[iikp][ks-1]*ps+pBetaP2[iikp][ks-1])*ps
                           +pBetaP1[iikp][ks-1])*ps+pBetaP[iikp][ks-1];
                       dBetaP_ikp=(pBetaP6[iikp][ks-1]*ps+pBetaP5[iikp][ks-1])*ps
                           +pBetaP4[iikp][ks-1];
                       cosAng_jikp=(dis_ij[0]*dis_ikp[0]+dis_ij[1]*dis_ikp[1]
                           +dis_ij[2]*dis_ikp[2])/(r_ij*r_ikp);
                       dcA_jikp[0][0]=(dis_ikp[0]*r_ij*r_ikp-cosAng_jikp
                           *dis_ij[0]*r_ikp*r_ikp)/(r_ij*r_ij*r_ikp*r_ikp);
                       dcA_jikp[1][0]=(dis_ikp[1]*r_ij*r_ikp-cosAng_jikp
                           *dis_ij[1]*r_ikp*r_ikp)/(r_ij*r_ij*r_ikp*r_ikp);
                       dcA_jikp[2][0]=(dis_ikp[2]*r_ij*r_ikp-cosAng_jikp
                           *dis_ij[2]*r_ikp*r_ikp)/(r_ij*r_ij*r_ikp*r_ikp);
                       dcA_jikp[0][1]=(dis_ij[0]*r_ij*r_ikp-cosAng_jikp
                           *dis_ikp[0]*r_ij*r_ij)/(r_ij*r_ij*r_ikp*r_ikp);
                       dcA_jikp[1][1]=(dis_ij[1]*r_ij*r_ikp-cosAng_jikp
                           *dis_ikp[1]*r_ij*r_ij)/(r_ij*r_ij*r_ikp*r_ikp);
                       dcA_jikp[2][1]=(dis_ij[2]*r_ij*r_ikp-cosAng_jikp
                           *dis_ikp[2]*r_ij*r_ij)/(r_ij*r_ij*r_ikp*r_ikp);
                       nb_ikp=nb_t;
                       nb_t++;
                       if(nb_t>nb_pi) {
                         new_n_tot=nb_pi+maxneigh;
                         grow_pi(nb_pi,new_n_tot);
                         nb_pi=new_n_tot;
                       }
                       bt_pi[nb_ikp].i=i;
                       bt_pi[nb_ikp].j=kp;
                       bt_pi[nb_ikp].temp=temp_ikp;
 
                       betaCapSq2=pi_p[itype-1]*betaS_ikp*betaS_ikp
                           -betaP_ikp*betaP_ikp;
                       dbetaCapSq2=2.0*pi_p[itype-1]*betaS_ikp*dBetaS_ikp
                           -2.0*betaP_ikp*dBetaP_ikp;
                       dotV=(dis_jk[0]*dis_ikp[0]+dis_jk[1]
                           *dis_ikp[1]+dis_jk[2]*dis_ikp[2])
                           /(r_jk*r_ikp);
                       cosSq1=cosAng_jikp*cosAng_jikp;
                       angFactor=dotV+cosAng_jikp*cosAng_ijk;
                       angRfactor=4.0*angFactor*dotV;
                       dAngR1=-angRfactor/r_jk;
                       dAngR2=-angRfactor/r_ikp;
                       angFactor1=4.0*angFactor*cosAng_jikp
                           +2.0*cosAng_ijk*(1.0-cosSq1);
                       angFactor2=4.0*angFactor*cosAng_ijk
                           +2.0*cosAng_jikp*(1.0-cosSq);
                       angFactor3=2.0*angFactor*angFactor-(1.0-cosSq)*(1.0-cosSq1);
                       betaCapSum=.5*betaCapSq1*betaCapSq2;
 
 //5th BB is 5th term of Eq. 38 (a) Eq. 21 (b) where k is a neighbor of j and k' is a neighbor of i
 
                       BB=BB+betaCapSum*angFactor3;
 
 //app1 is derivative of BB 5th term w.r.t. cos(theta_ijk)
 //app2 is derivative of BB 5th term w.r.t. cos(theta_jik')
 //agpdpr1 is derivative of BB 5th term for atom j w.r.t. Beta(r_jk)
 //agpdpr2 is derivative of BB 5th term for atom j w.r.t. Beta(r_ik')
 //agpdpr3 is derivative of BB 5th term for atom j w.r.t. dot(r_ik',r_jk)
 
                       app1=betaCapSum*angFactor1;
                       app2=betaCapSum*angFactor2;
                       agpdpr1=(.5*angFactor3*dbetaCapSq1*betaCapSq2
                           +betaCapSum*dAngR1)/r_jk;
                       agpdpr2=(.5*angFactor3*betaCapSq1*dbetaCapSq2
                           +betaCapSum*dAngR2)/r_ikp;
                       agpdpr3=4.0*betaCapSum*angFactor/(r_ikp*r_jk);
                       bt_pi[nb_ij].dBB[0]+=
                           +app2*dcA_jikp[0][0]
                           -app1*dcA_ijk[0][0];
                       bt_pi[nb_ij].dBB[1]+=
                           +app2*dcA_jikp[1][0]
                           -app1*dcA_ijk[1][0];
                       bt_pi[nb_ij].dBB[2]+=
                           +app2*dcA_jikp[2][0]
                           -app1*dcA_ijk[2][0];
                       bt_pi[nb_ikp].dBB[0]+=
                           agpdpr2*dis_ikp[0]
                           +agpdpr3*dis_jk[0]
                           +app2*dcA_jikp[0][1];
                       bt_pi[nb_ikp].dBB[1]+=
                           agpdpr2*dis_ikp[1]
                           +agpdpr3*dis_jk[1]
                           +app2*dcA_jikp[1][1];
                       bt_pi[nb_ikp].dBB[2]+=
                           agpdpr2*dis_ikp[2]
                           +agpdpr3*dis_jk[2]
                           +app2*dcA_jikp[2][1];
                       bt_pi[nb_jk].dBB[0]+=
                           agpdpr1*dis_jk[0]
                           +agpdpr3*dis_ikp[0]
                           +app1*dcA_ijk[0][1];
                       bt_pi[nb_jk].dBB[1]+=
                           agpdpr1*dis_jk[1]
                           +agpdpr3*dis_ikp[1]
                           +app1*dcA_ijk[1][1];
                       bt_pi[nb_jk].dBB[2]+=
                           agpdpr1*dis_jk[2]
                           +agpdpr3*dis_ikp[2]
                           +app1*dcA_ijk[2][1];
                     }
                   }
                 }
                 if(pi_flag==0)
                   nPiBk[n]=nPiBk[n]+1;
               }
             }
           }
           CC=betaP_ij*betaP_ij+pi_delta[iij]*pi_delta[iij];
           BBrt=sqrt(BB+small6);
           AB1=CC+pi_c[iij]*(AA+BBrt)+small7;
           AB2=CC+pi_c[iij]*(AA-BBrt+sqrt(small6))+small7;
           BBrtR=1.0/BBrt;
           ABrtR1=1.0/sqrt(AB1);
           ABrtR2=1.0/sqrt(AB2);
 
 // piB is a similar formulation to (a) Eq. 36 and (b) Eq. 18
 
           piB[n]=(ABrtR1+ABrtR2)*pi_a[iij]*betaP_ij;
           dPiB1=-.5*(cube(ABrtR1)+cube(ABrtR2))*pi_c[iij]*pi_a[iij]*betaP_ij;
           dPiB2=.25*BBrtR*(cube(ABrtR2)-cube(ABrtR1))*pi_c[iij]*pi_a[iij]*betaP_ij;
           dPiB3=((ABrtR1+ABrtR2)*pi_a[iij]-(cube(ABrtR1)+cube(ABrtR2))*pi_a[iij]
               *betaP_ij*betaP_ij)*dBetaP_ij/r_ij;
           n++;
 
           pp2=2.0*betaP_ij;
           for(m=0;m<nb_t;m++) {
             bt_i=bt_pi[m].i;
             bt_j=bt_pi[m].j;
             xtmp[0]=x[bt_j][0]-x[bt_i][0];
             xtmp[1]=x[bt_j][1]-x[bt_i][1];
             xtmp[2]=x[bt_j][2]-x[bt_i][2];
             for(pp=0;pp<3;pp++) {
               bt_pi[m].dPiB[pp]=
                   +dPiB1*bt_pi[m].dAA[pp]
                   +dPiB2*bt_pi[m].dBB[pp];
               ftmp[pp]=pp2*bt_pi[m].dPiB[pp];
               f[bt_i][pp]-=ftmp[pp];
               f[bt_j][pp]+=ftmp[pp];
             }
             if(evflag) {
               ev_tally_xyz(bt_i,bt_j,nlocal,newton_pair,0.0,0.0,ftmp[0],ftmp[1]
                   ,ftmp[2],xtmp[0],xtmp[1],xtmp[2]);
             }
           }
           for(pp=0;pp<3;pp++) {
             ftmp[pp]=pp2*dPiB3*dis_ij[pp];
             f[i][pp]-=ftmp[pp];
             f[j][pp]+=ftmp[pp];
           }
           if(evflag) {
             ev_tally_xyz(i,j,nlocal,newton_pair,0.0,0.0,ftmp[0],ftmp[1]
                 ,ftmp[2],dis_ij[0],dis_ij[1],dis_ij[2]);
           }
         }
       }
     }
   }
   destroy_pi();
 }
 
 /* ----------------------------------------------------------------------
    read BOP potential file
 ------------------------------------------------------------------------- */
 
 void PairBOP::read_file(char *filename)
 {
   int i,j,k;
   int ij,ii,jj;
   int buf1;
   int n;
   double buf2;
   char s[MAXLINE];
   char buf[2];
 
   MPI_Comm_rank(world,&me);
 
   // read file on proc 0
 
   rcore=0.1;
 
   if (me == 0) {
     FILE *fp = force->open_potential(filename);
     if (fp == NULL) {
       char str[128];
       sprintf(str,"Cannot open BOP potential file %s",filename);
       error->one(FLERR,str);
     }
 
     // read parameters
 
     fgets(s,MAXLINE,fp);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%d",&bop_types);
     fclose(fp);
     npairs=bop_types*(bop_types+1)/2;
   }
 
   MPI_Bcast(&bop_types,1,MPI_INT,0,world);
   MPI_Bcast(&npairs,1,MPI_INT,0,world);
   allocate();
   memory->create(pi_a,npairs,"BOP:pi_a");
   memory->create(pro_delta,bop_types,"BOP:pro_delta");
   memory->create(pi_delta,npairs,"BOP:pi_delta");
   memory->create(pi_p,bop_types,"BOP:pi_p");
   memory->create(pi_c,npairs,"BOP:pi_c");
   memory->create(sigma_r0,npairs,"BOP:sigma_r0");
   memory->create(pi_r0,npairs,"BOP:pi_r0");
   memory->create(phi_r0,npairs,"BOP:phi_r0");
   memory->create(sigma_rc,npairs,"BOP:sigma_rc");
   memory->create(pi_rc,npairs,"BOP:pi_rc");
   memory->create(phi_rc,npairs,"BOP:phi_rc");
   memory->create(r1,npairs,"BOP:r1");
   memory->create(sigma_beta0,npairs,"BOP:sigma_beta0");
   memory->create(pi_beta0,npairs,"BOP:pi_beta0");
   memory->create(phi0,npairs,"BOP:phi0");
   memory->create(sigma_n,npairs,"BOP:sigma_n");
   memory->create(pi_n,npairs,"BOP:pi_n");
   memory->create(phi_m,npairs,"BOP:phi_m");
   memory->create(sigma_nc,npairs,"BOP:sigma_nc");
   memory->create(pi_nc,npairs,"BOP:pi_nc");
   memory->create(phi_nc,npairs,"BOP:phi_nc");
   memory->create(pro,bop_types,"BOP:pro");
   memory->create(sigma_delta,npairs,"BOP:sigma_delta");
   memory->create(sigma_c,npairs,"BOP:sigma_c");
   memory->create(sigma_a,npairs,"BOP:sigma_a");
   memory->create(sigma_g0,bop_types
       ,bop_types,bop_types,"BOP:sigma_g0");
   memory->create(sigma_g1,bop_types
       ,bop_types,bop_types,"BOP:sigma_g1");
   memory->create(sigma_g2,bop_types
       ,bop_types,bop_types,"BOP:sigma_g2");
   memory->create(sigma_g3,bop_types
       ,bop_types,bop_types,"BOP:sigma_g3");
   memory->create(sigma_g4,bop_types
       ,bop_types,bop_types,"BOP:sigma_g4");
   memory->create(sigma_f,npairs,"BOP:sigma_f");
   memory->create(sigma_k,npairs,"BOP:sigma_k");
   memory->create(small3,npairs,"BOP:small3");
 
   if (me == 0) {
     words = new char*[bop_types];
     for(i=0;i<bop_types;i++) words[i]=NULL;
     FILE *fp = force->open_potential(filename);
     if (fp == NULL) {
       char str[128];
       sprintf(str,"Cannot open BOP potential file %s",filename);
       error->one(FLERR,str);
     }
     fgets(s,MAXLINE,fp);
     fgets(s,MAXLINE,fp);
     for(i=0;i<bop_types;i++) {
       fgets(s,MAXLINE,fp);
       sscanf(s,"%d %lf %s",&buf1,&buf2,buf);
       n= strlen(buf)+1;
       words[i] = new char[n];
       strcpy(words[i],buf);
     }
     fgets(s,MAXLINE,fp);
     fgets(s,MAXLINE,fp);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lf%lf%lf%lf%lf%lf%lf",&small1,&small2,&small3g,&small4
         ,&small5,&small6,&small7);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%d%lf%lf",&ncutoff,&rbig,&rsmall);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lf%lf%d",&which,&alpha,&nfunc);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lf%lf%lf",&alpha1,&beta1,&gamma1);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lf%lf",&alpha2,&beta2);
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lf%lf",&alpha3,&beta3);
     fgets(s,MAXLINE,fp);
     fgets(s,MAXLINE,fp);
     for(i=0;i<bop_types;i++) {
         fgets(s,MAXLINE,fp);
         sscanf(s,"%lf%lf%lf",&pro[i],&pro_delta[i],&pi_p[i]);
     }
     fgets(s,MAXLINE,fp);
     fgets(s,MAXLINE,fp);
     cutmax=0;
 
     for(i=0;i<bop_types;i++) {
       ii=i+1;
       for(j=i;j<bop_types;j++) {
         jj=j+1;
         if(ii==jj)
           ij=ii-1;
         else if(ii<jj)
           ij=ii*bop_types-ii*(ii+1)/2+jj-1;
         else
           ij=jj*bop_types-jj*(jj+1)/2+ii-1;
         fgets(s,MAXLINE,fp);
         sscanf(s,"%lf%lf%lf%lf",&sigma_r0[ij],&sigma_rc[ij],&r1[ij],&rcut[ij]);
         if(rcut[ij]>cutmax)
           cutmax=rcut[ij];
         pi_r0[ij]=sigma_r0[ij];
         phi_r0[ij]=sigma_r0[ij];
         pi_rc[ij]=sigma_rc[ij];
         phi_rc[ij]=sigma_rc[ij];
         fgets(s,MAXLINE,fp);
         sscanf(s,"%lf%lf%lf",&phi_m[ij],&sigma_n[ij],&sigma_nc[ij]);
         pi_n[ij]=sigma_n[ij];
         pi_nc[ij]=sigma_nc[ij];
         phi_nc[ij]=sigma_nc[ij];
         fgets(s,MAXLINE,fp);
         sscanf(s,"%lf%lf%lf",&phi0[ij],&sigma_beta0[ij],&pi_beta0[ij]);
         fgets(s,MAXLINE,fp);
         sscanf(s,"%lf%lf%lf",&sigma_a[ij],&sigma_c[ij],&sigma_delta[ij]);
         fgets(s,MAXLINE,fp);
         sscanf(s,"%lf%lf%lf",&pi_a[ij],&pi_c[ij],&pi_delta[ij]);
         fgets(s,MAXLINE,fp);
         sscanf(s,"%lf%lf%lf",&sigma_f[ij],&sigma_k[ij],&small3[ij]);
       }
     }
     fgets(s,MAXLINE,fp);
     fgets(s,MAXLINE,fp);
     for(i=0;i<bop_types;i++) {
       for(j=0;j<bop_types;j++) {
         for(k=j;k<bop_types;k++) {
           fgets(s,MAXLINE,fp);
           sscanf(s,"%lf%lf%lf",&sigma_g0[j][i][k],&sigma_g1[j][i][k]
               ,&sigma_g2[j][i][k]);
           sigma_g0[k][i][j]=sigma_g0[j][i][k];
           sigma_g1[k][i][j]=sigma_g1[j][i][k];
           sigma_g2[k][i][j]=sigma_g2[j][i][k];
         }
       }
     }
     for(i=0;i<npairs;i++) {
       dr[i]=rcut[i]/(nr-1.0);
       rdr[i]=1.0/dr[i];
     }
     fclose(fp);
   }
   MPI_Bcast(&small1,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&small2,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&small3g,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&small4,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&small5,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&small6,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&small7,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&ncutoff,1,MPI_INT,0,world);
   MPI_Bcast(&rbig,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&rsmall,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&which,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&alpha,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&nfunc,1,MPI_INT,0,world);
   MPI_Bcast(&alpha1,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&beta1,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&gamma1,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&alpha2,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&beta2,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&alpha3,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&beta3,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&pro[0],bop_types,MPI_DOUBLE,0,world);
   MPI_Bcast(&pro_delta[0],bop_types,MPI_DOUBLE,0,world);
   MPI_Bcast(&pi_p[0],bop_types,MPI_DOUBLE,0,world);
   MPI_Bcast(&sigma_r0[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&sigma_rc[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&r1[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&rcut[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&cutmax,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&pi_r0[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&phi_r0[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&pi_rc[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&phi_rc[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&phi_m[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&sigma_n[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&sigma_nc[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&pi_n[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&pi_nc[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&phi_nc[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&phi0[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&sigma_beta0[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&pi_beta0[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&sigma_a[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&sigma_c[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&sigma_delta[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&pi_a[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&pi_c[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&pi_delta[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&sigma_f[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&sigma_k[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&small3[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&sigma_g0[0][0][0],bop_types*bop_types*bop_types,MPI_DOUBLE,0,world);
   MPI_Bcast(&sigma_g1[0][0][0],bop_types*bop_types*bop_types,MPI_DOUBLE,0,world);
   MPI_Bcast(&sigma_g2[0][0][0],bop_types*bop_types*bop_types,MPI_DOUBLE,0,world);
   MPI_Bcast(&sigma_g3[0][0][0],bop_types*bop_types*bop_types,MPI_DOUBLE,0,world);
   MPI_Bcast(&sigma_g4[0][0][0],bop_types*bop_types*bop_types,MPI_DOUBLE,0,world);
   MPI_Bcast(&dr[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&rdr[0],npairs,MPI_DOUBLE,0,world);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBOP::read_table(char *filename)
 {
   int i,j,k,n;
   int buf1;
   double buf2;
   char s[MAXLINE],buf[2];
 
   MPI_Comm_rank(world,&me);
 
   if (me == 0) {
     FILE *fp = force->open_potential(filename);
     if (fp == NULL) {
       char str[128];
       sprintf(str,"Cannot open BOP potential file %s",filename);
       error->one(FLERR,str);
     }
     fgets(s,MAXLINE,fp);  // skip first comment line
     fgets(s,MAXLINE,fp);
     sscanf(s,"%d",&bop_types);
     words = new char*[bop_types];
     for(i=0;i<bop_types;i++) words[i]=NULL;
     for(i=0;i<bop_types;i++) {
       fgets(s,MAXLINE,fp);
       sscanf(s,"%d %lf %s",&buf1,&buf2,buf);
       n= strlen(buf)+1;
       words[i] = new char[n];
       strcpy(words[i],buf);
     }
     fgets(s,MAXLINE,fp);
     sscanf(s,"%d %d",&nr,&nBOt);
     fclose(fp);
     npairs=bop_types*(bop_types+1)/2;
   }
 
   MPI_Bcast(&nr,1,MPI_INT,0,world);
   MPI_Bcast(&nBOt,1,MPI_INT,0,world);
   MPI_Bcast(&bop_types,1,MPI_INT,0,world);
   MPI_Bcast(&npairs,1,MPI_INT,0,world);
   memory->create(pi_a,npairs,"BOP:pi_a");
   memory->create(pro_delta,bop_types,"BOP:pro_delta");
   memory->create(pi_delta,npairs,"BOP:pi_delta");
   memory->create(pi_p,bop_types,"BOP:pi_p");
   memory->create(pi_c,npairs,"BOP:pi_c");
   memory->create(r1,npairs,"BOP:r1");
   memory->create(pro,bop_types,"BOP:pro");
   memory->create(sigma_delta,npairs,"BOP:sigma_delta");
   memory->create(sigma_c,npairs,"BOP:sigma_c");
   memory->create(sigma_a,npairs,"BOP:sigma_a");
   memory->create(sigma_g0,bop_types
       ,bop_types,bop_types,"BOP:sigma_g0");
   memory->create(sigma_g1,bop_types
       ,bop_types,bop_types,"BOP:sigma_g1");
   memory->create(sigma_g2,bop_types
       ,bop_types,bop_types,"BOP:sigma_g2");
   memory->create(sigma_f,npairs,"BOP:sigma_f");
   memory->create(sigma_k,npairs,"BOP:sigma_k");
   memory->create(small3,npairs,"BOP:small3");
   allocate();
 
   if (me == 0) {
     FILE *fp = force->open_potential(filename);
     if (fp == NULL) {
       char str[128];
       sprintf(str,"Cannot open BOP potential file %s",filename);
       error->one(FLERR,str);
     }
     for(i=0;i<bop_types+2;i++) {
       fgets(s,MAXLINE,fp);
     }
     fgets(s,MAXLINE,fp);
     sscanf(s,"%lf%lf%lf%lf%lf%lf%lf",&small1,&small2,&small3g
         ,&small4,&small5,&small6,&small7);
     for(i=0;i<bop_types;i++) {
       fgets(s,MAXLINE,fp);
       sscanf(s,"%lf",&pi_p[i]);
     }
     cutmax=0;
     for(i=0;i<npairs;i++) {
       fgets(s,MAXLINE,fp);
       sscanf(s,"%lf",&rcut[i]);
       if(rcut[i]>cutmax)
         cutmax=rcut[i];
       fgets(s,MAXLINE,fp);
       sscanf(s,"%lf%lf%lf%lf",&sigma_c[i],&sigma_a[i],&pi_c[i],&pi_a[i]);
       fgets(s,MAXLINE,fp);
       sscanf(s,"%lf%lf",&sigma_delta[i],&pi_delta[i]);
       fgets(s,MAXLINE,fp);
       sscanf(s,"%lf%lf%lf",&sigma_f[i],&sigma_k[i],&small3[i]);
     }
     for(i=0;i<bop_types;i++)
       for(j=0;j<bop_types;j++)
         for(k=0;k<bop_types;k++) {
           fgets(s,MAXLINE,fp);
           sscanf(s,"%lf%lf%lf",&sigma_g0[i][j][k],&sigma_g1[i][j][k],&sigma_g2[i][j][k]);
         }
     for(i=0;i<npairs;i++) {
       for(j=0;j<nr;j++) {
         fgets(s,MAXLINE,fp);
         sscanf(s,"%lf%lf%lf%lf%lf",&pRepul[i][j],&pRepul[i][j+1]
             ,&pRepul[i][j+2],&pRepul[i][j+3],&pRepul[i][j+4]);
         j+=4;
       }
     }
     for(i=0;i<npairs;i++) {
       for(j=0;j<nr;j++) {
         fgets(s,MAXLINE,fp);
         sscanf(s,"%lf%lf%lf%lf%lf",&pBetaS[i][j],&pBetaS[i][j+1]
             ,&pBetaS[i][j+2],&pBetaS[i][j+3],&pBetaS[i][j+4]);
         j+=4;
       }
     }
     for(i=0;i<npairs;i++) {
       for(j=0;j<nr;j++) {
         fgets(s,MAXLINE,fp);
         sscanf(s,"%lf%lf%lf%lf%lf",&pBetaP[i][j],&pBetaP[i][j+1]
             ,&pBetaP[i][j+2],&pBetaP[i][j+3],&pBetaP[i][j+4]);
         j+=4;
       }
     }
     for(i=0;i<npairs;i++) {
       for(j=0;j<nBOt;j++) {
         fgets(s,MAXLINE,fp);
         sscanf(s,"%lf%lf%lf%lf%lf",&FsigBO[i][j],&FsigBO[i][j+1]
             ,&FsigBO[i][j+2],&FsigBO[i][j+3],&FsigBO[i][j+4]);
         j+=4;
       }
     }
     for(i=0;i<bop_types;i++) {
       fgets(s,MAXLINE,fp);
       sscanf(s,"%lf",&pro_delta[i]);
     }
     for(i=0;i<bop_types;i++) {
       fgets(s,MAXLINE,fp);
       sscanf(s,"%lf",&pro[i]);
     }
     for(i=0;i<npairs;i++) {
       dr[i]=rcut[i]/((double)nr-1.0);
       rdr[i]=1.0/dr[i];
     }
     dBO=1.0/((double)nBOt-1.0);
     rdBO=1.0/(double)dBO;
 
     for(i=0;i<npairs;i++) {
       pBetaS1[i][0]=pBetaS[i][1]-pBetaS[i][0];
       pBetaS1[i][1]=0.5*(pBetaS[i][2]-pBetaS[i][0]);
       pBetaS1[i][nr-2]=0.5*(pBetaS[i][nr-1]-pBetaS[i][nr-3]);
       pBetaS1[i][nr-1]=pBetaS[i][nr-1]-pBetaS[i][nr-2];
       pBetaP1[i][0]=pBetaP[i][1]-pBetaP[i][0];
       pBetaP1[i][1]=0.5*(pBetaP[i][2]-pBetaP[i][0]);
       pBetaP1[i][nr-2]=0.5*(pBetaP[i][nr-1]-pBetaP[i][nr-3]);
       pBetaP1[i][nr-1]=pBetaP[i][nr-1]-pBetaP[i][nr-2];
       pRepul1[i][0]=pRepul[i][1]-pRepul[i][0];
       pRepul1[i][1]=0.5*(pRepul[i][2]-pRepul[i][0]);
       pRepul1[i][nr-2]=0.5*(pRepul[i][nr-1]-pRepul[i][nr-3]);
       pRepul1[i][nr-1]=pRepul[i][nr-1]-pRepul[i][nr-2];
       FsigBO1[i][0]=FsigBO[i][1]-FsigBO[i][0];
       FsigBO1[i][1]=0.5*(FsigBO[i][2]-FsigBO[i][0]);
       FsigBO1[i][nBOt-2]=0.5*(FsigBO[i][nBOt-1]-FsigBO[i][nBOt-3]);
       FsigBO1[i][nBOt-1]=FsigBO[i][nBOt-1]-FsigBO[i][nBOt-2];
       for(k=2;k<nr-2;k++) {
         pBetaS1[i][k]=((pBetaS[i][k-2]-pBetaS[i][k+2])
             +8.0*(pBetaS[i][k+1]-pBetaS[i][k-1]))/12.0;
         pBetaP1[i][k]=((pBetaP[i][k-2]-pBetaP[i][k+2])
             +8.0*(pBetaP[i][k+1]-pBetaP[i][k-1]))/12.0;
         pRepul1[i][k]=((pRepul[i][k-2]-pRepul[i][k+2])
             +8.0*(pRepul[i][k+1]-pRepul[i][k-1]))/12.0;
       }
       for(k=2;k<nr-2;k++) {
         FsigBO1[i][k]=((FsigBO[i][k-2]-FsigBO[i][k+2])
             +8.0*(FsigBO[i][k+1]-FsigBO[i][k-1]))/12.0;
       }
       for(k=0;k<nr-1;k++) {
         pBetaS2[i][k]=3.0*(pBetaS[i][k+1]-pBetaS[i][k])
             -2.0*pBetaS1[i][k]-pBetaS1[i][k+1];
         pBetaS3[i][k]=pBetaS1[i][k]+pBetaS1[i][k+1]
             -2.0*(pBetaS[i][k+1]-pBetaS[i][k]);
         pBetaP2[i][k]=3.0*(pBetaP[i][k+1]-pBetaP[i][k])
             -2.0*pBetaP1[i][k]-pBetaP1[i][k+1];
         pBetaP3[i][k]=pBetaP1[i][k]+pBetaP1[i][k+1]
             -2.0*(pBetaP[i][k+1]-pBetaP[i][k]);
         pRepul2[i][k]=3.0*(pRepul[i][k+1]-pRepul[i][k])
             -2.0*pRepul1[i][k]-pRepul1[i][k+1];
         pRepul3[i][k]=pRepul1[i][k]+pRepul1[i][k+1]
             -2.0*(pRepul[i][k+1]-pRepul[i][k]);
       }
       for(k=0;k<nBOt-1;k++) {
         FsigBO2[i][k]=3.0*(FsigBO[i][k+1]-FsigBO[i][k])
             -2.0*FsigBO1[i][k]-FsigBO1[i][k+1];
         FsigBO3[i][k]=FsigBO1[i][k]+FsigBO1[i][k+1]
             -2.0*(FsigBO[i][k+1]-FsigBO[i][k]);
       }
       pBetaS2[i][nr-1]=0.0;
       pBetaS3[i][nr-1]=0.0;
       pBetaP2[i][nr-1]=0.0;
       pBetaP3[i][nr-1]=0.0;
       pRepul2[i][nr-1]=0.0;
       pRepul3[i][nr-1]=0.0;
       FsigBO2[i][nBOt-1]=0.0;
       FsigBO3[i][nBOt-1]=0.0;
       for(k=0;k<nr;k++) {
         pBetaS4[i][k]=pBetaS1[i][k]/dr[i];
         pBetaS5[i][k]=2.0*pBetaS2[i][k]/dr[i];
         pBetaS6[i][k]=3.0*pBetaS3[i][k]/dr[i];
         pBetaP4[i][k]=pBetaP1[i][k]/dr[i];
         pBetaP5[i][k]=2.0*pBetaP2[i][k]/dr[i];
         pBetaP6[i][k]=3.0*pBetaP3[i][k]/dr[i];
         pRepul4[i][k]=pRepul1[i][k]/dr[i];
         pRepul5[i][k]=2.0*pRepul2[i][k]/dr[i];
         pRepul6[i][k]=3.0*pRepul3[i][k]/dr[i];
       }
       for(k=0;k<nBOt;k++) {
         FsigBO4[i][k]=FsigBO1[i][k]/dBO;
         FsigBO5[i][k]=2.0*FsigBO2[i][k]/dBO;
         FsigBO6[i][k]=3.0*FsigBO3[i][k]/dBO;
       }
     }
     fclose(fp);
   }
   MPI_Bcast(&rdBO,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&dBO,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&bop_types,1,MPI_INT,0,world);
   MPI_Bcast(&small1,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&small2,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&small3g,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&small4,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&small5,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&small6,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&small7,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&pro[0],bop_types,MPI_DOUBLE,0,world);
   MPI_Bcast(&pro_delta[0],bop_types,MPI_DOUBLE,0,world);
   MPI_Bcast(&pi_p[0],bop_types,MPI_DOUBLE,0,world);
   MPI_Bcast(&r1[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&rcut[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&cutmax,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&sigma_a[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&sigma_c[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&sigma_delta[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&pi_a[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&pi_c[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&pi_delta[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&sigma_f[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&sigma_k[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&small3[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&sigma_g0[0][0][0],bop_types*bop_types*bop_types,MPI_DOUBLE,0,world);
   MPI_Bcast(&sigma_g1[0][0][0],bop_types*bop_types*bop_types,MPI_DOUBLE,0,world);
   MPI_Bcast(&sigma_g2[0][0][0],bop_types*bop_types*bop_types,MPI_DOUBLE,0,world);
   MPI_Bcast(&dr[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&rdr[0],npairs,MPI_DOUBLE,0,world);
   MPI_Bcast(&pBetaS[0][0],npairs*nr,MPI_DOUBLE,0,world);
   MPI_Bcast(&pBetaS1[0][0],npairs*nr,MPI_DOUBLE,0,world);
   MPI_Bcast(&pBetaS2[0][0],npairs*nr,MPI_DOUBLE,0,world);
   MPI_Bcast(&pBetaS3[0][0],npairs*nr,MPI_DOUBLE,0,world);
   MPI_Bcast(&pBetaS4[0][0],npairs*nr,MPI_DOUBLE,0,world);
   MPI_Bcast(&pBetaS5[0][0],npairs*nr,MPI_DOUBLE,0,world);
   MPI_Bcast(&pBetaS6[0][0],npairs*nr,MPI_DOUBLE,0,world);
   MPI_Bcast(&pBetaP[0][0],npairs*nr,MPI_DOUBLE,0,world);
   MPI_Bcast(&pBetaP1[0][0],npairs*nr,MPI_DOUBLE,0,world);
   MPI_Bcast(&pBetaP2[0][0],npairs*nr,MPI_DOUBLE,0,world);
   MPI_Bcast(&pBetaP3[0][0],npairs*nr,MPI_DOUBLE,0,world);
   MPI_Bcast(&pBetaP4[0][0],npairs*nr,MPI_DOUBLE,0,world);
   MPI_Bcast(&pBetaP5[0][0],npairs*nr,MPI_DOUBLE,0,world);
   MPI_Bcast(&pBetaP6[0][0],npairs*nr,MPI_DOUBLE,0,world);
   MPI_Bcast(&pRepul[0][0],npairs*nr,MPI_DOUBLE,0,world);
   MPI_Bcast(&pRepul1[0][0],npairs*nr,MPI_DOUBLE,0,world);
   MPI_Bcast(&pRepul2[0][0],npairs*nr,MPI_DOUBLE,0,world);
   MPI_Bcast(&pRepul3[0][0],npairs*nr,MPI_DOUBLE,0,world);
   MPI_Bcast(&pRepul4[0][0],npairs*nr,MPI_DOUBLE,0,world);
   MPI_Bcast(&pRepul5[0][0],npairs*nr,MPI_DOUBLE,0,world);
   MPI_Bcast(&pRepul6[0][0],npairs*nr,MPI_DOUBLE,0,world);
   MPI_Bcast(&FsigBO[0][0],npairs*nBOt,MPI_DOUBLE,0,world);
   MPI_Bcast(&FsigBO1[0][0],npairs*nBOt,MPI_DOUBLE,0,world);
   MPI_Bcast(&FsigBO2[0][0],npairs*nBOt,MPI_DOUBLE,0,world);
   MPI_Bcast(&FsigBO3[0][0],npairs*nBOt,MPI_DOUBLE,0,world);
   MPI_Bcast(&FsigBO4[0][0],npairs*nBOt,MPI_DOUBLE,0,world);
   MPI_Bcast(&FsigBO5[0][0],npairs*nBOt,MPI_DOUBLE,0,world);
   MPI_Bcast(&FsigBO6[0][0],npairs*nBOt,MPI_DOUBLE,0,world);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBOP::setPbetaS()
 {
   int i,j,k;
   double r,value,dvalue;
 
   for(i=0;i<npairs;i++) {
     for(j=0;j<nr;j++) {
       r=(double)j*dr[i];
       if(r<rcore)
         r=rcore;
       if(ncutoff==3) {
         if(r>=rcut[i])
           pBetaS[i][j]=0.0;
         else if(r<=r1[i]) {
           value=betaSfunc(i,r);
           dvalue=dBetaSfunc(i,r,value,1.0);
           pBetaS[i][j]=value;
         }
         else {
           value=betaSfunc(i,r1[i]);
           dvalue=dBetaSfunc(i,r1[i],value,1.0);
           pBetaS[i][j]=-(r-rcut[i])*(r-rcut[i])*(value*(2.0*r-3.0*r1[i]+rcut[i])
               -dvalue*(r-r1[i])*(r1[i]-rcut[i]))/((r1[i]-rcut[i])
               *(r1[i]-rcut[i])*(r1[i]-rcut[i]));
         }
       }
       else {
         if(r>=rcut[i])
           pBetaS[i][j]=0.0;
         else {
           value=betaSfunc(i,r);
           dvalue=dBetaSfunc(i,r,value,0.0);
           pBetaS[i][j]=value*cutoff(r1[i],rcut[i],ncutoff,r);
         }
       }
     }
     pBetaS[i][nr-1]=0.0;
     pBetaS1[i][0]=pBetaS[i][1]-pBetaS[i][0];
     pBetaS1[i][1]=0.5*(pBetaS[i][2]-pBetaS[i][0]);
     pBetaS1[i][nr-2]=0.5*(pBetaS[i][nr-1]-pBetaS[i][nr-3]);
     pBetaS1[i][nr-1]=pBetaS[i][nr-1]-pBetaS[i][nr-2];
 
     for(k=2;k<nr-2;k++) {
       pBetaS1[i][k]=((pBetaS[i][k-2]-pBetaS[i][k+2])+8.0*(pBetaS[i][k+1]
           -pBetaS[i][k-1]))/12.0;
     }
     for(k=0;k<nr-1;k++) {
       pBetaS2[i][k]=3.0*(pBetaS[i][k+1]-pBetaS[i][k])-2.0*pBetaS1[i][k]-pBetaS1[i][k+1];
       pBetaS3[i][k]=pBetaS1[i][k]+pBetaS1[i][k+1]-2.0*(pBetaS[i][k+1]-pBetaS[i][k]);
     }
     pBetaS2[i][nr-1]=0.0;
     pBetaS3[i][nr-1]=0.0;
     for(k=0;k<nr;k++) {
       pBetaS4[i][k]=pBetaS1[i][k]/dr[i];
       pBetaS5[i][k]=2.0*pBetaS2[i][k]/dr[i];
       pBetaS6[i][k]=3.0*pBetaS3[i][k]/dr[i];
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBOP::setPbetaP()
 {
   // For every pair index, sample betaPfunc() on the radial grid r = j*dr
   // (clamped from below at rcore), apply the cutoff treatment selected by
   // ncutoff, then derive the spline coefficient tables pBetaP1..pBetaP6.
   int i,j,k;
   double r,value,dvalue;
 
   for(i=0;i<npairs;i++) {
     for(j=0;j<nr;j++) {
       r=(double)j*dr[i];
       if(r<rcore)
         r=rcore;
       if(ncutoff==3) {
         if(r>=rcut[i])
           pBetaP[i][j]=0.0;
         else if(r<=r1[i]) {
           // inside r1 the analytic value is stored unchanged
           value=betaPfunc(i,r);
           dvalue=dBetaPfunc(i,r,value,0.0);
           pBetaP[i][j]=value;
         }
         else {
           // between r1 and rcut: polynomial built from the value and slope
           // at r1[i]; its (r-rcut)^2 factor makes it vanish at rcut.
           // The slope term must use this pair's r1[i], as the matching
           // expressions in setPbetaS() and setPrepul() do.
           value=betaPfunc(i,r1[i]);
           dvalue=dBetaPfunc(i,r1[i],value,1.0);
           pBetaP[i][j]=-(r-rcut[i])*(r-rcut[i])*(value*(2.0*r-3.0*r1[i]
               +rcut[i])-dvalue*(r-r1[i])*(r1[i]-rcut[i]))/((r1[i]-rcut[i])
               *(r1[i]-rcut[i])*(r1[i]-rcut[i]));
         }
       }
       else {
         if(r>=rcut[i])
           pBetaP[i][j]=0.0;
         else {
           // analytic value scaled by the cutoff() switching factor
           value=betaPfunc(i,r);
           dvalue=dBetaPfunc(i,r,value,0.0);
           pBetaP[i][j]=value*cutoff(r1[i],rcut[i],ncutoff,r);
         }
       }
     }
     pBetaP[i][nr-1]=0.0;
     // slopes per grid step: one-sided at the ends, 3-point next to them,
     // 5-point in the interior
     pBetaP1[i][0]=pBetaP[i][1]-pBetaP[i][0];
     pBetaP1[i][1]=0.5*(pBetaP[i][2]-pBetaP[i][0]);
     pBetaP1[i][nr-2]=0.5*(pBetaP[i][nr-1]-pBetaP[i][nr-3]);
     pBetaP1[i][nr-1]=pBetaP[i][nr-1]-pBetaP[i][nr-2];
     for(k=2;k<nr-2;k++)
       pBetaP1[i][k]=((pBetaP[i][k-2]-pBetaP[i][k+2])+8.0*(pBetaP[i][k+1]
           -pBetaP[i][k-1]))/12.0;
     // quadratic and cubic cubic-Hermite coefficients per interval
     for(k=0;k<nr-1;k++) {
       pBetaP2[i][k]=3.0*(pBetaP[i][k+1]-pBetaP[i][k])-2.0*pBetaP1[i][k]-pBetaP1[i][k+1];
       pBetaP3[i][k]=pBetaP1[i][k]+pBetaP1[i][k+1]-2.0*(pBetaP[i][k+1]-pBetaP[i][k]);
     }
     pBetaP2[i][nr-1]=0.0;
     pBetaP3[i][nr-1]=0.0;
     // derivative coefficients, divided by the grid spacing
     for(k=0;k<nr;k++) {
       pBetaP4[i][k]=pBetaP1[i][k]/dr[i];
       pBetaP5[i][k]=2.0*pBetaP2[i][k]/dr[i];
       pBetaP6[i][k]=3.0*pBetaP3[i][k]/dr[i];
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBOP::setPrepul()
 {
   int i,j,k;
   double r,value,dvalue;
 
   for(i=0;i<npairs;i++) {
     for(j=0;j<nr;j++) {
       r=(double)j*dr[i];
       if(r<rcore)
         r=rcore;
       if(ncutoff==3) {
         if(r>=rcut[i])
           pRepul[i][j]=0.0;
         else if(r<=r1[i]) {
           value=repulfunc(i,r);
           dvalue=dRepulfunc(i,r,value,0.0);
           pRepul[i][j]=value;
         }
         else {
           value=repulfunc(i,r1[i]);
           dvalue=dRepulfunc(i,r1[i],value,1.0);
           pRepul[i][j]=-(r-rcut[i])*(r-rcut[i])*(value*(2.0*r-3.0*r1[i]+rcut[i])
               -dvalue*(r-r1[i])*(r1[i]-rcut[i]))/((r1[i]-rcut[i])
               *(r1[i]-rcut[i])*(r1[i]-rcut[i]));
         }
       }
       else {
         if(r>=rcut[i])
           pRepul[i][j]=0.0;
         else {
           value=repulfunc(i,r);
           dvalue=dRepulfunc(i,r,value,0.0);
           pRepul[i][j]=value*cutoff(r1[i],rcut[i],ncutoff,r);
         }
       }
     }
     pRepul[i][nr-1]=0.0;
     pRepul1[i][0]=pRepul[i][1]-pRepul[i][0];
     pRepul1[i][1]=0.5*(pRepul[i][2]-pRepul[i][0]);
     pRepul1[i][nr-2]=0.5*(pRepul[i][nr-1]-pRepul[i][nr-3]);
     pRepul1[i][nr-1]=pRepul[i][nr-1]-pRepul[i][nr-2];
     for(k=2;k<nr-2;k++)
       pRepul1[i][k]=((pRepul[i][k-2]-pRepul[i][k+2])+8.0*(pRepul[i][k+1]
           -pRepul[i][k-1]))/12.0;
     for(k=0;k<nr-1;k++) {
       pRepul2[i][k]=3.0*(pRepul[i][k+1]-pRepul[i][k])-2.0*pRepul1[i][k]-pRepul1[i][k+1];
       pRepul3[i][k]=pRepul1[i][k]+pRepul1[i][k+1]-2.0*(pRepul[i][k+1]-pRepul[i][k]);
     }
     pRepul2[i][nr-1]=0.0;
     pRepul3[i][nr-1]=0.0;
     for(k=0;k<nr;k++) {
       pRepul4[i][k]=pRepul1[i][k]/dr[i];
       pRepul5[i][k]=2.0*pRepul2[i][k]/dr[i];
       pRepul6[i][k]=3.0*pRepul3[i][k]/dr[i];
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairBOP::betaSfunc(int i,double r)
 {
   // Evaluate the sigma_* analytic form selected by nfunc for pair index i at
   // distance r:
   //   nfunc 1: beta0 * (r0/r)^n * exp(n*(r0/rc)^nc - n*(r/rc)^nc)
   //   nfunc 2: beta0 * exp(-n*r)
   //   nfunc 3: beta0 / r^n
   // No value is assigned for any other nfunc.
   double result;
 
   switch(nfunc) {
   case 1: {
     const double power=pow(sigma_r0[i]/r,sigma_n[i]);
     const double damp=exp(sigma_n[i]*pow(sigma_r0[i]
         /sigma_rc[i],sigma_nc[i])-sigma_n[i]*pow(r/sigma_rc[i],sigma_nc[i]));
     result=sigma_beta0[i]*(power*damp);
     break;
   }
   case 2:
     result=sigma_beta0[i]*exp(-sigma_n[i]*r);
     break;
   case 3:
     result=sigma_beta0[i]/pow(r,sigma_n[i]);
     break;
   }
   return result;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairBOP::dBetaSfunc(int i,double r,double value,double dmore)
 {
   // Derivative with respect to r of the sigma_* form chosen by nfunc, for
   // pair index i at distance r.
   //
   // value: the function value at r, as returned by betaSfunc(i,r).
   // dmore: the derivative is evaluated only when this equals 1.0.
   //
   // Returns 0.0 when dmore != 1.0 or nfunc is not 1, 2 or 3. Callers pass
   // dmore = 0.0 in places, so the result must not be left uninitialized.
   double temp_dvalue=0.0;
 
   if(dmore==1.0) {
     if(nfunc==1)
       temp_dvalue=-sigma_n[i]*value/r*(1.0+sigma_nc[i]
           *pow(r/sigma_rc[i],sigma_nc[i]));
     if(nfunc==2)
       temp_dvalue=-sigma_n[i]*value;
     if(nfunc==3)
       temp_dvalue=-sigma_n[i]*value/r;
   }
   return(temp_dvalue);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairBOP::betaPfunc(int i,double r)
 {
   // Evaluate the pi_* analytic form selected by nfunc for pair index i at
   // distance r:
   //   nfunc 1: beta0 * (r0/r)^n * exp(n*(r0/rc)^nc - n*(r/rc)^nc)
   //   nfunc 2: beta0 * exp(-n*r)
   //   nfunc 3: beta0 / r^n
   // No value is assigned for any other nfunc.
   double result;
 
   switch(nfunc) {
   case 1: {
     const double power=pow(pi_r0[i]/r,pi_n[i]);
     const double damp=exp(pi_n[i]*pow(pi_r0[i]
         /pi_rc[i],pi_nc[i])-pi_n[i]*pow(r/pi_rc[i],pi_nc[i]));
     result=pi_beta0[i]*(power*damp);
     break;
   }
   case 2:
     result=pi_beta0[i]*exp(-pi_n[i]*r);
     break;
   case 3:
     result=pi_beta0[i]/pow(r,pi_n[i]);
     break;
   }
   return result;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairBOP::dBetaPfunc(int i,double r,double value,double dmore)
 {
   // Derivative with respect to r of the pi_* form chosen by nfunc, for pair
   // index i at distance r.
   //
   // value: the function value at r, as returned by betaPfunc(i,r).
   // dmore: the derivative is evaluated only when this equals 1.0.
   //
   // Returns 0.0 when dmore != 1.0 or nfunc is not 1, 2 or 3. Callers pass
   // dmore = 0.0 in places, so the result must not be left uninitialized.
   double temp_dvalue=0.0;
 
   if(dmore==1.0) {
     if(nfunc==1)
       temp_dvalue=-pi_n[i]*value/r*(1.0+pi_nc[i]*pow(r/pi_rc[i],pi_nc[i]));
     if(nfunc==2)
       temp_dvalue=-pi_n[i]*value;
     if(nfunc==3)
       temp_dvalue=-pi_n[i]*value/r;
   }
   return(temp_dvalue);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairBOP::repulfunc(int i,double r)
 {
   // Evaluate the phi_* analytic form selected by nfunc for pair index i at
   // distance r:
   //   nfunc 1: phi0 * (r0/r)^m * exp(m*(r0/rc)^nc - m*(r/rc)^nc)
   //   nfunc 2: phi0 * exp(-m*r)
   //   nfunc 3: phi0 / r^m
   // No value is assigned for any other nfunc.
   double result;
 
   switch(nfunc) {
   case 1: {
     const double power=pow(phi_r0[i]/r,phi_m[i]);
     const double damp=exp(phi_m[i]*pow(phi_r0[i]/phi_rc[i]
         ,phi_nc[i])-phi_m[i]*pow(r/phi_rc[i],phi_nc[i]));
     result=phi0[i]*(power*damp);
     break;
   }
   case 2:
     result=phi0[i]*exp(-phi_m[i]*r);
     break;
   case 3:
     result=phi0[i]/pow(r,phi_m[i]);
     break;
   }
   return result;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairBOP::dRepulfunc(int i,double r,double value,double dmore)
 {
   // Derivative with respect to r of the phi_* form chosen by nfunc, for pair
   // index i at distance r.
   //
   // value: the function value at r, as returned by repulfunc(i,r).
   // dmore: the derivative is evaluated only when this equals 1.0.
   //
   // Returns 0.0 when dmore != 1.0 or nfunc is not 1, 2 or 3. Callers pass
   // dmore = 0.0 in places, so the result must not be left uninitialized.
   double temp_dvalue=0.0;
 
   if(dmore==1.0) {
     if(nfunc==1)
       temp_dvalue=-phi_m[i]*value/r*(1.0+phi_nc[i]*pow(r/phi_rc[i],phi_nc[i]));
     if(nfunc==2)
       temp_dvalue=-phi_m[i]*value;
     if(nfunc==3)
       temp_dvalue=-phi_m[i]*value/r;
   }
   return(temp_dvalue);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBOP::setSign()
 {
   int i,j,k;
   double y0,tmp,xBO,fth,cs,bigF;
   double epsilon,fsigma1,slope,sat;
 
   dBO=1.0/(nBOt-1.0);
   rdBO=1.0/dBO;
   for(i=0;i<npairs;i++) {
     for(j=0;j<nBOt;j++) {
       xBO=(double)j*dBO;
       if(which==1.0) {
         fth=0.0;
         if(xBO>alpha)
           fth=4.0/3.0*(xBO-alpha);
         if(sigma_f[i]<=fth)
           FsigBO[i][j]=2.0*sigma_f[i];
         else if(sigma_f[i]>=1.0-fth)
           FsigBO[i][j]=2.0*(1.0-sigma_f[i]);
         else {
           cs=0.0;
           if(xBO<alpha)
             cs=32.0*(alpha-xBO);
           bigF=(sigma_f[i]*(1.0-sigma_f[i])-fth*(1.0-fth))/square(1.0-2.0*fth);
           FsigBO[i][j]=2.0*fth+2.0*bigF*(1.0-2.0*fth)*(1.0+bigF*(1.0-cs*bigF));
         }
       }
       else if(which==2.0) {
         epsilon=0.0000000001;
         fsigma1=sigma_f[i];
         if(fsigma1>0.5)
           fsigma1=1.0-fsigma1;
         y0=alpha1*pow(fsigma1,beta1)*pow(0.5-fsigma1,gamma1);
         slope=(1.0-exp(-alpha2*pow(fsigma1,beta2)))/(1.0-exp(-alpha2*pow(0.5,beta2)));
         sat=alpha3*fsigma1+beta3;
         tmp=y0+slope*xBO+sat;
         FsigBO[i][j]=(tmp-sqrt(tmp*tmp-4.0*(-epsilon*sqrt(1.0+slope*slope)
             +y0*sat+slope*sat*xBO)))/2.0;
       }
     }
     FsigBO1[i][0]=FsigBO[i][1]-FsigBO[i][0];
     FsigBO1[i][1]=0.5*(FsigBO[i][2]-FsigBO[i][0]);
     FsigBO1[i][nBOt-2]=0.5*(FsigBO[i][nBOt-1]-FsigBO[i][nBOt-3]);
     FsigBO1[i][nBOt-1]=FsigBO[i][nBOt-1]-FsigBO[i][nBOt-2];
     for(k=2;k<nBOt-2;k++)
       FsigBO1[i][k]=((FsigBO[i][k-2]-FsigBO[i][k+2])+8.0*(FsigBO[i][k+1]
           -FsigBO[i][k-1]))/12.0;
     for(k=0;k<nBOt-1;k++) {
       FsigBO2[i][k]=3.0*(FsigBO[i][k+1]-FsigBO[i][k])-2.0*FsigBO1[i][k]-FsigBO1[i][k+1];
       FsigBO3[i][k]=FsigBO1[i][k]+FsigBO1[i][k+1]-2.0*(FsigBO[i][k+1]-FsigBO[i][k]);
     }
     FsigBO2[i][nBOt-1]=0.0;
     FsigBO3[i][nBOt-1]=0.0;
     for(k=0;k<nBOt;k++) {
       FsigBO4[i][k]=FsigBO1[i][k]/dBO;
       FsigBO5[i][k]=2.0*FsigBO2[i][k]/dBO;
       FsigBO6[i][k]=3.0*FsigBO3[i][k]/dBO;
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairBOP::cutoff(double rp,double vrcut,int mode,double r)
 {
   // Switching factor evaluated at r from the pair (rp, vrcut) and the class
   // parameters rbig / rsmall. mode 1 uses an erfc-based form; every other
   // mode uses an exponential form exp(-a*r^b), shifted and normalised so
   // that it equals 1 at rp and 0 at vrcut.
   if(mode==1) {
     const double arg=(rsmall-rbig)*(r-rp)/(vrcut-rp)+rbig;
     return (erfc(arg)-erfc(rsmall))/(erfc(rbig)-erfc(rsmall));
   }
 
   const double expo=log(log(rbig)/log(rsmall))/log(rp/vrcut);
   const double scale=-log(rbig)/pow(rp,expo);
   const double numer=exp(-scale*pow(r,expo))-exp(-scale*pow(vrcut,expo));
   const double denom=exp(-scale*pow(rp,expo))-exp(-scale*pow(vrcut,expo));
   return numer/denom;
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
 ------------------------------------------------------------------------- */
 
 double PairBOP::memory_usage()
 {
   int nlocal,nghost,nall;
   int n = atom->ntypes;
   nlocal = atom->nlocal;
   nghost = atom->nghost;
   nall = nlocal + nghost;
   double bytes = 0.0;
 
 // rcut
   bytes += npairs * sizeof (double);
 // dr
   bytes += npairs * sizeof (double);
 // rdr
   bytes += npairs * sizeof (double);
 // setflag
   bytes += (n+1) * (n+1) * sizeof (int);
 // cutsq
   bytes += (n+1) * (n+1) * sizeof (double);
 // cutghost
   bytes += (n+1) * (n+1) * sizeof (double);
 // cutghost
   bytes += (n+1) * (n+1) * sizeof (double);
 // pBetaS
   bytes += npairs * nr * sizeof (double);
 // pBetaS1
   bytes += npairs * nr * sizeof (double);
 // pBetaS2
   bytes += npairs * nr * sizeof (double);
 // pBetaS3
   bytes += npairs * nr * sizeof (double);
 // pBetaS4
   bytes += npairs * nr * sizeof (double);
 // pBetaS5
   bytes += npairs * nr * sizeof (double);
 // pBetaS6
   bytes += npairs * nr * sizeof (double);
 // pBetaP
   bytes += npairs * nr * sizeof (double);
 // pBetaP1
   bytes += npairs * nr * sizeof (double);
 // pBetaP2
   bytes += npairs * nr * sizeof (double);
 // pBetaP3
   bytes += npairs * nr * sizeof (double);
 // pBetaP4
   bytes += npairs * nr * sizeof (double);
 // pBetaP5
   bytes += npairs * nr * sizeof (double);
 // pBetaP6
   bytes += npairs * nr * sizeof (double);
 // pRepul
   bytes += npairs * nr * sizeof (double);
 // pRepul1
   bytes += npairs * nr * sizeof (double);
 // pRepul2
   bytes += npairs * nr * sizeof (double);
 // pRepul3
   bytes += npairs * nr * sizeof (double);
 // pRepul4
   bytes += npairs * nr * sizeof (double);
 // pRepul5
   bytes += npairs * nr * sizeof (double);
 // pRepul6
   bytes += npairs * nr * sizeof (double);
 // FsigBO
   bytes += npairs * nr * sizeof (double);
 // FsigBO1
   bytes += npairs * nr * sizeof (double);
 // FsigBO2
   bytes += npairs * nr * sizeof (double);
 // FsigBO3
   bytes += npairs * nr * sizeof (double);
 // FsigBO4
   bytes += npairs * nr * sizeof (double);
 // FsigBO5
   bytes += npairs * nr * sizeof (double);
 // FsigBO6
   bytes += npairs * nr * sizeof (double);
 // itypeSigBk
   bytes += neigh_total *neigh_ct* sizeof(int);
 // nSigBk
   bytes += neigh_total * sizeof(int);
 // sigB
   bytes += neigh_total * sizeof(int);
 // sigB1
   bytes += neigh_total * sizeof(int);
 // nPiBk
   bytes += neigh_total * sizeof(int);
 // piB
   bytes += neigh_total * sizeof(int);
 // itypePiBk
   bytes += neigh_total *neigh_ct* sizeof(int);
 // BOP_index
     bytes += nall * sizeof(double);
   if(otfly==0) {
 // cosAng
     bytes += cos_total* sizeof(double);
 // dcAng
     bytes += cos_total * 3 * 2 * sizeof(double);
 // disij
     bytes += neigh_total * 3 * sizeof(double);
 // rij
     bytes += neigh_total * sizeof(double);
 // betaS
     bytes += neigh_total * sizeof(double);
 // dBetaS
     bytes += neigh_total * sizeof(double);
 // betaP
     bytes += neigh_total * sizeof(double);
 // dBetaP
     bytes += neigh_total * sizeof(double);
 // repul
     bytes += neigh_total * sizeof(double);
 // dRepul
     bytes += neigh_total * sizeof(double);
 // cos_index
     bytes += nall * sizeof(double);
   }
 // pi_a
   bytes += npairs * sizeof(double);
 // pro_delta
   bytes += npairs * sizeof(double);
 // pi_delta
   bytes += npairs * sizeof(double);
 // pi_p
   bytes += npairs * sizeof(double);
 // pi_c
   bytes += npairs * sizeof(double);
 // sigma_r0
   bytes += npairs * sizeof(double);
 // pi_r0
   bytes += npairs * sizeof(double);
 // phi_r0
   bytes += npairs * sizeof(double);
 // sigma_rc
   bytes += npairs * sizeof(double);
 // pi_rc
   bytes += npairs * sizeof(double);
 // pi_a
   bytes += npairs * sizeof(double);
 // pro_delta
   bytes += npairs * sizeof(double);
 // pi_delta
   bytes += npairs * sizeof(double);
 // pi_p
   bytes += npairs * sizeof(double);
 // pi_c
   bytes += npairs * sizeof(double);
 // sigma_r0
   bytes += npairs * sizeof(double);
 // pi_r0
   bytes += npairs * sizeof(double);
 // phi_r0
   bytes += npairs * sizeof(double);
 // sigma_rc
   bytes += npairs * sizeof(double);
 // pi_rc
   bytes += npairs * sizeof(double);
 // phi_rc
   bytes += npairs * sizeof(double);
 // r1
   bytes += npairs * sizeof(double);
 // sigma_beta0
   bytes += npairs * sizeof(double);
 // pi_beta0
   bytes += npairs * sizeof(double);
 // phi0
   bytes += npairs * sizeof(double);
 // sigma_n
   bytes += npairs * sizeof(double);
 // pi_n
   bytes += npairs * sizeof(double);
 // phi_m
   bytes += npairs * sizeof(double);
 // sigma_nc
   bytes += npairs * sizeof(double);
 // pi_nc
   bytes += npairs * sizeof(double);
 // phi_nc
   bytes += npairs * sizeof(double);
 // pro
   bytes += npairs * sizeof(double);
 // sigma_delta
   bytes += npairs * sizeof(double);
 // sigma_c
   bytes += npairs * sizeof(double);
 // sigma_a
   bytes += npairs * sizeof(double);
 // sigma_g0
   bytes += bop_types * bop_types *bop_types * sizeof(double);
 // sigma_g1
   bytes += bop_types * bop_types *bop_types * sizeof(double);
 // sigma_g2
   bytes += bop_types * bop_types *bop_types * sizeof(double);
 // sigma_g3
   bytes += bop_types * bop_types *bop_types * sizeof(double);
 // sigma_g4
   bytes += bop_types * bop_types *bop_types * sizeof(double);
 // sigma_f
   bytes += npairs * sizeof(double);
 // sigma_k
   bytes += npairs * sizeof(double);
 // small3
   bytes += npairs * sizeof(double);
 // bt_pi
   bytes += maxneigh*(maxneigh/2) *sizeof(B_PI);
 // bt_sigma
   bytes += maxneigh*(maxneigh/2) *sizeof(B_SG);
 
   return bytes;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBOP::memory_theta_create()
 {
   // Allocate the per-neighbor work arrays (plus the cached geometry and
   // table-value arrays when otfly==0) and raise update_list.
 
   // second dimension of the itype*Bk arrays: (maxneigh-1)^2, or
   // (maxneigh-1)^3 when maxneigh is below 8
   neigh_ct=(maxneigh-1)*(maxneigh-1);
   if(maxneigh<8)
     neigh_ct*=(maxneigh-1);
 
   // arrays with one row per neighbor entry
   memory->create(itypeSigBk,neigh_total,neigh_ct,"itypeSigBk");
   memory->create(nSigBk,neigh_total,"nSigBk");
   memory->create(sigB,neigh_total,"sigB");
   memory->create(sigB1,neigh_total,"sigB1");
   memory->create(itypePiBk,neigh_total,neigh_ct,"itypePiBk");
   memory->create(nPiBk,neigh_total,"nPiBk");
   memory->create(piB,neigh_total,"piB");
   memory->create(neigh_flag,neigh_total,"neigh_flag");
 
   // arrays that exist only when otfly==0
   if(otfly==0) {
     memory->create(cosAng,cos_total,"BOP:cosAng");
     memory->create(dcAng,cos_total*2,3,2,"BOP:dcAng");
     memory->create(disij,3,neigh_total,"disij");
     memory->create(rij,neigh_total,"rij");
     memory->create(betaS,neigh_total,"betaS");
     memory->create(dBetaS,neigh_total,"dBetaS");
     memory->create(betaP,neigh_total,"betaP");
     memory->create(dBetaP,neigh_total,"dBetaP");
     memory->create(repul,neigh_total,"repul");
     memory->create(dRepul,neigh_total,"dRepul");
   }
 
   update_list=1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBOP::memory_theta_grow()
 {
   // Resize the per-neighbor work arrays (plus the cached geometry and
   // table-value arrays when otfly==0) to the current neigh_total /
   // cos_total / maxneigh and raise update_list.
 
   // second dimension of the itype*Bk arrays: (maxneigh-1)^2, or
   // (maxneigh-1)^3 when maxneigh is below 8
   neigh_ct=(maxneigh-1)*(maxneigh-1);
   if(maxneigh<8)
     neigh_ct*=(maxneigh-1);
 
   // arrays with one row per neighbor entry
   memory->grow(itypeSigBk,neigh_total,neigh_ct,"itypeSigBk");
   memory->grow(nSigBk,neigh_total,"nSigBk");
   memory->grow(sigB,neigh_total,"sigB");
   memory->grow(sigB1,neigh_total,"sigB1");
   memory->grow(itypePiBk,neigh_total,neigh_ct,"itypePiBk");
   memory->grow(nPiBk,neigh_total,"nPiBk");
   memory->grow(piB,neigh_total,"piB");
   memory->grow(neigh_flag,neigh_total,"neigh_flag");
 
   // arrays that exist only when otfly==0
   if(otfly==0) {
     memory->grow(cosAng,cos_total,"BOP:cosAng");
     memory->grow(dcAng,cos_total*2,3,2,"BOP:dcAng");
     memory->grow(disij,3,neigh_total,"disij");
     memory->grow(rij,neigh_total,"rij");
     memory->grow(betaS,neigh_total,"betaS");
     memory->grow(dBetaS,neigh_total,"dBetaS");
     memory->grow(betaP,neigh_total,"betaP");
     memory->grow(dBetaP,neigh_total,"dBetaP");
     memory->grow(repul,neigh_total,"repul");
     memory->grow(dRepul,neigh_total,"dRepul");
   }
 
   update_list=1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBOP::memory_theta_destroy()
 {
   // release the arrays that are always present
   memory->destroy(itypeSigBk);
   memory->destroy(nSigBk);
   memory->destroy(sigB);
   memory->destroy(sigB1);
   memory->destroy(itypePiBk);
   memory->destroy(nPiBk);
   memory->destroy(piB);
   memory->destroy(neigh_flag);
 
   // release the arrays that are only present when otfly is zero
   if (!otfly) {
     memory->destroy(cosAng);
     memory->destroy(dcAng);
     memory->destroy(disij);
     memory->destroy(rij);
     memory->destroy(betaS);
     memory->destroy(dBetaS);
     memory->destroy(betaP);
     memory->destroy(dBetaP);
     memory->destroy(repul);
     memory->destroy(dRepul);
   }
 
   update_list = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBOP::create_pi(int n_tot)
 {
   // storage for n_tot B_PI records; allocate_pi records that bt_pi exists
   bt_pi = (B_PI *) memory->smalloc(sizeof(B_PI)*n_tot,"BOP:bt_pi");
   allocate_pi = 1;
 }
 
 void PairBOP::create_sigma(int n_tot)
 {
   // storage for n_tot B_SG records; allocate_sigma records that bt_sg exists
   bt_sg = (B_SG *) memory->smalloc(sizeof(B_SG)*n_tot,"BOP:bt_sg");
   allocate_sigma = 1;
 }
 
 void PairBOP::destroy_pi()
 {
   // free the B_PI array and clear its allocation flag
   memory->destroy(bt_pi);
   allocate_pi = 0;
 }
 
 void PairBOP::destroy_sigma()
 {
   // free the B_SG array and clear its allocation flag
   memory->destroy(bt_sg);
   allocate_sigma = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBOP::grow_pi(int n1, int n2)
 {
   int i,j;
   B_PI *bt_temp;
   bt_temp = (B_PI *) memory->smalloc(n1*sizeof(B_PI),"BOP:b_temp");
   for(i=0;i<n1;i++) {
     bt_temp[i].temp = bt_pi[i].temp;
     bt_temp[i].i = bt_pi[i].i;
     bt_temp[i].j = bt_pi[i].j;
     for(j=0;j<3;j++) {
       bt_temp[i].dAA[j] = bt_pi[i].dAA[j];
       bt_temp[i].dBB[j] = bt_pi[i].dBB[j];
       bt_temp[i].dPiB[j] = bt_pi[i].dPiB[j];
     }
   }
   memory->destroy(bt_pi);
   bt_pi=NULL;
   bt_pi = (B_PI *) memory->smalloc(n2*sizeof(B_PI),"BOP:bt_pi");
   for(i=0;i<n1;i++) {
     bt_pi[i].temp = bt_temp[i].temp;
     bt_pi[i].i = bt_temp[i].i;
     bt_pi[i].j = bt_temp[i].j;
     for(j=0;j<3;j++) {
       bt_pi[i].dAA[j] = bt_temp[i].dAA[j];
       bt_pi[i].dBB[j] = bt_temp[i].dBB[j];
       bt_pi[i].dPiB[j] = bt_temp[i].dPiB[j];
     }
   }
   for(i=n1;i<n2;i++) {
     bt_pi[i].i = -1;
     bt_pi[i].j = -1;
     for(j=0;j<3;j++) {
       bt_pi[i].dAA[j] = 0.0;
       bt_pi[i].dBB[j] = 0.0;
       bt_pi[i].dPiB[j] = 0.0;
     }
   }
   memory->destroy(bt_temp);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBOP::grow_sigma(int n1,int n2)
 {
   int i,j;
   B_SG *bt_temp;
   bt_temp = (B_SG *) memory->smalloc(n1*sizeof(B_SG),"BOP:bt_temp");
   for(i=0;i<n1;i++) {
     bt_temp[i].temp = bt_sg[i].temp;
     bt_temp[i].i = bt_sg[i].i;
     bt_temp[i].j = bt_sg[i].j;
     for(j=0;j<3;j++) {
       bt_temp[i].dAA[j] = bt_sg[i].dAA[j];
       bt_temp[i].dBB[j] = bt_sg[i].dBB[j];
       bt_temp[i].dCC[j] = bt_sg[i].dCC[j];
       bt_temp[i].dDD[j] = bt_sg[i].dDD[j];
       bt_temp[i].dEE[j] = bt_sg[i].dEE[j];
       bt_temp[i].dEE1[j] = bt_sg[i].dEE1[j];
       bt_temp[i].dFF[j] = bt_sg[i].dFF[j];
       bt_temp[i].dAAC[j] = bt_sg[i].dAAC[j];
       bt_temp[i].dBBC[j] = bt_sg[i].dBBC[j];
       bt_temp[i].dCCC[j] = bt_sg[i].dCCC[j];
       bt_temp[i].dDDC[j] = bt_sg[i].dDDC[j];
       bt_temp[i].dEEC[j] = bt_sg[i].dEEC[j];
       bt_temp[i].dFFC[j] = bt_sg[i].dFFC[j];
       bt_temp[i].dGGC[j] = bt_sg[i].dGGC[j];
       bt_temp[i].dUT[j] = bt_sg[i].dUT[j];
       bt_temp[i].dSigB1[j] = bt_sg[i].dSigB1[j];
       bt_temp[i].dSigB[j] = bt_sg[i].dSigB[j];
     }
   }
   memory->destroy(bt_sg);
   bt_sg=NULL;
   bt_sg = (B_SG *) memory->smalloc(n2*sizeof(B_SG),"BOP:bt_sg");
   for(i=0;i<n1;i++) {
     bt_sg[i].temp = bt_temp[i].temp;
     bt_sg[i].i = bt_temp[i].i;
     bt_sg[i].j = bt_temp[i].j;
     for(j=0;j<3;j++) {
       bt_sg[i].dAA[j] = bt_temp[i].dAA[j];
       bt_sg[i].dBB[j] = bt_temp[i].dBB[j];
       bt_sg[i].dCC[j] = bt_temp[i].dCC[j];
       bt_sg[i].dDD[j] = bt_temp[i].dDD[j];
       bt_sg[i].dEE[j] = bt_temp[i].dEE[j];
       bt_sg[i].dEE1[j] = bt_temp[i].dEE1[j];
       bt_sg[i].dFF[j] = bt_temp[i].dFF[j];
       bt_sg[i].dAAC[j] = bt_temp[i].dAAC[j];
       bt_sg[i].dBBC[j] = bt_temp[i].dBBC[j];
       bt_sg[i].dCCC[j] = bt_temp[i].dCCC[j];
       bt_sg[i].dDDC[j] = bt_temp[i].dDDC[j];
       bt_sg[i].dEEC[j] = bt_temp[i].dEEC[j];
       bt_sg[i].dFFC[j] = bt_temp[i].dFFC[j];
       bt_sg[i].dGGC[j] = bt_temp[i].dGGC[j];
       bt_sg[i].dUT[j] = bt_temp[i].dUT[j];
       bt_sg[i].dSigB1[j] = bt_temp[i].dSigB1[j];
       bt_sg[i].dSigB[j] = bt_temp[i].dSigB[j];
     }
   }
   for(i=n1;i<n2;i++) {
     bt_sg[i].i = -1;
     bt_sg[i].j = -1;
     for(j=0;j<3;j++) {
       bt_sg[i].dAA[j] = 0.0;
       bt_sg[i].dBB[j] = 0.0;
       bt_sg[i].dCC[j] = 0.0;
       bt_sg[i].dDD[j] = 0.0;
       bt_sg[i].dEE[j] = 0.0;
       bt_sg[i].dEE1[j] = 0.0;
       bt_sg[i].dFF[j] = 0.0;
       bt_sg[i].dAAC[j] = 0.0;
       bt_sg[i].dBBC[j] = 0.0;
       bt_sg[i].dCCC[j] = 0.0;
       bt_sg[i].dDDC[j] = 0.0;
       bt_sg[i].dEEC[j] = 0.0;
       bt_sg[i].dFFC[j] = 0.0;
       bt_sg[i].dGGC[j] = 0.0;
       bt_sg[i].dUT[j] = 0.0;
       bt_sg[i].dSigB1[j] = 0.0;
       bt_sg[i].dSigB[j] = 0.0;
     }
   }
   memory->destroy(bt_temp);
 }
diff --git a/src/MANYBODY/pair_comb.cpp b/src/MANYBODY/pair_comb.cpp
index 0d32eef0f..be536e979 100644
--- a/src/MANYBODY/pair_comb.cpp
+++ b/src/MANYBODY/pair_comb.cpp
@@ -1,2125 +1,2125 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Tzu-Ray Shan (U Florida, present: tnshan@sandia.gov)
    LAMMPS implementation of the Charge-optimized many-body (COMB) potential
    based on the HELL MD program (Prof Simon Phillpot, UF, sphil@mse.ufl.edu)
    and Aidan Thompson's Tersoff code in LAMMPS
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_comb.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "group.h"
 #include "update.h"
 #include "my_page.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 #define MAXLINE 1024
 #define DELTA 4
 #define PGDELTA 1
 #define MAXNEIGH 24
 
 /* ---------------------------------------------------------------------- */
 
 PairComb::PairComb(LAMMPS *lmp) : Pair(lmp)
 {
   // flags inherited from Pair
 
   single_enable = restartinfo = 0;
   one_coeff = manybody_flag = 1;
 
   // counters start at zero
 
   nmax = nelements = nparams = maxparam = 0;
   pgsize = oneatom = 0;
 
   // every pointer starts out NULL so the destructor can run safely
   // on a partially set-up object
 
   NCo = NULL;
   bbij = NULL;
   elements = NULL;
   params = NULL;
   elem2param = NULL;
 
   intype = NULL;
   fafb = NULL;
   dfafb = NULL;
   ddfafb = NULL;
   phin = NULL;
   dphin = NULL;
   erpaw = NULL;
 
   sht_num = NULL;
   sht_first = NULL;
   ipage = NULL;
 
   // set comm size needed by this Pair
 
   comm_forward = comm_reverse = 1;
 }
 
 /* ----------------------------------------------------------------------
    check if allocated, since class can be destructed when incomplete
 ------------------------------------------------------------------------- */
 
 PairComb::~PairComb()
 {
   // element name table: free each name, then the table itself
 
   if (elements) {
     for (int ielem = 0; ielem < nelements; ielem++)
       delete [] elements[ielem];
   }
   delete [] elements;
 
   // potential parameters and the element-triplet lookup
 
   memory->sfree(params);
   memory->destroy(elem2param);
 
   // per-atom and lookup-table arrays
 
   memory->destroy(NCo);
   memory->destroy(bbij);
   memory->destroy(intype);
   memory->destroy(fafb);
   memory->destroy(dfafb);
   memory->destroy(ddfafb);
   memory->destroy(phin);
   memory->destroy(dphin);
   memory->destroy(erpaw);
 
   // short-range neighbor list storage
 
   memory->destroy(sht_num);
   memory->sfree(sht_first);
   delete [] ipage;
 
   // arrays created by allocate() exist only once it has run
 
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
   delete [] map;
   delete [] esm;
 }
 
 /* ---------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    accumulate COMB forces into atom->f for the atoms in the full
    neighbor list, tallying energy/virial via ev_tally when evflag is set
    eflag,vflag = energy/virial request flags passed on to ev_setup()
    per atom i the work is done in this order:
      self energy, two-body terms over the full list,
      coordination count (only if cor_flag), then the bond-order and
      three-body terms over the short-range list built by Short_neigh()
 ------------------------------------------------------------------------- */
 
 void PairComb::compute(int eflag, int vflag)
 {
   int i,j,k,ii,jj,kk,inum,jnum,iparam_i;
   int itype,jtype,ktype,iparam_ij,iparam_ijk;
   tagint itag,jtag;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,rsq1,rsq2;
   double delr1[3],delr2[3],fi[3],fj[3],fk[3];
   double zeta_ij,prefactor;
   int *ilist,*jlist,*numneigh,**firstneigh;
   int mr1,mr2,mr3;
   int rsc,inty;
   double elp_ij,filp[3],fjlp[3],fklp[3];
   double iq,jq;
   double yaself;
   double potal,fac11,fac11e;
   double vionij,fvionij,sr1,sr2,sr3,Eov,Fov;
   int sht_jnum, *sht_jlist, nj;
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = vflag_atom = 0;
 
   // Build short range neighbor list
 
   Short_neigh();
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   tagint *tag = atom->tag;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int newton_pair = force->newton_pair;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   yaself = vionij = fvionij = Eov = Fov = 0.0;
 
   // self energy correction term: potal
 
   potal_calc(potal,fac11,fac11e);
 
   // loop over full neighbor list of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     itag = tag[i];
     itype = map[type[i]];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     iq = q[i];
     NCo[i] = 0;
     nj = 0;
     iparam_i = elem2param[itype][itype][itype];
 
     // self energy, only on i atom
 
     yaself = self(&params[iparam_i],iq,potal);
 
     if (evflag) ev_tally(i,i,nlocal,0,yaself,0.0,0.0,0.0,0.0,0.0);
 
     // two-body interactions (long and short repulsive)
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
     sht_jlist = sht_first[i];
     sht_jnum = sht_num[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
       jtag = tag[j];
 
       // the list is full, so each pair shows up as (i,j) and as (j,i);
       // skip one of the two based on the parity of the tag sum, and
       // fall back to comparing coordinates (z, then y, then x) when
       // both tags are equal
 
       if (itag > jtag) {
         if ((itag+jtag) % 2 == 0) continue;
       } else if (itag < jtag) {
         if ((itag+jtag) % 2 == 1) continue;
       } else {
         if (x[j][2] < x[i][2]) continue;
         if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
         if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
       }
 
       // Qj calculates 2-body Coulombic
 
       jtype = map[type[j]];
       jq = q[j];
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       iparam_ij = elem2param[itype][jtype][jtype];
 
       // long range q-dependent
 
       if (rsq > params[iparam_ij].lcutsq) continue;
 
       inty = intype[itype][jtype];
 
       // polynomial three-point interpolation
 
       tri_point(rsq, mr1, mr2, mr3, sr1, sr2, sr3, itype);
 
       // 1/r energy and forces
 
       direct(inty,mr1,mr2,mr3,rsq,sr1,sr2,sr3,iq,jq,
              potal,fac11,fac11e,vionij,fvionij);
 
       // field correction to self energy
 
       field(&params[iparam_ij],rsq,iq,jq,vionij,fvionij);
 
       // polarization field
       // sums up long range forces
 
       f[i][0] += delx*fvionij;
       f[i][1] += dely*fvionij;
       f[i][2] += delz*fvionij;
       f[j][0] -= delx*fvionij;
       f[j][1] -= dely*fvionij;
       f[j][2] -= delz*fvionij;
 
       if (evflag)
         ev_tally(i,j,nlocal,newton_pair,0.0,vionij,fvionij,delx,dely,delz);
 
       // short range q-independent
 
       if (rsq > params[iparam_ij].cutsq) continue;
 
       repulsive(&params[iparam_ij],rsq,fpair,eflag,evdwl,iq,jq);
 
       // repulsion is pure two-body, sums up pair repulsive forces
 
       f[i][0] += delx*fpair;
       f[i][1] += dely*fpair;
       f[i][2] += delz*fpair;
       f[j][0] -= delx*fpair;
       f[j][1] -= dely*fpair;
       f[j][2] -= delz*fpair;
 
       if (evflag)
         ev_tally(i,j,nlocal,newton_pair,evdwl,0.0,fpair,delx,dely,delz);
     }
 
     // accumulate coordination number information
     // NCo[i] counts short-list neighbors within cutsq whose pair
     // parameters have hfocor > 0
 
     if (cor_flag) {
       for (jj = 0; jj < sht_jnum; jj++) {
         j = sht_jlist[jj];
         jtype = map[type[j]];
         iparam_ij = elem2param[itype][jtype][jtype];
 
         if(params[iparam_ij].hfocor > 0.0 ) {
           delr1[0] = x[j][0] - xtmp;
           delr1[1] = x[j][1] - ytmp;
           delr1[2] = x[j][2] - ztmp;
           rsq1 = vec3_dot(delr1,delr1);
 
           if (rsq1 > params[iparam_ij].cutsq) continue;
           NCo[i] += 1;
         }
       }
     }
 
     // three-body interactions
     // half i-j loop
 
     for (jj = 0; jj < sht_jnum; jj++) {
       j = sht_jlist[jj];
 
       jtype = map[type[j]];
       iparam_ij = elem2param[itype][jtype][jtype];
 
       // this Qj for q-dependent BSi
 
       jq = q[j];
 
       delr1[0] = x[j][0] - xtmp;
       delr1[1] = x[j][1] - ytmp;
       delr1[2] = x[j][2] - ztmp;
       rsq1 = vec3_dot(delr1,delr1);
 
       if (rsq1 > params[iparam_ij].cutsq) continue;
       // nj = running count of neighbors j of atom i that passed the cutoff
       nj ++;
 
       // accumulate bondorder zeta for each i-j interaction via loop over k
 
       zeta_ij = 0.0;
       cuo_flag1 = 0; cuo_flag2 = 0;
 
       for (kk = 0; kk < sht_jnum; kk++) {
         k = sht_jlist[kk];
         if (j == k) continue;
         ktype = map[type[k]];
         iparam_ijk = elem2param[itype][jtype][ktype];
 
         delr2[0] = x[k][0] - xtmp;
         delr2[1] = x[k][1] - ytmp;
         delr2[2] = x[k][2] - ztmp;
         rsq2 = vec3_dot(delr2,delr2);
 
         if (rsq2 > params[iparam_ijk].cutsq) continue;
 
         zeta_ij += zeta(&params[iparam_ijk],rsq1,rsq2,delr1,delr2);
 
         // hfocor values of exactly -2.0 and -1.0 act as markers here
         if (params[iparam_ijk].hfocor == -2.0) cuo_flag1 = 1;
         if (params[iparam_ijk].hfocor == -1.0) cuo_flag2 = 1;
       }
 
       // cuo_flag is set only when both markers were seen among the k neighbors
       if (cuo_flag1 && cuo_flag2) cuo_flag = 1;
       else cuo_flag = 0;
 
       force_zeta(&params[iparam_ij],eflag,i,nj,rsq1,zeta_ij,
                  iq,jq,fpair,prefactor,evdwl);
 
       // over-coordination correction for HfO2
       // NOTE(review): Eov and Fov are added even when Over_cor() is not
       // called for this pair; they start at 0.0 and otherwise presumably
       // hold whatever the last Over_cor() call stored - confirm intended
 
       if (cor_flag && NCo[i] != 0)
         Over_cor(&params[iparam_ij],rsq1,NCo[i],Eov, Fov);
       evdwl +=  Eov;
       fpair +=  Fov;
 
       f[i][0] += delr1[0]*fpair;
       f[i][1] += delr1[1]*fpair;
       f[i][2] += delr1[2]*fpair;
       f[j][0] -= delr1[0]*fpair;
       f[j][1] -= delr1[1]*fpair;
       f[j][2] -= delr1[2]*fpair;
 
       // delr1 points from i to j, so both fpair and delr1 are negated
       // in the tally call
       if (evflag) ev_tally(i,j,nlocal,newton_pair,
                            evdwl,0.0,-fpair,-delr1[0],-delr1[1],-delr1[2]);
 
       // attractive term via loop over k (3-body forces)
 
       for (kk = 0; kk < sht_jnum; kk++) {
         k = sht_jlist[kk];
         if (j == k) continue;
         ktype = map[type[k]];
         iparam_ijk = elem2param[itype][jtype][ktype];
 
         delr2[0] = x[k][0] - xtmp;
         delr2[1] = x[k][1] - ytmp;
         delr2[2] = x[k][2] - ztmp;
         rsq2 = vec3_dot(delr2,delr2);
         if (rsq2 > params[iparam_ijk].cutsq) continue;
 
         for (rsc = 0; rsc < 3; rsc++)
           fi[rsc] = fj[rsc] = fk[rsc] = 0.0;
 
         attractive(&params[iparam_ijk],prefactor,
                    rsq1,rsq2,delr1,delr2,fi,fj,fk);
 
         // 3-body LP and BB correction and forces
 
         elp_ij = elp(&params[iparam_ijk],rsq1,rsq2,delr1,delr2);
         flp(&params[iparam_ijk],rsq1,rsq2,delr1,delr2,filp,fjlp,fklp);
 
         for (rsc = 0; rsc < 3; rsc++) {
           fi[rsc] += filp[rsc];
           fj[rsc] += fjlp[rsc];
           fk[rsc] += fklp[rsc];
         }
 
         for (rsc = 0; rsc < 3; rsc++) {
           f[i][rsc] += fi[rsc];
           f[j][rsc] += fj[rsc];
           f[k][rsc] += fk[rsc];
         }
 
         if (evflag)
           ev_tally(i,j,nlocal,newton_pair,elp_ij,0.0,0.0,0.0,0.0,0.0);
         if (vflag_atom) v_tally3(i,j,k,fj,fk,delr1,delr2);
 
       }
     }
 
     // NOTE(review): cuo_flag here is whatever the last j that passed the
     // cutoff left behind (or an earlier value if none did), and the
     // scaling modifies the stored cutsq of params[iparam_i] in place
 
     if (cuo_flag) params[iparam_i].cutsq *= 0.65;
   }
 
   cuo_flag = 0;
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb::allocate()
 {
   // setflag and cutsq are (ntypes+1) x (ntypes+1);
   // map holds ntypes+1 entries, esm holds ntypes entries
   const int ntypes = atom->ntypes;
   const int np1 = ntypes+1;
 
   allocated = 1;
 
   memory->create(setflag,np1,np1,"pair:setflag");
   memory->create(cutsq,np1,np1,"pair:cutsq");
 
   map = new int[np1];
   esm = new double[ntypes];
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairComb::settings(int narg, char **arg)
 {
   if (narg > 0) error->all(FLERR,"Illegal pair_style command");
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairComb::coeff(int narg, char **arg)
 {
   int i,j,n;
 
   if (!allocated) allocate();
 
   if (narg != 3 + atom->ntypes)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // insure I,J args are * *
 
   if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // read args that map atom types to elements in potential file
   // map[i] = which element the Ith atom type is, -1 if NULL
   // nelements = # of unique elements
   // elements = list of element names
 
   if (elements) {
     for (i = 0; i < nelements; i++) delete [] elements[i];
     delete [] elements;
   }
   elements = new char*[atom->ntypes];
   for (i = 0; i < atom->ntypes; i++) elements[i] = NULL;
 
   nelements = 0;
   for (i = 3; i < narg; i++) {
     if (strcmp(arg[i],"NULL") == 0) {
       map[i-2] = -1;
       continue;
     }
     for (j = 0; j < nelements; j++)
       if (strcmp(arg[i],elements[j]) == 0) break;
     map[i-2] = j;
     if (j == nelements) {
       n = strlen(arg[i]) + 1;
       elements[j] = new char[n];
       strcpy(elements[j],arg[i]);
       nelements++;
     }
   }
 
   // read potential file and initialize potential parameters
 
   read_file(arg[2]);
   setup();
 
   n = atom->ntypes;
 
   // generate streitz-mintmire direct 1/r energy look-up table
 
   if (comm->me == 0 && screen) fprintf(screen,"Pair COMB:\n");
   if (comm->me == 0 && screen)
     fprintf(screen,"  generating Coulomb integral lookup table ...\n");
   sm_table();
 
   if (cor_flag && comm->me == 0 && screen)
     fprintf(screen,"  will apply over-coordination correction ...\n");
   if (!cor_flag&& comm->me == 0 && screen)
     fprintf(screen,"  will not apply over-coordination correction ...\n");
 
   // clear setflag since coeff() called once with I,J = * *
 
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       setflag[i][j] = 0;
 
   // set setflag i,j for type pairs where both are mapped to elements
 
   int count = 0;
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       if (map[i] >= 0 && map[j] >= 0) {
         setflag[i][j] = 1;
         count++;
       }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 // Checks the prerequisites of this pair style (atom IDs, newton pair on,
 // per-atom charge), requests a full neighbor list, and (re)creates the
 // per-thread MyPage<int> array whenever the neighbor page settings differ
 // from the cached pgsize/oneatom values.
 void PairComb::init_style()
 {
   if (atom->tag_enable == 0)
     error->all(FLERR,"Pair style COMB requires atom IDs");
   if (force->newton_pair == 0)
     error->all(FLERR,"Pair style COMB requires newton pair on");
   if (!atom->q_flag)
     error->all(FLERR,"Pair style COMB requires atom attribute q");
 
   // ptr to QEQ fix
   // (disabled: the lookup below is commented out)
 
   //for (i = 0; i < modify->nfix; i++)
   //  if (strcmp(modify->fix[i]->style,"qeq") == 0) break;
   //if (i < modify->nfix) fixqeq = (FixQEQ *) modify->fix[i];
   //else fixqeq = NULL;
 
   // need a full neighbor list
   // the request defaults are overridden below: half = 0, full = 1
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
 
   // local Comb neighbor list
   // create pages if first time or if neighbor pgsize/oneatom has changed
 
   int create = 0;
   if (ipage == NULL) create = 1;
   if (pgsize != neighbor->pgsize) create = 1;
   if (oneatom != neighbor->oneatom) create = 1;
 
   if (create) {
     // drop the old pages and cache the settings they are built with
     delete [] ipage;
     pgsize = neighbor->pgsize;
     oneatom = neighbor->oneatom;
 
     // one MyPage per thread, as given by comm->nthreads
     int nmypage = comm->nthreads;
     ipage = new MyPage<int>[nmypage];
     for (int i = 0; i < nmypage; i++)
       ipage[i].init(oneatom,pgsize);
   }
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairComb::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
   return cutmax;
 }
 
 /* ---------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    read the COMB potential file and fill params[] / nparams
    file = name of the potential file, opened on proc 0 only
    proc 0 reads each line and broadcasts it to all other procs
    an entry spans params_per_line (49) words, possibly over several lines
    entries whose 3 element tags are not all in elements[] are skipped
    errors out if the file cannot be opened, an entry has the wrong
    number of words, or a parameter fails the sanity checks
 ------------------------------------------------------------------------- */
 
 void PairComb::read_file(char *file)
 {
   int params_per_line = 49;
   // one extra slot: the tokenizing loop also stores the terminating NULL
   char **words = new char*[params_per_line+1];
 
   // discard parameters from any previous call
 
   memory->sfree(params);
   params = NULL;
   nparams = 0;
   maxparam = 0;
 
   // open file on proc 0
 
   FILE *fp;
   if (comm->me == 0) {
     fp = force->open_potential(file);
     if (fp == NULL) {
       char str[128];
       // bounded write: a long file name must not overflow str
       snprintf(str,sizeof(str),"Cannot open COMB potential file %s",file);
       error->one(FLERR,str);
     }
   }
 
   // read each line out of file, skipping blank lines or leading '#'
   // store line of params if all 3 element tags are in element list
 
   int n,nwords,ielement,jelement,kelement;
   char line[MAXLINE],*ptr;
   int eof = 0;
 
   while (1) {
     if (comm->me == 0) {
       ptr = fgets(line,MAXLINE,fp);
       if (ptr == NULL) {
         eof = 1;
         fclose(fp);
       } else n = strlen(line) + 1;
     }
     // eof is broadcast first so all procs leave the loop together
     MPI_Bcast(&eof,1,MPI_INT,0,world);
     if (eof) break;
     MPI_Bcast(&n,1,MPI_INT,0,world);
     MPI_Bcast(line,n,MPI_CHAR,0,world);
 
     // strip comment, skip line if blank
 
     if ((ptr = strchr(line,'#'))) *ptr = '\0';
     nwords = atom->count_words(line);
     if (nwords == 0) continue;
 
     // concatenate additional lines until have params_per_line words
 
     while (nwords < params_per_line) {
       n = strlen(line);
       if (comm->me == 0) {
         // append the next line directly after the text read so far
         ptr = fgets(&line[n],MAXLINE-n,fp);
         if (ptr == NULL) {
           eof = 1;
           fclose(fp);
         } else n = strlen(line) + 1;
       }
       MPI_Bcast(&eof,1,MPI_INT,0,world);
       if (eof) break;
       MPI_Bcast(&n,1,MPI_INT,0,world);
       MPI_Bcast(line,n,MPI_CHAR,0,world);
       if ((ptr = strchr(line,'#'))) *ptr = '\0';
       nwords = atom->count_words(line);
     }
 
     // also triggers when the file ended in the middle of an entry
 
     if (nwords != params_per_line)
       error->all(FLERR,"Incorrect format in COMB potential file");
 
     // words = ptrs to all words in line
 
     nwords = 0;
     words[nwords++] = strtok(line," \t\n\r\f");
     while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue;
 
     // ielement,jelement,kelement = 1st args
     // if all 3 args are in element list, then parse this line
     // else skip to next line
 
     for (ielement = 0; ielement < nelements; ielement++)
       if (strcmp(words[0],elements[ielement]) == 0) break;
     if (ielement == nelements) continue;
     for (jelement = 0; jelement < nelements; jelement++)
       if (strcmp(words[1],elements[jelement]) == 0) break;
     if (jelement == nelements) continue;
     for (kelement = 0; kelement < nelements; kelement++)
       if (strcmp(words[2],elements[kelement]) == 0) break;
     if (kelement == nelements) continue;
 
     // load up parameter settings and error check their values
     // params[] grows by DELTA entries whenever it is full
 
     if (nparams == maxparam) {
       maxparam += DELTA;
       params = (Param *) memory->srealloc(params,maxparam*sizeof(Param),
                                           "pair:params");
     }
 
     params[nparams].ielement = ielement;
     params[nparams].jelement = jelement;
     params[nparams].kelement = kelement;
     params[nparams].powerm = atof(words[3]);
     params[nparams].c = atof(words[4]);
     params[nparams].d = atof(words[5]);
     params[nparams].h = atof(words[6]);
     params[nparams].powern = atof(words[7]);
     params[nparams].beta = atof(words[8]);
     params[nparams].lam21 = atof(words[9]);
     params[nparams].lam22 = atof(words[10]);
     params[nparams].bigb1 = atof(words[11]);
     params[nparams].bigb2 = atof(words[12]);
     params[nparams].bigr = atof(words[13]);
     params[nparams].bigd = atof(words[14]);
     params[nparams].lam11 = atof(words[15]);
     params[nparams].lam12 = atof(words[16]);
     params[nparams].biga1 = atof(words[17]);
     params[nparams].biga2 = atof(words[18]);
     params[nparams].plp1 = atof(words[19]);
     params[nparams].plp3 = atof(words[20]);
     params[nparams].plp6 = atof(words[21]);
     params[nparams].a123 = atof(words[22]);
     params[nparams].aconf= atof(words[23]);
     params[nparams].addrep = atof(words[24]);
     params[nparams].romigb = atof(words[25]);
     params[nparams].romigc = atof(words[26]);
     params[nparams].romigd = atof(words[27]);
     params[nparams].romiga = atof(words[28]);
     params[nparams].QL1 = atof(words[29]);
     params[nparams].QU1 = atof(words[30]);
     params[nparams].DL1 = atof(words[31]);
     params[nparams].DU1 = atof(words[32]);
     params[nparams].QL2 = atof(words[33]);
     params[nparams].QU2 = atof(words[34]);
     params[nparams].DL2 = atof(words[35]);
     params[nparams].DU2 = atof(words[36]);
     params[nparams].chi = atof(words[37]);
     params[nparams].dj  = atof(words[38]);
     params[nparams].dk  = atof(words[39]);
     params[nparams].dl  = atof(words[40]);
     params[nparams].dm  = atof(words[41]);
     params[nparams].esm1 = atof(words[42]);
     params[nparams].cmn1 = atof(words[43]);
     params[nparams].cml1 = atof(words[44]);
     params[nparams].cmn2 = atof(words[45]);
     params[nparams].cml2 = atof(words[46]);
     params[nparams].coulcut = atof(words[47]);
     params[nparams].hfocor = atof(words[48]);
 
     // integer copy of powerm; the checks below require powerm to be
     // exactly 1 or 3
 
     params[nparams].powermint = int(params[nparams].powerm);
 
     // parameter sanity checks
 
     if (params[nparams].lam11 < 0.0 || params[nparams].lam12 < 0.0 ||
         params[nparams].c < 0.0 || params[nparams].d < 0.0 ||
         params[nparams].powern < 0.0 || params[nparams].beta < 0.0 ||
         params[nparams].lam21 < 0.0 || params[nparams].lam22 < 0.0 ||
         params[nparams].bigb1< 0.0 || params[nparams].bigb2< 0.0 ||
         params[nparams].biga1< 0.0 || params[nparams].biga2< 0.0 ||
         params[nparams].bigr < 0.0 || params[nparams].bigd < 0.0 ||
         params[nparams].bigd > params[nparams].bigr ||
         params[nparams].powerm - params[nparams].powermint != 0.0 ||
         (params[nparams].powermint != 3 && params[nparams].powermint != 1) ||
         params[nparams].plp1 < 0.0 || params[nparams].plp3 < 0.0 ||
         params[nparams].plp6 < 0.0  ||
         params[nparams].a123 > 360.0 || params[nparams].aconf < 0.0 ||
         params[nparams].addrep < 0.0 || params[nparams].romigb < 0.0 ||
         params[nparams].romigc < 0.0 || params[nparams].romigd < 0.0 ||
         params[nparams].romiga < 0.0 ||
         params[nparams].QL1 > 0.0 || params[nparams].QU1 < 0.0 ||
         params[nparams].DL1 < 0.0 || params[nparams].DU1 > 0.0 ||
         params[nparams].QL2 > 0.0 || params[nparams].QU2 < 0.0 ||
         params[nparams].DL2 < 0.0 || params[nparams].DU2 > 0.0 ||
         params[nparams].chi < 0.0 ||
 //        params[nparams].dj < 0.0 || params[nparams].dk < 0.0 ||
 //        params[nparams].dl < 0.0 || params[nparams].dm < 0.0 ||
         params[nparams].esm1 < 0.0)
       error->all(FLERR,"Illegal COMB parameter");
 
     // the "1" parameters may not be smaller than their "2" counterparts
 
     if (params[nparams].lam11 < params[nparams].lam21 ||
         params[nparams].lam12 < params[nparams].lam22 ||
         params[nparams].biga1< params[nparams].bigb1 ||
         params[nparams].biga2< params[nparams].bigb2)
       error->all(FLERR,"Illegal COMB parameter");
 
     nparams++;
   }
 
   delete [] words;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb::setup()
 {
   int i,j,k,m,n;
 
   // set elem2param for all element triplet combinations
   // must be a single exact match to lines read from file
   // do not allow for ACB in place of ABC
 
   memory->destroy(elem2param);
   memory->create(elem2param,nelements,nelements,nelements,"pair:elem2param");
 
   for (i = 0; i < nelements; i++)
     for (j = 0; j < nelements; j++)
       for (k = 0; k < nelements; k++) {
         n = -1;
         for (m = 0; m < nparams; m++) {
           if (i == params[m].ielement && j == params[m].jelement &&
               k == params[m].kelement) {
             if (n >= 0) error->all(FLERR,"Potential file has duplicate entry");
             n = m;
           }
         }
         if (n < 0) error->all(FLERR,"Potential file is missing an entry");
         elem2param[i][j][k] = n;
       }
 
   // compute parameter values derived from inputs
 
   for (m = 0; m < nparams; m++) {
     params[m].cut = params[m].bigr + params[m].bigd;
     params[m].cutsq = params[m].cut*params[m].cut;
     params[m].c1 = pow(2.0*params[m].powern*1.0e-16,-1.0/params[m].powern);
     params[m].c2 = pow(2.0*params[m].powern*1.0e-8,-1.0/params[m].powern);
     params[m].c3 = 1.0/params[m].c2;
     params[m].c4 = 1.0/params[m].c1;
     params[m].rlm1 = 0.5*(params[m].lam11+params[m].lam12)*params[m].romigc;
     params[m].rlm2 = 0.5*(params[m].lam21+params[m].lam22)*params[m].romigd;
 
     params[m].Qo1 = (params[m].QU1+params[m].QL1)/2.0; // (A22)
     params[m].dQ1 = (params[m].QU1-params[m].QL1)/2.0; // (A21)
     params[m].aB1 = 1.0 /
       (1.0-pow(fabs(params[m].Qo1/params[m].dQ1),10.0)); // (A20)
     params[m].bB1 = pow(fabs(params[m].aB1),0.1)/params[m].dQ1; // (A19)
     params[m].nD1 = log(params[m].DU1/(params[m].DU1-params[m].DL1))/
                     log(params[m].QU1/(params[m].QU1-params[m].QL1));
     params[m].bD1 = (pow((params[m].DL1-params[m].DU1),(1.0/params[m].nD1)))/
                     (params[m].QU1-params[m].QL1);
 
     params[m].Qo2 = (params[m].QU2+params[m].QL2)/2.0; // (A22)
     params[m].dQ2 = (params[m].QU2-params[m].QL2)/2.0; // (A21)
     params[m].aB2 = 1.0 /
       (1.0-pow(fabs(params[m].Qo2/params[m].dQ2),10.0)); // (A20)
     params[m].bB2 = pow(fabs(params[m].aB2),0.1)/params[m].dQ2; // (A19)
     params[m].nD2 = log(params[m].DU2/(params[m].DU2-params[m].DL2))/
                     log(params[m].QU2/(params[m].QU2-params[m].QL2));
     params[m].bD2 = (pow((params[m].DL2-params[m].DU2),(1.0/params[m].nD2)))/
                     (params[m].QU2-params[m].QL2);
 
     params[m].lcut = params[m].coulcut;
     params[m].lcutsq = params[m].lcut*params[m].lcut;
 
     params[m].gamma = 1.0;        // for the change in pair_comb.h
   }
 
   // set cutmax to max of all params
 
   cutmax = cutmin = 0.0;
   cor_flag = 0;
   for (m = 0; m < nparams; m++) {
     if (params[m].cut > cutmax) cutmax = params[m].cut;
     if (params[m].lcut > cutmax) cutmax = params[m].lcut;
     if (params[m].cutsq > cutmin) cutmin = params[m].cutsq+0.2;
     if (params[m].hfocor > 0.0001) cor_flag = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb::repulsive(Param *param, double rsq, double &fforce,
                     int eflag, double &eng, double iq, double jq)
 {
   // Pairwise repulsive term.
   //   param  : parameter set for this pair
   //   rsq    : squared separation
   //   fforce : (out) force term, already divided by r
   //   eng    : (out) repulsive energy, written only when eflag is set
   //   iq,jq  : charges of the two atoms
   // Nothing is written when r lies beyond bigr+bigd.
 
   const double extra_rep = param->addrep;
   const double r_outer = param->bigr + param->bigd;
 
   const double r = sqrt(rsq);
   if (r > r_outer) return;
 
   const double fcut = comb_fc(r,param);
   const double fcut_d = comb_fc_d(r,param);
   const double decay = exp(-param->rlm1 * r);
 
   // charge-dependent exponents and pre-exponential factors
 
   const double Di =
     param->DU1 + pow(fabs(param->bD1*(param->QU1-iq)),param->nD1);
   const double Dj =
     param->DU2 + pow(fabs(param->bD2*(param->QU2-jq)),param->nD2);
   const double Asi = param->biga1 * exp(param->lam11*Di);
   const double Asj = param->biga2 * exp(param->lam12*Dj);
 
   const double bigA =
     (Asi > 0.0 && Asj > 0.0) ? sqrt(Asi*Asj)*param->romiga : 0.0;
 
   fforce = -bigA * decay * (fcut_d - fcut*param->rlm1) / r;
 
   // additional repulsion for TiO2 and HfO2 (switch by cor_flag)
 
   double vrcs = 0.0;
   if (extra_rep > 0.0) {
     double fvrcs;
     if (cor_flag) {
       const double arr1 = 2.22850;
       const double arr2 = 1.89350;
       const double slope = ((arr1-r)/(arr1-arr2));
       const double slope2 = slope * slope;
       const double slope4 = slope2 * slope2;
       vrcs = comb_fc2(r) * comb_fc3(r) * extra_rep *
         ((50.0*slope4-30.0*slope2+4.50))/8.0;
       // NOTE(review): comb_fc2_d is non-zero only on [1.8935,1.9435] and
       // comb_fc3_d only on [2.5135,2.5635], so this product is zero for
       // every r as written - confirm that is intended.
       fvrcs = comb_fc2_d(r)*comb_fc3_d(r)*extra_rep*slope*
         (-25.0*slope2+7.50)/(arr1-arr2);
     } else {
       vrcs = extra_rep * pow((1.0-r/r_outer),2.0);
       fvrcs = extra_rep * 2.0 * (r/r_outer-1.0)/r_outer;
     }
     fforce += fforce*vrcs - (fcut * bigA * decay * fvrcs);
   }
 
   // repulsive energy, scaled by the extra-repulsion factor
 
   if (eflag) eng = (fcut * bigA * decay)*(1.0+vrcs);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairComb::zeta(Param *param, double rsqij, double rsqik,
                          double *delrij, double *delrik)
 {
   // Contribution of neighbor k to zeta_ij: cutoff(rik) * g(cos theta) *
   // exponential of the (optionally cubed) scaled length difference.
   // Returns 0 when rij is beyond bigr+bigd.
 
   const double rij = sqrt(rsqij);
   if (rij > param->bigr+param->bigd) return 0.0;
 
   const double rik = sqrt(rsqik);
   const double costheta = vec3_dot(delrij,delrik) / (rij*rik);
 
   double arg = param->rlm2 * (rij-rik);
   if (param->powermint == 3) arg = pow(arg,3.0);
 
   // clamp the exponential for very large or very small arguments
 
   double ex_delr;
   if (arg > 69.0776) {
     ex_delr = 1.e30;
   } else if (arg < -69.0776) {
     ex_delr = 0.0;
   } else {
     ex_delr = exp(arg);
   }
 
   return comb_fc(rik,param) * comb_gijk(costheta,param) * ex_delr;
 }
 
 /* ----------------------------------------------------------------------
    Legendre polynomial bond angle correction to energy
 ------------------------------------------------------------------------- */
 
 double PairComb::elp(Param *param, double rsqij, double rsqik,
                      double *delrij, double *delrik)
 {
   // Energy of the angular correction for the i-j-k triplet: Legendre
   // terms (orders 1, 3, 6) plus a bond-bending term, weighted by the
   // cutoff functions of rij and rik.  Returns 0 when all coefficients
   // are below their thresholds.
 
   const bool use_legendre = param->plp1 > 1.0e-6 ||
     param->plp3 > 1.0e-6 || param->plp6 > 1.0e-6;
 
   if (!(param->aconf > 1.0e-6 || use_legendre)) return 0.0;
 
   // cos(theta) of i-j-k and the cutoff functions of both bonds
 
   const double rij = sqrt(rsqij);
   const double rik = sqrt(rsqik);
   const double rmu = vec3_dot(delrij,delrik) / (rij*rik);
   const double fcj = comb_fc(rij,param);
   const double fck = comb_fc(rik,param);
 
   // Legendre polynomial terms
 
   double comtt = 0.0;
   if (use_legendre) {
     const double rmu2 = rmu*rmu;
     const double lp1 = rmu;
     const double lp3 = 0.5*(5.0*rmu2*rmu-3.0*rmu);
     const double lp6 =
       (231.0*rmu2*rmu2*rmu2-315.0*rmu2*rmu2+105.0*rmu2-5.0)/16.0;
     comtt = param->plp1*lp1 + param->plp3*lp3 + param->plp6*lp6;
   }
 
   // bond-bending term; the sign of hfocor selects its form
 
   if (param->aconf>1e-4) {
     const double c123 = cos(param->a123*MY_PI/180.0);
     const double dev = rmu-c123;
     if (param->hfocor >= 0.0) {
       comtt += param->aconf * dev*dev;
     } else if (param->hfocor < 0.0) {
       comtt += param->aconf *(4.0-dev*dev);
     }
   }
 
   return 0.5 * fcj * fck * comtt;
 }
 
 /* ----------------------------------------------------------------------
    Legendre polynomial bond angle correction to forces
 ------------------------------------------------------------------------- */
 
 void PairComb::flp(Param *param, double rsqij, double rsqik,
                    double *delrij, double *delrik, double *drilp,
                    double *drjlp, double *drklp)
 {
   // Force counterpart of elp(): fills drjlp and drklp with the vectors for
   // atoms j and k, and drilp with minus their sum.
   //   rsqij,rsqik   : squared lengths of delrij and delrik
   //   delrij,delrik : displacement vectors i->j and i->k
   // All three outputs are zero vectors when every coefficient is below
   // its threshold.
 
   double ffj1,ffj2,ffk1,ffk2;
   ffj1 = 0.0; ffj2 = 0.0; ffk1 = 0.0; ffk2 = 0.0;
 
   if (param->aconf > 1.0e-4 || param->plp1 > 1.0e-6 ||
       param->plp3 > 1.0e-6 || param->plp6 > 1.0e-6) {
     double rij,rik,costheta,lp1,lp1_d,lp3,lp3_d,lp6,lp6_d;
     double rmu,rmu2,comtt,comtt_d,com4k,com5,fcj,fck,fck_d;
 
     double pplp1 = param->plp1;
     double pplp3 = param->plp3;
     double pplp6 = param->plp6;
     double c123 = cos(param->a123*MY_PI/180.0);
 
     // fck_d = derivative of cutoff function
 
     rij = sqrt(rsqij); rik = sqrt(rsqik);
     costheta = vec3_dot(delrij,delrik) / (rij*rik);
     fcj = comb_fc(rij,param);
     fck = comb_fc(rik,param);
     fck_d = comb_fc_d(rik,param);
     rmu = costheta;
 
     // Legendre Polynomial functions and derivatives
 
     if (param->plp1 > 1.0e-6 || param->plp3 > 1.0e-6 || param->plp6 > 1.0e-6) {
       rmu2 = rmu*rmu;
       lp1 = rmu; lp3 = (2.5*rmu2*rmu-1.5*rmu);
       lp6 = (231.0*rmu2*rmu2*rmu2-315.0*rmu2*rmu2+105.0*rmu2-5.0)/16.0;
       lp1_d = 1.0;lp3_d = (7.5*rmu2-1.5);
       // d/dx of (231 x^6 - 315 x^4 + 105 x^2 - 5)/16 is
       // (1386 x^5 - 1260 x^3 + 210 x)/16; the last term carries a factor x
       lp6_d = (1386.0*rmu2*rmu2*rmu-1260.0*rmu2*rmu+210.0*rmu)/16.0;
       comtt   = pplp1*lp1   + pplp3*lp3   + pplp6*lp6;
       comtt_d = pplp1*lp1_d + pplp3*lp3_d + pplp6*lp6_d;
     } else {
       comtt = 0.0;
       comtt_d = 0.0;
     }
 
     // bond-bending terms derivatives
 
     if (param->aconf > 1.0e-4) {
       if (param->hfocor >= 0.0) {
         comtt += param->aconf *(rmu-c123)*(rmu-c123);
         comtt_d += 2.0*param->aconf*(rmu-c123);
       } else if (param->hfocor < 0.0) {
         comtt += param->aconf *(4.0-(rmu-c123)*(rmu-c123));
         comtt_d += -2.0*param->aconf*(rmu-c123);
       }
     }
 
     // com4k: radial part through the cutoff of rik
     // com5 : angular part through d(comtt)/d(cos theta)
 
     com4k = 2.0 * fcj * fck_d * comtt;
     com5 = fcj * fck * comtt_d;
 
     ffj1 =-0.5*(com5/(rij*rik));
     ffj2 = 0.5*(com5*rmu/rsqij);
     ffk1 = ffj1;
     ffk2 = 0.5*(-com4k/rik+com5*rmu/rsqik);
   }
 
   // j-atom
 
   vec3_scale(ffj1,delrik,drjlp);             // (k,x[],y[]), y[]=k*x[]
   vec3_scaleadd(ffj2,delrij,drjlp,drjlp);   // (k,x[],y[],z[]), z[]=k*x[]+y[]
 
   // k-atom
 
   vec3_scale(ffk1,delrij,drklp);
   vec3_scaleadd(ffk2,delrik,drklp,drklp);
 
   // i-atom
 
   vec3_add(drjlp,drklp,drilp);                    // (x[],y[],z[]), z[]=x[]+y[]
   vec3_scale(-1.0,drilp,drilp);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb::force_zeta(Param *param, int eflag, int i, int j, double rsq,
                 double zeta_ij, double iq, double jq, double &fforce,
                 double &prefactor, double &eng)
 {
   // Attractive pair term modulated by the bond order of zeta_ij.
   // Stores the bond order in bbij[i][j].  Writes fforce and prefactor,
   // and eng when eflag is set.  Nothing is written or stored when r is
   // beyond bigr+bigd.
 
   const double r = sqrt(rsq);
   if (r > param->bigr+param->bigd) return;
 
   const double attr = comb_fa(r,param,iq,jq);
   const double attr_d = comb_fa_d(r,param,iq,jq);
   const double bond_order = comb_bij(zeta_ij,param);
   bbij[i][j] = bond_order;
 
   // force term (divided by r) and prefactor for the zeta derivative
 
   fforce = 0.5*bond_order*attr_d / r;
   prefactor = -0.5*attr * comb_bij_d(zeta_ij,param);
 
   // attractive energy
 
   if (eflag) eng = 0.5*bond_order*attr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairComb::comb_fc(double r, Param *param)
 {
   // Cutoff function: 1 below bigr-bigd, 0 above bigr+bigd, and a sine
   // ramp in between.
 
   const double center = param->bigr;
   const double halfwidth = param->bigd;
 
   double value;
   if (r < center-halfwidth) {
     value = 1.0;
   } else if (r > center+halfwidth) {
     value = 0.0;
   } else {
     value = 0.5*(1.0 - sin(MY_PI2*(r - center)/halfwidth));
   }
   return value;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairComb::comb_fc_d(double r, Param *param)
 {
   // Derivative of comb_fc with respect to r; zero outside the window
   // [bigr-bigd, bigr+bigd].
 
   const double center = param->bigr;
   const double halfwidth = param->bigd;
 
   if (r < center-halfwidth || r > center+halfwidth) return 0.0;
 
   return -(MY_PI4/halfwidth) * cos(MY_PI2*(r - center)/halfwidth);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairComb::comb_fc2(double r)
 {
   // Fixed-window switching function: 0 below 1.8935, 1 above
   // 1.8935+0.05, and a cosine form inside the window.
   // NOTE(review): the cosine form is 1 at the lower edge and 0 at the
   // upper edge, so the value jumps at both edges - confirm intended.
 
   const double r_lo = 1.89350;
   const double r_hi = r_lo + 0.050;
 
   double value;
   if (r < r_lo) {
     value = 0.0;
   } else if (r > r_hi) {
     value = 1.0;
   } else {
     value = 0.5*(1.0 + cos(MY_PI*(r - r_lo)/(r_hi-r_lo)));
   }
   return value;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairComb::comb_fc2_d(double r)
 {
   // Derivative of the in-window cosine form of comb_fc2; zero outside
   // the window [1.8935, 1.8935+0.05].
 
   const double r_lo = 1.89350;
   const double r_hi = r_lo + 0.050;
 
   if (r < r_lo || r > r_hi) return 0.0;
 
   return -(MY_PI2/(r_hi-r_lo)) * sin(MY_PI*(r - r_lo)/(r_hi-r_lo));
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairComb::comb_fc3(double r)
 {
   // Fixed-window switching function: 1 below 2.5135, 0 above
   // 2.5135+0.05, with a cosine ramp from 1 to 0 inside the window.
 
   const double r_lo = 2.51350;
   const double r_hi = r_lo + 0.050;
 
   double value;
   if (r < r_lo) {
     value = 1.0;
   } else if (r > r_hi) {
     value = 0.0;
   } else {
     value = 0.5*(1.0 + cos(MY_PI*(r - r_lo)/(r_hi-r_lo)));
   }
   return value;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairComb::comb_fc3_d(double r)
 {
   // Derivative of comb_fc3 with respect to r; zero outside the window
   // [2.5135, 2.5135+0.05].
 
   const double r_lo = 2.51350;
   const double r_hi = r_lo + 0.050;
 
   if (r < r_lo || r > r_hi) return 0.0;
 
   return -(MY_PI2/(r_hi-r_lo)) * sin(MY_PI*(r - r_lo)/(r_hi-r_lo));
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairComb::self(Param *param, double qi, double selfpot)
 {
   // Self energy of an atom with charge qi: a polynomial in qi with
   // coefficients chi, dj(+selfpot), dk, dl, dm, plus quartic penalty
   // terms once qi leaves the range [0.9*QL1, 0.9*QU1].
 
   const double s1 = param->chi;
   const double s2 = param->dj;
   const double s3 = param->dk;
   const double s4 = param->dl;
   const double s5 = param->dm;
 
   const double q_lower = param->QL1*0.90;
   const double q_upper = param->QU1*0.90;
   const double barrier_lower = 1000.0;
   const double barrier_upper = 1000.0;
 
   double energy = qi*(s1+qi*(s2+selfpot+qi*(s3+qi*(s4+qi*qi*s5))));
 
   if (qi < q_lower) energy += barrier_lower * pow((qi-q_lower),4.0);
   if (qi > q_upper) energy += barrier_upper * pow((qi-q_upper),4.0);
 
   return energy;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairComb::comb_fa(double r, Param *param, double iq, double jq)
 {
   // Charge-dependent attractive pair function at distance r.
   // Returns 0 beyond bigr+bigd.
 
   if (r > param->bigr + param->bigd) return 0.0;
 
   const double Di =
     param->DU1 + pow(fabs(param->bD1*(param->QU1-iq)),param->nD1);
   const double Dj =
     param->DU2 + pow(fabs(param->bD2*(param->QU2-jq)),param->nD2);
 
   const double Bsi = param->bigb1 * exp(param->lam21*Di)*
        (param->aB1-fabs(pow(param->bB1*(iq-param->Qo1),10.0)));
   const double Bsj = param->bigb2 * exp(param->lam22*Dj)*
        (param->aB2-fabs(pow(param->bB2*(jq-param->Qo2),10.0)));
 
   // the mixed prefactor is only used when both factors are positive
 
   const double bigB =
     (Bsi > 0.0 && Bsj > 0.0) ? sqrt(Bsi*Bsj)*param->romigb : 0.0;
 
   return -bigB * exp(-param->rlm2 * r) * comb_fc(r,param);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairComb::comb_fa_d(double r, Param *param, double iq, double jq)
 {
   // Derivative of comb_fa with respect to r, using the same
   // charge-dependent prefactor.  Returns 0 beyond bigr+bigd.
 
   if (r > param->bigr + param->bigd) return 0.0;
 
   const double Di =
     param->DU1 + pow(fabs(param->bD1*(param->QU1-iq)),param->nD1);
   const double Dj =
     param->DU2 + pow(fabs(param->bD2*(param->QU2-jq)),param->nD2);
 
   const double Bsi = param->bigb1 * exp(param->lam21*Di)*
        (param->aB1-fabs(pow(param->bB1*(iq-param->Qo1),10.0)));
   const double Bsj = param->bigb2 * exp(param->lam22*Dj)*
        (param->aB2-fabs(pow(param->bB2*(jq-param->Qo2),10.0)));
 
   // the mixed prefactor is only used when both factors are positive
 
   const double bigB =
     (Bsi > 0.0 && Bsj > 0.0) ? sqrt(Bsi*Bsj)*param->romigb : 0.0;
 
   return bigB * exp(-param->rlm2 * r) *
     (param->rlm2 * comb_fc(r,param) - comb_fc_d(r,param));
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairComb::comb_bij(double zeta, Param *param)
 {
   // Bond-order function of beta*zeta.  The thresholds c1..c4 select
   // limiting forms for very large or very small arguments before the
   // general expression is used.
 
   const double bz = param->beta * zeta;
   const double n = param->powern;
 
   if (bz > param->c1) {
     return 1.0/sqrt(bz);
   } else if (bz > param->c2) {
     return (1.0 - pow(bz,-1.0*n) / (2.0*n))/sqrt(bz);
   } else if (bz < param->c4) {
     return 1.0;
   } else if (bz < param->c3) {
     return 1.0 - pow(bz,n)/(2.0*n);
   }
   return pow(1.0 + pow(bz,n), -1.0/(2.0*n));
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairComb::comb_bij_d(double zeta, Param *param)
 {
   // Derivative of comb_bij with respect to zeta, using the same
   // threshold cases (c1..c4) as comb_bij.
 
   const double bz = param->beta * zeta;
 
   if (bz > param->c1) {
     return param->beta * -0.5*pow(bz,-1.5);
   } else if (bz > param->c2) {
     return param->beta * (-0.5*pow(bz,-1.5) *
                           (1.0 - 0.5*(1.0 +  1.0/(2.0*param->powern)) *
                            pow(bz,-param->powern)));
   } else if (bz < param->c4) {
     return 0.0;
   } else if (bz < param->c3) {
     return -0.5*param->beta * pow(bz,param->powern-1.0);
   }
 
   // general case
 
   const double bz_n = pow(bz,param->powern);
   return -0.5 * pow(1.0+bz_n, -1.0-(1.0/(2.0*param->powern)))*bz_n / zeta;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb::attractive(Param *param, double prefactor,
                           double rsqij, double rsqik,
                           double *delrij, double *delrik,
                           double *fi, double *fj, double *fk)
 {
   // Normalizes delrij and delrik and passes the unit vectors and
   // lengths to comb_zetaterm_d, which fills fi, fj and fk.
 
   double unit_ij[3],unit_ik[3];
 
   const double len_ij = sqrt(rsqij);
   const double inv_ij = 1.0/len_ij;
   vec3_scale(inv_ij,delrij,unit_ij);
 
   const double len_ik = sqrt(rsqik);
   const double inv_ik = 1.0/len_ik;
   vec3_scale(inv_ik,delrik,unit_ik);
 
   comb_zetaterm_d(prefactor,unit_ij,len_ij,unit_ik,len_ik,fi,fj,fk,param);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb::comb_zetaterm_d(double prefactor, double *rij_hat, double rij,
                                double *rik_hat, double rik, double *dri,
                                double *drj, double *drk, Param *param)
 {
   // Derivatives of one zeta term with respect to the positions of atoms
   // i, j and k, each scaled by prefactor and written to dri, drj, drk.
 
   double dcosdri[3],dcosdrj[3],dcosdrk[3];
 
   const double fc = comb_fc(rik,param);
   const double dfc = comb_fc_d(rik,param);
 
   double arg = param->rlm2 * (rij-rik);
   if (param->powermint == 3) arg = pow(arg,3.0);
 
   // clamped exponential (ex_delr is Ygexp)
 
   double ex_delr;
   if (arg > 69.0776) {
     ex_delr = 1.e30;
   } else if (arg < -69.0776) {
     ex_delr = 0.0;
   } else {
     ex_delr = exp(arg);
   }
 
   // derivative of the exponential with respect to (rij-rik) (com3)
 
   double ex_delr_d;
   if (param->powermint == 3) {
     ex_delr_d = 3.0*pow(param->rlm2,3.0) * pow(rij-rik,2.0)*ex_delr;
   } else {
     ex_delr_d = param->rlm2 * ex_delr;
   }
 
   const double cos_theta = vec3_dot(rij_hat,rik_hat);
   const double gijk = comb_gijk(cos_theta,param);
   const double gijk_d = comb_gijk_d(cos_theta,param);
   costheta_d(rij_hat,rij,rik_hat,rik,dcosdri,dcosdrj,dcosdrk);
 
   // scalar weights shared by the three derivatives
 
   const double w_cut = dfc*gijk*ex_delr;      // through the cutoff of rik
   const double w_ang = fc*gijk_d*ex_delr;     // through cos(theta)
   const double w_exp = fc*gijk*ex_delr_d;     // through the exponential
 
   // derivative wrt Ri:
   //   -w_cut*rik_hat + w_ang*dcosdri + w_exp*(rik_hat - rij_hat)
 
   vec3_scale(-w_cut,rik_hat,dri);
   vec3_scaleadd(w_ang,dcosdri,dri,dri);
   vec3_scaleadd(w_exp,rik_hat,dri,dri);
   vec3_scaleadd(-w_exp,rij_hat,dri,dri);
   vec3_scale(prefactor,dri,dri);
 
   // derivative wrt Rj:
   //   w_ang*dcosdrj + w_exp*rij_hat
 
   vec3_scale(w_ang,dcosdrj,drj);
   vec3_scaleadd(w_exp,rij_hat,drj,drj);
   vec3_scale(prefactor,drj,drj);
 
   // derivative wrt Rk:
   //   w_cut*rik_hat + w_ang*dcosdrk - w_exp*rik_hat
 
   vec3_scale(w_cut,rik_hat,drk);
   vec3_scaleadd(w_ang,dcosdrk,drk,drk);
   vec3_scaleadd(-w_exp,rik_hat,drk,drk);
   vec3_scale(prefactor,drk,drk);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb::costheta_d(double *rij_hat, double rij,
                              double *rik_hat, double rik,
                              double *dri, double *drj, double *drk)
 {
   // Derivatives of cos(theta) between the unit vectors rij_hat and
   // rik_hat: dri is the derivative wrt Ri, drj wrt Rj, drk wrt Rk.
 
   const double cos_theta = vec3_dot(rij_hat,rik_hat);
   const double inv_rij = 1.0/rij;
   const double inv_rik = 1.0/rik;
 
   // drj = (rik_hat - cos_theta*rij_hat) / rij
 
   vec3_scaleadd(-cos_theta,rij_hat,rik_hat,drj);
   vec3_scale(inv_rij,drj,drj);
 
   // drk = (rij_hat - cos_theta*rik_hat) / rik
 
   vec3_scaleadd(-cos_theta,rik_hat,rij_hat,drk);
   vec3_scale(inv_rik,drk,drk);
 
   // dri = -(drj + drk)
 
   vec3_add(drj,drk,dri);
   vec3_scale(-1.0,dri,dri);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb::sm_table()
 {
   // Allocates the per-pair-type lookup tables (fafb, dfafb, ddfafb, phin,
   // dphin), the erfc/gaussian table erpaw and several per-atom arrays,
   // then fills the tables on a uniform grid r = drin + k*dra.
 
   int i,j,k,m,nntypes,ncoul;
   int inty, itype, jtype;
   int iparam_i, iparam_ij, iparam_ji;
   double r,dra,drin,rc,z,zr,zrc,ea,eb,ea3,eb3,alf;
   double exp2er,exp2ersh,fafash,dfafash,F1,dF1,ddF1,E1,E2,E3,E4;
   double exp2ear,exp2ebr,exp2earsh,exp2ebrsh,fafbsh,dfafbsh;
 
   int n = atom->ntypes;
   int nmax = atom->nmax;
 
   dra  = 0.001;  // lookup table step size
   drin = 0.1;    // starting distance of 1/r
   rc = cutmax;   // tables extend to the largest cutoff
   alf = 0.20;    // coefficient inside erfc() and the gaussian in erpaw
 
   nntypes = int((n+1)*n/2); // interaction types
   ncoul = int((rc-drin)/dra)+1; // number of grid points per table
 
   // allocate arrays
   // NOTE(review): erpaw has a fixed 25000 rows while the other tables
   // have ncoul rows - confirm cutmax can never require more rows.
 
   memory->create(intype,n,n,"pair:intype");
   memory->create(fafb,ncoul,nntypes,"pair:fafb");
   memory->create(dfafb,ncoul,nntypes,"pair:dfafb");
   memory->create(ddfafb,ncoul,nntypes,"pair:ddfafb");
   memory->create(phin,ncoul,nntypes,"pair:phin");
   memory->create(dphin,ncoul,nntypes,"pair:dphin");
   memory->create(erpaw,25000,2,"pair:erpaw");
   memory->create(NCo,nmax,"pair:NCo");
   memory->create(bbij,nmax,MAXNEIGH,"pair:bbij");
   memory->create(sht_num,nmax,"pair:sht_num");
   sht_first = (int **) memory->smalloc(nmax*sizeof(int *),"pair:sht_first");
 
   // set interaction number: 0-0=0, 1-1=1, 0-1=1-0=2
   // like pairs take indices 0..n-1 (counter m), unlike pairs continue
   // from n (counter k); the lower triangle mirrors the upper one
 
   m = 0; k = n;
   for (i = 0; i < n; i++) {
     for (j = 0; j < n; j++) {
       if (j == i) {
         intype[i][j] = m;
         m += 1;
       } else if (j != i && j > i) {
         intype[i][j] = k;
         k += 1;
       } else if (j != i && j < i) {
         intype[i][j] = intype[j][i];
       }
     }
   }
 
   // default arrays to zero
 
   for (i = 0; i < ncoul; i ++) {
     for (j = 0; j < nntypes; j ++) {
       fafb[i][j] = 0.0;
       dfafb[i][j] = 0.0;
       ddfafb[i][j] = 0.0;
       phin[i][j] = 0.0;
       dphin[i][j] = 0.0;
     }
   }
 
   // direct 1/r energy with Slater 1S orbital overlap
   // NOTE(review): params[] is indexed here by the type loop index i -
   // confirm the first n entries of params cover each element.
 
   for (i = 0; i < n; i++) {
     r = drin;
     itype = params[i].ielement;
     iparam_i = elem2param[itype][itype][itype];
     z = params[iparam_i].esm1;
     for (j = 0; j < ncoul; j++) {
       exp2er = exp(-2.0 * z * r);
       phin[j][i] = 1.0 - exp2er * (1.0 + 2.0 * z * r * (1.0 + z * r));
       dphin[j][i] = (4.0 * exp2er * z * z * z * r * r);
       r += dra;
     }
   }
 
   // fafb/dfafb/ddfafb tables for every (i,j) type pair
 
   for (i = 0; i < n; i ++) {
     for (j = 0; j < n; j ++) {
       r = drin;
       if (j == i) {
         // like pair: single exponent z
         itype = params[i].ielement;
         inty = intype[itype][itype];
         iparam_i = elem2param[itype][itype][itype];
         z = params[iparam_i].esm1;
         zrc = z * rc;
         exp2ersh = exp(-2.0 * zrc);
         // same expressions as F1 and dF1 below, evaluated at r = rc;
         // used to shift the tabulated values
         fafash = -exp2ersh * (1.0 / rc +
                               z * (11.0/8.0 + 3.0/4.0*zrc + zrc*zrc/6.0));
         dfafash = exp2ersh * (1.0/(rc*rc) + 2.0*z/rc +
                               z*z*(2.0 + 7.0/6.0*zrc + zrc*zrc/3.0));
         for (k = 0; k < ncoul; k ++) {
           zr = z * r;
           exp2er = exp(-2.0*zr);
           F1 = -exp2er * (1.0 / r +
                           z * (11.0/8.0 + 3.0/4.0*zr + zr*zr/6.0));
           dF1 = exp2er * (1.0/(r*r) + 2.0*z/r +
                           z*z*(2.0 + 7.0/6.0*zr + zr*zr/3.0));
           ddF1 = -exp2er * (2.0/(r*r*r) + 4.0*z/(r*r) -
                             z*z*z/3.0*(17.0/2.0 + 5.0*zr + 2.0*zr*zr));
           // shifted so that fafb and dfafb are zero at r = rc
           fafb[k][inty] = F1-fafash-(r-rc)*dfafash;
           dfafb[k][inty] = (dF1 - dfafash);
           ddfafb[k][inty] = ddF1;
           r += dra;
         }
       } else if (j != i) {
         // unlike pair: two exponents ea and eb
         itype = params[i].ielement;
         jtype = params[j].ielement;
         inty = intype[itype][jtype];
         iparam_ij = elem2param[itype][jtype][jtype];
         ea = params[iparam_ij].esm1;
         ea3 = ea*ea*ea;
         iparam_ji = elem2param[jtype][itype][itype];
         eb = params[iparam_ji].esm1;
         eb3 = eb*eb*eb;
         // NOTE(review): E1..E4 divide by powers of (ea-eb); presumably
         // unlike pairs never share the same esm1 - confirm.
         E1 = ea*eb3*eb/((ea+eb)*(ea+eb)*(ea-eb)*(ea-eb));
         E2 = eb*ea3*ea/((ea+eb)*(ea+eb)*(eb-ea)*(eb-ea));
         E3 = (3.0*ea*ea*eb3*eb-eb3*eb3) /
           ((ea+eb)*(ea+eb)*(ea+eb)*(ea-eb)*(ea-eb)*(ea-eb));
         E4 = (3.0*eb*eb*ea3*ea-ea3*ea3) /
           ((ea+eb)*(ea+eb)*(ea+eb)*(eb-ea)*(eb-ea)*(eb-ea));
         exp2earsh = exp(-2.0*ea*rc);
         exp2ebrsh = exp(-2.0*eb*rc);
         // the same expressions as in the loop below, evaluated at r = rc
         fafbsh = -exp2earsh*(E1 + E3/rc)-exp2ebrsh*(E2 + E4/rc);
         dfafbsh =
           exp2earsh*(2.0*ea*(E1+E3/rc)+E3/(rc*rc)) +
           exp2ebrsh*(2.0*eb*(E2+E4/rc)+E4/(rc*rc));
         for (k = 0; k < ncoul; k ++) {
           exp2ear = exp(-2.0*ea*r);
           exp2ebr = exp(-2.0*eb*r);
           fafb[k][inty] =
             - exp2ear*(E1+E3/r) - exp2ebr*(E2+E4/r)
             - fafbsh - (r-rc) * dfafbsh;
           dfafb[k][inty] = (exp2ear*(2.0*ea*(E1+E3/r) + E3/(r*r))
                             + exp2ebr*(2.0*eb*(E2+E4/r) + E4/(r*r))- dfafbsh);
           ddfafb[k][inty] = (- exp2ear*(E3/(r*r)*(1.0/r+2.0*ea/r+2.0/(r*r))
                                         + 2.0*ea*(E1+E3/r))-
                              exp2ebr*(E4/(r*r)
                                       *(1.0/r+2.0*eb/r+2.0/(r*r)) +
                                       2.0*eb*(E2+E4/r)));
           r += dra;
         }
       }
     }
   }
 
   // erpaw[i][0] = erfc(alf*r), erpaw[i][1] = exp(-(alf*r)^2) on the same grid
 
   for (i = 0; i < 25000; i ++) {
     r = dra * i + drin;
     erpaw[i][0] = erfc(r*alf);
     erpaw[i][1] = exp(-r*r*alf*alf);
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb::potal_calc(double &calc1, double &calc2, double &calc3)
 {
   // Evaluates three constants at the largest lcut over all parameter
   // sets (rcoul), using alf = 0.20 and force->qqr2e:
   //   calc3 = erfc(alf*rcoul)/rcoul * qqr2e
   //   calc2 = (erfc(alf*rcoul)/rcoul^2
   //            + 2*alf/MY_PIS*exp(-(alf*rcoul)^2)/rcoul) * qqr2e/rcoul
   //   calc1 = -(alf/MY_PIS*qqr2e + calc3/2)
 
   const double alf = 0.20;
   const double esucon = force->qqr2e;
 
   double rcoul = 0.0;
   for (int m = 0; m < nparams; m++) {
     if (params[m].lcut > rcoul) rcoul = params[m].lcut;
   }
 
   const double erfc_rc = erfc(rcoul*alf);
 
   calc2 = (erfc_rc/rcoul/rcoul+2.0*alf/MY_PIS*
            exp(-alf*alf*rcoul*rcoul)/rcoul)*esucon/rcoul;
   calc3 = (erfc_rc/rcoul)*esucon;
   calc1 = -(alf/MY_PIS*esucon+calc3*0.5);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb::tri_point(double rsq, int &mr1, int &mr2,
                          int &mr3, double &sr1, double &sr2,
                          double &sr3, int &itype)
 {
   // Three-point interpolation setup on the grid r = 0.1 + m*0.001.
   // Outputs three consecutive grid indices (mr1..mr3) and their weights
   // (sr1..sr3).  r is clamped two steps inside both ends of the range
   // [0.1, cutmax].  itype is not used.
 
   const double r_start = 0.10;
   const double step = 0.0010;
 
   double r = sqrt(rsq);
   if (r < r_start + 2.0*step) r = r_start + 2.0*step;
   if (r > cutmax - 2.0*step) r = cutmax - 2.0*step;
 
   // choose the first index so that r lies closest to the middle point
 
   const double grid_pos = (r-r_start)/step;
   mr1 = int(grid_pos)-1;
   const double excess = grid_pos - static_cast<float>(mr1);
   if (excess > 0.5) mr1 += 1;
   mr2 = mr1 + 1;
   mr3 = mr2 + 1;
 
   // offset of r from the first point, in units of the step
 
   const double first_r = static_cast<float>(mr1)*step;
   const double t = (r - r_start - first_r)/step;
   const double t2 = t * t;
 
   sr1 = (t2 - t) * 0.50;
   sr2 = 1.0 - t2;
   sr3 = (t2 + t) * 0.50;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb::direct(int inty, int mr1, int mr2, int mr3, double rsq,
                       double sr1, double sr2, double sr3,
                       double iq, double jq,
                       double potal, double fac11, double fac11e,
                       double &pot_tmp, double &pot_d)
 {
   // Charge-charge energy (pot_tmp) and force term (pot_d) for one pair,
   // interpolated from the erpaw, fafb and dfafb tables with indices
   // mr1..mr3 and weights sr1..sr3.  potal is not used.
 
   const double alf = 0.20;
   const double alfdpi = 2.0*alf/MY_PIS;
   const double esucon = force->qqr2e;
 
   const double r = sqrt(rsq);
   const double r3 = r * rsq;
 
   // 1/r energy
 
   const double erfcc =
     sr1*erpaw[mr1][0] + sr2*erpaw[mr2][0] + sr3*erpaw[mr3][0];
   const double fafbn1 =
     sr1*fafb[mr1][inty] + sr2*fafb[mr2][inty] + sr3*fafb[mr3][inty];
   const double potij = (erfcc/r * esucon - fac11e);
   const double sme2 = potij + fafbn1 * esucon;
   pot_tmp = iq * jq * sme2;
 
   // 1/r force (wrt r)
 
   const double erfcd =
     sr1*erpaw[mr1][1] + sr2*erpaw[mr2][1] + sr3*erpaw[mr3][1];
   const double dfafbn1 =
     sr1*dfafb[mr1][inty] + sr2*dfafb[mr2][inty] + sr3*dfafb[mr3][inty];
   const double dvdrr = (erfcc/r3+alfdpi*erfcd/rsq)*esucon-fac11;
   const double smf2 = dvdrr - dfafbn1 * esucon/r;
   pot_d = iq * jq * smf2;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb::field(Param *param, double rsq, double iq,double jq,
                      double &vionij,double &fvionij)
 {
   // Field correction for one pair: adds the energy to vionij and
   // subtracts the force term from fvionij.  The radial factor
   // 1/r^5 - 1/rc^5 + 5*(r-rc)/rc^6 uses rc = lcut.
 
   const double r = sqrt(rsq);
   const double r5 = r*r*r*r*r;
   const double r6 = r5 * r;
 
   const double rc = param->lcut;
   const double rc5 = rc*rc*rc*rc*rc;
   const double rc6 = rc5 * rc;
 
   const double cmi1 = param->cmn1;
   const double cmi2 = param->cmn2;
   const double cmj1 = param->cml1;
   const double cmj2 = param->cml2;
 
   const double rf5 = 1.0/r5 - 1.0/rc5 + 5.0*(r-rc)/rc6;
   const double drf6 = 5.0/rc6 - 5.0/r6;
 
   // field correction energy
 
   const double smpn = rf5*jq*(cmi1+jq*cmi2);
   const double smpl = rf5*iq*(cmj1+iq*cmj2);
   vionij += smpn + smpl;
 
   // field correction force
 
   const double rfx1 = jq*drf6*(cmi1+jq*cmi2)/r;
   const double rfx2 = iq*drf6*(cmj1+iq*cmj2)/r;
   fvionij -= rfx1 + rfx2;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairComb::yasu_char(double *qf_fix, int &igroup)
 {
   // Accumulates the charge force of every atom in group igroup into
   // qf_fix (kept in the member qf) and returns the sum of those charge
   // forces over all processors.
 
   int i,j,ii,jj,jnum;
   int itype,jtype,iparam_i,iparam_ij;
   tagint itag,jtag;
   double xtmp,ytmp,ztmp;
   double rsq1,delr1[3];
   int *ilist,*jlist,*numneigh,**firstneigh;
   double iq,jq,fqi,fqij,fqjj;
   double potal,fac11,fac11e,sr1,sr2,sr3;
   int mr1,mr2,mr3,inty,nj;
 
 
   double **x = atom->x;
   double *q = atom->q;
   int *type = atom->type;
   tagint *tag = atom->tag;
 
   int inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   int *mask = atom->mask;
   int groupbit = group->bitmask[igroup];
 
   // zero the charge force of the listed atoms that belong to the group
 
   qf = qf_fix;
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     if (mask[i] & groupbit)
       qf[i] = 0.0;
   }
 
   // communicating charge force to all nodes, first forward then reverse
 
   comm->forward_comm_pair(this);
 
   // self energy correction term: potal
 
   potal_calc(potal,fac11,fac11e);
 
   // loop over full neighbor list of my atoms
 
   fqi = fqij = fqjj = 0.0;
 
   for (ii = 0; ii < inum; ii ++) {
     i = ilist[ii];
     itag = tag[i];
     nj = 0;   // counts the short-range neighbors of i; used as bbij column
     if (mask[i] & groupbit) {
       itype = map[type[i]];
       xtmp = x[i][0];
       ytmp = x[i][1];
       ztmp = x[i][2];
       iq = q[i];
       iparam_i = elem2param[itype][itype][itype];
 
       // charge force from self energy
 
       fqi = qfo_self(&params[iparam_i],iq,potal);
 
       // two-body interactions
 
       jlist = firstneigh[i];
       jnum = numneigh[i];
 
       for (jj = 0; jj < jnum; jj++) {
         j = jlist[jj];
         j &= NEIGHMASK;
         jtag = tag[j];
 
         // skip the pair depending on tag order and the parity of
         // itag+jtag, or on coordinates when the tags are equal;
         // presumably so that each i-j pair is handled only once
 
         if (itag > jtag) {
           if ((itag+jtag) % 2 == 0) continue;
         } else if (itag < jtag) {
           if ((itag+jtag) % 2 == 1) continue;
         } else {
           if (x[j][2] < x[i][2]) continue;
           if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
           if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
         }
 
         jtype = map[type[j]];
         jq = q[j];
 
         delr1[0] = x[j][0] - xtmp;
         delr1[1] = x[j][1] - ytmp;
         delr1[2] = x[j][2] - ztmp;
         rsq1 = vec3_dot(delr1,delr1);
 
         iparam_ij = elem2param[itype][jtype][jtype];
 
         // long range q-dependent
 
         if (rsq1 > params[iparam_ij].lcutsq) continue;
 
         inty = intype[itype][jtype];
 
         // polynomial three-point interpolation
 
         tri_point(rsq1,mr1,mr2,mr3,sr1,sr2,sr3,itype);
 
         // 1/r charge forces
 
         qfo_direct(inty,mr1,mr2,mr3,rsq1,sr1,sr2,sr3,fac11e,fqij);
         fqi += jq * fqij;  qf[j] += iq * fqij;
 
         // field correction to self energy and charge force
 
         qfo_field(&params[iparam_ij],rsq1,iq,jq,fqij,fqjj);
         fqi += fqij;
         qf[j] += fqjj;
       }
 
         // three-body interactions
         // (this loop visits every neighbor; no pair is skipped by tag)
 
       for (jj = 0; jj < jnum; jj++) {
         j = jlist[jj];
         j &= NEIGHMASK;
         jtype = map[type[j]];
         jq = q[j];
 
         delr1[0] = x[j][0] - xtmp;
         delr1[1] = x[j][1] - ytmp;
         delr1[2] = x[j][2] - ztmp;
         rsq1 = vec3_dot(delr1,delr1);
 
         iparam_ij = elem2param[itype][jtype][jtype];
 
         if (rsq1 > params[iparam_ij].cutsq) continue;
         nj ++;
 
         // charge force in Aij and Bij
         // NOTE(review): nj is passed as the second index and is 1-based
         // here - confirm it matches the index used when bbij was filled
 
         qfo_short(&params[iparam_ij],i,nj,rsq1,iq,jq,fqij,fqjj);
         fqi += fqij;  qf[j] += fqjj;
       }
       qf[i] += fqi;
     }
   }
 
   comm->reverse_comm_pair(this);
 
   // sum charge force on each node and return it
 
   double eneg = 0.0;
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     if (mask[i] & groupbit)
       eneg += qf[i];
   }
   double enegtot;
   MPI_Allreduce(&eneg,&enegtot,1,MPI_DOUBLE,MPI_SUM,world);
   return enegtot;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairComb::qfo_self(Param *param, double qi, double selfpot)
 {
   // Charge force from the self energy: a polynomial in qi built from
   // chi, dj(+selfpot), dk, dl, dm, plus cubic barrier terms once qi
   // leaves the range [0.9*QL1, 0.9*QU1].
 
   const double s1 = param->chi;
   const double s2 = param->dj;
   const double s3 = param->dk;
   const double s4 = param->dl;
   const double s5 = param->dm;
 
   const double q_lower = param->QL1*0.90;
   const double q_upper = param->QU1*0.90;
   const double barrier_lower = 1000.0;
   const double barrier_upper = 1000.0;
 
   double force_q =
     s1+qi*(2.0*(s2+selfpot)+qi*(3.0*s3+qi*(4.0*s4+qi*qi*6.0*s5)));
 
   // barrier terms; the warnings for hitting the min/max barrier that
   // used to sit in these branches are disabled
 
   if (qi < q_lower) {
     force_q += 4.0 * barrier_lower * pow((qi-q_lower),3.0);
   }
   if (qi > q_upper) {
     force_q += 4.0 * barrier_upper * pow((qi-q_upper),3.0);
   }
 
   return force_q;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb::qfo_direct(int inty, int mr1, int mr2, int mr3,
                           double rsq, double sr1, double sr2,
                           double sr3, double fac11e, double &fqij)
 {
   // Charge force of the 1/r term for one pair, interpolated from the
   // erpaw and fafb tables with indices mr1..mr3 and weights sr1..sr3.
 
   const double esucon = force->qqr2e;
   const double r = sqrt(rsq);
 
   // 1/r force (wrt q)
 
   const double erfcc =
     sr1*erpaw[mr1][0]   + sr2*erpaw[mr2][0]   + sr3*erpaw[mr3][0];
   const double fafbn1 =
     sr1*fafb[mr1][inty] + sr2*fafb[mr2][inty] + sr3*fafb[mr3][inty];
   const double vm = (erfcc/r * esucon - fac11e);
 
   fqij = vm+esucon*fafbn1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb::qfo_field(Param *param, double rsq,double iq,double jq,
                          double &fqij, double &fqjj)
 {
   // Charge forces of the field correction for one pair: fqij uses
   // cml1/cml2 with iq, fqjj uses cmn1/cmn2 with jq.  The radial factor
   // is 1/r^5 - 1/rc^5 + 5*(r-rc)/rc^6 with rc = lcut.
 
   const double r = sqrt(rsq);
   const double r5 = r*r*r*r*r;
 
   const double rc = param->lcut;
   const double rc5 = rc*rc*rc*rc*rc;
   const double rc6 = rc5 * rc;
 
   const double cmi1 = param->cmn1;
   const double cmi2 = param->cmn2;
   const double cmj1 = param->cml1;
   const double cmj2 = param->cml2;
 
   const double rf5 = 1.0/r5 - 1.0/rc5 + 5.0*(r-rc)/rc6;
 
   // field correction charge force
 
   fqij = rf5 * (cmj1 + 2.0 * iq * cmj2);
   fqjj = rf5 * (cmi1 + 2.0 * jq * cmi2);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb::qfo_short(Param *param, int i, int j, double rsq,
                          double iq, double jq, double &fqij, double &fqjj)
 {
   // Computes fqij and fqjj from the two exponential short-range terms
   // (exp(-rlm1*r) and exp(-rlm2*r)) of the pair described by param.
   //   param  = parameter set of the pair
   //   i,j    = indices used to look up the stored bond order bbij[i][j]
   //   rsq    = squared separation
   //   iq,jq  = charges of the two atoms
   //   fqij   = output built from the qi-derivatives (ca1, ca2)
   //   fqjj   = output built from the qj-derivatives (ca3, ca4)
 
   double r,tmp_fc,tmp_exp1,tmp_exp2;
   double Asi,Asj,vrcs;
   double romi = param->addrep,rrcs = param->bigr + param->bigd;
   double qi,qj,Di,Dj,Bsi,Bsj;
   double QUchi,QOchi,QUchj,QOchj,YYDiqp,YYDjqp;
   double YYAsiqp,YYAsjqp,YYBsiqp,YYBsjqp;
   double caj,cbj,bij,cfqr,cfqs;
   double romie = param->romiga;
   double romib = param->romigb;
   double ca1,ca2,ca3,ca4;
   double rslp,rslp2,rslp4,arr1,arr2,fc2j,fc3j;
 
   qi = iq; qj = jq; r = sqrt(rsq);
   Di = Dj = Asi = Asj = Bsi = Bsj = 0.0;
   QUchi = QOchi = QUchj = QOchj = YYDiqp = YYDjqp =0.0;
   YYAsiqp = YYAsjqp = YYBsiqp = YYBsjqp = 0.0;
   caj = cbj = vrcs = cfqr = cfqs = 0.0;
 
   // cutoff function value and the two radial exponentials
 
   tmp_fc = comb_fc(r,param);
   tmp_exp1 = exp(-param->rlm1 * r);
   tmp_exp2 = exp(-param->rlm2 * r);
   bij = bbij[i][j]; //comb_bij(zeta_ij,param);
 
   // arr1, arr2 are hard-coded distances used only in the cor_flag branch
 
   arr1 = 2.22850; arr2 = 1.89350;
   fc2j = comb_fc2(r);
   fc3j = comb_fc3(r);
 
   // vrcs = additional term scaled by addrep; stays 0 unless addrep > 0
   // without cor_flag: quadratic in (1 - r/rrcs)
   // with cor_flag: quartic polynomial in rslp times fc2j*fc3j
 
   vrcs = 0.0;
   if (romi > 0.0) {
     if (!cor_flag) vrcs = romi * pow((1.0-r/rrcs),2.0);
     else if (cor_flag) {
       rslp = ((arr1-r)/(arr1-arr2));
       rslp2 = rslp * rslp; rslp4 = rslp2 * rslp2;
       vrcs = fc2j * fc3j * romi * ((50.0*rslp4-30.0*rslp2+4.50))/8.0;
     }
   }
 
   // charge-dependent exponents Di(qi), Dj(qj)
 
   Di = param->DU1 + pow(fabs(param->bD1*(param->QU1-qi)),param->nD1);
   Dj = param->DU2 + pow(fabs(param->bD2*(param->QU2-qj)),param->nD2);
 
   // charge-dependent prefactors As (first exponential) and Bs (second)
 
   Asi = param->biga1 * exp(param->lam11*Di);
   Asj = param->biga2 * exp(param->lam12*Dj);
   Bsi = param->bigb1 * exp(param->lam21*Di)*
     (param->aB1-fabs(pow(param->bB1*(qi-param->Qo1),10.0)));
   Bsj = param->bigb2 * exp(param->lam22*Dj)*
     (param->aB2-fabs(pow(param->bB2*(qj-param->Qo2),10.0)));
 
   // scaled charge offsets reused in the derivatives below
 
   QUchi = (param->QU1-qi)*param->bD1;
   QUchj = (param->QU2-qj)*param->bD2;
   QOchi = (qi-param->Qo1)*param->bB1;
   QOchj = (qj-param->Qo2)*param->bB2;
 
   // YYDiqp = dDi/dqi, YYDjqp = dDj/dqj
   // a zero offset is special-cased so pow() is never called with base 0
 
   if (QUchi == 0.0) YYDiqp = 0.0;
   else YYDiqp = -param->nD1 * QUchi * param->bD1 *
          pow(fabs(QUchi),(param->nD1-2.0));
 
   if (QUchj == 0.0) YYDjqp = 0.0;
   else YYDjqp = -param->nD2 * QUchj * param->bD2 *
          pow(fabs(QUchj),(param->nD2-2.0));
 
   // YYAsiqp = dAsi/dqi, YYAsjqp = dAsj/dqj (chain rule through Di, Dj)
 
   YYAsiqp = Asi * param->lam11 * YYDiqp;
   YYAsjqp = Asj * param->lam12 * YYDjqp;
 
   // YYBsiqp = dBsi/dqi, YYBsjqp = dBsj/dqj
   // the second term is the derivative of the 10th-power factor and is
   // skipped when the corresponding offset is zero
 
   if (QOchi == 0.0)
     YYBsiqp=Bsi*param->lam21*YYDiqp;
   else
     YYBsiqp=Bsi*param->lam21*YYDiqp-param->bigb1*exp(param->lam21*Di)*
       10.0*QOchi*param->bB1*pow(fabs(QOchi),(10.0-2.0));
 
   if (QOchj == 0.0)
     YYBsjqp=Bsj*param->lam22*YYDjqp;
   else
     YYBsjqp=Bsj*param->lam22*YYDjqp-param->bigb2*exp(param->lam22*Dj)*
       10.0*QOchj*param->bB2*pow(fabs(QOchj),(10.0-2.0));
 
   // caj, cbj = romiga/(2 sqrt(Asi*Asj)) and romigb/(2 sqrt(Bsi*Bsj)),
   // set to 0 when either factor is not positive so sqrt() stays real
 
   if (Asi > 0.0 && Asj > 0.0) caj = 1.0/(2.0*sqrt(Asi*Asj)) * romie;
   else caj = 0.0;
 
   if (Bsi > 0.0 && Bsj > 0.0) cbj = 1.0/(2.0*sqrt(Bsi*Bsj)) * romib ;
   else cbj = 0.0;
 
   cfqr =  0.50 * tmp_fc * (1.0 + vrcs); // 0.5 b/c full atom loop
   cfqs = -0.50 * tmp_fc *  bij;
 
   // ca1, ca2 = derivatives wrt qi of romiga*sqrt(Asi*Asj), romigb*sqrt(Bsi*Bsj)
   // ca3, ca4 = the same derivatives wrt qj
 
   ca1 = Asj * caj * YYAsiqp;
   ca2 = Bsj * cbj * YYBsiqp;
   ca3 = Asi * caj * YYAsjqp;
   ca4 = Bsi * cbj * YYBsjqp;
 
   // combine the two exponential terms for each atom
 
   fqij  = cfqr * tmp_exp1 * ca1;
   fqij += cfqs * tmp_exp2 * ca2;
   fqjj  = cfqr * tmp_exp1 * ca3;
   fqjj += cfqs * tmp_exp2 * ca4;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb::Over_cor(Param *param, double rsq1, int NCoi,
                         double &Eov, double &Fov)
 {
   // correction that is nonzero only when the count NCoi exceeds 7
   //   Eov = energy term, Fov = matching force term divided by r
 
   const double dist = sqrt(rsq1);
   const int nexcess = NCoi - 7;
 
   const double fcut = comb_fc(dist,param);
 
   // NOTE(review): the variable that multiplies Eov in the force is filled
   // by comb_fc(), the same call as above, not by a derivative routine;
   // kept as is, confirm whether this is intended
 
   const double fcut_d = comb_fc(dist,param);
 
   const double ecor = param->hfocor;
   const double bcor = 0.1;
 
   Eov = 0.0;
   Fov = 0.0;
 
   // integer excess compared against 0.20, i.e. at least one extra neighbor
 
   if (!(nexcess >= 0.20)) return;
 
   const double expo = exp(bcor*nexcess);
   const double denom = 1.0 + expo;
 
   Eov = fcut * ecor * nexcess/denom;
   Fov = -(fcut_d*Eov + fcut*ecor/denom -
           (fcut*ecor*nexcess*bcor*expo) / (denom*denom));
   Fov /= dist;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int PairComb::pack_forward_comm(int n, int *list, double *buf,
                                 int pbc_flag, int *pbc)
 {
   // copy qf of every listed atom into buf, return number of values packed
 
   int npacked = 0;
   for (int k = 0; k < n; k++) {
     const int iatom = list[k];
     buf[npacked] = qf[iatom];
     npacked++;
   }
   return npacked;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb::unpack_forward_comm(int n, int first, double *buf)
 {
   // overwrite qf of atoms first .. first+n-1 with the received values
 
   const int last = first + n;
   const double *src = buf;
   for (int iatom = first; iatom < last; iatom++) {
     qf[iatom] = *src;
     src++;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int PairComb::pack_reverse_comm(int n, int first, double *buf)
 {
   // copy qf of atoms first .. first+n-1 into buf, return number packed
 
   const int last = first + n;
   int npacked = 0;
   for (int iatom = first; iatom < last; iatom++) {
     buf[npacked] = qf[iatom];
     npacked++;
   }
   return npacked;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb::unpack_reverse_comm(int n, int *list, double *buf)
 {
   // add each received value to qf of the corresponding listed atom
 
   const double *src = buf;
   for (int k = 0; k < n; k++) {
     const int iatom = list[k];
     qf[iatom] += *src;
     src++;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb::Short_neigh()
 {
   int nj;
   int inum,jnum,i,j,ii,jj;
   int *neighptrj,*ilist,*jlist,*numneigh;
   int **firstneigh;
   double xtmp,ytmp,ztmp,rsq,delrj[3];
 
   double **x = atom->x;
 
   if (atom->nmax > nmax) {
     memory->sfree(sht_first);
     nmax = atom->nmax;
     sht_first = (int **) memory->smalloc(nmax*sizeof(int *),
                                          "pair:sht_first");
     memory->grow(sht_num,nmax,"pair:sht_num");
     memory->grow(NCo,nmax,"pair:NCo");
     memory->grow(bbij,nmax,MAXNEIGH,"pair:bbij");
   }
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // create Comb neighbor list
 
   ipage->reset();
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
 
     nj = 0;
     neighptrj = ipage->vget();
 
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
 
       delrj[0] = xtmp - x[j][0];
       delrj[1] = ytmp - x[j][1];
       delrj[2] = ztmp - x[j][2];
       rsq = vec3_dot(delrj,delrj);
 
       if (rsq > cutmin) continue;
       neighptrj[nj++] = j;
     }
 
     sht_first[i] = neighptrj;
     sht_num[i] = nj;
     ipage->vgot(nj);
     if (ipage->status())
       error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
 ------------------------------------------------------------------------- */
 
 double PairComb::memory_usage()
 {
   // per-atom energy and virial (6 components) accumulators
   double total = maxeatom * sizeof(double);
   total += maxvatom*6 * sizeof(double);
 
   // one int and one int pointer per atom
   total += nmax * sizeof(int);
   total += nmax * sizeof(int *);
 
   // neighbor pages, one per thread
   int ithread = 0;
   while (ithread < comm->nthreads) {
     total += ipage[ithread].size();
     ithread++;
   }
 
   // another int per atom plus MAXNEIGH doubles per atom
   total += nmax * sizeof(int);
   total += MAXNEIGH*nmax * sizeof(double);
   return total;
 }
diff --git a/src/MANYBODY/pair_comb3.cpp b/src/MANYBODY/pair_comb3.cpp
index 2cc5afe57..8244df3bc 100644
--- a/src/MANYBODY/pair_comb3.cpp
+++ b/src/MANYBODY/pair_comb3.cpp
@@ -1,3947 +1,3947 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Ray Shan (Sandia, tnshan@sandia.gov)
    Updates and debug: Tao Liang (U Florida, liang75@ufl.edu)
                       Dundar Yilmaz (dundar.yilmaz@zirve.edu.tr)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_comb3.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "group.h"
 #include "update.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 #define MAXLINE 1024
 #define DELTA 4
 #define PGDELTA 1
 #define MAXNEIGH 24
 
 /* ---------------------------------------------------------------------- */
 
 PairComb3::PairComb3(LAMMPS *lmp) : Pair(lmp)
 {
   // pair style flags
 
   ghostneigh = 1;
   one_coeff = 1;
   restartinfo = 0;
   single_enable = 0;
 
   // set comm size needed by this Pair
 
   comm_forward = 1;
   comm_reverse = 1;
 
   // counters and state flags start at zero
 
   cflag = 0;
   nmax = 0;
   nelements = 0;
   nparams = 0;
   maxparam = 0;
   pgsize = 0;
   oneatom = 0;
 
   // element list and parameter tables: nothing read yet
 
   elements = NULL;
   params = NULL;
   elem2param = NULL;
 
   // per-atom arrays and short neighbor list storage
 
   NCo = NULL;
   bbij = NULL;
   sht_num = NULL;
   sht_first = NULL;
   ipage = NULL;
 
   // look-up tables and temporary arrays
 
   intype = NULL;
   afb = NULL;
   dafb = NULL;
   fafb = NULL;
   dfafb = NULL;
   ddfafb = NULL;
   phin = NULL;
   dphin = NULL;
   erpaw = NULL;
   vvdw = NULL;
   vdvdw = NULL;
   dpl = NULL;
   xcctmp = NULL;
   xchtmp = NULL;
   xcotmp = NULL;
 }
 
 /* ----------------------------------------------------------------------
    check if allocated, since class can be destructed when incomplete
 ------------------------------------------------------------------------- */
 
 PairComb3::~PairComb3()
 {
   // element names, parameter sets and the element -> parameter map
 
   if (elements) {
     for (int ielem = 0; ielem < nelements; ielem++)
       delete [] elements[ielem];
   }
   delete [] elements;
   memory->sfree(params);
   memory->destroy(elem2param);
 
   // per-atom arrays and short neighbor list storage
 
   memory->destroy(NCo);
   memory->destroy(bbij);
   memory->destroy(sht_num);
   memory->sfree(sht_first);
   delete [] ipage;
 
   // look-up tables and temporary arrays
 
   memory->destroy(intype);
   memory->destroy(afb);
   memory->destroy(dafb);
   memory->destroy(fafb);
   memory->destroy(dfafb);
   memory->destroy(ddfafb);
   memory->destroy(phin);
   memory->destroy(dphin);
   memory->destroy(erpaw);
   memory->destroy(vvdw);
   memory->destroy(vdvdw);
   memory->destroy(dpl);
   memory->destroy(xcctmp);
   memory->destroy(xchtmp);
   memory->destroy(xcotmp);
 
   // arrays created by allocate() exist only if it was called
 
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
   memory->destroy(cutghost);
   delete [] map;
   delete [] esm;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb3::allocate()
 {
   // per-type-pair arrays are (ntypes+1) x (ntypes+1)
 
   const int ntypes = atom->ntypes;
   const int np1 = ntypes + 1;
 
   allocated = 1;
 
   memory->create(setflag,np1,np1,"pair:setflag");
   memory->create(cutsq,np1,np1,"pair:cutsq");
   memory->create(cutghost,np1,np1,"pair:cutghost");
 
   // map has ntypes+1 entries, esm has ntypes entries
 
   map = new int[np1];
   esm = new double[ntypes];
 }
 
 /* ----------------------------------------------------------------------
    global settings 
 ------------------------------------------------------------------------- */
 
 void PairComb3::settings(int narg, char **arg)
 {
 
   if (strcmp(arg[0],"polar_on") == 0) {
     pol_flag = 1;
     if (comm->me == 0) fprintf(screen,
 		    "	PairComb3: polarization is on \n");
   } else if (strcmp(arg[0],"polar_off") == 0) {
     pol_flag = 0;
     if (comm->me == 0) fprintf(screen,
 		    "	PairComb3: polarization is off \n");
   } else {
     error->all(FLERR,"Illegal pair_style command");
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairComb3::coeff(int narg, char **arg)
 {
   // expected arguments: * * potential-file element-per-atom-type ...
   //   arg[2] = potential file name
   //   arg[3..] = one element name (or NULL) for each atom type
 
   if (!allocated) allocate();
 
   if (narg != 3 + atom->ntypes)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // insure I,J args are * *
   if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // read args that map atom types to elements in potential file
   // map[i] = which element the Ith atom type is, -1 if NULL
   // nelements = # of unique elements
   // elements = list of element names
 
   if (elements) {
     for (int i = 0; i < nelements; i++) delete [] elements[i];
     delete [] elements;
   }
   elements = new char*[atom->ntypes];
   for (int i = 0; i < atom->ntypes; i++) elements[i] = NULL;
 
   nelements = 0;
   for (int iarg = 3; iarg < narg; iarg++) {
     const int itype = iarg - 2;
 
     // the first time element C shows up, load the additional library file
     // screen may be NULL when screen output is disabled
 
     if ((strcmp(arg[iarg],"C") == 0) && (cflag == 0)) {
       if (comm->me == 0 && screen) fprintf(screen,
         "\tPairComb3: Found C: reading additional library file \n");
       read_lib();
       cflag = 1;
     }
 
     if (strcmp(arg[iarg],"NULL") == 0) {
       map[itype] = -1;
       continue;
     }
 
     // look the name up in the list so far, append it if it is new
 
     int j;
     for (j = 0; j < nelements; j++)
       if (strcmp(arg[iarg],elements[j]) == 0) break;
     map[itype] = j;
     if (j == nelements) {
       const int len = strlen(arg[iarg]) + 1;
       elements[j] = new char[len];
       strcpy(elements[j],arg[iarg]);
       nelements++;
     }
   }
 
   // read potential file and initialize potential parameters
 
   read_file(arg[2]);
   setup();
 
   const int n = atom->ntypes;
 
   // generate Wolf 1/r energy and van der Waals look-up tables
   tables();
 
   // clear setflag since coeff() called once with I,J = * *
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       setflag[i][j] = 0;
 
   // set setflag i,j for type pairs where both are mapped to elements
   int count = 0;
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       if (map[i] >= 0 && map[j] >= 0) {
         setflag[i][j] = 1;
         count++;
       }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairComb3::init_style()
 {
   // this pair style needs atom IDs, newton pair on, and per-atom charge
 
   if (atom->tag_enable == 0)
     error->all(FLERR,"Pair style COMB3 requires atom IDs");
   if (force->newton_pair == 0)
     error->all(FLERR,"Pair style COMB3 requires newton pair on");
   if (!atom->q_flag)
     error->all(FLERR,"Pair style COMB3 requires atom attribute q");
 
   // need a full neighbor list
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   // switch the request from half to full and set its ghost flag
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
   neighbor->requests[irequest]->ghost = 1;
 
   // local Comb neighbor list
   // create pages if first time or if neighbor pgsize/oneatom has changed
 
   int create = 0;
   if (ipage == NULL) create = 1;
   if (pgsize != neighbor->pgsize) create = 1;
   if (oneatom != neighbor->oneatom) create = 1;
 
   if (create) {
     // drop any old pages, then build one MyPage per thread using the
     // current neighbor settings
     delete [] ipage;
     pgsize = neighbor->pgsize;
     oneatom = neighbor->oneatom;
 
     int nmypage = comm->nthreads;
     ipage = new MyPage<int>[nmypage];
     for (int i = 0; i < nmypage; i++)
       ipage[i].init(oneatom,pgsize);
   }
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairComb3::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
   cutghost[j][i] = cutghost[i][j] = cutmax;
   return cutmax;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb3::read_lib()
 {
   unsigned int maxlib = 1024;
   int i,j,k,l,nwords,m;
   int ii,jj,kk,ll,mm,iii;
   char s[maxlib];
   char **words = new char*[80];
 
   MPI_Comm_rank(world,&comm->me);
 
   // open libraray file on proc 0
 
   if(comm->me == 0) {
     FILE *fp = force->open_potential("lib.comb3");
     if (fp == NULL) {
       char str[128];
       sprintf(str,"Cannot open COMB3 C library file \n");
       error->one(FLERR,str);
     }
 
     // read and store at the same time
     fgets(s,maxlib,fp);
     nwords = 0;
     words[nwords++] = strtok(s," \t\n\r\f");
     while ((words[nwords++] = strtok(NULL," \t\n\r\f")))continue;
     ccutoff[0] = atof(words[0]);
     ccutoff[1] = atof(words[1]);
     ccutoff[2] = atof(words[2]); 
     ccutoff[3] = atof(words[3]);
     ccutoff[4] = atof(words[4]);
     ccutoff[5] = atof(words[5]);
 
     fgets(s,maxlib,fp);
     nwords = 0;
     words[nwords++] = strtok(s," \t\n\r\f");
     while ((words[nwords++] = strtok(NULL," \t\n\r\f")))continue;
     ch_a[0] = atof(words[0]);
     ch_a[1] = atof(words[1]);
     ch_a[2] = atof(words[2]); 
     ch_a[3] = atof(words[3]);
     ch_a[4] = atof(words[4]);
     ch_a[5] = atof(words[5]);
     ch_a[6] = atof(words[6]);
     
     fgets(s,maxlib,fp);
     nwords = 0;
     words[nwords++] = strtok(s," \t\n\r\f");
     while ((words[nwords++] = strtok(NULL," \t\n\r\f")))continue;
     nsplpcn = atoi(words[0]);
     nsplrad = atoi(words[1]);
     nspltor = atoi(words[2]);
     
     fgets(s,maxlib,fp);
     nwords = 0;
     words[nwords++] = strtok(s," \t\n\r\f");
     while ((words[nwords++] = strtok(NULL," \t\n\r\f")))continue;
     maxx = atoi(words[0]);
     maxy = atoi(words[1]);
     maxz = atoi(words[2]);
     
     fgets(s,maxlib,fp);
     nwords = 0;
     words[nwords++] = strtok(s," \t\n\r\f");
     while ((words[nwords++] = strtok(NULL," \t\n\r\f")))continue;
     maxxc = atoi(words[0]);
     maxyc = atoi(words[1]);
     maxconj = atoi(words[2]);
 
     for (l=0; l<nsplpcn; l++) { 
       fgets(s,maxlib,fp);
       nwords = 0;
       words[nwords++] = strtok(s," \t\n\r\f");
       while ((words[nwords++] = strtok(NULL," \t\n\r\f")))continue;
       maxxcn[l] = atoi(words[1]);
       vmaxxcn[l] = atof(words[2]);
       dvmaxxcn[l] = atof(words[3]);
     }
     
     fgets(s,maxlib,fp);
     nwords = 0;
     words[nwords++] = strtok(s," \t\n\r\f");
     while ((words[nwords++] = strtok(NULL," \t\n\r\f")))continue;
     ntab = atoi(words[0]);
 
     for (i=0; i<ntab+1; i++){
       fgets(s,maxlib,fp); 
       nwords = 0;
       words[nwords++] = strtok(s," \t\n\r\f");
       while ((words[nwords++] = strtok(NULL," \t\n\r\f")))continue;
       pang[i]    = atof(words[1]);
       dpang[i]   = atof(words[2]);
       ddpang[i]  = atof(words[3]);
     }
 
     for (l=0; l<nsplpcn; l++) 
       for (i=0; i<maxx+1; i++)
         for (j=0; j<maxy+1; j++)
           for (k=0; k<maxz+1; k++) { 
             fgets(s,maxlib,fp); 
             nwords = 0;
             words[nwords++] = strtok(s," \t\n\r\f");
             while ((words[nwords++] = strtok(NULL," \t\n\r\f")))continue; 
             ll = atoi(words[0])-1;
             ii = atoi(words[1]);
             jj = atoi(words[2]);
             kk = atoi(words[3]);
             pcn_grid[ll][ii][jj][kk]     = atof(words[4]);
             pcn_gridx[ll][ii][jj][kk]    = atof(words[5]);
             pcn_gridy[ll][ii][jj][kk]    = atof(words[6]);
             pcn_gridz[ll][ii][jj][kk]    = atof(words[7]);
 	  }
 
     for (l=0; l<nsplpcn; l++)
       for (i=0; i<maxx; i++)
         for (j=0; j<maxy; j++)
           for (k=0; k<maxz; k++) { 
             fgets(s,maxlib,fp); 
             nwords = 0;
             words[nwords++] = strtok(s," \t\n\r\f");
             while ((words[nwords++] = strtok(NULL," \t\n\r\f")))continue;
            ll = atoi(words[0])-1;
            ii = atoi(words[1]);
            jj = atoi(words[2]);
            kk = atoi(words[3]);
            for(iii=0; iii<2; iii++) {
              fgets(s,maxlib,fp); 
              nwords = 0;
              words[nwords++] = strtok(s," \t\n\r\f");
              while ((words[nwords++] = strtok(NULL," \t\n\r\f")))continue;
              for(m=0; m<32 ; m++) {
                mm=iii*32+m;
                pcn_cubs[ll][ii][jj][kk][mm] = atof(words[m]);
 	     }
 	   }
 	  }
 
     for (l=0; l<nsplrad; l++) 
       for (i=0; i<maxxc+1; i++)
         for (j=0; j<maxyc+1; j++)
           for (k=0; k<maxconj; k++) { 
             fgets(s,maxlib,fp); 
             nwords = 0;
             words[nwords++] = strtok(s," \t\n\r\f");
             while ((words[nwords++] = strtok(NULL," \t\n\r\f")))continue;
             ll = atoi(words[0])-1;
             ii = atoi(words[1]);
             jj = atoi(words[2]);
             kk = atoi(words[3])-1;
             rad_grid[ll][ii][jj][kk]     = atof(words[4]);
             rad_gridx[ll][ii][jj][kk]    = atof(words[5]);
             rad_gridy[ll][ii][jj][kk]    = atof(words[6]);
             rad_gridz[ll][ii][jj][kk]    = atof(words[7]);
 	  }
 
     for (l=0; l<nsplrad; l++)
       for (i=0; i<maxxc; i++)
         for (j=0; j<maxyc; j++)
           for (k=0; k<maxconj-1; k++) { 
             fgets(s,maxlib,fp); 
             nwords = 0;
             words[nwords++] = strtok(s," \t\n\r\f");
             while ((words[nwords++] = strtok(NULL," \t\n\r\f")))continue;
             ll = atoi(words[0])-1;
             ii = atoi(words[1]);
             jj = atoi(words[2]);
             kk = atoi(words[3])-1;
             for (iii=0; iii<2; iii++) {
               fgets(s,maxlib,fp);
               nwords = 0;
               words[nwords++] = strtok(s," \t\n\r\f");
               while ((words[nwords++] = strtok(NULL," \t\n\r\f")))continue;
               for(m=0; m<32 ; m++){
                 mm=iii*32+m;
                 rad_spl[ll][ii][jj][kk][mm] = atof(words[m]);
 	      }
 	    }
 	  }
    
     for (l=0; l<nspltor; l++)
       for (i=0; i<maxxc+1; i++)
         for (j=0; j<maxyc+1; j++)
           for (k=0; k<maxconj; k++) { 
             fgets(s,maxlib,fp); 
             nwords = 0;
             words[nwords++] = strtok(s," \t\n\r\f");
             while ((words[nwords++] = strtok(NULL," \t\n\r\f")))continue;
             ll = atoi(words[0])-1;
             ii = atoi(words[1]);
             jj = atoi(words[2]);
             kk = atoi(words[3])-1;
             tor_grid[ll][ii][jj][kk]     = atof(words[4]);
             tor_gridx[ll][ii][jj][kk]    = atof(words[5]);
             tor_gridy[ll][ii][jj][kk]    = atof(words[6]);
             tor_gridz[ll][ii][jj][kk]    = atof(words[7]);
 	  }
     
     for (l=0; l<nspltor; l++)
       for (i=0; i<maxxc; i++)
         for (j=0; j<maxyc; j++)
           for (k=0; k<maxconj-1; k++) { 
             fgets(s,maxlib,fp); 
             nwords = 0;
             words[nwords++] = strtok(s," \t\n\r\f");
             while ((words[nwords++] = strtok(NULL," \t\n\r\f")))continue;
             ll = atoi(words[0])-1;
             ii = atoi(words[1]);
             jj = atoi(words[2]);
             kk = atoi(words[3])-1;
             for(iii=0; iii<2; iii++) {
               fgets(s,maxlib,fp);
               nwords = 0;
               words[nwords++] = strtok(s," \t\n\r\f");
               while ((words[nwords++] = strtok(NULL," \t\n\r\f")))continue;
               for (m=0; m<32 ; m++){
                 mm=iii*32+m;
                 tor_spl[ll][ii][jj][kk][mm] = atof(words[m]);
 	      }
 	    }
 	  }
     
     fclose(fp);
   } 
 
   k = 0;
   for (i=0; i<4; i++)
     for (j=0; j<4; j++) {
       iin2[k][0] = i;
       iin2[k][1] = j;
       k ++;
     }
 
   l = 0;
   for (i=0; i<4; i++)
     for (j=0; j<4; j++)
       for (k=0; k<4; k++) {
         iin3[l][0] = i;
         iin3[l][1] = j;
         iin3[l][2] = k;
         l ++;
       }
 
   MPI_Bcast(&ccutoff[0],6,MPI_DOUBLE,0,world);
   MPI_Bcast(&ch_a[0],7,MPI_DOUBLE,0,world);
   MPI_Bcast(&nsplpcn,1,MPI_INT,0,world);
   MPI_Bcast(&nsplrad,1,MPI_INT,0,world);
   MPI_Bcast(&nspltor,1,MPI_INT,0,world);
   MPI_Bcast(&maxx,1,MPI_INT,0,world);
   MPI_Bcast(&maxy,1,MPI_INT,0,world);
   MPI_Bcast(&maxz,1,MPI_INT,0,world);
   MPI_Bcast(&maxxc,1,MPI_INT,0,world);
   MPI_Bcast(&maxyc,1,MPI_INT,0,world);
   MPI_Bcast(&maxconj,1,MPI_INT,0,world);
   MPI_Bcast(&maxxcn,4,MPI_INT,0,world); 
   MPI_Bcast(&vmaxxcn,4,MPI_DOUBLE,0,world);
   MPI_Bcast(&dvmaxxcn,4,MPI_DOUBLE,0,world);
   MPI_Bcast(&ntab,1,MPI_INT,0,world);
   MPI_Bcast(&pang[0],20001,MPI_DOUBLE,0,world);
   MPI_Bcast(&dpang[0],20001,MPI_DOUBLE,0,world);
   MPI_Bcast(&ddpang[0],20001,MPI_DOUBLE,0,world);
   MPI_Bcast(&pcn_grid[0][0][0][0],500,MPI_DOUBLE,0,world); 
   MPI_Bcast(&pcn_gridx[0][0][0][0],500,MPI_DOUBLE,0,world);
   MPI_Bcast(&pcn_gridy[0][0][0][0],500,MPI_DOUBLE,0,world);
   MPI_Bcast(&pcn_gridz[0][0][0][0],500,MPI_DOUBLE,0,world);
   MPI_Bcast(&pcn_cubs[0][0][0][0][0],16384,MPI_DOUBLE,0,world); 
 
   MPI_Bcast(&rad_grid[0][0][0][0],825,MPI_DOUBLE,0,world); 
   MPI_Bcast(&rad_gridx[0][0][0][0],825,MPI_DOUBLE,0,world);
   MPI_Bcast(&rad_gridy[0][0][0][0],825,MPI_DOUBLE,0,world);
   MPI_Bcast(&rad_gridz[0][0][0][0],825,MPI_DOUBLE,0,world);
   MPI_Bcast(&rad_spl[0][0][0][0][0],30720,MPI_DOUBLE,0,world); 
 
   MPI_Bcast(&tor_grid[0][0][0][0],275,MPI_DOUBLE,0,world);
   MPI_Bcast(&tor_gridx[0][0][0][0],275,MPI_DOUBLE,0,world);
   MPI_Bcast(&tor_gridy[0][0][0][0],275,MPI_DOUBLE,0,world);
   MPI_Bcast(&tor_gridz[0][0][0][0],275,MPI_DOUBLE,0,world);
   MPI_Bcast(&tor_spl[0][0][0][0][0],10240,MPI_DOUBLE,0,world); 
 
   MPI_Bcast(&iin2[0][0],32,MPI_INT,0,world);
   MPI_Bcast(&iin3[0][0],192,MPI_INT,0,world);
   delete [] words;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb3::read_file(char *file)
 {
   int params_per_line = 74;
   char **words = new char*[params_per_line+1];
 
   if (params) delete [] params;
   params = NULL;
   nparams = 0;
 
   // open file on proc 0
 
   FILE *fp;
   if (comm->me == 0) {
     fp = fopen(file,"r");
     if (fp == NULL) {
       char str[128];
       sprintf(str,"Cannot open COMB3 potential file %s",file);
       error->one(FLERR,str);
     }
   }
 
   // read each line out of file, skipping blank lines or leading '#'
   // store line of params if all 3 element tags are in element list
 
   int n,nwords,ielement,jelement,kelement;
   char line[MAXLINE],*ptr;
   int eof = 0;
   nwords=0;
   while (1) {
     if (comm->me == 0) {
       ptr = fgets(line,MAXLINE,fp);
       if (ptr == NULL) {
 	eof = 1;
 	fclose(fp);
       } else n = strlen(line) + 1;
     }
     MPI_Bcast(&eof,1,MPI_INT,0,world);
     if (eof) break;
     MPI_Bcast(&n,1,MPI_INT,0,world);
     MPI_Bcast(line,n,MPI_CHAR,0,world);
 
     // strip comment, skip line if blank
 
     if ((ptr = strchr(line,'#'))) *ptr = '\0';
 
     nwords = atom->count_words(line);
     if (nwords == 0) continue;
 
     // concatenate additional lines until have params_per_line words
 
     while (nwords < params_per_line) {
       n = strlen(line);
 
       if (comm->me == 0) {
         ptr = fgets(&line[n],MAXLINE-n,fp);
         if (ptr == NULL) {
 	  eof = 1;
 	  fclose(fp);
         } else n = strlen(line) + 1;
       }
       MPI_Bcast(&eof,1,MPI_INT,0,world);
       if (eof) break;
       MPI_Bcast(&n,1,MPI_INT,0,world);
       MPI_Bcast(line,n,MPI_CHAR,0,world);
       if ((ptr = strchr(line,'#'))) *ptr = '\0';
       nwords = atom->count_words(line);
     }
     if (nwords != params_per_line){
       error->all(FLERR,"Incorrect format in COMB3 potential file");
 }
     // words = ptrs to all words in line
 
     nwords = 0;
     words[nwords++] = strtok(line," \t\n\r\f");
     while ((nwords <= params_per_line) 
            && (words[nwords++] = strtok(NULL," \t\n\r\f"))) {
       continue;
     }
 
     // ielement,jelement,kelement = 1st args
     // if all 3 args are in element list, then parse this line
     // else skip to next line
     
     for (ielement = 0; ielement < nelements; ielement++)
       if (strcmp(words[0],elements[ielement]) == 0) break;
     if (ielement == nelements) continue;
     for (jelement = 0; jelement < nelements; jelement++)
       if (strcmp(words[1],elements[jelement]) == 0) break;
     if (jelement == nelements) continue;
     for (kelement = 0; kelement < nelements; kelement++)
       if (strcmp(words[2],elements[kelement]) == 0) break;
     if (kelement == nelements) continue;
 
     // load up parameter settings and error check their values
 
     if (nparams == maxparam) {
       maxparam += DELTA;
       params = (Param *) memory->srealloc(params,maxparam*sizeof(Param),
 					  "pair:params");
     }
 
     params[nparams].ielement = ielement;
     params[nparams].jelement = jelement;
     params[nparams].kelement = kelement;
     params[nparams].ielementgp = atoi(words[3]);
     params[nparams].jelementgp = atoi(words[4]);
     params[nparams].kelementgp = atoi(words[5]);
     params[nparams].ang_flag = atoi(words[6]);
     params[nparams].pcn_flag = atoi(words[7]);
     params[nparams].rad_flag = atoi(words[8]);
     params[nparams].tor_flag = atoi(words[9]);
     params[nparams].vdwflag = atof(words[10]);
     params[nparams].powerm = atof(words[11]);
     params[nparams].veps = atof(words[12]);
     params[nparams].vsig = atof(words[13]);
     params[nparams].paaa = atof(words[14]); 
     params[nparams].pbbb = atof(words[15]); 
     params[nparams].lami = atof(words[16]);
     params[nparams].alfi = atof(words[17]);
     params[nparams].powern = atof(words[18]);
     params[nparams].QL = atof(words[19]);
     params[nparams].QU = atof(words[20]);
     params[nparams].DL = atof(words[21]);
     params[nparams].DU = atof(words[22]);
     params[nparams].qmin = atof(words[23]);
     params[nparams].qmax = atof(words[24]);
     params[nparams].chi = atof(words[25]);
     params[nparams].dj  = atof(words[26]);
     params[nparams].dk  = atof(words[27]);
     params[nparams].dl  = atof(words[28]);
     params[nparams].esm = atof(words[29]);
     params[nparams].cmn1 = atof(words[30]);
     params[nparams].cmn2 = atof(words[31]);
     params[nparams].pcmn1 = atof(words[32]);
     params[nparams].pcmn2 = atof(words[33]);
     params[nparams].coulcut = atof(words[34]);
     params[nparams].polz = atof(words[35]);
     params[nparams].curl = atof(words[36]);
     params[nparams].curlcut1 = atof(words[37]);
     params[nparams].curlcut2 = atof(words[38]);
     params[nparams].curl0 = atof(words[39]);
     params[nparams].alpha1 = atof(words[40]);
     params[nparams].bigB1 = atof(words[41]);
     params[nparams].alpha2 = atof(words[42]);
     params[nparams].bigB2 = atof(words[43]);
     params[nparams].alpha3 = atof(words[44]);
     params[nparams].bigB3 = atof(words[45]); 
     params[nparams].lambda = atof(words[46]);
     params[nparams].bigA = atof(words[47]);
     params[nparams].beta = atof(words[48]);
     params[nparams].bigr = atof(words[49]);
     params[nparams].bigd = atof(words[50]);
     params[nparams].pcos6 = atof(words[51]);
     params[nparams].pcos5 = atof(words[52]);
     params[nparams].pcos4 = atof(words[53]);
     params[nparams].pcos3 = atof(words[54]);
     params[nparams].pcos2 = atof(words[55]);
     params[nparams].pcos1 = atof(words[56]);
     params[nparams].pcos0 = atof(words[57]);
     params[nparams].pcna = atof(words[58]);
     params[nparams].pcnb = atof(words[59]);
     params[nparams].pcnc = atof(words[60]);
     params[nparams].pcnd = atof(words[61]);
     params[nparams].p6p0 = atof(words[62]);
     params[nparams].p6p1 = atof(words[63]);
     params[nparams].p6p2 = atof(words[64]);
     params[nparams].p6p3 = atof(words[65]);
     params[nparams].p6p4 = atof(words[66]);
     params[nparams].p6p5 = atof(words[67]);
     params[nparams].p6p6 = atof(words[68]);
     params[nparams].ptork1=atof(words[69]);
     params[nparams].ptork2=atof(words[70]);
     params[nparams].addrepr=atof(words[71]);
     params[nparams].addrep=atof(words[72]);
     params[nparams].pcross = atof(words[73]);
     params[nparams].powermint = int(params[nparams].powerm);
 
     // parameter sanity checks
 
     if (params[nparams].lambda < 0.0 || params[nparams].powern < 0.0 || 
 	params[nparams].beta < 0.0 || params[nparams].alpha1 < 0.0 || 
 	params[nparams].bigB1< 0.0 || params[nparams].bigA< 0.0 || 
 	params[nparams].bigB2< 0.0 || params[nparams].alpha2 <0.0 ||
 	params[nparams].bigB3< 0.0 || params[nparams].alpha3 <0.0 ||
 	params[nparams].bigr < 0.0 || params[nparams].bigd < 0.0 ||
 	params[nparams].bigd > params[nparams].bigr ||
 	params[nparams].powerm - params[nparams].powermint != 0.0 ||
 	params[nparams].addrepr < 0.0 || params[nparams].powermint < 1.0 ||
 	params[nparams].QL > 0.0 || params[nparams].QU < 0.0 || 
 	params[nparams].DL < 0.0 || params[nparams].DU > 0.0 ||
 	params[nparams].pcross < 0.0 || 
 	params[nparams].esm < 0.0 || params[nparams].veps < 0.0 || 
 	params[nparams].vsig < 0.0 || params[nparams].vdwflag < 0.0 
 	)
       error->all(FLERR,"Illegal COMB3 parameter");
 
     nparams++;
   } 
 
   delete [] words;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb3::setup()
 {
   int i,j,k,m,n;
 
   // set elem2param for all element triplet combinations
   // must be a single exact match to lines read from file
   // do not allow for ACB in place of ABC
 
   memory->destroy(elem2param);
   memory->create(elem2param,nelements,nelements,nelements,"pair:elem2param");
 
   for (i = 0; i < nelements; i++)
     for (j = 0; j < nelements; j++)
       for (k = 0; k < nelements; k++) {
 	n = -1;
 	for (m = 0; m < nparams; m++) {
 	  if (i == params[m].ielement && j == params[m].jelement && 
 	      k == params[m].kelement) {
 	    if (n >= 0) error->all(FLERR,"Potential file has duplicate entry");
 	    n = m;
 	  }
 	}
 	if (n < 0) error->all(FLERR,"Potential file is missing an entry");
 	elem2param[i][j][k] = n;
       }
 
   // compute parameter values derived from inputs
 
   for (m = 0; m < nparams; m++) {
     params[m].cut = params[m].bigr + params[m].bigd;
     params[m].cutsq = params[m].cut*params[m].cut;
     params[m].c1 = pow(2.0*params[m].powern*1.0e-16,-1.0/params[m].powern);
     params[m].c2 = pow(2.0*params[m].powern*1.00e-8,-1.0/params[m].powern);
     params[m].c3 = 1.0/params[m].c2;
     params[m].c4 = 1.0/params[m].c1;
 
     params[m].Qo = (params[m].QU+params[m].QL)/2.0; // (A22)
     params[m].dQ = (params[m].QU-params[m].QL)/2.0; // (A21)
     params[m].aB = 1.0 / 
       (1.0-pow(fabs(params[m].Qo/params[m].dQ),10)); // (A20)
     params[m].bB = pow(fabs(params[m].aB),0.1)/params[m].dQ; // (A19)
     params[m].nD = log(params[m].DU/(params[m].DU-params[m].DL))/
 		    log(params[m].QU/(params[m].QU-params[m].QL));
     params[m].bD = (pow((params[m].DL-params[m].DU),(1.0/params[m].nD)))/
 		    (params[m].QU-params[m].QL);
  
     params[m].lcut = params[m].coulcut;
     params[m].lcutsq = params[m].lcut*params[m].lcut;
   }
 
   // set cutmax to max of all params
 
   cutmin = cutmax = 0.0;
   polar = 0;
   for (m = 0; m < nparams; m++) {
     if (params[m].cutsq > cutmin) cutmin = params[m].cutsq + 2.0;
     if (params[m].lcut > cutmax) cutmax = params[m].lcut;
   }
   chicut1 = 7.0;
   chicut2 = cutmax;
 }  
 
 /* ---------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Build the short-range neighbor list (sht_first / sht_num) from the
    pair neighbor list, for owned plus ghost entries of that list
    (list->inum + list->gnum), and accumulate per-atom coordination sums:
      NCo[i]     sum of comb_fc(r)*pcross over all short-range neighbors
      xcctmp[i]  same sum restricted to neighbors with jelementgp == 1
      xchtmp[i]  same sum restricted to neighbors with jelementgp == 2
      xcotmp[i]  same sum restricted to neighbors with jelementgp == 3
    dpl[i] is zeroed for every visited atom.  A neighbor is kept when its
    squared distance does not exceed cutmin.  Finishes with a forward
    communication using pack_flag = 2.
 ------------------------------------------------------------------------- */
 
 void PairComb3::Short_neigh()
 {
   int nj,*neighptrj,icontrol;
   int iparam_ij,*ilist,*jlist,*numneigh,**firstneigh;
   int inum,jnum,i,j,ii,jj,itype,jtype;
   double rr1,rsq1,delrj[3],fcross;
 
   double **x = atom->x;
   int *type  = atom->type;
 
   // grow the per-atom work arrays when the atom count limit has increased
 
   if (atom->nmax > nmax) {
     memory->sfree(sht_first);
     nmax = atom->nmax;
     sht_first = (int **) memory->smalloc(nmax*sizeof(int *),
                                          "pair:sht_first");
     memory->grow(dpl,nmax,3,"pair:dpl");
     memory->grow(xcctmp,nmax,"pair:xcctmp");
     memory->grow(xchtmp,nmax,"pair:xchtmp");
     memory->grow(xcotmp,nmax,"pair:xcotmp");
     memory->grow(NCo,nmax,"pair:NCo");
     memory->grow(sht_num,nmax,"pair:sht_num");
     memory->grow(bbij,nmax,MAXNEIGH,"pair:bbij");
   }
 
   inum = list->inum + list->gnum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // create COMB neighbor list
 
   ipage->reset();
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     dpl[i][0] = dpl[i][1] = dpl[i][2] = 0.0;
 
     nj = 0;
     neighptrj = ipage->vget();
 
     itype = map[type[i]];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     NCo[i] = 0.0;
     xcctmp[i] = 0.0;
     xchtmp[i] = 0.0;
     xcotmp[i] = 0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj] & NEIGHMASK;
 
       delrj[0] = x[i][0] - x[j][0];
       delrj[1] = x[i][1] - x[j][1];
       delrj[2] = x[i][2] - x[j][2];
       rsq1 = vec3_dot(delrj,delrj);
 
       if (rsq1 > cutmin) continue;
 
       // the parameter lookup is only needed for neighbors that are kept
       jtype = map[type[j]];
       iparam_ij = elem2param[itype][jtype][jtype];
 
       neighptrj[nj++] = j;
       rr1 = sqrt(rsq1);
 
       // weighted cutoff value of this neighbor, evaluated once and
       // reused for the total and for the per-group partial sum
       fcross = comb_fc(rr1,&params[iparam_ij]) * params[iparam_ij].pcross;
       NCo[i] += fcross;
 
       icontrol = params[iparam_ij].jelementgp;
 
       if (icontrol == 1) xcctmp[i] += fcross;
       else if (icontrol == 2) xchtmp[i] += fcross;
       else if (icontrol == 3) xcotmp[i] += fcross;
     }
 
     sht_first[i] = neighptrj;
     sht_num[i] = nj;
     ipage->vgot(nj);
     if (ipage->status())
       error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
   }
 
   // communicating coordination number to all nodes
   pack_flag = 2;
   comm->forward_comm_pair(this);
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Force and energy evaluation.  In order:
      1. rebuild the short-range neighbor list (Short_neigh)
      2. obtain the self-energy correction constants (potal_calc)
      3. if pol_flag is set, initialize dipoles for all pairs within
         lcutsq (dipole_init)
      4. for every atom i in the neighbor list:
         - self energy (self, plus dipole_self when pol_flag is set)
         - long-range pair loop within lcutsq: vdwaals, direct, field,
           optional dipole_calc, and repulsive for pairs within cutsq
         - short-range loop over pairs with tag[j] > tag[i]: accumulate
           zeta_ij / zeta_ji (with optional torsion and radical terms),
           call force_zeta and selfp6p, then apply the three-body force
           loops over k (short neighbors of i) and l (short neighbors of j)
    Forces are accumulated into atom->f.  Energies and virials are passed
    to ev_tally / v_tally3 when evflag / vflag_atom are set.
 ------------------------------------------------------------------------- */
 
 void PairComb3::compute(int eflag, int vflag)
 {
   int i,ii,k,kk,j,jj,im,inum,jnum,itype,jtype,ktype;
   int iparam_i,iparam_ij,iparam_ji;
   int iparam_ijk,iparam_jik,iparam_ikj,iparam_jli,iparam_ikl;
   int sht_jnum,*sht_jlist,sht_lnum,*sht_llist;
   int sht_mnum,*sht_mlist,sht_pnum,*sht_plist;
   int *ilist,*jlist,*numneigh,**firstneigh,mr1,mr2,mr3,inty,nj;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,rsq1,rsq2,rsq3,iq,jq,yaself;
   double eng_tmp,vionij,fvionij,sr1,sr2,sr3; 
   double zeta_ij,prefac_ij1,prefac_ij2,prefac_ij3,prefac_ij4,prefac_ij5;
   double zeta_ji,prefac_ji1,prefac_ji2,prefac_ji3,prefac_ji4,prefac_ji5;
   double delrj[3],delrk[3],fi[3],fj[3],fk[3],fl[3];
   double ep6p_ij,ep6p_ji,fip6p[3],fjp6p[3],fkp6p[3],flp6p[3];
   double potal,fac11,fac11e;
   tagint itag, jtag;
   
   int nlocal = atom->nlocal;
   int newton_pair = force->newton_pair;
   tagint *tag = atom->tag;
   int *type = atom->type;
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q; 
 
   // coordination terms
   double xcn, ycn;
   double kcn, lcn;
   int torindx;
 
   // torsion and radical variables
   int l, ll, ltype, m, mm, mtype, p, pp, ptype;
   int iparam_jil, iparam_ijl, iparam_ki, iparam_lj;
   int iparam_jl, iparam_ik, iparam_km, iparam_lp;
   double kconjug, lconjug, kradtot, lradtot;
   double delrl[3], delrm[3], delrp[3], ddprx[3], srmu; 
   double zet_addi,zet_addj;
 
   evdwl = eng_tmp = 0.0;
 
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = vflag_atom = 0;
 
   // Build short range neighbor list
   Short_neigh();
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
   yaself = vionij = fvionij = fpair = 0.0; 
 
   // self energy correction term: potal
   potal_calc(potal,fac11,fac11e);
 
   // generate initial dipole tensor
   if (pol_flag )
     for (ii = 0; ii < inum; ii++) {
       i = ilist[ii];
       itag = tag[i];
       itype = map[type[i]];
       xtmp = x[i][0];
       ytmp = x[i][1];
       ztmp = x[i][2];
       iq = q[i];
       jlist = firstneigh[i];
       jnum = numneigh[i];
 
       for (jj = 0; jj < jnum; jj++) {
         j = jlist[jj] & NEIGHMASK;
         jtag = tag[j];
 
         // skip this visit of the pair depending on tag order and the
         // parity of the tag sum; for equal tags the coordinates decide
         // (presumably so each i-j pair is handled once - confirm)
         if (itag > jtag) {
           if ((itag+jtag) % 2 == 0) continue;
         } else if (itag < jtag) {
           if ((itag+jtag) % 2 == 1) continue;
         } else {
           if (x[j][2] < x[i][2]) continue;
           if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
           if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
         } 
 
         jtype = map[type[j]];
         jq = q[j];
 
         delrj[0] = x[j][0] - xtmp;
         delrj[1] = x[j][1] - ytmp;
         delrj[2] = x[j][2] - ztmp;
         rsq = vec3_dot(delrj,delrj);
 
         iparam_ij = elem2param[itype][jtype][jtype];
         iparam_ji = elem2param[jtype][itype][itype];
 
         if (rsq > params[iparam_ij].lcutsq) continue;
 
         tri_point(rsq, mr1, mr2, mr3, sr1, sr2, sr3);
       
         dipole_init(&params[iparam_ij],&params[iparam_ji],fac11,delrj,
                     rsq,mr1,mr2,mr3,sr1,sr2,sr3,iq,jq,i,j);
       }
     }
 
   // loop over full neighbor list of my atoms
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     itag = tag[i];
     itype = map[type[i]];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     iq = q[i];
     nj = 0;
     iparam_i = elem2param[itype][itype][itype];
     
     // self energy, only on i atom
     yaself = self(&params[iparam_i],iq);
 
     // dipole self energy
     if (pol_flag)
       yaself += dipole_self(&params[iparam_i],i);
 
     if (evflag) ev_tally(i,i,nlocal,0,0.0,yaself,0.0,0.0,0.0,0.0);
  
     // two-body interactions (long:R + A, short: only R)
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
     sht_jlist = sht_first[i];
     sht_jnum = sht_num[i];
 
     // long range interactions
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj] & NEIGHMASK;
 
       jtag = tag[j];
 
       // same pair-selection rule as in the dipole initialization loop
       if (itag > jtag) {
         if ((itag+jtag) % 2 == 0) continue;
       } else if (itag < jtag) {
         if ((itag+jtag) % 2 == 1) continue;
       } else {
         if (x[j][2] < x[i][2]) continue;
         if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
         if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
       } 
 
       jtype = map[type[j]];
       jq = q[j];
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       iparam_ij = elem2param[itype][jtype][jtype];
       iparam_ji = elem2param[jtype][itype][itype];
 
       if (rsq > params[iparam_ij].lcutsq) continue;
 
       inty = intype[itype][jtype];
 
       // three-point interpolation
       tri_point(rsq, mr1, mr2, mr3, sr1, sr2, sr3);
 
       // Q-independent: van der Waals
       vdwaals(inty,mr1,mr2,mr3,rsq,sr1,sr2,sr3,eng_tmp,fpair);
       evdwl = eng_tmp;
       f[i][0] += delx*fpair;
       f[i][1] += dely*fpair;
       f[i][2] += delz*fpair;
       f[j][0] -= delx*fpair;
       f[j][1] -= dely*fpair;
       f[j][2] -= delz*fpair;
 
       if (evflag)
 	ev_tally(i,j,nlocal,newton_pair,evdwl,0.0,fpair,delx,dely,delz);
  
       // Q-dependent: Coulombic, field, polarization
       // 1/r energy and forces
 
       direct(&params[iparam_ij], &params[iparam_ji], 
 		mr1, mr2, mr3, rsq, sr1, sr2, sr3, iq, jq, 
 		fac11, fac11e, eng_tmp, fvionij, i, j);
 
       vionij = eng_tmp;
 
       // field correction to self energy
       field(&params[iparam_ij], &params[iparam_ji],rsq,iq,jq,
 	     eng_tmp,fvionij);
       vionij += eng_tmp;
 
       // sums up long range Q-dependent forces (excluding dipole)
       f[i][0] += delx*fvionij;
       f[i][1] += dely*fvionij;
       f[i][2] += delz*fvionij;
       f[j][0] -= delx*fvionij;
       f[j][1] -= dely*fvionij;
       f[j][2] -= delz*fvionij;
 
       // sums up long range Q-dependent energies (excluding dipole)
       if (evflag) 
 	ev_tally(i,j,nlocal,newton_pair,0.0,vionij,fvionij,delx,dely,delz);
 
       // polarization field
       if (pol_flag) {
         dipole_calc(&params[iparam_ij], &params[iparam_ji],fac11,
 		delx,dely,delz,rsq,mr1,mr2,mr3,
 		sr1,sr2,sr3,iq,jq,i,j,eng_tmp,fvionij,ddprx);
 	vionij = eng_tmp;
 
         // sums up dipole energies
         if (evflag) 
 	  ev_tally(i,j,nlocal,newton_pair,0.0,vionij,fvionij,delx,dely,delz);
 
         // sums up dipole forces
         f[i][0] += (ddprx[0] + delx*fvionij);
         f[i][1] += (ddprx[1] + dely*fvionij);
         f[i][2] += (ddprx[2] + delz*fvionij);
         f[j][0] -= (ddprx[0] + delx*fvionij);
         f[j][1] -= (ddprx[1] + dely*fvionij);
         f[j][2] -= (ddprx[2] + delz*fvionij);
       }	
       
       // the pair repulsion below only applies inside the short-range cutoff
       if (rsq > params[iparam_ij].cutsq) continue;
 
       repulsive(&params[iparam_ij], &params[iparam_ji], rsq,
 	      fpair, eflag, eng_tmp, iq, jq);
 
       evdwl = eng_tmp;
       
       // repulsion is pure two-body, sums up pair repulsive forces
       f[i][0] += delx*fpair;
       f[i][1] += dely*fpair;
       f[i][2] += delz*fpair;
       f[j][0] -= delx*fpair;
       f[j][1] -= dely*fpair;
       f[j][2] -= delz*fpair;
   
       if (evflag)
 	ev_tally(i,j,nlocal,newton_pair,evdwl,0.0,fpair,delx,dely,delz);
     }
 
     // many-body interactions: start of short-range
     xcn = NCo[i];
     for (jj = 0; jj < sht_jnum; jj++) {
       j = sht_jlist[jj];
       sht_llist = sht_first[j];
       sht_lnum = sht_num[j];
 
       // only pairs with tag[j] > tag[i] are processed in this loop
       jtag = tag[j];
       if ( jtag <= itag ) continue ;
       ycn = NCo[j];
 
       jtype = map[type[j]];
       iparam_ij = elem2param[itype][jtype][jtype];
       iparam_ji = elem2param[jtype][itype][itype];
 
       delrj[0] = x[j][0] - xtmp;
       delrj[1] = x[j][1] - ytmp;
       delrj[2] = x[j][2] - ztmp;
       rsq1 = vec3_dot(delrj,delrj);
       if (rsq1 > params[iparam_ij].cutsq) continue;
       // nj counts accepted i-j bonds; it is passed to force_zeta below
       nj ++;
       
       // this Qj for q-dependent BSi
       jq = q[j];
       
       // accumulate bondorder zeta for each i-j interaction via k and l loops
       zeta_ij = 0.0;  
       bbtor = 0.0; 
       kconjug = 0.0;
 
       for (kk = 0; kk < sht_jnum; kk++) {	// kk is neighbor of ii
 	k = sht_jlist[kk];
 	if (j == k) continue;
 
 	ktype = map[type[k]];
 	iparam_ijk = elem2param[itype][jtype][ktype];
 	iparam_ikj = elem2param[itype][ktype][jtype];
 	iparam_jik = elem2param[jtype][itype][ktype];
 	iparam_ik  = elem2param[itype][ktype][ktype];
 	delrk[0] = x[k][0] - xtmp;
 	delrk[1] = x[k][1] - ytmp;
 	delrk[2] = x[k][2] - ztmp;
 	rsq2 = vec3_dot(delrk,delrk);
      
 	if (rsq2 > params[iparam_ik].cutsq) continue;
 
 	// 3-body zeta in bond order
 	zeta_ij += zeta(&params[iparam_ijk], &params[iparam_ik],
 			rsq1, rsq2, delrj, delrk, i, xcn);
 
 	// radical initialization: apply only to CC,CO,OC bonds
         if (params[iparam_ij].rad_flag > 0 &&
             params[iparam_ik].ielementgp == 1 &&
             params[iparam_ik].jelementgp == 1) {
           iparam_ki = elem2param[ktype][itype][itype];
           kcn=NCo[k];
           kconjug += rad_init(rsq2,&params[iparam_ki],i,kradtot,kcn);
 
         }
 
 	// torsion: i-j-k-l: apply to all C-C bonds
 
 	if( params[iparam_ij].tor_flag != 0 ) {
 	  // srmu is the sine of the angle between delrj and delrk
 	  srmu = vec3_dot(delrj,delrk)/(sqrt(rsq1*rsq2));
 	  srmu = sqrt(1.0-srmu*srmu);
 
 	  if(srmu > 0.1) {
             for (ll = 0; ll < sht_lnum; ll++) {	// ll is neighbor of jj
 	      l = sht_llist[ll];			
 
 	      if(l==i || l==j || l==k) continue; 
 
 	      ltype = map[type[l]];
 
 	      delrl[0] = x[l][0] - x[j][0];	
 	      delrl[1] = x[l][1] - x[j][1];
 	      delrl[2] = x[l][2] - x[j][2];
 	      rsq3 = vec3_dot(delrl,delrl);
 	      iparam_jl = elem2param[jtype][ltype][ltype];
 
 	      if (rsq3 > params[iparam_jl].cutsq) continue;
 
 	      iparam_ikl = elem2param[itype][ktype][ltype]; 
               torindx = params[iparam_ij].tor_flag;
 	      bbtor += bbtor1(torindx, &params[iparam_ikl],&params[iparam_jl],
                        rsq1,rsq2,rsq3,delrj,delrk,delrl,srmu);
 	    }
 	  }
 	}
       } 
 
       zeta_ji = 0.0; 
       lconjug = 0.0;
 
       for (ll = 0; ll < sht_lnum; ll++) {	
 	l = sht_llist[ll];		
 	if (l == i) continue;
 
 	ltype = map[type[l]];
 	iparam_jil = elem2param[jtype][itype][ltype];
 	iparam_ijl = elem2param[itype][jtype][ltype];
 	iparam_jl  = elem2param[jtype][ltype][ltype];
 	iparam_lj  = elem2param[ltype][jtype][jtype];
 
 	delrk[0] = x[l][0] - x[j][0];
 	delrk[1] = x[l][1] - x[j][1];
 	delrk[2] = x[l][2] - x[j][2];
 	rsq2 = vec3_dot(delrk,delrk);
 
 	// NOTE(review): delrl is filled with the same j->l vector as delrk
 	// here and rsq2 is recomputed to the same value; delrl is then
 	// overwritten with -delrj a few lines below
 	delrl[0] = x[l][0] - x[j][0];
 	delrl[1] = x[l][1] - x[j][1];
 	delrl[2] = x[l][2] - x[j][2];
 	rsq2 = vec3_dot(delrl,delrl);
 
 	if (rsq2 > params[iparam_jl].cutsq) continue;
 
 	vec3_scale(-1,delrj,delrl);	// ji_hat is -(ij_hat)
 
 	zeta_ji += zeta(&params[iparam_jil], &params[iparam_jl]
 			, rsq1, rsq2, delrl, delrk, j, ycn);
 
 	// radical initialization: apply only to CC,CO,OC bonds
         if(params[iparam_ji].rad_flag > 0
           && params[iparam_jl].ielementgp == 1
           && params[iparam_jl].jelementgp == 1) {
           iparam_lj = elem2param[ltype][jtype][jtype];
           lcn=NCo[l];
           lconjug += rad_init(rsq2,&params[iparam_lj],j,lradtot,lcn);
         }
       } 
 
       // bond-order pair force plus the prefactors consumed by the
       // three-body loops below
       force_zeta(&params[iparam_ij], &params[iparam_ji],
 	 rsq1, xcn, ycn, zeta_ij, zeta_ji, fpair,
 	 prefac_ij1, prefac_ij2, prefac_ij3, prefac_ij4, prefac_ij5,
 	 prefac_ji1, prefac_ji2, prefac_ji3, prefac_ji4, prefac_ji5,
 	 eflag, eng_tmp, iq, jq, i, j, nj, bbtor, kconjug, lconjug);
 
       evdwl = eng_tmp;
       // selfp6p overwrites eng_tmp and adds its contribution to fpair
       selfp6p(&params[iparam_ij],&params[iparam_ji],rsq1,eng_tmp,fpair);
 
       evdwl += eng_tmp;
       f[i][0] += delrj[0]*fpair;
       f[i][1] += delrj[1]*fpair;
       f[i][2] += delrj[2]*fpair;
       f[j][0] -= delrj[0]*fpair;
       f[j][1] -= delrj[1]*fpair;
       f[j][2] -= delrj[2]*fpair;
 
       // delrj points from i to j here (opposite of delx/dely/delz in the
       // long-range loop), so fpair and delrj are both passed negated
       if (evflag) ev_tally(i,j,nlocal,newton_pair,evdwl,0.0,-fpair,-delrj[0],-delrj[1],-delrj[2]);
 
       // attractive term via loop over k (3-body forces: i-j-k)
       zet_addi=0;
       zet_addj=0;
 
       for (kk = 0; kk < sht_jnum; kk++) {
 	k = sht_jlist[kk];
 	if (j == k) continue;
         sht_mlist = sht_first[k];
         sht_mnum = sht_num[k];
 
 	ktype = map[type[k]];
 	iparam_ijk = elem2param[itype][jtype][ktype];
 	iparam_ikj = elem2param[itype][ktype][jtype];
 	iparam_jik = elem2param[jtype][itype][ktype];
 	iparam_ik  = elem2param[itype][ktype][ktype];
 	delrk[0] = x[k][0] - xtmp;
 	delrk[1] = x[k][1] - ytmp;
 	delrk[2] = x[k][2] - ztmp;
 	rsq2 = vec3_dot(delrk,delrk);
 	if (rsq2 > params[iparam_ik].cutsq) continue;
          
 	// BO-dependent 3-body E & F
 	attractive(&params[iparam_ijk], &params[iparam_jik],&params[iparam_ikj],
 		prefac_ij1, prefac_ij2, prefac_ij3, prefac_ij4, prefac_ij5,
 		rsq1,rsq2,delrj,delrk,fi,fj,fk,i,xcn);
 
 	ep6p_ij = ep6p(&params[iparam_ijk],&params[iparam_ikj],rsq1,rsq2,delrj,delrk,zet_addi);
 	fp6p(&params[iparam_ijk],&params[iparam_ikj],rsq1,rsq2,delrj,delrk,fip6p,fjp6p,fkp6p); 
 
 	// Sums up i-j-k forces: LP contribution
 	for (im = 0; im < 3; im++) {
 	  fi[im] += fip6p[im];
 	  fj[im] += fjp6p[im];
 	  fk[im] += fkp6p[im];
 	}
 
 	// Sums up i-j-k forces: Tallies into global force vector
 	for (im = 0; im < 3; im++) {
 	  f[i][im] += fi[im];
 	  f[j][im] += fj[im];
 	  f[k][im] += fk[im];
 	}
 
 	// torsion and radical: apply to all C-C bonds
 	// (ptorr is a member assigned in force_zeta's torsion branch)
 	if( params[iparam_ijk].tor_flag != 0 && fabs(ptorr)>1.0e-8) {
 	  srmu = vec3_dot(delrj,delrk)/(sqrt(rsq1*rsq2));
 	  srmu = sqrt(1.0-srmu*srmu);
 
 	  if(srmu > 0.1) {
             for (ll = 0; ll < sht_lnum; ll++) {	// ll is neighbor of jj
 	      l = sht_llist[ll];			
 	      if (l==i||l==j||l==k) continue; 
 
 	      ltype = map[type[l]];
 
 	      delrl[0] = x[l][0] - x[j][0];	
 	      delrl[1] = x[l][1] - x[j][1];
 	      delrl[2] = x[l][2] - x[j][2];
 	      rsq3 = vec3_dot(delrl,delrl);
 	
 	      iparam_jl = elem2param[jtype][ltype][ltype];
 	      if (rsq3 > params[iparam_jl].cutsq) continue;
 	      iparam_ikl = elem2param[itype][ktype][ltype];  
               torindx = params[iparam_ij].tor_flag;
 	      tor_force(torindx, &params[iparam_ikl], &params[iparam_jl],srmu,
                           rsq1,rsq2,rsq3,delrj,delrk,delrl);
 
 	      // fi_tor/fj_tor/fk_tor/fl_tor are not locals of this function;
 	      // presumably members filled by tor_force - confirm
 	      for (im = 0; im < 3; im++) {
 		f[i][im] += fi_tor[im];
 		f[j][im] += fj_tor[im];
 		f[k][im] += fk_tor[im];
 		f[l][im] += fl_tor[im];
 	      }
 	    }
 	  }
 	}
 
         if( params[iparam_ijk].rad_flag>=1 &&
           params[iparam_ijk].ielementgp==1 && 
           params[iparam_ijk].kelementgp==1) {
           iparam_ki = elem2param[ktype][itype][itype];
           kcn=NCo[k];
           double rik=sqrt(rsq2);
           // coordination of k with the i-k contribution removed
           kradtot = -comb_fc(rik,&params[iparam_ki])*params[iparam_ki].pcross+kcn;
          
 	  rad_forceik(&params[iparam_ki],rsq2,delrk,kconjug,kradtot);
 
 	  // fi_rad/fk_rad and radtmp are not locals of this function;
 	  // presumably members filled by rad_forceik - confirm
 	  for (im = 0; im < 3; im++) {
 	    f[i][im] += fi_rad[im];
 	    f[k][im] += fk_rad[im];
 	  }
 
           if (fabs(radtmp) > 1.0e-12) {
 	    for (mm = 0; mm < sht_mnum; mm++) {	// mm is neighbor of kk
 	       m = sht_mlist[mm];
 	       if (m == k) continue;		
 
                mtype = map[type[m]];
 
 	      delrm[0] = x[m][0] - x[k][0];	
 	      delrm[1] = x[m][1] - x[k][1];
 	      delrm[2] = x[m][2] - x[k][2];
 	      rsq3 = vec3_dot(delrm,delrm);
 
 	      iparam_km = elem2param[ktype][mtype][mtype];
 	      iparam_ki = elem2param[ktype][itype][itype];
 
 	      if (rsq3 > params[iparam_km].cutsq) continue;
 
 	      rad_force(&params[iparam_km],rsq3,delrm,radtmp);
 
 	      for (im = 0; im < 3; im++) {
 	        f[k][im] += fj_rad[im];
 	        f[m][im] += fk_rad[im];
 	      }
 	    }
 	  }
 	}
 
         if (evflag) 
 	  ev_tally(i,j,nlocal,newton_pair,ep6p_ij,0.0,0.0,0.0,0.0,0.0);
 	if (vflag_atom)
 	  v_tally3(i,j,k,fj,fk,delrj,delrk);
 
       }	// k-loop
 
       // attractive term via loop over l (3-body forces: j-i-l)
       for (ll = 0; ll < sht_lnum; ll++) {
 	l = sht_llist[ll];
 	if (l == i) continue;
 
         sht_plist = sht_first[l];
         sht_pnum = sht_num[l];
 
 	ltype = map[type[l]];
 	iparam_jil = elem2param[jtype][itype][ltype];
 	iparam_jli = elem2param[jtype][ltype][itype];
 	iparam_ijl = elem2param[itype][jtype][ltype];
 	iparam_jl  = elem2param[jtype][ltype][ltype];
 	delrk[0] = x[l][0] - x[j][0];	
 	delrk[1] = x[l][1] - x[j][1];
 	delrk[2] = x[l][2] - x[j][2];
 
 	rsq2 = vec3_dot(delrk,delrk);
 	if (rsq2 > params[iparam_jl].cutsq) continue;
 	// delrl = -delrj, i.e. the vector from j to i
 	vec3_scale(-1,delrj,delrl);
 
 	attractive(&params[iparam_jil],&params[iparam_ijl],&params[iparam_jli],
 		prefac_ji1,prefac_ji2,prefac_ji3,prefac_ji4,prefac_ji5,
 		rsq1,rsq2,delrl,delrk,fj,fi,fl,j,ycn);
 
 	// BO-independent 3-body j-i-l LP and BB correction and forces
 	ep6p_ji = ep6p(&params[iparam_jil],&params[iparam_jli],rsq1,rsq2,delrl,delrk,zet_addj);
 	fp6p(&params[iparam_jil],&params[iparam_jli],rsq1,rsq2,delrl,delrk,fjp6p,fip6p,flp6p); 
 
         if (evflag) 
 	  ev_tally(j,i,nlocal,newton_pair,ep6p_ji,0.0,0.0,0.0,0.0,0.0);
 
 	// BO-dependent 3-body E & F
 	for (im = 0; im < 3; im++) {
 	  fj[im] += fjp6p[im];
 	  fi[im] += fip6p[im];
 	  fl[im] += flp6p[im];
 	}
 
 	// Sums up j-i-l forces: Tallies into global force vector
 	for (im = 0; im < 3; im++) {
 	  f[j][im] += fj[im];
 	  f[i][im] += fi[im];
 	  f[l][im] += fl[im];
 	}
 	
 	// radical i-j-l-p: apply to all CC,CO,OC bonds
 	if( params[iparam_jil].rad_flag >= 1 && 
           params[iparam_jil].ielementgp == 1 &&
           params[iparam_jil].kelementgp == 1 ) {
             iparam_lj = elem2param[ltype][jtype][jtype];
             lcn=NCo[l];
             double rjl=sqrt(rsq2);
             // coordination of l with the j-l contribution removed
             lradtot=-comb_fc(rjl,&params[iparam_lj])*params[iparam_lj].pcross +lcn;
 
             rad_forceik(&params[iparam_lj],rsq2,delrk,lconjug,lradtot);
 
 	    for (im = 0; im < 3; im++) {
 	      f[j][im] += fi_rad[im];
 	      f[l][im] += fk_rad[im];
 	    }
 
             if (fabs(radtmp)>1.0e-12) {
 	      for (pp = 0; pp < sht_pnum; pp++) {	// pp is neighbor of ll
 	        p = sht_plist[pp];
 	        if (p == l) continue;		
 	        ptype = map[type[p]];
 
 	        delrp[0] = x[p][0] - x[l][0];	
 	        delrp[1] = x[p][1] - x[l][1];
 	        delrp[2] = x[p][2] - x[l][2];
 	        rsq3 = vec3_dot(delrp,delrp);
 
 	        iparam_lp = elem2param[ltype][ptype][ptype];
 
 	        if (rsq3 > params[iparam_lp].cutsq) continue;
 
 	        // NOTE(review): delrj is negated before and restored after
 	        // rad_force, but delrj is not among rad_force's arguments
 	        vec3_scale(-1,delrj,delrj);
 	        rad_force(&params[iparam_lp],rsq3,delrp,radtmp);
 	        vec3_scale(-1,delrj,delrj);
 	        for (im = 0; im < 3; im++) {
 	          f[l][im] += fj_rad[im];
 	          f[p][im] += fk_rad[im];
 		}
 	      }
 	    }
 	}
 
 	if (vflag_atom)
 	  v_tally3(j,i,l,fi,fl,delrl,delrk);
       }	
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Two-body repulsive term for one i-j pair.
    On return fforce holds the pair force divided by r and eng holds the
    repulsive energy.  Nothing is written when r exceeds bigr + bigd of
    parami.  eflag is accepted but not consulted.
 ------------------------------------------------------------------------- */
 
 void PairComb3::repulsive(Param *parami, Param *paramj, double rsq, 
 	double &fforce,int eflag, double &eng, double iq, double jq)
 {
   const double dist = sqrt(rsq);
   if (dist > parami->bigr + parami->bigd) return;
 
   // cutoff function and its derivative at this separation
   const double fc = comb_fc(dist,parami);
   const double fc_d = comb_fc_d(dist,parami);
 
   // charge-dependent terms of atoms i and j
   const double Di = parami->DU + pow(fabs(parami->bD*(parami->QU-iq)),parami->nD);
   const double Dj = paramj->DU + pow(fabs(paramj->bD*(paramj->QU-jq)),paramj->nD);
 
   const double lam = parami->lambda;
   const double decay = exp(0.5*(parami->lami*Di+paramj->lami*Dj)-lam*dist);
   const double amp = parami->bigA * decay;
 
   fforce = -amp * (fc_d - fc * lam);
 
   // additional repulsion, active only when addrep is non-zero and the
   // separation is inside addrepr
 
   const double extra = parami->addrep;
   const double range = parami->addrepr;
   double scale = 1.0;
   if (extra != 0.0 && dist < range) {
     scale += extra * pow((1.0-dist/range),2.0);
     const double scale_d = extra * 2.0 * (dist/range-1.0)/range;
     fforce = fforce*scale - amp * fc * scale * scale_d;
   }
   fforce /= dist;
 
   // eng = repulsive energy
   eng = amp * fc * scale;
 }
 
 /* ---------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contribution of neighbor k to the bond-order sum of bond i-j.
    Returns 0 when r_ij lies beyond bigr + bigd of parami; otherwise the
    product of the i-k cutoff function (paramj), the angular function
    comb_gijk (parami) and a clamped exponential of
    (beta*(r_ij - r_ik))^powermint.  The index i is not used here.
 ------------------------------------------------------------------------- */
 
 double PairComb3::zeta(Param *parami, Param *paramj, double rsqij, 
 	double rsqik, double *delrij, double *delrik, int i, double xcn)
 {
   const double dij = sqrt(rsqij);
   if (dij > parami->bigr+parami->bigd) return 0.0;
 
   const double dik = sqrt(rsqik);
   const double cosijk = vec3_dot(delrij,delrik) / (dij*dik);
 
   // exponent, clamped so that exp() neither overflows nor underflows
   const double expo = pow(parami->beta*(dij-dik),int(parami->powermint));
   const double damp = (expo > 69.0776) ? 1.e30 :
     ((expo < -69.0776) ? 0.0 : exp(expo));
 
   return comb_fc(dik,paramj) * comb_gijk(cosijk,parami,xcn) * damp;
 }
 
 /* ---------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Pair term built from the plain sums of the p6p0..p6p6 coefficients of
    both parameter sets.  eng is overwritten with the energy; the matching
    contribution (divided by r) is added to force.  The cutoff function is
    evaluated with parami.
 ------------------------------------------------------------------------- */
 
 void PairComb3::selfp6p(Param *parami, Param *paramj, double rsq, 
 	double &eng, double &force)
 {
   const double dist = sqrt(rsq);
   const double fc = comb_fc(dist,parami);
   const double fc_d = comb_fc_d(dist,parami);
 
   // coefficient sums for the i and j parameter sets
   const double sum_i = parami->p6p0 + parami->p6p1 + parami->p6p2 +
     parami->p6p3 + parami->p6p4 + parami->p6p5 + parami->p6p6;
   const double sum_j = paramj->p6p0 + paramj->p6p1 + paramj->p6p2 +
     paramj->p6p3 + paramj->p6p4 + paramj->p6p5 + paramj->p6p6;
 
   eng = 0.5 * fc * (sum_i + sum_j);
   force += 0.5 * fc_d * (sum_i + sum_j)/dist;
 }
 
 /* ---------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Three-body energy for the angle j-i-k: a sixth-order polynomial in
    cos(theta) with coefficients p6p0..p6p6 of paramj, scaled by the
    cutoff functions of both bonds and by 0.5.  zet_add is not touched.
 ------------------------------------------------------------------------- */
 
 double PairComb3::ep6p(Param *paramj, Param *paramk, double rsqij, double rsqik,
 		     double *delrij, double *delrik , double &zet_add)
 {
   const double coef[7] = { paramj->p6p0, paramj->p6p1, paramj->p6p2,
                            paramj->p6p3, paramj->p6p4, paramj->p6p5,
                            paramj->p6p6 };
 
   const double dij = sqrt(rsqij);
   const double dik = sqrt(rsqik);
   const double cosijk = vec3_dot(delrij,delrik) / (dij*dik);
   const double fcj = comb_fc(dij,paramj);
   const double fck = comb_fc(dik,paramk);
 
   // poly = sum over n of coef[n] * cos^n, accumulated from n = 0 upwards
   double poly = coef[0];
   double cpow = cosijk;
   poly += coef[1]*cpow;
   for (int n = 2; n <= 6; n++) {
     cpow = cpow*cosijk;
     poly += coef[n]*cpow;
   }
 
   return 0.5 * fck * poly *fcj;
 }
 
 /*---------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Forces belonging to the cos(theta) polynomial term (coefficients
    p6p0..p6p6 of paramij) for the angle j-i-k.
    Outputs: drjlp (atom j), drklp (atom k), and drilp (atom i), where
    drilp is the negative of drjlp + drklp.
 ------------------------------------------------------------------------- */
 
 void PairComb3::fp6p(Param *paramij,Param *paramik, double rsqij, double rsqik,
 		   double *delrij, double *delrik, double *drilp, 
 		   double *drjlp, double *drklp)
 {
   const double coef[7] = { paramij->p6p0, paramij->p6p1, paramij->p6p2,
                            paramij->p6p3, paramij->p6p4, paramij->p6p5,
                            paramij->p6p6 };
 
   const double dij = sqrt(rsqij);
   const double dik = sqrt(rsqik);
   const double cosijk = vec3_dot(delrij,delrik) / (dij*dik);
 
   // cutoff functions and derivatives for both bonds
   const double fcj = comb_fc(dij,paramij);
   const double fck = comb_fc(dik,paramik);
   const double fcj_d = comb_fc_d(dij,paramij);
   const double fck_d = comb_fc_d(dik,paramik);
 
   // poly  = sum over n of coef[n] * cos^n
   // dpoly = derivative of poly with respect to cos
   double poly = coef[0];
   double dpoly = coef[1];
   double clow = 1.0;       // cos^(n-1)
   double chigh = cosijk;   // cos^n
   poly += coef[1]*chigh;
   for (int n = 2; n <= 6; n++) {
     clow = chigh;
     chigh = clow*cosijk;
     poly += coef[n]*chigh;
     dpoly += coef[n]*double(n)*clow;
   }
 
   const double com4k = fcj * fck_d * poly;
   const double com5  = fcj * fck * dpoly;
   const double com5k = fck * poly * fcj_d;
 
   const double ffj1 = 0.5*(-com5/(dij*dik));
   const double ffj2 = 0.5*(com5*cosijk/rsqij-com5k/dij);
   const double ffk1 = ffj1;
   const double ffk2 = 0.5*(-com4k/dik+com5*cosijk/rsqik);
 
   // j-atom
   vec3_scale(ffj1,delrik,drjlp);
   vec3_scaleadd(ffj2,delrij,drjlp,drjlp);
 
   // k-atom
   vec3_scale(ffk1,delrij,drklp);
   vec3_scaleadd(ffk2,delrik,drklp,drklp);
 
   // i-atom: opposite of the j and k forces combined
   vec3_add(drjlp,drklp,drilp);
   vec3_scale(-1.0,drilp,drilp);
 }
 
 /* ---------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Bond-order attractive pair term for bond i-j.
    Inputs:  rsq (squared i-j distance), xcn/ycn (coordination values
             passed on to comb_bij_d, rad_calc and tor_calc),
             zeta_ij/zeta_ji (bond-order sums), iq/jq (charges),
             nj (column of bbij[i] that receives the bond order),
             bbtor, kconjug, lconjug (torsion / radical inputs).
    Outputs: fforce (pair force divided by r), prefac_ij1..5 and
             prefac_ji1..5 (prefactors for the three-body force loops),
             eng (attractive energy, written only when eflag is set).
    Also writes bbij[i][nj], and brad[3] / ptorr in the radical and
    torsion branches.
    Returns without writing any output when r exceeds bigr + bigd.
 ------------------------------------------------------------------------- */
 
 void PairComb3::force_zeta(Param *parami, Param *paramj, double rsq, 
 	double xcn, double ycn, double &zeta_ij, double &zeta_ji, double &fforce, 
 	double &prefac_ij1, double &prefac_ij2, double &prefac_ij3,
 	double &prefac_ij4, double &prefac_ij5,
         double &prefac_ji1, double &prefac_ji2, double &prefac_ji3, 
         double &prefac_ji4, double &prefac_ji5,
         int eflag, double &eng, double iq, double jq, 
 	int i, int j, int nj, double bbtor, double kconjug, double lconjug)
 {
   double r,att_eng,att_force,bij;  // att_eng is -cbj
   double boij, dbij1, dbij2, dbij3, dbij4, dbij5;
   double boji, dbji1, dbji2, dbji3, dbji4, dbji5;
 
   // start from zero: the torsion branch accumulates with += and may run
   // without the radical branch having assigned these first
   double pradx = 0.0, prady = 0.0;
 
   r = sqrt(rsq);
 
   if (r > parami->bigr + parami->bigd) return;
   comb_fa(r, parami, paramj, iq, jq, att_eng, att_force);
   comb_bij_d(zeta_ij,parami,r,i,boij,dbij1,dbij2,dbij3,dbij4,dbij5,xcn);
   comb_bij_d(zeta_ji,paramj,r,j,boji,dbji1,dbji2,dbji3,dbji4,dbji5,ycn);
 
   // bond order is the average of the i->j and j->i values
   bij = 0.5*(boij + boji);
 
   // radical energy
 
   if ( parami->rad_flag>0 ) {
     rad_calc( r, parami, paramj, kconjug, lconjug, i, j, xcn, ycn);
     bij +=  brad[0];
     pradx = brad[1]*att_eng;
     prady = brad[2]*att_eng;
     brad[3] = 1.0 * brad[3]*att_eng;
    }
 
   // torsion energy
   if ( parami->tor_flag!=0) {
      tor_calc( r, parami, paramj, kconjug, lconjug, i, j, xcn, ycn);
      bij += btor[0] * bbtor;
      ptorr =  att_eng * btor[0];
      pradx  += 1.0 *  btor[1] * bbtor * att_eng;
      prady  += 1.0 *  btor[2] * bbtor * att_eng;
      brad[3]+= 1.0 *  btor[3] * bbtor * att_eng;
   }
 
   fforce = 1.0*bij*att_force/r; // force divided by r; caller scales by the i-j vector
   bbij[i][nj] = bij;
 
   prefac_ij1 = -0.5*att_eng*dbij1;	// prefac_ij1 = -pfij
   prefac_ij2 = -0.5*att_eng*dbij2;	// prefac_ij2 = -pfij1
   prefac_ij3 = -0.5*att_eng*dbij3;	// prefac_ij3 = -pfij2
   prefac_ij4 = -0.5*att_eng*dbij4;	// prefac_ij4 = -pfij3
   prefac_ij5 = -0.5*att_eng*dbij5;	// prefac_ij5 = -pfij4
 
   prefac_ji1 = -0.5*att_eng*dbji1;	// prefac_ji1 = -pfji
   prefac_ji2 = -0.5*att_eng*dbji2;	// prefac_ji2 = -pfji1
   prefac_ji3 = -0.5*att_eng*dbji3;	// prefac_ji3 = -pfji2
   prefac_ji4 = -0.5*att_eng*dbji4;	// prefac_ji4 = -pfji3
   prefac_ji5 = -0.5*att_eng*dbji5;	// prefac_ji5 = -pfji4
 
   // combines com6 & com7 below
   if ( parami->rad_flag>0 || parami->tor_flag!=0 ) {
     prefac_ij2-=pradx; 
     prefac_ji2-=prady;
   }
  
   // eng = attraction energy
   if (eflag) eng = 1.0*bij*att_eng;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairComb3::comb_fc(double r, Param *param)
 {
   // Smooth cutoff weight: 1 up to (bigr - bigd), 0 from (bigr + bigd)
   // onward, and a half-cosine ramp between the two radii.
   const double lower = param->bigr - param->bigd;
   const double upper = param->bigr + param->bigd;

   return (r <= lower) ? 1.0
        : (r >= upper) ? 0.0
        : 0.5*(1.0 + cos(MY_PI*(r-lower)/(upper-lower)));
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairComb3::comb_fc_d(double r, Param *param)
 {
   // Radial slope matching comb_fc(): zero on both flat parts, a sine
   // term inside the switching window [bigr - bigd, bigr + bigd].
   const double lower = param->bigr - param->bigd;
   const double upper = param->bigr + param->bigd;

   if (r <= lower || r >= upper) return 0.0;

   return -MY_PI2/(upper-lower)*sin(MY_PI*(r-lower)/(upper-lower));
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairComb3::comb_fccc(double xcn)
 {
   // Cosine switch on the coordination value xcn, using the window
   // [ccutoff[0], ccutoff[1]]: 1 below the window, 0 above it.
   const double lo = ccutoff[0];
   const double hi = ccutoff[1];
   double weight;

   if (xcn <= lo)      weight = 1.0;
   else if (xcn >= hi) weight = 0.0;
   else                weight = 0.5*(1.0 + cos(MY_PI*(xcn-lo)/(hi-lo)));

   return weight;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairComb3::comb_fccc_d(double xcn)
 {
   // Slope of the comb_fccc() switch; non-zero only strictly inside
   // the window [ccutoff[0], ccutoff[1]].
   const double lo = ccutoff[0];
   const double hi = ccutoff[1];

   if (xcn <= lo || xcn >= hi) return 0.0;

   return -MY_PI2/(hi-lo)*sin(MY_PI*(xcn-lo)/(hi-lo));
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairComb3::comb_fcch(double xcn)
 {
   // Cosine switch on xcn over the window [ccutoff[2], ccutoff[3]].
   const double start = ccutoff[2];
   const double stop  = ccutoff[3];

   return (xcn <= start) ? 1.0
        : (xcn >= stop)  ? 0.0
        : 0.5*(1.0 + cos(MY_PI*(xcn-start)/(stop-start)));
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairComb3::comb_fcch_d(double xcn)
 {
   // Slope of the comb_fcch() switch over [ccutoff[2], ccutoff[3]];
   // zero at and outside the window edges.
   const double start = ccutoff[2];
   const double stop  = ccutoff[3];
   double slope;

   if (xcn <= start)     slope = 0.0;
   else if (xcn >= stop) slope = 0.0;
   else slope = -MY_PI2/(stop-start)*sin(MY_PI*(xcn-start)/(stop-start));

   return slope;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairComb3::comb_fccch(double xcn)
 {
   // Cosine switch on xcn over the window [ccutoff[4], ccutoff[5]].
   const double first = ccutoff[4];
   const double last  = ccutoff[5];

   if (xcn <= first) {
     return 1.0;
   } else if (xcn >= last) {
     return 0.0;
   }
   return 0.5*(1.0 + cos(MY_PI*(xcn-first)/(last-first)));
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairComb3::comb_fccch_d(double xcn)
 {
   // Slope of the comb_fccch() switch over [ccutoff[4], ccutoff[5]].
   const double first = ccutoff[4];
   const double last  = ccutoff[5];

   return (xcn <= first || xcn >= last)
        ? 0.0
        : -MY_PI2/(last-first)*sin(MY_PI*(xcn-first)/(last-first));
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairComb3::comb_fcsw(double rsq)
 {
   // Cosine switch in the distance sqrt(rsq) over [chicut1, chicut2]:
   // 1 at or below chicut1, 0 at or above chicut2.
   const double dist = sqrt(rsq);
   double weight;

   if (dist <= chicut1)      weight = 1.0;
   else if (dist >= chicut2) weight = 0.0;
   else weight = 0.5*(1.0 + cos(MY_PI*(dist-chicut1)/(chicut2-chicut1)));

   return weight;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairComb3::self(Param *param, double qi)
 {
  // Quartic polynomial in the charge qi with coefficients chi, dj, dk, dl
  // (no constant term), plus a quartic penalty with prefactor 100 for
  // every excursion of qi outside [qmin, qmax].
  const double penalty = 100.0;
  const double lowest  = param->qmin;
  const double highest = param->qmax;
  const double c1 = param->chi;
  const double c2 = param->dj;
  const double c3 = param->dk;
  const double c4 = param->dl;

  double energy = qi*(c1+qi*(c2+qi*(c3+qi*c4)));

  if (qi < lowest)  energy += penalty * pow((qi-lowest),4);
  if (qi > highest) energy += penalty * pow((qi-highest),4);

  return energy;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb3::comb_fa(double r, Param *parami, Param *paramj, double iq,
         double jq, double &att_eng, double &att_force)
 {
   // Attractive pair term: a sum of three exponentials in r (bigB1..3,
   // alpha1..3 of parami), scaled by charge-dependent factors of both
   // atoms and switched off by comb_fc().  Both outputs are zero when the
   // product of the two charge factors is not positive.
   const double a1 = parami->alpha1;
   const double a2 = parami->alpha2;
   const double a3 = parami->alpha3;
   const double b1 = parami->bigB1;
   const double b2 = parami->bigB2;
   const double b3 = parami->bigB3;

   // charge-dependent pieces, one per atom
   const double d_i = parami->DU + pow(fabs(parami->bD*(parami->QU-iq)),parami->nD);
   const double d_j = paramj->DU + pow(fabs(paramj->bD*(paramj->QU-jq)),paramj->nD);
   const double y_i = (parami->aB-fabs(pow(parami->bB*(iq-parami->Qo),10)));
   const double y_j = (paramj->aB-fabs(pow(paramj->bB*(jq-paramj->Qo),10)));

   if (!(y_i*y_j > 0.0)) {
     att_eng = 0.0;
     att_force = 0.0;
     return;
   }

   const double charge_exp = exp(0.5*(parami->alfi*d_i+paramj->alfi*d_j));
   const double root_y = sqrt(y_i*y_j);
   const double e1 = exp(-a1*r);
   const double e2 = exp(-a2*r);
   const double e3 = exp(-a3*r);
   const double cut = comb_fc(r,parami);

   // amplitude before the cutoff is applied (cbj)
   const double amplitude = (b1*e1+b2*e2+b3*e3)*
     root_y*charge_exp;

   att_eng = -amplitude * cut;
   att_force = -(amplitude*comb_fc_d(r,parami)-cut*root_y*
       charge_exp*(a1*b1*e1+
       a2*b2*e2+a3*b3*e3));
 }
 
 /* ---------------------------------------------------------------------- */
 
 /* bond-order value and derivative factors for one side of a bond

    zet    : accumulated angular sum for this atom (input)
    tbij   : bond-order value (output)
    tbij1  : derivative factor w.r.t. the raw angular sum zet (output)
    tbij2  : derivative factor times dpcorn from coord()   (output)
    tbij3-5: derivative factor times dxccij/dxchij/dxcoij  (output)
    The effective argument is zet^powern + pcorn, where pcorn is the
    coordination correction returned by coord().
 */
 void PairComb3::comb_bij_d(double zet, Param *param, double r, int i, 
 	double &tbij, double &tbij1, double &tbij2, 
 	double &tbij3, double &tbij4, double &tbij5, double xcn)
 {
   double pcorn,dpcorn,dxccij,dxchij,dxcoij;
   double zeta = zet;
   double zetang,tmp_tbij, pow_n;
 
   pcorn = dpcorn = dxccij = dxchij = dxcoij = 0.0;
   coord(param,r,i,pcorn,dpcorn,dxccij,dxchij,dxcoij,xcn);	// coordination term
 
   zetang=zeta;
   pow_n=param->powern;
   zeta = pow(zetang,pow_n)+pcorn; 
   // chain-rule factor d(zetang^pow_n)/d(zetang), applied to tbij1 at the end
   tmp_tbij=pow_n*pow(zetang,(pow_n-1.0));
 
   // The branches are tested in this order; the first match wins.
   if ((1.0 + zeta) < 0.1 ){
     // floor 1+zeta at 0.1 and drop the derivative
     zeta=0.1-1.0;
     tbij = pow(1.0 + zeta, -0.5/pow_n);
     tbij1=0.0;
    }
    else if (zeta > param->c1) { 
     // large-zeta form: uses zeta in place of 1+zeta
     tbij = pow(zeta,-0.5/pow_n);
     tbij1 = -0.5/pow_n*pow(zeta,(-0.5/pow_n-1.0));
    } else if (zeta > param->c2) {
     // large-zeta form with one correction term
     tbij = pow(zeta,-0.5/pow_n)-0.5/pow_n*pow(zeta,(-0.5/pow_n-1.0));
     tbij1 = -0.5/pow_n/zeta;
    } else if (fabs(zeta) < param->c4) {     
     // |zeta| below c4: treated as exactly 1 with zero slope
     tbij = 1.0;
     tbij1 = 0.0;
    } else if (fabs(zeta) < param->c3) {
     // small-zeta linear form
     tbij = 1.0 - zeta/(2.0*pow_n);
     tbij1 = -1/(2.0*pow_n);
    } else {
     // general expression
     tbij = pow(1.0 + zeta, -0.5/pow_n);
     tbij1 = -0.5/pow_n * pow(1.0 + zeta,(-1.0-0.5/pow_n));
    }
 
   // tbij2..tbij5 must be formed before tbij1 is rescaled below,
   // because they use the un-scaled derivative w.r.t. zeta.
   tbij2 = tbij1 * dpcorn; 
   tbij3 = tbij1 * dxccij;
   tbij4 = tbij1 * dxchij;
   tbij5 = tbij1 * dxcoij;   
   tbij1 = tbij1 * tmp_tbij;
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 /* coordination correction pcorn and its derivatives for atom i

    Two modes, selected by param->pcn_flag:
      pcn_flag > 0 : value/derivatives come from the pcn_grid* tables (or
                     from cntri_int() for non-integer coordinates), using
                     the per-atom counts xcctmp/xchtmp/xcotmp;
                     dpcorn stays 0 in this mode.
      otherwise    : analytic form pcna*x + pcnb*exp(pcnc*x) + pcnd in the
                     total coordination x; dxccij/dxchij/dxcoij stay 0.
    In both modes the contribution of the current bond,
    comb_fc(r)*pcross, is subtracted from the relevant count first.
 */
 void PairComb3::coord(Param *param, double r, int i,
 	double &pcorn, double &dpcorn, double &dxccij, 
 	double &dxchij, double &dxcoij, double xcn)
 {
   int ixmin,iymin,izmin;
   double xcntot,xcccn,xchcn,xcocn;
   int tri_flag= param-> pcn_flag;
   int jele_gp= param->jelementgp;
   double pan = param->pcna;
   double pbn = param->pcnb;
   double pcn = param->pcnc;
   double pdn = param->pcnd;
 
   xcccn = xchcn = xcocn = 0.0;
 
   // per-atom coordination counts stored in class arrays
   xcccn = xcctmp[i];
   xchcn = xchtmp[i];
   xcocn = xcotmp[i];
   // total coordination with the current bond's own weight removed
   xcntot = -comb_fc(r,param)*param->pcross + xcn;
   pcorn = dpcorn = dxccij = dxchij = dxcoij = 0.0;
   pcorn = 0.0; dpcorn = 0.0;
 
   if(xcntot  < 0.0) xcntot  = 0.0;
 
   if (tri_flag>0) {
     // remove the current bond from the count of the partner's group
     if(jele_gp==1) xcccn = xcccn-comb_fc(r,param)*param->pcross;
     if(jele_gp==2) xchcn = xchcn-comb_fc(r,param)*param->pcross;
     if(jele_gp==3) xcocn = xcocn-comb_fc(r,param)*param->pcross;
     // clamp each count to [0, max]
     if(xcccn < 0.0) xcccn = 0.0;
     if(xchcn < 0.0) xchcn = 0.0;
     if(xcocn < 0.0) xcocn = 0.0;
     if(xcccn > maxx) xcccn = maxx;
     if(xchcn > maxy) xchcn = maxy;
     if(xcocn > maxz) xcocn = maxz;
 
     double xcntritot=xcccn+xchcn+xcocn;
       
     if(xcntritot > maxxcn[tri_flag-1]) {
       // beyond the tabulated range: linear extrapolation
       // NOTE(review): the test uses xcntritot but the extrapolation uses
       // xcntot -- confirm this mix is intended.
       pcorn  = vmaxxcn[tri_flag-1]+(xcntot-maxxcn[tri_flag-1])*dvmaxxcn[tri_flag-1];
       dxccij = dxchij = dxcoij = dvmaxxcn[tri_flag-1];
     }
     else {
       // integer grid cell containing the point
       ixmin=int(xcccn+1.0e-12);
       iymin=int(xchcn+1.0e-12);
       izmin=int(xcocn+1.0e-12);
       // off a grid node in any direction: interpolate inside the cell
       if (fabs(float(ixmin)-xcccn)>1.0e-8 ||
           fabs(float(iymin)-xchcn)>1.0e-8 ||
           fabs(float(izmin)-xcocn)>1.0e-8) {
             cntri_int(tri_flag,xcccn,xchcn,xcocn,ixmin,iymin,izmin,
             pcorn,dxccij,dxchij,dxcoij,param);          
       }
       else  {
         // exactly on a grid node: read value and derivatives directly
         pcorn  = pcn_grid[tri_flag-1][ixmin][iymin][izmin];
         dxccij = pcn_gridx[tri_flag-1][ixmin][iymin][izmin];
         dxchij = pcn_gridy[tri_flag-1][ixmin][iymin][izmin];
         dxcoij = pcn_gridz[tri_flag-1][ixmin][iymin][izmin];
       }
     }
   } else {
     // analytic coordination correction and its derivative w.r.t. xcntot
     pcorn = pan*xcntot+pbn*exp(pcn*xcntot)+pdn;
     dpcorn = pan+pbn*pcn*exp(pcn*xcntot);
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb3::cntri_int(int tri_flag, double xval, double yval,
                 double zval, int ixmin, int iymin, int izmin, double &vval,
                 double &dvalx, double &dvaly, double &dvalz, Param *param)
 {
   // Evaluates the 64-term polynomial stored in pcn_cubs for the grid cell
   // (ixmin,iymin,izmin) at the point (xval,yval,zval); the exponents of
   // each term come from iin3.  Partial derivatives are accumulated only
   // along directions whose coordinate exceeds 1e-8.
   vval = 0.0;
   dvalx = 0.0;
   dvaly = 0.0;
   dvalz = 0.0;

   // cell indices are capped at the last cell in each direction
   if (ixmin >= maxx-1) ixmin = maxx-1;
   if (iymin >= maxy-1) iymin = maxy-1;
   if (izmin >= maxz-1) izmin = maxz-1;

   const bool use_x = (xval>1.0e-8);
   const bool use_y = (yval>1.0e-8);
   const bool use_z = (zval>1.0e-8);

   for (int term = 0; term < 64; ++term) {
     const double contrib = pcn_cubs[tri_flag-1][ixmin][iymin][izmin][term]
           *pow(xval,iin3[term][0])*pow(yval,iin3[term][1])
           *pow(zval,iin3[term][2]);

     vval += contrib;
     if (use_x) dvalx += contrib*iin3[term][0]/xval;
     if (use_y) dvaly += contrib*iin3[term][1]/yval;
     if (use_z) dvalz += contrib*iin3[term][2]/zval;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairComb3::comb_gijk(double costheta, Param *param, double nco_tmp)
 {
   // Angular function scaled by param->pcross.  The base form is a
   // sixth-order polynomial in cos(theta) with coefficients pcos0..pcos6.
   //   ang_flag == 1 : blended with the tabulated function pang[]
   //                   (linear interpolation), weight comb_fccc(nco_tmp)
   //   ang_flag == 2 : blended with the ch_a[] polynomial,
   //                   weight comb_fccch(nco_tmp)
   //   otherwise     : base polynomial only

   // powers of cos(theta)
   const double c1 = costheta;
   const double c2 = c1*c1;
   const double c3 = c2*c1;
   const double c4 = c3*c1;
   const double c5 = c4*c1;
   const double c6 = c5*c1;

   // base polynomial, summed from the highest order down
   const double t6 = param->pcos6*c6;
   const double t5 = param->pcos5*c5;
   const double t4 = param->pcos4*c4;
   const double t3 = param->pcos3*c3;
   const double t2 = param->pcos2*c2;
   const double t1 = param->pcos1*c1;
   const double t0 = param->pcos0;
   const double poly = t6+t5+t4+t3+t2+t1+t0;
   const double scale = param->pcross;

   if (param->ang_flag==1) {
     const double blend = comb_fccc(nco_tmp);
     const double step = 2.0/ntab;
     const double pos = (c1+1.0)/step;    // table coordinate for cos in [-1,1]
     const int bin = int(pos);
     const double tabulated = pang[bin]+(pang[bin+1]-pang[bin])*(pos-bin);
     const double mixed = tabulated+blend*(poly-tabulated);
     return mixed*scale;
   }

   if (param->ang_flag==2) {
     const double h6 = ch_a[6]*c6;
     const double h5 = ch_a[5]*c5;
     const double h4 = ch_a[4]*c4;
     const double h3 = ch_a[3]*c3;
     const double h2 = ch_a[2]*c2;
     const double h1 = ch_a[1]*c1;
     const double h0 = ch_a[0];
     const double blend = comb_fccch(nco_tmp);
     const double alt = h6+h5+h4+h3+h2+h1+h0;
     const double mixed = alt+blend*(poly-alt);
     return mixed*scale;
   }

   return poly*scale;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb3::comb_gijk_d(double costheta, Param *param, double nco_tmp,
                 double &gijk_d, double &com3jk)
 {
   // Derivative companion of comb_gijk().
   //   gijk_d : pcross times the slope of the (possibly blended) angular
   //            function with respect to cos(theta)
   //   com3jk : slope of the blending weight times the difference between
   //            the base polynomial and the alternative function;
   //            0 when no blending is active

   // powers of cos(theta)
   const double c1 = costheta;
   const double c2 = c1*c1;
   const double c3 = c2*c1;
   const double c4 = c3*c1;
   const double c5 = c4*c1;
   const double c6 = c5*c1;

   // base polynomial coefficients
   const double p6 = param->pcos6;
   const double p5 = param->pcos5;
   const double p4 = param->pcos4;
   const double p3 = param->pcos3;
   const double p2 = param->pcos2;
   const double p1 = param->pcos1;
   const double p0 = param->pcos0;
   const double scale = param->pcross;

   // slope of the base polynomial, needed by every mode
   const double dpoly = 6.0*c5*p6+5.0*c4*p5+4.0*c3*p4
                       +3.0*c2*p3+2.0*c1*p2+p1;

   const bool tabulated_mode = (param->ang_flag==1);
   if (!tabulated_mode && !(param->ang_flag==2)) {
     gijk_d = scale*(dpoly);
     com3jk = 0.0;
     return;
   }

   const double poly = c6*p6+c5*p5+c4*p4
                      +c3*p3+c2*p2+c1*p1+p0;

   double blend, dblend, alt, dalt;
   if (tabulated_mode) {
     blend = comb_fccc(nco_tmp);
     dblend = comb_fccc_d(nco_tmp);

     // linear interpolation in the pang / dpang tables
     const double step = 2.0/ntab;
     const double pos = (c1+1.0)/step;
     const int bin = int(pos);
     alt = pang[bin]+(pang[bin+1]-pang[bin])*(pos-bin);
     dalt = dpang[bin]+(dpang[bin+1]-dpang[bin])*(pos-bin);
   } else {
     const double h6 = ch_a[6];
     const double h5 = ch_a[5];
     const double h4 = ch_a[4];
     const double h3 = ch_a[3];
     const double h2 = ch_a[2];
     const double h1 = ch_a[1];
     const double h0 = ch_a[0];
     blend = comb_fccch(nco_tmp);
     dblend = comb_fccch_d(nco_tmp);

     alt = c6*h6+c5*h5+c4*h4
          +c3*h3+c2*h2+c1*h1+h0;
     dalt = 6.0*c5*h6+5.0*c4*h5+4.0*c3*h4
           +3.0*c2*h3+2.0*c1*h2+h1;
   }

   gijk_d = scale*(dalt+blend*(dpoly-dalt));
   com3jk = dblend * (poly-alt);
 }
 
 /*------------------------------------------------------------------------- */
 
 void PairComb3::attractive(Param *parami, Param *paramj , Param *paramk, double prefac_ij1,
         double prefac_ij2, double prefac_ij3, double prefac_ij4,
         double prefac_ij5, double rsqij, double rsqik, double *delrij,
         double *delrik, double *fi, double *fj,double *fk, int i, double xcn)
 {
   // Thin wrapper: turns the two displacement vectors into lengths and
   // unit vectors, then lets comb_zetaterm_d() fill fi, fj and fk.
   double unit_ij[3],unit_ik[3];

   const double len_ij = sqrt(rsqij);
   const double inv_ij = 1.0/len_ij;
   const double len_ik = sqrt(rsqik);
   const double inv_ik = 1.0/len_ik;

   vec3_scale(inv_ij,delrij,unit_ij);
   vec3_scale(inv_ik,delrik,unit_ik);

   comb_zetaterm_d(prefac_ij1, prefac_ij2, prefac_ij3, prefac_ij4, prefac_ij5,
       unit_ij, len_ij, unit_ik, len_ik, fi, fj, fk,
       parami, paramj, paramk, xcn);
 }
 
 /* ---------------------------------------------------------------------- */
 
 /* three-body force contributions of one (i,j,k) triplet

    prefac_ij1..5 : prefactors produced by force_zeta() for the i-j bond
    rij_hat, rij  : unit vector and length of the i-j separation
    rik_hat, rik  : unit vector and length of the i-k separation
    dri, drj, drk : output 3-vectors for atoms i, j and k (overwritten)
    xcn           : coordination value forwarded to the angular function

    The output combines the derivative of fc(rik) (com1), of the angular
    function (com2), of the exp((beta*(rij-rik))^mint) factor (com3) and of
    the blending weight (com3j, com3k), all scaled by prefac_ij1, plus the
    coordination term com6 acting along rik_hat.
 */
 void PairComb3::comb_zetaterm_d(double prefac_ij1, double prefac_ij2,
         double prefac_ij3, double prefac_ij4, double prefac_ij5,
         double *rij_hat, double rij, double *rik_hat, double rik, double *dri,
         double *drj, double *drk, Param *parami, Param *paramj, Param *paramk, double xcn)
 {
   double gijk,gijk_d,ex_delr,ex_delr_d,fc_k,cos_theta,tmp,rlm3;
   double dcosdri[3],dcosdrj[3],dcosdrk[3],dfc_i,dfc_k;
   double com3j, com3k, com3jk;

   // Must start at zero: with pcn_flag > 0 none of the kelementgp tests
   // below may match, yet com6 is still accumulated into and applied to
   // dri / drk afterwards.
   double com6 = 0.0;

   int mint = int(parami->powermint);
   double pcrossi = parami->pcross;
   double pcrossj = paramj->pcross;
   double pcrossk = paramk->pcross;
   int icontrol = parami->pcn_flag;

   dfc_i = comb_fc_d(rij,parami);
   fc_k = comb_fc(rik,paramk);
   dfc_k = comb_fc_d(rik,paramk);
   rlm3 = parami->beta;
   tmp = pow(rlm3*(rij-rik),mint);

   // keep the exponential within a safe range (69.0776 is about ln(1e30))
   if (tmp > 69.0776) ex_delr = 1.e30;
   else if (tmp < -69.0776) ex_delr = 0.0;
   else ex_delr = exp(tmp);
   ex_delr *= pcrossi;

   cos_theta = vec3_dot(rij_hat,rik_hat);
   gijk = comb_gijk(cos_theta,parami,xcn);
   comb_gijk_d(cos_theta,parami,xcn,gijk_d,com3jk);
   costheta_d(rij_hat,rij,rik_hat,rik,dcosdri,dcosdrj,dcosdrk);

   // com6 & com7
   if(icontrol > 0){
     // pick the prefactor that belongs to the element group of atom k
     if(parami->kelementgp==1) {com6 = prefac_ij3*pcrossk*dfc_k;}
     if(parami->kelementgp==2) {com6 = prefac_ij4*pcrossk*dfc_k;}
     if(parami->kelementgp==3) {com6 = prefac_ij5*pcrossk*dfc_k;}
     if(parami->rad_flag>=1 || parami->tor_flag!=0)
             {com6+=prefac_ij2*pcrossk*dfc_k;}
   } else {
     com6 = prefac_ij2*pcrossi*dfc_k;
   }

   // blending-weight terms exist only for the blended angular forms
   if (parami->ang_flag==1 || parami->ang_flag==2) {
     com3j = com3jk*ex_delr*pcrossk*pcrossj*fc_k*dfc_i;
     com3k = com3jk*ex_delr*pcrossk*pcrossk*fc_k*dfc_k;
   } else {
     com3j = 0.0;
     com3k = 0.0;
   }

   ex_delr_d = mint*pow(rlm3,mint)*pow((rij-rik),(mint-1))*ex_delr; // com3

   // atom i
   vec3_scale(-dfc_k*gijk*ex_delr,rik_hat,dri);		// com1
   vec3_scaleadd(fc_k*gijk_d*ex_delr,dcosdri,dri,dri);	// com2
   vec3_scaleadd(fc_k*gijk*ex_delr_d,rik_hat,dri,dri);	// com3 cont'd
   vec3_scaleadd(-fc_k*gijk*ex_delr_d,rij_hat,dri,dri);	// com3 sums j
   vec3_scaleadd(-com3k,rik_hat,dri,dri);   		// com3k
   vec3_scaleadd(-com3j,rij_hat,dri,dri);   		// com3j
   vec3_scale(prefac_ij1,dri,dri);
   vec3_scaleadd(-com6,rik_hat,dri,dri);			// com6

   // atom j
   vec3_scale(fc_k*gijk_d*ex_delr,dcosdrj,drj);		// com2
   vec3_scaleadd(fc_k*gijk*ex_delr_d,rij_hat,drj,drj);	// com3 cont'd
   vec3_scaleadd(com3j,rij_hat,drj,drj);   		// com3j
   vec3_scale(prefac_ij1,drj,drj);

   // atom k
   vec3_scale(dfc_k*gijk*ex_delr,rik_hat,drk);		// com1
   vec3_scaleadd(fc_k*gijk_d*ex_delr,dcosdrk,drk,drk);	// com2
   vec3_scaleadd(-fc_k*gijk*ex_delr_d,rik_hat,drk,drk);	// com3 cont'd
   vec3_scaleadd(com3k,rik_hat,drk,drk);   		// com3k
   vec3_scale(prefac_ij1,drk,drk);
   vec3_scaleadd(com6,rik_hat,drk,drk);			// com6
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb3::costheta_d(double *rij_hat, double rij, double *rik_hat,
         double rik, double *dri, double *drj, double *drk)
 {
   // Gradients of cos(theta) = rij_hat . rik_hat for the three atoms.
   // drj and drk are built first; dri is minus their sum.
   const double cosine = vec3_dot(rij_hat,rik_hat);
   const double inv_rij = 1.0/rij;
   const double inv_rik = 1.0/rik;

   // atom j: (rik_hat - cos*rij_hat) / rij
   vec3_scaleadd(-cosine,rij_hat,rik_hat,drj);
   vec3_scale(inv_rij,drj,drj);

   // atom k: (rij_hat - cos*rik_hat) / rik
   vec3_scaleadd(-cosine,rik_hat,rij_hat,drk);
   vec3_scale(inv_rik,drk,drk);

   // atom i
   vec3_add(drj,drk,dri);
   vec3_scale(-1.0,dri,dri);
 }
 
 /* ---------------------------------------------------------------------- */
 
 /* allocate and fill the radial lookup tables

    All tables share one grid: spacing dra = 0.001, entry 0 at
    r = drin - dra for the Coulomb tables and at r = drin for the vdW
    tables, with ncoul entries covering the range up to cutmax plus a
    buffer of nnbuf entries.
    Filled here:
      intype             : (element i, element j) -> interaction index
      phin, dphin        : per-element function and derivative
      afb, dafb          : per-element shifted term and derivative
      fafb, dfafb, ddfafb: per-pair shifted term and derivatives
      erpaw              : erfc(alf*r) and exp(-(alf*r)^2), alf = 0.2
      vvdw, vdvdw        : van der Waals energy and derivative, with a
                           cubic spline at short range
    Per-atom work arrays (dpl, bbij, xcctmp, ..., sht_first) are only
    allocated here, sized by atom->nmax.
 */
 void PairComb3::tables()
   
 {
   int i,j,k,m, nntypes, ncoul,nnbuf, ncoul_lim, inty, itype, jtype;
   int iparam_i, iparam_ij, iparam_ji;
   double r,dra,drin,drbuf,rc,z,zr,zrc,ea,eb,ea3,eb3,alf;
   double exp2er,exp2ersh,fafash,dfafash,F1,dF1,ddF1,E1,E2,E3,E4;
   double exp2ear,exp2ebr,exp2earsh,exp2ebrsh,fafbsh,dfafbsh;
   double afbshift, dafbshift, exp2ershift;
 
   int n = nelements;
   
   dra  = 0.001; 
   drin = 0.100; 
   drbuf = 0.100;
   nnbuf = int(drbuf/dra) +1; 
   rc = cutmax;
   alf = 0.20;
   nmax = atom->nmax;
   
   // number of distinct unordered element pairs (+1), table length, and
   // an enlarged length used for the allocations
   nntypes = int((n+1)*n/2.0)+1;
   ncoul = int((rc-drin)/dra)+ nnbuf;
   ncoul_lim = int(ncoul * 1.20);
   
   // allocate arrays
   memory->create(intype,n,n,"pair:intype");
   memory->create(erpaw,ncoul_lim,3,"pair:erpaw");
   memory->create(fafb,ncoul_lim,nntypes,"pair:fafb");
   memory->create(dfafb,ncoul_lim,nntypes,"pair:dfafb");
   memory->create(ddfafb,ncoul_lim,nntypes,"pair:ddfafb");
   memory->create(phin,ncoul_lim,nntypes,"pair:phin");
   memory->create(dphin,ncoul_lim,nntypes,"pair:dphin");
   memory->create(afb,ncoul_lim,nntypes,"pair:afb");
   memory->create(dafb,ncoul_lim,nntypes,"pair:dafb");
   memory->create(vvdw,ncoul,nntypes,"pair:vvdw");
   memory->create(vdvdw,ncoul,nntypes,"pair:vdvdw");
   memory->create(dpl,nmax,3,"pair:dpl");
   memory->create(bbij,nmax,MAXNEIGH,"pair:bbij");
   memory->create(xcctmp,nmax,"pair:xcctmp");
   memory->create(xchtmp,nmax,"pair:xchtmp");
   memory->create(xcotmp,nmax,"pair:xcotmp");
   memory->create(NCo,nmax,"pair:NCo");
   memory->create(sht_num,nmax,"pair:sht_num");
   sht_first = (int **) memory->smalloc(nmax*sizeof(int *),
         "pair:sht_first");
 
   // set interaction number: 0-0=0, 1-1=1, 0-1=1-0=2
   // like pairs take indices 0..n-1 (counter m), unlike pairs continue
   // from n (counter k); the lower triangle mirrors the upper one
   
   m = 0; k = n;
   for (i = 0; i < n; i++) {
     for (j = 0; j < n; j++) {
       if (j == i) { 
         intype[i][j] = m;
         m += 1;
       } else if (j != i && j > i) {
         intype[i][j] = k;
         k += 1;
       } else if (j != i && j < i) {
         intype[i][j] = intype[j][i];
       }
     }
   }
   
   // default arrays to zero
   // NOTE(review): only the first ncoul rows are cleared although these
   // arrays were allocated with ncoul_lim rows.
   
   for (i = 0; i < ncoul; i ++) {
     for (j = 0; j < nntypes; j ++) {
       fafb[i][j]   = 0.0; 
       dfafb[i][j]  = 0.0; 
       ddfafb[i][j] = 0.0; 
       phin[i][j]   = 0.0; 
       dphin[i][j]  = 0.0;
       afb[i][j]    = 0.0; 
       dafb[i][j]   = 0.0;
     }
   }
 
   // direct 1/r energy with Slater 1S orbital overlap
   // per-element tables; afb is shifted by its value and slope at rc
   
   for (i = 0; i < n; i++) {
     r = drin - dra; 
     itype = i;
     iparam_i = elem2param[itype][itype][itype];
     z = params[iparam_i].esm;
     exp2ershift = exp(-2.0*z*rc);
     afbshift = -exp2ershift*(z+1.0/rc);
     dafbshift = exp2ershift*(2.0*z*z+2.0*z/rc+1.0/(rc*rc));
     
     for (j = 0; j < ncoul; j++) {
       exp2er = exp(-2.0 * z * r);
       phin[j][i] = 1.0 - exp2er * (1.0 + 2.0 * z * r * (1.0 + z * r));
       dphin[j][i] = (4.0 * exp2er * z * z * z * r * r);
       afb[j][i] = -exp2er*(z+1.0/r)-afbshift-(r-rc)*dafbshift;
       dafb[j][i] = -(exp2er*(2.0*z*z+2.0*z/r+1.0/(r*r))-dafbshift);
       r += dra;
     }
   }
 
   // per-pair tables fafb / dfafb / ddfafb, indexed by intype
   for (i = 0; i < n; i ++) {
     for (j = 0; j < n; j ++) {
       r = drin - dra; 
       if (j == i) {
         // like pair: single exponent z
         itype = i;
         inty = intype[itype][itype];
         iparam_i = elem2param[itype][itype][itype];
         z = params[iparam_i].esm;
         zrc = z * rc;
         exp2ersh = exp(-2.0 * zrc);
         // value and slope at rc, used to shift the table
         fafash = -exp2ersh * (1.0 / rc + 
                               z * (11.0/8.0 + 3.0/4.0*zrc + zrc*zrc/6.0));
         dfafash = exp2ersh * (1.0/(rc*rc) + 2.0*z/rc +
                               z*z*(2.0 + 7.0/6.0*zrc + zrc*zrc/3.0));
         for (k = 0; k < ncoul; k ++) {
           zr = z * r; 
           exp2er = exp(-2.0*zr);
           F1 = -exp2er * (1.0 / r + 
                           z * (11.0/8.0 + 3.0/4.0*zr + zr*zr/6.0));
           dF1 = exp2er * (1.0/(r*r) + 2.0*z/r +
                           z*z*(2.0 + 7.0/6.0*zr + zr*zr/3.0));
           ddF1 = -exp2er * (2.0/(r*r*r) + 4.0*z/(r*r) + 4.0*z*z/r +  
                             z*z*z/3.0*(17.0/2.0 + 5.0*zr + 2.0*zr*zr));
           fafb[k][inty] = F1-fafash-(r-rc)*dfafash;
           dfafb[k][inty] = -(dF1 - dfafash);
           ddfafb[k][inty] = ddF1;
                   r += dra; 
         }
       } else if (j != i) {
         // unlike pair: two exponents ea and eb
         // NOTE(review): E1..E4 divide by powers of (ea-eb), so two
         // different elements with equal esm would divide by zero.
         itype = i;
         jtype = j;
         inty = intype[itype][jtype];
         iparam_ij = elem2param[itype][jtype][jtype];
         ea = params[iparam_ij].esm;
         ea3 = ea*ea*ea;
         iparam_ji = elem2param[jtype][itype][itype];
         eb = params[iparam_ji].esm;
         eb3 = eb*eb*eb;
         E1 = ea*eb3*eb/((ea+eb)*(ea+eb)*(ea-eb)*(ea-eb));
         E2 = eb*ea3*ea/((ea+eb)*(ea+eb)*(eb-ea)*(eb-ea));
         E3 = (3.0*ea*ea*eb3*eb-eb3*eb3) / 
           ((ea+eb)*(ea+eb)*(ea+eb)*(ea-eb)*(ea-eb)*(ea-eb));
         E4 = (3.0*eb*eb*ea3*ea-ea3*ea3) / 
           ((ea+eb)*(ea+eb)*(ea+eb)*(eb-ea)*(eb-ea)*(eb-ea));
         exp2earsh = exp(-2.0*ea*rc);
         exp2ebrsh = exp(-2.0*eb*rc);
         // value and slope at rc, used to shift the table
         fafbsh = -exp2earsh*(E1 + E3/rc)-exp2ebrsh*(E2 + E4/rc);
         dfafbsh = 
           exp2earsh*(2.0*ea*(E1+E3/rc)+E3/(rc*rc)) +
           exp2ebrsh*(2.0*eb*(E2+E4/rc)+E4/(rc*rc));
         for (k = 0; k < ncoul; k ++) {
           exp2ear = exp(-2.0*ea*r);
           exp2ebr = exp(-2.0*eb*r);
           fafb[k][inty] = 
             - exp2ear*(E1+E3/r) - exp2ebr*(E2+E4/r)
             - fafbsh - (r-rc) * dfafbsh;
           dfafb[k][inty] = -(exp2ear*(2.0*ea*(E1+E3/r) + E3/(r*r))
                            + exp2ebr*(2.0*eb*(E2+E4/r) + E4/(r*r))- dfafbsh);
           ddfafb[k][inty] = -exp2ear*(4.0*ea*ea*(E1+E3/r)+4.0*ea*E3/(r*r)
                                 +2.0*E3/(r*r*r))
                             -exp2ebr*(4.0*eb*eb*(E2+E4/r)+4.0*eb*E4/(r*r)
                                 +2.0*E4/(r*r*r));
           r += dra; 
         }
       } 
     }
   }
 
   // erfc and gaussian tables on the same grid (entry 0 at drin - dra);
   // erpaw[i][2] is allocated but not written here
   for (i = 0; i < ncoul_lim; i ++) {
     r = dra * (i-1) + drin;
     erpaw[i][0] = erfc(r*alf);
     erpaw[i][1] = exp(-r*r*alf*alf);
   } 
   // end wolf summation
 
   // van der Waals
   int ii,jj;
   double **rvdw, *cc2, *cc3, *vrc, *rrc;
   double r6, r7, r12, r13, rf6, rf12, drf7, drf13;
   double drcc, temp6, temp7, temp12, temp13;
   double vsigt, vepst, vdwt, dvdwt;
 
   // temporary work arrays, released at the end of this function
   vrc = new double[13];
   rrc = new double[13];
   cc2 = new double[nntypes]; 
   cc3 = new double[nntypes]; 
   memory->create(rvdw,2,nntypes,"pair:rvdw");
 
   // vrc[p] = rc^(p+1)
   vrc[0] = rc;
   for (i=1; i<13; i++) {
     vrc[i] = vrc[i-1] * vrc[0];
   }
 
   // generate spline coefficients for CC, CH, HH vdw
   for (ii = 0; ii < n; ii ++) {
     for (jj = ii; jj < n; jj ++) {
       itype = ii;
       jtype = jj;
       inty = intype[itype][jtype];
       iparam_ij = elem2param[itype][jtype][jtype];
       
       // parameter check: eps > 0 
       if(params[iparam_ij].vdwflag > 0) {
 
         // rvdw[0]: radius below which the vdW term is zero
         if(params[iparam_ij].vdwflag==1){
           rvdw[0][inty] = params[iparam_ij].bigr + params[iparam_ij].bigd;
         }
         else {
           rvdw[0][inty] = params[iparam_ij].bigr - params[iparam_ij].bigd;
         }
                
         // rvdw[1]: radius above which the shifted 12-6 form is used
         rvdw[1][inty] = params[iparam_ij].vsig * 0.950;
 
         // radius check: outter radius vs. sigma
         if( rvdw[0][inty] > rvdw[1][inty] )
           error->all(FLERR,"Error in vdw spline: inner radius > outter radius");
 
         // rrc[p] = rvdw[1]^(p+1)
         rrc[0] = rvdw[1][inty];
 
         for (i=1; i<13; i++)
           rrc[i] = rrc[i-1] * rrc[0];
 
         // value (vdwt) and slope (dvdwt) of the shifted 12-6 form at
         // rvdw[1]; the cubic spline coefficients are fitted to them
         drcc = rrc[0] - rvdw[0][inty];
         temp6 = 1.0/rrc[5]-1.0/vrc[5]+6.0*(rrc[0]-vrc[0])/vrc[6];
         temp7 = 6.0*(1.0/vrc[6]-1.0/rrc[6]);
         temp12 = 1.0/rrc[11]-1.0/vrc[11]+(rrc[0]-vrc[0])*12.0/vrc[12];
         temp13 = 12.0*(1.0/vrc[12]-1.0/rrc[12]);
 
         vsigt = params[iparam_ij].vsig;
         vepst = params[iparam_ij].veps;
         vsigt = vsigt*vsigt*vsigt*vsigt*vsigt*vsigt;
 
         vdwt = vepst*(vsigt*vsigt*temp12-vsigt*temp6);
         dvdwt = vepst*(vsigt*vsigt*temp13-vsigt*temp7);
         cc2[inty] = (3.0/drcc*vdwt-dvdwt)/drcc;
         cc3[inty] = (vdwt/(drcc*drcc)-cc2[inty] )/drcc;
       }
     }
   }
 
   // generate vdw look-up table
   // note: this grid starts at r = drin (not drin - dra)
   for (ii = 0; ii < n; ii ++) {
     for (jj = ii; jj < n; jj ++) {
       itype = ii;
       jtype = jj;
       inty = intype[itype][jtype];
       iparam_ij = elem2param[itype][jtype][jtype];
       r = drin;
       for (k = 0; k < ncoul; k ++) {
         r6 = r*r*r*r*r*r;
         r7 = r6 * r;
         rf6 = 1.0/r6-1.0/vrc[5]+(r-vrc[0])*6.0/vrc[6];
         drf7 = 6.0*(1.0/vrc[6]-1.0/r7);
         vsigt = params[iparam_ij].vsig;
         vepst = params[iparam_ij].veps;
         vsigt = vsigt*vsigt*vsigt*vsigt*vsigt*vsigt;
 
         if(params[iparam_ij].vdwflag>0) {  
           if(r <= rvdw[0][inty]) {
             // inside the inner radius: no vdW interaction
             vvdw[k][inty] = 0.0;
             vdvdw[k][inty] = 0.0;
           } 
           else if ( r > rvdw[0][inty] && r <= rvdw[1][inty]) {
             // spline region between the two radii
             drcc = r-rvdw[0][inty];
             vvdw[k][inty] = drcc*drcc*(drcc*cc3[inty]+cc2[inty]);
             vdvdw[k][inty] = drcc*(3.0*drcc*cc3[inty]+2.0*cc2[inty]);
           } else {
             // shifted 12-6 form beyond rvdw[1]
             r12 = r6*r6;
             r13 = r6*r7;
             rf12 = 1.0/r12-1.0/vrc[11]+(r-vrc[0])*12.0/vrc[12];
             drf13= 12.0*(1.0/vrc[12]-1.0/r13);
             vvdw[k][inty] = vepst*(vsigt*vsigt*rf12-vsigt*rf6);
             vdvdw[k][inty] = vepst*(vsigt*vsigt*drf13-vsigt*drf7);
 	  }
 	} else {
           vvdw[k][inty]=0.0;
           vdvdw[k][inty]=0.0;
 	}
           r += dra; 
       }
     }
   }
 
   delete [] vrc;
   delete [] rrc;
   delete [] cc2;
   delete [] cc3;
   memory->destroy(rvdw);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb3::potal_calc(double &calc1, double &calc2, double &calc3)
 {
   // Constants evaluated at the largest lcut found in params[], with a
   // fixed damping parameter of 0.2 and the unit factor force->qqr2e.
   //   calc3 : erfc(alf*rc)/rc * qqr2e
   //   calc2 : erfc / gaussian combination divided by rc, times qqr2e
   //   calc1 : -(alf/MY_PIS*qqr2e + calc3/2)
   const double damping = 0.20;
   const double unit_conv = force->qqr2e;

   // longest cutoff over all parameter sets (never below zero)
   double cut = 0.0;
   int idx = 0;
   while (idx < nparams) {
     if (params[idx].lcut > cut) cut = params[idx].lcut;
     ++idx;
   }

   const double erfc_cut = erfc(cut*damping);

   calc2 = (erfc_cut/cut/cut+2.0*damping/MY_PIS*
            exp(-damping*damping*cut*cut)/cut)*unit_conv/cut;
   calc3 = (erfc_cut/cut)*unit_conv;
   calc1 = -(damping/MY_PIS*unit_conv+calc3*0.5);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb3::tri_point(double rsq, int &mr1, int &mr2,
                  int &mr3, double &sr1, double &sr2, double &sr3)
 {
   // Three-point interpolation stencil on a grid that starts at 0.1 with
   // spacing 0.001.  mr1 is the grid node nearest to sqrt(rsq), mr2 and
   // mr3 are the next two indices; sr1..sr3 are quadratic weights in the
   // fractional offset from that nearest node.
   const double grid_start = 0.1000;
   const double grid_step = 0.0010;

   // keep the distance two grid steps inside both ends of the range
   double dist = sqrt(rsq);
   if (dist < grid_start + 2.0*grid_step) dist = grid_start + 2.0*grid_step;
   if (dist > cutmax - 2.0*grid_step) dist = cutmax - 2.0*grid_step;

   // nearest node: truncate, then round up past the midpoint
   const double position = (dist-grid_start)/grid_step;
   int node = int(position);
   const double remainder = position - float(node);
   if (remainder > 0.5) node += 1;

   // signed offset from the nearest node, in units of the grid step
   const double node_dist = float(node)*grid_step;
   const double offset = (dist - grid_start - node_dist)/grid_step;
   const double offset_sq = offset * offset;

   sr1 = (offset_sq - offset) * 0.50;
   sr2 = 1.0 - offset_sq;
   sr3 = (offset_sq + offset) * 0.50;

   mr1 = node;
   mr2 = node + 1;
   mr3 = node + 2;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb3::vdwaals(int inty, int mr1, int mr2, int mr3, double rsq,
                       double sr1, double sr2, double sr3,
                       double &eng, double &fforce)
 {
   // Three-point interpolation in the vvdw / vdvdw tables for interaction
   // type inty.  Table rows are read at index-1 relative to the stencil
   // indices mr1..mr3.  fforce is minus the interpolated derivative
   // divided by the distance.
   const double dist = sqrt(rsq);

   const double e_lo  = vvdw[mr1-1][inty];
   const double e_mid = vvdw[mr2-1][inty];
   const double e_hi  = vvdw[mr3-1][inty];
   eng = 1.0*(sr1*e_lo+sr2*e_mid+sr3*e_hi);

   const double d_lo  = vdvdw[mr1-1][inty];
   const double d_mid = vdvdw[mr2-1][inty];
   const double d_hi  = vdvdw[mr3-1][inty];
   fforce = -1.0/dist*(sr1*d_lo+sr2*d_mid+sr3*d_hi);
 }
 
 /* ---------------------------------------------------------------------- */
 
 /* tabulated Coulomb-type pair energy and force

    mr1..mr3, sr1..sr3 : stencil indices and weights used for every table
                         interpolation in this function
    iq, jq             : charges of atoms i and j
    fac11, fac11e      : constants subtracted from the force and energy
                         kernels respectively
    pot_tmp, for_tmp   : outputs, overwritten (energy, force factor)
    The curl parameters of either atom may be replaced by a value
    interpolated with comb_fc_curl() of the atom's xcotmp[] entry; see the
    conditions below.
 */
 void PairComb3::direct(Param *parami, Param *paramj, int mr1, 
 	int mr2, int mr3, double rsq, double sr1, double sr2, double sr3, 
 	double iq, double jq, double fac11, double fac11e, 
 	double &pot_tmp, double &for_tmp, int i, int j)
 {
   double r,erfcc,fafbnl,potij,esucon;
   double r3,erfcd,dfafbnl,smf2,dvdrr,alf,alfdpi;
   double afbn,afbj,sme1n,sme1j,sme1,sme2,dafbn, dafbj,smf1n,smf1j;
   double curli = parami->curl;
   double curlj = paramj->curl;
   int inti = parami->ielement;
   int intj = paramj->ielement;
   int inty = intype[inti][intj];
 
   double curlij0 = parami->curl0;
   double curlji0 = paramj->curl0;
   double curlij1,curlji1,dcurlij,dcurlji;
   double fcp1j,xcoij,xcoji;
   int icurl, jcurl;
   int ielegp = parami->ielementgp;
   int jelegp = paramj->ielementgp;
 
   r = sqrt(rsq);
   r3 = r * rsq;
   alf = 0.20;
   alfdpi = 2.0*alf/MY_PIS;
   esucon = force->qqr2e;
   pot_tmp = for_tmp = 0.0;
   icurl=jcurl=0;
 
   // a curl adjustment applies to an atom of element group 2 whose curl
   // exceeds its curl0
   if(ielegp==2 && curli>curlij0) {
     icurl=1;
     curlij1=curli;
   }
   
   if(jelegp==2 && curlj>curlji0) {
     jcurl=1;
     curlji1=curlj;
   }
 
   // curlij1/dcurlij (curlji1/dcurlji) are only assigned when icurl
   // (jcurl) is 1; every later read is guarded by the same flag
   if(icurl==1 || jcurl ==1) {
     xcoij = xcotmp[i];
     xcoji = xcotmp[j];
     // NOTE(review): the same parami-based cutoff slope is used for both
     // the i and the j adjustment -- confirm intended.
     fcp1j = comb_fc_d(r,parami);   
 
     if(icurl==1) {
       curli=curlij1+(curlij0-curlij1)*comb_fc_curl(xcoij,parami);
       dcurlij=fcp1j*(curlij0-curlij1)*comb_fc_curl_d(xcoij,parami);
     }
 
     if(jcurl==1) {
       curlj=curlji1+(curlji0-curlji1)*comb_fc_curl(xcoji,paramj);
       dcurlji=fcp1j*(curlji0-curlji1)*comb_fc_curl_d(xcoji,paramj);
     }
   }
 
   // three-point interpolation of the energy tables; afb is indexed by
   // element, fafb by interaction type
   erfcc = sr1*erpaw[mr1][0] + sr2*erpaw[mr2][0] + sr3*erpaw[mr3][0];
   afbn = sr1*afb[mr1][inti] + sr2*afb[mr2][inti] + sr3*afb[mr3][inti];
   afbj = sr1*afb[mr1][intj] + sr2*afb[mr2][intj] + sr3*afb[mr3][intj];
   fafbnl= sr1*fafb[mr1][inty] + sr2*fafb[mr2][inty] + sr3*fafb[mr3][inty];
   potij = (erfcc/r * esucon - fac11e);
 
   // sme1: terms linear in one charge; sme2: charge-charge term
   sme1n = iq*curlj*(afbn-fafbnl)*esucon;
   sme1j = jq*curli*(afbj-fafbnl)*esucon;
   sme1 = sme1n + sme1j;
   sme2 = (potij + fafbnl * esucon) * iq * jq;
   pot_tmp = 1.0 * (sme1+sme2); 
 
   // 1/r force (wrt r)
 
   erfcd = sr1*erpaw[mr1][1] + sr2*erpaw[mr2][1] + sr3*erpaw[mr3][1];
   dafbn = sr1*dafb[mr1][inti] + sr2*dafb[mr2][inti] + sr3*dafb[mr3][inti];
   dafbj = sr1*dafb[mr1][intj] + sr2*dafb[mr2][intj] + sr3*dafb[mr3][intj];
   dfafbnl= sr1*dfafb[mr1][inty] + sr2*dfafb[mr2][inty] + sr3*dfafb[mr3][inty];
 
   dvdrr = (erfcc/r3+alfdpi*erfcd/rsq)*esucon-fac11;
   smf1n = iq * curlj * (dafbn-dfafbnl)*esucon/r;
   smf1j = jq * curli * (dafbj-dfafbnl)*esucon/r;
 
   // extra force from the distance dependence of the adjusted curl, only
   // when the partner atom belongs to element group 3
   if(jcurl==1 && ielegp == 3 && dcurlji != 0.0){
    smf1n += dcurlji*iq*(afbn-fafbnl)*esucon/r;
   }
   if(icurl==1 && jelegp == 3 && dcurlij != 0.0){
    smf1j += dcurlij*jq*(afbj-fafbnl)*esucon/r;
   }
 
   smf2 = dvdrr + dfafbnl * esucon/r;
   for_tmp =  1.0 * iq * jq * smf2 + smf1n + smf1j;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Field-correction energy and force for one pair at squared separation rsq
 // with charges iq and jq.  eng_tmp is overwritten with the energy; the force
 // term is subtracted from whatever value for_tmp already holds.
 void PairComb3::field(Param *parami, Param *paramj, double rsq, double iq,
                       double jq, double &eng_tmp,double &for_tmp)
 {
   // pair separation, cutoff (taken from parami) and the powers used below
   const double dist  = sqrt(rsq);
   const double dist3 = dist * dist * dist;
   const double dist4 = dist3 * dist;
   const double dist5 = dist4 * dist;
   const double cut  = parami->lcut;
   const double cut2 = cut * cut;
   const double cut3 = cut*cut*cut;
   const double cut4 = cut3 * cut;
   const double cut5 = cut4 * cut;
 
   // denominators offsets pcmn1^3 and pcmn2^5 come from parami only
   const double off3 = pow(parami->pcmn1,3);
   const double off5 = pow(parami->pcmn2,5);
 
   // x^3/(x^6+off3) and x^5/(x^10+off5), at the pair distance and at the cutoff
   const double ker3     = dist3/(pow(dist3,2)+off3);
   const double ker3_cut = cut3/(pow(cut3,2)+off3);
   const double ker5     = dist5/(pow(dist5,2)+off5);
   const double ker5_cut = cut5/(pow(cut5,2)+off5);
 
   // derivative terms of the two kernels, again at both distances
   const double dker3     = 3/dist*ker3-6*rsq*ker3*ker3;
   const double dker3_cut = 3/cut*ker3_cut-6*cut2*ker3_cut*ker3_cut;
   const double dker5     = 5/dist*ker5-10*dist4*ker5*ker5;
   const double dker5_cut = 5/cut*ker5_cut-10*cut4*ker5_cut*ker5_cut;
 
   // kernel minus its cutoff value minus (r - rc) times the cutoff derivative
   const double rf3  = ker3-ker3_cut-(dist-cut)*dker3_cut;
   const double rf5  = ker5-ker5_cut-(dist-cut)*dker5_cut;
   const double drf4 = dker3 - dker3_cut;
   const double drf6 = dker5 - dker5_cut;
 
   // field correction energy
   const double eng_i = jq*(parami->cmn1*rf3+jq*parami->cmn2*rf5);
   const double eng_j = iq*(paramj->cmn1*rf3+iq*paramj->cmn2*rf5);
   eng_tmp = 1.0 * (eng_i + eng_j);
 
   // field correction force
   const double frc_i = jq*(parami->cmn1*drf4+jq*parami->cmn2*drf6)/dist;
   const double frc_j = iq*(paramj->cmn1*drf4+iq*paramj->cmn2*drf6)/dist;
   for_tmp -= 1.0 * (frc_i + frc_j);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Stores radtot = cnconj - comb_fc(r)*pcross through the radtot reference and
 // returns comb_fc(r)*pcross*comb_fcch(radtot).  The index i is not used.
 double PairComb3::rad_init(double rsq2,Param *param,int i,
                            double &radtot, double cnconj)
 {
   const double dist = sqrt(rsq2);
   const double weighted_fc = comb_fc(dist,param) * param->pcross;
 
   radtot = -weighted_fc + cnconj;
 
   return weighted_fc * comb_fcch(radtot);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb3::rad_calc(double r, Param *parami, Param *paramj, 
 	double kconjug, double lconjug, int i, int j, double xcn, double ycn)
 {
   int ixmin, iymin, izmin;
   int radindx;
   double xrad, yrad, zcon, vrad, pradx, prady, pradz;
 
   vrad = pradx = prady = pradz = 0.0;
   xrad = -comb_fc(r,parami)*parami->pcross + xcn;
   yrad = -comb_fc(r,paramj)*paramj->pcross + ycn;
   zcon = 1.0 + pow(kconjug,2) + pow(lconjug,2);
 
   if(xrad < 0.0) xrad = 0.0;   
   if(yrad < 0.0) yrad = 0.0;
   if(zcon < 1.0) zcon = 1.0;
   if(xrad > maxxc) xrad = maxxc;   
   if(yrad > maxyc) yrad = maxyc;
   if(zcon > maxconj) zcon = maxconj;
   ixmin = int(xrad+1.0e-12);
   iymin = int(yrad+1.0e-12);
   izmin = int(zcon+1.0e-12);
   radindx=parami->rad_flag-1; 
   if (fabs(float(ixmin)-xrad)>1.0e-8 ||
       fabs(float(iymin)-yrad)>1.0e-8 ||
       fabs(float(izmin)-zcon)>1.0e-8) {
     rad_int(radindx,xrad,yrad,zcon,ixmin,iymin,izmin,
 	      vrad,pradx,prady,pradz);
   } else {
     vrad  = rad_grid[radindx][ixmin][iymin][izmin-1];
     pradx = rad_gridx[radindx][ixmin][iymin][izmin-1];
     prady = rad_gridy[radindx][ixmin][iymin][izmin-1];
     pradz = rad_gridz[radindx][ixmin][iymin][izmin-1];
   }
 
   brad[0] = vrad;
   brad[1] = pradx;
   brad[2] = prady;
   brad[3] = pradz;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Sums the 64 polynomial terms rad_spl[...][k] * xrad^a * yrad^b * zcon^c
 // (exponents a,b,c from iin3[k]) for cell (l,m,n-1) of table radindx.
 // vrad receives the sum; pradx/prady/pradz receive the term-wise derivatives
 // with respect to xrad, yrad and zcon, skipped when the variable is <= 1e-8.
 void PairComb3::rad_int(int radindx,double xrad, double yrad, double zcon, int l,
                         int m, int n, double &vrad, double &pradx, double &prady,
                         double &pradz)
 {
   vrad = 0.0;
   pradx = 0.0;
   prady = 0.0;
   pradz = 0.0;
 
   // cap the cell indices at the last cell in each direction
   if (l >= maxxc-1) l = maxxc-1;
   if (m >= maxyc-1) m = maxyc-1;
   if (n >= maxconj-1) n = maxconj-1;
 
   // the derivative divides by the variable, so guard against (near) zero
   const bool use_x = xrad > 1.0e-8;
   const bool use_y = yrad > 1.0e-8;
   const bool use_z = zcon > 1.0e-8;
 
   for (int term = 0; term < 64; ++term) {
     const double contrib = rad_spl[radindx][l][m][n-1][term] * pow(xrad,iin3[term][0])
           * pow(yrad,iin3[term][1]) * pow(zcon,iin3[term][2]);
     vrad += contrib;
     if (use_x) pradx += contrib*iin3[term][0]/xrad;
     if (use_y) prady += contrib*iin3[term][1]/yrad;
     if (use_z) pradz += contrib*iin3[term][2]/zcon;
   }
 }
 
 
 /* ---------------------------------------------------------------------- */
 // Fills fi_rad / fk_rad with equal and opposite vectors along delrk, scaled by
 // brad[3]*comb_fc_d(rik)*comb_fcch(radtot)*pcross*2*conjug / rik, and stores
 // brad[3]*comb_fc(rik)*comb_fcch_d(radtot)*pcross*2*conjug in radtmp.
 void PairComb3::rad_forceik(Param *paramk, double rsq2, double *delrk,
                             double conjug, double radtot)
 {
   const double radcut = comb_fcch(radtot);
   const double dradcut = comb_fcch_d(radtot);
 
   // clear the outputs before anything else is evaluated
   for (int d = 0; d < 3; ++d) {
     fi_rad[d] = fk_rad[d] = 0.0;
   }
   radtmp = 0.0;
 
   const double rik = sqrt(rsq2);
   const double fc1k = comb_fc(rik, paramk);
   const double fcp1k = comb_fc_d(rik,paramk);
 
   const double pradk = brad[3]*fcp1k*radcut*paramk->pcross*2.0*conjug;
   radtmp = brad[3]*fc1k*dradcut*paramk->pcross*2.0*conjug;
 
   // magnitude per unit of delrk
   const double per_length = pradk/rik;
 
   for (int d = 0; d < 3; ++d) {
     const double component = per_length * delrk[d];
     fi_rad[d] =  component;
     fk_rad[d] = -component;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Fills fj_rad / fk_rad with equal and opposite vectors along delrm, scaled by
 // dpradk*comb_fc_d(rkm)*pcross / rkm.
 void PairComb3::rad_force(Param *paramm, double rsq3,
                           double *delrm, double dpradk)
 {
   // clear the outputs before anything else is evaluated
   for (int d = 0; d < 3; ++d) {
     fj_rad[d] = fk_rad[d] = 0.0;
   }
 
   const double rkm = sqrt(rsq3);
   const double fcp1m = comb_fc_d(rkm, paramm);
 
   const double comkm = dpradk * fcp1m *  paramm->pcross;
 
   // magnitude per unit of delrm
   const double per_length = comkm/rkm;
 
   for (int d = 0; d < 3; ++d) {
     const double component = per_length * delrm[d];
     fj_rad[d] =  component;
     fk_rad[d] = -component;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Torsion term for the chain k-i-j-l.  Returns 0 unless the sine of the
 // angle between delrj and -delrl exceeds 0.1; otherwise returns
 // btt*comb_fc(rik)*comb_fc(rjl), where btt is 1-(TT2/TT1)^2 for torindx >= 1
 // and ptork2*(ptork1-TT2/TT1)^2 otherwise.  delrl is negated temporarily and
 // restored before the function continues.
 double PairComb3::bbtor1(int torindx, Param *paramk, Param *paraml,
                          double rsq1, double rsq2, double rsq3, double *delrj,
                          double *delrk, double *delrl, double srmu)
 {
   const double rij = sqrt(rsq1);
   const double rik = sqrt(rsq2);
   const double rjl = sqrt(rsq3);
 
   // cosine between delrj and the reversed delrl, then the matching sine
   vec3_scale(-1.0,delrl,delrl);
   const double cos_jl = vec3_dot(delrj,delrl)/(rij*rjl);
   vec3_scale(-1.0,delrl,delrl);
   const double sin_jl = sqrt(1.0-cos_jl*cos_jl);
 
   // nearly collinear geometry (or a NaN sine): no torsion contribution
   if (!(sin_jl > 0.1)) return 0.0;
 
   const double fc1k = comb_fc(rik,paramk);
   const double fc1l = comb_fc(rjl,paraml);
 
   const double TT1 = rik*rjl*rij*rij*srmu*sin_jl;
 
   // tork = delrk x delrj, torl = delrj x delrl, via cyclic index triples
   double tork[3], torl[3];
   for (int a = 0; a < 3; ++a) {
     const int b = (a+1)%3;
     const int c = (a+2)%3;
     tork[a] = delrk[b]*delrj[c] - delrk[c]*delrj[b];
     torl[a] = delrj[b]*delrl[c] - delrj[c]*delrl[b];
   }
   const double TT2 = vec3_dot(tork,torl);
 
   double btt;
   if (torindx >= 1) {
     btt = 1.0 - pow((TT2/TT1),2);
   } else {
     btt = paramk->ptork1-TT2/TT1;
     btt = paramk->ptork2*pow(btt,2);
   }
   return btt * fc1k * fc1l;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairComb3::tor_calc(double r, Param *parami, Param *paramj, 
 	double kconjug, double lconjug, int i, int j, double xcn, double ycn)
 {
   int ixmin, iymin, izmin;
   double vtor, dtorx, dtory, dtorz;
   double xtor, ytor, zcon;
   int torindx;
 
   vtor = dtorx = dtory = dtorz = 0.0;
   torindx=parami->tor_flag;
 
   if(torindx<0){
     vtor=1.0;
     dtorx=0.0;
     dtory=0.0;
     dtorz=0.0;
   } else {
     xtor = -comb_fc(r, parami) * parami->pcross + xcn;
     ytor = -comb_fc(r, paramj) * paramj->pcross + ycn;
     zcon = 1.0 + pow(kconjug,2) + pow(lconjug,2);
     if (xtor < 0.0) xtor = 0.0;
     if (ytor < 0.0) ytor = 0.0;
     if (zcon < 1.0) zcon = 1.0;
     if (xtor > maxxc) xtor = maxxc;
     if (ytor > maxyc) ytor = maxyc;
     if (zcon > maxconj) zcon = maxconj;
 
     ixmin = int(xtor+1.0e-12);
     iymin = int(ytor+1.0e-12);
     izmin = int(zcon+1.0e-12);
 
     torindx=torindx-1; 
 
     if (fabs(float(ixmin)-xtor)>1.0e-8 ||
       fabs(float(iymin)-ytor)>1.0e-8 ||
       fabs(float(izmin)-zcon)>1.0e-8) {
       tor_int(torindx,xtor,ytor,zcon,ixmin,iymin,izmin,
               vtor,dtorx,dtory,dtorz);
     } else {
       vtor  = tor_grid[torindx][ixmin][iymin][izmin-1];
       dtorx = tor_gridx[torindx][ixmin][iymin][izmin-1];
       dtory = tor_gridy[torindx][ixmin][iymin][izmin-1];
       dtorz = tor_gridz[torindx][ixmin][iymin][izmin-1];
     }
   }
 
   btor[0] = vtor;
   btor[1] = dtorx;
   btor[2] = dtory;
   btor[3] = dtorz;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Sums the 64 polynomial terms tor_spl[...][k] * xtor^a * ytor^b * zcon^c
 // (exponents a,b,c from iin3[k]) for cell (l,m,n-1) of table torindx.
 // vtor receives the sum; dtorx/dtory/dtorz receive the term-wise derivatives
 // with respect to xtor, ytor and zcon, skipped when the variable is <= 1e-8.
 void PairComb3::tor_int(int torindx,double xtor, double ytor, double zcon, int l,
                         int m, int n, double &vtor, double &dtorx, double &dtory, double &dtorz)
 {
   vtor = 0.0;
   dtorx = 0.0;
   dtory = 0.0;
   dtorz = 0.0;
 
   // cap the cell indices at the last cell in each direction
   if (l >= maxxc-1) l = maxxc-1;
   if (m >= maxyc-1) m = maxyc-1;
   if (n >= maxconj-1) n = maxconj-1;
 
   // the derivative divides by the variable, so guard against (near) zero
   const bool use_x = xtor > 1.0e-8;
   const bool use_y = ytor > 1.0e-8;
   const bool use_z = zcon > 1.0e-8;
 
   for (int term = 0; term < 64; ++term) {
     const double contrib = tor_spl[torindx][l][m][n-1][term] * pow(xtor,iin3[term][0])
           * pow(ytor,iin3[term][1]) * pow(zcon,iin3[term][2]);
     vtor += contrib;
     if (use_x) dtorx += contrib*iin3[term][0]/xtor;
     if (use_y) dtory += contrib*iin3[term][1]/ytor;
     if (use_z) dtorz += contrib*iin3[term][2]/zcon;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Torsion forces for the chain k-i-j-l.  The four member arrays fi_tor,
 // fj_tor, fk_tor and fl_tor are zeroed on entry and are only filled in when
 // the sine of the angle between delrj and -delrl (srmul) exceeds 0.1.
 //   torindx       selects the functional form (>= 1: 1 - rmut^2,
 //                 otherwise ptork2*(ptork1 - rmut)^2)
 //   paramk/paraml parameter sets passed to comb_fc / comb_fc_d for rik / rjl
 //   srmu          enters TT1 and dt1dik below; supplied by the caller
 //   rsq1..rsq3    squared lengths whose roots are used as rij, rik, rjl
 //   delrj/k/l     the three displacement vectors; delrl is negated
 //                 temporarily and restored before it is used again
 void PairComb3::tor_force(int torindx, Param *paramk, Param *paraml, 
         double srmu, double rsq1,double rsq2, double rsq3,
         double *delrj, double *delrk, double *delrl)
 {
   int nm;
   double rmu, rmul, srmul, rij, rik, rjl;
 
   // outputs default to zero; they stay zero when the branch below is skipped
   for (nm=0; nm<3; nm++) {
     fi_tor[nm] = fj_tor[nm] = fk_tor[nm] = fl_tor[nm] = 0.0;
   }
 
   rij = sqrt(rsq1);
   rik = sqrt(rsq2);
   rjl = sqrt(rsq3);
 
   // rmu: cosine between delrj and delrk; rmul: cosine between delrj and -delrl
   rmu = vec3_dot(delrj,delrk)/(rij*rik);
   vec3_scale(-1.0,delrl,delrl);
   rmul = vec3_dot(delrj,delrl)/(rij*rjl);
   vec3_scale(-1.0,delrl,delrl);
   srmul = sqrt(1.0-rmul*rmul);
   // NOTE(review): acos() returns values in [0, pi], so this sign flip looks
   // unreachable -- confirm before relying on it
   if(acos(rmul) > MY_PI) srmul = -srmul;
 
   if(srmul > 0.1 ) {
     double fc1k, fcp1k, fc1l, fcp1l, srmul2, dt1dik, dt1djl;
     double TT1, TT2, rmut, btt, tork[3], torl[3];
     double dt2dik[3], dt2djl[3], dt2dij[3], AA, AA2;
     double tfij[4], tfik[2], tfjl[2], tjx[3], tjy[3], tjz[3];
     double tkx[2], tky[2], tkz[2], tlx[2], tly[2], tlz[2];
 
     // cutoff functions and their derivatives for the i-k and j-l distances
     fc1k  = comb_fc(rik,paramk);
     fcp1k = comb_fc_d(rik,paramk);
     fc1l  = comb_fc(rjl,paraml);
     fcp1l = comb_fc_d(rjl,paraml);
     srmul2 = pow(srmul,2);
 
     // TT1: normalization; TT2: dot product of the two cross products below
     TT1 = rik*rjl*rij*rij*srmu*srmul;
     dt1dik = -rmu/pow(srmu,2);
     dt1djl = -rmul/srmul2;
     // tork = delrk x delrj, torl = delrj x delrl
     tork[0] = delrk[1]*delrj[2] - delrk[2]*delrj[1];
     torl[0] = delrj[1]*delrl[2] - delrj[2]*delrl[1];
     tork[1] = delrk[2]*delrj[0] - delrk[0]*delrj[2];
     torl[1] = delrj[2]*delrl[0] - delrj[0]*delrl[2];
     tork[2] = delrk[0]*delrj[1] - delrk[1]*delrj[0];
     torl[2] = delrj[0]*delrl[1] - delrj[1]*delrl[0];
     TT2 = vec3_dot(tork,torl);
 
     // vectors that multiply AA in the tk*/tl*/tj* terms further down
     dt2dik[0] = -delrj[1]*torl[2] + delrj[2]*torl[1];
     dt2dik[1] = -delrj[2]*torl[0] + delrj[0]*torl[2];
     dt2dik[2] = -delrj[0]*torl[1] + delrj[1]*torl[0];
     dt2djl[0] =  delrj[1]*tork[2] - delrj[2]*tork[1];
     dt2djl[1] =  delrj[2]*tork[0] - delrj[0]*tork[2];
     dt2djl[2] =  delrj[0]*tork[1] - delrj[1]*tork[0];
     dt2dij[0] = -delrk[2]*torl[1] + delrl[2]*tork[1] 
 	       + delrk[1]*torl[2] - delrl[1]*tork[2];
     dt2dij[1] = -delrk[0]*torl[2] + delrl[0]*tork[2] 
 	       + delrk[2]*torl[0] - delrl[2]*tork[0];
     dt2dij[2] = -delrk[1]*torl[0] + delrl[1]*tork[0] 
 	       + delrk[0]*torl[1] - delrl[0]*tork[1];
 
     rmut = TT2/TT1;
 
     // btt is the torsion function value, AA its prefactor; both forms are
     // scaled by the member ptorr
     if(torindx>=1) {
         btt = 1.0 - pow(rmut,2);
         AA = -2.0 * ptorr * rmut * fc1k * fc1l / TT1;
     }
     else {
         btt=paramk->ptork1-rmut;
         btt=paramk->ptork2*pow(btt,2);
         AA = -2.0 * ptorr * paramk->ptork2 *
           (paramk->ptork1-rmut) * fc1k * fc1l /TT1;              
    }
 
     // scalar coefficients of delrj / delrk / delrl in the force expressions
     AA2 = AA * TT2;
     tfij[0] = -(dt1dik*AA2)/rij/rik;
     tfij[1] = AA2/rij/rij - dt1dik*AA2*rmu/rij/rij;
     tfij[2] = -dt1djl*AA2/rij/rjl;
     tfij[3] = AA2/rij/rij - dt1djl*AA2*rmul/rij/rij;
     tfik[0] = tfij[0];
     tfik[1] = (AA2/rik - btt*ptorr*fc1l*fcp1k)/rik - 
 	    dt1dik*AA2*rmu/rik/rik;
     tfjl[0] = tfij[2];
     tfjl[1] = (AA2/rjl - btt*ptorr*fc1k*fcp1l)/rjl - 
 	    dt1djl*AA2*rmul/rjl/rjl;
 
     // per-component contributions grouped by the vector they belong to:
     // tj* for delrj, tk* for delrk, tl* for delrl
     tjx[0] = tfij[0]*delrk[0] - tfij[1]*delrj[0];
     tjy[0] = tfij[0]*delrk[1] - tfij[1]*delrj[1];
     tjz[0] = tfij[0]*delrk[2] - tfij[1]*delrj[2];
     tjx[1] = -tfij[2]*delrl[0] - tfij[3]*delrj[0];
     tjy[1] = -tfij[2]*delrl[1] - tfij[3]*delrj[1];
     tjz[1] = -tfij[2]*delrl[2] - tfij[3]*delrj[2];
     tjx[2] = -dt2dij[0] * AA;
     tjy[2] = -dt2dij[1] * AA;
     tjz[2] = -dt2dij[2] * AA;
 
     tkx[0] = tfik[0]*delrj[0] - tfik[1]*delrk[0];
     tky[0] = tfik[0]*delrj[1] - tfik[1]*delrk[1];
     tkz[0] = tfik[0]*delrj[2] - tfik[1]*delrk[2];
     tkx[1] = -dt2dik[0] * AA;
     tky[1] = -dt2dik[1] * AA;
     tkz[1] = -dt2dik[2] * AA;
 
     tlx[0] = -tfjl[0]*delrj[0] - tfjl[1]*delrl[0];
     tly[0] = -tfjl[0]*delrj[1] - tfjl[1]*delrl[1];
     tlz[0] = -tfjl[0]*delrj[2] - tfjl[1]*delrl[2];
     tlx[1] = -dt2djl[0] * AA;
     tly[1] = -dt2djl[1] * AA;
     tlz[1] = -dt2djl[2] * AA;
 
     // assemble the four outputs; by construction fi+fj+fk+fl sums to zero
     fi_tor[0] = tjx[0]+tjx[1]+tjx[2]+tkx[0]+tkx[1];
     fi_tor[1] = tjy[0]+tjy[1]+tjy[2]+tky[0]+tky[1];
     fi_tor[2] = tjz[0]+tjz[1]+tjz[2]+tkz[0]+tkz[1];
 
     fj_tor[0] = -tjx[0]-tjx[1]-tjx[2]+tlx[0]+tlx[1];
     fj_tor[1] = -tjy[0]-tjy[1]-tjy[2]+tly[0]+tly[1];
     fj_tor[2] = -tjz[0]-tjz[1]-tjz[2]+tlz[0]+tlz[1];
 
     fk_tor[0] = -tkx[0]-tkx[1];
     fk_tor[1] = -tky[0]-tky[1];
     fk_tor[2] = -tkz[0]-tkz[1];
 
     fl_tor[0] = -tlx[0]-tlx[1];
     fl_tor[1] = -tly[0]-tly[1];
     fl_tor[2] = -tlz[0]-tlz[1];
 
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Accumulates the per-atom charge force qf (stored in the caller's qf_fix
 // array) for all atoms of group igroup and returns its sum over all ranks.
 //   qf_fix  caller-owned array; the member pointer qf is aimed at it
 //   igroup  index into group->bitmask selecting the participating atoms
 // Contributions: self term (qfo_self), 1/r term (qfo_direct), field
 // correction (qfo_field), optional dipole term (qfo_dipole, when pol_flag is
 // set) and the short-range term (qfo_short).  Each pair is visited once
 // (itag < jtag) and updates both qf[i] and qf[j].
 double PairComb3::combqeq(double *qf_fix, int &igroup)
 {
   int i,j,ii,jj,itype,jtype,jnum;
   int iparam_i,iparam_ji,iparam_ij;
   int *ilist,*jlist,*numneigh,**firstneigh;
   int mr1,mr2,mr3,inty,nj;
   double xtmp,ytmp,ztmp,rsq1,delrj[3];
   double iq,jq,fqi,fqij,fqji,sr1,sr2,sr3;
   double potal,fac11,fac11e;
   int sht_jnum,*sht_jlist;
   tagint itag, jtag;
 
   double **x = atom->x;
   double *q = atom->q;
   tagint *tag = atom->tag;
   int *type = atom->type;
   int inum = list->inum;
   int *mask = atom->mask;
   int groupbit = group->bitmask[igroup];
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // reset the accumulators of every atom in the group; both resets belong
   // under the group test (the dipole reset used to run for every atom
   // because the un-braced if only covered the first statement)
   qf = qf_fix;
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     if (mask[i] & groupbit) {
       qf[i] = 0.0;
       dpl[i][0] = dpl[i][1] = dpl[i][2] = 0.0;
     }
   }
 
   // communicating charge force to all nodes, first forward then reverse
 
   pack_flag = 1;
   comm->forward_comm_pair(this);
 
   // self energy correction term: potal
 
   potal_calc(potal,fac11,fac11e);
 
   // loop over full neighbor list of my atoms
 
   fqi = fqij = fqji = 0.0;
 
   for (ii = 0; ii < inum; ii ++) {
     i = ilist[ii];
     itag = tag[i];
     nj = 0;
     if (mask[i] & groupbit) {
       itype = map[type[i]];
       xtmp = x[i][0];
       ytmp = x[i][1];
       ztmp = x[i][2];
       iq = q[i];
       iparam_i = elem2param[itype][itype][itype];
 
       // charge force from self energy
       fqi = qfo_self(&params[iparam_i],iq);
 
       jlist = firstneigh[i];
       jnum = numneigh[i];
 
       sht_jlist = sht_first[i];
       sht_jnum = sht_num[i];
 
       // two-body interactions
 
       for (jj = 0; jj < jnum; jj++) {
         j = jlist[jj] & NEIGHMASK;
 
         // visit each pair once: only the lower-tagged atom does the work
         jtag = tag[j];
         if (itag >= jtag) continue;
 
         jtype = map[type[j]];
         inty = intype[itype][jtype];
         jq = q[j];
 
         delrj[0] = xtmp - x[j][0];
         delrj[1] = ytmp - x[j][1];
         delrj[2] = ztmp - x[j][2];
         rsq1 = vec3_dot(delrj,delrj);
 
         iparam_ij = elem2param[itype][jtype][jtype];
         iparam_ji = elem2param[jtype][itype][itype];
 
         // long range q-dependent
 
         if (rsq1 > params[iparam_ij].lcutsq) continue;
 
         // polynomial three-point interpolation
         tri_point(rsq1,mr1,mr2,mr3,sr1,sr2,sr3);
 
         // 1/r charge forces
         qfo_direct(&params[iparam_ij],&params[iparam_ji],
                 mr1,mr2,mr3,rsq1,sr1,sr2,sr3,fac11e,fqij,fqji,
                 iq,jq,i,j);
 
         fqi += fqij;  qf[j] += fqji;
 
         // field correction to self energy and charge force
         qfo_field(&params[iparam_ij],&params[iparam_ji],rsq1,
                 iq,jq,fqij,fqji);
 
         fqi += fqij;  qf[j] += fqji;
 
         // polarization field charge force
         if (pol_flag) {
           qfo_dipole(fac11,mr1,mr2,mr3,inty,rsq1,delrj,sr1,sr2,sr3,
                 fqij,fqji,i,j);
 
           fqi += fqij;  qf[j] += fqji;
         }
       }
 
       // short-range neighbors; nj counts the ones inside the cutoff and is
       // handed to qfo_short as the neighbor slot
       for (jj = 0; jj < sht_jnum; jj++) {
         j = sht_jlist[jj];
 
         jtag = tag[j];
         if (itag >= jtag) continue;
 
         jtype = map[type[j]];
         inty = intype[itype][jtype];
         jq = q[j];
 
         delrj[0] = xtmp - x[j][0];
         delrj[1] = ytmp - x[j][1];
         delrj[2] = ztmp - x[j][2];
         rsq1 = vec3_dot(delrj,delrj);
 
         iparam_ij = elem2param[itype][jtype][jtype];
         iparam_ji = elem2param[jtype][itype][itype];
 
         if (rsq1 >= params[iparam_ij].cutsq) continue;
         nj ++;
 
         // charge force in Aij and Bij
         qfo_short(&params[iparam_ij],&params[iparam_ji],
                 rsq1,iq,jq,fqij,fqji,i,j,nj);
 
         fqi += fqij;  qf[j] += fqji;
       }
       qf[i] += fqi;
     }
   }
 
   comm->reverse_comm_pair(this);
 
   // sum charge force on each node and return it
 
   double eneg = 0.0;
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     if (mask[i] & groupbit) eneg += qf[i];
   }
 
   // NOTE(review): MPI_Allreduce already hands the sum to every rank; the
   // broadcast from rank 0 is kept as-is, presumably to force bitwise
   // identical values everywhere -- confirm whether it is still needed
   double enegtot;
   MPI_Allreduce(&eneg,&enegtot,1,MPI_DOUBLE,MPI_SUM,world);
   MPI_Bcast(&enegtot,1,MPI_DOUBLE,0,world);
   return enegtot;
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Self-energy charge force: chi + 2*dj*qi + 3*dk*qi^2 + 4*dl*qi^3, plus a
 // cubic penalty 4*100*(qi - bound)^3 whenever qi lies below qmin or above qmax.
 double PairComb3::qfo_self(Param *param, double qi)
 {
   // stiffness shared by the lower and upper charge-bound penalties
   const double penalty = 100.0;
   const double q_lower = param->qmin;
   const double q_upper = param->qmax;
 
   // polynomial part, evaluated in nested (Horner) form
   double result = param->chi+qi*(2.0*param->dj+qi*(3.0*param->dk+qi*4.0*param->dl));
 
   if (qi < q_lower) result += 4.0 * penalty * pow((qi-q_lower),3);
   if (qi > q_upper) result += 4.0 * penalty * pow((qi-q_upper),3);
 
   return result;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // 1/r charge force (derivative with respect to charge) for the pair (i,j).
 // Table values are interpolated with weights sr1..sr3 at rows mr1..mr3.
 // fqij and fqji are overwritten with the results for atom i and atom j.
 void PairComb3::qfo_direct(Param *parami, Param *paramj, int mr1,
           int mr2, int mr3, double rsq, double sr1, double sr2,
           double sr3, double fac11e, double &fqij, double &fqji,
           double iq, double jq, int i, int j)
 {
   const int inti = parami->ielement;
   const int intj = paramj->ielement;
   const int inty = intype[inti][intj];
 
   const double dist = sqrt(rsq);
   const double esucon = force->qqr2e;
 
   double curli = parami->curl;
   double curlj = paramj->curl;
 
   // for element group 2 with curl above curl0, blend curl towards curl0
   // using the switching value of the per-atom xcotmp entry
   if (parami->ielementgp == 2 && curli > parami->curl0) {
     const double start = curli;
     curli = start+(parami->curl0-start)*comb_fc_curl(xcotmp[i],parami);
   }
   if (paramj->ielementgp == 2 && curlj > paramj->curl0) {
     const double start = curlj;
     curlj = start+(paramj->curl0-start)*comb_fc_curl(xcotmp[j],paramj);
   }
 
   // 1/r force (wrt q)
 
   const double erfcc  = sr1*erpaw[mr1][0]   + sr2*erpaw[mr2][0]   + sr3*erpaw[mr3][0];
   const double fafbnl = sr1*fafb[mr1][inty] + sr2*fafb[mr2][inty] + sr3*fafb[mr3][inty];
   const double afbn   = sr1*afb[mr1][inti]  + sr2*afb[mr2][inti]  + sr3*afb[mr3][inti];
   const double afbj   = sr1*afb[mr1][intj]  + sr2*afb[mr2][intj]  + sr3*afb[mr3][intj];
 
   const double vm = (erfcc/dist * esucon - fac11e);
   const double vmfafb = vm + esucon * fafbnl;
   const double sme1n = curlj * (afbn - fafbnl) * esucon;
   const double sme1j = curli * (afbj - fafbnl) * esucon;
 
   fqij = jq * vmfafb + sme1n;
   fqji = iq * vmfafb + sme1j;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Field-correction charge force for one pair at squared separation rsq.
 // fqij and fqji are overwritten; the cutoff and the pcmn1/pcmn2 offsets are
 // taken from parami.
 void PairComb3::qfo_field(Param *parami, Param *paramj, double rsq,
                           double iq,double jq, double &fqij, double &fqji)
 {
   // pair separation, cutoff and the powers used below
   const double dist  = sqrt(rsq);
   const double dist3 = dist * rsq;
   const double dist5 = dist3 * rsq;
   const double cut  = parami->lcut;
   const double cut2 = cut*cut;
   const double cut3 = cut*cut*cut;
   const double cut4 = cut3 * cut;
   const double cut5 = cut4 * cut;
 
   const double off3 = pow(parami->pcmn1,3);
   const double off5 = pow(parami->pcmn2,5);
 
   // x^3/(x^6+off3) and x^5/(x^10+off5), at the pair distance and at the cutoff
   const double ker3     = dist3/(pow(dist3,2)+off3);
   const double ker3_cut = cut3/(pow(cut3,2)+off3);
   const double ker5     = dist5/(pow(dist5,2)+off5);
   const double ker5_cut = cut5/(pow(cut5,2)+off5);
 
   // derivative terms of the two kernels at the cutoff
   const double dker3_cut = 3/cut*ker3_cut-6*cut2*ker3_cut*ker3_cut;
   const double dker5_cut = 5/cut*ker5_cut-10*cut4*ker5_cut*ker5_cut;
 
   // kernel minus its cutoff value minus (r - rc) times the cutoff derivative
   const double rf3 = ker3-ker3_cut-(dist-cut)*dker3_cut;
   const double rf5 = ker5-ker5_cut-(dist-cut)*dker5_cut;
 
   // field correction charge force
   fqij = paramj->cmn1*rf3+2.0*iq*paramj->cmn2*rf5;
   fqji = parami->cmn1*rf3+2.0*jq*parami->cmn2*rf5;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Polarization charge force for the pair (i,j): the projections of dpl[i]
 // and dpl[j] onto delrj, each multiplied by the same radial factor built from
 // the interpolated erpaw / dfafb tables.  fqij and fqji are overwritten.
 void PairComb3::qfo_dipole(double fac11, int mr1, int mr2, int mr3,
         int inty, double rsq, double *delrj, double sr1, double sr2,
         double sr3, double &fqij, double &fqji, int i, int j)
 {
   const double dist = sqrt(rsq);
   const double dist3 = dist * rsq;
   const double alfdpi = 0.4/MY_PIS;
   const double esucon = force->qqr2e;
 
   // three-point interpolation of the tabulated functions
   const double erfcc = sr1*erpaw[mr1][0] + sr2*erpaw[mr2][0] + sr3*erpaw[mr3][0];
   const double erfcd = sr1*erpaw[mr1][1] + sr2*erpaw[mr2][1] + sr3*erpaw[mr3][1];
   const double dvdrr = (erfcc/dist3+alfdpi*erfcd/rsq)*esucon-fac11;
   const double dfafbnl = sr1*dfafb[mr1][inty] + sr2*dfafb[mr2][inty] + sr3*dfafb[mr3][inty];
 
   // radial factor common to both atoms
   const double radial = (dvdrr + dfafbnl*esucon)/dist;
 
   const double proj_i = dpl[i][0]*delrj[0] + dpl[i][1]*delrj[1] +dpl[i][2]*delrj[2];
   const double proj_j = dpl[j][0]*delrj[0] + dpl[j][1]*delrj[1] +dpl[j][2]*delrj[2];
 
   fqij = proj_i * radial;
   fqji = proj_j * radial;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Short-range charge force for the pair (i,j): the charge derivatives of the
 // repulsive term (caj) and of the bond-order weighted attractive term (cbj).
 //   parami/paramj  parameter sets for the i-j and j-i orderings
 //   rsq            squared pair separation
 //   iq, jq         charges of the two atoms (passed by value)
 //   fqij, fqji     outputs, overwritten with the results for atom i / atom j
 //   i, nj          select the bond-order entry bbij[i][nj]; j is not used
 void PairComb3::qfo_short(Param *parami, Param *paramj, double rsq,
 	double iq, double jq, double &fqij, double &fqji,
 	int i, int j, int nj)
 {
   double r, tmp_fc;
   double Di, Dj, dDi, dDj, Bsi, Bsj, dBsi, dBsj;
   double QUchi, QOchi, QUchj, QOchj;
   double bij, caj, cbj, caqpn, caqpj, cbqpn, cbqpj;
   double LamDiLamDj, AlfDiAlfDj;
   double rlm1 = parami->lambda;
   double alfij1= parami->alpha1;
   double alfij2= parami->alpha2;
   double alfij3= parami->alpha3;
   double pbij1= parami->bigB1;
   double pbij2= parami->bigB2;
   double pbij3= parami->bigB3;
 
   caj = cbj = caqpn = caqpj = cbqpn = cbqpj = 0.0;
   r = sqrt(rsq);
   tmp_fc = comb_fc(r,parami);
   bij = bbij[i][nj];
 
   // scaled charge offsets; these use the incoming (unclamped) charges
   QUchi = (parami->QU - iq) * parami->bD;
   QUchj = (paramj->QU - jq) * paramj->bD;
   QOchi = (iq - parami->Qo) * parami->bB;
   QOchj = (jq - paramj->Qo) * paramj->bB;
 
   // outside [QL-0.2, QU+0.2] the D value is pinned to DL / DU and all
   // derivative and B terms are zero
   // NOTE(review): the clamped iq written in the first two branches is not
   // read again in this function -- looks like a dead store; confirm
   if (iq < parami->QL-0.2) {
     iq = parami->QL-0.2;
     Di = parami->DL;
     dDi = Bsi = dBsi = 0.0;
   } else if (iq > parami->QU+0.2) {
     iq = parami->QU+0.2;
     Di = parami->DU;
     dDi = Bsi = dBsi = 0.0;
   } else {
     Di = parami->DU + pow(QUchi,parami->nD);				// YYDin
     dDi = -parami->nD * parami->bD * pow(QUchi,(parami->nD-1.0));	// YYDiqp
     Bsi = parami->aB - pow(QOchi,10);					// YYBsin
     dBsi = -parami->bB * 10.0 * pow(QOchi,9.0);				// YYBsiqp
   }
     
   // same treatment for atom j (the clamped jq is likewise not read again)
   if (jq < paramj->QL-0.2) {
     jq = paramj->QL-0.2;
     Dj = paramj->DL;
     dDj = Bsj = dBsj = 0.0;
   } else if (jq > paramj->QU+0.2) {
     jq = paramj->QU+0.2;
     Dj = paramj->DU;
     dDj = Bsj = dBsj = 0.0;
   } else {
     Dj = paramj->DU + pow(QUchj,paramj->nD);				// YYDij
     dDj = -paramj->nD * paramj->bD * pow(QUchj,(paramj->nD-1.0));	// YYDiqpj
     Bsj = paramj->aB - pow(QOchj,10);					// YYBsij
     dBsj = -paramj->bB * 10.0 * pow(QOchj,9.0);				// YYBsiqpj
   }
     
   // repulsive term
   LamDiLamDj = exp(0.5*(parami->lami*Di+paramj->lami*Dj)-rlm1*r);
   caj = 0.5 * tmp_fc * parami->bigA * LamDiLamDj;
 
   // attractive term: only when Bsi*Bsj is positive, which also keeps the
   // sqrt and the divisions by Bsi / Bsj below well defined
   if (Bsi*Bsj > 0.0) {
     AlfDiAlfDj = exp(0.5*(parami->alfi*Di+paramj->alfi*Dj));
     cbj=-0.5*tmp_fc*bij*sqrt(Bsi*Bsj)*AlfDiAlfDj*
                 (pbij1*exp(-alfij1*r)+pbij2*exp(-alfij2*r)+pbij3*exp(-alfij3*r));
     cbqpn = cbj * (parami->alfi * dDi + dBsi/Bsi);
     cbqpj = cbj * (paramj->alfi * dDj + dBsj/Bsj);
   } else {
     cbj = cbqpn = cbqpj = 0.0;
   }
 
   // charge derivatives of the repulsive term
   caqpn = caj * parami->lami * dDi;
   caqpj = caj * paramj->lami * dDj;
 
   fqij = 1.0 * (caqpn + cbqpn);
   fqji = 1.0 * (caqpj + cbqpj);
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Adds one pair's contribution to the dipole accumulators dpl[i] and dpl[j].
 // Each contribution is (charge-field term along delrj + dipole-field term)
 // times the atom's polz times 0.5; the charge-field term enters with opposite
 // sign for atom j.  Table values are interpolated with weights sr1..sr3 at
 // rows mr1..mr3.
 void PairComb3::dipole_init(Param *parami, Param *paramj, double fac11,
         double *delrj, double rsq, int mr1, int mr2, int mr3, double sr1,
         double sr2, double sr3, double iq, double jq, int i, int j)
 {
   const int inti = parami->ielement;
   const int intj = paramj->ielement;
   const int inty = intype[inti][intj];
 
   // external-field slot; it is all zeros in this routine
   const double Qext[3] = {0.0, 0.0, 0.0};
 
   const double dist = sqrt(rsq);
   const double dist3 = dist * rsq;
   const double inv_r3 = 1.0/(dist3);
   const double three_inv_r5 = 3.0*inv_r3/rsq;
   const double alfdpi = 0.4/MY_PIS;
   const double esucon = force->qqr2e;
 
   // three-point interpolation of the tabulated functions
   const double erfcc = sr1*erpaw[mr1][0] + sr2*erpaw[mr2][0] + sr3*erpaw[mr3][0];
   const double erfcd = sr1*erpaw[mr1][1] + sr2*erpaw[mr2][1] + sr3*erpaw[mr3][1];
   const double dvdrr = (erfcc/dist3+alfdpi*erfcd/rsq)*esucon-fac11;
   const double dfafbnl = sr1*dfafb[mr1][inty] + sr2*dfafb[mr2][inty] + sr3*dfafb[mr3][inty];
   const double radial = dvdrr/esucon + dfafbnl/dist;
   const double phinn = sr1*phin[mr1][inti] + sr2*phin[mr2][inti] + sr3*phin[mr3][inti];
   const double phinj = sr1*phin[mr1][intj] + sr2*phin[mr2][intj] + sr3*phin[mr3][intj];
 
   // charge-field magnitudes seen by i (from jq) and by j (from iq)
   const double efn = jq * radial;
   const double efj = iq * radial;
 
   // projections of the current dipoles onto the pair vector
   const double proj_i = dpl[i][0]*delrj[0] + dpl[i][1]*delrj[1] + dpl[i][2]*delrj[2];
   const double proj_j = dpl[j][0]*delrj[0] + dpl[j][1]*delrj[1] + dpl[j][2]*delrj[2];
 
   // dipole-field terms, all computed from the dipoles as they were on entry
   double field_on_i[3], field_on_j[3];
   for (int d = 0; d < 3; ++d) {
     field_on_i[d] = (proj_j*delrj[d]*three_inv_r5 - dpl[j][d]*inv_r3)*phinj;
     field_on_j[d] = (proj_i*delrj[d]*three_inv_r5 - dpl[i][d]*inv_r3)*phinn;
   }
 
   for (int d = 0; d < 3; ++d) {
     dpl[i][d] += (Qext[d]/esucon + delrj[d]*efn + field_on_i[d])*parami->polz*0.50;
     dpl[j][d] += (Qext[d]/esucon - delrj[d]*efj + field_on_j[d])*paramj->polz*0.50;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Dipole self term of atom i: |dpl[i]|^2 * qqr2e / (2*polz).  Returns 0 when
 // polz is exactly zero so that the division is never attempted.
 double PairComb3::dipole_self(Param *parami, int i)
 {
   const double esucon = force->qqr2e;
   const double polz = parami->polz;
 
   if (polz == 0.0) return 0.0;
 
   return (dpl[i][0]*dpl[i][0]+dpl[i][1]*dpl[i][1]+dpl[i][2]*dpl[i][2])
           *esucon/(2.0*polz);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Dipole energy and force terms for the pair (i,j).
 //   parami/paramj   supply the element indices used to address the tables
 //   fac11           constant subtracted in dvdrr
 //   delx,dely,delz  components of the pair vector; rsq is its squared length
 //   mr1..3, sr1..3  rows and weights of the three-point table interpolation
 //   iq, jq          charges of the two atoms
 //   vionij          out: dipole-dipole plus dipole-charge energy
 //   fvionij         out: dipole-charge force term
 //   ddprx           out: three components of the dipole-dipole force term
 // Reads the member dipole arrays dpl[i] and dpl[j]; does not modify them.
 void PairComb3::dipole_calc(Param *parami, Param *paramj, double fac11, 
 	double delx, double dely, double delz, double rsq,
 	int mr1, int mr2, int mr3, double sr1, double sr2, double sr3, 
 	double iq, double jq, int i, int j, double &vionij, 
 	double &fvionij, double *ddprx)
 {
   double erfcc, erfcd, dvdrr, dfafbnl, ef, phinn, phinj, efn, efj;
   double r, r3, alf, alfdpi, esucon, dphinn, dphinj, ddfafbnl;
   double def, defn, defj, tmun, tmuj, emuTmu, edqn, edqj, ddvdrr;
   double rcd, rct, tmurn, tmurj, tmumu, poln[3], polj[3], delr1[3];
   double demuTmu, ddpr, dcoef;
   int nm;
   int inti = parami->ielement;
   int intj = paramj->ielement;
   int inty = intype[inti][intj];
   
   r = sqrt(rsq);
   r3 = r * rsq;
   esucon = force->qqr2e;
   // rcd = qqr2e/r^3 and rct = 3*qqr2e/r^5
   rcd = esucon/r3;
   rct = 3.0*rcd/rsq;
   alf = 0.2;
   alfdpi = 2.0*alf/MY_PIS;
   delr1[0] = delx;
   delr1[1] = dely;
   delr1[2] = delz;
 
   // generate energy & force information from tables
   erfcc = sr1*erpaw[mr1][0] + sr2*erpaw[mr2][0] + sr3*erpaw[mr3][0];
   erfcd = sr1*erpaw[mr1][1] + sr2*erpaw[mr2][1] + sr3*erpaw[mr3][1];
   dvdrr = (erfcc/r3+alfdpi*erfcd/rsq)*esucon-fac11;
   ddvdrr = (2.0*erfcc/r3 + 2.0*alfdpi*erfcd*(1.0/rsq+alf*alf))*esucon;
   dfafbnl= sr1*dfafb[mr1][inty] + sr2*dfafb[mr2][inty] + sr3*dfafb[mr3][inty];
   phinn = sr1*phin[mr1][inti] + sr2*phin[mr2][inti] + sr3*phin[mr3][inti];
   phinj = sr1*phin[mr1][intj] + sr2*phin[mr2][intj] + sr3*phin[mr3][intj];
   dphinn = sr1*dphin[mr1][inti] + sr2*dphin[mr2][inti] + sr3*dphin[mr3][inti];
   dphinj = sr1*dphin[mr1][intj] + sr2*dphin[mr2][intj] + sr3*dphin[mr3][intj];
   ddfafbnl= sr1*ddfafb[mr1][inty] + sr2*ddfafb[mr2][inty] + sr3*ddfafb[mr3][inty];
   // ef/def are common radial factors; the j-side versions carry the
   // opposite sign
   ef = (dvdrr + dfafbnl * esucon)/r;
   efn =  jq * ef;
   efj = -iq * ef;
   def = (ddvdrr + ddfafbnl * esucon)/r;
   defn =  jq * def;
   defj = -iq * def;
 
   // dipole - dipole field tensor (Tij)
   // tmurn/tmurj: projections of dpl[i]/dpl[j] onto the pair vector;
   // tmumu: dot product of the two dipoles
   tmurn = dpl[i][0]*delr1[0] + dpl[i][1]*delr1[1] + dpl[i][2]*delr1[2];
   tmurj = dpl[j][0]*delr1[0] + dpl[j][1]*delr1[1] + dpl[j][2]*delr1[2];
   tmumu = dpl[i][0]*dpl[j][0] + dpl[i][1]*dpl[j][1] + dpl[i][2]*dpl[j][2];
 
   for (nm=0; nm<3; nm++) {
     poln[nm] = (tmurj*delr1[nm]*rct - dpl[j][nm]*rcd);
     polj[nm] = (tmurn*delr1[nm]*rct - dpl[i][nm]*rcd);
   }
   tmun = dpl[j][0]*polj[0] + dpl[j][1]*polj[1] + dpl[j][2]*polj[2];
   tmuj = dpl[i][0]*poln[0] + dpl[i][1]*poln[1] + dpl[i][2]*poln[2];
 
   // dipole - dipole energy
   emuTmu = -0.5*(tmun*phinn+tmuj*phinj);
 
   // dipole - charge energy
   edqn = -0.5 * (tmurn * efn);
   edqj = -0.5 * (tmurj * efj);
 
   // overall dipole energy
   vionij = emuTmu + edqn + edqj;
 
   // dipole - dipole force
   demuTmu = (tmun*dphinn + tmuj*dphinj)/r;
   ddpr =  5.0*tmurn*tmurj/rsq - tmumu;
   dcoef = rct * (phinn+phinj);
 
   for (nm = 0; nm < 3; nm ++) {
     ddprx[nm] = dcoef * (ddpr*delr1[nm] - tmurn*dpl[j][nm] - tmurj*dpl[i][nm]) 
 	  + demuTmu * delr1[nm];
   }
 
   // dipole - charge force
   fvionij = -tmurn*defn - tmurj*defj;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Cosine switching function of rocn: 1 at or below curlcut1, 0 at or above
 // curlcut2, and 0.5*(1 + cos(pi*(rocn-curlcut1)/(curlcut2-curlcut1))) between.
 double PairComb3::comb_fc_curl(double rocn, Param *param)
 {
   const double inner = param->curlcut1;
   const double outer = param->curlcut2;
 
   if (rocn <= inner) {
     return 1.0;
   } else if (rocn >= outer) {
     return 0.0;
   }
 
   const double phase = MY_PI*(rocn-inner)/(outer-inner);
   return 0.5*(1.0 + cos(phase));
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Derivative of comb_fc_curl with respect to rocn: zero outside the open
 // interval (curlcut1, curlcut2), a scaled negative sine inside it.
 double PairComb3::comb_fc_curl_d(double rocn, Param *param)
 {
   const double inner = param->curlcut1;
   const double outer = param->curlcut2;
 
   // flat on both sides of the switching window
   if (rocn <= inner || rocn >= outer) return 0.0;
 
   const double phase = MY_PI*(rocn-inner)/(outer-inner);
   return -MY_PI2/(outer-inner)*sin(phase);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Step function: 0 for rr <= 0, 1 otherwise (a NaN argument therefore gives 1).
 int PairComb3::heaviside(double rr)
 {
   return (rr <= 0.0) ? 0 : 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Smooth switch: 1 for rr <= 0, 0 for rr >= 1, and the cubic
 // 1 - (3 - 2*rr)*rr^2 in between.
 double PairComb3::switching(double rr)
 {
   if (rr <= 0.0) return 1.0;
   if (rr >= 1.0) return 0.0;
 
   // the step factors are kept so that every input, including NaN, follows
   // the same arithmetic path as before
   const int below = heaviside(-rr);
   const int inside = heaviside(rr)*heaviside(1.0-rr);
   return below+inside
           * (1.0-(3.0-2.0*rr)*rr*rr);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Derivative of switching(): 0 for rr <= 0 and for rr >= 1, and
 // 6*rr*(rr - 1) in between.
 double PairComb3::switching_d(double rr)
 {
   if (rr <= 0.0 || rr >= 1.0) return 0.0;
 
   // the step factors are kept so that every input, including NaN, follows
   // the same arithmetic path as before
   const int inside = heaviside(rr)*heaviside(1.0-rr);
   return inside
           * 6.0*rr*(rr-1.0);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Packs one value per listed atom into buf: qf when pack_flag is 1, NCo when
 // pack_flag is 2, nothing for any other flag.  Returns the number of values
 // written.  pbc_flag and pbc are not used.
 int PairComb3::pack_forward_comm(int n, int *list, double *buf,
                                  int pbc_flag, int *pbc)
 {
   int count = 0;
 
   switch (pack_flag) {
   case 1:
     for (int k = 0; k < n; ++k) buf[count++] = qf[list[k]];
     break;
   case 2:
     for (int k = 0; k < n; ++k) buf[count++] = NCo[list[k]];
     break;
   default:
     break;
   }
 
   return count;
 }
     
 /* ---------------------------------------------------------------------- */
     
 // Copies n values from buf into the contiguous atom range [first, first+n):
 // into qf when pack_flag is 1, into NCo when pack_flag is 2.
 void PairComb3::unpack_forward_comm(int n, int first, double *buf)
 {
   const int end = first + n;
   int count = 0;
 
   switch (pack_flag) {
   case 1:
     for (int atom_idx = first; atom_idx < end; ++atom_idx)
       qf[atom_idx] = buf[count++];
     break;
   case 2:
     for (int atom_idx = first; atom_idx < end; ++atom_idx)
       NCo[atom_idx] = buf[count++];
     break;
   default:
     break;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Packs the contiguous atom range [first, first+n) into buf: qf when
 // pack_flag is 1, NCo when pack_flag is 2.  Returns the number of values
 // written.
 int PairComb3::pack_reverse_comm(int n, int first, double *buf)
 {
   const int end = first + n;
   int count = 0;
 
   switch (pack_flag) {
   case 1:
     for (int atom_idx = first; atom_idx < end; ++atom_idx)
       buf[count++] = qf[atom_idx];
     break;
   case 2:
     for (int atom_idx = first; atom_idx < end; ++atom_idx)
       buf[count++] = NCo[atom_idx];
     break;
   default:
     break;
   }
 
   return count;
 }
     
 /* ---------------------------------------------------------------------- */
     
 // Adds the n received values in buf onto the listed atoms: onto qf when
 // pack_flag is 1, onto NCo when pack_flag is 2.
 void PairComb3::unpack_reverse_comm(int n, int *list, double *buf)
 {
   int count = 0;
 
   switch (pack_flag) {
   case 1:
     for (int k = 0; k < n; ++k) qf[list[k]] += buf[count++];
     break;
   case 2:
     for (int k = 0; k < n; ++k) NCo[list[k]] += buf[count++];
     break;
   default:
     break;
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays 
 ------------------------------------------------------------------------- */
 
 // Byte estimate built from maxeatom, maxvatom, nmax, a fixed 25000x2 block of
 // doubles and the size reported by each per-thread ipage entry.
 double PairComb3::memory_usage()
 {
   double total = maxeatom * sizeof(double);
 
   total += maxvatom*6 * sizeof(double);
   total += nmax * sizeof(int);
   total += nmax * 8.0 * sizeof(double);
   total += 25000*2*sizeof(double);
 
   // one page allocator per thread
   const int nthreads = comm->nthreads;
   for (int t = 0; t < nthreads; ++t) {
     total += ipage[t].size();
   }
 
   return total;
 }
diff --git a/src/MANYBODY/pair_eam.cpp b/src/MANYBODY/pair_eam.cpp
index f6d8f8c6d..f53b6a9e3 100644
--- a/src/MANYBODY/pair_eam.cpp
+++ b/src/MANYBODY/pair_eam.cpp
@@ -1,879 +1,879 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Stephen Foiles (SNL), Murray Daw (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_eam.h"
 #include "atom.h"
 #include "force.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 #define MAXLINE 1024
 
 /* ---------------------------------------------------------------------- */
 
 PairEAM::PairEAM(LAMMPS *lmp) : Pair(lmp)
 {
   // pair-style flags
 
   manybody_flag = 1;
   restartinfo = 0;
 
   // set comm size needed by this Pair
   // one value per atom is packed in each direction (fp forward, rho reverse)
 
   comm_reverse = 1;
   comm_forward = 1;
 
   // per-atom arrays start empty, compute() allocates them on first use
 
   fp = NULL;
   rho = NULL;
   nmax = 0;
 
   // no potential file has been read yet
 
   fs = NULL;
   setfl = NULL;
   funcfl = NULL;
   nfuncfl = 0;
 
   // tabulated functions
 
   z2r = NULL;
   rhor = NULL;
   frho = NULL;
 
   // spline coefficients built from the tabulated functions
 
   z2r_spline = NULL;
   rhor_spline = NULL;
   frho_spline = NULL;
 }
 
 /* ----------------------------------------------------------------------
    check if allocated, since class can be destructed when incomplete
 ------------------------------------------------------------------------- */
 
 PairEAM::~PairEAM()
 {
   // per-atom arrays
 
   memory->destroy(fp);
   memory->destroy(rho);
 
   // arrays created by allocate(), present only once it has run
 
   if (allocated) {
     memory->destroy(type2z2r);
     memory->destroy(type2rhor);
     delete [] type2frho;
     delete [] map;
     memory->destroy(cutsq);
     memory->destroy(setflag);
   }
 
   // funcfl records: a file name plus three tables per record
 
   if (funcfl) {
     int ifile = 0;
     while (ifile < nfuncfl) {
       memory->destroy(funcfl[ifile].zr);
       memory->destroy(funcfl[ifile].rhor);
       memory->destroy(funcfl[ifile].frho);
       delete [] funcfl[ifile].file;
       ifile++;
     }
     memory->sfree(funcfl);
   }
 
   // setfl record: element names, masses and tables
 
   if (setfl) {
     const int nelem = setfl->nelements;
     for (int ielem = 0; ielem < nelem; ielem++)
       delete [] setfl->elements[ielem];
     delete [] setfl->elements;
     delete [] setfl->mass;
     memory->destroy(setfl->frho);
     memory->destroy(setfl->rhor);
     memory->destroy(setfl->z2r);
     delete setfl;
   }
 
   // fs record: same layout of members as the setfl record
 
   if (fs) {
     const int nelem = fs->nelements;
     for (int ielem = 0; ielem < nelem; ielem++)
       delete [] fs->elements[ielem];
     delete [] fs->elements;
     delete [] fs->mass;
     memory->destroy(fs->frho);
     memory->destroy(fs->rhor);
     memory->destroy(fs->z2r);
     delete fs;
   }
 
   // spline coefficient arrays, then the tables they were built from
 
   memory->destroy(z2r_spline);
   memory->destroy(rhor_spline);
   memory->destroy(frho_spline);
 
   memory->destroy(z2r);
   memory->destroy(rhor);
   memory->destroy(frho);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairEAM::compute(int eflag, int vflag)
 {
   // three passes over the neighbor list:
   //   1) accumulate density rho on each atom
   //   2) evaluate fp (and embedding energy phi when eflag is set) per atom
   //   3) accumulate pair forces and tally energy/virial
   // rho is reverse-communicated after pass 1 when newton_pair is set,
   // fp is forward-communicated after pass 2
 
   int i,j,ii,jj,m,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r,p,rhoip,rhojp,z2,z2p,recip,phip,psip,phi;
   double *coeff;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;
 
   // grow energy and fp arrays if necessary
   // need to be atom->nmax in length
 
   if (atom->nmax > nmax) {
     memory->destroy(rho);
     memory->destroy(fp);
     nmax = atom->nmax;
     memory->create(rho,nmax,"pair:rho");
     memory->create(fp,nmax,"pair:fp");
   }
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int nall = nlocal + atom->nghost;
   int newton_pair = force->newton_pair;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // zero out density
   // ghost entries are zeroed only when newton_pair is set, since only
   // then does pass 1 below add to rho[j] for j >= nlocal
 
   if (newton_pair) {
     for (i = 0; i < nall; i++) rho[i] = 0.0;
   } else for (i = 0; i < nlocal; i++) rho[i] = 0.0;
 
   // rho = density at each atom
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cutforcesq) {
         jtype = type[j];
         // p = 1-based position in the r table, m = interval index capped
         // at nr-1, then p becomes the offset within interval m, capped at 1.0
         p = sqrt(rsq)*rdr + 1.0;
         m = static_cast<int> (p);
         m = MIN(m,nr-1);
         p -= m;
         p = MIN(p,1.0);
         // coeff[3..6] are the cubic coefficients of the function value
         coeff = rhor_spline[type2rhor[jtype][itype]][m];
         rho[i] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
         // j is also credited unless it is a ghost atom with newton_pair off
         if (newton_pair || j < nlocal) {
           coeff = rhor_spline[type2rhor[itype][jtype]][m];
           rho[j] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
         }
       }
     }
   }
 
   // communicate and sum densities
 
   if (newton_pair) comm->reverse_comm_pair(this);
 
   // fp = derivative of embedding energy at each atom
   // phi = embedding energy at each atom
   // if rho > rhomax (e.g. due to close approach of two atoms),
   //   will exceed table, so add linear term to conserve energy
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     p = rho[i]*rdrho + 1.0;
     m = static_cast<int> (p);
     // table index is clamped to the range 1 .. nrho-1
     m = MAX(1,MIN(m,nrho-1));
     p -= m;
     p = MIN(p,1.0);
     coeff = frho_spline[type2frho[type[i]]][m];
     // coeff[0..2] are the quadratic coefficients of the derivative
     fp[i] = (coeff[0]*p + coeff[1])*p + coeff[2];
     if (eflag) {
       phi = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
       if (rho[i] > rhomax) phi += fp[i] * (rho[i]-rhomax);
       if (eflag_global) eng_vdwl += phi;
       if (eflag_atom) eatom[i] += phi;
     }
   }
 
   // communicate derivative of embedding function
 
   comm->forward_comm_pair(this);
 
   // compute forces on each atom
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cutforcesq) {
         jtype = type[j];
         r = sqrt(rsq);
         // same table lookup as in the density pass
         p = r*rdr + 1.0;
         m = static_cast<int> (p);
         m = MIN(m,nr-1);
         p -= m;
         p = MIN(p,1.0);
 
         // rhoip = derivative of (density at atom j due to atom i)
         // rhojp = derivative of (density at atom i due to atom j)
         // phi = pair potential energy
         // phip = phi'
         // z2 = phi * r
         // z2p = (phi * r)' = (phi' r) + phi
         // psip needs both fp[i] and fp[j] terms since r_ij appears in two
         //   terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji)
         //   hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip
 
         coeff = rhor_spline[type2rhor[itype][jtype]][m];
         rhoip = (coeff[0]*p + coeff[1])*p + coeff[2];
         coeff = rhor_spline[type2rhor[jtype][itype]][m];
         rhojp = (coeff[0]*p + coeff[1])*p + coeff[2];
         coeff = z2r_spline[type2z2r[itype][jtype]][m];
         z2p = (coeff[0]*p + coeff[1])*p + coeff[2];
         z2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
 
         recip = 1.0/r;
         phi = z2*recip;
         phip = z2p*recip - phi*recip;
         psip = fp[i]*rhojp + fp[j]*rhoip + phip;
         // fpair multiplies the (delx,dely,delz) vector, hence the extra 1/r
         fpair = -psip*recip;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         // only the pair term phi is tallied here, the embedding energy
         // was already added per atom in the fp loop above
         if (eflag) evdwl = phi;
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairEAM::allocate()
 {
   // all per-type arrays are sized ntypes+1 so they can be indexed 1..ntypes
 
   const int ntypes = atom->ntypes;
   allocated = 1;
 
   // setflag: only the upper triangle (jtype >= itype) is cleared here
 
   memory->create(setflag,ntypes+1,ntypes+1,"pair:setflag");
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,ntypes+1,ntypes+1,"pair:cutsq");
 
   // map[itype] = -1 until coeff() assigns a potential file to the type
 
   map = new int[ntypes+1];
   int itype = 1;
   while (itype <= ntypes) {
     map[itype] = -1;
     itype++;
   }
 
   // type -> table index lookups, contents are filled in elsewhere
 
   type2frho = new int[ntypes+1];
   memory->create(type2rhor,ntypes+1,ntypes+1,"pair:type2rhor");
   memory->create(type2z2r,ntypes+1,ntypes+1,"pair:type2z2r");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairEAM::settings(int narg, char **arg)
 {
   if (narg > 0) error->all(FLERR,"Illegal pair_style command");
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
    read DYNAMO funcfl file
 ------------------------------------------------------------------------- */
 
 void PairEAM::coeff(int narg, char **arg)
 {
   if (!allocated) allocate();
 
   if (narg != 3) error->all(FLERR,"Incorrect args for pair coefficients");
 
   // arg[0], arg[1] = type ranges, arg[2] = funcfl file name
 
   char *fname = arg[2];
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   // look for a funcfl record that already holds this file name
   // ifuncfl ends at nfuncfl when there is none
 
   int ifuncfl = 0;
   while (ifuncfl < nfuncfl && strcmp(fname,funcfl[ifuncfl].file) != 0)
     ifuncfl++;
 
   // not read before: append a record, read the file into it,
   // then store a private copy of the file name in the record
 
   if (ifuncfl == nfuncfl) {
     nfuncfl++;
     funcfl = (Funcfl *)
       memory->srealloc(funcfl,nfuncfl*sizeof(Funcfl),"pair:funcfl");
     read_file(fname);
     int n = strlen(fname) + 1;
     funcfl[ifuncfl].file = new char[n];
     strcpy(funcfl[ifuncfl].file,fname);
   }
 
   // only i,i type pairs are set: a type qualifies when it lies in both
   // the ilo..ihi range and the jlo..jhi range
   // each such type is mapped to the file and takes the mass read from it
 
   int count = 0;
   for (int itype = ilo; itype <= ihi; itype++) {
     if (itype < jlo || itype > jhi) continue;
     setflag[itype][itype] = 1;
     map[itype] = ifuncfl;
     atom->set_mass(itype,funcfl[ifuncfl].mass);
     count++;
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairEAM::init_style()
 {
   // convert read-in file(s) to arrays and spline them
   // order matters: array2spline() splines the tables file2array() builds
 
   file2array();
   array2spline();
 
   // neighbor list request for this pair style
   // the patched line below additionally passes instance_me
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairEAM::init_one(int i, int j)
 {
   // one global cutoff is used for every i,j pair, the arguments are unused
   // funcfl: largest cut over all files read in
   // setfl or fs: the cut of that single file
   // if none of the three is set, cutmax keeps whatever value it had
 
   if (funcfl) {
     cutmax = 0.0;
     int ifile = 0;
     while (ifile < nfuncfl) {
       cutmax = MAX(cutmax,funcfl[ifile].cut);
       ifile++;
     }
   } else {
     if (setfl) cutmax = setfl->cut;
     else if (fs) cutmax = fs->cut;
   }
 
   cutforcesq = cutmax*cutmax;
 
   return cutmax;
 }
 
 /* ----------------------------------------------------------------------
    read potential values from a DYNAMO single element funcfl file
 ------------------------------------------------------------------------- */
 
 void PairEAM::read_file(char *filename)
 {
   // fills the last funcfl record, funcfl[nfuncfl-1]
   // the caller must already have grown funcfl to nfuncfl entries
 
   Funcfl *file = &funcfl[nfuncfl-1];
 
   int me = comm->me;
   FILE *fptr;
   char line[MAXLINE];
 
   // only proc 0 touches the file, everything it reads is broadcast below
 
   if (me == 0) {
     fptr = force->open_potential(filename);
     if (fptr == NULL) {
       // bounded formatting: an over-long file name is truncated in the
       // message instead of overflowing the fixed-size buffer
       char str[128];
       snprintf(str,sizeof(str),"Cannot open EAM potential file %s",filename);
       error->one(FLERR,str);
     }
   }
 
   // header: line 1 is skipped
   // line 2 = one integer (read but not used) and the mass
   // line 3 = nrho, drho, nr, dr, cut
   // NOTE(review): fgets/sscanf results are not checked, so a short or
   //   malformed header leaves these fields unset before the broadcast
 
   int tmp;
   if (me == 0) {
     fgets(line,MAXLINE,fptr);
     fgets(line,MAXLINE,fptr);
     sscanf(line,"%d %lg",&tmp,&file->mass);
     fgets(line,MAXLINE,fptr);
     sscanf(line,"%d %lg %d %lg %lg",
            &file->nrho,&file->drho,&file->nr,&file->dr,&file->cut);
   }
 
   MPI_Bcast(&file->mass,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&file->nrho,1,MPI_INT,0,world);
   MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&file->nr,1,MPI_INT,0,world);
   MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world);
 
   // tables are allocated with one extra entry and filled from index 1
 
   memory->create(file->frho,(file->nrho+1),"pair:frho");
   memory->create(file->rhor,(file->nr+1),"pair:rhor");
   memory->create(file->zr,(file->nr+1),"pair:zr");
 
   // data order in the file: frho (nrho values), zr (nr), rhor (nr)
 
   if (me == 0) grab(fptr,file->nrho,&file->frho[1]);
   MPI_Bcast(&file->frho[1],file->nrho,MPI_DOUBLE,0,world);
 
   if (me == 0) grab(fptr,file->nr,&file->zr[1]);
   MPI_Bcast(&file->zr[1],file->nr,MPI_DOUBLE,0,world);
 
   if (me == 0) grab(fptr,file->nr,&file->rhor[1]);
   MPI_Bcast(&file->rhor[1],file->nr,MPI_DOUBLE,0,world);
 
   if (me == 0) fclose(fptr);
 }
 
 /* ----------------------------------------------------------------------
    convert read-in funcfl potential(s) to standard array format
    interpolate all file values to a single grid and cutoff
 ------------------------------------------------------------------------- */
 
 void PairEAM::file2array()
 {
   // builds the frho, rhor and z2r tables on one common grid from all
   // funcfl records, plus the type2frho/type2rhor/type2z2r lookups
   // also sets dr, drho, rhomax, nr, nrho, nfrho, nrhor and nz2r
 
   int i,j,k,m,n;
   int ntypes = atom->ntypes;
   double sixth = 1.0/6.0;
 
   // determine max function params from all active funcfl files
   // active means some element is pointing at it via map
 
   int active;
   double rmax;
   dr = drho = rmax = rhomax = 0.0;
 
   // note: this loop declares its own i, shadowing the i declared above
 
   for (int i = 0; i < nfuncfl; i++) {
     active = 0;
     for (j = 1; j <= ntypes; j++)
       if (map[j] == i) active = 1;
     if (active == 0) continue;
     Funcfl *file = &funcfl[i];
     dr = MAX(dr,file->dr);
     drho = MAX(drho,file->drho);
     rmax = MAX(rmax,(file->nr-1) * file->dr);
     rhomax = MAX(rhomax,(file->nrho-1) * file->drho);
   }
 
   // set nr,nrho from cutoff and spacings
   // 0.5 is for round-off in divide
 
   nr = static_cast<int> (rmax/dr + 0.5);
   nrho = static_cast<int> (rhomax/drho + 0.5);
 
   // ------------------------------------------------------------------
   // setup frho arrays
   // ------------------------------------------------------------------
 
   // allocate frho arrays
   // nfrho = # of funcfl files + 1 for zero array
 
   nfrho = nfuncfl + 1;
   memory->destroy(frho);
   memory->create(frho,nfrho,nrho+1,"pair:frho");
 
   // interpolate each file's frho to a single grid and cutoff
   // k is clamped to 2 .. file->nrho-2, which keeps the four points
   //   k-1 .. k+2 inside 1 .. file->nrho provided file->nrho >= 4
   // cof1..cof4 are the cubic Lagrange weights of the points k-1,k,k+1,k+2
   //   for the offset p measured from point k
 
   double r,p,cof1,cof2,cof3,cof4;
 
   n = 0;
   for (i = 0; i < nfuncfl; i++) {
     Funcfl *file = &funcfl[i];
     for (m = 1; m <= nrho; m++) {
       r = (m-1)*drho;
       p = r/file->drho + 1.0;
       k = static_cast<int> (p);
       k = MIN(k,file->nrho-2);
       k = MAX(k,2);
       p -= k;
       p = MIN(p,2.0);
       cof1 = -sixth*p*(p-1.0)*(p-2.0);
       cof2 = 0.5*(p*p-1.0)*(p-2.0);
       cof3 = -0.5*p*(p+1.0)*(p-2.0);
       cof4 = sixth*p*(p*p-1.0);
       frho[n][m] = cof1*file->frho[k-1] + cof2*file->frho[k] +
         cof3*file->frho[k+1] + cof4*file->frho[k+2];
     }
     n++;
   }
 
   // add extra frho of zeroes for non-EAM types to point to (pair hybrid)
   // this is necessary b/c fp is still computed for non-EAM atoms
 
   for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0;
 
   // type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to
   // if atom type doesn't point to file (non-EAM atom in pair hybrid)
   // then map it to last frho array of zeroes
 
   for (i = 1; i <= ntypes; i++)
     if (map[i] >= 0) type2frho[i] = map[i];
     else type2frho[i] = nfrho-1;
 
   // ------------------------------------------------------------------
   // setup rhor arrays
   // ------------------------------------------------------------------
 
   // allocate rhor arrays
   // nrhor = # of funcfl files
 
   nrhor = nfuncfl;
   memory->destroy(rhor);
   memory->create(rhor,nrhor,nr+1,"pair:rhor");
 
   // interpolate each file's rhor to a single grid and cutoff
   // same 4-point scheme as for frho, on the r grid
 
   n = 0;
   for (i = 0; i < nfuncfl; i++) {
     Funcfl *file = &funcfl[i];
     for (m = 1; m <= nr; m++) {
       r = (m-1)*dr;
       p = r/file->dr + 1.0;
       k = static_cast<int> (p);
       k = MIN(k,file->nr-2);
       k = MAX(k,2);
       p -= k;
       p = MIN(p,2.0);
       cof1 = -sixth*p*(p-1.0)*(p-2.0);
       cof2 = 0.5*(p*p-1.0)*(p-2.0);
       cof3 = -0.5*p*(p+1.0)*(p-2.0);
       cof4 = sixth*p*(p*p-1.0);
       rhor[n][m] = cof1*file->rhor[k-1] + cof2*file->rhor[k] +
         cof3*file->rhor[k+1] + cof4*file->rhor[k+2];
     }
     n++;
   }
 
   // type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to
   // for funcfl files, I,J mapping only depends on I
   // OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used
 
   for (i = 1; i <= ntypes; i++)
     for (j = 1; j <= ntypes; j++)
       type2rhor[i][j] = map[i];
 
   // ------------------------------------------------------------------
   // setup z2r arrays
   // ------------------------------------------------------------------
 
   // allocate z2r arrays
   // nz2r = N*(N+1)/2 where N = # of funcfl files
 
   nz2r = nfuncfl*(nfuncfl+1)/2;
   memory->destroy(z2r);
   memory->create(z2r,nz2r,nr+1,"pair:z2r");
 
   // create a z2r array for each file against other files, only for I >= J
   // interpolate zri and zrj to a single grid and cutoff
 
   double zri,zrj;
 
   n = 0;
   for (i = 0; i < nfuncfl; i++) {
     Funcfl *ifile = &funcfl[i];
     for (j = 0; j <= i; j++) {
       Funcfl *jfile = &funcfl[j];
 
       for (m = 1; m <= nr; m++) {
         r = (m-1)*dr;
 
         p = r/ifile->dr + 1.0;
         k = static_cast<int> (p);
         k = MIN(k,ifile->nr-2);
         k = MAX(k,2);
         p -= k;
         p = MIN(p,2.0);
         cof1 = -sixth*p*(p-1.0)*(p-2.0);
         cof2 = 0.5*(p*p-1.0)*(p-2.0);
         cof3 = -0.5*p*(p+1.0)*(p-2.0);
         cof4 = sixth*p*(p*p-1.0);
         zri = cof1*ifile->zr[k-1] + cof2*ifile->zr[k] +
           cof3*ifile->zr[k+1] + cof4*ifile->zr[k+2];
 
         p = r/jfile->dr + 1.0;
         k = static_cast<int> (p);
         k = MIN(k,jfile->nr-2);
         k = MAX(k,2);
         p -= k;
         p = MIN(p,2.0);
         cof1 = -sixth*p*(p-1.0)*(p-2.0);
         cof2 = 0.5*(p*p-1.0)*(p-2.0);
         cof3 = -0.5*p*(p+1.0)*(p-2.0);
         cof4 = sixth*p*(p*p-1.0);
         zrj = cof1*jfile->zr[k-1] + cof2*jfile->zr[k] +
           cof3*jfile->zr[k+1] + cof4*jfile->zr[k+2];
 
         // NOTE(review): 27.2*0.529 is presumably a Hartree-to-eV times
         //   Bohr-to-Angstrom unit conversion - confirm
         z2r[n][m] = 27.2*0.529 * zri*zrj;
       }
       n++;
     }
   }
 
   // type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to
   // set of z2r arrays only fill lower triangular Nelement matrix
   // value = n = sum over rows of lower-triangular matrix until reach irow,icol
   // swap indices when irow < icol to stay lower triangular
   // if map = -1 (non-EAM atom in pair hybrid):
   //   type2z2r is not used by non-opt
   //   but set type2z2r to 0 since accessed by opt
 
   int irow,icol;
   for (i = 1; i <= ntypes; i++) {
     for (j = 1; j <= ntypes; j++) {
       irow = map[i];
       icol = map[j];
       if (irow == -1 || icol == -1) {
         type2z2r[i][j] = 0;
         continue;
       }
       if (irow < icol) {
         irow = map[j];
         icol = map[i];
       }
       // the loop sums 1+2+...+irow, so n = irow*(irow+1)/2 + icol
       n = 0;
       for (m = 0; m < irow; m++) n += m + 1;
       n += icol;
       type2z2r[i][j] = n;
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairEAM::array2spline()
 {
   // inverse grid spacings used for the table lookups
 
   rdrho = 1.0/drho;
   rdr = 1.0/dr;
 
   // drop any coefficient arrays left from a previous call
 
   memory->destroy(z2r_spline);
   memory->destroy(rhor_spline);
   memory->destroy(frho_spline);
 
   // 7 coefficients are stored per table point
 
   memory->create(frho_spline,nfrho,nrho+1,7,"pair:frho");
   memory->create(rhor_spline,nrhor,nr+1,7,"pair:rhor");
   memory->create(z2r_spline,nz2r,nr+1,7,"pair:z2r");
 
   // spline every table: frho on the rho grid, rhor and z2r on the r grid
 
   int itable;
 
   for (itable = 0; itable < nfrho; itable++) {
     interpolate(nrho,drho,frho[itable],frho_spline[itable]);
   }
 
   for (itable = 0; itable < nrhor; itable++) {
     interpolate(nr,dr,rhor[itable],rhor_spline[itable]);
   }
 
   for (itable = 0; itable < nz2r; itable++) {
     interpolate(nr,dr,z2r[itable],z2r_spline[itable]);
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairEAM::interpolate(int n, double delta, double *f, double **spline)
 {
   // fills spline[1..n][0..6] from the table values f[1..n]
   //   column 6     = table value
   //   column 5     = slope estimate per grid interval
   //   columns 4,3  = quadratic and cubic coefficients
   //   columns 2,1,0 = columns 5,4,3 scaled by 1/delta, 2/delta, 3/delta
   // the statement order below is significant when n is small, since the
   // end-point assignments can then land on the same rows
 
   int m;
 
   for (m = 1; m <= n; m++) {
     spline[m][6] = f[m];
   }
 
   // slopes: one-sided at rows 1 and n, 3-point centered at rows 2 and n-1
 
   spline[1][5] = spline[2][6] - spline[1][6];
   spline[2][5] = 0.5 * (spline[3][6]-spline[1][6]);
   spline[n-1][5] = 0.5 * (spline[n][6]-spline[n-2][6]);
   spline[n][5] = spline[n][6] - spline[n-1][6];
 
   // slopes: 5-point centered formula for the interior rows
 
   for (m = 3; m <= n-2; m++) {
     const double outer = spline[m-2][6]-spline[m+2][6];
     const double inner = spline[m+1][6]-spline[m-1][6];
     spline[m][5] = (outer + 8.0*inner) / 12.0;
   }
 
   // higher-order coefficients from the values and slopes of rows m, m+1
 
   for (m = 1; m <= n-1; m++) {
     double *row = spline[m];
     double *next = spline[m+1];
     row[4] = 3.0*(next[6]-row[6]) - 2.0*row[5] - next[5];
     row[3] = row[5] + next[5] - 2.0*(next[6]-row[6]);
   }
 
   // the last row has no following interval
 
   spline[n][4] = 0.0;
   spline[n][3] = 0.0;
 
   // derivative coefficients
 
   for (m = 1; m <= n; m++) {
     double *row = spline[m];
     row[2] = row[5]/delta;
     row[1] = 2.0*row[4]/delta;
     row[0] = 3.0*row[3]/delta;
   }
 }
 
 /* ----------------------------------------------------------------------
    grab n values from file fptr and put them in list
    values can be several to a line
    only called by proc 0
 ------------------------------------------------------------------------- */
 
 void PairEAM::grab(FILE *fptr, int n, double *list)
 {
   // reads whitespace-separated numbers from fptr into list[0..n-1]
   // values may be spread over any number of lines
   // never writes outside list[0..n-1]
 
   char *ptr;
   char line[MAXLINE];
 
   int i = 0;
   while (i < n) {
 
     // end of file or read error before n values were found:
     // zero-fill the rest so the caller never uses uninitialized entries
     // NOTE(review): a truncated file is not reported here - consider
     //   raising an error at this point
 
     if (fgets(line,MAXLINE,fptr) == NULL) {
       while (i < n) list[i++] = 0.0;
       break;
     }
 
     // a blank line yields no token and is skipped (previously atof(NULL))
     // tokens beyond the n-th value are ignored (previously written past
     //   the end of list)
 
     ptr = strtok(line," \t\n\r\f");
     while (ptr != NULL && i < n) {
       list[i++] = atof(ptr);
       ptr = strtok(NULL," \t\n\r\f");
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairEAM::single(int i, int j, int itype, int jtype,
                        double rsq, double factor_coul, double factor_lj,
                        double &fforce)
 {
   int m;
   double r,p,rhoip,rhojp,z2,z2p,recip,phi,phip,psip;
   double *coeff;
 
   r = sqrt(rsq);
   p = r*rdr + 1.0;
   m = static_cast<int> (p);
   m = MIN(m,nr-1);
   p -= m;
   p = MIN(p,1.0);
 
   coeff = rhor_spline[type2rhor[itype][jtype]][m];
   rhoip = (coeff[0]*p + coeff[1])*p + coeff[2];
   coeff = rhor_spline[type2rhor[jtype][itype]][m];
   rhojp = (coeff[0]*p + coeff[1])*p + coeff[2];
   coeff = z2r_spline[type2z2r[itype][jtype]][m];
   z2p = (coeff[0]*p + coeff[1])*p + coeff[2];
   z2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
 
   recip = 1.0/r;
   phi = z2*recip;
   phip = z2p*recip - phi*recip;
   psip = fp[i]*rhojp + fp[j]*rhoip + phip;
   fforce = -psip*recip;
 
   return phi;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int PairEAM::pack_forward_comm(int n, int *list, double *buf,
                                int pbc_flag, int *pbc)
 {
   // pack fp of the n atoms named in list, one value per atom
   // pbc_flag and pbc are not used
   // returns the number of values written to buf
 
   int nbuf = 0;
   while (nbuf < n) {
     buf[nbuf] = fp[list[nbuf]];
     nbuf++;
   }
   return nbuf;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairEAM::unpack_forward_comm(int n, int first, double *buf)
 {
   // store the n received values into fp[first] .. fp[first+n-1]
 
   double *dest = fp + first;
   for (int k = 0; k < n; k++) {
     dest[k] = buf[k];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int PairEAM::pack_reverse_comm(int n, int first, double *buf)
 {
   // pack rho[first] .. rho[first+n-1], one value per atom
   // returns the number of values written to buf
 
   int nbuf = 0;
   while (nbuf < n) {
     buf[nbuf] = rho[first+nbuf];
     nbuf++;
   }
   return nbuf;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairEAM::unpack_reverse_comm(int n, int *list, double *buf)
 {
   // sum the n received values into rho of the atoms named in list
   // values are added to the existing entries, not assigned
 
   for (int k = 0; k < n; k++) {
     const int iatom = list[k];
     rho[iatom] += buf[k];
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
 ------------------------------------------------------------------------- */
 
 double PairEAM::memory_usage()
 {
   // each term is converted to double on its own, then summed left to right
 
   const double eatom_bytes = maxeatom * sizeof(double);
   const double vatom_bytes = maxvatom*6 * sizeof(double);
 
   // two per-atom double arrays of length nmax
   const double peratom_bytes = 2 * nmax * sizeof(double);
 
   return eatom_bytes + vatom_bytes + peratom_bytes;
 }
 
 /* ----------------------------------------------------------------------
    swap fp array with one passed in by caller
 ------------------------------------------------------------------------- */
 
 void PairEAM::swap_eam(double *fp_caller, double **fp_caller_hold)
 {
   // install the caller's array as fp and hand the previous fp pointer
   // back through fp_caller_hold; no array contents are copied
 
   double *previous_fp = fp;
 
   fp = fp_caller;
   *fp_caller_hold = previous_fp;
 }
diff --git a/src/MANYBODY/pair_eim.cpp b/src/MANYBODY/pair_eim.cpp
index 52706e2a6..9206229e9 100644
--- a/src/MANYBODY/pair_eim.cpp
+++ b/src/MANYBODY/pair_eim.cpp
@@ -1,1171 +1,1171 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Xiaowang Zhou (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_eim.h"
 #include "atom.h"
 #include "force.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 #define MAXLINE 1024
 
 /* ---------------------------------------------------------------------- */
 
 PairEIM::PairEIM(LAMMPS *lmp) : Pair(lmp)
 {
   // pair-style flags
 
   manybody_flag = 1;
   one_coeff = 1;
   restartinfo = 0;
   single_enable = 0;
 
   // set comm size needed by this Pair
 
   comm_reverse = 1;
   comm_forward = 1;
 
   // per-atom arrays start empty, compute() allocates them on first use
 
   fp = NULL;
   rho = NULL;
   nmax = 0;
 
   // nothing read from a potential file yet
 
   setfl = NULL;
   elements = NULL;
   nelements = 0;
 
   // per-element and per-pair data
 
   q0 = NULL;
   negativity = NULL;
   cutforcesq = NULL;
 
   // tabulated functions
 
   phiij = NULL;
   Gij = NULL;
   Fij = NULL;
 
   // spline coefficients of the tabulated functions
 
   phiij_spline = NULL;
   Gij_spline = NULL;
   Fij_spline = NULL;
 }
 
 /* ----------------------------------------------------------------------
    check if allocated, since class can be destructed when incomplete
 ------------------------------------------------------------------------- */
 
 PairEIM::~PairEIM()
 {
   // per-atom arrays
 
   memory->destroy(fp);
   memory->destroy(rho);
 
   // per-type arrays, present only when the allocated flag is set
 
   if (allocated) {
     memory->destroy(type2phiij);
     memory->destroy(type2Gij);
     memory->destroy(type2Fij);
     delete [] map;
     memory->destroy(cutsq);
     memory->destroy(setflag);
   }
 
   // element names, then the array holding them
 
   int ielem = 0;
   while (ielem < nelements) {
     delete [] elements[ielem];
     ielem++;
   }
   delete [] elements;
 
   deallocate_setfl();
 
   // per-element and per-pair data
 
   delete [] negativity;
   delete [] q0;
   memory->destroy(cutforcesq);
 
   // tabulated functions
 
   memory->destroy(Fij);
   memory->destroy(Gij);
   memory->destroy(phiij);
 
   // spline coefficients
 
   memory->destroy(Fij_spline);
   memory->destroy(Gij_spline);
   memory->destroy(phiij_spline);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairEIM::compute(int eflag, int vflag)
 {
   // four passes over the neighbor list / atom list:
   //   1) accumulate rho on each atom from the Fij tables
   //   2) accumulate fp on each atom from neighbor rho and the Gij tables
   //   3) add 0.5*rho*fp per atom to the energy when eflag is set
   //   4) accumulate pair forces and tally energy/virial
   // after passes 1 and 2 a reverse comm (newton_pair only) and a
   // forward comm are performed
 
   int i,j,ii,jj,m,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r,p,rhoip,rhojp,phip,phi,coul,coulp,recip,psip;
   double *coeff;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;
 
   // grow energy array if necessary
 
   if (atom->nmax > nmax) {
     memory->destroy(rho);
     memory->destroy(fp);
     nmax = atom->nmax;
     memory->create(rho,nmax,"pair:rho");
     memory->create(fp,nmax,"pair:fp");
   }
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int newton_pair = force->newton_pair;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // zero out density
   // both rho and fp are zeroed since both are accumulated with += below
   // ghost entries are included only when newton_pair is set
 
   if (newton_pair) {
     m = nlocal + atom->nghost;
     for (i = 0; i < m; i++) {
       rho[i] = 0.0;
       fp[i] = 0.0;
     }
   } else {
     for (i = 0; i < nlocal; i++) {
       rho[i] = 0.0;
       fp[i] = 0.0;
     }
   }
 
   // rho = density at each atom
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
       jtype = type[j];
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       // the cutoff here is per type pair
       if (rsq < cutforcesq[itype][jtype]) {
         // p = 1-based position in the r table, m = interval index capped
         // at nr-1, then p becomes the offset within interval m, capped at 1.0
         p = sqrt(rsq)*rdr + 1.0;
         m = static_cast<int> (p);
         m = MIN(m,nr-1);
         p -= m;
         p = MIN(p,1.0);
         coeff = Fij_spline[type2Fij[itype][jtype]][m];
         rho[i] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
         // j is also credited unless it is a ghost atom with newton_pair off
         if (newton_pair || j < nlocal) {
           coeff = Fij_spline[type2Fij[jtype][itype]][m];
           rho[j] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
         }
       }
     }
   }
 
   // communicate and sum densities
   // NOTE(review): rhofp presumably tells the comm pack/unpack callbacks
   //   which array to transfer (1 = rho, 2 = fp); the callbacks are not
   //   visible here - confirm
 
   rhofp = 1;
   if (newton_pair) comm->reverse_comm_pair(this);
   comm->forward_comm_pair(this);
 
   // fp = sum over neighbors of (neighbor rho) * (Gij table value)
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
       jtype = type[j];
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cutforcesq[itype][jtype]) {
         p = sqrt(rsq)*rdr + 1.0;
         m = static_cast<int> (p);
         m = MIN(m,nr-1);
         p -= m;
         p = MIN(p,1.0);
         // the same Gij coefficients are used for both the i and j updates
         coeff = Gij_spline[type2Gij[itype][jtype]][m];
         fp[i] += rho[j]*(((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]);
         if (newton_pair || j < nlocal) {
           fp[j] += rho[i]*(((coeff[3]*p + coeff[4])*p + coeff[5])*p +
                            coeff[6]);
         }
       }
     }
   }
 
   // communicate and sum modified densities
 
   rhofp = 2;
   if (newton_pair) comm->reverse_comm_pair(this);
   comm->forward_comm_pair(this);
 
   // per-atom energy term 0.5*rho*fp, only evaluated when eflag is set
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     itype = type[i];
     if (eflag) {
       phi = 0.5*rho[i]*fp[i];
       if (eflag_global) eng_vdwl += phi;
       if (eflag_atom) eatom[i] += phi;
     }
   }
 
   // compute forces on each atom
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
       jtype = type[j];
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cutforcesq[itype][jtype]) {
         r = sqrt(rsq);
         p = r*rdr + 1.0;
         m = static_cast<int> (p);
         m = MIN(m,nr-1);
         p -= m;
         p = MIN(p,1.0);
 
         // rhoip = derivative of (density at atom j due to atom i)
         // rhojp = derivative of (density at atom i due to atom j)
         // phi = pair potential energy
         // phip = phi'
         // coul, coulp = Gij table value and its derivative
 
         coeff = Fij_spline[type2Fij[jtype][itype]][m];
         rhoip = (coeff[0]*p + coeff[1])*p + coeff[2];
         coeff = Fij_spline[type2Fij[itype][jtype]][m];
         rhojp = (coeff[0]*p + coeff[1])*p + coeff[2];
         coeff = phiij_spline[type2phiij[itype][jtype]][m];
         phip = (coeff[0]*p + coeff[1])*p + coeff[2];
         phi = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
         coeff = Gij_spline[type2Gij[itype][jtype]][m];
         coul = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
         coulp = (coeff[0]*p + coeff[1])*p + coeff[2];
         psip = phip + (rho[i]*rho[j]-q0[itype]*q0[jtype])*coulp +
                fp[i]*rhojp + fp[j]*rhoip;
         recip = 1.0/r;
         // fpair multiplies the (delx,dely,delz) vector, hence the 1/r
         fpair = -psip*recip;
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         // pair energy tallied here; the 0.5*rho*fp term was added above
         if (eflag) evdwl = phi-q0[itype]*q0[jtype]*coul;
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairEIM::allocate()
 {
   // all per-type tables are sized ntypes+1; the loops below use indices 1..ntypes
   const int ntypes = atom->ntypes;
   allocated = 1;
 
   // setflag: cleared on the diagonal and above (j >= i)
   memory->create(setflag,ntypes+1,ntypes+1,"pair:setflag");
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,ntypes+1,ntypes+1,"pair:cutsq");
 
   // map[type] starts out as -1 for every type
   map = new int[ntypes+1];
   for (int itype = 1; itype <= ntypes; itype++) {
     map[itype] = -1;
   }
 
   // lookup tables from a type pair to an index into the Fij/Gij/phiij tables
   memory->create(type2Fij,ntypes+1,ntypes+1,"pair:type2Fij");
   memory->create(type2Gij,ntypes+1,ntypes+1,"pair:type2Gij");
   memory->create(type2phiij,ntypes+1,ntypes+1,"pair:type2phiij");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairEIM::settings(int narg, char **arg)
 {
   if (narg > 0) error->all(FLERR,"Illegal pair_style command");
 }
 
 /* ----------------------------------------------------------------------
    set coeffs from set file
 ------------------------------------------------------------------------- */
 
 void PairEIM::coeff(int narg, char **arg)
 {
   int i,j,m,n;
 
   if (!allocated) allocate();
 
   if (narg < 5) error->all(FLERR,"Incorrect args for pair coefficients");
 
   // insure I,J args are * *
 
   if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // read EIM element names before filename
   // nelements = # of EIM elements to read from file
   // elements = list of unique element names
 
   if (nelements) {
     for (i = 0; i < nelements; i++) delete [] elements[i];
     delete [] elements;
   }
   nelements = narg - 3 - atom->ntypes;
   if (nelements < 1) error->all(FLERR,"Incorrect args for pair coefficients");
   elements = new char*[nelements];
 
   for (i = 0; i < nelements; i++) {
     n = strlen(arg[i+2]) + 1;
     elements[i] = new char[n];
     strcpy(elements[i],arg[i+2]);
   }
 
   // read EIM file
 
   deallocate_setfl();
   setfl = new Setfl();
   read_file(arg[2+nelements]);
 
   // read args that map atom types to elements in potential file
   // map[i] = which element the Ith atom type is, -1 if NULL
 
   for (i = 3 + nelements; i < narg; i++) {
     m = i - (3+nelements) + 1;
     for (j = 0; j < nelements; j++)
       if (strcmp(arg[i],elements[j]) == 0) break;
     if (j < nelements) map[m] = j;
     else if (strcmp(arg[i],"NULL") == 0) map[m] = -1;
     else error->all(FLERR,"Incorrect args for pair coefficients");
   }
 
   // clear setflag since coeff() called once with I,J = * *
 
   n = atom->ntypes;
   for (i = 1; i <= n; i++)
     for (j = i; j <= n; j++)
       setflag[i][j] = 0;
 
   // set setflag i,j for type pairs where both are mapped to elements
   // set mass of atom type if i = j
 
   int count = 0;
   for (i = 1; i <= n; i++)
     for (j = i; j <= n; j++)
       if (map[i] >= 0 && map[j] >= 0) {
         setflag[i][j] = 1;
         if (i == j) atom->set_mass(i,setfl->mass[map[i]]);
         count++;
       }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairEIM::init_style()
 {
   // convert read-in file(s) to arrays and spline them
   // file2array() fills the per-type tables, array2spline() builds the
   // spline coefficient tables from them, so the order matters
 
   file2array();
   array2spline();
 
   // NOTE(review): presumably registers this pair style's neighbor list
   // request -- confirm against Neighbor::request()
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairEIM::init_one(int i, int j)
 {
   // cutoff for this type pair = sqrt of the stored squared force cutoff
   // the value is also remembered in cutmax before being returned
   const double cutoff = sqrt(cutforcesq[i][j]);
   cutmax = cutoff;
   return cutmax;
 }
 
 /* ----------------------------------------------------------------------
    read potential values from a set file
 ------------------------------------------------------------------------- */
 
 void PairEIM::read_file(char *filename)
 {
   // proc 0 parses the global, per-element and per-pair entries of the file
   // each value is then broadcast to all procs
   // finally every proc tabulates phi, Fij and Gij on a uniform grid in r
 
   // open potential file
 
   int me = comm->me;
   FILE *fptr = NULL;
 
   if (me == 0) {
     fptr = force->open_potential(filename);
     if (fptr == NULL) {
       char str[128];
       // bounded write: a long file name must not overrun the message buffer
       snprintf(str,sizeof(str),"Cannot open EIM potential file %s",filename);
       error->one(FLERR,str);
     }
   }
 
   // per-element arrays have nelements entries
   // per-pair arrays have one entry for each unordered element pair
 
   int npair = nelements*(nelements+1)/2;
   setfl->ielement = new int[nelements];
   setfl->mass = new double[nelements];
   setfl->negativity = new double[nelements];
   setfl->ra = new double[nelements];
   setfl->ri = new double[nelements];
   setfl->Ec = new double[nelements];
   setfl->q0 = new double[nelements];
   setfl->rcutphiA = new double[npair];
   setfl->rcutphiR = new double[npair];
   setfl->Eb = new double[npair];
   setfl->r0 = new double[npair];
   setfl->alpha = new double[npair];
   setfl->beta = new double[npair];
   setfl->rcutq = new double[npair];
   setfl->Asigma = new double[npair];
   setfl->rq = new double[npair];
   setfl->rcutsigma = new double[npair];
   setfl->Ac = new double[npair];
   setfl->zeta = new double[npair];
   setfl->rs = new double[npair];
   setfl->tp = new int[npair];
 
   // global entry
 
   if (me == 0)
     if (!grabglobal(fptr))
       error->one(FLERR,"Could not grab global entry from EIM potential file");
   MPI_Bcast(&setfl->division,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&setfl->rbig,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&setfl->rsmall,1,MPI_DOUBLE,0,world);
 
   // one entry per element
 
   for (int i = 0; i < nelements; i++) {
     if (me == 0)
       if (!grabsingle(fptr,i))
         error->one(FLERR,"Could not grab element entry from EIM potential file");
     MPI_Bcast(&setfl->ielement[i],1,MPI_INT,0,world);
     MPI_Bcast(&setfl->mass[i],1,MPI_DOUBLE,0,world);
     MPI_Bcast(&setfl->negativity[i],1,MPI_DOUBLE,0,world);
     MPI_Bcast(&setfl->ra[i],1,MPI_DOUBLE,0,world);
     MPI_Bcast(&setfl->ri[i],1,MPI_DOUBLE,0,world);
     MPI_Bcast(&setfl->Ec[i],1,MPI_DOUBLE,0,world);
     MPI_Bcast(&setfl->q0[i],1,MPI_DOUBLE,0,world);
   }
 
   // one entry per element pair with j >= i
   // ij = packed pair index: like pairs first, then unlike pairs row by row
 
   for (int i = 0; i < nelements; i++) {
     for (int j = i; j < nelements; j++) {
       int ij;
       if (i == j) ij = i;
       else ij = nelements*(i+1) - (i+1)*(i+2)/2 + j;
       if (me == 0)
         if (grabpair(fptr,i,j) == 0)
           error->one(FLERR,"Could not grab pair entry from EIM potential file");
       MPI_Bcast(&setfl->rcutphiA[ij],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&setfl->rcutphiR[ij],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&setfl->Eb[ij],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&setfl->r0[ij],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&setfl->alpha[ij],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&setfl->beta[ij],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&setfl->rcutq[ij],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&setfl->Asigma[ij],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&setfl->rq[ij],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&setfl->rcutsigma[ij],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&setfl->Ac[ij],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&setfl->zeta[ij],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&setfl->rs[ij],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&setfl->tp[ij],1,MPI_INT,0,world);
     }
   }
 
   // grid: nr points, global cutoff = largest of all per-pair cutoffs
 
   setfl->nr = 5000;
   setfl->cut = 0.0;
   for (int i = 0; i < npair; i++) {
     if (setfl->cut < setfl->rcutphiA[i]) setfl->cut = setfl->rcutphiA[i];
     if (setfl->cut < setfl->rcutphiR[i]) setfl->cut = setfl->rcutphiR[i];
     if (setfl->cut < setfl->rcutq[i]) setfl->cut = setfl->rcutq[i];
     if (setfl->cut < setfl->rcutsigma[i]) setfl->cut = setfl->rcutsigma[i];
   }
   setfl->dr = setfl->cut/(setfl->nr-1.0);
 
   // cuts[i][j] = largest of the four cutoffs of that element pair
   // entries with i > j are copied from the already-filled [j][i] entry
 
   memory->create(setfl->cuts,nelements,nelements,"pair:cuts");
   for (int i = 0; i < nelements; i++) {
     for (int j = 0; j < nelements; j++) {
       if (i > j) {
         setfl->cuts[i][j] = setfl->cuts[j][i];
       } else {
         int ij;
         if (i == j) {
           ij = i;
         } else {
           ij = nelements*(i+1) - (i+1)*(i+2)/2 + j;
         }
         setfl->cuts[i][j] = setfl->rcutphiA[ij];
         if (setfl->cuts[i][j] < setfl->rcutphiR[ij])
           setfl->cuts[i][j] = setfl->rcutphiR[ij];
         if (setfl->cuts[i][j] < setfl->rcutq[ij])
           setfl->cuts[i][j] = setfl->rcutq[ij];
         if (setfl->cuts[i][j] < setfl->rcutsigma[ij])
           setfl->cuts[i][j] = setfl->rcutsigma[ij];
       }
     }
   }
 
   // tabulated functions: value at r = k*dr is stored at index k+1
   // index 0 of each table is not written here
 
   memory->create(setfl->Fij,nelements,nelements,setfl->nr+1,"pair:Fij");
   memory->create(setfl->Gij,nelements,nelements,setfl->nr+1,"pair:Gij");
   memory->create(setfl->phiij,nelements,nelements,setfl->nr+1,"pair:phiij");
 
   // phiij: evaluated for i <= j, copied for i > j
 
   for (int i = 0; i < nelements; i++)
     for (int j = 0; j < nelements; j++) {
       for (int k = 0; k < setfl->nr; k++) {
         if (i > j) {
           setfl->phiij[i][j][k+1] = setfl->phiij[j][i][k+1];
         } else {
           double r = k*setfl->dr;
           setfl->phiij[i][j][k+1] = funcphi(i,j,r);
         }
       }
     }
 
   // Fij: evaluated for every ordered pair, no copy for i > j
 
   for (int i = 0; i < nelements; i++)
     for (int j = 0; j < nelements; j++) {
       for (int k = 0; k < setfl->nr; k++) {
         double r = k*setfl->dr;
         setfl->Fij[i][j][k+1] = funcsigma(i,j,r);
       }
     }
 
   // Gij: evaluated for i <= j, copied for i > j
 
   for (int i = 0; i < nelements; i++)
     for (int j = 0; j < nelements; j++) {
       for (int k = 0; k < setfl->nr; k++) {
         if (i > j) {
           setfl->Gij[i][j][k+1] = setfl->Gij[j][i][k+1];
         } else {
           double r = k*setfl->dr;
           setfl->Gij[i][j][k+1] = funccoul(i,j,r);
         }
       }
     }
 
   // close the potential file
 
   if (me == 0) fclose(fptr);
 }
 
 /* ----------------------------------------------------------------------
    deallocate data associated with setfl file
 ------------------------------------------------------------------------- */
 
 void PairEIM::deallocate_setfl()
 {
   // nothing to release if no potential data is currently held
 
   if (!setfl) return;
 
   // per-element arrays
 
   delete [] setfl->ielement;
   delete [] setfl->mass;
   delete [] setfl->negativity;
   delete [] setfl->ra;
   delete [] setfl->ri;
   delete [] setfl->Ec;
   delete [] setfl->q0;
 
   // per-pair arrays
 
   delete [] setfl->rcutphiA;
   delete [] setfl->rcutphiR;
   delete [] setfl->Eb;
   delete [] setfl->r0;
   delete [] setfl->alpha;
   delete [] setfl->beta;
   delete [] setfl->rcutq;
   delete [] setfl->Asigma;
   delete [] setfl->rq;
   delete [] setfl->rcutsigma;
   delete [] setfl->Ac;
   delete [] setfl->zeta;
   delete [] setfl->rs;
   delete [] setfl->tp;
 
   // arrays obtained from memory->create()
 
   memory->destroy(setfl->cuts);
   memory->destroy(setfl->Fij);
   memory->destroy(setfl->Gij);
   memory->destroy(setfl->phiij);
 
   // reset the pointer so a repeated call returns early
   // instead of freeing the same storage twice
 
   delete setfl;
   setfl = NULL;
 }
 
 /* ----------------------------------------------------------------------
    convert read-in potentials to standard array format
    interpolate all file values to a single grid and cutoff
 ------------------------------------------------------------------------- */
 
 void PairEIM::file2array()
 {
   int i,j,m,n;
   int irow,icol;
   int ntypes = atom->ntypes;
 
   // release per-type data from a previous call
   // cutforcesq is allocated with memory->create() below, so it has to be
   // released with memory->destroy(), not delete []
 
   delete [] negativity;
   delete [] q0;
   memory->destroy(cutforcesq);
   negativity = new double[ntypes+1];
   q0 = new double[ntypes+1];
   memory->create(cutforcesq,ntypes+1,ntypes+1,"pair:cutforcesq");
 
   // per-type negativity and q0, zero for types not mapped to an element
 
   for (i = 1; i <= ntypes; i++) {
     if (map[i] == -1) {
       negativity[i]=0.0;
       q0[i]=0.0;
     } else {
       negativity[i]=setfl->negativity[map[i]];
       q0[i]=setfl->q0[map[i]];
     }
   }
 
   // squared force cutoff per type pair
   // pairs with an unmapped type use the global cutoff
 
   for (i = 1; i <= ntypes; i++)
     for (j = 1; j <= ntypes; j++) {
       double cut;
       if (map[i] == -1 || map[j] == -1) cut = setfl->cut;
       else cut = setfl->cuts[map[i]][map[j]];
       cutforcesq[i][j] = cut*cut;
     }
 
   nr = setfl->nr;
   dr = setfl->dr;
 
   // ------------------------------------------------------------------
   // setup Fij arrays
   // ------------------------------------------------------------------
 
   nFij = nelements*nelements + 1;
   memory->destroy(Fij);
   memory->create(Fij,nFij,nr+1,"pair:Fij");
 
   // copy each element pair's Fij to global Fij, full square matrix
 
   n=0;
   for (i = 0; i < nelements; i++)
     for (j = 0; j < nelements; j++) {
       for (m = 1; m <= nr; m++) Fij[n][m] = setfl->Fij[i][j][m];
       n++;
     }
 
   // add extra Fij of zeroes for non-EIM types to point to (pair hybrid)
 
   for (m = 1; m <= nr; m++) Fij[nFij-1][m] = 0.0;
 
   // type2Fij[i][j] = which Fij array (0 to nFij-1) each type pair maps to
   // row-major index into the square Nelement matrix = irow*nelements + icol
   // if atom type doesn't point to element (non-EIM atom in pair hybrid)
   // then map it to last Fij array of zeroes
 
   for (i = 1; i <= ntypes; i++) {
     for (j = 1; j <= ntypes; j++) {
       irow = map[i];
       icol = map[j];
       if (irow == -1 || icol == -1) {
         type2Fij[i][j] = nFij-1;
       } else {
         type2Fij[i][j] = irow*nelements + icol;
       }
     }
   }
 
   // ------------------------------------------------------------------
   // setup Gij arrays
   // ------------------------------------------------------------------
 
   nGij = nelements * (nelements+1) / 2 + 1;
   memory->destroy(Gij);
   memory->create(Gij,nGij,nr+1,"pair:Gij");
 
   // copy each element's Gij to global Gij, only for I >= J
 
   n=0;
   for (i = 0; i < nelements; i++)
     for (j = 0; j <= i; j++) {
       for (m = 1; m <= nr; m++) Gij[n][m] = setfl->Gij[i][j][m];
       n++;
     }
 
   // add extra Gij of zeroes for non-EIM types to point to (pair hybrid)
 
   for (m = 1; m <= nr; m++) Gij[nGij-1][m] = 0.0;
 
   // type2Gij[i][j] = which Gij array (0 to nGij-1) each type pair maps to
   // Gij arrays only fill lower triangular Nelement matrix
   // rows 0..irow-1 hold 1+2+...+irow = irow*(irow+1)/2 entries,
   // so the index is irow*(irow+1)/2 + icol
   // swap indices when irow < icol to stay lower triangular
   // if atom type doesn't point to element (non-EIM atom in pair hybrid)
   // then map it to last Gij array of zeroes
 
   for (i = 1; i <= ntypes; i++) {
     for (j = 1; j <= ntypes; j++) {
       irow = map[i];
       icol = map[j];
       if (irow == -1 || icol == -1) {
         type2Gij[i][j] = nGij-1;
       } else {
         if (irow < icol) {
           irow = map[j];
           icol = map[i];
         }
         type2Gij[i][j] = irow*(irow+1)/2 + icol;
       }
     }
   }
 
   // ------------------------------------------------------------------
   // setup phiij arrays
   // ------------------------------------------------------------------
 
   nphiij = nelements * (nelements+1) / 2 + 1;
   memory->destroy(phiij);
   memory->create(phiij,nphiij,nr+1,"pair:phiij");
 
   // copy each element pair phiij to global phiij, only for I >= J
 
   n = 0;
   for (i = 0; i < nelements; i++)
     for (j = 0; j <= i; j++) {
       for (m = 1; m <= nr; m++) phiij[n][m] = setfl->phiij[i][j][m];
       n++;
     }
 
   // add extra phiij of zeroes for non-EIM types to point to (pair hybrid)
 
   for (m = 1; m <= nr; m++) phiij[nphiij-1][m] = 0.0;
 
   // type2phiij[i][j] = which phiij array (0 to nphiij-1)
   //                    each type pair maps to
   // phiij arrays only fill lower triangular Nelement matrix
   // same lower-triangular index as for Gij: irow*(irow+1)/2 + icol
   // swap indices when irow < icol to stay lower triangular
   // if atom type doesn't point to element (non-EIM atom in pair hybrid)
   // then map it to last phiij array of zeroes
 
   for (i = 1; i <= ntypes; i++) {
     for (j = 1; j <= ntypes; j++) {
       irow = map[i];
       icol = map[j];
       if (irow == -1 || icol == -1) {
         type2phiij[i][j] = nphiij-1;
       } else {
         if (irow < icol) {
           irow = map[j];
           icol = map[i];
         }
         type2phiij[i][j] = irow*(irow+1)/2 + icol;
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairEIM::array2spline()
 {
   // inverse grid spacing
   rdr = 1.0/dr;
 
   // for each of the three tables: discard the old coefficients, allocate
   // 7 coefficients per grid point, then fit every tabulated row
 
   memory->destroy(Fij_spline);
   memory->create(Fij_spline,nFij,nr+1,7,"pair:Fij");
   for (int itab = 0; itab < nFij; itab++) {
     interpolate(nr,dr,Fij[itab],Fij_spline[itab],0.0);
   }
 
   memory->destroy(Gij_spline);
   memory->create(Gij_spline,nGij,nr+1,7,"pair:Gij");
   for (int itab = 0; itab < nGij; itab++) {
     interpolate(nr,dr,Gij[itab],Gij_spline[itab],0.0);
   }
 
   memory->destroy(phiij_spline);
   memory->create(phiij_spline,nphiij,nr+1,7,"pair:phiij");
   for (int itab = 0; itab < nphiij; itab++) {
     interpolate(nr,dr,phiij[itab],phiij_spline[itab],0.0);
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairEIM::interpolate(int n, double delta, double *f,
                           double **spline, double origin)
 {
   // coefficient layout of spline[k], k = 1..n:
   //   [6]         tabulated value f[k]
   //   [5],[4],[3] linear, quadratic, cubic coefficients per grid interval
   //   [2],[1],[0] = [5]/delta, 2*[4]/delta, 3*[3]/delta
   // origin is not used
 
   int k;
 
   for (k = 1; k <= n; k++) {
     spline[k][6] = f[k];
   }
 
   // slopes at the two points next to each end, then the interior stencil
 
   spline[1][5] = spline[2][6] - spline[1][6];
   spline[2][5] = 0.5 * (spline[3][6]-spline[1][6]);
   spline[n-1][5] = 0.5 * (spline[n][6]-spline[n-2][6]);
   spline[n][5] = 0.0;
 
   for (k = 3; k <= n-2; k++) {
     spline[k][5] = ((spline[k-2][6]-spline[k+2][6]) +
                     8.0*(spline[k+1][6]-spline[k-1][6])) / 12.0;
   }
 
   // higher-order coefficients from values and slopes of adjacent points
 
   for (k = 1; k <= n-1; k++) {
     double *cur = spline[k];
     double *next = spline[k+1];
     const double step = next[6]-cur[6];
     cur[4] = 3.0*step - 2.0*cur[5] - next[5];
     cur[3] = cur[5] + next[5] - 2.0*step;
   }
 
   spline[n][4] = 0.0;
   spline[n][3] = 0.0;
 
   // scaled copies of the three coefficients above
 
   for (k = 1; k <= n; k++) {
     double *cur = spline[k];
     cur[2] = cur[5]/delta;
     cur[1] = 2.0*cur[4]/delta;
     cur[0] = 3.0*cur[3]/delta;
   }
 }
 
 /* ----------------------------------------------------------------------
    grab global line from file and store info in setfl
    return 0 if error
 ------------------------------------------------------------------------- */
 
 int PairEIM::grabglobal(FILE *fptr)
 {
   // scan forward from the current file position for a line that contains
   // "global" and carries three numbers after its first token
   // store them in setfl->division, setfl->rbig, setfl->rsmall
   // return 1 on success, 0 if no such line is found
 
   char line[MAXLINE];
 
   while (fgets(line,MAXLINE,fptr) != NULL) {
     if (strstr(line,"global") == NULL) continue;
 
     // skip the first token, the rest of the line (up to any '?') is the data
     // a line with nothing after the first token is not a valid entry
 
     strtok(line," \t\n\r\f");
     char *data = strtok(NULL,"?");
     if (data == NULL) continue;
 
     // parse into temporaries so a partial match never leaves setfl half set
 
     double division,rbig,rsmall;
     if (sscanf(data,"%lg %lg %lg",&division,&rbig,&rsmall) != 3) continue;
 
     setfl->division = division;
     setfl->rbig = rbig;
     setfl->rsmall = rsmall;
     return 1;
   }
 
   return 0;
 }
 
 /* ----------------------------------------------------------------------
    grab elemental line from file and store info in setfl
    return 0 if error
 ------------------------------------------------------------------------- */
 
 int PairEIM::grabsingle(FILE *fptr, int i)
 {
   // search the whole file for the entry of elements[i]:
   //   first token contains "element:", second token equals the element name,
   //   followed by 7 values (ielement mass negativity ra ri Ec q0)
   // store the values at index i of the setfl per-element arrays
   // return 1 on success, 0 if the entry is missing or incomplete
 
   char line[MAXLINE];
 
   rewind(fptr);
 
   while (fgets(line,MAXLINE,fptr) != NULL) {
 
     // blank lines yield no token at all
 
     char *key = strtok(line," \t\n\r\f");
     if (key == NULL || strstr(key,"element:") == NULL) continue;
 
     // keep looking if the line has no name or names a different element
 
     char *name = strtok(NULL," \t\n\r\f");
     if (name == NULL || strcmp(name,elements[i]) != 0) continue;
 
     // rest of the line (up to any '?') holds the values
 
     char *data = strtok(NULL,"?");
     if (data == NULL) return 0;
 
     int nread = sscanf(data,"%d %lg %lg %lg %lg %lg %lg",&setfl->ielement[i],
                        &setfl->mass[i],&setfl->negativity[i],&setfl->ra[i],
                        &setfl->ri[i],&setfl->Ec[i],&setfl->q0[i]);
     if (nread != 7) return 0;
     return 1;
   }
 
   return 0;
 }
 
 /* ----------------------------------------------------------------------
    grab pair line from file and store info in setfl
    return 0 if error
 ------------------------------------------------------------------------- */
 
 int PairEIM::grabpair(FILE *fptr, int i, int j)
 {
   // search the whole file for the entry of the element pair (i,j):
   //   first token contains "pair:", the next two tokens are the two element
   //   names in either order, followed by 5 values on the same line,
   //   5 values on the next line and 4 values on the line after that
   // store the values at the packed pair index ij of the setfl per-pair arrays
   // return 1 on success, 0 if the entry is missing or incomplete
 
   char line[MAXLINE];
 
   rewind(fptr);
 
   // packed pair index: like pairs first, then unlike pairs row by row
 
   int ij;
   if (i == j) ij = i;
   else if (i < j) ij = nelements*(i+1) - (i+1)*(i+2)/2 + j;
   else ij = nelements*(j+1) - (j+1)*(j+2)/2 + i;
 
   while (fgets(line,MAXLINE,fptr) != NULL) {
 
     // blank lines yield no token at all
 
     char *key = strtok(line," \t\n\r\f");
     if (key == NULL || strstr(key,"pair:") == NULL) continue;
 
     char *name1 = strtok(NULL," \t\n\r\f");
     if (name1 == NULL) continue;
     char *name2 = strtok(NULL," \t\n\r\f");
     if (name2 == NULL) continue;
 
     // accept the two names in either order
 
     bool match = (strcmp(name1,elements[i]) == 0 &&
                   strcmp(name2,elements[j]) == 0) ||
                  (strcmp(name1,elements[j]) == 0 &&
                   strcmp(name2,elements[i]) == 0);
     if (!match) continue;
 
     // first 5 values follow the names on the same line
 
     char *data = strtok(NULL,"?");
     if (data == NULL) return 0;
     if (sscanf(data,"%lg %lg %lg %lg %lg",
                &setfl->rcutphiA[ij],&setfl->rcutphiR[ij],
                &setfl->Eb[ij],&setfl->r0[ij],&setfl->alpha[ij]) != 5) return 0;
 
     // next 5 values on the following line
 
     if (fgets(line,MAXLINE,fptr) == NULL) return 0;
     if (sscanf(line,"%lg %lg %lg %lg %lg",
                &setfl->beta[ij],&setfl->rcutq[ij],&setfl->Asigma[ij],
                &setfl->rq[ij],&setfl->rcutsigma[ij]) != 5) return 0;
 
     // last 4 values on the line after that
 
     if (fgets(line,MAXLINE,fptr) == NULL) return 0;
     if (sscanf(line,"%lg %lg %lg %d",
                &setfl->Ac[ij],&setfl->zeta[ij],&setfl->rs[ij],
                &setfl->tp[ij]) != 4) return 0;
 
     return 1;
   }
 
   return 0;
 }
 
 /* ----------------------------------------------------------------------
    cutoff function
 ------------------------------------------------------------------------- */
 
 double PairEIM::funccutoff(double rp, double rc, double r)
 {
   // the erfc argument is linear in r: rbig at r = rp, rsmall at r = rc
   // the result is rescaled so that it equals 1 when the argument is rbig
   // and 0 when the argument is rsmall
 
   const double rbig = setfl->rbig;
   const double rsmall = setfl->rsmall;
 
   const double arg = (rsmall-rbig)/(rc-rp)*(r-rp)+rbig;
   const double at_r = erfc(arg);
   const double at_big = erfc(rbig);
   const double at_small = erfc(rsmall);
 
   return (at_r-at_small)/(at_big-at_small);
 }
 
 /* ----------------------------------------------------------------------
    pair interaction function phi
 ------------------------------------------------------------------------- */
 
 double PairEIM::funcphi(int i, int j, double r)
 {
   // packed pair index, symmetric in i and j
 
   const int lo = (i < j) ? i : j;
   const int hi = (i < j) ? j : i;
   const int ij = (i == j) ? i : nelements*(lo+1) - (lo+1)*(lo+2)/2 + hi;
 
   // distances below 0.2 are evaluated at 0.2
 
   if (r < 0.2) r = 0.2;
 
   const double Eb = setfl->Eb[ij];
   const double r0 = setfl->r0[ij];
   const double alpha = setfl->alpha[ij];
   const double beta = setfl->beta[ij];
   const double rcutA = setfl->rcutphiA[ij];
   const double rcutR = setfl->rcutphiR[ij];
 
   // tp selects the functional form: 1 = exponential terms, 2 = power-law terms
   // any other tp gives zero
   // the subtracted term is cut off at rcutphiA, the added term at rcutphiR
 
   double value = 0.0;
 
   switch (setfl->tp[ij]) {
   case 1: {
     const double a = Eb*alpha / (beta-alpha);
     const double b = Eb*beta / (beta-alpha);
     if (r < rcutA) {
       value -= a*exp(-beta*(r/r0-1.0))*funccutoff(r0,rcutA,r);
     }
     if (r < rcutR) {
       value += b*exp(-alpha*(r/r0-1.0))*funccutoff(r0,rcutR,r);
     }
     break;
   }
   case 2: {
     const double a = Eb*alpha*pow(r0,beta)/(beta-alpha);
     const double b = a*beta/alpha*pow(r0,alpha-beta);
     if (r < rcutA) {
       value -= a/pow(r,beta)*funccutoff(r0,rcutA,r);
     }
     if (r < rcutR) {
       value += b/pow(r,alpha)*funccutoff(r0,rcutR,r);
     }
     break;
   }
   default:
     break;
   }
 
   return value;
 }
 
 /* ----------------------------------------------------------------------
    ion propensity function sigma
 ------------------------------------------------------------------------- */
 
 double PairEIM::funcsigma(int i, int j, double r)
 {
   // packed pair index, symmetric in i and j
 
   const int lo = (i < j) ? i : j;
   const int hi = (i < j) ? j : i;
   const int ij = (i == j) ? i : nelements*(lo+1) - (lo+1)*(lo+2)/2 + hi;
 
   // distances below 0.2 are evaluated at 0.2
 
   if (r < 0.2) r = 0.2;
 
   // zero at and beyond rcutq
 
   if (!(r < setfl->rcutq[ij])) return 0.0;
 
   // sign depends on argument order through negativity[j] - negativity[i]
 
   const double dneg = setfl->negativity[j]-setfl->negativity[i];
   return setfl->Asigma[ij]*dneg *
     funccutoff(setfl->rq[ij],setfl->rcutq[ij],r);
 }
 
 /* ----------------------------------------------------------------------
    charge-charge interaction function sigma
 ------------------------------------------------------------------------- */
 
 double PairEIM::funccoul(int i, int j, double r)
 {
   // packed pair index, symmetric in i and j
 
   const int lo = (i < j) ? i : j;
   const int hi = (i < j) ? j : i;
   const int ij = (i == j) ? i : nelements*(lo+1) - (lo+1)*(lo+2)/2 + hi;
 
   // distances below 0.2 are evaluated at 0.2
 
   if (r < 0.2) r = 0.2;
 
   // zero at and beyond rcutsigma
 
   if (!(r < setfl->rcutsigma[ij])) return 0.0;
 
   // exponential decay times the cutoff function
 
   const double decay = exp(-setfl->zeta[ij]*r);
   return setfl->Ac[ij]*decay*
     funccutoff(setfl->rs[ij],setfl->rcutsigma[ij],r);
 }
 
 /* ---------------------------------------------------------------------- */
 
 int PairEIM::pack_forward_comm(int n, int *list, double *buf, 
                                int pbc_flag, int *pbc)
 {
   // rhofp selects what is packed: 1 = rho, 2 = fp, anything else = nothing
   // pbc_flag and pbc are not used
   // returns the number of values written to buf
 
   int count = 0;
 
   switch (rhofp) {
   case 1:
     for (int k = 0; k < n; k++) buf[count++] = rho[list[k]];
     break;
   case 2:
     for (int k = 0; k < n; k++) buf[count++] = fp[list[k]];
     break;
   default:
     break;
   }
 
   return count;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairEIM::unpack_forward_comm(int n, int first, double *buf)
 {
   // rhofp selects the destination: 1 = rho, 2 = fp, anything else = nothing
   // buf[0..n-1] overwrites entries first..first+n-1
 
   const int last = first + n;
   int count = 0;
 
   switch (rhofp) {
   case 1:
     for (int k = first; k < last; k++) rho[k] = buf[count++];
     break;
   case 2:
     for (int k = first; k < last; k++) fp[k] = buf[count++];
     break;
   default:
     break;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int PairEIM::pack_reverse_comm(int n, int first, double *buf)
 {
   // rhofp selects the source: 1 = rho, 2 = fp, anything else = nothing
   // entries first..first+n-1 are copied to buf
   // returns the number of values written to buf
 
   const int last = first + n;
   int count = 0;
 
   switch (rhofp) {
   case 1:
     for (int k = first; k < last; k++) buf[count++] = rho[k];
     break;
   case 2:
     for (int k = first; k < last; k++) buf[count++] = fp[k];
     break;
   default:
     break;
   }
 
   return count;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairEIM::unpack_reverse_comm(int n, int *list, double *buf)
 {
   // rhofp selects the destination: 1 = rho, 2 = fp, anything else = nothing
   // received values are added to (not assigned to) the listed entries
 
   int count = 0;
 
   switch (rhofp) {
   case 1:
     for (int k = 0; k < n; k++) rho[list[k]] += buf[count++];
     break;
   case 2:
     for (int k = 0; k < n; k++) fp[list[k]] += buf[count++];
     break;
   default:
     break;
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
 ------------------------------------------------------------------------- */
 
 double PairEIM::memory_usage()
 {
   // three contributions, each counted in doubles:
   //   maxeatom values, 6 values for each of maxvatom, 2 values for each of nmax
   // NOTE(review): the nmax term presumably covers rho and fp -- confirm
 
   const double eatom_bytes = maxeatom * sizeof(double);
   const double vatom_bytes = maxvatom*6 * sizeof(double);
   const double local_bytes = 2 * nmax * sizeof(double);
 
   return eatom_bytes + vatom_bytes + local_bytes;
 }
diff --git a/src/MANYBODY/pair_lcbop.cpp b/src/MANYBODY/pair_lcbop.cpp
index 4f9f89749..464f50cac 100644
--- a/src/MANYBODY/pair_lcbop.cpp
+++ b/src/MANYBODY/pair_lcbop.cpp
@@ -1,1290 +1,1290 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Dominik Wójt (Wroclaw University of Technology)
      based on pair_airebo by Ase Henry (MIT)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "mpi.h"
 #include "pair_lcbop.h"
 #include "atom.h"
 #include "neighbor.h"
 #include "neigh_request.h"
 #include "force.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "my_page.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 #define MAXLINE 1024
 #define TOL 1.0e-9
 #define PGDELTA 1
 
 /* ---------------------------------------------------------------------- */
 
 PairLCBOP::PairLCBOP(LAMMPS *lmp) : Pair(lmp) {
   // pair style flags
   ghostneigh = 1;
   manybody_flag = 1;
   one_coeff = 1;
   single_enable = 0;
 
   // per-atom N and M arrays, allocated later in SR_neigh()
   M = NULL;
   N = NULL;
 
   // short-range neighbor list storage, set up in init_style()/SR_neigh()
   ipage = NULL;
   SR_firstneigh = NULL;
   SR_numneigh = NULL;
   maxlocal = 0;
   oneatom = 0;
   pgsize = 0;
 }
 
 /* ----------------------------------------------------------------------
    check if allocated, since class can be destructed when incomplete
 ------------------------------------------------------------------------- */
 
 PairLCBOP::~PairLCBOP()
 {
   // per-atom arrays and short-range neighbor storage;
   // these pointers start out NULL in the constructor
   memory->destroy(SR_numneigh);
   memory->sfree(SR_firstneigh);
   delete [] ipage;
   memory->destroy(N);
   memory->destroy(M);
 
   // the per-type tables only exist once allocate() has run
   if (!allocated) return;
 
   delete [] map;
   memory->destroy(cutghost);
   memory->destroy(cutsq);
   memory->destroy(setflag);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLCBOP::compute(int eflag, int vflag)
 {
   // set up energy/virial tallying, or switch all tally flags off
   if (!(eflag || vflag)) {
     vflag_atom = 0;
     vflag_fdotr = 0;
     evflag = 0;
   } else {
     ev_setup(eflag,vflag);
   }
 
   // rebuild the short-range neighbor list (plus N and M),
   // then add the short-range and long-range contributions
   SR_neigh();
   FSR(eflag,vflag);
   FLR(eflag,vflag);
 
   if (vflag_fdotr) {
     virial_fdotr_compute();
   }
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairLCBOP::allocate()
 {
   allocated = 1;
 
   const int ntypes = atom->ntypes;
   const int dim = ntypes + 1;     // per-type tables are indexed from 1
 
   memory->create(setflag,dim,dim,"pair:setflag");
   memory->create(cutsq,dim,dim,"pair:cutsq");
   memory->create(cutghost,dim,dim,"pair:cutghost");
   map = new int[dim];
 
   // clear the upper triangle (i <= j) of setflag
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       setflag[itype][jtype] = 0;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairLCBOP::settings(int narg, char **arg) {
   if( narg != 0 ) error->all(FLERR,"Illegal pair_style command");
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairLCBOP::coeff(int narg, char **arg)
 {
   if (!allocated) allocate();
 
   if (narg != 3 + atom->ntypes)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // insure I,J args are * *
 
   if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // read args that map atom types to C and NULL
   // map[i] = which element (0 for C) the Ith atom type is, -1 if NULL
 
   for (int i = 3; i < narg; i++) {
     if (strcmp(arg[i],"NULL") == 0) {
       map[i-2] = -1;
     } else if (strcmp(arg[i],"C") == 0) {
       map[i-2] = 0;
     } else error->all(FLERR,"Incorrect args for pair coefficients");
   }
 
   // read potential file and initialize fitting splines
 
   read_file(arg[2]);
   spline_init();
 
   // clear setflag since coeff() called once with I,J = * *
 
   int n = atom->ntypes;
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       setflag[i][j] = 0;
 
   // set setflag i,j for type pairs where both are mapped to elements
 
   int count = 0;
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       if (map[i] >= 0 && map[j] >= 0) {
         setflag[i][j] = 1;
         count++;
       }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLCBOP::init_style()
 {
   // checks prerequisites, requests a full neighbor list that includes
   // ghost atoms, and (re)creates the page storage for the local SR list
 
   // FSR()/FLR() select which half of the pairs to process from atom tags,
   // and both apply forces to i and j, hence the two requirements below
 
   if (atom->tag_enable == 0)
     error->all(FLERR,"Pair style LCBOP requires atom IDs");
   if (force->newton_pair == 0)
     error->all(FLERR,"Pair style LCBOP requires newton pair on");
 
   // need a full neighbor list, including neighbors of ghosts
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
   neighbor->requests[irequest]->ghost = 1;
 
   // local SR neighbor list
   // create pages if first time or if neighbor pgsize/oneatom has changed
 
   int create = 0;
   if (ipage == NULL) create = 1;
   if (pgsize != neighbor->pgsize) create = 1;
   if (oneatom != neighbor->oneatom) create = 1;
 
   if (create) {
     // drop any previous pages and remember the sizes they are built for
     delete [] ipage;
     pgsize = neighbor->pgsize;
     oneatom = neighbor->oneatom;
 
     // one MyPage per thread (comm->nthreads entries)
     int nmypage = comm->nthreads;
     ipage = new MyPage<int>[nmypage];
     for (int i = 0; i < nmypage; i++)
       ipage[i].init(oneatom,pgsize,PGDELTA);
   }
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairLCBOP::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   // squared short-range and long-range cutoffs
 
   r_2_sq = r_2*r_2;
   cutLRsq = r_2_LR*r_2_LR;
 
   // cutghost = SR cutoff used in SR_neigh() for neighbors of ghosts,
   // stored symmetrically for i,j and j,i
 
   cutghost[j][i] = cutghost[i][j] = r_2;
 
   // returned cutoff = furthest distance from an owned atom at which
   // another atom can feel a force, i.e. the ghost cutoff:
   //   SR term: chain M-K-I-J-L-N with I owned and J ghost,
   //            so I to N spans at most 3 SR distances (cut3rebo)
   //   LR term: r_2_LR
 
   cut3rebo = 3.0 * r_2;
 
   return MAX( cut3rebo,r_2_LR );
 }
 
 /* ----------------------------------------------------------------------
    create SR neighbor list from main neighbor list
    SR neighbor list stores neighbors of ghost atoms
 ------------------------------------------------------------------------- */
 
 void PairLCBOP::SR_neigh()
 {
   // builds, for every owned and ghost atom in the full neighbor list:
   //   SR_firstneigh[i]/SR_numneigh[i] = neighbors j with rsq < r_2_sq
   //   N[i] = sum over those neighbors of f_c(r_ij,r_1,r_2)
   //   M[i] = sum over those neighbors of f_c_ij * (1 - f_c_LR(N[j]-f_c_ij,2,3))
 
   int i,j,ii,jj,n,allnum,jnum;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq,dS;
   int *ilist,*jlist,*numneigh,**firstneigh;
   int *neighptr;
 
   double **x = atom->x;
 
   if (atom->nmax > maxlocal) {  // ensure there is enough space
     maxlocal = atom->nmax;      // for atoms and ghosts allocated
     memory->destroy(SR_numneigh);
     memory->sfree(SR_firstneigh);
     memory->destroy(N);
     memory->destroy(M);
     memory->create(SR_numneigh,maxlocal,"LCBOP:numneigh");
     SR_firstneigh = (int **) memory->smalloc(maxlocal*sizeof(int *),
                            "LCBOP:firstneigh");
     memory->create(N,maxlocal,"LCBOP:N");
     memory->create(M,maxlocal,"LCBOP:M");
   }
 
   // allnum covers owned (inum) plus ghost (gnum) entries of ilist
 
   allnum = list->inum + list->gnum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // store all SR neighs of owned and ghost atoms
   // scan full neighbor list of I
 
   ipage->reset();
 
   for (ii = 0; ii < allnum; ii++) {
     i = ilist[ii];
 
     n = 0;
     neighptr = ipage->vget();
 
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     N[i] = 0.0;
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       // keep j as an SR neighbor and add its cutoff weight to N[i];
       // the derivative returned in dS is not used here
       if (rsq < r_2_sq) {
         neighptr[n++] = j;
         N[i] += f_c(sqrt(rsq),r_1,r_2,&dS);
       }
     }
 
     SR_firstneigh[i] = neighptr;
     SR_numneigh[i] = n;
     ipage->vgot(n);
     if (ipage->status())
       error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
   }
 
   // calculate M_i
   // done in a second pass because it reads N[j] of the neighbors
 
   for (ii = 0; ii < allnum; ii++) {
     i = ilist[ii];
 
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     M[i] = 0.0;
 
     jlist = SR_firstneigh[i];
     jnum = SR_numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < r_2_sq) {
         double f_c_ij = f_c(sqrt(rsq),r_1,r_2,&dS);
         // Nji = N of atom j with the i-j contribution removed
         double Nji = N[j]-f_c_ij;
         // F(xij) = 1-f_c_LR(Nji, 2,3,&dummy)
         M[i] += f_c_ij * ( 1-f_c_LR(Nji, 2,3,&dS) );
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   Short range forces and energy
 ------------------------------------------------------------------------- */
 
 void PairLCBOP::FSR(int eflag, int vflag)
 {
   // adds short-range pair forces to atom->f and tallies
   // evdwl = VR - Bij*VA per pair, where
   //   VR = A*exp(-alpha*r) * f_c
   //   VA = ( B_1*exp(-beta_1*r) + B_2*exp(-beta_2*r) ) * f_c
   // and Bij comes from bondorder(), which itself adds forces to f
   // the vflag argument is not read here; tallying is driven by the
   // evflag and vflag_atom members
 
   int i,j,jj,ii,inum;
   tagint itag,jtag;
   double delx,dely,delz,fpair,xtmp,ytmp,ztmp;
   double r_sq,rijmag,f_c_ij,df_c_ij;
   double VR,dVRdi,VA,Bij,dVAdi,dVA;
   double del[3];
   int *ilist,*SR_neighs;
 
   double **x = atom->x;
   double **f = atom->f;
   tagint *tag = atom->tag;
   int nlocal = atom->nlocal;
   int newton_pair = force->newton_pair;
 
   inum = list->inum;
   ilist = list->ilist;
 
   // two-body interactions from SR neighbor list, skip half of them
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     itag = tag[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     SR_neighs = SR_firstneigh[i];
 
     for (jj = 0; jj < SR_numneigh[i]; jj++) {
       j = SR_neighs[jj];
       jtag = tag[j];
 
       // each pair appears as (i,j) and (j,i); keep one of the two based on
       // tag ordering and parity, or on coordinates when the tags are equal
       if (itag > jtag) {
         if ((itag+jtag) % 2 == 0) continue;
       } else if (itag < jtag) {
         if ((itag+jtag) % 2 == 1) continue;
       } else {
         if (x[j][2] < ztmp) continue;
         if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
         if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
       }
 
       delx = x[i][0] - x[j][0];
       dely = x[i][1] - x[j][1];
       delz = x[i][2] - x[j][2];
       r_sq = delx*delx + dely*dely + delz*delz;
       rijmag = sqrt(r_sq);
       f_c_ij = f_c( rijmag,r_1,r_2,&df_c_ij );
       if( f_c_ij <= TOL ) continue;
 
       // repulsive term and its radial derivative, both scaled by the cutoff
       VR = A*exp(-alpha*rijmag);
       dVRdi = -alpha*VR;
       dVRdi = dVRdi*f_c_ij + df_c_ij*VR; // VR -> VR * f_c_ij
       VR *= f_c_ij;
 
       // attractive term: sum of two exponentials, then scaled by the cutoff
       VA = dVA = 0.0;
       {
         double term = B_1 * exp(-beta_1*rijmag);
         VA += term;
         dVA += -beta_1 * term;
         term = B_2 * exp(-beta_2*rijmag);
         VA += term;
         dVA += -beta_2 * term;
       }
       dVA = dVA*f_c_ij + df_c_ij*VA; // VA -> VA * f_c_ij
       VA *= f_c_ij;
       del[0] = delx;
       del[1] = dely;
       del[2] = delz;
       // bondorder() returns Bij and adds the forces from its own
       // position dependence directly to f
       Bij = bondorder(i,j,del,rijmag,VA,f,vflag_atom);
       dVAdi = Bij*dVA;
 
       // F = (dVRdi+dVAdi)*(-grad rijmag)
       // grad_i rijmag =  \vec{rij} /rijmag
       // grad_j rijmag = -\vec{rij} /rijmag
       // sign of dVAdi below follows the energy E = VR - Bij*VA
       fpair = -(dVRdi-dVAdi) / rijmag;
       f[i][0] += delx*fpair;
       f[i][1] += dely*fpair;
       f[i][2] += delz*fpair;
       f[j][0] -= delx*fpair;
       f[j][1] -= dely*fpair;
       f[j][2] -= delz*fpair;
 
       double evdwl=0.0;
       if (eflag) evdwl = VR - Bij*VA;
       if (evflag) ev_tally(i,j,nlocal,newton_pair,
         evdwl,0.0,fpair,delx,dely,delz);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    compute long range forces and energy
 ------------------------------------------------------------------------- */
 
 void PairLCBOP::FLR(int eflag, int vflag)
 {
   // adds long-range pair forces to atom->f and tallies evdwl = V per pair,
   // where V is a two-branch exponential form (parameters eps_1/lambda_1/v_1
   // for r < r_0, eps_2/lambda_2/v_2 otherwise) multiplied by the switch
   //   (1 - f_c(r,r_1,r_2)) * f_c_LR(r,r_1_LR,r_2_LR)
   // the vflag argument is not read here; tallying is driven by evflag
 
   int i,j,jj,ii;
   tagint itag,jtag;
   double delx,dely,delz,fpair,xtmp,ytmp,ztmp;
   double r_sq,rijmag,f_c_ij,df_c_ij;
   double V,dVdi;
 
   double **x = atom->x;
   double **f = atom->f;
   tagint *tag = atom->tag;
   int nlocal = atom->nlocal;
   int newton_pair = force->newton_pair;
 
   int inum = list->inum;
   int *ilist = list->ilist;
   int *numneigh = list->numneigh;
   int **firstneigh = list->firstneigh;
 
   // two-body interactions from full neighbor list, skip half of them
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     itag = tag[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     int *neighs = firstneigh[i];
 
     for (jj = 0; jj < numneigh[i]; jj++) {
       j = neighs[jj];
       j &= NEIGHMASK;
       jtag = tag[j];
 
       // each pair appears as (i,j) and (j,i); keep one of the two based on
       // tag ordering and parity, or on coordinates when the tags are equal
       if (itag > jtag) {
         if ((itag+jtag) % 2 == 0) continue;
       } else if (itag < jtag) {
         if ((itag+jtag) % 2 == 1) continue;
       } else {
         if (x[j][2] < ztmp) continue;
         if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
         if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
       }
 
       delx = x[i][0] - x[j][0];
       dely = x[i][1] - x[j][1];
       delz = x[i][2] - x[j][2];
       r_sq = delx*delx + dely*dely + delz*delz;
       rijmag = sqrt(r_sq);
       // complement of the short-range cutoff, with its derivative negated
       f_c_ij = 1-f_c( rijmag,r_1,r_2,&df_c_ij );
       df_c_ij = -df_c_ij;
       // derivative may be inherited from previous call, see f_c_LR definition
       f_c_ij *= f_c_LR( rijmag, r_1_LR, r_2_LR, &df_c_ij );
       if( f_c_ij <= TOL ) continue;
 
       // unswitched potential and radial derivative; branch chosen at r_0
       V = dVdi = 0;
       if( rijmag<r_0 ) {
         double exp_part = exp( -lambda_1*(rijmag-r_0) );
         V = eps_1*( exp_part*exp_part - 2*exp_part) + v_1;
         dVdi = 2*eps_1*lambda_1*exp_part*( 1-exp_part );
       } else {
         double exp_part = exp( -lambda_2*(rijmag-r_0) );
         V = eps_2*( exp_part*exp_part - 2*exp_part) + v_2;
         dVdi = 2*eps_2*lambda_2*exp_part*( 1-exp_part );
       }
       dVdi = dVdi*f_c_ij + df_c_ij*V; // V -> V * f_c_ij
       V *= f_c_ij;
 
       // F = (dVdi)*(-grad rijmag)
       // grad_i rijmag =  \vec{rij} /rijmag
       // grad_j rijmag = -\vec{rij} /rijmag
       fpair = -dVdi / rijmag;
       f[i][0] += delx*fpair;
       f[i][1] += dely*fpair;
       f[i][2] += delz*fpair;
       f[j][0] -= delx*fpair;
       f[j][1] -= dely*fpair;
       f[j][2] -= delz*fpair;
 
       double evdwl=0.0;
       if (eflag) evdwl = V;
       if (evflag) ev_tally(i,j,nlocal,newton_pair,
         evdwl,0.0,fpair,delx,dely,delz);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    forces for Nij and Mij
 ------------------------------------------------------------------------- */
 
 void PairLCBOP::FNij( int i, int j, double factor, double **f, int vflag_atom ) {
   int atomi = i;
   int atomj = j;
   int *SR_neighs = SR_firstneigh[i];
   double **x = atom->x;
   for( int k=0; k<SR_numneigh[i]; k++ ) {
     int atomk = SR_neighs[k];
     if (atomk != atomj) {
       double rik[3];
       rik[0] = x[atomi][0]-x[atomk][0];
       rik[1] = x[atomi][1]-x[atomk][1];
       rik[2] = x[atomi][2]-x[atomk][2];
       double riksq = (rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2]);
       if( riksq > r_1*r_1 ) { // &&  riksq < r_2*r_2, if second condition not fulfilled neighbor would not be in the list
         double rikmag = sqrt(riksq);
         double df_c_ik;
         f_c( rikmag, r_1, r_2, &df_c_ik );
 
         // F = factor*df_c_ik*(-grad rikmag)
         // grad_i rikmag =  \vec{rik} /rikmag
         // grad_k rikmag = -\vec{rik} /rikmag
         double fpair = -factor*df_c_ik / rikmag;
         f[atomi][0] += rik[0]*fpair;
         f[atomi][1] += rik[1]*fpair;
         f[atomi][2] += rik[2]*fpair;
         f[atomk][0] -= rik[0]*fpair;
         f[atomk][1] -= rik[1]*fpair;
         f[atomk][2] -= rik[2]*fpair;
 
         if (vflag_atom) v_tally2(atomi,atomk,fpair,rik);
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLCBOP::FMij( int i, int j, double factor, double **f, int vflag_atom ) {
   // for every SR neighbor k of atom i other than j, apply the forces that
   // come from the term f_c(r_ik) * F(N_ki), with F(x) = 1 - f_c_LR(x,2,3):
   //   - the part from d f_c(r_ik)/dr acts along r_ik on atoms i and k
   //   - the part from dF/dN_ki is passed on to FNij() for atom k
   // factor is the prefactor supplied by the caller
 
   int atomi = i;
   int atomj = j;
   int *SR_neighs = SR_firstneigh[i];
   double **x = atom->x;
   for( int k=0; k<SR_numneigh[i]; k++ ) {
     int atomk = SR_neighs[k];
     if (atomk != atomj) {
       double rik[3];
       rik[0] = x[atomi][0]-x[atomk][0];
       rik[1] = x[atomi][1]-x[atomk][1];
       rik[2] = x[atomi][2]-x[atomk][2];
       double rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2]));
       double df_c_ik;
       double f_c_ik = f_c( rikmag, r_1, r_2, &df_c_ik );
 
       // N is a per-atom array, so it must be indexed by the atom index
       // atomk (as SR_neigh() and bondorder() do), not by the position k
       // of that atom inside the neighbor list of i
       double Nki = N[atomk]-(f_c_ik);
 
       double dF=0;
       double Fx = 1-f_c_LR(Nki, 2,3,&dF);
       dF = -dF;   // derivative of 1-f_c_LR
 
       if( df_c_ik > TOL ) {
         double factor2 = factor*df_c_ik*Fx;
         // F = factor2*(-grad rikmag)
         // grad_i rikmag =  \vec{rik} /rikmag
         // grad_k rikmag = -\vec{rik} /rikmag
         double fpair = -factor2 / rikmag;
         f[atomi][0] += rik[0]*fpair;
         f[atomi][1] += rik[1]*fpair;
         f[atomi][2] += rik[2]*fpair;
         f[atomk][0] -= rik[0]*fpair;
         f[atomk][1] -= rik[1]*fpair;
         f[atomk][2] -= rik[2]*fpair;
         if (vflag_atom) v_tally2(atomi,atomk,fpair,rik);
       }
 
       if( dF > TOL ) {
         double factor2 = factor*f_c_ik*dF;
         FNij( atomk, atomi, factor2, f, vflag_atom );
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    Bij function
 ------------------------------------------------------------------------- */
 
 double PairLCBOP::bondorder(int i, int j, double rij[3],
     double rijmag, double VA,
     double **f, int vflag_atom)
 {
   // returns Bij = 0.5*( bij + bji + Fij_conj ) for the pair i-j
   //   rij    = x[i] - x[j] as passed by the caller, rijmag = its length
   //   VA     = value used as prefactor of all forces generated here
   //   f      = force array; b(), FNij() and FMij() add to it
 
   double bij, bji;
   /* bij & bji */{
     double rji[3];
     rji[0] = -rij[0]; rji[1] = -rij[1]; rji[2] = -rij[2];
     bij = b(i,j,rij,rijmag,VA,f,vflag_atom);
     bji = b(j,i,rji,rijmag,VA,f,vflag_atom);
   }
 
   double Fij_conj;
   /* F_conj */{
     double dummy;
 
     double df_c_ij;
     double f_c_ij = f_c( rijmag, r_1, r_2, &df_c_ij );
     // N of each atom without the i-j contribution, capped at 3
     double Nij = MIN( 3, N[i]-(f_c_ij) );
     double Nji = MIN( 3, N[j]-(f_c_ij) );
 
     // F(xij) = 1-f_c(Nji, 2,3,&dummy)
     // NOTE(review): SR_neigh() accumulates M with f_c_LR(Nji,2,3) while the
     // i-j contribution is removed here with f_c(Nji,2,3) - confirm that the
     // two functions agree for these arguments
     double Mij = M[i] - f_c_ij*( 1-f_c(Nji, 2,3,&dummy) );
     double Mji = M[j] - f_c_ij*( 1-f_c(Nij, 2,3,&dummy) );
     Mij = MIN( Mij, 3 );
     Mji = MIN( Mji, 3 );
 
     // Nij_el = (4 - Mij) / (Nij + 1 - Mij) and its partial derivatives
     double Nij_el, dNij_el_dNij, dNij_el_dMij;
     double Nji_el, dNji_el_dNji, dNji_el_dMji;
     {
       double num_Nij_el = 4 - Mij;
       double num_Nji_el = 4 - Mji;
       double den_Nij_el = Nij + 1 - Mij;
       double den_Nji_el = Nji + 1 - Mji;
       Nij_el = num_Nij_el / den_Nij_el;
       Nji_el = num_Nji_el / den_Nji_el;
       dNij_el_dNij = -Nij_el/den_Nij_el;
       dNji_el_dNji = -Nji_el/den_Nji_el;
       dNij_el_dMij = ( -1 + Nij_el ) /den_Nij_el;
       dNji_el_dMji = ( -1 + Nji_el ) /den_Nji_el;
     }
 
     // Nconj is clamped to [0,1]; its derivatives are zeroed when clamped
     double Nconj;
     double dNconj_dNij;
     double dNconj_dNji;
     double dNconj_dNel;
     {
       double num_Nconj = ( Nij+1 )*( Nji+1 )*( Nij_el+Nji_el ) - 4*( Nij+Nji+2);
       // eps is added to the denominator (read from the potential file)
       double den_Nconj = Nij*( 3-Nij )*( Nji+1 ) + Nji*( 3-Nji )*( Nij+1 ) + eps;
       Nconj = num_Nconj / den_Nconj;
       if( Nconj <= 0 ) {
         Nconj = 0;
         dNconj_dNij = 0;
         dNconj_dNji = 0;
         dNconj_dNel = 0;
       } else if( Nconj >= 1 ) {
         Nconj = 1;
         dNconj_dNij = 0;
         dNconj_dNji = 0;
         dNconj_dNel = 0;
       } else {
         dNconj_dNij = (
             ( (Nji+1)*(Nij_el + Nji_el)-4)
             - Nconj*( (Nji+1)*(3-2*Nij) + Nji*(3-Nji) )
           ) /den_Nconj;
         dNconj_dNji = (
             ( (Nij+1)*(Nji_el + Nij_el)-4)
             - Nconj*( (Nij+1)*(3-2*Nji) + Nij*(3-Nij) )
           ) /den_Nconj;
         dNconj_dNel = (Nij+1)*(Nji+1) / den_Nconj;
       }
     }
 
     double dF_dNij, dF_dNji, dF_dNconj;
     Fij_conj = F_conj( Nij, Nji, Nconj, &dF_dNij, &dF_dNji, &dF_dNconj );
 
     // the force calls below are skipped when the corresponding quantity
     // sits at its cap of 3 (within TOL)
 
     /*forces for Nij*/
     if( 3-Nij > TOL ) {
       double factor = -VA*0.5*( dF_dNij + dF_dNconj*( dNconj_dNij + dNconj_dNel*dNij_el_dNij ) );
       FNij( i, j, factor, f, vflag_atom );
     }
     /*forces for Nji*/
     if( 3-Nji > TOL ) {
       double factor = -VA*0.5*( dF_dNji + dF_dNconj*( dNconj_dNji + dNconj_dNel*dNji_el_dNji ) );
       FNij( j, i, factor, f, vflag_atom );
     }
     /*forces for Mij*/
     if( 3-Mij > TOL ) {
       double factor = -VA*0.5*( dF_dNconj*dNconj_dNel*dNij_el_dMij );
       FMij( i, j, factor, f, vflag_atom );
     }
     /*forces for Mji*/
     if( 3-Mji > TOL ) {
       double factor = -VA*0.5*( dF_dNconj*dNconj_dNel*dNji_el_dMji );
       FMij( j, i, factor, f, vflag_atom );
     }
   }
 
 
   double Bij = 0.5*( bij + bji + Fij_conj );
   return Bij;
 }
 
 /* ----------------------------------------------------------------------
   bij function
 ------------------------------------------------------------------------- */
 
 double PairLCBOP::b(int i, int j, double rij[3],
                  double rijmag, double VA,
                  double **f, int vflag_atom) {
   // returns bij = ( 1 + sum_k f_c(r_ik)*G(cos_ijk)*H(r_ij-r_ik) )^(-delta),
   // summed over the SR neighbors k of atom i other than j, and adds the
   // forces from the position dependence of that sum to f
   //   rij = x[i] - x[j] as passed by the caller, rijmag = its length
   //   VA  = value used as prefactor of the generated forces
 
   int *SR_neighs = SR_firstneigh[i];
   double **x = atom->x;
   int atomi = i;
   int atomj = j;
 
   //calculate bij magnitude
   double bij = 1.0;
   for (int k = 0; k < SR_numneigh[i]; k++) {
     int atomk = SR_neighs[k];
     if (atomk != atomj) {
       double rik[3];
       rik[0] = x[atomi][0]-x[atomk][0];
       rik[1] = x[atomi][1]-x[atomk][1];
       rik[2] = x[atomi][2]-x[atomk][2];
       double rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2]));
       double delta_ijk = rijmag-rikmag;
       double dummy;
       double f_c_ik = f_c( rikmag, r_1, r_2, &dummy );
       double cos_ijk = ((rij[0]*rik[0])+(rij[1]*rik[1])+(rij[2]*rik[2]))
                 / (rijmag*rikmag);
       // clamp against round-off pushing the cosine outside [-1,1]
       cos_ijk = MIN(cos_ijk,1.0);
       cos_ijk = MAX(cos_ijk,-1.0);
 
       double G = gSpline(cos_ijk,   &dummy);
       double H = hSpline(delta_ijk, &dummy);
       bij += (f_c_ik*G*H);
     }
   }
   bij = pow( bij, -delta );
 
   // bij forces
   // second pass over the same neighbors because the final bij is needed
 
   for (int k = 0; k < SR_numneigh[i]; k++) {
     int atomk = SR_neighs[k];
     if (atomk != atomj) {
       double rik[3];
       rik[0] = x[atomi][0]-x[atomk][0];
       rik[1] = x[atomi][1]-x[atomk][1];
       rik[2] = x[atomi][2]-x[atomk][2];
       double rikmag = sqrt((rik[0]*rik[0])+(rik[1]*rik[1])+(rik[2]*rik[2]));
       double delta_ijk = rijmag-rikmag;
       double df_c_ik;
       double f_c_ik = f_c( rikmag, r_1, r_2, &df_c_ik );
       double cos_ijk = ((rij[0]*rik[0])+(rij[1]*rik[1])+(rij[2]*rik[2]))
                 / (rijmag*rikmag);
       cos_ijk = MIN(cos_ijk,1.0);
       cos_ijk = MAX(cos_ijk,-1.0);
 
       // gradients of cos_ijk with respect to the positions of j, k and i;
       // the i gradient follows from the three gradients summing to zero
       double dcos_ijk_dri[3],dcos_ijk_drj[3],dcos_ijk_drk[3];
       dcos_ijk_drj[0] = -rik[0] / (rijmag*rikmag)
              + cos_ijk * rij[0] / (rijmag*rijmag);
       dcos_ijk_drj[1] = -rik[1] / (rijmag*rikmag)
              + cos_ijk * rij[1] / (rijmag*rijmag);
       dcos_ijk_drj[2] = -rik[2] / (rijmag*rikmag)
              + cos_ijk * rij[2] / (rijmag*rijmag);
 
       dcos_ijk_drk[0] = -rij[0] / (rijmag*rikmag)
              + cos_ijk * rik[0] / (rikmag*rikmag);
       dcos_ijk_drk[1] = -rij[1] / (rijmag*rikmag)
              + cos_ijk * rik[1] / (rikmag*rikmag);
       dcos_ijk_drk[2] = -rij[2] / (rijmag*rikmag)
              + cos_ijk * rik[2] / (rikmag*rikmag);
 
       dcos_ijk_dri[0] = -dcos_ijk_drk[0] - dcos_ijk_drj[0];
       dcos_ijk_dri[1] = -dcos_ijk_drk[1] - dcos_ijk_drj[1];
       dcos_ijk_dri[2] = -dcos_ijk_drk[2] - dcos_ijk_drj[2];
 
       double dG, dH;
       double G = gSpline( cos_ijk,   &dG );
       double H = hSpline( delta_ijk, &dH );
       // NOTE(review): -0.5*bij^3 is the derivative of S^(-delta) with
       // respect to S only for delta == 0.5, while the magnitude above uses
       // the delta member - confirm the potential file always sets 0.5
       double tmp = -VA*0.5*(-0.5*bij*bij*bij);
 
       double fi[3], fj[3], fk[3];
 
       double tmp2 = -tmp*df_c_ik*G*H/rikmag;
       // F = tmp*df_c_ik*G*H*(-grad rikmag)
       // grad_i rikmag =  \vec{rik} /rikmag
       // grad_k rikmag = -\vec{rik} /rikmag
       fi[0] =  tmp2*rik[0];
       fi[1] =  tmp2*rik[1];
       fi[2] =  tmp2*rik[2];
       fk[0] = -tmp2*rik[0];
       fk[1] = -tmp2*rik[1];
       fk[2] = -tmp2*rik[2];
 
 
       tmp2 = -tmp*f_c_ik*dG*H;
       // F = tmp*f_c_ik*dG*H*(-grad cos_ijk)
       // grad_i cos_ijk = dcos_ijk_dri
       // grad_j cos_ijk = dcos_ijk_drj
       // grad_k cos_ijk = dcos_ijk_drk
       fi[0] += tmp2*dcos_ijk_dri[0];
       fi[1] += tmp2*dcos_ijk_dri[1];
       fi[2] += tmp2*dcos_ijk_dri[2];
       fj[0] =  tmp2*dcos_ijk_drj[0];
       fj[1] =  tmp2*dcos_ijk_drj[1];
       fj[2] =  tmp2*dcos_ijk_drj[2];
       fk[0] += tmp2*dcos_ijk_drk[0];
       fk[1] += tmp2*dcos_ijk_drk[1];
       fk[2] += tmp2*dcos_ijk_drk[2];
 
       tmp2 = -tmp*f_c_ik*G*dH;
       // F = tmp*f_c_ik*G*dH*(-grad delta_ijk)
       // grad_i delta_ijk =  \vec{rij} /rijmag - \vec{rik} /rikmag
       // grad_j delta_ijk = -\vec{rij} /rijmag
       // grad_k delta_ijk =  \vec{rik} /rikmag
       fi[0] += tmp2*( rij[0]/rijmag - rik[0]/rikmag );
       fi[1] += tmp2*( rij[1]/rijmag - rik[1]/rikmag );
       fi[2] += tmp2*( rij[2]/rijmag - rik[2]/rikmag );
       fj[0] += tmp2*( -rij[0]/rijmag );
       fj[1] += tmp2*( -rij[1]/rijmag );
       fj[2] += tmp2*( -rij[2]/rijmag );
       fk[0] += tmp2*( rik[0]/rikmag );
       fk[1] += tmp2*( rik[1]/rikmag );
       fk[2] += tmp2*( rik[2]/rikmag );
 
       f[atomi][0] += fi[0]; f[atomi][1] += fi[1]; f[atomi][2] += fi[2];
       f[atomj][0] += fj[0]; f[atomj][1] += fj[1]; f[atomj][2] += fj[2];
       f[atomk][0] += fk[0]; f[atomk][1] += fk[1]; f[atomk][2] += fk[2];
 
       if (vflag_atom) {
         // v_tally3 is given the j and k forces with vectors pointing
         // from atom i to atoms j and k
         double rji[3], rki[3];
         rji[0] = -rij[0]; rji[1] = -rij[1]; rji[2] = -rij[2];
         rki[0] = -rik[0]; rki[1] = -rik[1]; rki[2] = -rik[2];
         v_tally3(atomi,atomj,atomk,fj,fk,rji,rki);
       }
     }
   }
 
   return bij;
 }
 
 /* ----------------------------------------------------------------------
    spline interpolation for G
 ------------------------------------------------------------------------- */
 
 void PairLCBOP::g_decompose_x( double x, size_t *field_idx, double *offset ) {
   // find the spline interval containing x: the first interval whose upper
   // mesh point gX[interval+1] lies above x, or the last one (index 5)
   // when x is not below any of gX[1..5]
   size_t interval = 0;
   for (; interval < 5; ++interval) {
     if (x < gX[interval+1]) break;
   }
 
   // report the interval and the distance of x from its lower mesh point
   *offset = ( x - gX[interval] );
   *field_idx = interval;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLCBOP::gSpline( double x, double *dgdc ) {
   // evaluate the degree-5 polynomial of the interval containing x,
   //   g = sum_{p=0..5} gC[p][field] * dx^p,
   // and store its derivative with respect to x in *dgdc
   size_t field;
   double dx;
   g_decompose_x( x, &field, &dx );
 
   double value = 0;
   double power = 1.0;     // dx^order
   *dgdc = 0;
   for (size_t order = 0; order < 5; ++order, power *= dx) {
     value += gC[order][field]*power;
     *dgdc += gC[order+1][field]*(order+1)*power;
   }
 
   // the highest-order term contributes to the value only
   return value + gC[5][field]*power;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLCBOP::hSpline( double x, double *dhdx ) {
   // piecewise function H(x); the derivative is stored in *dhdx
 
   if (x < -d) {
     // left branch: L*( 1 + z*(1+z^10)^(-1/10) ) with z = kappa*(x+d)
     const double z = kappa*( x+d );
     const double z10 = pow(z, 10.0);
     const double root = pow( 1+z10, -0.1 );
     *dhdx = kappa*L*root/(1+z10);
     return L*( 1 + z*root );
   } else if (x > d) {
     // right branch: straight line through R_0 at x = d with slope R_1
     *dhdx = R_1;
     return R_0 + R_1*( x-d );
   } else {
     // middle branch: 1 + C_1*x + 0.5*C_1^2*x^2 + C_4*x^4 + C_6*x^6
     double value = 1 + C_1*x;
     *dhdx = C_1*value;
 
     double xpow = x*x;             // x^2
     value += 0.5*C_1*C_1*xpow;
 
     xpow *= x;                     // x^3
     *dhdx += 4*C_4*xpow;
 
     xpow *= x;                     // x^4
     value += C_4*xpow;
 
     xpow *= x;                     // x^5
     *dhdx += 6*C_6*xpow;
 
     xpow *= x;                     // x^6
     value += C_6*xpow;
 
     return value;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLCBOP::F_conj( double N_ij, double N_ji, double N_conj_ij, double *dFN_ij, double *dFN_ji, double *dFN_ij_conj ) {
   // interpolates the conjugation term F(N_ij, N_ji, N_conj_ij):
   //   F_0 and F_1 are 2d interpolations inside the field selected by the
   //   integer parts of N_ij and N_ji, built from the N_conj = 0 data (f0)
   //   and the N_conj = 1 data (f1) respectively; the result blends them
   //   linearly in N_conj_ij
   // outputs: *dFN_ij, *dFN_ji, *dFN_ij_conj = partial derivatives of the
   //   result with respect to N_ij, N_ji and N_conj_ij
   // each corner term is l*r with l the bilinear weight of that corner and
   // r the corner value plus quadratic slope corrections
 
   size_t N_ij_int         = MIN( static_cast<size_t>( floor( N_ij ) ), 2 ); // 2 is the highest number of field
   size_t N_ji_int         = MIN( static_cast<size_t>( floor( N_ji ) ), 2 ); // cast to suppress warning
   double x                = N_ij - N_ij_int;
   double y                = N_ji - N_ji_int;
   const TF_conj_field &f0 = F_conj_field[N_ij_int][N_ji_int][0];
   const TF_conj_field &f1 = F_conj_field[N_ij_int][N_ji_int][1];
   double F_0 = 0;
   double F_1 = 0;
   double dF_0_dx = 0, dF_0_dy = 0;
   double dF_1_dx = 0, dF_1_dy = 0;
   double l, r;
   // F_0 is skipped when its blend weight (1-N_conj_ij) is not positive
   if( N_conj_ij < 1 ) {
     l = (1-y)* (1-x);   r = ( f0.f_00 + x*     x*   f0.f_x_10   + y*     y*   f0.f_y_01 );    F_0 += l*r;   dF_0_dx += -(1-y)*r +l*2*x*    f0.f_x_10;    dF_0_dy += -(1-x)*r +l*2*y*    f0.f_y_01;
     l = (1-y)*  x;      r = ( f0.f_10 + (1-x)*(1-x)*f0.f_x_00   + y*     y*   f0.f_y_11 );    F_0 += l*r;   dF_0_dx +=  (1-y)*r -l*2*(1-x)*f0.f_x_00;    dF_0_dy += -x*    r +l*2*y*    f0.f_y_11;
     l = y*     (1-x);   r = ( f0.f_01 + x*     x*   f0.f_x_11   + (1-y)*(1-y)*f0.f_y_00 );    F_0 += l*r;   dF_0_dx += -y*    r +l*2*x*    f0.f_x_11;    dF_0_dy +=  (1-x)*r -l*2*(1-y)*f0.f_y_00;
     l = y*      x;      r = ( f0.f_11 + (1-x)*(1-x)*f0.f_x_01   + (1-y)*(1-y)*f0.f_y_10 );    F_0 += l*r;   dF_0_dx +=  y*    r -l*2*(1-x)*f0.f_x_01;    dF_0_dy +=  x*    r -l*2*(1-y)*f0.f_y_10;
   }
   // F_1 is skipped when its blend weight N_conj_ij is not positive;
   // every term of F_1 must read the N_conj = 1 data f1, including the
   // first corner value f_00 (the original read f0.f_00 there)
   if( N_conj_ij > 0 ) {
     l = (1-y)* (1-x);   r = ( f1.f_00 + x*     x*   f1.f_x_10   + y*     y*   f1.f_y_01 );    F_1 += l*r;   dF_1_dx += -(1-y)*r +l*2*x*    f1.f_x_10;    dF_1_dy += -(1-x)*r +l*2*y*    f1.f_y_01;
     l = (1-y)*  x;      r = ( f1.f_10 + (1-x)*(1-x)*f1.f_x_00   + y*     y*   f1.f_y_11 );    F_1 += l*r;   dF_1_dx +=  (1-y)*r -l*2*(1-x)*f1.f_x_00;    dF_1_dy += -x*    r +l*2*y*    f1.f_y_11;
     l = y*     (1-x);   r = ( f1.f_01 + x*     x*   f1.f_x_11   + (1-y)*(1-y)*f1.f_y_00 );    F_1 += l*r;   dF_1_dx += -y*    r +l*2*x*    f1.f_x_11;    dF_1_dy +=  (1-x)*r -l*2*(1-y)*f1.f_y_00;
     l = y*      x;      r = ( f1.f_11 + (1-x)*(1-x)*f1.f_x_01   + (1-y)*(1-y)*f1.f_y_10 );    F_1 += l*r;   dF_1_dx +=  y*    r -l*2*(1-x)*f1.f_x_01;    dF_1_dy +=  x*    r -l*2*(1-y)*f1.f_y_10;
   }
   // linear blend between the two N_conj planes
   double result = (1-N_conj_ij)*F_0 + N_conj_ij*F_1;
   *dFN_ij = (1-N_conj_ij)*dF_0_dx + N_conj_ij*dF_1_dx;
   *dFN_ji = (1-N_conj_ij)*dF_0_dy + N_conj_ij*dF_1_dy;
   *dFN_ij_conj = -F_0 + F_1;
 
   return result;
 }
 
 /* ----------------------------------------------------------------------
    read LCBOP potential file
 ------------------------------------------------------------------------- */
 
 void PairLCBOP::read_file(char *filename)
 {
   int i,k,l;
   char s[MAXLINE];
 
   MPI_Comm_rank(world,&me);
 
   // read file on proc 0
 
   if (me == 0) {
     FILE *fp = force->open_potential(filename);
     if (fp == NULL) {
       char str[128];
       sprintf(str,"Cannot open LCBOP potential file %s",filename);
       error->one(FLERR,str);
     }
 
     // skip initial comment lines
 
     while (1) {
       fgets(s,MAXLINE,fp);
       if (s[0] != '#') break;
     }
 
     // read parameters
 
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&r_1);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&r_2);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&gamma_1);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&A);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&B_1);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&B_2);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&alpha);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&beta_1);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&beta_2);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&d);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&C_1);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&C_4);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&C_6);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&L);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&kappa);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&R_0);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&R_1);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&r_0);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&r_1_LR);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&r_2_LR);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&v_1);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&v_2);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&eps_1);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&eps_2);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&lambda_1);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&lambda_2);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&eps);
     fgets(s,MAXLINE,fp);    sscanf(s,"%lg",&delta);
 
     while (1) {
       fgets(s,MAXLINE,fp);
       if (s[0] != '#') break;
     }
 
     // F_conj spline
 
     for (k = 0; k < 2; k++) { // 2 values of N_ij_conj
       for (l = 0; l < 3; l++) { // 3 types of data: f, dfdx, dfdy
         for (i = 0; i < 4; i++) { // 4x4 matrix
           fgets(s,MAXLINE,fp);
           sscanf(s,"%lg %lg %lg %lg",
             &F_conj_data[i][0][k][l],
             &F_conj_data[i][1][k][l],
             &F_conj_data[i][2][k][l],
             &F_conj_data[i][3][k][l]);
         }
         while (1) { fgets(s,MAXLINE,fp); if (s[0] != '#') break; }
       }
     }
 
     // G spline
 
     // x coordinates of mesh points
     fgets(s,MAXLINE,fp);
     sscanf( s,"%lg %lg %lg %lg %lg %lg",
       &gX[0], &gX[1], &gX[2],
       &gX[3], &gX[4], &gX[5] );
 
     for (i = 0; i < 6; i++) { // for each power in polynomial
       fgets(s,MAXLINE,fp);
       sscanf( s,"%lg %lg %lg %lg %lg",
         &gC[i][0], &gC[i][1], &gC[i][2],
         &gC[i][3], &gC[i][4] );
     }
 
     fclose(fp);
   }
 
   // broadcast read-in and setup values
 
   MPI_Bcast(&r_1      ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&r_2      ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&gamma_1  ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&A        ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&B_1      ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&B_2      ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&alpha    ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&beta_1   ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&beta_2   ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&d        ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&C_1      ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&C_4      ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&C_6      ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&L        ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&kappa    ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&R_0      ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&R_1      ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&r_0      ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&r_1_LR   ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&r_2_LR   ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&v_1      ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&v_2      ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&eps_1    ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&eps_2    ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&lambda_1 ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&lambda_2 ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&eps      ,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&delta    ,1,MPI_DOUBLE,0,world);
 
   MPI_Bcast(&gX[0]    ,6,MPI_DOUBLE,0,world);
   MPI_Bcast(&gC[0][0] ,(6-1)*(5+1),MPI_DOUBLE,0,world);
 
   MPI_Bcast(&F_conj_data[0],6*4*4,MPI_DOUBLE,0,world);
 }
 
 /* ----------------------------------------------------------------------
    init coefficients for TF_conj
 ------------------------------------------------------------------------- */
 
 #include <iostream>
 #include <fstream>
 #include <functional>
 template< class function > void print_function( double x_0, double x_1, size_t n, function f, std::ostream &stream ) {
   // debugging aid: tabulate f over [x_0,x_1] in n equal steps
   // f is called as f(x,&derivative) and returns the function value
   // each output line is "x value   derivative"; a blank line ends the table
   const double step = (x_1-x_0)/n;
   double x = x_0;
   // small tolerance so accumulated round-off does not drop the last point
   while( x <= x_1+0.0001 ) {
     double slope;
     const double value = f(x, &slope);
     stream << x << " " << value << "   " << slope << std::endl;
     x += step;
   }
   stream << std::endl;
 }
 
 void PairLCBOP::spline_init() {
   for( size_t N_conj_ij=0; N_conj_ij<2; N_conj_ij++ ) // N_conj_ij
   for( size_t N_ij=0; N_ij<4-1; N_ij++ )
   for( size_t N_ji=0; N_ji<4-1; N_ji++ ) {
     TF_conj_field &field = F_conj_field[N_ij][N_ji][N_conj_ij];
     field.f_00 = F_conj_data[N_ij  ][N_ji  ][N_conj_ij][0];
     field.f_01 = F_conj_data[N_ij  ][N_ji+1][N_conj_ij][0];
     field.f_10 = F_conj_data[N_ij+1][N_ji  ][N_conj_ij][0];
     field.f_11 = F_conj_data[N_ij+1][N_ji+1][N_conj_ij][0];
 
     field.f_x_00 =   F_conj_data[N_ij  ][N_ji  ][N_conj_ij][1] - field.f_10 + field.f_00;
     field.f_x_01 =   F_conj_data[N_ij  ][N_ji+1][N_conj_ij][1] - field.f_11 + field.f_01;
     field.f_x_10 = -(F_conj_data[N_ij+1][N_ji  ][N_conj_ij][1] - field.f_10 + field.f_00);
     field.f_x_11 = -(F_conj_data[N_ij+1][N_ji+1][N_conj_ij][1] - field.f_11 + field.f_01);
 
     field.f_y_00 =   F_conj_data[N_ij  ][N_ji  ][N_conj_ij][2] - field.f_01 + field.f_00;
     field.f_y_01 = -(F_conj_data[N_ij  ][N_ji+1][N_conj_ij][2] - field.f_01 + field.f_00);
     field.f_y_10 =   F_conj_data[N_ij+1][N_ji  ][N_conj_ij][2] - field.f_11 + field.f_10;
     field.f_y_11 = -(F_conj_data[N_ij+1][N_ji+1][N_conj_ij][2] - field.f_11 + field.f_10);
   }
 
   //some testing:
 //  std::ofstream file( "test.txt" );
 //    file << "gX:\n";
 //    file  << gX[0] << " "
 //          << gX[1] << " "
 //          << gX[2] << " "
 //          << gX[3] << " "
 //          << gX[4] << " "
 //          << gX[5] << std::endl;
 //    file << "gC:\n";
 //    for( int i=0; i<6; i++ )
 //      file  << gC[i][0] << " "
 //            << gC[i][1] << " "
 //            << gC[i][2] << " "
 //            << gC[i][3] << " "
 //            << gC[i][4] << std::endl;
 //    file << std::endl;
 //
 //    file << "gamma_1 = " << gamma_1 << std::endl;
 //    file << "r_1 = " << r_1 << std::endl;
 //    file << "r_2 = " << r_2 << std::endl;
 //    file << "A = " << A << std::endl;
 //    file << "B_1 = " << B_1 << std::endl;
 //    file << "B_2 = " << B_2 << std::endl;
 //    file << "alpha = " << alpha << std::endl;
 //    file << "beta_1 = " << beta_1 << std::endl;
 //    file << "beta_2 = " << beta_2 << std::endl;
 //    file << "d = " << d << std::endl;
 //    file << "C_1 = " << C_1 << std::endl;
 //    file << "C_4 = " << C_4 << std::endl;
 //    file << "C_6 = " << C_6 << std::endl;
 //    file << "L = " << L << std::endl;
 //    file << "kappa = " << kappa << std::endl;
 //    file << "R_0 = " << R_0 << std::endl;
 //    file << "R_1 = " << R_1 << std::endl;
 //    file << "r_0 = " << r_0 << std::endl;
 //    file << "r_1_LR = " << r_1_LR << std::endl;
 //    file << "r_2_LR = " << r_2_LR << std::endl;
 //    file << "v_1 = " << v_1 << std::endl;
 //    file << "v_2 = " << v_2 << std::endl;
 //    file << "eps_1 = " << eps_1 << std::endl;
 //    file << "eps_2 = " << eps_2 << std::endl;
 //    file << "lambda_1 = " << lambda_1 << std::endl;
 //    file << "lambda_2 = " << lambda_2 << std::endl;
 //    file << "eps = " << eps << std::endl;
 //    file << "delta = " << delta << std::endl;
 //    file << "r_2_sq = " << r_2_sq << std::endl;
 //    file << std::endl;
 //
 //
 //    file << "gSpline:" << std::endl;
 //    double x_1 = 1, x_0 = -1;
 //    int n=1000;
 //    double dx = (x_1-x_0)/n;
 //    for( double x=x_0; x<=x_1+0.0001; x+=dx ) {
 //      double g, dg;
 //      g = gSpline(x, &dg);
 //      file << x << " " << g << " " << dg << std::endl;
 //    }
 //    file << std::endl;
 //
 //  file << "hSpline:" << std::endl;
 //  double x_1 = 1, x_0 = -1;
 //  int n=1000;
 //  double dx = (x_1-x_0)/n;
 //  for( double x=x_0; x<=x_1+0.0001; x+=dx ) {
 //    double h, dh;
 //    h = hSpline(x, &dh);
 //    file << x << " " << h << " " << dh << std::endl;
 //  }
 //  file << std::endl;
 //
 //
 //  file << "f_c:" << std::endl;
 //  double x_1 = 4, x_0 = 0;
 //  int n=1000;
 //  double dx = (x_1-x_0)/n;
 //  for( double x=x_0; x<=x_1+0.0001; x+=dx ) {
 //    double f, df;
 //    f = f_c(x, r_1, r_2, &df);
 //    file << x << " " << f << " " << df << std::endl;
 //  }
 //  file << std::endl;
 
 //  file << "F_conj_data\n";
 //  for (int k = 0; k < 2; k++) { // 2 values of N_ij_conj
 //    for (int l = 0; l < 3; l++) { // 3 types of data: f, dfdx, dfdy
 //      for (int i = 0; i < 4; i++) { // 4x4 matrix
 //        file
 //          << F_conj_data[i][0][k][l] << " "
 //          << F_conj_data[i][1][k][l] << " "
 //          << F_conj_data[i][2][k][l] << " "
 //          << F_conj_data[i][3][k][l] << std::endl;
 //      }
 //    file << std::endl;
 //    }
 //  }
 //
 //
 //  file << "F_conj_0 ";
 //  double dummy;
 //  for( double y=0; y<=3.0+0.0001; y+=0.1 )
 //    file << y << " ";
 //  file << std::endl;
 //  for( double x=0; x<=3.0+0.0001; x+=0.1 ){
 //    file << x << " ";
 //    for( double y=0; y<=3.0+0.0001; y+=0.1 )
 //      file << F_conj( x, y, 0, &dummy, &dummy, &dummy ) << " ";
 //    file << std::endl;
 //  }
 //
 //  file << "dF0_dx ";
 //  for( double y=0; y<=3.0+0.0001; y+=0.1 )
 //    file << y << " ";
 //  file << std::endl;
 //  for( double x=0; x<=3.0+0.0001; x+=0.1 ){
 //    file << x << " ";
 //    for( double y=0; y<=3.0+0.0001; y+=0.1 ) {
 //      double dF_dx;
 //      F_conj( x, y, 0, &dF_dx, &dummy, &dummy );
 //      file << dF_dx << " ";
 //    }
 //    file << std::endl;
 //  }
 //
 //
 //
 //  file << "F_conj_1 ";
 //  for( double y=0; y<=3.0+0.0001; y+=0.1 )
 //    file << y << " ";
 //  file << std::endl;
 //  for( double x=0; x<=3.0+0.0001; x+=0.1 ){
 //    file << x << " ";
 //    for( double y=0; y<=3.0+0.0001; y+=0.1 )
 //      file << F_conj( x, y, 0, &dummy, &dummy, &dummy ) << " ";
 //    file << std::endl;
 //  }
 
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
    returns the sum, in bytes, of: maxlocal ints, maxlocal int pointers,
    the size reported by each per-thread ipage, and 3*maxlocal doubles
 ------------------------------------------------------------------------- */
 
 double PairLCBOP::memory_usage()
 {
   double total = 0.0;
   // per-atom int array and per-atom array of int pointers
   total += maxlocal * sizeof(int);
   total += maxlocal * sizeof(int *);
   // one page allocator per thread
   int ithread = 0;
   while (ithread < comm->nthreads) {
     total += ipage[ithread].size();
     ithread++;
   }
   // three doubles per atom
   total += 3*maxlocal * sizeof(double);
   return total;
 }
diff --git a/src/MANYBODY/pair_nb3b_harmonic.cpp b/src/MANYBODY/pair_nb3b_harmonic.cpp
index b3512f508..b42bb0e9a 100644
--- a/src/MANYBODY/pair_nb3b_harmonic.cpp
+++ b/src/MANYBODY/pair_nb3b_harmonic.cpp
@@ -1,531 +1,531 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Todd R. Zeitler (SNL)
    (based on Stillinger-Weber pair style)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_nb3b_harmonic.h"
 #include "atom.h"
 #include "neighbor.h"
 #include "neigh_request.h"
 #include "force.h"
 #include "comm.h"
 #include "memory.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 // size of the line buffer used by read_file() when parsing the potential file
 #define MAXLINE 1024
 // number of Param slots added each time read_file() grows the params array
 #define DELTA 4
 // lower bound applied to sin(theta) in threebody() before it is inverted
 #define SMALL 0.001
 // used by setup() to convert theta0 from degrees to radians
 #define PI 3.141592653589793238462643383279 
 
 /* ----------------------------------------------------------------------
    constructor: set the pair-style flags and start with empty element
    and parameter tables
 ------------------------------------------------------------------------- */
 
 PairNb3bHarmonic::PairNb3bHarmonic(LAMMPS *lmp) : Pair(lmp)
 {
   // nothing has been read from a potential file yet
   elements = NULL;
   nelements = 0;
   params = NULL;
   nparams = 0;
   maxparam = 0;
   elem2param = NULL;
 
   // flags inherited from Pair
   // NOTE(review): presumably "no single() support", "one pair_coeff
   // command" and "many-body potential" -- confirm against pair.h
   single_enable = 0;
   one_coeff = 1;
   manybody_flag = 1;
 }
 
 /* ----------------------------------------------------------------------
    destructor
    the per-type arrays are released only if allocate() was called,
    since the class can be destructed when incomplete
 ------------------------------------------------------------------------- */
 
 PairNb3bHarmonic::~PairNb3bHarmonic()
 {
   // element names, then the array that holds them
   if (elements) {
     int ielem = 0;
     while (ielem < nelements) {
       delete [] elements[ielem];
       ielem++;
     }
   }
   delete [] elements;
 
   // parameter sets and the triplet -> parameter lookup table
   memory->destroy(params);
   memory->destroy(elem2param);
 
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
   delete [] map;
 }
 
 /* ----------------------------------------------------------------------
    compute three-body forces, plus energy/virial tallies when requested
    for each local atom i, every unordered pair (j,k) of its neighbors
    with j inside the i-j cutoff and k inside the i-k cutoff contributes
    one threebody() term
    this style has no two-body term: the former two-body pass over the
    neighbor list only computed distances that were then discarded, so it
    has been removed
    NOTE(review): setup() stores -1 in elem2param for triplets that are
    missing from the potential file, and such an index would be used here
    unchecked -- confirm the potential file always covers every triplet
    that can occur
 ------------------------------------------------------------------------- */
 
 void PairNb3bHarmonic::compute(int eflag, int vflag)
 {
   int i,j,k,ii,jj,kk,inum,jnum,jnumm1;
   int itype,jtype,ktype,ijparam,ikparam,ijkparam;
   double xtmp,ytmp,ztmp,evdwl;
   double rsq1,rsq2;
   double delr1[3],delr2[3],fj[3],fk[3];
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over full neighbor list of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     itype = map[type[i]];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
     jnumm1 = jnum - 1;
 
     // three-body interactions: all pairs j < k in the neighbor list of i
 
     for (jj = 0; jj < jnumm1; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
       jtype = map[type[j]];
       ijparam = elem2param[itype][jtype][jtype];
       delr1[0] = x[j][0] - xtmp;
       delr1[1] = x[j][1] - ytmp;
       delr1[2] = x[j][2] - ztmp;
       rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];
       if (rsq1 > params[ijparam].cutsq) continue;
 
       for (kk = jj+1; kk < jnum; kk++) {
         k = jlist[kk];
         k &= NEIGHMASK;
         ktype = map[type[k]];
         ikparam = elem2param[itype][ktype][ktype];
         ijkparam = elem2param[itype][jtype][ktype];
 
         delr2[0] = x[k][0] - xtmp;
         delr2[1] = x[k][1] - ytmp;
         delr2[2] = x[k][2] - ztmp;
         rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2];
         if (rsq2 > params[ikparam].cutsq) continue;
 
         threebody(&params[ijparam],&params[ikparam],&params[ijkparam],
                   rsq1,rsq2,delr1,delr2,fj,fk,eflag,evdwl);
 
         // fj and fk act on j and k; i receives the opposite of their sum
 
         f[i][0] -= fj[0] + fk[0];
         f[i][1] -= fj[1] + fk[1];
         f[i][2] -= fj[2] + fk[2];
         f[j][0] += fj[0];
         f[j][1] += fj[1];
         f[j][2] += fj[2];
         f[k][0] += fk[0];
         f[k][1] += fk[1];
         f[k][2] += fk[2];
 
         if (evflag) ev_tally3(i,j,k,evdwl,0.0,fj,fk,delr1,delr2);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate the per-type arrays: setflag and cutsq are (ntypes+1)^2,
    map has ntypes+1 entries
 ------------------------------------------------------------------------- */
 
 void PairNb3bHarmonic::allocate()
 {
   allocated = 1;
 
   // one extra slot per dimension
   const int np1 = atom->ntypes + 1;
 
   memory->create(setflag,np1,np1,"pair:setflag");
   memory->create(cutsq,np1,np1,"pair:cutsq");
 
   map = new int[np1];
 }
 
 /* ----------------------------------------------------------------------
    global settings 
 ------------------------------------------------------------------------- */
 
 void PairNb3bHarmonic::settings(int narg, char **arg)
 {
   if (narg != 0) error->all(FLERR,"Illegal pair_style command");
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
    expected args: "*" "*" <potential file> <one element name per atom type>
    an element name of NULL leaves that atom type unmapped (map = -1)
 ------------------------------------------------------------------------- */
 
 void PairNb3bHarmonic::coeff(int narg, char **arg)
 {
   if (!allocated) allocate();
 
   const int ntypes = atom->ntypes;
 
   if (narg != 3 + ntypes)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // ensure I,J args are * *
 
   if (!(strcmp(arg[0],"*") == 0 && strcmp(arg[1],"*") == 0))
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // discard the element list of any earlier call and start a fresh one
   // with room for one distinct name per atom type
 
   if (elements) {
     for (int e = 0; e < nelements; e++) delete [] elements[e];
     delete [] elements;
   }
   elements = new char*[ntypes];
   for (int e = 0; e < ntypes; e++) elements[e] = NULL;
 
   // arg[3..] name the element of atom types 1..ntypes
   // map[type] = index into elements, -1 if NULL
   // nelements = # of unique elements
 
   nelements = 0;
   for (int iarg = 3; iarg < narg; iarg++) {
     const char *name = arg[iarg];
     const int itype = iarg - 2;
 
     if (strcmp(name,"NULL") == 0) {
       map[itype] = -1;
       continue;
     }
 
     // look for the name among the elements seen so far
     int slot = 0;
     while (slot < nelements && strcmp(name,elements[slot]) != 0) slot++;
     map[itype] = slot;
     if (slot < nelements) continue;
 
     // first occurrence: store a copy of the name
     const int len = strlen(name) + 1;
     elements[slot] = new char[len];
     strcpy(elements[slot],name);
     nelements++;
   }
 
   // read potential file and initialize potential parameters
 
   read_file(arg[2]);
   setup();
 
   // coeff() is called once with I,J = * *, so rebuild setflag from scratch:
   // a type pair is set only when both types are mapped to elements
 
   int count = 0;
   for (int a = 1; a <= ntypes; a++) {
     for (int b = a; b <= ntypes; b++) {
       if (map[a] >= 0 && map[b] >= 0) {
         setflag[a][b] = 1;
         count++;
       } else {
         setflag[a][b] = 0;
       }
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
    errors out unless atom IDs are enabled and newton pair is on,
    then requests a full neighbor list
 ------------------------------------------------------------------------- */
 
 void PairNb3bHarmonic::init_style()
 {
   if (atom->tag_enable == 0)
     error->all(FLERR,"Pair style nb3b/harmonic requires atom IDs");
   if (force->newton_pair == 0)
     error->all(FLERR,"Pair style nb3b/harmonic requires newton pair on");
 
   // need a full neighbor list
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   // switch the request from a half list to a full list
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairNb3bHarmonic::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   return cutmax;
 }
 
 /* ----------------------------------------------------------------------
    read the nb3b/harmonic potential file into the params array
    proc 0 reads the file line by line and broadcasts each line, so all
    procs parse identical text
    each entry has 6 words: 3 element names, k_theta, theta0, cutoff
    entries naming an element that is not in the elements list are skipped
 ------------------------------------------------------------------------- */
 
 void PairNb3bHarmonic::read_file(char *file)
 {
   int params_per_line = 6;
   // one extra slot for the NULL that terminates the strtok() scan below
   char **words = new char*[params_per_line+1];
 
   memory->sfree(params);
   params = NULL;
   nparams = maxparam = 0;
 
   // open file on proc 0
 
   FILE *fp = NULL;
   if (comm->me == 0) {
     fp = force->open_potential(file);
     if (fp == NULL) {
       char str[128];
       // bounded write: a long file name is truncated instead of
       // overflowing str
       snprintf(str,sizeof(str),
                "Cannot open nb3b/harmonic potential file %s",file);
       error->one(FLERR,str);
     }
   }
 
   // read each set of params from potential file
   // one set of params can span multiple lines
   // store params if all 3 element tags are in element list
 
   int n,nwords,ielement,jelement,kelement;
   char line[MAXLINE],*ptr;
   int eof = 0;
 
   while (1) {
     if (comm->me == 0) {
       ptr = fgets(line,MAXLINE,fp);
       if (ptr == NULL) {
         eof = 1;
         fclose(fp);
       } else n = strlen(line) + 1;
     }
     MPI_Bcast(&eof,1,MPI_INT,0,world);
     if (eof) break;
     MPI_Bcast(&n,1,MPI_INT,0,world);
     MPI_Bcast(line,n,MPI_CHAR,0,world);
 
     // strip comment, skip line if blank
 
     if ((ptr = strchr(line,'#'))) *ptr = '\0';
     nwords = atom->count_words(line);
     if (nwords == 0) continue;
 
     // concatenate additional lines until have params_per_line words
 
     while (nwords < params_per_line) {
       n = strlen(line);
       if (comm->me == 0) {
         ptr = fgets(&line[n],MAXLINE-n,fp);
         if (ptr == NULL) {
           eof = 1;
           fclose(fp);
         } else n = strlen(line) + 1;
       }
       MPI_Bcast(&eof,1,MPI_INT,0,world);
       if (eof) break;
       MPI_Bcast(&n,1,MPI_INT,0,world);
       MPI_Bcast(line,n,MPI_CHAR,0,world);
       if ((ptr = strchr(line,'#'))) *ptr = '\0';
       nwords = atom->count_words(line);
     }
 
     // also reached when the file ends in the middle of an entry
 
     if (nwords != params_per_line)
       error->all(FLERR,"Incorrect format in nb3b/harmonic potential file");
 
     // words = ptrs to all words in line
 
     nwords = 0;
     words[nwords++] = strtok(line," \t\n\r\f");
     while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue;
 
     // ielement,jelement,kelement = 1st args
     // if all 3 args are in element list, then parse this line
     // else skip to next entry in file
 
     for (ielement = 0; ielement < nelements; ielement++)
       if (strcmp(words[0],elements[ielement]) == 0) break;
     if (ielement == nelements) continue;
     for (jelement = 0; jelement < nelements; jelement++)
       if (strcmp(words[1],elements[jelement]) == 0) break;
     if (jelement == nelements) continue;
     for (kelement = 0; kelement < nelements; kelement++)
       if (strcmp(words[2],elements[kelement]) == 0) break;
     if (kelement == nelements) continue;
 
     // load up parameter settings and error check their values
     // the params array grows by DELTA entries whenever it is full
 
     if (nparams == maxparam) {
       maxparam += DELTA;
       params = (Param *) memory->srealloc(params,maxparam*sizeof(Param),
                                           "pair:params");
     }
 
     params[nparams].ielement = ielement;
     params[nparams].jelement = jelement;
     params[nparams].kelement = kelement;
     params[nparams].k_theta = atof(words[3]);
     params[nparams].theta0 = atof(words[4]);
     params[nparams].cutoff = atof(words[5]);
 
     if (params[nparams].k_theta < 0.0 || params[nparams].theta0 < 0.0 ||
         params[nparams].cutoff < 0.0)
       error->all(FLERR,"Illegal nb3b/harmonic parameter");
 
     nparams++;
   }
 
   delete [] words;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairNb3bHarmonic::setup()
 {
   int i,j,k,m,n;
   double rtmp;
 
   // set elem2param for all triplet combinations
   // must be a single exact match to lines read from file
   // do not allow for ACB in place of ABC
 
   memory->destroy(elem2param);
   memory->create(elem2param,nelements,nelements,nelements,"pair:elem2param");
 
   for (i = 0; i < nelements; i++)
     for (j = 0; j < nelements; j++)
       for (k = 0; k < nelements; k++) {
 	n = -1;
 	for (m = 0; m < nparams; m++) {
 	  if (i == params[m].ielement && j == params[m].jelement && 
 	      k == params[m].kelement) {
 	    if (n >= 0) error->all(FLERR,"Potential file has duplicate entry");
 	    n = m;
 	  }
 	}
 //	if (n < 0) error->all(FLERR,"Potential file is missing an entry");
 	elem2param[i][j][k] = n;
       }
 
   // compute parameter values derived from inputs
 
   // set cutsq using shortcut to reduce neighbor list for accelerated
   // calculations. cut must remain unchanged as it is a potential parameter
   // (cut = a*sigma) 
 
   for (m = 0; m < nparams; m++) {
 
     params[m].cut = params[m].cutoff;
     params[m].cutsq = params[m].cut * params[m].cut;
 
     params[m].theta0 = params[m].theta0 / 180.0 * PI;
 
   }
 
   // set cutmax to max of all params
 
   cutmax = 0.0;
   for (m = 0; m < nparams; m++) {
     rtmp = sqrt(params[m].cutsq);
     if (rtmp > cutmax) cutmax = rtmp;
   }
 }  
 
 /* ----------------------------------------------------------------------
    harmonic angle term for the triplet j-i-k
    delr1,delr2 = vectors from i to j and from i to k, rsq1,rsq2 = their
    squared lengths
    dtheta = angle between delr1 and delr2 minus theta0 of paramijk
    eng (set only if eflag) = k_theta * dtheta^2
    fj,fk = forces on j and k
    paramij and paramik are not used
 ------------------------------------------------------------------------- */
 
 void PairNb3bHarmonic::threebody(Param *paramij, Param *paramik, 
                                  Param *paramijk,
                                  double rsq1, double rsq2,
                                  double *delr1, double *delr2,
                                  double *fj, double *fk, int eflag, double &eng)
 {
   // cosine of the angle, clamped to [-1,1] against round-off
 
   const double r1 = sqrt(rsq1);
   const double r2 = sqrt(rsq2);
 
   double cosine = delr1[0]*delr2[0] + delr1[1]*delr2[1] + delr1[2]*delr2[2];
   cosine /= r1*r2;
 
   if (cosine > 1.0) cosine = 1.0;
   else if (cosine < -1.0) cosine = -1.0;
 
   // 1/sin of the angle, with sin bounded below by SMALL
 
   double sininv = sqrt(1.0 - cosine*cosine);
   if (sininv < SMALL) sininv = SMALL;
   sininv = 1.0/sininv;
 
   // force & energy
 
   const double dtheta = acos(cosine) - paramijk->theta0;
   const double tk = paramijk->k_theta * dtheta;
 
   if (eflag) eng = tk*dtheta;
 
   const double a = -2.0 * tk * sininv;
   const double a11 = a*cosine / rsq1;
   const double a12 = -a / (r1*r2);
   const double a22 = a*cosine / rsq2;
 
   // fj is completed before fk, as two separate passes
 
   for (int dim = 0; dim < 3; dim++)
     fj[dim] = a11*delr1[dim] + a12*delr2[dim];
   for (int dim = 0; dim < 3; dim++)
     fk[dim] = a22*delr2[dim] + a12*delr1[dim];
 }
 
diff --git a/src/MANYBODY/pair_sw.cpp b/src/MANYBODY/pair_sw.cpp
index 554d524f0..a4b85177f 100755
--- a/src/MANYBODY/pair_sw.cpp
+++ b/src/MANYBODY/pair_sw.cpp
@@ -1,604 +1,604 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Aidan Thompson (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_sw.h"
 #include "atom.h"
 #include "neighbor.h"
 #include "neigh_request.h"
 #include "force.h"
 #include "comm.h"
 #include "memory.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 #define MAXLINE 1024
 #define DELTA 4
 
 /* ----------------------------------------------------------------------
    constructor: set the pair-style flags and start with empty element
    and parameter tables
 ------------------------------------------------------------------------- */
 
 PairSW::PairSW(LAMMPS *lmp) : Pair(lmp)
 {
   // nothing has been read from a potential file yet
   elements = NULL;
   nelements = 0;
   params = NULL;
   nparams = 0;
   maxparam = 0;
   elem2param = NULL;
 
   // flags inherited from Pair
   // NOTE(review): presumably "no single() support", "nothing written to
   // restart files", "one pair_coeff command" and "many-body potential"
   // -- confirm against pair.h
   single_enable = 0;
   restartinfo = 0;
   one_coeff = 1;
   manybody_flag = 1;
 }
 
 /* ----------------------------------------------------------------------
    destructor
    the per-type arrays are released only if allocate() was called,
    since the class can be destructed when incomplete
 ------------------------------------------------------------------------- */
 
 PairSW::~PairSW()
 {
   // element names, then the array that holds them
   if (elements) {
     int ielem = 0;
     while (ielem < nelements) {
       delete [] elements[ielem];
       ielem++;
     }
   }
   delete [] elements;
 
   // parameter sets and the triplet -> parameter lookup table
   memory->destroy(params);
   memory->destroy(elem2param);
 
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
   delete [] map;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairSW::compute(int eflag, int vflag)
 {
   int i,j,k,ii,jj,kk,inum,jnum,jnumm1;
   int itype,jtype,ktype,ijparam,ikparam,ijkparam;
   tagint itag,jtag;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,rsq1,rsq2;
   double delr1[3],delr2[3],fj[3],fk[3];
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   tagint *tag = atom->tag;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int newton_pair = force->newton_pair;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over full neighbor list of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     itag = tag[i];
     itype = map[type[i]];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
 
     // two-body interactions, skip half of them
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
       jtag = tag[j];
 
       if (itag > jtag) {
         if ((itag+jtag) % 2 == 0) continue;
       } else if (itag < jtag) {
         if ((itag+jtag) % 2 == 1) continue;
       } else {
         if (x[j][2] < ztmp) continue;
         if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
         if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
       }
 
       jtype = map[type[j]];
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       ijparam = elem2param[itype][jtype][jtype];
       if (rsq > params[ijparam].cutsq) continue;
 
       twobody(&params[ijparam],rsq,fpair,eflag,evdwl);
 
       f[i][0] += delx*fpair;
       f[i][1] += dely*fpair;
       f[i][2] += delz*fpair;
       f[j][0] -= delx*fpair;
       f[j][1] -= dely*fpair;
       f[j][2] -= delz*fpair;
 
       if (evflag) ev_tally(i,j,nlocal,newton_pair,
                            evdwl,0.0,fpair,delx,dely,delz);
     }
 
     jnumm1 = jnum - 1;
 
     for (jj = 0; jj < jnumm1; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
       jtype = map[type[j]];
       ijparam = elem2param[itype][jtype][jtype];
       delr1[0] = x[j][0] - xtmp;
       delr1[1] = x[j][1] - ytmp;
       delr1[2] = x[j][2] - ztmp;
       rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];
       if (rsq1 > params[ijparam].cutsq) continue;
 
       for (kk = jj+1; kk < jnum; kk++) {
         k = jlist[kk];
         k &= NEIGHMASK;
         ktype = map[type[k]];
         ikparam = elem2param[itype][ktype][ktype];
         ijkparam = elem2param[itype][jtype][ktype];
 
         delr2[0] = x[k][0] - xtmp;
         delr2[1] = x[k][1] - ytmp;
         delr2[2] = x[k][2] - ztmp;
         rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2];
         if (rsq2 > params[ikparam].cutsq) continue;
 
         threebody(&params[ijparam],&params[ikparam],&params[ijkparam],
                   rsq1,rsq2,delr1,delr2,fj,fk,eflag,evdwl);
 
         f[i][0] -= fj[0] + fk[0];
         f[i][1] -= fj[1] + fk[1];
         f[i][2] -= fj[2] + fk[2];
         f[j][0] += fj[0];
         f[j][1] += fj[1];
         f[j][2] += fj[2];
         f[k][0] += fk[0];
         f[k][1] += fk[1];
         f[k][2] += fk[2];
 
         if (evflag) ev_tally3(i,j,k,evdwl,0.0,fj,fk,delr1,delr2);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate the per-type arrays: setflag and cutsq are (ntypes+1)^2,
    map has ntypes+1 entries
 ------------------------------------------------------------------------- */
 
 void PairSW::allocate()
 {
   allocated = 1;
 
   // one extra slot per dimension
   const int np1 = atom->ntypes + 1;
 
   memory->create(setflag,np1,np1,"pair:setflag");
   memory->create(cutsq,np1,np1,"pair:cutsq");
 
   map = new int[np1];
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairSW::settings(int narg, char **arg)
 {
   if (narg != 0) error->all(FLERR,"Illegal pair_style command");
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
    expected args: "*" "*" potential-file, then one element name (or NULL)
    for each atom type
 ------------------------------------------------------------------------- */
 
 void PairSW::coeff(int narg, char **arg)
 {
   if (!allocated) allocate();
 
   const int ntypes = atom->ntypes;
 
   if (narg != 3 + ntypes)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // the I,J args must both be the wildcard "*"
 
   if (!(strcmp(arg[0],"*") == 0 && strcmp(arg[1],"*") == 0))
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // discard any element list left over from an earlier call
 
   if (elements) {
     for (int ielem = 0; ielem < nelements; ielem++) delete [] elements[ielem];
     delete [] elements;
   }
   elements = new char*[ntypes];
   for (int ielem = 0; ielem < ntypes; ielem++) elements[ielem] = NULL;
 
   // map each atom type to an element index, -1 if the arg is NULL
   // a name not seen before is appended to elements and bumps nelements
 
   nelements = 0;
   for (int iarg = 3; iarg < narg; iarg++) {
     const int itype = iarg - 2;
     const char *name = arg[iarg];
 
     if (strcmp(name,"NULL") == 0) {
       map[itype] = -1;
       continue;
     }
 
     int ielem = 0;
     while (ielem < nelements && strcmp(name,elements[ielem]) != 0) ielem++;
     map[itype] = ielem;
 
     if (ielem == nelements) {
       elements[ielem] = new char[strlen(name) + 1];
       strcpy(elements[ielem],name);
       nelements++;
     }
   }
 
   // read potential file and initialize potential parameters
 
   read_file(arg[2]);
   setup();
 
   // coeff() is called once with I,J = * *, so rewrite every setflag entry:
   // 1 where both types are mapped to elements, 0 otherwise
 
   int count = 0;
   for (int itype = 1; itype <= ntypes; itype++)
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       if (map[itype] >= 0 && map[jtype] >= 0) {
         setflag[itype][jtype] = 1;
         count++;
       } else setflag[itype][jtype] = 0;
     }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
    errors out unless atom IDs are enabled and newton pair is on,
    then requests a full neighbor list
 ------------------------------------------------------------------------- */
 
 void PairSW::init_style()
 {
   if (atom->tag_enable == 0)
     error->all(FLERR,"Pair style Stillinger-Weber requires atom IDs");
   if (force->newton_pair == 0)
     error->all(FLERR,"Pair style Stillinger-Weber requires newton pair on");
 
   // need a full neighbor list
   // the request's half flag is cleared and its full flag is set
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairSW::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   return cutmax;
 }
 
 /* ----------------------------------------------------------------------
    read the Stillinger-Weber potential file and fill params[]
    the file is opened and read on proc 0, each line is broadcast to all procs
    one entry = 14 words (3 element names + 11 numeric values) and may
    span several lines; '#' starts a comment
    entries whose 3 element names are not all in the elements list are skipped
 ------------------------------------------------------------------------- */
 
 void PairSW::read_file(char *file)
 {
   int params_per_line = 14;
 
   // one extra slot: the strtok() loop below also stores the terminating NULL
 
   char **words = new char*[params_per_line+1];
 
   // throw away parameters from any previous read
 
   memory->sfree(params);
   params = NULL;
   nparams = maxparam = 0;
 
   // open file on proc 0
 
   FILE *fp;
   if (comm->me == 0) {
     fp = force->open_potential(file);
     if (fp == NULL) {
       // bounded write: a long file name is truncated in the message
       // instead of overflowing the fixed-size buffer
       char str[128];
       snprintf(str,sizeof(str),
                "Cannot open Stillinger-Weber potential file %s",file);
       error->one(FLERR,str);
     }
   }
 
   // read each set of params from potential file
   // one set of params can span multiple lines
   // store params if all 3 element tags are in element list
 
   int n,nwords,ielement,jelement,kelement;
   char line[MAXLINE],*ptr;
   int eof = 0;
 
   while (1) {
     if (comm->me == 0) {
       ptr = fgets(line,MAXLINE,fp);
       if (ptr == NULL) {
         eof = 1;
         fclose(fp);
       } else n = strlen(line) + 1;
     }
 
     // eof flag first, then length (including terminator), then the text
 
     MPI_Bcast(&eof,1,MPI_INT,0,world);
     if (eof) break;
     MPI_Bcast(&n,1,MPI_INT,0,world);
     MPI_Bcast(line,n,MPI_CHAR,0,world);
 
     // strip comment, skip line if blank
 
     if ((ptr = strchr(line,'#'))) *ptr = '\0';
     nwords = atom->count_words(line);
     if (nwords == 0) continue;
 
     // concatenate additional lines until have params_per_line words
 
     while (nwords < params_per_line) {
       n = strlen(line);
       if (comm->me == 0) {
         ptr = fgets(&line[n],MAXLINE-n,fp);
         if (ptr == NULL) {
           eof = 1;
           fclose(fp);
         } else n = strlen(line) + 1;
       }
       MPI_Bcast(&eof,1,MPI_INT,0,world);
       if (eof) break;
       MPI_Bcast(&n,1,MPI_INT,0,world);
       MPI_Bcast(line,n,MPI_CHAR,0,world);
       if ((ptr = strchr(line,'#'))) *ptr = '\0';
       nwords = atom->count_words(line);
     }
 
     // also triggers when the file ended in the middle of an entry
 
     if (nwords != params_per_line)
       error->all(FLERR,"Incorrect format in Stillinger-Weber potential file");
 
     // words = ptrs to all words in line
 
     nwords = 0;
     words[nwords++] = strtok(line," \t\n\r\f");
     while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue;
 
     // ielement,jelement,kelement = 1st args
     // if all 3 args are in element list, then parse this line
     // else skip to next entry in file
 
     for (ielement = 0; ielement < nelements; ielement++)
       if (strcmp(words[0],elements[ielement]) == 0) break;
     if (ielement == nelements) continue;
     for (jelement = 0; jelement < nelements; jelement++)
       if (strcmp(words[1],elements[jelement]) == 0) break;
     if (jelement == nelements) continue;
     for (kelement = 0; kelement < nelements; kelement++)
       if (strcmp(words[2],elements[kelement]) == 0) break;
     if (kelement == nelements) continue;
 
     // load up parameter settings and error check their values
     // params[] grows by DELTA entries whenever it is full
 
     if (nparams == maxparam) {
       maxparam += DELTA;
       params = (Param *) memory->srealloc(params,maxparam*sizeof(Param),
                                           "pair:params");
     }
 
     params[nparams].ielement = ielement;
     params[nparams].jelement = jelement;
     params[nparams].kelement = kelement;
     params[nparams].epsilon = atof(words[3]);
     params[nparams].sigma = atof(words[4]);
     params[nparams].littlea = atof(words[5]);
     params[nparams].lambda = atof(words[6]);
     params[nparams].gamma = atof(words[7]);
     params[nparams].costheta = atof(words[8]);
     params[nparams].biga = atof(words[9]);
     params[nparams].bigb = atof(words[10]);
     params[nparams].powerp = atof(words[11]);
     params[nparams].powerq = atof(words[12]);
     params[nparams].tol = atof(words[13]);
 
     // every value except costheta must be non-negative
 
     if (params[nparams].epsilon < 0.0 || params[nparams].sigma < 0.0 ||
         params[nparams].littlea < 0.0 || params[nparams].lambda < 0.0 ||
         params[nparams].gamma < 0.0 || params[nparams].biga < 0.0 ||
         params[nparams].bigb < 0.0 || params[nparams].powerp < 0.0 ||
         params[nparams].powerq < 0.0 || params[nparams].tol < 0.0)
       error->all(FLERR,"Illegal Stillinger-Weber parameter");
 
     nparams++;
   }
 
   delete [] words;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairSW::setup()
 {
   int i,j,k,m,n;
   double rtmp;
 
   // set elem2param for all triplet combinations
   // must be a single exact match to lines read from file
   // do not allow for ACB in place of ABC
 
   memory->destroy(elem2param);
   memory->create(elem2param,nelements,nelements,nelements,"pair:elem2param");
 
   for (i = 0; i < nelements; i++)
     for (j = 0; j < nelements; j++)
       for (k = 0; k < nelements; k++) {
         n = -1;
         for (m = 0; m < nparams; m++) {
           if (i == params[m].ielement && j == params[m].jelement &&
               k == params[m].kelement) {
             if (n >= 0) error->all(FLERR,"Potential file has duplicate entry");
             n = m;
           }
         }
         if (n < 0) error->all(FLERR,"Potential file is missing an entry");
         elem2param[i][j][k] = n;
       }
 
 
   // compute parameter values derived from inputs
 
   // set cutsq using shortcut to reduce neighbor list for accelerated
   // calculations. cut must remain unchanged as it is a potential parameter
   // (cut = a*sigma)
 
   for (m = 0; m < nparams; m++) {
     params[m].cut = params[m].sigma*params[m].littlea;
 
     rtmp = params[m].cut;
     if (params[m].tol > 0.0) {
       if (params[m].tol > 0.01) params[m].tol = 0.01;
       if (params[m].gamma < 1.0)
         rtmp = rtmp +
           params[m].gamma * params[m].sigma / log(params[m].tol);
       else rtmp = rtmp +
              params[m].sigma / log(params[m].tol);
     }
     params[m].cutsq = rtmp * rtmp;
 
     params[m].sigma_gamma = params[m].sigma*params[m].gamma;
     params[m].lambda_epsilon = params[m].lambda*params[m].epsilon;
     params[m].lambda_epsilon2 = 2.0*params[m].lambda*params[m].epsilon;
     params[m].c1 = params[m].biga*params[m].epsilon *
       params[m].powerp*params[m].bigb *
       pow(params[m].sigma,params[m].powerp);
     params[m].c2 = params[m].biga*params[m].epsilon*params[m].powerq *
       pow(params[m].sigma,params[m].powerq);
     params[m].c3 = params[m].biga*params[m].epsilon*params[m].bigb *
       pow(params[m].sigma,params[m].powerp+1.0);
     params[m].c4 = params[m].biga*params[m].epsilon *
       pow(params[m].sigma,params[m].powerq+1.0);
     params[m].c5 = params[m].biga*params[m].epsilon*params[m].bigb *
       pow(params[m].sigma,params[m].powerp);
     params[m].c6 = params[m].biga*params[m].epsilon *
       pow(params[m].sigma,params[m].powerq);
   }
 
   // set cutmax to max of all params
 
   cutmax = 0.0;
   for (m = 0; m < nparams; m++) {
     rtmp = sqrt(params[m].cutsq);
     if (rtmp > cutmax) cutmax = rtmp;
   }
 }
 
 /* ----------------------------------------------------------------------
    two-body term for one pair at squared distance rsq
    fforce = pair force factor (callers multiply it by delx,dely,delz)
    eng = pair energy, written only if eflag is set
 ------------------------------------------------------------------------- */
 
 void PairSW::twobody(Param *param, double rsq, double &fforce,
                      int eflag, double &eng)
 {
   const double r = sqrt(rsq);
   const double rinvsq = 1.0/rsq;
 
   // inverse powers r^-p and r^-q
 
   const double rp = pow(r,-param->powerp);
   const double rq = pow(r,-param->powerq);
 
   // 1/(r - cut) and the exponential built from it
 
   const double rainv = 1.0 / (r - param->cut);
   const double rainvsq = rainv*rainv*r;
   const double expsrainv = exp(param->sigma * rainv);
 
   const double radial = param->c1*rp - param->c2*rq +
     (param->c3*rp -param->c4*rq) * rainvsq;
   fforce = radial * expsrainv * rinvsq;
 
   if (eflag) eng = (param->c5*rp - param->c6*rq) * expsrainv;
 }
 
 /* ----------------------------------------------------------------------
    three-body term for the triplet i,j,k with central atom i
    delr1,rsq1 = i->j vector and its squared length (uses paramij)
    delr2,rsq2 = i->k vector and its squared length (uses paramik)
    costheta and the lambda*epsilon prefactors come from paramijk
    fj,fk = returned forces on j and k
    eng = triplet energy, written only if eflag is set
 ------------------------------------------------------------------------- */
 
 void PairSW::threebody(Param *paramij, Param *paramik, Param *paramijk,
                        double rsq1, double rsq2,
                        double *delr1, double *delr2,
                        double *fj, double *fk, int eflag, double &eng)
 {
   // radial pieces of the i-j leg
 
   const double rij = sqrt(rsq1);
   const double rijinvsq = 1.0/rsq1;
   const double aij = 1.0/(rij - paramij->cut);
   const double gij = paramij->sigma_gamma * aij;
   const double gijsq = gij*aij/rij;
   const double expij = exp(gij);
 
   // radial pieces of the i-k leg
 
   const double rik = sqrt(rsq2);
   const double rikinvsq = 1.0/rsq2;
   const double aik = 1.0/(rik - paramik->cut);
   const double gik = paramik->sigma_gamma * aik;
   const double giksq = gik*aik/rik;
   const double expik = exp(gik);
 
   // cosine of the j-i-k angle and its offset from the reference value
 
   const double rinv12 = 1.0/(rij*rik);
   const double dot = delr1[0]*delr2[0] + delr1[1]*delr2[1] +
     delr1[2]*delr2[2];
   const double cs = dot * rinv12;
   const double delcs = cs - paramijk->costheta;
   const double delcssq = delcs*delcs;
 
   const double facexp = expij*expik;
 
   // lambda*epsilon is taken from the ijk entry only
 
   const double facrad = paramijk->lambda_epsilon * facexp*delcssq;
   const double frad1 = facrad*gijsq;
   const double frad2 = facrad*giksq;
   const double facang = paramijk->lambda_epsilon2 * facexp*delcs;
   const double facang12 = rinv12*facang;
   const double csfacang = cs*facang;
 
   // fj is completed before fk is written, one component at a time
 
   const double coefj = frad1 + rijinvsq*csfacang;
   for (int d = 0; d < 3; d++)
     fj[d] = delr1[d]*coefj - delr2[d]*facang12;
 
   const double coefk = frad2 + rikinvsq*csfacang;
   for (int d = 0; d < 3; d++)
     fk[d] = delr2[d]*coefk - delr1[d]*facang12;
 
   if (eflag) eng = facrad;
 }
diff --git a/src/MANYBODY/pair_tersoff.cpp b/src/MANYBODY/pair_tersoff.cpp
index 898252193..1b921359e 100755
--- a/src/MANYBODY/pair_tersoff.cpp
+++ b/src/MANYBODY/pair_tersoff.cpp
@@ -1,778 +1,778 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Aidan Thompson (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_tersoff.h"
 #include "atom.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "force.h"
 #include "comm.h"
 #include "memory.h"
 #include "error.h"
 
 #include "math_const.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 #define MAXLINE 1024
 #define DELTA 4
 
 /* ----------------------------------------------------------------------
    constructor: set the pair-style flags and start with empty
    element and parameter tables
 ------------------------------------------------------------------------- */
 
 PairTersoff::PairTersoff(LAMMPS *lmp) : Pair(lmp)
 {
   // pair-style flags
 
   manybody_flag = 1;
   one_coeff = 1;
   restartinfo = 0;
   single_enable = 0;
 
   // nothing has been read yet: no elements, no parameter entries
 
   elements = NULL;
   nelements = 0;
 
   params = NULL;
   nparams = 0;
   maxparam = 0;
 
   elem2param = NULL;
 }
 
 /* ----------------------------------------------------------------------
    destructor
    every piece is checked or safe to free when unset,
    since class can be destructed when incomplete
 ------------------------------------------------------------------------- */
 
 PairTersoff::~PairTersoff()
 {
   // element names, then the array that held them
 
   if (elements) {
     for (int ielem = 0; ielem < nelements; ielem++) delete [] elements[ielem];
   }
   delete [] elements;
 
   // parameter table and triplet lookup
 
   memory->destroy(params);
   memory->destroy(elem2param);
 
   // per-type arrays exist only after allocate() has run
 
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
   delete [] map;
 }
 
 /* ----------------------------------------------------------------------
    compute forces (and, when requested via eflag/vflag, energy and virial)
    for all atoms in the neighbor list
    per atom i: a repulsive two-body pass over half of the i-j pairs,
    then for each j within cutoff a zeta accumulation over k, the
    zeta-dependent pair force, and the attractive i-j-k forces
 ------------------------------------------------------------------------- */
 
 void PairTersoff::compute(int eflag, int vflag)
 {
   int i,j,k,ii,jj,kk,inum,jnum;
   int itype,jtype,ktype,iparam_ij,iparam_ijk;
   tagint itag,jtag;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,rsq1,rsq2;
   double delr1[3],delr2[3],fi[3],fj[3],fk[3];
   double zeta_ij,prefactor;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   // set up tallying if energy or virial is requested, else clear the flags
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = vflag_atom = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   tagint *tag = atom->tag;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int newton_pair = force->newton_pair;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over full neighbor list of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     itag = tag[i];
     itype = map[type[i]];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
 
     // two-body interactions, skip half of them
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
       jtag = tag[j];
 
       // decide whether this i-j entry is skipped:
       // by parity of itag+jtag when the tags differ,
       // by comparing z, then y, then x coordinates when the tags are equal
 
       if (itag > jtag) {
         if ((itag+jtag) % 2 == 0) continue;
       } else if (itag < jtag) {
         if ((itag+jtag) % 2 == 1) continue;
       } else {
         if (x[j][2] < x[i][2]) continue;
         if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
         if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
       }
 
       jtype = map[type[j]];
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       // pair parameters are looked up with k = j
 
       iparam_ij = elem2param[itype][jtype][jtype];
       if (rsq > params[iparam_ij].cutsq) continue;
 
       repulsive(&params[iparam_ij],rsq,fpair,eflag,evdwl);
 
       // equal and opposite force on i and j
 
       f[i][0] += delx*fpair;
       f[i][1] += dely*fpair;
       f[i][2] += delz*fpair;
       f[j][0] -= delx*fpair;
       f[j][1] -= dely*fpair;
       f[j][2] -= delz*fpair;
 
       if (evflag) ev_tally(i,j,nlocal,newton_pair,
                            evdwl,0.0,fpair,delx,dely,delz);
     }
 
     // three-body interactions
     // skip immediately if I-J is not within cutoff
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
       jtype = map[type[j]];
       iparam_ij = elem2param[itype][jtype][jtype];
 
       // delr1 points from i to j (opposite sign to delx,dely,delz above)
 
       delr1[0] = x[j][0] - xtmp;
       delr1[1] = x[j][1] - ytmp;
       delr1[2] = x[j][2] - ztmp;
       rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];
       if (rsq1 > params[iparam_ij].cutsq) continue;
 
       // accumulate bondorder zeta for each i-j interaction via loop over k
 
       zeta_ij = 0.0;
 
       for (kk = 0; kk < jnum; kk++) {
         if (jj == kk) continue;
         k = jlist[kk];
         k &= NEIGHMASK;
         ktype = map[type[k]];
         iparam_ijk = elem2param[itype][jtype][ktype];
 
         // delr2 points from i to k; cutoff test uses the ijk entry
 
         delr2[0] = x[k][0] - xtmp;
         delr2[1] = x[k][1] - ytmp;
         delr2[2] = x[k][2] - ztmp;
         rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2];
         if (rsq2 > params[iparam_ijk].cutsq) continue;
 
         zeta_ij += zeta(&params[iparam_ijk],rsq1,rsq2,delr1,delr2);
       }
 
       // pairwise force due to zeta
       // also returns prefactor, which is passed to attractive() below
 
       force_zeta(&params[iparam_ij],rsq1,zeta_ij,fpair,prefactor,eflag,evdwl);
 
       f[i][0] += delr1[0]*fpair;
       f[i][1] += delr1[1]*fpair;
       f[i][2] += delr1[2]*fpair;
       f[j][0] -= delr1[0]*fpair;
       f[j][1] -= delr1[1]*fpair;
       f[j][2] -= delr1[2]*fpair;
 
       // fpair and delr1 are both negated in the tally call
 
       if (evflag) ev_tally(i,j,nlocal,newton_pair,
                            evdwl,0.0,-fpair,-delr1[0],-delr1[1],-delr1[2]);
 
       // attractive term via loop over k
 
       for (kk = 0; kk < jnum; kk++) {
         if (jj == kk) continue;
         k = jlist[kk];
         k &= NEIGHMASK;
         ktype = map[type[k]];
         iparam_ijk = elem2param[itype][jtype][ktype];
 
         delr2[0] = x[k][0] - xtmp;
         delr2[1] = x[k][1] - ytmp;
         delr2[2] = x[k][2] - ztmp;
         rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2];
         if (rsq2 > params[iparam_ijk].cutsq) continue;
 
         // fi,fj,fk = force contributions on the three atoms of the triplet
 
         attractive(&params[iparam_ijk],prefactor,
                    rsq1,rsq2,delr1,delr2,fi,fj,fk);
 
         f[i][0] += fi[0];
         f[i][1] += fi[1];
         f[i][2] += fi[2];
         f[j][0] += fj[0];
         f[j][1] += fj[1];
         f[j][2] += fj[2];
         f[k][0] += fk[0];
         f[k][1] += fk[1];
         f[k][2] += fk[2];
 
         // only the per-atom virial is tallied for the triplet
 
         if (vflag_atom) v_tally3(i,j,k,fj,fk,delr1,delr2);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate the per-type arrays owned by this pair style
    setflag and cutsq are (ntypes+1) x (ntypes+1), map has ntypes+1 entries
 ------------------------------------------------------------------------- */
 
 void PairTersoff::allocate()
 {
   allocated = 1;
 
   // every array gets one extra slot per dimension beyond ntypes
 
   const int np1 = atom->ntypes + 1;
 
   memory->create(setflag,np1,np1,"pair:setflag");
   memory->create(cutsq,np1,np1,"pair:cutsq");
   map = new int[np1];
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairTersoff::settings(int narg, char **arg)
 {
   if (narg != 0) error->all(FLERR,"Illegal pair_style command");
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
    expected args: "*" "*" potential-file, then one element name (or NULL)
    for each atom type
 ------------------------------------------------------------------------- */
 
 void PairTersoff::coeff(int narg, char **arg)
 {
   if (!allocated) allocate();
 
   const int ntypes = atom->ntypes;
 
   if (narg != 3 + ntypes)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // the I,J args must both be the wildcard "*"
 
   if (!(strcmp(arg[0],"*") == 0 && strcmp(arg[1],"*") == 0))
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // discard any element list left over from an earlier call
 
   if (elements) {
     for (int ielem = 0; ielem < nelements; ielem++) delete [] elements[ielem];
     delete [] elements;
   }
   elements = new char*[ntypes];
   for (int ielem = 0; ielem < ntypes; ielem++) elements[ielem] = NULL;
 
   // map each atom type to an element index, -1 if the arg is NULL
   // a name not seen before is appended to elements and bumps nelements
 
   nelements = 0;
   for (int iarg = 3; iarg < narg; iarg++) {
     const int itype = iarg - 2;
     const char *name = arg[iarg];
 
     if (strcmp(name,"NULL") == 0) {
       map[itype] = -1;
       continue;
     }
 
     int ielem = 0;
     while (ielem < nelements && strcmp(name,elements[ielem]) != 0) ielem++;
     map[itype] = ielem;
 
     if (ielem == nelements) {
       elements[ielem] = new char[strlen(name) + 1];
       strcpy(elements[ielem],name);
       nelements++;
     }
   }
 
   // read potential file and initialize potential parameters
 
   read_file(arg[2]);
   setup();
 
   // coeff() is called once with I,J = * *, so rewrite every setflag entry:
   // 1 where both types are mapped to elements, 0 otherwise
 
   int count = 0;
   for (int itype = 1; itype <= ntypes; itype++)
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       if (map[itype] >= 0 && map[jtype] >= 0) {
         setflag[itype][jtype] = 1;
         count++;
       } else setflag[itype][jtype] = 0;
     }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
    errors out unless atom IDs are enabled and newton pair is on,
    then requests a full neighbor list
 ------------------------------------------------------------------------- */
 
 void PairTersoff::init_style()
 {
   if (atom->tag_enable == 0)
     error->all(FLERR,"Pair style Tersoff requires atom IDs");
   if (force->newton_pair == 0)
     error->all(FLERR,"Pair style Tersoff requires newton pair on");
 
   // need a full neighbor list
   // the request's half flag is cleared and its full flag is set
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairTersoff::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   return cutmax;
 }
 
 /* ----------------------------------------------------------------------
    read the Tersoff potential file and fill params[]
    the file is opened and read on proc 0, each line is broadcast to all procs
    one entry = 17 words (3 element names + 14 numeric values) and may
    span several lines; '#' starts a comment
    entries whose 3 element names are not all in the elements list are skipped
 ------------------------------------------------------------------------- */
 
 void PairTersoff::read_file(char *file)
 {
   int params_per_line = 17;
 
   // one extra slot: the strtok() loop below also stores the terminating NULL
 
   char **words = new char*[params_per_line+1];
 
   // throw away parameters from any previous read
 
   memory->sfree(params);
   params = NULL;
   nparams = maxparam = 0;
 
   // open file on proc 0
 
   FILE *fp;
   if (comm->me == 0) {
     fp = force->open_potential(file);
     if (fp == NULL) {
       // bounded write: a long file name is truncated in the message
       // instead of overflowing the fixed-size buffer
       char str[128];
       snprintf(str,sizeof(str),"Cannot open Tersoff potential file %s",file);
       error->one(FLERR,str);
     }
   }
 
   // read each line out of file, skipping blank lines or leading '#'
   // store line of params if all 3 element tags are in element list
 
   int n,nwords,ielement,jelement,kelement;
   char line[MAXLINE],*ptr;
   int eof = 0;
 
   while (1) {
     if (comm->me == 0) {
       ptr = fgets(line,MAXLINE,fp);
       if (ptr == NULL) {
         eof = 1;
         fclose(fp);
       } else n = strlen(line) + 1;
     }
 
     // eof flag first, then length (including terminator), then the text
 
     MPI_Bcast(&eof,1,MPI_INT,0,world);
     if (eof) break;
     MPI_Bcast(&n,1,MPI_INT,0,world);
     MPI_Bcast(line,n,MPI_CHAR,0,world);
 
     // strip comment, skip line if blank
 
     if ((ptr = strchr(line,'#'))) *ptr = '\0';
     nwords = atom->count_words(line);
     if (nwords == 0) continue;
 
     // concatenate additional lines until have params_per_line words
 
     while (nwords < params_per_line) {
       n = strlen(line);
       if (comm->me == 0) {
         ptr = fgets(&line[n],MAXLINE-n,fp);
         if (ptr == NULL) {
           eof = 1;
           fclose(fp);
         } else n = strlen(line) + 1;
       }
       MPI_Bcast(&eof,1,MPI_INT,0,world);
       if (eof) break;
       MPI_Bcast(&n,1,MPI_INT,0,world);
       MPI_Bcast(line,n,MPI_CHAR,0,world);
       if ((ptr = strchr(line,'#'))) *ptr = '\0';
       nwords = atom->count_words(line);
     }
 
     // also triggers when the file ended in the middle of an entry
 
     if (nwords != params_per_line)
       error->all(FLERR,"Incorrect format in Tersoff potential file");
 
     // words = ptrs to all words in line
 
     nwords = 0;
     words[nwords++] = strtok(line," \t\n\r\f");
     while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue;
 
     // ielement,jelement,kelement = 1st args
     // if all 3 args are in element list, then parse this line
     // else skip to next line
 
     for (ielement = 0; ielement < nelements; ielement++)
       if (strcmp(words[0],elements[ielement]) == 0) break;
     if (ielement == nelements) continue;
     for (jelement = 0; jelement < nelements; jelement++)
       if (strcmp(words[1],elements[jelement]) == 0) break;
     if (jelement == nelements) continue;
     for (kelement = 0; kelement < nelements; kelement++)
       if (strcmp(words[2],elements[kelement]) == 0) break;
     if (kelement == nelements) continue;
 
     // load up parameter settings and error check their values
     // params[] grows by DELTA entries whenever it is full
 
     if (nparams == maxparam) {
       maxparam += DELTA;
       params = (Param *) memory->srealloc(params,maxparam*sizeof(Param),
                                           "pair:params");
     }
 
     params[nparams].ielement = ielement;
     params[nparams].jelement = jelement;
     params[nparams].kelement = kelement;
     params[nparams].powerm = atof(words[3]);
     params[nparams].gamma = atof(words[4]);
     params[nparams].lam3 = atof(words[5]);
     params[nparams].c = atof(words[6]);
     params[nparams].d = atof(words[7]);
     params[nparams].h = atof(words[8]);
     params[nparams].powern = atof(words[9]);
     params[nparams].beta = atof(words[10]);
     params[nparams].lam2 = atof(words[11]);
     params[nparams].bigb = atof(words[12]);
     params[nparams].bigr = atof(words[13]);
     params[nparams].bigd = atof(words[14]);
     params[nparams].lam1 = atof(words[15]);
     params[nparams].biga = atof(words[16]);
 
     // currently only allow m exponent of 1 or 3
     // powermint = integer part of powerm, compared against powerm below
 
     params[nparams].powermint = int(params[nparams].powerm);
 
     // rejected: negative c,d,powern,beta,lam2,bigb,bigr,bigd,lam1,biga,gamma,
     // bigd larger than bigr, non-integer powerm, powerm other than 1 or 3
     // lam3 and h are not range-checked
 
     if (params[nparams].c < 0.0 || params[nparams].d < 0.0 ||
         params[nparams].powern < 0.0 || params[nparams].beta < 0.0 ||
         params[nparams].lam2 < 0.0 || params[nparams].bigb < 0.0 ||
         params[nparams].bigr < 0.0 ||params[nparams].bigd < 0.0 ||
         params[nparams].bigd > params[nparams].bigr ||
         params[nparams].lam1 < 0.0 || params[nparams].biga < 0.0 ||
         params[nparams].powerm - params[nparams].powermint != 0.0 ||
         (params[nparams].powermint != 3 && params[nparams].powermint != 1) ||
         params[nparams].gamma < 0.0)
       error->all(FLERR,"Illegal Tersoff parameter");
 
     nparams++;
   }
 
   delete [] words;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairTersoff::setup()
 {
   int i,j,k,m,n;
 
   // set elem2param for all element triplet combinations
   // must be a single exact match to lines read from file
   // do not allow for ACB in place of ABC
 
   memory->destroy(elem2param);
   memory->create(elem2param,nelements,nelements,nelements,"pair:elem2param");
 
   for (i = 0; i < nelements; i++)
     for (j = 0; j < nelements; j++)
       for (k = 0; k < nelements; k++) {
         n = -1;
         for (m = 0; m < nparams; m++) {
           if (i == params[m].ielement && j == params[m].jelement &&
               k == params[m].kelement) {
             if (n >= 0) error->all(FLERR,"Potential file has duplicate entry");
             n = m;
           }
         }
         if (n < 0) error->all(FLERR,"Potential file is missing an entry");
         elem2param[i][j][k] = n;
       }
 
 
   // compute parameter values derived from inputs
 
   for (m = 0; m < nparams; m++) {
     params[m].cut = params[m].bigr + params[m].bigd;
     params[m].cutsq = params[m].cut*params[m].cut;
 
     params[m].c1 = pow(2.0*params[m].powern*1.0e-16,-1.0/params[m].powern);
     params[m].c2 = pow(2.0*params[m].powern*1.0e-8,-1.0/params[m].powern);
     params[m].c3 = 1.0/params[m].c2;
     params[m].c4 = 1.0/params[m].c1;
   }
 
   // set cutmax to max of all params
 
   cutmax = 0.0;
   for (m = 0; m < nparams; m++)
     if (params[m].cut > cutmax) cutmax = params[m].cut;
 }
 
 /* ----------------------------------------------------------------------
    repulsive term for one pair at squared distance rsq
    fforce = pair force factor (callers multiply it by delx,dely,delz)
    eng = pair energy, written only if eflag is set
 ------------------------------------------------------------------------- */
 
 void PairTersoff::repulsive(Param *param, double rsq, double &fforce,
                             int eflag, double &eng)
 {
   const double r = sqrt(rsq);
 
   // cutoff function, its derivative, and exp(-lam1*r)
 
   const double fc = ters_fc(r,param);
   const double fc_d = ters_fc_d(r,param);
   const double expterm = exp(-param->lam1 * r);
 
   fforce = -param->biga * expterm * (fc_d - fc*param->lam1) / r;
 
   if (eflag) eng = fc * param->biga * expterm;
 }
 
 /* ----------------------------------------------------------------------
    contribution of one neighbor k to the bond-order sum zeta of pair i-j
    delrij,delrik = i->j and i->k vectors, rsqij,rsqik = their squared lengths
    returns ters_fc(rik) * ters_gijk(costheta) * clamped exponential
 ------------------------------------------------------------------------- */
 
 double PairTersoff::zeta(Param *param, double rsqij, double rsqik,
                          double *delrij, double *delrik)
 {
   const double rij = sqrt(rsqij);
   const double rik = sqrt(rsqik);
 
   // cosine of the angle between the two vectors
 
   const double dot = delrij[0]*delrik[0] + delrij[1]*delrik[1] +
     delrij[2]*delrik[2];
   const double costheta = dot / (rij*rik);
 
   // exponent = lam3*(rij-rik), cubed when powermint is 3
 
   double arg = param->lam3 * (rij-rik);
   if (param->powermint == 3) arg = pow(arg,3.0);
 
   // outside +/-69.0776 the exponential is replaced by 1.e30 or 0.0
 
   const double ex_delr =
     (arg > 69.0776) ? 1.e30 : ((arg < -69.0776) ? 0.0 : exp(arg));
 
   return ters_fc(rik,param) * ters_gijk(costheta,param) * ex_delr;
 }
 
 /* ----------------------------------------------------------------------
    zeta-dependent pair term for one pair at squared distance rsq
    fforce = pair force factor
    prefactor = -0.5 * fa * d(bij)/d(zeta), consumed by attractive()
    eng = 0.5*bij*fa, written only if eflag is set
 ------------------------------------------------------------------------- */
 
 void PairTersoff::force_zeta(Param *param, double rsq, double zeta_ij,
                              double &fforce, double &prefactor,
                              int eflag, double &eng)
 {
   const double r = sqrt(rsq);
 
   // attractive function, its derivative, and the bond order
 
   const double fa = ters_fa(r,param);
   const double fa_d = ters_fa_d(r,param);
   const double bij = ters_bij(zeta_ij,param);
 
   fforce = 0.5*bij*fa_d / r;
   prefactor = -0.5*fa * ters_bij_d(zeta_ij,param);
 
   if (eflag) eng = 0.5*bij*fa;
 }
 
 /* ----------------------------------------------------------------------
    attractive term
    normalizes the i->j and i->k vectors and hands them, with their
    lengths and prefactor, to ters_zetaterm_d() which fills fi,fj,fk
 ------------------------------------------------------------------------- */
 
 void PairTersoff::attractive(Param *param, double prefactor,
                              double rsqij, double rsqik,
                              double *delrij, double *delrik,
                              double *fi, double *fj, double *fk)
 {
   double unit_ij[3],unit_ik[3];
 
   // unit vector along i->j
 
   const double rij = sqrt(rsqij);
   const double inv_rij = 1.0/rij;
   vec3_scale(inv_rij,delrij,unit_ij);
 
   // unit vector along i->k
 
   const double rik = sqrt(rsqik);
   const double inv_rik = 1.0/rik;
   vec3_scale(inv_rik,delrik,unit_ik);
 
   ters_zetaterm_d(prefactor,unit_ij,rij,unit_ik,rik,fi,fj,fk,param);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairTersoff::ters_fc(double r, Param *param)
 {
   // cutoff function centered at bigr with half-width bigd
   // 1 below bigr-bigd, 0 above bigr+bigd, sine-shaped switch in between
 
   const double center = param->bigr;
   const double halfwidth = param->bigd;
   const double inner = center-halfwidth;
   const double outer = center+halfwidth;
 
   if (r < inner) return 1.0;
   if (r > outer) return 0.0;
 
   const double phase = MY_PI2*(r - center)/halfwidth;
   return 0.5*(1.0 - sin(phase));
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairTersoff::ters_fc_d(double r, Param *param)
 {
   // derivative of ters_fc() wrt r
   // zero on both flat parts of the cutoff function
 
   const double center = param->bigr;
   const double halfwidth = param->bigd;
 
   if (r < center-halfwidth || r > center+halfwidth) return 0.0;
 
   const double phase = MY_PI2*(r - center)/halfwidth;
   return -(MY_PI4/halfwidth) * cos(phase);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairTersoff::ters_fa(double r, Param *param)
 {
   // -bigb * exp(-lam2*r) * ters_fc(r), zero beyond bigr+bigd
 
   const double rmax = param->bigr + param->bigd;
   if (r > rmax) return 0.0;
 
   const double decay = exp(-param->lam2 * r);
   return -param->bigb * decay * ters_fc(r,param);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairTersoff::ters_fa_d(double r, Param *param)
 {
   // derivative of ters_fa() wrt r, zero beyond bigr+bigd
 
   const double rmax = param->bigr + param->bigd;
   if (r > rmax) return 0.0;
 
   const double decay = exp(-param->lam2 * r);
   const double bracket =
     param->lam2 * ters_fc(r,param) - ters_fc_d(r,param);
   return param->bigb * decay * bracket;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairTersoff::ters_bij(double zeta, Param *param)
 {
   // bond-order function of bz = beta*zeta
   // thresholds c1,c2,c4,c3 are tested in that order and select
   //   closed-form branches; the general expression is the fall-through
 
   const double bz = param->beta * zeta;
   const double n = param->powern;
 
   if (bz > param->c1) {
     return 1.0/sqrt(bz);
   } else if (bz > param->c2) {
     return (1.0 - pow(bz,-n) / (2.0*n))/sqrt(bz);
   } else if (bz < param->c4) {
     return 1.0;
   } else if (bz < param->c3) {
     return 1.0 - pow(bz,n)/(2.0*n);
   }
 
   return pow(1.0 + pow(bz,n), -1.0/(2.0*n));
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairTersoff::ters_bij_d(double zeta, Param *param)
 {
   // derivative counterpart of ters_bij()
   // uses the same c1,c2,c4,c3 threshold order as ters_bij()
 
   const double bz = param->beta * zeta;
   const double n = param->powern;
 
   if (bz > param->c1) {
     return param->beta * -0.5*pow(bz,-1.5);
   } else if (bz > param->c2) {
     return param->beta * (-0.5*pow(bz,-1.5) *
                           (1.0 - 0.5*(1.0 +  1.0/(2.0*n)) *
                            pow(bz,-n)));
   } else if (bz < param->c4) {
     return 0.0;
   } else if (bz < param->c3) {
     return -0.5*param->beta * pow(bz,n-1.0);
   }
 
   // general branch, note the division by zeta
 
   const double bz_n = pow(bz,n);
   return -0.5 * pow(1.0+bz_n, -1.0-(1.0/(2.0*n)))*bz_n / zeta;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairTersoff::ters_zetaterm_d(double prefactor,
                                   double *rij_hat, double rij,
                                   double *rik_hat, double rik,
                                   double *dri, double *drj, double *drk,
                                   Param *param)
 {
   // fills dri,drj,drk with the sums given in the formula comments below,
   //   each multiplied by prefactor
   // rij_hat,rik_hat are passed in already normalized by attractive(),
   //   rij,rik are the matching lengths
 
   double gijk,gijk_d,ex_delr,ex_delr_d,fc,dfc,cos_theta,tmp;
   double dcosdri[3],dcosdrj[3],dcosdrk[3];
 
   // cutoff function and its derivative are evaluated at rik only
 
   fc = ters_fc(rik,param);
   dfc = ters_fc_d(rik,param);
 
   // same exponent and clamping as in zeta()
 
   if (param->powermint == 3) tmp = pow(param->lam3 * (rij-rik),3.0);
   else tmp = param->lam3 * (rij-rik);
 
   if (tmp > 69.0776) ex_delr = 1.e30;
   else if (tmp < -69.0776) ex_delr = 0.0;
   else ex_delr = exp(tmp);
 
   // ex_delr_d is built from the (possibly clamped) ex_delr
 
   if (param->powermint == 3)
     ex_delr_d = 3.0*pow(param->lam3,3.0) * pow(rij-rik,2.0)*ex_delr;
   else ex_delr_d = param->lam3 * ex_delr;
 
   // angular function, its derivative, and gradients of cos_theta
 
   cos_theta = vec3_dot(rij_hat,rik_hat);
   gijk = ters_gijk(cos_theta,param);
   gijk_d = ters_gijk_d(cos_theta,param);
   costheta_d(rij_hat,rij,rik_hat,rik,dcosdri,dcosdrj,dcosdrk);
 
   // compute the derivative wrt Ri
   // dri = -dfc*gijk*ex_delr*rik_hat;
   // dri += fc*gijk_d*ex_delr*dcosdri;
   // dri += fc*gijk*ex_delr_d*(rik_hat - rij_hat);
 
   vec3_scale(-dfc*gijk*ex_delr,rik_hat,dri);
   vec3_scaleadd(fc*gijk_d*ex_delr,dcosdri,dri,dri);
   vec3_scaleadd(fc*gijk*ex_delr_d,rik_hat,dri,dri);
   vec3_scaleadd(-fc*gijk*ex_delr_d,rij_hat,dri,dri);
   vec3_scale(prefactor,dri,dri);
 
   // compute the derivative wrt Rj
   // drj = fc*gijk_d*ex_delr*dcosdrj;
   // drj += fc*gijk*ex_delr_d*rij_hat;
 
   vec3_scale(fc*gijk_d*ex_delr,dcosdrj,drj);
   vec3_scaleadd(fc*gijk*ex_delr_d,rij_hat,drj,drj);
   vec3_scale(prefactor,drj,drj);
 
   // compute the derivative wrt Rk
   // drk = dfc*gijk*ex_delr*rik_hat;
   // drk += fc*gijk_d*ex_delr*dcosdrk;
   // drk += -fc*gijk*ex_delr_d*rik_hat;
 
   vec3_scale(dfc*gijk*ex_delr,rik_hat,drk);
   vec3_scaleadd(fc*gijk_d*ex_delr,dcosdrk,drk,drk);
   vec3_scaleadd(-fc*gijk*ex_delr_d,rik_hat,drk,drk);
   vec3_scale(prefactor,drk,drk);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairTersoff::costheta_d(double *rij_hat, double rij,
                              double *rik_hat, double rik,
                              double *dri, double *drj, double *drk)
 {
   // gradients of cos(theta) = rij_hat . rik_hat
   // dri is derivative wrt Ri, drj wrt Rj, drk wrt Rk
 
   const double cosine = vec3_dot(rij_hat,rik_hat);
   const double rijinv = 1.0/rij;
   const double rikinv = 1.0/rik;
 
   // drj = (rik_hat - cosine*rij_hat) / rij
 
   vec3_scaleadd(-cosine,rij_hat,rik_hat,drj);
   vec3_scale(rijinv,drj,drj);
 
   // drk = (rij_hat - cosine*rik_hat) / rik
 
   vec3_scaleadd(-cosine,rik_hat,rij_hat,drk);
   vec3_scale(rikinv,drk,drk);
 
   // dri = -(drj + drk)
 
   vec3_add(drj,drk,dri);
   vec3_scale(-1.0,dri,dri);
 }
diff --git a/src/MC/fix_bond_create.cpp b/src/MC/fix_bond_create.cpp
index 6b1041a2d..993672939 100755
--- a/src/MC/fix_bond_create.cpp
+++ b/src/MC/fix_bond_create.cpp
@@ -1,1444 +1,1444 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "mpi.h"
 #include "string.h"
 #include "stdlib.h"
 #include "fix_bond_create.h"
 #include "update.h"
 #include "respa.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "force.h"
 #include "pair.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "random_mars.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
 #define BIG 1.0e20
 #define DELTA 16
 
 /* ---------------------------------------------------------------------- */
 
 FixBondCreate::FixBondCreate(LAMMPS *lmp, int narg, char **arg) :
   Fix(lmp, narg, arg)
 {
   // required args, as read below:
   //   arg[3] = Nevery, arg[4] = iatomtype, arg[5] = jatomtype,
   //   arg[6] = cutoff distance, arg[7] = bond type
   // optional keywords start at arg[8]

   if (narg < 8) error->all(FLERR,"Illegal fix bond/create command");
 
   MPI_Comm_rank(world,&me);
 
   nevery = force->inumeric(FLERR,arg[3]);
   if (nevery <= 0) error->all(FLERR,"Illegal fix bond/create command");
 
   // flags inherited from Fix
   // this fix can force reneighboring and exposes a 2-element global vector

   force_reneighbor = 1;
   next_reneighbor = -1;
   vector_flag = 1;
   size_vector = 2;
   global_freq = 1;
   extvector = 0;
 
   iatomtype = force->inumeric(FLERR,arg[4]);
   jatomtype = force->inumeric(FLERR,arg[5]);
   double cutoff = force->numeric(FLERR,arg[6]);
   btype = force->inumeric(FLERR,arg[7]);
 
   if (iatomtype < 1 || iatomtype > atom->ntypes ||
       jatomtype < 1 || jatomtype > atom->ntypes)
     error->all(FLERR,"Invalid atom type in fix bond/create command");
   if (cutoff < 0.0) error->all(FLERR,"Illegal fix bond/create command");
   if (btype < 1 || btype > atom->nbondtypes)
     error->all(FLERR,"Invalid bond type in fix bond/create command");
 
   // only the squared cutoff is stored

   cutsq = cutoff*cutoff;
 
   // optional keywords
   // defaults: no max bond limit (0), atom types unchanged, fraction = 1.0,
   //   no angle/dihedral/improper types
 
   imaxbond = 0;
   inewtype = iatomtype;
   jmaxbond = 0;
   jnewtype = jatomtype;
   fraction = 1.0;
   int seed = 12345;
   atype = dtype = itype = 0;
 
   int iarg = 8;
   while (iarg < narg) {
     if (strcmp(arg[iarg],"iparam") == 0) {
       // iparam maxbond newtype
       if (iarg+3 > narg) error->all(FLERR,"Illegal fix bond/create command");
       imaxbond = force->inumeric(FLERR,arg[iarg+1]);
       inewtype = force->inumeric(FLERR,arg[iarg+2]);
       if (imaxbond < 0) error->all(FLERR,"Illegal fix bond/create command");
       if (inewtype < 1 || inewtype > atom->ntypes)
         error->all(FLERR,"Invalid atom type in fix bond/create command");
       iarg += 3;
     } else if (strcmp(arg[iarg],"jparam") == 0) {
       // jparam maxbond newtype
       if (iarg+3 > narg) error->all(FLERR,"Illegal fix bond/create command");
       jmaxbond = force->inumeric(FLERR,arg[iarg+1]);
       jnewtype = force->inumeric(FLERR,arg[iarg+2]);
       if (jmaxbond < 0) error->all(FLERR,"Illegal fix bond/create command");
       if (jnewtype < 1 || jnewtype > atom->ntypes)
         error->all(FLERR,"Invalid atom type in fix bond/create command");
       iarg += 3;
     } else if (strcmp(arg[iarg],"prob") == 0) {
       // prob fraction seed, fraction in [0,1], seed > 0
       if (iarg+3 > narg) error->all(FLERR,"Illegal fix bond/create command");
       fraction = force->numeric(FLERR,arg[iarg+1]);
       seed = force->inumeric(FLERR,arg[iarg+2]);
       if (fraction < 0.0 || fraction > 1.0)
         error->all(FLERR,"Illegal fix bond/create command");
       if (seed <= 0) error->all(FLERR,"Illegal fix bond/create command");
       iarg += 3;
     } else if (strcmp(arg[iarg],"atype") == 0) {
       // atype value, upper bound is checked later in init()
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/create command");
       atype = force->inumeric(FLERR,arg[iarg+1]);
       if (atype < 0) error->all(FLERR,"Illegal fix bond/create command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"dtype") == 0) {
       // dtype value, upper bound is checked later in init()
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/create command");
       dtype = force->inumeric(FLERR,arg[iarg+1]);
       if (dtype < 0) error->all(FLERR,"Illegal fix bond/create command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"itype") == 0) {
       // itype value, upper bound is checked later in init()
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix bond/create command");
       itype = force->inumeric(FLERR,arg[iarg+1]);
       if (itype < 0) error->all(FLERR,"Illegal fix bond/create command");
       iarg += 2;
     } else error->all(FLERR,"Illegal fix bond/create command");
   }
 
   // error check
   // if both atom types are the same, iparam and jparam must agree
 
   if (atom->molecular != 1)
     error->all(FLERR,"Cannot use fix bond/create with non-molecular systems");
   if (iatomtype == jatomtype &&
       ((imaxbond != jmaxbond) || (inewtype != jnewtype)))
     error->all(FLERR,
                "Inconsistent iparam/jparam values in fix bond/create command");
 
   // initialize Marsaglia RNG with processor-unique seed
 
   random = new RanMars(lmp,seed + me);
 
   // perform initial allocation of atom-based arrays
   // register with Atom class
   // bondcount values will be initialized in setup()
 
   bondcount = NULL;
   grow_arrays(atom->nmax);
   atom->add_callback(0);
   countflag = 0;
 
   // set comm sizes needed by this fix
   // forward is big due to comm of final bond partners and 1-2 neighbors
 
   comm_forward = MAX(2,2+atom->maxspecial);
   comm_reverse = 2;
 
   // allocate arrays local to this fix
   // they are sized on first use in post_integrate()
 
   nmax = 0;
   partner = finalpartner = NULL;
   distsq = NULL;
 
   maxcreate = 0;
   created = NULL;
 
   // copy = special list for one atom
   // size = ms^2 + ms is sufficient
   // b/c in rebuild_special() neighs of all 1-2s are added,
   //   then a dedup(), then neighs of all 1-3s are added, then final dedup()
   // this means intermediate size cannot exceed ms^2 + ms
 
   int maxspecial = atom->maxspecial;
   copy = new tagint[maxspecial*maxspecial + maxspecial];
 
   // zero out stats
 
   createcount = 0;
   createcounttotal = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 FixBondCreate::~FixBondCreate()
 {
   // remove this fix from the Atom class callback list
 
   atom->delete_callback(id,0);
 
   // objects allocated with new in the constructor
 
   delete random;
   delete [] copy;
 
   // arrays managed through the Memory class
 
   memory->destroy(created);
   memory->destroy(distsq);
   memory->destroy(finalpartner);
   memory->destroy(partner);
   memory->destroy(bondcount);
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixBondCreate::setmask()
 {
   // request the POST_INTEGRATE and POST_INTEGRATE_RESPA hooks
 
   return POST_INTEGRATE | POST_INTEGRATE_RESPA;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixBondCreate::init()
 {
   // validates settings against the current force field
   //   and requests the neighbor list used by post_integrate()

   // store number of rRESPA levels if a respa integrator is in use

   if (strstr(update->integrate_style,"respa"))
     nlevels_respa = ((Respa *) update->integrate)->nlevels;
 
   // check cutoff for iatomtype,jatomtype
   // a pair style must be defined and its cutoff must not be shorter
 
   if (force->pair == NULL || cutsq > force->pair->cutsq[iatomtype][jatomtype])
     error->all(FLERR,"Fix bond/create cutoff is longer than pairwise cutoff");
 
   // enable angle/dihedral/improper creation if atype/dtype/itype
   //   option was used and a force field has been specified
   // the upper bound of each type is checked here, not in the constructor
 
   if (atype && force->angle) {
     angleflag = 1;
     if (atype > atom->nangletypes) 
       error->all(FLERR,"Fix bond/create angle type is invalid");
   } else angleflag = 0;
 
   if (dtype && force->dihedral) {
     dihedralflag = 1;
     if (dtype > atom->ndihedraltypes) 
       error->all(FLERR,"Fix bond/create dihedral type is invalid");
   } else dihedralflag = 0;
 
   if (itype && force->improper) {
     improperflag = 1;
     if (itype > atom->nimpropertypes) 
       error->all(FLERR,"Fix bond/create improper type is invalid");
   } else improperflag = 0;
 
   // improper styles matching "class2" or "ring" are rejected

   if (force->improper) {
     if (force->improper_match("class2") || force->improper_match("ring"))
       error->all(FLERR,"Cannot yet use fix bond/create with this "
                  "improper style");
   }
 
   // need a half neighbor list, built every Nevery steps
   // flagged as an occasional fix list, built on demand via build_one()
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->fix = 1;
   neighbor->requests[irequest]->occasional = 1;
 
   lastcheck = -1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixBondCreate::init_list(int id, NeighList *ptr)
 {
   list = ptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixBondCreate::setup(int vflag)
 {
   int i,j,m;
 
   // compute initial bondcount if this is first run
   // can't do this earlier, in constructor or init, b/c need ghost info
 
   if (countflag) return;
   countflag = 1;
 
   // count bonds stored with each bond I own
   // if newton bond is not set, just increment count on atom I
   // if newton bond is set, also increment count on atom J even if ghost
   // bondcount is long enough to tally ghost atom counts
 
   int *num_bond = atom->num_bond;
   int **bond_type = atom->bond_type;
   tagint **bond_atom = atom->bond_atom;
   int nlocal = atom->nlocal;
   int nghost = atom->nghost;
   int nall = nlocal + nghost;
   int newton_bond = force->newton_bond;
 
   for (i = 0; i < nall; i++) bondcount[i] = 0;
 
   for (i = 0; i < nlocal; i++)
     for (j = 0; j < num_bond[i]; j++) {
       if (bond_type[i][j] == btype) {
         bondcount[i]++;
         if (newton_bond) {
           m = atom->map(bond_atom[i][j]);
           if (m < 0) 
             error->one(FLERR,"Fix bond/create needs ghost atoms "
                        "from further away");
           bondcount[m]++;
         }
       }
     }
 
   // if newton_bond is set, need to sum bondcount
 
   commflag = 1;
   if (newton_bond) comm->reverse_comm_fix(this,1);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixBondCreate::post_integrate()
 {
   // every nevery steps:
   //   each atom picks its closest eligible partner within the cutoff,
   //   a bond is created when two atoms pick each other
   //     and the probability test passes,
   //   then special lists and induced topology are updated

   int i,j,k,m,n,ii,jj,inum,jnum,itype,jtype,n1,n2,n3,possible;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
   int *ilist,*jlist,*numneigh,**firstneigh;
   tagint *slist;
 
   if (update->ntimestep % nevery) return;
 
   // check that all procs have needed ghost atoms within ghost cutoff
   // only if neighbor list has changed since last check
   // needs to be <= test b/c neighbor list could have been re-built in
   //   same timestep as last post_integrate() call, but afterwards
   // NOTE: no longer think is needed, due to error tests on atom->map()
   // NOTE: if delete, can also delete lastcheck and check_ghosts()
 
   //if (lastcheck <= neighbor->lastcall) check_ghosts();
 
   // acquire updated ghost atom positions
   // necessary b/c are calling this after integrate, but before Verlet comm
 
   comm->forward_comm();
 
   // forward comm of bondcount, so ghosts have it
 
   commflag = 1;
   comm->forward_comm_fix(this,1);
 
   // resize bond partner list and initialize it
   // probability array overlays distsq array
   // needs to be atom->nmax in length
 
   if (atom->nmax > nmax) {
     memory->destroy(partner);
     memory->destroy(finalpartner);
     memory->destroy(distsq);
     nmax = atom->nmax;
     memory->create(partner,nmax,"bond/create:partner");
     memory->create(finalpartner,nmax,"bond/create:finalpartner");
     memory->create(distsq,nmax,"bond/create:distsq");
     probability = distsq;
   }
 
   int nlocal = atom->nlocal;
   int nall = atom->nlocal + atom->nghost;
 
   // partner = 0 means no partner chosen, distsq = BIG means no distance yet

   for (i = 0; i < nall; i++) {
     partner[i] = 0;
     finalpartner[i] = 0;
     distsq[i] = BIG;
   }
 
   // loop over neighbors of my atoms
   // each atom sets one closest eligible partner atom ID to bond with
 
   double **x = atom->x;
   tagint *tag = atom->tag;
   tagint **bond_atom = atom->bond_atom;
   int *num_bond = atom->num_bond;
   int **nspecial = atom->nspecial;
   tagint **special = atom->special;
   int *mask = atom->mask;
   int *type = atom->type;
 
   // build the occasional neighbor list requested in init()

   neighbor->build_one(list,1);
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     if (!(mask[i] & groupbit)) continue;
     itype = type[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
       if (!(mask[j] & groupbit)) continue;
       jtype = type[j];
 
       // pair is eligible if its types match iatomtype/jatomtype in either
       //   order and neither atom has reached its max bond count
       // a max bond count of 0 means no limit

       possible = 0;
       if (itype == iatomtype && jtype == jatomtype) {
         if ((imaxbond == 0 || bondcount[i] < imaxbond) &&
             (jmaxbond == 0 || bondcount[j] < jmaxbond))
           possible = 1;
       } else if (itype == jatomtype && jtype == iatomtype) {
         if ((jmaxbond == 0 || bondcount[i] < jmaxbond) &&
             (imaxbond == 0 || bondcount[j] < imaxbond))
           possible = 1;
       }
       if (!possible) continue;
 
       // do not allow a duplicate bond to be created
       // check 1-2 neighbors of atom I
 
       for (k = 0; k < nspecial[i][0]; k++)
         if (special[i][k] == tag[j]) possible = 0;
       if (!possible) continue;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       if (rsq >= cutsq) continue;
 
       // record J as closest candidate of I, and I as closest candidate of J

       if (rsq < distsq[i]) {
         partner[i] = tag[j];
         distsq[i] = rsq;
       }
       if (rsq < distsq[j]) {
         partner[j] = tag[i];
         distsq[j] = rsq;
       }
     }
   }
 
   // reverse comm of distsq and partner
   // not needed if newton_pair off since I,J pair was seen by both procs
 
   commflag = 2;
   if (force->newton_pair) comm->reverse_comm_fix(this);
 
   // each atom now knows its winning partner
   // for prob check, generate random value for each atom with a bond partner
   // forward comm of partner and random value, so ghosts have it
   // probability overlays distsq, so distances are overwritten here
 
   if (fraction < 1.0) {
     for (i = 0; i < nlocal; i++)
       if (partner[i]) probability[i] = random->uniform();
   }
 
   commflag = 2;
   comm->forward_comm_fix(this,2);
 
   // create bonds for atoms I own
   // only if both atoms list each other as winning bond partner
   //   and probability constraint is satisfied
   // if other atom is owned by another proc, it should do same thing
 
   int **bond_type = atom->bond_type;
   int newton_bond = force->newton_bond;
 
   ncreate = 0;
   for (i = 0; i < nlocal; i++) {
     if (partner[i] == 0) continue;
     j = atom->map(partner[i]);
     if (partner[j] != tag[i]) continue;
 
     // apply probability constraint using RN for atom with smallest ID
 
     if (fraction < 1.0) {
       if (tag[i] < tag[j]) {
         if (probability[i] >= fraction) continue;
       } else {
         if (probability[j] >= fraction) continue;
       }
     }
 
     // if newton_bond is set, only store with I or J
     // if not newton_bond, store bond with both I and J
     // atom J will also do this consistently, whatever proc it is on
 
     if (!newton_bond || tag[i] < tag[j]) {
       if (num_bond[i] == atom->bond_per_atom)
         error->one(FLERR,"New bond exceeded bonds per atom in fix bond/create");
       bond_type[i][num_bond[i]] = btype;
       bond_atom[i][num_bond[i]] = tag[j];
       num_bond[i]++;
     }
 
     // add a 1-2 neighbor to special bond list for atom I
     // atom J will also do this, whatever proc it is on
     // need to first remove tag[j] from later in list if it appears
     // prevents list from overflowing, will be rebuilt in rebuild_special()
 
     slist = special[i];
     n1 = nspecial[i][0];
     n2 = nspecial[i][1];
     n3 = nspecial[i][2];
     for (m = n1; m < n3; m++)
       if (slist[m] == tag[j]) break;
     if (m < n3) {
       for (n = m; n < n3-1; n++) slist[n] = slist[n+1];
       n3--;
       if (m < n2) n2--;
     }
     if (n3 == atom->maxspecial)
       error->one(FLERR,
                  "New bond exceeded special list size in fix bond/create");

     // shift 1-3 and 1-4 entries up by one, insert tag[j] as last 1-2 entry

     for (m = n3; m > n1; m--) slist[m] = slist[m-1];
     slist[n1] = tag[j];
     nspecial[i][0] = n1+1;
     nspecial[i][1] = n2+1;
     nspecial[i][2] = n3+1;
 
     // increment bondcount, convert atom to new type if limit reached
     // atom J will also do this, whatever proc it is on
 
     bondcount[i]++;
     if (type[i] == iatomtype) {
       if (bondcount[i] == imaxbond) type[i] = inewtype;
     } else {
       if (bondcount[i] == jmaxbond) type[i] = jnewtype;
     }
 
     // store final created bond partners and count the created bond once
 
     finalpartner[i] = tag[j];
     finalpartner[j] = tag[i];
     if (tag[i] < tag[j]) ncreate++;
   }
 
   // tally stats
 
   MPI_Allreduce(&ncreate,&createcount,1,MPI_INT,MPI_SUM,world);
   createcounttotal += createcount;
   atom->nbonds += createcount;
 
   // trigger reneighboring if any bonds were formed
   // this insures neigh lists will immediately reflect the topology changes
   // done if any bonds created
 
   if (createcount) next_reneighbor = update->ntimestep;
   if (!createcount) return;
 
   // communicate final partner and 1-2 special neighbors
   // 1-2 neighs already reflect created bonds
 
   commflag = 3;
   comm->forward_comm_fix(this);
 
   // create list of created bonds that influence my owned atoms
   //   even if between owned-ghost or ghost-ghost atoms
   // finalpartner is now set for owned and ghost atoms so loop over nall
   // OK if duplicates in created list due to ghosts duplicating owned atoms
   // check J < 0 to insure a created bond to unknown atom is included
   //   i.e. a bond partner outside of cutoff length
   // ncreate is reused here as the length of the created list
 
   ncreate = 0;
   for (i = 0; i < nall; i++) {
     if (finalpartner[i] == 0) continue;
     j = atom->map(finalpartner[i]);
     if (j < 0 || tag[i] < tag[j]) {
       if (ncreate == maxcreate) {
         maxcreate += DELTA;
         memory->grow(created,maxcreate,2,"bond/create:created");
       }
       created[ncreate][0] = tag[i];
       created[ncreate][1] = finalpartner[i];
       ncreate++;
     }
   }
 
   // update special neigh lists of all atoms affected by any created bond
   // also add angles/dihedrals/impropers induced by created bonds
 
   update_topology();
 
   // DEBUG
   //print_bb();
 }
 
 /* ----------------------------------------------------------------------
    insure all atoms 2 hops away from owned atoms are in ghost list
    this allows dihedral 1-2-3-4 to be properly created
      and special list of 1 to be properly updated
    if I own atom 1, but not 2,3,4, and bond 3-4 is added
      then 2,3 will be ghosts and 3 will store 4 as its finalpartner
 ------------------------------------------------------------------------- */
 
 void FixBondCreate::check_ghosts()
 {
   int i,j,n;
   tagint *slist;
 
   int **nspecial = atom->nspecial;
   tagint **special = atom->special;
   int nlocal = atom->nlocal;
 
   int flag = 0;
   for (i = 0; i < nlocal; i++) {
     slist = special[i];
     n = nspecial[i][1];
     for (j = 0; j < n; j++)
       if (atom->map(slist[j]) < 0) flag = 1;
   }
 
   int flagall;
   MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world);
   if (flagall) 
     error->all(FLERR,"Fix3 bond/create needs ghost atoms from further away");
   lastcheck = update->ntimestep;
 }
 
 /* ----------------------------------------------------------------------
    double loop over my atoms and created bonds
    influenced = 1 if atom's topology is affected by any created bond
      yes if is one of 2 atoms in bond
      yes if either atom ID appears in as 1-2 or 1-3 in atom's special list
      else no
    if influenced by any created bond:
      rebuild the atom's special list of 1-2,1-3,1-4 neighs
      check for angles/dihedrals/impropers to create due modified special list
    scan of created bonds stops at the first bond that influences the atom
    resets nangles,ndihedrals,nimpropers,overflow
      and adds the global tallies to the Atom class counters
 ------------------------------------------------------------------------- */
 
 void FixBondCreate::update_topology()
 {
   int i,j,k,n,influenced;
   tagint id1,id2;
   tagint *slist;
 
   tagint *tag = atom->tag;
   int **nspecial = atom->nspecial;
   tagint **special = atom->special;
   int nlocal = atom->nlocal;
 
   nangles = 0;
   ndihedrals = 0;
   nimpropers = 0;
   overflow = 0;
 
   for (i = 0; i < nlocal; i++) {
     influenced = 0;
     slist = special[i];
 
     // n = cumulative count of 1-2 and 1-3 neighs of atom I
     // not modified while scanning the created bonds
 
     n = nspecial[i][1];
 
     // one influencing bond is enough, so stop scanning once it is found
 
     for (j = 0; j < ncreate && !influenced; j++) {
       id1 = created[j][0];
       id2 = created[j][1];
 
       if (tag[i] == id1 || tag[i] == id2) influenced = 1;
       else {
         for (k = 0; k < n; k++)
           if (slist[k] == id1 || slist[k] == id2) {
             influenced = 1;
             break;
           }
       }
     }
 
     // rebuild_special first, since used by create_angles, etc
 
     if (influenced) {
       rebuild_special(i);
       if (angleflag) create_angles(i);
       if (dihedralflag) create_dihedrals(i);
       if (improperflag) create_impropers(i);
     }
   }
 
   // overflow is set by the create_*() methods when an atom has no room left
 
   int overflowall;
   MPI_Allreduce(&overflow,&overflowall,1,MPI_INT,MPI_SUM,world);
   if (overflowall) error->all(FLERR,"Fix bond/create induced too many "
                               "angles/dihedrals/impropers per atom");
 
   // with newton bond off the local tallies are divided by the number
   //   of atoms per angle (3) or dihedral/improper (4)
 
   int newton_bond = force->newton_bond;
 
   int all;
   if (angleflag) {
     MPI_Allreduce(&nangles,&all,1,MPI_INT,MPI_SUM,world);
     if (!newton_bond) all /= 3;
     atom->nangles += all;
   }
   if (dihedralflag) {
     MPI_Allreduce(&ndihedrals,&all,1,MPI_INT,MPI_SUM,world);
     if (!newton_bond) all /= 4;
     atom->ndihedrals += all;
   }
   if (improperflag) {
     MPI_Allreduce(&nimpropers,&all,1,MPI_INT,MPI_SUM,world);
     if (!newton_bond) all /= 4;
     atom->nimpropers += all;
   }
 }
 
 /* ----------------------------------------------------------------------
    re-build special list of atom M
    does not affect 1-2 neighs (already include effects of new bond)
    affects 1-3 and 1-4 neighs due to other atom's augmented 1-2 neighs
    uses copy as scratch space, then overwrites special[m] and nspecial[m]
    errors out if a needed neighbor is not mapped on this proc
      or if the new list is longer than atom->maxspecial
 ------------------------------------------------------------------------- */
 
 void FixBondCreate::rebuild_special(int m)
 {
   int i,j,n,n1,cn1,cn2,cn3;
   tagint *slist;
 
   tagint *tag = atom->tag;
   int **nspecial = atom->nspecial;
   tagint **special = atom->special;
 
   // existing 1-2 neighs of atom M
 
   slist = special[m];
   n1 = nspecial[m][0];
   cn1 = 0;
   for (i = 0; i < n1; i++)
     copy[cn1++] = slist[i];
 
   // new 1-3 neighs of atom M, based on 1-2 neighs of 1-2 neighs
   // exclude self
   // remove duplicates after adding all possible 1-3 neighs
 
   cn2 = cn1;
   for (i = 0; i < cn1; i++) {
     n = atom->map(copy[i]);
     if (n < 0)
       error->one(FLERR,"Fix bond/create needs ghost atoms from further away");
     slist = special[n];
     n1 = nspecial[n][0];
     for (j = 0; j < n1; j++)
       if (slist[j] != tag[m]) copy[cn2++] = slist[j];
   }
 
   cn2 = dedup(cn1,cn2,copy);
   if (cn2 > atom->maxspecial)
     error->one(FLERR,"Special list size exceeded in fix bond/create");
 
   // new 1-4 neighs of atom M, based on 1-2 neighs of 1-3 neighs
   // exclude self
   // remove duplicates after adding all possible 1-4 neighs
 
   cn3 = cn2;
   for (i = cn1; i < cn2; i++) {
     n = atom->map(copy[i]);
     if (n < 0)
       error->one(FLERR,"Fix bond/create needs ghost atoms from further away");
     slist = special[n];
     n1 = nspecial[n][0];
     for (j = 0; j < n1; j++)
       if (slist[j] != tag[m]) copy[cn3++] = slist[j];
   }
 
   cn3 = dedup(cn2,cn3,copy);
   if (cn3 > atom->maxspecial)
     error->one(FLERR,"Special list size exceeded in fix bond/create");
 
   // store new special list with atom M
   // special[m] and copy both hold tagint values, so the byte count
   //   must use sizeof(tagint)
   // sizeof(int) would copy only part of the list when tagint is
   //   wider than int
 
   nspecial[m][0] = cn1;
   nspecial[m][1] = cn2;
   nspecial[m][2] = cn3;
   memcpy(special[m],copy,cn3*sizeof(tagint));
 }
 
 /* ----------------------------------------------------------------------
    create any angles owned by atom M induced by newly created bonds
    walk special list to find all possible angles to create
    only add an angle if a new bond is one of its 2 bonds (I-J,J-K)
    for newton_bond on, atom M is central atom
    for newton_bond off, atom M is any of 3 atoms in angle
    every new angle gets type atype
    sets overflow = 1 if atom M already stores atom->angle_per_atom angles
 ------------------------------------------------------------------------- */
 
 void FixBondCreate::create_angles(int m)
 {
   int i,j,n,i2local,n1,n2;
   tagint i1,i2,i3;
   tagint *s1list,*s2list;
 
   tagint *tag = atom->tag;
   int **nspecial = atom->nspecial;
   tagint **special = atom->special;
 
   // local copy of the angle count, written back to atom->num_angle[m] below

   int num_angle = atom->num_angle[m];
   int *angle_type = atom->angle_type[m];
   tagint *angle_atom1 = atom->angle_atom1[m];
   tagint *angle_atom2 = atom->angle_atom2[m];
   tagint *angle_atom3 = atom->angle_atom3[m];
 
   // atom M is central atom in angle
   // double loop over 1-2 neighs
   // avoid double counting by 2nd loop as j = i+1,N not j = 1,N
   // consider all angles, only add if:
   //   a new bond is in the angle and atom types match
 
   i2 = tag[m];
   n2 = nspecial[m][0];
   s2list = special[m];
 
   for (i = 0; i < n2; i++) {
     i1 = s2list[i];
     for (j = i+1; j < n2; j++) {
       i3 = s2list[j];
 
       // angle = i1-i2-i3
       // search created list for bond i1-i2 or i2-i3, in either order
       // n == ncreate means neither bond is new, so skip this angle
 
       for (n = 0; n < ncreate; n++) {
         if (created[n][0] == i1 && created[n][1] == i2) break;
         if (created[n][0] == i2 && created[n][1] == i1) break;
         if (created[n][0] == i2 && created[n][1] == i3) break;
         if (created[n][0] == i3 && created[n][1] == i2) break;
       }
       if (n == ncreate) continue;
 
       // NOTE: this is place to check atom types of i1,i2,i3
 
       if (num_angle < atom->angle_per_atom) {
         angle_type[num_angle] = atype;
         angle_atom1[num_angle] = i1;
         angle_atom2[num_angle] = i2;
         angle_atom3[num_angle] = i3;
         num_angle++;
         nangles++;
       } else overflow = 1;
     }
   }
 
   atom->num_angle[m] = num_angle;
   if (force->newton_bond) return;
 
   // for newton_bond off, also consider atom M as atom 1 in angle
   // i2 runs over 1-2 neighs of M, i3 over 1-2 neighs of i2
 
   i1 = tag[m];
   n1 = nspecial[m][0];
   s1list = special[m];
 
   for (i = 0; i < n1; i++) {
     i2 = s1list[i];
     i2local = atom->map(i2);
     if (i2local < 0) 
       error->one(FLERR,"Fix bond/create needs ghost atoms from further away");
     s2list = special[i2local];
     n2 = nspecial[i2local][0];
 
     for (j = 0; j < n2; j++) {
       i3 = s2list[j];
       if (i3 == i1) continue;
 
       // angle = i1-i2-i3
       // same new-bond test as in the central-atom loop above
 
       for (n = 0; n < ncreate; n++) {
         if (created[n][0] == i1 && created[n][1] == i2) break;
         if (created[n][0] == i2 && created[n][1] == i1) break;
         if (created[n][0] == i2 && created[n][1] == i3) break;
         if (created[n][0] == i3 && created[n][1] == i2) break;
       }
       if (n == ncreate) continue;
 
       // NOTE: this is place to check atom types of i1,i2,i3
 
       if (num_angle < atom->angle_per_atom) {
         angle_type[num_angle] = atype;
         angle_atom1[num_angle] = i1;
         angle_atom2[num_angle] = i2;
         angle_atom3[num_angle] = i3;
         num_angle++;
         nangles++;
       } else overflow = 1;
     }
   }
 
   atom->num_angle[m] = num_angle;
 }
 
 /* ----------------------------------------------------------------------
    create any dihedrals owned by atom M induced by newly created bonds
    walk special list to find all possible dihedrals to create
    only add a dihedral if a new bond is one of its 3 bonds (I-J,J-K,K-L)
    for newton_bond on, atom M is central atom
    for newton_bond off, atom M is any of 4 atoms in dihedral
 ------------------------------------------------------------------------- */
 
 void FixBondCreate::create_dihedrals(int m)
 {
   // m = local index of the atom that will own any new dihedrals
   // new entries are appended to atom M's per-atom dihedral arrays
   // if all dihedral_per_atom slots are in use, overflow is set instead
   // ndihedrals is incremented once per dihedral actually stored
 
   int i,j,k,n,i1local,i2local,i3local,n1,n2,n3;
   tagint i1,i2,i3,i4;
   tagint *s1list,*s2list,*s3list;
 
   tagint *tag = atom->tag;
   int **nspecial = atom->nspecial;
   tagint **special = atom->special;
 
   int num_dihedral = atom->num_dihedral[m];
   int *dihedral_type = atom->dihedral_type[m];
   tagint *dihedral_atom1 = atom->dihedral_atom1[m];
   tagint *dihedral_atom2 = atom->dihedral_atom2[m];
   tagint *dihedral_atom3 = atom->dihedral_atom3[m];
   tagint *dihedral_atom4 = atom->dihedral_atom4[m];
 
   // atom M is 2nd atom in dihedral
   // double loop over 1-2 neighs
   // two triple loops: one over neighs at each end of triplet
   // avoid double counting by 2nd loop as j = i+1,N not j = 1,N
   // avoid double counting due to another atom being 2nd atom in same dihedral
   //   by requiring ID of 2nd atom < ID of 3rd atom
   //   don't do this if newton bond off since want to double count
   // consider all dihedrals, only add if:
   //   a new bond is in the dihedral and atom types match
 
   i2 = tag[m];
   n2 = nspecial[m][0];
   s2list = special[m];
 
   // first triple loop: 4th atom is a 1-2 neighbor of i3
 
   for (i = 0; i < n2; i++) {
     i1 = s2list[i];
 
     for (j = i+1; j < n2; j++) {
       i3 = s2list[j];
       if (force->newton_bond && i2 > i3) continue;
       i3local = atom->map(i3);
       if (i3local < 0) 
         error->one(FLERR,"Fix bond/create needs ghost atoms from further away");
       s3list = special[i3local];
       n3 = nspecial[i3local][0];
 
       for (k = 0; k < n3; k++) {
         i4 = s3list[k];
         if (i4 == i1 || i4 == i2 || i4 == i3) continue;
 
         // dihedral = i1-i2-i3-i4
         // search created list for any of the 3 bonds, in either order
 
         for (n = 0; n < ncreate; n++) {
           if (created[n][0] == i1 && created[n][1] == i2) break;
           if (created[n][0] == i2 && created[n][1] == i1) break;
           if (created[n][0] == i2 && created[n][1] == i3) break;
           if (created[n][0] == i3 && created[n][1] == i2) break;
           if (created[n][0] == i3 && created[n][1] == i4) break;
           if (created[n][0] == i4 && created[n][1] == i3) break;
         }
         if (n < ncreate) {
           // NOTE: this is place to check atom types of i1,i2,i3,i4
           if (num_dihedral < atom->dihedral_per_atom) {
             dihedral_type[num_dihedral] = dtype;
             dihedral_atom1[num_dihedral] = i1;
             dihedral_atom2[num_dihedral] = i2;
             dihedral_atom3[num_dihedral] = i3;
             dihedral_atom4[num_dihedral] = i4;
             num_dihedral++;
             ndihedrals++;
           } else overflow = 1;
         }
       }
     }
   }
 
   // second triple loop: 4th atom is a 1-2 neighbor of i1
 
   for (i = 0; i < n2; i++) {
     i1 = s2list[i];
     if (force->newton_bond && i2 > i1) continue;
     i1local = atom->map(i1);
     if (i1local < 0) 
       error->one(FLERR,"Fix bond/create needs ghost atoms from further away");
     s3list = special[i1local];
     n3 = nspecial[i1local][0];
 
     for (j = i+1; j < n2; j++) {
       i3 = s2list[j];
 
       for (k = 0; k < n3; k++) {
         i4 = s3list[k];
         if (i4 == i1 || i4 == i2 || i4 == i3) continue;
 
         // dihedral = i3-i2-i1-i4
 
         for (n = 0; n < ncreate; n++) {
           if (created[n][0] == i3 && created[n][1] == i2) break;
           if (created[n][0] == i2 && created[n][1] == i3) break;
           if (created[n][0] == i2 && created[n][1] == i1) break;
           if (created[n][0] == i1 && created[n][1] == i2) break;
           if (created[n][0] == i1 && created[n][1] == i4) break;
           if (created[n][0] == i4 && created[n][1] == i1) break;
         }
         if (n < ncreate) {
           // NOTE: this is place to check atom types of i3,i2,i1,i4
           if (num_dihedral < atom->dihedral_per_atom) {
             dihedral_type[num_dihedral] = dtype;
             dihedral_atom1[num_dihedral] = i3;
             dihedral_atom2[num_dihedral] = i2;
             dihedral_atom3[num_dihedral] = i1;
             dihedral_atom4[num_dihedral] = i4;
             num_dihedral++;
             ndihedrals++;
           } else overflow = 1;
         }
       }
     }
   }
 
   atom->num_dihedral[m] = num_dihedral;
   if (force->newton_bond) return;
 
   // for newton_bond off, also consider atom M as atom 1 in dihedral
 
   i1 = tag[m];
   n1 = nspecial[m][0];
   s1list = special[m];
 
   for (i = 0; i < n1; i++) {
     i2 = s1list[i];
     i2local = atom->map(i2);
     if (i2local < 0) 
       error->one(FLERR,"Fix bond/create needs ghost atoms from further away");
     s2list = special[i2local];
     n2 = nspecial[i2local][0];
 
     for (j = 0; j < n2; j++) {
       i3 = s2list[j];
       if (i3 == i1) continue;
       i3local = atom->map(i3);
       if (i3local < 0) 
         error->one(FLERR,"Fix bond/create needs ghost atoms from further away");
       s3list = special[i3local];
       n3 = nspecial[i3local][0];
 
       for (k = 0; k < n3; k++) {
         i4 = s3list[k];
         if (i4 == i1 || i4 == i2 || i4 == i3) continue;
 
         // dihedral = i1-i2-i3-i4
 
         for (n = 0; n < ncreate; n++) {
           if (created[n][0] == i1 && created[n][1] == i2) break;
           if (created[n][0] == i2 && created[n][1] == i1) break;
           if (created[n][0] == i2 && created[n][1] == i3) break;
           if (created[n][0] == i3 && created[n][1] == i2) break;
           if (created[n][0] == i3 && created[n][1] == i4) break;
           if (created[n][0] == i4 && created[n][1] == i3) break;
         }
         if (n < ncreate) {
           // NOTE: this is place to check atom types of i1,i2,i3,i4
           if (num_dihedral < atom->dihedral_per_atom) {
             dihedral_type[num_dihedral] = dtype;
             dihedral_atom1[num_dihedral] = i1;
             dihedral_atom2[num_dihedral] = i2;
             dihedral_atom3[num_dihedral] = i3;
             dihedral_atom4[num_dihedral] = i4;
             num_dihedral++;
             ndihedrals++;
           } else overflow = 1;
         }
       }
     }
   }
 
   // store the count again so dihedrals appended in the newton_bond off
   //   loop above are not lost (same as done at end of create_angles)
 
   atom->num_dihedral[m] = num_dihedral;
 }
 
 /* ----------------------------------------------------------------------
    create any impropers owned by atom M induced by newly created bonds
    walk special list to find all possible impropers to create
    only add an improper if a new bond is one of its 3 bonds (I-J,I-K,I-L)
    for newton_bond on, atom M is central atom
    for newton_bond off, atom M is any of 4 atoms in improper
 ------------------------------------------------------------------------- */
 
 void FixBondCreate::create_impropers(int m)
 {
   // m = local index of the atom that will own any new impropers
   // new entries are appended to atom M's per-atom improper arrays
   // if all improper_per_atom slots are in use, overflow is set instead
   // nimpropers is incremented once per improper actually stored
 
   int i,j,k,n,i1local,n1,n2;
   tagint i1,i2,i3,i4;
   tagint *s1list,*s2list;
 
   tagint *tag = atom->tag;
   int **nspecial = atom->nspecial;
   tagint **special = atom->special;
 
   int num_improper = atom->num_improper[m];
   int *improper_type = atom->improper_type[m];
   tagint *improper_atom1 = atom->improper_atom1[m];
   tagint *improper_atom2 = atom->improper_atom2[m];
   tagint *improper_atom3 = atom->improper_atom3[m];
   tagint *improper_atom4 = atom->improper_atom4[m];
 
   // atom M is central atom in improper
   // triple loop over 1-2 neighs
   // avoid double counting by 2nd loop as j = i+1,N not j = 1,N
   // consider all impropers, only add if:
   //   a new bond is in the improper and atom types match
 
   i1 = tag[m];
   n1 = nspecial[m][0];
   s1list = special[m];
 
   for (i = 0; i < n1; i++) {
     i2 = s1list[i];
     for (j = i+1; j < n1; j++) {
       i3 = s1list[j];
       for (k = j+1; k < n1; k++) {
         i4 = s1list[k];
 
         // improper = i1-i2-i3-i4
         // search created list for any of the 3 bonds, in either order
 
         for (n = 0; n < ncreate; n++) {
           if (created[n][0] == i1 && created[n][1] == i2) break;
           if (created[n][0] == i2 && created[n][1] == i1) break;
           if (created[n][0] == i1 && created[n][1] == i3) break;
           if (created[n][0] == i3 && created[n][1] == i1) break;
           if (created[n][0] == i1 && created[n][1] == i4) break;
           if (created[n][0] == i4 && created[n][1] == i1) break;
         }
         if (n == ncreate) continue;
 
         // NOTE: this is place to check atom types of i1,i2,i3,i4
 
         if (num_improper < atom->improper_per_atom) {
           improper_type[num_improper] = itype;
           improper_atom1[num_improper] = i1;
           improper_atom2[num_improper] = i2;
           improper_atom3[num_improper] = i3;
           improper_atom4[num_improper] = i4;
           num_improper++;
           nimpropers++;
         } else overflow = 1;
       }
     }
   }
 
   atom->num_improper[m] = num_improper;
   if (force->newton_bond) return;
 
   // for newton_bond off, also consider atom M as atom 2 in improper
 
   i2 = tag[m];
   n2 = nspecial[m][0];
   s2list = special[m];
 
   for (i = 0; i < n2; i++) {
     i1 = s2list[i];
     i1local = atom->map(i1);
     if (i1local < 0) 
       error->one(FLERR,"Fix bond/create needs ghost atoms from further away");
     s1list = special[i1local];
     n1 = nspecial[i1local][0];
 
     for (j = 0; j < n1; j++) {
       i3 = s1list[j];
       if (i3 == i1 || i3 == i2) continue;
 
       for (k = j+1; k < n1; k++) {
         i4 = s1list[k];
         if (i4 == i1 || i4 == i2) continue;
 
         // improper = i1-i2-i3-i4
 
         for (n = 0; n < ncreate; n++) {
           if (created[n][0] == i1 && created[n][1] == i2) break;
           if (created[n][0] == i2 && created[n][1] == i1) break;
           if (created[n][0] == i1 && created[n][1] == i3) break;
           if (created[n][0] == i3 && created[n][1] == i1) break;
           if (created[n][0] == i1 && created[n][1] == i4) break;
           if (created[n][0] == i4 && created[n][1] == i1) break;
         }
         if (n < ncreate) {
           // NOTE: this is place to check atom types of i1,i2,i3,i4
           if (num_improper < atom->improper_per_atom) {
             improper_type[num_improper] = itype;
             improper_atom1[num_improper] = i1;
             improper_atom2[num_improper] = i2;
             improper_atom3[num_improper] = i3;
             improper_atom4[num_improper] = i4;
             num_improper++;
             nimpropers++;
           } else overflow = 1;
         }
       }
     }
   }
 
   // store the count again so impropers appended in the newton_bond off
   //   loop above are not lost (same as done at end of create_angles)
 
   atom->num_improper[m] = num_improper;
 }
 
 /* ----------------------------------------------------------------------
    remove all ID duplicates in copy from Nstart:Nstop-1
    compare to all previous values in copy
    return N decremented by any discarded duplicates
 ------------------------------------------------------------------------- */
 
 int FixBondCreate::dedup(int nstart, int nstop, tagint *copy)
 {
   // scan entries nstart..nstop-1, comparing each to every earlier entry
   // a duplicate is overwritten by the current last entry and the list
   //   shrinks by one; the swapped-in value is then re-tested in place
 
   int cur = nstart;
   while (cur < nstop) {
     bool duplicate = false;
     for (int prev = 0; prev < cur; prev++) {
       if (copy[prev] == copy[cur]) {
         duplicate = true;
         break;
       }
     }
 
     if (duplicate) {
       nstop--;
       copy[cur] = copy[nstop];
     } else {
       cur++;
     }
   }
 
   // new length of the list after discarding duplicates
 
   return nstop;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixBondCreate::post_integrate_respa(int ilevel, int iloop)
 {
   // only act on the rRESPA level with index nlevels_respa-1
 
   const int toplevel = nlevels_respa - 1;
   if (ilevel != toplevel) return;
   post_integrate();
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixBondCreate::pack_forward_comm(int n, int *list, double *buf,
                                      int pbc_flag, int *pbc)
 {
   int i,j,k,m,ns;
 
   m = 0;
 
   if (commflag == 1) {
     for (i = 0; i < n; i++) {
       j = list[i];
       buf[m++] = ubuf(bondcount[j]).d;
     }
     return m;
   }
 
   if (commflag == 2) {
     for (i = 0; i < n; i++) {
       j = list[i];
       buf[m++] = ubuf(partner[j]).d;
       buf[m++] = probability[j];
     }
     return m;
   }
 
   int **nspecial = atom->nspecial;
   tagint **special = atom->special;
 
   m = 0;
   for (i = 0; i < n; i++) {
     j = list[i];
     buf[m++] = ubuf(finalpartner[j]).d;
     ns = nspecial[j][0];
     buf[m++] = ubuf(ns).d;
     for (k = 0; k < ns; k++)
       buf[m++] = ubuf(special[j][k]).d;
   }
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixBondCreate::unpack_forward_comm(int n, int first, double *buf)
 {
   // inverse of pack_forward_comm: fill atoms first..first+n-1 from buf
   // layout of buf is selected by commflag
 
   const int stop = first + n;
   int pos = 0;
 
   if (commflag == 1) {
     for (int iatom = first; iatom < stop; iatom++)
       bondcount[iatom] = (int) ubuf(buf[pos++]).i;
     return;
   }
 
   if (commflag == 2) {
     for (int iatom = first; iatom < stop; iatom++) {
       partner[iatom] = (tagint) ubuf(buf[pos++]).i;
       probability[iatom] = buf[pos++];
     }
     return;
   }
 
   // remaining case: finalpartner followed by the 1-2 special neighbors
 
   int **nspecial = atom->nspecial;
   tagint **special = atom->special;
 
   for (int iatom = first; iatom < stop; iatom++) {
     finalpartner[iatom] = (tagint) ubuf(buf[pos++]).i;
     const int n12 = (int) ubuf(buf[pos++]).i;
     nspecial[iatom][0] = n12;
     for (int kk = 0; kk < n12; kk++)
       special[iatom][kk] = (tagint) ubuf(buf[pos++]).i;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixBondCreate::pack_reverse_comm(int n, int first, double *buf)
 {
   int i,m,last;
 
   m = 0;
   last = first + n;
 
   if (commflag == 1) {
     for (i = first; i < last; i++)
       buf[m++] = ubuf(bondcount[i]).d;
     return m;
   }
 
   for (i = first; i < last; i++) {
     buf[m++] = ubuf(partner[i]).d;
     buf[m++] = distsq[i];
   }
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixBondCreate::unpack_reverse_comm(int n, int *list, double *buf)
 {
   int pos = 0;
 
   // commflag 1: accumulate the received bond counts
 
   if (commflag == 1) {
     for (int ii = 0; ii < n; ii++)
       bondcount[list[ii]] += (int) ubuf(buf[pos++]).i;
     return;
   }
 
   // otherwise each atom has a (partner,distsq) pair in buf
   // keep the received partner only if its distsq is strictly smaller
 
   for (int ii = 0; ii < n; ii++) {
     const int iatom = list[ii];
     const double rsq = buf[pos+1];
     if (rsq < distsq[iatom]) {
       partner[iatom] = (tagint) ubuf(buf[pos]).i;
       distsq[iatom] = rsq;
     }
     pos += 2;
   }
 }
 
 /* ----------------------------------------------------------------------
    allocate local atom-based arrays
 ------------------------------------------------------------------------- */
 
 void FixBondCreate::grow_arrays(int nmax)
 {
   // bondcount is the only array resized here; new length = nmax
 
   memory->grow(bondcount,
                nmax,
                "bond/create:bondcount");
 }
 
 /* ----------------------------------------------------------------------
    copy values within local atom-based arrays
 ------------------------------------------------------------------------- */
 
 void FixBondCreate::copy_arrays(int i, int j, int delflag)
 {
   // atom j takes over the bond count of atom i; delflag is not used
 
   const int count = bondcount[i];
   bondcount[j] = count;
 }
 
 /* ----------------------------------------------------------------------
    pack values in local atom-based arrays for exchange with another proc
 ------------------------------------------------------------------------- */
 
 int FixBondCreate::pack_exchange(int i, double *buf)
 {
   // one value per atom: its bond count, stored as a double
 
   const int nvalues = 1;
   *buf = static_cast<double> (bondcount[i]);
   return nvalues;
 }
 
 /* ----------------------------------------------------------------------
    unpack values in local atom-based arrays from exchange with another proc
 ------------------------------------------------------------------------- */
 
 int FixBondCreate::unpack_exchange(int nlocal, double *buf)
 {
   // one value per atom: the bond count, converted back to int
 
   const double packed = *buf;
   bondcount[nlocal] = static_cast<int> (packed);
   return 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double FixBondCreate::compute_vector(int n)
 {
   // index 0 = createcount, any other index = createcounttotal
 
   return (n == 0) ? static_cast<double> (createcount)
                   : static_cast<double> (createcounttotal);
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
 ------------------------------------------------------------------------- */
 
 double FixBondCreate::memory_usage()
 {
   // sum of three per-atom contributions, each sized by atom->nmax:
   //   one int array, two tagint arrays, one double array
   // NOTE(review): presumably bondcount, partner/finalpartner and distsq;
   //   confirm against the header
   // each term must be accumulated with +=; a plain assignment on the
   //   2nd term would discard the int array contribution
 
   int nmax = atom->nmax;
   double bytes = nmax * sizeof(int);
   bytes += 2*nmax * sizeof(tagint);
   bytes += nmax * sizeof(double);
   return bytes;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixBondCreate::print_bb()
 {
   for (int i = 0; i < atom->nlocal; i++) {
     printf("TAG " TAGINT_FORMAT ": %d nbonds: ",atom->tag[i],atom->num_bond[i]);
     for (int j = 0; j < atom->num_bond[i]; j++) {
       printf(" " TAGINT_FORMAT,atom->bond_atom[i][j]);
     }
     printf("\n");
     printf("TAG " TAGINT_FORMAT ": %d nangles: ",atom->tag[i],atom->num_angle[i]);
     for (int j = 0; j < atom->num_angle[i]; j++) {
       printf(" " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT ",",
              atom->angle_atom1[i][j], atom->angle_atom2[i][j],
              atom->angle_atom3[i][j]);
     }
     printf("\n");
     printf("TAG " TAGINT_FORMAT ": %d ndihedrals: ",atom->tag[i],atom->num_dihedral[i]);
     for (int j = 0; j < atom->num_dihedral[i]; j++) {
       printf(" " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT " " 
              TAGINT_FORMAT ",", atom->dihedral_atom1[i][j],
 	     atom->dihedral_atom2[i][j],atom->dihedral_atom3[i][j],
 	     atom->dihedral_atom4[i][j]);
     }
     printf("\n");
     printf("TAG " TAGINT_FORMAT ": %d nimpropers: ",atom->tag[i],atom->num_improper[i]);
     for (int j = 0; j < atom->num_improper[i]; j++) {
       printf(" " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT " " 
              TAGINT_FORMAT ",",atom->improper_atom1[i][j],
 	     atom->improper_atom2[i][j],atom->improper_atom3[i][j],
 	     atom->improper_atom4[i][j]);
     }
     printf("\n");
     printf("TAG " TAGINT_FORMAT ": %d %d %d nspecial: ",atom->tag[i],
 	   atom->nspecial[i][0],atom->nspecial[i][1],atom->nspecial[i][2]);
     for (int j = 0; j < atom->nspecial[i][2]; j++) {
       printf(" " TAGINT_FORMAT,atom->special[i][j]);
     }
     printf("\n");
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixBondCreate::print_copy(const char *str, tagint m, 
                               int n1, int n2, int n3, int *v)
 {
   // debugging aid: label, ID and three counts, then the first n3 values of v
 
   printf("%s " TAGINT_FORMAT ": %d %d %d nspecial: ",str,m,n1,n2,n3);
 
   int idx = 0;
   while (idx < n3) {
     printf(" %d",v[idx]);
     idx++;
   }
 
   printf("\n");
 }
diff --git a/src/MC/fix_bond_swap.cpp b/src/MC/fix_bond_swap.cpp
index aaa9befee..448c16415 100644
--- a/src/MC/fix_bond_swap.cpp
+++ b/src/MC/fix_bond_swap.cpp
@@ -1,737 +1,737 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdlib.h"
 #include "string.h"
 #include "fix_bond_swap.h"
 #include "atom.h"
 #include "force.h"
 #include "pair.h"
 #include "bond.h"
 #include "angle.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "group.h"
 #include "comm.h"
 #include "domain.h"
 #include "modify.h"
 #include "compute.h"
 #include "random_mars.h"
 #include "citeme.h"
 #include "memory.h"
 #include "error.h"
 
 #include "update.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
 // citation text for fix bond/swap; the first line names the command
 //   the citation belongs to, followed by the BibTeX entry
 static const char cite_fix_bond_swap[] =
   "fix bond/swap command:\n\n"
   "@Article{Auhl03,\n"
   " author = {R. Auhl, R. Everaers, G. S. Grest, K. Kremer, S. J. Plimpton},\n"
   " title = {Equilibration of long chain polymer melts in computer simulations},\n"
   " journal = {J.~Chem.~Phys.},\n"
   " year =    2003,\n"
   " volume =  119,\n"
   " pages =   {12718--12728}\n"
   "}\n\n";
 
 /* ---------------------------------------------------------------------- */
 
 FixBondSwap::FixBondSwap(LAMMPS *lmp, int narg, char **arg) :
   Fix(lmp, narg, arg)
 {
   if (lmp->citeme) lmp->citeme->add(cite_fix_bond_swap);
 
   // exactly 7 args are accepted; arg[3..6] are parsed below
 
   if (narg != 7) error->all(FLERR,"Illegal fix bond/swap command");
 
   // settings of this fix
 
   force_reneighbor = 1;
   next_reneighbor = -1;
   vector_flag = 1;
   size_vector = 2;
   global_freq = 1;
   extvector = 0;
 
   // arg[3] = Nevery, must be positive
 
   nevery = force->inumeric(FLERR,arg[3]);
   if (nevery <= 0) error->all(FLERR,"Illegal fix bond/swap command");
 
   // arg[4] = fraction, arg[5] = cutoff (stored squared)
 
   fraction = force->numeric(FLERR,arg[4]);
   const double rcut = force->numeric(FLERR,arg[5]);
   cutsq = rcut*rcut;
 
   // arg[6] = seed; offset by proc ID so each proc has its own RNG stream
 
   const int seed = force->inumeric(FLERR,arg[6]);
   random = new RanMars(lmp,seed + comm->me);
 
   // error check
 
   if (atom->molecular != 1)
     error->all(FLERR,"Cannot use fix bond/swap with non-molecular systems");
 
   // create a new compute temp style
   // id = fix-ID + "_temp", compute is defined on group "all"
 
   id_temp = new char[strlen(id) + 6];
   strcpy(id_temp,id);
   strcat(id_temp,"_temp");
 
   char *newarg[3];
   newarg[0] = id_temp;
   newarg[1] = (char *) "all";
   newarg[2] = (char *) "temp";
   modify->add_compute(3,newarg);
   tflag = 1;
 
   // atom list starts out empty, swap statistics start at zero
 
   alist = NULL;
   nmax = 0;
 
   foursome = 0;
   naccept = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 FixBondSwap::~FixBondSwap()
 {
   // release the atom list and the random number generator
 
   memory->destroy(alist);
   delete random;
 
   // the temperature compute is removed only if this fix created it
   // its ID string is freed either way
 
   if (tflag) {
     modify->delete_compute(id_temp);
   }
   delete [] id_temp;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixBondSwap::setmask()
 {
   // the only bit set in the mask is POST_INTEGRATE
 
   return 0 | POST_INTEGRATE;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixBondSwap::init()
 {
   // checks the settings this fix depends on, looks up its temperature
   //   compute, requests a neighbor list and resets the swap statistics
 
   // require an atom style with molecule IDs
 
   if (atom->molecule == NULL)
     error->all(FLERR,
                "Must use atom style with molecule IDs with fix bond/swap");
 
   // look up the temperature compute by its ID
 
   int icompute = modify->find_compute(id_temp);
   if (icompute < 0)
     error->all(FLERR,"Temperature ID for fix bond/swap does not exist");
   temperature = modify->compute[icompute];
 
   // pair and bonds must be defined
   // no dihedral or improper potentials allowed
   // special bonds must be 0 1 1
 
   if (force->pair == NULL || force->bond == NULL)
     error->all(FLERR,"Fix bond/swap requires pair and bond styles");
 
   if (force->pair->single_enable == 0)
     error->all(FLERR,"Pair style does not support fix bond/swap");
 
   // angles in the system without an angle style only produce a warning,
   //   printed by proc 0
 
   if (force->angle == NULL && atom->nangles > 0 && comm->me == 0)
     error->warning(FLERR,"Fix bond/swap will ignore defined angles");
 
   if (force->dihedral || force->improper)
     error->all(FLERR,"Fix bond/swap cannot use dihedral or improper styles");
 
   if (force->special_lj[1] != 0.0 || force->special_lj[2] != 1.0 ||
       force->special_lj[3] != 1.0)
     error->all(FLERR,"Fix bond/swap requires special_bonds = 0,1,1");
 
   // need a half neighbor list, built every Nevery steps
   // request is flagged as a fix request (not pair) and as occasional
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->fix = 1;
   neighbor->requests[irequest]->occasional = 1;
 
   // zero out stats
   // angleflag = 1 only if an angle style is defined
 
   naccept = foursome = 0;
   angleflag = 0;
   if (force->angle) angleflag = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixBondSwap::init_list(int id, NeighList *ptr)
 {
   list = ptr;
 }
 
 /* ----------------------------------------------------------------------
    look for and perform swaps
    NOTE: used to do this every pre_neighbor(), but think that is a bug
          b/c was doing it after exchange() and before neighbor->build()
          which is when neigh lists are actually out-of-date or even bogus,
          now do it based on user-specified Nevery, and trigger reneigh
          if any swaps performed, like fix bond/create
 ------------------------------------------------------------------------- */
 
 void FixBondSwap::post_integrate()
 {
   int i,j,ii,jj,m,inum,jnum;
   int inext,iprev,ilast,jnext,jprev,jlast,ibond,iangle,jbond,jangle;
   int ibondtype,jbondtype,iangletype,inextangletype,jangletype,jnextangletype;
   tagint itag,inexttag,iprevtag,ilasttag,jtag,jnexttag,jprevtag,jlasttag;
   tagint i1,i2,i3,j1,j2,j3;
   int *ilist,*jlist,*numneigh,**firstneigh;
   double delta,factor;
 
   if (update->ntimestep % nevery) return;
 
   // compute current temp for Boltzmann factor test
 
   double t_current = temperature->compute_scalar();
 
   // local ptrs to atom arrays
 
   tagint *tag = atom->tag;
   int *mask = atom->mask;
   tagint *molecule = atom->molecule;
   int *num_bond = atom->num_bond;
   tagint **bond_atom = atom->bond_atom;
   int **bond_type = atom->bond_type;
   int *num_angle = atom->num_angle;
   tagint **angle_atom1 = atom->angle_atom1;
   tagint **angle_atom2 = atom->angle_atom2;
   tagint **angle_atom3 = atom->angle_atom3;
   int **angle_type = atom->angle_type;
   int **nspecial = atom->nspecial;
   tagint **special = atom->special;
   int newton_bond = force->newton_bond;
   int nlocal = atom->nlocal;
 
   type = atom->type;
   x = atom->x;
 
   neighbor->build_one(list,1);
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // randomize list of my owned atoms that are in fix group
   // grow atom list if necessary
 
   if (nlocal > nmax) {
     memory->destroy(alist);
     nmax = atom->nmax;
     memory->create(alist,nmax,"bondswap:alist");
   }
 
   int neligible = 0;
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     if (mask[i] & groupbit)
       alist[neligible++] = i;
   }
 
   int tmp;
   for (i = 0; i < neligible; i++) {
     j = static_cast<int> (random->uniform() * neligible);
     tmp = alist[i];
     alist[i] = alist[j];
     alist[j] = tmp;
   }
 
   // examine ntest of my eligible atoms for potential swaps
   // atom i is randomly selected via atom list
   // look at all j neighbors of atom i
   // atom j must be on-processor (j < nlocal)
   // atom j must be in fix group
   // i and j must be same distance from chain end (mol[i] = mol[j])
   // NOTE: must use extra parens in if test on mask[j] & groupbit
 
   int ntest = static_cast<int> (fraction * neligible);
   int accept = 0;
 
   for (int itest = 0; itest < ntest; itest++) {
     i = alist[itest];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
       if (j >= nlocal) continue;
       if ((mask[j] & groupbit) == 0) continue;
       if (molecule[i] != molecule[j]) continue;
 
       // look at all bond partners of atoms i and j
       // use num_bond for this, not special list, so also find bondtypes
       // inext,jnext = bonded atoms
       // inext,jnext must be on-processor (inext,jnext < nlocal)
       // inext,jnext must be same dist from chain end (mol[inext] = mol[jnext])
       // since swaps may occur between two ends of a single chain, insure
       //   the 4 atoms are unique (no duplicates): inext != jnext, inext != j
       // all 4 old and new bonds must have length < cutoff
 
       for (ibond = 0; ibond < num_bond[i]; ibond++) {
         inext = atom->map(bond_atom[i][ibond]);
         if (inext >= nlocal || inext < 0) continue;
         ibondtype = bond_type[i][ibond];
 
         for (jbond = 0; jbond < num_bond[j]; jbond++) {
           jnext = atom->map(bond_atom[j][jbond]);
           if (jnext >= nlocal || jnext < 0) continue;
           jbondtype = bond_type[j][jbond];
 
           if (molecule[inext] != molecule[jnext]) continue;
           if (inext == jnext || inext == j) continue;
           if (dist_rsq(i,inext) >= cutsq) continue;
           if (dist_rsq(j,jnext) >= cutsq) continue;
           if (dist_rsq(i,jnext) >= cutsq) continue;
           if (dist_rsq(j,inext) >= cutsq) continue;
 
           // if angles are enabled:
           // find other atoms i,inext,j,jnext are in angles with
           //   and angletypes: i/j angletype, i/j nextangletype
           // use num_angle for this, not special list, so also find angletypes
           // 4 atoms consecutively along 1st chain: iprev,i,inext,ilast
           // 4 atoms consecutively along 2nd chain: jprev,j,jnext,jlast
           // prev or last atom can be non-existent at end of chain
           //   set prev/last = -1 in this case
           // if newton bond = 0, then angles are stored by all 4 atoms
           //   so require that iprev,ilast,jprev,jlast be owned by this proc
           //   so all copies of angles can be updated if a swap takes place
 
           if (angleflag) {
             itag = tag[i];
             inexttag = tag[inext];
             jtag = tag[j];
             jnexttag = tag[jnext];
 
             iprev = -1;
             for (iangle = 0; iangle < num_angle[i]; iangle++) {
               i1 = angle_atom1[i][iangle];
               i2 = angle_atom2[i][iangle];
               i3 = angle_atom3[i][iangle];
               if (i2 == itag && i3 == inexttag) iprev = atom->map(i1);
               else if (i1 == inexttag && i2 == itag) iprev = atom->map(i3);
               if (iprev >= 0) {
                 iangletype = angle_type[i][iangle];
                 break;
               }
             }
             if (!newton_bond && iprev >= nlocal) continue;
 
             ilast = -1;
             for (iangle = 0; iangle < num_angle[inext]; iangle++) {
               i1 = angle_atom1[inext][iangle];
               i2 = angle_atom2[inext][iangle];
               i3 = angle_atom3[inext][iangle];
               if (i1 == itag && i2 == inexttag) ilast = atom->map(i3);
               else if (i2 == inexttag && i3 == itag) ilast = atom->map(i1);
               if (ilast >= 0) {
                 inextangletype = angle_type[inext][iangle];
                 break;
               }
             }
             if (!newton_bond && ilast >= nlocal) continue;
 
             jprev = -1;
             for (jangle = 0; jangle < num_angle[j]; jangle++) {
               j1 = angle_atom1[j][jangle];
               j2 = angle_atom2[j][jangle];
               j3 = angle_atom3[j][jangle];
               if (j2 == jtag && j3 == jnexttag) jprev = atom->map(j1);
               else if (j1 == jnexttag && j2 == jtag) jprev = atom->map(j3);
               if (jprev >= 0) {
                 jangletype = angle_type[j][jangle];
                 break;
               }
             }
             if (!newton_bond && jprev >= nlocal) continue;
 
             jlast = -1;
             for (jangle = 0; jangle < num_angle[jnext]; jangle++) {
               j1 = angle_atom1[jnext][jangle];
               j2 = angle_atom2[jnext][jangle];
               j3 = angle_atom3[jnext][jangle];
               if (j1 == jtag && j2 == jnexttag) jlast = atom->map(j3);
               else if (j2 == jnexttag && j3 == jtag) jlast = atom->map(j1);
               if (jlast >= 0) {
                 jnextangletype = angle_type[jnext][jangle];
                 break;
               }
             }
             if (!newton_bond && jlast >= nlocal) continue;
           }
 
           // valid foursome found between 2 chains:
           //   chains = iprev-i-inext-ilast and jprev-j-jnext-jlast
           //   prev or last values are -1 if do not exist due to end of chain
           //   OK to call angle_eng with -1 atom, since just return 0.0
           // current energy of foursome =
           //   E_nb(i,j) + E_nb(i,jnext) + E_nb(inext,j) + E_nb(inext,jnext) +
           //   E_bond(i,inext) + E_bond(j,jnext) +
           //   E_angle(iprev,i,inext) + E_angle(i,inext,ilast) +
           //   E_angle(jprev,j,jnext) + E_angle(j,jnext,jlast)
           // new energy of foursome with swapped bonds =
           //   E_nb(i,j) + E_nb(i,inext) + E_nb(j,jnext) + E_nb(inext,jnext) +
           //   E_bond(i,jnext) + E_bond(j,inext) +
           //   E_angle(iprev,i,jnext) + E_angle(i,jnext,jlast) +
           //   E_angle(jprev,j,inext) + E_angle(j,inext,ilast)
           // energy delta = add/subtract differing terms between 2 formulas
 
           foursome++;
 
           delta = pair_eng(i,inext) + pair_eng(j,jnext) -
             pair_eng(i,jnext) - pair_eng(inext,j);
           delta += bond_eng(ibondtype,i,jnext) + bond_eng(jbondtype,j,inext) -
             bond_eng(ibondtype,i,inext) - bond_eng(jbondtype,j,jnext);
           if (angleflag)
             delta += angle_eng(iangletype,iprev,i,jnext) +
               angle_eng(jnextangletype,i,jnext,jlast) +
               angle_eng(jangletype,jprev,j,inext) +
               angle_eng(inextangletype,j,inext,ilast) -
               angle_eng(iangletype,iprev,i,inext) -
               angle_eng(inextangletype,i,inext,ilast) -
               angle_eng(jangletype,jprev,j,jnext) -
               angle_eng(jnextangletype,j,jnext,jlast);
 
           // if delta <= 0, accept swap
           // if delta > 0, compute Boltzmann factor with current temperature
           //   only accept if greater than random value
           // whether accept or not, exit test loop
 
           if (delta < 0.0) accept = 1;
           else {
             factor = exp(-delta/force->boltz/t_current);
             if (random->uniform() < factor) accept = 1;
           }
           goto done;
         }
       }
     }
   }
 
  done:
 
   // trigger immediate reneighboring if any swaps occurred
 
   int accept_any;
   MPI_Allreduce(&accept,&accept_any,1,MPI_INT,MPI_SUM,world);
   if (accept_any) next_reneighbor = update->ntimestep;
 
   if (!accept) return;
   naccept++;
 
   // change bond partners of affected atoms
   // on atom i: bond i-inext changes to i-jnext
   // on atom j: bond j-jnext changes to j-inext
   // on atom inext: bond inext-i changes to inext-j
   // on atom jnext: bond jnext-j changes to jnext-i
 
   for (ibond = 0; ibond < num_bond[i]; ibond++)
     if (bond_atom[i][ibond] == tag[inext]) bond_atom[i][ibond] = tag[jnext];
   for (jbond = 0; jbond < num_bond[j]; jbond++)
     if (bond_atom[j][jbond] == tag[jnext]) bond_atom[j][jbond] = tag[inext];
   for (ibond = 0; ibond < num_bond[inext]; ibond++)
     if (bond_atom[inext][ibond] == tag[i]) bond_atom[inext][ibond] = tag[j];
   for (jbond = 0; jbond < num_bond[jnext]; jbond++)
     if (bond_atom[jnext][jbond] == tag[j]) bond_atom[jnext][jbond] = tag[i];
 
   // set global tags of 4 atoms in bonds
 
   itag = tag[i];
   inexttag = tag[inext];
 
   jtag = tag[j];
   jnexttag = tag[jnext];
 
   // change 1st special neighbors of affected atoms: i,j,inext,jnext
   // don't need to change 2nd/3rd special neighbors for any atom
   //   since special bonds = 0 1 1 means they are never used
 
   for (m = 0; m < nspecial[i][0]; m++)
     if (special[i][m] == inexttag) special[i][m] = jnexttag;
   for (m = 0; m < nspecial[j][0]; m++)
     if (special[j][m] == jnexttag) special[j][m] = inexttag;
   for (m = 0; m < nspecial[inext][0]; m++)
     if (special[inext][m] == itag) special[inext][m] = jtag;
   for (m = 0; m < nspecial[jnext][0]; m++)
     if (special[jnext][m] == jtag) special[jnext][m] = itag;
 
   // done if no angles
 
   if (!angleflag) return;
 
   // set global tags of 4 additional atoms in angles, 0 if no angle
 
   if (iprev >= 0) iprevtag = tag[iprev];
   else iprevtag = 0;
   if (ilast >= 0) ilasttag = tag[ilast];
   else ilasttag = 0;
 
   if (jprev >= 0) jprevtag = tag[jprev];
   else jprevtag = 0;
   if (jlast >= 0) jlasttag = tag[jlast];
   else jlasttag = 0;
 
   // change angle partners of affected atoms
   // must check if each angle is stored as a-b-c or c-b-a
   // on atom i:
   //   angle iprev-i-inext changes to iprev-i-jnext
   //   angle i-inext-ilast changes to i-jnext-jlast
   // on atom j:
   //   angle jprev-j-jnext changes to jprev-j-inext
   //   angle j-jnext-jlast changes to j-inext-ilast
   // on atom inext:
   //   angle iprev-i-inext changes to jprev-j-inext
   //   angle i-inext-ilast changes to j-inext-ilast
   // on atom jnext:
   //   angle jprev-j-jnext changes to iprev-i-jnext
   //   angle j-jnext-jlast changes to i-jnext-jlast
 
   for (iangle = 0; iangle < num_angle[i]; iangle++) {
     i1 = angle_atom1[i][iangle];
     i2 = angle_atom2[i][iangle];
     i3 = angle_atom3[i][iangle];
 
     if (i1 == iprevtag && i2 == itag && i3 == inexttag)
       angle_atom3[i][iangle] = jnexttag;
     else if (i1 == inexttag && i2 == itag && i3 == iprevtag)
       angle_atom1[i][iangle] = jnexttag;
     else if (i1 == itag && i2 == inexttag && i3 == ilasttag) {
       angle_atom2[i][iangle] = jnexttag;
       angle_atom3[i][iangle] = jlasttag;
     } else if (i1 == ilasttag && i2 == inexttag && i3 == itag) {
       angle_atom1[i][iangle] = jlasttag;
       angle_atom2[i][iangle] = jnexttag;
     }
   }
 
   for (jangle = 0; jangle < num_angle[j]; jangle++) {
     j1 = angle_atom1[j][jangle];
     j2 = angle_atom2[j][jangle];
     j3 = angle_atom3[j][jangle];
 
     if (j1 == jprevtag && j2 == jtag && j3 == jnexttag)
       angle_atom3[j][jangle] = inexttag;
     else if (j1 == jnexttag && j2 == jtag && j3 == jprevtag)
       angle_atom1[j][jangle] = inexttag;
     else if (j1 == jtag && j2 == jnexttag && j3 == jlasttag) {
       angle_atom2[j][jangle] = inexttag;
       angle_atom3[j][jangle] = ilasttag;
     } else if (j1 == jlasttag && j2 == jnexttag && j3 == jtag) {
       angle_atom1[j][jangle] = ilasttag;
       angle_atom2[j][jangle] = inexttag;
     }
   }
 
   for (iangle = 0; iangle < num_angle[inext]; iangle++) {
     i1 = angle_atom1[inext][iangle];
     i2 = angle_atom2[inext][iangle];
     i3 = angle_atom3[inext][iangle];
 
     if (i1 == iprevtag && i2 == itag && i3 == inexttag) {
       angle_atom1[inext][iangle] = jprevtag;
       angle_atom2[inext][iangle] = jtag;
     } else if (i1 == inexttag && i2 == itag && i3 == iprevtag) {
       angle_atom2[inext][iangle] = jtag;
       angle_atom3[inext][iangle] = jprevtag;
     } else if (i1 == itag && i2 == inexttag && i3 == ilasttag)
       angle_atom1[inext][iangle] = jtag;
     else if (i1 == ilasttag && i2 == inexttag && i3 == itag)
       angle_atom3[inext][iangle] = jtag;
   }
 
   for (jangle = 0; jangle < num_angle[jnext]; jangle++) {
     j1 = angle_atom1[jnext][jangle];
     j2 = angle_atom2[jnext][jangle];
     j3 = angle_atom3[jnext][jangle];
 
     if (j1 == jprevtag && j2 == jtag && j3 == jnexttag) {
       angle_atom1[jnext][jangle] = iprevtag;
       angle_atom2[jnext][jangle] = itag;
     } else if (j1 == jnexttag && j2 == jtag && j3 == jprevtag) {
       angle_atom2[jnext][jangle] = itag;
       angle_atom3[jnext][jangle] = iprevtag;
     } else if (j1 == jtag && j2 == jnexttag && j3 == jlasttag)
       angle_atom1[jnext][jangle] = itag;
     else if (j1 == jlasttag && j2 == jnexttag && j3 == jtag)
       angle_atom3[jnext][jangle] = itag;
   }
 
   // done if newton bond set
 
   if (newton_bond) return;
 
   // change angles stored by iprev,ilast,jprev,jlast
   // on atom iprev: angle iprev-i-inext changes to iprev-i-jnext
   // on atom jprev: angle jprev-j-jnext changes to jprev-j-inext
   // on atom ilast: angle i-inext-ilast changes to j-inext-ilast
   // on atom jlast: angle j-jnext-jlast changes to i-jnext-jlast
 
   for (iangle = 0; iangle < num_angle[iprev]; iangle++) {
     i1 = angle_atom1[iprev][iangle];
     i2 = angle_atom2[iprev][iangle];
     i3 = angle_atom3[iprev][iangle];
 
     if (i1 == iprevtag && i2 == itag && i3 == inexttag)
       angle_atom3[iprev][iangle] = jnexttag;
     else if (i1 == inexttag && i2 == itag && i3 == iprevtag)
       angle_atom1[iprev][iangle] = jnexttag;
   }
 
   for (jangle = 0; jangle < num_angle[jprev]; jangle++) {
     j1 = angle_atom1[jprev][jangle];
     j2 = angle_atom2[jprev][jangle];
     j3 = angle_atom3[jprev][jangle];
 
     if (j1 == jprevtag && j2 == jtag && j3 == jnexttag)
       angle_atom3[jprev][jangle] = inexttag;
     else if (j1 == jnexttag && j2 == jtag && j3 == jprevtag)
       angle_atom1[jprev][jangle] = inexttag;
   }
 
   for (iangle = 0; iangle < num_angle[ilast]; iangle++) {
     i1 = angle_atom1[ilast][iangle];
     i2 = angle_atom2[ilast][iangle];
     i3 = angle_atom3[ilast][iangle];
 
     if (i1 == itag && i2 == inexttag && i3 == ilasttag)
       angle_atom1[ilast][iangle] = jtag;
     else if (i1 == ilasttag && i2 == inexttag && i3 == itag)
       angle_atom3[ilast][iangle] = jtag;
   }
 
   for (jangle = 0; jangle < num_angle[jlast]; jangle++) {
     j1 = angle_atom1[jlast][jangle];
     j2 = angle_atom2[jlast][jangle];
     j3 = angle_atom3[jlast][jangle];
 
     if (j1 == jtag && j2 == jnexttag && j3 == jlasttag)
       angle_atom1[jlast][jangle] = itag;
     else if (j1 == jlasttag && j2 == jnexttag && j3 == jtag)
       angle_atom3[jlast][jangle] = itag;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixBondSwap::modify_param(int narg, char **arg)
 {
   // handle "fix_modify ... temp ID": switch to a different temperature compute
   // returns 2 when the "temp" keyword was processed, 0 for any other keyword
   // (presumably the number of args consumed -- confirm against the caller)
 
   if (strcmp(arg[0],"temp") != 0) return 0;
 
   if (narg < 2) error->all(FLERR,"Illegal fix_modify command");
 
   // if tflag is set, remove the compute currently named by id_temp
 
   if (tflag) {
     modify->delete_compute(id_temp);
     tflag = 0;
   }
 
   // replace the stored compute ID with a private copy of the new one
 
   delete [] id_temp;
   int len = strlen(arg[1]) + 1;
   id_temp = new char[len];
   strcpy(id_temp,arg[1]);
 
   // look up the compute and check that it provides a temperature
 
   int idx = modify->find_compute(id_temp);
   if (idx < 0)
     error->all(FLERR,"Could not find fix_modify temperature ID");
   temperature = modify->compute[idx];
 
   if (temperature->tempflag == 0)
     error->all(FLERR,"Fix_modify temperature ID does not "
                "compute temperature");
 
   // a group mismatch only produces a warning, printed by proc 0
 
   if (comm->me == 0 && temperature->igroup != igroup)
     error->warning(FLERR,"Group for fix_modify temp != fix group");
 
   return 2;
 }
 
 /* ----------------------------------------------------------------------
    compute squared distance between atoms I,J
    must use minimum_image since J was found thru atom->map()
 ------------------------------------------------------------------------- */
 
 double FixBondSwap::dist_rsq(int i, int j)
 {
   double delx = x[i][0] - x[j][0];
   double dely = x[i][1] - x[j][1];
   double delz = x[i][2] - x[j][2];
   domain->minimum_image(delx,dely,delz);
   return (delx*delx + dely*dely + delz*delz);
 }
 
 /* ----------------------------------------------------------------------
    return pairwise interaction energy between atoms I,J
    will always be full non-bond interaction, so factors = 1 in single() call
 ------------------------------------------------------------------------- */
 
 double FixBondSwap::pair_eng(int i, int j)
 {
   // last argument of single() is required by its signature;
   // whatever it receives is discarded here
 
   double unused;
   const double rsq = dist_rsq(i,j);
 
   // both scaling factors are passed as 1.0
 
   return force->pair->single(i,j,type[i],type[j],rsq,1.0,1.0,unused);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double FixBondSwap::bond_eng(int btype, int i, int j)
 {
   // return bond->single() for bond type btype between atoms I,J
   // evaluated at their squared separation from dist_rsq()
   // last argument of single() is required by its signature; value is discarded
 
   double unused;
   const double rsq = dist_rsq(i,j);
   return force->bond->single(btype,rsq,i,j,unused);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double FixBondSwap::angle_eng(int atype, int i, int j, int k)
 {
   // return angle->single() for angle type atype on atoms I,J,K
   // an end atom index of -1 marks a non-existent angle at the end of a chain
   //   which contributes 0.0
 
   const bool missing = (i == -1) || (k == -1);
   return missing ? 0.0 : force->angle->single(atype,i,j,k);
 }
 
 /* ----------------------------------------------------------------------
    return bond swapping stats, summed over all procs
    n = 0 returns naccept = # of accepted swaps
    any other n returns foursome = # of attempted swaps
 ------------------------------------------------------------------------- */
 
 double FixBondSwap::compute_vector(int n)
 {
   // pick this proc's counter, then sum it across the world communicator
 
   double mine = (n == 0) ? naccept : foursome;
   double total;
   MPI_Allreduce(&mine,&total,1,MPI_DOUBLE,MPI_SUM,world);
   return total;
 }
 
 /* ----------------------------------------------------------------------
    memory usage of alist
 ------------------------------------------------------------------------- */
 
 double FixBondSwap::memory_usage()
 {
   // nmax ints, reported in bytes
 
   return static_cast<double>(nmax * sizeof(int));
 }
diff --git a/src/MEAM/pair_meam.cpp b/src/MEAM/pair_meam.cpp
index 2c5bef046..f3fbe0a70 100644
--- a/src/MEAM/pair_meam.cpp
+++ b/src/MEAM/pair_meam.cpp
@@ -1,946 +1,946 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Greg Wagner (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_meam.h"
 #include "atom.h"
 #include "force.h"
 #include "comm.h"
 #include "memory.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 // size of the line buffer used when reading MEAM potential files
 
 #define MAXLINE 1024
 
 // lattice identifiers
 // read_files() maps the lattice strings "fcc","bcc","hcp","dim","dia"
 //   of the library file to FCC,BCC,HCP,DIM,DIAMOND
 
 enum{FCC,BCC,HCP,DIM,DIAMOND,B1,C11,L12,B2};
 
 // keyword names matched against the first word of each user param file line
 // nkeywords must equal the number of entries in keywords[]
 
 int nkeywords = 21;
 const char *keywords[] = {"Ec","alpha","rho0","delta","lattce",
                           "attrac","repuls","nn2","Cmin","Cmax","rc","delr",
                           "augt1","gsmooth_factor","re","ialloy",
                           "mixture_ref_t","erose_form","zbl",
                           "emb_lin_neg","bkgd_dyn"};
 
 /* ---------------------------------------------------------------------- */
 
 PairMEAM::PairMEAM(LAMMPS *lmp) : Pair(lmp)
 {
   // pair style flags
 
   single_enable = 0;
   restartinfo = 0;
   one_coeff = 1;
   manybody_flag = 1;
 
   // per-atom work arrays start out unallocated, compute() grows them
 
   nmax = 0;
 
   rho = NULL;
   rho0 = NULL;
   rho1 = NULL;
   rho2 = NULL;
   rho3 = NULL;
   frhop = NULL;
 
   gamma = NULL;
   dgamma1 = NULL;
   dgamma2 = NULL;
   dgamma3 = NULL;
   arho2b = NULL;
 
   arho1 = NULL;
   arho2 = NULL;
   arho3 = NULL;
   arho3b = NULL;
   t_ave = NULL;
   tsq_ave = NULL;
 
   // per-neighbor work arrays start out unallocated as well
 
   maxneigh = 0;
   scrfcn = NULL;
   dscrfcn = NULL;
   fcpair = NULL;
 
   // no elements until coeff() is called
 
   nelements = 0;
   elements = NULL;
   mass = NULL;
 
   // set comm size needed by this Pair
 
   comm_forward = 38;
   comm_reverse = 30;
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
    check if allocated, since class can be destructed when incomplete
 ------------------------------------------------------------------------- */
 
 PairMEAM::~PairMEAM()
 {
   // let the MEAM library release its own storage first
 
   meam_cleanup_();
 
   // per-atom scalar arrays
 
   memory->destroy(rho);
   memory->destroy(rho0);
   memory->destroy(rho1);
   memory->destroy(rho2);
   memory->destroy(rho3);
   memory->destroy(frhop);
   memory->destroy(gamma);
   memory->destroy(dgamma1);
   memory->destroy(dgamma2);
   memory->destroy(dgamma3);
   memory->destroy(arho2b);
 
   // per-atom 2d arrays
 
   memory->destroy(arho1);
   memory->destroy(arho2);
   memory->destroy(arho3);
   memory->destroy(arho3b);
   memory->destroy(t_ave);
   memory->destroy(tsq_ave);
 
   // per-neighbor arrays
 
   memory->destroy(scrfcn);
   memory->destroy(dscrfcn);
   memory->destroy(fcpair);
 
   // element names and masses
 
   for (int k = nelements-1; k >= 0; k--) delete [] elements[k];
   delete [] elements;
   delete [] mass;
 
   // per-type arrays only exist if allocate() was called
 
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
   delete [] map;
   delete [] fmap;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairMEAM::compute(int eflag, int vflag)
 {
   // drive one MEAM evaluation through the external meam_*_ routines:
   //   meam_dens_init_ per atom, reverse comm, meam_dens_final_,
   //   forward comm, meam_force_ per atom
   // eflag,vflag are handed to ev_setup() when either is non-zero
 
   int i,j,ii,n,inum_half,errorflag;
   int *ilist_half,*numneigh_half,**firstneigh_half;
   int *numneigh_full,**firstneigh_full;
 
   // set up energy/virial flags, or clear all of them if neither is requested
 
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = eflag_global = vflag_global =
          eflag_atom = vflag_atom = 0;
 
   // grow local arrays if necessary
   // old contents are not preserved, arrays are destroyed and re-created
 
   if (atom->nmax > nmax) {
     memory->destroy(rho);
     memory->destroy(rho0);
     memory->destroy(rho1);
     memory->destroy(rho2);
     memory->destroy(rho3);
     memory->destroy(frhop);
     memory->destroy(gamma);
     memory->destroy(dgamma1);
     memory->destroy(dgamma2);
     memory->destroy(dgamma3);
     memory->destroy(arho2b);
     memory->destroy(arho1);
     memory->destroy(arho2);
     memory->destroy(arho3);
     memory->destroy(arho3b);
     memory->destroy(t_ave);
     memory->destroy(tsq_ave);
 
     nmax = atom->nmax;
 
     memory->create(rho,nmax,"pair:rho");
     memory->create(rho0,nmax,"pair:rho0");
     memory->create(rho1,nmax,"pair:rho1");
     memory->create(rho2,nmax,"pair:rho2");
     memory->create(rho3,nmax,"pair:rho3");
     memory->create(frhop,nmax,"pair:frhop");
     memory->create(gamma,nmax,"pair:gamma");
     memory->create(dgamma1,nmax,"pair:dgamma1");
     memory->create(dgamma2,nmax,"pair:dgamma2");
     memory->create(dgamma3,nmax,"pair:dgamma3");
     memory->create(arho2b,nmax,"pair:arho2b");
     memory->create(arho1,nmax,3,"pair:arho1");
     memory->create(arho2,nmax,6,"pair:arho2");
     memory->create(arho3,nmax,10,"pair:arho3");
     memory->create(arho3b,nmax,3,"pair:arho3b");
     memory->create(t_ave,nmax,3,"pair:t_ave");
     memory->create(tsq_ave,nmax,3,"pair:tsq_ave");
   }
 
   // neighbor list info
   // NOTE(review): only numneigh/firstneigh are taken from the full list,
   //   it is always traversed with inum/ilist of the half list
   //   presumably both lists cover the same atoms -- confirm
 
   inum_half = listhalf->inum;
   ilist_half = listhalf->ilist;
   numneigh_half = listhalf->numneigh;
   firstneigh_half = listhalf->firstneigh;
   numneigh_full = listfull->numneigh;
   firstneigh_full = listfull->firstneigh;
 
   // strip neighbor lists of any special bond flags before using with MEAM
   // necessary before doing neigh_f2c and neigh_c2f conversions each step
   // only done when neighbor->ago == 0
   //   (presumably = lists were rebuilt on this step -- confirm)
 
   if (neighbor->ago == 0) {
     neigh_strip(inum_half,ilist_half,numneigh_half,firstneigh_half);
     neigh_strip(inum_half,ilist_half,numneigh_full,firstneigh_full);
   }
 
   // check size of scrfcn based on half neighbor list
   // n = total # of half-list neighbors summed over all atoms in ilist_half
 
   int nlocal = atom->nlocal;
   int nall = nlocal + atom->nghost;
 
   n = 0;
   for (ii = 0; ii < inum_half; ii++) n += numneigh_half[ilist_half[ii]];
 
   if (n > maxneigh) {
     memory->destroy(scrfcn);
     memory->destroy(dscrfcn);
     memory->destroy(fcpair);
     maxneigh = n;
     memory->create(scrfcn,maxneigh,"pair:scrfcn");
     memory->create(dscrfcn,maxneigh,"pair:dscrfcn");
     memory->create(fcpair,maxneigh,"pair:fcpair");
   }
 
   // zero out local arrays
   // done for owned and ghost atoms (nall = nlocal + nghost)
 
   for (i = 0; i < nall; i++) {
     rho0[i] = 0.0;
     arho2b[i] = 0.0;
     arho1[i][0] = arho1[i][1] = arho1[i][2] = 0.0;
     for (j = 0; j < 6; j++) arho2[i][j] = 0.0;
     for (j = 0; j < 10; j++) arho3[i][j] = 0.0;
     arho3b[i][0] = arho3b[i][1] = arho3b[i][2] = 0.0;
     t_ave[i][0] = t_ave[i][1] = t_ave[i][2] = 0.0;
     tsq_ave[i][0] = tsq_ave[i][1] = tsq_ave[i][2] = 0.0;
   }
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   int ntype = atom->ntypes;
 
   // change neighbor list indices to Fortran indexing
   // undone by the neigh_f2c() calls at the end of this function
 
   neigh_c2f(inum_half,ilist_half,numneigh_half,firstneigh_half);
   neigh_c2f(inum_half,ilist_half,numneigh_full,firstneigh_full);
 
   // 3 stages of MEAM calculation
   // loop over my atoms followed by communication
   // offset = start of atom i's entries in scrfcn,dscrfcn,fcpair
   //   advanced by the # of half-list neighbors of each atom
 
   int ifort;
   int offset = 0;
   errorflag = 0;
 
   for (ii = 0; ii < inum_half; ii++) {
     i = ilist_half[ii];
     // ifort = 1-based atom index passed to the library
     ifort = i+1;
     meam_dens_init_(&ifort,&nmax,&ntype,type,fmap,&x[0][0],
                     &numneigh_half[i],firstneigh_half[i],
                     &numneigh_full[i],firstneigh_full[i],
                     &scrfcn[offset],&dscrfcn[offset],&fcpair[offset],
                     rho0,&arho1[0][0],&arho2[0][0],arho2b,
                     &arho3[0][0],&arho3b[0][0],&t_ave[0][0],&tsq_ave[0][0],
                     &errorflag);
     // non-zero errorflag from the library aborts the run
     if (errorflag) {
       char str[128];
       sprintf(str,"MEAM library error %d",errorflag);
       error->one(FLERR,str);
     }
     offset += numneigh_half[i];
   }
 
   // NOTE(review): presumably sums the per-atom density terms accumulated
   //   on ghost atoms back onto their owning procs -- confirm
 
   comm->reverse_comm_pair(this);
 
   meam_dens_final_(&nlocal,&nmax,&eflag_either,&eflag_global,&eflag_atom,
                    &eng_vdwl,eatom,&ntype,type,fmap,
                    &arho1[0][0],&arho2[0][0],arho2b,&arho3[0][0],
                    &arho3b[0][0],&t_ave[0][0],&tsq_ave[0][0],gamma,dgamma1,
                    dgamma2,dgamma3,rho,rho0,rho1,rho2,rho3,frhop,&errorflag);
   if (errorflag) {
     char str[128];
     sprintf(str,"MEAM library error %d",errorflag);
     error->one(FLERR,str);
   }
 
   // NOTE(review): presumably sends the finalized per-atom values
   //   from owned atoms to their ghost copies -- confirm
 
   comm->forward_comm_pair(this);
 
   // restart at the first atom's entries in scrfcn,dscrfcn,fcpair
 
   offset = 0;
 
   // vptr is first value in vatom if it will be used by meam_force()
   // else vatom may not exist, so pass dummy ptr
 
   double *vptr;
   if (vflag_atom) vptr = &vatom[0][0];
   else vptr = &cutmax;
 
   for (ii = 0; ii < inum_half; ii++) {
     i = ilist_half[ii];
     ifort = i+1;
     meam_force_(&ifort,&nmax,&eflag_either,&eflag_global,&eflag_atom,
                 &vflag_atom,&eng_vdwl,eatom,&ntype,type,fmap,&x[0][0],
                 &numneigh_half[i],firstneigh_half[i],
                 &numneigh_full[i],firstneigh_full[i],
                 &scrfcn[offset],&dscrfcn[offset],&fcpair[offset],
                 dgamma1,dgamma2,dgamma3,rho0,rho1,rho2,rho3,frhop,
                 &arho1[0][0],&arho2[0][0],arho2b,&arho3[0][0],&arho3b[0][0],
                 &t_ave[0][0],&tsq_ave[0][0],&f[0][0],vptr,&errorflag);
     if (errorflag) {
       char str[128];
       sprintf(str,"MEAM library error %d",errorflag);
       error->one(FLERR,str);
     }
     offset += numneigh_half[i];
   }
 
   // change neighbor list indices back to C indexing
 
   neigh_f2c(inum_half,ilist_half,numneigh_half,firstneigh_half);
   neigh_f2c(inum_half,ilist_half,numneigh_full,firstneigh_full);
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairMEAM::allocate()
 {
   // allocate per-type arrays and mark them as allocated
   // setflag,cutsq,map have ntypes+1 entries per dimension
   // fmap has exactly ntypes entries, init_style() fills fmap[0..ntypes-1]
 
   const int ntypes = atom->ntypes;
   allocated = 1;
 
   memory->create(setflag,ntypes+1,ntypes+1,"pair:setflag");
   memory->create(cutsq,ntypes+1,ntypes+1,"pair:cutsq");
 
   map = new int[ntypes+1];
   fmap = new int[ntypes];
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairMEAM::settings(int narg, char **arg)
 {
   if (narg != 0) error->all(FLERR,"Illegal pair_style command");
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairMEAM::coeff(int narg, char **arg)
 {
   int i,j,m,n;
 
   if (!allocated) allocate();
 
   if (narg < 6) error->all(FLERR,"Incorrect args for pair coefficients");
 
   // insure I,J args are * *
 
   if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // read MEAM element names between 2 filenames
   // nelements = # of MEAM elements
   // elements = list of unique element names
 
   if (nelements) {
     for (i = 0; i < nelements; i++) delete [] elements[i];
     delete [] elements;
     delete [] mass;
   }
   nelements = narg - 4 - atom->ntypes;
   if (nelements < 1) error->all(FLERR,"Incorrect args for pair coefficients");
   elements = new char*[nelements];
   mass = new double[nelements];
 
   for (i = 0; i < nelements; i++) {
     n = strlen(arg[i+3]) + 1;
     elements[i] = new char[n];
     strcpy(elements[i],arg[i+3]);
   }
 
   // read MEAM library and parameter files
   // pass all parameters to MEAM package
   // tell MEAM package that setup is done
 
   read_files(arg[2],arg[2+nelements+1]);
   meam_setup_done_(&cutmax);
 
   // read args that map atom types to MEAM elements
   // map[i] = which element the Ith atom type is, -1 if not mapped
 
   for (i = 4 + nelements; i < narg; i++) {
     m = i - (4+nelements) + 1;
     for (j = 0; j < nelements; j++)
       if (strcmp(arg[i],elements[j]) == 0) break;
     if (j < nelements) map[m] = j;
     else if (strcmp(arg[i],"NULL") == 0) map[m] = -1;
     else error->all(FLERR,"Incorrect args for pair coefficients");
   }
 
   // clear setflag since coeff() called once with I,J = * *
 
   n = atom->ntypes;
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       setflag[i][j] = 0;
 
   // set setflag i,j for type pairs where both are mapped to elements
   // set mass for i,i in atom class
 
   int count = 0;
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       if (map[i] >= 0 && map[j] >= 0) {
         setflag[i][j] = 1;
         if (i == j) atom->set_mass(i,mass[map[i]]);
         count++;
       }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairMEAM::init_style()
 {
   // requires newton pair on
   // registers 2 neighbor list requests and builds fmap from map
 
   if (force->newton_pair == 0)
     error->all(FLERR,"Pair style MEAM requires newton pair on");
 
   // need full and half neighbor list
   // request id 1 = full list, request id 2 = half list
   // ids are matched in init_list() to set listfull and listhalf
   // half request sets half_from_full with otherlist = the full request
 
-  int irequest_full = neighbor->request(this);
+  int irequest_full = neighbor->request(this,instance_me);
   neighbor->requests[irequest_full]->id = 1;
   neighbor->requests[irequest_full]->half = 0;
   neighbor->requests[irequest_full]->full = 1;
-  int irequest_half = neighbor->request(this);
+  int irequest_half = neighbor->request(this,instance_me);
   neighbor->requests[irequest_half]->id = 2;
   neighbor->requests[irequest_half]->half = 0;
   neighbor->requests[irequest_half]->half_from_full = 1;
   neighbor->requests[irequest_half]->otherlist = irequest_full;
 
   // setup Fortran-style mapping array needed by MEAM package
   // fmap is indexed from 1:ntypes by Fortran and stores a Fortran index
   // if type I is not a MEAM atom, fmap stores a 0
   //   since map[i] = -1 for unmapped types and fmap[i-1] = map[i] + 1
 
   for (int i = 1; i <= atom->ntypes; i++) fmap[i-1] = map[i] + 1;
 }
 
 /* ----------------------------------------------------------------------
    neighbor callback to inform pair style of neighbor list to use
    half or full
 ------------------------------------------------------------------------- */
 
 void PairMEAM::init_list(int id, NeighList *ptr)
 {
   // id 1 = full list, id 2 = half list, as assigned in init_style()
   // any other id is ignored
 
   switch (id) {
   case 1:
     listfull = ptr;
     break;
   case 2:
     listhalf = ptr;
     break;
   default:
     break;
   }
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairMEAM::init_one(int i, int j)
 {
   // every type pair reports the same value: cutmax, set via meam_setup_done_()
   // i,j are not used
 
   return cutmax;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairMEAM::read_files(char *globalfile, char *userfile)
 {
   // open global meamf file on proc 0
 
   FILE *fp;
   if (comm->me == 0) {
     fp = force->open_potential(globalfile);
     if (fp == NULL) {
       char str[128];
       sprintf(str,"Cannot open MEAM potential file %s",globalfile);
       error->one(FLERR,str);
     }
   }
 
   // allocate parameter arrays
 
   int params_per_line = 19;
 
   int *lat = new int[nelements];
   int *ielement = new int[nelements];
   int *ibar = new int[nelements];
   double *z = new double[nelements];
   double *atwt = new double[nelements];
   double *alpha = new double[nelements];
   double *b0 = new double[nelements];
   double *b1 = new double[nelements];
   double *b2 = new double[nelements];
   double *b3 = new double[nelements];
   double *alat = new double[nelements];
   double *esub = new double[nelements];
   double *asub = new double[nelements];
   double *t0 = new double[nelements];
   double *t1 = new double[nelements];
   double *t2 = new double[nelements];
   double *t3 = new double[nelements];
   double *rozero = new double[nelements];
 
   bool *found = new bool[nelements];
   for (int i = 0; i < nelements; i++) found[i] = false;
 
   // read each set of params from global MEAM file
   // one set of params can span multiple lines
   // store params if element name is in element list
   // if element name appears multiple times, only store 1st entry
 
   int i,n,nwords;
   char **words = new char*[params_per_line+1];
   char line[MAXLINE],*ptr;
   int eof = 0;
 
   int nset = 0;
   while (1) {
     if (comm->me == 0) {
       ptr = fgets(line,MAXLINE,fp);
       if (ptr == NULL) {
         eof = 1;
         fclose(fp);
       } else n = strlen(line) + 1;
     }
     MPI_Bcast(&eof,1,MPI_INT,0,world);
     if (eof) break;
     MPI_Bcast(&n,1,MPI_INT,0,world);
     MPI_Bcast(line,n,MPI_CHAR,0,world);
 
     // strip comment, skip line if blank
 
     if ((ptr = strchr(line,'#'))) *ptr = '\0';
     nwords = atom->count_words(line);
     if (nwords == 0) continue;
 
     // concatenate additional lines until have params_per_line words
 
     while (nwords < params_per_line) {
       n = strlen(line);
       if (comm->me == 0) {
         ptr = fgets(&line[n],MAXLINE-n,fp);
         if (ptr == NULL) {
           eof = 1;
           fclose(fp);
         } else n = strlen(line) + 1;
       }
       MPI_Bcast(&eof,1,MPI_INT,0,world);
       if (eof) break;
       MPI_Bcast(&n,1,MPI_INT,0,world);
       MPI_Bcast(line,n,MPI_CHAR,0,world);
       if ((ptr = strchr(line,'#'))) *ptr = '\0';
       nwords = atom->count_words(line);
     }
 
     if (nwords != params_per_line)
       error->all(FLERR,"Incorrect format in MEAM potential file");
 
     // words = ptrs to all words in line
     // strip single and double quotes from words
 
     nwords = 0;
     words[nwords++] = strtok(line,"' \t\n\r\f");
     while ((words[nwords++] = strtok(NULL,"' \t\n\r\f"))) continue;
 
     // skip if element name isn't in element list
 
     for (i = 0; i < nelements; i++)
       if (strcmp(words[0],elements[i]) == 0) break;
     if (i == nelements) continue;
 
     // skip if element already appeared
 
     if (found[i] == true) continue;
     found[i] = true;
 
     // map lat string to an integer
 
     if (strcmp(words[1],"fcc") == 0) lat[i] = FCC;
     else if (strcmp(words[1],"bcc") == 0) lat[i] = BCC;
     else if (strcmp(words[1],"hcp") == 0) lat[i] = HCP;
     else if (strcmp(words[1],"dim") == 0) lat[i] = DIM;
     else if (strcmp(words[1],"dia") == 0) lat[i] = DIAMOND;
     else error->all(FLERR,"Unrecognized lattice type in MEAM file 1");
 
     // store parameters
 
     z[i] = atof(words[2]);
     ielement[i] = atoi(words[3]);
     atwt[i] = atof(words[4]);
     alpha[i] = atof(words[5]);
     b0[i] = atof(words[6]);
     b1[i] = atof(words[7]);
     b2[i] = atof(words[8]);
     b3[i] = atof(words[9]);
     alat[i] = atof(words[10]);
     esub[i] = atof(words[11]);
     asub[i] = atof(words[12]);
     t0[i] = atof(words[13]);
     t1[i] = atof(words[14]);
     t2[i] = atof(words[15]);
     t3[i] = atof(words[16]);
     rozero[i] = atof(words[17]);
     ibar[i] = atoi(words[18]);
 
     nset++;
   }
 
   // error if didn't find all elements in file
 
   if (nset != nelements)
     error->all(FLERR,"Did not find all elements in MEAM library file");
 
   // pass element parameters to MEAM package
 
   meam_setup_global_(&nelements,lat,z,ielement,atwt,alpha,b0,b1,b2,b3,
                        alat,esub,asub,t0,t1,t2,t3,rozero,ibar);
 
   // set element masses
 
   for (i = 0; i < nelements; i++) mass[i] = atwt[i];
 
   // clean-up memory
 
   delete [] words;
 
   delete [] lat;
   delete [] ielement;
   delete [] ibar;
   delete [] z;
   delete [] atwt;
   delete [] alpha;
   delete [] b0;
   delete [] b1;
   delete [] b2;
   delete [] b3;
   delete [] alat;
   delete [] esub;
   delete [] asub;
   delete [] t0;
   delete [] t1;
   delete [] t2;
   delete [] t3;
   delete [] rozero;
   delete [] found;
 
   // done if user param file is NULL
 
   if (strcmp(userfile,"NULL") == 0) return;
 
   // open user param file on proc 0
 
   if (comm->me == 0) {
     fp = force->open_potential(userfile);
     if (fp == NULL) {
       char str[128];
       sprintf(str,"Cannot open MEAM potential file %s",userfile);
       error->one(FLERR,str);
     }
   }
 
   // read settings
   // pass them one at a time to MEAM package
   // match strings to list of corresponding ints
 
   int which;
   double value;
   int nindex,index[3];
   int maxparams = 6;
   char **params = new char*[maxparams];
   int nparams;
 
   eof = 0;
   while (1) {
     if (comm->me == 0) {
       ptr = fgets(line,MAXLINE,fp);
       if (ptr == NULL) {
         eof = 1;
         fclose(fp);
       } else n = strlen(line) + 1;
     }
     MPI_Bcast(&eof,1,MPI_INT,0,world);
     if (eof) break;
     MPI_Bcast(&n,1,MPI_INT,0,world);
     MPI_Bcast(line,n,MPI_CHAR,0,world);
 
     // strip comment, skip line if blank
 
     if ((ptr = strchr(line,'#'))) *ptr = '\0';
     nparams = atom->count_words(line);
     if (nparams == 0) continue;
 
     // words = ptrs to all words in line
 
     nparams = 0;
     params[nparams++] = strtok(line,"=(), '\t\n\r\f");
     while (nparams < maxparams &&
            (params[nparams++] = strtok(NULL,"=(), '\t\n\r\f")))
       continue;
     nparams--;
 
     for (which = 0; which < nkeywords; which++)
       if (strcmp(params[0],keywords[which]) == 0) break;
     if (which == nkeywords) {
       char str[128];
       sprintf(str,"Keyword %s in MEAM parameter file not recognized",
               params[0]);
       error->all(FLERR,str);
     }
     nindex = nparams - 2;
     for (i = 0; i < nindex; i++) index[i] = atoi(params[i+1]);
 
     // map lattce_meam value to an integer
 
     if (which == 4) {
       if (strcmp(params[nparams-1],"fcc") == 0) value = FCC;
       else if (strcmp(params[nparams-1],"bcc") == 0) value = BCC;
       else if (strcmp(params[nparams-1],"hcp") == 0) value = HCP;
       else if (strcmp(params[nparams-1],"dim") == 0) value = DIM;
       else if (strcmp(params[nparams-1],"dia") == 0) value = DIAMOND;
       else if (strcmp(params[nparams-1],"b1")  == 0) value = B1;
       else if (strcmp(params[nparams-1],"c11") == 0) value = C11;
       else if (strcmp(params[nparams-1],"l12") == 0) value = L12;
       else if (strcmp(params[nparams-1],"b2")  == 0) value = B2;
       else error->all(FLERR,"Unrecognized lattice type in MEAM file 2");
     }
     else value = atof(params[nparams-1]);
 
     // pass single setting to MEAM package
 
     int errorflag = 0;
     meam_setup_param_(&which,&value,&nindex,index,&errorflag);
     if (errorflag) {
       char str[128];
       sprintf(str,"MEAM library error %d",errorflag);
       error->all(FLERR,str);
     }
   }
 
   delete [] params;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int PairMEAM::pack_forward_comm(int n, int *list, double *buf,
                                 int pbc_flag, int *pbc)
 {
   // pack 38 doubles per listed atom into buf and return the number packed
   // the order here must match the order read by unpack_forward_comm()
   // pbc_flag and pbc are not used by this method
 
   int nbuf = 0;
 
   for (int ilist = 0; ilist < n; ilist++) {
     const int iatom = list[ilist];
 
     // scalar per-atom quantities
 
     buf[nbuf++] = rho0[iatom];
     buf[nbuf++] = rho1[iatom];
     buf[nbuf++] = rho2[iatom];
     buf[nbuf++] = rho3[iatom];
     buf[nbuf++] = frhop[iatom];
     buf[nbuf++] = gamma[iatom];
     buf[nbuf++] = dgamma1[iatom];
     buf[nbuf++] = dgamma2[iatom];
     buf[nbuf++] = dgamma3[iatom];
     buf[nbuf++] = arho2b[iatom];
 
     // fixed-length per-atom vectors, component by component
 
     for (int k = 0; k < 3; k++) buf[nbuf++] = arho1[iatom][k];
     for (int k = 0; k < 6; k++) buf[nbuf++] = arho2[iatom][k];
     for (int k = 0; k < 10; k++) buf[nbuf++] = arho3[iatom][k];
     for (int k = 0; k < 3; k++) buf[nbuf++] = arho3b[iatom][k];
     for (int k = 0; k < 3; k++) buf[nbuf++] = t_ave[iatom][k];
     for (int k = 0; k < 3; k++) buf[nbuf++] = tsq_ave[iatom][k];
   }
 
   return nbuf;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairMEAM::unpack_forward_comm(int n, int first, double *buf)
 {
   // unpack 38 doubles per atom from buf into atoms first .. first+n-1
   // the order here must match the order written by pack_forward_comm()
 
   int nbuf = 0;
   const int stop = first + n;
 
   for (int iatom = first; iatom < stop; iatom++) {
 
     // scalar per-atom quantities
 
     rho0[iatom] = buf[nbuf++];
     rho1[iatom] = buf[nbuf++];
     rho2[iatom] = buf[nbuf++];
     rho3[iatom] = buf[nbuf++];
     frhop[iatom] = buf[nbuf++];
     gamma[iatom] = buf[nbuf++];
     dgamma1[iatom] = buf[nbuf++];
     dgamma2[iatom] = buf[nbuf++];
     dgamma3[iatom] = buf[nbuf++];
     arho2b[iatom] = buf[nbuf++];
 
     // fixed-length per-atom vectors, component by component
 
     for (int k = 0; k < 3; k++) arho1[iatom][k] = buf[nbuf++];
     for (int k = 0; k < 6; k++) arho2[iatom][k] = buf[nbuf++];
     for (int k = 0; k < 10; k++) arho3[iatom][k] = buf[nbuf++];
     for (int k = 0; k < 3; k++) arho3b[iatom][k] = buf[nbuf++];
     for (int k = 0; k < 3; k++) t_ave[iatom][k] = buf[nbuf++];
     for (int k = 0; k < 3; k++) tsq_ave[iatom][k] = buf[nbuf++];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int PairMEAM::pack_reverse_comm(int n, int first, double *buf)
 {
   // pack 30 doubles per atom for atoms first .. first+n-1 into buf
   // and return the number packed
   // the order here must match the order read by unpack_reverse_comm()
 
   int nbuf = 0;
   const int stop = first + n;
 
   for (int iatom = first; iatom < stop; iatom++) {
     buf[nbuf++] = rho0[iatom];
     buf[nbuf++] = arho2b[iatom];
 
     for (int k = 0; k < 3; k++) buf[nbuf++] = arho1[iatom][k];
     for (int k = 0; k < 6; k++) buf[nbuf++] = arho2[iatom][k];
     for (int k = 0; k < 10; k++) buf[nbuf++] = arho3[iatom][k];
     for (int k = 0; k < 3; k++) buf[nbuf++] = arho3b[iatom][k];
     for (int k = 0; k < 3; k++) buf[nbuf++] = t_ave[iatom][k];
     for (int k = 0; k < 3; k++) buf[nbuf++] = tsq_ave[iatom][k];
   }
 
   return nbuf;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairMEAM::unpack_reverse_comm(int n, int *list, double *buf)
 {
   // add 30 doubles per listed atom from buf onto the atom's existing values
   // the order here must match the order written by pack_reverse_comm()
 
   int nbuf = 0;
 
   for (int ilist = 0; ilist < n; ilist++) {
     const int iatom = list[ilist];
 
     rho0[iatom] += buf[nbuf++];
     arho2b[iatom] += buf[nbuf++];
 
     for (int k = 0; k < 3; k++) arho1[iatom][k] += buf[nbuf++];
     for (int k = 0; k < 6; k++) arho2[iatom][k] += buf[nbuf++];
     for (int k = 0; k < 10; k++) arho3[iatom][k] += buf[nbuf++];
     for (int k = 0; k < 3; k++) arho3b[iatom][k] += buf[nbuf++];
     for (int k = 0; k < 3; k++) t_ave[iatom][k] += buf[nbuf++];
     for (int k = 0; k < 3; k++) tsq_ave[iatom][k] += buf[nbuf++];
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
 ------------------------------------------------------------------------- */
 
 double PairMEAM::memory_usage()
 {
   // 11 doubles per atom, counted against nmax
   const double scalar_bytes = 11 * nmax * sizeof(double);
 
   // 3 + 6 + 10 + 3 + 3 + 3 = 28 doubles per atom, counted against nmax
   const double vector_bytes = 28 * nmax * sizeof(double);
 
   // 3 doubles per entry, counted against maxneigh
   const double neigh_bytes = 3 * maxneigh * sizeof(double);
 
   return scalar_bytes + vector_bytes + neigh_bytes;
 }
 
 /* ----------------------------------------------------------------------
    strip special bond flags from neighbor list entries
    are not used with MEAM
    need to do here so Fortran lib doesn't see them
    done once per reneighbor so that neigh_f2c and neigh_c2f don't see them
 ------------------------------------------------------------------------- */
 
 void PairMEAM::neigh_strip(int inum, int *ilist,
                            int *numneigh, int **firstneigh)
 {
   // mask every neighbor entry of the inum listed atoms with NEIGHMASK, in place
 
   for (int ii = 0; ii < inum; ii++) {
     const int iatom = ilist[ii];
     int *neighs = firstneigh[iatom];
     const int nneigh = numneigh[iatom];
     for (int jj = 0; jj < nneigh; jj++) neighs[jj] &= NEIGHMASK;
   }
 }
 
 /* ----------------------------------------------------------------------
    toggle neighbor list indices between zero- and one-based values
    needed for access by MEAM Fortran library
 ------------------------------------------------------------------------- */
 
 void PairMEAM::neigh_f2c(int inum, int *ilist, int *numneigh, int **firstneigh)
 {
   // subtract 1 from every neighbor index of the inum listed atoms, in place
 
   for (int ii = 0; ii < inum; ii++) {
     const int iatom = ilist[ii];
     int *neighs = firstneigh[iatom];
     const int nneigh = numneigh[iatom];
     for (int jj = 0; jj < nneigh; jj++) --neighs[jj];
   }
 }
 
 void PairMEAM::neigh_c2f(int inum, int *ilist, int *numneigh, int **firstneigh)
 {
   // add 1 to every neighbor index of the inum listed atoms, in place
 
   for (int ii = 0; ii < inum; ii++) {
     const int iatom = ilist[ii];
     int *neighs = firstneigh[iatom];
     const int nneigh = numneigh[iatom];
     for (int jj = 0; jj < nneigh; jj++) ++neighs[jj];
   }
 }
diff --git a/src/MISC/fix_orient_fcc.cpp b/src/MISC/fix_orient_fcc.cpp
index 1813b5dab..b636af290 100644
--- a/src/MISC/fix_orient_fcc.cpp
+++ b/src/MISC/fix_orient_fcc.cpp
@@ -1,603 +1,603 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Koenraad Janssens and David Olmsted (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "string.h"
 #include "stdlib.h"
 #include "mpi.h"
 #include "fix_orient_fcc.h"
 #include "atom.h"
 #include "update.h"
 #include "respa.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "comm.h"
 #include "output.h"
 #include "force.h"
 #include "math_const.h"
 #include "citeme.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
 
 #define BIG 1000000000
 
 static const char cite_fix_orient_fcc[] =
   "fix orient/fcc command:\n\n"
   "@Article{Janssens06,\n"
   " author = {K. G. F. Janssens, D. Olmsted, E.A. Holm, S. M. Foiles, S. J. Plimpton, and P. M. Derlet},\n"
   " title = {Computing the Mobility of Grain Boundaries},\n"
   " journal = {Nature Materials},\n"
   " year =    2006,\n"
   " volume =  5,\n"
   " pages =   {124--127}\n"
   "}\n\n";
 
 /* ---------------------------------------------------------------------- */
 
 FixOrientFCC::FixOrientFCC(LAMMPS *lmp, int narg, char **arg) :
   Fix(lmp, narg, arg)
 {
   // add the citation for this fix if citation tracking is active
 
   if (lmp->citeme) lmp->citeme->add(cite_fix_orient_fcc);
 
   MPI_Comm_rank(world,&me);
 
   // the command must have exactly 11 arguments
 
   if (narg != 11) error->all(FLERR,"Illegal fix orient/fcc command");
 
   // this fix exposes a global scalar and a 2-column per-atom array
 
   scalar_flag = 1;
   global_freq = 1;
   extscalar = 1;
 
   peratom_flag = 1;
   size_peratom_cols = 2;
   peratom_freq = 1;
 
   // numeric arguments
 
   nstats = force->inumeric(FLERR,arg[3]);
   direction_of_motion = force->inumeric(FLERR,arg[4]);
   a = force->numeric(FLERR,arg[5]);
   Vxi = force->numeric(FLERR,arg[6]);
   uxif_low = force->numeric(FLERR,arg[7]);
   uxif_high = force->numeric(FLERR,arg[8]);
 
   // arg[9] and arg[10] name the two reference orientation files
   // direction_of_motion = 0: arg[9] is the chi file, arg[10] the xi file
   // direction_of_motion = 1: arg[9] is the xi file, arg[10] the chi file
 
   if (direction_of_motion != 0 && direction_of_motion != 1)
     error->all(FLERR,"Illegal fix orient/fcc command");
 
   const int ixi = (direction_of_motion == 0) ? 10 : 9;
   const int ichi = (direction_of_motion == 0) ? 9 : 10;
 
   int nchar = strlen(arg[ixi]) + 1;
   xifilename = new char[nchar];
   strcpy(xifilename,arg[ixi]);
 
   nchar = strlen(arg[ichi]) + 1;
   chifilename = new char[nchar];
   strcpy(chifilename,arg[ichi]);
 
   // initializations
 
   half_fcc_nn = 6;
   use_xismooth = false;
   double xicutoff = 1.57;
   xicutoffsq = xicutoff * xicutoff;
   cutsq = 0.5 * a*a*xicutoffsq;
   nmax = 0;
 
   // proc 0 reads 6 lines of 3 numbers from the xi file, then from the chi file
 
   if (me == 0) {
     char buffer[IMGMAX];
     int nread;
 
     FILE *fpxi = fopen(xifilename,"r");
     if (fpxi == NULL) error->one(FLERR,"Fix orient/fcc file open failed");
     for (int irow = 0; irow < 6; irow++) {
       if (!fgets(buffer,IMGMAX,fpxi))
         error->one(FLERR,"Fix orient/fcc file read failed");
       nread = sscanf(buffer,"%lg %lg %lg",
                      &Rxi[irow][0],&Rxi[irow][1],&Rxi[irow][2]);
       if (nread != 3) error->one(FLERR,"Fix orient/fcc file read failed");
     }
     fclose(fpxi);
 
     FILE *fpchi = fopen(chifilename,"r");
     if (fpchi == NULL) error->one(FLERR,"Fix orient/fcc file open failed");
     for (int irow = 0; irow < 6; irow++) {
       if (!fgets(buffer,IMGMAX,fpchi))
         error->one(FLERR,"Fix orient/fcc file read failed");
       nread = sscanf(buffer,"%lg %lg %lg",
                      &Rchi[irow][0],&Rchi[irow][1],&Rchi[irow][2]);
       if (nread != 3) error->one(FLERR,"Fix orient/fcc file read failed");
     }
     fclose(fpchi);
   }
 
   // share the 6x3 reference vectors with all procs
 
   MPI_Bcast(&Rxi[0][0],18,MPI_DOUBLE,0,world);
   MPI_Bcast(&Rchi[0][0],18,MPI_DOUBLE,0,world);
 
   // make copy of the reference vectors
   // index 0 holds the xi set, index 1 the chi set
 
   for (int irow = 0; irow < 6; irow++)
     for (int idim = 0; idim < 3; idim++) {
       half_xi_chi_vec[0][irow][idim] = Rxi[irow][idim];
       half_xi_chi_vec[1][irow][idim] = Rchi[irow][idim];
     }
 
   // compute xiid,xi0,xi1 for all 12 neighbors
   // xi is the favored crystal
   // want order parameter when actual is Rchi
   // each chi vector is evaluated once as given and once negated
 
   double xi_sq,dxi[3],plus[3],minus[3];
 
   xiid = 0.0;
   for (int irow = 0; irow < 6; irow++) {
     for (int idim = 0; idim < 3; idim++) {
       plus[idim] = Rchi[irow][idim];
       minus[idim] = -plus[idim];
     }
     find_best_ref(plus,0,xi_sq,dxi);
     xiid += sqrt(xi_sq);
     find_best_ref(minus,0,xi_sq,dxi);
     xiid += sqrt(xi_sq);
   }
 
   xiid /= 12.0;
   xi0 = uxif_low * xiid;
   xi1 = uxif_high * xiid;
 
   // set comm size needed by this Fix
   // NOTE: doesn't seem that use_xismooth is ever true
 
   comm_forward = use_xismooth ? 62 : 50;
 
   added_energy = 0.0;
 
   // per-atom storage sized to the current atom->nmax
 
   nmax = atom->nmax;
   nbr = (Nbr *) memory->smalloc(nmax*sizeof(Nbr),"orient/fcc:nbr");
   memory->create(order,nmax,2,"orient/fcc:order");
   array_atom = order;
 
   // zero the array since a variable may access it before first run
 
   const int nowned = atom->nlocal;
   for (int iatom = 0; iatom < nowned; iatom++) {
     order[iatom][1] = 0.0;
     order[iatom][0] = 0.0;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 FixOrientFCC::~FixOrientFCC()
 {
   // per-atom storage obtained through the memory class
 
   memory->destroy(order);
   memory->sfree(nbr);
 
   // file name copies made with new[] in the constructor
 
   delete [] chifilename;
   delete [] xifilename;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixOrientFCC::setmask()
 {
   // bitmask of the three flags this fix sets
 
   return POST_FORCE | THERMO_ENERGY | POST_FORCE_RESPA;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixOrientFCC::init()
 {
   // if the integrate style contains "respa", record its number of levels
   // nlevels_respa is read later by setup() and post_force_respa()
 
   if (strstr(update->integrate_style,"respa"))
     nlevels_respa = ((Respa *) update->integrate)->nlevels;
 
   // need a full neighbor list, built whenever re-neighboring occurs
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
 
   // mark the request as a fix request (not pair) for a full list (not half)
 
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->fix = 1;
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixOrientFCC::init_list(int id, NeighList *ptr)
 {
   list = ptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixOrientFCC::setup(int vflag)
 {
   if (strstr(update->integrate_style,"verlet"))
     post_force(vflag);
   else {
     ((Respa *) update->integrate)->copy_flevel_f(nlevels_respa-1);
     post_force_respa(vflag,nlevels_respa-1,0);
     ((Respa *) update->integrate)->copy_f_flevel(nlevels_respa-1);
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixOrientFCC::post_force(int vflag)
 {
   // for each atom in the neighbor list: collect the neighbors within cutsq,
   // keep up to the 12 nearest, compute the order parameter and its energy,
   // then apply the resulting force to atoms that are in the fix group
   // every nstats timesteps also print neighbor statistics
   // vflag is not used by this method
 
   int i,j,k,ii,jj,inum,jnum,m,n,nn,nsort;
   tagint id_self;
   int *ilist,*jlist,*numneigh,**firstneigh;
   double edelta,omega;
   double dx,dy,dz,rsq,xismooth,xi_sq,duxi,duxi_other;
   double dxi[3];
   double *dxiptr;
   bool found_myself;
 
   // set local ptrs
 
   double **x = atom->x;
   double **f = atom->f;
   int *mask = atom->mask;
   tagint *tag = atom->tag;
   int nlocal = atom->nlocal;
   int nall = atom->nlocal + atom->nghost;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // insure nbr and order data structures are adequate size
 
   if (nall > nmax) {
     nmax = nall;
     memory->destroy(nbr);
     memory->destroy(order);
     nbr = (Nbr *) memory->smalloc(nmax*sizeof(Nbr),"orient/fcc:nbr");
     memory->create(order,nmax,2,"orient/fcc:order");
     array_atom = order;
   }
 
   // loop over owned atoms and build Nbr data structure of neighbors
   // use full neighbor list
 
   added_energy = 0.0;
   int count = 0;
   int mincount = BIG;
   int maxcount = 0;
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     // grow the scratch sort array whenever a longer neighbor list is seen
 
     if (jnum < mincount) mincount = jnum;
     if (jnum > maxcount) {
       if (maxcount) delete [] sort;
       sort = new Sort[jnum];
       maxcount = jnum;
     }
 
     // loop over all neighbors of atom i
     // for those within cutsq, build sort data structure
     // store local id, rsq, delta vector, xismooth (if included)
 
     nsort = 0;
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
       count++;
 
       dx = x[i][0] - x[j][0];
       dy = x[i][1] - x[j][1];
       dz = x[i][2] - x[j][2];
       rsq = dx*dx + dy*dy + dz*dz;
 
       if (rsq < cutsq) {
         sort[nsort].id = j;
         sort[nsort].rsq = rsq;
         sort[nsort].delta[0] = dx;
         sort[nsort].delta[1] = dy;
         sort[nsort].delta[2] = dz;
         if (use_xismooth) {
           xismooth = (xicutoffsq - 2.0*rsq/(a*a)) / (xicutoffsq - 1.0);
           sort[nsort].xismooth = 1.0 - fabs(1.0-xismooth);
         }
         nsort++;
       }
     }
 
     // sort neighbors by rsq distance
     // no need to sort if nsort <= 12
 
     if (nsort > 12) qsort(sort,nsort,sizeof(Sort),compare);
 
     // copy up to 12 nearest neighbors into nbr data structure
     // operate on delta vector via find_best_ref() to compute dxi
 
     n = MIN(12,nsort);
     nbr[i].n = n;
     if (n == 0) {
       // no neighbor inside the cutoff
       // give duxi a defined value, since pack_forward_comm() copies it
       // into the comm buffer regardless of n
       nbr[i].duxi = 0.0;
       continue;
     }
 
     double xi_total = 0.0;
     for (j = 0; j < n; j++) {
       find_best_ref(sort[j].delta,0,xi_sq,dxi);
       xi_total += sqrt(xi_sq);
       nbr[i].id[j] = sort[j].id;
       nbr[i].dxi[j][0] = dxi[0]/n;
       nbr[i].dxi[j][1] = dxi[1]/n;
       nbr[i].dxi[j][2] = dxi[2]/n;
       if (use_xismooth) nbr[i].xismooth[j] = sort[j].xismooth;
     }
     xi_total /= n;
     order[i][0] = xi_total;
 
     // compute potential derivative to xi
     // below xi0: no energy, above xi1: full Vxi, in between: cosine ramp
 
     if (xi_total < xi0) {
       nbr[i].duxi = 0.0;
       edelta = 0.0;
       order[i][1] = 0.0;
     } else if (xi_total > xi1) {
       nbr[i].duxi = 0.0;
       edelta = Vxi;
       order[i][1] = 1.0;
     } else {
       omega = MY_PI2*(xi_total-xi0) / (xi1-xi0);
       nbr[i].duxi = MY_PI*Vxi*sin(2.0*omega) / (2.0*(xi1-xi0));
       edelta = Vxi*(1 - cos(2.0*omega)) / 2.0;
       order[i][1] = omega / MY_PI2;
     }
     added_energy += edelta;
   }
 
   if (maxcount) delete [] sort;
 
   // communicate to acquire nbr data for ghost atoms
 
   comm->forward_comm_fix(this);
 
   // compute grain boundary force on each owned atom
   // skip atoms not in group
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     if (!(mask[i] & groupbit)) continue;
     n = nbr[i].n;
     duxi = nbr[i].duxi;
 
     for (j = 0; j < n; j++) {
 
       // contribution from atom I's own order parameter
 
       dxiptr = &nbr[i].dxi[j][0];
       if (use_xismooth) {
         xismooth = nbr[i].xismooth[j];
         f[i][0] += duxi * dxiptr[0] * xismooth;
         f[i][1] += duxi * dxiptr[1] * xismooth;
         f[i][2] += duxi * dxiptr[2] * xismooth;
       } else {
         f[i][0] += duxi * dxiptr[0];
         f[i][1] += duxi * dxiptr[1];
         f[i][2] += duxi * dxiptr[2];
       }
 
       // m = local index of neighbor
       // id_self = ID for atom I in atom M's neighbor list
       // if M is local atom, id_self will be local ID of atom I
       // if M is ghost atom, id_self will be global ID of atom I
 
       m = nbr[i].id[j];
       if (m < nlocal) id_self = i;
       else id_self = tag[i];
       found_myself = false;
       nn = nbr[m].n;
 
       // contribution from neighbor M's order parameter,
       // applied only if atom I appears in M's stored neighbors
 
       for (k = 0; k < nn; k++) {
         if (id_self == nbr[m].id[k]) {
           if (found_myself) error->one(FLERR,"Fix orient/fcc found self twice");
           found_myself = true;
           duxi_other = nbr[m].duxi;
           dxiptr = &nbr[m].dxi[k][0];
           if (use_xismooth) {
             xismooth = nbr[m].xismooth[k];
             f[i][0] -= duxi_other * dxiptr[0] * xismooth;
             f[i][1] -= duxi_other * dxiptr[1] * xismooth;
             f[i][2] -= duxi_other * dxiptr[2] * xismooth;
           } else {
             f[i][0] -= duxi_other * dxiptr[0];
             f[i][1] -= duxi_other * dxiptr[1];
             f[i][2] -= duxi_other * dxiptr[2];
           }
         }
       }
     }
   }
 
   // print statistics every nstats timesteps
 
   if (nstats && update->ntimestep % nstats == 0) {
     int total;
     MPI_Allreduce(&count,&total,1,MPI_INT,MPI_SUM,world);
 
     // divide in floating point: total and natoms are both integers, so
     // total/natoms would truncate the average
     // also avoid dividing by zero when there are no atoms
 
     double ave = 0.0;
     if (atom->natoms > 0) ave = static_cast<double> (total) / atom->natoms;
 
     int min,max;
     MPI_Allreduce(&mincount,&min,1,MPI_INT,MPI_MIN,world);
     MPI_Allreduce(&maxcount,&max,1,MPI_INT,MPI_MAX,world);
 
     if (me == 0) {
       if (screen) fprintf(screen,
                           "orient step " BIGINT_FORMAT ": " BIGINT_FORMAT
                           " atoms have %d neighbors\n",
                           update->ntimestep,atom->natoms,total);
       if (logfile) fprintf(logfile,
                            "orient step " BIGINT_FORMAT ": " BIGINT_FORMAT
                            " atoms have %d neighbors\n",
                            update->ntimestep,atom->natoms,total);
       if (screen)
         fprintf(screen,"  neighs: min = %d, max = %d, ave = %g\n",
                 min,max,ave);
       if (logfile)
         fprintf(logfile,"  neighs: min = %d, max = %d, ave = %g\n",
                 min,max,ave);
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixOrientFCC::post_force_respa(int vflag, int ilevel, int iloop)
 {
   // only act on the outermost rRESPA level; iloop is not used
 
   if (ilevel != nlevels_respa-1) return;
   post_force(vflag);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double FixOrientFCC::compute_scalar()
 {
   double added_energy_total;
   MPI_Allreduce(&added_energy,&added_energy_total,1,MPI_DOUBLE,MPI_SUM,world);
   return added_energy_total;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixOrientFCC::pack_forward_comm(int n, int *list, double *buf,
                                     int pbc_flag, int *pbc)
 {
   int i,j,k,num;
   tagint id;
 
   tagint *tag = atom->tag;
   int nlocal = atom->nlocal;
   int m = 0;
 
   for (i = 0; i < n; i++) {
     k = list[i];
     num = nbr[k].n;
     buf[m++] = num;
     buf[m++] = nbr[k].duxi;
 
     for (j = 0; j < num; j++) {
       if (use_xismooth) buf[m++] = nbr[k].xismooth[j];
       buf[m++] = nbr[k].dxi[j][0];
       buf[m++] = nbr[k].dxi[j][1];
       buf[m++] = nbr[k].dxi[j][2];
 
       // id stored in buf needs to be global ID
       // if k is a local atom, it stores local IDs, so convert to global
       // if k is a ghost atom (already comm'd), its IDs are already global
 
       id = nbr[k].id[j];
       if (k < nlocal) id = tag[id];
       buf[m++] = id;
     }
   }
 
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixOrientFCC::unpack_forward_comm(int n, int first, double *buf)
 {
   // rebuild the Nbr record of atoms first .. first+n-1 from buf
   // layout matches pack_forward_comm(): count, duxi, then per neighbor
   // (xismooth), dxi[3], ID
 
   int nbuf = 0;
   const int stop = first + n;
 
   for (int iatom = first; iatom < stop; iatom++) {
     const int nneigh = static_cast<int> (buf[nbuf++]);
     nbr[iatom].n = nneigh;
     nbr[iatom].duxi = buf[nbuf++];
 
     for (int jj = 0; jj < nneigh; jj++) {
       if (use_xismooth) nbr[iatom].xismooth[jj] = buf[nbuf++];
       for (int idim = 0; idim < 3; idim++)
         nbr[iatom].dxi[jj][idim] = buf[nbuf++];
       nbr[iatom].id[jj] = static_cast<tagint> (buf[nbuf++]);
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixOrientFCC::find_best_ref(double *displs, int which_crystal,
                                  double &xi_sq, double *dxi)
 {
   // among the half_fcc_nn reference vectors of crystal which_crystal, pick
   // the one (with either sign) whose dot product with displs is largest in
   // magnitude
   // xi_sq = squared length of (displs - signed reference)
   // dxi = (signed reference - displs) / sqrt(xi_sq), or zero if xi_sq is 0
 
   double largest = -1.0;      // biggest |dot| so far (smallest angle)
   int ibest = -1;
   int sign = 0;
 
   for (int iref = 0; iref < half_fcc_nn; iref++) {
     const double *ref = half_xi_chi_vec[which_crystal][iref];
     const double dot = displs[0] * ref[0] +
       displs[1] * ref[1] +
       displs[2] * ref[2];
     if (fabs(dot) > largest) {
       largest = fabs(dot);
       ibest = iref;
       sign = (dot < 0.0) ? -1 : 1;
     }
   }
 
   const double *chosen = half_xi_chi_vec[which_crystal][ibest];
 
   xi_sq = 0.0;
   for (int idim = 0; idim < 3; idim++) {
     const double diff = displs[idim] - sign * chosen[idim];
     xi_sq += diff*diff;
   }
 
   // displs coincides with the signed reference: no direction to report
 
   if (!(xi_sq > 0.0)) {
     dxi[0] = dxi[1] = dxi[2] = 0.0;
     return;
   }
 
   const double xi = sqrt(xi_sq);
   for (int idim = 0; idim < 3; idim++)
     dxi[idim] = (sign * chosen[idim] - displs[idim]) / xi;
 }
 
 /* ----------------------------------------------------------------------
    compare two neighbors I and J in sort data structure
    called via qsort in post_force() method
    is a static method so can't access sort data structure directly
    return -1 if I < J, 0 if I = J, 1 if I > J
    do comparison based on rsq distance
 ------------------------------------------------------------------------- */
 
 int FixOrientFCC::compare(const void *pi, const void *pj)
 {
   FixOrientFCC::Sort *ineigh = (FixOrientFCC::Sort *) pi;
   FixOrientFCC::Sort *jneigh = (FixOrientFCC::Sort *) pj;
 
   if (ineigh->rsq < jneigh->rsq) return -1;
   else if (ineigh->rsq > jneigh->rsq) return 1;
   return 0;
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
 ------------------------------------------------------------------------- */
 
 double FixOrientFCC::memory_usage()
 {
   // nbr: one Nbr struct per atom slot; order: 2 doubles per atom slot
 
   const double nbr_bytes = nmax * sizeof(Nbr);
   const double order_bytes = 2*nmax * sizeof(double);
   return nbr_bytes + order_bytes;
 }
diff --git a/src/MISC/pair_nm_cut_coul_cut.cpp b/src/MISC/pair_nm_cut_coul_cut.cpp
index 6b1d6c542..33bddb522 100644
--- a/src/MISC/pair_nm_cut_coul_cut.cpp
+++ b/src/MISC/pair_nm_cut_coul_cut.cpp
@@ -1,506 +1,506 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing Author: Julien Devemy (ICCF)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_nm_cut_coul_cut.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 /* ---------------------------------------------------------------------- */
 
 PairNMCutCoulCut::PairNMCutCoulCut(LAMMPS *lmp) : Pair(lmp)
 {
   // presumably advertises that write_data()/write_data_all() are
   // implemented by this style -- confirm against the Pair base class
   writedata = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairNMCutCoulCut::~PairNMCutCoulCut()
 {
   // nothing to release unless allocate() has run
   if (!allocated) return;
 
   // bookkeeping tables
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   // cutoffs and their squares
   memory->destroy(cut_lj);
   memory->destroy(cut_ljsq);
   memory->destroy(cut_coul);
   memory->destroy(cut_coulsq);
 
   // user-supplied N-M coefficients
   memory->destroy(e0);
   memory->destroy(r0);
   memory->destroy(nn);
   memory->destroy(mm);
 
   // quantities derived from the coefficients in init_one()
   memory->destroy(nm);
   memory->destroy(e0nm);
   memory->destroy(r0n);
   memory->destroy(r0m);
   memory->destroy(offset);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairNMCutCoulCut::compute(int eflag, int vflag)
 {
   // Accumulate N-M plus cutoff-Coulomb pair forces into atom->f for every
   // neighbor pair inside cutsq. Energies are evaluated only when eflag is
   // set and are passed to ev_tally() only when evflag is set.
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double rsq,r2inv,factor_coul,factor_lj;
   double r,forcecoul,forcenm,rminv,rninv;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // the raw neighbor entry also selects the special_lj/special_coul
       // scaling factors via sbmask(); NEIGHMASK then leaves the atom index
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         // Coulomb term: qqrd2e*qi*qj/r inside the Coulomb cutoff
         if (rsq < cut_coulsq[itype][jtype])
           forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
         else forcecoul = 0.0;
 
         // N-M term; rminv = r^-m and rninv = r^-n are kept for the
         // energy expression below
         if (rsq < cut_ljsq[itype][jtype]) {
           r = sqrt(rsq);
           rminv = pow(r2inv,mm[itype][jtype]/2.0);
           rninv = pow(r2inv,nn[itype][jtype]/2.0);
           forcenm = e0nm[itype][jtype]*nm[itype][jtype] * 
             (r0n[itype][jtype]/pow(r,nn[itype][jtype]) - 
              r0m[itype][jtype]/pow(r,mm[itype][jtype]));
         } else forcenm = 0.0;
 
         // dividing by r^2 here means del{x,y,z}*fpair are the force
         // components
         fpair = (factor_coul*forcecoul + factor_lj*forcenm) * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         // atom j receives the opposite force only with newton_pair on
         // or when its index is below nlocal
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
           if (rsq < cut_coulsq[itype][jtype])
             ecoul = factor_coul * qqrd2e * qtmp*q[j]*sqrt(r2inv);
           else ecoul = 0.0;
           if (rsq < cut_ljsq[itype][jtype]) {
             // N-M energy minus the per-pair offset, scaled by factor_lj
             evdwl = e0nm[itype][jtype]*(mm[itype][jtype] * 
                                         r0n[itype][jtype]*rninv - 
                                         nn[itype][jtype] * 
                                         r0m[itype][jtype]*rminv) - 
               offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairNMCutCoulCut::allocate()
 {
   allocated = 1;
   int n = atom->ntypes;
 
   memory->create(setflag,n+1,n+1,"pair:setflag");
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       setflag[i][j] = 0;
 
   memory->create(cutsq,n+1,n+1,"pair:cutsq");
 
   memory->create(cut_lj,n+1,n+1,"pair:cut_lj");
   memory->create(cut_ljsq,n+1,n+1,"pair:cut_ljsq");
   memory->create(cut_coul,n+1,n+1,"pair:cut_coul");
   memory->create(cut_coulsq,n+1,n+1,"pair:cut_coulsq");
   memory->create(e0,n+1,n+1,"pair:e0");
   memory->create(r0,n+1,n+1,"pair:r0");
   memory->create(nn,n+1,n+1,"pair:nn");
   memory->create(mm,n+1,n+1,"pair:mm");
   memory->create(nm,n+1,n+1,"pair:nm");
   memory->create(e0nm,n+1,n+1,"pair:e0nm");
   memory->create(r0n,n+1,n+1,"pair:r0n");
   memory->create(r0m,n+1,n+1,"pair:r0m");
   memory->create(offset,n+1,n+1,"pair:offset");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairNMCutCoulCut::settings(int narg, char **arg)
 {
   // args: global N-M cutoff, optionally followed by a global Coulomb
   // cutoff; with one arg the Coulomb cutoff equals the N-M cutoff
   if (narg < 1 || narg > 2) error->all(FLERR,"Illegal pair_style command");
 
   cut_lj_global = force->numeric(FLERR,arg[0]);
   if (narg == 1) cut_coul_global = cut_lj_global;
   else cut_coul_global = force->numeric(FLERR,arg[1]);
 
   // reset cutoffs that have been explicitly set
   // the inner loop starts at j = i so that diagonal (i,i) pairs, which
   // coeff() also flags in setflag, pick up the new global cutoffs too
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) {
           cut_lj[i][j] = cut_lj_global;
           cut_coul[i][j] = cut_coul_global;
         }
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairNMCutCoulCut::coeff(int narg, char **arg)
 {
   // args: itype jtype E0 r0 n m [cut_lj [cut_coul]]
   //   arg[0],arg[1]  type ranges, expanded by force->bounds()
   //   arg[2]..arg[5] E0, r0, n, m
   //   arg[6]         optional cutoff, applied to both N-M and Coulomb
   //   arg[7]         optional separate Coulomb cutoff
   if (narg < 6 || narg > 8)
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double e0_one = force->numeric(FLERR,arg[2]);
   double r0_one = force->numeric(FLERR,arg[3]);
   double nn_one = force->numeric(FLERR,arg[4]);
   double mm_one = force->numeric(FLERR,arg[5]);
 
   // per-pair cutoffs default to the global values from settings();
   // the optional overrides follow the four N-M parameters, i.e. they
   // live in arg[6] and arg[7] (arg[4]/arg[5] are the exponents n and m)
   double cut_lj_one = cut_lj_global;
   double cut_coul_one = cut_coul_global;
   if (narg >= 7) cut_coul_one = cut_lj_one = force->numeric(FLERR,arg[6]);
   if (narg == 8) cut_coul_one = force->numeric(FLERR,arg[7]);
 
   // store only pairs with i <= j; init_one() mirrors them to (j,i)
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       e0[i][j] = e0_one;
       r0[i][j] = r0_one;
       nn[i][j] = nn_one;
       mm[i][j] = mm_one;
       cut_lj[i][j] = cut_lj_one;
       cut_coul[i][j] = cut_coul_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   // the requested type ranges selected no i <= j pair at all
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairNMCutCoulCut::init_style()
 {
   // the Coulomb term reads atom->q, so per-atom charge must exist
   if (!atom->q_flag)
     error->all(FLERR,"Pair style nm/cut/coul/cut requires atom attribute q");
 
   // ask the neighbor class for a list on behalf of this pair style
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairNMCutCoulCut::init_one(int i, int j)
 {
   // Finalize the coefficients of type pair (i,j), mirror them to (j,i)
   // and return the larger of the pair's N-M and Coulomb cutoffs.
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   double cut = MAX(cut_lj[i][j],cut_coul[i][j]);
   cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
   cut_coulsq[i][j] = cut_coul[i][j] * cut_coul[i][j];
 
   // derived quantities reused by compute() and single()
   // NOTE(review): e0nm divides by (n - m); n == m is not rejected here
   nm[i][j] = nn[i][j]*mm[i][j];
   e0nm[i][j] = e0[i][j]/(nn[i][j]-mm[i][j]);
   r0n[i][j] = pow(r0[i][j],nn[i][j]);
   r0m[i][j] = pow(r0[i][j],mm[i][j]); 
 
   // offset is the N-M energy expression evaluated at r = cut_lj
   if (offset_flag) {
     offset[i][j] = e0nm[i][j] *
       ((mm[i][j]*r0n[i][j] / pow(cut_lj[i][j],nn[i][j])) - 
        (nn[i][j]*r0m[i][j] / pow(cut_lj[i][j],mm[i][j])));
   } else offset[i][j] = 0.0;
 
   // mirror into the (j,i) entries
   cut_ljsq[j][i] = cut_ljsq[i][j];
   cut_coulsq[j][i] = cut_coulsq[i][j];
   e0[j][i] = e0[i][j];
   nn[j][i] = nn[i][j];
   mm[j][i] = mm[i][j];
   nm[j][i] = nm[i][j];
   r0[j][i] = r0[i][j];
   e0nm[j][i] = e0nm[i][j];
   r0n[j][i] = r0n[i][j];
   r0m[j][i] = r0m[i][j];
   offset[j][i] = offset[i][j];
 
   // compute I,J contribution to long-range tail correction
   // count total # of atoms of type I and J via Allreduce
 
   if (tail_flag) {
     int *type = atom->type;
     int nlocal = atom->nlocal;
 
     // counts are kept as doubles and summed over all procs
     double count[2],all[2];
     count[0] = count[1] = 0.0;
     for (int k = 0; k < nlocal; k++) {
       if (type[k] == i) count[0] += 1.0;
       if (type[k] == j) count[1] += 1.0;
     }
     MPI_Allreduce(count,all,2,MPI_DOUBLE,MPI_SUM,world);
 
     double rr1 = mm[i][j]*(nn[i][j]-1)*pow(r0[i][j],nn[i][j]);
     double rr2 = nn[i][j]*(mm[i][j]-1)*pow(r0[i][j],mm[i][j]);
     double p1 = 1-nn[i][j];
     double p2 = 1-mm[i][j];
 
     double rrr1 = pow(r0[i][j],nn[i][j])*(1-nn[i][j]);
     double rrr2 = pow(r0[i][j],mm[i][j])*(1-mm[i][j]);
 
     // energy (etail_ij) and pressure (ptail_ij) tail terms for this pair
     etail_ij = 2.0*MY_PI*all[0]*all[1]*e0nm[i][j] * 
       (rr1*pow(cut_lj[i][j],p1)-rr2*pow(cut_lj[i][j],p2));
     ptail_ij = 2.0*MY_PI*all[0]*all[1]*e0nm[i][j] * 
       nn[i][j]*mm[i][j]*(rrr1*pow(cut_lj[i][j],p1)-rrr2*pow(cut_lj[i][j],p2));
 
   }
 
   return cut;
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairNMCutCoulCut::write_restart(FILE *fp)
 {
   // global settings first, then one record per type pair with i <= j
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       // the flag is always written; coefficients follow only if set
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
       if (!setflag[itype][jtype]) continue;
 
       fwrite(&e0[itype][jtype],sizeof(double),1,fp);
       fwrite(&r0[itype][jtype],sizeof(double),1,fp);
       fwrite(&nn[itype][jtype],sizeof(double),1,fp);
       fwrite(&mm[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut_lj[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut_coul[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairNMCutCoulCut::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   int i,j;
   int me = comm->me;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
       MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
       if (setflag[i][j]) {
         if (me == 0) {
           fread(&e0[i][j],sizeof(double),1,fp);
           fread(&r0[i][j],sizeof(double),1,fp);
           fread(&nn[i][j],sizeof(double),1,fp);
           fread(&mm[i][j],sizeof(double),1,fp);
           fread(&cut_lj[i][j],sizeof(double),1,fp);
           fread(&cut_coul[i][j],sizeof(double),1,fp);
         }
         MPI_Bcast(&e0[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&r0[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&nn[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&mm[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&cut_lj[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&cut_coul[i][j],1,MPI_DOUBLE,0,world);
       }
     }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairNMCutCoulCut::write_restart_settings(FILE *fp)
 {
   // the order of these writes defines the record layout and must match
   // the reads in read_restart_settings()
   fwrite(&cut_lj_global,sizeof(double),1,fp);
   fwrite(&cut_coul_global,sizeof(double),1,fp);
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
   fwrite(&tail_flag,sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairNMCutCoulCut::read_restart_settings(FILE *fp)
 {
   // proc 0 reads the values in the order write_restart_settings() wrote
   // them; fread() return values are not checked
   if (comm->me == 0) {
     fread(&cut_lj_global,sizeof(double),1,fp);
     fread(&cut_coul_global,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
     fread(&tail_flag,sizeof(int),1,fp);
   }
   // every proc then receives proc 0's copy
   MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
   MPI_Bcast(&tail_flag,1,MPI_INT,0,world);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to data file
 ------------------------------------------------------------------------- */
 
 void PairNMCutCoulCut::write_data(FILE *fp)
 {
   // one line per atom type: the type index followed by the diagonal
   // (i,i) values of e0, r0, nn and mm
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     fprintf(fp,"%d %g %g %g %g\n",itype,
             e0[itype][itype],r0[itype][itype],
             nn[itype][itype],mm[itype][itype]);
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes all pairs to data file
 ------------------------------------------------------------------------- */
 
 void PairNMCutCoulCut::write_data_all(FILE *fp)
 {
   // one line per type pair with i <= j: both type indices, then
   // e0, r0, nn, mm and the pair's cut_lj
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fprintf(fp,"%d %d %g %g %g %g %g\n",itype,jtype,
               e0[itype][jtype],r0[itype][jtype],
               nn[itype][jtype],mm[itype][jtype],cut_lj[itype][jtype]);
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairNMCutCoulCut::single(int i, int j, int itype, int jtype,
                                 double rsq,
                                 double factor_coul, double factor_lj,
                                 double &fforce)
 {
   // Energy of the single pair (i,j) at squared separation rsq; fforce is
   // set to the combined force divided by r^2. Each term contributes only
   // inside its own cutoff.
   const double r2inv = 1.0/rsq;
   double forcecoul = 0.0;
   double forcenm = 0.0;
   double eng = 0.0;
 
   if (rsq < cut_coulsq[itype][jtype]) {
     // the Coulomb force prefactor and the Coulomb energy are the same
     // expression, qqrd2e*qi*qj/r
     forcecoul = force->qqrd2e * atom->q[i]*atom->q[j]*sqrt(r2inv);
     eng += factor_coul*forcecoul;
   }
 
   if (rsq < cut_ljsq[itype][jtype]) {
     const double r = sqrt(rsq);
     const double rpown = pow(r,nn[itype][jtype]);
     const double rpowm = pow(r,mm[itype][jtype]);
     forcenm = e0nm[itype][jtype]*nm[itype][jtype] *
       (r0n[itype][jtype]/rpown -
        r0m[itype][jtype]/rpowm);
     const double phinm = e0nm[itype][jtype] *
       (mm[itype][jtype]*r0n[itype][jtype]/rpown -
        nn[itype][jtype]*r0m[itype][jtype]/rpowm) -
       offset[itype][jtype];
     eng += factor_lj*phinm;
   }
 
   fforce = (factor_coul*forcecoul + factor_lj*forcenm) * r2inv;
   return eng;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void *PairNMCutCoulCut::extract(const char *str, int &dim)
 {
   // Look up an internal parameter by name; dim reports how the returned
   // pointer is classified (0 for "cut_coul", 2 for everything else,
   // including the not-found case).
   // NOTE(review): cut_coul is a per-type 2-D table in this class, yet its
   // address is handed out with dim = 0 -- confirm what callers expect.
   if (strcmp(str,"cut_coul") == 0) {
     dim = 0;
     return (void *) &cut_coul;
   }
 
   dim = 2;
   if (!strcmp(str,"e0")) return (void *) e0;
   if (!strcmp(str,"r0")) return (void *) r0;
   if (!strcmp(str,"nn")) return (void *) nn;
   if (!strcmp(str,"mm")) return (void *) mm;
   return NULL;
 }
diff --git a/src/MISC/pair_nm_cut_coul_long.cpp b/src/MISC/pair_nm_cut_coul_long.cpp
index b371e3c07..b613b584c 100644
--- a/src/MISC/pair_nm_cut_coul_long.cpp
+++ b/src/MISC/pair_nm_cut_coul_long.cpp
@@ -1,590 +1,590 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing Author: Julien Devemy (ICCF)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_nm_cut_coul_long.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "update.h"
 #include "integrate.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 /* ---------------------------------------------------------------------- */
 
 PairNMCutCoulLong::PairNMCutCoulLong(LAMMPS *lmp) : Pair(lmp)
 {
   // presumably marks the style as usable with Ewald and PPPM solvers --
   // confirm against the Pair base class
   ewaldflag = pppmflag = 1;
   // no Coulomb table yet; the destructor frees tables only if ftable is set
   ftable = NULL;
   // qdist enters the cutoff returned by init_one(); zero unless TIP4P
   qdist = 0.0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairNMCutCoulLong::~PairNMCutCoulLong()
 {
   // per-type tables exist only after allocate() has run
   if (allocated) {
     // bookkeeping and cutoffs
     memory->destroy(setflag);
     memory->destroy(cutsq);
     memory->destroy(cut_lj);
     memory->destroy(cut_ljsq);
 
     // user-supplied N-M coefficients
     memory->destroy(e0);
     memory->destroy(r0);
     memory->destroy(nn);
     memory->destroy(mm);
 
     // quantities derived from them in init_one()
     memory->destroy(nm);
     memory->destroy(e0nm);
     memory->destroy(r0n);
     memory->destroy(r0m);
     memory->destroy(offset);
   }
 
   // Coulomb lookup tables are released separately
   if (ftable != NULL) free_tables();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairNMCutCoulLong::compute(int eflag, int vflag)
 {
   // Accumulate N-M plus real-space long-range Coulomb pair forces into
   // atom->f for every neighbor pair inside cutsq. Energies are evaluated
   // only when eflag is set and tallied only when evflag is set.
   int i,j,ii,jj,inum,jnum,itype,jtype,itable;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double fraction,table;
   double r,r2inv,factor_coul,factor_lj;
   double forcecoul,forcenm,rminv,rninv;
   double grij,expm2,prefactor,t,erfc;
   int *ilist,*jlist,*numneigh,**firstneigh;
   double rsq;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // the raw neighbor entry also selects the special_lj/special_coul
       // scaling factors via sbmask(); NEIGHMASK then leaves the atom index
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
 
         if (rsq < cut_coulsq) {
           // analytic branch: used when tables are disabled or rsq is at
           // or below tabinnersq; erfc is built from the A1..A5 polynomial
           // in t times exp(-grij^2)
           if (!ncoultablebits || rsq <= tabinnersq) {
             r = sqrt(rsq);
             grij = g_ewald * r;
             expm2 = exp(-grij*grij);
             t = 1.0 / (1.0 + EWALD_P*grij);
             erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
             prefactor = qqrd2e * qtmp*q[j]/r;
             forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
             // special pairs: remove the excluded fraction of plain Coulomb
             if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
           } else {
             // tabulated branch: the table index comes from the bit
             // pattern of rsq stored as a float, then values are linearly
             // interpolated with 'fraction'
             union_int_float_t rsq_lookup;
             rsq_lookup.f = rsq;
             itable = rsq_lookup.i & ncoulmask;
             itable >>= ncoulshiftbits;
             fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
             table = ftable[itable] + fraction*dftable[itable];
             forcecoul = qtmp*q[j] * table;
             if (factor_coul < 1.0) {
               table = ctable[itable] + fraction*dctable[itable];
               prefactor = qtmp*q[j] * table;
               forcecoul -= (1.0-factor_coul)*prefactor;
             }
           }
         } else forcecoul = 0.0;
 
         // N-M term; rminv = r^-m and rninv = r^-n are kept for the
         // energy expression below
         if (rsq < cut_ljsq[itype][jtype]) {
           r = sqrt(rsq);
           rminv = pow(r2inv,mm[itype][jtype]/2.0);
           rninv = pow(r2inv,nn[itype][jtype]/2.0);
           forcenm = e0nm[itype][jtype]*nm[itype][jtype] * 
             (r0n[itype][jtype]/pow(r,nn[itype][jtype]) - 
              r0m[itype][jtype]/pow(r,mm[itype][jtype]));
         } else forcenm = 0.0;
 
         // factor_coul is already folded into forcecoul above
         fpair = (forcecoul + factor_lj*forcenm) * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         // atom j receives the opposite force only with newton_pair on
         // or when its index is below nlocal
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
           if (rsq < cut_coulsq) {
             // reuses prefactor/erfc or itable/fraction from the force
             // branch taken above for the same rsq
             if (!ncoultablebits || rsq <= tabinnersq)
               ecoul = prefactor*erfc;
             else {
               table = etable[itable] + fraction*detable[itable];
               ecoul = qtmp*q[j] * table;
             }
             if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
           } else ecoul = 0.0;
 
           if (rsq < cut_ljsq[itype][jtype]) {
             // N-M energy minus the per-pair offset, scaled by factor_lj
             evdwl = e0nm[itype][jtype] * 
               (mm[itype][jtype]*r0n[itype][jtype]*rninv - 
                nn[itype][jtype]*r0m[itype][jtype]*rminv) - offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairNMCutCoulLong::allocate()
 {
   allocated = 1;
   int n = atom->ntypes;
 
   memory->create(setflag,n+1,n+1,"pair:setflag");
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       setflag[i][j] = 0;
 
   memory->create(cutsq,n+1,n+1,"pair:cutsq");
 
   memory->create(cut_lj,n+1,n+1,"pair:cut_lj");
   memory->create(cut_ljsq,n+1,n+1,"pair:cut_ljsq");
   memory->create(e0,n+1,n+1,"pair:e0");
   memory->create(r0,n+1,n+1,"pair:r0");
   memory->create(nn,n+1,n+1,"pair:nn");
   memory->create(mm,n+1,n+1,"pair:mm");
   memory->create(nm,n+1,n+1,"pair:nm");
   memory->create(e0nm,n+1,n+1,"pair:e0nm");
   memory->create(r0n,n+1,n+1,"pair:r0n");
   memory->create(r0m,n+1,n+1,"pair:r0m");
   memory->create(offset,n+1,n+1,"pair:offset");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairNMCutCoulLong::settings(int narg, char **arg)
 {
   // args: global N-M cutoff, optionally followed by the Coulomb cutoff;
   // with one arg the Coulomb cutoff equals the N-M cutoff
   if (narg < 1 || narg > 2) error->all(FLERR,"Illegal pair_style command");
 
   cut_lj_global = force->numeric(FLERR,arg[0]);
   if (narg == 1) cut_coul = cut_lj_global;
   else cut_coul = force->numeric(FLERR,arg[1]);
 
   // reset cutoffs that have been explicitly set
   // the inner loop starts at j = i so that diagonal (i,i) pairs, which
   // coeff() also flags in setflag, pick up the new global cutoff too
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut_lj[i][j] = cut_lj_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairNMCutCoulLong::coeff(int narg, char **arg)
 {
   // args: itype jtype E0 r0 n m [cut_lj]
   //   arg[0],arg[1]  type ranges, expanded by force->bounds()
   //   arg[2]..arg[5] E0, r0, n, m
   //   arg[6]         optional per-pair N-M cutoff
   if (narg < 6 || narg > 7)
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double e0_one = force->numeric(FLERR,arg[2]);
   double r0_one = force->numeric(FLERR,arg[3]);
   double nn_one = force->numeric(FLERR,arg[4]);
   double mm_one = force->numeric(FLERR,arg[5]);
 
   // the per-pair cutoff defaults to the global value from settings();
   // the optional override follows the four N-M parameters, i.e. it
   // lives in arg[6] (arg[4] is the exponent n)
   double cut_lj_one = cut_lj_global;
   if (narg == 7) cut_lj_one = force->numeric(FLERR,arg[6]);
 
   // store only pairs with i <= j; init_one() mirrors them to (j,i)
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       e0[i][j] = e0_one;
       r0[i][j] = r0_one;
       nn[i][j] = nn_one;
       mm[i][j] = mm_one;
       cut_lj[i][j] = cut_lj_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   // the requested type ranges selected no i <= j pair at all
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairNMCutCoulLong::init_style()
 {
   // the Coulomb term reads atom->q, so per-atom charge must exist
   if (!atom->q_flag)
     error->all(FLERR,"Pair style nm/cut/coul/long requires atom attribute q");
 
   // ask the neighbor class for a list on behalf of this pair style
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 
   // squared Coulomb cutoff used by compute() and single()
   cut_coulsq = cut_coul * cut_coul;
   // ensure use of KSpace long-range solver, set g_ewald
 
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style requires a KSpace style");
   g_ewald = force->kspace->g_ewald;
 
   // setup force tables
 
   if (ncoultablebits) init_tables(cut_coul,NULL);
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairNMCutCoulLong::init_one(int i, int j)
 {
   // Finalize the coefficients of type pair (i,j), mirror them to (j,i)
   // and return the pair's overall cutoff.
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   // include TIP4P qdist in full cutoff, qdist = 0.0 if not TIP4P
 
   double cut = MAX(cut_lj[i][j],cut_coul+2.0*qdist);
   cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
 
   // derived quantities reused by compute() and single()
   // NOTE(review): e0nm divides by (n - m); n == m is not rejected here
   nm[i][j] = nn[i][j]*mm[i][j];
   e0nm[i][j] = e0[i][j]/(nn[i][j]-mm[i][j]);
   r0n[i][j] = pow(r0[i][j],nn[i][j]);
   r0m[i][j] = pow(r0[i][j],mm[i][j]); 
 
   // offset is the N-M energy expression evaluated at r = cut_lj
   if (offset_flag) {
     offset[i][j] = e0nm[i][j] * 
       ((mm[i][j]*r0n[i][j] / pow(cut_lj[i][j],nn[i][j])) - 
        (nn[i][j]*r0m[i][j] / pow(cut_lj[i][j],mm[i][j])));
   } else offset[i][j] = 0.0;
 
   // mirror into the (j,i) entries
   cut_ljsq[j][i] = cut_ljsq[i][j];
   e0[j][i] = e0[i][j];
   nn[j][i] = nn[i][j];
   mm[j][i] = mm[i][j];
   nm[j][i] = nm[i][j];
   r0[j][i] = r0[i][j];
   e0nm[j][i] = e0nm[i][j];
   r0n[j][i] = r0n[i][j];
   r0m[j][i] = r0m[i][j];
   offset[j][i] = offset[i][j];
 
   // compute I,J contribution to long-range tail correction
   // count total # of atoms of type I and J via Allreduce
 
   if (tail_flag) {
     int *type = atom->type;
     int nlocal = atom->nlocal;
 
     // counts are kept as doubles and summed over all procs
     double count[2],all[2];
     count[0] = count[1] = 0.0;
     for (int k = 0; k < nlocal; k++) {
       if (type[k] == i) count[0] += 1.0;
       if (type[k] == j) count[1] += 1.0;
     }
     MPI_Allreduce(count,all,2,MPI_DOUBLE,MPI_SUM,world);
 
     double rr1 = mm[i][j]*(nn[i][j]-1)*pow(r0[i][j],nn[i][j]);
     double rr2 = nn[i][j]*(mm[i][j]-1)*pow(r0[i][j],mm[i][j]);
     double p1 = 1-nn[i][j];
     double p2 = 1-mm[i][j];
 
     double rrr1 = pow(r0[i][j],nn[i][j])*(1-nn[i][j]);
     double rrr2 = pow(r0[i][j],mm[i][j])*(1-mm[i][j]);
 
     // energy (etail_ij) and pressure (ptail_ij) tail terms for this pair
     etail_ij = 2.0*MY_PI*all[0]*all[1]*e0nm[i][j] *
       (rr1*pow(cut_lj[i][j],p1)-rr2*pow(cut_lj[i][j],p2));
     ptail_ij = 2.0*MY_PI*all[0]*all[1]*e0nm[i][j]*nn[i][j]*mm[i][j] * 
       (rrr1*pow(cut_lj[i][j],p1)-rrr2*pow(cut_lj[i][j],p2));
   }
 
   return cut;
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairNMCutCoulLong::write_restart(FILE *fp)
 {
   // global settings first, then one record per type pair with i <= j
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       // the flag is always written; coefficients follow only if set
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
       if (!setflag[itype][jtype]) continue;
 
       fwrite(&e0[itype][jtype],sizeof(double),1,fp);
       fwrite(&r0[itype][jtype],sizeof(double),1,fp);
       fwrite(&nn[itype][jtype],sizeof(double),1,fp);
       fwrite(&mm[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut_lj[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairNMCutCoulLong::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
 
   allocate();
 
   int i,j;
   int me = comm->me;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
       MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
       if (setflag[i][j]) {
         if (me == 0) {
           fread(&e0[i][j],sizeof(double),1,fp);
           fread(&r0[i][j],sizeof(double),1,fp);
           fread(&nn[i][j],sizeof(double),1,fp);
           fread(&mm[i][j],sizeof(double),1,fp);
           fread(&cut_lj[i][j],sizeof(double),1,fp);
         }
         MPI_Bcast(&e0[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&r0[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&nn[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&mm[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&cut_lj[i][j],1,MPI_DOUBLE,0,world);
       }
     }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairNMCutCoulLong::write_restart_settings(FILE *fp)
 {
   // the order of these writes defines the record layout and must match
   // the reads in read_restart_settings()
   fwrite(&cut_lj_global,sizeof(double),1,fp);
   fwrite(&cut_coul,sizeof(double),1,fp);
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
   fwrite(&tail_flag,sizeof(int),1,fp);
   fwrite(&ncoultablebits,sizeof(int),1,fp);
   fwrite(&tabinner,sizeof(double),1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairNMCutCoulLong::read_restart_settings(FILE *fp)
 {
   // proc 0 reads the values in the order write_restart_settings() wrote
   // them; fread() return values are not checked
   if (comm->me == 0) {
     fread(&cut_lj_global,sizeof(double),1,fp);
     fread(&cut_coul,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
     fread(&tail_flag,sizeof(int),1,fp);
     fread(&ncoultablebits,sizeof(int),1,fp);
     fread(&tabinner,sizeof(double),1,fp);
   }
   // every proc then receives proc 0's copy
   MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
   MPI_Bcast(&tail_flag,1,MPI_INT,0,world);
   MPI_Bcast(&ncoultablebits,1,MPI_INT,0,world);
   MPI_Bcast(&tabinner,1,MPI_DOUBLE,0,world);
 }
 
 
 /* ----------------------------------------------------------------------
    proc 0 writes to data file
 ------------------------------------------------------------------------- */
 
 void PairNMCutCoulLong::write_data(FILE *fp)
 {
   // one line per atom type: the type index followed by its (t,t) coefficients
   const int ntypes = atom->ntypes;
   for (int t = 1; t <= ntypes; t++) {
     fprintf(fp,"%d %g %g %g %g\n",
             t,e0[t][t],r0[t][t],nn[t][t],mm[t][t]);
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes all pairs to data file
 ------------------------------------------------------------------------- */
 
 void PairNMCutCoulLong::write_data_all(FILE *fp)
 {
   // one line per type pair with jtype >= itype, including the per-pair cutoff
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       fprintf(fp,"%d %d %g %g %g %g %g\n",itype,jtype,
               e0[itype][jtype],r0[itype][jtype],
               nn[itype][jtype],mm[itype][jtype],cut_lj[itype][jtype]);
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairNMCutCoulLong::single(int i, int j, int itype, int jtype,
                                  double rsq,
                                  double factor_coul, double factor_lj,
                                  double &fforce)
 {
   // Energy and force for the single pair (i,j) at squared separation rsq.
   // Returns the Coulomb energy plus factor_lj times the N-M energy and
   // stores (forcecoul + factor_lj*forcenm)/rsq in fforce.
   // A1..A5, EWALD_P and EWALD_F are constants defined outside this function.
 
   // note: the local variable "erfc" hides any function of that name here
   double r2inv,r,grij,expm2,t,erfc,prefactor;
   double fraction,table,forcecoul,forcenm,phicoul,phinm;
   int itable;
 
   r2inv = 1.0/rsq;
 
   // Coulomb force: only inside the Coulomb cutoff
   if (rsq < cut_coulsq) {
     if (!ncoultablebits || rsq <= tabinnersq) {
       // tables disabled or rsq at/below tabinnersq: evaluate analytically,
       // with erfc computed from the polynomial in t times expm2
       r = sqrt(rsq);
       grij = g_ewald * r;
       expm2 = exp(-grij*grij);
       t = 1.0 / (1.0 + EWALD_P*grij);
       erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
       prefactor = force->qqrd2e * atom->q[i]*atom->q[j]/r;
       forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
       // factor_coul < 1: subtract the (1-factor_coul) share of prefactor
       if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
     } else {
       // table lookup: rsq is stored as a float and its bits, read back
       // through .i, are masked and shifted to form the table index
       union_int_float_t rsq_lookup_single;
       rsq_lookup_single.f = rsq;
       itable = rsq_lookup_single.i & ncoulmask;
       itable >>= ncoulshiftbits;
       // fraction = position of rsq past rtable[itable], used for interpolation
       fraction = (rsq_lookup_single.f - rtable[itable]) * drtable[itable];
       table = ftable[itable] + fraction*dftable[itable];
       forcecoul = atom->q[i]*atom->q[j] * table;
       if (factor_coul < 1.0) {
         // in this branch prefactor comes from ctable and is only set here,
         // i.e. only when factor_coul < 1
         table = ctable[itable] + fraction*dctable[itable];
         prefactor = atom->q[i]*atom->q[j] * table;
         forcecoul -= (1.0-factor_coul)*prefactor;
       }
     }
   } else forcecoul = 0.0;
 
   // N-M force: only inside the per-type-pair cutoff
   // e0nm, nm, r0n and r0m are per-type-pair arrays set up outside this function
   if (rsq < cut_ljsq[itype][jtype]) {
     r = sqrt(rsq);
     forcenm = e0nm[itype][jtype]*nm[itype][jtype] * 
       (r0n[itype][jtype]/pow(r,nn[itype][jtype]) - 
        r0m[itype][jtype]/pow(r,mm[itype][jtype]));
   } else forcenm = 0.0;
 
   fforce = (forcecoul + factor_lj*forcenm) * r2inv;
 
   // energy: the conditions below repeat those of the force section, so
   // prefactor, erfc, itable, fraction and r were assigned in the matching
   // branch above before they are read here
   double eng = 0.0;
   if (rsq < cut_coulsq) {
     if (!ncoultablebits || rsq <= tabinnersq)
       phicoul = prefactor*erfc;
     else {
       table = etable[itable] + fraction*detable[itable];
       phicoul = atom->q[i]*atom->q[j] * table;
     }
     if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*prefactor;
     eng += phicoul;
   }
 
   if (rsq < cut_ljsq[itype][jtype]) {
     // r was set in the N-M force block above under the same condition
     phinm = e0nm[itype][jtype] * 
       (mm[itype][jtype]*r0n[itype][jtype]/pow(r,nn[itype][jtype]) - 
        nn[itype][jtype]*r0m[itype][jtype]/pow(r,mm[itype][jtype])) -
       offset[itype][jtype];
     eng += factor_lj*phinm;
   }
 
   return eng;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void *PairNMCutCoulLong::extract(const char *str, int &dim)
 {
   // scalar quantity: reported with dim = 0
   if (strcmp(str,"cut_coul") == 0) {
     dim = 0;
     return (void *) &cut_coul;
   }
 
   // all other names refer to per-type-pair arrays, reported with dim = 2;
   // dim is left at 2 even when the name is not recognized
   dim = 2;
   void *found = NULL;
   if (strcmp(str,"e0") == 0) found = (void *) e0;
   else if (strcmp(str,"r0") == 0) found = (void *) r0;
   else if (strcmp(str,"nn") == 0) found = (void *) nn;
   else if (strcmp(str,"mm") == 0) found = (void *) mm;
   return found;
 }
diff --git a/src/MOLECULE/pair_hbond_dreiding_lj.cpp b/src/MOLECULE/pair_hbond_dreiding_lj.cpp
index af6385b39..cfb2dfeb5 100644
--- a/src/MOLECULE/pair_hbond_dreiding_lj.cpp
+++ b/src/MOLECULE/pair_hbond_dreiding_lj.cpp
@@ -1,571 +1,571 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Tod A Pascal (Caltech)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_hbond_dreiding_lj.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "molecule.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_request.h"
 #include "neigh_list.h"
 #include "domain.h"
 #include "math_const.h"
 #include "math_special.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
 #define SMALL 0.001
 #define CHUNK 8
 
 /* ---------------------------------------------------------------------- */
 
 PairHbondDreidingLJ::PairHbondDreidingLJ(LAMMPS *lmp) : Pair(lmp)
 {
   // parameter table starts out empty; coeff() grows it in CHUNK-sized steps
   params = NULL;
   nparams = 0;
   maxparam = 0;
 
   // two extra values are published via pvector; compute() stores the
   // hbond count in slot 0 and the hbond energy in slot 1
   nextra = 2;
   pvector = new double[2];
 
   // hbond cannot compute virial as F dot r
   // due to using map() to find bonded H atoms which are not near donor atom
   no_virial_fdotr_compute = 1;
 
   restartinfo = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairHbondDreidingLJ::~PairHbondDreidingLJ()
 {
   // released unconditionally: both are set up by the constructor
   memory->sfree(params);
   delete [] pvector;
 
   // everything below is created by allocate(), so there is nothing
   // more to free if it never ran
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
   memory->destroy(type2param);
 
   delete [] donor;
   delete [] acceptor;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairHbondDreidingLJ::compute(int eflag, int vflag)
 {
   // Accumulates hydrogen-bond forces into atom->f for every
   // (donor i, acceptor j, atom k from i's special list) triplet that has
   // parameters in type2param, lies inside the outer cutoff and passes the
   // angle test. When eflag_global is set, the number of contributing
   // triplets and their summed energy are stored in pvector[0] and pvector[1].
 
   int i,j,k,m,ii,jj,kk,inum,jnum,knum,itype,jtype,ktype,iatom,imol;
   tagint tagprev;
   double delx,dely,delz,rsq,rsq1,rsq2,r1,r2;
   double factor_hb,force_angle,force_kernel,evdwl,eng_lj,ehbond,force_switch;
   double c,s,a,b,ac,a11,a12,a22,vx1,vx2,vy1,vy2,vz1,vz2,d;
   double fi[3],fj[3],delr1[3],delr2[3];
   double r2inv,r10inv;
   double switch1,switch2;
   int *ilist,*jlist,*numneigh,**firstneigh;
   tagint *klist;
 
   evdwl = ehbond = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   tagint *tag = atom->tag;
   int *molindex = atom->molindex;
   int *molatom = atom->molatom;
   tagint **special = atom->special;
   int **nspecial = atom->nspecial;
   int *type = atom->type;
   double *special_lj = force->special_lj;
   int molecular = atom->molecular;
   Molecule **onemols = atom->avec->onemols;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // ii = loop over donors
   // jj = loop over acceptors
   // kk = loop over hydrogens bonded to donor
 
   int hbcount = 0;
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     itype = type[i];
     if (!donor[itype]) continue;
 
     // klist/knum = first nspecial[..][0] entries of the special list of i;
     // molecular == 1 reads them from the per-atom arrays, otherwise from the
     // molecule template, in which case tagprev is added to each template
     // entry before it is passed to atom->map()
     if (molecular == 1) {
       klist = special[i];
       knum = nspecial[i][0];
     } else {
       if (molindex[i] < 0) continue;
       imol = molindex[i];
       iatom = molatom[i];
       klist = onemols[imol]->special[iatom];
       knum = onemols[imol]->nspecial[iatom][0];
       tagprev = tag[i] - iatom - 1;
     }
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       // the special_lj weight is looked up from the raw neighbor entry
       // before j is reduced to an atom index with NEIGHMASK
       j = jlist[jj];
       factor_hb = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       jtype = type[j];
       if (!acceptor[jtype]) continue;
 
       // donor-acceptor separation, taken directly from the coordinates
       delx = x[i][0] - x[j][0];
       dely = x[i][1] - x[j][1];
       delz = x[i][2] - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       for (kk = 0; kk < knum; kk++) {
         if (molecular == 1) k = atom->map(klist[kk]);
         else k = atom->map(klist[kk]+tagprev);
         // negative index from map(): skip this entry
         if (k < 0) continue;
         ktype = type[k];
         // negative parameter index: no coefficients for this type triplet
         m = type2param[itype][jtype][ktype];
         if (m < 0) continue;
         const Param &pm = params[m];
 
         if (rsq < pm.cut_outersq) {
           // delr1 = vector from k to i, passed through minimum_image()
           delr1[0] = x[i][0] - x[k][0];
           delr1[1] = x[i][1] - x[k][1];
           delr1[2] = x[i][2] - x[k][2];
           domain->minimum_image(delr1);
           rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];
           r1 = sqrt(rsq1);
 
           // delr2 = vector from k to j, passed through minimum_image()
           delr2[0] = x[j][0] - x[k][0];
           delr2[1] = x[j][1] - x[k][1];
           delr2[2] = x[j][2] - x[k][2];
           domain->minimum_image(delr2);
           rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2];
           r2 = sqrt(rsq2);
 
           // angle (cos and sin)
           // c is the cosine of the angle at k between delr1 and delr2,
           // clamped to [-1,1] before acos()
 
           c = delr1[0]*delr2[0] + delr1[1]*delr2[1] + delr1[2]*delr2[2];
           c /= r1*r2;
           if (c > 1.0) c = 1.0;
           if (c < -1.0) c = -1.0;
           ac = acos(c);
 
           // acos() returns a value in [0,pi], so the upper bound can only
           // fail when cut_angle is at least pi
           if (ac > pm.cut_angle && ac < (2.0*MY_PI - pm.cut_angle)) {
             // s is bounded below by SMALL because force_angle is divided by it
             s = sqrt(1.0 - c*c);
             if (s < SMALL) s = SMALL;
 
             // LJ-specific kernel
             // radial part uses 1/r^12 and 1/r^10 terms built from r10inv and
             // r2inv; lj1..lj4 are set in init_style()
 
             r2inv = 1.0/rsq;
             r10inv = r2inv*r2inv*r2inv*r2inv*r2inv;
             force_kernel = r10inv*(pm.lj1*r2inv - pm.lj2)*r2inv *
               powint(c,pm.ap);
             force_angle = pm.ap * r10inv*(pm.lj3*r2inv - pm.lj4) *
               powint(c,pm.ap-1)*s;
 
             eng_lj = r10inv*(pm.lj3*r2inv - pm.lj4);
 
             force_switch=0.0;
 
             // between inner and outer cutoff: energy and both force terms are
             // scaled by switch1, and switch2 adds an extra term along the
             // i-j vector; force_switch must read eng_lj before it is scaled
             if (rsq > pm.cut_innersq) {
               switch1 = (pm.cut_outersq-rsq) * (pm.cut_outersq-rsq) *
                         (pm.cut_outersq + 2.0*rsq - 3.0*pm.cut_innersq) /
                         pm.denom_vdw;
               switch2 = 12.0*rsq * (pm.cut_outersq-rsq) *
                         (rsq-pm.cut_innersq) / pm.denom_vdw;
 
               force_kernel *= switch1;
               force_angle  *= switch1;
               force_switch  = eng_lj*switch2/rsq;
               eng_lj       *= switch1;
             }
 
             // evdwl is only refreshed when eflag is set
             if (eflag) {
               evdwl = eng_lj * powint(c,pm.ap);
               evdwl *= factor_hb;
               ehbond += evdwl;
             }
 
             // a = angular prefactor, b and d = prefactors of the i-j vector,
             // all weighted by factor_hb
             a = factor_hb*force_angle/s;
             b = factor_hb*force_kernel;
             d = factor_hb*force_switch;
 
             a11 = a*c / rsq1;
             a12 = -a / (r1*r2);
             a22 = a*c / rsq2;
 
             // (vx1,vy1,vz1) and (vx2,vy2,vz2) are the angular contributions
             // added to i and j; their sum is subtracted from k below
             vx1 = a11*delr1[0] + a12*delr2[0];
             vx2 = a22*delr2[0] + a12*delr1[0];
             vy1 = a11*delr1[1] + a12*delr2[1];
             vy2 = a22*delr2[1] + a12*delr1[1];
             vz1 = a11*delr1[2] + a12*delr2[2];
             vz2 = a22*delr2[2] + a12*delr1[2];
 
             // the i-j terms enter i and j with opposite sign
             fi[0] = vx1 + b*delx + d*delx;
             fi[1] = vy1 + b*dely + d*dely;
             fi[2] = vz1 + b*delz + d*delz;
             fj[0] = vx2 - b*delx - d*delx;
             fj[1] = vy2 - b*dely - d*dely;
             fj[2] = vz2 - b*delz - d*delz;
 
             f[i][0] += fi[0];
             f[i][1] += fi[1];
             f[i][2] += fi[2];
 
             f[j][0] += fj[0];
             f[j][1] += fj[1];
             f[j][2] += fj[2];
 
             f[k][0] -= vx1 + vx2;
             f[k][1] -= vy1 + vy2;
             f[k][2] -= vz1 + vz2;
 
             // KIJ instead of IJK b/c delr1/delr2 are both with respect to k
 
             if (evflag) ev_tally3(k,i,j,evdwl,0.0,fi,fj,delr1,delr2);
 
             hbcount++;
           }
         }
       }
     }
   }
 
   // publish the triplet count and summed hbond energy of this call
   if (eflag_global) {
     pvector[0] = hbcount;
     pvector[1] = ehbond;
   }
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairHbondDreidingLJ::allocate()
 {
   const int ntypes = atom->ntypes;
   allocated = 1;
 
   // mark all setflag as set, since don't require pair_coeff of all I,J
 
   memory->create(setflag,ntypes+1,ntypes+1,"pair:setflag");
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       setflag[itype][jtype] = 1;
     }
   }
 
   memory->create(cutsq,ntypes+1,ntypes+1,"pair:cutsq");
 
   // per-type donor/acceptor flags; their values are assigned in init_style()
   donor = new int[ntypes+1];
   acceptor = new int[ntypes+1];
 
   // type triplet -> index into params; -1 marks "no parameters assigned"
   memory->create(type2param,ntypes+1,ntypes+1,ntypes+1,"pair:type2param");
   for (int dtype = 1; dtype <= ntypes; dtype++) {
     for (int atype = 1; atype <= ntypes; atype++) {
       for (int htype = 1; htype <= ntypes; htype++) {
         type2param[dtype][atype][htype] = -1;
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairHbondDreidingLJ::settings(int narg, char **arg)
 {
   if (narg != 4) error->all(FLERR,"Illegal pair_style command");
 
   ap_global = force->inumeric(FLERR,arg[0]);
   cut_inner_global = force->numeric(FLERR,arg[1]);
   cut_outer_global = force->numeric(FLERR,arg[2]);
   cut_angle_global = force->numeric(FLERR,arg[3]) * MY_PI/180.0;
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairHbondDreidingLJ::coeff(int narg, char **arg)
 {
   if (narg < 6 || narg > 10)
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi,klo,khi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
   force->bounds(arg[2],atom->ntypes,klo,khi);
 
   int donor_flag;
   if (strcmp(arg[3],"i") == 0) donor_flag = 0;
   else if (strcmp(arg[3],"j") == 0) donor_flag = 1;
   else error->all(FLERR,"Incorrect args for pair coefficients");
 
   double epsilon_one = force->numeric(FLERR,arg[4]);
   double sigma_one = force->numeric(FLERR,arg[5]);
 
   int ap_one = ap_global;
   if (narg > 6) ap_one = force->inumeric(FLERR,arg[6]);
   double cut_inner_one = cut_inner_global;
   double cut_outer_one = cut_outer_global;
   if (narg > 8) {
     cut_inner_one = force->numeric(FLERR,arg[7]);
     cut_outer_one = force->numeric(FLERR,arg[8]);
   }
   if (cut_inner_one>cut_outer_one)
     error->all(FLERR,"Pair inner cutoff >= Pair outer cutoff");
   double cut_angle_one = cut_angle_global;
   if (narg == 10) cut_angle_one = force->numeric(FLERR,arg[9]) * MY_PI/180.0;
   // grow params array if necessary
 
   if (nparams == maxparam) {
     maxparam += CHUNK;
     params = (Param *) memory->srealloc(params,maxparam*sizeof(Param),
                                         "pair:params");
   }
 
   params[nparams].epsilon = epsilon_one;
   params[nparams].sigma = sigma_one;
   params[nparams].ap = ap_one;
   params[nparams].cut_inner = cut_inner_one;
   params[nparams].cut_outer = cut_outer_one;
   params[nparams].cut_innersq = cut_inner_one*cut_inner_one;
   params[nparams].cut_outersq = cut_outer_one*cut_outer_one;
   params[nparams].cut_angle = cut_angle_one;
   params[nparams].denom_vdw =
     (params[nparams].cut_outersq-params[nparams].cut_innersq) *
     (params[nparams].cut_outersq-params[nparams].cut_innersq) *
     (params[nparams].cut_outersq-params[nparams].cut_innersq);
 
   // flag type2param with either i,j = D,A or j,i = D,A
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++)
     for (int j = MAX(jlo,i); j <= jhi; j++)
       for (int k = klo; k <= khi; k++) {
         if (donor_flag == 0) type2param[i][j][k] = nparams;
         else type2param[j][i][k] = nparams;
         count++;
       }
   nparams++;
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairHbondDreidingLJ::init_style()
 {
   // Checks the prerequisites of this pair style, derives the per-type
   // donor/acceptor flags from type2param, precomputes lj1..lj4 for every
   // parameter set and requests a full neighbor list.
 
   // molecular system required to use special list to find H atoms
   // tags required to use special list
   // pair newton on required since are looping over D atoms
   //   and computing forces on A,H which may be on different procs
 
   if (atom->molecular == 0)
     error->all(FLERR,"Pair style hbond/dreiding requires molecular system");
   if (atom->tag_enable == 0)
     error->all(FLERR,"Pair style hbond/dreiding requires atom IDs");
   if (atom->map_style == 0)
     error->all(FLERR,"Pair style hbond/dreiding requires an atom map, "
                "see atom_modify");
   if (force->newton_pair == 0)
     error->all(FLERR,"Pair style hbond/dreiding requires newton pair on");
 
   // set donor[M]/acceptor[M] if any atom of type M is a donor/acceptor
   // a type counts as donor (acceptor) when it is the first (second) index
   // of at least one type2param entry that holds a parameter index >= 0
 
   int anyflag = 0;
   int n = atom->ntypes;
   for (int m = 1; m <= n; m++) donor[m] = acceptor[m] = 0;
   for (int i = 1; i <= n; i++)
     for (int j = 1; j <= n; j++)
       for (int k = 1; k <= n; k++)
         if (type2param[i][j][k] >= 0) {
           anyflag = 1;
           donor[i] = 1;
           acceptor[j] = 1;
         }
 
   if (!anyflag) error->all(FLERR,"No pair hbond/dreiding coefficients set");
 
   // set additional param values
   // offset is for LJ only, angle term is not included
   // lj3,lj4 are the energy prefactors and lj1,lj2 the force-kernel
   // prefactors read by compute() and single()
 
   for (int m = 0; m < nparams; m++) {
     params[m].lj1 = 60.0*params[m].epsilon*pow(params[m].sigma,12.0);
     params[m].lj2 = 60.0*params[m].epsilon*pow(params[m].sigma,10.0);
     params[m].lj3 = 5.0*params[m].epsilon*pow(params[m].sigma,12.0);
     params[m].lj4 = 6.0*params[m].epsilon*pow(params[m].sigma,10.0);
 
     // the offset computation below is disabled (commented out)
     /*
     if (offset_flag) {
       double ratio = params[m].sigma / params[m].cut_outer;
       params[m].offset = params[m].epsilon *
         ((2.0*pow(ratio,9.0)) - (3.0*pow(ratio,6.0)));
     } else params[m].offset = 0.0;
     */
   }
 
   // full neighbor list request
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairHbondDreidingLJ::init_one(int i, int j)
 {
   // return maximum cutoff for any K with I,J = D,A or J,I = D,A
   // donor/acceptor is not symmetric, IJ interaction != JI interaction
 
   const int ntypes = atom->ntypes;
   double cutmax = 0.0;
 
   for (int k = 1; k <= ntypes; k++) {
     // i as donor, j as acceptor
     const int m_ij = type2param[i][j][k];
     if (m_ij >= 0) cutmax = MAX(cutmax,params[m_ij].cut_outer);
 
     // j as donor, i as acceptor
     const int m_ji = type2param[j][i][k];
     if (m_ji >= 0) cutmax = MAX(cutmax,params[m_ji].cut_outer);
   }
 
   return cutmax;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairHbondDreidingLJ::single(int i, int j, int itype, int jtype,
                                    double rsq,
                                    double factor_coul, double factor_lj,
                                    double &fforce)
 {
   // Hydrogen-bond energy for donor i and acceptor j at squared separation
   // rsq, summed over the atoms k found in the special list of i.
   // Returns the summed energy (weighted by factor_hb) and accumulates a
   // force value in fforce. factor_coul and factor_lj are not used.
 
   int k,kk,ktype,knum,m;
   tagint tagprev;
   double eng,eng_lj,force_kernel,force_angle;
   double rsq1,rsq2,r1,r2,c,s,ac,r2inv,r10inv,factor_hb;
   double switch1,switch2;
   double delr1[3],delr2[3];
   tagint *klist;
 
   double **x = atom->x;
   int *type = atom->type;
   double *special_lj = force->special_lj;
 
   eng = 0.0;
   fforce = 0;
 
   // sanity check
 
   if (!donor[itype]) return 0.0;
   if (!acceptor[jtype]) return 0.0;
 
   // klist/knum = first nspecial[..][0] entries of the special list of i,
   // taken from the per-atom arrays (molecular == 1) or from the molecule
   // template; tagprev is only set, and only used, in the template case
   int molecular = atom->molecular;
   if (molecular == 1) {
     klist = atom->special[i];
     knum = atom->nspecial[i][0];
   } else {
     if (atom->molindex[i] < 0) return 0.0;
     int imol = atom->molindex[i];
     int iatom = atom->molatom[i];
     Molecule **onemols = atom->avec->onemols;
     klist = onemols[imol]->special[iatom];
     knum = onemols[imol]->nspecial[iatom][0];
     tagprev = atom->tag[i] - iatom - 1;
   }
 
   // NOTE(review): sbmask() is applied to the j argument as passed in;
   // compute() applies it to the raw neighbor-list entry - confirm callers
   // pass a value for which this lookup is meaningful
   factor_hb = special_lj[sbmask(j)];
 
   for (kk = 0; kk < knum; kk++) {
     if (molecular == 1) k = atom->map(klist[kk]);
     else k = atom->map(klist[kk]+tagprev);
 
     // skip entries map() cannot resolve or that have no parameters
     if (k < 0) continue;
     ktype = type[k];
     m = type2param[itype][jtype][ktype];
     if (m < 0) continue;
     const Param &pm = params[m];
 
     // NOTE(review): unlike compute(), there is no rsq < pm.cut_outersq test
     // here - presumably the caller guarantees it; verify
 
     // delr1 = vector from k to i, passed through minimum_image()
     delr1[0] = x[i][0] - x[k][0];
     delr1[1] = x[i][1] - x[k][1];
     delr1[2] = x[i][2] - x[k][2];
     domain->minimum_image(delr1);
     rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];
     r1 = sqrt(rsq1);
 
     // delr2 = vector from k to j, passed through minimum_image()
     delr2[0] = x[j][0] - x[k][0];
     delr2[1] = x[j][1] - x[k][1];
     delr2[2] = x[j][2] - x[k][2];
     domain->minimum_image(delr2);
     rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2];
     r2 = sqrt(rsq2);
 
     // angle (cos and sin)
 
     c = delr1[0]*delr2[0] + delr1[1]*delr2[1] + delr1[2]*delr2[2];
     c /= r1*r2;
     if (c > 1.0) c = 1.0;
     if (c < -1.0) c = -1.0;
     ac = acos(c);
 
     // NOTE(review): this return leaves the whole function, discarding any
     // energy accumulated from earlier k and leaving fforce at its partial
     // sum; compute() only skips the failing k - confirm this is intended
     if (ac < pm.cut_angle || ac > (2.0*MY_PI - pm.cut_angle)) return 0.0;
     s = sqrt(1.0 - c*c);
     if (s < SMALL) s = SMALL;
 
     // LJ-specific kernel
 
     r2inv = 1.0/rsq;
     r10inv = r2inv*r2inv*r2inv*r2inv*r2inv;
     force_kernel = r10inv*(pm.lj1*r2inv - pm.lj2)*r2inv * powint(c,pm.ap);
     force_angle = pm.ap * r10inv*(pm.lj3*r2inv - pm.lj4) *
       powint(c,pm.ap-1)*s;
 
     // only lj part for now
 
     // between inner and outer cutoff: scale by switch1 and fold the
     // switch2 term into force_kernel (eng_lj is read before it is scaled)
     eng_lj = r10inv*(pm.lj3*r2inv - pm.lj4);
     if (rsq > pm.cut_innersq) {
       switch1 = (pm.cut_outersq-rsq) * (pm.cut_outersq-rsq) *
                 (pm.cut_outersq + 2.0*rsq - 3.0*pm.cut_innersq) / pm.denom_vdw;
       switch2 = 12.0*rsq * (pm.cut_outersq-rsq) *
                 (rsq-pm.cut_innersq) / pm.denom_vdw;
       force_kernel = force_kernel*switch1 + eng_lj*switch2;
       eng_lj *= switch1;
     }
 
     // NOTE(review): force_kernel already carries powint(c,pm.ap) and is
     // multiplied by it again here, and factor_hb is applied to eng but not
     // to fforce - confirm against compute()
     fforce += force_kernel*powint(c,pm.ap) + eng_lj*force_angle;
     eng += eng_lj * powint(c,pm.ap) * factor_hb;
   }
 
   return eng;
 }
diff --git a/src/MOLECULE/pair_hbond_dreiding_morse.cpp b/src/MOLECULE/pair_hbond_dreiding_morse.cpp
index 7fda88230..f0b31b1ba 100644
--- a/src/MOLECULE/pair_hbond_dreiding_morse.cpp
+++ b/src/MOLECULE/pair_hbond_dreiding_morse.cpp
@@ -1,473 +1,473 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
    ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Tod A Pascal (Caltech)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_hbond_dreiding_morse.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "molecule.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_request.h"
 #include "neigh_list.h"
 #include "domain.h"
 #include "math_const.h"
 #include "math_special.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
 #define SMALL 0.001
 #define CHUNK 8
 
 /* ---------------------------------------------------------------------- */
 
 PairHbondDreidingMorse::PairHbondDreidingMorse(LAMMPS *lmp)
   : PairHbondDreidingLJ(lmp)
 {
   // nothing to add: all setup is done by the PairHbondDreidingLJ constructor
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairHbondDreidingMorse::compute(int eflag, int vflag)
 {
   // Accumulates hydrogen-bond forces into atom->f for every
   // (donor i, acceptor j, atom k from i's special list) triplet that has
   // parameters in type2param, lies inside the outer cutoff and passes the
   // angle test, using a Morse form for the radial part. When eflag_global
   // is set, the number of contributing triplets and their summed energy
   // are stored in pvector[0] and pvector[1].
 
   int i,j,k,m,ii,jj,kk,inum,jnum,knum,itype,jtype,ktype,imol,iatom;
   tagint tagprev;
   double delx,dely,delz,rsq,rsq1,rsq2,r1,r2;
   double factor_hb,force_angle,force_kernel,force_switch,evdwl,ehbond;
   double c,s,a,b,d,ac,a11,a12,a22,vx1,vx2,vy1,vy2,vz1,vz2;
   double fi[3],fj[3],delr1[3],delr2[3];
   double r,dr,dexp,eng_morse,switch1,switch2;
   int *ilist,*jlist,*numneigh,**firstneigh;
   tagint *klist;
 
   evdwl = ehbond = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   tagint *tag = atom->tag;
   int *molindex = atom->molindex;
   int *molatom = atom->molatom;
   tagint **special = atom->special;
   int **nspecial = atom->nspecial;
   int *type = atom->type;
   double *special_lj = force->special_lj;
   int molecular = atom->molecular;
   Molecule **onemols = atom->avec->onemols;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // ii = loop over donors
   // jj = loop over acceptors
   // kk = loop over hydrogens bonded to donor
 
   int hbcount = 0;
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     itype = type[i];
     if (!donor[itype]) continue;
 
     // klist/knum = first nspecial[..][0] entries of the special list of i;
     // molecular == 1 reads them from the per-atom arrays, otherwise from the
     // molecule template, in which case tagprev is added to each template
     // entry before it is passed to atom->map()
     if (molecular == 1) {
       klist = special[i];
       knum = nspecial[i][0];
     } else {
       if (molindex[i] < 0) continue;
       imol = molindex[i];
       iatom = molatom[i];
       klist = onemols[imol]->special[iatom];
       knum = onemols[imol]->nspecial[iatom][0];
       tagprev = tag[i] - iatom - 1;
     }
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       // the special_lj weight is looked up from the raw neighbor entry
       // before j is reduced to an atom index with NEIGHMASK
       j = jlist[jj];
       factor_hb = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       jtype = type[j];
       if (!acceptor[jtype]) continue;
 
       // donor-acceptor separation, taken directly from the coordinates
       delx = x[i][0] - x[j][0];
       dely = x[i][1] - x[j][1];
       delz = x[i][2] - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       for (kk = 0; kk < knum; kk++) {
         if (molecular == 1) k = atom->map(klist[kk]);
         else k = atom->map(klist[kk]+tagprev);
         // negative index from map(): skip this entry
         if (k < 0) continue;
         ktype = type[k];
         // negative parameter index: no coefficients for this type triplet
         m = type2param[itype][jtype][ktype];
         if (m < 0) continue;
         const Param &pm = params[m];
 
         if (rsq < pm.cut_outersq) {
           // delr1 = vector from k to i, passed through minimum_image()
           delr1[0] = x[i][0] - x[k][0];
           delr1[1] = x[i][1] - x[k][1];
           delr1[2] = x[i][2] - x[k][2];
           domain->minimum_image(delr1);
           rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];
           r1 = sqrt(rsq1);
 
           // delr2 = vector from k to j, passed through minimum_image()
           delr2[0] = x[j][0] - x[k][0];
           delr2[1] = x[j][1] - x[k][1];
           delr2[2] = x[j][2] - x[k][2];
           domain->minimum_image(delr2);
           rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2];
           r2 = sqrt(rsq2);
 
           // angle (cos and sin)
           // c is the cosine of the angle at k between delr1 and delr2,
           // clamped to [-1,1] before acos()
 
           c = delr1[0]*delr2[0] + delr1[1]*delr2[1] + delr1[2]*delr2[2];
           c /= r1*r2;
           if (c > 1.0) c = 1.0;
           if (c < -1.0) c = -1.0;
           ac = acos(c);
 
           // acos() returns a value in [0,pi], so the upper bound can only
           // fail when cut_angle is at least pi
           if (ac > pm.cut_angle && ac < (2.0*MY_PI - pm.cut_angle)) {
             // s is bounded below by SMALL because force_angle is divided by it
             s = sqrt(1.0 - c*c);
             if (s < SMALL) s = SMALL;
 
             // Morse-specific kernel
             // dexp = exp(-alpha*(r - r0)); the energy is d0*(dexp^2 - 2*dexp)
 
             r = sqrt(rsq);
             dr = r - pm.r0;
             dexp = exp(-pm.alpha * dr);
             eng_morse = pm.d0 * (dexp*dexp - 2.0*dexp);
             force_kernel = pm.morse1*(dexp*dexp - dexp)/r * powint(c,pm.ap);
             force_angle = pm.ap * eng_morse * powint(c,pm.ap-1)*s;
 	    force_switch = 0.0;
 
             // between inner and outer cutoff: energy and both force terms are
             // scaled by switch1, and switch2 adds an extra term along the
             // i-j vector; force_switch must read eng_morse before it is scaled
             if (rsq > pm.cut_innersq) {
               switch1 = (pm.cut_outersq-rsq) * (pm.cut_outersq-rsq) *
                         (pm.cut_outersq + 2.0*rsq - 3.0*pm.cut_innersq) /
                         pm.denom_vdw;
               switch2 = 12.0*rsq * (pm.cut_outersq-rsq) *
                         (rsq-pm.cut_innersq) / pm.denom_vdw;
 
               force_kernel *= switch1;
 	      force_angle  *= switch1;
 	      force_switch  = eng_morse*switch2/rsq;
               eng_morse    *= switch1;
             }
 
             // evdwl is only refreshed when eflag is set
             if (eflag) {
               evdwl = eng_morse * powint(c,pm.ap);
               evdwl *= factor_hb;
               ehbond += evdwl;
             }
 
             // a = angular prefactor, b and d = prefactors of the i-j vector,
             // all weighted by factor_hb
             a = factor_hb*force_angle/s;
             b = factor_hb*force_kernel;
             d = factor_hb*force_switch;
 
             a11 = a*c / rsq1;
             a12 = -a / (r1*r2);
             a22 = a*c / rsq2;
 
             // (vx1,vy1,vz1) and (vx2,vy2,vz2) are the angular contributions
             // added to i and j; their sum is subtracted from k below
             vx1 = a11*delr1[0] + a12*delr2[0];
             vx2 = a22*delr2[0] + a12*delr1[0];
             vy1 = a11*delr1[1] + a12*delr2[1];
             vy2 = a22*delr2[1] + a12*delr1[1];
             vz1 = a11*delr1[2] + a12*delr2[2];
             vz2 = a22*delr2[2] + a12*delr1[2];
 
             // the i-j terms enter i and j with opposite sign
             fi[0] = vx1 + (b+d)*delx;
             fi[1] = vy1 + (b+d)*dely;
             fi[2] = vz1 + (b+d)*delz;
             fj[0] = vx2 - (b+d)*delx;
             fj[1] = vy2 - (b+d)*dely;
             fj[2] = vz2 - (b+d)*delz;
 
             f[i][0] += fi[0];
             f[i][1] += fi[1];
             f[i][2] += fi[2];
 
             f[j][0] += fj[0];
             f[j][1] += fj[1];
             f[j][2] += fj[2];
 
             f[k][0] -= vx1 + vx2;
             f[k][1] -= vy1 + vy2;
             f[k][2] -= vz1 + vz2;
 
             // KIJ instead of IJK b/c delr1/delr2 are both with respect to k
 
             if (evflag) ev_tally3(k,i,j,evdwl,0.0,fi,fj,delr1,delr2);
 
             hbcount++;
           }
         }
       }
     }
   }
 
   // publish the triplet count and summed hbond energy of this call
   if (eflag_global) {
     pvector[0] = hbcount;
     pvector[1] = ehbond;
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
    arg[0],arg[1],arg[2] = type ranges for I, J, K
    arg[3] = "i" or "j": selects whether the entry is stored as
             type2param[i][j][k] or type2param[j][i][k]
    arg[4],arg[5],arg[6] = d0, alpha, r0
    arg[7] = ap (optional, defaults to ap_global)
    arg[8],arg[9] = inner and outer cutoffs (optional, only read when
                    both are present, i.e. narg > 9)
    arg[10] = angle cutoff in degrees (optional), stored in radians
 ------------------------------------------------------------------------- */
 
 void PairHbondDreidingMorse::coeff(int narg, char **arg)
 {
   if (narg < 7 || narg > 11)
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi,klo,khi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
   force->bounds(arg[2],atom->ntypes,klo,khi);
 
   int donor_flag;
   if (strcmp(arg[3],"i") == 0) donor_flag = 0;
   else if (strcmp(arg[3],"j") == 0) donor_flag = 1;
   else error->all(FLERR,"Incorrect args for pair coefficients");
 
   double d0_one = force->numeric(FLERR,arg[4]);
   double alpha_one = force->numeric(FLERR,arg[5]);
   double r0_one = force->numeric(FLERR,arg[6]);
 
   // optional values fall back to the global settings
   // with narg == 9, arg[8] is present but not read
 
   int ap_one = ap_global;
   if (narg > 7) ap_one = force->inumeric(FLERR,arg[7]);
   double cut_inner_one = cut_inner_global;
   double cut_outer_one = cut_outer_global;
   if (narg > 9) {
     cut_inner_one = force->numeric(FLERR,arg[8]);
     cut_outer_one = force->numeric(FLERR,arg[9]);
   }
   // NOTE(review): the test is strict (>) while the message says ">=";
   //   equal cutoffs pass this check and make denom_vdw below zero
   if (cut_inner_one>cut_outer_one)
     error->all(FLERR,"Pair inner cutoff >= Pair outer cutoff");
   double cut_angle_one = cut_angle_global;
   if (narg > 10) cut_angle_one = force->numeric(FLERR,arg[10]) * MY_PI/180.0;
 
   // grow params array if necessary
 
   if (nparams == maxparam) {
     maxparam += CHUNK;
     params = (Param *) memory->srealloc(params,maxparam*sizeof(Param),
                                         "pair:params");
   }
 
   // fill the new parameter set, including squared cutoffs and
   // denom_vdw = (cut_outersq - cut_innersq)^3 used by the switching function
 
   params[nparams].d0 = d0_one;
   params[nparams].alpha = alpha_one;
   params[nparams].r0 = r0_one;
   params[nparams].ap = ap_one;
   params[nparams].cut_inner = cut_inner_one;
   params[nparams].cut_outer = cut_outer_one;
   params[nparams].cut_innersq = cut_inner_one*cut_inner_one;
   params[nparams].cut_outersq = cut_outer_one*cut_outer_one;
   params[nparams].cut_angle = cut_angle_one;
   params[nparams].denom_vdw =
     (params[nparams].cut_outersq-params[nparams].cut_innersq) *
     (params[nparams].cut_outersq-params[nparams].cut_innersq) *
     (params[nparams].cut_outersq-params[nparams].cut_innersq);
 
   // flag type2param with either i,j = D,A or j,i = D,A
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++)
     for (int j = MAX(jlo,i); j <= jhi; j++)
       for (int k = klo; k <= khi; k++) {
         if (donor_flag == 0) type2param[i][j][k] = nparams;
         else type2param[j][i][k] = nparams;
         count++;
       }
   nparams++;
 
   // no (i,j,k) triple matched the requested type ranges
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
    checks system requirements, rebuilds the donor/acceptor type flags
    from type2param, precomputes morse1, and requests a full neighbor list
 ------------------------------------------------------------------------- */
 
 void PairHbondDreidingMorse::init_style()
 {
   // molecular system required to use special list to find H atoms
   // tags required to use special list
   // pair newton on required since we are looping over D atoms
   //   and computing forces on A,H which may be on different procs
 
   if (atom->molecular == 0)
     error->all(FLERR,"Pair style hbond/dreiding requires molecular system");
   if (atom->tag_enable == 0)
     error->all(FLERR,"Pair style hbond/dreiding requires atom IDs");
   if (atom->map_style == 0)
     error->all(FLERR,"Pair style hbond/dreiding requires an atom map, "
                "see atom_modify");
   if (force->newton_pair == 0)
     error->all(FLERR,"Pair style hbond/dreiding requires newton pair on");
 
   // set donor[M]/acceptor[M] if any atom of type M is a donor/acceptor
   // the first index of type2param marks the donor type, the second the acceptor
 
   int anyflag = 0;
   int n = atom->ntypes;
   for (int m = 1; m <= n; m++) donor[m] = acceptor[m] = 0;
   for (int i = 1; i <= n; i++)
     for (int j = 1; j <= n; j++)
       for (int k = 1; k <= n; k++)
         if (type2param[i][j][k] >= 0) {
           anyflag = 1;
           donor[i] = 1;
           acceptor[j] = 1;
         }
 
   if (!anyflag) error->all(FLERR,"No pair hbond/dreiding coefficients set");
 
   // set additional param values
   // offset is for Morse only, angle term is not included
 
   for (int m = 0; m < nparams; m++) {
     // morse1 = 2*d0*alpha, prefactor of the Morse force term
     params[m].morse1 = 2.0*params[m].d0*params[m].alpha;
 
     // energy offset at the cutoff is disabled (kept for reference only)
     /*
     if (offset_flag) {
       double alpha_dr = -params[m].alpha * (params[m].cut - params[m].r0);
       params[m].offset = params[m].d0 *
         ((exp(2.0*alpha_dr)) - (2.0*exp(alpha_dr)));
     } else params[m].offset = 0.0;
     */
   }
 
   // full neighbor list request
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
 }
 
 /* ----------------------------------------------------------------------
    energy and force factor for one donor/acceptor pair i,j
    sums the Morse hydrogen-bond term over every atom k in the first
      special-neighbor list of i that has parameters for (itype,jtype,ktype)
    returns the summed energy, fforce receives the summed force factor
    returns 0.0 (fforce = 0) if itype is not a donor type or jtype is not
      an acceptor type
    factor_coul and factor_lj are not used, the weight applied to the
      energy is special_lj[sbmask(j)]
    a k whose D-H-A angle is outside the angle cutoff is skipped and does
      not discard the contributions of the other k atoms
 ------------------------------------------------------------------------- */
 
 double PairHbondDreidingMorse::single(int i, int j, int itype, int jtype,
                                      double rsq,
                                      double factor_coul, double factor_lj,
                                      double &fforce)
 {
   int k,kk,ktype,knum,m;
   tagint tagprev;
   double eng,eng_morse,force_kernel,force_angle;
   double rsq1,rsq2,r1,r2,c,s,ac,r,dr,dexp,factor_hb;
   double switch1,switch2;
   double delr1[3],delr2[3];
   tagint *klist;
 
   double **x = atom->x;
   int *type = atom->type;
   double *special_lj = force->special_lj;
 
   eng = 0.0;
   fforce = 0;
 
   //sanity check
 
   if (!donor[itype]) return 0.0;
   if (!acceptor[jtype]) return 0.0;
 
   // list of candidate k atoms bonded to i:
   // per-atom special list when molecular == 1,
   // otherwise the special list of the molecule template of atom i
 
   int molecular = atom->molecular;
   if (molecular == 1) {
     klist = atom->special[i];
     knum = atom->nspecial[i][0];
   } else {
     if (atom->molindex[i] < 0) return 0.0;
     int imol = atom->molindex[i];
     int iatom = atom->molatom[i];
     Molecule **onemols = atom->avec->onemols;
     klist = onemols[imol]->special[iatom];
     knum = onemols[imol]->nspecial[iatom][0];
     // template IDs are relative, tagprev shifts them to global atom IDs
     tagprev = atom->tag[i] - iatom - 1;
   }
 
   factor_hb = special_lj[sbmask(j)];
 
   for (kk = 0; kk < knum; kk++) {
     if (molecular == 1) k = atom->map(klist[kk]);
     else k = atom->map(klist[kk]+tagprev);
 
     // skip atoms that are not on this proc or have no parameter set
 
     if (k < 0) continue;
     ktype = type[k];
     m = type2param[itype][jtype][ktype];
     if (m < 0) continue;
     const Param &pm = params[m];
 
     // vectors from k to i and from k to j, using minimum image
 
     delr1[0] = x[i][0] - x[k][0];
     delr1[1] = x[i][1] - x[k][1];
     delr1[2] = x[i][2] - x[k][2];
     domain->minimum_image(delr1);
     rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];
     r1 = sqrt(rsq1);
 
     delr2[0] = x[j][0] - x[k][0];
     delr2[1] = x[j][1] - x[k][1];
     delr2[2] = x[j][2] - x[k][2];
     domain->minimum_image(delr2);
     rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2];
     r2 = sqrt(rsq2);
 
     // angle (cos and sin)
 
     c = delr1[0]*delr2[0] + delr1[1]*delr2[1] + delr1[2]*delr2[2];
     c /= r1*r2;
     if (c > 1.0) c = 1.0;
     if (c < -1.0) c = -1.0;
     ac = acos(c);
 
     // angle outside the cutoff: this k contributes nothing
     // only this k is skipped, so that terms already accumulated
     //   from other k atoms are kept in eng and fforce
 
     if (ac < pm.cut_angle || ac > (2.0*MY_PI - pm.cut_angle)) continue;
     s = sqrt(1.0 - c*c);
     if (s < SMALL) s = SMALL;
 
     // Morse-specific kernel
 
     r = sqrt(rsq);
     dr = r - pm.r0;
     dexp = exp(-pm.alpha * dr);
     eng_morse = pm.d0 * (dexp*dexp - 2.0*dexp);  //<-- BUGFIX 2012-11-14
     force_kernel = pm.morse1*(dexp*dexp - dexp)/r * powint(c,pm.ap);
     force_angle = pm.ap * eng_morse * powint(c,pm.ap-1)*s;
 
     // switching function applied beyond the inner cutoff
 
     if (rsq > pm.cut_innersq) {
       switch1 = (pm.cut_outersq-rsq) * (pm.cut_outersq-rsq) *
                 (pm.cut_outersq + 2.0*rsq - 3.0*pm.cut_innersq) /
                 pm.denom_vdw;
       switch2 = 12.0*rsq * (pm.cut_outersq-rsq) *
                 (rsq-pm.cut_innersq) / pm.denom_vdw;
       force_kernel = force_kernel*switch1 + eng_morse*switch2;
       eng_morse *= switch1;
     }
 
     eng += eng_morse * powint(c,pm.ap)* factor_hb;
     fforce += force_kernel*powint(c,pm.ap) + eng_morse*force_angle;
   }
 
   return eng;
 }
diff --git a/src/MOLECULE/pair_lj_charmm_coul_charmm.cpp b/src/MOLECULE/pair_lj_charmm_coul_charmm.cpp
index 390e21d33..430b805f0 100644
--- a/src/MOLECULE/pair_lj_charmm_coul_charmm.cpp
+++ b/src/MOLECULE/pair_lj_charmm_coul_charmm.cpp
@@ -1,530 +1,530 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Paul Crozier (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_lj_charmm_coul_charmm.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ----------------------------------------------------------------------
    constructor: set the style-wide default flags
 ------------------------------------------------------------------------- */
 
 PairLJCharmmCoulCharmm::PairLJCharmmCoulCharmm(LAMMPS *lmp) : Pair(lmp)
 {
   // write_data() and write_data_all() are provided by this style
   writedata = 1;
 
   // default mixing rule for unset i,j coefficients
   mix_flag = ARITHMETIC;
 
   // exposed to callers through extract("implicit")
   implicit = 0;
 }
 
 /* ----------------------------------------------------------------------
    destructor: free the per-type arrays created by allocate()
 ------------------------------------------------------------------------- */
 
 PairLJCharmmCoulCharmm::~PairLJCharmmCoulCharmm()
 {
   // nothing to free if allocate() was never called
 
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   // user-specified coefficients
 
   memory->destroy(epsilon);
   memory->destroy(sigma);
   memory->destroy(eps14);
   memory->destroy(sigma14);
 
   // derived prefactor tables
 
   memory->destroy(lj1);
   memory->destroy(lj2);
   memory->destroy(lj3);
   memory->destroy(lj4);
   memory->destroy(lj14_1);
   memory->destroy(lj14_2);
   memory->destroy(lj14_3);
   memory->destroy(lj14_4);
 }
 
 /* ----------------------------------------------------------------------
    compute pairwise forces, and energy/virial when requested
    loops over the neighbor list and, for each pair within cut_bothsq,
      adds a Coulomb term (inside cut_coulsq) and an LJ term (inside
      cut_ljsq), each modified by switch1/switch2 beyond its inner cutoff
    eflag,vflag = energy and virial flags passed on to ev_setup()
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulCharmm::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double philj,switch1,switch2;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       // special-bond weights are looked up from the raw neighbor entry
       // before it is masked down to the atom index
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
       jtype = type[j];
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cut_bothsq) {
         r2inv = 1.0/rsq;
 
         // Coulomb force term, switched between inner and outer cutoff
 
         if (rsq < cut_coulsq) {
           forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
           if (rsq > cut_coul_innersq) {
             switch1 = (cut_coulsq-rsq) * (cut_coulsq-rsq) *
               (cut_coulsq + 2.0*rsq - 3.0*cut_coul_innersq) / denom_coul;
             switch2 = 12.0*rsq * (cut_coulsq-rsq) *
               (rsq-cut_coul_innersq) / denom_coul;
             forcecoul *= switch1 + switch2;
           }
         } else forcecoul = 0.0;
 
         // LJ force term, switched between inner and outer cutoff
 
         if (rsq < cut_ljsq) {
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
           if (rsq > cut_lj_innersq) {
             switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
               (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
             switch2 = 12.0*rsq * (cut_ljsq-rsq) *
               (rsq-cut_lj_innersq) / denom_lj;
             philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
             forcelj = forcelj*switch1 + philj*switch2;
           }
         } else forcelj = 0.0;
 
         fpair = (factor_coul*forcecoul + factor_lj*forcelj) * r2inv;
 
         // force on j is only applied when newton_pair is on or j is local
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         // energies use switch1 only
         // r6inv was set above whenever rsq < cut_ljsq
 
         if (eflag) {
           if (rsq < cut_coulsq) {
             ecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
             if (rsq > cut_coul_innersq) {
               switch1 = (cut_coulsq-rsq) * (cut_coulsq-rsq) *
                 (cut_coulsq + 2.0*rsq - 3.0*cut_coul_innersq) /
                 denom_coul;
               ecoul *= switch1;
             }
             ecoul *= factor_coul;
           } else ecoul = 0.0;
           if (rsq < cut_ljsq) {
             evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
             if (rsq > cut_lj_innersq) {
               switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
                 (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
               evdwl *= switch1;
             }
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all per-type arrays
    every array is (ntypes+1) x (ntypes+1), types are indexed from 1
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulCharmm::allocate()
 {
   allocated = 1;
   const int np1 = atom->ntypes + 1;
 
   // setflag: clear the upper triangle (j >= i), indices start at 1
 
   memory->create(setflag,np1,np1,"pair:setflag");
   for (int itype = 1; itype < np1; ++itype) {
     for (int jtype = itype; jtype < np1; ++jtype) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,np1,np1,"pair:cutsq");
 
   // user-specified coefficients
 
   memory->create(epsilon,np1,np1,"pair:epsilon");
   memory->create(sigma,np1,np1,"pair:sigma");
   memory->create(eps14,np1,np1,"pair:eps14");
   memory->create(sigma14,np1,np1,"pair:sigma14");
 
   // prefactor tables filled in by init_one()
 
   memory->create(lj1,np1,np1,"pair:lj1");
   memory->create(lj2,np1,np1,"pair:lj2");
   memory->create(lj3,np1,np1,"pair:lj3");
   memory->create(lj4,np1,np1,"pair:lj4");
   memory->create(lj14_1,np1,np1,"pair:lj14_1");
   memory->create(lj14_2,np1,np1,"pair:lj14_2");
   memory->create(lj14_3,np1,np1,"pair:lj14_3");
   memory->create(lj14_4,np1,np1,"pair:lj14_4");
 }
 
 /* ----------------------------------------------------------------------
    global settings
    unlike other pair styles,
      there are no individual pair settings that these override
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulCharmm::settings(int narg, char **arg)
 {
   if (narg != 2 && narg != 4)
     error->all(FLERR,"Illegal pair_style command");
 
   cut_lj_inner = force->numeric(FLERR,arg[0]);
   cut_lj = force->numeric(FLERR,arg[1]);
   if (narg == 2) {
     cut_coul_inner = cut_lj_inner;
     cut_coul = cut_lj;
   } else {
     cut_coul_inner = force->numeric(FLERR,arg[2]);
     cut_coul = force->numeric(FLERR,arg[3]);
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulCharmm::coeff(int narg, char **arg)
 {
   if (narg != 4 && narg != 6)
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double epsilon_one = force->numeric(FLERR,arg[2]);
   double sigma_one = force->numeric(FLERR,arg[3]);
   double eps14_one = epsilon_one;
   double sigma14_one = sigma_one;
   if (narg == 6) {
     eps14_one = force->numeric(FLERR,arg[4]);
     sigma14_one = force->numeric(FLERR,arg[5]);
   }
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       epsilon[i][j] = epsilon_one;
       sigma[i][j] = sigma_one;
       eps14[i][j] = eps14_one;
       sigma14[i][j] = sigma14_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
    requires per-atom charge, requests a neighbor list, validates the
      inner/outer cutoffs and precomputes squared cutoffs and the
      switching-function denominators
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulCharmm::init_style()
 {
   if (!atom->q_flag)
     error->all(FLERR,
                "Pair style lj/charmm/coul/charmm requires atom attribute q");
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 
   // require cut_lj_inner < cut_lj, cut_coul_inner < cut_coul
 
   if (cut_lj_inner >= cut_lj || cut_coul_inner >= cut_coul)
     error->all(FLERR,"Pair inner cutoff >= Pair outer cutoff");
 
   // squared cutoffs, cut_bothsq is the larger of the two outer cutoffs
 
   cut_lj_innersq = cut_lj_inner * cut_lj_inner;
   cut_ljsq = cut_lj * cut_lj;
   cut_coul_innersq = cut_coul_inner * cut_coul_inner;
   cut_coulsq = cut_coul * cut_coul;
   cut_bothsq = MAX(cut_ljsq,cut_coulsq);
 
   // denom = (outer^2 - inner^2)^3, nonzero because inner < outer was checked
 
   denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) *
     (cut_ljsq-cut_lj_innersq);
   denom_coul = (cut_coulsq-cut_coul_innersq) * (cut_coulsq-cut_coul_innersq) *
     (cut_coulsq-cut_coul_innersq);
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
    mixes coefficients from the i,i and j,j values if i,j was not set,
      fills the lj* and lj14_* prefactor tables symmetrically
    returns the larger of cut_lj and cut_coul
 ------------------------------------------------------------------------- */
 
 double PairLJCharmmCoulCharmm::init_one(int i, int j)
 {
   // i,j not set explicitly: mix both the regular and the 1-4 coefficients
 
   if (setflag[i][j] == 0) {
     epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j],
                                sigma[i][i],sigma[j][j]);
     sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
     eps14[i][j] = mix_energy(eps14[i][i],eps14[j][j],
                                sigma14[i][i],sigma14[j][j]);
     sigma14[i][j] = mix_distance(sigma14[i][i],sigma14[j][j]);
   }
 
   double cut = MAX(cut_lj,cut_coul);
 
   // lj1,lj2 are the force prefactors and lj3,lj4 the energy prefactors
   // used by compute() and single(); lj14_* are the same with 1-4 values
 
   lj1[i][j] = 48.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj2[i][j] = 24.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
   lj3[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj4[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
   lj14_1[i][j] = 48.0 * eps14[i][j] * pow(sigma14[i][j],12.0);
   lj14_2[i][j] = 24.0 * eps14[i][j] * pow(sigma14[i][j],6.0);
   lj14_3[i][j] = 4.0 * eps14[i][j] * pow(sigma14[i][j],12.0);
   lj14_4[i][j] = 4.0 * eps14[i][j] * pow(sigma14[i][j],6.0);
 
   // mirror into j,i so lookups work in either type order
 
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   lj4[j][i] = lj4[i][j];
   lj14_1[j][i] = lj14_1[i][j];
   lj14_2[j][i] = lj14_2[i][j];
   lj14_3[j][i] = lj14_3[i][j];
   lj14_4[j][i] = lj14_4[i][j];
 
   return cut;
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
   global settings first, then for each i <= j the setflag value
     followed by epsilon, sigma, eps14, sigma14 when the flag is set
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulCharmm::write_restart(FILE *fp)
 {
   write_restart_settings(fp);
 
   for (int itype = 1; itype <= atom->ntypes; ++itype) {
     for (int jtype = itype; jtype <= atom->ntypes; ++jtype) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
 
       // coefficients are stored only for pairs that were set
 
       if (!setflag[itype][jtype]) continue;
       fwrite(&epsilon[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigma[itype][jtype],sizeof(double),1,fp);
       fwrite(&eps14[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigma14[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
   reads in the same order write_restart() writes: global settings, then
     for each i <= j the setflag value and, when set, the 4 coefficients
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulCharmm::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
 
   allocate();
 
   int i,j;
   int me = comm->me;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       // NOTE(review): fread return values are not checked here
       if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
       MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
       // setflag was just broadcast, so all procs take the same branch
       if (setflag[i][j]) {
         if (me == 0) {
           fread(&epsilon[i][j],sizeof(double),1,fp);
           fread(&sigma[i][j],sizeof(double),1,fp);
           fread(&eps14[i][j],sizeof(double),1,fp);
           fread(&sigma14[i][j],sizeof(double),1,fp);
         }
         MPI_Bcast(&epsilon[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&sigma[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&eps14[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&sigma14[i][j],1,MPI_DOUBLE,0,world);
       }
     }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to data file
    one line per atom type: type epsilon sigma eps14 sigma14 (i,i values)
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulCharmm::write_data(FILE *fp)
 {
   int itype = 1;
   while (itype <= atom->ntypes) {
     fprintf(fp,"%d %g %g %g %g\n",itype,
             epsilon[itype][itype],sigma[itype][itype],
             eps14[itype][itype],sigma14[itype][itype]);
     ++itype;
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes all pairs to data file
    one line per type pair with i <= j: i j epsilon sigma eps14 sigma14
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulCharmm::write_data_all(FILE *fp)
 {
   for (int itype = 1; itype <= atom->ntypes; ++itype) {
     for (int jtype = itype; jtype <= atom->ntypes; ++jtype) {
       fprintf(fp,"%d %d %g %g %g %g\n",itype,jtype,
               epsilon[itype][jtype],sigma[itype][jtype],
               eps14[itype][jtype],sigma14[itype][jtype]);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
   order: cut_lj_inner, cut_lj, cut_coul_inner, cut_coul (doubles),
     then offset_flag, mix_flag (ints)
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulCharmm::write_restart_settings(FILE *fp)
 {
   // the four cutoffs, in file order
 
   const void *cutoffs[] = {&cut_lj_inner,&cut_lj,&cut_coul_inner,&cut_coul};
   for (int m = 0; m < 4; ++m) fwrite(cutoffs[m],sizeof(double),1,fp);
 
   // the two integer flags, in file order
 
   const void *flags[] = {&offset_flag,&mix_flag};
   for (int m = 0; m < 2; ++m) fwrite(flags[m],sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
   order: cut_lj_inner, cut_lj, cut_coul_inner, cut_coul (doubles),
     then offset_flag, mix_flag (ints)
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulCharmm::read_restart_settings(FILE *fp)
 {
   void *cutoffs[] = {&cut_lj_inner,&cut_lj,&cut_coul_inner,&cut_coul};
   void *flags[] = {&offset_flag,&mix_flag};
 
   // only proc 0 touches the file
 
   if (comm->me == 0) {
     for (int m = 0; m < 4; ++m) fread(cutoffs[m],sizeof(double),1,fp);
     for (int m = 0; m < 2; ++m) fread(flags[m],sizeof(int),1,fp);
   }
 
   // every proc receives the values from proc 0
 
   for (int m = 0; m < 4; ++m) MPI_Bcast(cutoffs[m],1,MPI_DOUBLE,0,world);
   for (int m = 0; m < 2; ++m) MPI_Bcast(flags[m],1,MPI_INT,0,world);
 }
 
 /* ----------------------------------------------------------------------
    energy and force factor for a single pair i,j at squared distance rsq
    uses the same Coulomb and LJ expressions, with the same switching,
      as compute()
    factor_coul,factor_lj = weights applied to the Coulomb and LJ terms
    fforce receives (factor_coul*forcecoul + factor_lj*forcelj) / rsq
    returns the weighted Coulomb + LJ energy
 ------------------------------------------------------------------------- */
 
 double PairLJCharmmCoulCharmm::single(int i, int j, int itype, int jtype,
                                       double rsq,
                                       double factor_coul, double factor_lj,
                                       double &fforce)
 {
   double r2inv,r6inv,forcecoul,forcelj,phicoul,philj;
   double switch1,switch2;
 
   // Coulomb force term, switched beyond the inner cutoff
 
   r2inv = 1.0/rsq;
   if (rsq < cut_coulsq) {
     forcecoul = force->qqrd2e * atom->q[i]*atom->q[j]*sqrt(r2inv);
     if (rsq > cut_coul_innersq) {
       switch1 = (cut_coulsq-rsq) * (cut_coulsq-rsq) *
         (cut_coulsq + 2.0*rsq - 3.0*cut_coul_innersq) / denom_coul;
       switch2 = 12.0*rsq * (cut_coulsq-rsq) *
         (rsq-cut_coul_innersq) / denom_coul;
       forcecoul *= switch1 + switch2;
     }
   } else forcecoul = 0.0;
 
   // LJ force term, switched beyond the inner cutoff
 
   if (rsq < cut_ljsq) {
     r6inv = r2inv*r2inv*r2inv;
     forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
     if (rsq > cut_lj_innersq) {
       switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
         (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
       switch2 = 12.0*rsq * (cut_ljsq-rsq) *
         (rsq-cut_lj_innersq) / denom_lj;
       philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
       forcelj = forcelj*switch1 + philj*switch2;
     }
   } else forcelj = 0.0;
 
   fforce = (factor_coul*forcecoul + factor_lj*forcelj) * r2inv;
 
   // energy: each term is scaled by switch1 only
 
   double eng = 0.0;
   if (rsq < cut_coulsq) {
     phicoul = force->qqrd2e * atom->q[i]*atom->q[j]*sqrt(r2inv);
     if (rsq > cut_coul_innersq) {
       switch1 = (cut_coulsq-rsq) * (cut_coulsq-rsq) *
         (cut_coulsq + 2.0*rsq - 3.0*cut_coul_innersq) /
         denom_coul;
       phicoul *= switch1;
     }
     eng += factor_coul*phicoul;
   }
   // r6inv is valid here: it was set above under the same rsq < cut_ljsq test
   if (rsq < cut_ljsq) {
     philj = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
     if (rsq > cut_lj_innersq) {
       switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
         (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
       philj *= switch1;
     }
     eng += factor_lj*philj;
   }
 
   return eng;
 }
 
 /* ----------------------------------------------------------------------
    return a pointer to a named internal quantity
    "lj14_1".."lj14_4" -> per-type-pair tables, dim = 2
    "implicit"         -> address of the implicit flag, dim = 0
    any other name     -> NULL, dim = 0
 ------------------------------------------------------------------------- */
 
 void *PairLJCharmmCoulCharmm::extract(const char *str, int &dim)
 {
   // 2d quantities
 
   const char *names2d[] = {"lj14_1","lj14_2","lj14_3","lj14_4"};
   void *tables2d[] = {(void *) lj14_1,(void *) lj14_2,
                       (void *) lj14_3,(void *) lj14_4};
 
   dim = 2;
   for (int m = 0; m < 4; ++m) {
     if (strcmp(str,names2d[m]) == 0) return tables2d[m];
   }
 
   // scalar quantities
 
   dim = 0;
   if (strcmp(str,"implicit") != 0) return NULL;
   return (void *) &implicit;
 }
diff --git a/src/MOLECULE/pair_lj_cut_tip4p_cut.cpp b/src/MOLECULE/pair_lj_cut_tip4p_cut.cpp
index f76e8260c..3ba9ee7ed 100644
--- a/src/MOLECULE/pair_lj_cut_tip4p_cut.cpp
+++ b/src/MOLECULE/pair_lj_cut_tip4p_cut.cpp
@@ -1,747 +1,747 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Pavel Elkind (Gothenburg University)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_lj_cut_tip4p_cut.h"
 #include "atom.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "domain.h"
 #include "angle.h"
 #include "bond.h"
 #include "comm.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS; 
 using namespace MathConst;
 
 /* ----------------------------------------------------------------------
    constructor: set style flags and start with empty per-atom work arrays
 ------------------------------------------------------------------------- */
 
 PairLJCutTIP4PCut::PairLJCutTIP4PCut(LAMMPS *lmp) : Pair(lmp)
 {
   // per-atom work arrays are sized on demand in compute()
 
   hneigh = NULL;
   newsite = NULL;
   nmax = 0;
 
   // single() is disabled for this style, data-file output is enabled
 
   writedata = 1;
   single_enable = 0;
 
   // TIP4P cannot compute virial as F dot r
   // due to finding bonded H atoms which are not near O atom
 
   no_virial_fdotr_compute = 1;
 }
 
 /* ----------------------------------------------------------------------
    destructor: free the per-atom work arrays and, if they were
    allocated, the per-type arrays
 ------------------------------------------------------------------------- */
 
 PairLJCutTIP4PCut::~PairLJCutTIP4PCut()
 {
   // per-atom work arrays are released unconditionally
 
   memory->destroy(hneigh);
   memory->destroy(newsite);
 
   // per-type arrays exist only once the allocated flag is set
 
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   memory->destroy(cut_lj);
   memory->destroy(cut_ljsq);
   memory->destroy(epsilon);
   memory->destroy(sigma);
   memory->destroy(lj1);
   memory->destroy(lj2);
   memory->destroy(lj3);
   memory->destroy(lj4);
   memory->destroy(offset);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutTIP4PCut::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul;
   double rsq,r2inv,r6inv,forcecoul,forcelj,factor_lj,factor_coul;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   int key;
   int n,vlist[6];
   int iH1,iH2,jH1,jH2;
   double cforce;
   double fO[3],fH[3],fd[3],v[6],xH1[3],xH2[3];
   double *x1,*x2;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   // reallocate hneigh & newsite if necessary
   // initialize hneigh[0] to -1 on steps when reneighboring occurred
   // initialize hneigh[2] to 0 every step
 
   int nlocal = atom->nlocal;
   int nall = nlocal + atom->nghost;
 
   if (atom->nmax > nmax) {
     nmax = atom->nmax;
     memory->destroy(hneigh);
     memory->create(hneigh,nmax,3,"pair:hneigh");
     memory->destroy(newsite);
     memory->create(newsite,nmax,3,"pair:newsite");
   }
   if (neighbor->ago == 0)
     for (i = 0; i < nall; i++) hneigh[i][0] = -1;
   for (i = 0; i < nall; i++) hneigh[i][2] = 0;
 
   double **f = atom->f;
   double **x = atom->x;
   double *q = atom->q;
   tagint *tag = atom->tag;
   int *type = atom->type;
   double *special_lj = force->special_lj;
   double *special_coul = force->special_coul;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
 
     if (itype == typeO) {
       if (hneigh[i][0] < 0) {
         hneigh[i][0] = iH1 = atom->map(tag[i] + 1);
         hneigh[i][1] = iH2 = atom->map(tag[i] + 2);
         hneigh[i][2] = 1;
         if (iH1 == -1 || iH2 == -1)
           error->one(FLERR,"TIP4P hydrogen is missing");
         if (atom->type[iH1] != typeH || atom->type[iH2] != typeH)
           error->one(FLERR,"TIP4P hydrogen has incorrect atom type");
         compute_newsite(x[i],x[iH1],x[iH2],newsite[i]);
       } else {
         iH1 = hneigh[i][0];
         iH2 = hneigh[i][1];
         if (hneigh[i][2] == 0) {
           hneigh[i][2] = 1;
           compute_newsite(x[i],x[iH1],x[iH2],newsite[i]);
         }
       }
       x1 = newsite[i];
     } else x1 = x[i];
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       // LJ interaction based on true rsq
 
       if (rsq < cut_ljsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         r6inv = r2inv*r2inv*r2inv;
         forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
         forcelj *= factor_lj * r2inv;
 
         f[i][0] += delx*forcelj;
         f[i][1] += dely*forcelj;
         f[i][2] += delz*forcelj;
         f[j][0] -= delx*forcelj;
         f[j][1] -= dely*forcelj;
         f[j][2] -= delz*forcelj;
 
         if (eflag) {
           evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
             offset[itype][jtype];
           evdwl *= factor_lj;
         } else evdwl = 0.0;
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,0.0,forcelj,delx,dely,delz);
       }
 
       // adjust rsq and delxyz for off-site O charge(s) if necessary
       // but only if they are within reach
 
       if (rsq < cut_coulsqplus) {
         if (itype == typeO || jtype == typeO) {
 
           // if atom J = water O, set x2 = offset charge site
           // else x2 = x of atom J
 
           if (jtype == typeO) {
             if (hneigh[j][0] < 0) {
               hneigh[j][0] = jH1 = atom->map(tag[j] + 1);
               hneigh[j][1] = jH2 = atom->map(tag[j] + 2);
               hneigh[j][2] = 1;
               if (jH1 == -1 || jH2 == -1)
                 error->one(FLERR,"TIP4P hydrogen is missing");
               if (atom->type[jH1] != typeH || atom->type[jH2] != typeH)
                 error->one(FLERR,"TIP4P hydrogen has incorrect atom type");
               compute_newsite(x[j],x[jH1],x[jH2],newsite[j]);
             } else {
               jH1 = hneigh[j][0];
               jH2 = hneigh[j][1];
               if (hneigh[j][2] == 0) {
                 hneigh[j][2] = 1;
                 compute_newsite(x[j],x[jH1],x[jH2],newsite[j]);
               }
             }
             x2 = newsite[j];
           } else x2 = x[j];
 
           delx = x1[0] - x2[0];
           dely = x1[1] - x2[1];
           delz = x1[2] - x2[2];
           rsq = delx*delx + dely*dely + delz*delz;
         }
 
         // Coulombic interaction based on modified rsq
 
         if (rsq < cut_coulsq) {
           r2inv = 1.0 / rsq;
           forcecoul = qqrd2e * qtmp * q[j] * sqrt(r2inv);
           cforce = factor_coul * forcecoul * r2inv;
 
         // if i,j are not O atoms, force is applied directly;
         // if i or j are O atoms, force is on fictitious atom & partitioned
         // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999)
         // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f
         // preserves total force and torque on water molecule
         // virial = sum(r x F) where each water's atoms are near xi and xj
         // vlist stores 2,4,6 atoms whose forces contribute to virial
 
           n = 0;
           key = 0;
 
           if (itype != typeO) {
             f[i][0] += delx * cforce;
             f[i][1] += dely * cforce;
             f[i][2] += delz * cforce;
 
             if (vflag) {
               v[0] = x[i][0] * delx * cforce;
               v[1] = x[i][1] * dely * cforce;
               v[2] = x[i][2] * delz * cforce;
               v[3] = x[i][0] * dely * cforce;
               v[4] = x[i][0] * delz * cforce;
               v[5] = x[i][1] * delz * cforce;
             }
             vlist[n++] = i;
 
           } else {
             key++;
 
             fd[0] = delx*cforce;
             fd[1] = dely*cforce;
             fd[2] = delz*cforce;
 
             fO[0] = fd[0]*(1.0 - alpha);
             fO[1] = fd[1]*(1.0 - alpha);
             fO[2] = fd[2]*(1.0 - alpha);
 
             fH[0] = 0.5 * alpha * fd[0];
             fH[1] = 0.5 * alpha * fd[1];
             fH[2] = 0.5 * alpha * fd[2];
 
             f[i][0] += fO[0];
             f[i][1] += fO[1];
             f[i][2] += fO[2];
 
             f[iH1][0] += fH[0];
             f[iH1][1] += fH[1];
             f[iH1][2] += fH[2];
 
             f[iH2][0] += fH[0];
             f[iH2][1] += fH[1];
             f[iH2][2] += fH[2];
 
             if(vflag) {
               domain->closest_image(x[i],x[iH1],xH1);
               domain->closest_image(x[i],x[iH2],xH2);
 
               v[0] = x[i][0]*fO[0] + xH1[0]*fH[0] + xH2[0]*fH[0];
               v[1] = x[i][1]*fO[1] + xH1[1]*fH[1] + xH2[1]*fH[1];
               v[2] = x[i][2]*fO[2] + xH1[2]*fH[2] + xH2[2]*fH[2];
               v[3] = x[i][0]*fO[1] + xH1[0]*fH[1] + xH2[0]*fH[1];
               v[4] = x[i][0]*fO[2] + xH1[0]*fH[2] + xH2[0]*fH[2];
               v[5] = x[i][1]*fO[2] + xH1[1]*fH[2] + xH2[1]*fH[2];
             }
             vlist[n++] = i;
             vlist[n++] = iH1;
             vlist[n++] = iH2;
           }
 
           if (jtype != typeO) {
             f[j][0] -= delx * cforce;
             f[j][1] -= dely * cforce;
             f[j][2] -= delz * cforce;
 
             if (vflag) {
               v[0] -= x[j][0] * delx * cforce;
               v[1] -= x[j][1] * dely * cforce;
               v[2] -= x[j][2] * delz * cforce;
               v[3] -= x[j][0] * dely * cforce;
               v[4] -= x[j][0] * delz * cforce;
               v[5] -= x[j][1] * delz * cforce;
             }
             vlist[n++] = j;
 
           } else {
             key += 2;
 
             fd[0] = -delx*cforce;
             fd[1] = -dely*cforce;
             fd[2] = -delz*cforce;
 
             fO[0] = fd[0]*(1 - alpha);
             fO[1] = fd[1]*(1 - alpha);
             fO[2] = fd[2]*(1 - alpha);
 
             fH[0] = 0.5 * alpha * fd[0];
             fH[1] = 0.5 * alpha * fd[1];
             fH[2] = 0.5 * alpha * fd[2];
 
             f[j][0] += fO[0];
             f[j][1] += fO[1];
             f[j][2] += fO[2];
 
             f[jH1][0] += fH[0];
             f[jH1][1] += fH[1];
             f[jH1][2] += fH[2];
 
             f[jH2][0] += fH[0];
             f[jH2][1] += fH[1];
             f[jH2][2] += fH[2];
 
             if (vflag) {
               domain->closest_image(x[j],x[jH1],xH1);
               domain->closest_image(x[j],x[jH2],xH2);
 
               v[0] += x[j][0]*fO[0] + xH1[0]*fH[0] + xH2[0]*fH[0];
               v[1] += x[j][1]*fO[1] + xH1[1]*fH[1] + xH2[1]*fH[1];
               v[2] += x[j][2]*fO[2] + xH1[2]*fH[2] + xH2[2]*fH[2];
               v[3] += x[j][0]*fO[1] + xH1[0]*fH[1] + xH2[0]*fH[1];
               v[4] += x[j][0]*fO[2] + xH1[0]*fH[2] + xH2[0]*fH[2];
               v[5] += x[j][1]*fO[2] + xH1[1]*fH[2] + xH2[1]*fH[2];
             }
             vlist[n++] = j;
             vlist[n++] = jH1;
             vlist[n++] = jH2;
           }
 
           if (eflag) {
             ecoul = qqrd2e * qtmp * q[j] * sqrt(r2inv);
             ecoul *= factor_coul;
           } else ecoul = 0.0;
 
           if (evflag) ev_tally_tip4p(key,vlist,v,ecoul,alpha);
         }
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairLJCutTIP4PCut::allocate()
 {
   // per-type tables are indexed 1..ntypes, so each dimension is ntypes+1
   const int ntypes = atom->ntypes;
   const int dim = ntypes + 1;
 
   allocated = 1;
 
   // only the upper triangle (jtype >= itype) of setflag is cleared here
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   // LJ cutoffs, raw coefficients, and derived prefactors
   memory->create(cut_lj,dim,dim,"pair:cut_lj");
   memory->create(cut_ljsq,dim,dim,"pair:cut_ljsq");
   memory->create(epsilon,dim,dim,"pair:epsilon");
   memory->create(sigma,dim,dim,"pair:sigma");
   memory->create(lj1,dim,dim,"pair:lj1");
   memory->create(lj2,dim,dim,"pair:lj2");
   memory->create(lj3,dim,dim,"pair:lj3");
   memory->create(lj4,dim,dim,"pair:lj4");
   memory->create(offset,dim,dim,"pair:offset");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairLJCutTIP4PCut::settings(int narg, char **arg)
 {
   // expected arguments, in order:
   //   typeO typeH typeB typeA qdist cut_lj [cut_coul]
   // cut_coul falls back to cut_lj when the 7th argument is omitted
 
   if (narg < 6 || narg > 7) error->all(FLERR,"Illegal pair_style command");
 
   typeO = force->inumeric(FLERR,arg[0]);
   typeH = force->inumeric(FLERR,arg[1]);
   typeB = force->inumeric(FLERR,arg[2]);
   typeA = force->inumeric(FLERR,arg[3]);
   qdist = force->numeric(FLERR,arg[4]);
 
   cut_lj_global = force->numeric(FLERR,arg[5]);
   if (narg == 6) cut_coul = cut_lj_global;
   else cut_coul = force->numeric(FLERR,arg[6]);
 
   // squared Coulomb cutoff, and the squared cutoff widened by 2*qdist
   // that compute() uses as a pre-filter before shifting to the M sites
 
   cut_coulsq = cut_coul * cut_coul;
   cut_coulsqplus = (cut_coul + 2.0*qdist) * (cut_coul + 2.0*qdist);
 
   // reset cutoffs that have been explicitly set
   // the loop starts at j = i: coeff() also sets the i,i entries, and
   // they must follow a changed global cutoff like every other pair
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut_lj[i][j] = cut_lj_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairLJCutTIP4PCut::coeff(int narg, char **arg)
 {
   if (narg < 4 || narg > 5)
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double epsilon_one = force->numeric(FLERR,arg[2]);
   double sigma_one = force->numeric(FLERR,arg[3]);
 
   double cut_lj_one = cut_lj_global;
   if (narg == 5) cut_lj_one = force->numeric(FLERR,arg[4]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       epsilon[i][j] = epsilon_one;
       sigma[i][j] = sigma_one;
       cut_lj[i][j] = cut_lj_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLJCutTIP4PCut::init_style()
 {
   // prerequisites checked here: atom IDs, newton pair on, per-atom charge,
   // and both a bond style and an angle style (their equilibrium values
   // are read below to set alpha)
 
   if (atom->tag_enable == 0)
     error->all(FLERR,"Pair style lj/cut/tip4p/cut requires atom IDs");
   if (!force->newton_pair)
     error->all(FLERR,
                "Pair style lj/cut/tip4p/cut requires newton pair on");
   if (!atom->q_flag)
     error->all(FLERR,
                "Pair style lj/cut/tip4p/cut requires atom attribute q");
   if (force->bond == NULL)
     error->all(FLERR,"Must use a bond style with TIP4P potential");
   if (force->angle == NULL)
     error->all(FLERR,"Must use an angle style with TIP4P potential");
   
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 
   // set alpha parameter
   // alpha = qdist / (cos(theta/2) * blen), with theta and blen taken from
   // the angle type typeA and bond type typeB given to pair_style
   // compute_newsite() places the charge site at
   //   xO + alpha * 0.5 * ((xH1 - xO) + (xH2 - xO))
 
   double theta = force->angle->equilibrium_angle(typeA);
   double blen = force->bond->equilibrium_distance(typeB);
   alpha = qdist / (cos(0.5*theta) * blen);
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairLJCutTIP4PCut::init_one(int i, int j)
 {
   // pair without explicit coefficients: derive them from the i,i and j,j
   // entries via the mixing helpers
 
   if (!setflag[i][j]) {
     epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j],
                                sigma[i][i],sigma[j][j]);
     sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
     cut_lj[i][j] = mix_distance(cut_lj[i][i],cut_lj[j][j]);
   }
 
   const double eps_ij = epsilon[i][j];
   const double sig_ij = sigma[i][j];
   const double rc_ij = cut_lj[i][j];
 
   // include TIP4P qdist in full cutoff, qdist = 0.0 if not TIP4P
 
   const double cut_full = MAX(rc_ij,cut_coul+2.0*qdist);
   cut_ljsq[i][j] = rc_ij * rc_ij;
 
   // force (lj1,lj2) and energy (lj3,lj4) prefactors
 
   const double sig_pow12 = pow(sig_ij,12.0);
   const double sig_pow6 = pow(sig_ij,6.0);
   lj1[i][j] = 48.0 * eps_ij * sig_pow12;
   lj2[i][j] = 24.0 * eps_ij * sig_pow6;
   lj3[i][j] = 4.0 * eps_ij * sig_pow12;
   lj4[i][j] = 4.0 * eps_ij * sig_pow6;
 
   // energy shift: the LJ energy evaluated at the LJ cutoff, or zero
 
   if (!offset_flag) {
     offset[i][j] = 0.0;
   } else {
     const double sig_over_rc = sig_ij / rc_ij;
     offset[i][j] =
       4.0 * eps_ij * (pow(sig_over_rc,12.0) - pow(sig_over_rc,6.0));
   }
 
   // mirror the derived tables into the j,i entries
 
   cut_ljsq[j][i] = cut_ljsq[i][j];
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   lj4[j][i] = lj4[i][j];
   offset[j][i] = offset[i][j];
 
   // compute I,J contribution to long-range tail correction
   // count total # of atoms of type I and J via Allreduce
 
   if (tail_flag) {
     const int *atype = atom->type;
     const int nown = atom->nlocal;
 
     double mine[2],total[2];
     mine[0] = 0.0;
     mine[1] = 0.0;
     for (int a = 0; a < nown; ++a) {
       if (atype[a] == i) mine[0] += 1.0;
       if (atype[a] == j) mine[1] += 1.0;
     }
     MPI_Allreduce(mine,total,2,MPI_DOUBLE,MPI_SUM,world);
 
     const double sig2 = sigma[i][j]*sigma[i][j];
     const double sig6 = sig2*sig2*sig2;
     const double rc3 = cut_lj[i][j]*cut_lj[i][j]*cut_lj[i][j];
     const double rc6 = rc3*rc3;
     const double rc9 = rc3*rc6;
     etail_ij = 8.0*MY_PI*total[0]*total[1]*epsilon[i][j] *
       sig6 * (sig6 - 3.0*rc6) / (9.0*rc9);
     ptail_ij = 16.0*MY_PI*total[0]*total[1]*epsilon[i][j] *
       sig6 * (2.0*sig6 - 3.0*rc6) / (9.0*rc9);
   }
 
   // check that LJ epsilon = 0.0 for water H
   // set LJ cutoff to 0.0 for any interaction involving water H
   // so LJ term isn't calculated in compute()
 
   const bool i_is_H = (i == typeH);
   const bool j_is_H = (j == typeH);
 
   if ((i_is_H && epsilon[i][i] != 0.0) ||
       (j_is_H && epsilon[j][j] != 0.0))
     error->all(FLERR,"Water H epsilon must be 0.0 for "
                "pair style lj/cut/tip4p/cut");
 
   if (i_is_H || j_is_H) {
     cut_ljsq[i][j] = 0.0;
     cut_ljsq[j][i] = 0.0;
   }
 
   return cut_full;
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJCutTIP4PCut::write_restart(FILE *fp)
 {
   // global settings come first, then one record per upper-triangle pair
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
 
       // coefficients are stored only for pairs that were explicitly set
       if (!setflag[itype][jtype]) continue;
 
       fwrite(&epsilon[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigma[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut_lj[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJCutTIP4PCut::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   // rank 0 reads each value from the file; every value is then broadcast
   const bool reader = (comm->me == 0);
   const int ntypes = atom->ntypes;
 
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       if (reader) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
 
       // coefficients follow in the file only for pairs flagged as set
       if (!setflag[itype][jtype]) continue;
 
       if (reader) {
         fread(&epsilon[itype][jtype],sizeof(double),1,fp);
         fread(&sigma[itype][jtype],sizeof(double),1,fp);
         fread(&cut_lj[itype][jtype],sizeof(double),1,fp);
       }
       MPI_Bcast(&epsilon[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&sigma[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut_lj[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJCutTIP4PCut::write_restart_settings(FILE *fp)
 {
   // record layout must match read_restart_settings():
   // four int type ids, qdist, both cutoffs, then three int flags
 
   const size_t isize = sizeof(int);
   const size_t dsize = sizeof(double);
 
   fwrite(&typeO,isize,1,fp);
   fwrite(&typeH,isize,1,fp);
   fwrite(&typeB,isize,1,fp);
   fwrite(&typeA,isize,1,fp);
   fwrite(&qdist,dsize,1,fp);
 
   fwrite(&cut_lj_global,dsize,1,fp);
   fwrite(&cut_coul,dsize,1,fp);
   fwrite(&offset_flag,isize,1,fp);
   fwrite(&mix_flag,isize,1,fp);
   fwrite(&tail_flag,isize,1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJCutTIP4PCut::read_restart_settings(FILE *fp)
 {
   const size_t isize = sizeof(int);
   const size_t dsize = sizeof(double);
 
   // rank 0 reads the record in the order write_restart_settings() wrote it
   if (comm->me == 0) {
     fread(&typeO,isize,1,fp);
     fread(&typeH,isize,1,fp);
     fread(&typeB,isize,1,fp);
     fread(&typeA,isize,1,fp);
     fread(&qdist,dsize,1,fp);
 
     fread(&cut_lj_global,dsize,1,fp);
     fread(&cut_coul,dsize,1,fp);
     fread(&offset_flag,isize,1,fp);
     fread(&mix_flag,isize,1,fp);
     fread(&tail_flag,isize,1,fp);
   }
 
   // every value is broadcast from rank 0
   MPI_Bcast(&typeO,1,MPI_INT,0,world);
   MPI_Bcast(&typeH,1,MPI_INT,0,world);
   MPI_Bcast(&typeB,1,MPI_INT,0,world);
   MPI_Bcast(&typeA,1,MPI_INT,0,world);
   MPI_Bcast(&qdist,1,MPI_DOUBLE,0,world);
 
   MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
   MPI_Bcast(&tail_flag,1,MPI_INT,0,world);
 
   // rebuild the derived squared cutoffs exactly as settings() does
   cut_coulsq = cut_coul * cut_coul;
   cut_coulsqplus = (cut_coul + 2.0*qdist) * (cut_coul + 2.0*qdist);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to data file
 ------------------------------------------------------------------------- */
 
 void PairLJCutTIP4PCut::write_data(FILE *fp)
 {
   // one line per atom type: type index, then its i,i epsilon and sigma
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     const double eps_ii = epsilon[itype][itype];
     const double sig_ii = sigma[itype][itype];
     fprintf(fp,"%d %g %g\n",itype,eps_ii,sig_ii);
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes all pairs to data file
 ------------------------------------------------------------------------- */
 
 void PairLJCutTIP4PCut::write_data_all(FILE *fp)
 {
   // one line per upper-triangle pair: I J epsilon sigma cut_lj
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fprintf(fp,"%d %d %g %g %g\n",itype,jtype,
               epsilon[itype][jtype],sigma[itype][jtype],cut_lj[itype][jtype]);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   compute position xM of fictitious charge site for O atom and 2 H atoms
   return it as xM
 ------------------------------------------------------------------------- */
 
 void PairLJCutTIP4PCut::compute_newsite(double *xO,  double *xH1,
                                         double *xH2, double *xM)
 {
   // O->H1 and O->H2 displacements, each passed through minimum_image()
   double dH1[3],dH2[3];
 
   for (int d = 0; d < 3; ++d) dH1[d] = xH1[d] - xO[d];
   domain->minimum_image(dH1[0],dH1[1],dH1[2]);
 
   for (int d = 0; d < 3; ++d) dH2[d] = xH2[d] - xO[d];
   domain->minimum_image(dH2[0],dH2[1],dH2[2]);
 
   // xM = xO + (alpha/2) * (dH1 + dH2)
   const double half_alpha = alpha * 0.5;
   for (int d = 0; d < 3; ++d)
     xM[d] = xO[d] + half_alpha * (dH1[d] + dH2[d]);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void *PairLJCutTIP4PCut::extract(const char *str, int &dim)
 {
   // scalar quantity: reported with dim = 0
   if (!strcmp(str,"cut_coul")) {
     dim = 0;
     return (void *) &cut_coul;
   }
 
   // everything else is reported with dim = 2; dim stays 2 even when the
   // name is not recognized and NULL is returned
   dim = 2;
   void *table = NULL;
   if (!strcmp(str,"epsilon")) table = (void *) epsilon;
   else if (!strcmp(str,"sigma")) table = (void *) sigma;
   return table;
 }
 /* ----------------------------------------------------------------------
    memory usage of hneigh
 ------------------------------------------------------------------------- */
 
 double PairLJCutTIP4PCut::memory_usage()
 {
   double bytes = maxeatom * sizeof(double);
   bytes += maxvatom*6 * sizeof(double);
   bytes += 2 * nmax * sizeof(double);
   return bytes;
 }
diff --git a/src/MOLECULE/pair_tip4p_cut.cpp b/src/MOLECULE/pair_tip4p_cut.cpp
index cd344f246..f6a0d0bb4 100644
--- a/src/MOLECULE/pair_tip4p_cut.cpp
+++ b/src/MOLECULE/pair_tip4p_cut.cpp
@@ -1,551 +1,551 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Pavel Elkind (Gothenburg University)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdlib.h"
 #include "pair_tip4p_cut.h"
 #include "atom.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "domain.h"
 #include "angle.h"
 #include "bond.h"
 #include "comm.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS; 
 using namespace MathConst;
 
 /* ---------------------------------------------------------------------- */
 
 PairTIP4PCut::PairTIP4PCut(LAMMPS *lmp) : Pair(lmp)
 {
   // per-atom work arrays start empty; compute() (re)allocates them
   // whenever atom->nmax exceeds nmax
   hneigh = NULL;
   newsite = NULL;
   nmax = 0;
 
   single_enable = 0;
 
   // TIP4P cannot compute virial as F dot r
   // due to finding bonded H atoms which are not near O atom
 
   no_virial_fdotr_compute = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairTIP4PCut::~PairTIP4PCut()
 {
   // per-atom work arrays are released unconditionally
   memory->destroy(hneigh);
   memory->destroy(newsite);
 
   // per-type tables exist only once allocate() has run
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairTIP4PCut::compute(int eflag, int vflag)
 {
   // Cutoff Coulomb forces (and, when requested, energy/virial) for all
   // neighbor pairs.  For atoms of type typeO the charge acts at the
   // off-site position stored in newsite[], and the resulting force is
   // split between the O atom and its two H atoms.
 
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul;
   double rsq,r2inv,forcecoul,factor_coul;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   int key;
   int n,vlist[6];
   int iH1,iH2,jH1,jH2;
   double cforce;
   double fO[3],fH[3],fd[3],v[6],xH1[3],xH2[3];
   double *x1,*x2;
 
   ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   // reallocate hneigh & newsite if necessary
   // initialize hneigh[0] to -1 on steps when reneighboring occurred
   // initialize hneigh[2] to 0 every step
   // hneigh[k][0], hneigh[k][1] = cached local indices of the two H atoms
   // hneigh[k][2] = 1 once newsite[k] has been computed on this step
 
   int nlocal = atom->nlocal;
   int nall = nlocal + atom->nghost;
 
   if (atom->nmax > nmax) {
     nmax = atom->nmax;
     memory->destroy(hneigh);
     memory->create(hneigh,nmax,3,"pair:hneigh");
     memory->destroy(newsite);
     memory->create(newsite,nmax,3,"pair:newsite");
   }
   if (neighbor->ago == 0)
     for (i = 0; i < nall; i++) hneigh[i][0] = -1;
   for (i = 0; i < nall; i++) hneigh[i][2] = 0;
 
   double **f = atom->f;
   double **x = atom->x;
   double *q = atom->q;
   tagint *tag = atom->tag;
   int *type = atom->type;
   double *special_coul = force->special_coul;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
 
     // if atom I = water O, set x1 = offset charge site
     // else x1 = x of atom I
     // the H atoms are looked up as the atoms with IDs tag+1 and tag+2
 
     if (itype == typeO) {
       if (hneigh[i][0] < 0) {
         hneigh[i][0] = iH1 = atom->map(tag[i] + 1);
         hneigh[i][1] = iH2 = atom->map(tag[i] + 2);
         hneigh[i][2] = 1;
         if (iH1 == -1 || iH2 == -1)
           error->one(FLERR,"TIP4P hydrogen is missing");
         if (atom->type[iH1] != typeH || atom->type[iH2] != typeH)
           error->one(FLERR,"TIP4P hydrogen has incorrect atom type");
         compute_newsite(x[i],x[iH1],x[iH2],newsite[i]);
       } else {
         iH1 = hneigh[i][0];
         iH2 = hneigh[i][1];
         if (hneigh[i][2] == 0) {
           hneigh[i][2] = 1;
           compute_newsite(x[i],x[iH1],x[iH2],newsite[i]);
         }
       }
       x1 = newsite[i];
     } else x1 = x[i];
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       // special_coul weight is selected by sbmask(j); NEIGHMASK then
       // reduces j to the plain atom index
       j = jlist[jj];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       // adjust rsq and delxyz for off-site O charge(s) if necessary
       // but only if they are within reach
 
       if (rsq < cut_coulsqplus) {
         if (itype == typeO || jtype == typeO) {
 
           // if atom J = water O, set x2 = offset charge site
           // else x2 = x of atom J
 
           if (jtype == typeO) {
             if (hneigh[j][0] < 0) {
               hneigh[j][0] = jH1 = atom->map(tag[j] + 1);
               hneigh[j][1] = jH2 = atom->map(tag[j] + 2);
               hneigh[j][2] = 1;
               if (jH1 == -1 || jH2 == -1)
                 error->one(FLERR,"TIP4P hydrogen is missing");
               if (atom->type[jH1] != typeH || atom->type[jH2] != typeH)
                 error->one(FLERR,"TIP4P hydrogen has incorrect atom type");
               compute_newsite(x[j],x[jH1],x[jH2],newsite[j]);
             } else {
               jH1 = hneigh[j][0];
               jH2 = hneigh[j][1];
               if (hneigh[j][2] == 0) {
                 hneigh[j][2] = 1;
                 compute_newsite(x[j],x[jH1],x[jH2],newsite[j]);
               }
             }
             x2 = newsite[j];
           } else x2 = x[j];
 
           delx = x1[0] - x2[0];
           dely = x1[1] - x2[1];
           delz = x1[2] - x2[2];
           rsq = delx*delx + dely*dely + delz*delz;
         }
 
         // Coulombic interaction based on modified rsq
 
         if (rsq < cut_coulsq) {
           r2inv = 1.0 / rsq;
           forcecoul = qqrd2e * qtmp * q[j] * sqrt(r2inv);
           cforce = factor_coul * forcecoul * r2inv;
 
         // if i,j are not O atoms, force is applied directly;
         // if i or j are O atoms, force is on fictitious atom & partitioned
         // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999)
         // f_f = fictitious force; with alpha as set in init_style() the
         // code below uses fO = f_f (1 - alpha), fH = 0.5 alpha f_f per H
         // virial = sum(r x F) where each water's atoms are near xi and xj
         // vlist stores 2,4,6 atoms whose forces contribute to virial
         // key = 0 (no O), 1 (only i is O), 2 (only j is O), 3 (both)
 
           n = 0;
           key = 0;
 
           if (itype != typeO) {
             f[i][0] += delx * cforce;
             f[i][1] += dely * cforce;
             f[i][2] += delz * cforce;
 
             if (vflag) {
               v[0] = x[i][0] * delx * cforce;
               v[1] = x[i][1] * dely * cforce;
               v[2] = x[i][2] * delz * cforce;
               v[3] = x[i][0] * dely * cforce;
               v[4] = x[i][0] * delz * cforce;
               v[5] = x[i][1] * delz * cforce;
             }
             vlist[n++] = i;
 
           } else {
             key++;
 
             fd[0] = delx*cforce;
             fd[1] = dely*cforce;
             fd[2] = delz*cforce;
 
             fO[0] = fd[0]*(1.0 - alpha);
             fO[1] = fd[1]*(1.0 - alpha);
             fO[2] = fd[2]*(1.0 - alpha);
 
             fH[0] = 0.5 * alpha * fd[0];
             fH[1] = 0.5 * alpha * fd[1];
             fH[2] = 0.5 * alpha * fd[2];
 
             f[i][0] += fO[0];
             f[i][1] += fO[1];
             f[i][2] += fO[2];
 
             f[iH1][0] += fH[0];
             f[iH1][1] += fH[1];
             f[iH1][2] += fH[2];
 
             f[iH2][0] += fH[0];
             f[iH2][1] += fH[1];
             f[iH2][2] += fH[2];
 
             if(vflag) {
               // H positions are taken as the images closest to the O atom
               domain->closest_image(x[i],x[iH1],xH1);
               domain->closest_image(x[i],x[iH2],xH2);
 
               v[0] = x[i][0]*fO[0] + xH1[0]*fH[0] + xH2[0]*fH[0];
               v[1] = x[i][1]*fO[1] + xH1[1]*fH[1] + xH2[1]*fH[1];
               v[2] = x[i][2]*fO[2] + xH1[2]*fH[2] + xH2[2]*fH[2];
               v[3] = x[i][0]*fO[1] + xH1[0]*fH[1] + xH2[0]*fH[1];
               v[4] = x[i][0]*fO[2] + xH1[0]*fH[2] + xH2[0]*fH[2];
               v[5] = x[i][1]*fO[2] + xH1[1]*fH[2] + xH2[1]*fH[2];
             }
             vlist[n++] = i;
             vlist[n++] = iH1;
             vlist[n++] = iH2;
           }
 
           // same treatment for atom J with the opposite force sign;
           // its virial terms are accumulated onto the values set above
 
           if (jtype != typeO) {
             f[j][0] -= delx * cforce;
             f[j][1] -= dely * cforce;
             f[j][2] -= delz * cforce;
 
             if (vflag) {
               v[0] -= x[j][0] * delx * cforce;
               v[1] -= x[j][1] * dely * cforce;
               v[2] -= x[j][2] * delz * cforce;
               v[3] -= x[j][0] * dely * cforce;
               v[4] -= x[j][0] * delz * cforce;
               v[5] -= x[j][1] * delz * cforce;
             }
             vlist[n++] = j;
 
           } else {
             key += 2;
 
             fd[0] = -delx*cforce;
             fd[1] = -dely*cforce;
             fd[2] = -delz*cforce;
 
             fO[0] = fd[0]*(1 - alpha);
             fO[1] = fd[1]*(1 - alpha);
             fO[2] = fd[2]*(1 - alpha);
 
             fH[0] = 0.5 * alpha * fd[0];
             fH[1] = 0.5 * alpha * fd[1];
             fH[2] = 0.5 * alpha * fd[2];
 
             f[j][0] += fO[0];
             f[j][1] += fO[1];
             f[j][2] += fO[2];
 
             f[jH1][0] += fH[0];
             f[jH1][1] += fH[1];
             f[jH1][2] += fH[2];
 
             f[jH2][0] += fH[0];
             f[jH2][1] += fH[1];
             f[jH2][2] += fH[2];
 
             if (vflag) {
               domain->closest_image(x[j],x[jH1],xH1);
               domain->closest_image(x[j],x[jH2],xH2);
 
               v[0] += x[j][0]*fO[0] + xH1[0]*fH[0] + xH2[0]*fH[0];
               v[1] += x[j][1]*fO[1] + xH1[1]*fH[1] + xH2[1]*fH[1];
               v[2] += x[j][2]*fO[2] + xH1[2]*fH[2] + xH2[2]*fH[2];
               v[3] += x[j][0]*fO[1] + xH1[0]*fH[1] + xH2[0]*fH[1];
               v[4] += x[j][0]*fO[2] + xH1[0]*fH[2] + xH2[0]*fH[2];
               v[5] += x[j][1]*fO[2] + xH1[1]*fH[2] + xH2[1]*fH[2];
             }
             vlist[n++] = j;
             vlist[n++] = jH1;
             vlist[n++] = jH2;
           }
 
           // pair energy qqrd2e*qi*qj/r, scaled by the special_coul weight
 
           if (eflag) {
             ecoul = qqrd2e * qtmp * q[j] * sqrt(r2inv);
             ecoul *= factor_coul;
           } else ecoul = 0.0;
 
           if (evflag) ev_tally_tip4p(key,vlist,v,ecoul,alpha);
         }
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairTIP4PCut::allocate()
 {
   // per-type tables are indexed 1..ntypes, so each dimension is ntypes+1
   const int ntypes = atom->ntypes;
   const int dim = ntypes + 1;
 
   allocated = 1;
 
   // only the upper triangle (jtype >= itype) of setflag is cleared here
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairTIP4PCut::settings(int narg, char **arg)
 {
   if (narg != 6) error->all(FLERR,"Illegal pair_style command");
 
   typeO = force->inumeric(FLERR,arg[0]);
   typeH = force->inumeric(FLERR,arg[1]);
   typeB = force->inumeric(FLERR,arg[2]);
   typeA = force->inumeric(FLERR,arg[3]);
   qdist = force->numeric(FLERR,arg[4]);
   cut_coul = force->numeric(FLERR,arg[5]);
 
   cut_coulsq = cut_coul * cut_coul;
   cut_coulsqplus = (cut_coul + 2.0*qdist) * (cut_coul + 2.0*qdist);
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairTIP4PCut::coeff(int narg, char **arg)
 {
   // arguments: I J only -- this style has no per-pair coefficients
   const bool wrong_count = (narg != 2);
   if (wrong_count)
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   // expand the two type specifiers into inclusive index ranges
   int type_i_lo,type_i_hi,type_j_lo,type_j_hi;
   force->bounds(arg[0],atom->ntypes,type_i_lo,type_i_hi);
   force->bounds(arg[1],atom->ntypes,type_j_lo,type_j_hi);
 
   // mark the upper triangle only (j >= i)
   int npairs_set = 0;
   for (int it = type_i_lo; it <= type_i_hi; ++it) {
     const int jt_first = (type_j_lo > it) ? type_j_lo : it;
     for (int jt = jt_first; jt <= type_j_hi; ++jt) {
       setflag[it][jt] = 1;
       ++npairs_set;
     }
   }
 
   if (npairs_set == 0)
     error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairTIP4PCut::init_style()
 {
   // prerequisites checked here: atom IDs, newton pair on, per-atom charge,
   // and both a bond style and an angle style (their equilibrium values
   // are read below to set alpha)
 
   if (atom->tag_enable == 0)
     error->all(FLERR,"Pair style tip4p/cut requires atom IDs");
   if (!force->newton_pair)
     error->all(FLERR,
                "Pair style tip4p/cut requires newton pair on");
   if (!atom->q_flag)
     error->all(FLERR,
                "Pair style tip4p/cut requires atom attribute q");
   if (force->bond == NULL)
     error->all(FLERR,"Must use a bond style with TIP4P potential");
   if (force->angle == NULL)
     error->all(FLERR,"Must use an angle style with TIP4P potential");
   
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 
   // set alpha parameter
   // alpha = qdist / (cos(theta/2) * blen), with theta and blen taken from
   // the angle type typeA and bond type typeB given to pair_style
   // compute_newsite() places the charge site at
   //   xO + alpha * 0.5 * ((xH1 - xO) + (xH2 - xO))
 
   double theta = force->angle->equilibrium_angle(typeA);
   double blen = force->bond->equilibrium_distance(typeB);
   alpha = qdist / (cos(0.5*theta) * blen);
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairTIP4PCut::init_one(int i, int j)
 {
   // the same cutoff is returned for every type pair; i and j are unused
   // include TIP4P qdist in full cutoff, qdist = 0.0 if not TIP4P
 
   const double cut_full = cut_coul+2.0*qdist;
   return cut_full;
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairTIP4PCut::write_restart(FILE *fp)
 {
   // global settings first, then the setflag value of each
   // upper-triangle type pair
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairTIP4PCut::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   // rank 0 reads each flag from the file; it is then broadcast
   const bool reader = (comm->me == 0);
   const int ntypes = atom->ntypes;
 
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       if (reader) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairTIP4PCut::write_restart_settings(FILE *fp)
 {
   // record layout must match read_restart_settings():
   // four int type ids, then qdist and cut_coul as doubles
 
   const size_t isize = sizeof(int);
   const size_t dsize = sizeof(double);
 
   fwrite(&typeO,isize,1,fp);
   fwrite(&typeH,isize,1,fp);
   fwrite(&typeB,isize,1,fp);
   fwrite(&typeA,isize,1,fp);
   fwrite(&qdist,dsize,1,fp);
 
   fwrite(&cut_coul,dsize,1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairTIP4PCut::read_restart_settings(FILE *fp)
 {
   const size_t isize = sizeof(int);
   const size_t dsize = sizeof(double);
 
   // rank 0 reads the record in the order write_restart_settings() wrote it
   if (comm->me == 0) {
     fread(&typeO,isize,1,fp);
     fread(&typeH,isize,1,fp);
     fread(&typeB,isize,1,fp);
     fread(&typeA,isize,1,fp);
     fread(&qdist,dsize,1,fp);
 
     fread(&cut_coul,dsize,1,fp);
   }
 
   // every value is broadcast from rank 0
   MPI_Bcast(&typeO,1,MPI_INT,0,world);
   MPI_Bcast(&typeH,1,MPI_INT,0,world);
   MPI_Bcast(&typeB,1,MPI_INT,0,world);
   MPI_Bcast(&typeA,1,MPI_INT,0,world);
   MPI_Bcast(&qdist,1,MPI_DOUBLE,0,world);
 
   MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
 
   // rebuild the derived squared cutoffs exactly as settings() does
   cut_coulsq = cut_coul * cut_coul;
   cut_coulsqplus = (cut_coul + 2.0*qdist) * (cut_coul + 2.0*qdist);
 }
 
 /* ----------------------------------------------------------------------
   compute position xM of fictitious charge site for O atom and 2 H atoms
   return it as xM
 ------------------------------------------------------------------------- */
 
 void PairTIP4PCut::compute_newsite(double *xO,  double *xH1,
                                         double *xH2, double *xM)
 {
   // O->H1 and O->H2 displacements, each passed through minimum_image()
   double dH1[3],dH2[3];
 
   for (int d = 0; d < 3; ++d) dH1[d] = xH1[d] - xO[d];
   domain->minimum_image(dH1[0],dH1[1],dH1[2]);
 
   for (int d = 0; d < 3; ++d) dH2[d] = xH2[d] - xO[d];
   domain->minimum_image(dH2[0],dH2[1],dH2[2]);
 
   // xM = xO + (alpha/2) * (dH1 + dH2)
   const double half_alpha = alpha * 0.5;
   for (int d = 0; d < 3; ++d)
     xM[d] = xO[d] + half_alpha * (dH1[d] + dH2[d]);
 }
 
 /* ----------------------------------------------------------------------
    memory usage of hneigh
 ------------------------------------------------------------------------- */
 
 double PairTIP4PCut::memory_usage()
 {
   double bytes = maxeatom * sizeof(double);
   bytes += maxvatom*6 * sizeof(double);
   bytes += 2 * nmax * sizeof(double);
   return bytes;
 }
diff --git a/src/PERI/fix_peri_neigh.cpp b/src/PERI/fix_peri_neigh.cpp
index 922986c30..1b79a4c60 100644
--- a/src/PERI/fix_peri_neigh.cpp
+++ b/src/PERI/fix_peri_neigh.cpp
@@ -1,645 +1,645 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Mike Parks (SNL), Rezwanur Rahman, J.T. Foster (UTSA)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "fix_peri_neigh.h"
 #include "pair_peri_pmb.h"
 #include "pair_peri_lps.h"
 #include "pair_peri_ves.h"
 #include "pair_peri_eps.h"
 #include "atom.h"
 #include "domain.h"
 #include "force.h"
 #include "comm.h"
 #include "update.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "pair.h"
 #include "lattice.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
 /* ---------------------------------------------------------------------- */
 
 FixPeriNeigh::FixPeriNeigh(LAMMPS *lmp,int narg, char **arg) :
   Fix(lmp, narg, arg)
 {
   // one flag per peridynamic pair style, set from force->pair_match()
 
   isPMB = force->pair_match("peri/pmb",1) ? 1 : 0;
   isLPS = force->pair_match("peri/lps",1) ? 1 : 0;
   isVES = force->pair_match("peri/ves",1) ? 1 : 0;
   isEPS = force->pair_match("peri/eps",1) ? 1 : 0;
 
   // fix stores both global and per-atom restart data
   // first stays 1 until setup() has built the partner lists
 
   restart_global = 1;
   restart_peratom = 1;
   first = 1;
 
   // all atom-based arrays start unallocated
   // maxpartner = 1 is only a placeholder until setup() computes it
 
   maxpartner = 1;
   npartner = NULL;
   partner = NULL;
   r0 = NULL;
   deviatorextention = NULL;
   deviatorBackextention = NULL;
   deviatorPlasticextension = NULL;
   lambdaValue = NULL;
   vinter = NULL;
   wvolume = NULL;
 
   // initial allocation, then register callbacks 0 and 1 with atom class
 
   grow_arrays(atom->nmax);
   atom->add_callback(0);
   atom->add_callback(1);
 
   // zero partner counts of current local atoms
   // so atom migration is OK the 1st time
 
   const int nlocal = atom->nlocal;
   for (int iatom = 0; iatom < nlocal; ++iatom) {
     npartner[iatom] = 0;
   }
 
   // one value per atom is sent in forward communication
 
   comm_forward = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 FixPeriNeigh::~FixPeriNeigh()
 {
   // remove the two callbacks registered in the constructor
   // so atom class doesn't invoke this fix any more
 
   atom->delete_callback(id,0);
   atom->delete_callback(id,1);
 
   // release per-atom arrays
 
   memory->destroy(npartner);
   memory->destroy(lambdaValue);
   memory->destroy(vinter);
   memory->destroy(wvolume);
 
   // release per-partner arrays
 
   memory->destroy(partner);
   memory->destroy(r0);
   memory->destroy(deviatorextention);
   memory->destroy(deviatorBackextention);
   memory->destroy(deviatorPlasticextension);
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixPeriNeigh::setmask()
 {
   // no mask bits are set by this fix
 
   return 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixPeriNeigh::init()
 {
   // nothing to do once setup() has cleared the first flag
 
   if (!first) return;
 
   // need a full neighbor list once
   // request is flagged as fix-owned, full (not half) and occasional
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->fix  = 1;
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
   neighbor->requests[irequest]->occasional = 1;
 
   // compute PD scale factor, stored in Atom class, used by DumpCFG
   // vave = sum of vfrac over all procs, divided by natoms when natoms != 0
   // pdscale = 1.44 / vave^(1/3), or 1.0 when vave is not positive
 
   int nlocal = atom->nlocal;
   double vone = 0.0;
   for (int i = 0; i < nlocal; i++) vone += atom->vfrac[i];
   double vave;
   MPI_Allreduce(&vone,&vave,1,MPI_DOUBLE,MPI_SUM,world);
   if (atom->natoms) vave /= atom->natoms;
   if (vave > 0.0) atom->pdscale = 1.44 / pow(vave,1.0/3.0);
   else atom->pdscale = 1.0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixPeriNeigh::init_list(int id, NeighList *ptr)
 {
   list = ptr;
 }
 
 /* ----------------------------------------------------------------------
    For minimization: setup as with dynamics
 ------------------------------------------------------------------------- */
 
 void FixPeriNeigh::min_setup(int vflag)
 {
   setup(vflag);
 }
 
 /* ----------------------------------------------------------------------
    create initial list of neighbor partners via call to neighbor->build()
    must be done in setup (not init) since fix init comes before neigh init
 ------------------------------------------------------------------------- */
 
 void FixPeriNeigh::setup(int vflag)
 {
   // one-time construction of the per-atom partner (bond) lists:
   //   pass 1 over the full neighbor list counts partners to size maxpartner
   //   pass 2 fills partner, r0 and vinter after the arrays are reallocated
   //   then wvolume is accumulated per atom and sent to ghosts
   // vflag is not used
 
   int i,j,ii,jj,itype,jtype,inum,jnum;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
   int *ilist,*jlist,*numneigh;
   int **firstneigh;
 
   double **x = atom->x;
   double *vfrac = atom->vfrac;
   int *type = atom->type;
   tagint *tag = atom->tag;
   int nlocal = atom->nlocal;
 
   // only build list of bonds on very first run
 
   if (!first) return;
   first = 0;
 
   // build full neighbor list, will copy or build as necessary
 
   neighbor->build_one(list);
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // scan neighbor list to set maxpartner
   // counts are added to npartner[], which the constructor zeroed
   // NOTE(review): anypair is dereferenced without a NULL check;
   //   presumably a peri pair style always exists when this fix does - confirm
 
   Pair *anypair = force->pair_match("peri",0);
   double **cutsq = anypair->cutsq;
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
       if (rsq <= cutsq[itype][jtype]) npartner[i]++;
     }
   }
 
   // maxpartner = largest partner count over all procs
 
   maxpartner = 0;
   for (i = 0; i < nlocal; i++) maxpartner = MAX(maxpartner,npartner[i]);
   int maxall;
   MPI_Allreduce(&maxpartner,&maxall,1,MPI_INT,MPI_MAX,world);
   maxpartner = maxall;
 
   // realloc arrays with correct value for maxpartner
   // vinter and wvolume are not destroyed here, grow_arrays() resizes them
 
   memory->destroy(partner);
   memory->destroy(deviatorextention);
   memory->destroy(deviatorBackextention);
   memory->destroy(deviatorPlasticextension);
   memory->destroy(lambdaValue);  
   memory->destroy(r0);
   memory->destroy(npartner);
 
   npartner = NULL;
   partner = NULL;
   deviatorextention = NULL;
   deviatorBackextention = NULL;
   deviatorPlasticextension = NULL;
   lambdaValue = NULL;
   r0 = NULL;   
   grow_arrays(atom->nmax);
 
   // create partner list and r0 values from neighbor list
   // compute vinter for each atom
 
   for (i = 0; i < nlocal; i++) {
     npartner[i] = 0;
     vinter[i] = 0.0;
     wvolume[i] = 0.0;
     if (isEPS) lambdaValue[i] = 0.0;
   }
 
   // second pass applies the same cutoff test as the counting pass
   // partner stores the atom ID (tag) of j, r0 the current I-J distance
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq <= cutsq[itype][jtype]) {
         partner[i][npartner[i]] = tag[j];
         if (isVES)
           deviatorextention[i][npartner[i]] = 
             deviatorBackextention[i][npartner[i]] = 0.0;
         if (isEPS)
            deviatorPlasticextension[i][npartner[i]] = 0.0;
         r0[i][npartner[i]] = sqrt(rsq);   
         npartner[i]++;
         vinter[i] += vfrac[j];
       }
     }
   }
 
   // sanity check: does any atom appear twice in any neighbor list?
   // should only be possible if using pbc and domain < 2*delta
 
   if (domain->xperiodic || domain->yperiodic || domain->zperiodic) {
     for (i = 0; i < nlocal; i++) {
       jnum = npartner[i];
       for (jj = 0; jj < jnum; jj++) {
         for (int kk = jj+1; kk < jnum; kk++) {
           if (partner[i][jj] == partner[i][kk])
             error->one(FLERR,"Duplicate particle in PeriDynamic bond - "
                        "simulation box is too small");
         }
       }
     }
   }
 
   // compute wvolume for each atom
   // anypair is cast to each pair class up front,
   // only the cast matching the is* flag is dereferenced below
 
   double **x0 = atom->x0;
   double half_lc = 0.5*(domain->lattice->xlattice);
   double vfrac_scale;
   PairPeriLPS *pairlps = static_cast<PairPeriLPS*>(anypair);
   PairPeriVES *pairves = static_cast<PairPeriVES*>(anypair);
   PairPeriEPS *paireps = static_cast<PairPeriEPS*>(anypair);
 
   for (i = 0; i < nlocal; i++) {
     double xtmp0 = x0[i][0];
     double ytmp0 = x0[i][1];
     double ztmp0 = x0[i][2];
     jnum = npartner[i];
     itype = type[i];
 
     // loop over partners of particle i
 
     for (jj = 0; jj < jnum; jj++) {
 
       // if bond already broken, skip this partner
 
       if (partner[i][jj] == 0) continue;
 
       // lookup local index of partner particle
 
       j = atom->map(partner[i][jj]);
 
       // skip if particle is "lost"
 
       if (j < 0) continue;
 
       // separation of I and J taken from the x0 coordinates
 
       double delx0 = xtmp0 - x0[j][0];
       double dely0 = ytmp0 - x0[j][1];
       double delz0 = ztmp0 - x0[j][2];
 
       double rsq0 = delx0*delx0 + dely0*dely0 + delz0*delz0;
 
       jtype = type[j];
       double delta = sqrt(cutsq[itype][jtype]);
 
       // scale vfrac[j] if particle j near the horizon
       // i.e. r0 within half_lc of delta: linear in r0, else scale = 1
 
       if ((fabs(r0[i][jj] - delta)) <= half_lc)
         vfrac_scale = (-1.0/(2*half_lc))*(r0[i][jj]) +
           (1.0 + ((delta - half_lc)/(2*half_lc) ) );
       else vfrac_scale = 1.0;
 
       // for PMB, influence = 1.0, otherwise invoke influence function
       if (isPMB) 
         wvolume[i] += 1.0 * rsq0 * vfrac[j] * vfrac_scale; 
       else if (isLPS)
         wvolume[i] += pairlps->influence_function(delx0,dely0,delz0) *
           rsq0 * vfrac[j] * vfrac_scale;
       else if (isVES)
         wvolume[i] += pairves->influence_function(delx0,dely0,delz0) *
           rsq0 * vfrac[j] * vfrac_scale;
       else if (isEPS)
         wvolume[i] += paireps->influence_function(delx0,dely0,delz0) *
           rsq0 * vfrac[j] * vfrac_scale;    
     }
   }
 
   // communicate wvolume to ghosts
 
   comm->forward_comm_fix(this);
 
   // bond statistics
   // NOTE(review): the global bond count is summed in an int and
   //   bonds/atom divides by atom->natoms without a zero check
 
   int n = 0;
   for (i = 0; i < nlocal; i++) n += npartner[i];
   int nall;
   MPI_Allreduce(&n,&nall,1,MPI_INT,MPI_SUM,world);
 
   if (comm->me == 0) {
     if (screen) {
       fprintf(screen,"Peridynamic bonds:\n");
       fprintf(screen,"  total # of bonds = %d\n",nall);
       fprintf(screen,"  bonds/atom = %g\n",(double)nall/atom->natoms);
     }
     if (logfile) {
       fprintf(logfile,"Peridynamic bonds:\n");
       fprintf(logfile,"  total # of bonds = %d\n",nall);
       fprintf(logfile,"  bonds/atom = %g\n",(double)nall/atom->natoms);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
 ------------------------------------------------------------------------- */
 
 double FixPeriNeigh::memory_usage()
 {
   // report the size in bytes of the arrays allocated by grow_arrays()
   // accumulate in double: an int total wraps once the arrays pass 2 GB,
   // and the int product nmax*maxpartner can overflow before that
 
   const double nmax = atom->nmax;
   const double nbondmax = nmax * maxpartner;
 
   double bytes = nmax * sizeof(int);            // npartner
   bytes += nbondmax * sizeof(tagint);           // partner
   bytes += nbondmax * sizeof(double);           // r0
   if (isVES) {
     bytes += nbondmax * sizeof(double);         // deviatorextention
     bytes += nbondmax * sizeof(double);         // deviatorBackextention
   }
   if (isEPS) {
     bytes += nbondmax * sizeof(double);         // deviatorPlasticextension
     bytes += nmax * sizeof(double);             // lambdaValue
   }
   bytes += nmax * sizeof(double);               // vinter
   bytes += nmax * sizeof(double);               // wvolume
   return bytes;
 }
 
 /* ----------------------------------------------------------------------
    allocate local atom-based arrays
 ------------------------------------------------------------------------- */
 
 void FixPeriNeigh::grow_arrays(int nmax)
 {
   // partner count per atom and nmax x maxpartner table of partners
 
   memory->grow(npartner,nmax,"peri_neigh:npartner");
   memory->grow(partner,nmax,maxpartner,"peri_neigh:partner");
 
   // per-partner arrays allocated only when isVES is set
 
   if (isVES) {
     memory->grow(deviatorextention,nmax,maxpartner,
                  "peri_neigh:deviatorextention");
     memory->grow(deviatorBackextention,nmax,maxpartner,
                  "peri_neigh:deviatorBackextention");
   }
 
   // per-partner array allocated only when isEPS is set
 
   if (isEPS) {
     memory->grow(deviatorPlasticextension,nmax,maxpartner,
                  "peri_neigh:deviatorPlasticextension");
   }
 
   memory->grow(r0,nmax,maxpartner,"peri_neigh:r0");
 
   // per-atom arrays, lambdaValue only when isEPS is set
 
   if (isEPS) {
     memory->grow(lambdaValue,nmax,"peri_neigh:lambdaValue");
   }
   memory->grow(vinter,nmax,"peri_neigh:vinter");
   memory->grow(wvolume,nmax,"peri_neigh:wvolume");
 }
 
 /* ----------------------------------------------------------------------
    copy values within local atom-based arrays
 ------------------------------------------------------------------------- */
 
 void FixPeriNeigh::copy_arrays(int i, int j, int delflag)
 {
   // copy every stored value of atom i into slot j; delflag is not used
 
   const int nbond = npartner[i];
   npartner[j] = nbond;
 
   // per-partner values, only the first nbond entries are copied
 
   for (int k = 0; k < nbond; ++k) {
     partner[j][k] = partner[i][k];
     if (isVES) {
       deviatorextention[j][k] = deviatorextention[i][k];
       deviatorBackextention[j][k] = deviatorBackextention[i][k];
     }
     if (isEPS) {
       deviatorPlasticextension[j][k] = deviatorPlasticextension[i][k];
     }
     r0[j][k] = r0[i][k];
   }
 
   // per-atom values
 
   if (isEPS) {
     lambdaValue[j] = lambdaValue[i];
   }
   vinter[j] = vinter[i];
   wvolume[j] = wvolume[i];
 }
 
 /* ----------------------------------------------------------------------
    pack values in local atom-based arrays for exchange with another proc
 ------------------------------------------------------------------------- */
 
 int FixPeriNeigh::pack_exchange(int i, double *buf)
 {
   // buf[0] is reserved for the partner count, data starts at buf[1]
   // partner = 0 entries are dropped, which compacts the list
 
   int m = 1;
   const int nbond = npartner[i];
   for (int k = 0; k < nbond; ++k) {
     if (partner[i][k] != 0) {
       buf[m++] = partner[i][k];
       if (isVES) {
         buf[m++] = deviatorextention[i][k];
         buf[m++] = deviatorBackextention[i][k];
       }
       if (isEPS) {
         buf[m++] = deviatorPlasticextension[i][k];
       }
       buf[m++] = r0[i][k];
     }
   }
 
   // integer division of the offset by the number of values per partner
   // (4 with isVES, 3 with isEPS, 2 otherwise) gives the packed count
 
   const int divisor = isVES ? 4 : (isEPS ? 3 : 2);
   buf[0] = m/divisor;
 
   // per-atom values follow the partner data
 
   if (isEPS) {
     buf[m++] = lambdaValue[i];
   }
   buf[m++] = vinter[i];
   buf[m++] = wvolume[i];
   return m;
 }
 
 /* ----------------------------------------------------------------------
    unpack values in local atom-based arrays from exchange with another proc
 ------------------------------------------------------------------------- */
 
 int FixPeriNeigh::unpack_exchange(int nlocal, double *buf)
 {
   // layout matches pack_exchange(): count, per-partner data, per-atom data
 
   int m = 0;
   const int nbond = static_cast<int> (buf[m++]);
   npartner[nlocal] = nbond;
 
   for (int k = 0; k < nbond; ++k) {
     partner[nlocal][k] = static_cast<tagint> (buf[m++]);
     if (isVES) {
       deviatorextention[nlocal][k] = buf[m++];
       deviatorBackextention[nlocal][k] = buf[m++];
     }
     if (isEPS) {
       deviatorPlasticextension[nlocal][k] = buf[m++];
     }
     r0[nlocal][k] = buf[m++];
   }
 
   if (isEPS) {
     lambdaValue[nlocal] = buf[m++];
   }
   vinter[nlocal] = buf[m++];
   wvolume[nlocal] = buf[m++];
 
   // number of buf values consumed
 
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixPeriNeigh::pack_forward_comm(int n, int *list, double *buf,
                                     int pbc_flag, int *pbc)
 {
   // pack wvolume of the n atoms indexed by list; pbc_flag and pbc unused
 
   int m = 0;
   for (int k = 0; k < n; ++k) {
     buf[m++] = wvolume[list[k]];
   }
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixPeriNeigh::unpack_forward_comm(int n, int first, double *buf)
 {
   // store n received wvolume values in atoms first .. first+n-1
 
   for (int k = 0; k < n; ++k) {
     wvolume[first + k] = buf[k];
   }
 }
 
 /* ----------------------------------------------------------------------
    pack entire state of Fix into one write
 ------------------------------------------------------------------------- */
 
 void FixPeriNeigh::write_restart(FILE *fp)
 {
   // global state is stored as doubles: first flag, then maxpartner
 
   double state[2];
   int nvalues = 0;
   state[nvalues++] = first;
   state[nvalues++] = maxpartner;
 
   // only proc 0 writes: byte count as an int, followed by the values
 
   if (comm->me != 0) return;
 
   int nbytes = nvalues * sizeof(double);
   fwrite(&nbytes,sizeof(int),1,fp);
   fwrite(state,sizeof(double),nvalues,fp);
 }
 
 /* ----------------------------------------------------------------------
    use state info from restart file to restart the Fix
 ------------------------------------------------------------------------- */
 
 void FixPeriNeigh::restart(char *buf)
 {
   // buf is read as doubles in the order used by write_restart()
 
   double *state = (double *) buf;
   int m = 0;
 
   first = static_cast<int> (state[m++]);
   maxpartner = static_cast<int> (state[m++]);
 
   // size the 2D arrays with the restored maxpartner right away,
   // 2nd array index cannot change size later
 
   grow_arrays(atom->nmax);
 }
 
 /* ----------------------------------------------------------------------
    pack values in local atom-based arrays for restart file
 ------------------------------------------------------------------------- */
 
 int FixPeriNeigh::pack_restart(int i, double *buf)
 {
   const int nbond = npartner[i];
 
   // buf[0] = total number of values packed for this atom, itself included
 
   int nvalues;
   if (isVES) nvalues = 4*nbond + 4;
   else if (isEPS) nvalues = 3*nbond + 5;
   else nvalues = 2*nbond + 4;
 
   int m = 0;
   buf[m++] = nvalues;
   buf[m++] = nbond;
 
   // per-partner values, broken bonds (partner = 0) are kept
 
   for (int k = 0; k < nbond; ++k) {
     buf[m++] = partner[i][k];
     if (isVES) {
       buf[m++] = deviatorextention[i][k];
       buf[m++] = deviatorBackextention[i][k];
     }
     if (isEPS) {
       buf[m++] = deviatorPlasticextension[i][k];
     }
     buf[m++] = r0[i][k];
   }
 
   // per-atom values
 
   if (isEPS) {
     buf[m++] = lambdaValue[i];
   }
   buf[m++] = vinter[i];
   buf[m++] = wvolume[i];
   return m;
 }
 
 /* ----------------------------------------------------------------------
    unpack values from atom->extra array to restart the fix
 ------------------------------------------------------------------------- */
 
 void FixPeriNeigh::unpack_restart(int nlocal, int nth)
 {
   double **extra = atom->extra;
 
   // each set of extra values starts with its own length
   // hop over nth sets, then step past the length of this fix's set
 
   int m = 0;
   for (int iset = 0; iset < nth; ++iset) {
     m += static_cast<int> (extra[nlocal][m]);
   }
   m++;
 
   // same order as pack_restart(): count, per-partner data, per-atom data
 
   const int nbond = static_cast<int> (extra[nlocal][m++]);
   npartner[nlocal] = nbond;
 
   for (int k = 0; k < nbond; ++k) {
     partner[nlocal][k] = static_cast<tagint> (extra[nlocal][m++]);
     if (isVES) {
       deviatorextention[nlocal][k] = extra[nlocal][m++];
       deviatorBackextention[nlocal][k] = extra[nlocal][m++];
     }
     if (isEPS) {
       deviatorPlasticextension[nlocal][k] = extra[nlocal][m++];
     }
     r0[nlocal][k] = extra[nlocal][m++];
   }
 
   if (isEPS) {
     lambdaValue[nlocal] = extra[nlocal][m++];
   }
   vinter[nlocal] = extra[nlocal][m++];
   wvolume[nlocal] = extra[nlocal][m++];
 }
 
 /* ----------------------------------------------------------------------
    maxsize of any atom's restart data
 ------------------------------------------------------------------------- */
 
 int FixPeriNeigh::maxsize_restart()
 {
   // same formulas as pack_restart(), evaluated with maxpartner partners
 
   int nvalues;
   if (isVES) nvalues = 4*maxpartner + 4;
   else if (isEPS) nvalues = 3*maxpartner + 5;
   else nvalues = 2*maxpartner + 4;
   return nvalues;
 }
 
 /* ----------------------------------------------------------------------
    size of atom nlocal's restart data
 ------------------------------------------------------------------------- */
 
 int FixPeriNeigh::size_restart(int nlocal)
 {
   // same formulas as pack_restart(), for the partner count of atom nlocal
 
   const int nbond = npartner[nlocal];
 
   int nvalues;
   if (isVES) nvalues = 4*nbond + 4;
   else if (isEPS) nvalues = 3*nbond + 5;
   else nvalues = 2*nbond + 4;
   return nvalues;
 }
diff --git a/src/PERI/pair_peri_eps.cpp b/src/PERI/pair_peri_eps.cpp
index b3f701ea1..1d9361a36 100644
--- a/src/PERI/pair_peri_eps.cpp
+++ b/src/PERI/pair_peri_eps.cpp
@@ -1,826 +1,826 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Rezwanur Rahman, John Foster (UTSA)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_peri_eps.h"
 #include "atom.h"
 #include "domain.h"
 #include "lattice.h"
 #include "force.h"
 #include "update.h"
 #include "modify.h"
 #include "fix.h"
 #include "fix_peri_neigh.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "memory.h"
 #include "error.h"
 #include "update.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairPeriEPS::PairPeriEPS(LAMMPS *lmp) : Pair(lmp)
 {
   // clear the six virial components
 
   for (int k = 0; k < 6; ++k) {
     virial[k] = 0.0;
   }
   no_virial_fdotr_compute = 1;
   single_enable = 0;
 
   // index of the peri neigh fix, -1 = not yet known
 
   ifix_peri = -1;
 
   // per-atom work arrays, allocated in compute()
 
   nmax = 0;
   s0_new = NULL;
   theta = NULL;
 
   // per-type coefficient arrays, allocated in allocate()
 
   bulkmodulus = NULL;
   shearmodulus = NULL;
   s00 = NULL;
   alpha = NULL;
   cut = NULL;
   m_yieldstress = NULL;
 
   // set comm size needed by this Pair
   // comm_reverse not needed
 
   comm_forward = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairPeriEPS::~PairPeriEPS()
 {
   // remove the PERI_NEIGH fix if its index was ever recorded
 
   if (ifix_peri >= 0) {
     modify->delete_fix("PERI_NEIGH");
   }
 
   // arrays are only freed if allocate() was invoked
 
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
   memory->destroy(bulkmodulus);
   memory->destroy(shearmodulus);
   memory->destroy(s00);
   memory->destroy(alpha);
   memory->destroy(cut);
   memory->destroy(m_yieldstress);
   memory->destroy(theta);
   memory->destroy(s0_new);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairPeriEPS::compute(int eflag, int vflag)
 {
   // force computation in two stages:
   //   1. short-range contact forces over the neighbor list
   //   2. bond forces over the partner lists stored by the peri neigh fix,
   //      each the sum of a dilatation-based term and a deviatoric term
   //      that is scaled back when the yield condition is exceeded
   // also breaks over-stretched bonds and updates s0 and the
   // per-bond plastic extension for the next timestep
 
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz;
   double xtmp0,ytmp0,ztmp0,delx0,dely0,delz0,rsq0;
   double rsq,r,dr,rk,rkNew,evdwl,fpair,fbond;
   double fbondElastoPlastic,fbondFinal;
   double deltalambda,edpNp1;
   int *ilist,*jlist,*numneigh,**firstneigh;
   double d_ij,delta,stretch;
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;
 
   double **f = atom->f;
   double **x = atom->x;
   int *type = atom->type;
   int nlocal = atom->nlocal;
 
   double *vfrac = atom->vfrac;
   double *s0 = atom->s0;
   double **x0 = atom->x0;
 
   // bond data is owned by the peri neigh fix
 
   double **r0 = ((FixPeriNeigh *) modify->fix[ifix_peri])->r0;
   double **deviatorPlasticextension =
     ((FixPeriNeigh *) modify->fix[ifix_peri])->deviatorPlasticextension;
   tagint **partner = ((FixPeriNeigh *) modify->fix[ifix_peri])->partner;
   int *npartner = ((FixPeriNeigh *) modify->fix[ifix_peri])->npartner;
   double *wvolume = ((FixPeriNeigh *) modify->fix[ifix_peri])->wvolume;
   double *lambdaValue = ((FixPeriNeigh *) modify->fix[ifix_peri])->lambdaValue;
 
   // lc = lattice constant
   // init_style guarantees it's the same in x, y, and z
 
   double lc = domain->lattice->xlattice;
   double half_lc = 0.5*lc;
   double vfrac_scale = 1.0;
 
   // short-range forces
 
   int newton_pair = force->newton_pair;
   int periodic = domain->xperiodic || domain->yperiodic || domain->zperiodic;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
   // need minimg() for x0 difference since not ghosted
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     xtmp0 = x0[i][0];
     ytmp0 = x0[i][1];
     ztmp0 = x0[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
 
       rsq = delx*delx + dely*dely + delz*delz;
       delx0 = xtmp0 - x0[j][0];
       dely0 = ytmp0 - x0[j][1];
       delz0 = ztmp0 - x0[j][2];
       if (periodic) domain->minimum_image(delx0,dely0,delz0);
       rsq0 = delx0*delx0 + dely0*dely0 + delz0*delz0;
       jtype = type[j];
 
       r = sqrt(rsq);
 
       // short-range interaction distance based on initial particle position
       // 0.9 and 1.35 are constants
 
       d_ij = MIN(0.9*sqrt(rsq0),1.35*lc);
 
       // short-range contact forces
       // 15 is constant taken from the EMU Theory Manual
       // Silling, 12 May 2005, p 18
 
       if (r < d_ij) {
         dr = r - d_ij;
 
         // kshort based upon short-range force constant
         // of the bond-based theory used in PMB model
 
         double kshort = (15.0 * 18.0 * bulkmodulus[itype][itype]) /
           (3.141592653589793 * cutsq[itype][jtype] * cutsq[itype][jtype]);
         rk = (kshort * vfrac[j]) * (dr / cut[itype][jtype]);
 
         if (r > 0.0) fpair = -(rk/r);
         else fpair = 0.0;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) evdwl = 0.5*rk*dr;
         if (evflag) ev_tally(i,j,nlocal,newton_pair,evdwl,0.0,
                              fpair*vfrac[i],delx,dely,delz);
       }
     }
   }
 
   // largest partner count among my atoms, sizes the temp array below
 
   int  maxpartner = 0;
   for (i = 0; i < nlocal; i++) maxpartner = MAX(maxpartner,npartner[i]);
 
   // grow per-atom work arrays if necessary
 
   if (atom->nmax > nmax) {
     memory->destroy(s0_new);
     memory->destroy(theta);
     nmax = atom->nmax;
     memory->create(s0_new,nmax,"pair:s0_new");
     memory->create(theta,nmax,"pair:theta");
   }
 
   // ******** temp array to store Plastic extension *********** ///
   // create on heap to reduce stack use and to allow for faster zeroing
   // only allocate when there is at least one bond slot: with nlocal = 0
   // or maxpartner = 0 there is no element [0][0] to take the address of
 
   double **deviatorPlasticExtTemp = NULL;
   const bool havebonds = (nlocal > 0) && (maxpartner > 0);
   if (havebonds) {
     memory->create(deviatorPlasticExtTemp,nlocal,maxpartner,"pair:plastext");
     memset(&(deviatorPlasticExtTemp[0][0]),0,
            sizeof(double)*nlocal*maxpartner);
   }
   // ******** temp array to store Plastic extension *********** ///
 
   // compute the dilatation on each particle
   compute_dilatation();
 
   // communicate dilatation (theta) of each particle
   comm->forward_comm_pair(this);
 
   // communicate weighted volume (wvolume) upon every reneighbor
 
   if (neighbor->ago == 0)
     comm->forward_comm_fix(modify->fix[ifix_peri]);
 
   // volume-dependent part of the energy
 
   if (eflag) {
     for (i = 0; i < nlocal; i++) {
       itype = type[i];
       if (eflag_global)
         eng_vdwl += 0.5 * bulkmodulus[itype][itype] * (theta[i] * theta[i]);
       if (eflag_atom)
         eatom[i] += 0.5 * bulkmodulus[itype][itype] * (theta[i] * theta[i]);
     }
   }
 
   // loop over my particles and their partners
   // partner list contains all bond partners, so I-J appears twice
   // if bond already broken, skip this partner
   // first = true if this is first neighbor of particle i
 
   bool first;
   double omega_minus, omega_plus;
 
   for (i = 0; i < nlocal; i++) {
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     xtmp0 = x0[i][0];
     ytmp0 = x0[i][1];
     ztmp0 = x0[i][2];
     itype = type[i];
     jnum = npartner[i];
     first = true;
 
     // yield test for particle i:
     // fsurf = tdnorm^2/2 - 25*yieldStress^2 / (8*pi*horizon^5)
     // fsurf > 0 means the bonds of i are treated as plastic this step
 
     double yieldStress = m_yieldstress[itype][itype];
     double horizon = cut[itype][itype];
     double tdnorm = compute_DeviatoricForceStateNorm(i);
     double pointwiseYieldvalue = 25.0 * yieldStress *
                             yieldStress / 8 / M_PI / pow(horizon,5);
 
     double fsurf = (tdnorm * tdnorm)/2 - pointwiseYieldvalue;
     bool elastic = true;
 
     double alphavalue = (15 * shearmodulus[itype][itype]) /wvolume[i];
 
     // deltalambda is only assigned, and only read, in the plastic case
 
     if (fsurf>0) {
       elastic = false;
       deltalambda = ((tdnorm /sqrt(2.0 * pointwiseYieldvalue)) - 1.0) / alphavalue;
       double templambda = lambdaValue[i];
       lambdaValue[i] = templambda + deltalambda;
     }
 
     for (jj = 0; jj < jnum; jj++) {
       if (partner[i][jj] == 0) continue;
       j = atom->map(partner[i][jj]);
 
       // check if lost a partner without first breaking bond
 
       if (j < 0) {
         partner[i][jj] = 0;
         continue;
       }
 
       // compute force density, add to PD equation of motion
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       if (periodic) domain->minimum_image(delx,dely,delz);
       rsq = delx*delx + dely*dely + delz*delz;
       delx0 = xtmp0 - x0[j][0];
       dely0 = ytmp0 - x0[j][1];
       delz0 = ztmp0 - x0[j][2];
       if (periodic) domain->minimum_image(delx0,dely0,delz0);
       jtype = type[j];
       delta = cut[itype][jtype];
       r = sqrt(rsq);
       dr = r - r0[i][jj];
 
       // avoid roundoff errors
 
       if (fabs(dr) < 2.2204e-016) {
           dr = 0.0;
       }
 
       // scale vfrac[j] if particle j near the horizon
 
       if ((fabs(r0[i][jj] - delta)) <= half_lc)
         vfrac_scale = (-1.0/(2*half_lc))*(r0[i][jj]) +
           (1.0 + ((delta - half_lc)/(2*half_lc) ) );
       else vfrac_scale = 1.0;
 
       omega_plus  = influence_function(-1.0*delx0,-1.0*dely0,-1.0*delz0);
       omega_minus = influence_function(delx0,dely0,delz0);
 
       //Elastic Part
       rk = ((3.0 * bulkmodulus[itype][itype]) * ( (omega_plus * theta[i] / wvolume[i]) +
          ( omega_minus * theta[j] / wvolume[j] ) ) ) * r0[i][jj];
 
       if (r > 0.0) fbond = -((rk/r) * vfrac[j] * vfrac_scale);
       else fbond = 0.0;
 
       //Plastic part
 
       double deviatoric_extension = dr - (theta[i]* r0[i][jj] / 3.0);
       edpNp1 = deviatorPlasticextension[i][jj];
 
       double tdtrialValue = ( 15 * shearmodulus[itype][itype]) *
         ( (omega_plus / wvolume[i]) + (omega_minus / wvolume[j]) ) *
            (deviatoric_extension - edpNp1);
 
       // elastic: trial value is used as is
       // plastic: trial value is rescaled by sqrt(2*yield value)/tdnorm
       //          and the new plastic extension is stored in the temp array
       // NOTE(review): in the elastic case the temp entry stays 0.0 and is
       //   later copied over the stored plastic extension - confirm intent
 
       if(elastic) {
         rkNew = tdtrialValue;
       }
       else {
         rkNew = (sqrt(2.0*pointwiseYieldvalue) * tdtrialValue) / tdnorm;
         deviatorPlasticExtTemp[i][jj] = edpNp1 + rkNew * deltalambda;
       }
 
       if (r > 0.0) fbondElastoPlastic = -((rkNew/r) * vfrac[j] * vfrac_scale);
       else fbondElastoPlastic = 0.0;
 
       // total Force state: elastic +  plastic
       fbondFinal=fbond+fbondElastoPlastic;
       fbond=fbondFinal;
 
       f[i][0] += delx*fbond;
       f[i][1] += dely*fbond;
       f[i][2] += delz*fbond;
 
       // since I-J is double counted, set newton off & use 1/2 factor and I,I
 
       if (eflag) evdwl =  (0.5 * 15 * shearmodulus[itype][itype]/wvolume[i] *
                        omega_plus * (deviatoric_extension - edpNp1) *
                       (deviatoric_extension-edpNp1)) * vfrac[j] * vfrac_scale;
       if (evflag) ev_tally(i,i,nlocal,0,0.5*evdwl,0.0,
                            0.5*fbond*vfrac[i],delx,dely,delz);
 
       // find stretch in bond I-J and break if necessary
       // use s0 from previous timestep
 
       stretch = dr / r0[i][jj];
       if (stretch > MIN(s0[i],s0[j])) partner[i][jj] = 0;
 
       // update s0 for next timestep
 
       if (first)
          s0_new[i] = s00[itype][jtype] - (alpha[itype][jtype] * stretch);
       else
          s0_new[i] = MAX(s0_new[i],s00[itype][jtype] -
                          (alpha[itype][jtype] * stretch));
 
       first = false;
     }
   }
 
   // store new s0
   // NOTE(review): s0_new[i] is only written inside the partner loop, so an
   //   atom with no unbroken partner copies an unset value into s0 - confirm
 
   memcpy(s0,s0_new,sizeof(double)*nlocal);
 
   // store new plastic extensions, then release the temp array
   // NOTE(review): this flat copy treats both arrays as nlocal x maxpartner;
   //   the fix allocates its array with its own maxpartner as row length,
   //   so the layouts only agree when the two values match - confirm
 
   if (havebonds) {
     memcpy(&(deviatorPlasticextension[0][0]),
            &(deviatorPlasticExtTemp[0][0]),
            sizeof(double)*nlocal*maxpartner);
     memory->destroy(deviatorPlasticExtTemp);
   }
 }
 
 /* ----------------------------------------------------------------------
    create the per-type-pair tables used by this pair style
    each table is (ntypes+1) x (ntypes+1); setflag is cleared for i <= j
 ------------------------------------------------------------------------- */
 
 void PairPeriEPS::allocate()
 {
   const int ntypes = atom->ntypes;
   const int dim = ntypes + 1;
 
   allocated = 1;
 
   // setflag records which type pairs have had coefficients assigned
 
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
   memory->create(bulkmodulus,dim,dim,"pair:bulkmodulus");
   memory->create(shearmodulus,dim,dim,"pair:shearmodulus");
   memory->create(s00,dim,dim,"pair:s00");
   memory->create(alpha,dim,dim,"pair:alpha");
   memory->create(cut,dim,dim,"pair:cut");
   memory->create(m_yieldstress,dim,dim,"pair:m_yieldstress");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairPeriEPS::settings(int narg, char **arg)
 {
   if (narg) error->all(FLERR,"Illegal pair_style command");
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
    arg[0],arg[1] = I,J type ranges
    arg[2..7]     = bulk modulus, shear modulus, cutoff, s00, alpha,
                    yield stress
    numeric values are parsed with force->numeric(), as pair peri/lps does,
    so malformed input is reported instead of silently becoming 0.0 via atof()
 ------------------------------------------------------------------------- */
 
 void PairPeriEPS::coeff(int narg, char **arg)
 {
   if (narg != 8) error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double bulkmodulus_one = force->numeric(FLERR,arg[2]);
   double shearmodulus_one = force->numeric(FLERR,arg[3]);
   double cut_one = force->numeric(FLERR,arg[4]);
   double s00_one = force->numeric(FLERR,arg[5]);
   double alpha_one = force->numeric(FLERR,arg[6]);
   double myieldstress_one = force->numeric(FLERR,arg[7]);
 
   // only the i <= j half is filled here; init_one() mirrors it to j,i
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       bulkmodulus[i][j] = bulkmodulus_one;
       shearmodulus[i][j] = shearmodulus_one;
       cut[i][j] = cut_one;
       s00[i][j] = s00_one;
       alpha[i][j] = alpha_one;
       m_yieldstress[i][j] = myieldstress_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   // an empty type range means the command matched no pair at all
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairPeriEPS::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   bulkmodulus[j][i] = bulkmodulus[i][j];
   shearmodulus[j][i] = shearmodulus[i][j];
   s00[j][i] = s00[i][j];
   alpha[j][i] = alpha[i][j];
   cut[j][i] = cut[i][j];
   m_yieldstress[j][i] = m_yieldstress[i][j];
   return cut[i][j];
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
    checks the atom style, atom map and lattice prerequisites, makes sure
    the PERI_NEIGH fix exists and records its index in ifix_peri, then
    requests a neighbor list
 ------------------------------------------------------------------------- */
 
 void PairPeriEPS::init_style()
 {
   // error checks
 
   if (!atom->peri_flag) 
     error->all(FLERR,"Pair style peri requires atom style peri");
   if (atom->map_style == 0)
     error->all(FLERR,"Pair peri requires an atom map, see atom_modify");
 
   // a lattice must exist and have the same constant in x, y and z
 
   if (domain->lattice == NULL)
     error->all(FLERR,"Pair peri requires a lattice be defined");
   if (domain->lattice->xlattice != domain->lattice->ylattice ||
       domain->lattice->xlattice != domain->lattice->zlattice ||
       domain->lattice->ylattice != domain->lattice->zlattice)
     error->all(FLERR,"Pair peri lattice is not identical in x, y, and z");
 
   // if first init, create Fix needed for storing fixed neighbors
   // fixarg = fix ID, group, fix style
 
   if (ifix_peri == -1) {
     char **fixarg = new char*[3];
     fixarg[0] = (char *) "PERI_NEIGH";
     fixarg[1] = (char *) "all";
     fixarg[2] = (char *) "PERI_NEIGH";
     modify->add_fix(3,fixarg);
     delete [] fixarg;
   }
 
   // find associated PERI_NEIGH fix that must exist
   // could have changed locations in fix list since created
   // if several fixes match, the last matching index is kept
 
   for (int i = 0; i < modify->nfix; i++)
     if (strcmp(modify->fix[i]->style,"PERI_NEIGH") == 0) ifix_peri = i;
   if (ifix_peri == -1) error->all(FLERR,"Fix peri neigh does not exist");
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
   for each type pair i <= j: the setflag, then (only if set) the six
   coefficients in the order read back by read_restart()
 ------------------------------------------------------------------------- */
 
 void PairPeriEPS::write_restart(FILE *fp)
 {
   const int ntypes = atom->ntypes;
 
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
       if (!setflag[itype][jtype]) continue;
 
       // field order defines the on-disk record layout
 
       const double *record[] = {
         &bulkmodulus[itype][jtype],
         &shearmodulus[itype][jtype],
         &s00[itype][jtype],
         &alpha[itype][jtype],
         &cut[itype][jtype],
         &m_yieldstress[itype][jtype]
       };
       const int nfields = sizeof(record)/sizeof(record[0]);
 
       for (int k = 0; k < nfields; k++)
         fwrite(record[k],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairPeriEPS::read_restart(FILE *fp)
 {
   allocate();
 
   int i,j;
   int me = comm->me;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
       MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
       if (setflag[i][j]) {
         if (me == 0) {
           fread(&bulkmodulus[i][j],sizeof(double),1,fp);
           fread(&shearmodulus[i][j],sizeof(double),1,fp);
           fread(&s00[i][j],sizeof(double),1,fp);
           fread(&alpha[i][j],sizeof(double),1,fp);
           fread(&cut[i][j],sizeof(double),1,fp);
           fread(&m_yieldstress[i][j],sizeof(double),1,fp);
         }
         MPI_Bcast(&bulkmodulus[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&shearmodulus[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&s00[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&alpha[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&cut[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&m_yieldstress[i][j],1,MPI_DOUBLE,0,world);
       }
     }
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
    reported as two arrays of nmax doubles
 ------------------------------------------------------------------------- */
 
 double PairPeriEPS::memory_usage()
 {
   return static_cast<double>(2 * nmax * sizeof(double));
 }
 
 /* ----------------------------------------------------------------------
    influence function: 1/|xi| for the bond vector (xi_x,xi_y,xi_z)
    a bond vector of (near) zero length is reported as an error
 ------------------------------------------------------------------------- */
 
 double PairPeriEPS::influence_function(double xi_x, double xi_y, double xi_z)
 {
   const double bond_length = sqrt(xi_x*xi_x + xi_y*xi_y + xi_z*xi_z);
 
   if (fabs(bond_length) < 2.2204e-016) {
     error->one(FLERR,"Divide by 0 in influence function");
   }
 
   return 1.0/bond_length;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairPeriEPS::compute_dilatation()
 {
   int i,j,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz;
   double xtmp0,ytmp0,ztmp0,delx0,dely0,delz0;
   double rsq,r,dr;
   double delta;
 
   double **x = atom->x;
   int *type = atom->type;
   double **x0 = atom->x0;
   int nlocal = atom->nlocal;
   double *vfrac = atom->vfrac;
   double vfrac_scale = 1.0;
 
   double lc = domain->lattice->xlattice;
   double half_lc = 0.5*lc;
 
   double **r0   = ((FixPeriNeigh *) modify->fix[ifix_peri])->r0;
   tagint **partner = ((FixPeriNeigh *) modify->fix[ifix_peri])->partner;
   int *npartner = ((FixPeriNeigh *) modify->fix[ifix_peri])->npartner;
   double *wvolume = ((FixPeriNeigh *) modify->fix[ifix_peri])->wvolume;
 
   int periodic = domain->xperiodic || domain->yperiodic || domain->zperiodic;
 
   // compute the dilatation theta
 
   for (i = 0; i < nlocal; i++) {
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     xtmp0 = x0[i][0];
     ytmp0 = x0[i][1];
     ztmp0 = x0[i][2];
     jnum = npartner[i];
     theta[i] = 0.0;
     itype = type[i];
 
     for (jj = 0; jj < jnum; jj++) {
 
       // if bond already broken, skip this partner
       if (partner[i][jj] == 0) continue;
 
       // look up local index of this partner particle
       j = atom->map(partner[i][jj]);
 
       // skip if particle is "lost"
       if (j < 0) continue;
 
       // compute force density and add to PD equation of motion
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       if (periodic) domain->minimum_image(delx,dely,delz);
       rsq = delx*delx + dely*dely + delz*delz;
       delx0 = xtmp0 - x0[j][0];
       dely0 = ytmp0 - x0[j][1];
       delz0 = ztmp0 - x0[j][2];
       if (periodic) domain->minimum_image(delx0,dely0,delz0);
 
       r = sqrt(rsq);
       dr = r - r0[i][jj];
       if (fabs(dr) < 2.2204e-016) dr = 0.0;
 
       jtype = type[j];
       delta = cut[itype][jtype];
 
       // scale vfrac[j] if particle j near the horizon
 
       if ((fabs(r0[i][jj] - delta)) <= half_lc)
         vfrac_scale = (-1.0/(2*half_lc))*(r0[i][jj]) +
           (1.0 + ((delta - half_lc)/(2*half_lc) ) );
       else vfrac_scale = 1.0;
 
       theta[i] += influence_function(delx0, dely0, delz0) * r0[i][jj] * dr *
         vfrac[j] * vfrac_scale;
 
     }
 
     // if wvolume[i] is zero, then particle i has no bonds
     // therefore, the dilatation is set to
 
     if (wvolume[i] != 0.0) theta[i] = (3.0/wvolume[i]) * theta[i];
     else theta[i] = 0;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairPeriEPS::compute_DeviatoricForceStateNorm(int i)
 {
   int j,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz;
   double xtmp0,ytmp0,ztmp0,delx0,dely0,delz0;
   double rsq,r,dr;
   double tdtrial;
   double norm = 0.0;
 
   double **x = atom->x;
   int *type = atom->type;
   double **x0 = atom->x0;
   double *vfrac = atom->vfrac;
 
   double lc = domain->lattice->xlattice;
   double half_lc = 0.5*lc;
 
   double **r0   = ((FixPeriNeigh *) modify->fix[ifix_peri])->r0;
   tagint **partner = ((FixPeriNeigh *) modify->fix[ifix_peri])->partner;
   int *npartner = ((FixPeriNeigh *) modify->fix[ifix_peri])->npartner;
   double *wvolume = ((FixPeriNeigh *) modify->fix[ifix_peri])->wvolume;
   double **deviatorPlasticextension = 
     ((FixPeriNeigh *) modify->fix[ifix_peri])->deviatorPlasticextension;
 
   int periodic = domain->xperiodic || domain->yperiodic || domain->zperiodic;
   
   // compute the dilatation theta
 
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     xtmp0 = x0[i][0];
     ytmp0 = x0[i][1];
     ztmp0 = x0[i][2];
     jnum = npartner[i];
     itype = type[i];
 
     for (jj = 0; jj < jnum; jj++) {
       if (partner[i][jj] == 0) continue;
       j = atom->map(partner[i][jj]);
        // check if lost a partner without first breaking bond
       if (j < 0) {
         partner[i][jj] = 0;
         continue;
       }     
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       if (periodic) domain->minimum_image(delx,dely,delz);
       rsq = delx*delx + dely*dely + delz*delz;
       delx0 = xtmp0 - x0[j][0];
       dely0 = ytmp0 - x0[j][1];
       delz0 = ztmp0 - x0[j][2];
       if (periodic) domain->minimum_image(delx0,dely0,delz0);
       r = sqrt(rsq);
       dr = r - r0[i][jj];
       if (fabs(dr) < 2.2204e-016) dr = 0.0;
       
       // scale vfrac[j] if particle j near the horizon
       double vfrac_scale;
       
       jtype = type[j];
       double delta = cut[itype][jtype];
 
       // scale vfrac[j] if particle j near the horizon
 
       if ((fabs(r0[i][jj] - delta)) <= half_lc)
         vfrac_scale = (-1.0/(2*half_lc))*(r0[i][jj]) +
           (1.0 + ((delta - half_lc)/(2*half_lc) ) );
       else vfrac_scale = 1.0;
       
       double ed = dr - (theta[i] * r0[i][jj])/3;
       double edPNP1 = deviatorPlasticextension[i][jj];
 
       jtype = type[j];
       delta = cut[itype][jtype];
       
       double omega_plus  = influence_function(-1.0*delx0,-1.0*dely0,-1.0*delz0);
       double omega_minus = influence_function(delx0,dely0,delz0);
       
       tdtrial = ( 15 * shearmodulus[itype][itype]) *
            ((omega_plus * theta[i] / wvolume[i]) +
              ( omega_minus * theta[j] / wvolume[j] ) ) * (ed - edPNP1);
           
       norm += tdtrial * tdtrial * vfrac[j] * vfrac_scale;   
     }
   return sqrt(norm);
 }
 
 
 /* ----------------------------------------------------------------------
    communication routines
    pack theta of the n listed atoms into buf, return number of values
 ---------------------------------------------------------------------- */
 
 int PairPeriEPS::pack_forward_comm(int n, int *list, double *buf,
                                    int pbc_flag, int *pbc)
 {
   int m = 0;
   for (int k = 0; k < n; k++) buf[m++] = theta[list[k]];
   return m;
 }
 
 /* ----------------------------------------------------------------------
    copy n values from buf into theta[first] ... theta[first+n-1]
 ---------------------------------------------------------------------- */
 
 void PairPeriEPS::unpack_forward_comm(int n, int first, double *buf)
 {
   const int last = first + n;
   const double *src = buf;
 
   for (int i = first; i < last; i++) theta[i] = *src++;
 }
diff --git a/src/PERI/pair_peri_lps.cpp b/src/PERI/pair_peri_lps.cpp
index 8020b6b9b..00c7f3dc3 100644
--- a/src/PERI/pair_peri_lps.cpp
+++ b/src/PERI/pair_peri_lps.cpp
@@ -1,653 +1,653 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Mike Parks (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_peri_lps.h"
 #include "atom.h"
 #include "domain.h"
 #include "lattice.h"
 #include "force.h"
 #include "update.h"
 #include "modify.h"
 #include "fix.h"
 #include "fix_peri_neigh.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "memory.h"
 #include "error.h"
 #include "update.h"
 
 using namespace LAMMPS_NS;
 
 /* ----------------------------------------------------------------------
    constructor: no fix located yet, no per-atom work arrays and no
    coefficient tables allocated
 ------------------------------------------------------------------------- */
 
 PairPeriLPS::PairPeriLPS(LAMMPS *lmp) : Pair(lmp)
 {
   single_enable = 0;
   no_virial_fdotr_compute = 1;
   for (int k = 0; k < 6; k++) virial[k] = 0.0;
 
   // index of the PERI_NEIGH fix, -1 until init_style() locates it
 
   ifix_peri = -1;
 
   // per-atom work arrays, sized in compute()
 
   nmax = 0;
   theta = NULL;
   s0_new = NULL;
 
   // per-type-pair tables, created in allocate()
 
   bulkmodulus = NULL;
   shearmodulus = NULL;
   s00 = NULL;
   alpha = NULL;
   cut = NULL;
 
   // set comm size needed by this Pair
   // comm_reverse not needed
 
   comm_forward = 1;  // for passing dilatation (theta)
 }
 
 /* ----------------------------------------------------------------------
    destructor: remove the PERI_NEIGH fix if one was located, then free
    the tables and work arrays if allocate() was ever called
 ------------------------------------------------------------------------- */
 
 PairPeriLPS::~PairPeriLPS()
 {
   if (ifix_peri >= 0) modify->delete_fix("PERI_NEIGH");
 
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
   memory->destroy(bulkmodulus);
   memory->destroy(shearmodulus);
   memory->destroy(s00);
   memory->destroy(alpha);
   memory->destroy(cut);
   memory->destroy(theta);
   memory->destroy(s0_new);
 }
 
 /* ----------------------------------------------------------------------
    compute forces (and optionally energy/virial) for pair peri/lps
    1) short-range contact forces over the neighbor list
    2) dilatation theta of each particle, forward-communicated
    3) bond forces over the partner lists stored by the PERI_NEIGH fix,
       including bond breaking and the update of s0
    eflag/vflag are passed to ev_setup() to select energy/virial tallying
 ------------------------------------------------------------------------- */
 
 void PairPeriLPS::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz;
   double xtmp0,ytmp0,ztmp0,delx0,dely0,delz0,rsq0;
   double rsq,r,dr,rk,evdwl,fpair,fbond;
   int *ilist,*jlist,*numneigh,**firstneigh;
   double d_ij,delta,stretch;
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;
 
   double **f = atom->f;
   double **x = atom->x;
   int *type = atom->type;
   int nlocal = atom->nlocal;
 
   double *vfrac = atom->vfrac;
   double *s0 = atom->s0;
   double **x0 = atom->x0;
 
   // per-bond and per-atom data owned by the PERI_NEIGH fix
 
   double **r0   = ((FixPeriNeigh *) modify->fix[ifix_peri])->r0;
   tagint **partner = ((FixPeriNeigh *) modify->fix[ifix_peri])->partner;
   int *npartner = ((FixPeriNeigh *) modify->fix[ifix_peri])->npartner;
   double *wvolume = ((FixPeriNeigh *) modify->fix[ifix_peri])->wvolume;
 
   // lc = lattice constant
   // init_style guarantees it's the same in x, y, and z
 
   double lc = domain->lattice->xlattice;
   double half_lc = 0.5*lc;
   double vfrac_scale = 1.0;
 
   // short-range forces
 
   int newton_pair = force->newton_pair;
   int periodic = domain->xperiodic || domain->yperiodic || domain->zperiodic;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
   // need minimg() for x0 difference since not ghosted
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     xtmp0 = x0[i][0];
     ytmp0 = x0[i][1];
     ztmp0 = x0[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
 
       rsq = delx*delx + dely*dely + delz*delz;
       delx0 = xtmp0 - x0[j][0];
       dely0 = ytmp0 - x0[j][1];
       delz0 = ztmp0 - x0[j][2];
       if (periodic) domain->minimum_image(delx0,dely0,delz0);
       rsq0 = delx0*delx0 + dely0*dely0 + delz0*delz0;
       jtype = type[j];
 
       r = sqrt(rsq);
 
       // short-range interaction distance based on initial particle position
       // 0.9 and 1.35 are constants
 
       d_ij = MIN(0.9*sqrt(rsq0),1.35*lc);
 
       // short-range contact forces
       // 15 is constant taken from the EMU Theory Manual
       // Silling, 12 May 2005, p 18
 
       if (r < d_ij) {
         dr = r - d_ij;
 
         // kshort based upon short-range force constant
         // of the bond-based theory used in PMB model
 
         double kshort = (15.0 * 18.0 * bulkmodulus[itype][itype]) /
           (3.141592653589793 * cutsq[itype][jtype] * cutsq[itype][jtype]);
         rk = (kshort * vfrac[j]) * (dr / cut[itype][jtype]);
 
         // guard against division by zero for coincident particles
 
         if (r > 0.0) fpair = -(rk/r);
         else fpair = 0.0;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) evdwl = 0.5*rk*dr;
         if (evflag) ev_tally(i,j,nlocal,newton_pair,evdwl,0.0,
                              fpair*vfrac[i],delx,dely,delz);
       }
     }
   }
 
   // grow bond forces array if necessary
 
   if (atom->nmax > nmax) {
     memory->destroy(s0_new);
     memory->destroy(theta);
     nmax = atom->nmax;
     memory->create(s0_new,nmax,"pair:s0_new");
     memory->create(theta,nmax,"pair:theta");
   }
 
   // Compute the dilatation on each particle
   compute_dilatation();
 
   // communicate dilatation (theta) of each particle
   comm->forward_comm_pair(this);
   // communicate weighted volume (wvolume) upon every reneighbor
   if (neighbor->ago == 0)
     comm->forward_comm_fix(modify->fix[ifix_peri]);
 
   // Volume-dependent part of the energy
   if (eflag) {
     for (i = 0; i < nlocal; i++) {
       itype = type[i];
       if (eflag_global)
         eng_vdwl += 0.5 * bulkmodulus[itype][itype] * (theta[i] * theta[i]);
       if (eflag_atom)
         eatom[i] += 0.5 * bulkmodulus[itype][itype] * (theta[i] * theta[i]);
     }
   }
 
   // loop over my particles and their partners
   // partner list contains all bond partners, so I-J appears twice
   // if bond already broken, skip this partner
   // first = true if this is first neighbor of particle i
 
   bool first;
   double omega_minus, omega_plus;
 
   for (i = 0; i < nlocal; i++) {
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     xtmp0 = x0[i][0];
     ytmp0 = x0[i][1];
     ztmp0 = x0[i][2];
     itype = type[i];
     jnum = npartner[i];
     first = true;
 
     for (jj = 0; jj < jnum; jj++) {
       if (partner[i][jj] == 0) continue;
       j = atom->map(partner[i][jj]);
 
       // check if lost a partner without first breaking bond
 
       if (j < 0) {
         partner[i][jj] = 0;
         continue;
       }
 
       // compute force density, add to PD equation of motion
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       if (periodic) domain->minimum_image(delx,dely,delz);
       rsq = delx*delx + dely*dely + delz*delz;
       delx0 = xtmp0 - x0[j][0];
       dely0 = ytmp0 - x0[j][1];
       delz0 = ztmp0 - x0[j][2];
       if (periodic) domain->minimum_image(delx0,dely0,delz0);
       jtype = type[j];
       delta = cut[itype][jtype];
       r = sqrt(rsq);
       dr = r - r0[i][jj];
 
       // avoid roundoff errors
 
       if (fabs(dr) < 2.2204e-016) dr = 0.0;
 
       // scale vfrac[j] if particle j near the horizon
 
       if ((fabs(r0[i][jj] - delta)) <= half_lc)
         vfrac_scale = (-1.0/(2*half_lc))*(r0[i][jj]) +
           (1.0 + ((delta - half_lc)/(2*half_lc) ) );
       else vfrac_scale = 1.0;
 
       // influence function evaluated for the bond vector in both directions
 
       omega_plus  = influence_function(-1.0*delx0,-1.0*dely0,-1.0*delz0);
       omega_minus = influence_function(delx0,dely0,delz0);
       
       // first term depends on the dilatations theta[i], theta[j]
       // second term depends on the bond extension dr
 
       rk = ( (3.0 * bulkmodulus[itype][itype]) -
              (5.0 * shearmodulus[itype][itype]) ) * vfrac[j] * vfrac_scale *
         ( (omega_plus * theta[i] / wvolume[i]) +
           ( omega_minus * theta[j] / wvolume[j] ) ) * r0[i][jj];
       rk +=  15.0 * ( shearmodulus[itype][itype] * vfrac[j] * vfrac_scale ) *
         ( (omega_plus / wvolume[i]) + (omega_minus / wvolume[j]) ) * dr;
 
       if (r > 0.0) fbond = -(rk/r);
       else fbond = 0.0;
 
       f[i][0] += delx*fbond;
       f[i][1] += dely*fbond;
       f[i][2] += delz*fbond;
 
       // since I-J is double counted, set newton off & use 1/2 factor and I,I
 
       double deviatoric_extension = dr - (theta[i]* r0[i][jj] / 3.0);
                   
       
       if (eflag) evdwl = 0.5 * 15 * (shearmodulus[itype][itype]/wvolume[i]) *
                    omega_plus*(deviatoric_extension * deviatoric_extension) *
                    vfrac[j] * vfrac_scale;
       if (evflag) ev_tally(i,i,nlocal,0,0.5*evdwl,0.0,
                            0.5*fbond*vfrac[i],delx,dely,delz);
 
       // find stretch in bond I-J and break if necessary
       // use s0 from previous timestep
 
       stretch = dr / r0[i][jj];
       if (stretch > MIN(s0[i],s0[j])) partner[i][jj] = 0;
 
       // update s0 for next timestep
       // s0_new[i] ends up as the max of s00 - alpha*stretch over the bonds
 
       if (first)
          s0_new[i] = s00[itype][jtype] - (alpha[itype][jtype] * stretch);
       else
          s0_new[i] = MAX(s0_new[i],s00[itype][jtype] -
                          (alpha[itype][jtype] * stretch));
 
       first = false;
     }      
   }
 
   // store new s0
   for (i = 0; i < nlocal; i++) s0[i] = s0_new[i];
 
 }
 
 /* ----------------------------------------------------------------------
    create the per-type-pair tables used by this pair style
    each table is (ntypes+1) x (ntypes+1); setflag is cleared for i <= j
 ------------------------------------------------------------------------- */
 
 void PairPeriLPS::allocate()
 {
   const int ntypes = atom->ntypes;
   const int dim = ntypes + 1;
 
   allocated = 1;
 
   // setflag records which type pairs have had coefficients assigned
 
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
   memory->create(bulkmodulus,dim,dim,"pair:bulkmodulus");
   memory->create(shearmodulus,dim,dim,"pair:shearmodulus");
   memory->create(s00,dim,dim,"pair:s00");
   memory->create(alpha,dim,dim,"pair:alpha");
   memory->create(cut,dim,dim,"pair:cut");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairPeriLPS::settings(int narg, char **arg)
 {
   if (narg) error->all(FLERR,"Illegal pair_style command");
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairPeriLPS::coeff(int narg, char **arg)
 {
   if (narg != 7) error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double bulkmodulus_one = force->numeric(FLERR,arg[2]);
   double shearmodulus_one = force->numeric(FLERR,arg[3]);
   double cut_one = force->numeric(FLERR,arg[4]);
   double s00_one = force->numeric(FLERR,arg[5]);
   double alpha_one = force->numeric(FLERR,arg[6]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       bulkmodulus[i][j] = bulkmodulus_one;
       shearmodulus[i][j] = shearmodulus_one;
       cut[i][j] = cut_one;
       s00[i][j] = s00_one;
       alpha[i][j] = alpha_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairPeriLPS::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   bulkmodulus[j][i] = bulkmodulus[i][j];
   shearmodulus[j][i] = shearmodulus[i][j];
   s00[j][i] = s00[i][j];
   alpha[j][i] = alpha[i][j];
   cut[j][i] = cut[i][j];
 
   return cut[i][j];
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
    checks the atom style, atom map and lattice prerequisites, makes sure
    the PERI_NEIGH fix exists and records its index in ifix_peri, then
    requests a neighbor list
 ------------------------------------------------------------------------- */
 
 void PairPeriLPS::init_style()
 {
   // error checks
 
   if (!atom->peri_flag)
     error->all(FLERR,"Pair style peri requires atom style peri");
   if (atom->map_style == 0)
     error->all(FLERR,"Pair peri requires an atom map, see atom_modify");
 
   // the lattice constant must be the same in x, y and z
   // NOTE(review): domain->lattice is dereferenced here without the NULL
   // check that PairPeriEPS::init_style() performs - confirm it cannot be NULL
 
   if (domain->lattice->xlattice != domain->lattice->ylattice ||
       domain->lattice->xlattice != domain->lattice->zlattice ||
       domain->lattice->ylattice != domain->lattice->zlattice)
     error->all(FLERR,"Pair peri lattice is not identical in x, y, and z");
 
   // if first init, create Fix needed for storing fixed neighbors
   // fixarg = fix ID, group, fix style
 
   if (ifix_peri == -1) {
     char **fixarg = new char*[3];
     fixarg[0] = (char *) "PERI_NEIGH";
     fixarg[1] = (char *) "all";
     fixarg[2] = (char *) "PERI_NEIGH";
     modify->add_fix(3,fixarg);
     delete [] fixarg;
   }
 
   // find associated PERI_NEIGH fix that must exist
   // could have changed locations in fix list since created
   // if several fixes match, the last matching index is kept
 
   for (int i = 0; i < modify->nfix; i++)
     if (strcmp(modify->fix[i]->style,"PERI_NEIGH") == 0) ifix_peri = i;
   if (ifix_peri == -1) error->all(FLERR,"Fix peri neigh does not exist");
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
   for each type pair i <= j: the setflag, then (only if set) the five
   coefficients in the order read back by read_restart()
 ------------------------------------------------------------------------- */
 
 void PairPeriLPS::write_restart(FILE *fp)
 {
   const int ntypes = atom->ntypes;
 
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
       if (!setflag[itype][jtype]) continue;
 
       // field order defines the on-disk record layout
 
       const double *record[] = {
         &bulkmodulus[itype][jtype],
         &shearmodulus[itype][jtype],
         &s00[itype][jtype],
         &alpha[itype][jtype],
         &cut[itype][jtype]
       };
       const int nfields = sizeof(record)/sizeof(record[0]);
 
       for (int k = 0; k < nfields; k++)
         fwrite(record[k],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairPeriLPS::read_restart(FILE *fp)
 {
   allocate();
 
   int i,j;
   int me = comm->me;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
       MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
       if (setflag[i][j]) {
         if (me == 0) {
           fread(&bulkmodulus[i][j],sizeof(double),1,fp);
           fread(&shearmodulus[i][j],sizeof(double),1,fp);
           fread(&s00[i][j],sizeof(double),1,fp);
           fread(&alpha[i][j],sizeof(double),1,fp);
           fread(&cut[i][j],sizeof(double),1,fp);
         }
         MPI_Bcast(&bulkmodulus[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&shearmodulus[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&s00[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&alpha[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&cut[i][j],1,MPI_DOUBLE,0,world);
       }
     }
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
    reported as two arrays of nmax doubles
 ------------------------------------------------------------------------- */
 
 double PairPeriLPS::memory_usage()
 {
   return static_cast<double>(2 * nmax * sizeof(double));
 }
 
 /* ----------------------------------------------------------------------
    influence function: 1/|xi| for the bond vector (xi_x,xi_y,xi_z)
    a bond vector of (near) zero length is reported as an error
 ------------------------------------------------------------------------- */
 
 double PairPeriLPS::influence_function(double xi_x, double xi_y, double xi_z)
 {
   const double bond_length = sqrt(xi_x*xi_x + xi_y*xi_y + xi_z*xi_z);
 
   if (fabs(bond_length) < 2.2204e-016) {
     error->one(FLERR,"Divide by 0 in influence function of pair peri/lps");
   }
 
   return 1.0/bond_length;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairPeriLPS::compute_dilatation()
 {
   int i,j,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz;
   double xtmp0,ytmp0,ztmp0,delx0,dely0,delz0;
   double rsq,r,dr;
   double delta;
 
   double **x = atom->x;
   int *type = atom->type;
   double **x0 = atom->x0;
   int nlocal = atom->nlocal;
   double *vfrac = atom->vfrac;
   double vfrac_scale = 1.0;
 
   double lc = domain->lattice->xlattice;
   double half_lc = 0.5*lc;
 
   double **r0   = ((FixPeriNeigh *) modify->fix[ifix_peri])->r0;
   tagint **partner = ((FixPeriNeigh *) modify->fix[ifix_peri])->partner;
   int *npartner = ((FixPeriNeigh *) modify->fix[ifix_peri])->npartner;
   double *wvolume = ((FixPeriNeigh *) modify->fix[ifix_peri])->wvolume;
 
   int periodic = domain->xperiodic || domain->yperiodic || domain->zperiodic;
 
   // compute the dilatation theta
 
   for (i = 0; i < nlocal; i++) {
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     xtmp0 = x0[i][0];
     ytmp0 = x0[i][1];
     ztmp0 = x0[i][2];
     jnum = npartner[i];
     theta[i] = 0.0;
     itype = type[i];
 
     for (jj = 0; jj < jnum; jj++) {
 
       // if bond already broken, skip this partner
       if (partner[i][jj] == 0) continue;
 
       // Look up local index of this partner particle
       j = atom->map(partner[i][jj]);
 
       // Skip if particle is "lost"
       if (j < 0) continue;
 
       // Compute force density and add to PD equation of motion
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       if (periodic) domain->minimum_image(delx,dely,delz);
       rsq = delx*delx + dely*dely + delz*delz;
       delx0 = xtmp0 - x0[j][0];
       dely0 = ytmp0 - x0[j][1];
       delz0 = ztmp0 - x0[j][2];
       if (periodic) domain->minimum_image(delx0,dely0,delz0);
 
       r = sqrt(rsq);
       dr = r - r0[i][jj];
       if (fabs(dr) < 2.2204e-016) dr = 0.0;
 
       jtype = type[j];
       delta = cut[itype][jtype];
 
       // scale vfrac[j] if particle j near the horizon
 
       if ((fabs(r0[i][jj] - delta)) <= half_lc)
         vfrac_scale = (-1.0/(2*half_lc))*(r0[i][jj]) +
           (1.0 + ((delta - half_lc)/(2*half_lc) ) );
       else vfrac_scale = 1.0;
 
       theta[i] += influence_function(delx0, dely0, delz0) * r0[i][jj] * dr *
         vfrac[j] * vfrac_scale;
 
     }
 
     // if wvolume[i] is zero, then particle i has no bonds
     // therefore, the dilatation is set to
 
     if (wvolume[i] != 0.0) theta[i] = (3.0/wvolume[i]) * theta[i];
     else theta[i] = 0;
   }
 }
 
 
 /* ----------------------------------------------------------------------
    communication routines
    pack theta of the n listed atoms into buf, return number of values
  ---------------------------------------------------------------------- */
 
 int PairPeriLPS::pack_forward_comm(int n, int *list, double *buf,
                                    int pbc_flag, int *pbc)
 {
   int m = 0;
   for (int k = 0; k < n; k++) buf[m++] = theta[list[k]];
   return m;
 }
 
 /* ----------------------------------------------------------------------
    copy n values from buf into theta[first] ... theta[first+n-1]
  ---------------------------------------------------------------------- */
 
 void PairPeriLPS::unpack_forward_comm(int n, int first, double *buf)
 {
   const int last = first + n;
   const double *src = buf;
 
   for (int i = first; i < last; i++) theta[i] = *src++;
 }
diff --git a/src/PERI/pair_peri_pmb.cpp b/src/PERI/pair_peri_pmb.cpp
index 77ca26d1e..2b68a2522 100644
--- a/src/PERI/pair_peri_pmb.cpp
+++ b/src/PERI/pair_peri_pmb.cpp
@@ -1,509 +1,509 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Mike Parks (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "float.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_peri_pmb.h"
 #include "atom.h"
 #include "domain.h"
 #include "lattice.h"
 #include "force.h"
 #include "update.h"
 #include "modify.h"
 #include "fix.h"
 #include "fix_peri_neigh.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairPeriPMB::PairPeriPMB(LAMMPS *lmp) : Pair(lmp)
 {
   // set the no_virial_fdotr_compute flag and start from a zeroed virial
 
   no_virial_fdotr_compute=1;
   int k = 0;
   while (k < 6) virial[k++] = 0.0;
 
   // -1 = PERI_NEIGH fix not yet located, see init_style()
 
   ifix_peri = -1;
 
   // per-type tables, created by allocate()
 
   cut = NULL;
   alpha = NULL;
   s00 = NULL;
   kspring = NULL;
 
   // per-atom scratch array, (re)sized in compute()
 
   s0_new = NULL;
   nmax = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairPeriPMB::~PairPeriPMB()
 {
   // remove the PERI_NEIGH fix if init_style() located one
 
   if (ifix_peri >= 0) modify->delete_fix("PERI_NEIGH");
 
   // nothing else to release unless allocate() has run
 
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
   memory->destroy(kspring);
   memory->destroy(s00);
   memory->destroy(alpha);
   memory->destroy(cut);
   memory->destroy(s0_new);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairPeriPMB::compute(int eflag, int vflag)
 {
   // two passes:
   //  (1) loop over the neighbor list and apply a short-range contact
   //      force to pairs closer than d_ij
   //  (2) loop over each local particle's stored bond partners, apply the
   //      bond force to particle i, break over-stretched bonds and
   //      compute the new s0 for each particle
   // energy/virial are tallied via ev_tally() when evflag is set
 
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz;
   double xtmp0,ytmp0,ztmp0,delx0,dely0,delz0,rsq0;
   double rsq,r,dr,rk,evdwl,fpair,fbond;
   int *ilist,*jlist,*numneigh,**firstneigh;
   double d_ij,delta,stretch;
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **f = atom->f;
   double **x = atom->x;
   int *type = atom->type;
   int nlocal = atom->nlocal;
 
   // per-atom peridynamic data from the atom class and the
   // bond data (r0, partner, npartner) stored by the PERI_NEIGH fix
 
   double *vfrac = atom->vfrac;
   double *s0 = atom->s0;
   double **x0 = atom->x0;
   double **r0   = ((FixPeriNeigh *) modify->fix[ifix_peri])->r0;
   tagint **partner = ((FixPeriNeigh *) modify->fix[ifix_peri])->partner;
   int *npartner = ((FixPeriNeigh *) modify->fix[ifix_peri])->npartner;
 
   // lc = lattice constant
   // init_style guarantees it's the same in x, y, and z
 
   double lc = domain->lattice->xlattice;
   double half_lc = 0.5*lc;
   double vfrac_scale = 1.0;
 
   // short-range forces
 
   int newton_pair = force->newton_pair;
   int periodic = (domain->xperiodic || domain->yperiodic || domain->zperiodic);
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
   // need minimg() for x0 difference since not ghosted
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     xtmp0 = x0[i][0];
     ytmp0 = x0[i][1];
     ztmp0 = x0[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
 
       // current separation (rsq) and reference separation (rsq0)
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       delx0 = xtmp0 - x0[j][0];
       dely0 = ytmp0 - x0[j][1];
       delz0 = ztmp0 - x0[j][2];
       if (periodic) domain->minimum_image(delx0,dely0,delz0);
       rsq0 = delx0*delx0 + dely0*dely0 + delz0*delz0;
       jtype = type[j];
 
       r = sqrt(rsq);
 
       // short-range interaction distance based on initial particle position
       // 0.9 and 1.35 are constants
 
       d_ij = MIN(0.9*sqrt(rsq0),1.35*lc);
 
       // short-range contact forces
       // 15 is constant taken from the EMU Theory Manual
       // Silling, 12 May 2005, p 18
 
       if (r < d_ij) {
         dr = r - d_ij;
 
         rk = (15.0 * kspring[itype][jtype] * vfrac[j]) *
           (dr / cut[itype][jtype]);
 
         // r == 0 would divide by zero, apply no force in that case
 
         if (r > 0.0) fpair = -(rk/r);
         else fpair = 0.0;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) evdwl = 0.5*rk*dr;
         if (evflag) ev_tally(i,j,nlocal,newton_pair,evdwl,0.0,
                              fpair*vfrac[i],delx,dely,delz);
       }
     }
   }
 
   // grow bond forces array if necessary
 
   if (atom->nmax > nmax) {
     memory->destroy(s0_new);
     nmax = atom->nmax;
     memory->create(s0_new,nmax,"pair:s0_new");
   }
 
   // loop over my particles and their partners
   // partner list contains all bond partners, so I-J appears twice
   // if bond already broken, skip this partner
   // first = true if this is first neighbor of particle i
 
   bool first;
 
   for (i = 0; i < nlocal; i++) {
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jnum = npartner[i];
 
     // DBL_MAX is the value that ends up in s0[i] if no partner of i
     // is processed below
 
     s0_new[i] = DBL_MAX;
     first = true;
 
     for (jj = 0; jj < jnum; jj++) {
       if (partner[i][jj] == 0) continue;
       j = atom->map(partner[i][jj]);
 
       // check if lost a partner without first breaking bond
 
       if (j < 0) {
         partner[i][jj] = 0;
         continue;
       }
 
       // compute force density, add to PD equation of motion
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       if (periodic) domain->minimum_image(delx,dely,delz);
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
       delta = cut[itype][jtype];
       r = sqrt(rsq);
       dr = r - r0[i][jj];
 
       // avoid roundoff errors
 
       if (fabs(dr) < 2.2204e-016) dr = 0.0;
 
       // scale vfrac[j] if particle j near the horizon
 
       if ((fabs(r0[i][jj] - delta)) <= half_lc)
         vfrac_scale = (-1.0/(2*half_lc))*(r0[i][jj]) +
           (1.0 + ((delta - half_lc)/(2*half_lc) ) );
       else vfrac_scale = 1.0;
 
       stretch = dr / r0[i][jj];
       rk = (kspring[itype][jtype] * vfrac[j]) * vfrac_scale * stretch;
       if (r > 0.0) fbond = -(rk/r);
       else fbond = 0.0;
 
       // only f[i] is updated in this loop
 
       f[i][0] += delx*fbond;
       f[i][1] += dely*fbond;
       f[i][2] += delz*fbond;
 
       // since I-J is double counted, set newton off & use 1/2 factor and I,I
 
       if (eflag) evdwl = 0.5*rk*dr;
       if (evflag) ev_tally(i,i,nlocal,0,0.5*evdwl,0.0,0.5*fbond*vfrac[i],delx,dely,delz);
 
       // find stretch in bond I-J and break if necessary
       // use s0 from previous timestep
 
       if (stretch > MIN(s0[i],s0[j])) partner[i][jj] = 0;
 
       // update s0 for next timestep
       // s0_new[i] ends up as the max over processed partners of
       // s00 - alpha*stretch
 
       if (first)
          s0_new[i] = s00[itype][jtype] - (alpha[itype][jtype] * stretch);
       else
          s0_new[i] = MAX(s0_new[i],s00[itype][jtype] - (alpha[itype][jtype] * stretch));
       first = false;
     }
   }
 
   // store new s0
   for (i = 0; i < nlocal; i++) s0[i] = s0_new[i];
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairPeriPMB::allocate()
 {
   allocated = 1;
 
   // tables are indexed 1..ntypes, hence ntypes+1 entries per dimension
 
   const int np1 = atom->ntypes + 1;
 
   // clear the upper triangle (j >= i) of setflag
 
   memory->create(setflag,np1,np1,"pair:setflag");
   for (int i = 1; i < np1; i++) {
     int *row = setflag[i];
     for (int j = i; j < np1; j++) row[j] = 0;
   }
 
   memory->create(cutsq,np1,np1,"pair:cutsq");
   memory->create(kspring,np1,np1,"pair:kspring");
   memory->create(s00,np1,np1,"pair:s00");
   memory->create(alpha,np1,np1,"pair:alpha");
   memory->create(cut,np1,np1,"pair:cut");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairPeriPMB::settings(int narg, char **arg)
 {
   if (narg) error->all(FLERR,"Illegal pair_style command");
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairPeriPMB::coeff(int narg, char **arg)
 {
   if (narg != 6) error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double kspring_one = force->numeric(FLERR,arg[2]);
   double cut_one = force->numeric(FLERR,arg[3]);
   double s00_one = force->numeric(FLERR,arg[4]);
   double alpha_one = force->numeric(FLERR,arg[5]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       kspring[i][j] = kspring_one;
       s00[i][j] = s00_one;
       alpha[i][j] = alpha_one;
       cut[i][j] = cut_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairPeriPMB::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   kspring[j][i] = kspring[i][j];
   alpha[j][i] = alpha[i][j];
   s00[j][i] = s00[i][j];
   cut[j][i] = cut[i][j];
 
   return cut[i][j];
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairPeriPMB::init_style()
 {
   // error checks
 
   if (!atom->peri_flag) 
     error->all(FLERR,"Pair style peri requires atom style peri");
   if (atom->map_style == 0)
     error->all(FLERR,"Pair peri requires an atom map, see atom_modify");
 
   if (domain->lattice->xlattice != domain->lattice->ylattice ||
       domain->lattice->xlattice != domain->lattice->zlattice ||
       domain->lattice->ylattice != domain->lattice->zlattice)
     error->all(FLERR,"Pair peri lattice is not identical in x, y, and z");
 
   // if first init, create Fix needed for storing fixed neighbors
 
   if (ifix_peri == -1) {
     char **fixarg = new char*[3];
     fixarg[0] = (char *) "PERI_NEIGH";
     fixarg[1] = (char *) "all";
     fixarg[2] = (char *) "PERI_NEIGH";
     modify->add_fix(3,fixarg);
     delete [] fixarg;
   }
 
   // find associated PERI_NEIGH fix that must exist
   // could have changed locations in fix list since created
 
   for (int i = 0; i < modify->nfix; i++)
     if (strcmp(modify->fix[i]->style,"PERI_NEIGH") == 0) ifix_peri = i;
   if (ifix_peri == -1) error->all(FLERR,"Fix peri neigh does not exist");
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairPeriPMB::write_restart(FILE *fp)
 {
   // for each type pair with j >= i: the setflag, then, only if it is set,
   // kspring, s00, alpha, cut in that order
 
   const int ntypes = atom->ntypes;
   for (int a = 1; a <= ntypes; a++) {
     for (int b = a; b <= ntypes; b++) {
       fwrite(&setflag[a][b],sizeof(int),1,fp);
       if (!setflag[a][b]) continue;
       fwrite(&kspring[a][b],sizeof(double),1,fp);
       fwrite(&s00[a][b],sizeof(double),1,fp);
       fwrite(&alpha[a][b],sizeof(double),1,fp);
       fwrite(&cut[a][b],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairPeriPMB::read_restart(FILE *fp)
 {
   allocate();
 
   // rank 0 reads each record in the order write_restart() wrote it,
   // then every value is broadcast from rank 0
 
   const bool reader = (comm->me == 0);
 
   for (int a = 1; a <= atom->ntypes; a++) {
     for (int b = a; b <= atom->ntypes; b++) {
       if (reader) fread(&setflag[a][b],sizeof(int),1,fp);
       MPI_Bcast(&setflag[a][b],1,MPI_INT,0,world);
       if (!setflag[a][b]) continue;
 
       if (reader) {
         fread(&kspring[a][b],sizeof(double),1,fp);
         fread(&s00[a][b],sizeof(double),1,fp);
         fread(&alpha[a][b],sizeof(double),1,fp);
         fread(&cut[a][b],sizeof(double),1,fp);
       }
       MPI_Bcast(&kspring[a][b],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&s00[a][b],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&alpha[a][b],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut[a][b],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairPeriPMB::single(int i, int j, int itype, int jtype, double rsq,
                            double factor_coul, double factor_lj,
                            double &fforce)
 {
   double delx0,dely0,delz0,rsq0;
   double d_ij,r,dr,rk,vfrac_scale;
 
   double *vfrac = atom->vfrac;
   double **x0 = atom->x0;
   double **r0   = ((FixPeriNeigh *) modify->fix[ifix_peri])->r0;
   tagint **partner = ((FixPeriNeigh *) modify->fix[ifix_peri])->partner;
   int *npartner = ((FixPeriNeigh *) modify->fix[ifix_peri])->npartner;
 
   double lc = domain->lattice->xlattice;
   double half_lc = 0.5*lc;
 
   delx0 = x0[i][0] - x0[j][0];
   dely0 = x0[i][1] - x0[j][1];
   delz0 = x0[i][2] - x0[j][2];
   int periodic = domain->xperiodic || domain->yperiodic || domain->zperiodic;
   if (periodic) domain->minimum_image(delx0,dely0,delz0);
   rsq0 = delx0*delx0 + dely0*dely0 + delz0*delz0;
 
   d_ij = MIN(0.9*sqrt(rsq0),1.35*lc);
   r = sqrt(rsq);
 
   double energy = 0.0;
   fforce = 0.0;
 
   if (r < d_ij) {
     dr = r - d_ij;
     rk = (15.0 * kspring[itype][jtype] * vfrac[j]) *
       (dr / sqrt(cutsq[itype][jtype]));
     if (r > 0.0) fforce += -(rk/r);
     energy += 0.5*rk*dr;
   }
 
   int jnum = npartner[i];
   for (int jj = 0; jj < jnum; jj++) {
     if (partner[i][jj] == 0) continue;
     if (j < 0) continue;
     if (j == atom->map(partner[i][jj])) {
       dr = r - r0[i][jj];
       if (fabs(dr) < 2.2204e-016) dr = 0.0;
       if ( (fabs(r0[i][jj] - sqrt(cutsq[itype][jtype]))) <= half_lc)
         vfrac_scale = (-1.0/(2*half_lc))*(r0[i][jj]) +
           (1.0 + ((sqrt(cutsq[itype][jtype]) - half_lc)/(2*half_lc)));
       else vfrac_scale = 1.0;
       rk = (kspring[itype][jtype] * vfrac[j] * vfrac_scale) *
         (dr / r0[i][jj]);
       if (r > 0.0) fforce += -(rk/r);
       energy += 0.5*rk*dr;
     }
   }
 
   return energy;
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
 ------------------------------------------------------------------------- */
 
 double PairPeriPMB::memory_usage()
 {
   // nmax doubles, the size compute() gives the s0_new array
 
   return (double) (nmax * sizeof(double));
 }
diff --git a/src/PERI/pair_peri_ves.cpp b/src/PERI/pair_peri_ves.cpp
index 243c05068..a7a39cdda 100644
--- a/src/PERI/pair_peri_ves.cpp
+++ b/src/PERI/pair_peri_ves.cpp
@@ -1,723 +1,723 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Ezwanur Rahman, J.T. Foster (UTSA)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "float.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_peri_ves.h"
 #include "atom.h"
 #include "domain.h"
 #include "lattice.h"
 #include "force.h"
 #include "update.h"
 #include "modify.h"
 #include "fix.h"
 #include "fix_peri_neigh.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "memory.h"
 #include "error.h"
 #include "update.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairPeriVES::PairPeriVES(LAMMPS *lmp) : Pair(lmp)
 {
   // pair-style flags
 
   single_enable = 0;
   no_virial_fdotr_compute = 1;
   int k = 0;
   while (k < 6) virial[k++] = 0.0;
 
   // -1 = PERI_NEIGH fix not yet located, see init_style()
 
   ifix_peri = -1;
 
   // per-atom arrays, (re)sized in compute()
 
   theta = NULL;
   s0_new = NULL;
   nmax = 0;
 
   // per-type tables, created by allocate()
 
   m_taubi = NULL;
   m_lambdai = NULL;
   cut = NULL;
   alpha = NULL;
   s00 = NULL;
   shearmodulus = NULL;
   bulkmodulus = NULL;
 
   // set comm size needed by this Pair
   // comm_reverse not needed
 
   comm_forward = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairPeriVES::~PairPeriVES()
 {
   // remove the PERI_NEIGH fix if init_style() located one
 
   if (ifix_peri >= 0) modify->delete_fix("PERI_NEIGH");
 
   // nothing else to release unless allocate() has run
 
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
   memory->destroy(bulkmodulus);
   memory->destroy(shearmodulus);
   memory->destroy(s00);
   memory->destroy(alpha);
   memory->destroy(cut);
   memory->destroy(m_lambdai);
   memory->destroy(m_taubi);
   memory->destroy(theta);
   memory->destroy(s0_new);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairPeriVES::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz;
   double xtmp0,ytmp0,ztmp0,delx0,dely0,delz0,rsq0;
   double rsq,r,dr,rk,evdwl,fpair,fbond;
   double deltaed,fbondViscoElastic,fbondFinal;
   double decay,betai,lambdai,edbNp1,rkNew;
   int *ilist,*jlist,*numneigh,**firstneigh;
   double d_ij,delta,stretch;
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;
 
   double **f = atom->f;
   double **x = atom->x;
   int *type = atom->type;
   int nlocal = atom->nlocal;
 
   double timestepsize = update->dt;
   double *vfrac = atom->vfrac;
   double *s0 = atom->s0;
   double **x0 = atom->x0;
   double **r0 = ((FixPeriNeigh *) modify->fix[ifix_peri])->r0;
   double **deviatorextention = 
     ((FixPeriNeigh *) modify->fix[ifix_peri])->deviatorextention;
   double **deviatorBackextention = 
     ((FixPeriNeigh *) modify->fix[ifix_peri])->deviatorBackextention;
   tagint **partner = ((FixPeriNeigh *) modify->fix[ifix_peri])->partner;
   int *npartner = ((FixPeriNeigh *) modify->fix[ifix_peri])->npartner;
   double *wvolume = ((FixPeriNeigh *) modify->fix[ifix_peri])->wvolume;
 
   // lc = lattice constant
   // init_style guarantees it's the same in x, y, and z
 
   double lc = domain->lattice->xlattice;
   double half_lc = 0.5*lc;
   double vfrac_scale = 1.0;
 
   // short-range forces
 
   int newton_pair = force->newton_pair;
   int periodic = domain->xperiodic || domain->yperiodic || domain->zperiodic;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
   // need minimg() for x0 difference since not ghosted
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     xtmp0 = x0[i][0];
     ytmp0 = x0[i][1];
     ztmp0 = x0[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
 
       rsq = delx*delx + dely*dely + delz*delz;
       delx0 = xtmp0 - x0[j][0];
       dely0 = ytmp0 - x0[j][1];
       delz0 = ztmp0 - x0[j][2];
       if (periodic) domain->minimum_image(delx0,dely0,delz0);
       rsq0 = delx0*delx0 + dely0*dely0 + delz0*delz0;
       jtype = type[j];
 
       r = sqrt(rsq);
 
       // short-range interaction distance based on initial particle position
       // 0.9 and 1.35 are constants
 
       d_ij = MIN(0.9*sqrt(rsq0),1.35*lc);
 
       // short-range contact forces
       // 15 is constant taken from the EMU Theory Manual
       // Silling, 12 May 2005, p 18
 
       if (r < d_ij) {
         dr = r - d_ij;
 
         // kshort based upon short-range force constant
         // of the bond-based theory used in PMB model
 
         double kshort = (15.0 * 18.0 * bulkmodulus[itype][itype]) /
           (3.141592653589793 * cutsq[itype][jtype] * cutsq[itype][jtype]);
         rk = (kshort * vfrac[j]) * (dr / cut[itype][jtype]);
 
         if (r > 0.0) fpair = -(rk/r);
         else fpair = 0.0;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) evdwl = 0.5*rk*dr;
         if (evflag) ev_tally(i,j,nlocal,newton_pair,evdwl,0.0,
                              fpair*vfrac[i],delx,dely,delz);
       }
     }
   }
 
   // grow bond forces array if necessary
 
   if (atom->nmax > nmax) {
     memory->destroy(s0_new);
     memory->destroy(theta);
     nmax = atom->nmax;
     memory->create(s0_new,nmax,"pair:s0_new");
     memory->create(theta,nmax,"pair:theta");
   }
 
   // Compute the dilatation on each particle
   compute_dilatation();
 
   // communicate dilatation (theta) of each particle
   comm->forward_comm_pair(this);
 
   // communicate weighted volume (wvolume) upon every reneighbor
 
   if (neighbor->ago == 0)
     comm->forward_comm_fix(modify->fix[ifix_peri]);
 
   // volume-dependent part of the energy
 
   if (eflag) {
     for (i = 0; i < nlocal; i++) {
       itype = type[i];
       if (eflag_global)
         eng_vdwl += 0.5 * bulkmodulus[itype][itype] * (theta[i] * theta[i]);
       if (eflag_atom)
         eatom[i] += 0.5 * bulkmodulus[itype][itype] * (theta[i] * theta[i]);
     }
   }
 
   // loop over my particles and their partners
   // partner list contains all bond partners, so I-J appears twice
   // if bond already broken, skip this partner
   // first = true if this is first neighbor of particle i
 
   bool first;
   double omega_minus, omega_plus;
 
   for (i = 0; i < nlocal; i++) {
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     xtmp0 = x0[i][0];
     ytmp0 = x0[i][1];
     ztmp0 = x0[i][2];
     itype = type[i];
     jnum = npartner[i];
     first = true;
     
     for (jj = 0; jj < jnum; jj++) {
       if (partner[i][jj] == 0) continue;
       j = atom->map(partner[i][jj]);
 
       // check if lost a partner without first breaking bond
 
       if (j < 0) {
         partner[i][jj] = 0;
         continue;
       }
 
       // compute force density, add to PD equation of motion
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       if (periodic) domain->minimum_image(delx,dely,delz);
       rsq = delx*delx + dely*dely + delz*delz;
       delx0 = xtmp0 - x0[j][0];
       dely0 = ytmp0 - x0[j][1];
       delz0 = ztmp0 - x0[j][2];
       if (periodic) domain->minimum_image(delx0,dely0,delz0);
       jtype = type[j];
       delta = cut[itype][jtype];
       r = sqrt(rsq);
       dr = r - r0[i][jj];
 
       // avoid roundoff errors
 
       if (fabs(dr) < 2.2204e-016) dr = 0.0;
 
       // scale vfrac[j] if particle j near the horizon
 
       if ((fabs(r0[i][jj] - delta)) <= half_lc)
         vfrac_scale = (-1.0/(2*half_lc))*(r0[i][jj]) +
           (1.0 + ((delta - half_lc)/(2*half_lc) ) );
       else vfrac_scale = 1.0;
 
       omega_plus  = influence_function(-1.0*delx0,-1.0*dely0,-1.0*delz0);
       omega_minus = influence_function(delx0,dely0,delz0);
         
       rk = ( (3.0 * bulkmodulus[itype][itype]) * vfrac[j] * vfrac_scale *
         ( (omega_plus * theta[i] / wvolume[i]) +
           ( omega_minus * theta[j] / wvolume[j] ) ) ) * r0[i][jj];
 
       if (r > 0.0) fbond = -(rk/r);
       else fbond = 0.0;
 
       // for viscoelasticity
       lambdai=m_lambdai[itype][itype];
       double taui = m_taubi[itype][itype];  
       double c1 = taui/timestepsize;
       decay=exp(-1.0/c1);
       betai=1.-c1*(1.-decay);
 
       double deviatoric_extension = 
         dr - (theta[i]* r0[i][jj] / 3.0);
       deltaed = deviatoric_extension-deviatorextention[i][jj];
  
       // back extention at current step
 
       edbNp1 = deviatorextention[i][jj]*(1-decay) + 
         deviatorBackextention[i][jj]*decay+betai*deltaed;
 
       rkNew = ((1-lambdai)*15.0) * 
         ( shearmodulus[itype][itype] * vfrac[j] * vfrac_scale ) *
         ( (omega_plus / wvolume[i]) + (omega_minus / wvolume[j]) ) * 
         deviatoric_extension;
       rkNew += (lambdai*15.0) * 
         ( shearmodulus[itype][itype] * vfrac[j] * vfrac_scale ) *
         ( (omega_plus / wvolume[i]) + (omega_minus / wvolume[j]) ) * 
         (deviatoric_extension-edbNp1);
 
       if (r > 0.0) fbondViscoElastic = -(rkNew/r);
       else fbondViscoElastic = 0.0;
 
       // total Force: elastic + viscoelastic 
 
       fbondFinal=fbond+fbondViscoElastic;
       fbond=fbondFinal;
          
       f[i][0] += delx*fbond;
       f[i][1] += dely*fbond;
       f[i][2] += delz*fbond;
 
       // since I-J is double counted, set newton off & use 1/2 factor and I,I
 
       if (eflag) evdwl =  ((0.5 * 15 * (1 - lambdai) * shearmodulus[itype][itype]/wvolume[i] *
                     omega_plus * deviatoric_extension * 
                     deviatoric_extension) + 
                     (0.5 * 15 * lambdai * shearmodulus[itype][itype]/wvolume[i] *
                     omega_plus * (deviatoric_extension-edbNp1) * 
                     (deviatoric_extension-edbNp1))) * vfrac[j] * vfrac_scale;
       if (evflag) ev_tally(i,i,nlocal,0,0.5*evdwl,0.0,
                            0.5*fbond*vfrac[i],delx,dely,delz);
 
       // find stretch in bond I-J and break if necessary
       // use s0 from previous timestep
 
       // store current deviatoric extention
 
       deviatorextention[i][jj]=deviatoric_extension;
       deviatorBackextention[i][jj]=edbNp1;
 
       stretch = dr / r0[i][jj];
       if (stretch > MIN(s0[i],s0[j])) partner[i][jj] = 0;
 
       // update s0 for next timestep
 
       if (first)
          s0_new[i] = s00[itype][jtype] - (alpha[itype][jtype] * stretch);
       else
          s0_new[i] = MAX(s0_new[i],s00[itype][jtype] -
                          (alpha[itype][jtype] * stretch));
 
       first = false;
     }  
   }
 
   // store new s0
 
   for (i = 0; i < nlocal; i++) s0[i] = s0_new[i];
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairPeriVES::allocate()
 {
   allocated = 1;
 
   // tables are indexed 1..ntypes, hence ntypes+1 entries per dimension
 
   const int np1 = atom->ntypes + 1;
 
   // clear the upper triangle (j >= i) of setflag
 
   memory->create(setflag,np1,np1,"pair:setflag");
   for (int i = 1; i < np1; i++) {
     int *row = setflag[i];
     for (int j = i; j < np1; j++) row[j] = 0;
   }
 
   memory->create(cutsq,np1,np1,"pair:cutsq");
   memory->create(bulkmodulus,np1,np1,"pair:bulkmodulus");
   memory->create(shearmodulus,np1,np1,"pair:shearmodulus");
   memory->create(s00,np1,np1,"pair:s00");
   memory->create(alpha,np1,np1,"pair:alpha");
   memory->create(cut,np1,np1,"pair:cut");
   memory->create(m_lambdai,np1,np1,"pair:m_lambdai");
   memory->create(m_taubi,np1,np1,"pair:m_taubi");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairPeriVES::settings(int narg, char **arg)
 {
   if (narg) error->all(FLERR,"Illegal pair_style command");
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairPeriVES::coeff(int narg, char **arg)
 {
   // args: itype-range jtype-range bulkmodulus shearmodulus cut s00 alpha
   //       m_lambdai m_taubi
 
   if (narg != 9) error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   // parse with force->numeric(), as PairPeriPMB::coeff() does, instead of
   // atof(), which silently turns a malformed argument into 0.0
 
   double bulkmodulus_one = force->numeric(FLERR,arg[2]);
   double shearmodulus_one = force->numeric(FLERR,arg[3]);
   double cut_one = force->numeric(FLERR,arg[4]);
   double s00_one = force->numeric(FLERR,arg[5]);
   double alpha_one = force->numeric(FLERR,arg[6]);
   double mlambdai_one = force->numeric(FLERR,arg[7]);
   double mtaui_one = force->numeric(FLERR,arg[8]);
 
   // only entries with j >= i are filled, init_one() mirrors them
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       bulkmodulus[i][j] = bulkmodulus_one;
       shearmodulus[i][j] = shearmodulus_one;
       cut[i][j] = cut_one;
       s00[i][j] = s00_one;
       alpha[i][j] = alpha_one;
       m_lambdai[i][j] = mlambdai_one;
       m_taubi[i][j] = mtaui_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   // a type range that selects no i <= j pair is an input error
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairPeriVES::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   bulkmodulus[j][i] = bulkmodulus[i][j];
   shearmodulus[j][i] = shearmodulus[i][j];
   s00[j][i] = s00[i][j];
   alpha[j][i] = alpha[i][j];
   cut[j][i] = cut[i][j];
   m_lambdai[j][i] = m_lambdai[i][j];
   m_taubi[j][i] = m_taubi[i][j];
 
   return cut[i][j];
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairPeriVES::init_style()
 {
   // checks the atom style, atom map and lattice, makes sure a PERI_NEIGH
   // fix exists and records its index in ifix_peri, then requests a
   // neighbor list
 
   // error checks
 
   if (!atom->peri_flag) 
     error->all(FLERR,"Pair style peri requires atom style peri");
   if (atom->map_style == 0)
     error->all(FLERR,"Pair peri requires an atom map, see atom_modify");
 
   // the lattice must exist and have the same spacing in x, y and z
 
   if (domain->lattice == NULL)
     error->all(FLERR,"Pair peri requires a lattice be defined");
   if (domain->lattice->xlattice != domain->lattice->ylattice ||
       domain->lattice->xlattice != domain->lattice->zlattice ||
       domain->lattice->ylattice != domain->lattice->zlattice)
     error->all(FLERR,"Pair peri lattice is not identical in x, y, and z");
 
   // if first init, create Fix needed for storing fixed neighbors
 
   if (ifix_peri == -1) {
     char **fixarg = new char*[3];
     fixarg[0] = (char *) "PERI_NEIGH";
     fixarg[1] = (char *) "all";
     fixarg[2] = (char *) "PERI_NEIGH";
     modify->add_fix(3,fixarg);
     delete [] fixarg;
   }
 
   // find associated PERI_NEIGH fix that must exist
   // could have changed locations in fix list since created
   // if several fixes match, the last one found wins
 
   for (int i = 0; i < modify->nfix; i++)
     if (strcmp(modify->fix[i]->style,"PERI_NEIGH") == 0) ifix_peri = i;
   if (ifix_peri == -1) error->all(FLERR,"Fix peri neigh does not exist");
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairPeriVES::write_restart(FILE *fp)
 {
   // walk the upper triangle (jtype >= itype) of the per-type-pair tables
   // the setflag entry is always written, the seven coefficients only
   // when that entry is non-zero
 
   for (int itype = 1; itype <= atom->ntypes; itype++) {
     for (int jtype = itype; jtype <= atom->ntypes; jtype++) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
       if (!setflag[itype][jtype]) continue;
 
       fwrite(&bulkmodulus[itype][jtype],sizeof(double),1,fp);
       fwrite(&shearmodulus[itype][jtype],sizeof(double),1,fp);
       fwrite(&s00[itype][jtype],sizeof(double),1,fp);
       fwrite(&alpha[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut[itype][jtype],sizeof(double),1,fp);
       fwrite(&m_lambdai[itype][jtype],sizeof(double),1,fp);
       fwrite(&m_taubi[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairPeriVES::read_restart(FILE *fp)
 {
   allocate();
 
   // only rank 0 touches the file; every value it reads is then
   // broadcast from rank 0 so all ranks end up with the same tables
 
   const bool reader = (comm->me == 0);
 
   for (int itype = 1; itype <= atom->ntypes; itype++) {
     for (int jtype = itype; jtype <= atom->ntypes; jtype++) {
       if (reader) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
 
       // coefficients are stored only for pairs whose setflag is non-zero
       if (!setflag[itype][jtype]) continue;
 
       if (reader) {
         fread(&bulkmodulus[itype][jtype],sizeof(double),1,fp);
         fread(&shearmodulus[itype][jtype],sizeof(double),1,fp);
         fread(&s00[itype][jtype],sizeof(double),1,fp);
         fread(&alpha[itype][jtype],sizeof(double),1,fp);
         fread(&cut[itype][jtype],sizeof(double),1,fp);
         fread(&m_lambdai[itype][jtype],sizeof(double),1,fp);
         fread(&m_taubi[itype][jtype],sizeof(double),1,fp);
       }
 
       MPI_Bcast(&bulkmodulus[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&shearmodulus[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&s00[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&alpha[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&m_lambdai[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&m_taubi[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
 ------------------------------------------------------------------------- */
 
 double PairPeriVES::memory_usage()
 {
   // reports the size in bytes of 2 * nmax doubles
   return 2 * nmax * sizeof(double);
 }
 
 /* ----------------------------------------------------------------------
    influence function definition
 ------------------------------------------------------------------------- */
 
 double PairPeriVES::influence_function(double xi_x, double xi_y, double xi_z)
 {
   // omega = 1/|xi|, the inverse length of the vector (xi_x,xi_y,xi_z)
 
   const double norm_sq = xi_x*xi_x + xi_y*xi_y + xi_z*xi_z;
   const double norm = sqrt(norm_sq);
 
   // a (near) zero-length vector is reported as an error
   if (fabs(norm) < 2.2204e-016)
     error->one(FLERR,"Divide by 0 in influence function of pair peri/lps");
 
   return 1.0/norm;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairPeriVES::compute_dilatation()
 {
   // Fills theta[i] for every local atom i:
   //   theta[i] = (3/wvolume[i]) * sum over unbroken partners j of
   //              influence_function(reference bond) * r0 * dr *
   //              vfrac[j] * vfrac_scale
   // where dr is the current bond length minus the stored length r0.
   // Partner lists, r0 and wvolume are read from the fix at ifix_peri.
 
   int i,j,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz;
   double xtmp0,ytmp0,ztmp0,delx0,dely0,delz0;
   double rsq,r,dr;
   double delta;
 
   double **x = atom->x;
   int *type = atom->type;
   double **x0 = atom->x0;
   int nlocal = atom->nlocal;
   double *vfrac = atom->vfrac;
   double vfrac_scale = 1.0;
 
   // half the x lattice spacing sets the width of the volume-scaling band
 
   double lc = domain->lattice->xlattice;
   double half_lc = 0.5*lc;
 
 
   double **r0   = ((FixPeriNeigh *) modify->fix[ifix_peri])->r0;
   tagint **partner = ((FixPeriNeigh *) modify->fix[ifix_peri])->partner;
   int *npartner = ((FixPeriNeigh *) modify->fix[ifix_peri])->npartner;
   double *wvolume = ((FixPeriNeigh *) modify->fix[ifix_peri])->wvolume;
 
   // minimum-image wrapping is applied if any dimension is periodic
 
   int periodic = domain->xperiodic || domain->yperiodic || domain->zperiodic;
 
   // compute the dilatation theta
 
   for (i = 0; i < nlocal; i++) {
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     xtmp0 = x0[i][0];
     ytmp0 = x0[i][1];
     ztmp0 = x0[i][2];
     jnum = npartner[i];
     theta[i] = 0.0;
     itype = type[i];
 
     for (jj = 0; jj < jnum; jj++) {
 
       // if bond already broken, skip this partner
 
       if (partner[i][jj] == 0) continue;
 
       // look up local index of this partner particle
 
       j = atom->map(partner[i][jj]);
 
       // skip if particle is "lost"
 
       if (j < 0) continue;
 
       // current bond vector (x) and reference bond vector (x0)
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       if (periodic) domain->minimum_image(delx,dely,delz);
       rsq = delx*delx + dely*dely + delz*delz;
       delx0 = xtmp0 - x0[j][0];
       dely0 = ytmp0 - x0[j][1];
       delz0 = ztmp0 - x0[j][2];
       if (periodic) domain->minimum_image(delx0,dely0,delz0);
 
       // bond extension; magnitudes below 2.2204e-16 are treated as zero
 
       r = sqrt(rsq);
       dr = r - r0[i][jj];
       if (fabs(dr) < 2.2204e-016) dr = 0.0;
 
       // delta is the cutoff for this pair of types
 
       jtype = type[j];
       delta = cut[itype][jtype];
 
       // scale vfrac[j] if particle j near the horizon
       // inside the band |r0 - delta| <= half_lc the weight is linear in r0
 
       if ((fabs(r0[i][jj] - delta)) <= half_lc)
         vfrac_scale = (-1.0/(2*half_lc))*(r0[i][jj]) +
           (1.0 + ((delta - half_lc)/(2*half_lc) ) );
       else vfrac_scale = 1.0;
 
       theta[i] += influence_function(delx0, dely0, delz0) * r0[i][jj] * dr *
         vfrac[j] * vfrac_scale;
 
     }
 
     // if wvolume[i] is zero, then particle i has no bonds
     // therefore, the dilatation is set to zero
 
     if (wvolume[i] != 0.0) theta[i] = (3.0/wvolume[i]) * theta[i];
     else theta[i] = 0;
   }
 }
 
 
 /* ----------------------------------------------------------------------
    communication routines
 ---------------------------------------------------------------------- */
 
 int PairPeriVES::pack_forward_comm(int n, int *list, double *buf,
                                    int pbc_flag, int *pbc)
 {
   // copy theta of each listed atom into buf, one value per atom
   // returns the number of values written; pbc_flag and pbc are unused
 
   int nbuf = 0;
   for (int k = 0; k < n; k++)
     buf[nbuf++] = theta[list[k]];
   return nbuf;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairPeriVES::unpack_forward_comm(int n, int first, double *buf)
 {
   // store the n received values into theta[first] .. theta[first+n-1]
 
   const double *src = buf;
   const int stop = first + n;
   for (int iatom = first; iatom < stop; iatom++)
     theta[iatom] = *src++;
 }
diff --git a/src/QEQ/fix_qeq_dynamic.cpp b/src/QEQ/fix_qeq_dynamic.cpp
index f52f9e0c5..5db7d2871 100644
--- a/src/QEQ/fix_qeq_dynamic.cpp
+++ b/src/QEQ/fix_qeq_dynamic.cpp
@@ -1,294 +1,294 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Ray Shan (Sandia)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "fix_qeq_dynamic.h"
 #include "atom.h"
 #include "comm.h"
 #include "domain.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "update.h"
 #include "force.h"
 #include "group.h"
 #include "pair.h"
 #include "kspace.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 FixQEqDynamic::FixQEqDynamic(LAMMPS *lmp, int narg, char **arg) :
   FixQEq(lmp, narg, arg)
 {
   // defaults, overridden by the optional keyword/value pairs below
 
   qdamp = 0.10;
   qstep = 0.02;
 
   // optional args start at index 8; each keyword takes exactly one value
 
   int iarg = 8;
   while (iarg < narg) {
     const char *keyword = arg[iarg];
     const bool is_qdamp = (strcmp(keyword,"qdamp") == 0);
     const bool is_qstep = !is_qdamp && (strcmp(keyword,"qstep") == 0);
 
     if (!is_qdamp && !is_qstep)
       error->all(FLERR,"Illegal fix qeq/dynamic command");
     if (iarg+2 > narg) error->all(FLERR,"Illegal fix qeq/dynamic command");
 
     if (is_qdamp) qdamp = atof(arg[iarg+1]);
     else qstep = atof(arg[iarg+1]);
     iarg += 2;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqDynamic::init()
 {
   // Checks that charges exist and the fix group is non-empty, requests
   // a half neighbor list owned by this fix, warns about a very small
   // tolerance and records the number of rRESPA levels when applicable.
 
   if (!atom->q_flag)
     error->all(FLERR,"Fix qeq/dynamic requires atom attribute q");
 
   ngroup = group->count(igroup);
   if (ngroup == 0) error->all(FLERR,"Fix qeq/dynamic group has no atoms");
 
   // the request is flagged as coming from a fix (not a pair style)
   // and as a half (not full) list
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->fix  = 1;
   neighbor->requests[irequest]->half = 1;
   neighbor->requests[irequest]->full = 0;
 
   // the warning is printed by rank 0 only
 
   if (tolerance < 1e-4) 
     if (comm->me == 0)
       error->warning(FLERR,"Fix qeq/dynamic tolerance may be too small"
 		    " for damped dynamics");
 
   if (strstr(update->integrate_style,"respa"))
     nlevels_respa = ((Respa *) update->integrate)->nlevels;
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqDynamic::pre_force(int vflag)
 {
   int i,ii,iloop,inum,*ilist;
   double qmass,dtq2;
   double enegchkall,enegmaxall;
 
   double *q = atom->q;
   int *mask = atom->mask;
 
   double enegchk = 0.0;
   double enegtot = 0.0;
   double enegmax = 0.0;
 
   if (update->ntimestep % nevery) return;
 
   n = atom->nlocal;
   N = atom->nlocal + atom->nghost;
 
   if( atom->nmax > nmax ) reallocate_storage();
 
   inum = list->inum;
   ilist = list->ilist;
 
   qmass  = 0.016;
   dtq2   = 0.5*qstep*qstep/qmass;
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     q1[i] = q2[i] = qf[i] = 0.0;
   }
 
   for (iloop = 0; iloop < maxiter; iloop ++ ) {
     for (ii = 0; ii < inum; ii++) {
       i = ilist[ii];
       if (mask[i] & groupbit) {
         q1[i] += qf[i]*dtq2 - qdamp*q1[i];
         q[i]  += q1[i];
       }
     }
 
     pack_flag = 1;
     comm->forward_comm_fix(this);
 
     enegtot = compute_eneg();
     enegtot /= ngroup;
 
     enegchk = enegmax = 0.0;
 
     for (ii = 0; ii < inum ; ii++) {
       i = ilist[ii];
       if (mask[i] & groupbit) {
         q2[i] = enegtot-qf[i];
         enegmax = MAX(enegmax,fabs(q2[i]));
         enegchk += fabs(q2[i]);
         qf[i] = q2[i];
       }
     }
 
     MPI_Allreduce(&enegchk,&enegchkall,1,MPI_DOUBLE,MPI_SUM,world);
     enegchk = enegchkall/ngroup;
     MPI_Allreduce(&enegmax,&enegmaxall,1,MPI_DOUBLE,MPI_MAX,world);
     enegmax = enegmaxall;
 
     if (enegchk <= tolerance && enegmax <= 100.0*tolerance) break;
 
     for (ii = 0; ii < inum; ii++) {
       i = ilist[ii];
       if (mask[i] & groupbit)
         q1[i] += qf[i]*dtq2 - qdamp*q1[i];
     }
   }
 
   if (comm->me == 0) {
     if (iloop == maxiter) {
       char str[128];
       sprintf(str,"Charges did not converge at step "BIGINT_FORMAT
 		  ": %lg",update->ntimestep,enegchk);
       error->warning(FLERR,str);
     }
   }
 
   if (force->kspace) force->kspace->qsum_qsq();
 }
 
 /* ---------------------------------------------------------------------- */
 
 double FixQEqDynamic::compute_eneg()
 {
   // Recomputes qf for the atoms of the fix group and returns the sum of
   // qf over those atoms, added up across all ranks.
   // qf[i] = chi[type] + eta[type]*q[i] + sum over neighbors within the
   // cutoff of q[j]/r; each pair also adds q[i]/r to qf[j].
 
   int i, j, ii, jj, inum, jnum, itype;
   int *ilist, *jlist, *numneigh, **firstneigh;
   double eneg, enegtot;
   double r, rsq, delr[3], rinv;
 
   int *type = atom->type;
   int *mask = atom->mask;
   double *q = atom->q;
   double **x = atom->x;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // reset qf of the group atoms in the list
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     if (mask[i] & groupbit)
       qf[i] = 0.0;
   }
 
   // communicating charge force to all nodes, first forward then reverse
   // pack_flag 2 selects qf in pack/unpack_forward_comm
   pack_flag = 2;
   comm->forward_comm_fix(this);
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     itype = type[i];
 
     if (mask[i] & groupbit) {
 
       // per-atom term
 
       qf[i] += chi[itype] + eta[itype] * q[i];
 
       jlist = firstneigh[i];
       jnum = numneigh[i];
 
       for (jj = 0; jj < jnum; jj++) {
         j = jlist[jj];
 	j &= NEIGHMASK;
 
         delr[0] = x[i][0] - x[j][0];
         delr[1] = x[i][1] - x[j][1];
         delr[2] = x[i][2] - x[j][2];
         rsq = delr[0]*delr[0] + delr[1]*delr[1] + delr[2]*delr[2];
 
         if (rsq > cutoff_sq) continue;
 
         // pair term, applied to both atoms of the pair
         // NOTE(review): there is no guard against r == 0 here
 
         r = sqrt(rsq);
 	rinv = 1.0/r;
 	qf[i] += q[j] * rinv;
 	qf[j] += q[i] * rinv;
       }
     }
   }
 
   // unpack_reverse_comm adds the received values onto qf
 
   comm->reverse_comm_fix(this);
 
   // sum charge force on each node and return it
 
   eneg = enegtot = 0.0;
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     if (mask[i] & groupbit)
       eneg += qf[i];
   }
   MPI_Allreduce(&eneg,&enegtot,1,MPI_DOUBLE,MPI_SUM,world);
   return enegtot;
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixQEqDynamic::pack_forward_comm(int n, int *list, double *buf,
                           int pbc_flag, int *pbc)
 {
   // pack_flag chooses what is sent: 1 = charges (atom->q), 2 = qf
   // any other value leaves buf untouched; n is returned in every case
 
   switch (pack_flag) {
   case 1:
     for (int k = 0; k < n; k++) buf[k] = atom->q[list[k]];
     break;
   case 2:
     for (int k = 0; k < n; k++) buf[k] = qf[list[k]];
     break;
   default:
     break;
   }
 
   return n;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqDynamic::unpack_forward_comm(int n, int first, double *buf)
 {
   // store n received values starting at atom index first
   // pack_flag chooses the destination: 1 = atom->q, 2 = qf
 
   switch (pack_flag) {
   case 1:
     for (int k = 0; k < n; k++) atom->q[first + k] = buf[k];
     break;
   case 2:
     for (int k = 0; k < n; k++) qf[first + k] = buf[k];
     break;
   default:
     break;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixQEqDynamic::pack_reverse_comm(int n, int first, double *buf)
 {
   // send qf[first] .. qf[first+n-1]; returns n
 
   for (int k = 0; k < n; k++)
     buf[k] = qf[first + k];
   return n;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqDynamic::unpack_reverse_comm(int n, int *list, double *buf)
 {
   // received values are accumulated onto qf, not assigned
 
   int k = 0;
   while (k < n) {
     qf[list[k]] += buf[k];
     k++;
   }
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/QEQ/fix_qeq_point.cpp b/src/QEQ/fix_qeq_point.cpp
index 37194ba25..2f9e31c2b 100644
--- a/src/QEQ/fix_qeq_point.cpp
+++ b/src/QEQ/fix_qeq_point.cpp
@@ -1,190 +1,190 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Ray Shan (Sandia)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "fix_qeq_point.h"
 #include "atom.h"
 #include "comm.h"
 #include "domain.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "update.h"
 #include "force.h"
 #include "group.h"
 #include "kspace.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 FixQEqPoint::FixQEqPoint(LAMMPS *lmp, int narg, char **arg) :
   FixQEq(lmp, narg, arg)
 {
   // nothing beyond the FixQEq base-class construction
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqPoint::init()
 {
   // Checks that charges exist and the fix group is non-empty, requests
   // a full neighbor list owned by this fix, allocates the shld table and
   // records the number of rRESPA levels when applicable.
 
   if (!atom->q_flag) 
     error->all(FLERR,"Fix qeq/point requires atom attribute q");
 
   ngroup = group->count(igroup);
   if (ngroup == 0) error->all(FLERR,"Fix qeq/point group has no atoms");
 
   // the request is flagged as coming from a fix (not a pair style)
   // and as a full (not half) list
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->fix  = 1;
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
 
   // shld is sized (ntypes+1) x (ntypes+1)
   // NOTE(review): shld is created on every call without a preceding
   // destroy; if init() can run more than once this may leak -- confirm
 
   int ntypes = atom->ntypes;
   memory->create(shld,ntypes+1,ntypes+1,"qeq:shileding");
 
   if (strstr(update->integrate_style,"respa"))
     nlevels_respa = ((Respa *) update->integrate)->nlevels;
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqPoint::pre_force(int vflag)
 {
   // nothing to do unless this timestep is a multiple of nevery
   if ((update->ntimestep % nevery) != 0) return;
 
   n = atom->nlocal;
   N = atom->nlocal + atom->nghost;
 
   // grow per-atom storage and the matrix when they are close to full
   if (atom->nmax > nmax) reallocate_storage();
 
   const bool atoms_near_cap = (n > n_cap*DANGER_ZONE);
   const bool entries_near_cap = (m_fill > m_cap*DANGER_ZONE);
   if (atoms_near_cap || entries_near_cap) reallocate_matrix();
 
   // set up both systems, solve each with CG, then update the charges
   // matvecs accumulates the values returned by the two CG calls
   init_matvec();
   matvecs = CG(b_s, s);         // CG on s - parallel
   matvecs += CG(b_t, t);        // CG on t - parallel
   calculate_Q();
 
   if (force->kspace) force->kspace->qsum_qsq();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqPoint::init_matvec()
 {
   compute_H();
 
   int nn, ii, i;
   int *ilist;
 
   nn = list->inum;
   ilist = list->ilist;
 
   for( ii = 0; ii < nn; ++ii ) {
     i = ilist[ii];
     if (atom->mask[i] & groupbit) {
       Hdia_inv[i] = 1. / eta[ atom->type[i] ];
       b_s[i]      = -( chi[atom->type[i]] + chizj[i] );
       b_t[i]      = -1.0;
       t[i] = t_hist[i][2] + 3 * ( t_hist[i][0] - t_hist[i][1] );
       s[i] = 4*(s_hist[i][0]+s_hist[i][2])-(6*s_hist[i][1]+s_hist[i][3]);
     }
   }
 
   pack_flag = 2;
   comm->forward_comm_fix(this); //Dist_vector( s );
   pack_flag = 3;
   comm->forward_comm_fix(this); //Dist_vector( t );
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqPoint::compute_H()
 {
   // Fills the row-wise sparse matrix H from the neighbor list.
   // For each group atom i in the list, H.firstnbr[i] is the offset of
   // its first entry and H.numnbrs[i] the number of entries; each entry
   // stores the neighbor index in H.jlist and 1/r in H.val.
 
   int inum, jnum, *ilist, *jlist, *numneigh, **firstneigh;
   int i, j, ii, jj, flag;
   double **x, SMALL = 0.0001;
   double dx, dy, dz, r_sqr, r;
 
   tagint *tag = atom->tag;
   x = atom->x;
   int *mask = atom->mask;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // fill in the H matrix
   m_fill = 0;
   r_sqr = 0;
   for( ii = 0; ii < inum; ii++ ) {
     i = ilist[ii];
     if (mask[i] & groupbit) {
       jlist = firstneigh[i];
       jnum = numneigh[i];
       H.firstnbr[i] = m_fill;
 
       for( jj = 0; jj < jnum; jj++ ) {
         j = jlist[jj];
 	j &= NEIGHMASK;
 
         dx = x[j][0] - x[i][0];
         dy = x[j][1] - x[i][1];
         dz = x[j][2] - x[i][2];
         r_sqr = dx*dx + dy*dy + dz*dz;
 
         // a pair within the cutoff is kept when j < n, or when
         // tag[i] < tag[j]; for equal tags the sign of dz, then dy,
         // then dx (beyond SMALL) decides
 
         flag = 0;
         if (r_sqr <= cutoff_sq) {
           if (j < n) flag = 1;
           else if (tag[i] < tag[j]) flag = 1;
           else if (tag[i] == tag[j]) {
             if (dz > SMALL) flag = 1;
             else if (fabs(dz) < SMALL) {
               if (dy > SMALL) flag = 1;
               else if (fabs(dy) < SMALL && dx > SMALL)
                 flag = 1;
 	    }
 	  }
 	}
 
         if( flag ) {
           H.jlist[m_fill] = j;
 	  r = sqrt(r_sqr);
           H.val[m_fill] = 1.0/r;
           m_fill++;
         }
       }
       H.numnbrs[i] = m_fill - H.firstnbr[i];
     }
   }
 
   // NOTE(review): this capacity check runs after the entries have been
   // written, so it reports an overrun rather than preventing one
 
   if (m_fill >= H.m) {
     char str[128];
     sprintf(str,"H matrix size has been exceeded: m_fill=%d H.m=%d\n",
              m_fill, H.m );
     error->warning(FLERR,str);
     error->all(FLERR,"Fix qeq/point has insufficient QEq matrix size");
   }
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/QEQ/fix_qeq_shielded.cpp b/src/QEQ/fix_qeq_shielded.cpp
index 0b6bfed0e..394f457dc 100644
--- a/src/QEQ/fix_qeq_shielded.cpp
+++ b/src/QEQ/fix_qeq_shielded.cpp
@@ -1,254 +1,254 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Ray Shan (Sandia)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "fix_qeq_shielded.h"
 #include "atom.h"
 #include "comm.h"
 #include "domain.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "update.h"
 #include "force.h"
 #include "group.h"
 #include "kspace.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 FixQEqShielded::FixQEqShielded(LAMMPS *lmp, int narg, char **arg) :
   FixQEq(lmp, narg, arg)
 {
   // nothing beyond the FixQEq base-class construction
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqShielded::init()
 {
   // Checks that charges exist and the fix group is non-empty, requests
   // a half neighbor list owned by this fix, builds the shielding and
   // taper tables, rejects zero gamma values and records the number of
   // rRESPA levels when applicable.
 
   if (!atom->q_flag) 
     error->all(FLERR,"Fix qeq/shielded requires atom attribute q");
 
   ngroup = group->count(igroup);
   if (ngroup == 0) error->all(FLERR,"Fix qeq/shielded group has no atoms");
 
   // the request is flagged as coming from a fix (not a pair style)
   // and as a half (not full) list
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->fix  = 1;
   neighbor->requests[irequest]->half = 1;
   neighbor->requests[irequest]->full = 0;
 
   // shld is sized (ntypes+1) x (ntypes+1)
   // NOTE(review): shld is created on every call without a preceding
   // destroy; if init() can run more than once this may leak -- confirm
 
   int ntypes = atom->ntypes;
   memory->create(shld,ntypes+1,ntypes+1,"qeq:shileding");
 
   init_shielding();
 
   // NOTE(review): this check runs after init_shielding() has already
   // raised gamma[i]*gamma[j] to the power -1.5
 
   int i;
   for (i = 1; i <= ntypes; i++) {
     if (gamma[i] == 0.0) 
       error->all(FLERR,"Invalid param file for fix qeq/shielded");
   }
 
   if (strstr(update->integrate_style,"respa"))
     nlevels_respa = ((Respa *) update->integrate)->nlevels;
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqShielded::init_shielding()
 {
   int i,j;
   double d7, swa2, swa3, swb2, swb3;
 
   int ntypes = atom->ntypes;
   for( i = 1; i <= ntypes; ++i )
     for( j = 1; j <= ntypes; ++j )
       shld[i][j] = pow( gamma[i] * gamma[j], -1.5 );
 
   if (fabs(swa) > 0.01 && comm->me == 0)
     error->warning(FLERR,"Fix qeq has non-zero lower Taper radius cutoff");
   if (swb < 0)
     error->all(FLERR, "Fix qeq has negative upper Taper radius cutoff");
   else if (swb < 5 && comm->me == 0)
     error->warning(FLERR,"Fix qeq has very low Taper radius cutoff");
 
   d7 = pow( swb - swa, 7 );
   swa2 = swa*swa;
   swa3 = swa2*swa;
   swb2 = swb*swb;
   swb3 = swb2*swb;
 
   Tap[7] =  20.0 / d7;
   Tap[6] = -70.0 * (swa + swb) / d7;
   Tap[5] =  84.0 * (swa2 + 3.0*swa*swb + swb2) / d7;
   Tap[4] = -35.0 * (swa3 + 9.0*swa2*swb + 9.0*swa*swb2 + swb3 ) / d7;
   Tap[3] = 140.0 * (swa3*swb + 3.0*swa2*swb2 + swa*swb3 ) / d7;
   Tap[2] =-210.0 * (swa3*swb2 + swa2*swb3) / d7;
   Tap[1] = 140.0 * swa3 * swb3 / d7;
   Tap[0] = (-35.0*swa3*swb2*swb2 + 21.0*swa2*swb3*swb2 +
             7.0*swa*swb3*swb3 + swb3*swb3*swb ) / d7;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqShielded::pre_force(int vflag)
 {
   // nothing to do unless this timestep is a multiple of nevery
   if ((update->ntimestep % nevery) != 0) return;
 
   n = atom->nlocal;
   N = atom->nlocal + atom->nghost;
 
   // grow per-atom storage and the matrix when they are close to full
   if (atom->nmax > nmax) reallocate_storage();
 
   const bool atoms_near_cap = (n > n_cap*DANGER_ZONE);
   const bool entries_near_cap = (m_fill > m_cap*DANGER_ZONE);
   if (atoms_near_cap || entries_near_cap) reallocate_matrix();
 
   // set up both systems, solve each with CG, then update the charges
   // matvecs accumulates the values returned by the two CG calls
   init_matvec();
   matvecs = CG(b_s, s);         // CG on s - parallel
   matvecs += CG(b_t, t);        // CG on t - parallel
   calculate_Q();
 
   if (force->kspace) force->kspace->qsum_qsq();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqShielded::init_matvec()
 {
   compute_H();
 
   int nn, ii, i;
   int *ilist;
 
   nn = list->inum;
   ilist = list->ilist;
 
   for( ii = 0; ii < nn; ++ii ) {
     i = ilist[ii];
     if (atom->mask[i] & groupbit) {
       Hdia_inv[i] = 1. / eta[ atom->type[i] ];
       b_s[i]      = -( chi[atom->type[i]] + chizj[i] );
       b_t[i]      = -1.0;
       t[i] = t_hist[i][2] + 3 * ( t_hist[i][0] - t_hist[i][1] );
       s[i] = 4*(s_hist[i][0]+s_hist[i][2])-(6*s_hist[i][1]+s_hist[i][3]);
     }
   }
 
   pack_flag = 2;
   comm->forward_comm_fix(this); //Dist_vector( s );
   pack_flag = 3;
   comm->forward_comm_fix(this); //Dist_vector( t );
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqShielded::compute_H()
 {
   // Fills the row-wise sparse matrix H from the neighbor list.
   // For each group atom i in the list, H.firstnbr[i] is the offset of
   // its first entry and H.numnbrs[i] the number of entries; each entry
   // stores the neighbor index in H.jlist and the value returned by
   // calculate_H() for that pair in H.val.
 
   int inum, jnum, *ilist, *jlist, *numneigh, **firstneigh;
   int i, j, ii, jj, flag;
   double **x, SMALL = 0.0001;
   double dx, dy, dz, r_sqr, r;
 
   int *type = atom->type;
   tagint *tag = atom->tag;
   x = atom->x;
   int *mask = atom->mask;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // fill in the H matrix
   m_fill = 0;
   r_sqr = 0;
   for( ii = 0; ii < inum; ii++ ) {
     i = ilist[ii];
     if (mask[i] & groupbit) {
       jlist = firstneigh[i];
       jnum = numneigh[i];
       H.firstnbr[i] = m_fill;
 
       for( jj = 0; jj < jnum; jj++ ) {
         j = jlist[jj];
 	j &= NEIGHMASK;
 
         dx = x[j][0] - x[i][0];
         dy = x[j][1] - x[i][1];
         dz = x[j][2] - x[i][2];
         r_sqr = dx*dx + dy*dy + dz*dz;
 
         // a pair within the cutoff is kept when j < n, or when
         // tag[i] < tag[j]; for equal tags the sign of dz, then dy,
         // then dx (beyond SMALL) decides
 
         flag = 0;
         if (r_sqr <= cutoff_sq) {
           if (j < n) flag = 1;
           else if (tag[i] < tag[j]) flag = 1;
           else if (tag[i] == tag[j]) {
             if (dz > SMALL) flag = 1;
             else if (fabs(dz) < SMALL) {
               if (dy > SMALL) flag = 1;
               else if (fabs(dy) < SMALL && dx > SMALL)
                 flag = 1;
 	    }
 	  }
 	}
 
         if( flag ) {
           H.jlist[m_fill] = j;
 	  r = sqrt(r_sqr);
           H.val[m_fill] = calculate_H( r, shld[type[i]][type[j]] );
           m_fill++;
         }
       }
       H.numnbrs[i] = m_fill - H.firstnbr[i];
     }
   }
 
   // NOTE(review): this capacity check runs after the entries have been
   // written, so it reports an overrun rather than preventing one
 
   if (m_fill >= H.m) {
     char str[128];
     sprintf(str,"H matrix size has been exceeded: m_fill=%d H.m=%d\n",
              m_fill, H.m );
     error->warning(FLERR,str);
     error->all(FLERR,"Fix qeq/shielded has insufficient QEq matrix size");
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double FixQEqShielded::calculate_H( double r, double gamma )
 {
   // taper polynomial Tap[7]*r^7 + ... + Tap[0], evaluated by Horner's rule
 
   double taper = Tap[7];
   for (int k = 6; k >= 0; k--)
     taper = taper * r + Tap[k];
 
   // denominator is (r^3 + gamma) raised to the power 0.3333333333333
 
   const double shifted_cube = r * r * r + gamma;
   const double denom = pow(shifted_cube,0.3333333333333);
 
   return taper * EV_TO_KCAL_PER_MOL / denom;
 }
diff --git a/src/QEQ/fix_qeq_slater.cpp b/src/QEQ/fix_qeq_slater.cpp
index acf3cc9f6..7c77c696e 100644
--- a/src/QEQ/fix_qeq_slater.cpp
+++ b/src/QEQ/fix_qeq_slater.cpp
@@ -1,425 +1,425 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Ray Shan (Sandia)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "fix_qeq_slater.h"
 #include "atom.h"
 #include "comm.h"
 #include "domain.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "update.h"
 #include "force.h"
 #include "group.h"
 #include "pair.h"
 #include "kspace.h"
 #include "respa.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace FixConst;
 
 /* ---------------------------------------------------------------------- */
 
 FixQEqSlater::FixQEqSlater(LAMMPS *lmp, int narg, char **arg) :
   FixQEq(lmp, narg, arg)
 {
   // value used when the optional "alpha" keyword is not given
   alpha = 0.20;
 
   // optional keyword/value pairs are parsed starting at argument index 8;
   // "alpha <value>" is the only keyword accepted here
   int iarg = 8;
   while (iarg < narg) {
     const bool is_alpha = (strcmp(arg[iarg],"alpha") == 0);
     if (!is_alpha) {
       error->all(FLERR,"Illegal fix qeq/slater command");
     } else {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix qeq/slater command");
       alpha = atof(arg[iarg+1]);
       iarg += 2;
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Checks the prerequisites of fix qeq/slater (per-atom charge, non-empty
 // group, non-zero zeta for every atom type), issues a neighbor list
 // request and records the number of rRESPA levels when the integrator
 // style contains "respa".
 void FixQEqSlater::init()
 {
   if (!atom->q_flag) 
     error->all(FLERR,"Fix qeq/slater requires atom attribute q");
 
   ngroup = group->count(igroup);
   if (ngroup == 0) error->all(FLERR,"Fix qeq/slater group has no atoms");
 
   // neighbor list request flagged as fix-owned (pair=0, fix=1) and
   // full rather than half (half=0, full=1)
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->fix  = 1;
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
 
   // a zero zeta entry is treated as a missing/invalid parameter
   int ntypes = atom->ntypes;
   for (int i = 1; i <= ntypes; i++) {
     if (zeta[i] == 0.0) 
       error->all(FLERR,"Invalid param file for fix qeq/slater");
   }
 
   if (strstr(update->integrate_style,"respa"))
     nlevels_respa = ((Respa *) update->integrate)->nlevels;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqSlater::pre_force(int vflag)
 {
   // act only on timesteps that are a multiple of nevery
   if (update->ntimestep % nevery != 0) return;
 
   // refresh cached local and local+ghost atom counts
   n = atom->nlocal;
   N = atom->nlocal + atom->nghost;
 
   // grow per-atom storage first, then the matrix if either the atom
   // count or the number of stored entries is close to its capacity
   if (atom->nmax > nmax) reallocate_storage();
 
   const bool atoms_near_cap = n > n_cap*DANGER_ZONE;
   const bool entries_near_cap = m_fill > m_cap*DANGER_ZONE;
   if (atoms_near_cap || entries_near_cap) reallocate_matrix();
 
   // build the linear system, solve for s and t, then update charges
   init_matvec();
   matvecs = CG(b_s, s);         // CG on s - parallel
   matvecs += CG(b_t, t);        // CG on t - parallel
   calculate_Q();
 
   // let the long-range solver recompute its charge sums
   if (force->kspace) force->kspace->qsum_qsq();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqSlater::init_matvec()
 {
   compute_H();
 
   int nn, ii, i;
   int *ilist;
 
   nn = list->inum;
   ilist = list->ilist;
 
   for( ii = 0; ii < nn; ++ii ) {
     i = ilist[ii];
     if (atom->mask[i] & groupbit) {
       Hdia_inv[i] = 1. / eta[ atom->type[i] ];
       b_s[i]      = -( chi[atom->type[i]] + chizj[i] );
       b_t[i]      = -1.0;
       t[i] = t_hist[i][2] + 3 * ( t_hist[i][0] - t_hist[i][1] );
       s[i] = 4*(s_hist[i][0]+s_hist[i][2])-(6*s_hist[i][1]+s_hist[i][3]);
     }
   }
 
   pack_flag = 2;
   comm->forward_comm_fix(this); //Dist_vector( s );
   pack_flag = 3;
   comm->forward_comm_fix(this); //Dist_vector( t );
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqSlater::compute_H()
 {
   int i, j, ii, jj, inum, jnum, itype, jtype;
   int *ilist, *jlist, *numneigh, **firstneigh;
 
   double r, rsq, delr[3];
   double zei, zej, zj, zjtmp;
 
   int *type = atom->type;
   double **x = atom->x;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   m_fill = 0;
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     itype = type[i];
     zei = zeta[itype];
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
     H.firstnbr[i] = m_fill;
     zjtmp = 0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
 
       jtype = type[j];
       zej = zeta[jtype];
       zj = zcore[jtype];
 
       delr[0] = x[i][0] - x[j][0];
       delr[1] = x[i][1] - x[j][1];
       delr[2] = x[i][2] - x[j][2];
       rsq = delr[0]*delr[0] + delr[1]*delr[1] + delr[2]*delr[2];
 
       if (rsq > cutoff_sq) continue;
 
       r = sqrt(rsq);
       H.jlist[m_fill] = j;
       H.val[m_fill] = calculate_H(zei, zej, zj, r, zjtmp);
       m_fill++;
     }
     H.numnbrs[i] = m_fill - H.firstnbr[i];
     chizj[i] = zjtmp;
   }
 
   if (m_fill >= H.m) {
     char str[128];
     sprintf(str,"H matrix size has been exceeded: m_fill=%d H.m=%d\n",
              m_fill, H.m );
     error->warning(FLERR,str);
     error->all(FLERR,"Fix qeq/slater has insufficient QEq matrix size");
   }
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 double FixQEqSlater::calculate_H(double zei, double zej, double zj,
                                  double r, double &zjtmp)
 {
   // Returns the H matrix value for one pair at distance r and adds the
   // zj-weighted contribution for the same pair to zjtmp.
   const double qqrd2e = force->qqrd2e;
   const double rinv = 1.0/r;
 
   const double zei2 = zei*zei;
   const double exp2zir = exp(-2.0*zei*r);
   const double exp2zjr = exp(-2.0*zej*r);
 
   const double erfcr = erfc(alpha*r);
 
   // term that depends on zei only
   const double ci_jfi = -zei*exp2zir - rinv*exp2zir;
 
   // term that depends on both exponents; the general expression divides
   // by powers of (zei-zej), so equal exponents use a separate closed form
   double ci_fifj;
   if (zei == zej) {
     const double sm1 = 11.0/8.0;
     const double sm2 = 3.00/4.0;
     const double sm3 = 1.00/6.0;
     ci_fifj = -exp2zir*(rinv + zei*(sm1 + sm2*zei*r + sm3*zei2*r*r));
   } else {
     const double zei4 = zei2*zei2;
     const double zei6 = zei2*zei4;
     const double zej2 = zej*zej;
     const double zej4 = zej2*zej2;
     const double zej6 = zej2*zej4;
 
     const double zsum = zei+zej;
     const double zdif = zei-zej;
     const double zrev = zej-zei;
 
     const double e1 = zei*zej4/(zsum*zsum*zdif*zdif);
     const double e2 = zej*zei4/(zsum*zsum*zrev*zrev);
     const double e3 = (3.0*zei2*zej4-zej6) /
       (zsum*zsum*zsum*zdif*zdif*zdif);
     const double e4 = (3.0*zej2*zei4-zei6) /
       (zsum*zsum*zsum*zrev*zrev*zrev);
 
     ci_fifj = -exp2zir*(e1+e3/r) - exp2zjr*(e2+e4/r);
   }
 
   const double etmp1 = 1.00 * (ci_jfi - ci_fifj);
   const double etmp2 = 0.50 * (ci_fifj + erfcr*rinv);
 
   zjtmp += qqrd2e * zj * etmp1;
   return qqrd2e * etmp2;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double FixQEqSlater::calculate_H_wolf(double zei, double zej, double zj,
                                       double r, double &zjtmp)
 {
   // Variant of calculate_H() in which each term has its value at the
   // cutoff (eshift) and a linear correction (r-rc)*fshift subtracted.
   const double qqrd2e = force->qqrd2e;
 
   const double rc = cutoff;
   const double rinv = 1.0/r;
   const double rcinv = 1.0/rc;
   const double rcinv2 = rcinv*rcinv;
 
   const double zei2 = zei*zei;
 
   // exponentials at the pair distance and at the cutoff
   const double exp2zir = exp(-2.0*zei*r);
   const double exp2zjr = exp(-2.0*zej*r);
   const double exp2zirsh = exp(-2.0*zei*rc);
   const double exp2zjrsh = exp(-2.0*zej*rc);
 
   const double a = alpha;
   const double erfcr = erfc(a*r);
   const double erfcrc = erfc(a*rc);
 
   // NOTE(review): fshift is computed once from the zei-only term and
   // reused unchanged for ci_fifj below - confirm this is intended
   const double fshift =
     2.0*zei2*exp2zirsh + rcinv2*exp2zirsh + 2.0*zei*rcinv*exp2zirsh;
   const double linear = (r-rc)*fshift;
 
   // shifted term that depends on zei only
   const double eshift_i = -zei*exp2zirsh - rcinv*exp2zirsh;
   const double ci_jfi = -zei*exp2zir - rinv*exp2zir - eshift_i - linear;
 
   // shifted term that depends on both exponents; equal exponents use a
   // separate closed form because the general one divides by (zei-zej)
   double ci_fifj;
   if (zei == zej) {
     const double sm1 = 11.0/8.0;
     const double sm2 = 3.00/4.0;
     const double sm3 = 1.00/6.0;
 
     const double eshift_ij =
       -exp2zirsh*(rcinv + zei*(sm1 + sm2*zei*rc + sm3*zei2*rc*rc));
     ci_fifj = -exp2zir*(rinv + zei*(sm1 + sm2*zei*r + sm3*zei2*r*r))
               - eshift_ij - linear;
   } else {
     const double zei4 = zei2*zei2;
     const double zei6 = zei2*zei4;
     const double zej2 = zej*zej;
     const double zej4 = zej2*zej2;
     const double zej6 = zej2*zej4;
 
     const double zsum = zei+zej;
     const double zdif = zei-zej;
     const double zrev = zej-zei;
 
     const double e1 = zei*zej4/(zsum*zsum*zdif*zdif);
     const double e2 = zej*zei4/(zsum*zsum*zrev*zrev);
     const double e3 = (3.0*zei2*zej4-zej6) /
       (zsum*zsum*zsum*zdif*zdif*zdif);
     const double e4 = (3.0*zej2*zei4-zei6) /
       (zsum*zsum*zsum*zrev*zrev*zrev);
 
     const double eshift_ij = -exp2zirsh*(e1+e3/rc) - exp2zjrsh*(e2+e4/rc);
     ci_fifj = -exp2zir*(e1+e3/r) - exp2zjr*(e2+e4/r)
               - eshift_ij - linear;
   }
 
   const double etmp1 = erfcr/r - erfcrc/rc;
   const double etmp2 = 1.00 * (ci_jfi - ci_fifj);
   const double etmp3 = 0.50 * (etmp1 + ci_fifj);
 
   zjtmp += qqrd2e * zj * etmp2;
   return qqrd2e * etmp3;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixQEqSlater::CG( double *b, double *x )
 {
   int  i, j;
   double tmp, alfa, beta, b_norm;
   double sig_old, sig_new;
 
   int nn, jj;
   int *ilist;
 
   nn = list->inum;
   ilist = list->ilist;
 
   pack_flag = 1;
   sparse_matvec( &H, x, q );
   comm->reverse_comm_fix( this ); //Coll_Vector( q );
 
   vector_sum( r , 1.,  b, -1., q, nn );
 
   for( jj = 0; jj < nn; ++jj ) {
     j = ilist[jj];
     if (atom->mask[j] & groupbit)
       d[j] = r[j] * Hdia_inv[j]; //pre-condition
   }
 
   b_norm = parallel_norm( b, nn );
   sig_new = parallel_dot( r, d, nn);
 
   for( i = 1; i < maxiter && sqrt(sig_new) / b_norm > tolerance; ++i ) {
     comm->forward_comm_fix(this); //Dist_vector( d );
     sparse_matvec( &H, d, q );
     comm->reverse_comm_fix(this); //Coll_vector( q );
 
     tmp = parallel_dot( d, q, nn);
     alfa = sig_new / tmp;
 
     vector_add( x, alfa, d, nn );
     vector_add( r, -alfa, q, nn );
 
     // pre-conditioning
     for( jj = 0; jj < nn; ++jj ) {
       j = ilist[jj];
       if (atom->mask[j] & groupbit)
         p[j] = r[j] * Hdia_inv[j];
     }
 
     sig_old = sig_new;
     sig_new = parallel_dot( r, p, nn);
 
     beta = sig_new / sig_old;
     vector_sum( d, 1., p, beta, d, nn );
 
   }
 
   if (i >= maxiter && comm->me == 0) {
     char str[128];
     sprintf(str,"Fix qeq/slater CG convergence failed (%g) after %d iterations "
             "at " BIGINT_FORMAT " step",sqrt(sig_new) / b_norm,i,update->ntimestep);
     error->warning(FLERR,str);
   }
 
   return i;
 }
 
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqSlater::sparse_matvec( sparse_matrix *A, double *x, double *b )
 {
   // Computes b = A*x for atoms in the fix group.  Each stored entry is
   // applied to both row i and row j; the diagonal is added separately.
   const int nlocal = atom->nlocal;
   const int nall = atom->nlocal + atom->nghost;
   int *mask = atom->mask;
 
   // constant term subtracted from eta on the diagonal
   const double rc = cutoff;
   const double woself = 0.50*erfc(alpha*rc)/rc + alpha/MY_PIS;
 
   // diagonal contribution for owned atoms
   for (int i = 0; i < nlocal; ++i) {
     if (!(mask[i] & groupbit)) continue;
     b[i] = (eta[atom->type[i]] - 2.0*force->qqr2e*woself) * x[i];
   }
 
   // ghost entries start from zero and only collect off-diagonal terms
   for (int i = nlocal; i < nall; ++i) {
     if (!(mask[i] & groupbit)) continue;
     b[i] = 0;
   }
 
   // off-diagonal contributions from the stored neighbor entries
   for (int i = 0; i < nlocal; ++i) {
     if (!(mask[i] & groupbit)) continue;
 
     const int row_begin = A->firstnbr[i];
     const int row_end = row_begin + A->numnbrs[i];
     for (int k = row_begin; k < row_end; k++) {
       const int j = A->jlist[k];
       b[i] += A->val[k] * x[j];
       b[j] += A->val[k] * x[i];
     }
   }
 }
diff --git a/src/REAX/pair_reax.cpp b/src/REAX/pair_reax.cpp
index f73973ac6..dbf271131 100644
--- a/src/REAX/pair_reax.cpp
+++ b/src/REAX/pair_reax.cpp
@@ -1,1068 +1,1068 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Aidan Thompson (Sandia, athomps@sandia.gov)
                          Hansohl Cho (MIT, hansohl@mit.edu)
    LAMMPS implementation of the Reactive Force Field (ReaxFF) is based on
      Aidan Thompson's GRASP code
        (General Reactive Atomistic Simulation Program)
      and Ardi Van Duin's original ReaxFF code
 ------------------------------------------------------------------------- */
 
 #include "mpi.h"
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_reax.h"
 #include "pair_reax_fortran.h"
 #include "atom.h"
 #include "update.h"
 #include "force.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 #define SMALL 0.0001
 
 /* ---------------------------------------------------------------------- */
 
 PairREAX::PairREAX(LAMMPS *lmp) : Pair(lmp)
 {
   // pair-style flags
   single_enable = 0;
   restartinfo = 0;
   one_coeff = 1;
   manybody_flag = 1;
   no_virial_fdotr_compute = 1;
 
   // one value per atom is exchanged in forward and reverse communication
   comm_forward = 1;
   comm_reverse = 1;
 
   // storage for the 14 extra values filled in by compute()
   nextra = 14;
   pvector = new double[nextra];
 
   // default settings; settings() may override hbcut, ihbnew,
   // itripstaball and precision
   hbcut = 6.0;
   ihbnew = 1;
   itripstaball = 1;
   precision = 1.0e-6;
 
   cutmax = 0.0;
   iprune = 4;
   ihb = 1;
   chpot = 0;
 
   // per-atom work arrays, allocated later when sizes are known
   nmax = 0;
   arow_ptr = NULL;
   ch = NULL;
   elcvec = NULL;
   rcg = NULL;
   wcg = NULL;
   pcg = NULL;
   poldcg = NULL;
   qcg = NULL;
 
   // sparse matrix storage, allocated later when sizes are known
   matmax = 0;
   aval = NULL;
   acol_ind = NULL;
 }
 
 /* ----------------------------------------------------------------------
    free all arrays
    check if allocated, since class can be destructed when incomplete
 ------------------------------------------------------------------------- */
 
 PairREAX::~PairREAX()
 {
   delete [] pvector;
 
   // these only exist once allocate() has run
   if (allocated) {
     memory->destroy(setflag);
     memory->destroy(cutsq);
 
     // per-type parameter arrays first, then the list that holds them
     const int ntypes = atom->ntypes;
     for (int itype = 1; itype <= ntypes; itype++)
       delete [] param_list[itype].params;
     delete [] param_list;
 
     delete [] map;
   }
 
   // per-atom work arrays
   memory->destroy(arow_ptr);
   memory->destroy(ch);
   memory->destroy(elcvec);
   memory->destroy(rcg);
   memory->destroy(wcg);
   memory->destroy(pcg);
   memory->destroy(poldcg);
   memory->destroy(qcg);
 
   // sparse matrix storage
   memory->destroy(aval);
   memory->destroy(acol_ind);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairREAX::compute(int eflag, int vflag)
 {
   int i,j;
   double evdwl,ecoul;
   double energy_charge_equilibration;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else ev_unset();
 
   if (vflag_global) FORTRAN(cbkvirial, CBKVIRIAL).Lvirial = 1;
   else FORTRAN(cbkvirial, CBKVIRIAL).Lvirial = 0;
 
   if (vflag_atom) FORTRAN(cbkvirial, CBKVIRIAL).Latomvirial = 1;
   else FORTRAN(cbkvirial, CBKVIRIAL).Latomvirial = 0;
 
   // reallocate charge equilibration and CG arrays if necessary
 
   if (atom->nmax > nmax) {
     memory->destroy(rcg);
     memory->destroy(wcg);
     memory->destroy(pcg);
     memory->destroy(poldcg);
     memory->destroy(qcg);
 
     nmax = atom->nmax;
     int n = nmax+1;
 
     memory->create(arow_ptr,n,"reax:arow_ptr");
     memory->create(ch,n,"reax:ch");
     memory->create(elcvec,n,"reax:elcvec");
     memory->create(rcg,n,"reax:rcg");
     memory->create(wcg,n,"reax:wcg");
     memory->create(pcg,n,"reax:pcg");
     memory->create(poldcg,n,"reax:poldcg");
     memory->create(qcg,n,"reax:qcg");
   }
 
   // calculate the atomic charge distribution
 
   compute_charge(energy_charge_equilibration);
 
   // transfer LAMMPS positions and neighbor lists to REAX
 
   write_reax_positions();
   write_reax_vlist();
 
   // determine whether this bond is owned by the processor or not
 
   FORTRAN(srtbon1, SRTBON1)(&iprune, &ihb, &hbcut, &ihbnew, &itripstaball);
 
   // communicate with other processors for the atomic bond order calculations
 
   FORTRAN(cbkabo, CBKABO).abo;
 
   // communicate local atomic bond order to ghost atomic bond order
 
   packflag = 0;
   comm->forward_comm_pair(this);
 
   FORTRAN(molec, MOLEC)();
   FORTRAN(encalc, ENCALC)();
   FORTRAN(mdsav, MDSAV)(&comm->me);
 
   // read forces from ReaxFF Fortran
 
   read_reax_forces();
 
   // extract global and per-atom energy from ReaxFF Fortran
   // compute_charge already contributed to eatom
 
   if (eflag_global) {
     evdwl += FORTRAN(cbkenergies, CBKENERGIES).eb;
     evdwl += FORTRAN(cbkenergies, CBKENERGIES).ea;
     evdwl += FORTRAN(cbkenergies, CBKENERGIES).elp;
     evdwl += FORTRAN(cbkenergies, CBKENERGIES).emol;
     evdwl += FORTRAN(cbkenergies, CBKENERGIES).ev;
     evdwl += FORTRAN(cbkenergies, CBKENERGIES).epen;
     evdwl += FORTRAN(cbkenergies, CBKENERGIES).ecoa;
     evdwl += FORTRAN(cbkenergies, CBKENERGIES).ehb;
     evdwl += FORTRAN(cbkenergies, CBKENERGIES).et;
     evdwl += FORTRAN(cbkenergies, CBKENERGIES).eco;
     evdwl += FORTRAN(cbkenergies, CBKENERGIES).ew;
     evdwl += FORTRAN(cbkenergies, CBKENERGIES).efi;
 
     ecoul += FORTRAN(cbkenergies, CBKENERGIES).ep;
     ecoul += energy_charge_equilibration;
 
     eng_vdwl += evdwl;
     eng_coul += ecoul;
 
     // Store the different parts of the energy
     // in a list for output by compute pair command
 
     pvector[0] = FORTRAN(cbkenergies, CBKENERGIES).eb;
     pvector[1] = FORTRAN(cbkenergies, CBKENERGIES).ea;
     pvector[2] = FORTRAN(cbkenergies, CBKENERGIES).elp;
     pvector[3] = FORTRAN(cbkenergies, CBKENERGIES).emol;
     pvector[4] = FORTRAN(cbkenergies, CBKENERGIES).ev;
     pvector[5] = FORTRAN(cbkenergies, CBKENERGIES).epen;
     pvector[6] = FORTRAN(cbkenergies, CBKENERGIES).ecoa;
     pvector[7] = FORTRAN(cbkenergies, CBKENERGIES).ehb;
     pvector[8] = FORTRAN(cbkenergies, CBKENERGIES).et;
     pvector[9] = FORTRAN(cbkenergies, CBKENERGIES).eco;
     pvector[10] = FORTRAN(cbkenergies, CBKENERGIES).ew;
     pvector[11] = FORTRAN(cbkenergies, CBKENERGIES).ep;
     pvector[12] = FORTRAN(cbkenergies, CBKENERGIES).efi;
     pvector[13] = energy_charge_equilibration;
 
   }
 
   if (eflag_atom) {
     int ntotal = atom->nlocal + atom->nghost;
     for (i = 0; i < ntotal; i++)
       eatom[i] += FORTRAN(cbkd,CBKD).estrain[i];
   }
 
   // extract global and per-atom virial from ReaxFF Fortran
 
   if (vflag_global) {
     virial[0] = -FORTRAN(cbkvirial, CBKVIRIAL).virial[0];
     virial[1] = -FORTRAN(cbkvirial, CBKVIRIAL).virial[1];
     virial[2] = -FORTRAN(cbkvirial, CBKVIRIAL).virial[2];
     virial[3] = -FORTRAN(cbkvirial, CBKVIRIAL).virial[3];
     virial[4] = -FORTRAN(cbkvirial, CBKVIRIAL).virial[4];
     virial[5] = -FORTRAN(cbkvirial, CBKVIRIAL).virial[5];
   }
 
   if (vflag_atom) {
     int ntotal = atom->nlocal + atom->nghost;
     j = 0;
     for (i = 0; i < ntotal; i++) {
       vatom[i][0] = -FORTRAN(cbkvirial, CBKVIRIAL).atomvirial[j+0];
       vatom[i][1] = -FORTRAN(cbkvirial, CBKVIRIAL).atomvirial[j+1];
       vatom[i][2] = -FORTRAN(cbkvirial, CBKVIRIAL).atomvirial[j+2];
       vatom[i][3] = -FORTRAN(cbkvirial, CBKVIRIAL).atomvirial[j+3];
       vatom[i][4] = -FORTRAN(cbkvirial, CBKVIRIAL).atomvirial[j+4];
       vatom[i][5] = -FORTRAN(cbkvirial, CBKVIRIAL).atomvirial[j+5];
       j += 6;
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairREAX::write_reax_positions()
 {
   // Copies coordinates, charges, mapped types and atom IDs of all local
   // and ghost atoms into the Fortran common blocks.
   double **x = atom->x;
   double *q = atom->q;
   int *type = atom->type;
   int *tag = atom->tag;
   const int nlocal = atom->nlocal;
   const int nghost = atom->nghost;
 
   FORTRAN(rsmall, RSMALL).na = nlocal+nghost;
   FORTRAN(rsmall, RSMALL).na_local = nlocal;
 
   if (nlocal+nghost > ReaxParams::nat)
     error->one(FLERR,"Reax_defs.h setting for NATDEF is too small");
 
   // c[] holds all x values, then all y, then all z, each section
   // ReaxParams::nat entries long
   const int xoff = 0;
   const int yoff = ReaxParams::nat;
   const int zoff = 2*ReaxParams::nat;
 
   for (int i = 0; i < nlocal+nghost; i++) {
     FORTRAN(cbkc, CBKC).c[i+xoff] = x[i][0];
     FORTRAN(cbkc, CBKC).c[i+yoff] = x[i][1];
     FORTRAN(cbkc, CBKC).c[i+zoff] = x[i][2];
     FORTRAN(cbkch, CBKCH).ch[i] = q[i];
     FORTRAN(cbkia, CBKIA).ia[i] = map[type[i]];
     FORTRAN(cbkia, CBKIA).iag[i] = map[type[i]];
     FORTRAN(cbkc, CBKC).itag[i] = tag[i];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Fills the Fortran pair arrays from the LAMMPS neighbor list.
 // For every pair within rcutvsq it stores the two atom indices (offset
 // by one, smaller index first), a flag for pairs also within rcutbsq,
 // and an ownership flag.  A second pass adds all ghost-ghost pairs
 // within rcutvsq, always with the ownership flag cleared.
 void PairREAX::write_reax_vlist()
 {
   int ii, jj, i, j, iii, jjj;
   double xitmp, yitmp, zitmp;
   double xjtmp, yjtmp, zjtmp;
   int itag,jtag;
   int nvpair, nvlself, nvpairmax;
   int nbond;
   int inum,jnum;
   int *ilist,*jlist,*numneigh,**firstneigh;
   double delr2;
   double delx, dely, delz;
 
   double **x = atom->x;
   int *tag = atom->tag;
   int nlocal = atom->nlocal;
   int nghost = atom->nghost;
 
   // capacity of the Fortran pair arrays
   nvpairmax = ReaxParams::nneighmax * ReaxParams::nat;
 
   nvpair = 0;
   nvlself =0;
   nbond = 0;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // pass 1: pairs taken from the neighbor list
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xitmp = x[i][0];
     yitmp = x[i][1];
     zitmp = x[i][2];
     itag = tag[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
 
       xjtmp = x[j][0];
       yjtmp = x[j][1];
       zjtmp = x[j][2];
       jtag = tag[j];
 
       delx = xitmp - xjtmp;
       dely = yitmp - yjtmp;
       delz = zitmp - zjtmp;
 
       delr2 = delx*delx+dely*dely+delz*delz;
 
       if (delr2 <= rcutvsq) {
         // store indices offset by one, smaller index first
         // (presumably Fortran 1-based indexing - confirm)
         if (i < j) {
           iii = i+1;
           jjj = j+1;
         } else {
           iii = j+1;
           jjj = i+1;
         }
         if (nvpair >= nvpairmax)
           error->one(FLERR,"Reax_defs.h setting for NNEIGHMAXDEF is too small");
 
         FORTRAN(cbkpairs, CBKPAIRS).nvl1[nvpair] = iii;
         FORTRAN(cbkpairs, CBKPAIRS).nvl2[nvpair] = jjj;
         FORTRAN(cbknvlbo, CBKNVLBO).nvlbo[nvpair] = 0;
 
         // pair is also inside the shorter rcutbsq cutoff
         if (delr2 <= rcutbsq) {
           FORTRAN(cbknvlbo, CBKNVLBO).nvlbo[nvpair] = 1;
           nbond++;
         }
 
         FORTRAN(cbknvlown, CBKNVLOWN).nvlown[nvpair] = 0;
 
         // ownership: j is local, or i has the smaller ID, or for equal
         // IDs the separation is positive in z, then y, then x
         if (j < nlocal)
           FORTRAN(cbknvlown, CBKNVLOWN).nvlown[nvpair] = 1;
         else if (itag < jtag)
           FORTRAN(cbknvlown, CBKNVLOWN).nvlown[nvpair] = 1;
         else if (itag == jtag) {
           if (delz > SMALL)
             FORTRAN(cbknvlown, CBKNVLOWN).nvlown[nvpair] = 1;
           else if (fabs(delz) < SMALL) {
             if (dely > SMALL)
               FORTRAN(cbknvlown, CBKNVLOWN).nvlown[nvpair] = 1;
             else if (fabs(dely) < SMALL && delx > SMALL)
               FORTRAN(cbknvlown, CBKNVLOWN).nvlown[nvpair] = 1;
           }
         }
         nvpair++;
       }
     }
   }
 
   int ntotal = nlocal + nghost;
 
   // pass 2: all ghost-ghost pairs, found by a direct double loop
   for (int i = nlocal; i < ntotal; i++) {
     xitmp = x[i][0];
     yitmp = x[i][1];
     zitmp = x[i][2];
     itag = tag[i];
 
     for (int j = i+1; j < ntotal; j++) {
       xjtmp = x[j][0];
       yjtmp = x[j][1];
       zjtmp = x[j][2];
       jtag = tag[j];
 
       delx = xitmp - xjtmp;
       dely = yitmp - yjtmp;
       delz = zitmp - zjtmp;
 
       delr2 = delx*delx+dely*dely+delz*delz;
 
       // don't need to check the double count since i < j in the ghost region
 
       if (delr2 <= rcutvsq) {
         iii = i+1;
         jjj = j+1;
 
         if (nvpair >= nvpairmax)
           error->one(FLERR,"Reax_defs.h setting for NNEIGHMAXDEF is too small");
 
         FORTRAN(cbkpairs, CBKPAIRS).nvl1[nvpair] = iii;
         FORTRAN(cbkpairs, CBKPAIRS).nvl2[nvpair] = jjj;
         FORTRAN(cbknvlbo, CBKNVLBO).nvlbo[nvpair] = 0;
 
         if (delr2 <= rcutbsq) {
           FORTRAN(cbknvlbo, CBKNVLBO).nvlbo[nvpair] = 1;
           nbond++;
         }
 
         // ghost-ghost pairs are never marked as owned
         FORTRAN(cbknvlown, CBKNVLOWN).nvlown[nvpair] = 0;
         nvpair++;
       }
     }
   }
 
   // publish the pair count; nvlself is always written as 0 here
   FORTRAN(cbkpairs, CBKPAIRS).nvpair = nvpair;
   FORTRAN(cbkpairs, CBKPAIRS).nvlself = nvlself;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairREAX::read_reax_forces()
 {
   // Overwrites f[] for all local and ghost atoms with the negated
   // values of the Fortran array d, stored as three values per atom.
   double **f = atom->f;
   const int ntotal = atom->nlocal + atom->nghost;
 
   for (int i = 0; i < ntotal; i++) {
     const int base = 3*i;
     f[i][0] = -FORTRAN(cbkd, CBKD).d[base];
     f[i][1] = -FORTRAN(cbkd, CBKD).d[base+1];
     f[i][2] = -FORTRAN(cbkd, CBKD).d[base+2];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairREAX::allocate()
 {
   // Allocates the per-type arrays; every array has ntypes+1 entries so
   // it can be indexed directly by atom type starting at 1.
   allocated = 1;
 
   const int ntypes = atom->ntypes;
   const int nslots = ntypes + 1;
 
   memory->create(setflag,nslots,nslots,"pair:setflag");
   memory->create(cutsq,nslots,nslots,"pair:cutsq");
 
   map = new int[nslots];
 
   // five parameters per type; slot 0 gets no parameter array
   param_list = new ff_params[nslots];
   for (int itype = 1; itype <= ntypes; itype++)
     param_list[itype].params = new double[5];
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairREAX::settings(int narg, char **arg)
 {
   // either no arguments (keep defaults) or exactly four
   if (narg != 0 && narg !=4) error->all(FLERR,"Illegal pair_style command");
   if (narg != 4) return;
 
   hbcut = force->numeric(FLERR,arg[0]);
   ihbnew = static_cast<int> (force->numeric(FLERR,arg[1]));
   itripstaball = static_cast<int> (force->numeric(FLERR,arg[2]));
   precision = force->numeric(FLERR,arg[3]);
 
   // hbcut and precision must be positive, the two flags must be 0 or 1
   const bool bad_hbcut = (hbcut <= 0.0);
   const bool bad_ihbnew = (ihbnew != 0 && ihbnew != 1);
   const bool bad_itrip = (itripstaball != 0 && itripstaball != 1);
   const bool bad_precision = (precision <= 0.0);
 
   if (bad_hbcut || bad_ihbnew || bad_itrip || bad_precision)
     error->all(FLERR,"Illegal pair_style command");
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairREAX::coeff(int narg, char **arg)
 {
   if (!allocated) allocate();
 
   // expected form: * * ffield.reax <one element index per atom type>
 
   if (narg != 3 + atom->ntypes)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // ensure I,J args are * *
 
   const bool wildcard_i = (strcmp(arg[0],"*") == 0);
   const bool wildcard_j = (strcmp(arg[1],"*") == 0);
   if (!wildcard_i || !wildcard_j)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // ensure filename is ffield.reax
 
   if (strcmp(arg[2],"ffield.reax") != 0)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // read args that map atom types to elements in potential file
   // map[i] = which element the Ith atom type is, -1 if NULL
   // NOTE: for now throw an error if NULL is used to disallow use with hybrid
   //       qEq matrix solver needs to be modified to exclude atoms
 
   for (int iarg = 3; iarg < narg; iarg++) {
     const int itype = iarg - 2;
     if (strcmp(arg[iarg],"NULL") != 0) {
       map[itype] = force->inumeric(FLERR,arg[iarg]);
     } else {
       map[itype] = -1;
       error->all(FLERR,"Cannot currently use pair reax with pair hybrid");
     }
   }
 
   // mark every type pair i <= j as set
 
   const int ntypes = atom->ntypes;
   int count = 0;
   for (int i = 1; i <= ntypes; i++) {
     for (int j = i; j <= ntypes; j++) {
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 // Checks that the simulation provides what pair reax needs, requests a
 // neighbor list, initializes the ReaxFF Fortran library, derives the
 // cutoffs, and fills param_list with the per-type values read back
 // from the library.
 void PairREAX::init_style()
 {
   // hard requirements are errors; non-"real" units only warn (rank 0)
   if (atom->tag_enable == 0)
     error->all(FLERR,"Pair style reax requires atom IDs");
   if (force->newton_pair == 0)
     error->all(FLERR,"Pair style reax requires newton pair on");
   if (!atom->q_flag)
     error->all(FLERR,"Pair style reax requires atom attribute q");
   if (strcmp(update->unit_style,"real") != 0 && comm->me == 0)
     error->warning(FLERR,"Not using real units with pair reax");
 
   // neighbor list request with the request's newton field set to 2
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->newton = 2;
 
   // Fortran-side initialization calls
   FORTRAN(readc, READC)();
   FORTRAN(reaxinit, REAXINIT)();
   FORTRAN(ffinpt, FFINPT)();
   FORTRAN(tap7th, TAP7TH)();
 
   // turn off read_in by fort.3 in REAX Fortran
 
   int ngeofor_tmp = -1;
   FORTRAN(setngeofor, SETNGEOFOR)(&ngeofor_tmp);
   if (comm->me == 0) FORTRAN(readgeo, READGEO)();
 
   // initial setup for cutoff radius of VLIST and BLIST in ReaxFF
   // cutmax is the larger of swb and hbcut; rcutvsq and rcutbsq are the
   // squares of cutmax and vlbora
 
   double vlbora;
 
   FORTRAN(getswb, GETSWB)(&swb);
   cutmax=MAX(swb, hbcut);
   rcutvsq=cutmax*cutmax;
   FORTRAN(getvlbora, GETVLBORA)(&vlbora);
   rcutbsq=vlbora*vlbora;
 
   // parameters for charge equilibration from ReaxFF input, fort.4
   // verify that no LAMMPS type to REAX type mapping was invalid
 
   int nelements;
   FORTRAN(getnso, GETNSO)(&nelements);
 
   FORTRAN(getswa, GETSWA)(&swa);
   double chi, eta, gamma;
   for (int itype = 1; itype <= atom->ntypes; itype++) {
     // map[] values index the Fortran arrays starting at 1
     if (map[itype] < 1 || map[itype] > nelements)
       error->all(FLERR,"Invalid REAX atom type");
     chi = FORTRAN(cbkchb, CBKCHB).chi[map[itype]-1];
     eta = FORTRAN(cbkchb, CBKCHB).eta[map[itype]-1];
     gamma = FORTRAN(cbkchb, CBKCHB).gam[map[itype]-1];
     param_list[itype].np = 5;
     // NOTE(review): rcutsq receives cutmax itself, not cutmax squared -
     // confirm this is what readers of rcutsq expect
     param_list[itype].rcutsq = cutmax;
     param_list[itype].params[0] = chi;
     param_list[itype].params[1] = eta;
     param_list[itype].params[2] = gamma;
     param_list[itype].params[3] = swa;
     param_list[itype].params[4] = swb;
   }
 
   // compute the taper coefficients from swa and swb
   taper_setup();
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairREAX::init_one(int i, int j)
 {
   // the same cutoff is returned for every type pair; i and j are unused
   const double pair_cutoff = cutmax;
   return pair_cutoff;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int PairREAX::pack_forward_comm(int n, int *list, double *buf,
                                 int pbc_flag, int *pbc)
 {
   // packflag selects the source array: 0 = Fortran abo array,
   // anything else = wcg.  One value is packed per listed atom and the
   // number of values packed is returned.
   int m = 0;
 
   if (packflag != 0) {
     for (; m < n; ++m)
       buf[m] = wcg[list[m]];
   } else {
     for (; m < n; ++m)
       buf[m] = FORTRAN(cbkabo, CBKABO).abo[list[m]];
   }
 
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairREAX::unpack_forward_comm(int n, int first, double *buf)
 {
   // packflag selects the destination array: 0 = Fortran abo array,
   // anything else = wcg.  Values go to entries first .. first+n-1.
   if (packflag != 0) {
     for (int k = 0; k < n; ++k)
       wcg[first+k] = buf[k];
   } else {
     for (int k = 0; k < n; ++k)
       FORTRAN(cbkabo, CBKABO).abo[first+k] = buf[k];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int PairREAX::pack_reverse_comm(int n, int first, double *buf)
 {
   // copy wcg[first .. first+n-1] into buf and return the count packed
   int m = 0;
   for (; m < n; ++m)
     buf[m] = wcg[first+m];
 
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairREAX::unpack_reverse_comm(int n, int *list, double *buf)
 {
   // add each received value to the wcg entry of the listed atom
   for (int k = 0; k < n; ++k)
     wcg[list[k]] += buf[k];
 }
 
 /* ----------------------------------------------------------------------
    charge equilibration routines
 ------------------------------------------------------------------------- */
 
 /* ---------------------------------------------------------------------- */
 
 void PairREAX::taper_setup()
 {
   double swb2,swa2,swb3,swa3,d1,d7;
 
   d1=swb-swa;
   d7=pow(d1,7.0);
   swa2=swa*swa;
   swa3=swa2*swa;
   swb2=swb*swb;
   swb3=swb2*swb;
 
   swc7=  20.0e0/d7;
   swc6= -70.0e0*(swa+swb)/d7;
   swc5=  84.0e0*(swa2+3.0e0*swa*swb+swb2)/d7;
   swc4= -35.0e0*(swa3+9.0e0*swa2*swb+9.0e0*swa*swb2+swb3)/d7;
   swc3= 140.0e0*(swa3*swb+3.0e0*swa2*swb2+swa*swb3)/d7;
   swc2=-210.0e0*(swa3*swb2+swa2*swb3)/d7;
   swc1= 140.0e0*swa3*swb3/d7;
   swc0=(-35.0e0*swa3*swb2*swb2+21.0e0*swa2*swb3*swb2+
         7.0e0*swa*swb3*swb3+swb3*swb3*swb)/d7;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // evaluate the taper polynomial sum_k swc_k * r^k, k = 0..7
 // r2 must be supplied by the caller as r*r
 
 double PairREAX::taper_E(const double &r, const double &r2)
 {
   const double rcube = r2*r;
 
   return swc7*rcube*rcube*r
     + swc6*rcube*rcube
     + swc5*rcube*r2
     + swc4*r2*r2
     + swc3*rcube
     + swc2*r2
     + swc1*r
     + swc0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // evaluate the derivative of the taper polynomial with respect to r:
 // sum_k k * swc_k * r^(k-1), k = 1..7
 // r2 must be supplied by the caller as r*r
 
 double PairREAX::taper_F(const double &r, const double &r2)
 {
   const double rcube = r2*r;
 
   return 7.0e0*swc7*rcube*rcube
     + 6.0e0*swc6*rcube*r2
     + 5.0e0*swc5*r2*r2
     + 4.0e0*swc4*rcube
     + 3.0e0*swc3*r2
     + 2.0e0*swc2*r
     + swc1;
 }
 
 /* ----------------------------------------------------------------------
    compute current charge distributions based on the charge equilibration
 ------------------------------------------------------------------------- */
 
 // assemble the sparse charge-equilibration system from the neighbor list,
 // solve it via charge_reax(), then
 //   - store the resulting equilibration energy in the output argument
 //   - add per-atom energies to eatom if eflag_atom is set
 //   - copy the solved charges back into atom->q and the last entry into chpot
 // the matrix is stored in three arrays:
 //   arow_ptr[i] = index of the first entry of row i in aval/acol_ind
 //   aval[]      = entry values, acol_ind[] = their column indices
 
 void PairREAX::compute_charge(double &energy_charge_equilibration)
 {
   double xitmp, yitmp, zitmp;
   double xjtmp, yjtmp, zjtmp;
   int itype, jtype, itag, jtag;
   int ii, jj, i, j;
   double delr2, delr_norm, gamt, hulp1, hulp2;
   double delx, dely, delz;
   double qsum,qi;
   int nmatentries;
   double sw;
   int inum,jnum;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   double **x = atom->x;
   double *q = atom->q;
   int *type = atom->type;
   int *tag = atom->tag;
 
   int nlocal = atom->nlocal;
   int nghost = atom->nghost;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // realloc neighbor based arrays if necessary
   // upper bound on entries: every neighbor pair plus the two leading
   // entries written for each row below
 
   int numneigh_total = 0;
   for (ii = 0; ii < inum; ii++)
     numneigh_total += numneigh[ilist[ii]];
 
   if (numneigh_total + 2*nlocal > matmax) {
     memory->destroy(aval);
     memory->destroy(acol_ind);
     matmax = numneigh_total + 2*nlocal;
     memory->create(aval,matmax,"reax:aval");
     memory->create(acol_ind,matmax,"reax:acol_ind");
   }
 
   // build linear system
 
   nmatentries = 0;
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xitmp = x[i][0];
     yitmp = x[i][1];
     zitmp = x[i][2];
     itype = type[i];
     itag = tag[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     // first entry of each row is the diagonal term
     // sparse_product() relies on this position
 
     arow_ptr[i] = nmatentries;
     aval[nmatentries] = 2.0*param_list[itype].params[1];
     acol_ind[nmatentries] = i;
     nmatentries++;
 
     // second entry couples row i to the extra unknown stored at
     // index nlocal+nghost (initialized from chpot further below)
 
     aval[nmatentries] = 1.0;
     acol_ind[nmatentries] = nlocal + nghost;
     nmatentries++;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
 
       xjtmp = x[j][0];
       yjtmp = x[j][1];
       zjtmp = x[j][2];
       jtype = type[j];
       jtag = tag[j];
 
       delx = xitmp - xjtmp;
       dely = yitmp - yjtmp;
       delz = zitmp - zjtmp;
 
       delr2 = delx*delx+dely*dely+delz*delz;
 
       // avoid counting local-ghost pair twice since
       // ReaxFF uses half neigh list with newton off
       // tie-break on tag parity; for equal tags fall back to comparing
       // the z, then y, then x coordinates
 
       if (j >= nlocal) {
         if (itag > jtag) {
           if ((itag+jtag) % 2 == 0) continue;
         } else if (itag < jtag) {
           if ((itag+jtag) % 2 == 1) continue;
         } else {
           if (zjtmp < zitmp) continue;
           if (zjtmp == zitmp && yjtmp < yitmp) continue;
           if (zjtmp == zitmp && yjtmp == yitmp && xjtmp < xitmp) continue;
         }
       }
 
       // rcutvsq = cutmax*cutmax, in ReaxFF
 
       if (delr2 <= rcutvsq) {
         // off-diagonal entry: tapered term
         // sw * 14.40 / cbrt(r^3 + 1/gamt^3), gamt = sqrt(params[2]_i*params[2]_j)
 
         gamt = sqrt(param_list[itype].params[2]*param_list[jtype].params[2]);
         delr_norm = sqrt(delr2);
         sw = taper_E(delr_norm, delr2);
         hulp1=(delr_norm*delr2+(1.0/(gamt*gamt*gamt)));
         hulp2=sw*14.40/cbrt(hulp1);
         aval[nmatentries] = hulp2;
         acol_ind[nmatentries] = j;
         nmatentries++;
       }
     }
   }
 
   // in this case, we don't use Midpoint method
   // so, we don't need to consider ghost-ghost interactions
   // but, need to fill the arow_ptr[] arrays for the ghost atoms
 
   for (i = nlocal; i < nlocal+nghost; i++)
     arow_ptr[i] = nmatentries;
   arow_ptr[nlocal+nghost] = nmatentries;
 
   // add rhs matentries to linear system
 
   for (ii =0; ii<inum; ii++) {
     i = ilist[ii];
     itype = type[i];
     elcvec[i] = -param_list[itype].params[0];
   }
 
   for (i = nlocal; i < nlocal+nghost; i++) elcvec[i] = 0.0;
 
   // assign current charges to charge vector
 
   qsum = 0.0;
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qi = q[i];
     ch[i] = qi;
     if (i < nlocal) qsum += qi;
   }
 
   for (i = nlocal; i < nlocal+nghost; i++) {
     qi = q[i];
     ch[i] = qi;
   }
 
   // qtot is not read again in this function
   // the Allreduce is a collective call, so it cannot be dropped on
   // some procs only
 
   double qtot;
   MPI_Allreduce(&qsum,&qtot,1,MPI_DOUBLE,MPI_SUM,world);
   elcvec[nlocal+nghost] = 0.0;
   ch[nlocal+nghost] = chpot;
 
   // solve the linear system using CG sover
 
   charge_reax(nlocal,nghost,ch,aval,acol_ind,arow_ptr,elcvec);
 
   // calculate the charge equilibration energy
 
   energy_charge_equilibration = 0;
 
   // have already updated charge distributions for the current structure
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     itype = type[i];
 
     // 23.02 is the ReaxFF conversion from eV to kcal/mol
     // should really use constants.evfactor ~23.06
     // but that would break consistency with serial ReaxFF code
     // NOTE: this hard-wired real units
     //       if want other units would have to change params[] in file
 
     qi = 23.02 * (param_list[itype].params[0]*ch[i]+
                   param_list[itype].params[1]*ch[i]*ch[i]);
     energy_charge_equilibration += qi;
     if (eflag_atom) eatom[i] += qi;
   }
 
   // copy charge vector back to particles from the calculated values
 
   for (i = 0; i < nlocal+nghost; i++) q[i] = ch[i];
   chpot = ch[nlocal+nghost];
 }
 
 /* ---------------------------------------------------------------------- */
 
 // thin wrapper: forwards to cg_solve() with ch as the solution vector
 // and elcvec as the right-hand side (note the changed argument order)
 
 void PairREAX::charge_reax(const int & nlocal, const int & nghost,
                            double ch[], double aval[], int acol_ind[],
                            int arow_ptr[], double elcvec[])
 {
   cg_solve(nlocal,nghost,aval,acol_ind,arow_ptr,ch,elcvec);
 }
 
 /* ----------------------------------------------------------------------
    CG solver for linear systems
 ------------------------------------------------------------------------- */
 
 // iteratively solve A.x = b for x, starting from the values passed in x
 // A is given by aval/acol_ind/arow_ptr (see sparse_product)
 // vectors have length nlocal+nghost+1
 // uses the class work arrays rcg, wcg, pcg, poldcg, qcg
 // sets packflag = 1 so the pair comm callbacks exchange wcg
 // stops when rho < precision or after maxiter-1 iterations
 
 void PairREAX::cg_solve(const int & nlocal, const int & nghost,
                         double aval[], int acol_ind[], int arow_ptr[],
                         double x[], double b[])
 {
   double one, zero, rho, rho_old, alpha, beta, gamma;
   int iter, maxiter;
   int n;
   double sumtmp;
 
   // parallel CG method by A. P. Thompson
   // distributed (partial) vectors: b, r, q, A
   // accumulated (full) vectors: x, w, p
   // r = b-A.x
   // w = r            (ReverseComm + Comm)
 
   double *r = rcg;
   double *w = wcg;
   double *p = pcg;
   double *p_old = poldcg;
   double *q = qcg;
 
   n = nlocal+nghost+1;
 
   one = 1.0;
   zero = 0.0;
   maxiter = 100;
 
   for (int i = 0; i < n; i++) w[i] = 0;
 
   // construct r = b-Ax
 
   sparse_product(n, nlocal, nghost, aval, acol_ind, arow_ptr, x, r);
 
   // not using BLAS library
 
   for (int i=0; i<n; i++) {
     r[i] = b[i] - r[i];
     w[i] = r[i];
   }
 
   // sum ghost contributions of w onto owning atoms, then refresh ghosts
 
   packflag = 1;
   comm->reverse_comm_pair(this);
   comm->forward_comm_pair(this);
 
   // last entry (index n-1) is not tied to an atom: sum it across all procs
 
   MPI_Allreduce(&w[n-1], &sumtmp, 1, MPI_DOUBLE, MPI_SUM, world);
   w[n-1] = sumtmp;
   rho_old = one;
 
   for (iter = 1; iter < maxiter; iter++) {
 
     // rho = w.w over owned atoms on all procs, plus the last entry once
 
     rho = 0.0;
     for (int i=0; i<nlocal; i++) rho += w[i]*w[i];
 
     MPI_Allreduce(&rho, &sumtmp, 1, MPI_DOUBLE, MPI_SUM, world);
     rho = sumtmp + w[n-1]*w[n-1];
     if (rho < precision) break;
 
     // new search direction: p = w (+ beta*p_old after the first iteration)
 
     for (int i = 0; i<n; i++) p[i] = w[i];
 
     if (iter > 1) {
       beta = rho/rho_old;
       for (int i = 0; i<n; i++) p[i] += beta*p_old[i];
     }
 
     sparse_product(n, nlocal, nghost, aval, acol_ind, arow_ptr, p, q);
 
     // gamma = p.q summed over all procs; step length alpha = rho/gamma
 
     gamma = 0.0;
     for (int i=0; i<n; i++) gamma += p[i]*q[i];
     MPI_Allreduce(&gamma, &sumtmp, 1, MPI_DOUBLE, MPI_SUM, world);
 
     gamma = sumtmp;
     alpha = rho/gamma;
 
     for (int i=0; i<n; i++) {
       x[i] += alpha*p[i];
       r[i] -= alpha*q[i];
       w[i] = r[i];
     }
 
     // same accumulation of w as before the loop
 
     comm->reverse_comm_pair(this);
     comm->forward_comm_pair(this);
 
     MPI_Allreduce(&w[n-1], &sumtmp, 1, MPI_DOUBLE, MPI_SUM, world);
     w[n-1] = sumtmp;
 
     for (int i=0; i<n; i++) p_old[i] = p[i];
     rho_old = rho;
   }
 }
 
 /* ----------------------------------------------------------------------
    sparse maxtrix operations
 ------------------------------------------------------------------------- */
 
 // r = A.x for the symmetric matrix stored one triangle at a time
 // row i occupies entries arow_ptr[i] .. arow_ptr[i+1]-1 of aval/acol_ind
 // for rows of owned atoms the first entry is the diagonal term
 // every other stored entry is applied to both r[row] and r[column]
 
 void PairREAX::sparse_product(const int &n, const int &nlocal,
                               const int &nghost,
                               double aval[], int acol_ind[], int arow_ptr[],
                               double *x, double *r)
 {
   for (int k = 0; k < n; k++) r[k] = 0.0;
 
   // rows of owned atoms: diagonal first, then the off-diagonal entries
 
   for (int row = 0; row < nlocal; row++) {
     const int diag = arow_ptr[row];
     const int stop = arow_ptr[row+1];
 
     r[row] += aval[diag]*x[row];
 
     for (int m = diag+1; m < stop; m++) {
       const int col = acol_ind[m];
       r[row] += aval[m]*x[col];
       r[col] += aval[m]*x[row];
     }
   }
 
   // rows of ghost atoms: no leading diagonal entry
 
   const int nall = nlocal+nghost;
   for (int row = nlocal; row < nall; row++) {
     const int start = arow_ptr[row];
     const int stop = arow_ptr[row+1];
 
     for (int m = start; m < stop; m++) {
       const int col = acol_ind[m];
       r[row] += aval[m]*x[col];
       r[col] += aval[m]*x[row];
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
 ------------------------------------------------------------------------- */
 
 // report the byte count of the nmax-sized and matmax-sized arrays
 
 double PairREAX::memory_usage()
 {
   // arrays of length nmax
 
   const double peratom_int = nmax * sizeof(int);
   const double peratom_dbl = 7 * nmax * sizeof(double);
 
   // arrays of length matmax (acol_ind and aval)
 
   const double matrix_int = matmax * sizeof(int);
   const double matrix_dbl = matmax * sizeof(double);
 
   return ((peratom_int + peratom_dbl) + matrix_int) + matrix_dbl;
 }
diff --git a/src/SNAP/compute_sna_atom.cpp b/src/SNAP/compute_sna_atom.cpp
index 15640c5f7..128eca00a 100644
--- a/src/SNAP/compute_sna_atom.cpp
+++ b/src/SNAP/compute_sna_atom.cpp
@@ -1,279 +1,279 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 #include "sna.h"
 #include "string.h"
 #include "stdlib.h"
 #include "compute_sna_atom.h"
 #include "atom.h"
 #include "update.h"
 #include "modify.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "force.h"
 #include "pair.h"
 #include "comm.h"
 #include "memory.h"
 #include "error.h"
 #include "openmp_snap.h"
 
 using namespace LAMMPS_NS;
 
 // parse the compute sna/atom command and create one SNA object per thread
 // required args, in order:
 //   arg[3] = rcutfac, arg[4] = rfac0, arg[5] = twojmax,
 //   then ntypes radii (radelem), then ntypes weights (wjelem)
 // optional keyword/value pairs: diagonal (0-3), rmin0, switchflag
 
 ComputeSNAAtom::ComputeSNAAtom(LAMMPS *lmp, int narg, char **arg) :
   Compute(lmp, narg, arg)
 {
   double rmin0, rfac0;
   int twojmax, switchflag;
   radelem = NULL;
   wjelem = NULL;
 
   int ntypes = atom->ntypes;
   int nargmin = 6+2*ntypes;
   
   if (narg < nargmin) error->all(FLERR,"Illegal compute sna/atom command");
 
   // default values
 
   diagonalstyle = 0;
   rmin0 = 0.0;
   switchflag = 1;
 
   // offset by 1 to match up with types
 
   memory->create(radelem,ntypes+1,"sna/atom:radelem"); 
   memory->create(wjelem,ntypes+1,"sna/atom:wjelem");
 
   rcutfac = atof(arg[3]);
   rfac0 = atof(arg[4]);
   twojmax = atoi(arg[5]);
 
   for(int i = 0; i < ntypes; i++)
     radelem[i+1] = atof(arg[6+i]);
   for(int i = 0; i < ntypes; i++)
     wjelem[i+1] = atof(arg[6+ntypes+i]);
 
   // construct cutsq
   // pair cutoff = (radelem[i]+radelem[j])*rcutfac
   // cutmax tracks the largest same-type cutoff only
 
   double cut;
   cutmax = 0.0;
   memory->create(cutsq,ntypes+1,ntypes+1,"sna/atom:cutsq");
   for(int i = 1; i <= ntypes; i++) {
     cut = 2.0*radelem[i]*rcutfac;
     if (cut > cutmax) cutmax = cut;
     cutsq[i][i] = cut*cut;
     for(int j = i+1; j <= ntypes; j++) {
       cut = (radelem[i]+radelem[j])*rcutfac;
       cutsq[i][j] = cutsq[j][i] = cut*cut;
     }
   }
 
   // process optional args
 
   int iarg = nargmin;
 
   while (iarg < narg) {
     if (strcmp(arg[iarg],"diagonal") == 0) {
       if (iarg+2 > narg) 
 	error->all(FLERR,"Illegal compute sna/atom command");
       diagonalstyle = atoi(arg[iarg+1]);
       if (diagonalstyle < 0 || diagonalstyle > 3)
 	error->all(FLERR,"Illegal compute sna/atom command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"rmin0") == 0) {
       if (iarg+2 > narg) 
 	error->all(FLERR,"Illegal compute sna/atom command");
       rmin0 = atof(arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"switchflag") == 0) {
       if (iarg+2 > narg) 
 	error->all(FLERR,"Illegal compute sna/atom command");
       switchflag = atoi(arg[iarg+1]);
       iarg += 2;
     } else error->all(FLERR,"Illegal compute sna/atom command");
   }
 
   // one SNA instance per thread, indexed by thread id
   // array has comm->nthreads slots; each thread of the region fills its own
 
   snaptr = new SNA*[comm->nthreads];
 #if defined(_OPENMP)
 #pragma omp parallel default(none) shared(lmp,rfac0,twojmax,rmin0,switchflag)
 #endif
   {
     int tid = omp_get_thread_num();
 
     // always unset use_shared_arrays since it does not work with computes
     snaptr[tid] = new SNA(lmp,rfac0,twojmax,diagonalstyle,
                           0 /*use_shared_arrays*/, rmin0,switchflag);
   }
 
   // per-atom output is an array with ncoeff columns
 
   ncoeff = snaptr[0]->ncoeff;
   peratom_flag = 1;
   size_peratom_cols = ncoeff;
 
   nmax = 0;
   njmax = 0;
   sna = NULL;
   
 }
 
 /* ---------------------------------------------------------------------- */
 
 // release the arrays allocated by this compute
 
 ComputeSNAAtom::~ComputeSNAAtom()
 {
   // NOTE(review): only the array of SNA pointers is freed here; the SNA
   // objects created per thread in the constructor are not deleted in this
   // block - confirm whether they are released elsewhere
 
   delete [] snaptr;
 
   memory->destroy(cutsq);
   memory->destroy(wjelem);
   memory->destroy(radelem);
   memory->destroy(sna);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // check prerequisites, request a neighbor list, and
 // initialize the per-thread SNA objects
 // errors out if no pair style is defined or if cutmax exceeds
 // the pair style cutoff
 
 void ComputeSNAAtom::init()
 {
   if (force->pair == NULL) 
     error->all(FLERR,"Compute sna/atom requires a pair style be defined");
 
   if (cutmax > force->pair->cutforce) 
     error->all(FLERR,"Compute sna/atom cutoff is longer than pairwise cutoff");
 
   // need an occasional full neighbor list
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->compute = 1;
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
   neighbor->requests[irequest]->occasional = 1;
 
   // warn (on proc 0 only) if this compute style is defined more than once
 
   int count = 0;
   for (int i = 0; i < modify->ncompute; i++)
     if (strcmp(modify->compute[i]->style,"sna/atom") == 0) count++;
   if (count > 1 && comm->me == 0)
     error->warning(FLERR,"More than one compute sna/atom");
 
   // each thread initializes the SNA object stored at its own thread id
 
 #if defined(_OPENMP)
 #pragma omp parallel default(none)
 #endif
   {
     int tid = omp_get_thread_num();
     snaptr[tid]->init();
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // store the neighbor list pointer passed in; id is not used
 
 void ComputeSNAAtom::init_list(int id, NeighList *ptr)
 {
   list = ptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // fill sna[i][0..ncoeff-1] for every atom i in the neighbor list
 // atoms in the compute group get the bvec values computed by the thread's
 // SNA object from their neighbors within cutoff; all others get zeros
 
 void ComputeSNAAtom::compute_peratom()
 {
   invoked_peratom = update->ntimestep;
 
   // grow sna array if necessary
 
   if (atom->nlocal > nmax) {
     memory->destroy(sna);
     nmax = atom->nmax;
     memory->create(sna,nmax,size_peratom_cols,"sna/atom:sna");
     array_atom = sna;
   }
 
   // invoke full neighbor list (will copy or build if necessary)
 
   neighbor->build_one(list);
 
   const int inum = list->inum;
   const int* const ilist = list->ilist;
   const int* const numneigh = list->numneigh;
   int** const firstneigh = list->firstneigh;
   int * const type = atom->type;
 
   // compute sna for each atom in group
   // use full neighbor list to count atoms less than cutoff
 
   double** const x = atom->x;
   const int* const mask = atom->mask;
 
 #if defined(_OPENMP)
 #pragma omp parallel for default(none)
 #endif
   for (int ii = 0; ii < inum; ii++) {
     // each thread works with the SNA object stored at its own thread id
     const int tid = omp_get_thread_num();
     const int i = ilist[ii];
     if (mask[i] & groupbit) {
 
       const double xtmp = x[i][0];
       const double ytmp = x[i][1];
       const double ztmp = x[i][2];
       const int itype = type[i];
       const double radi = radelem[itype];
       const int* const jlist = firstneigh[i];
       const int jnum = numneigh[i];
 
       // insure rij, inside, and typej  are of size jnum
       
       snaptr[tid]->grow_rij(jnum);
 
       // rij[][3] = displacements between atom I and those neighbors
       // inside = indices of neighbors of I within cutoff
       // typej = types of neighbors of I within cutoff
 
       int ninside = 0;
       for (int jj = 0; jj < jnum; jj++) {
 	int j = jlist[jj];
 	j &= NEIGHMASK;
 	
 	const double delx = xtmp - x[j][0];
 	const double dely = ytmp - x[j][1];
 	const double delz = ztmp - x[j][2];
 	const double rsq = delx*delx + dely*dely + delz*delz;
 	int jtype = type[j];
 	// keep neighbors inside the type-pair cutoff
 	// rsq > 1e-20 skips neighbors at (numerically) zero distance
 	if (rsq < cutsq[itype][jtype] && rsq>1e-20) {
 	  snaptr[tid]->rij[ninside][0] = delx;
 	  snaptr[tid]->rij[ninside][1] = dely;
 	  snaptr[tid]->rij[ninside][2] = delz;
 	  snaptr[tid]->inside[ninside] = j;
 	  snaptr[tid]->wj[ninside] = wjelem[jtype];
 	  snaptr[tid]->rcutij[ninside] = (radi+radelem[jtype])*rcutfac;
 	  ninside++;
 	}
       }
 
       // run the SNA stages in order, then copy the result vector out
 
       snaptr[tid]->compute_ui(ninside);
       snaptr[tid]->compute_zi();
       snaptr[tid]->compute_bi();
       snaptr[tid]->copy_bi2bvec();
       for (int icoeff = 0; icoeff < ncoeff; icoeff++)
 	sna[i][icoeff] = snaptr[tid]->bvec[icoeff];
     } else {
       for (int icoeff = 0; icoeff < ncoeff; icoeff++)
 	sna[i][icoeff] = 0.0;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage 
 ------------------------------------------------------------------------- */
 
 // estimate the memory used by this compute, in bytes
 // all products are formed in double precision: nmax*size_peratom_cols
 // is an int*int product that can exceed the int range for large arrays
 
 double ComputeSNAAtom::memory_usage()
 {
   // per-atom output array sna[nmax][size_peratom_cols]
 
   double bytes = static_cast<double>(nmax)*size_peratom_cols * sizeof(double);
 
   // njmax-sized terms
 
   bytes += 3.0*njmax*sizeof(double);
   bytes += static_cast<double>(njmax)*sizeof(int);
 
   // one SNA object per thread; the first one is taken as representative
 
   bytes += snaptr[0]->memory_usage()*comm->nthreads;
   return bytes;
 }
 
diff --git a/src/SNAP/compute_snad_atom.cpp b/src/SNAP/compute_snad_atom.cpp
index cb0f9e045..51b0d5842 100644
--- a/src/SNAP/compute_snad_atom.cpp
+++ b/src/SNAP/compute_snad_atom.cpp
@@ -1,335 +1,335 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 #include "sna.h"
 #include "string.h"
 #include "stdlib.h"
 #include "compute_snad_atom.h"
 #include "atom.h"
 #include "update.h"
 #include "modify.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "force.h"
 #include "pair.h"
 #include "comm.h"
 #include "memory.h"
 #include "error.h"
 #include "openmp_snap.h"
 
 using namespace LAMMPS_NS;
 
 // parse the compute snad/atom command and create one SNA object per thread
 // required args, in order:
 //   arg[3] = rcutfac, arg[4] = rfac0, arg[5] = twojmax,
 //   then ntypes radii (radelem), then ntypes weights (wjelem)
 // optional keyword/value pairs: diagonal (0-3), rmin0, switchflag
 
 ComputeSNADAtom::ComputeSNADAtom(LAMMPS *lmp, int narg, char **arg) :
   Compute(lmp, narg, arg)
 {
   double rfac0, rmin0;
   int twojmax, switchflag;
   radelem = NULL;
   wjelem = NULL;
 
   int ntypes = atom->ntypes;
   int nargmin = 6+2*ntypes;
 
   if (narg < nargmin) error->all(FLERR,"Illegal compute snad/atom command");
 
   // default values
 
   diagonalstyle = 0;
   rmin0 = 0.0;
   switchflag = 1;
 
   // process required arguments
   // arrays are offset by 1 to match up with types
 
   memory->create(radelem,ntypes+1,"sna/atom:radelem");
   memory->create(wjelem,ntypes+1,"sna/atom:wjelem");
   rcutfac = atof(arg[3]);
   rfac0 = atof(arg[4]);
   twojmax = atoi(arg[5]);
   for(int i = 0; i < ntypes; i++)
     radelem[i+1] = atof(arg[6+i]);
   for(int i = 0; i < ntypes; i++)
     wjelem[i+1] = atof(arg[6+ntypes+i]);
 
   // construct cutsq
   // pair cutoff = (radelem[i]+radelem[j])*rcutfac
 
   double cut;
   memory->create(cutsq,ntypes+1,ntypes+1,"sna/atom:cutsq");
   for(int i = 1; i <= ntypes; i++) {
     cut = 2.0*radelem[i]*rcutfac;
     cutsq[i][i] = cut*cut;
     for(int j = i+1; j <= ntypes; j++) {
       cut = (radelem[i]+radelem[j])*rcutfac;
       cutsq[i][j] = cutsq[j][i] = cut*cut;
     }
   }
 
   // process optional args
 
   int iarg = nargmin;
 
   while (iarg < narg) {
     if (strcmp(arg[iarg],"diagonal") == 0) {
       if (iarg+2 > narg)
         error->all(FLERR,"Illegal compute snad/atom command");
 
       // diagonalstyle is an integer setting: parse it with atoi,
       // as compute sna/atom does, not with atof
 
       diagonalstyle = atoi(arg[iarg+1]);
       if (diagonalstyle < 0 || diagonalstyle > 3)
         error->all(FLERR,"Illegal compute snad/atom command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"rmin0") == 0) {
       if (iarg+2 > narg)
         error->all(FLERR,"Illegal compute snad/atom command");
       rmin0 = atof(arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"switchflag") == 0) {
       if (iarg+2 > narg)
         error->all(FLERR,"Illegal compute snad/atom command");
       switchflag = atoi(arg[iarg+1]);
       iarg += 2;
     } else error->all(FLERR,"Illegal compute snad/atom command");
   }
 
   // one SNA instance per thread, indexed by thread id
   // array has comm->nthreads slots; each thread of the region fills its own
 
   snaptr = new SNA*[comm->nthreads];
 #if defined(_OPENMP)
 #pragma omp parallel default(none) shared(lmp,rfac0,twojmax,rmin0,switchflag)
 #endif
   {
     int tid = omp_get_thread_num();
 
     // always unset use_shared_arrays since it does not work with computes
     snaptr[tid] = new SNA(lmp,rfac0,twojmax,diagonalstyle,
                           0 /*use_shared_arrays*/, rmin0,switchflag);
   }
 
   // per-atom output has 3*ncoeff columns for each atom type
   // the same width is used for reverse communication
 
   ncoeff = snaptr[0]->ncoeff;
   peratom_flag = 1;
   size_peratom_cols = 3*ncoeff*atom->ntypes;
   comm_reverse = size_peratom_cols;
   nmax = 0;
   njmax = 0;
   snad = NULL;
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 // release the arrays allocated by this compute
 
 ComputeSNADAtom::~ComputeSNADAtom()
 {
   // NOTE(review): only the array of SNA pointers is freed here; the SNA
   // objects created per thread in the constructor are not deleted in this
   // block - confirm whether they are released elsewhere
 
   delete [] snaptr;
 
   memory->destroy(cutsq);
   memory->destroy(wjelem);
   memory->destroy(radelem);
   memory->destroy(snad);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // check prerequisites, request a neighbor list, and
 // initialize the per-thread SNA objects
 // errors out if no pair style is defined
 // the cutoff-vs-pair-cutoff check is currently commented out (see TODO)
 
 void ComputeSNADAtom::init()
 {
   if (force->pair == NULL) 
     error->all(FLERR,"Compute snad/atom requires a pair style be defined");
   // TODO: Not sure what to do with this error check since cutoff radius is not
   // a single number
   //if (sqrt(cutsq) > force->pair->cutforce) 
     //error->all(FLERR,"Compute snad/atom cutoff is longer than pairwise cutoff");
 
   // need an occasional full neighbor list
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->compute = 1;
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
   neighbor->requests[irequest]->occasional = 1;
 
   // warn (on proc 0 only) if this compute style is defined more than once
 
   int count = 0;
   for (int i = 0; i < modify->ncompute; i++)
     if (strcmp(modify->compute[i]->style,"snad/atom") == 0) count++;
   if (count > 1 && comm->me == 0)
     error->warning(FLERR,"More than one compute snad/atom");
 
   // each thread initializes the SNA object stored at its own thread id
 
 #if defined(_OPENMP)
 #pragma omp parallel default(none)
 #endif
   {
     int tid = omp_get_thread_num();
     snaptr[tid]->init();
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // store the neighbor list pointer passed in; id is not used
 
 void ComputeSNADAtom::init_list(int id, NeighList *ptr)
 {
   list = ptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // accumulate the dbvec derivative values into snad for owned and ghost atoms
 // row layout: a block of 3*ncoeff columns per atom type, selected by the
 // type of the central atom I; within a block the x, y, z components are
 // stored as three consecutive runs of ncoeff values
 // ghost contributions are summed onto owning atoms by reverse communication
 
 void ComputeSNADAtom::compute_peratom()
 {
   int ntotal = atom->nlocal + atom->nghost;
 
   invoked_peratom = update->ntimestep;
 
   // grow snad array if necessary
 
   if (ntotal > nmax) {
     memory->destroy(snad);
     nmax = atom->nmax;
     memory->create(snad,nmax,size_peratom_cols,
 		   "snad/atom:snad");
     array_atom = snad;
   }
 
   // clear local array
 
   for (int i = 0; i < ntotal; i++)
     for (int icoeff = 0; icoeff < size_peratom_cols; icoeff++) {
       snad[i][icoeff] = 0.0;
     }
 
   // invoke full neighbor list (will copy or build if necessary)
 
   neighbor->build_one(list);
 
   const int inum = list->inum;
   const int* const ilist = list->ilist;
   const int* const numneigh = list->numneigh;
   int** const firstneigh = list->firstneigh;
   int * const type = atom->type;
 
   // compute sna derivatives for each atom in group
   // use full neighbor list to count atoms less than cutoff
 
   double** const x = atom->x;
   const int* const mask = atom->mask;
 
   // NOTE(review): inside this parallel loop rows snad[j] of neighbor atoms
   // are updated as well as snad[i]; these += / -= updates carry no
   // synchronization, so two threads may touch the same row - confirm
   // whether this is safe when running with more than one thread
 
 #if defined(_OPENMP)
 #pragma omp parallel for default(none)
 #endif
   for (int ii = 0; ii < inum; ii++) {
     // each thread works with the SNA object stored at its own thread id
     const int tid = omp_get_thread_num();
     const int i = ilist[ii];
     if (mask[i] & groupbit) {
 
       const double xtmp = x[i][0];
       const double ytmp = x[i][1];
       const double ztmp = x[i][2];
       const int itype = type[i];
       const double radi = radelem[itype];
       const int* const jlist = firstneigh[i];
       const int jnum = numneigh[i];
 
       // first column of the block belonging to the type of atom I
 
       const int typeoffset = 3*ncoeff*(atom->type[i]-1);
 
       // insure rij, inside, and typej  are of size jnum
 
       snaptr[tid]->grow_rij(jnum);
 
       // rij[][3] = displacements between atom I and those neighbors
       // inside = indices of neighbors of I within cutoff
       // typej = types of neighbors of I within cutoff
       // note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi
 
       int ninside = 0;
       for (int jj = 0; jj < jnum; jj++) {
 	int j = jlist[jj];
 	j &= NEIGHMASK;
 	
 	const double delx = x[j][0] - xtmp;
 	const double dely = x[j][1] - ytmp;
 	const double delz = x[j][2] - ztmp;
 	const double rsq = delx*delx + dely*dely + delz*delz;
         int jtype = type[j];	
 	// keep neighbors inside the type-pair cutoff
 	// rsq > 1e-20 skips neighbors at (numerically) zero distance
 	if (rsq < cutsq[itype][jtype]&&rsq>1e-20) {
 	  snaptr[tid]->rij[ninside][0] = delx;
 	  snaptr[tid]->rij[ninside][1] = dely;
 	  snaptr[tid]->rij[ninside][2] = delz;
 	  snaptr[tid]->inside[ninside] = j;
 	  snaptr[tid]->wj[ninside] = wjelem[jtype];
 	  snaptr[tid]->rcutij[ninside] = (radi+radelem[jtype])*rcutfac;
 	  ninside++;
 	}
       }
 
       snaptr[tid]->compute_ui(ninside);
       snaptr[tid]->compute_zi();
 
       // one derivative evaluation per neighbor kept above
 
       for (int jj = 0; jj < ninside; jj++) {
 	const int j = snaptr[tid]->inside[jj];
 	snaptr[tid]->compute_duidrj(snaptr[tid]->rij[jj],
 				    snaptr[tid]->wj[jj],
 				    snaptr[tid]->rcutij[jj]);
 	snaptr[tid]->compute_dbidrj();
 	snaptr[tid]->copy_dbi2dbvec();
 
 	// Accumulate -dBi/dRi, -dBi/dRj
 
 	double *snadi = snad[i]+typeoffset;
 	double *snadj = snad[j]+typeoffset;
 
 	for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
 	  snadi[icoeff] += snaptr[tid]->dbvec[icoeff][0];
 	  snadi[icoeff+ncoeff] += snaptr[tid]->dbvec[icoeff][1];
 	  snadi[icoeff+2*ncoeff] += snaptr[tid]->dbvec[icoeff][2];
 	  snadj[icoeff] -= snaptr[tid]->dbvec[icoeff][0];
 	  snadj[icoeff+ncoeff] -= snaptr[tid]->dbvec[icoeff][1];
 	  snadj[icoeff+2*ncoeff] -= snaptr[tid]->dbvec[icoeff][2];
 	}
       }
     }
   }
 
   // communicate snad contributions between neighbor procs
 
   comm->reverse_comm_compute(this);
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 // pack all size_peratom_cols values of atoms first .. first+n-1 into buf
 // returns the total number of values written (n * size_peratom_cols),
 // not the per-atom width comm_reverse, so the return value agrees
 // with what was actually placed in buf
 
 int ComputeSNADAtom::pack_reverse_comm(int n, int first, double *buf)
 {
   int i,m,last,icoeff;
 
   m = 0;
   last = first + n;
   for (i = first; i < last; i++)
     for (icoeff = 0; icoeff < size_peratom_cols; icoeff++)
       buf[m++] = snad[i][icoeff];
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // add the received values onto the snad rows of the listed atoms
 // buf holds size_peratom_cols consecutive values per atom
 
 void ComputeSNADAtom::unpack_reverse_comm(int n, int *list, double *buf)
 {
   int nbuf = 0;
 
   for (int k = 0; k < n; k++) {
     const int iatom = list[k];
     for (int icol = 0; icol < size_peratom_cols; icol++) {
       snad[iatom][icol] += buf[nbuf];
       nbuf++;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage 
 ------------------------------------------------------------------------- */
 
 // estimate the memory used by this compute, in bytes
 // all products are formed in double precision: nmax*size_peratom_cols
 // is an int*int product that can exceed the int range for large arrays
 
 double ComputeSNADAtom::memory_usage()
 {
   // per-atom output array snad[nmax][size_peratom_cols]
 
   double bytes = static_cast<double>(nmax)*size_peratom_cols * sizeof(double);
 
   // njmax-sized terms
 
   bytes += 3.0*njmax*sizeof(double);
   bytes += static_cast<double>(njmax)*sizeof(int);
 
   // NOTE(review): this term is an element count, not multiplied by
   // an element size like the terms above - confirm intent
 
   bytes += ncoeff*3;
 
   // one SNA object per thread; the first one is taken as representative
 
   bytes += snaptr[0]->memory_usage()*comm->nthreads;
   return bytes;
 }
diff --git a/src/SNAP/compute_snav_atom.cpp b/src/SNAP/compute_snav_atom.cpp
index 4e1eaf130..6394321eb 100644
--- a/src/SNAP/compute_snav_atom.cpp
+++ b/src/SNAP/compute_snav_atom.cpp
@@ -1,345 +1,345 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 #include "sna.h"
 #include "string.h"
 #include "stdlib.h"
 #include "compute_snav_atom.h"
 #include "atom.h"
 #include "update.h"
 #include "modify.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "force.h"
 #include "pair.h"
 #include "comm.h"
 #include "memory.h"
 #include "error.h"
 #include "openmp_snap.h"
 
 using namespace LAMMPS_NS;
 
 // Parse the "compute snav/atom" arguments, build the per-type cutoff table
 // and create one SNA helper object per thread.
 //
 // Required arguments, as read below:
 //   arg[3]                       rcutfac
 //   arg[4]                       rfac0
 //   arg[5]                       twojmax
 //   arg[6 .. 6+ntypes-1]         radelem for types 1..ntypes
 //   arg[6+ntypes .. 6+2*ntypes-1] wjelem for types 1..ntypes
 // Optional keyword/value pairs: diagonal, rmin0, switchflag.
 // Any malformed input ends in error->all().
 
 ComputeSNAVAtom::ComputeSNAVAtom(LAMMPS *lmp, int narg, char **arg) :
   Compute(lmp, narg, arg)
 {
   double rfac0, rmin0;
   int twojmax, switchflag;
   radelem = NULL;
   wjelem = NULL;
 
   // number of virial components stored per coefficient
 
   nvirial = 6;
 
   int ntypes = atom->ntypes;
   int nargmin = 6+2*ntypes;
 
   if (narg < nargmin) error->all(FLERR,"Illegal compute snav/atom command");
 
   // default values
 
   diagonalstyle = 0;
   rmin0 = 0.0;
   switchflag = 1;
 
   // process required arguments
   memory->create(radelem,ntypes+1,"sna/atom:radelem"); // offset by 1 to match up with types
   memory->create(wjelem,ntypes+1,"sna/atom:wjelem");
   rcutfac = atof(arg[3]);
   rfac0 = atof(arg[4]);
   twojmax = atoi(arg[5]);
   for(int i = 0; i < ntypes; i++)
     radelem[i+1] = atof(arg[6+i]);
   for(int i = 0; i < ntypes; i++)
     wjelem[i+1] = atof(arg[6+ntypes+i]);
 
   // construct cutsq: squared cutoff for each type pair is
   // ((radelem[i]+radelem[j])*rcutfac)^2, stored symmetrically
 
   double cut;
   memory->create(cutsq,ntypes+1,ntypes+1,"sna/atom:cutsq");
   for(int i = 1; i <= ntypes; i++) {
     cut = 2.0*radelem[i]*rcutfac;
     cutsq[i][i] = cut*cut;
     for(int j = i+1; j <= ntypes; j++) {
       cut = (radelem[i]+radelem[j])*rcutfac;
       cutsq[i][j] = cutsq[j][i] = cut*cut;
     }
   }
 
   // process optional args
 
   int iarg = nargmin;
 
   while (iarg < narg) {
     if (strcmp(arg[iarg],"diagonal") == 0) {
       if (iarg+2 > narg)
         error->all(FLERR,"Illegal compute snav/atom command");
       // diagonalstyle is an integer selector (range-checked 0..3 below),
       // so parse it as an integer like the other integer arguments
       diagonalstyle = atoi(arg[iarg+1]);
       if (diagonalstyle < 0 || diagonalstyle > 3)
         error->all(FLERR,"Illegal compute snav/atom command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"rmin0") == 0) {
       if (iarg+2 > narg)
         error->all(FLERR,"Illegal compute snav/atom command");
       rmin0 = atof(arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"switchflag") == 0) {
       if (iarg+2 > narg)
         error->all(FLERR,"Illegal compute snav/atom command");
       switchflag = atoi(arg[iarg+1]);
       iarg += 2;
     } else error->all(FLERR,"Illegal compute snav/atom command");
   }
 
   // one SNA object per thread, each created by the thread that will use it
 
   snaptr = new SNA*[comm->nthreads];
 #if defined(_OPENMP)
 #pragma omp parallel default(none) shared(lmp,rfac0,twojmax,rmin0,switchflag)
 #endif
   {
     int tid = omp_get_thread_num();
 
     // always unset use_shared_arrays since it does not work with computes
     snaptr[tid] = new SNA(lmp,rfac0,twojmax,diagonalstyle,
                           0 /*use_shared_arrays*/, rmin0,switchflag);
   }
 
   // per-atom output: nvirial blocks of ncoeff values for every atom type
 
   ncoeff = snaptr[0]->ncoeff;
   peratom_flag = 1;
   size_peratom_cols = nvirial*ncoeff*atom->ntypes;
   comm_reverse = size_peratom_cols;
 
   // snav is allocated lazily in compute_peratom()
 
   nmax = 0;
   njmax = 0;
   snav = NULL;
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 ComputeSNAVAtom::~ComputeSNAVAtom()
 {
   // release the per-atom output array and the per-type tables
   // that were created through the memory class
   memory->destroy(snav);
   memory->destroy(radelem);
   memory->destroy(wjelem);
   memory->destroy(cutsq);
   // NOTE(review): only the array of SNA pointers is freed here; the SNA
   // objects created per thread in the constructor are not deleted in this
   // destructor -- confirm whether that is intentional before changing it
 delete [] snaptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeSNAVAtom::init()
 {
   // a pair style must exist; its cutoff is not compared against cutsq
   // (see the disabled check below)
   if (force->pair == NULL) 
     error->all(FLERR,"Compute snav/atom requires a pair style be defined");
    // TODO: Not sure what to do with this error check since cutoff radius is not
   // a single number
  //if (sqrt(cutsq) > force->pair->cutforce) 
    // error->all(FLERR,"Compute snav/atom cutoff is longer than pairwise cutoff");
 
   // need an occasional full neighbor list
   // (flags below: owned by a compute, not a pair style; full, not half)
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->compute = 1;
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
   neighbor->requests[irequest]->occasional = 1;
 
   // warn (on rank 0 only) when several computes of this style are defined
 
   int count = 0;
   for (int i = 0; i < modify->ncompute; i++)
     if (strcmp(modify->compute[i]->style,"snav/atom") == 0) count++;
   if (count > 1 && comm->me == 0)
     error->warning(FLERR,"More than one compute snav/atom");
   // let every thread initialize the SNA object stored under its thread id
 #if defined(_OPENMP)
 #pragma omp parallel default(none)
 #endif
   {
     int tid = omp_get_thread_num();
     snaptr[tid]->init();
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeSNAVAtom::init_list(int id, NeighList *ptr)
 {
   // remember the neighbor list handed to this compute; id is not used here
   list = ptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeSNAVAtom::compute_peratom()
 {
   int ntotal = atom->nlocal + atom->nghost;
 
   invoked_peratom = update->ntimestep;
 
   // grow snav array if necessary
 
   if (ntotal > nmax) {
     memory->destroy(snav);
     nmax = atom->nmax;
     memory->create(snav,nmax,size_peratom_cols,
 		   "snav/atom:snav");
     array_atom = snav;
   }
 
   // clear local array
 
   for (int i = 0; i < ntotal; i++)
     for (int icoeff = 0; icoeff < size_peratom_cols; icoeff++) {
       snav[i][icoeff] = 0.0;
     }
 
   // invoke full neighbor list (will copy or build if necessary)
 
   neighbor->build_one(list);
 
   const int inum = list->inum;
   const int* const ilist = list->ilist;
   const int* const numneigh = list->numneigh;
   int** const firstneigh = list->firstneigh;
   int * const type = atom->type;
   // compute sna derivatives for each atom in group
   // use full neighbor list to count atoms less than cutoff
 
   double** const x = atom->x;
   const int* const mask = atom->mask;
 
 #if defined(_OPENMP)
 #pragma omp parallel for default(none)
 #endif
   for (int ii = 0; ii < inum; ii++) {
     const int tid = omp_get_thread_num();
     const int i = ilist[ii];
     if (mask[i] & groupbit) {
 
       const double xtmp = x[i][0];
       const double ytmp = x[i][1];
       const double ztmp = x[i][2];
       const int itype = type[i];
       const double radi = radelem[itype];
 
       const int* const jlist = firstneigh[i];
       const int jnum = numneigh[i];
 
       const int typeoffset = nvirial*ncoeff*(atom->type[i]-1);
 
       // insure rij, inside, and typej  are of size jnum
 
 	  snaptr[tid]->grow_rij(jnum);
 
       // rij[][3] = displacements between atom I and those neighbors
       // inside = indices of neighbors of I within cutoff
       // typej = types of neighbors of I within cutoff
       // note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi
 
       int ninside = 0;
       for (int jj = 0; jj < jnum; jj++) {
 	int j = jlist[jj];
 	j &= NEIGHMASK;
 	
 	const double delx = x[j][0] - xtmp;
 	const double dely = x[j][1] - ytmp;
 	const double delz = x[j][2] - ztmp;
 	const double rsq = delx*delx + dely*dely + delz*delz;
 	int jtype = type[j];
 	if (rsq < cutsq[itype][jtype]&&rsq>1e-20) {
 	  snaptr[tid]->rij[ninside][0] = delx;
 	  snaptr[tid]->rij[ninside][1] = dely;
 	  snaptr[tid]->rij[ninside][2] = delz;
 	  snaptr[tid]->inside[ninside] = j;
 	  snaptr[tid]->wj[ninside] = wjelem[jtype];
 	  snaptr[tid]->rcutij[ninside] = (radi+radelem[jtype])*rcutfac;
 	  ninside++;
 	}
       }
 
       snaptr[tid]->compute_ui(ninside);
       snaptr[tid]->compute_zi();
 
       for (int jj = 0; jj < ninside; jj++) {
 	const int j = snaptr[tid]->inside[jj];
 
 	snaptr[tid]->compute_duidrj(snaptr[tid]->rij[jj],
 				    snaptr[tid]->wj[jj],
 				    snaptr[tid]->rcutij[jj]);
 	snaptr[tid]->compute_dbidrj();
 	snaptr[tid]->copy_dbi2dbvec();
 
 	// Accumulate -dBi/dRi*Ri, -dBi/dRj*Rj
 
 	double *snavi = snav[i]+typeoffset;
 	double *snavj = snav[j]+typeoffset;
 	
 	for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
 	  snavi[icoeff]          += snaptr[tid]->dbvec[icoeff][0]*xtmp;
 	  snavi[icoeff+ncoeff]   += snaptr[tid]->dbvec[icoeff][1]*ytmp;
 	  snavi[icoeff+2*ncoeff] += snaptr[tid]->dbvec[icoeff][2]*ztmp;
 	  snavi[icoeff+3*ncoeff] += snaptr[tid]->dbvec[icoeff][1]*ztmp;
 	  snavi[icoeff+4*ncoeff] += snaptr[tid]->dbvec[icoeff][0]*ztmp;
 	  snavi[icoeff+5*ncoeff] += snaptr[tid]->dbvec[icoeff][0]*ytmp;
 	  snavj[icoeff]          -= snaptr[tid]->dbvec[icoeff][0]*x[j][0];
 	  snavj[icoeff+ncoeff]   -= snaptr[tid]->dbvec[icoeff][1]*x[j][1];
 	  snavj[icoeff+2*ncoeff] -= snaptr[tid]->dbvec[icoeff][2]*x[j][2];
 	  snavj[icoeff+3*ncoeff] -= snaptr[tid]->dbvec[icoeff][1]*x[j][2];
 	  snavj[icoeff+4*ncoeff] -= snaptr[tid]->dbvec[icoeff][0]*x[j][2];
 	  snavj[icoeff+5*ncoeff] -= snaptr[tid]->dbvec[icoeff][0]*x[j][1];
 	}
       }
     }
   }
 
   // communicate snav contributions between neighbor procs
 
   comm->reverse_comm_compute(this);
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 int ComputeSNAVAtom::pack_reverse_comm(int n, int first, double *buf)
 {
   // copy every column of the snav rows first .. first+n-1 into buf,
   // row after row; the return value is the per-atom value count
   // (comm_reverse), not the total number of values written
 
   double *out = buf;
   const int stop = first + n;
 
   for (int iatom = first; iatom < stop; ++iatom) {
     const double *row = snav[iatom];
     for (int col = 0; col < size_peratom_cols; ++col)
       *out++ = row[col];
   }
 
   return comm_reverse;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeSNAVAtom::unpack_reverse_comm(int n, int *list, double *buf)
 {
   // add the received values onto the snav rows named by list[0..n-1];
   // buf holds size_peratom_cols consecutive values per listed atom
 
   const double *in = buf;
 
   for (int k = 0; k < n; ++k) {
     double *row = snav[list[k]];
     for (int col = 0; col < size_peratom_cols; ++col)
       row[col] += *in++;
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage 
 ------------------------------------------------------------------------- */
 
 double ComputeSNAVAtom::memory_usage()
 {
   // estimate, in bytes, of the memory held by this compute
 
   // per-atom output array: do the product in double so that a large
   // nmax*size_peratom_cols cannot overflow int arithmetic
   double bytes = (double) nmax * size_peratom_cols * sizeof(double);
 
   // per-neighbor terms sized by njmax
   bytes += 3.0*njmax*sizeof(double);
   bytes += (double) njmax*sizeof(int);
 
   // ncoeff x nvirial term: previously added as an element count; counted
   // here as doubles like the other terms
   // NOTE(review): the buffer this term stands for is not visible in this
   // function -- confirm that double is the right element type
   bytes += (double) ncoeff*nvirial*sizeof(double);
 
   // one SNA object per thread; the first one is taken as representative
   bytes += snaptr[0]->memory_usage()*comm->nthreads;
   return bytes;
 }
diff --git a/src/SNAP/pair_snap.cpp b/src/SNAP/pair_snap.cpp
index 7440630bc..f7f15593e 100644
--- a/src/SNAP/pair_snap.cpp
+++ b/src/SNAP/pair_snap.cpp
@@ -1,1741 +1,1741 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_snap.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "force.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "sna.h"
 #include "memory.h"
 #include "error.h"
 #include "openmp_snap.h"
 #include "domain.h"
 #include <cmath>
 
 using namespace LAMMPS_NS;
 
 #define MAXLINE 1024
 #define MAXWORD 3
 
 /* ---------------------------------------------------------------------- */
 
 PairSNAP::PairSNAP(LAMMPS *lmp) : Pair(lmp)
 {
   // flags inherited from the Pair base class
 
   manybody_flag = 1;
   one_coeff = 1;
   restartinfo = 0;
   single_enable = 0;
 
   // per-element tables: nothing read yet
 
   nelements = 0;
   coeffelem = NULL;
   wjelem = NULL;
   radelem = NULL;
   elements = NULL;
 
   // threading and OpenMP schedule bookkeeping; negative times and the
   // negative neighbor call count mean "nothing recorded yet"
 
   nthreads = 1;
   nmax = 0;
   schedule_user = 0;
   schedule_time_dynamic = -1;
   schedule_time_guided = -1;
   ncalls_neigh =-1;
 
   // micro load balancing: mask over the local list plus private
   // neighbor lists for ghost atoms, all empty at construction
 
   ilistmask = NULL;
   ilistmask_max = 0;
 
   ghostinum = 0;
   ghostilist = NULL;
   ghostilist_max = 0;
   ghostnumneigh = NULL;
   ghostnumneigh_max = 0;
   ghostfirstneigh = NULL;
   ghostneighs = NULL;
   ghostneighs_max = 0;
   ghostneighs_total = 0;
 
   // shared per-atom work arrays (only used with use_shared_arrays)
 
   use_shared_arrays = 0;
 
   i_numpairs = 0;
   i_neighmax = 0;
   i_max = 0;
   i_pairs = NULL;
   i_ninside = NULL;
   i_rcutij = NULL;
   i_wj = NULL;
   i_inside = NULL;
   i_rij = NULL;
   i_zarray_i =NULL;
   i_zarray_r = NULL;
   i_uarraytot_i = NULL;
   i_uarraytot_r = NULL;
 
 #ifdef TIMING_INFO
   for (int k = 0; k < 4; k++)
     timers[k] = 0;
 #endif
 
   // Need to set this because restart not handled by PairHybrid
 
   sna = NULL;
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairSNAP::~PairSNAP()
 {
   // element names and per-element tables exist only after elements
   // have been read
 
   if (nelements != 0) {
     int ielem = 0;
     while (ielem < nelements) {
       delete[] elements[ielem];
       ielem++;
     }
     delete[] elements;
     memory->destroy(radelem);
     memory->destroy(wjelem);
     memory->destroy(coeffelem);
   }
 
   // sna is NULL unless the per-thread SNA objects were created
   // (the constructor sets it to NULL explicitly)
 
   if (sna != NULL) {
 
 #ifdef TIMING_INFO
     // per-timer maximum and average over the threads of this process,
     // then reduced across processes onto rank 0
     double tmax[5];
     double tavg[5];
     double tavg_mpi[5];
     double tmax_mpi[5];
 
     for (int it = 0; it < 5; it++) {
       tmax[it] = 0;
       tavg[it] = 0;
       for (int tid = 0; tid<nthreads; tid++) {
         const double tthread = sna[tid]->timers[it];
         if (tthread>tmax[it])
           tmax[it] = tthread;
         tavg[it] += tthread;
       }
       tavg[it] /= nthreads;
     }
     MPI_Reduce(tavg, tavg_mpi, 5, MPI_DOUBLE, MPI_SUM, 0, world);
     MPI_Reduce(tmax, tmax_mpi, 5, MPI_DOUBLE, MPI_MAX, 0, world);
 #endif
 
     // free each thread's SNA object, then the pointer array itself
 
     for (int tid = 0; tid<nthreads; tid++) {
       delete sna[tid];
     }
     delete [] sna;
 
   }
 
   // arrays that exist only once allocated is set
 
   if (allocated) {
     memory->destroy(setflag);
     memory->destroy(cutsq);
     memory->destroy(map);
   }
 
 }
 
 void PairSNAP::compute(int eflag, int vflag)
 {
   // dispatch to one of the two force kernels, chosen by use_optimized
 
   if (!use_optimized) {
     compute_regular(eflag, vflag);
     return;
   }
 
   compute_optimized(eflag, vflag);
 }
 
 /* ----------------------------------------------------------------------
    This version is a straightforward implementation
    ---------------------------------------------------------------------- */
 
 void PairSNAP::compute_regular(int eflag, int vflag)
 {
   // Serial force/energy kernel: for every atom I of the neighbor list,
   // gather its neighbors inside the cutoff, evaluate the SNA quantities
   // with the single work object sna[0], and add the resulting pair
   // forces to f[i] and f[j].
   // eflag/vflag are passed to ev_setup() and select energy/virial tallies.
 
   int i,j,jnum,ninside;
   double delx,dely,delz,evdwl,rsq;
   double fij[3];
   int *jlist,*numneigh,**firstneigh;
   evdwl = 0.0;
 
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int newton_pair = force->newton_pair;
   class SNA* snaptr = sna[0];
 
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   for (int ii = 0; ii < list->inum; ii++) {
     i = list->ilist[ii];
 
     const double xtmp = x[i][0];
     const double ytmp = x[i][1];
     const double ztmp = x[i][2];
     const int itype = type[i];
     const int ielem = map[itype];
     const double radi = radelem[ielem];
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     // insure rij, inside, wj, and rcutij are of size jnum
 
     snaptr->grow_rij(jnum);
 
     // rij[][3] = displacements between atom I and those neighbors
     // inside = indices of neighbors of I within cutoff
     // wj = weights for neighbors of I within cutoff
     // rcutij = cutoffs for neighbors of I within cutoff
     // note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi
 
     ninside = 0;
     for (int jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
       delx = x[j][0] - xtmp;
       dely = x[j][1] - ytmp;
       delz = x[j][2] - ztmp;
       rsq = delx*delx + dely*dely + delz*delz;
       int jtype = type[j];
       int jelem = map[jtype];
 
       // the lower bound excludes neighbors at (numerically) zero distance
 
       if (rsq < cutsq[itype][jtype]&&rsq>1e-20) {
         snaptr->rij[ninside][0] = delx;
         snaptr->rij[ninside][1] = dely;
         snaptr->rij[ninside][2] = delz;
         snaptr->inside[ninside] = j;
         snaptr->wj[ninside] = wjelem[jelem];
         snaptr->rcutij[ninside] = (radi + radelem[jelem])*rcutfac;
         ninside++;
       }
     }
 
     // compute Ui, Zi, and Bi for atom I
     // Bi is only needed up front when the force prefactor depends on it
     // (gammaoneflag unset)
 
     snaptr->compute_ui(ninside);
     snaptr->compute_zi();
     if (!gammaoneflag) {
       snaptr->compute_bi();
       snaptr->copy_bi2bvec();
     }
 
     // for neighbors of I within cutoff:
     // compute dUi/drj and dBi/drj
     // Fij = dEi/dRj = -dEi/dRi => add to Fi, subtract from Fj
 
     double* coeffi = coeffelem[ielem];
 
     for (int jj = 0; jj < ninside; jj++) {
       int j = snaptr->inside[jj];
       snaptr->compute_duidrj(snaptr->rij[jj],
                              snaptr->wj[jj],snaptr->rcutij[jj]);
 
       snaptr->compute_dbidrj();
       snaptr->copy_dbi2dbvec();
 
       fij[0] = 0.0;
       fij[1] = 0.0;
       fij[2] = 0.0;
 
       // coeffi[0] is the constant energy term; coeffi[k], k>=1, multiplies
       // bispectrum component k-1
 
       for (int k = 1; k <= ncoeff; k++) {
         double bgb;
         if (gammaoneflag)
           bgb = coeffi[k];
         else bgb = coeffi[k]*
                gamma*pow(snaptr->bvec[k-1],gamma-1.0);
         fij[0] += bgb*snaptr->dbvec[k-1][0];
         fij[1] += bgb*snaptr->dbvec[k-1][1];
         fij[2] += bgb*snaptr->dbvec[k-1][2];
       }
 
       f[i][0] += fij[0];
       f[i][1] += fij[1];
       f[i][2] += fij[2];
       f[j][0] -= fij[0];
       f[j][1] -= fij[1];
       f[j][2] -= fij[2];
 
       if (evflag)
         ev_tally_xyz(i,j,nlocal,newton_pair,0.0,0.0,
                      fij[0],fij[1],fij[2],
                      snaptr->rij[jj][0],snaptr->rij[jj][1],
                      snaptr->rij[jj][2]);
     }
 
     if (eflag) {
 
       // evdwl = energy of atom I, sum over coeffs_k * Bi_k
 
       evdwl = coeffi[0];
       if (gammaoneflag) {
         snaptr->compute_bi();
         snaptr->copy_bi2bvec();
         for (int k = 1; k <= ncoeff; k++)
           evdwl += coeffi[k]*snaptr->bvec[k-1];
       } else
         for (int k = 1; k <= ncoeff; k++)
           evdwl += coeffi[k]*pow(snaptr->bvec[k-1],gamma);
 
       // energy-only tally: the force argument is 0.0, so pass an explicit
       // zero displacement instead of delx/dely/delz, which are left over
       // from the neighbor loop and uninitialized if no neighbor was visited
 
       ev_tally_full(i,2.0*evdwl,0.0,0.0,0.0,0.0,0.0);
     }
 
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 
 /* ----------------------------------------------------------------------
    This version is optimized for threading, micro-load balancing
    ---------------------------------------------------------------------- */
 
 void PairSNAP::compute_optimized(int eflag, int vflag)
 {
   // if reneighboring took place do load_balance if requested
   if (do_load_balance > 0 &&
       (neighbor->ncalls != ncalls_neigh)) {
     ghostinum = 0;
     // reset local ghost neighbor lists
     ncalls_neigh = neighbor->ncalls;
     if (ilistmask_max < list->inum) {
       memory->grow(ilistmask,list->inum,"PairSnap::ilistmask");
       ilistmask_max = list->inum;
     }
     for (int i = 0; i < list->inum; i++)
       ilistmask[i] = 1;
 
     //multiple passes for loadbalancing
     for (int i = 0; i < do_load_balance; i++)
       load_balance();
   }
 
   int numpairs = 0;
   for (int ii = 0; ii < list->inum; ii++) {
     if ((do_load_balance <= 0) || ilistmask[ii]) {
       int i = list->ilist[ii];
       int jnum = list->numneigh[i];
       numpairs += jnum;
     }
   }
 
   if (do_load_balance)
     for (int ii = 0; ii < ghostinum; ii++) {
       int i = ghostilist[ii];
       int jnum = ghostnumneigh[i];
       numpairs += jnum;
     }
 
   // optimized schedule setting
 
   int time_dynamic = 0;
   int time_guided = 0;
 
   if (schedule_user == 0) schedule_user = 4;
 
   switch (schedule_user) {
   case 1:
     omp_set_schedule(omp_sched_static,1);
     break;
   case 2:
     omp_set_schedule(omp_sched_dynamic,1);
     break;
   case 3:
     omp_set_schedule(omp_sched_guided,2);
     break;
   case 4:
     omp_set_schedule(omp_sched_auto,0);
     break;
   case 5:
     if (numpairs < 8*nthreads) omp_set_schedule(omp_sched_dynamic,1);
     else if (schedule_time_guided < 0.0) {
       omp_set_schedule(omp_sched_guided,2);
       if (!eflag && !vflag) time_guided = 1;
     } else if (schedule_time_dynamic<0.0) {
       omp_set_schedule(omp_sched_dynamic,1);
       if (!eflag && !vflag) time_dynamic = 1;
     } else if (schedule_time_guided<schedule_time_dynamic)
       omp_set_schedule(omp_sched_guided,2);
     else
       omp_set_schedule(omp_sched_dynamic,1);
     break;
   }
 
   if (use_shared_arrays)
     build_per_atom_arrays();
 
 #if defined(_OPENMP)
 #pragma omp parallel shared(eflag,vflag,time_dynamic,time_guided) firstprivate(numpairs) default(none)
 #endif
   {
     // begin of pragma omp parallel
 
     int tid = omp_get_thread_num();
     int** pairs_tid_unique = NULL;
 
     int** pairs;
     if (use_shared_arrays) pairs = i_pairs;
     else {
       memory->create(pairs_tid_unique,numpairs,4,"numpairs");
       pairs = pairs_tid_unique;
     }
 
     if (!use_shared_arrays) {
       numpairs = 0;
       for (int ii = 0; ii < list->inum; ii++) {
         if ((do_load_balance <= 0) || ilistmask[ii]) {
           int i = list->ilist[ii];
           int jnum = list->numneigh[i];
           for (int jj = 0; jj<jnum; jj++) {
             pairs[numpairs][0] = i;
             pairs[numpairs][1] = jj;
             pairs[numpairs][2] = -1;
             numpairs++;
           }
         }
       }
 
       for (int ii = 0; ii < ghostinum; ii++) {
         int i = ghostilist[ii];
         int jnum = ghostnumneigh[i];
         for (int jj = 0; jj<jnum; jj++) {
           pairs[numpairs][0] = i;
           pairs[numpairs][1] = jj;
           pairs[numpairs][2] = -1;
           numpairs++;
         }
       }
     }
 
     int ielem;
     int jj,k,jnum,jtype,ninside;
     double delx,dely,delz,evdwl,rsq;
     double fij[3];
     int *jlist,*numneigh,**firstneigh;
     evdwl = 0.0;
 
 #if defined(_OPENMP)
 #pragma omp master
 #endif
     {
       if (eflag || vflag) ev_setup(eflag,vflag);
       else evflag = vflag_fdotr = 0;
     }
 
 #if defined(_OPENMP)
 #pragma omp barrier
     { ; }
 #endif
 
     double **x = atom->x;
     double **f = atom->f;
     int *type = atom->type;
     int nlocal = atom->nlocal;
     int newton_pair = force->newton_pair;
 
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
 
 #ifdef TIMING_INFO
     // only update micro timers after setup
     static int count=0;
     if (count<2) {
       sna[tid]->timers[0] = 0;
       sna[tid]->timers[1] = 0;
       sna[tid]->timers[2] = 0;
       sna[tid]->timers[3] = 0;
       sna[tid]->timers[4] = 0;
     }
     count++;
 #endif
 
     // did thread start working on interactions of new atom
     int iold = -1;
 
     double starttime, endtime;
     if (time_dynamic || time_guided)
       starttime = MPI_Wtime();
 
 #if defined(_OPENMP)
 #pragma omp for schedule(runtime)
 #endif
     for (int iijj = 0; iijj < numpairs; iijj++) {
       int i = 0;
       if (use_shared_arrays) {
         i = i_pairs[iijj][0];
         if (iold != i) {
           set_sna_to_shared(tid,i_pairs[iijj][3]);
 	  ielem = map[type[i]];
 	}
         iold = i;
       } else {
         i = pairs[iijj][0];
         if (iold != i) {
           iold = i;
           const double xtmp = x[i][0];
           const double ytmp = x[i][1];
           const double ztmp = x[i][2];
           const int itype = type[i];
 	  ielem = map[itype];
 	  const double radi = radelem[ielem];
 
           if (i < nlocal) {
             jlist = firstneigh[i];
             jnum = numneigh[i];
           } else {
             jlist = ghostneighs+ghostfirstneigh[i];
             jnum = ghostnumneigh[i];
           }
 
           // insure rij, inside, wj, and rcutij are of size jnum
 
           sna[tid]->grow_rij(jnum);
 
           // rij[][3] = displacements between atom I and those neighbors
           // inside = indices of neighbors of I within cutoff
           // wj = weights of neighbors of I within cutoff
           // rcutij = cutoffs of neighbors of I within cutoff
           // note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi
 
           ninside = 0;
           for (jj = 0; jj < jnum; jj++) {
             int j = jlist[jj];
             j &= NEIGHMASK;
             delx = x[j][0] - xtmp; //unitialised
             dely = x[j][1] - ytmp;
             delz = x[j][2] - ztmp;
             rsq = delx*delx + dely*dely + delz*delz;
             jtype = type[j];
 	    int jelem = map[jtype];
 
             if (rsq < cutsq[itype][jtype]&&rsq>1e-20) { //unitialised
               sna[tid]->rij[ninside][0] = delx;
               sna[tid]->rij[ninside][1] = dely;
               sna[tid]->rij[ninside][2] = delz;
               sna[tid]->inside[ninside] = j;
               sna[tid]->wj[ninside] = wjelem[jelem];
               sna[tid]->rcutij[ninside] = (radi + radelem[jelem])*rcutfac;
 	      ninside++;
 
               // update index list with inside index
               pairs[iijj + (jj - pairs[iijj][1])][2] =
                 ninside-1; //unitialised
             }
           }
 
           // compute Ui and Zi for atom I
 
           sna[tid]->compute_ui(ninside); //unitialised
           sna[tid]->compute_zi();
         }
       }
 
       // for neighbors of I within cutoff:
       // compute dUi/drj and dBi/drj
       // Fij = dEi/dRj = -dEi/dRi => add to Fi, subtract from Fj
 
       // entry into loop if inside index is set
 
       double* coeffi = coeffelem[ielem];
 
       if (pairs[iijj][2] >= 0) {
         jj = pairs[iijj][2];
         int j = sna[tid]->inside[jj];
         sna[tid]->compute_duidrj(sna[tid]->rij[jj],
 				 sna[tid]->wj[jj],sna[tid]->rcutij[jj]);
 
         sna[tid]->compute_dbidrj();
         sna[tid]->copy_dbi2dbvec();
 	if (!gammaoneflag) {
 	  sna[tid]->compute_bi();
 	  sna[tid]->copy_bi2bvec();
 	}
 
         fij[0] = 0.0;
         fij[1] = 0.0;
         fij[2] = 0.0;
 
         for (k = 1; k <= ncoeff; k++) {
 	  double bgb;
 	  if (gammaoneflag) 
 	    bgb = coeffi[k];
 	  else bgb = coeffi[k]*
 		 gamma*pow(sna[tid]->bvec[k-1],gamma-1.0);
 	  fij[0] += bgb*sna[tid]->dbvec[k-1][0];
 	  fij[1] += bgb*sna[tid]->dbvec[k-1][1];
 	  fij[2] += bgb*sna[tid]->dbvec[k-1][2];
         }
 
 #if defined(_OPENMP)
 #pragma omp critical
 #endif
         {
           f[i][0] += fij[0];
           f[i][1] += fij[1];
           f[i][2] += fij[2];
           f[j][0] -= fij[0];
           f[j][1] -= fij[1];
           f[j][2] -= fij[2];
           if (evflag)
             ev_tally_xyz(i,j,nlocal,newton_pair,0.0,0.0,
                          fij[0],fij[1],fij[2],
                          sna[tid]->rij[jj][0],sna[tid]->rij[jj][1],
                          sna[tid]->rij[jj][2]);
         }
       }
 
       // evdwl = energy of atom I, sum over coeffs_k * Bi_k
       // only call this for first pair of each atom i
       // if atom has no pairs, eatom=0, which is wrong
 
       if (eflag&&pairs[iijj][1] == 0) {
 	evdwl = coeffi[0];
 	if (gammaoneflag) {
 	  sna[tid]->compute_bi();
 	  sna[tid]->copy_bi2bvec();
 	  for (int k = 1; k <= ncoeff; k++)
 	    evdwl += coeffi[k]*sna[tid]->bvec[k-1];
 	} else
 	  for (int k = 1; k <= ncoeff; k++)
 	    evdwl += coeffi[k]*pow(sna[tid]->bvec[k-1],gamma);
 
 #if defined(_OPENMP)
 #pragma omp critical
 #endif
         ev_tally_full(i,2.0*evdwl,0.0,0.0,delx,dely,delz);
       }
 
     }
     if (time_dynamic || time_guided)
       endtime = MPI_Wtime();
     if (time_dynamic) schedule_time_dynamic = endtime - starttime;
     if (time_guided) schedule_time_guided = endtime - starttime;
     if (!use_shared_arrays) memory->destroy(pairs);
 
   }// end of pragma omp parallel
 
   if (vflag_fdotr) virial_fdotr_compute();
 
 }
 
 inline int PairSNAP::equal(double* x,double* y)
 {
   // 1 if the two 3-vectors are closer than 1e-20 in squared distance,
   // 0 otherwise
 
   const double dx = x[0]-y[0];
   const double dy = x[1]-y[1];
   const double dz = x[2]-y[2];
   const double sqdist = dx*dx + dy*dy + dz*dz;
 
   return (sqdist < 1e-20) ? 1 : 0;
 }
 
 inline double PairSNAP::dist2(double* x,double* y)
 {
   // squared Euclidean distance between two 3-vectors
 
   const double dx = x[0]-y[0];
   const double dy = x[1]-y[1];
   const double dz = x[2]-y[2];
 
   return dx*dx + dy*dy + dz*dz;
 }
 
 // return extra communication cutoff
 // extra_cutoff = max(subdomain_length)
 
 double PairSNAP::extra_cutoff()
 {
   // bounds of this processor's subdomain; for a triclinic box the
   // lamda-coordinate bounds are converted with lamda2x() first
 
   double lo[3],hi[3];
 
   if (domain->triclinic != 0) {
     domain->lamda2x(domain->sublo_lamda,lo);
     domain->lamda2x(domain->subhi_lamda,hi);
   } else {
     for (int d = 0; d < 3; d++) {
       lo[d] = domain->sublo[d];
       hi[d] = domain->subhi[d];
     }
   }
 
   // longest edge of the subdomain, never below zero
 
   double longest = 0;
   for (int d = 0; d < 3; d++) {
     const double extent = hi[d] - lo[d];
     longest = MAX(longest,extent);
   }
 
   // note: for triclinic, probably need something different
   // see Comm::setup()
 
   return longest;
 }
 
 // micro load_balancer: each MPI process will
 // check with each of its 26 neighbors,
 // whether an imbalance exists in the number
 // of atoms to calculate forces for.
 // If it does it will set ilistmask of one of
 // its local atoms to zero, and send its Tag
 // to the neighbor process. The neighboring process
 // will check its ghost list for the
 // ghost atom with the same Tag which is closest
 // to its domain center, and build a
 // neighborlist for this ghost atom. For this to work,
 // the communication cutoff has to be
 // as large as the neighbor cutoff +
 // maximum subdomain length.
 
 // Note that at most one atom is exchanged per processor pair.
 
 // Also note that the local atom assignment
 // doesn't change. This load balancer will cause
 // some ghost atoms to have full neighborlists
 // which are unique to PairSNAP.
 // They are not part of the generally accessible neighborlist.
 // At the same time corresponding local atoms on
 // other MPI processes will not be
 // included in the force computation since
 // their ilistmask is 0. This does not affect
 // any other classes which might
 // access the same general neighborlist.
 // Reverse communication (newton on) of forces is required.
 
 // Currently the load balancer does two passes,
 // since it is exchanging atoms upstream and downstream.
 
 /* ----------------------------------------------------------------------
    micro load balancing of per-atom work between neighboring processors
    for each of the 26 surrounding grid offsets the atom counts are
    exchanged and, when the imbalance exceeds one atom, a single atom tag
    is handed over in each of two passes (upstream, then downstream)
    a handed-over atom is masked out of ilistmask on the sender; the
    receiver appends its closest local/ghost image to ghostilist and builds
    a brute-force neighbor list for it in ghostneighs
 ------------------------------------------------------------------------- */
 void PairSNAP::load_balance()
 {
   // subdomain bounds in box coords (converted from lamda coords if triclinic)
   double sublo[3],subhi[3];
   if (domain->triclinic == 0) {
     double* sublotmp = domain->sublo;
     double* subhitmp = domain->subhi;
     for (int dim = 0 ; dim<3 ; dim++) {
       sublo[dim]=sublotmp[dim];
       subhi[dim]=subhitmp[dim];
     }
   } else {
     double* sublotmp = domain->sublo_lamda;
     double* subhitmp = domain->subhi_lamda;
     domain->lamda2x(sublotmp,sublo);
     domain->lamda2x(subhitmp,subhi);
   }
 
   //if (list->inum==0) list->grow(atom->nmax);
 
   // nlocal = number of atoms this proc currently works on:
   // adopted ghost atoms plus all list entries that are still unmasked
   int nlocal = ghostinum;
   for (int i=0; i < list->inum; i++)
     if (ilistmask[i]) nlocal++;
   int ***grid2proc = comm->grid2proc;
   int* procgrid = comm->procgrid;
 
   int nlocal_up,nlocal_down;
   MPI_Request request;
 
   // center of the subdomain, used to pick the closest image of a received atom
   double sub_mid[3];
   for (int dim=0; dim<3; dim++)
     sub_mid[dim] = (subhi[dim] + sublo[dim])/2;
 
   if (comm->cutghostuser <
       neighbor->cutneighmax+extra_cutoff())
     error->all(FLERR,"Communication cutoff is too small "
                "for SNAP micro load balancing.\n"
                "Typically this can happen, if you change "
                "the neighbor skin after your pair_style "
                "command or if your box dimensions grow "
                "during the run.\n"
                "You need to set it via "
                "'communicate single cutoff NUMBER' "
                "to the needed length.");
 
   // NOTE(review): nrecv is only incremented below and never read in this function
   int nrecv = ghostinum;
   // totalsend = running cursor into the list; advanced for every send attempt
   int totalsend = 0;
   int nsend = 0;
   int depth = 1;
 
   // loop over all grid offsets within +-depth in each dimension, skipping (0,0,0)
   for (int dx = -depth; dx < depth+1; dx++)
     for (int dy = -depth; dy < depth+1; dy++)
       for (int dz = -depth; dz < depth+1; dz++) {
 
         if (dx == dy && dy == dz && dz == 0) continue;
 
         // grid location at offset +d, wrapped periodically into the proc grid
         int sendloc[3] = {comm->myloc[0],
                           comm->myloc[1], comm->myloc[2]
                          };
         sendloc[0] += dx;
         sendloc[1] += dy;
         sendloc[2] += dz;
         for (int dim = 0; dim < 3; dim++)
           if (sendloc[dim] >= procgrid[dim])
             sendloc[dim] = sendloc[dim] - procgrid[dim];
         for (int dim = 0; dim < 3; dim++)
           if (sendloc[dim] < 0)
             sendloc[dim] = procgrid[dim] + sendloc[dim];
         // grid location at offset -d, wrapped the same way
         int recvloc[3] = {comm->myloc[0],
                           comm->myloc[1], comm->myloc[2]
                          };
         recvloc[0] -= dx;
         recvloc[1] -= dy;
         recvloc[2] -= dz;
         for (int dim = 0; dim < 3; dim++)
           if (recvloc[dim] < 0)
             recvloc[dim] = procgrid[dim] + recvloc[dim];
         for (int dim = 0; dim < 3; dim++)
           if (recvloc[dim] >= procgrid[dim])
             recvloc[dim] = recvloc[dim] - procgrid[dim];
 
         int sendproc = grid2proc[sendloc[0]][sendloc[1]][sendloc[2]];
         int recvproc = grid2proc[recvloc[0]][recvloc[1]][recvloc[2]];
 
         // two stage process, first upstream movement, then downstream
 
         // nlocal_up = atom count on recvproc, nlocal_down = atom count on sendproc
         MPI_Sendrecv(&nlocal,1,MPI_INT,sendproc,0,
                      &nlocal_up,1,MPI_INT,recvproc,0,world,MPI_STATUS_IGNORE);
         MPI_Sendrecv(&nlocal,1,MPI_INT,recvproc,0,
                      &nlocal_down,1,MPI_INT,sendproc,0,world,MPI_STATUS_IGNORE);
         nsend = 0;
 
         // send upstream
 
         if (nlocal > nlocal_up+1) {
 
           // advance the cursor to the next entry that is still unmasked
           int i = totalsend++;
           while(i < list->inum && ilistmask[i] == 0)
             i = totalsend++;
 
           // NOTE(review): the request returned by MPI_Isend is never waited on
           // or freed in this function, and in the else branch the send buffer j
           // is a block-local variable - confirm this is safe with the MPI
           // library in use
           if (i < list->inum)
             MPI_Isend(&atom->tag[i],1,MPI_INT,recvproc,0,world,&request);
           else {
             int j = -1;
             MPI_Isend(&j,1,MPI_INT,recvproc,0,world,&request);
           }
 
           // mask out the list entries whose atom index equals i
           // NOTE(review): i is used both as a position in ilistmask and as an
           // atom index (atom->tag[i], list->ilist[j] == i) - presumably relies
           // on ilist[ii] == ii; verify
           if (i < list->inum) {
             for (int j = 0; j < list->inum; j++)
               if (list->ilist[j] == i)
                 ilistmask[j] = 0;
             nsend = 1;
           }
         }
 
         // recv downstream
 
         if (nlocal < nlocal_down-1) {
           nlocal++;
           int get_tag = -1;
           MPI_Recv(&get_tag,1,MPI_INT,sendproc,0,world,MPI_STATUS_IGNORE);
 
           // if get_tag -1 the other process didnt have local atoms to send
 
           if (get_tag >= 0) {
             // grow the ghost bookkeeping arrays if they are full
             if (ghostinum >= ghostilist_max) {
               memory->grow(ghostilist,ghostinum+10,
                            "PairSnap::ghostilist");
               ghostilist_max = ghostinum+10;
             }
             if (atom->nlocal + atom->nghost >= ghostnumneigh_max) {
               ghostnumneigh_max = atom->nlocal+atom->nghost+100;
               memory->grow(ghostnumneigh,ghostnumneigh_max,
                            "PairSnap::ghostnumneigh");
               memory->grow(ghostfirstneigh,ghostnumneigh_max,
                            "PairSnap::ghostfirstneigh");
             }
 
             // find closest ghost image of the transferred particle
 
             double mindist = 1e200;
             int closestghost = -1;
             for (int j = 0; j < atom->nlocal + atom->nghost; j++)
               if (atom->tag[j] == get_tag)
                 if (dist2(sub_mid, atom->x[j]) < mindist) {
                   closestghost = j;
                   mindist = dist2(sub_mid, atom->x[j]);
                 }
 
             // build neighborlist for this particular
             // ghost atom, and add it to list->ilist
 
             // make sure there is room for up to neighbor->oneatom more entries
             if (ghostneighs_max - ghostneighs_total <
                 neighbor->oneatom) {
               memory->grow(ghostneighs,
                            ghostneighs_total + neighbor->oneatom,
                            "PairSnap::ghostneighs");
               ghostneighs_max = ghostneighs_total + neighbor->oneatom;
             }
 
             // NOTE(review): closestghost stays -1 if no local or ghost atom
             // carries get_tag; j is then used as an array index unchecked
             int j = closestghost;
 
             ghostilist[ghostinum] = j;
             ghostnumneigh[j] = 0;
             ghostfirstneigh[j] = ghostneighs_total;
 
             ghostinum++;
             int* jlist = ghostneighs + ghostfirstneigh[j];
 
             // find all neighbors by looping
             // over all local and ghost atoms
 
             // NOTE(review): k == j satisfies the distance test as well, so the
             // atom is stored in its own neighbor list
             for (int k = 0; k < atom->nlocal + atom->nghost; k++)
               if (dist2(atom->x[j],atom->x[k]) <
                   neighbor->cutneighmax*neighbor->cutneighmax) {
                 jlist[ghostnumneigh[j]] = k;
                 ghostnumneigh[j]++;
                 ghostneighs_total++;
               }
           }
 
           if (get_tag >= 0) nrecv++;
         }
 
         // decrease nlocal later, so that it is the
         // initial number both for receiving and sending
 
         if (nsend) nlocal--;
 
         // second pass through the grid
 
         MPI_Sendrecv(&nlocal,1,MPI_INT,sendproc,0,
                      &nlocal_up,1,MPI_INT,recvproc,0,world,MPI_STATUS_IGNORE);
         MPI_Sendrecv(&nlocal,1,MPI_INT,recvproc,0,
                      &nlocal_down,1,MPI_INT,sendproc,0,world,MPI_STATUS_IGNORE);
 
         // send downstream
         // (mirror image of the upstream send, with sendproc as the target)
 
         nsend=0;
         if (nlocal > nlocal_down+1) {
           int i = totalsend++;
           while(i < list->inum && ilistmask[i]==0) i = totalsend++;
 
           if (i < list->inum)
             MPI_Isend(&atom->tag[i],1,MPI_INT,sendproc,0,world,&request);
           else {
             int j =- 1;
             MPI_Isend(&j,1,MPI_INT,sendproc,0,world,&request);
           }
 
           if (i < list->inum) {
             for (int j=0; j<list->inum; j++)
               if (list->ilist[j] == i) ilistmask[j] = 0;
             nsend = 1;
           }
         }
 
         // receive upstream
         // (mirror image of the downstream receive, with recvproc as the source)
 
         if (nlocal < nlocal_up-1) {
           nlocal++;
           int get_tag = -1;
 
           MPI_Recv(&get_tag,1,MPI_INT,recvproc,0,world,MPI_STATUS_IGNORE);
 
           if (get_tag >= 0) {
             if (ghostinum >= ghostilist_max) {
               memory->grow(ghostilist,ghostinum+10,
                            "PairSnap::ghostilist");
               ghostilist_max = ghostinum+10;
             }
             if (atom->nlocal + atom->nghost >= ghostnumneigh_max) {
               ghostnumneigh_max = atom->nlocal + atom->nghost + 100;
               memory->grow(ghostnumneigh,ghostnumneigh_max,
                            "PairSnap::ghostnumneigh");
               memory->grow(ghostfirstneigh,ghostnumneigh_max,
                            "PairSnap::ghostfirstneigh");
             }
 
             // find closest ghost image of the transferred particle
 
             double mindist = 1e200;
             int closestghost = -1;
             for (int j = 0; j < atom->nlocal + atom->nghost; j++)
               if (atom->tag[j] == get_tag)
                 if (dist2(sub_mid,atom->x[j])<mindist) {
                   closestghost = j;
                   mindist = dist2(sub_mid,atom->x[j]);
                 }
 
             // build neighborlist for this particular ghost atom
 
             if (ghostneighs_max-ghostneighs_total < neighbor->oneatom) {
               memory->grow(ghostneighs,ghostneighs_total + neighbor->oneatom,
                            "PairSnap::ghostneighs");
               ghostneighs_max = ghostneighs_total + neighbor->oneatom;
             }
 
             int j = closestghost;
 
             ghostilist[ghostinum] = j;
             ghostnumneigh[j] = 0;
             ghostfirstneigh[j] = ghostneighs_total;
 
             ghostinum++;
             int* jlist = ghostneighs + ghostfirstneigh[j];
 
             for (int k = 0; k < atom->nlocal + atom->nghost; k++)
               if (dist2(atom->x[j],atom->x[k]) <
                   neighbor->cutneighmax*neighbor->cutneighmax) {
                 jlist[ghostnumneigh[j]] = k;
                 ghostnumneigh[j]++;
                 ghostneighs_total++;
               }
           }
 
           if (get_tag >= 0) nrecv++;
         }
         if (nsend) nlocal--;
       }
 }
 
 /* ----------------------------------------------------------------------
    point the work arrays of SNA instance snaid at the shared per-atom
    storage of slot i (pointers are aliased, nothing is copied)
 ------------------------------------------------------------------------- */
 void PairSNAP::set_sna_to_shared(int snaid,int i)
 {
   // accumulated U arrays and Z arrays of slot i
   sna[snaid]->uarraytot_r = i_uarraytot_r[i];
   sna[snaid]->uarraytot_i = i_uarraytot_i[i];
   sna[snaid]->zarray_r = i_zarray_r[i];
   sna[snaid]->zarray_i = i_zarray_i[i];
 
   // per-neighbor data of slot i
   sna[snaid]->rcutij = i_rcutij[i];
   sna[snaid]->wj = i_wj[i];
   sna[snaid]->inside = i_inside[i];
   sna[snaid]->rij = i_rij[i];
 }
 
 /* ----------------------------------------------------------------------
    fill the shared per-atom arrays (i_rij, i_inside, i_wj, i_rcutij,
    i_ninside, i_pairs) for every atom this proc works on, i.e. the
    unmasked entries of the neighbor list followed by the ghost atoms
    adopted by load_balance(), growing the arrays first if needed
    afterwards compute_ui_omp() and compute_zi_omp() are run for each slot
 ------------------------------------------------------------------------- */
 void PairSNAP::build_per_atom_arrays()
 {
 
 #ifdef TIMING_INFO
   clock_gettime(CLOCK_REALTIME,&starttime);
 #endif
 
   // count = number of atoms to process, neighmax = longest neighbor list
   // list entries are skipped only if load balancing is on and they are masked
   int count = 0;
   int neighmax = 0;
   for (int ii = 0; ii < list->inum; ii++)
     if ((do_load_balance <= 0) || ilistmask[ii]) {
       neighmax=MAX(neighmax,list->numneigh[list->ilist[ii]]);
       ++count;
     }
   for (int ii = 0; ii < ghostinum; ii++) {
     neighmax=MAX(neighmax,ghostnumneigh[ghostilist[ii]]);
     ++count;
   }
 
   // reallocate the per-neighbor arrays if either dimension is too small
   // old contents are discarded, they are rebuilt from scratch below
   if (i_max < count || i_neighmax < neighmax) {
     int i_maxt = MAX(count,i_max);
     i_neighmax = MAX(neighmax,i_neighmax);
     memory->destroy(i_rij);
     memory->destroy(i_inside);
     memory->destroy(i_wj);
     memory->destroy(i_rcutij);
     memory->destroy(i_ninside);
     memory->destroy(i_pairs);
     memory->create(i_rij,i_maxt,i_neighmax,3,"PairSNAP::i_rij");
     memory->create(i_inside,i_maxt,i_neighmax,"PairSNAP::i_inside");
     memory->create(i_wj,i_maxt,i_neighmax,"PairSNAP::i_wj");
     memory->create(i_rcutij,i_maxt,i_neighmax,"PairSNAP::i_rcutij");
     memory->create(i_ninside,i_maxt,"PairSNAP::i_ninside");
     memory->create(i_pairs,i_maxt*i_neighmax,4,"PairSNAP::i_pairs");
   }
 
   // reallocate the per-atom U and Z arrays if the atom count grew
   // i_max still holds the old size here, it is needed to free the old Z arrays
   if (i_max < count) {
     int jdim = sna[0]->twojmax+1;
     memory->destroy(i_uarraytot_r);
     memory->destroy(i_uarraytot_i);
     memory->create(i_uarraytot_r,count,jdim,jdim,jdim,
                    "PairSNAP::i_uarraytot_r");
     memory->create(i_uarraytot_i,count,jdim,jdim,jdim,
                    "PairSNAP::i_uarraytot_i");
     if (i_zarray_r != NULL)
       for (int i = 0; i < i_max; i++) {
         memory->destroy(i_zarray_r[i]);
         memory->destroy(i_zarray_i[i]);
       }
 
     delete [] i_zarray_r;
     delete [] i_zarray_i;
     i_zarray_r = new double*****[count];
     i_zarray_i = new double*****[count];
     for (int i = 0; i < count; i++) {
       memory->create(i_zarray_r[i],jdim,jdim,jdim,jdim,jdim,
                      "PairSNAP::i_zarray_r");
       memory->create(i_zarray_i[i],jdim,jdim,jdim,jdim,jdim,
                      "PairSNAP::i_zarray_i");
     }
   }
 
   if (i_max < count)
     i_max = count;
 
   // from here on count is the slot index of the atom being filled in
   count = 0;
   i_numpairs = 0;
 
   // first the unmasked atoms of the regular neighbor list
   for (int ii = 0; ii < list->inum; ii++) {
     if ((do_load_balance <= 0) || ilistmask[ii]) {
       int i = list->ilist[ii];
       int jnum = list->numneigh[i];
       int* jlist = list->firstneigh[i];
       const double xtmp = atom->x[i][0];
       const double ytmp = atom->x[i][1];
       const double ztmp = atom->x[i][2];
       const int itype = atom->type[i];
       const int ielem = map[itype];
       const double radi = radelem[ielem];
       int ninside = 0;
       for (int jj = 0; jj < jnum; jj++) {
         int j = jlist[jj];
         j &= NEIGHMASK;
         const double delx = atom->x[j][0] - xtmp;
         const double dely = atom->x[j][1] - ytmp;
         const double delz = atom->x[j][2] - ztmp;
         const double rsq = delx*delx + dely*dely + delz*delz;
         int jtype = atom->type[j];
 	int jelem = map[jtype];
 
         // one i_pairs row per (atom, neighbor) pair:
         // [0] atom index, [1] position in its neighbor list,
         // [2] position among the neighbors inside the cutoff (-1 if outside),
         // [3] slot index of the atom
         i_pairs[i_numpairs][0] = i;
         i_pairs[i_numpairs][1] = jj;
         i_pairs[i_numpairs][2] = -1;
         i_pairs[i_numpairs][3] = count;
         // keep neighbors inside the cutoff, excluding (near) zero separation
         if (rsq < cutsq[itype][jtype]&&rsq>1e-20) {
           i_rij[count][ninside][0] = delx;
           i_rij[count][ninside][1] = dely;
           i_rij[count][ninside][2] = delz;
           i_inside[count][ninside] = j;
           i_wj[count][ninside] = wjelem[jelem];
           i_rcutij[count][ninside] = (radi + radelem[jelem])*rcutfac;
 
           // update index list with inside index
           i_pairs[i_numpairs][2] = ninside++;
         }
         i_numpairs++;
       }
       i_ninside[count] = ninside;
       count++;
     }
   }
 
   // then the ghost atoms adopted via load balancing, using their own
   // neighbor lists stored in ghostneighs
   for (int ii = 0; ii < ghostinum; ii++) {
     int i = ghostilist[ii];
     int jnum = ghostnumneigh[i];
     int* jlist = ghostneighs+ghostfirstneigh[i];
     const double xtmp = atom->x[i][0];
     const double ytmp = atom->x[i][1];
     const double ztmp = atom->x[i][2];
     const int itype = atom->type[i];
     const int ielem = map[itype];
     const double radi = radelem[ielem];
     int ninside = 0;
 
     for (int jj = 0; jj < jnum; jj++) {
       int j = jlist[jj];
       j &= NEIGHMASK;
       const double delx = atom->x[j][0] - xtmp;
       const double dely = atom->x[j][1] - ytmp;
       const double delz = atom->x[j][2] - ztmp;
       const double rsq = delx*delx + dely*dely + delz*delz;
       int jtype = atom->type[j];
       int jelem = map[jtype];
 
       i_pairs[i_numpairs][0] = i;
       i_pairs[i_numpairs][1] = jj;
       i_pairs[i_numpairs][2] = -1;
       i_pairs[i_numpairs][3] = count;
       if (rsq < cutsq[itype][jtype]&&rsq>1e-20) {
         i_rij[count][ninside][0] = delx;
         i_rij[count][ninside][1] = dely;
         i_rij[count][ninside][2] = delz;
         i_inside[count][ninside] = j;
         i_wj[count][ninside] = wjelem[jelem];
         i_rcutij[count][ninside] = (radi + radelem[jelem])*rcutfac;
         // update index list with inside index
         i_pairs[i_numpairs][2] = ninside++;
       }
       i_numpairs++;
     }
     i_ninside[count] = ninside;
     count++;
   }
 #ifdef TIMING_INFO
   clock_gettime(CLOCK_REALTIME,&endtime);
   timers[0]+=(endtime.tv_sec-starttime.tv_sec+1.0*
               (endtime.tv_nsec-starttime.tv_nsec)/1000000000);
 #endif
 #ifdef TIMING_INFO
   clock_gettime(CLOCK_REALTIME,&starttime);
 #endif
 
   // compute the U arrays of all slots, one slot per loop iteration, each
   // thread working through its own SNA instance
   // NOTE(review): with TIMING_INFO defined the member starttime/endtime are
   // written inside this parallel loop - confirm that is intended
 #if defined(_OPENMP)
 #pragma omp parallel for shared(count) default(none)
 #endif
   for (int ii=0; ii < count; ii++) {
     int tid = omp_get_thread_num();
     set_sna_to_shared(tid,ii);
     //sna[tid]->compute_ui(i_ninside[ii]);
 #ifdef TIMING_INFO
     clock_gettime(CLOCK_REALTIME,&starttime);
 #endif
     sna[tid]->compute_ui_omp(i_ninside[ii],MAX(int(nthreads/count),1));
 #ifdef TIMING_INFO
     clock_gettime(CLOCK_REALTIME,&endtime);
     sna[tid]->timers[0]+=(endtime.tv_sec-starttime.tv_sec+1.0*
                           (endtime.tv_nsec-starttime.tv_nsec)/1000000000);
 #endif
   }
 
 #ifdef TIMING_INFO
   clock_gettime(CLOCK_REALTIME,&starttime);
 #endif
   // compute the Z arrays of all slots; this loop is serial and always uses
   // SNA instance 0, the thread count is passed on to compute_zi_omp()
   for (int ii=0; ii < count; ii++) {
     int tid = 0;//omp_get_thread_num();
     set_sna_to_shared(tid,ii);
     sna[tid]->compute_zi_omp(MAX(int(nthreads/count),1));
   }
 #ifdef TIMING_INFO
   clock_gettime(CLOCK_REALTIME,&endtime);
   sna[0]->timers[1]+=(endtime.tv_sec-starttime.tv_sec+1.0*
                       (endtime.tv_nsec-starttime.tv_nsec)/1000000000);
 #endif
 
 #ifdef TIMING_INFO
   clock_gettime(CLOCK_REALTIME,&endtime);
   timers[1]+=(endtime.tv_sec-starttime.tv_sec+1.0*
               (endtime.tv_nsec-starttime.tv_nsec)/1000000000);
 #endif
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairSNAP::allocate()
 {
   allocated = 1;
 
   // per-type tables get ntypes+1 entries in each dimension
   const int ntypes_plus_one = atom->ntypes + 1;
 
   memory->create(map,ntypes_plus_one,"pair:map");
   memory->create(cutsq,ntypes_plus_one,ntypes_plus_one,"pair:cutsq");
   memory->create(setflag,ntypes_plus_one,ntypes_plus_one,"pair:setflag");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairSNAP::settings(int narg, char **arg)
 {
 
   // set default values for optional arguments
 
   nthreads = -1;
   use_shared_arrays=-1;
   do_load_balance = 0;
   use_optimized = 1;
 
   // optional arguments
 
   for (int i=0; i < narg; i++) {
     if (i+2>narg) error->all(FLERR,"Illegal pair_style command."
 			     " Too few arguments.");
     if (strcmp(arg[i],"nthreads")==0) {
       nthreads=force->inumeric(FLERR,arg[++i]);
 #if defined(LMP_USER_OMP)
       error->all(FLERR,"Please set number of threads via package omp command");
 #else
       omp_set_num_threads(nthreads);
       comm->nthreads=nthreads;
 #endif
       continue;
     }
     if (strcmp(arg[i],"optimized")==0) {
       use_optimized=force->inumeric(FLERR,arg[++i]);
       continue;
     }
     if (strcmp(arg[i],"shared")==0) {
       use_shared_arrays=force->inumeric(FLERR,arg[++i]);
       continue;
     }
     if (strcmp(arg[i],"loadbalance")==0) {
       do_load_balance = force->inumeric(FLERR,arg[++i]);
       if (do_load_balance) {
 	double mincutoff = extra_cutoff() +
 	  rcutmax + neighbor->skin;
 	if (comm->cutghostuser < mincutoff) {
 	  char buffer[255];
 
 	  //apparently mincutoff is 0 after sprintf command ?????
 
 	  double tmp = mincutoff + 0.1;
 	  sprintf(buffer, "Communication cutoff is too small "
 		  "for SNAP micro load balancing. "
 		  "It will be increased to: %lf.",mincutoff+0.1);
 	  if (comm->me==0)
 	    error->warning(FLERR,buffer);
 
 	  comm->cutghostuser = tmp;
 
 	}
       }
       continue;
     }
     if (strcmp(arg[i],"schedule")==0) {
       i++;
       if (strcmp(arg[i],"static")==0)
 	schedule_user = 1;
       if (strcmp(arg[i],"dynamic")==0)
 	schedule_user = 2;
       if (strcmp(arg[i],"guided")==0)
 	schedule_user = 3;
       if (strcmp(arg[i],"auto")==0)
 	schedule_user = 4;
       if (strcmp(arg[i],"determine")==0)
 	schedule_user = 5;
       if (schedule_user == 0)
 	error->all(FLERR,"Illegal pair_style command."
 		   " Illegal schedule argument.");
       continue;
     }
     char buffer[255];
     sprintf(buffer, "Illegal pair_style command."
 	    " Unrecognized argument: %s.\n",arg[i]);
     error->all(FLERR,buffer);
   }
 
   if (nthreads < 0)
     nthreads = comm->nthreads;
 
   if (use_shared_arrays < 0) {
     if (nthreads > 1 && atom->nlocal <= 2*nthreads)
       use_shared_arrays = 1;
     else use_shared_arrays = 0;
   }
 
   // check if running non-optimized code with
   // optimization flags set
 
   if (!use_optimized)
     if (nthreads > 1 || 
 	use_shared_arrays || 
 	do_load_balance ||
 	schedule_user)
       error->all(FLERR,"Illegal pair_style command."
                  "Advanced options require setting 'optimized 1'.");
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairSNAP::coeff(int narg, char **arg)
 {
   // read SNAP element names between 2 filenames
   // nelements = # of SNAP elements
   // elements = list of unique element names
 
   if (narg < 6) error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   if (nelements) {
     for (int i = 0; i < nelements; i++) 
       delete[] elements[i];
     delete[] elements;
     memory->destroy(radelem);
     memory->destroy(wjelem);
     memory->destroy(coeffelem);
   }
 
   nelements = narg - 4 - atom->ntypes;
   if (nelements < 1) error->all(FLERR,"Incorrect args for pair coefficients");
 
   char* type1 = arg[0];
   char* type2 = arg[1];
   char* coefffilename = arg[2];
   char** elemlist = &arg[3];
   char* paramfilename = arg[3+nelements];
   char** elemtypes = &arg[4+nelements];
 
   // insure I,J args are * *
 
   if (strcmp(type1,"*") != 0 || strcmp(type2,"*") != 0)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   elements = new char*[nelements];
 
   for (int i = 0; i < nelements; i++) {
     char* elemname = elemlist[i];
     int n = strlen(elemname) + 1;
     elements[i] = new char[n];
     strcpy(elements[i],elemname);
   }
 
   // read snapcoeff and snapparam files
 
   read_files(coefffilename,paramfilename);
 
   // read args that map atom types to SNAP elements
   // map[i] = which element the Ith atom type is, -1 if not mapped
   // map[0] is not used
 
   for (int i = 1; i <= atom->ntypes; i++) {
     char* elemname = elemtypes[i-1];
     int jelem;
     for (jelem = 0; jelem < nelements; jelem++)
       if (strcmp(elemname,elements[jelem]) == 0)
 	break;
 
     if (jelem < nelements)
       map[i] = jelem;
     else if (strcmp(elemname,"NULL") == 0) map[i] = -1;
     else error->all(FLERR,"Incorrect args for pair coefficients");
   }
 
   // clear setflag since coeff() called once with I,J = * *
 
   int n = atom->ntypes;
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       setflag[i][j] = 0;
 
   // set setflag i,j for type pairs where both are mapped to elements
 
   int count = 0;
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       if (map[i] >= 0 && map[j] >= 0) {
         setflag[i][j] = 1;
         count++;
       }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 
   sna = new SNA*[nthreads];
 
   // allocate memory for per OpenMP thread data which
   // is wrapped into the sna class
 
 #if defined(_OPENMP)
 #pragma omp parallel default(none)
 #endif
   {
     int tid = omp_get_thread_num();
     sna[tid] = new SNA(lmp,rfac0,twojmax,
                        diagonalstyle,use_shared_arrays,
 		       rmin0,switchflag);
     if (!use_shared_arrays)
       sna[tid]->grow_rij(nmax);
   }
 
   if (ncoeff != sna[0]->ncoeff) {
     printf("ncoeff = %d snancoeff = %d \n",ncoeff,sna[0]->ncoeff);
     error->all(FLERR,"Incorrect SNAP parameter file");
   }
 
   // Calculate maximum cutoff for all elements
 
   rcutmax = 0.0;
   for (int ielem = 0; ielem < nelements; ielem++)
     rcutmax = MAX(2.0*radelem[ielem]*rcutfac,rcutmax);
 
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairSNAP::init_style()
 {
   // this pair style refuses to run with newton pair off
 
   if (force->newton_pair == 0)
     error->all(FLERR,"Pair style SNAP requires newton pair on");
 
   // need a full neighbor list
   // (the request defaults are overridden: half off, full on)
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
 
   // every OpenMP thread initializes the SNA instance with its own thread id
 
 #if defined(_OPENMP)
 #pragma omp parallel default(none)
 #endif
   {
     int tid = omp_get_thread_num();
     sna[tid]->init();
   }
 
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairSNAP::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   // cutoff = sum of the two element radii scaled by rcutfac
 
   const double radsum = radelem[map[i]] + radelem[map[j]];
   return radsum*rcutfac;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairSNAP::read_files(char *coefffilename, char *paramfilename)
 {
 
   // open SNAP ceofficient file on proc 0
 
   FILE *fpcoeff;
   if (comm->me == 0) {
     fpcoeff = force->open_potential(coefffilename);
     if (fpcoeff == NULL) {
       char str[128];
       sprintf(str,"Cannot open SNAP coefficient file %s",coefffilename);
       error->one(FLERR,str);
     }
   }
 
   char line[MAXLINE],*ptr;
   int eof = 0;
 
   int n;
   int nwords = 0;
   while (nwords == 0) {
     if (comm->me == 0) {
       ptr = fgets(line,MAXLINE,fpcoeff);
       if (ptr == NULL) {
         eof = 1;
         fclose(fpcoeff);
       } else n = strlen(line) + 1;
     }
     MPI_Bcast(&eof,1,MPI_INT,0,world);
     if (eof) break;
     MPI_Bcast(&n,1,MPI_INT,0,world);
     MPI_Bcast(line,n,MPI_CHAR,0,world);
 
     // strip comment, skip line if blank
 
     if ((ptr = strchr(line,'#'))) *ptr = '\0';
     nwords = atom->count_words(line);
   }
   if (nwords != 2) 
     error->all(FLERR,"Incorrect format in SNAP coefficient file");
 
   // words = ptrs to all words in line
   // strip single and double quotes from words
 
   char* words[MAXWORD];
   int iword = 0;
   words[iword] = strtok(line,"' \t\n\r\f");
   iword = 1;
   words[iword] = strtok(NULL,"' \t\n\r\f");
 
   int nelemfile = atoi(words[0]);
   ncoeff = atoi(words[1])-1;
 
   // Set up element lists 
 
   memory->create(radelem,nelements,"pair:radelem");
   memory->create(wjelem,nelements,"pair:wjelem");
   memory->create(coeffelem,nelements,ncoeff+1,"pair:coeffelem");
 
   int *found = new int[nelements];
   for (int ielem = 0; ielem < nelements; ielem++) 
     found[ielem] = 0;
 
   // Loop over elements in the SNAP coefficient file
 
   for (int ielemfile = 0; ielemfile < nelemfile; ielemfile++) {
  
     if (comm->me == 0) {
       ptr = fgets(line,MAXLINE,fpcoeff);
       if (ptr == NULL) {
 	eof = 1;
 	fclose(fpcoeff);
       } else n = strlen(line) + 1;
     }
     MPI_Bcast(&eof,1,MPI_INT,0,world);
     if (eof) 
       error->all(FLERR,"Incorrect format in SNAP coefficient file");
     MPI_Bcast(&n,1,MPI_INT,0,world);
     MPI_Bcast(line,n,MPI_CHAR,0,world);
 
     nwords = atom->count_words(line);
     if (nwords != 3) 
       error->all(FLERR,"Incorrect format in SNAP coefficient file");
 
     iword = 0;
     words[iword] = strtok(line,"' \t\n\r\f");
     iword = 1;
     words[iword] = strtok(NULL,"' \t\n\r\f");
     iword = 2;
     words[iword] = strtok(NULL,"' \t\n\r\f");
 
     char* elemtmp = words[0];
     double radtmp = atof(words[1]);
     double wjtmp = atof(words[2]);
 
     // skip if element name isn't in element list
 
     int ielem;
     for (ielem = 0; ielem < nelements; ielem++)
       if (strcmp(elemtmp,elements[ielem]) == 0) break;
     if (ielem == nelements) {
       if (comm->me == 0)
 	for (int icoeff = 0; icoeff <= ncoeff; icoeff++) 
 	  ptr = fgets(line,MAXLINE,fpcoeff);
       continue;
     }
 
     // skip if element already appeared
 
     if (found[ielem]) {
       if (comm->me == 0)
 	for (int icoeff = 0; icoeff <= ncoeff; icoeff++) 
 	  ptr = fgets(line,MAXLINE,fpcoeff);
       continue;
     }
 
     found[ielem] = 1;
     radelem[ielem] = radtmp;
     wjelem[ielem] = wjtmp;
 
 
     if (comm->me == 0) {
       if (screen) fprintf(screen,"SNAP Element = %s, Radius %g, Weight %g \n", 
 			  elements[ielem], radelem[ielem], wjelem[ielem]);
       if (logfile) fprintf(logfile,"SNAP Element = %s, Radius %g, Weight %g \n", 
 			  elements[ielem], radelem[ielem], wjelem[ielem]);
     }
 
     for (int icoeff = 0; icoeff <= ncoeff; icoeff++) { 
       if (comm->me == 0) {
 	ptr = fgets(line,MAXLINE,fpcoeff);
 	if (ptr == NULL) {
 	  eof = 1;
 	  fclose(fpcoeff);
 	} else n = strlen(line) + 1;
       }
 
       MPI_Bcast(&eof,1,MPI_INT,0,world);
       if (eof) 
 	error->all(FLERR,"Incorrect format in SNAP coefficient file");
       MPI_Bcast(&n,1,MPI_INT,0,world);
       MPI_Bcast(line,n,MPI_CHAR,0,world);
 
       nwords = atom->count_words(line);
       if (nwords != 1) 
 	error->all(FLERR,"Incorrect format in SNAP coefficient file");
 
       iword = 0;
       words[iword] = strtok(line,"' \t\n\r\f");
 
       coeffelem[ielem][icoeff] = atof(words[0]);
       
     }
   }
 
   // set flags for required keywords
 
   rcutfacflag = 0;
   twojmaxflag = 0;
 
   // Set defaults for optional keywords
 
   gamma = 1.0;
   gammaoneflag = 1;
   rfac0 = 0.99363;
   rmin0 = 0.0;
   diagonalstyle = 3;
   switchflag = 1;
   // open SNAP parameter file on proc 0
 
   FILE *fpparam;
   if (comm->me == 0) {
     fpparam = force->open_potential(paramfilename);
     if (fpparam == NULL) {
       char str[128];
       sprintf(str,"Cannot open SNAP parameter file %s",paramfilename);
       error->one(FLERR,str);
     }
   }
 
   eof = 0;
   while (1) {
     if (comm->me == 0) {
       ptr = fgets(line,MAXLINE,fpparam);
       if (ptr == NULL) {
         eof = 1;
         fclose(fpparam);
       } else n = strlen(line) + 1;
     }
     MPI_Bcast(&eof,1,MPI_INT,0,world);
     if (eof) break;
     MPI_Bcast(&n,1,MPI_INT,0,world);
     MPI_Bcast(line,n,MPI_CHAR,0,world);
 
     // strip comment, skip line if blank
 
     if ((ptr = strchr(line,'#'))) *ptr = '\0';
     nwords = atom->count_words(line);
     if (nwords == 0) continue;
 
     if (nwords != 2) 
       error->all(FLERR,"Incorrect format in SNAP parameter file");
 
     // words = ptrs to all words in line
     // strip single and double quotes from words
 
     char* keywd = strtok(line,"' \t\n\r\f");
     char* keyval = strtok(NULL,"' \t\n\r\f");
 
     if (comm->me == 0) {
       if (screen) fprintf(screen,"SNAP keyword %s %s \n",keywd,keyval);
       if (logfile) fprintf(logfile,"SNAP keyword %s %s \n",keywd,keyval);
     }
 
     if (strcmp(keywd,"rcutfac") == 0) {
       rcutfac = atof(keyval);
       rcutfacflag = 1;
     } else if (strcmp(keywd,"twojmax") == 0) {
       twojmax = atoi(keyval);
       twojmaxflag = 1;
     } else if (strcmp(keywd,"gamma") == 0)
       gamma = atof(keyval);
     else if (strcmp(keywd,"rfac0") == 0)
       rfac0 = atof(keyval);
     else if (strcmp(keywd,"rmin0") == 0)
       rmin0 = atof(keyval);
     else if (strcmp(keywd,"diagonalstyle") == 0)
       diagonalstyle = atoi(keyval);
     else if (strcmp(keywd,"switchflag") == 0)
       switchflag = atoi(keyval);
     else
       error->all(FLERR,"Incorrect SNAP parameter file");
   }
 
   if (rcutfacflag == 0 || twojmaxflag == 0)
     error->all(FLERR,"Incorrect SNAP parameter file");
 
   if (gamma == 1.0) gammaoneflag = 1;
   else gammaoneflag = 0;
 }
 
 /* ----------------------------------------------------------------------
    memory usage
 ------------------------------------------------------------------------- */
 
 double PairSNAP::memory_usage()
 {
   // start from the base class estimate and add the terms one by one
 
   double total = Pair::memory_usage();
 
   // two (ntypes+1) x (ntypes+1) per-type tables, one int and one double
 
   const int ntypes1 = atom->ntypes+1;
   total += ntypes1*ntypes1*sizeof(int);
   total += ntypes1*ntypes1*sizeof(double);
 
   // terms that scale with nmax
 
   total += 3*nmax*sizeof(double);
   total += nmax*sizeof(int);
 
   // terms that scale with ncoeff
 
   total += (2*ncoeff+1)*sizeof(double);
   total += (ncoeff*3)*sizeof(double);
 
   // one SNA instance per thread, each counted at the size of instance 0
 
   total += sna[0]->memory_usage()*nthreads;
 
   return total;
 }
 
diff --git a/src/USER-ATC/fix_atc.cpp b/src/USER-ATC/fix_atc.cpp
index 12d4a7c28..ebf2b9608 100644
--- a/src/USER-ATC/fix_atc.cpp
+++ b/src/USER-ATC/fix_atc.cpp
@@ -1,946 +1,946 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    www.cs.sandia.gov/~sjplimp/lammps.html
    Steve Plimpton, sjplimp@sandia.gov, Sandia National Laboratories
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
    ------------------------------------------------------------------------- */
 
 // LAMMPS
 #include "fix_atc.h"
 #include "fix_nve.h"
 #include "atom.h"
 #include "force.h"
 #include "update.h"
 #include "respa.h"
 #include "error.h"
 #include "neighbor.h"
 #include "neigh_request.h"
 #include "pointers.h"
 #include "comm.h"
 #include "group.h"
 // ATC
 #include "ATC_Method.h"
 #include "ATC_Transfer.h"
 #include "ATC_TransferKernel.h"
 #include "ATC_TransferPartitionOfUnity.h"
 #include "ATC_CouplingEnergy.h"
 #include "ATC_CouplingMomentum.h"
 #include "ATC_CouplingMass.h"
 #include "ATC_CouplingMomentumEnergy.h"
 #include "LammpsInterface.h"
 // other
 #include "stdio.h"
 #include "string.h"
 #include <sstream>
 
 using namespace LAMMPS_NS; 
 using namespace FixConst;
 using std::string;
 
 // main page of doxygen documentation
 /*! \mainpage AtC : Atom-to-Continuum methods
     fix commands:
     - \ref man_fix_atc (links to all related commands)
 */
 
 /* ------------------------------------------------------------------------- */
 
 FixATC::FixATC(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg),
   lammps_(lmp), atc_(NULL)
 {
   // ID GROUP atc PHYSICSTYPE [PARAMETERFILE]
   if (narg < 4 || narg > 5) lmp->error->all(FLERR,"Illegal fix atc command");
 
   // Set LAMMPS pointer on LammpsInterface
   ATC::LammpsInterface::instance()->set_lammps(lmp);
 
   /*! \page man_fix_atc fix atc command 
     \section syntax
     fix <fixID> <group> atc <type> <parameter_file>
     - fixID = name of fix
     - group = name of group fix is to be applied
     - type\n
     = thermal : thermal coupling with fields: temperature  \n
     = two_temperature : electron-phonon coupling with field: temperature and electron_temperature  \n
     = hardy :  on-the-fly post-processing using kernel localization functions (see "related" section for possible fields) \n
     = field :  on-the-fly post-processing using mesh-based localization functions (see "related" section for possible fields) \n
     - parameter_file = name of the file with material parameters. \n
     note: Neither hardy nor field requires a parameter file
     \section examples
     <TT> fix AtC internal atc thermal Ar_thermal.dat </TT> \n
     <TT> fix AtC internal atc two_temperature Ar_ttm.mat </TT> \n
     <TT> fix AtC internal atc hardy  </TT> \n
     <TT> fix AtC internal atc field  </TT> \n
     \section description
     This fix is the beginning to creating a coupled FE/MD simulation and/or
     an on-the-fly estimation of continuum fields. The coupled versions of this
     fix do Verlet integration and the /post-processing does not.
     After instantiating this fix, several other fix_modify commands will be
     needed to set up the problem, e.g. define the finite element mesh and
     prescribe initial and boundary conditions. 
 
     The following coupling example is typical, but non-exhaustive:\n
  
 <TT>
      # ... commands to create and initialize the MD system \n
 
      # initial fix to designate coupling type and group to apply it to \n
      #            tag group          physics     material_file \n
      fix          AtC internal   atc thermal     Ar_thermal.mat\n \n
      # create a uniform 12 x 2 x 2 mesh that covers region contain the group \n
      #                                 nx ny nz region   periodicity \n
      fix_modify   AtC mesh create 12 2  2  mdRegion f p p\n \n
      # specify the control method for the type of coupling \n
      #                         physics         control_type \n
      fix_modify   AtC thermal control flux \n \n
      # specify the initial values for the empirical field "temperature"  \n
      #                                 field       node_group  value \n
      fix_modify   AtC initial temperature all         30.\n \n
      # create an output stream for nodal fields \n
      #                                filename      output_frequency  \n
      fix_modify   AtC output atc_fe_output 100\n \n
 
      run             1000 \n
 </TT>
 
      likewise for this post-processing example: \n
 
 <TT>
      # ... commands to create and initialize the MD system \n
 
      # initial fix to designate post-processing and the group to apply it to \n
      # no material file is allowed nor required \n
      fix         AtC internal atc hardy \n \n
      # for hardy fix, specific kernel function (function type and range) to 
      # be used as a localization function \n
      fix         AtC kernel quartic_sphere 10.0 \n \n
      # create a uniform 1 x 1 x 1 mesh that covers region contain the group \n
      # with periodicity this effectively creats a system average \n
      fix_modify  AtC mesh create 1 1 1 box p p p \n\n
      # change from default lagrangian map to eulerian \n
      #   refreshed every 100 steps \n
      fix_modify  AtC atom_element_map eulerian 100 \n \n
      # start with no field defined \n
      # add mass density, potential energy density, stress and temperature \n
      fix_modify  AtC fields add density energy stress temperature \n\n
      # create an output stream for nodal fields \n
      #                                filename      output_frequency  \n
      fix_modify  AtC output nvtFE 100 text \n
 
      run             1000 \n
 </TT>
 
     the mesh's linear interpolation functions can be used as the localization function \n
     by using the field option: \n
 
 <TT>
      fix         AtC internal atc field \n \n
      fix_modify  AtC mesh create 1 1 1 box p p p \n\n
      ... \n\n
 </TT>
 
     Note coupling and post-processing can be combined in the same simulations 
     using separate fixes.
     \n
     For detailed exposition of the theory and algorithms please see:\n
     - Wagner, GJ; Jones, RE; Templeton, JA; Parks, MA,  <VAR> An
       atomistic-to-continuum coupling method for heat transfer in solids. </VAR>
       Special Issue of Computer Methods and Applied Mechanics (2008) 197:3351. \n
     - Zimmerman, JA; Webb, EB; Hoyt, JJ;. Jones, RE; Klein, PA; Bammann, DJ,
       <VAR> Calculation of stress in atomistic simulation. </VAR>
       Special Issue of Modelling and Simulation in Materials Science and 
       Engineering (2004), 12:S319. \n
     - Zimmerman, JA; Jones, RE; Templeton, JA,
       <VAR> A material frame approach for evaluating continuum variables in
        atomistic simulations. </VAR>
       Journal of Computational Physics (2010), 229:2364. \n
     - Templeton, JA; Jones, RE; Wagner, GJ,  <VAR> Application of a field-based method
       to spatially varying thermal transport problems in molecular dynamics. </VAR>
       Modelling and Simulation in Materials Science and Engineering (2010), 18:085007. \n
     - Jones, RE; Templeton, JA; Wagner, GJ; Olmsted, D; Modine, JA, <VAR>
       Electron transport enhanced molecular dynamics for metals and semi-metals.  </VAR>
       International Journal for Numerical Methods in Engineering (2010), 83:940. \n
     - Templeton, JA; Jones, RE; Lee, JW; Zimmerman, JA; Wong, BM,
       <VAR> A long-range electric field solver for molecular dynamics based on
       atomistic-to-continuum modeling.  </VAR>
       Journal of Chemical Theory and Computation (2011), 7:1736. \n
     - Mandadapu, KK; Templeton, JA; Lee, JW, <VAR> Polarization as a field variable
       from molecular dynamics simulations. </VAR>
       Journal of Chemical Physics (2013), 139:054115. \n
 
     Please refer to the standard
     finite element (FE) texts, e.g. T.J.R Hughes <VAR> The finite element 
     method </VAR>, Dover 2003, for the basics of FE simulation.
 
     \section restrictions
     Thermal and two_temperature (coupling) types use a Verlet time-integration 
     algorithm.
     The hardy type does not contain its own time-integrator and must be used 
     with a separate fix that does contain one, e.g. nve, nvt, etc.
 
     Currently, 
     - the coupling is restricted to thermal physics 
     - the FE computations are done in serial on each processor.
 
     \section related
     fix_modify commands for setup: \n
     - \ref man_mesh_create
     - \ref man_mesh_quadrature
     - \ref man_mesh_read
     - \ref man_mesh_write
     - \ref man_mesh_create_nodeset
     - \ref man_mesh_add_to_nodeset
     - \ref man_mesh_create_faceset_box
     - \ref man_mesh_create_faceset_plane
     - \ref man_mesh_create_elementset
     - \ref man_mesh_delete_elements
     - \ref man_mesh_nodeset_to_elementset
     - \ref man_boundary
     - \ref man_internal_quadrature
     - \ref man_thermal_time_integration
     - \ref man_momentum_time_integration
     - \ref man_electron_integration
     - \ref man_internal_element_set
     - \ref man_decomposition
 
     fix_modify commands for boundary and initial conditions:\n
     - \ref man_initial
     - \ref man_fix_nodes
     - \ref man_unfix_nodes
     - \ref man_fix_flux
     - \ref man_unfix_flux
     - \ref man_source
     - \ref man_remove_source
 
     fix_modify commands for control and filtering: \n
     - \ref man_control
     - \ref man_control_thermal
     - \ref man_control_thermal_correction_max_iterations
     - \ref man_control_momentum
     - \ref man_localized_lambda
     - \ref man_lumped_lambda_solve
     - \ref man_mask_direction
     - \ref man_time_filter
     - \ref man_filter_scale
     - \ref man_filter_type
     - \ref man_equilibrium_start
     - \ref man_extrinsic_exchange
     - \ref man_poisson_solver
 
     fix_modify commands for output: \n
     - \ref man_output
     - \ref man_output_nodeset
     - \ref man_output_elementset
     - \ref man_boundary_integral
     - \ref man_contour_integral
     - \ref man_mesh_output
     - \ref man_write_restart
     - \ref man_read_restart
 
     fix_modify commands for post-processing: \n
     - \ref man_hardy_kernel
     - \ref man_hardy_fields
     - \ref man_hardy_gradients
     - \ref man_hardy_rates
     - \ref man_hardy_computes
     - \ref man_hardy_on_the_fly
     - \ref man_pair_interactions
     - \ref man_sample_frequency
     - \ref man_set
 
     miscellaneous fix_modify commands: \n
     - \ref man_atom_element_map
     - \ref man_atom_weight
     - \ref man_write_atom_weights
     - \ref man_reset_time
     - \ref man_reset_atomic_reference_positions
     - \ref man_fe_md_boundary
     - \ref man_boundary_faceset
     - \ref man_consistent_fe_initialization
     - \ref man_mass_matrix
     - \ref man_material
     - \ref man_atomic_charge
     - \ref man_source_integration
     - \ref man_temperature_definition
     - \ref man_track_displacement
     - \ref man_boundary_dynamics
     - \ref man_add_species
     - \ref man_add_molecule
     - \ref man_remove_species
     - \ref man_remove_molecule
 
     Note: a set of example input files with the attendant material files are 
     included with this package
     \section default
     none
   */
 
 
   // Construct new ATC_Method object
   // note use "unfix" to destroy
 
   int me = ATC::LammpsInterface::instance()->comm_rank();
 
   string groupName(arg[1]);
   int igroup = group->find(groupName.c_str());
   int atomCount = group->count(igroup);
 
   try {
     // Postprocessing
     if (strcmp(arg[3],"field")==0)  
     {
       if (atomCount == 0) {
         if (me==0) printf("ATC: can't construct transfer, no atoms in group \n");
         throw;
       }
       if (narg < 5) {
         if (me==0) printf("ATC: constructing shape function field estimate\n");
         atc_ = new ATC::ATC_TransferPartitionOfUnity(groupName,
                                                      array_atom,
                                                      this);
       } 
       else { 
         if (me==0) printf("ATC: constructing shape function field estimate with parameter file %s\n",arg[4]);
         string matParamFile = arg[4];
         atc_ = new ATC::ATC_TransferPartitionOfUnity(groupName,
                                                      array_atom, this,
                                                      matParamFile);
       }
     }
     else if (strcmp(arg[3],"hardy")==0) 
     {
       if (atomCount == 0) {
         if (me==0) printf("ATC: Can't construct transfer, no atoms in group \n");
         throw;
       }
       if (narg < 5) {
         if (me==0) printf("ATC: constructing kernel field estimate\n");
         atc_ = new ATC::ATC_TransferKernel(groupName,
                                            array_atom,
                                            this);
       } 
       else { 
         if (me==0) printf("ATC: constructing kernel field estimate with parameter file %s\n",arg[4]);
         string matParamFile = arg[4];
         atc_ = new ATC::ATC_TransferKernel(groupName,
                                            array_atom, this,
                                            matParamFile);
       }
     }
     // PhysicsTypes
     else if (strcmp(arg[3],"thermal")==0) 
     {
       string matParamFile = arg[4];
       if (me==0) printf("ATC: constructing thermal coupling with parameter file %s\n",arg[4]);
       atc_ = new ATC::ATC_CouplingEnergy(groupName,
                                          array_atom, this,
                                          matParamFile);
     }
     else if (strcmp(arg[3],"two_temperature")==0) 
     {
       string matParamFile = arg[4];
       if (me==0) printf("ATC: constructing two_temperature coupling with parameter file %s\n",arg[4]);
       atc_ = new ATC::ATC_CouplingEnergy(groupName,
                                          array_atom, this,
                                          matParamFile, ATC::TWO_TEMPERATURE);
     }
     else if (strcmp(arg[3],"drift_diffusion")==0) 
     {
       string matParamFile = arg[4];
       if (me==0) printf("ATC: constructing drift_diffusion coupling with parameter file %s\n",arg[4]);
       atc_ = new ATC::ATC_CouplingEnergy(groupName,
                                          array_atom, this,
                                          matParamFile, ATC::DRIFT_DIFFUSION);
     }
     else if (strcmp(arg[3],"drift_diffusion-equilibrium")==0) 
     {
       string matParamFile = arg[4];
       if (me==0) printf("ATC: constructing drift_diffusion-equilibrium coupling with parameter file %s\n",arg[4]);
       atc_ = new ATC::ATC_CouplingEnergy(groupName,
                                          array_atom, this,
                                          matParamFile, ATC::DRIFT_DIFFUSION_EQUILIBRIUM);
     }
     else if (strcmp(arg[3],"drift_diffusion-schrodinger")==0) 
     {
       string matParamFile = arg[4];
       if (me==0) printf("ATC: constructing drift_diffusion-schrodinger coupling with parameter file %s\n",arg[4]);
       atc_ = new ATC::ATC_CouplingEnergy(groupName,
                                          array_atom, this,
                                          matParamFile, ATC::DRIFT_DIFFUSION_SCHRODINGER);
     }
     else if (strcmp(arg[3],"drift_diffusion-schrodinger-slice")==0) 
     {
       string matParamFile = arg[4];
       if (me==0) printf("Constructing ATC transfer (drift_diffusion-schrodinger-slice) with parameter file %s\n",arg[4]);
       atc_ = new ATC::ATC_CouplingEnergy(groupName,
                                          array_atom, this,
                                          matParamFile, ATC::DRIFT_DIFFUSION_SCHRODINGER_SLICE);
     }
     else if (strcmp(arg[3],"convective_drift_diffusion")==0) 
     {
       string matParamFile = arg[4];
       if (me==0) printf("ATC: constructing convective_drift_diffusion coupling with parameter file %s\n",arg[4]);
       atc_ = new ATC::ATC_CouplingEnergy(groupName,
                                          array_atom, this,
                                          matParamFile, ATC::CONVECTIVE_DRIFT_DIFFUSION);
     }
     else if (strcmp(arg[3],"convective_drift_diffusion-equilibrium")==0) 
     {
       string matParamFile = arg[4];
       if (me==0) printf("ATC: constructing convective_drift_diffusion-equilibrium coupling with parameter file %s\n",arg[4]);
       atc_ = new ATC::ATC_CouplingEnergy(groupName,
                                          array_atom, this,
                                          matParamFile, ATC::CONVECTIVE_DRIFT_DIFFUSION_EQUILIBRIUM);
     }
     else if (strcmp(arg[3],"convective_drift_diffusion-schrodinger")==0) 
     {
       string matParamFile = arg[4];
       if (me==0) printf("ATC: constructing convective_drift_diffusion-schrodinger coupling with parameter file %s\n",arg[4]);
       atc_ = new ATC::ATC_CouplingEnergy(groupName,
                                          array_atom, this,
                                          matParamFile, ATC::CONVECTIVE_DRIFT_DIFFUSION_SCHRODINGER);
     }
     else if (strcmp(arg[3],"elastic")==0) 
     {
       string matParamFile = arg[4];
       if (me==0) printf("ATC: constructing elastic coupling with parameter file %s\n",arg[4]);
       atc_ = new ATC::ATC_CouplingMomentum(groupName,
                                            array_atom, this,
                                            matParamFile,
                                            ATC::ELASTIC);
     }
     else if (strcmp(arg[3],"electrostatic")==0) 
     {
       string matParamFile = arg[4];
       if (me==0) printf("ATC: constructing electrostatic mechanical coupling with parameter file %s\n",arg[4]);
       atc_ = new ATC::ATC_CouplingMomentum(groupName,
                                            array_atom, this,
                                            matParamFile,
                                            ATC::ELASTIC,
                                            ATC::ELECTROSTATIC);
     }
     else if (strcmp(arg[3],"electrostatic-equilibrium")==0) 
     {
       string matParamFile = arg[4];
       if (me==0) printf("ATC: constructing equilibrium electrostatic coupling with parameter file %s\n",arg[4]);
       atc_ = new ATC::ATC_CouplingMomentum(groupName,
                                            array_atom, this,
                                            matParamFile,
                                            ATC::ELASTIC,
                                            ATC::ELECTROSTATIC_EQUILIBRIUM);
     }
     else if (strcmp(arg[3],"shear")==0)  
     {
       string matParamFile = arg[4];
       if (me==0) printf("ATC: constructing viscous/shear coupling with parameter file %s\n",arg[4]);
       atc_ = new ATC::ATC_CouplingMomentum(groupName,
                                            array_atom, this,
                                            matParamFile,
                                            ATC::SHEAR);
     }
     else if (strcmp(arg[3],"species")==0)
     {
       string matParamFile = arg[4];
       if (me==0) printf("ATC: constructing species diffusion coupling with parameter file %s\n",arg[4]);
       atc_ = new ATC::ATC_CouplingMass(groupName,
                                        array_atom, this,
                                        matParamFile);
     }
     else if (strcmp(arg[3],"species_electrostatic")==0)
     {
       string matParamFile = arg[4];
       if (me==0) printf("ATC: constructing electrostatic species coupling with parameter file %s\n",arg[4]);
       atc_ = new ATC::ATC_CouplingMass(groupName,
                                        array_atom, this,
                                        matParamFile, ATC::FEM_EFIELD);
     }
     else if (strcmp(arg[3],"thermo_elastic")==0) 
     {
       string matParamFile = arg[4];
       if (me==0) printf("ATC: constructing thermo-mechanical coupling with parameter file %s\n",arg[4]);
       atc_ = new ATC::ATC_CouplingMomentumEnergy(groupName,
                                                  array_atom, this,
                                                  matParamFile);
     }
     else 
     {
       lmp->error->all(FLERR,"Unknown physics type in ATC");
     }
   }
   catch (ATC::ATC_Error& atcError) {
     ATC::LammpsInterface::instance()->print_msg(atcError.error_description());
     throw;
   }
 
   lmp->atom->add_callback(0);
 
   // we write our own restart file
   restart_global = 0; 
   
   
 
   // Set output computation data based on transfer info
   scalar_flag = atc_->scalar_flag();
   vector_flag = atc_->vector_flag();
   size_vector = atc_->size_vector();
   global_freq = atc_->global_freq();
   extscalar = atc_->extscalar();
   extvector = atc_->extvector();
   extlist = atc_->extlist();
   thermo_energy = atc_->thermo_energy_flag();
 
   // set pointer for output
   peratom_flag = atc_->peratom_flag();
   size_peratom_cols = atc_->size_peratom_cols();
   peratom_freq = atc_->peratom_freq();
   
 
   // set comm size needed by this fix
   comm_forward = atc_->comm_forward(); 
 
   // call this fix every step
   nevery = 1; 
 }
 
 /*----------------------------------------------------------------------- */
 FixATC::~FixATC()
 {
   // undo the add_callback(0) performed by the constructor
   if (lmp->atom) lmp->atom->delete_callback(id,0);

   // deleting a null pointer is a no-op, so no explicit guard is needed
   delete atc_;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixATC::setmask()
 {
   // bitmask of every hook this fix implements, for both dynamics and
   // minimization, plus thermo energy, end-of-step and post-run
   return INITIAL_INTEGRATE | POST_INTEGRATE | FINAL_INTEGRATE
        | PRE_EXCHANGE | PRE_NEIGHBOR | PRE_FORCE | POST_FORCE
        | MIN_PRE_EXCHANGE | MIN_PRE_NEIGHBOR | MIN_PRE_FORCE | MIN_POST_FORCE
        | THERMO_ENERGY | POST_RUN | END_OF_STEP;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixATC::modify_param(int narg, char** arg)
 {
   bool match; 
 
   // pass on to transfer layer
   try {
     match = atc_->modify(narg,arg);
   }
   catch (ATC::ATC_Error& atcError) {
     ATC::LammpsInterface::instance()->print_msg(atcError.error_description());
     throw;
   }
 
   if (!match) return 0;
   return narg;
 }
 
 /* ----------------------------------------------------------------------
    create initial list of neighbor partners via call to neighbor->build()
    must be done in setup (not init) since fix init comes before neigh init
    ------------------------------------------------------------------------- */
 
 void FixATC::init()
 {
   // Guarantee construction of full neighborlist:
   // the request is flagged as belonging to a fix (not a pair style) and
   // as a full list rather than a half list
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->fix = 1;
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
 
   // create computes, if necessary
   atc_->init_computes();
 }
 
 // minimization uses the same setup as dynamics: forward to setup()
 void FixATC::min_setup(int vflag)
 {
   setup(vflag);
 }
 
 void FixATC::setup(int vflag)
 {
   comm->forward_comm_fix(this);
 
   try {
     atc_->initialize();
   }
   catch (ATC::ATC_Error& atcError) {
     ATC::LammpsInterface::instance()->print_msg(atcError.error_description());
     throw;
   }
 }
 
 /* ----------------------------------------------------------------------
    pass throughs to atc functions to handle swapping atom data on
    when they move processors
    ------------------------------------------------------------------------- */
 void FixATC::pre_exchange()
 {
   try {
     atc_->pre_exchange();
   }
   catch (ATC::ATC_Error& atcError) {
     ATC::LammpsInterface::instance()->print_msg(atcError.error_description());
     throw;
   }
 }
 void FixATC::setup_pre_exchange()
 {
   if (atc_->is_initialized()) {
     try {
       atc_->setup_pre_exchange();
     }
     catch (ATC::ATC_Error& atcError) {
       ATC::LammpsInterface::instance()->print_msg(atcError.error_description());
       throw;
     }
   }
 }
 void FixATC::min_pre_exchange()
 {
   try {
     atc_->pre_exchange();
   }
   catch (ATC::ATC_Error& atcError) {
     ATC::LammpsInterface::instance()->print_msg(atcError.error_description());
     throw;
   }
 }
 void FixATC::min_setup_pre_exchange()
 {
   try {
     atc_->setup_pre_exchange();
   }
   catch (ATC::ATC_Error& atcError) {
     ATC::LammpsInterface::instance()->print_msg(atcError.error_description());
     throw;
   }
 }
 
 double FixATC::memory_usage()
 {
   // the value reported by the ATC object is scaled by sizeof(double)
   const double count = (double) atc_->memory_usage();
   return count * sizeof(double);
 }
 
 // pass-through: forward the new array size nmax to the ATC object
 void FixATC::grow_arrays(int nmax)
 {
   atc_->grow_arrays(nmax);
 }
 
 // pass-through: forward indices i and j to the ATC object
 // (delflag is not used here)
 void FixATC::copy_arrays(int i, int j, int delflag)
 {
   atc_->copy_arrays(i,j);
 }
 
 int FixATC::pack_exchange(int i, double * buf)
 {
   // pass-through: return whatever the ATC object's pack_exchange returns
   return atc_->pack_exchange(i,buf);
 }
 
 int FixATC::unpack_exchange(int nlocal, double * buf)
 {
   // pass-through: return whatever the ATC object's unpack_exchange returns
   return atc_->unpack_exchange(nlocal,buf);
 }
 
 int FixATC::pack_forward_comm(int n, int *list, double *buf, int pbc_flag, int *pbc)
 {
   // pass-through to the ATC object's pack_comm, returning its result
   return atc_->pack_comm(n, list, buf, pbc_flag, pbc);
 }
 
 // pass-through to the ATC object's unpack_comm
 void FixATC::unpack_forward_comm(int n, int first, double *buf)
 {
   atc_->unpack_comm(n, first, buf);
 }
 
 
 /* ----------------------------------------------------------------------
    pack values in local atom-based arrays for restart file
    ------------------------------------------------------------------------- */
 
 int FixATC::pack_restart(int i, double *buf){
   // stub: nothing is written to buf, and 0 is returned
   return 0;
 }
 
 /* ----------------------------------------------------------------------
    unpack values from atom->extra array to restart the fix
    ------------------------------------------------------------------------- */
 
 void FixATC::unpack_restart(int nlocal, int nth){
   // intentionally empty: pack_restart() stores no per-atom data
 }
 
 /* ----------------------------------------------------------------------
    maxsize of any atom's restart data
    ------------------------------------------------------------------------- */
  
 int FixATC::maxsize_restart(){
   // always 0, matching pack_restart() which stores nothing
   return 0;
 }
  
 /* ----------------------------------------------------------------------
    size of atom nlocal's restart data
    ------------------------------------------------------------------------- */
  
 int FixATC::size_restart(int nlocal){
   // always 0, matching pack_restart() which stores nothing
   return 0;
 }
 
 /* ----------------------------------------------------------------------
    pack entire state of Fix into one write
    ------------------------------------------------------------------------- */
 
 void FixATC::write_restart(FILE *fp){
   
   char ** args = new char*[2];
   args[0] = new char[50];
   args[1] = new char[50];
   sprintf(args[0],"write_restart");
   sprintf(args[1],"ATC.restart");
 
   // Then call all objects I own to write their data
   if (comm->me == 0) {
     atc_->modify(2,args);
   }
 
   delete [] args[0];
   delete [] args[1];
   delete [] args;
 }
 
 /* ----------------------------------------------------------------------
    use state info from restart file to restart the Fix
    ------------------------------------------------------------------------- */
 
 void FixATC::restart(char *buf){
   
   char ** args = new char*[2];
   args[0] = new char[50];
   args[1] = new char[50];
   sprintf(args[0],"read_restart");
   sprintf(args[1],"ATC.restart");
 
   // Then call all objects I own to write their data
   if (comm->me == 0) {
     atc_->modify(2,args);
   }
 
   delete [] args[0];
   delete [] args[1];
   delete [] args;
 }
 
 /* ----------------------------------------------------------------------
    forward the start-of-step integration hooks to the ATC method
    ------------------------------------------------------------------------- */
 
 void FixATC::initial_integrate(int vflag)
 {
   try {
     atc_->pre_init_integrate();
   }
   catch (ATC::ATC_Error& atcError) {
     ATC::LammpsInterface::instance()->print_msg(atcError.error_description());
     throw;
   }
   // integration of atoms, if desired
   try {
     atc_->init_integrate();
   }
   catch (ATC::ATC_Error& atcError) {
     ATC::LammpsInterface::instance()->print_msg(atcError.error_description());
     throw;
   }
 }
 
 void FixATC::post_integrate()
 {
   try {
     atc_->post_init_integrate();
   }
   catch (ATC::ATC_Error& atcError) {
     ATC::LammpsInterface::instance()->print_msg(atcError.error_description());
     throw;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixATC::final_integrate()
 {
   try {
     atc_->pre_final_integrate();
   }
   catch (ATC::ATC_Error& atcError) {
     ATC::LammpsInterface::instance()->print_msg(atcError.error_description());
     throw;
   }
 
   try {
     atc_->final_integrate();
   }
   catch (ATC::ATC_Error& atcError) {
     ATC::LammpsInterface::instance()->print_msg(atcError.error_description());
     throw;
   }
 }
 
 void FixATC::end_of_step()
 {
   try {
     atc_->post_final_integrate();
   }
   catch (ATC::ATC_Error& atcError) {
     ATC::LammpsInterface::instance()->print_msg(atcError.error_description());
     throw;
   }
   try {
     atc_->end_of_step();
   }
   catch (ATC::ATC_Error& atcError) {
     ATC::LammpsInterface::instance()->print_msg(atcError.error_description());
     throw;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 void FixATC::init_list(int id, NeighList *ptr) {
   ATC::LammpsInterface::instance()->set_list(id,ptr);
 }
 /* ---------------------------------------------------------------------- */
 void FixATC::pre_neighbor()
 {
   try {
     atc_->pre_neighbor();
     comm->forward_comm_fix(this);  
   }
   catch (ATC::ATC_Error& atcError) {
     ATC::LammpsInterface::instance()->print_msg(atcError.error_description());
     throw;
   }
 }
 /* ---------------------------------------------------------------------- */
 void FixATC::pre_force(int vflag)
 {
   
   try {
     atc_->pre_force(); 
   }
   catch (ATC::ATC_Error& atcError) {
     ATC::LammpsInterface::instance()->print_msg(atcError.error_description());
     throw;
   }
 }
 /* ---------------------------------------------------------------------- */
 void FixATC::post_force(int vflag)
 {
   
   try {
     atc_->post_force(); 
   }
   catch (ATC::ATC_Error& atcError) {
     ATC::LammpsInterface::instance()->print_msg(atcError.error_description());
     throw;
   }
 }
 /* ---------------------------------------------------------------------- */
 void FixATC::post_run()
 {
   try {
     atc_->finish();
   }
   catch (ATC::ATC_Error& atcError) {
     ATC::LammpsInterface::instance()->print_msg(atcError.error_description());
     throw;
   }
 }
 /* ---------------------------------------------------------------------- */
 void FixATC::setup_pre_neighbor()
 {
   if (atc_->is_initialized()) {
     try {
       atc_->pre_neighbor();
     }
     catch (ATC::ATC_Error& atcError) {
       ATC::LammpsInterface::instance()->print_msg(atcError.error_description());
       throw;
     }
   }
 }
 /* ---------------------------------------------------------------------- */
 void FixATC::min_setup_pre_neighbor()
 {
   if (atc_->is_initialized()) {
     try {
       atc_->pre_neighbor();
     }
     catch (ATC::ATC_Error& atcError) {
       ATC::LammpsInterface::instance()->print_msg(atcError.error_description());
       throw;
     }
   }
 }
 /* ---------------------------------------------------------------------- */
 void FixATC::min_pre_force(int vflag)
 {
   try {
     atc_->min_pre_force(); 
   }
   catch (ATC::ATC_Error& atcError) {
     ATC::LammpsInterface::instance()->print_msg(atcError.error_description());
     throw;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 void FixATC::min_post_force(int vflag)
 {
   try {
     atc_->min_post_force(); 
   }
   catch (ATC::ATC_Error& atcError) {
     ATC::LammpsInterface::instance()->print_msg(atcError.error_description());
     throw;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 double FixATC::compute_scalar()
 {
   // the scalar output of this fix is supplied by the ATC object
   const double value = atc_->compute_scalar();
   return value;
 }
 /* ---------------------------------------------------------------------- */
 double FixATC::compute_vector(int n)
 {
   // element n of the vector output is supplied by the ATC object
   const double value = atc_->compute_vector(n);
   return value;
 }
 /* ---------------------------------------------------------------------- */
 double FixATC::compute_array(int irow, int icol)
 {
   // entry (irow,icol) of the array output is supplied by the ATC object
   const double value = atc_->compute_array(irow,icol);
   return value;
 }
 
diff --git a/src/USER-AWPMD/pair_awpmd_cut.cpp b/src/USER-AWPMD/pair_awpmd_cut.cpp
index e9e763b84..9813c15d5 100644
--- a/src/USER-AWPMD/pair_awpmd_cut.cpp
+++ b/src/USER-AWPMD/pair_awpmd_cut.cpp
@@ -1,753 +1,753 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Ilya Valuev (JIHT, Moscow, Russia)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_awpmd_cut.h"
 #include "atom.h"
 #include "update.h"
 #include "min.h"
 #include "domain.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "memory.h"
 #include "error.h"
 
 #include "TCP/wpmd_split.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 // Sets up the pair style with default settings: no minimizer scratch
 // arrays yet, a 4-entry pvector for the extra energy terms, unit
 // ermscale, zero width_pbc, flexible pressure off, and a freshly
 // created AWPMD_split solver object (released in the destructor).
 PairAWPMDCut::PairAWPMDCut(LAMMPS *lmp) : Pair(lmp)
 {
   single_enable = 0;
 
   nmax = 0;
   min_var = NULL;
   min_varforce = NULL;
   nextra = 4;
   pvector = new double[nextra];
 
   ermscale=1.;
   width_pbc=0.;
   // compute() tests this flag, but settings() only ever sets it to 1
   // (keyword "flex_press"); give it a defined value for the other case
   flexible_pressure_flag = 0;
   wpmd= new AWPMD_split();
 
   half_box_length=0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Releases everything the pair style owns: the per-type tables (only if
 // allocate() ran), the minimizer scratch arrays, pvector and the solver.
 PairAWPMDCut::~PairAWPMDCut()
 {
   if (allocated) {
     memory->destroy(cut);
     memory->destroy(cutsq);
     memory->destroy(setflag);
   }
 
   memory->destroy(min_varforce);
   memory->destroy(min_var);
   delete [] pvector;
 
   delete wpmd;
 }
 
 
 // Comparator for (key,index) pairs. Pairs with different keys are
 // ordered by key. Pairs with equal keys are ordered by the coordinates
 // xx[index][0], xx[index][1], xx[index][2] in that order, where a
 // difference within +/- tol counts as a tie on that coordinate.
 struct cmp_x{
   double tol;
   double **xx;
   cmp_x(double **xx_=NULL, double tol_=1e-12):tol(tol_),xx(xx_){}
   bool operator()(const pair<int,int> &left, const pair<int,int> &right) const {
     if(left.first!=right.first)
       return left.first<right.first;
 
     const double *a=xx[left.second];
     const double *b=xx[right.second];
     for(int k=0;k<3;k++){
       const double d=a[k]-b[k];
       if(d<-tol)
         return true;
       if(d>tol)
         return false;
     }
     // all three coordinates tie within the tolerance
     return false;
   }
 };
 
 /* ---------------------------------------------------------------------- */
 
 void PairAWPMDCut::compute(int eflag, int vflag)
 {
 
   // pvector = [KE, Pauli, ecoul, radial_restraint]
   for (int i=0; i<4; i++) pvector[i] = 0.0;
 
   if (eflag || vflag)
     ev_setup(eflag,vflag);
   else
     evflag = vflag_fdotr = 0; //??
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   double *erforce = atom->erforce;
   double *eradius = atom->eradius;
   int *spin = atom->spin;
   int *type = atom->type;
   int *etag = atom->etag;
   double **v = atom->v;
 
   int nlocal = atom->nlocal;
   int nghost = atom->nghost;
   int ntot=nlocal+nghost;
 
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   int inum = list->inum;
   int *ilist = list->ilist;
   int *numneigh = list->numneigh;
   int **firstneigh = list->firstneigh;
 
 
 
 
   // width pbc
   if(width_pbc<0)
     wpmd->Lextra=2*half_box_length;
   else
     wpmd->Lextra=width_pbc;
 
   wpmd->newton_pair=newton_pair;
 
 
 
 # if 1
   // mapping of the LAMMPS numbers to the AWPMC numbers
   vector<int> gmap(ntot,-1);
 
   for (int ii = 0; ii < inum; ii++) {
     int i = ilist[ii];
     // local particles are all there
     gmap[i]=0;
     Vector_3 ri=Vector_3(x[i][0],x[i][1],x[i][2]);
     int itype = type[i];
     int *jlist = firstneigh[i];
     int jnum = numneigh[i];
     for (int jj = 0; jj < jnum; jj++) {
       int j = jlist[jj];
       j &= NEIGHMASK;
       if(j>=nlocal){ // this is a ghost
         Vector_3 rj=Vector_3(x[j][0],x[j][1],x[j][2]);
         int jtype = type[j];
         double rsq=(ri-rj).norm2();
         if (rsq < cutsq[itype][jtype])
           gmap[j]=0; //bingo, this ghost is really needed
 
       }
     }
   }
 
 # else  // old mapping
   // mapping of the LAMMPS numbers to the AWPMC numbers
   vector<int> gmap(ntot,-1);
   // map for filtering the clones out: [tag,image] -> id
   typedef  map< pair<int,int>, int, cmp_x >  map_t;
   cmp_x cmp(x);
   map_t idmap(cmp);
   for (int ii = 0; ii < inum; ii++) {
     int i = ilist[ii];
     // local particles are all there
     idmap[make_pair(atom->tag[i],i)]=i;
     bool i_local= i<nlocal ? true : false;
     if(i_local)
       gmap[i]=0;
     else if(gmap[i]==0) // this is a ghost which already has been tested
       continue;
     Vector_3 ri=Vector_3(x[i][0],x[i][1],x[i][2]);
     int itype = type[i];
     int *jlist = firstneigh[i];
     int jnum = numneigh[i];
     for (int jj = 0; jj < jnum; jj++) {
       int j = jlist[jj];
       j &= NEIGHMASK;
 
       pair<map_t::iterator,bool> res=idmap.insert(make_pair(make_pair(atom->tag[j],j),j));
       bool have_it=!res.second;
       if(have_it){ // the clone of this particle is already listed
         if(res.first->second!=j) // check that was not the very same particle
           gmap[j]=-1; // filter out
         continue;
       }
 
       bool j_local= j<nlocal ? true : false;
       if((i_local && !j_local) || (j_local && !i_local)){ // some of them is a ghost
         Vector_3 rj=Vector_3(x[j][0],x[j][1],x[j][2]);
         int jtype = type[j];
         double rsq=(ri-rj).norm2();
         if (rsq < cutsq[itype][jtype]){
           if(!i_local){
             gmap[i]=0; //bingo, this ghost is really needed
             break; // don't need to continue j loop
           }
           else
             gmap[j]=0; //bingo, this ghost is really needed
         }
       }
     }
   }
 # endif
   // prepare the solver object
   wpmd->reset();
 
   map<int,vector<int> > etmap;
   // add particles to the AWPMD solver object
   for (int i = 0; i < ntot; i++) {
     //int i = ilist[ii];
     if(gmap[i]<0) // this particle was filtered out
       continue;
     if(spin[i]==0)  // this is an ion
       gmap[i]=wpmd->add_ion(q[i], Vector_3(x[i][0],x[i][1],x[i][2]),i<nlocal ? atom->tag[i] : -atom->tag[i]);
     else if(spin[i]==1 || spin[i]==-1){ // electron, sort them according to the tag
       etmap[etag[i]].push_back(i);
     }
     else
       error->all(FLERR,fmt("Invalid spin value (%d) for particle %d !",spin[i],i));
   }
   // ion force vector
   Vector_3 *fi=NULL;
   if(wpmd->ni)
     fi= new Vector_3[wpmd->ni];
 
   // adding electrons
   for(map<int,vector<int> >::iterator it=etmap.begin(); it!= etmap.end(); ++it){
     vector<int> &el=it->second;
     if(!el.size()) // should not happen
       continue;
     int s=spin[el[0]] >0 ? 0 : 1;
     wpmd->add_electron(s); // starts adding the spits
     for(size_t k=0;k<el.size();k++){
       int i=el[k];
       if(spin[el[0]]!=spin[i])
         error->all(FLERR,fmt("WP splits for one electron should have the same spin (at particles %d, %d)!",el[0],i));
       double m= atom->mass ? atom->mass[type[i]] : force->e_mass;
       Vector_3 xx=Vector_3(x[i][0],x[i][1],x[i][2]);
       Vector_3 rv=m*Vector_3(v[i][0],v[i][1],v[i][2]);
       double pv=ermscale*m*atom->ervel[i];
       Vector_2 cc=Vector_2(atom->cs[2*i],atom->cs[2*i+1]);
       gmap[i]=wpmd->add_split(xx,rv,atom->eradius[i],pv,cc,1.,atom->q[i],i<nlocal ? atom->tag[i] : -atom->tag[i]);
       // resetting for the case constraints were applied
       v[i][0]=rv[0]/m;
       v[i][1]=rv[1]/m;
       v[i][2]=rv[2]/m;
       atom->ervel[i]=pv/(m*ermscale);
     }
   }
   wpmd->set_pbc(NULL); // not required for LAMMPS
   wpmd->interaction(0x1|0x4|0x10,fi);
 
    // get forces from the AWPMD solver object
   for (int ii = 0; ii < inum; ii++) {
     int i = ilist[ii];
     if(gmap[i]<0) // this particle was filtered out
       continue;
     if(spin[i]==0){  // this is an ion, copying forces
       int ion=gmap[i];
       f[i][0]=fi[ion][0];
       f[i][0]=fi[ion][1];
       f[i][0]=fi[ion][2];
     }
     else { // electron
       int iel=gmap[i];
       int s=spin[i] >0 ? 0 : 1;
       wpmd->get_wp_force(s,iel,(Vector_3 *)f[i],(Vector_3 *)(atom->vforce+3*i),atom->erforce+i,atom->ervelforce+i,(Vector_2 *)(atom->csforce+2*i));
     }
   }
 
   if(fi)
     delete [] fi;
 
   // update LAMMPS energy
   if (eflag_either) {
     if (eflag_global){
       eng_coul+= wpmd->get_energy();
       // pvector = [KE, Pauli, ecoul, radial_restraint]
       pvector[0] = wpmd->Ee[0]+wpmd->Ee[1];
       pvector[2] = wpmd->Eii+wpmd->Eei[0]+wpmd->Eei[1]+wpmd->Eee;
       pvector[1] = pvector[0] + pvector[2] - wpmd->Edk - wpmd->Edc - wpmd->Eii;  // All except diagonal terms
       pvector[3] = wpmd->Ew;
     }
 
     if (eflag_atom) {
       // transfer per-atom energies here
       for (int i = 0; i < ntot; i++) {
         if(gmap[i]<0) // this particle was filtered out
           continue;
         if(spin[i]==0){
           eatom[i]=wpmd->Eiep[gmap[i]]+wpmd->Eiip[gmap[i]];
         }
         else {
           int s=spin[i] >0 ? 0 : 1;
           eatom[i]=wpmd->Eep[s][gmap[i]]+wpmd->Eeip[s][gmap[i]]+wpmd->Eeep[s][gmap[i]]+wpmd->Ewp[s][gmap[i]];
         }
       }
     }
   }
   if (vflag_fdotr) {
     virial_fdotr_compute();
     if (flexible_pressure_flag)
        virial_eradius_compute();
   }
 }
 
 /* ----------------------------------------------------------------------
    electron width-specific contribution to global virial
 ------------------------------------------------------------------------- */
 
 void PairAWPMDCut::virial_eradius_compute()
 {
   double *eradius = atom->eradius;
   double *erforce = atom->erforce;
   double e_virial;
   int *spin = atom->spin;
 
   // sum over force on all particles including ghosts
 
   if (neighbor->includegroup == 0) {
     int nall = atom->nlocal + atom->nghost;
     for (int i = 0; i < nall; i++) {
       if (spin[i]) {
         e_virial = erforce[i]*eradius[i]/3;
         virial[0] += e_virial;
         virial[1] += e_virial;
         virial[2] += e_virial;
       }
     }
 
   // neighbor includegroup flag is set
   // sum over force on initial nfirst particles and ghosts
 
   } else {
     int nall = atom->nfirst;
     for (int i = 0; i < nall; i++) {
       if (spin[i]) {
         e_virial = erforce[i]*eradius[i]/3;
         virial[0] += e_virial;
         virial[1] += e_virial;
         virial[2] += e_virial;
       }
     }
 
     nall = atom->nlocal + atom->nghost;
     for (int i = atom->nlocal; i < nall; i++) {
       if (spin[i]) {
         e_virial = erforce[i]*eradius[i]/3;
         virial[0] += e_virial;
         virial[1] += e_virial;
         virial[2] += e_virial;
       }
     }
   }
 }
 
 
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 // Creates the (ntypes+1) x (ntypes+1) per-type tables setflag, cutsq
 // and cut, and clears setflag for all pairs with j >= i.
 void PairAWPMDCut::allocate()
 {
   const int ntypes = atom->ntypes;
   const int dim = ntypes+1;
   allocated = 1;
 
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype <= ntypes; ++itype) {
     int *row = setflag[itype];
     for (int jtype = itype; jtype <= ntypes; ++jtype)
       row[jtype] = 0;
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
   memory->create(cut,dim,dim,"pair:cut");
 }
 
 /* ---------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 // the format is: pair_style awpmd/cut [<global_cutoff|-1> [command1] [command2] ...]
 // commands:
 // [hartree|dproduct|uhf]  -- quantum approximation level (default is hartree)
 // [free|pbc <length|-1>|fix <w0|-1>|relax|harm <w0>] -- width restriction (default is free)
 // [ermscale <number>]  -- scaling factor between electron mass and effective width mass (used for equations of motion only) (default is 1)
 // [flex_press]  -- set flexible pressure flag
 // -1 for length means default setting (L/2 for cutoff and L for width PBC)
 
 // Parses the pair_style arguments: arg[0] is the global cutoff, the
 // remaining arguments are keywords, some of which take one numeric
 // value. Keywords not listed here are skipped without any message.
 void PairAWPMDCut::settings(int narg, char **arg){
   if (narg < 1) error->all(FLERR,"Illegal pair_style command");
 
   cut_global = force->numeric(FLERR,arg[0]);
 
   // defaults that are restored on every pair_style command
   ermscale=1.;
   width_pbc=0.;
 
   int iarg = 1;
   while (iarg < narg) {
     const char *key = arg[iarg];
 
     if (strcmp(key,"hartree") == 0) {
       wpmd->approx=AWPMD::HARTREE;
     } else if (strcmp(key,"dproduct") == 0) {
       wpmd->approx=AWPMD::DPRODUCT;
     } else if (strcmp(key,"uhf") == 0) {
       wpmd->approx=AWPMD::UHF;
     } else if (strcmp(key,"free") == 0) {
       wpmd->constraint=AWPMD::NONE;
     } else if (strcmp(key,"fix") == 0) {
       wpmd->constraint=AWPMD::FIX;
       if (++iarg >= narg)
         error->all(FLERR,"Setting 'fix' should be followed by a number in awpmd/cut");
       wpmd->w0=force->numeric(FLERR,arg[iarg]);
     } else if (strcmp(key,"harm") == 0) {
       wpmd->constraint=AWPMD::HARM;
       if (++iarg >= narg)
         error->all(FLERR,"Setting 'harm' should be followed by a number in awpmd/cut");
       wpmd->w0=force->numeric(FLERR,arg[iarg]);
       wpmd->set_harm_constr(wpmd->w0);
     } else if (strcmp(key,"pbc") == 0) {
       if (++iarg >= narg)
         error->all(FLERR,"Setting 'pbc' should be followed by a number in awpmd/cut");
       width_pbc=force->numeric(FLERR,arg[iarg]);
     } else if (strcmp(key,"relax") == 0) {
       wpmd->constraint=AWPMD::RELAX;
     } else if (strcmp(key,"ermscale") == 0) {
       if (++iarg >= narg)
         error->all(FLERR,"Setting 'ermscale' should be followed by a number in awpmd/cut");
       ermscale=force->numeric(FLERR,arg[iarg]);
     } else if (strcmp(key,"flex_press") == 0) {
       flexible_pressure_flag = 1;
     }
 
     ++iarg;
   }
 
 
   // reset cutoffs that have been explicitly set
   // (disabled here; coeff() contains an equivalent loop)
   /*
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i+1; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut[i][j] = cut_global;
   }*/
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 // pair settings are as usual
 // Handles one pair_coeff command: arg[0] and arg[1] are type ranges,
 // the optional arg[2] is a cutoff for those pairs (default: cut_global).
 // Also records half of the shortest box edge, which replaces a negative
 // global cutoff.
 void PairAWPMDCut::coeff(int narg, char **arg)
 {
   if (narg < 2 || narg > 3) error->all(FLERR,"Incorrect args for pair coefficients");
 
   const double *lo = domain->boxlo;
   const double *hi = domain->boxhi;
   const double xlen = hi[0]-lo[0];
   const double ylen = hi[1]-lo[1];
   const double zlen = hi[2]-lo[2];
   half_box_length = 0.5 * MIN(xlen, MIN(ylen, zlen));
 
   if(cut_global<0)
     cut_global=half_box_length;
 
   if (allocated) {
     // already-set off-diagonal pairs are put back to the global cutoff
     for (int i = 1; i <= atom->ntypes; i++)
       for (int j = i+1; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut[i][j] = cut_global;
   } else {
     allocate();
   }
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double cut_one = cut_global;
   if (narg == 3) cut_one = force->numeric(FLERR,arg[2]);
 
   // only pairs with j >= i are stored
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++, count++) {
       cut[i][j] = cut_one;
       setflag[i][j] = 1;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 // Checks that the atom style provides q, spin, eradius and erforce,
 // registers this pair style with the minimizer when update->whichflag
 // is 2, requests a neighbor list, verifies the unit-dependent constants
 // and copies them into the wpmd solver object.
 void PairAWPMDCut::init_style()
 {
   // error and warning checks
 
   if (!atom->q_flag || !atom->spin_flag ||
       !atom->eradius_flag || !atom->erforce_flag )  // TO DO: adjust this to match approximation used
     error->all(FLERR,"Pair awpmd/cut requires atom attributes "
                "q, spin, eradius, erforce");
 
   /*
   if(vflag_atom){ // can't compute virial per atom
     //warning->
     error->all(FLERR,"Pair style awpmd can't compute per atom virials");
   }*/
 
   // add hook to minimizer for eradius and erforce
   // NOTE(review): the second argument here is 1, while min_xf_get()
   // packs 7 values per atom -- confirm what request() expects
 
   if (update->whichflag == 2)
     int ignore = update->minimize->request(this,1,0.01);
 
   // make sure to use the appropriate timestep when using real units
 
   /*if (update->whichflag == 1) {
     if (force->qqr2e == 332.06371 && update->dt == 1.0)
       error->all(FLERR,"You must lower the default real units timestep for pEFF ");
   }*/
 
   // need a half neigh list and optionally a granular history neigh list
 
-  //int irequest = neighbor->request(this);
+  //int irequest = neighbor->request(this,instance_me);
 
   //if (atom->tag_enable == 0)
   //  error->all(FLERR,"Pair style reax requires atom IDs");
 
   //if (force->newton_pair == 0)
     //error->all(FLERR,"Pair style awpmd requires newton pair on");
 
   //if (strcmp(update->unit_style,"real") != 0 && comm->me == 0)
     //error->warning(FLERR,"Not using real units with pair reax");
 
   // the request's newton field is overridden with 2
   // (presumably "newton off" for this list -- TODO confirm)
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->newton = 2;
 
   // all three constants are used below, so none of them may be zero
   if(force->e_mass==0. || force->hhmrr2e==0. || force->mvh2r==0.)
     error->all(FLERR,"Pair style awpmd requires e_mass and conversions hhmrr2e, mvh2r to be properly set for unit system");
 
   // pass the unit-dependent constants on to the solver
   wpmd->me=force->e_mass;
   wpmd->h2_me=force->hhmrr2e/force->e_mass;
   wpmd->one_h=force->mvh2r;
   wpmd->coul_pref=force->qqrd2e;
 
   wpmd->calc_ii=1;
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 // Returns the cutoff for type pair (i,j). A pair that was never set
 // explicitly first gets a cutoff mixed from cut[i][i] and cut[j][j].
 double PairAWPMDCut::init_one(int i, int j)
 {
   double &cut_ij = cut[i][j];
   if (!setflag[i][j])
     cut_ij = mix_distance(cut[i][i],cut[j][j]);
 
   return cut_ij;
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairAWPMDCut::write_restart(FILE *fp)
 {
   write_restart_settings(fp);
 
   int i,j;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       fwrite(&setflag[i][j],sizeof(int),1,fp);
       if (setflag[i][j]) fwrite(&cut[i][j],sizeof(double),1,fp);
     }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairAWPMDCut::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   int i,j;
   int me = comm->me;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
       MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
       if (setflag[i][j]) {
         if (me == 0) fread(&cut[i][j],sizeof(double),1,fp);
         MPI_Bcast(&cut[i][j],1,MPI_DOUBLE,0,world);
       }
     }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 // Writes cut_global, offset_flag and mix_flag, in this order, which is
 // the order read_restart_settings() reads them back in.
 void PairAWPMDCut::write_restart_settings(FILE *fp)
 {
   const size_t one = 1;
   fwrite(&cut_global,sizeof(double),one,fp);
   fwrite(&offset_flag,sizeof(int),one,fp);
   fwrite(&mix_flag,sizeof(int),one,fp);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 // Proc 0 reads cut_global, offset_flag and mix_flag from fp; the three
 // values are then broadcast from proc 0 to all procs.
 void PairAWPMDCut::read_restart_settings(FILE *fp)
 {
   const int root = 0;
 
   if (comm->me == root) {
     fread(&cut_global,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
   }
 
   MPI_Bcast(&cut_global,1,MPI_DOUBLE,root,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,root,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,root,world);
 }
 
 /* ----------------------------------------------------------------------
    returns pointers to the log() of electron radius and corresponding force
    minimizer operates on log(radius) so radius never goes negative
    these arrays are stored locally by pair style
 ------------------------------------------------------------------------- */
 
 void PairAWPMDCut::min_xf_pointers(int ignore, double **xextra, double **fextra)
 {
   // grow arrays if necessary
   // need to be atom->nmax in length
   int nvar=atom->nmax*(3+1+1+2);  // w(1), vel(3),  pw(1), cs(2)
 
   if (nvar > nmax) {
     memory->destroy(min_var);
     memory->destroy(min_varforce);
     nmax = nvar;
     memory->create(min_var,nmax,"pair:min_var");
     memory->create(min_varforce,nmax,"pair:min_varforce");
   }
 
   *xextra = min_var;
   *fextra = min_varforce;
 }
 
 /* ----------------------------------------------------------------------
    minimizer requests the log() of electron radius and corresponding force
    calculate and store in min_eradius and min_erforce
 ------------------------------------------------------------------------- */
 
 // Packs the extra electron degrees of freedom and their forces into
 // min_var / min_varforce, 7 slots per local atom:
 //   slot 0     log(eradius)   (force: eradius*erforce)
 //   slots 1-3  v[0..2]        (force: vforce[3*i+0..2])
 //   slot 4     ervel          (force: ervelforce)
 //   slots 5-6  cs[2*i..2*i+1] (force: csforce[2*i..2*i+1])
 // Atoms with spin == 0 get all 7 slots set to zero.
 // The velocity slots are indexed 7*i+1+j; the former 7*i+1+3*j stride
 // collided with the ervel slot and with slot 0 of the next atom.
 // min_x_set() below unpacks using the same layout.
 void PairAWPMDCut::min_xf_get(int ignore)
 {
   double *eradius = atom->eradius;
   double *erforce = atom->erforce;
   double **v=atom->v;
   double *vforce=atom->vforce;
   double *ervel=atom->ervel;
   double *ervelforce=atom->ervelforce;
   double *cs=atom->cs;
   double *csforce=atom->csforce;
 
   int *spin = atom->spin;
   int nlocal = atom->nlocal;
 
   for (int i = 0; i < nlocal; i++)
     if (spin[i]) {
       min_var[7*i] = log(eradius[i]);
       min_varforce[7*i] = eradius[i]*erforce[i];
       for(int j=0;j<3;j++){
         min_var[7*i+1+j] = v[i][j];
         min_varforce[7*i+1+j] = vforce[3*i+j];
       }
       min_var[7*i+4] = ervel[i];
       min_varforce[7*i+4] = ervelforce[i];
       min_var[7*i+5] = cs[2*i];
       min_varforce[7*i+5] = csforce[2*i];
       min_var[7*i+6] = cs[2*i+1];
       min_varforce[7*i+6] = csforce[2*i+1];
 
     } else {
       for(int j=0;j<7;j++)
         min_var[7*i+j] = min_varforce[7*i+j] = 0.0;
     }
 }
 
 /* ----------------------------------------------------------------------
    propagate the minimizer values to the atom values
 ------------------------------------------------------------------------- */
 
 // Unpacks min_var back into the per-atom arrays for local atoms with
 // nonzero spin, using the 7-slot layout written by min_xf_get():
 // slot 0 log(eradius), slots 1-3 v, slot 4 ervel, slots 5-6 cs.
 void PairAWPMDCut::min_x_set(int ignore)
 {
   double *eradius = atom->eradius;
   double **v=atom->v;
   double *ervel=atom->ervel;
   double *cs=atom->cs;
 
   int *spin = atom->spin;
   int nlocal = atom->nlocal;
 
   for (int i = 0; i < nlocal; i++) {
     if (spin[i]){
       // slot 0 holds log(radius), so exp() recovers a positive radius
       eradius[i]=exp(min_var[7*i]);
       for(int j=0;j<3;j++)
         v[i][j]=min_var[7*i+1+j];
       ervel[i]=min_var[7*i+4];
       cs[2*i]=min_var[7*i+5];
       cs[2*i+1]=min_var[7*i+6];
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
 ------------------------------------------------------------------------- */
 
 // Returns the byte count of the per-atom energy array, the per-atom
 // virial array (6 values each) and the two minimizer arrays.
 double PairAWPMDCut::memory_usage()
 {
   const double eatom_bytes = maxeatom * sizeof(double);
   const double vatom_bytes = maxvatom*6 * sizeof(double);
   const double minvar_bytes = 2 * nmax * sizeof(double);
 
   return eatom_bytes + vatom_bytes + minvar_bytes;
 }
diff --git a/src/USER-CG-CMM/pair_lj_sdk_coul_long.cpp b/src/USER-CG-CMM/pair_lj_sdk_coul_long.cpp
index fd1cae009..591a81fe5 100644
--- a/src/USER-CG-CMM/pair_lj_sdk_coul_long.cpp
+++ b/src/USER-CG-CMM/pair_lj_sdk_coul_long.cpp
@@ -1,658 +1,658 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
    This style is a simplified re-implementation of the CG/CMM pair style
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_lj_sdk_coul_long.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "update.h"
 #include "integrate.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 #include "lj_sdk_common.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace LJSDKParms;
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 /* ---------------------------------------------------------------------- */
 
 // Sets the capability flags of this pair style (ewald and pppm on,
 // respa off, writedata on) and starts without a Coulomb force table.
 PairLJSDKCoulLong::PairLJSDKCoulLong(LAMMPS *lmp) : Pair(lmp)
 {
   ftable = NULL;
 
   ewaldflag = 1;
   pppmflag = 1;
   respa_enable = 0;
   writedata = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Frees the per-type arrays created by allocate() (if it ran) and the
 // Coulomb tables (if ftable is set).
 PairLJSDKCoulLong::~PairLJSDKCoulLong()
 {
   if (allocated) {
     // bookkeeping tables
     memory->destroy(setflag);
     memory->destroy(lj_type);
     memory->destroy(cutsq);
 
     // user-supplied parameters and cutoffs
     memory->destroy(cut_lj);
     memory->destroy(cut_ljsq);
     memory->destroy(epsilon);
     memory->destroy(sigma);
 
     // derived coefficients
     memory->destroy(lj1);
     memory->destroy(lj2);
     memory->destroy(lj3);
     memory->destroy(lj4);
     memory->destroy(offset);
     memory->destroy(rminsq);
     memory->destroy(emin);
 
     allocated = 0;
   }
 
   if (ftable) free_tables();
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Selects the eval<EVFLAG,EFLAG,NEWTON_PAIR> instantiation that matches
 // the energy/virial flags and the newton_pair setting, runs it, and
 // finally computes the f-dot-r virial if requested.
 void PairLJSDKCoulLong::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   const bool newton = (force->newton_pair != 0);
 
   if (!evflag) {
     if (newton) eval<0,0,1>();
     else eval<0,0,0>();
   } else if (eflag) {
     if (newton) eval<1,1,1>();
     else eval<1,1,0>();
   } else {
     if (newton) eval<1,0,1>();
     else eval<1,0,0>();
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Pair force kernel. Template flags as used below:
 //   EVFLAG      - call ev_tally() for every interacting pair
 //   EFLAG       - also evaluate the pair energies evdwl and ecoul
 //   NEWTON_PAIR - apply the reaction force to every j; otherwise only
 //                 to j < nlocal
 // For each pair inside cutsq the Coulomb part (direct or tabulated) and
 // the LJ part (12-4, 9-6 or 12-6, selected by lj_type) are summed into
 // fpair and applied along the separation vector.
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
 void PairLJSDKCoulLong::eval()
 {
   int i,ii,j,jj,jtype,itable;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double fraction,table;
   double r,rsq,r2inv,forcecoul,forcelj,factor_coul,factor_lj;
   double grij,expm2,prefactor,t,erfc;
 
   const double * const * const x = atom->x;
   double * const * const f = atom->f;
   const double * const q = atom->q;
   const int * const type = atom->type;
   const int nlocal = atom->nlocal;
   const double * const special_coul = force->special_coul;
   const double * const special_lj = force->special_lj;
   const double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   const int inum = list->inum;
   const int * const ilist = list->ilist;
   const int * const numneigh = list->numneigh;
   const int * const * const firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
 
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     // force on atom i is accumulated locally and added to f[i] once
     fxtmp=fytmp=fztmp=0.0;
 
     const int itype = type[i];
     const int * const jlist = firstneigh[i];
     const int jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       forcecoul = forcelj = evdwl = ecoul = 0.0;
 
       // the special-bond scaling factors are looked up from the raw
       // neighbor entry before it is masked down to the atom index
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         const int ljt = lj_type[itype][jtype];
 
         if (rsq < cut_coulsq) {
           if (!ncoultablebits || rsq <= tabinnersq) {
             // direct evaluation: erfc is approximated by a polynomial
             // in t (constants EWALD_P, A1..A5) times exp(-grij^2)
             r = sqrt(rsq);
             grij = g_ewald * r;
             expm2 = exp(-grij*grij);
             t = 1.0 / (1.0 + EWALD_P*grij);
             erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
             prefactor = qqrd2e * qtmp*q[j]/r;
             forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
             if (EFLAG) ecoul = prefactor*erfc;
             if (factor_coul < 1.0) {
               forcecoul -= (1.0-factor_coul)*prefactor;
               if (EFLAG) ecoul -= (1.0-factor_coul)*prefactor;
             }
           } else {
             // tabulated evaluation: the table index is taken from the
             // bit pattern of rsq stored as a float, followed by a
             // linear interpolation within the table interval
             union_int_float_t rsq_lookup;
             rsq_lookup.f = rsq;
             itable = rsq_lookup.i & ncoulmask;
             itable >>= ncoulshiftbits;
             fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
             table = ftable[itable] + fraction*dftable[itable];
             forcecoul = qtmp*q[j] * table;
             if (EFLAG) ecoul = qtmp*q[j] * 
                          (etable[itable] + fraction*detable[itable]);
             if (factor_coul < 1.0) {
               table = ctable[itable] + fraction*dctable[itable];
               prefactor = qtmp*q[j] * table;
               forcecoul -= (1.0-factor_coul)*prefactor;
               if (EFLAG) ecoul -= (1.0-factor_coul)*prefactor;
             }
           }
         }
 
         if (rsq < cut_ljsq[itype][jtype]) {
 
           // each branch builds the needed inverse powers of r from
           // r2inv; if ljt matches none of them forcelj and evdwl stay 0
           if (ljt == LJ12_4) {
             const double r4inv=r2inv*r2inv;
             forcelj = r4inv*(lj1[itype][jtype]*r4inv*r4inv
                              - lj2[itype][jtype]);
 
             if (EFLAG)
               evdwl = r4inv*(lj3[itype][jtype]*r4inv*r4inv
                              - lj4[itype][jtype]) - offset[itype][jtype];
 
           } else if (ljt == LJ9_6) {
             const double r3inv = r2inv*sqrt(r2inv);
             const double r6inv = r3inv*r3inv;
             forcelj = r6inv*(lj1[itype][jtype]*r3inv
                              - lj2[itype][jtype]);
             if (EFLAG)
               evdwl = r6inv*(lj3[itype][jtype]*r3inv
                              - lj4[itype][jtype]) - offset[itype][jtype];
 
           } else if (ljt == LJ12_6) {
             const double r6inv = r2inv*r2inv*r2inv;
             forcelj = r6inv*(lj1[itype][jtype]*r6inv
                              - lj2[itype][jtype]);
             if (EFLAG)
               evdwl = r6inv*(lj3[itype][jtype]*r6inv
                              - lj4[itype][jtype]) - offset[itype][jtype];
           }
           forcelj *= factor_lj;
           if (EFLAG) evdwl *= factor_lj;
         }
 
         // fpair multiplies the separation vector components directly
         fpair = (forcecoul + forcelj) * r2inv;
 
         fxtmp += delx*fpair;
         fytmp += dely*fpair;
         fztmp += delz*fpair;
         if (NEWTON_PAIR || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (EVFLAG) ev_tally(i,j,nlocal,NEWTON_PAIR,
                              evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 // Creates all (ntypes+1) x (ntypes+1) per-type tables. For pairs with
 // j >= i, setflag is cleared and lj_type is marked LJ_NOT_SET.
 void PairLJSDKCoulLong::allocate()
 {
   const int ntypes = atom->ntypes;
   const int dim = ntypes+1;
   allocated = 1;
 
   memory->create(setflag,dim,dim,"pair:setflag");
   memory->create(lj_type,dim,dim,"pair:lj_type");
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       lj_type[itype][jtype] = LJ_NOT_SET;
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   memory->create(cut_lj,dim,dim,"pair:cut_lj");
   memory->create(cut_ljsq,dim,dim,"pair:cut_ljsq");
   memory->create(epsilon,dim,dim,"pair:epsilon");
   memory->create(sigma,dim,dim,"pair:sigma");
 
   memory->create(lj1,dim,dim,"pair:lj1");
   memory->create(lj2,dim,dim,"pair:lj2");
   memory->create(lj3,dim,dim,"pair:lj3");
   memory->create(lj4,dim,dim,"pair:lj4");
 
   memory->create(offset,dim,dim,"pair:offset");
 
   memory->create(rminsq,dim,dim,"pair:rminsq");
   memory->create(emin,dim,dim,"pair:emin");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairLJSDKCoulLong::settings(int narg, char **arg)
 {
   // arg[0] = global LJ cutoff, optional arg[1] = Coulomb cutoff
   if (narg < 1 || narg > 2) error->all(FLERR,"Illegal pair_style command");
 
   cut_lj_global = force->numeric(FLERR,arg[0]);
   if (narg == 1) cut_coul = cut_lj_global;
   else cut_coul = force->numeric(FLERR,arg[1]);
 
   // reset cutoffs that have been explicitly set
   // the inner loop starts at j = i so that like-type (i,i) pairs,
   // which coeff() also flags in setflag, are reset too
 
   if (allocated) {
     for (int i = 1; i <= atom->ntypes; i++)
       for (int j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut_lj[i][j] = cut_lj_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairLJSDKCoulLong::coeff(int narg, char **arg)
 {
   if (narg < 5 || narg > 6) 
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   int lj_type_one = find_lj_type(arg[2],lj_type_list);
   if (lj_type_one == LJ_NOT_SET)
     error->all(FLERR,"Cannot parse LJ type flag.");
 
   double epsilon_one = force->numeric(FLERR,arg[3]);
   double sigma_one = force->numeric(FLERR,arg[4]);
 
   double cut_lj_one = cut_lj_global;
   if (narg == 6) cut_lj_one = force->numeric(FLERR,arg[5]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       lj_type[i][j] = lj_type_one;
       epsilon[i][j] = epsilon_one;
       sigma[i][j] = sigma_one;
       cut_lj[i][j] = cut_lj_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLJSDKCoulLong::init_style()
 {
   // the Coulomb term reads atom->q, so per-atom charges must exist
   // NOTE(review): the message below names lj/cut/coul/long although this
   // class is PairLJSDKCoulLong -- looks like a copy/paste leftover, confirm
   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/cut/coul/long requires atom attribute q");
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 
   // squared Coulomb cutoff, compared against rsq in single()
   cut_coulsq = cut_coul * cut_coul;
 
   // insure use of KSpace long-range solver, set g_ewald
 
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style requires a KSpace style");
   g_ewald = force->kspace->g_ewald;
 
   // setup force tables (no rRESPA support yet)
   // tables are only built when ncoultablebits is non-zero
 
   if (ncoultablebits) init_tables(cut_coul,NULL);
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairLJSDKCoulLong::init_one(int i, int j)
 {
   // this style has no mixing rules: every pair must have been set by
   // coeff() or read_restart(), otherwise stop here
   if (setflag[i][j] == 0)
     error->all(FLERR,"No mixing support for lj/sdk/coul/long. "
                "Coefficients for all pairs need to be set explicitly.");
 
   // LJ variant of this pair; indexes lj_prefact[], lj_pow1[] and lj_pow2[]
   const int ljt = lj_type[i][j];
 
   if (ljt == LJ_NOT_SET)
     error->all(FLERR,"unrecognized LJ parameter flag");
 
   // returned cutoff is the larger of the LJ and Coulomb cutoffs
   double cut = MAX(cut_lj[i][j],cut_coul);
   cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
 
   // lj1/lj2 carry the extra exponent factor (force prefactors),
   // lj3/lj4 are the plain energy prefactors
   lj1[i][j] = lj_prefact[ljt] * lj_pow1[ljt] * epsilon[i][j] * 
     pow(sigma[i][j],lj_pow1[ljt]);
   lj2[i][j] = lj_prefact[ljt] * lj_pow2[ljt] * epsilon[i][j] * 
     pow(sigma[i][j],lj_pow2[ljt]);
   lj3[i][j] = lj_prefact[ljt] * epsilon[i][j] * pow(sigma[i][j],lj_pow1[ljt]);
   lj4[i][j] = lj_prefact[ljt] * epsilon[i][j] * pow(sigma[i][j],lj_pow2[ljt]);
 
   // with offset_flag set, offset is the LJ energy evaluated at the LJ cutoff
   if (offset_flag) {
     double ratio = sigma[i][j] / cut_lj[i][j];
     offset[i][j] = lj_prefact[ljt] * epsilon[i][j] * 
       (pow(ratio,lj_pow1[ljt]) - pow(ratio,lj_pow2[ljt]));
   } else offset[i][j] = 0.0;
 
   // mirror everything into the (j,i) entries
   cut_ljsq[j][i] = cut_ljsq[i][j];
   cut_lj[j][i] = cut_lj[i][j];
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   lj4[j][i] = lj4[i][j];
   offset[j][i] = offset[i][j];
   lj_type[j][i] = lj_type[i][j];
 
   // compute LJ derived parameters for SDK angle potential (LJ only!)
 
   const double eps = epsilon[i][j];
   const double sig = sigma[i][j];
   // rmin = sig * (pow1/pow2)^(1/(pow1-pow2)), written via exp/log
   const double rmin = sig*exp(1.0/(lj_pow1[ljt]-lj_pow2[ljt])
                               *log(lj_pow1[ljt]/lj_pow2[ljt]) );
   rminsq[j][i] = rminsq[i][j] = rmin*rmin;
 
   // emin is the (unshifted) LJ energy evaluated at r = rmin
   const double ratio = sig/rmin;
   const double emin_one = lj_prefact[ljt] * eps * (pow(ratio,lj_pow1[ljt])
                                                    - pow(ratio,lj_pow2[ljt]));
   emin[j][i] = emin[i][j] = emin_one;
 
   // no long-range tail correction is computed here:
   // requesting one (tail_flag set) is reported as an error
 
   if (tail_flag)
     error->all(FLERR,"Tail flag not supported by lj/sdk/coul/long pair style");
 
   return cut;
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJSDKCoulLong::write_restart(FILE *fp)
 {
   write_restart_settings(fp);
 
   int i,j;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       fwrite(&setflag[i][j],sizeof(int),1,fp);
       if (setflag[i][j]) {
         fwrite(&lj_type[i][j],sizeof(int),1,fp);
         fwrite(&epsilon[i][j],sizeof(double),1,fp);
         fwrite(&sigma[i][j],sizeof(double),1,fp);
         fwrite(&cut_lj[i][j],sizeof(double),1,fp);
       }
     }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJSDKCoulLong::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   int i,j;
   int me = comm->me;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
       MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
       if (setflag[i][j]) {
         if (me == 0) {
           fread(&lj_type[i][j],sizeof(int),1,fp);
           fread(&epsilon[i][j],sizeof(double),1,fp);
           fread(&sigma[i][j],sizeof(double),1,fp);
           fread(&cut_lj[i][j],sizeof(double),1,fp);
         }
         MPI_Bcast(&lj_type[i][j],1,MPI_INT,0,world);
         MPI_Bcast(&epsilon[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&sigma[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&cut_lj[i][j],1,MPI_DOUBLE,0,world);
       }
     }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJSDKCoulLong::write_restart_settings(FILE *fp)
 {
   // the order here must match read_restart_settings()
 
   // cutoffs
   fwrite(&cut_lj_global,sizeof(double),1,fp);
   fwrite(&cut_coul,sizeof(double),1,fp);
 
   // flags
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
   fwrite(&tail_flag,sizeof(int),1,fp);
 
   // Coulomb table parameters
   fwrite(&ncoultablebits,sizeof(int),1,fp);
   fwrite(&tabinner,sizeof(double),1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJSDKCoulLong::read_restart_settings(FILE *fp)
 {
   // rank 0 reads the values in the order write_restart_settings() wrote them
   const bool is_root = (comm->me == 0);
   if (is_root) {
     fread(&cut_lj_global,sizeof(double),1,fp);
     fread(&cut_coul,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
     fread(&tail_flag,sizeof(int),1,fp);
     fread(&ncoultablebits,sizeof(int),1,fp);
     fread(&tabinner,sizeof(double),1,fp);
   }
 
   // ... and every value is broadcast from rank 0 to all ranks
 
   // cutoffs
   MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
 
   // flags
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
   MPI_Bcast(&tail_flag,1,MPI_INT,0,world);
 
   // Coulomb table parameters
   MPI_Bcast(&ncoultablebits,1,MPI_INT,0,world);
   MPI_Bcast(&tabinner,1,MPI_DOUBLE,0,world);
 }
 
 /* ----------------------------------------------------------------------
    lj/sdk does not support per atom type output with mixing
 ------------------------------------------------------------------------- */
 
 void PairLJSDKCoulLong::write_data(FILE *)
 {
   // the per-type form is never written: this always reports an error
   // that points to the 'pair ij' form handled by write_data_all()
   error->one(FLERR,
              "Pair style lj/sdk/coul/* requires using write_data with the 'pair ij' option");
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes all pairs to data file
 ------------------------------------------------------------------------- */
 
 void PairLJSDKCoulLong::write_data_all(FILE *fp)
 {
   for (int i = 1; i <= atom->ntypes; i++)
     for (int j = i; j <= atom->ntypes; j++)
       fprintf(fp,"%d %d %s %g %g %g\n",i,j,lj_type_list[lj_type[i][j]],
               epsilon[i][j],sigma[i][j],cut_lj[i][j]);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Energy and force of one i,j pair at squared distance rsq.
 //   i, j           atom indices, used to read the charges from atom->q
 //   itype, jtype   atom types, used to index the per-pair LJ tables
 //   factor_coul    scaling factor applied to the Coulomb term
 //   factor_lj      scaling factor applied to the LJ term
 //   fforce         (output) total force divided by r, i.e. the same
 //                  quantity the compute kernel stores in fpair
 // Returns the sum of the Coulomb and LJ energies of the pair.
 double PairLJSDKCoulLong::single(int i, int j, int itype, int jtype,
                                  double rsq,
                                  double factor_coul, double factor_lj,
                                  double &fforce)
 {
   double r2inv,r,grij,expm2,t,erfc,prefactor;
   double fraction,table,forcecoul,forcelj,phicoul,philj;
   int itable;
 
   forcecoul = forcelj = phicoul = philj = 0.0;
 
   r2inv = 1.0/rsq;
   if (rsq < cut_coulsq) {
     if (!ncoultablebits || rsq <= tabinnersq) {
       // analytic branch: erfc is evaluated with the polynomial in t
       // (coefficients A1..A5, EWALD_P) times exp(-grij^2)
       r = sqrt(rsq);
       grij = g_ewald * r;
       expm2 = exp(-grij*grij);
       t = 1.0 / (1.0 + EWALD_P*grij);
       erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
       prefactor = force->qqrd2e * atom->q[i]*atom->q[j]/r;
       forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
       phicoul = prefactor*erfc;
       if (factor_coul < 1.0) {
         forcecoul -= (1.0-factor_coul)*prefactor;
         phicoul -= (1.0-factor_coul)*prefactor;
       }
     } else {
       // tabulated branch: the table index is taken from the bit pattern
       // of rsq stored as a float, then linearly interpolated
       union_int_float_t rsq_lookup_single;
       rsq_lookup_single.f = rsq;
       itable = rsq_lookup_single.i & ncoulmask;
       itable >>= ncoulshiftbits;
       fraction = (rsq_lookup_single.f - rtable[itable]) * drtable[itable];
       table = ftable[itable] + fraction*dftable[itable];
       forcecoul = atom->q[i]*atom->q[j] * table;
       table = etable[itable] + fraction*detable[itable];
       phicoul = atom->q[i]*atom->q[j] * table;
       if (factor_coul < 1.0) {
         table = ctable[itable] + fraction*dctable[itable];
         prefactor = atom->q[i]*atom->q[j] * table;
         forcecoul -= (1.0-factor_coul)*prefactor;
         phicoul -= (1.0-factor_coul)*prefactor;
       }
     }
   }
 
   if (rsq < cut_ljsq[itype][jtype]) {
     const int ljt = lj_type[itype][jtype];
     const double ljpow1 = lj_pow1[ljt];
     const double ljpow2 = lj_pow2[ljt];
     const double ljpref = lj_prefact[ljt];
 
     const double ratio = sigma[itype][jtype]/sqrt(rsq);
     const double eps = epsilon[itype][jtype];
 
     // note: forcelj is already divided by rsq here
     forcelj = factor_lj * ljpref*eps * (ljpow1*pow(ratio,ljpow1)
                           - ljpow2*pow(ratio,ljpow2))/rsq;
     philj = factor_lj * (ljpref*eps * (pow(ratio,ljpow1) - pow(ratio,ljpow2))
                          - offset[itype][jtype]);
   }
 
   // only the Coulomb part still needs the 1/r^2 factor; scaling the sum
   // by r2inv would divide the LJ force by rsq a second time
   fforce = forcecoul*r2inv + forcelj;
 
   return phicoul + philj;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void *PairLJSDKCoulLong::extract(const char *str, int &dim)
 {
   // per-type-pair tables are reported with dim = 2
   dim = 2;
   if (!strcmp(str,"epsilon")) return (void *) epsilon;
   if (!strcmp(str,"sigma"))   return (void *) sigma;
   if (!strcmp(str,"lj_type")) return (void *) lj_type;
   if (!strcmp(str,"lj1"))     return (void *) lj1;
   if (!strcmp(str,"lj2"))     return (void *) lj2;
   if (!strcmp(str,"lj3"))     return (void *) lj3;
   if (!strcmp(str,"lj4"))     return (void *) lj4;
   if (!strcmp(str,"rminsq"))  return (void *) rminsq;
   if (!strcmp(str,"emin"))    return (void *) emin;
 
   // the Coulomb cutoff is a single value, reported with dim = 0;
   // unknown names yield NULL (dim is left at 0)
   dim = 0;
   if (!strcmp(str,"cut_coul")) return (void *) &cut_coul;
 
   return NULL;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLJSDKCoulLong::memory_usage()
 {
   // start from the base-class estimate and add this style's tables
   const int np1 = atom->ntypes + 1;
   double total = Pair::memory_usage();
 
   // two int tables: setflag, lj_type
   total += 2 * np1*np1*sizeof(int);
 
   // eleven double tables:
   // lj_cut/lj_cutsq/epsilon/sigma/offset/lj1/lj2/lj3/lj4/rminsq/emin
   total += 11 * np1*np1*sizeof(double);
 
   // Coulomb tables are counted only when tabulation is enabled
   if (ncoultablebits) {
     const int nentries = 1<<ncoultablebits;
     total += 8 * nentries*sizeof(double);
   }
 
   return total;
 }
diff --git a/src/USER-CUDA/pair_born_coul_long_cuda.cpp b/src/USER-CUDA/pair_born_coul_long_cuda.cpp
index ad0b4caac..d01a5bf47 100644
--- a/src/USER-CUDA/pair_born_coul_long_cuda.cpp
+++ b/src/USER-CUDA/pair_born_coul_long_cuda.cpp
@@ -1,183 +1,183 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    Contributing author: Paul Crozier (SNL)
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_born_coul_long_cuda.h"
 #include "pair_born_coul_long_cuda_cu.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 /* ---------------------------------------------------------------------- */
 
 PairBornCoulLongCuda::PairBornCoulLongCuda(LAMMPS *lmp) : PairBornCoulLong(lmp)
 {
   // a /cuda style cannot work without the Cuda object of the LAMMPS instance
   cuda = lmp->cuda;
   if (!cuda) {
     error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
   }
 
   // allocate() has not yet stored the table pointers in the shared data
   allocated2 = false;
 
   // pair settings kept in the shared CUDA data
   cuda->shared_data.pair.cudable_force = 1;
   cuda->shared_data.pair.use_block_per_atom = 0;
   cuda->setSystemParams();
 }
 
 /* ----------------------------------------------------------------------
    remember pointer to arrays in cuda shared data
 ------------------------------------------------------------------------- */
 
 void PairBornCoulLongCuda::allocate()
 {
   // run the base-class allocate() first if it has not been done yet
   if (!allocated) PairBornCoulLong::allocate();
 
   // the pointers below are stored in the shared data only once
   if (allocated2) return;
   allocated2 = true;
 
   cuda->shared_data.pair.cut = cut_lj;
   cuda->shared_data.pair.coeff1 = rhoinv;
   cuda->shared_data.pair.coeff2 = sigma;
   cuda->shared_data.pair.coeff3 = a;
   cuda->shared_data.pair.coeff4 = c;
   cuda->shared_data.pair.coeff5 = d;
   cuda->shared_data.pair.offset = offset;
   cuda->shared_data.pair.special_lj = force->special_lj;
   cuda->shared_data.pair.special_coul = force->special_coul;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBornCoulLongCuda::compute(int eflag, int vflag)
 {
   MYDBG( printf("PairBornCoulLongCuda compute start\n"); fflush(stdout);)
   if (eflag || vflag) ev_setup(eflag,vflag);
 
   // upload the energy/virial accumulators before calling the CUDA routine
   if (eflag) {
     cuda->cu_eng_vdwl->upload();
     cuda->cu_eng_coul->upload();
   }
   if (vflag) cuda->cu_virial->upload();
 
   // the binning build takes no neighbor list argument
   #ifdef CUDA_USE_BINNING
   Cuda_PairBornCoulLongCuda(& cuda->shared_data, eflag, vflag);
   #else
   Cuda_PairBornCoulLongCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
   #endif
 
   // download the results right away unless collect_forces_later is set
   if (!cuda->shared_data.pair.collect_forces_later) {
     if (eflag) {
       cuda->cu_eng_vdwl->download();
       cuda->cu_eng_coul->download();
     }
     if (vflag) cuda->cu_virial->download();
   }
   MYDBG( printf("PairBornCoulLongCuda compute end\n"); fflush(stdout);)
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBornCoulLongCuda::settings(int narg, char **arg)
 {
         PairBornCoulLong::settings(narg, arg);
         cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_lj_global;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBornCoulLongCuda::coeff(int narg, char **arg)
 {
   // base class stores the coefficients
   PairBornCoulLong::coeff(narg, arg);
 
   // then make sure the shared CUDA data holds the table pointers
   allocate();
 }
 
 void PairBornCoulLongCuda::init_style()
 {
   // per-atom charges are required
   if (!atom->q_flag)
     error->all(FLERR,"Pair style born/coul/long requires atom attribute q");
   // request a neighbor list; respa integration is rejected below
 
   int irequest;
 
   // NOTE(review): the message names buck/coul/long/cuda although this is
   // the born/coul/long/cuda style -- looks like a copy/paste leftover
   if (strstr(update->integrate_style,"respa")) error->all(FLERR,"Integrate Style Respa is not supported by pair style buck/coul/long/cuda");
 
-          irequest = neighbor->request(this);
+  irequest = neighbor->request(this,instance_me);
     // mark the request as a full (not half) list flagged cudable
     neighbor->requests[irequest]->full = 1;
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->cudable = 1;
 
 
   // squared Coulomb cutoff, also copied into the shared CUDA data
   cut_coulsq = cut_coul * cut_coul;
   cuda->shared_data.pair.cut_coulsq_global=cut_coulsq;
 
   // a KSpace style must be defined; its g_ewald and the qqrd2e constant
   // are copied into the shared CUDA data
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style is incompatible with KSpace style");
   g_ewald = force->kspace->g_ewald;
   cuda->shared_data.pair.g_ewald=g_ewald;
   cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
 
 
   // Coulomb tables only trigger a warning here; no table is built
   if(ncoultablebits) error->warning(FLERR,"# CUDA: You asked for the usage of Coulomb Tables. This is not supported in CUDA Pair forces. Setting is ignored.\n");
 }
 
 void PairBornCoulLongCuda::init_list(int id, NeighList *ptr)
 {
   MYDBG(printf("# CUDA PairBornCoulLongCuda::init_list\n");)
 
   // base class handles the list first
   PairBornCoulLong::init_list(id, ptr);
 
   #ifndef CUDA_USE_BINNING
   // right now we can only handle verlet (id 0), not respa
   // see Neighbor::init() for details on lammps lists' logic
   if (id == 0) {
     cuda_neigh_list = cuda->registerNeighborList(ptr);
   }
   #endif
 
   MYDBG(printf("# CUDA PairBornCoulLongCuda::init_list end\n");)
 }
 
 void PairBornCoulLongCuda::ev_setup(int eflag, int vflag)
 {
         int maxeatomold=maxeatom;
         PairBornCoulLong::ev_setup(eflag,vflag);
 
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
 
   if (vflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
 
 }
diff --git a/src/USER-CUDA/pair_buck_coul_cut_cuda.cpp b/src/USER-CUDA/pair_buck_coul_cut_cuda.cpp
index 5259b0c50..4291b8275 100644
--- a/src/USER-CUDA/pair_buck_coul_cut_cuda.cpp
+++ b/src/USER-CUDA/pair_buck_coul_cut_cuda.cpp
@@ -1,170 +1,170 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    Contributing author: Paul Crozier (SNL)
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_buck_coul_cut_cuda.h"
 #include "pair_buck_coul_cut_cuda_cu.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairBuckCoulCutCuda::PairBuckCoulCutCuda(LAMMPS *lmp) : PairBuckCoulCut(lmp)
 {
   // a /cuda style cannot work without the Cuda object of the LAMMPS instance
   cuda = lmp->cuda;
   if (!cuda) {
     error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
   }
 
   // allocate() has not yet stored the table pointers in the shared data
   allocated2 = false;
 
   // pair settings kept in the shared CUDA data
   cuda->shared_data.pair.cudable_force = 1;
   cuda->shared_data.pair.use_block_per_atom = 0;
   cuda->setSystemParams();
 }
 
 /* ----------------------------------------------------------------------
    remember pointer to arrays in cuda shared data
 ------------------------------------------------------------------------- */
 
 void PairBuckCoulCutCuda::allocate()
 {
   // run the base-class allocate() first if it has not been done yet
   if (!allocated) PairBuckCoulCut::allocate();
 
   // the pointers below are stored in the shared data only once
   if (allocated2) return;
   allocated2 = true;
 
   cuda->shared_data.pair.cut_coul = cut_coul;
   cuda->shared_data.pair.cut = cut_lj;
   cuda->shared_data.pair.coeff1 = rhoinv;
   cuda->shared_data.pair.coeff2 = buck1;
   cuda->shared_data.pair.coeff3 = buck2;
   cuda->shared_data.pair.coeff4 = a;
   cuda->shared_data.pair.coeff5 = c;
   cuda->shared_data.pair.offset = offset;
   cuda->shared_data.pair.special_lj = force->special_lj;
   cuda->shared_data.pair.special_coul = force->special_coul;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBuckCoulCutCuda::compute(int eflag, int vflag)
 {
   MYDBG( printf("PairBuckCoulCutCuda compute start\n"); fflush(stdout);)
   if (eflag || vflag) ev_setup(eflag,vflag);
 
   // upload the energy/virial accumulators before calling the CUDA routine
   if (eflag) {
     cuda->cu_eng_vdwl->upload();
     cuda->cu_eng_coul->upload();
   }
   if (vflag) cuda->cu_virial->upload();
 
   Cuda_PairBuckCoulCutCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
 
   // download the results right away unless collect_forces_later is set
   if (!cuda->shared_data.pair.collect_forces_later) {
     if (eflag) {
       cuda->cu_eng_vdwl->download();
       cuda->cu_eng_coul->download();
     }
     if (vflag) cuda->cu_virial->download();
   }
   MYDBG( printf("PairBuckCoulCutCuda compute end\n"); fflush(stdout);)
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBuckCoulCutCuda::settings(int narg, char **arg)
 {
         PairBuckCoulCut::settings(narg, arg);
         cuda->shared_data.pair.cut_coul_global = (F_CFLOAT) cut_coul_global;
         cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_lj_global;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBuckCoulCutCuda::coeff(int narg, char **arg)
 {
   // base class stores the coefficients
   PairBuckCoulCut::coeff(narg, arg);
 
   // then make sure the shared CUDA data holds the table pointers
   allocate();
 }
 
 void PairBuckCoulCutCuda::init_style()
 {
   // per-atom charges are required
   // NOTE(review): both messages below name buck/coul/long although this
   // class is PairBuckCoulCutCuda -- looks like a copy/paste leftover
   if (!atom->q_flag)
     error->all(FLERR,"Pair style buck/coul/long requires atom attribute q");
   // request a neighbor list; respa integration is rejected below
 
   int irequest;
 
   if (strstr(update->integrate_style,"respa")) error->all(FLERR,"Integrate Style Respa is not supported by pair style buck/coul/long/cuda");
 
-          irequest = neighbor->request(this);
+  irequest = neighbor->request(this,instance_me);
     // mark the request as a full (not half) list flagged cudable
     neighbor->requests[irequest]->full = 1;
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->cudable = 1;
 
 
   // copy the qqrd2e constant into the shared CUDA data
   cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
 
   // squared global Coulomb cutoff for the shared CUDA data
    cuda->shared_data.pair.cut_coulsq_global=cut_coul_global * cut_coul_global;
 
   // Coulomb tables only trigger a warning here; no table is built
   if(ncoultablebits) error->warning(FLERR,"# CUDA: You asked for the usage of Coulomb Tables. This is not supported in CUDA Pair forces. Setting is ignored.\n");
 }
 
 void PairBuckCoulCutCuda::init_list(int id, NeighList *ptr)
 {
   MYDBG(printf("# CUDA PairBuckCoulCutCuda::init_list\n");)
 
   // base class handles the list first
   PairBuckCoulCut::init_list(id, ptr);
 
   #ifndef CUDA_USE_BINNING
   // right now we can only handle verlet (id 0), not respa
   // see Neighbor::init() for details on lammps lists' logic
   if (id == 0) {
     cuda_neigh_list = cuda->registerNeighborList(ptr);
   }
   #endif
 
   MYDBG(printf("# CUDA PairBuckCoulCutCuda::init_list end\n");)
 }
 
 void PairBuckCoulCutCuda::ev_setup(int eflag, int vflag)
 {
         int maxeatomold=maxeatom;
         PairBuckCoulCut::ev_setup(eflag,vflag);
 
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
 
   if (vflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
 
 }
diff --git a/src/USER-CUDA/pair_buck_coul_long_cuda.cpp b/src/USER-CUDA/pair_buck_coul_long_cuda.cpp
index ce9ed568f..8c8d66716 100644
--- a/src/USER-CUDA/pair_buck_coul_long_cuda.cpp
+++ b/src/USER-CUDA/pair_buck_coul_long_cuda.cpp
@@ -1,181 +1,181 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    Contributing author: Paul Crozier (SNL)
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_buck_coul_long_cuda.h"
 #include "pair_buck_coul_long_cuda_cu.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 /* ---------------------------------------------------------------------- */
 
 PairBuckCoulLongCuda::PairBuckCoulLongCuda(LAMMPS *lmp) : PairBuckCoulLong(lmp)
 {
   // a /cuda style cannot work without the Cuda object of the LAMMPS instance
   cuda = lmp->cuda;
   if (!cuda) {
     error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
   }
 
   // allocate() has not yet stored the table pointers in the shared data
   allocated2 = false;
 
   // pair settings kept in the shared CUDA data
   cuda->shared_data.pair.cudable_force = 1;
   cuda->shared_data.pair.use_block_per_atom = 0;
   cuda->setSystemParams();
 }
 
 /* ----------------------------------------------------------------------
    remember pointer to arrays in cuda shared data
 ------------------------------------------------------------------------- */
 
 void PairBuckCoulLongCuda::allocate()
 {
   // run the base-class allocate() first if it has not been done yet
   if (!allocated) PairBuckCoulLong::allocate();
 
   // the pointers below are stored in the shared data only once
   if (allocated2) return;
   allocated2 = true;
 
   cuda->shared_data.pair.cut = cut_lj;
   cuda->shared_data.pair.coeff1 = rhoinv;
   cuda->shared_data.pair.coeff2 = buck1;
   cuda->shared_data.pair.coeff3 = buck2;
   cuda->shared_data.pair.coeff4 = a;
   cuda->shared_data.pair.coeff5 = c;
   cuda->shared_data.pair.offset = offset;
   cuda->shared_data.pair.special_lj = force->special_lj;
   cuda->shared_data.pair.special_coul = force->special_coul;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBuckCoulLongCuda::compute(int eflag, int vflag)
 {
   MYDBG( printf("PairBuckCoulLongCuda compute start\n"); fflush(stdout);)
   if (eflag || vflag) ev_setup(eflag,vflag);
 
   // upload the energy/virial accumulators before calling the CUDA routine
   if (eflag) {
     cuda->cu_eng_vdwl->upload();
     cuda->cu_eng_coul->upload();
   }
   if (vflag) cuda->cu_virial->upload();
 
   Cuda_PairBuckCoulLongCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
 
   // download the results right away unless collect_forces_later is set
   if (!cuda->shared_data.pair.collect_forces_later) {
     if (eflag) {
       cuda->cu_eng_vdwl->download();
       cuda->cu_eng_coul->download();
     }
     if (vflag) cuda->cu_virial->download();
   }
   MYDBG( printf("PairBuckCoulLongCuda compute end\n"); fflush(stdout);)
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBuckCoulLongCuda::settings(int narg, char **arg)
 {
         PairBuckCoulLong::settings(narg, arg);
         cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_lj_global;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBuckCoulLongCuda::coeff(int narg, char **arg)
 {
   // base class stores the coefficients
   PairBuckCoulLong::coeff(narg, arg);
 
   // then make sure the shared CUDA data holds the table pointers
   allocate();
 }
 
 void PairBuckCoulLongCuda::init_style()
 {
   // per-atom charges are required
   if (!atom->q_flag)
     error->all(FLERR,"Pair style buck/coul/long requires atom attribute q");
   // request a neighbor list; respa integration is rejected below
 
   int irequest;
 
   if (strstr(update->integrate_style,"respa")) error->all(FLERR,"Integrate Style Respa is not supported by pair style buck/coul/long/cuda");
 
-          irequest = neighbor->request(this);
+  irequest = neighbor->request(this,instance_me);
     // mark the request as a full (not half) list flagged cudable
     neighbor->requests[irequest]->full = 1;
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->cudable = 1;
 
 
   // squared Coulomb cutoff, also copied into the shared CUDA data
   cut_coulsq = cut_coul * cut_coul;
   cuda->shared_data.pair.cut_coulsq_global=cut_coulsq;
 
   // a KSpace style must be defined; its g_ewald and the qqrd2e constant
   // are copied into the shared CUDA data
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style is incompatible with KSpace style");
   g_ewald = force->kspace->g_ewald;
   cuda->shared_data.pair.g_ewald=g_ewald;
   cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
 
 
   // Coulomb tables only trigger a warning here; no table is built
   if(ncoultablebits) error->warning(FLERR,"# CUDA: You asked for the usage of Coulomb Tables. This is not supported in CUDA Pair forces. Setting is ignored.\n");
 }
 
 void PairBuckCoulLongCuda::init_list(int id, NeighList *ptr)
 {
   MYDBG(printf("# CUDA PairBuckCoulLongCuda::init_list\n");)
 
   // base class handles the list first
   PairBuckCoulLong::init_list(id, ptr);
 
   #ifndef CUDA_USE_BINNING
   // right now we can only handle verlet (id 0), not respa
   // see Neighbor::init() for details on lammps lists' logic
   if (id == 0) {
     cuda_neigh_list = cuda->registerNeighborList(ptr);
   }
   #endif
 
   MYDBG(printf("# CUDA PairBuckCoulLongCuda::init_list end\n");)
 }
 
 void PairBuckCoulLongCuda::ev_setup(int eflag, int vflag)
 {
         int maxeatomold=maxeatom;
         PairBuckCoulLong::ev_setup(eflag,vflag);
 
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
 
   if (vflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
 
 }
diff --git a/src/USER-CUDA/pair_buck_cuda.cpp b/src/USER-CUDA/pair_buck_cuda.cpp
index e42176cae..bcb9314c5 100644
--- a/src/USER-CUDA/pair_buck_cuda.cpp
+++ b/src/USER-CUDA/pair_buck_cuda.cpp
@@ -1,166 +1,166 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    Contributing author: Paul Crozier (SNL)
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_buck_cuda.h"
 #include "pair_buck_cuda_cu.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairBuckCuda::PairBuckCuda(LAMMPS *lmp) : PairBuck(lmp)
 {
   // every statement after the check dereferences the cuda handle,
   // so a missing handle is reported first
   cuda = lmp->cuda;
   if (!cuda) {
     error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
   }
 
   // allocated2 tracks whether allocate() has published the coefficient arrays
   allocated2 = false;
 
   cuda->shared_data.pair.cudable_force = 1;
   cuda->shared_data.pair.use_block_per_atom = 0;
   cuda->setSystemParams();
 }
 
 /* ----------------------------------------------------------------------
    remember pointer to arrays in cuda shared data
 ------------------------------------------------------------------------- */
 
 void PairBuckCuda::allocate()
 {
   // host arrays first, if the base class has not created them yet
   if (!allocated) {
     PairBuck::allocate();
   }
 
   // the pointers below are published once only
   if (allocated2) return;
   allocated2 = true;
 
   cuda->shared_data.pair.cut        = cut;
   cuda->shared_data.pair.coeff1     = rhoinv;
   cuda->shared_data.pair.coeff2     = buck1;
   cuda->shared_data.pair.coeff3     = buck2;
   cuda->shared_data.pair.coeff4     = a;
   cuda->shared_data.pair.coeff5     = c;
   cuda->shared_data.pair.offset     = offset;
   cuda->shared_data.pair.special_lj = force->special_lj;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBuckCuda::compute(int eflag, int vflag)
 {
   MYDBG( printf("PairBuckCuda compute start\n"); fflush(stdout);)
 
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
   }
 
   // upload() the energy / virial accumulators that were requested
   if (eflag) {
     cuda->cu_eng_vdwl->upload();
     cuda->cu_eng_coul->upload();
   }
   if (vflag) {
     cuda->cu_virial->upload();
   }
 
   Cuda_PairBuckCuda(&cuda->shared_data, &cuda_neigh_list->sneighlist,
                     eflag, vflag, eflag_atom, vflag_atom);
 
   // download() is skipped here when collect_forces_later is set
   if (!cuda->shared_data.pair.collect_forces_later) {
     if (eflag) {
       cuda->cu_eng_vdwl->download();
       cuda->cu_eng_coul->download();
     }
     if (vflag) {
       cuda->cu_virial->download();
     }
   }
 
   MYDBG( printf("PairBuckCuda compute end\n"); fflush(stdout);)
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBuckCuda::settings(int narg, char **arg)
 {
         PairBuck::settings(narg, arg);
         cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_global;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBuckCuda::coeff(int narg, char **arg)
 {
         PairBuck::coeff(narg, arg);
         allocate();
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style: checks atom->q_flag, rejects respa
    integration, requests a full cudable neighbor list and copies qqrd2e
    into the CUDA shared data
 ------------------------------------------------------------------------- */
 
 void PairBuckCuda::init_style()
 {
   // NOTE(review): this message (and the respa one below) names
   // buck/coul/long although the class is PairBuckCuda - confirm whether
   // the charge requirement and the wording are intended for this style
   if (!atom->q_flag)
     error->all(FLERR,"Pair style buck/coul/long requires atom attribute q");
   // request a neighbor list; rRESPA is rejected below
 
   int irequest;
 
   if (strstr(update->integrate_style,"respa")) error->all(FLERR,"Integrate Style Respa is not supported by pair style buck/coul/long/cuda");
 
   // the request is switched from half to full and flagged as cudable
-          irequest = neighbor->request(this);
+  irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->full = 1;
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->cudable = 1;
 
 
   cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
 
 
   // a non-zero ncoultablebits only produces a warning
   if(ncoultablebits) error->warning(FLERR,"# CUDA: You asked for the usage of Coulomb Tables. This is not supported in CUDA Pair forces. Setting is ignored.\n");
 }
 
 /* ----------------------------------------------------------------------
    hand the neighbor list to the host style and, when CUDA_USE_BINNING
    is not defined, register list 0 with the CUDA object
 ------------------------------------------------------------------------- */
 
 void PairBuckCuda::init_list(int id, NeighList *ptr)
 {
   MYDBG(printf("# CUDA PairBuckCuda::init_list\n");)
 
   PairBuck::init_list(id, ptr);
 
 #ifndef CUDA_USE_BINNING
   // only the verlet list (id 0) is registered; respa lists are not handled
   // see Neighbor::init() for details on lammps lists' logic
   if (id == 0) {
     cuda_neigh_list = cuda->registerNeighborList(ptr);
   }
 #endif
 
   MYDBG(printf("# CUDA PairBuckCuda::init_list end\n");)
 }
 
 void PairBuckCuda::ev_setup(int eflag, int vflag)
 {
         int maxeatomold=maxeatom;
         PairBuck::ev_setup(eflag,vflag);
 
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
 
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
 
 }
diff --git a/src/USER-CUDA/pair_eam_cuda.cpp b/src/USER-CUDA/pair_eam_cuda.cpp
index 514650069..f3dcdd16f 100644
--- a/src/USER-CUDA/pair_eam_cuda.cpp
+++ b/src/USER-CUDA/pair_eam_cuda.cpp
@@ -1,265 +1,265 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Paul Crozier (SNL)
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_eam_cuda.h"
 #include "pair_eam_cuda_cu.h"
 #include "pair_virial_compute_cu.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairEAMCuda::PairEAMCuda(LAMMPS* lmp) : PairEAM(lmp)
 {
   // every statement after the check dereferences the cuda handle,
   // so a missing handle is reported first
   cuda = lmp->cuda;
   if (!cuda) {
     error->all(FLERR, "You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
   }
 
   allocated2 = false;
 
   cuda->shared_data.pair.cudable_force = 1;
   cuda->shared_data.pair.override_block_per_atom = 0;
   cuda->setSystemParams();
 
   // device-side wrappers are created later (compute / init_style)
   cu_rho         = NULL;
   cu_fp          = NULL;
   cu_frho_spline = NULL;
   cu_z2r_spline  = NULL;
   cu_rhor_spline = NULL;
 }
 
 /* ----------------------------------------------------------------------
    remember pointer to arrays in cuda shared data
 ------------------------------------------------------------------------- */
 
 void PairEAMCuda::allocate()
 {
   if(! allocated) PairEAM::allocate();
 
   cuda->shared_data.pair.cutsq     = cutsq;
   cuda->shared_data.pair.cut_global = (F_CFLOAT) cutforcesq;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairEAMCuda::compute(int eflag, int vflag)
 {
   cuda->shared_data.pair.cut_global = (F_CFLOAT) cutforcesq;
   cuda->shared_data.pair.use_block_per_atom = 0;
   cuda->shared_data.pair.collect_forces_later = 0;
 
   if(atom->nmax > nmax || cuda->finished_setup == false) {
     memory->destroy(rho);
     memory->destroy(fp);
     nmax = atom->nmax;
     memory->create(rho, nmax, "pair:rho");
     memory->create(fp, nmax, "pair:fp");
     delete cu_rho;
     delete cu_fp;
     cu_rho = new cCudaData<double, F_CFLOAT, x> (rho, atom->nmax);
     cu_fp  = new cCudaData<double, F_CFLOAT, x> (fp, atom->nmax);
     Cuda_PairEAMCuda_Init(&cuda->shared_data, rdr, rdrho, nfrho, nrhor, nr, nrho, nz2r,
                           cu_frho_spline->dev_data(), cu_rhor_spline->dev_data(), cu_z2r_spline->dev_data(),
                           cu_rho->dev_data(), cu_fp->dev_data(), type2frho, type2z2r, type2rhor);
   }
 
 
 
   if(eflag || vflag) ev_setup(eflag, vflag);
 
   if(eflag) cuda->cu_eng_vdwl->upload();
 
   if(vflag) cuda->cu_virial->upload();
 
   Cuda_PairEAM1Cuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
 
   comm->forward_comm_pair(this);
 
   Cuda_PairEAM2Cuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
 
   if(eflag) cuda->cu_eng_vdwl->download();
 
   if(vflag) cuda->cu_virial->download();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairEAMCuda::settings(int narg, char** arg)
 {
   PairEAM::settings(narg, arg);
   cuda->shared_data.pair.cut_global = (F_CFLOAT) cutforcesq;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairEAMCuda::coeff(int narg, char** arg)
 {
   PairEAM::coeff(narg, arg);
   allocate();
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style: rebuilds the host spline tables,
    requests a full cudable neighbor list and re-creates / uploads the
    CUDA wrappers around the three spline arrays
 ------------------------------------------------------------------------- */
 
 void PairEAMCuda::init_style()
 {
   MYDBG(printf("# CUDA PairEAMCuda::init_style start\n");)
   // rebuild the host tables before they are wrapped below
   file2array();
   array2spline();
   int irequest;
 
 
   // the request is switched from half to full and flagged as cudable
-  irequest = neighbor->request(this);
+  irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->full = 1;
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->cudable = 1;
 
   // drop wrappers from a previous call (NULL after construction)
   delete cu_rhor_spline;
   delete cu_z2r_spline;
   delete cu_frho_spline;
 
   // NOTE(review): array2spline() in this file creates the tables with a
   // last dimension of 8 - presumably EAM_COEFF_LENGTH is 8; confirm
   cu_rhor_spline = new cCudaData<double, F_CFLOAT, xyz>((double*)rhor_spline, nrhor, nr + 1, EAM_COEFF_LENGTH);
   cu_z2r_spline = new cCudaData<double, F_CFLOAT, xyz>((double*)z2r_spline, nz2r, nr + 1, EAM_COEFF_LENGTH);
   cu_frho_spline = new cCudaData<double, F_CFLOAT, xyz>((double*)frho_spline, nfrho, nrho + 1, EAM_COEFF_LENGTH);
 
   cu_rhor_spline->upload();
   cu_z2r_spline->upload();
   cu_frho_spline->upload();
 
   MYDBG(printf("# CUDA PairEAMCuda::init_style end\n");)
 }
 
 /* ----------------------------------------------------------------------
    hand the neighbor list to the host style and register list 0 with
    the CUDA object
 ------------------------------------------------------------------------- */
 
 void PairEAMCuda::init_list(int id, NeighList* ptr)
 {
   MYDBG(printf("# CUDA PairEAMCuda::init_list\n");)
 
   PairEAM::init_list(id, ptr);
 
   // only the verlet list (id 0) is registered; respa lists are not handled
   // see Neighbor::init() for details on lammps lists' logic
   if (id == 0) {
     cuda_neigh_list = cuda->registerNeighborList(ptr);
   }
 
   MYDBG(printf("# CUDA PairEAMCuda::init_list end\n");)
 }
 
 /* ----------------------------------------------------------------------
    rebuild the three spline tables (8 entries per grid point) from the
    frho / rhor / z2r arrays; after interpolate() entry 3 of every grid
    point is duplicated into entry 7
 ------------------------------------------------------------------------- */
 
 void PairEAMCuda::array2spline()
 {
   rdr = 1.0 / dr;
   rdrho = 1.0 / drho;
 
   // throw away tables from an earlier call, then allocate fresh ones
   memory->destroy(frho_spline);
   memory->destroy(rhor_spline);
   memory->destroy(z2r_spline);
 
   memory->create(frho_spline, nfrho, nrho + 1, 8, "pair:frho");
   memory->create(rhor_spline, nrhor, nr + 1, 8, "pair:rhor");
   memory->create(z2r_spline, nz2r, nr + 1, 8, "pair:z2r");
 
   // frho tables: nrho + 1 grid points each
   for (int t = 0; t < nfrho; ++t) {
     interpolate(nrho, drho, frho[t], frho_spline[t]);
     for (int k = 0; k <= nrho; ++k) {
       frho_spline[t][k][7] = frho_spline[t][k][3];
     }
   }
 
   // rhor tables: nr + 1 grid points each
   for (int t = 0; t < nrhor; ++t) {
     interpolate(nr, dr, rhor[t], rhor_spline[t]);
     for (int k = 0; k <= nr; ++k) {
       rhor_spline[t][k][7] = rhor_spline[t][k][3];
     }
   }
 
   // z2r tables: nr + 1 grid points each
   for (int t = 0; t < nz2r; ++t) {
     interpolate(nr, dr, z2r[t], z2r_spline[t]);
     for (int k = 0; k <= nr; ++k) {
       z2r_spline[t][k][7] = z2r_spline[t][k][3];
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int PairEAMCuda::pack_forward_comm(int n, int* iswap, double* buf,
                                    int pbc_flag, int* pbc)
 {
   // only the first entry of iswap is passed on to the CUDA packer;
   // pbc_flag and pbc are not used here
   Cuda_PairEAMCuda_PackComm(&cuda->shared_data, n, *iswap, buf);
 
   // the value reported back is n regardless of the width of F_CFLOAT
   return n;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairEAMCuda::unpack_forward_comm(int n, int first, double* buf)
 {
   // the CUDA unpacker receives the device pointer of the fp wrapper
   Cuda_PairEAMCuda_UnpackComm(&cuda->shared_data, n, first, buf,
                               cu_fp->dev_data());
 }
 
 void PairEAMCuda::ev_setup(int eflag, int vflag)
 {
   int maxeatomold = maxeatom;
   PairEAM::ev_setup(eflag, vflag);
 
   if(eflag_atom && atom->nmax > maxeatomold) {
     delete cuda->cu_eatom;
     cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax);
   }
 
   if(vflag_atom && atom->nmax > maxeatomold) {
     delete cuda->cu_vatom;
     cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6);
   }
 
 }
diff --git a/src/USER-CUDA/pair_gran_hooke_cuda.cpp b/src/USER-CUDA/pair_gran_hooke_cuda.cpp
index ad64dbeef..88cf2a2c2 100644
--- a/src/USER-CUDA/pair_gran_hooke_cuda.cpp
+++ b/src/USER-CUDA/pair_gran_hooke_cuda.cpp
@@ -1,250 +1,250 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Paul Crozier (SNL)
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_gran_hooke_cuda.h"
 #include "pair_gran_hooke_cuda_cu.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "modify.h"
 #include "fix_pour.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairGranHookeCuda::PairGranHookeCuda(LAMMPS *lmp) : PairGranHooke(lmp)
 {
   // every statement after the check dereferences the cuda handle,
   // so a missing handle is reported first
   cuda = lmp->cuda;
   if (!cuda) {
     error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
   }
 
   // allocated2 tracks whether allocate() has filled the CUDA coefficient tables
   allocated2 = false;
 
   cuda->shared_data.pair.cudable_force = 1;
   cuda->setSystemParams();
 }
 
 /* ----------------------------------------------------------------------
    remember pointer to arrays in cuda shared data
 ------------------------------------------------------------------------- */
 
 void PairGranHookeCuda::allocate()
 {
         if(! allocated) PairGranHooke::allocate();
         if(! allocated2)
         {
                 allocated2 = true;
                  int n = atom->ntypes;
                 cuda->shared_data.pair.cutsq     = cutsq;
                 memory->create(cuda->shared_data.pair.coeff1,n+1,n+1,
                                "pair:cuda_coeff1");
                 memory->create(cuda->shared_data.pair.coeff2,
                                n+1,n+1,"pair:cuda_coeff2");
                 cuda->shared_data.pair.coeff1[0][0]=kn;
                 cuda->shared_data.pair.coeff1[0][1]=kt;
                 cuda->shared_data.pair.coeff1[1][0]=gamman;
                 cuda->shared_data.pair.coeff1[1][1]=gammat;
                 cuda->shared_data.pair.coeff2[0][0]=xmu;
                 cuda->shared_data.pair.coeff2[0][1]=dampflag;
         }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairGranHookeCuda::compute(int eflag, int vflag)
 {
   cuda->shared_data.pair.use_block_per_atom = 0;
 
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
   }
 
   // upload() the energy / virial accumulators that were requested
   if (eflag) {
     cuda->cu_eng_vdwl->upload();
   }
   if (vflag) {
     cuda->cu_virial->upload();
   }
 
   Cuda_PairGranHookeCuda(&cuda->shared_data, &cuda_neigh_list->sneighlist,
                          eflag, vflag, eflag_atom, vflag_atom);
 
   // download() is skipped here when collect_forces_later is set
   if (!cuda->shared_data.pair.collect_forces_later) {
     if (eflag) {
       cuda->cu_eng_vdwl->download();
     }
     if (vflag) {
       cuda->cu_virial->download();
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairGranHookeCuda::settings(int narg, char **arg)
 {
   // nothing CUDA-specific to record: the host style does all the work
   PairGranHooke::settings(narg, arg);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairGranHookeCuda::coeff(int narg, char **arg)
 {
         PairGranHooke::coeff(narg, arg);
         allocate();
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style: requests a full granular cudable
    neighbor list, checks the required atom / comm attributes, records
    the freeze group bit and computes the per-type maximum radii of
    dynamic and frozen particles across all procs
 ------------------------------------------------------------------------- */
 
 void PairGranHookeCuda::init_style()
 {
         int i;
         MYDBG(printf("# CUDA PairGranHookeCuda::init_style start\n"); )
   // request a neighbor list unless this is a respa setup with whichflag == 0
 
   int irequest;
 
   if (update->whichflag == 0 && strstr(update->integrate_style,"respa")) {
     // intentionally empty: no list is requested in this case
   }
   else
   {
     // the request is switched from half to full and flagged gran + cudable
-          irequest = neighbor->request(this);
+    irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->full = 1;
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->gran = 1;
     neighbor->requests[irequest]->cudable = 1;
     //neighbor->style=0; //0=NSQ neighboring
   }
 
   if (!atom->radius_flag || !atom->omega_flag || !atom->torque_flag)
     error->all(FLERR,"Pair granular requires atom attributes radius, omega, torque");
   if (comm->ghost_velocity == 0)
     error->all(FLERR,"Pair granular requires ghost atoms store velocity");
 
   // NOTE(review): an earlier remark here mentioned a half neigh list and an
   // optional history list; the request above asks for a full list only - confirm
 
   dt = update->dt;
 
   // check for Fix freeze and set freeze_group_bit (0 when no such fix exists)
 
   for (i = 0; i < modify->nfix; i++)
     if (strcmp(modify->fix[i]->style,"freeze") == 0) break;
   if (i < modify->nfix) freeze_group_bit = modify->fix[i]->groupbit;
   else freeze_group_bit = 0;
 
   cuda->shared_data.pair.freeze_group_bit=freeze_group_bit;
 
   // check for FixPour and FixDeposit so can extract particle radii
   // (index of the first matching fix, or -1 when none is defined)
 
   int ipour;
   for (ipour = 0; ipour < modify->nfix; ipour++)
     if (strcmp(modify->fix[ipour]->style,"pour") == 0) break;
   if (ipour == modify->nfix) ipour = -1;
 
   int idep;
   for (idep = 0; idep < modify->nfix; idep++)
     if (strcmp(modify->fix[idep]->style,"deposit") == 0) break;
   if (idep == modify->nfix) idep = -1;
 
   // set maxrad_dynamic and maxrad_frozen for each type
   // include future FixPour and FixDeposit particles as dynamic
   // when both fixes exist, the deposit value overwrites the pour value
 
   int itype;
   for (i = 1; i <= atom->ntypes; i++) {
     onerad_dynamic[i] = onerad_frozen[i] = 0.0;
     if (ipour >= 0) {
       itype = i;
       onerad_dynamic[i] = 
         *((double *) modify->fix[ipour]->extract("radius",itype));
     }
     if (idep >= 0) {
       itype = i;
       onerad_dynamic[i] = 
         *((double *) modify->fix[idep]->extract("radius",itype));
     }
   }
 
   double *radius = atom->radius;
   int *mask = atom->mask;
   int *type = atom->type;
   int nlocal = atom->nlocal;
 
   // fold the radii of the local atoms into the per-type maxima,
   // split by membership in the freeze group
   for (i = 0; i < nlocal; i++)
     if (mask[i] & freeze_group_bit)
       onerad_frozen[type[i]] = MAX(onerad_frozen[type[i]],radius[i]);
     else
       onerad_dynamic[type[i]] = MAX(onerad_dynamic[type[i]],radius[i]);
 
   // reduce entries 1..ntypes to the maximum over all procs
   MPI_Allreduce(&onerad_dynamic[1],&maxrad_dynamic[1],atom->ntypes,
                 MPI_DOUBLE,MPI_MAX,world);
   MPI_Allreduce(&onerad_frozen[1],&maxrad_frozen[1],atom->ntypes,
                 MPI_DOUBLE,MPI_MAX,world);
 }
 
 /* ----------------------------------------------------------------------
    hand the neighbor list to the host style and, when CUDA_USE_BINNING
    is not defined, register list 0 with the CUDA object
 ------------------------------------------------------------------------- */
 
 void PairGranHookeCuda::init_list(int id, NeighList *ptr)
 {
   MYDBG(printf("# CUDA PairGranHookeCuda::init_list\n");)
 
   PairGranHooke::init_list(id, ptr);
 
 #ifndef CUDA_USE_BINNING
   // only the verlet list (id 0) is registered; respa lists are not handled
   // see Neighbor::init() for details on lammps lists' logic
   if (id == 0) {
     cuda_neigh_list = cuda->registerNeighborList(ptr);
   }
 #endif
 
   MYDBG(printf("# CUDA PairGranHookeCuda::init_list end\n");)
 }
 
 void PairGranHookeCuda::ev_setup(int eflag, int vflag)
 {
         int maxeatomold=maxeatom;
         PairGranHooke::ev_setup(eflag,vflag);
 
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
 
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.eatom , atom->nmax, 6  );}
 
 }
diff --git a/src/USER-CUDA/pair_lj96_cut_cuda.cpp b/src/USER-CUDA/pair_lj96_cut_cuda.cpp
index 3f449de84..7edb722d3 100644
--- a/src/USER-CUDA/pair_lj96_cut_cuda.cpp
+++ b/src/USER-CUDA/pair_lj96_cut_cuda.cpp
@@ -1,179 +1,179 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Paul Crozier (SNL)
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_lj96_cut_cuda.h"
 #include "pair_lj96_cut_cuda_cu.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairLJ96CutCuda::PairLJ96CutCuda(LAMMPS *lmp) : PairLJ96Cut(lmp)
 {
   // every statement after the check dereferences the cuda handle,
   // so a missing handle is reported first
   cuda = lmp->cuda;
   if (!cuda) {
     error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
   }
 
   // allocated2 tracks whether allocate() has published the coefficient arrays
   allocated2 = false;
 
   cuda->shared_data.pair.cudable_force = 1;
   cuda->setSystemParams();
 }
 
 /* ----------------------------------------------------------------------
    remember pointer to arrays in cuda shared data
 ------------------------------------------------------------------------- */
 
 void PairLJ96CutCuda::allocate()
 {
   // host arrays first, if the base class has not created them yet
   if (!allocated) {
     PairLJ96Cut::allocate();
   }
 
   // the pointers below are published once only
   if (allocated2) return;
   allocated2 = true;
 
   cuda->shared_data.pair.cut          = cut;
   cuda->shared_data.pair.coeff1       = lj1;
   cuda->shared_data.pair.coeff2       = lj2;
   cuda->shared_data.pair.coeff3       = lj3;
   cuda->shared_data.pair.coeff4       = lj4;
   cuda->shared_data.pair.offset       = offset;
   cuda->shared_data.pair.special_lj   = force->special_lj;
   cuda->shared_data.pair.special_coul = force->special_coul;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJ96CutCuda::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
   }
 
   // upload() the energy / virial accumulators that were requested
   if (eflag) {
     cuda->cu_eng_vdwl->upload();
   }
   if (vflag) {
     cuda->cu_virial->upload();
   }
 
   Cuda_PairLJ96CutCuda(&cuda->shared_data, &cuda_neigh_list->sneighlist,
                        eflag, vflag, eflag_atom, vflag_atom);
 
   // download() is skipped here when collect_forces_later is set
   if (!cuda->shared_data.pair.collect_forces_later) {
     if (eflag) {
       cuda->cu_eng_vdwl->download();
     }
     if (vflag) {
       cuda->cu_virial->download();
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJ96CutCuda::settings(int narg, char **arg)
 {
         PairLJ96Cut::settings(narg, arg);
         cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_global;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJ96CutCuda::coeff(int narg, char **arg)
 {
         PairLJ96Cut::coeff(narg, arg);
         allocate();
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style: requests a full cudable neighbor
    list (unless this is a respa setup with whichflag == 0) and resets
    cut_respa to NULL
 ------------------------------------------------------------------------- */
 
 void PairLJ96CutCuda::init_style()
 {
         MYDBG(printf("# CUDA PairLJ96CutCuda::init_style start\n"); )
   // request a neighbor list unless this is a respa setup with whichflag == 0
 
   int irequest;
 
   if (update->whichflag == 0 && strstr(update->integrate_style,"respa")) {
     // intentionally empty: no list is requested in this case
   }
   else
   {
     // the request is switched from half to full and flagged as cudable
-          irequest = neighbor->request(this);
+    irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->full = 1;
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->cudable = 1;
     //neighbor->style=0; //0=NSQ neighboring
   }
 
 
   // cut_respa is cleared unconditionally, including in the respa case above
   cut_respa = NULL;
   MYDBG(printf("# CUDA PairLJ96CutCuda::init_style end\n"); )
 }
 
 /* ----------------------------------------------------------------------
    hand the neighbor list to the host style and, when CUDA_USE_BINNING
    is not defined, register list 0 with the CUDA object
 ------------------------------------------------------------------------- */
 
 void PairLJ96CutCuda::init_list(int id, NeighList *ptr)
 {
   MYDBG(printf("# CUDA PairLJ96CutCuda::init_list\n");)
 
   PairLJ96Cut::init_list(id, ptr);
 
 #ifndef CUDA_USE_BINNING
   // only the verlet list (id 0) is registered; respa lists are not handled
   // see Neighbor::init() for details on lammps lists' logic
   if (id == 0) {
     cuda_neigh_list = cuda->registerNeighborList(ptr);
   }
 #endif
 
   MYDBG(printf("# CUDA PairLJ96CutCuda::init_list end\n");)
 }
 
 void PairLJ96CutCuda::ev_setup(int eflag, int vflag)
 {
         int maxeatomold=maxeatom;
         PairLJ96Cut::ev_setup(eflag,vflag);
 
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
 
   if (vflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
 
 }
diff --git a/src/USER-CUDA/pair_lj_charmm_coul_charmm_cuda.cpp b/src/USER-CUDA/pair_lj_charmm_coul_charmm_cuda.cpp
index f9c0a928e..3a0ad0e28 100644
--- a/src/USER-CUDA/pair_lj_charmm_coul_charmm_cuda.cpp
+++ b/src/USER-CUDA/pair_lj_charmm_coul_charmm_cuda.cpp
@@ -1,188 +1,188 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    Contributing author: Paul Crozier (SNL)
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_lj_charmm_coul_charmm_cuda.h"
 #include "pair_lj_charmm_coul_charmm_cuda_cu.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCharmmCoulCharmmCuda::PairLJCharmmCoulCharmmCuda(LAMMPS *lmp) : PairLJCharmmCoulCharmm(lmp)
 {
   cuda = lmp->cuda;
    if(cuda == NULL)
         error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
 
         allocated2 = false;
         cuda->shared_data.pair.cudable_force = 1;
         cuda->shared_data.pair.use_block_per_atom = 0;
         cuda->setSystemParams();
 }
 
 /* ----------------------------------------------------------------------
    remember pointer to arrays in cuda shared data
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulCharmmCuda::allocate()
 {
         if(! allocated) PairLJCharmmCoulCharmm::allocate();
         if(! allocated2)
         {
                 allocated2 = true;
                 cuda->shared_data.pair.coeff1  = lj1;
                 cuda->shared_data.pair.coeff2  = lj2;
                 cuda->shared_data.pair.coeff3  = lj3;
                 cuda->shared_data.pair.coeff4  = lj4;
                 cuda->shared_data.pair.special_lj  = force->special_lj;
                 cuda->shared_data.pair.special_coul  = force->special_coul;
             cu_lj1_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj1, &cuda->shared_data.pair.coeff1_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_lj2_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj2, &cuda->shared_data.pair.coeff2_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_lj3_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj3, &cuda->shared_data.pair.coeff3_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_lj4_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj4, &cuda->shared_data.pair.coeff4_gm, (atom->ntypes+1)*(atom->ntypes+1));
         }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCharmmCoulCharmmCuda::compute(int eflag, int vflag)
 {
           if (eflag || vflag) ev_setup(eflag,vflag);
         if(not cuda->shared_data.pair.collect_forces_later)
         {
           if(eflag) cuda->cu_eng_vdwl->upload();
           if(eflag) cuda->cu_eng_coul->upload();
           if(vflag) cuda->cu_virial->upload();
         }
 
         Cuda_PairLJCharmmCoulCharmmCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom,denom_lj,cut_coul_innersq,denom_coul);
 
         if(not cuda->shared_data.pair.collect_forces_later)
         {
           if(eflag) cuda->cu_eng_vdwl->download();
           if(eflag) cuda->cu_eng_coul->download();
           if(vflag) cuda->cu_virial->download();
         }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCharmmCoulCharmmCuda::settings(int narg, char **arg)
 {
         PairLJCharmmCoulCharmm::settings(narg, arg);
         cuda->shared_data.pair.cut_global = (X_CFLOAT) cut_lj;
         cuda->shared_data.pair.cut_coulsq_global = (X_CFLOAT) cut_coulsq;
         cuda->shared_data.pair.cut_inner_global = (F_CFLOAT) cut_lj_inner;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCharmmCoulCharmmCuda::coeff(int narg, char **arg)
 {
         PairLJCharmmCoulCharmm::coeff(narg, arg);
         allocate();
 }
 
 void PairLJCharmmCoulCharmmCuda::init_style()
 {
   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/charmm/coul/long requires atom attribute q");
   // request regular or rRESPA neighbor lists
 
         if(atom->molecular)
         {
           cuda->shared_data.pair.collect_forces_later = 1;
         }
 
   int irequest;
 
-           irequest = neighbor->request(this);
+  irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->full = 1;
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->cudable = 1;
 
    if (cut_lj_inner >= cut_lj || cut_coul_inner >= cut_coul)
     error->all(FLERR,"Pair inner cutoff >= Pair outer cutoff");
 
   cut_lj_innersq = cut_lj_inner * cut_lj_inner;
   cut_ljsq = cut_lj * cut_lj;
   cut_coul_innersq = cut_coul_inner * cut_coul_inner;
   cut_coulsq = cut_coul * cut_coul;
   cut_bothsq = MAX(cut_ljsq,cut_coulsq);
 
   denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) *
     (cut_ljsq-cut_lj_innersq);
   denom_coul = (cut_coulsq-cut_coul_innersq) * (cut_coulsq-cut_coul_innersq) *
     (cut_coulsq-cut_coul_innersq);
 
   cut_coulsq = cut_coul * cut_coul;
 
   cuda->shared_data.pair.cut_coulsq_global=cut_coulsq;
 
   cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
 }
 
 void PairLJCharmmCoulCharmmCuda::init_list(int id, NeighList *ptr)
 {
         MYDBG(printf("# CUDA PairLJCharmmCoulCharmmCuda::init_list\n");)
         PairLJCharmmCoulCharmm::init_list(id, ptr);
         #ifndef CUDA_USE_BINNING
         // right now we can only handle verlet (id 0), not respa
         if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
         // see Neighbor::init() for details on lammps lists' logic
         #endif
         MYDBG(printf("# CUDA PairLJCharmmCoulCharmmCuda::init_list end\n");)
 }
 
 void PairLJCharmmCoulCharmmCuda::ev_setup(int eflag, int vflag)
 {
         int maxeatomold=maxeatom;
         PairLJCharmmCoulCharmm::ev_setup(eflag,vflag);
 
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
 
   if (vflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
 
 }
diff --git a/src/USER-CUDA/pair_lj_charmm_coul_charmm_implicit_cuda.cpp b/src/USER-CUDA/pair_lj_charmm_coul_charmm_implicit_cuda.cpp
index 2b226af6b..c2f2ca871 100644
--- a/src/USER-CUDA/pair_lj_charmm_coul_charmm_implicit_cuda.cpp
+++ b/src/USER-CUDA/pair_lj_charmm_coul_charmm_implicit_cuda.cpp
@@ -1,183 +1,183 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    Contributing author: Paul Crozier (SNL)
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_lj_charmm_coul_charmm_implicit_cuda.h"
 #include "pair_lj_charmm_coul_charmm_implicit_cuda_cu.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCharmmCoulCharmmImplicitCuda::PairLJCharmmCoulCharmmImplicitCuda(LAMMPS *lmp) : PairLJCharmmCoulCharmmImplicit(lmp)
 {
   cuda = lmp->cuda;
    if(cuda == NULL)
         error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
 
         allocated2 = false;
         cuda->shared_data.pair.cudable_force = 1;
         cuda->shared_data.pair.collect_forces_later = 1;
         cuda->setSystemParams();
 }
 
 /* ----------------------------------------------------------------------
    remember pointer to arrays in cuda shared data
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulCharmmImplicitCuda::allocate()
 {
         if(! allocated) PairLJCharmmCoulCharmmImplicit::allocate();
         if(! allocated2)
         {
                 allocated2 = true;
                 cuda->shared_data.pair.coeff1  = lj1;
                 cuda->shared_data.pair.coeff2  = lj2;
                 cuda->shared_data.pair.coeff3  = lj3;
                 cuda->shared_data.pair.coeff4  = lj4;
                 cuda->shared_data.pair.special_lj  = force->special_lj;
                 cuda->shared_data.pair.special_coul  = force->special_coul;
             cu_lj1_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj1, &cuda->shared_data.pair.coeff1_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_lj2_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj2, &cuda->shared_data.pair.coeff2_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_lj3_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj3, &cuda->shared_data.pair.coeff3_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_lj4_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj4, &cuda->shared_data.pair.coeff4_gm, (atom->ntypes+1)*(atom->ntypes+1));
         }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCharmmCoulCharmmImplicitCuda::compute(int eflag, int vflag)
 {
         if (eflag || vflag) ev_setup(eflag,vflag);
         if(not cuda->shared_data.pair.collect_forces_later)
         {
           if(eflag) cuda->cu_eng_vdwl->upload();
           if(eflag) cuda->cu_eng_coul->upload();
           if(vflag) cuda->cu_virial->upload();
         }
 
         Cuda_PairLJCharmmCoulCharmmImplicitCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom,denom_lj,cut_coul_innersq,denom_coul);
 
         if(not cuda->shared_data.pair.collect_forces_later)
         {
           if(eflag) cuda->cu_eng_vdwl->download();
           if(eflag) cuda->cu_eng_coul->download();
           if(vflag) cuda->cu_virial->download();
         }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCharmmCoulCharmmImplicitCuda::settings(int narg, char **arg)
 {
         PairLJCharmmCoulCharmmImplicit::settings(narg, arg);
         cuda->shared_data.pair.cut_global = (X_CFLOAT) cut_lj;
         cuda->shared_data.pair.cut_coulsq_global = (X_CFLOAT) cut_coulsq;
         cuda->shared_data.pair.cut_inner_global = (F_CFLOAT) cut_lj_inner;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCharmmCoulCharmmImplicitCuda::coeff(int narg, char **arg)
 {
         PairLJCharmmCoulCharmmImplicit::coeff(narg, arg);
         allocate();
 }
 
 void PairLJCharmmCoulCharmmImplicitCuda::init_style()
 {
   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/charmm/coul/long requires atom attribute q");
   // request regular or rRESPA neighbor lists
 
   int irequest;
 
-           irequest = neighbor->request(this);
+  irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->full = 1;
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->cudable = 1;
 
    if (cut_lj_inner >= cut_lj || cut_coul_inner >= cut_coul)
     error->all(FLERR,"Pair inner cutoff >= Pair outer cutoff");
 
   cut_lj_innersq = cut_lj_inner * cut_lj_inner;
   cut_ljsq = cut_lj * cut_lj;
   cut_coul_innersq = cut_coul_inner * cut_coul_inner;
   cut_coulsq = cut_coul * cut_coul;
   cut_bothsq = MAX(cut_ljsq,cut_coulsq);
 
   denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) *
     (cut_ljsq-cut_lj_innersq);
   denom_coul = (cut_coulsq-cut_coul_innersq) * (cut_coulsq-cut_coul_innersq) *
     (cut_coulsq-cut_coul_innersq);
 
   cut_coulsq = cut_coul * cut_coul;
 
   cuda->shared_data.pair.cut_coulsq_global=cut_coulsq;
 
   cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
 }
 
 void PairLJCharmmCoulCharmmImplicitCuda::init_list(int id, NeighList *ptr)
 {
         MYDBG(printf("# CUDA PairLJCharmmCoulCharmmImplicitCuda::init_list\n");)
         PairLJCharmmCoulCharmmImplicit::init_list(id, ptr);
         #ifndef CUDA_USE_BINNING
         // right now we can only handle verlet (id 0), not respa
         if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
         // see Neighbor::init() for details on lammps lists' logic
         #endif
         MYDBG(printf("# CUDA PairLJCharmmCoulCharmmImplicitCuda::init_list end\n");)
 }
 
 void PairLJCharmmCoulCharmmImplicitCuda::ev_setup(int eflag, int vflag)
 {
         int maxeatomold=maxeatom;
         PairLJCharmmCoulCharmmImplicit::ev_setup(eflag,vflag);
 
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
 
   if (vflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
 
 }
diff --git a/src/USER-CUDA/pair_lj_charmm_coul_long_cuda.cpp b/src/USER-CUDA/pair_lj_charmm_coul_long_cuda.cpp
index 08d83d32e..b228bd6f4 100644
--- a/src/USER-CUDA/pair_lj_charmm_coul_long_cuda.cpp
+++ b/src/USER-CUDA/pair_lj_charmm_coul_long_cuda.cpp
@@ -1,196 +1,196 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    Contributing author: Paul Crozier (SNL)
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_lj_charmm_coul_long_cuda.h"
 #include "pair_lj_charmm_coul_long_cuda_cu.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 /* ---------------------------------------------------------------------- */
 
 PairLJCharmmCoulLongCuda::PairLJCharmmCoulLongCuda(LAMMPS *lmp) : PairLJCharmmCoulLong(lmp)
 {
   cuda = lmp->cuda;
    if(cuda == NULL)
         error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
 
         allocated2 = false;
         cuda->shared_data.pair.cudable_force = 1;
         cuda->shared_data.pair.collect_forces_later = 1;
         cuda->setSystemParams();
 }
 
 /* ----------------------------------------------------------------------
    remember pointer to arrays in cuda shared data
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLongCuda::allocate()
 {
         if(! allocated) PairLJCharmmCoulLong::allocate();
         if(! allocated2)
         {
                 allocated2 = true;
                 //cuda->shared_data.pair.cut     = cut_lj;
                 cuda->shared_data.pair.coeff1  = lj1;
                 cuda->shared_data.pair.coeff2  = lj2;
                 cuda->shared_data.pair.coeff3  = lj3;
                 cuda->shared_data.pair.coeff4  = lj4;
                 cuda->shared_data.pair.offset  = offset;
                 cuda->shared_data.pair.special_lj  = force->special_lj;
                 cuda->shared_data.pair.special_coul  = force->special_coul;
             cu_lj1_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj1, &cuda->shared_data.pair.coeff1_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_lj2_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj2, &cuda->shared_data.pair.coeff2_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_lj3_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj3, &cuda->shared_data.pair.coeff3_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_lj4_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj4, &cuda->shared_data.pair.coeff4_gm, (atom->ntypes+1)*(atom->ntypes+1));
         }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLongCuda::compute(int eflag, int vflag)
 {
         if (eflag || vflag) ev_setup(eflag,vflag);
         if(not cuda->shared_data.pair.collect_forces_later)
         {
           if(eflag) cuda->cu_eng_vdwl->upload();
           if(eflag) cuda->cu_eng_coul->upload();
           if(vflag) cuda->cu_virial->upload();
         }
 
         Cuda_PairLJCharmmCoulLongCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom,denom_lj);
 
         if(not cuda->shared_data.pair.collect_forces_later)
         {
           if(eflag) cuda->cu_eng_vdwl->download();
           if(eflag) cuda->cu_eng_coul->download();
           if(vflag) cuda->cu_virial->download();
         }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLongCuda::settings(int narg, char **arg)
 {
         PairLJCharmmCoulLong::settings(narg, arg);
         cuda->shared_data.pair.cut_global = (X_CFLOAT) cut_lj;
         cuda->shared_data.pair.cut_coulsq_global = (X_CFLOAT) cut_coulsq;
         cuda->shared_data.pair.cut_inner_global = (F_CFLOAT) cut_lj_inner;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLongCuda::coeff(int narg, char **arg)
 {
         PairLJCharmmCoulLong::coeff(narg, arg);
         allocate();
 }
 
 void PairLJCharmmCoulLongCuda::init_style()
 {
   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/charmm/coul/long requires atom attribute q");
   // request regular or rRESPA neighbor lists
 
   int irequest;
 
 
-          irequest = neighbor->request(this);
+  irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->full = 1;
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->cudable = 1;
 
   if (cut_lj_inner >= cut_lj)
     error->all(FLERR,"Pair inner cutoff >= Pair outer cutoff");
 
   cut_lj_innersq = cut_lj_inner * cut_lj_inner;
   cut_ljsq = cut_lj * cut_lj;
   cut_coulsq = cut_coul * cut_coul;
   cut_bothsq = MAX(cut_ljsq,cut_coulsq);
 
   denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) *
     (cut_ljsq-cut_lj_innersq);
 
   cut_coulsq = cut_coul * cut_coul;
   cuda->shared_data.pair.cut_coulsq_global=cut_coulsq;
 
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style is incompatible with KSpace style");
   g_ewald = force->kspace->g_ewald;
   cuda->shared_data.pair.g_ewald=g_ewald;
   cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
 
 
   if(ncoultablebits) error->warning(FLERR,"# CUDA: You asked for the usage of Coulomb Tables. This is not supported in CUDA Pair forces. Setting is ignored.\n");
 }
 
 void PairLJCharmmCoulLongCuda::init_list(int id, NeighList *ptr)
 {
         MYDBG(printf("# CUDA PairLJCharmmCoulLongCuda::init_list\n");)
         PairLJCharmmCoulLong::init_list(id, ptr);
         #ifndef CUDA_USE_BINNING
         // right now we can only handle verlet (id 0), not respa
         if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
         // see Neighbor::init() for details on lammps lists' logic
         #endif
         MYDBG(printf("# CUDA PairLJCharmmCoulLongCuda::init_list end\n");)
 }
 
 void PairLJCharmmCoulLongCuda::ev_setup(int eflag, int vflag)
 {
         int maxeatomold=maxeatom;
         PairLJCharmmCoulLong::ev_setup(eflag,vflag);
 
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
 
   if (vflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
 
 }
diff --git a/src/USER-CUDA/pair_lj_class2_coul_cut_cuda.cpp b/src/USER-CUDA/pair_lj_class2_coul_cut_cuda.cpp
index 20ec9bd1a..01b6dc071 100644
--- a/src/USER-CUDA/pair_lj_class2_coul_cut_cuda.cpp
+++ b/src/USER-CUDA/pair_lj_class2_coul_cut_cuda.cpp
@@ -1,162 +1,162 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    Contributing author: Paul Crozier (SNL)
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_lj_class2_coul_cut_cuda.h"
 #include "pair_lj_class2_coul_cut_cuda_cu.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairLJClass2CoulCutCuda::PairLJClass2CoulCutCuda(LAMMPS *lmp) : PairLJClass2CoulCut(lmp)
 {
   cuda = lmp->cuda;
    if(cuda == NULL)
         error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
 
         allocated2 = false;
         cuda->shared_data.pair.cudable_force = 1;
         cuda->setSystemParams();
 }
 
 /* ----------------------------------------------------------------------
    remember pointer to arrays in cuda shared data
 ------------------------------------------------------------------------- */
 
 void PairLJClass2CoulCutCuda::allocate()
 {
         if(! allocated) PairLJClass2CoulCut::allocate();
         if(! allocated2)
         {
                 allocated2 = true;
                 cuda->shared_data.pair.cut     = cut_lj;
                 cuda->shared_data.pair.cut_coul= cut_coul;
                 cuda->shared_data.pair.coeff1  = lj1;
                 cuda->shared_data.pair.coeff2  = lj2;
                 cuda->shared_data.pair.coeff3  = lj3;
                 cuda->shared_data.pair.coeff4  = lj4;
                 cuda->shared_data.pair.offset  = offset;
                 cuda->shared_data.pair.special_lj  = force->special_lj;
                 cuda->shared_data.pair.special_coul  = force->special_coul;
         }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJClass2CoulCutCuda::compute(int eflag, int vflag)
 {
         if (eflag || vflag) ev_setup(eflag,vflag);
         if(eflag) cuda->cu_eng_vdwl->upload();
         if(eflag) cuda->cu_eng_coul->upload();
         if(vflag) cuda->cu_virial->upload();
 
         Cuda_PairLJClass2CoulCutCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
 
     if(not cuda->shared_data.pair.collect_forces_later)
     {
           if(eflag) cuda->cu_eng_vdwl->download();
           if(eflag) cuda->cu_eng_coul->download();
           if(vflag) cuda->cu_virial->download();
     }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJClass2CoulCutCuda::settings(int narg, char **arg)
 {
         PairLJClass2CoulCut::settings(narg, arg);
         cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_lj_global;
         cuda->shared_data.pair.cut_coul_global = (F_CFLOAT) cut_coul_global;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJClass2CoulCutCuda::coeff(int narg, char **arg)
 {
         PairLJClass2CoulCut::coeff(narg, arg);
         allocate();
 }
 
 void PairLJClass2CoulCutCuda::init_style()
 {
   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/cut/coul/cut/cuda requires atom attribute q");
   // request regular or rRESPA neighbor lists
 
   int irequest;
 
 
-  irequest = neighbor->request(this);
+  irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->full = 1;
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->cudable = 1;
 
 
   cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
 
 }
 
 void PairLJClass2CoulCutCuda::init_list(int id, NeighList *ptr)
 {
         MYDBG(printf("# CUDA PairLJClass2CoulCutCuda::init_list\n");)
         PairLJClass2CoulCut::init_list(id, ptr);
         #ifndef CUDA_USE_BINNING
         // right now we can only handle verlet (id 0), not respa
         if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
         // see Neighbor::init() for details on lammps lists' logic
         #endif
         MYDBG(printf("# CUDA PairLJClass2CoulCutCuda::init_list end\n");)
 }
 
 void PairLJClass2CoulCutCuda::ev_setup(int eflag, int vflag)
 {
         int maxeatomold=maxeatom;
         PairLJClass2CoulCut::ev_setup(eflag,vflag);
 
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
 
   if (vflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
 
 }
diff --git a/src/USER-CUDA/pair_lj_class2_coul_long_cuda.cpp b/src/USER-CUDA/pair_lj_class2_coul_long_cuda.cpp
index c02694c4f..20f257ffe 100644
--- a/src/USER-CUDA/pair_lj_class2_coul_long_cuda.cpp
+++ b/src/USER-CUDA/pair_lj_class2_coul_long_cuda.cpp
@@ -1,175 +1,175 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    Contributing author: Paul Crozier (SNL)
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_lj_class2_coul_long_cuda.h"
 #include "pair_lj_class2_coul_long_cuda_cu.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 /* ---------------------------------------------------------------------- */
 
 PairLJClass2CoulLongCuda::PairLJClass2CoulLongCuda(LAMMPS *lmp) : PairLJClass2CoulLong(lmp)
 {
   // Take the CUDA context from the LAMMPS instance; a /cuda style cannot work without it.
   cuda = lmp->cuda;
   if (!cuda) {
     error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
   }
   // Nothing has been published to cuda->shared_data yet; allocate() does that once.
   allocated2 = false;
   // Mark the pair force as cudable, then call the context's setSystemParams().
   cuda->shared_data.pair.cudable_force = 1;
   cuda->setSystemParams();
 }
 
 /* ----------------------------------------------------------------------
    remember pointer to arrays in cuda shared data
 ------------------------------------------------------------------------- */
 
 void PairLJClass2CoulLongCuda::allocate()
 {
   // Base-class arrays come first, if they do not exist yet.
   if (!allocated) PairLJClass2CoulLong::allocate();
   // The pointers below are stored in the shared data only once.
   if (allocated2) return;
   allocated2 = true;
   cuda->shared_data.pair.cut          = cut_lj;
   cuda->shared_data.pair.coeff1       = lj1;
   cuda->shared_data.pair.coeff2       = lj2;
   cuda->shared_data.pair.coeff3       = lj3;
   cuda->shared_data.pair.coeff4       = lj4;
   cuda->shared_data.pair.offset       = offset;
   cuda->shared_data.pair.special_lj   = force->special_lj;
   cuda->shared_data.pair.special_coul = force->special_coul;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJClass2CoulLongCuda::compute(int eflag, int vflag)
 {
   // Energy/virial setup is only needed when something is tallied this step.
   if (eflag || vflag) ev_setup(eflag,vflag);
   // upload() the accumulators (presumably host -> device) before the CUDA pair routine.
   if (eflag) {
     cuda->cu_eng_vdwl->upload();
     cuda->cu_eng_coul->upload();
   }
   if (vflag) cuda->cu_virial->upload();
   Cuda_PairLJClass2CoulLongCuda(&cuda->shared_data, &cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
   // With collect_forces_later set, the downloads are skipped here.
   if (cuda->shared_data.pair.collect_forces_later) return;
   if (eflag) {
     cuda->cu_eng_vdwl->download();
     cuda->cu_eng_coul->download();
   }
   if (vflag) cuda->cu_virial->download();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJClass2CoulLongCuda::settings(int narg, char **arg)
 {
         PairLJClass2CoulLong::settings(narg, arg);
         cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_lj_global;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJClass2CoulLongCuda::coeff(int narg, char **arg)
 {
         PairLJClass2CoulLong::coeff(narg, arg);
         allocate();
 }
 
 void PairLJClass2CoulLongCuda::init_style()
 {
   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/cut/coul/long requires atom attribute q");
   // request regular or rRESPA neighbor lists
 
   int irequest;
 
 
-          irequest = neighbor->request(this);
+  irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->full = 1;
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->cudable = 1;
 
   cut_coulsq = cut_coul * cut_coul;
   cuda->shared_data.pair.cut_coul_global=cut_coul;
   cuda->shared_data.pair.cut_coulsq_global=cut_coulsq;
   // set rRESPA cutoffs
 
   if (force->newton) error->warning(FLERR,"Pair style uses does not use \"newton\" setting. You might test if \"newton off\" makes the simulation run faster.");
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style is incompatible with KSpace style");
   g_ewald = force->kspace->g_ewald;
   cuda->shared_data.pair.g_ewald=g_ewald;
   cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
 
 
   if(ncoultablebits) error->warning(FLERR,"# CUDA: You asked for the usage of Coulomb Tables. This is not supported in CUDA Pair forces. Setting is ignored.\n");
 }
 
 void PairLJClass2CoulLongCuda::init_list(int id, NeighList *ptr)
 {
   MYDBG( printf("# CUDA PairLJClass2CoulLongCuda::init_list\n"); )
   // Let the base style handle the list first.
   PairLJClass2CoulLong::init_list(id, ptr);
   // Only list id 0 is registered with the CUDA context.
   // See Neighbor::init() for the list id logic.
   if (0 == id) {
     cuda_neigh_list = cuda->registerNeighborList(ptr);
   }
   MYDBG( printf("# CUDA PairLJClass2CoulLongCuda::init_list end\n"); )
 }
 
 void PairLJClass2CoulLongCuda::ev_setup(int eflag, int vflag)
 {
         int maxeatomold=maxeatom;
         PairLJClass2CoulLong::ev_setup(eflag,vflag);
 
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
 
   if (vflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
 
 }
diff --git a/src/USER-CUDA/pair_lj_class2_cuda.cpp b/src/USER-CUDA/pair_lj_class2_cuda.cpp
index 5dfb6ed65..6b9f686c1 100644
--- a/src/USER-CUDA/pair_lj_class2_cuda.cpp
+++ b/src/USER-CUDA/pair_lj_class2_cuda.cpp
@@ -1,167 +1,167 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Paul Crozier (SNL)
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_lj_class2_cuda.h"
 #include "pair_lj_class2_cuda_cu.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairLJClass2Cuda::PairLJClass2Cuda(LAMMPS *lmp) : PairLJClass2(lmp)
 {
   // Take the CUDA context from the LAMMPS instance; a /cuda style cannot work without it.
   cuda = lmp->cuda;
   if (!cuda) {
     error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
   }
   // Nothing has been published to cuda->shared_data yet; allocate() does that once.
   allocated2 = false;
   // Mark the pair force as cudable, then call the context's setSystemParams().
   cuda->shared_data.pair.cudable_force = 1;
   cuda->setSystemParams();
 }
 
 /* ----------------------------------------------------------------------
    remember pointer to arrays in cuda shared data
 ------------------------------------------------------------------------- */
 
 void PairLJClass2Cuda::allocate()
 {
   // Base-class arrays come first, if they do not exist yet.
   if (!allocated) PairLJClass2::allocate();
   // The pointers below are stored in the shared data only once.
   if (allocated2) return;
   allocated2 = true;
   cuda->shared_data.pair.cut          = cut;
   cuda->shared_data.pair.coeff1       = lj1;
   cuda->shared_data.pair.coeff2       = lj2;
   cuda->shared_data.pair.coeff3       = lj3;
   cuda->shared_data.pair.coeff4       = lj4;
   cuda->shared_data.pair.offset       = offset;
   cuda->shared_data.pair.special_lj   = force->special_lj;
   cuda->shared_data.pair.special_coul = force->special_coul;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJClass2Cuda::compute(int eflag, int vflag)
 {
   // Energy/virial setup is only needed when something is tallied this step.
   if (eflag || vflag) ev_setup(eflag,vflag);
   // upload() the accumulators (presumably host -> device) before the CUDA pair routine.
   if (eflag) cuda->cu_eng_vdwl->upload();
   if (vflag) cuda->cu_virial->upload();
   Cuda_PairLJClass2Cuda(&cuda->shared_data, &cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
   // With collect_forces_later set, the downloads are skipped here.
   if (cuda->shared_data.pair.collect_forces_later) return;
   if (eflag) cuda->cu_eng_vdwl->download();
   if (vflag) cuda->cu_virial->download();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJClass2Cuda::settings(int narg, char **arg)
 {
         PairLJClass2::settings(narg, arg);
         cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_global;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJClass2Cuda::coeff(int narg, char **arg)
 {
         PairLJClass2::coeff(narg, arg);
         allocate();
 }
 
 // Style initialization: request one neighbor list for this pair style and
 // flag it as full (not half) and cudable. Debug traces are emitted via MYDBG.
 void PairLJClass2Cuda::init_style()
 {
         MYDBG(printf("# CUDA PairLJClass2Cuda::init_style start\n"); )
   // request a single full neighbor list (no rRESPA lists are requested here)
 
   int irequest;
 
-          irequest = neighbor->request(this);
+        irequest = neighbor->request(this,instance_me);
     // irequest indexes the new entry in neighbor->requests
     neighbor->requests[irequest]->full = 1;
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->cudable = 1;
     //neighbor->style=0; //0=NSQ neighboring
   MYDBG(printf("# CUDA PairLJClass2Cuda::init_style end\n"); )
 }
 
 void PairLJClass2Cuda::init_list(int id, NeighList *ptr)
 {
   MYDBG( printf("# CUDA PairLJClass2Cuda::init_list\n"); )
   // Let the base style handle the list first.
   PairLJClass2::init_list(id, ptr);
   // Only list id 0 is registered with the CUDA context.
   // See Neighbor::init() for the list id logic.
   if (0 == id) {
     cuda_neigh_list = cuda->registerNeighborList(ptr);
   }
   MYDBG( printf("# CUDA PairLJClass2Cuda::init_list end\n"); )
 }
 
 void PairLJClass2Cuda::ev_setup(int eflag, int vflag)
 {
         int maxeatomold=maxeatom;
         PairLJClass2::ev_setup(eflag,vflag);
 
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
 
   if (vflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
 
 }
diff --git a/src/USER-CUDA/pair_lj_cut_coul_cut_cuda.cpp b/src/USER-CUDA/pair_lj_cut_coul_cut_cuda.cpp
index cf1fc7e9f..3872be0d0 100644
--- a/src/USER-CUDA/pair_lj_cut_coul_cut_cuda.cpp
+++ b/src/USER-CUDA/pair_lj_cut_coul_cut_cuda.cpp
@@ -1,162 +1,162 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    Contributing author: Paul Crozier (SNL)
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_lj_cut_coul_cut_cuda.h"
 #include "pair_lj_cut_coul_cut_cuda_cu.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCutCoulCutCuda::PairLJCutCoulCutCuda(LAMMPS *lmp) : PairLJCutCoulCut(lmp)
 {
   // Take the CUDA context from the LAMMPS instance; a /cuda style cannot work without it.
   cuda = lmp->cuda;
   if (!cuda) {
     error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
   }
   // Nothing has been published to cuda->shared_data yet; allocate() does that once.
   allocated2 = false;
   // Mark the pair force as cudable, then call the context's setSystemParams().
   cuda->shared_data.pair.cudable_force = 1;
   cuda->setSystemParams();
 }
 
 /* ----------------------------------------------------------------------
    remember pointer to arrays in cuda shared data
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulCutCuda::allocate()
 {
   // Base-class arrays come first, if they do not exist yet.
   if (!allocated) PairLJCutCoulCut::allocate();
   // The pointers below are stored in the shared data only once.
   if (allocated2) return;
   allocated2 = true;
   cuda->shared_data.pair.cut          = cut_lj;
   cuda->shared_data.pair.cut_coul     = cut_coul;
   cuda->shared_data.pair.coeff1       = lj1;
   cuda->shared_data.pair.coeff2       = lj2;
   cuda->shared_data.pair.coeff3       = lj3;
   cuda->shared_data.pair.coeff4       = lj4;
   cuda->shared_data.pair.offset       = offset;
   cuda->shared_data.pair.special_lj   = force->special_lj;
   cuda->shared_data.pair.special_coul = force->special_coul;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCoulCutCuda::compute(int eflag, int vflag)
 {
   // Energy/virial setup is only needed when something is tallied this step.
   if (eflag || vflag) ev_setup(eflag,vflag);
   // upload() the accumulators (presumably host -> device) before the CUDA pair routine.
   if (eflag) {
     cuda->cu_eng_vdwl->upload();
     cuda->cu_eng_coul->upload();
   }
   if (vflag) cuda->cu_virial->upload();
   Cuda_PairLJCutCoulCutCuda(&cuda->shared_data, &cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
   // With collect_forces_later set, the downloads are skipped here.
   if (cuda->shared_data.pair.collect_forces_later) return;
   if (eflag) {
     cuda->cu_eng_vdwl->download();
     cuda->cu_eng_coul->download();
   }
   if (vflag) cuda->cu_virial->download();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCoulCutCuda::settings(int narg, char **arg)
 {
         PairLJCutCoulCut::settings(narg, arg);
         cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_lj_global;
         cuda->shared_data.pair.cut_coul_global = (F_CFLOAT) cut_coul_global;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCoulCutCuda::coeff(int narg, char **arg)
 {
         PairLJCutCoulCut::coeff(narg, arg);
         allocate();
 }
 
 // Style initialization: require per-atom charge, request one neighbor list
 // flagged full (not half) and cudable, and copy qqrd2e into the CUDA shared data.
 void PairLJCutCoulCutCuda::init_style()
 {
   // abort when the atom style provides no charge attribute
   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/cut/coul/cut/cuda requires atom attribute q");
   // request a single full neighbor list (no rRESPA lists are requested here)
 
   int irequest;
 
 
-  irequest = neighbor->request(this);
+  irequest = neighbor->request(this,instance_me);
   // irequest indexes the new entry in neighbor->requests
   neighbor->requests[irequest]->full = 1;
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->cudable = 1;
 
 
   // Coulomb conversion factor, stored in the pppm section of the shared data
   cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
 
 }
 
 void PairLJCutCoulCutCuda::init_list(int id, NeighList *ptr)
 {
   MYDBG( printf("# CUDA PairLJCutCoulCutCuda::init_list\n"); )
   // Let the base style handle the list first.
   PairLJCutCoulCut::init_list(id, ptr);
 #ifndef CUDA_USE_BINNING
   // Only the verlet list (id 0) is registered with the CUDA context; respa
   // lists are not handled. See Neighbor::init() for the list id logic.
   if (0 == id) {
     cuda_neigh_list = cuda->registerNeighborList(ptr);
   }
 #endif
   MYDBG( printf("# CUDA PairLJCutCoulCutCuda::init_list end\n"); )
 }
 
 void PairLJCutCoulCutCuda::ev_setup(int eflag, int vflag)
 {
         int maxeatomold=maxeatom;
         PairLJCutCoulCut::ev_setup(eflag,vflag);
 
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
 
   if (vflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
 
 }
diff --git a/src/USER-CUDA/pair_lj_cut_coul_debye_cuda.cpp b/src/USER-CUDA/pair_lj_cut_coul_debye_cuda.cpp
index 790565324..43bcce68c 100644
--- a/src/USER-CUDA/pair_lj_cut_coul_debye_cuda.cpp
+++ b/src/USER-CUDA/pair_lj_cut_coul_debye_cuda.cpp
@@ -1,163 +1,163 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    Contributing author: Paul Crozier (SNL)
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_lj_cut_coul_debye_cuda.h"
 #include "pair_lj_cut_coul_debye_cuda_cu.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCutCoulDebyeCuda::PairLJCutCoulDebyeCuda(LAMMPS *lmp) : PairLJCutCoulDebye(lmp)
 {
   // Take the CUDA context from the LAMMPS instance; a /cuda style cannot work without it.
   cuda = lmp->cuda;
   if (!cuda) {
     error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
   }
   // Nothing has been published to cuda->shared_data yet; allocate() does that once.
   allocated2 = false;
   // Mark the pair force as cudable, then call the context's setSystemParams().
   cuda->shared_data.pair.cudable_force = 1;
   cuda->setSystemParams();
 }
 
 /* ----------------------------------------------------------------------
    remember pointer to arrays in cuda shared data
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulDebyeCuda::allocate()
 {
   // Base-class arrays come first, if they do not exist yet.
   if (!allocated) PairLJCutCoulDebye::allocate();
   // The pointers below are stored in the shared data only once.
   if (allocated2) return;
   allocated2 = true;
   cuda->shared_data.pair.cut          = cut_lj;
   cuda->shared_data.pair.cut_coul     = cut_coul;
   cuda->shared_data.pair.coeff1       = lj1;
   cuda->shared_data.pair.coeff2       = lj2;
   cuda->shared_data.pair.coeff3       = lj3;
   cuda->shared_data.pair.coeff4       = lj4;
   cuda->shared_data.pair.offset       = offset;
   cuda->shared_data.pair.special_lj   = force->special_lj;
   cuda->shared_data.pair.special_coul = force->special_coul;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCoulDebyeCuda::compute(int eflag, int vflag)
 {
   // Energy/virial setup is only needed when something is tallied this step.
   if (eflag || vflag) ev_setup(eflag,vflag);
   // upload() the accumulators (presumably host -> device) before the CUDA pair routine.
   if (eflag) {
     cuda->cu_eng_vdwl->upload();
     cuda->cu_eng_coul->upload();
   }
   if (vflag) cuda->cu_virial->upload();
   Cuda_PairLJCutCoulDebyeCuda(&cuda->shared_data, &cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
   // With collect_forces_later set, the downloads are skipped here.
   if (cuda->shared_data.pair.collect_forces_later) return;
   if (eflag) {
     cuda->cu_eng_vdwl->download();
     cuda->cu_eng_coul->download();
   }
   if (vflag) cuda->cu_virial->download();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCoulDebyeCuda::settings(int narg, char **arg)
 {
         PairLJCutCoulDebye::settings(narg, arg);
         cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_lj_global;
         cuda->shared_data.pair.cut_coul_global = (F_CFLOAT) cut_coul_global;
         cuda->shared_data.pair.kappa = (F_CFLOAT) kappa;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCoulDebyeCuda::coeff(int narg, char **arg)
 {
         PairLJCutCoulDebye::coeff(narg, arg);
         allocate();
 }
 
 // Style initialization: require per-atom charge, request one neighbor list
 // flagged full (not half) and cudable, and copy qqrd2e into the CUDA shared data.
 void PairLJCutCoulDebyeCuda::init_style()
 {
   // abort when the atom style provides no charge attribute
   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/cut/coul/debye/cuda requires atom attribute q");
   // request a single full neighbor list (no rRESPA lists are requested here)
 
   int irequest;
 
 
-  irequest = neighbor->request(this);
+  irequest = neighbor->request(this,instance_me);
   // irequest indexes the new entry in neighbor->requests
   neighbor->requests[irequest]->full = 1;
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->cudable = 1;
 
 
   // Coulomb conversion factor, stored in the pppm section of the shared data
   cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
 
 }
 
 void PairLJCutCoulDebyeCuda::init_list(int id, NeighList *ptr)
 {
   MYDBG( printf("# CUDA PairLJCutCoulDebyeCuda::init_list\n"); )
   // Let the base style handle the list first.
   PairLJCutCoulDebye::init_list(id, ptr);
 #ifndef CUDA_USE_BINNING
   // Only the verlet list (id 0) is registered with the CUDA context; respa
   // lists are not handled. See Neighbor::init() for the list id logic.
   if (0 == id) {
     cuda_neigh_list = cuda->registerNeighborList(ptr);
   }
 #endif
   MYDBG( printf("# CUDA PairLJCutCoulDebyeCuda::init_list end\n"); )
 }
 
 void PairLJCutCoulDebyeCuda::ev_setup(int eflag, int vflag)
 {
         int maxeatomold=maxeatom;
         PairLJCutCoulDebye::ev_setup(eflag,vflag);
 
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
 
   if (vflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
 
 }
diff --git a/src/USER-CUDA/pair_lj_cut_coul_long_cuda.cpp b/src/USER-CUDA/pair_lj_cut_coul_long_cuda.cpp
index 4185ba3a8..52397f942 100644
--- a/src/USER-CUDA/pair_lj_cut_coul_long_cuda.cpp
+++ b/src/USER-CUDA/pair_lj_cut_coul_long_cuda.cpp
@@ -1,216 +1,216 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    Contributing author: Paul Crozier (SNL)
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_lj_cut_coul_long_cuda.h"
 #include "pair_lj_cut_coul_long_cuda_cu.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 /* ---------------------------------------------------------------------- */
 
 PairLJCutCoulLongCuda::PairLJCutCoulLongCuda(LAMMPS *lmp) : PairLJCutCoulLong(lmp)
 {
   // Take the CUDA context from the LAMMPS instance; a /cuda style cannot work without it.
   cuda = lmp->cuda;
   if (!cuda) {
     error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
   }
   // Nothing has been published to cuda->shared_data yet; allocate() does that once.
   allocated2 = false;
   // Mark the pair force as cudable, then call the context's setSystemParams().
   cuda->shared_data.pair.cudable_force = 1;
   cuda->setSystemParams();
 }
 
 /* ----------------------------------------------------------------------
    remember pointer to arrays in cuda shared data
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulLongCuda::allocate()
 {
   // Base-class arrays come first, if they do not exist yet.
   if (!allocated) PairLJCutCoulLong::allocate();
   // The pointers below are stored in the shared data only once.
   if (allocated2) return;
   allocated2 = true;
   cuda->shared_data.pair.cut          = cut_lj;
   cuda->shared_data.pair.coeff1       = lj1;
   cuda->shared_data.pair.coeff2       = lj2;
   cuda->shared_data.pair.coeff3       = lj3;
   cuda->shared_data.pair.coeff4       = lj4;
   cuda->shared_data.pair.offset       = offset;
   cuda->shared_data.pair.special_lj   = force->special_lj;
   cuda->shared_data.pair.special_coul = force->special_coul;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCoulLongCuda::compute(int eflag, int vflag)
 {
   // Energy/virial setup is only needed when something is tallied this step.
   if (eflag || vflag) ev_setup(eflag,vflag);
   // upload() the accumulators (presumably host -> device) before the CUDA pair routine.
   if (eflag) {
     cuda->cu_eng_vdwl->upload();
     cuda->cu_eng_coul->upload();
   }
   if (vflag) cuda->cu_virial->upload();
   Cuda_PairLJCutCoulLongCuda(&cuda->shared_data, &cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
   // With collect_forces_later set, the downloads are skipped here.
   if (cuda->shared_data.pair.collect_forces_later) return;
   if (eflag) {
     cuda->cu_eng_vdwl->download();
     cuda->cu_eng_coul->download();
   }
   if (vflag) cuda->cu_virial->download();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCoulLongCuda::settings(int narg, char **arg)
 {
         PairLJCutCoulLong::settings(narg, arg);
         cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_lj_global;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCoulLongCuda::coeff(int narg, char **arg)
 {
         PairLJCutCoulLong::coeff(narg, arg);
         allocate();
 }
 
 void PairLJCutCoulLongCuda::init_style()
 {
   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/cut/coul/long requires atom attribute q");
   // request regular or rRESPA neighbor lists
 
   int irequest;
 
   if (update->whichflag == 0 && strstr(update->integrate_style,"respa")) {
     int respa = 0;
     if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
     if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
 
-    if (respa == 0) irequest = neighbor->request(this);
+    if (respa == 0) irequest = neighbor->request(this,instance_me);
     else if (respa == 1) {
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 1;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respainner = 1;
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 3;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respaouter = 1;
     } else {
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 1;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respainner = 1;
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 2;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respamiddle = 1;
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 3;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respaouter = 1;
     }
 
   }
   else
   {
-          irequest = neighbor->request(this);
+    irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->full = 1;
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->cudable = 1;
   }
 
   cut_coulsq = cut_coul * cut_coul;
   cuda->shared_data.pair.cut_coul_global=cut_coul;
   cuda->shared_data.pair.cut_coulsq_global=cut_coulsq;
   // set rRESPA cutoffs
 
   if (strstr(update->integrate_style,"respa") &&
       ((Respa *) update->integrate)->level_inner >= 0)
     cut_respa = ((Respa *) update->integrate)->cutoff;
   else cut_respa = NULL;
 
   if (force->newton) error->warning(FLERR,"Pair style uses does not use \"newton\" setting. You might test if \"newton off\" makes the simulation run faster.");
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style is incompatible with KSpace style");
   g_ewald = force->kspace->g_ewald;
   cuda->shared_data.pair.g_ewald=g_ewald;
   cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
 
 
   if(ncoultablebits) error->warning(FLERR,"# CUDA: You asked for the usage of Coulomb Tables. This is not supported in CUDA Pair forces. Setting is ignored.\n");
 }
 
 void PairLJCutCoulLongCuda::init_list(int id, NeighList *ptr)
 {
   MYDBG( printf("# CUDA PairLJCutCoulLongCuda::init_list\n"); )
   // Let the base style handle the list first.
   PairLJCutCoulLong::init_list(id, ptr);
 #ifndef CUDA_USE_BINNING
   // Only the verlet list (id 0) is registered with the CUDA context; respa
   // lists are not handled. See Neighbor::init() for the list id logic.
   if (0 == id) {
     cuda_neigh_list = cuda->registerNeighborList(ptr);
   }
 #endif
   MYDBG( printf("# CUDA PairLJCutCoulLongCuda::init_list end\n"); )
 }
 
 void PairLJCutCoulLongCuda::ev_setup(int eflag, int vflag)
 {
         int maxeatomold=maxeatom;
         PairLJCutCoulLong::ev_setup(eflag,vflag);
 
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
 
   if (vflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
 
 }
diff --git a/src/USER-CUDA/pair_lj_cut_cuda.cpp b/src/USER-CUDA/pair_lj_cut_cuda.cpp
index 34fbdfdf7..a5d4f47a5 100644
--- a/src/USER-CUDA/pair_lj_cut_cuda.cpp
+++ b/src/USER-CUDA/pair_lj_cut_cuda.cpp
@@ -1,179 +1,179 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Paul Crozier (SNL)
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_lj_cut_cuda.h"
 #include "pair_lj_cut_cuda_cu.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCutCuda::PairLJCutCuda(LAMMPS *lmp) : PairLJCut(lmp)
 {
   cuda = lmp->cuda;
    if(cuda == NULL)
         error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
 
         allocated2 = false;
         cuda->shared_data.pair.cudable_force = 1;
         cuda->setSystemParams();
 }
 
 /* ----------------------------------------------------------------------
    remember pointer to arrays in cuda shared data
 ------------------------------------------------------------------------- */
 
 void PairLJCutCuda::allocate()
 {
         if(! allocated) PairLJCut::allocate();
         if(! allocated2)
         {
                 allocated2 = true;
                 cuda->shared_data.pair.cut     = cut;
                 cuda->shared_data.pair.coeff1  = lj1;
                 cuda->shared_data.pair.coeff2  = lj2;
                 cuda->shared_data.pair.coeff3  = lj3;
                 cuda->shared_data.pair.coeff4  = lj4;
                 cuda->shared_data.pair.offset  = offset;
                 cuda->shared_data.pair.special_lj  = force->special_lj;
                 cuda->shared_data.pair.special_coul  = force->special_coul;
         }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCuda::compute(int eflag, int vflag)
 {
         if (eflag || vflag) ev_setup(eflag,vflag);
         if(eflag) cuda->cu_eng_vdwl->upload();
         if(vflag) cuda->cu_virial->upload();
 
         Cuda_PairLJCutCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
 
     if(not cuda->shared_data.pair.collect_forces_later)
     {
           if(eflag) cuda->cu_eng_vdwl->download();
           if(vflag) cuda->cu_virial->download();
     }
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCuda::settings(int narg, char **arg)
 {
         PairLJCut::settings(narg, arg);
         cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_global;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCuda::coeff(int narg, char **arg)
 {
         PairLJCut::coeff(narg, arg);
         allocate();
 }
 
 void PairLJCutCuda::init_style()
 {
         MYDBG(printf("# CUDA PairLJCutCuda::init_style start\n"); )
   // request regular or rRESPA neighbor lists
 
   int irequest;
 
   if (update->whichflag == 0 && strstr(update->integrate_style,"respa")) {
 
   }
   else
   {
-          irequest = neighbor->request(this);
+    irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->full = 1;
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->cudable = 1;
     //neighbor->style=0; //0=NSQ neighboring
   }
 
 
   cut_respa = NULL;
   MYDBG(printf("# CUDA PairLJCutCuda::init_style end\n"); )
 }
 
 void PairLJCutCuda::init_list(int id, NeighList *ptr)
 {
         MYDBG(printf("# CUDA PairLJCutCuda::init_list\n");)
         PairLJCut::init_list(id, ptr);
         #ifndef CUDA_USE_BINNING
         // right now we can only handle verlet (id 0), not respa
         if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
         // see Neighbor::init() for details on lammps lists' logic
         #endif
         MYDBG(printf("# CUDA PairLJCutCuda::init_list end\n");)
 }
 
 void PairLJCutCuda::ev_setup(int eflag, int vflag)
 {
         int maxeatomold=maxeatom;
         PairLJCut::ev_setup(eflag,vflag);
 
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
 
   if (vflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
 
 }
diff --git a/src/USER-CUDA/pair_lj_cut_experimental_cuda.cpp b/src/USER-CUDA/pair_lj_cut_experimental_cuda.cpp
index 4700d609a..f60aaa6f3 100644
--- a/src/USER-CUDA/pair_lj_cut_experimental_cuda.cpp
+++ b/src/USER-CUDA/pair_lj_cut_experimental_cuda.cpp
@@ -1,178 +1,178 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Paul Crozier (SNL)
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_lj_cut_experimental_cuda.h"
 #include "pair_lj_cut_experimental_cuda_cu.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCutExperimentalCuda::PairLJCutExperimentalCuda(LAMMPS *lmp) : PairLJCut(lmp)
 {
   cuda = lmp->cuda;
    if(cuda == NULL)
         error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
 
         allocated2 = false;
         cuda->shared_data.pair.cudable_force = 1;
         cuda->setSystemParams();
 }
 
 /* ----------------------------------------------------------------------
    remember pointer to arrays in cuda shared data
 ------------------------------------------------------------------------- */
 
 void PairLJCutExperimentalCuda::allocate()
 {
         if(! allocated) PairLJCut::allocate();
         if(! allocated2)
         {
                 allocated2 = true;
                 cuda->shared_data.pair.cut     = cut;
                 cuda->shared_data.pair.coeff1  = lj1;
                 cuda->shared_data.pair.coeff2  = lj2;
                 cuda->shared_data.pair.coeff3  = lj3;
                 cuda->shared_data.pair.coeff4  = lj4;
                 cuda->shared_data.pair.offset  = offset;
                 cuda->shared_data.pair.special_lj  = force->special_lj;
                 cuda->shared_data.pair.special_coul  = force->special_coul;
         }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutExperimentalCuda::compute(int eflag, int vflag)
 {
         if (eflag || vflag) ev_setup(eflag,vflag);
         if(eflag) cuda->cu_eng_vdwl->upload();
         if(vflag) cuda->cu_virial->upload();
         Cuda_PairLJCutExperimentalCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
 
     if(not cuda->shared_data.pair.collect_forces_later)
     {
           CudaWrapper_Sync();
           if(eflag) cuda->cu_eng_vdwl->download();
           if(vflag) cuda->cu_virial->download();
     }
  }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutExperimentalCuda::settings(int narg, char **arg)
 {
         PairLJCut::settings(narg, arg);
         cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_global;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutExperimentalCuda::coeff(int narg, char **arg)
 {
         PairLJCut::coeff(narg, arg);
         allocate();
 }
 
 void PairLJCutExperimentalCuda::init_style()
 {
         MYDBG(printf("# CUDA PairLJCutExperimentalCuda::init_style start\n"); )
   // request regular or rRESPA neighbor lists
 
   int irequest;
 
   if (update->whichflag == 0 && strstr(update->integrate_style,"respa")) {
 
   }
   else
   {
-          irequest = neighbor->request(this);
+    irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->full = 1;
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->cudable = 1;
     //neighbor->style=0; //0=NSQ neighboring
   }
 
 
   cut_respa = NULL;
   MYDBG(printf("# CUDA PairLJCutExperimentalCuda::init_style end\n"); )
 }
 
 void PairLJCutExperimentalCuda::init_list(int id, NeighList *ptr)
 {
         MYDBG(printf("# CUDA PairLJCutExperimentalCuda::init_list\n");)
         PairLJCut::init_list(id, ptr);
         #ifndef CUDA_USE_BINNING
         // right now we can only handle verlet (id 0), not respa
         if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
         // see Neighbor::init() for details on lammps lists' logic
         #endif
         MYDBG(printf("# CUDA PairLJCutExperimentalCuda::init_list end\n");)
 }
 
 void PairLJCutExperimentalCuda::ev_setup(int eflag, int vflag)
 {
         int maxeatomold=maxeatom;
         PairLJCut::ev_setup(eflag,vflag);
 
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
 
   if (vflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
 
 }
diff --git a/src/USER-CUDA/pair_lj_expand_cuda.cpp b/src/USER-CUDA/pair_lj_expand_cuda.cpp
index 6d3680a5b..a102dea75 100644
--- a/src/USER-CUDA/pair_lj_expand_cuda.cpp
+++ b/src/USER-CUDA/pair_lj_expand_cuda.cpp
@@ -1,180 +1,180 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Paul Crozier (SNL)
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_lj_expand_cuda.h"
 #include "pair_lj_expand_cuda_cu.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairLJExpandCuda::PairLJExpandCuda(LAMMPS *lmp) : PairLJExpand(lmp)
 {
   cuda = lmp->cuda;
    if(cuda == NULL)
         error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
 
         allocated2 = false;
         cuda->shared_data.pair.cudable_force = 1;
         cuda->setSystemParams();
 }
 
 /* ----------------------------------------------------------------------
    remember pointer to arrays in cuda shared data
 ------------------------------------------------------------------------- */
 
 void PairLJExpandCuda::allocate()
 {
         if(! allocated) PairLJExpand::allocate();
         if(! allocated2)
         {
                 allocated2 = true;
                 cuda->shared_data.pair.cut     = cut;
                 cuda->shared_data.pair.cutsq   = cutsq;
                 cuda->shared_data.pair.coeff1  = lj1;
                 cuda->shared_data.pair.coeff2  = lj2;
                 cuda->shared_data.pair.coeff3  = lj3;
                 cuda->shared_data.pair.coeff4  = lj4;
                 cuda->shared_data.pair.coeff5  = shift;
                 cuda->shared_data.pair.offset  = offset;
                 cuda->shared_data.pair.special_lj  = force->special_lj;
                 cuda->shared_data.pair.special_coul  = force->special_coul;
         }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJExpandCuda::compute(int eflag, int vflag)
 {
         if (eflag || vflag) ev_setup(eflag,vflag);
         if(eflag) cuda->cu_eng_vdwl->upload();
         if(vflag) cuda->cu_virial->upload();
 
         Cuda_PairLJExpandCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
 
     if(not cuda->shared_data.pair.collect_forces_later)
     {
           if(eflag) cuda->cu_eng_vdwl->download();
           if(vflag) cuda->cu_virial->download();
     }
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJExpandCuda::settings(int narg, char **arg)
 {
         PairLJExpand::settings(narg, arg);
         cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_global;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJExpandCuda::coeff(int narg, char **arg)
 {
         PairLJExpand::coeff(narg, arg);
         allocate();
 }
 
 void PairLJExpandCuda::init_style()
 {
         MYDBG(printf("# CUDA PairLJExpandCuda::init_style start\n"); )
   // request regular or rRESPA neighbor lists
 
   int irequest;
 
   if (update->whichflag == 0 && strstr(update->integrate_style,"respa")) {
 
   }
   else
   {
-          irequest = neighbor->request(this);
+    irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->full = 1;
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->cudable = 1;
     //neighbor->style=0; //0=NSQ neighboring
   }
 
 
   MYDBG(printf("# CUDA PairLJExpandCuda::init_style end\n"); )
 }
 
 void PairLJExpandCuda::init_list(int id, NeighList *ptr)
 {
         MYDBG(printf("# CUDA PairLJExpandCuda::init_list\n");)
         PairLJExpand::init_list(id, ptr);
         #ifndef CUDA_USE_BINNING
         // right now we can only handle verlet (id 0), not respa
         if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
         // see Neighbor::init() for details on lammps lists' logic
         #endif
         MYDBG(printf("# CUDA PairLJExpandCuda::init_list end\n");)
 }
 
 void PairLJExpandCuda::ev_setup(int eflag, int vflag)
 {
         int maxeatomold=maxeatom;
         PairLJExpand::ev_setup(eflag,vflag);
 
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
 
   if (vflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
 
 }
diff --git a/src/USER-CUDA/pair_lj_gromacs_coul_gromacs_cuda.cpp b/src/USER-CUDA/pair_lj_gromacs_coul_gromacs_cuda.cpp
index 96973daac..73df6a66c 100644
--- a/src/USER-CUDA/pair_lj_gromacs_coul_gromacs_cuda.cpp
+++ b/src/USER-CUDA/pair_lj_gromacs_coul_gromacs_cuda.cpp
@@ -1,194 +1,194 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    Contributing author: Paul Crozier (SNL)
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_lj_gromacs_coul_gromacs_cuda.h"
 #include "pair_lj_gromacs_coul_gromacs_cuda_cu.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairLJGromacsCoulGromacsCuda::PairLJGromacsCoulGromacsCuda(LAMMPS *lmp) : PairLJGromacsCoulGromacs(lmp)
 {
   cuda = lmp->cuda;
    if(cuda == NULL)
         error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
 
         allocated2 = false;
         cuda->shared_data.pair.cudable_force = 1;
         cuda->shared_data.pair.use_block_per_atom = 0;
         cuda->setSystemParams();
 }
 
 /* ----------------------------------------------------------------------
    remember pointer to arrays in cuda shared data
 ------------------------------------------------------------------------- */
 
 void PairLJGromacsCoulGromacsCuda::allocate()
 {
         if(! allocated) PairLJGromacsCoulGromacs::allocate();
         if(! allocated2)
         {
                 allocated2 = true;
                 cuda->shared_data.pair.coeff1  = lj1;
                 cuda->shared_data.pair.coeff2  = lj2;
                 cuda->shared_data.pair.coeff3  = lj3;
                 cuda->shared_data.pair.coeff4  = lj4;
                 cuda->shared_data.pair.coeff5  = ljsw1;
                 cuda->shared_data.pair.coeff6  = ljsw2;
                 cuda->shared_data.pair.coeff7  = ljsw3;
                 cuda->shared_data.pair.coeff8  = ljsw4;
                 cuda->shared_data.pair.coeff9  = ljsw5;
                 cuda->shared_data.pair.special_lj  = force->special_lj;
                 cuda->shared_data.pair.special_coul  = force->special_coul;
             cu_lj1_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj1, &cuda->shared_data.pair.coeff1_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_lj2_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj2, &cuda->shared_data.pair.coeff2_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_lj3_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj3, &cuda->shared_data.pair.coeff3_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_lj4_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj4, &cuda->shared_data.pair.coeff4_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_ljsw1_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw1, &cuda->shared_data.pair.coeff5_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_ljsw2_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw2, &cuda->shared_data.pair.coeff6_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_ljsw3_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw3, &cuda->shared_data.pair.coeff7_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_ljsw4_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw4, &cuda->shared_data.pair.coeff8_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_ljsw5_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw5, &cuda->shared_data.pair.coeff9_gm, (atom->ntypes+1)*(atom->ntypes+1));
         }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJGromacsCoulGromacsCuda::compute(int eflag, int vflag)
 {
           if (eflag || vflag) ev_setup(eflag,vflag);
         if(not cuda->shared_data.pair.collect_forces_later)
         {
           if(eflag) cuda->cu_eng_vdwl->upload();
           if(eflag) cuda->cu_eng_coul->upload();
           if(vflag) cuda->cu_virial->upload();
         }
 
         Cuda_PairLJGromacsCoulGromacsCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom,cut_coul_inner,coulsw1,coulsw2,coulsw5);
 
         if(not cuda->shared_data.pair.collect_forces_later)
         {
           if(eflag) cuda->cu_eng_vdwl->download();
           if(eflag) cuda->cu_eng_coul->download();
           if(vflag) cuda->cu_virial->download();
         }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJGromacsCoulGromacsCuda::settings(int narg, char **arg)
 {
         PairLJGromacsCoulGromacs::settings(narg, arg);
         cuda->shared_data.pair.cut_global = (X_CFLOAT) cut_lj;
         cuda->shared_data.pair.cut_coulsq_global = (X_CFLOAT) cut_coulsq;
         cuda->shared_data.pair.cut_inner_global = (F_CFLOAT) cut_lj_inner;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJGromacsCoulGromacsCuda::coeff(int narg, char **arg)
 {
         PairLJGromacsCoulGromacs::coeff(narg, arg);
         allocate();
 }
 
 void PairLJGromacsCoulGromacsCuda::init_style()
 {
   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/gromacs/coul/gromacs requires atom attribute q");
   // request regular or rRESPA neighbor lists
 
         if(atom->molecular)
         {
           cuda->shared_data.pair.collect_forces_later = 1;
         }
 
   int irequest;
 
-           irequest = neighbor->request(this);
+  irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->full = 1;
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->cudable = 1;
 
    if (cut_lj_inner >= cut_lj || cut_coul_inner >= cut_coul)
     error->all(FLERR,"Pair inner cutoff >= Pair outer cutoff");
 
   cut_lj_innersq = cut_lj_inner * cut_lj_inner;
   cut_ljsq = cut_lj * cut_lj;
   cut_coul_innersq = cut_coul_inner * cut_coul_inner;
   cut_coulsq = cut_coul * cut_coul;
   cut_bothsq = MAX(cut_ljsq,cut_coulsq);
 
 
   cut_coulsq = cut_coul * cut_coul;
 
   cuda->shared_data.pair.cut_coulsq_global=cut_coulsq;
 
   cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
 }
 
 void PairLJGromacsCoulGromacsCuda::init_list(int id, NeighList *ptr)
 {
         MYDBG(printf("# CUDA PairLJGromacsCoulGromacsCuda::init_list\n");)
         PairLJGromacsCoulGromacs::init_list(id, ptr);
         #ifndef CUDA_USE_BINNING
         // right now we can only handle verlet (id 0), not respa
         if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
         // see Neighbor::init() for details on lammps lists' logic
         #endif
         MYDBG(printf("# CUDA PairLJGromacsCoulGromacsCuda::init_list end\n");)
 }
 
 void PairLJGromacsCoulGromacsCuda::ev_setup(int eflag, int vflag)
 {
         int maxeatomold=maxeatom;
         PairLJGromacsCoulGromacs::ev_setup(eflag,vflag);
 
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
 
   if (vflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
 
 }
diff --git a/src/USER-CUDA/pair_lj_gromacs_cuda.cpp b/src/USER-CUDA/pair_lj_gromacs_cuda.cpp
index 6a0f965b2..b2786d81a 100644
--- a/src/USER-CUDA/pair_lj_gromacs_cuda.cpp
+++ b/src/USER-CUDA/pair_lj_gromacs_cuda.cpp
@@ -1,177 +1,177 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    Contributing author: Paul Crozier (SNL)
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_lj_gromacs_cuda.h"
 #include "pair_lj_gromacs_cuda_cu.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 /* ----------------------------------------------------------------------
    constructor: take the cuda object from lmp, abort via error->all if it
    is NULL, then set the pair flags in the cuda shared data
 ------------------------------------------------------------------------- */
 
 PairLJGromacsCuda::PairLJGromacsCuda(LAMMPS *lmp) : PairLJGromacs(lmp)
 {
   cuda = lmp->cuda;
    if(cuda == NULL)
         error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
 
         // allocated2 guards the one-time setup done in allocate()
         allocated2 = false;
         cuda->shared_data.pair.cudable_force = 1;
         cuda->shared_data.pair.use_block_per_atom = 0;
         cuda->setSystemParams();
 }
 
 /* ----------------------------------------------------------------------
    remember pointers to arrays in cuda shared data
    calls the base-class allocate() if it has not run yet; on the first
    call only (guarded by allocated2) stores the cutoff and coefficient
    array pointers in cuda->shared_data.pair and creates one cCudaData
    object per coefficient array, each sized (ntypes+1)*(ntypes+1)
 ------------------------------------------------------------------------- */
 
 void PairLJGromacsCuda::allocate()
 {
         if(! allocated) PairLJGromacs::allocate();
         if(! allocated2)
         {
                 allocated2 = true;
                 cuda->shared_data.pair.cut = cut;
                 cuda->shared_data.pair.cut_inner = cut_inner;
                 cuda->shared_data.pair.coeff1  = lj1;
                 cuda->shared_data.pair.coeff2  = lj2;
                 cuda->shared_data.pair.coeff3  = lj3;
                 cuda->shared_data.pair.coeff4  = lj4;
                 // coeff5..coeff9 carry ljsw1..ljsw5
                 cuda->shared_data.pair.coeff5  = ljsw1;
                 cuda->shared_data.pair.coeff6  = ljsw2;
                 cuda->shared_data.pair.coeff7  = ljsw3;
                 cuda->shared_data.pair.coeff8  = ljsw4;
                 cuda->shared_data.pair.coeff9  = ljsw5;
                 cuda->shared_data.pair.special_lj  = force->special_lj;
             cu_lj1_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj1, &cuda->shared_data.pair.coeff1_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_lj2_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj2, &cuda->shared_data.pair.coeff2_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_lj3_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj3, &cuda->shared_data.pair.coeff3_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_lj4_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj4, &cuda->shared_data.pair.coeff4_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_ljsw1_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw1, &cuda->shared_data.pair.coeff5_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_ljsw2_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw2, &cuda->shared_data.pair.coeff6_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_ljsw3_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw3, &cuda->shared_data.pair.coeff7_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_ljsw4_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw4, &cuda->shared_data.pair.coeff8_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_ljsw5_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw5, &cuda->shared_data.pair.coeff9_gm, (atom->ntypes+1)*(atom->ntypes+1));
         }
 }
 
 /* ----------------------------------------------------------------------
    force computation: runs ev_setup when eflag or vflag is set, then
    calls Cuda_PairLJGromacsCuda; unless collect_forces_later is set,
    eng_vdwl / virial are upload()ed before and download()ed after the call
 ------------------------------------------------------------------------- */
 
 void PairLJGromacsCuda::compute(int eflag, int vflag)
 {
           if (eflag || vflag) ev_setup(eflag,vflag);
         if(not cuda->shared_data.pair.collect_forces_later)
         {
           if(eflag) cuda->cu_eng_vdwl->upload();
           if(vflag) cuda->cu_virial->upload();
         }
 
         Cuda_PairLJGromacsCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
 
         if(not cuda->shared_data.pair.collect_forces_later)
         {
           if(eflag) cuda->cu_eng_vdwl->download();
           if(vflag) cuda->cu_virial->download();
         }
 }
 
 /* ----------------------------------------------------------------------
    global settings: handled by the base class, after which cut_global and
    cut_inner_global are copied (cast to F_CFLOAT) into the cuda shared data
 ------------------------------------------------------------------------- */
 
 void PairLJGromacsCuda::settings(int narg, char **arg)
 {
         PairLJGromacs::settings(narg, arg);
         cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_global;
         cuda->shared_data.pair.cut_inner_global = (F_CFLOAT) cut_inner_global;
 }
 
 /* ----------------------------------------------------------------------
    per-type coefficients: handled by the base class, followed by a call
    to this class's allocate()
 ------------------------------------------------------------------------- */
 
 void PairLJGromacsCuda::coeff(int narg, char **arg)
 {
         PairLJGromacs::coeff(narg, arg);
         allocate();
 }
 
 /* ----------------------------------------------------------------------
    style initialization: sets collect_forces_later when atom->molecular
    is non-zero and requests a neighbor list flagged full=1, half=0,
    cudable=1
 ------------------------------------------------------------------------- */
 
 void PairLJGromacsCuda::init_style()
 {
   // request regular or rRESPA neighbor lists
 
         if(atom->molecular)
         {
           cuda->shared_data.pair.collect_forces_later = 1;
         }
 
   int irequest;
 
-           irequest = neighbor->request(this);
+  irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->full = 1;
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->cudable = 1;
 
 
 
 }
 
 /* ----------------------------------------------------------------------
    pass the neighbor list on to the base class; unless CUDA_USE_BINNING
    is defined, list id 0 is also registered with the cuda object and the
    result is stored in cuda_neigh_list
 ------------------------------------------------------------------------- */
 
 void PairLJGromacsCuda::init_list(int id, NeighList *ptr)
 {
         MYDBG(printf("# CUDA PairLJGromacsCuda::init_list\n");)
         PairLJGromacs::init_list(id, ptr);
         #ifndef CUDA_USE_BINNING
         // right now we can only handle verlet (id 0), not respa
         if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
         // see Neighbor::init() for details on lammps lists' logic
         #endif
         MYDBG(printf("# CUDA PairLJGromacsCuda::init_list end\n");)
 }
 
 /* ----------------------------------------------------------------------
    run the base-class ev_setup, then replace the cCudaData objects for
    eatom / vatom when per-atom tallying is on and atom->nmax exceeds the
    maxeatom value seen before the base-class call
 ------------------------------------------------------------------------- */
 
 void PairLJGromacsCuda::ev_setup(int eflag, int vflag)
 {
         // value of maxeatom prior to the base-class call
         int maxeatomold=maxeatom;
         PairLJGromacs::ev_setup(eflag,vflag);
 
   // replace the eatom wrapper, constructed with atom->nmax
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
 
   // replace the vatom wrapper, constructed with atom->nmax and 6
   // NOTE(review): this branch also compares against the old maxeatom,
   // not a vatom-specific size - confirm that is intended
   if (vflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
 
 }
diff --git a/src/USER-CUDA/pair_lj_sdk_coul_long_cuda.cpp b/src/USER-CUDA/pair_lj_sdk_coul_long_cuda.cpp
index f953ed0df..aca0f6d01 100644
--- a/src/USER-CUDA/pair_lj_sdk_coul_long_cuda.cpp
+++ b/src/USER-CUDA/pair_lj_sdk_coul_long_cuda.cpp
@@ -1,193 +1,193 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Paul Crozier (SNL)
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_lj_sdk_coul_long_cuda.h"
 #include "pair_lj_sdk_coul_long_cuda_cu.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 /* ----------------------------------------------------------------------
    constructor: take the cuda object from lmp, abort via error->all if it
    is NULL, then reset local state and flag the pair force as cudable
 ------------------------------------------------------------------------- */
 
 PairLJSDKCoulLongCuda::PairLJSDKCoulLongCuda(LAMMPS *lmp) : PairLJSDKCoulLong(lmp)
 {
   cuda = lmp->cuda;
    if(cuda == NULL)
         error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
 
         // allocated2 guards the one-time setup done in allocate()
         allocated2 = false;
         // lj_type_double is created later, in allocate()
         lj_type_double = NULL;
         cuda->shared_data.pair.cudable_force = 1;
         cuda->setSystemParams();
 }
 
 /* ----------------------------------------------------------------------
    remember pointers to arrays in cuda shared data
    calls the base-class allocate() if it has not run yet; on the first
    call only (guarded by allocated2) creates lj_type_double and stores the
    cutoff, coefficient, offset and special_* pointers in
    cuda->shared_data.pair; on every call lj_type is copied into
    lj_type_double
 ------------------------------------------------------------------------- */
 
 void PairLJSDKCoulLongCuda::allocate()
 {
         if(! allocated) PairLJSDKCoulLong::allocate();
         int n = atom->ntypes;
         if(! allocated2)
         {
                 allocated2 = true;
 
 
                   memory->create(lj_type_double,n+1,n+1,"pairlj:ljtypedouble");
 
                 cuda->shared_data.pair.cut     = cut_lj;
                 cuda->shared_data.pair.cut_coul= NULL;
                 cuda->shared_data.pair.coeff1  = lj1;
                 cuda->shared_data.pair.coeff2  = lj2;
                 cuda->shared_data.pair.coeff3  = lj3;
                 cuda->shared_data.pair.coeff4  = lj4;
                 // coeff5 carries the lj_type table via lj_type_double
                 cuda->shared_data.pair.coeff5  = lj_type_double;
                 cuda->shared_data.pair.offset  = offset;
                 cuda->shared_data.pair.special_lj  = force->special_lj;
                 cuda->shared_data.pair.special_coul  = force->special_coul;
 
         }
           // runs on every call (outside the allocated2 guard): copy
           // lj_type[i][j] for j >= i into both [i][j] and [j][i]
           // presumably lj_type is an integer table converted to double
           // here - TODO confirm against the base class
           for (int i = 1; i <= n; i++) {
       for (int j = i; j <= n; j++) {
         lj_type_double[i][j] = lj_type[i][j];
         lj_type_double[j][i] = lj_type[i][j];
       }
     }
 }
 
 /* ----------------------------------------------------------------------
    force computation: runs ev_setup when eflag or vflag is set, calls
    upload() on eng_vdwl / eng_coul / virial as requested by the flags,
    invokes Cuda_PairLJSDKCoulLongCuda, and calls download() on the same
    quantities unless collect_forces_later is set
 ------------------------------------------------------------------------- */
 
 void PairLJSDKCoulLongCuda::compute(int eflag, int vflag)
 {
         if (eflag || vflag) ev_setup(eflag,vflag);
         // NOTE(review): the uploads are not guarded by
         // collect_forces_later while the downloads below are - confirm
         // the asymmetry is intended
         if(eflag) cuda->cu_eng_vdwl->upload();
         if(eflag) cuda->cu_eng_coul->upload();
         if(vflag) cuda->cu_virial->upload();
 
         Cuda_PairLJSDKCoulLongCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
 
     if(not cuda->shared_data.pair.collect_forces_later)
     {
           if(eflag) cuda->cu_eng_vdwl->download();
           if(eflag) cuda->cu_eng_coul->download();
           if(vflag) cuda->cu_virial->download();
     }
 
 }
 
 /* ----------------------------------------------------------------------
    global settings: handled by the base class, after which cut_lj_global
    and cut_coul are copied (cast to F_CFLOAT) into the cuda shared data
    as cut_global and cut_coul_global
 ------------------------------------------------------------------------- */
 
 void PairLJSDKCoulLongCuda::settings(int narg, char **arg)
 {
         PairLJSDKCoulLong::settings(narg, arg);
         cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_lj_global;
         cuda->shared_data.pair.cut_coul_global = (F_CFLOAT) cut_coul;
 }
 
 /* ----------------------------------------------------------------------
    per-type coefficients: handled by the base class, followed by a call
    to this class's allocate(), which also refreshes lj_type_double
 ------------------------------------------------------------------------- */
 
 void PairLJSDKCoulLongCuda::coeff(int narg, char **arg)
 {
         PairLJSDKCoulLong::coeff(narg, arg);
         allocate();
 }
 
 void PairLJSDKCoulLongCuda::init_style()
 {
         MYDBG(printf("# CUDA PairLJSDKCoulLongCuda::init_style start\n"); )
   // request regular or rRESPA neighbor lists
 
   int irequest;
 
-          irequest = neighbor->request(this);
+        irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->full = 1;
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->cudable = 1;
 
   g_ewald = force->kspace->g_ewald;
   cuda->shared_data.pair.g_ewald=g_ewald;
   cuda->shared_data.pppm.qqrd2e=force->qqrd2e;
   if (force->newton) error->warning(FLERR,"Pair style uses does not use \"newton\" setting. You might test if \"newton off\" makes the simulation run faster.");
   MYDBG(printf("# CUDA PairLJSDKCoulLongCuda::init_style end\n"); )
 }
 
 /* ----------------------------------------------------------------------
    pass the neighbor list on to the base class; unless CUDA_USE_BINNING
    is defined, list id 0 is also registered with the cuda object and the
    result is stored in cuda_neigh_list
 ------------------------------------------------------------------------- */
 
 void PairLJSDKCoulLongCuda::init_list(int id, NeighList *ptr)
 {
         MYDBG(printf("# CUDA PairLJSDKCoulLongCuda::init_list\n");)
         PairLJSDKCoulLong::init_list(id, ptr);
         #ifndef CUDA_USE_BINNING
         // right now we can only handle verlet (id 0), not respa
         if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
         // see Neighbor::init() for details on lammps lists' logic
         #endif
         MYDBG(printf("# CUDA PairLJSDKCoulLongCuda::init_list end\n");)
 }
 
 /* ----------------------------------------------------------------------
    run the base-class ev_setup, then replace the cCudaData objects for
    eatom / vatom when per-atom tallying is on and atom->nmax exceeds the
    maxeatom value seen before the base-class call
 ------------------------------------------------------------------------- */
 
 void PairLJSDKCoulLongCuda::ev_setup(int eflag, int vflag)
 {
         // value of maxeatom prior to the base-class call
         int maxeatomold=maxeatom;
         PairLJSDKCoulLong::ev_setup(eflag,vflag);
 
   // replace the eatom wrapper, constructed with atom->nmax
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
 
   // replace the vatom wrapper, constructed with atom->nmax and 6
   // NOTE(review): this branch also compares against the old maxeatom,
   // not a vatom-specific size - confirm that is intended
   if (vflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
 
 }
diff --git a/src/USER-CUDA/pair_lj_sdk_cuda.cpp b/src/USER-CUDA/pair_lj_sdk_cuda.cpp
index f3c9f39cf..f6eba1ba7 100644
--- a/src/USER-CUDA/pair_lj_sdk_cuda.cpp
+++ b/src/USER-CUDA/pair_lj_sdk_cuda.cpp
@@ -1,184 +1,184 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Paul Crozier (SNL)
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_lj_sdk_cuda.h"
 #include "pair_lj_sdk_cuda_cu.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 /* ----------------------------------------------------------------------
    constructor: take the cuda object from lmp, abort via error->all if it
    is NULL, then reset local state and flag the pair force as cudable
 ------------------------------------------------------------------------- */
 
 PairLJSDKCuda::PairLJSDKCuda(LAMMPS *lmp) : PairLJSDK(lmp)
 {
   cuda = lmp->cuda;
    if(cuda == NULL)
         error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
 
         // allocated2 guards the one-time setup done in allocate()
         allocated2 = false;
         // lj_type_double is created later, in allocate()
         lj_type_double = NULL;
         cuda->shared_data.pair.cudable_force = 1;
         cuda->setSystemParams();
 }
 
 /* ----------------------------------------------------------------------
    remember pointers to arrays in cuda shared data
    calls the base-class allocate() if it has not run yet; on the first
    call only (guarded by allocated2) creates lj_type_double and stores the
    cutoff, coefficient, offset and special_lj pointers in
    cuda->shared_data.pair; on every call lj_type is copied into
    lj_type_double
 ------------------------------------------------------------------------- */
 
 void PairLJSDKCuda::allocate()
 {
         if(! allocated) PairLJSDK::allocate();
         int n = atom->ntypes;
         if(! allocated2)
         {
                 allocated2 = true;
 
 
                   memory->create(lj_type_double,n+1,n+1,"pairlj:ljtypedouble");
 
                 cuda->shared_data.pair.cut     = cut;
                 cuda->shared_data.pair.coeff1  = lj1;
                 cuda->shared_data.pair.coeff2  = lj2;
                 cuda->shared_data.pair.coeff3  = lj3;
                 cuda->shared_data.pair.coeff4  = lj4;
                 // coeff5 carries the lj_type table via lj_type_double
                 cuda->shared_data.pair.coeff5  = lj_type_double;
             /*cu_lj1_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj1, &cuda->shared_data.pair.coeff1_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_lj2_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj2, &cuda->shared_data.pair.coeff2_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_lj3_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj3, &cuda->shared_data.pair.coeff3_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_lj4_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj4, &cuda->shared_data.pair.coeff4_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_lj_type_double_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj_type_double, &cuda->shared_data.pair.coeff5_gm, (atom->ntypes+1)*(atom->ntypes+1));*/
                 cuda->shared_data.pair.offset  = offset;
                 cuda->shared_data.pair.special_lj  = force->special_lj;
         }
           // runs on every call (outside the allocated2 guard): copy
           // lj_type[i][j] for j >= i into both [i][j] and [j][i]
           // presumably lj_type is an integer table converted to double
           // here - TODO confirm against the base class
           for (int i = 1; i <= n; i++) {
       for (int j = i; j <= n; j++) {
         lj_type_double[i][j] = lj_type[i][j];
         lj_type_double[j][i] = lj_type[i][j];
       }
     }
 }
 
 /* ----------------------------------------------------------------------
    force computation: runs ev_setup when eflag or vflag is set, calls
    upload() on eng_vdwl / virial as requested by the flags, invokes
    Cuda_PairLJSDKCuda, and calls download() on the same quantities unless
    collect_forces_later is set
 ------------------------------------------------------------------------- */
 
 void PairLJSDKCuda::compute(int eflag, int vflag)
 {
         if (eflag || vflag) ev_setup(eflag,vflag);
         // NOTE(review): the uploads are not guarded by
         // collect_forces_later while the downloads below are - confirm
         // the asymmetry is intended
         if(eflag) cuda->cu_eng_vdwl->upload();
         if(vflag) cuda->cu_virial->upload();
 
         Cuda_PairLJSDKCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
 
     if(not cuda->shared_data.pair.collect_forces_later)
     {
           if(eflag) cuda->cu_eng_vdwl->download();
           if(vflag) cuda->cu_virial->download();
     }
 
 }
 
 /* ----------------------------------------------------------------------
    global settings: handled by the base class, after which cut_global is
    copied (cast to F_CFLOAT) into the cuda shared data
 ------------------------------------------------------------------------- */
 
 void PairLJSDKCuda::settings(int narg, char **arg)
 {
         PairLJSDK::settings(narg, arg);
         cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_global;
 }
 
 /* ----------------------------------------------------------------------
    per-type coefficients: handled by the base class, followed by a call
    to this class's allocate(), which also refreshes lj_type_double
 ------------------------------------------------------------------------- */
 
 void PairLJSDKCuda::coeff(int narg, char **arg)
 {
         PairLJSDK::coeff(narg, arg);
         allocate();
 }
 
 /* ----------------------------------------------------------------------
    style initialization: requests a neighbor list flagged full=1, half=0,
    cudable=1
 ------------------------------------------------------------------------- */
 
 void PairLJSDKCuda::init_style()
 {
   MYDBG(printf("# CUDA PairLJSDKCuda::init_style start\n"); )
 
-  int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->full = 1;
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->cudable = 1;
 
   MYDBG(printf("# CUDA PairLJSDKCuda::init_style end\n"); )
 }
 
 /* ----------------------------------------------------------------------
    pass the neighbor list on to the base class; unless CUDA_USE_BINNING
    is defined, list id 0 is also registered with the cuda object and the
    result is stored in cuda_neigh_list
 ------------------------------------------------------------------------- */
 
 void PairLJSDKCuda::init_list(int id, NeighList *ptr)
 {
         MYDBG(printf("# CUDA PairLJSDKCuda::init_list\n");)
         PairLJSDK::init_list(id, ptr);
         #ifndef CUDA_USE_BINNING
         // right now we can only handle verlet (id 0), not respa
         if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
         // see Neighbor::init() for details on lammps lists' logic
         #endif
         MYDBG(printf("# CUDA PairLJSDKCuda::init_list end\n");)
 }
 
 /* ----------------------------------------------------------------------
    run the base-class ev_setup, then replace the cCudaData objects for
    eatom / vatom when per-atom tallying is on and atom->nmax exceeds the
    maxeatom value seen before the base-class call
 ------------------------------------------------------------------------- */
 
 void PairLJSDKCuda::ev_setup(int eflag, int vflag)
 {
         // value of maxeatom prior to the base-class call
         int maxeatomold=maxeatom;
         PairLJSDK::ev_setup(eflag,vflag);
 
   // replace the eatom wrapper, constructed with atom->nmax
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
 
   // replace the vatom wrapper, constructed with atom->nmax and 6
   // NOTE(review): this branch also compares against the old maxeatom,
   // not a vatom-specific size - confirm that is intended
   if (vflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
 
 }
diff --git a/src/USER-CUDA/pair_lj_smooth_cuda.cpp b/src/USER-CUDA/pair_lj_smooth_cuda.cpp
index 89f4cc4b7..3a51e94fe 100644
--- a/src/USER-CUDA/pair_lj_smooth_cuda.cpp
+++ b/src/USER-CUDA/pair_lj_smooth_cuda.cpp
@@ -1,177 +1,177 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    Contributing author: Paul Crozier (SNL)
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_lj_smooth_cuda.h"
 #include "pair_lj_smooth_cuda_cu.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 /* ----------------------------------------------------------------------
    constructor: take the cuda object from lmp, abort via error->all if it
    is NULL, then set the pair flags in the cuda shared data
 ------------------------------------------------------------------------- */
 
 PairLJSmoothCuda::PairLJSmoothCuda(LAMMPS *lmp) : PairLJSmooth(lmp)
 {
   cuda = lmp->cuda;
    if(cuda == NULL)
         error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
 
         // allocated2 guards the one-time setup done in allocate()
         allocated2 = false;
         cuda->shared_data.pair.cudable_force = 1;
         cuda->shared_data.pair.use_block_per_atom = 0;
         cuda->setSystemParams();
 }
 
 /* ----------------------------------------------------------------------
    remember pointers to arrays in cuda shared data
    calls the base-class allocate() if it has not run yet; on the first
    call only (guarded by allocated2) stores the cutoff and coefficient
    array pointers in cuda->shared_data.pair and creates one cCudaData
    object per coefficient array, each sized (ntypes+1)*(ntypes+1)
 ------------------------------------------------------------------------- */
 
 void PairLJSmoothCuda::allocate()
 {
         if(! allocated) PairLJSmooth::allocate();
         if(! allocated2)
         {
                 allocated2 = true;
                 cuda->shared_data.pair.cut = cut;
                 cuda->shared_data.pair.cut_inner = cut_inner;
                 cuda->shared_data.pair.coeff1  = lj1;
                 cuda->shared_data.pair.coeff2  = lj2;
                 cuda->shared_data.pair.coeff3  = lj3;
                 cuda->shared_data.pair.coeff4  = lj4;
                 // coeff5..coeff8 carry ljsw1..ljsw4; ljsw0 goes in coeff9
                 cuda->shared_data.pair.coeff5  = ljsw1;
                 cuda->shared_data.pair.coeff6  = ljsw2;
                 cuda->shared_data.pair.coeff7  = ljsw3;
                 cuda->shared_data.pair.coeff8  = ljsw4;
                 cuda->shared_data.pair.coeff9  = ljsw0;
                 cuda->shared_data.pair.special_lj  = force->special_lj;
             cu_lj1_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj1, &cuda->shared_data.pair.coeff1_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_lj2_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj2, &cuda->shared_data.pair.coeff2_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_lj3_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj3, &cuda->shared_data.pair.coeff3_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_lj4_gm = new cCudaData<double, F_CFLOAT, x> ((double*)lj4, &cuda->shared_data.pair.coeff4_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_ljsw0_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw0, &cuda->shared_data.pair.coeff9_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_ljsw1_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw1, &cuda->shared_data.pair.coeff5_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_ljsw2_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw2, &cuda->shared_data.pair.coeff6_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_ljsw3_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw3, &cuda->shared_data.pair.coeff7_gm, (atom->ntypes+1)*(atom->ntypes+1));
             cu_ljsw4_gm = new cCudaData<double, F_CFLOAT, x> ((double*)ljsw4, &cuda->shared_data.pair.coeff8_gm, (atom->ntypes+1)*(atom->ntypes+1));
         }
 }
 
 /* ----------------------------------------------------------------------
    force computation: runs ev_setup when eflag or vflag is set, then
    calls Cuda_PairLJSmoothCuda; unless collect_forces_later is set,
    eng_vdwl / virial are upload()ed before and download()ed after the call
 ------------------------------------------------------------------------- */
 
 void PairLJSmoothCuda::compute(int eflag, int vflag)
 {
           if (eflag || vflag) ev_setup(eflag,vflag);
         if(not cuda->shared_data.pair.collect_forces_later)
         {
           if(eflag) cuda->cu_eng_vdwl->upload();
           if(vflag) cuda->cu_virial->upload();
         }
 
         Cuda_PairLJSmoothCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
 
         if(not cuda->shared_data.pair.collect_forces_later)
         {
           if(eflag) cuda->cu_eng_vdwl->download();
           if(vflag) cuda->cu_virial->download();
         }
 }
 
 /* ----------------------------------------------------------------------
    global settings: handled by the base class, after which cut_global and
    cut_inner_global are copied (cast to F_CFLOAT) into the cuda shared data
 ------------------------------------------------------------------------- */
 
 void PairLJSmoothCuda::settings(int narg, char **arg)
 {
         PairLJSmooth::settings(narg, arg);
         cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_global;
         cuda->shared_data.pair.cut_inner_global = (F_CFLOAT) cut_inner_global;
 }
 
 /* ----------------------------------------------------------------------
    per-type coefficients: handled by the base class, followed by a call
    to this class's allocate()
 ------------------------------------------------------------------------- */
 
 void PairLJSmoothCuda::coeff(int narg, char **arg)
 {
         PairLJSmooth::coeff(narg, arg);
         allocate();
 }
 
 /* ----------------------------------------------------------------------
    style initialization: sets collect_forces_later when atom->molecular
    is non-zero and requests a neighbor list flagged full=1, half=0,
    cudable=1
 ------------------------------------------------------------------------- */
 
 void PairLJSmoothCuda::init_style()
 {
   // request regular or rRESPA neighbor lists
 
         if(atom->molecular)
         {
           cuda->shared_data.pair.collect_forces_later = 1;
         }
 
   int irequest;
 
-           irequest = neighbor->request(this);
+  irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->full = 1;
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->cudable = 1;
 
 
 
 }
 
 /* ----------------------------------------------------------------------
    pass the neighbor list on to the base class; unless CUDA_USE_BINNING
    is defined, list id 0 is also registered with the cuda object and the
    result is stored in cuda_neigh_list
 ------------------------------------------------------------------------- */
 
 void PairLJSmoothCuda::init_list(int id, NeighList *ptr)
 {
         MYDBG(printf("# CUDA PairLJSmoothCuda::init_list\n");)
         PairLJSmooth::init_list(id, ptr);
         #ifndef CUDA_USE_BINNING
         // right now we can only handle verlet (id 0), not respa
         if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
         // see Neighbor::init() for details on lammps lists' logic
         #endif
         MYDBG(printf("# CUDA PairLJSmoothCuda::init_list end\n");)
 }
 
 /* ----------------------------------------------------------------------
    run the base-class ev_setup, then replace the cCudaData objects for
    eatom / vatom when per-atom tallying is on and atom->nmax exceeds the
    maxeatom value seen before the base-class call
 ------------------------------------------------------------------------- */
 
 void PairLJSmoothCuda::ev_setup(int eflag, int vflag)
 {
         // value of maxeatom prior to the base-class call
         int maxeatomold=maxeatom;
         PairLJSmooth::ev_setup(eflag,vflag);
 
   // replace the eatom wrapper, constructed with atom->nmax
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
 
   // replace the vatom wrapper, constructed with atom->nmax and 6
   // NOTE(review): this branch also compares against the old maxeatom,
   // not a vatom-specific size - confirm that is intended
   if (vflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
 
 }
diff --git a/src/USER-CUDA/pair_morse_cuda.cpp b/src/USER-CUDA/pair_morse_cuda.cpp
index 6b39ccb08..a38712aab 100644
--- a/src/USER-CUDA/pair_morse_cuda.cpp
+++ b/src/USER-CUDA/pair_morse_cuda.cpp
@@ -1,177 +1,177 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Paul Crozier (SNL)
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_morse_cuda.h"
 #include "pair_morse_cuda_cu.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairMorseCuda::PairMorseCuda(LAMMPS *lmp) : PairMorse(lmp)
 {
   // Take the cuda object from the LAMMPS instance; report an error through
   // error->all() if there is none.
   cuda = lmp->cuda;
   if (!cuda) {
     error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
   }
   // allocated2 tracks whether allocate() has already published the
   // coefficient arrays into cuda->shared_data.pair.
   allocated2 = false;
   cuda->shared_data.pair.cudable_force = 1;
   cuda->setSystemParams();
 }
 
 /* ----------------------------------------------------------------------
    remember pointer to arrays in cuda shared data
 ------------------------------------------------------------------------- */
 
 void PairMorseCuda::allocate()
 {
   // Run the PairMorse allocation once.
   if (!allocated) PairMorse::allocate();
   // The shared-data fields only need to be filled in the first time.
   if (allocated2) return;
   allocated2 = true;
   // Publish the coefficient arrays in the shared pair data:
   // coeff1..coeff4 receive r0, alpha, morse1 and d0 respectively.
   cuda->shared_data.pair.cut     = cut;
   cuda->shared_data.pair.coeff1  = r0;
   cuda->shared_data.pair.coeff2  = alpha;
   cuda->shared_data.pair.coeff3  = morse1;
   cuda->shared_data.pair.coeff4  = d0;
   cuda->shared_data.pair.offset  = offset;
   cuda->shared_data.pair.special_lj  = force->special_lj;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairMorseCuda::compute(int eflag, int vflag)
 {
   // ev_setup() is only invoked when energy or virial tallying is requested.
   if (eflag || vflag) {
     ev_setup(eflag, vflag);
   }
   // upload() the global energy / virial buffers that were requested
   if (eflag) {
     cuda->cu_eng_vdwl->upload();
   }
   if (vflag) {
     cuda->cu_virial->upload();
   }
   Cuda_PairMorseCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);
   // When collect_forces_later is set the download() calls are skipped here.
   if (cuda->shared_data.pair.collect_forces_later) return;
   if (eflag) {
     cuda->cu_eng_vdwl->download();
   }
   if (vflag) {
     cuda->cu_virial->download();
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairMorseCuda::settings(int narg, char **arg)
 {
         PairMorse::settings(narg, arg);
         cuda->shared_data.pair.cut_global = (F_CFLOAT) cut_global;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairMorseCuda::coeff(int narg, char **arg)
 {
         PairMorse::coeff(narg, arg);
         allocate();
 }
 
 /* ----------------------------------------------------------------------
    configure the neighbor list request for this CUDA pair style
    nothing is requested when update->whichflag == 0 and the integrate
    style name contains "respa"; otherwise one request flagged
    full = 1, half = 0, cudable = 1 is made
 ------------------------------------------------------------------------- */
 void PairMorseCuda::init_style()
 {
         MYDBG(printf("# CUDA PairMorseCuda::init_style start\n"); )
   // request a full, cudable neighbor list unless the respa case applies
 
   // index of the request inside neighbor->requests[]; only assigned in
   // the else branch below
   int irequest;
 
   if (update->whichflag == 0 && strstr(update->integrate_style,"respa")) {
     // intentionally empty: no neighbor list is requested in this case
 
   }
   else
   {
-          irequest = neighbor->request(this);
+    irequest = neighbor->request(this,instance_me);
     // mark the request as a full (not half) list that is cudable
     neighbor->requests[irequest]->full = 1;
     neighbor->requests[irequest]->half = 0;
     neighbor->requests[irequest]->cudable = 1;
     //neighbor->style=0; //0=NSQ neighboring
   }
 
 
   MYDBG(printf("# CUDA PairMorseCuda::init_style end\n"); )
 }
 
 void PairMorseCuda::init_list(int id, NeighList *ptr)
 {
   MYDBG(printf("# CUDA PairMorseCuda::init_list\n");)
   // Forward to the PairMorse implementation first.
   PairMorse::init_list(id, ptr);
 #ifndef CUDA_USE_BINNING
   // Only the verlet list (id 0) is registered with the cuda object; respa
   // lists are not handled. See Neighbor::init() for the LAMMPS list logic.
   if (id == 0) {
     cuda_neigh_list = cuda->registerNeighborList(ptr);
   }
 #endif
   MYDBG(printf("# CUDA PairMorseCuda::init_list end\n");)
 }
 
 void PairMorseCuda::ev_setup(int eflag, int vflag)
 {
         int maxeatomold=maxeatom;
         PairMorse::ev_setup(eflag,vflag);
 
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
 
   if (vflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
 
 }
diff --git a/src/USER-CUDA/pair_sw_cuda.cpp b/src/USER-CUDA/pair_sw_cuda.cpp
index 534e14599..1dfccc4e7 100644
--- a/src/USER-CUDA/pair_sw_cuda.cpp
+++ b/src/USER-CUDA/pair_sw_cuda.cpp
@@ -1,207 +1,207 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Paul Crozier (SNL)
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_sw_cuda.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 
 
 
 /* ---------------------------------------------------------------------- */
 
 PairSWCuda::PairSWCuda(LAMMPS *lmp) : PairSW(lmp)
 {
   // Take the cuda object from the LAMMPS instance; report an error through
   // error->all() if there is none.
   cuda = lmp->cuda;
   if (!cuda) {
     error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
   }
   // allocated2: allocate() has not yet filled the shared pair data.
   // params_f: buffer that coeff() (re)allocates and fills from params[].
   allocated2 = false;
   params_f = NULL;
   cuda->setSystemParams();
   cuda->shared_data.pair.cudable_force = 1;
   cuda->shared_data.pair.override_block_per_atom = 0;
   cuda->shared_data.pair.neighall = true;
   // init: compute() has not yet called Cuda_PairSWCuda_Init().
   init = false;
 }
 
 /* ----------------------------------------------------------------------
    remember pointer to arrays in cuda shared data
 ------------------------------------------------------------------------- */
 
 void PairSWCuda::allocate()
 {
   // Run the PairSW allocation once.
   if (!allocated) PairSW::allocate();
   // The shared-data fields only need to be filled in the first time.
   if (allocated2) return;
   allocated2 = true;
   cuda->shared_data.pair.cutsq   = cutsq;
   cuda->shared_data.pair.special_lj  = force->special_lj;
   cuda->shared_data.pair.special_coul  = force->special_coul;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairSWCuda::compute(int eflag, int vflag)
 {
   // One-time call of Cuda_PairSWCuda_Init(), guarded by the init flag.
   if (!init) {
     Cuda_PairSWCuda_Init(&cuda->shared_data,params_f,map, &elem2param[0][0][0],nelements);
     init = true;
   }
   // ev_setup() is only invoked when energy or virial tallying is requested.
   if (eflag || vflag) {
     ev_setup(eflag, vflag);
   }
   // upload() the global energy / virial buffers that were requested
   if (eflag) {
     cuda->cu_eng_vdwl->upload();
   }
   if (vflag) {
     cuda->cu_virial->upload();
   }
   Cuda_PairSWCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);//,&elem2param[0][0][0],map
   // When collect_forces_later is set the download() calls are skipped here.
   if (cuda->shared_data.pair.collect_forces_later) return;
   if (eflag) {
     cuda->cu_eng_vdwl->download();
   }
   if (vflag) {
     cuda->cu_virial->download();
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairSWCuda::settings(int narg, char **arg)
 {
   // Pure pass-through: the arguments go to PairSW::settings() unchanged.
   PairSW::settings(narg, arg);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairSWCuda::coeff(int narg, char **arg)
 {
         PairSW::coeff(narg, arg);
         allocate();
   params_f = (ParamSW_Float *) memory->srealloc(params_f,maxparam*sizeof(ParamSW_Float),
         "pair:params_f");
   for(int i=0;i<maxparam;i++)
   {
     printf("%e %e\n",params[i].cut,params[i].cutsq);
     params_f[i].cut = params[i].cut;
     params_f[i].cutsq = params[i].cutsq;
     params_f[i].c1 = params[i].c1;
     params_f[i].c2 = params[i].c2;
     params_f[i].c3 = params[i].c3;
     params_f[i].c4 = params[i].c4;
     params_f[i].c5 = params[i].c5;
     params_f[i].c6 = params[i].c6;
     params_f[i].ielement = params[i].ielement;
     params_f[i].jelement = params[i].jelement;
     params_f[i].kelement = params[i].kelement;
     params_f[i].epsilon = params[i].epsilon;
     params_f[i].sigma = params[i].sigma;
     params_f[i].littlea = params[i].littlea;
     params_f[i].lambda = params[i].lambda;
     params_f[i].costheta = params[i].costheta;
     params_f[i].tol = params[i].tol;
     params_f[i].sigma_gamma = params[i].sigma_gamma;
     params_f[i].lambda_epsilon = params[i].lambda_epsilon;
     params_f[i].lambda_epsilon2 = params[i].lambda_epsilon2;
     params_f[i].gamma = params[i].gamma;
 
     params_f[i].biga = params[i].biga;
     params_f[i].bigb = params[i].bigb;
     params_f[i].gamma = params[i].gamma;
     params_f[i].powerp = params[i].powerp;
     params_f[i].powerq = params[i].powerq;
   }
   cuda->shared_data.pair.cut_global = cutmax;
 }
 
 /* ----------------------------------------------------------------------
    configure the neighbor list request for this CUDA pair style:
    one request flagged full = 1, half = 0, cudable = 1, ghost = 1
 ------------------------------------------------------------------------- */
 void PairSWCuda::init_style()
 {
         MYDBG(printf("# CUDA PairSWCuda::init_style start\n"); )
 
   // index of the request inside neighbor->requests[]
   int irequest;
 
-        irequest = neighbor->request(this);
+        irequest = neighbor->request(this,instance_me);
   // mark the request as a full (not half), cudable list with the ghost
   // flag set
   neighbor->requests[irequest]->full = 1;
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->cudable = 1;
   neighbor->requests[irequest]->ghost = 1;
 
 
   MYDBG(printf("# CUDA PairSWCuda::init_style end\n"); )
 }
 
 void PairSWCuda::init_list(int id, NeighList *ptr)
 {
         MYDBG(printf("# CUDA PairSWCuda::init_list\n");)
         PairSW::init_list(id, ptr);
         // right now we can only handle verlet (id 0), not respa
         if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
         // see Neighbor::init() for details on lammps lists' logic
         MYDBG(printf("# CUDA PairSWCuda::init_list end\n");)
   cu_params_f = (ParamSW_Float*) CudaWrapper_AllocCudaData(sizeof(ParamSW_Float)*maxparam);
   CudaWrapper_UploadCudaData((void*) params_f,(void*) cu_params_f,sizeof(ParamSW_Float)*maxparam);
   cu_elem2param = new cCudaData<int, int, xyz > ((int*) elem2param, nelements,nelements,nelements);
   cu_elem2param->upload();
   cu_map = new cCudaData<int, int, x > ( map,atom->ntypes+1 );
   cu_map->upload();
 }
 
 void PairSWCuda::ev_setup(int eflag, int vflag)
 {
         int maxeatomold=maxeatom;
         PairSW::ev_setup(eflag,vflag);
 
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
 
   if (vflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
 }
diff --git a/src/USER-CUDA/pair_tersoff_cuda.cpp b/src/USER-CUDA/pair_tersoff_cuda.cpp
index 2d0aadb54..f22b55128 100644
--- a/src/USER-CUDA/pair_tersoff_cuda.cpp
+++ b/src/USER-CUDA/pair_tersoff_cuda.cpp
@@ -1,204 +1,204 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 
    Original Version:
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    See the README file in the top-level LAMMPS directory.
 
    -----------------------------------------------------------------------
 
    USER-CUDA Package and associated modifications:
    https://sourceforge.net/projects/lammpscuda/
 
    Christian Trott, christian.trott@tu-ilmenau.de
    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
    Theoretical Physics II, University of Technology Ilmenau, Germany
 
    See the README file in the USER-CUDA directory.
 
    This software is distributed under the GNU General Public License.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Paul Crozier (SNL)
 ------------------------------------------------------------------------- */
 
 #include <cmath>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
 #include "pair_tersoff_cuda.h"
 #include "cuda_data.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "cuda_neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "memory.h"
 #include "error.h"
 #include "user_cuda.h"
 
 using namespace LAMMPS_NS;
 
 
 
 
 /* ---------------------------------------------------------------------- */
 
 PairTersoffCuda::PairTersoffCuda(LAMMPS *lmp) : PairTersoff(lmp)
 {
   // Take the cuda object from the LAMMPS instance; report an error through
   // error->all() if there is none.
   cuda = lmp->cuda;
   if (!cuda) {
     error->all(FLERR,"You cannot use a /cuda class, without activating 'cuda' acceleration. Provide '-c on' as command-line argument to LAMMPS..");
   }
   // allocated2: allocate() has not yet filled the shared pair data.
   // params_f: buffer that coeff() (re)allocates and fills from params[].
   allocated2 = false;
   params_f = NULL;
   cuda->setSystemParams();
   cuda->shared_data.pair.cudable_force = 1;
   cuda->shared_data.pair.override_block_per_atom = 0;
   cuda->shared_data.pair.neighall = true;
   // init: compute() has not yet called Cuda_PairTersoffCuda_Init().
   // iszbl: passed to that init call; false in this class.
   init = false;
   iszbl = false;
 }
 
 /* ----------------------------------------------------------------------
    remember pointer to arrays in cuda shared data
 ------------------------------------------------------------------------- */
 
 void PairTersoffCuda::allocate()
 {
   // Run the PairTersoff allocation once.
   if (!allocated) PairTersoff::allocate();
   // The shared-data fields only need to be filled in the first time.
   if (allocated2) return;
   allocated2 = true;
   cuda->shared_data.pair.cutsq   = cutsq;
   cuda->shared_data.pair.special_lj  = force->special_lj;
   cuda->shared_data.pair.special_coul  = force->special_coul;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairTersoffCuda::compute(int eflag, int vflag)
 {
   // One-time call of Cuda_PairTersoffCuda_Init(), guarded by the init flag.
   if (!init) {
     Cuda_PairTersoffCuda_Init(&cuda->shared_data,params_f,map, &elem2param[0][0][0],nelements,iszbl);
     init = true;
   }
   // ev_setup() is only invoked when energy or virial tallying is requested.
   if (eflag || vflag) {
     ev_setup(eflag, vflag);
   }
   // upload() the global energy / virial buffers that were requested
   if (eflag) {
     cuda->cu_eng_vdwl->upload();
   }
   if (vflag) {
     cuda->cu_virial->upload();
   }
   Cuda_PairTersoffCuda(& cuda->shared_data, & cuda_neigh_list->sneighlist, eflag, vflag, eflag_atom, vflag_atom);//,&elem2param[0][0][0],map
   // When collect_forces_later is set the download() calls are skipped here.
   if (cuda->shared_data.pair.collect_forces_later) return;
   if (eflag) {
     cuda->cu_eng_vdwl->download();
   }
   if (vflag) {
     cuda->cu_virial->download();
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairTersoffCuda::settings(int narg, char **arg)
 {
   // Pure pass-through: the arguments go to PairTersoff::settings() unchanged.
   PairTersoff::settings(narg, arg);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairTersoffCuda::coeff(int narg, char **arg)
 {
         PairTersoff::coeff(narg, arg);
         allocate();
   params_f = (Param_Float *) memory->srealloc(params_f,maxparam*sizeof(Param_Float),
         "pair:params_f");
   for(int i=0;i<maxparam;i++)
   {
     params_f[i].lam1 = params[i].lam1;
     params_f[i].lam2 = params[i].lam2;
     params_f[i].lam3 = params[i].lam3;
     params_f[i].c = params[i].c;
     params_f[i].d = params[i].d;
     params_f[i].h = params[i].h;
     params_f[i].gamma = params[i].gamma;
     params_f[i].powerm = params[i].powerm;
     params_f[i].powern = params[i].powern;
     params_f[i].beta = params[i].beta;
     params_f[i].biga = params[i].biga;
     params_f[i].bigb = params[i].bigb;
     params_f[i].bigd = params[i].bigd;
     params_f[i].bigr = params[i].bigr;
     params_f[i].cut = params[i].cut;
     params_f[i].cutsq = params[i].cutsq;
     params_f[i].c1 = params[i].c1;
     params_f[i].c2 = params[i].c2;
     params_f[i].c3 = params[i].c3;
     params_f[i].c4 = params[i].c4;
     params_f[i].ielement = params[i].ielement;
     params_f[i].jelement = params[i].jelement;
     params_f[i].kelement = params[i].kelement;
     params_f[i].powermint = params[i].powermint;
   }
   cuda->shared_data.pair.cut_global = cutmax;
 }
 
 /* ----------------------------------------------------------------------
    configure the neighbor list request for this CUDA pair style:
    one request flagged full = 1, half = 0, cudable = 1, ghost = 1
 ------------------------------------------------------------------------- */
 void PairTersoffCuda::init_style()
 {
         MYDBG(printf("# CUDA PairTersoffCuda::init_style start\n"); )
 
   // index of the request inside neighbor->requests[]
   int irequest;
 
-        irequest = neighbor->request(this);
+        irequest = neighbor->request(this,instance_me);
   // mark the request as a full (not half), cudable list with the ghost
   // flag set
   neighbor->requests[irequest]->full = 1;
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->cudable = 1;
   neighbor->requests[irequest]->ghost = 1;
 
 
   MYDBG(printf("# CUDA PairTersoffCuda::init_style end\n"); )
 }
 
 void PairTersoffCuda::init_list(int id, NeighList *ptr)
 {
         MYDBG(printf("# CUDA PairTersoffCuda::init_list\n");)
         PairTersoff::init_list(id, ptr);
         // right now we can only handle verlet (id 0), not respa
         if(id == 0) cuda_neigh_list = cuda->registerNeighborList(ptr);
         // see Neighbor::init() for details on lammps lists' logic
         MYDBG(printf("# CUDA PairTersoffCuda::init_list end\n");)
   cu_params_f = (Param_Float*) CudaWrapper_AllocCudaData(sizeof(Param_Float)*maxparam);
   CudaWrapper_UploadCudaData((void*) params_f,(void*) cu_params_f,sizeof(Param_Float)*maxparam);
   cu_elem2param = new cCudaData<int, int, xyz > ((int*) elem2param, nelements,nelements,nelements);
   cu_elem2param->upload();
   cu_map = new cCudaData<int, int, x > ( map,atom->ntypes+1 );
   cu_map->upload();
 }
 
 void PairTersoffCuda::ev_setup(int eflag, int vflag)
 {
         int maxeatomold=maxeatom;
         PairTersoff::ev_setup(eflag,vflag);
 
   if (eflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_eatom; cuda->cu_eatom = new cCudaData<double, ENERGY_CFLOAT, x > ((double*)eatom, & cuda->shared_data.atom.eatom , atom->nmax  );}
 
   if (vflag_atom && atom->nmax > maxeatomold)
         {delete cuda->cu_vatom; cuda->cu_vatom = new cCudaData<double, ENERGY_CFLOAT, yx > ((double*)vatom, & cuda->shared_data.atom.vatom , atom->nmax, 6  );}
 }
diff --git a/src/USER-EFF/pair_eff_cut.cpp b/src/USER-EFF/pair_eff_cut.cpp
index 06b596231..d8635c10a 100644
--- a/src/USER-EFF/pair_eff_cut.cpp
+++ b/src/USER-EFF/pair_eff_cut.cpp
@@ -1,1073 +1,1073 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Andres Jaramillo-Botero
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_eff_cut.h"
 #include "pair_eff_inline.h"
 #include "atom.h"
 #include "update.h"
 #include "min.h"
 #include "domain.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "memory.h"
 #include "error.h"
 #include "atom_vec_electron.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairEffCut::PairEffCut(LAMMPS *lmp) : Pair(lmp)
 {
   single_enable = 0;
   // min_eradius / min_erforce start out unallocated, with nmax = 0
   nmax = 0;
   min_eradius = NULL;
   min_erforce = NULL;
   // pvector holds nextra = 4 entries; compute() documents them as
   // [KE, Pauli, ecoul, radial_restraint]
   nextra = 4;
   pvector = new double[nextra];
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairEffCut::~PairEffCut()
 {
   // release the buffers set up in the constructor / during minimization
   delete [] pvector;
   memory->destroy(min_eradius);
   memory->destroy(min_erforce);
   // the per-type arrays are only destroyed when the allocated flag is set
   if (!allocated) return;
   memory->destroy(setflag);
   memory->destroy(cutsq);
   memory->destroy(cut);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairEffCut::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,energy;
   double eke,ecoul,epauli,errestrain,halfcoul,halfpauli;
   double fpair,fx,fy,fz;
   double e1rforce,e2rforce,e1rvirial,e2rvirial;
   double s_fpair, s_e1rforce, s_e2rforce;
   double ecp_epauli, ecp_fpair, ecp_e1rforce, ecp_e2rforce;
   double rsq,rc;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   energy = eke = epauli = ecp_epauli = ecoul = errestrain = 0.0;
   // pvector = [KE, Pauli, ecoul, radial_restraint]
   for (i=0; i<4; i++) pvector[i] = 0.0;
 
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   double *erforce = atom->erforce;
   double *eradius = atom->eradius;
   int *spin = atom->spin;
   int *type = atom->type;
   int nlocal = atom->nlocal;
 
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     // add electron wavefuntion kinetic energy (not pairwise)
 
     if (abs(spin[i])==1 || spin[i]==2) {
       // reset energy and force temp variables
       eke = epauli = ecoul = 0.0;
       fpair = e1rforce = e2rforce = 0.0;
       s_fpair = 0.0;
 
       KinElec(eradius[i],&eke,&e1rforce);
 
       // Fixed-core
       if (spin[i] == 2) {
         // KE(2s)+Coul(1s-1s)+Coul(2s-nuclei)+Pauli(2s)
         eke *= 2;
         ElecNucElec(q[i],0.0,eradius[i],&ecoul,&fpair,&e1rforce);
         ElecNucElec(q[i],0.0,eradius[i],&ecoul,&fpair,&e1rforce);
         ElecElecElec(0.0,eradius[i],eradius[i],&ecoul,&fpair,&e1rforce,&e2rforce);
 
         // opposite spin electron interactions
         PauliElecElec(0,0.0,eradius[i],eradius[i],
             &epauli,&s_fpair,&e1rforce,&e2rforce);
 
         // fix core electron size, i.e. don't contribute to ervirial
         e2rforce = e1rforce = 0.0;
       }
 
       // apply unit conversion factors
       eke *= hhmss2e;
       ecoul *= qqrd2e;
       fpair *= qqrd2e;
       epauli *= hhmss2e;
       s_fpair *= hhmss2e;
       e1rforce *= hhmss2e;
 
       // Sum up contributions
       energy = eke + epauli + ecoul;
       fpair = fpair + s_fpair;
 
       erforce[i] += e1rforce;
 
       // Tally energy and compute radial atomic virial contribution
       if (evflag) {
         ev_tally_eff(i,i,nlocal,newton_pair,energy,0.0);
         if (pressure_with_evirials_flag) // iff flexible pressure flag on
           ev_tally_eff(i,i,nlocal,newton_pair,0.0,e1rforce*eradius[i]);
       }
       if (eflag_global) {
         pvector[0] += eke;
         pvector[1] += epauli;
         pvector[2] += ecoul;
       }
     }
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       rc = sqrt(rsq);
 
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 
         energy = ecoul = epauli = ecp_epauli = 0.0;
         fx = fy = fz = fpair = s_fpair = ecp_fpair = 0.0;
 
         double taper = sqrt(cutsq[itype][jtype]);
         double dist = rc / taper;
         double spline = cutoff(dist);
         double dspline = dcutoff(dist) / taper;
 
         // nucleus (i) - nucleus (j) Coul interaction
 
         if (spin[i] == 0 && spin[j] == 0) {
           double qxq = q[i]*q[j];
 
           ElecNucNuc(qxq, rc, &ecoul, &fpair);
         }
 
         // fixed-core (i) - nucleus (j) nuclear Coul interaction
         else if (spin[i] == 2 && spin[j] == 0) {
           double qxq = q[i]*q[j];
           e1rforce = 0.0;
 
           ElecNucNuc(qxq, rc, &ecoul, &fpair);
           ElecNucElec(q[j],rc,eradius[i],&ecoul,&fpair,&e1rforce);
           ElecNucElec(q[j],rc,eradius[i],&ecoul,&fpair,&e1rforce);
         }
 
         // nucleus (i) - fixed-core (j) nuclear Coul interaction
         else if (spin[i] == 0 && spin[j] == 2) {
           double qxq = q[i]*q[j];
           e1rforce = 0.0;
 
           ElecNucNuc(qxq, rc, &ecoul, &fpair);
           ElecNucElec(q[i],rc,eradius[j],&ecoul,&fpair,&e1rforce);
           ElecNucElec(q[i],rc,eradius[j],&ecoul,&fpair,&e1rforce);
         }
 
         // pseudo-core nucleus (i) - nucleus (j) interaction
         else if (spin[i] == 3 && spin[j] == 0) {
           double qxq = q[i]*q[j];
 
           ElecCoreNuc(qxq, rc, eradius[i], &ecoul, &fpair);
         }
 
         else if (spin[i] == 4 && spin[j] == 0) {
           double qxq = q[i]*q[j];
 
           ElecCoreNuc(qxq, rc, eradius[i], &ecoul, &fpair);
         }
 
         // nucleus (i) - pseudo-core nucleus (j) interaction
         else if (spin[i] == 0 && spin[j] == 3) {
           double qxq = q[i]*q[j];
 
           ElecCoreNuc(qxq, rc, eradius[j], &ecoul, &fpair);
         }
 
         else if (spin[i] == 0 && spin[j] == 4) {
           double qxq = q[i]*q[j];
 
           ElecCoreNuc(qxq, rc, eradius[j], &ecoul, &fpair);
         }
 
         // nucleus (i) - electron (j) Coul interaction
 
         else if  (spin[i] == 0 && abs(spin[j]) == 1) {
           e1rforce = 0.0;
 
           ElecNucElec(q[i],rc,eradius[j],&ecoul,&fpair,&e1rforce);
 
           e1rforce = spline * qqrd2e * e1rforce;
           erforce[j] += e1rforce;
 
           // Radial electron virial, iff flexible pressure flag set
           if (evflag && pressure_with_evirials_flag) {
             e1rvirial = eradius[j] * e1rforce;
             ev_tally_eff(j,j,nlocal,newton_pair,0.0,e1rvirial);
           }
         }
 
         // electron (i) - nucleus (j) Coul interaction
 
         else if (abs(spin[i]) == 1 && spin[j] == 0) {
           e1rforce = 0.0;
 
           ElecNucElec(q[j],rc,eradius[i],&ecoul,&fpair,&e1rforce);
 
           e1rforce = spline * qqrd2e * e1rforce;
           erforce[i] += e1rforce;
 
           // Radial electron virial, iff flexible pressure flag set
           if (evflag && pressure_with_evirials_flag) {
             e1rvirial = eradius[i] * e1rforce;
             ev_tally_eff(i,i,nlocal,newton_pair,0.0,e1rvirial);
           }
         }
 
         // electron (i) - electron (j) interactions
 
         else if (abs(spin[i]) == 1 && abs(spin[j]) == 1) {
           e1rforce = e2rforce = 0.0;
           s_e1rforce = s_e2rforce = 0.0;
 
           ElecElecElec(rc,eradius[i],eradius[j],&ecoul,&fpair,
                        &e1rforce,&e2rforce);
           PauliElecElec(spin[i] == spin[j],rc,eradius[i],eradius[j],
                        &epauli,&s_fpair,&s_e1rforce,&s_e2rforce);
 
           // Apply conversion factor
           epauli *= hhmss2e;
           s_fpair *= hhmss2e;
 
           e1rforce = spline * (qqrd2e * e1rforce + hhmss2e * s_e1rforce);
           erforce[i] += e1rforce;
           e2rforce = spline * (qqrd2e * e2rforce + hhmss2e * s_e2rforce);
           erforce[j] += e2rforce;
 
           // Radial electron virial, iff flexible pressure flag set
           if (evflag && pressure_with_evirials_flag) {
             e1rvirial = eradius[i] * e1rforce;
             e2rvirial = eradius[j] * e2rforce;
             ev_tally_eff(i,j,nlocal,newton_pair,0.0,e1rvirial+e2rvirial);
           }
         }
 
         // fixed-core (i) - electron (j) interactions
 
         else if (spin[i] == 2 && abs(spin[j]) == 1) {
           e1rforce = e2rforce = 0.0;
           s_e1rforce = s_e2rforce = 0.0;
 
           ElecNucElec(q[i],rc,eradius[j],&ecoul,&fpair,&e2rforce);
           ElecElecElec(rc,eradius[i],eradius[j],&ecoul,&fpair,
                          &e1rforce,&e2rforce);
           ElecElecElec(rc,eradius[i],eradius[j],&ecoul,&fpair,
                          &e1rforce,&e2rforce);
           PauliElecElec(0,rc,eradius[i],eradius[j],&epauli,
                        &s_fpair,&s_e1rforce,&s_e2rforce);
           PauliElecElec(1,rc,eradius[i],eradius[j],&epauli,
                        &s_fpair,&s_e1rforce,&s_e2rforce);
 
           // Apply conversion factor
           epauli *= hhmss2e;
           s_fpair *= hhmss2e;
 
           // only update virial for j electron
           e2rforce = spline * (qqrd2e * e2rforce + hhmss2e * s_e2rforce);
           erforce[j] += e2rforce;
 
           // Radial electron virial, iff flexible pressure flag set
           if (evflag && pressure_with_evirials_flag) {
             e2rvirial = eradius[j] * e2rforce;
             ev_tally_eff(j,j,nlocal,newton_pair,0.0,e2rvirial);
           }
         }
 
         // electron (i) - fixed-core (j) interactions
 
         else if (abs(spin[i]) == 1 && spin[j] == 2) {
           e1rforce = e2rforce = 0.0;
           s_e1rforce = s_e2rforce = 0.0;
 
           ElecNucElec(q[j],rc,eradius[i],&ecoul,&fpair,&e2rforce);
           ElecElecElec(rc,eradius[j],eradius[i],&ecoul,&fpair,
                          &e1rforce,&e2rforce);
           ElecElecElec(rc,eradius[j],eradius[i],&ecoul,&fpair,
                          &e1rforce,&e2rforce);
 
           PauliElecElec(0,rc,eradius[j],eradius[i],&epauli,
                        &s_fpair,&s_e1rforce,&s_e2rforce);
           PauliElecElec(1,rc,eradius[j],eradius[i],&epauli,
                        &s_fpair,&s_e1rforce,&s_e2rforce);
 
           // Apply conversion factor
           epauli *= hhmss2e;
           s_fpair *= hhmss2e;
 
           // only update virial for i electron
           e2rforce = spline * (qqrd2e * e2rforce + hhmss2e * s_e2rforce);
           erforce[i] += e2rforce;
 
           // add radial atomic virial, iff flexible pressure flag set
           if (evflag && pressure_with_evirials_flag) {
             e2rvirial = eradius[i] * e2rforce;
             ev_tally_eff(i,i,nlocal,newton_pair,0.0,e2rvirial);
           }
         }
 
         // fixed-core (i) - fixed-core (j) interactions
 
         else if (spin[i] == 2 && spin[j] == 2) {
           e1rforce = e2rforce = 0.0;
           s_e1rforce = s_e2rforce = 0.0;
           double qxq = q[i]*q[j];
 
           ElecNucNuc(qxq, rc, &ecoul, &fpair);
           ElecNucElec(q[i],rc,eradius[j],&ecoul,&fpair,&e1rforce);
           ElecNucElec(q[i],rc,eradius[j],&ecoul,&fpair,&e1rforce);
           ElecNucElec(q[j],rc,eradius[i],&ecoul,&fpair,&e1rforce);
           ElecNucElec(q[j],rc,eradius[i],&ecoul,&fpair,&e1rforce);
           ElecElecElec(rc,eradius[i],eradius[j],&ecoul,&fpair,
                          &e1rforce,&e2rforce);
           ElecElecElec(rc,eradius[i],eradius[j],&ecoul,&fpair,
                          &e1rforce,&e2rforce);
           ElecElecElec(rc,eradius[i],eradius[j],&ecoul,&fpair,
                          &e1rforce,&e2rforce);
           ElecElecElec(rc,eradius[i],eradius[j],&ecoul,&fpair,
                          &e1rforce,&e2rforce);
 
           PauliElecElec(0,rc,eradius[i],eradius[j],&epauli,
                        &s_fpair,&s_e1rforce,&s_e2rforce);
           PauliElecElec(1,rc,eradius[i],eradius[j],&epauli,
                        &s_fpair,&s_e1rforce,&s_e2rforce);
           epauli *= 2;
           s_fpair *= 2;
 
           // Apply conversion factor
           epauli *= hhmss2e;
           s_fpair *= hhmss2e;
         }
 
         // pseudo-core (i) - electron/fixed-core electrons (j) interactions
 
         else if (spin[i] == 3 && (abs(spin[j]) == 1 || spin[j] == 2)) {
           e2rforce = ecp_e2rforce = 0.0;
 
           if (((PAULI_CORE_D[ecp_type[itype]]) == 0.0) && ((PAULI_CORE_E[ecp_type[itype]]) == 0.0)) {
             if (abs(spin[j]) == 1) {
               ElecCoreElec(q[i],rc,eradius[i],eradius[j],&ecoul,
                           &fpair,&e2rforce);
               PauliCoreElec(rc,eradius[j],&ecp_epauli,&ecp_fpair,
                           &ecp_e2rforce,PAULI_CORE_A[ecp_type[itype]], PAULI_CORE_B[ecp_type[itype]],
                           PAULI_CORE_C[ecp_type[itype]]);
             } else { // add second s electron contribution from fixed-core
               double qxq = q[i]*q[j];
               ElecCoreNuc(qxq, rc, eradius[j], &ecoul, &fpair);
               ElecCoreElec(q[i],rc,eradius[i],eradius[j],&ecoul,
                           &fpair,&e2rforce);
               ElecCoreElec(q[i],rc,eradius[i],eradius[j],&ecoul,
                           &fpair,&e2rforce);
               PauliCoreElec(rc,eradius[j],&ecp_epauli,&ecp_fpair,
                           &ecp_e2rforce,PAULI_CORE_A[ecp_type[itype]], PAULI_CORE_B[ecp_type[itype]],
                           PAULI_CORE_C[ecp_type[itype]]);
               PauliCoreElec(rc,eradius[j],&ecp_epauli,&ecp_fpair,
                           &ecp_e2rforce,PAULI_CORE_A[ecp_type[itype]], PAULI_CORE_B[ecp_type[itype]],
                           PAULI_CORE_C[ecp_type[itype]]);
             }
           } else {
             if (abs(spin[j]) == 1) {
               ElecCoreElec(q[i],rc,eradius[i],eradius[j],&ecoul,
                           &fpair,&e2rforce);
               PauliCorePElec(rc,eradius[j],&ecp_epauli,&ecp_fpair,
                           &ecp_e2rforce,PAULI_CORE_A[ecp_type[itype]],PAULI_CORE_B[ecp_type[itype]],
                           PAULI_CORE_C[ecp_type[itype]],PAULI_CORE_D[ecp_type[itype]],PAULI_CORE_E[ecp_type[itype]]);
             } else { // add second s electron contribution from fixed-core
               double qxq = q[i]*q[j];
               ElecCoreNuc(qxq, rc, eradius[j], &ecoul, &fpair);
               ElecCoreElec(q[i],rc,eradius[i],eradius[j],&ecoul,
                           &fpair,&e2rforce);
               ElecCoreElec(q[i],rc,eradius[i],eradius[j],&ecoul,
                           &fpair,&e2rforce);
               PauliCorePElec(rc,eradius[j],&ecp_epauli,&ecp_fpair,
                           &ecp_e2rforce,PAULI_CORE_A[ecp_type[itype]], PAULI_CORE_B[ecp_type[itype]],
                           PAULI_CORE_C[ecp_type[itype]],PAULI_CORE_D[ecp_type[itype]],PAULI_CORE_E[ecp_type[itype]]);
               PauliCorePElec(rc,eradius[j],&ecp_epauli,&ecp_fpair,
                           &ecp_e2rforce,PAULI_CORE_A[ecp_type[itype]], PAULI_CORE_B[ecp_type[itype]],
                           PAULI_CORE_C[ecp_type[itype]],PAULI_CORE_D[ecp_type[itype]],PAULI_CORE_E[ecp_type[itype]]);
             }
           }
 
           // Apply conversion factor from Hartree to kcal/mol
           ecp_epauli *= h2e;
           ecp_fpair *= h2e;
 
           // only update virial for j electron
           e2rforce = spline * (qqrd2e * e2rforce + h2e * ecp_e2rforce);
           erforce[j] += e2rforce;
 
           // add radial atomic virial, iff flexible pressure flag set
           if (evflag && pressure_with_evirials_flag) {
             e2rvirial = eradius[j] * e2rforce;
             ev_tally_eff(j,j,nlocal,newton_pair,0.0,e2rvirial);
           }
         }
 
         // electron/fixed-core electrons (i) - pseudo-core (j) interactions
 
         else if ((abs(spin[i]) == 1 || spin[i] == 2) && spin[j] == 3) {
           e1rforce = ecp_e1rforce = 0.0;
 
           if (((PAULI_CORE_D[ecp_type[jtype]]) == 0.0) && ((PAULI_CORE_E[ecp_type[jtype]]) == 0.0)) {
             if (abs(spin[i]) == 1) {
               ElecCoreElec(q[j],rc,eradius[j],eradius[i],&ecoul,
                           &fpair,&e1rforce);
               PauliCoreElec(rc,eradius[i],&ecp_epauli,&ecp_fpair,
                           &ecp_e1rforce,PAULI_CORE_A[ecp_type[jtype]],PAULI_CORE_B[ecp_type[jtype]],
                           PAULI_CORE_C[ecp_type[jtype]]);
             } else {
               double qxq = q[i]*q[j];
               ElecCoreNuc(qxq,rc,eradius[i],&ecoul,&fpair);
               ElecCoreElec(q[j],rc,eradius[j],eradius[i],&ecoul,
                           &fpair,&e1rforce);
               ElecCoreElec(q[j],rc,eradius[j],eradius[i],&ecoul,
                           &fpair,&e1rforce);
               PauliCoreElec(rc,eradius[i],&ecp_epauli,&ecp_fpair,
                           &ecp_e1rforce,PAULI_CORE_A[ecp_type[jtype]], PAULI_CORE_B[ecp_type[jtype]],
                           PAULI_CORE_C[ecp_type[jtype]]);
               PauliCoreElec(rc,eradius[i],&ecp_epauli,&ecp_fpair,
                           &ecp_e1rforce,PAULI_CORE_A[ecp_type[jtype]], PAULI_CORE_B[ecp_type[jtype]],
                           PAULI_CORE_C[ecp_type[jtype]]);
             }
           } else {
             if (abs(spin[i]) == 1) {
               ElecCoreElec(q[j],rc,eradius[j],eradius[i],&ecoul,
                           &fpair,&e1rforce);
               PauliCorePElec(rc,eradius[i],&ecp_epauli,&ecp_fpair,
                           &ecp_e1rforce,PAULI_CORE_A[ecp_type[jtype]],PAULI_CORE_B[ecp_type[jtype]],
                           PAULI_CORE_C[ecp_type[jtype]],PAULI_CORE_D[ecp_type[jtype]],PAULI_CORE_E[ecp_type[jtype]]);
             } else {
               double qxq = q[i]*q[j];
               ElecCoreNuc(qxq,rc,eradius[i],&ecoul,&fpair);
               ElecCoreElec(q[j],rc,eradius[j],eradius[i],&ecoul,
                           &fpair,&e1rforce);
               ElecCoreElec(q[j],rc,eradius[j],eradius[i],&ecoul,
                           &fpair,&e1rforce);
               PauliCorePElec(rc,eradius[i],&ecp_epauli,&ecp_fpair,
                           &ecp_e1rforce,PAULI_CORE_A[ecp_type[jtype]], PAULI_CORE_B[ecp_type[jtype]],
                           PAULI_CORE_C[ecp_type[jtype]],PAULI_CORE_D[ecp_type[jtype]],PAULI_CORE_E[ecp_type[jtype]]);
               PauliCorePElec(rc,eradius[i],&ecp_epauli,&ecp_fpair,
                           &ecp_e1rforce,PAULI_CORE_A[ecp_type[jtype]], PAULI_CORE_B[ecp_type[jtype]],
                           PAULI_CORE_C[ecp_type[jtype]],PAULI_CORE_D[ecp_type[jtype]],PAULI_CORE_E[ecp_type[jtype]]);
             }
           }
 
           // Apply conversion factor from Hartree to kcal/mol
           ecp_epauli *= h2e;
           ecp_fpair *= h2e;
 
           // only update virial for j electron
           e1rforce = spline * (qqrd2e * e1rforce + h2e * ecp_e1rforce);
           erforce[i] += e1rforce;
 
           // add radial atomic virial, iff flexible pressure flag set
           if (evflag && pressure_with_evirials_flag) {
             e1rvirial = eradius[i] * e1rforce;
             ev_tally_eff(i,i,nlocal,newton_pair,0.0,e1rvirial);
           }
         }
 
         // pseudo-core (i) - pseudo-core (j) interactions
 
         else if (spin[i] == 3 && spin[j] == 3) {
           double qxq = q[i]*q[j];
 
           ElecCoreCore(qxq,rc,eradius[i],eradius[j],&ecoul,&fpair);
         }
 
         // Apply Coulomb conversion factor for all cases
         ecoul *= qqrd2e;
         fpair *= qqrd2e;
 
         // Sum up energy and force contributions
         epauli += ecp_epauli;
         energy = ecoul + epauli;
         fpair = fpair + s_fpair + ecp_fpair;
 
         // Apply cutoff spline
         fpair = fpair * spline - energy * dspline;
         energy = spline * energy;
 
         // Tally cartesian forces
         SmallRForce(delx,dely,delz,rc,fpair,&fx,&fy,&fz);
         f[i][0] += fx;
         f[i][1] += fy;
         f[i][2] += fz;
         if (newton_pair || j < nlocal) {
           f[j][0] -= fx;
           f[j][1] -= fy;
           f[j][2] -= fz;
         }
 
         // Tally energy (in ecoul) and compute normal pressure virials
         if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,0.0,
                              energy,fx,fy,fz,delx,dely,delz);
         if (eflag_global) {
           if (newton_pair) {
             pvector[1] += spline * epauli;
             pvector[2] += spline * ecoul;
           }
           else {
             halfpauli = 0.5 * spline * epauli;
             halfcoul = 0.5 * spline * ecoul;
             if (i < nlocal) {
               pvector[1] += halfpauli;
               pvector[2] += halfcoul;
             }
             if (j < nlocal) {
               pvector[1] += halfpauli;
               pvector[2] += halfcoul;
             }
           }
         }
 
       }
     }
 
     // limit electron stifness (size) for periodic systems, to max=half-box-size
 
     if (abs(spin[i]) == 1 && limit_eradius_flag) {
       double half_box_length=0, dr, kfactor=hhmss2e*1.0;
       e1rforce = errestrain = 0.0;
 
       if (domain->xperiodic == 1 || domain->yperiodic == 1 ||
           domain->zperiodic == 1) {
         delx = domain->boxhi[0]-domain->boxlo[0];
         dely = domain->boxhi[1]-domain->boxlo[1];
         delz = domain->boxhi[2]-domain->boxlo[2];
         half_box_length = 0.5 * MIN(delx, MIN(dely, delz));
         if (eradius[i] > half_box_length) {
           dr = eradius[i]-half_box_length;
           errestrain=0.5*kfactor*dr*dr;
           e1rforce=-kfactor*dr;
           if (eflag_global) pvector[3] += errestrain;
 
           erforce[i] += e1rforce;
 
           // Tally radial restrain energy and add radial restrain virial
           if (evflag) {
             ev_tally_eff(i,i,nlocal,newton_pair,errestrain,0.0);
             if (pressure_with_evirials_flag)  // flexible electron pressure
               ev_tally_eff(i,i,nlocal,newton_pair,0.0,eradius[i]*e1rforce);
           }
         }
       }
     }
 
   }
   if (vflag_fdotr) {
     virial_fdotr_compute();
     if (pressure_with_evirials_flag) virial_eff_compute();
   }
 }
 
 /* ----------------------------------------------------------------------
    eff-specific contribution to global virial
 ------------------------------------------------------------------------- */
 
 void PairEffCut::virial_eff_compute()
 {
   double *eradius = atom->eradius;
   double *erforce = atom->erforce;
   double e_virial;
   int *spin = atom->spin;
 
   // sum over force on all particles including ghosts
 
   if (neighbor->includegroup == 0) {
     int nall = atom->nlocal + atom->nghost;
     for (int i = 0; i < nall; i++) {
       if (spin[i]) {
         e_virial = erforce[i]*eradius[i]/3;
         virial[0] += e_virial;
         virial[1] += e_virial;
         virial[2] += e_virial;
       }
     }
 
   // neighbor includegroup flag is set
   // sum over force on initial nfirst particles and ghosts
 
   } else {
     int nall = atom->nfirst;
     for (int i = 0; i < nall; i++) {
       if (spin[i]) {
         e_virial = erforce[i]*eradius[i]/3;
         virial[0] += e_virial;
         virial[1] += e_virial;
         virial[2] += e_virial;
       }
     }
 
     nall = atom->nlocal + atom->nghost;
     for (int i = atom->nlocal; i < nall; i++) {
       if (spin[i]) {
         e_virial = erforce[i]*eradius[i]/3;
         virial[0] += e_virial;
         virial[1] += e_virial;
         virial[2] += e_virial;
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    tally energy into eng_coul/eatom and the radial electronic virial
    into virial/vatom
    energy is split in halves between i and j
    e_virial/3 is split in halves between i and j and added to the
    three diagonal virial components of particles with non-zero spin
 ------------------------------------------------------------------------- */
 
 void PairEffCut::ev_tally_eff(int i, int j, int nlocal, int newton_pair,
                               double energy, double e_virial)
 {
   const int *spin = atom->spin;
   const bool i_is_local = (i < nlocal);
   const bool j_is_local = (j < nlocal);
 
   // energy part
 
   if (eflag_either) {
     if (eflag_global) {
       if (newton_pair) {
         eng_coul += energy;
       } else {
         const double half_energy = 0.5*energy;
         if (i_is_local) eng_coul += half_energy;
         if (j_is_local) eng_coul += half_energy;
       }
     }
     if (eflag_atom) {
       if (newton_pair || i_is_local) eatom[i] += 0.5 * energy;
       if (newton_pair || j_is_local) eatom[j] += 0.5 * energy;
     }
   }
 
   // virial part
 
   if (!vflag_either) return;
 
   const double third = e_virial/3.0;
   const double share = third/2;
 
   if (vflag_global) {
     // the global tally only counts local particles, regardless of newton_pair
     if (spin[i] && i_is_local)
       for (int d = 0; d < 3; ++d) virial[d] += share;
     if (spin[j] && j_is_local)
       for (int d = 0; d < 3; ++d) virial[d] += share;
   }
 
   if (vflag_atom) {
     if (spin[i] && (newton_pair || i_is_local))
       for (int d = 0; d < 3; ++d) vatom[i][d] += share;
     if (spin[j] && (newton_pair || j_is_local))
       for (int d = 0; d < 3; ++d) vatom[j][d] += share;
   }
 }
 
 /* ----------------------------------------------------------------------
    allocate the per-type-pair arrays setflag, cutsq and cut
    each is (ntypes+1) x (ntypes+1); setflag is cleared for all i <= j
 ------------------------------------------------------------------------- */
 
 void PairEffCut::allocate()
 {
   allocated = 1;
 
   const int ntypes = atom->ntypes;
   const int dim = ntypes + 1;
 
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
   memory->create(cut,dim,dim,"pair:cut");
 }
 
 /* ---------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairEffCut::settings(int narg, char **arg)
 {
   if (narg < 1)
     error->all(FLERR,"Illegal pair_style command");
 
   // Defaults ECP parameters for C (radius=0.154)
   PAULI_CORE_A[6] = 22.721015;
   PAULI_CORE_B[6] = 0.728733;
   PAULI_CORE_C[6] = 1.103199;
   PAULI_CORE_D[6] = 17.695345;
   PAULI_CORE_E[6] = 6.693621;
 
   // Defaults ECP parameters for N (radius=0.394732)
   PAULI_CORE_A[7] = 16.242367;
   PAULI_CORE_B[7] = 0.602818;
   PAULI_CORE_C[7] = 1.081856;
   PAULI_CORE_D[7] = 7.150803;
   PAULI_CORE_E[7] = 5.351936;
 
   // Defaults p-element ECP parameters for Oxygen (radius=0.15)
   PAULI_CORE_A[8] = 29.5185;
   PAULI_CORE_B[8] = 0.32995;
   PAULI_CORE_C[8] = 1.21676;
   PAULI_CORE_D[8] = 11.98757;
   PAULI_CORE_E[8] = 3.073417;
 
   // Defaults ECP parameters for Al (radius=1.660)
   PAULI_CORE_A[13] = 0.486;
   PAULI_CORE_B[13] = 1.049;
   PAULI_CORE_C[13] = 0.207;
   PAULI_CORE_D[13] = 0.0;
   PAULI_CORE_E[13] = 0.0;
 
   // Defaults ECP parameters for Si (radius=1.691)
   PAULI_CORE_A[14] = 0.320852;
   PAULI_CORE_B[14] = 2.283269;
   PAULI_CORE_C[14] = 0.814857;
   PAULI_CORE_D[14] = 0.0;
   PAULI_CORE_E[14] = 0.0;
 
   cut_global = force->numeric(FLERR,arg[0]);
   limit_eradius_flag = 0;
   pressure_with_evirials_flag = 0;
 
   int atype;
   int iarg = 1;
   int ecp_found = 0;
 
   while (iarg < narg) {
     if (strcmp(arg[iarg],"limit/eradius") == 0) {
       limit_eradius_flag = 1; 
       iarg += 1;
     }
     else if (strcmp(arg[iarg],"pressure/evirials") == 0) {
       pressure_with_evirials_flag = 1; 
       iarg += 1;
     }
     else if (strcmp(arg[iarg],"ecp") == 0) {
       iarg += 1;
       while (iarg < narg) {
         atype = force->inumeric(FLERR,arg[iarg]);
         if (strcmp(arg[iarg+1],"C") == 0) ecp_type[atype] = 6;
         else if (strcmp(arg[iarg+1],"N") == 0) ecp_type[atype] = 7;
         else if (strcmp(arg[iarg+1],"O") == 0) ecp_type[atype] = 8;
         else if (strcmp(arg[iarg+1],"Al") == 0) ecp_type[atype] = 13;
         else if (strcmp(arg[iarg+1],"Si") == 0) ecp_type[atype] = 14;
         else error->all(FLERR, "Note: there are no default parameters for this atom ECP\n"); 
         iarg += 2;
         ecp_found = 1;
       } 
     }
   }
 
   if (!ecp_found && atom->ecp_flag) 
     error->all(FLERR,"Need to specify ECP type on pair_style command");
 
   // Need to introduce 2 new constants w/out changing update.cpp
   if (force->qqr2e==332.06371) {        // i.e. Real units chosen
     h2e = 627.509;                      // hartree->kcal/mol
     hhmss2e = 175.72044219620075;       // hartree->kcal/mol * (Bohr->Angstrom)^2
   } else if (force->qqr2e==1.0) {        // electron units
     h2e = 1.0;
     hhmss2e = 1.0;
   } else error->all(FLERR,"Check your units");
 
   // reset cutoffs that have been explicitly set
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i+1; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut[i][j] = cut_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
    checks that the atom style provides q, spin, eradius and erforce,
    registers with the minimizer when update->whichflag == 2,
    rejects dt == 1.0 together with qqr2e == 332.06371 when
    update->whichflag == 1, and requests a neighbor list
 ------------------------------------------------------------------------- */
 
 void PairEffCut::init_style()
 {
   // error and warning checks
 
   if (!atom->q_flag || !atom->spin_flag ||
       !atom->eradius_flag || !atom->erforce_flag)
     error->all(FLERR,"Pair eff/cut requires atom attributes "
                "q, spin, eradius, erforce");
 
   // add hook to minimizer for eradius and erforce
 
   if (update->whichflag == 2)
     update->minimize->request(this,1,0.01);
 
   // make sure to use the appropriate timestep when using real units
   // (qqr2e == 332.06371 is the value treated as real units in settings())
 
   if (update->whichflag == 1) {
     if (force->qqr2e == 332.06371 && update->dt == 1.0)
       error->all(FLERR,"You must lower the default real units timestep for pEFF ");
   }
 
   // request a neighbor list for this pair style
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type electron pairs (ECP-only)
 ------------------------------------------------------------------------- */
 
 void PairEffCut::coeff(int narg, char **arg)
 {
   if (!allocated) allocate();
 
   if ((strcmp(arg[0],"*") == 0) || (strcmp(arg[1],"*") == 0)) {
     int ilo,ihi,jlo,jhi;
     force->bounds(arg[0],atom->ntypes,ilo,ihi);
     force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
     double cut_one = cut_global;
     if (narg == 3) cut_one = force->numeric(FLERR,arg[2]);
 
     int count = 0;
     for (int i = ilo; i <= ihi; i++) {
       for (int j = MAX(jlo,i); j <= jhi; j++) {
         cut[i][j] = cut_one;
         setflag[i][j] = 1;
         count++;
       }
     }
     if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
   } else {
     int ecp;
     ecp = force->inumeric(FLERR,arg[0]);
     if (strcmp(arg[1],"s") ==0) {
       PAULI_CORE_A[ecp_type[ecp]] = force->numeric(FLERR,arg[2]);
       PAULI_CORE_B[ecp_type[ecp]] = force->numeric(FLERR,arg[3]);
       PAULI_CORE_C[ecp_type[ecp]] = force->numeric(FLERR,arg[4]);
       PAULI_CORE_D[ecp_type[ecp]] = 0.0;
       PAULI_CORE_E[ecp_type[ecp]] = 0.0;
     } else if (strcmp(arg[1],"p") ==0) {
       PAULI_CORE_A[ecp_type[ecp]] = force->numeric(FLERR,arg[2]);
       PAULI_CORE_B[ecp_type[ecp]] = force->numeric(FLERR,arg[3]);
       PAULI_CORE_C[ecp_type[ecp]] = force->numeric(FLERR,arg[4]);
       PAULI_CORE_D[ecp_type[ecp]] = force->numeric(FLERR,arg[5]);
       PAULI_CORE_E[ecp_type[ecp]] = force->numeric(FLERR,arg[6]);
     } else error->all(FLERR,"Illegal pair_coeff command"); 
   }
 }
 
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j
    if no cutoff was set for the pair, it is mixed from the i,i and j,j
    cutoffs; the resulting cut[i][j] is returned
 ------------------------------------------------------------------------- */
 
 double PairEffCut::init_one(int i, int j)
 {
   const bool explicitly_set = (setflag[i][j] != 0);
 
   if (!explicitly_set) {
     cut[i][j] = mix_distance(cut[i][i],cut[j][j]);
   }
 
   return cut[i][j];
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
    global settings first, then for every type pair i <= j the setflag
    value, followed by the cutoff only when setflag is non-zero
 ------------------------------------------------------------------------- */
 
 void PairEffCut::write_restart(FILE *fp)
 {
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
       if (!setflag[itype][jtype]) continue;
       fwrite(&cut[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
    reads the global settings, allocates the per-type arrays, then for
    every type pair i <= j reads setflag and, when it is non-zero, the cutoff
 ------------------------------------------------------------------------- */
 
 void PairEffCut::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   const bool is_reader = (comm->me == 0);
   const int ntypes = atom->ntypes;
 
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       if (is_reader) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
 
       // a cutoff is only stored in the file for pairs that were set
       if (!setflag[itype][jtype]) continue;
 
       if (is_reader) fread(&cut[itype][jtype],sizeof(double),1,fp);
       MPI_Bcast(&cut[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
    writes cut_global, offset_flag and mix_flag, in that order
 ------------------------------------------------------------------------- */
 
 void PairEffCut::write_restart_settings(FILE *fp)
 {
   // the order here must match the reads in read_restart_settings()
   fwrite(&cut_global,sizeof(double),1,fp);
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
    restores cut_global, offset_flag and mix_flag, in that order
 ------------------------------------------------------------------------- */
 
 void PairEffCut::read_restart_settings(FILE *fp)
 {
   const bool is_reader = (comm->me == 0);
 
   // only proc 0 touches the file
   if (is_reader) {
     fread(&cut_global,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
   }
 
   // every proc receives the values from proc 0
   MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
 }
 
 /* ----------------------------------------------------------------------
    hands the minimizer the arrays that hold the log() of electron radius
    and the corresponding force
    minimizer operates on log(radius) so radius never goes negative
    the arrays are owned by this pair style and are re-created whenever
    atom->nmax exceeds their current length
 ------------------------------------------------------------------------- */
 
 void PairEffCut::min_xf_pointers(int ignore, double **xextra, double **fextra)
 {
   // both arrays need to be atom->nmax in length
   // old contents are discarded when they are re-created
 
   const bool too_small = (nmax < atom->nmax);
   if (too_small) {
     memory->destroy(min_eradius);
     memory->destroy(min_erforce);
     nmax = atom->nmax;
     memory->create(min_eradius,nmax,"pair:min_eradius");
     memory->create(min_erforce,nmax,"pair:min_erforce");
   }
 
   *xextra = min_eradius;
   *fextra = min_erforce;
 }
 
 /* ----------------------------------------------------------------------
    minimizer requests the log() of electron radius and corresponding force
    calculate and store in min_eradius and min_erforce
 ------------------------------------------------------------------------- */
 
 void PairEffCut::min_xf_get(int ignore)
 {
   double *eradius = atom->eradius;
   double *erforce = atom->erforce;
   int *spin = atom->spin;
   int nlocal = atom->nlocal;
 
   for (int i = 0; i < nlocal; i++)
     if (spin[i]) {
       min_eradius[i] = log(eradius[i]);
       min_erforce[i] = eradius[i]*erforce[i];
     } else min_eradius[i] = min_erforce[i] = 0.0;
 }
 
 /* ----------------------------------------------------------------------
    minimizer has changed the log() of electron radius
    propagate the change back to eradius
 ------------------------------------------------------------------------- */
 
 void PairEffCut::min_x_set(int ignore)
 {
   double *eradius = atom->eradius;
   int *spin = atom->spin;
   int nlocal = atom->nlocal;
 
   for (int i = 0; i < nlocal; i++)
     if (spin[i]) eradius[i] = exp(min_eradius[i]);
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays, in bytes
    counts maxeatom doubles, 6*maxvatom doubles and 2*nmax doubles
 ------------------------------------------------------------------------- */
 
 double PairEffCut::memory_usage()
 {
   const double eatom_bytes = maxeatom * sizeof(double);
   const double vatom_bytes = maxvatom*6 * sizeof(double);
   const double minimizer_bytes = 2 * nmax * sizeof(double);
 
   return eatom_bytes + vatom_bytes + minimizer_bytes;
 }
diff --git a/src/USER-FEP/pair_coul_cut_soft.cpp b/src/USER-FEP/pair_coul_cut_soft.cpp
index 831f212c7..dbb63f19a 100644
--- a/src/USER-FEP/pair_coul_cut_soft.cpp
+++ b/src/USER-FEP/pair_coul_cut_soft.cpp
@@ -1,376 +1,376 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Soft-core version: Agilio Padua (Univ Blaise Pascal & CNRS)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_coul_cut_soft.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairCoulCutSoft::PairCoulCutSoft(LAMMPS *lmp)
   : Pair(lmp)
 {
   // no style-specific members are initialized here; only the Pair base is constructed
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairCoulCutSoft::~PairCoulCutSoft()
 {
   // the per-type tables only exist once allocate() has run
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   // style-specific per-type-pair tables
   memory->destroy(cut);
   memory->destroy(lambda);
   memory->destroy(lam1);
   memory->destroy(lam2);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairCoulCutSoft::compute(int eflag, int vflag)
 {
   // Accumulate soft-core Coulomb forces (and, when requested, energy/virial
   // tallies) for every pair in the neighbor list that lies inside its cutoff.
   //   eflag, vflag : energy / virial request flags, forwarded to ev_setup()
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
   double rsq,forcecoul,factor_coul;
   double denc;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // sbmask(j) picks the special_coul weight for this pair;
       // masking with NEIGHMASK leaves the plain atom index
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 
         // soft-core distance: lam2 is added to r^2 so denc stays finite as r -> 0
         // whenever lam2 > 0; lam1 scales the interaction strength
         denc = sqrt(lam2[itype][jtype] + rsq);
         forcecoul = qqrd2e * lam1[itype][jtype] * qtmp*q[j] / (denc*denc*denc);
 
         fpair = factor_coul*forcecoul;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         // apply the opposite force to j when newton_pair is on or j is a local atom
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag)
           ecoul = factor_coul * qqrd2e * lam1[itype][jtype] * qtmp*q[j] / denc;
 
         // van der Waals energy argument is 0.0: this style has only a Coulomb term
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              0.0,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairCoulCutSoft::allocate()
 {
   allocated = 1;
 
   const int ntypes = atom->ntypes;
   // tables are (ntypes+1) x (ntypes+1); the loops in this file index types from 1
   const int dim = ntypes + 1;
 
   memory->create(setflag,dim,dim,"pair:setflag");
   // clear the "coefficients set" flag for every pair with itype <= jtype
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   // style-specific per-type-pair tables
   memory->create(cut,dim,dim,"pair:cut");
   memory->create(lambda,dim,dim,"pair:lambda");
   memory->create(lam1,dim,dim,"pair:lam1");
   memory->create(lam2,dim,dim,"pair:lam2");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairCoulCutSoft::settings(int narg, char **arg)
 {
   // Parse the pair_style arguments: nlambda, alphac, global cutoff.
   //   narg : number of arguments, must be exactly 3
   //   arg  : argument strings, each parsed as a number
   if (narg != 3) error->all(FLERR,"Illegal pair_style command");
 
   nlambda = force->numeric(FLERR,arg[0]);
   alphac  = force->numeric(FLERR,arg[1]);
 
   cut_global = force->numeric(FLERR,arg[2]);
 
   // reset cutoffs that have been explicitly set
   // the inner loop starts at j = i so like-type (i,i) cutoffs are reset as well;
   // starting at i+1 left them at their previous value
 
   if (allocated) {
     const int ntypes = atom->ntypes;
     for (int i = 1; i <= ntypes; i++)
       for (int j = i; j <= ntypes; j++)
         if (setflag[i][j]) cut[i][j] = cut_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairCoulCutSoft::coeff(int narg, char **arg)
 {
   // expected arguments: itype-range jtype-range lambda [cutoff]
   const bool narg_ok = (narg == 3 || narg == 4);
   if (!narg_ok)
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   const double lambda_one = force->numeric(FLERR,arg[2]);
 
   // the optional 4th argument overrides the global cutoff for these pairs
   const double cut_one =
     (narg == 4) ? force->numeric(FLERR,arg[3]) : cut_global;
 
   int count = 0;
   for (int itype = ilo; itype <= ihi; ++itype) {
     // only pairs with jtype >= itype are stored
     const int jstart = (jlo > itype) ? jlo : itype;
     for (int jtype = jstart; jtype <= jhi; ++jtype) {
       lambda[itype][jtype] = lambda_one;
       cut[itype][jtype] = cut_one;
       setflag[itype][jtype] = 1;
       ++count;
     }
   }
 
   // the requested ranges selected no pair at all
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairCoulCutSoft::init_style()
 {
   // compute() and single() read atom->q, so the per-atom charge attribute must exist
   if (!atom->q_flag)
     error->all(FLERR,"Pair style coul/cut/soft requires atom attribute q");
 
   // register this pair style with the neighbor class
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairCoulCutSoft::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) {
     if (lambda[i][i] != lambda[j][j])
       error->all(FLERR,"Pair coul/cut/soft different lambda values in mix");
     lambda[i][j] = lambda[i][i];
     cut[i][j] = mix_distance(cut[i][i],cut[j][j]);
   }
 
   lam1[i][j] = pow(lambda[i][j], nlambda);
   lam2[i][j] = alphac * (1.0 - lambda[i][j])*(1.0 - lambda[i][j]);
 
   cut[j][i] = cut[i][j];
   lambda[j][i] = lambda[i][j];
   lam1[j][i] = lam1[i][j];
   lam2[j][i] = lam2[i][j];
 
   return cut[i][j];
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairCoulCutSoft::write_restart(FILE *fp)
 {
   // global settings first, then one record per type pair with itype <= jtype
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
 
       // coefficients follow the flag only for pairs that were set
       if (!setflag[itype][jtype]) continue;
       fwrite(&lambda[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairCoulCutSoft::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   // only proc 0 touches the file; every value is broadcast right after it is read
   const bool is_root = (comm->me == 0);
   const int ntypes = atom->ntypes;
 
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       if (is_root) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
 
       // coefficients are present in the file only for pairs that were set
       if (!setflag[itype][jtype]) continue;
 
       if (is_root) {
         fread(&lambda[itype][jtype],sizeof(double),1,fp);
         fread(&cut[itype][jtype],sizeof(double),1,fp);
       }
       MPI_Bcast(&lambda[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairCoulCutSoft::write_restart_settings(FILE *fp)
 {
   // Write the global settings as raw binary.
   // The field order must match read_restart_settings():
   //   nlambda, alphac, cut_global (double), then offset_flag, mix_flag (int)
   fwrite(&nlambda,sizeof(double),1,fp);
   fwrite(&alphac,sizeof(double),1,fp);
 
   fwrite(&cut_global,sizeof(double),1,fp);
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairCoulCutSoft::read_restart_settings(FILE *fp)
 {
   // Read the global settings in the order write_restart_settings() wrote them.
   // Only proc 0 reads from fp; the return values of fread() are not checked here.
   if (comm->me == 0) {
     fread(&nlambda,sizeof(double),1,fp);
     fread(&alphac,sizeof(double),1,fp);
 
     fread(&cut_global,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
   }
   // broadcast each value from proc 0 over the communicator "world"
   MPI_Bcast(&nlambda,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&alphac,1,MPI_DOUBLE,0,world);
 
   MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to data file
 ------------------------------------------------------------------------- */
 
 void PairCoulCutSoft::write_data(FILE *fp)
 {
   // one line per atom type: "<type> <lambda of the like-type pair>"
   const int ntypes = atom->ntypes;
   int itype = 1;
   while (itype <= ntypes) {
     fprintf(fp,"%d %g\n",itype,lambda[itype][itype]);
     ++itype;
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes all pairs to data file
 ------------------------------------------------------------------------- */
 
 void PairCoulCutSoft::write_data_all(FILE *fp)
 {
   // one line per type pair with itype <= jtype: "<itype> <jtype> <lambda>"
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fprintf(fp,"%d %d %g\n",itype,jtype,lambda[itype][jtype]);
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairCoulCutSoft::single(int i, int j, int itype, int jtype,
                            double rsq, double factor_coul, double factor_lj,
                            double &fforce)
 {
   // Soft-core Coulomb energy and force factor for one pair (i,j).
   //   rsq         : squared separation of the pair
   //   factor_coul : scaling applied to both the force and the energy
   //   factor_lj   : not used by this style
   //   fforce      : output, force factor (same form as fpair in compute())
   // Returns the scaled pair energy; both results are zero outside the cutoff.
   double forcecoul = 0.0;
   double phicoul = 0.0;
 
   if (rsq < cutsq[itype][jtype]) {
     const double denc = sqrt(lam2[itype][jtype] + rsq);
     // common numerator of the force and energy expressions
     const double qiqj =
       force->qqrd2e * lam1[itype][jtype] * atom->q[i]*atom->q[j];
 
     forcecoul = qiqj / (denc*denc*denc);
     phicoul = qiqj / denc;
   }
 
   fforce = factor_coul*forcecoul;
   return factor_coul*phicoul;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void *PairCoulCutSoft::extract(const char *str, int &dim)
 {
   // only "lambda" is exposed; dim is set to 2 whether or not the name matches
   dim = 2;
 
   void *found = NULL;
   if (strcmp(str,"lambda") == 0) found = (void *) lambda;
   return found;
 }
diff --git a/src/USER-FEP/pair_coul_long_soft.cpp b/src/USER-FEP/pair_coul_long_soft.cpp
index d39674400..be1d9a246 100644
--- a/src/USER-FEP/pair_coul_long_soft.cpp
+++ b/src/USER-FEP/pair_coul_long_soft.cpp
@@ -1,395 +1,395 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Paul Crozier (SNL)
    Soft-core version: Agilio Padua (Univ Blaise Pascal & CNRS)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_coul_long_soft.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "update.h"
 #include "integrate.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 // Constants of the erfc() approximation used in compute() and single():
 //   erfc(x) ~= t*(A1 + t*(A2 + t*(A3 + t*(A4 + t*A5)))) * exp(-x*x)
 //   with t = 1/(1 + EWALD_P*x)
 // EWALD_F is numerically 2/sqrt(pi).
 // NOTE(review): the A1..A5 / EWALD_P values look like Abramowitz & Stegun
 // formula 7.1.26 - confirm against the reference.
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 /* ---------------------------------------------------------------------- */
 
 PairCoulLongSoft::PairCoulLongSoft(LAMMPS *lmp)
   : Pair(lmp)
 {
   // flag both the Ewald and the PPPM capability
   // NOTE(review): presumably read by the KSpace styles for compatibility checks - confirm
   pppmflag = 1;
   ewaldflag = 1;
 
   // qdist enlarges the cutoff returned by init_one(); it is zero for this style
   qdist = 0.0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairCoulLongSoft::~PairCoulLongSoft()
 {
   // the per-type tables only exist once allocate() has run
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   memory->destroy(scale);
 
   // soft-core tables
   memory->destroy(lambda);
   memory->destroy(lam1);
   memory->destroy(lam2);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairCoulLongSoft::compute(int eflag, int vflag)
 {
   // Accumulate the real-space part of the soft-core long-range Coulomb force
   // (and, when requested, energy/virial tallies) for all neighbor pairs inside
   // cut_coul.
   //   eflag, vflag : energy / virial request flags, forwarded to ev_setup()
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
   double r,rsq,forcecoul,factor_coul;
   double grij,expm2,prefactor,t,erfc;
   double denc;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // sbmask(j) picks the special_coul weight for this pair;
       // masking with NEIGHMASK leaves the plain atom index
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cut_coulsq) {
 
         // polynomial approximation of erfc(g_ewald*r); the local variable
         // named erfc holds the value, it is not the library function
         r = sqrt(rsq);
         grij = g_ewald * r;
         expm2 = exp(-grij*grij);
         t = 1.0 / (1.0 + EWALD_P*grij);
         erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
 
         // soft-core distance: lam2 is added to r^2, lam1 scales the strength
         denc = sqrt(lam2[itype][jtype] + rsq);
         prefactor = qqrd2e * lam1[itype][jtype] * qtmp*q[j] / (denc*denc*denc);
 
         forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
         // pairs with factor_coul < 1: subtract the excluded fraction of the
         // plain soft-core term
         if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
 
         fpair = forcecoul;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         // apply the opposite force to j when newton_pair is on or j is a local atom
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
           // prefactor is recomputed with a single power of denc for the energy
           prefactor = qqrd2e * lam1[itype][jtype] * qtmp*q[j] / denc;
           ecoul = prefactor*erfc;
           if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
         }
 
         // van der Waals energy argument is 0.0: this style has only a Coulomb term
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              0.0,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairCoulLongSoft::allocate()
 {
   allocated = 1;
 
   const int ntypes = atom->ntypes;
   // tables are (ntypes+1) x (ntypes+1); the loops in this file index types from 1
   const int dim = ntypes + 1;
 
   memory->create(setflag,dim,dim,"pair:setflag");
   // clear the "coefficients set" flag for every pair with itype <= jtype
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   memory->create(scale,dim,dim,"pair:scale");
 
   // soft-core tables
   memory->create(lambda,dim,dim,"pair:lambda");
   memory->create(lam1,dim,dim,"pair:lam1");
   memory->create(lam2,dim,dim,"pair:lam2");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairCoulLongSoft::settings(int narg, char **arg)
 {
   if (narg != 3) error->all(FLERR,"Illegal pair_style command");
 
   nlambda = force->numeric(FLERR,arg[0]);
   alphac  = force->numeric(FLERR,arg[1]);
 
   cut_coul = force->numeric(FLERR,arg[2]);
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairCoulLongSoft::coeff(int narg, char **arg)
 {
   // expected arguments: itype-range jtype-range lambda
   const bool wrong_count = (narg != 3);
   if (wrong_count)
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   const double lambda_one = force->numeric(FLERR,arg[2]);
 
   int count = 0;
   for (int itype = ilo; itype <= ihi; ++itype) {
     // only pairs with jtype >= itype are stored
     const int jstart = (jlo > itype) ? jlo : itype;
     for (int jtype = jstart; jtype <= jhi; ++jtype) {
       lambda[itype][jtype] = lambda_one;
       scale[itype][jtype] = 1.0;
       setflag[itype][jtype] = 1;
       ++count;
     }
   }
 
   // the requested ranges selected no pair at all
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairCoulLongSoft::init_style()
 {
   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/cut/coul/long requires atom attribute q");
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 
   cut_coulsq = cut_coul * cut_coul;
 
   // insure use of KSpace long-range solver, set g_ewald
 
  if (force->kspace == NULL)
     error->all(FLERR,"Pair style requires a KSpace style");
   g_ewald = force->kspace->g_ewald;
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairCoulLongSoft::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) {
     if (lambda[i][i] != lambda[j][j])
       error->all(FLERR,"Pair coul/cut/soft different lambda values in mix");
     lambda[i][j] = lambda[i][i];
   }
 
   lam1[i][j] = pow(lambda[i][j], nlambda);
   lam2[i][j] = alphac * (1.0 - lambda[i][j])*(1.0 - lambda[i][j]);
 
   scale[j][i] = scale[i][j];
   lambda[j][i] = lambda[i][j];
   lam1[j][i] = lam1[i][j];
   lam2[j][i] = lam2[i][j];
 
   return cut_coul+2.0*qdist;
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairCoulLongSoft::write_restart(FILE *fp)
 {
   // global settings first, then one record per type pair with itype <= jtype
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
 
       // lambda follows the flag only for pairs that were set
       if (!setflag[itype][jtype]) continue;
       fwrite(&lambda[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairCoulLongSoft::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
 
   allocate();
 
   // only proc 0 touches the file; every value is broadcast right after it is read
   const bool is_root = (comm->me == 0);
   const int ntypes = atom->ntypes;
 
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       if (is_root) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
 
       // lambda is present in the file only for pairs that were set
       if (!setflag[itype][jtype]) continue;
 
       if (is_root) fread(&lambda[itype][jtype],sizeof(double),1,fp);
       MPI_Bcast(&lambda[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairCoulLongSoft::write_restart_settings(FILE *fp)
 {
   // Write the global settings as raw binary.
   // The field order must match read_restart_settings():
   //   nlambda, alphac, cut_coul (double), then offset_flag, mix_flag (int)
   fwrite(&nlambda,sizeof(double),1,fp);
   fwrite(&alphac,sizeof(double),1,fp);
 
   fwrite(&cut_coul,sizeof(double),1,fp);
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairCoulLongSoft::read_restart_settings(FILE *fp)
 {
   // Read the global settings in the order write_restart_settings() wrote them.
   // Only proc 0 reads from fp; the return values of fread() are not checked here.
   if (comm->me == 0) {
     fread(&nlambda,sizeof(double),1,fp);
     fread(&alphac,sizeof(double),1,fp);
 
     fread(&cut_coul,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
   }
   // broadcast each value from proc 0 over the communicator "world"
   MPI_Bcast(&nlambda,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&alphac,1,MPI_DOUBLE,0,world);
 
   MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairCoulLongSoft::single(int i, int j, int itype, int jtype,
                             double rsq,
                             double factor_coul, double factor_lj,
                             double &fforce)
 {
   // Real-space soft-core Coulomb energy and force factor for one pair (i,j).
   //   rsq         : squared separation of the pair
   //   factor_coul : below 1.0 the excluded fraction of the plain term is removed
   //   factor_lj   : not used by this style
   //   fforce      : output, force factor (same form as fpair in compute())
   // Returns the pair energy; both results are zero at or beyond cut_coul.
   if (!(rsq < cut_coulsq)) {
     fforce = 0.0;
     return 0.0;
   }
 
   // polynomial approximation of erfc(g_ewald*r)
   const double r = sqrt(rsq);
   const double grij = g_ewald * r;
   const double expm2 = exp(-grij*grij);
   const double t = 1.0 / (1.0 + EWALD_P*grij);
   const double erfc_approx = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
 
   // soft-core distance and the prefactor of the force expression
   const double denc = sqrt(lam2[itype][jtype] + rsq);
   const double fprefactor =
     force->qqrd2e * lam1[itype][jtype] * atom->q[i]*atom->q[j] /
     (denc*denc*denc);
 
   double forcecoul = fprefactor * (erfc_approx + EWALD_F*grij*expm2);
   if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*fprefactor;
   fforce = forcecoul;
 
   // the energy uses the same numerator with a single power of denc
   const double eprefactor =
     force->qqrd2e * lam1[itype][jtype] * atom->q[i]*atom->q[j] / denc;
   double phicoul = eprefactor*erfc_approx;
   if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*eprefactor;
 
   return phicoul;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void *PairCoulLongSoft::extract(const char *str, int &dim)
 {
   // scalar quantity: dim = 0
   if (strcmp(str,"cut_coul") == 0) {
     dim = 0;
     return (void *) &cut_coul;
   }
 
   // per-type-pair tables: dim = 2 (also left at 2 when the name is unknown)
   dim = 2;
   void *found = NULL;
   if (strcmp(str,"scale") == 0) found = (void *) scale;
   else if (strcmp(str,"lambda") == 0) found = (void *) lambda;
 
   return found;
 }
diff --git a/src/USER-FEP/pair_lj_charmm_coul_long_soft.cpp b/src/USER-FEP/pair_lj_charmm_coul_long_soft.cpp
index 3ba892c1e..ee24b3695 100644
--- a/src/USER-FEP/pair_lj_charmm_coul_long_soft.cpp
+++ b/src/USER-FEP/pair_lj_charmm_coul_long_soft.cpp
@@ -1,1024 +1,1024 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Paul Crozier (SNL)
    Soft-core version: Agilio Padua (Univ Blaise Pascal & CNRS)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_lj_charmm_coul_long_soft.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 // Constants of the erfc() approximation used in compute():
 //   erfc(x) ~= t*(A1 + t*(A2 + t*(A3 + t*(A4 + t*A5)))) * exp(-x*x)
 //   with t = 1/(1 + EWALD_P*x)
 // EWALD_F is numerically 2/sqrt(pi).
 // NOTE(review): the A1..A5 / EWALD_P values look like Abramowitz & Stegun
 // formula 7.1.26 - confirm against the reference.
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCharmmCoulLongSoft::PairLJCharmmCoulLongSoft(LAMMPS *lmp)
   : Pair(lmp)
 {
   // this style provides compute_inner()/compute_middle()
   respa_enable = 1;
 
   // flag both the Ewald and the PPPM capability
   pppmflag = 1;
   ewaldflag = 1;
 
   implicit = 0;
 
   // default mixing rule for this style
   mix_flag = ARITHMETIC;
 
   writedata = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCharmmCoulLongSoft::~PairLJCharmmCoulLongSoft()
 {
   // the per-type tables are only freed when the allocated flag is set
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   // user-supplied coefficients
   memory->destroy(epsilon);
   memory->destroy(sigma);
   memory->destroy(lambda);
   memory->destroy(eps14);
   memory->destroy(sigma14);
 
   // derived coefficient tables
   memory->destroy(lj1);
   memory->destroy(lj2);
   memory->destroy(lj3);
   memory->destroy(lj4);
   memory->destroy(lj14_1);
   memory->destroy(lj14_2);
   memory->destroy(lj14_3);
   memory->destroy(lj14_4);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLongSoft::compute(int eflag, int vflag)
 {
   // Accumulate soft-core LJ (with switching between the inner and outer LJ
   // cutoff) plus real-space soft-core Coulomb forces for all neighbor pairs
   // inside cut_bothsq, and tally energy/virial when requested.
   //   eflag, vflag : energy / virial request flags, forwarded to ev_setup()
   // The lj1..lj4 tables are used here as: lj1 = overall strength factor,
   // lj2 = divisor of r^4, lj3 = additive LJ soft-core term, lj4 = additive
   // Coulomb soft-core term. NOTE(review): their definitions are outside this
   // function - confirm against init_one().
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double r,rsq,forcecoul,forcelj,factor_coul,factor_lj;
   double grij,expm2,prefactor,t,erfc;
   double philj,switch1,switch2;
   double denc, denlj, r4sig6;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // sbmask(j) picks the special weights for this pair;
       // masking with NEIGHMASK leaves the plain atom index
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cut_bothsq) {
 
         if (rsq < cut_coulsq) {
           // polynomial approximation of erfc(g_ewald*r); the local variable
           // named erfc holds the value, it is not the library function
           r = sqrt(rsq);
           grij = g_ewald * r;
           expm2 = exp(-grij*grij);
           t = 1.0 / (1.0 + EWALD_P*grij);
           erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
 
           // soft-core Coulomb distance: lj4 is added to r^2
           denc = sqrt(lj4[itype][jtype] + rsq);
           prefactor = qqrd2e * lj1[itype][jtype] * qtmp*q[j] / (denc*denc*denc);
 
           forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
           // pairs with factor_coul < 1: subtract the excluded fraction of the
           // plain soft-core term
           if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
         } else forcecoul = 0.0;
 
         if (rsq < cut_ljsq) {
           // soft-core LJ denominator: lj3 + r^6/lj2
           r4sig6 = rsq*rsq / lj2[itype][jtype];
           denlj = lj3[itype][jtype] + rsq*r4sig6;
           forcelj = lj1[itype][jtype] * epsilon[itype][jtype] * 
             (48.0*r4sig6/(denlj*denlj*denlj) - 24.0*r4sig6/(denlj*denlj));
 
           // between the inner and outer LJ cutoff the force is switched:
           // switch1 scales the force, switch2 multiplies the unswitched energy
           if (rsq > cut_lj_innersq) {
             switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
               (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
             switch2 = 12.0 * (cut_ljsq-rsq) * (rsq-cut_lj_innersq) / denom_lj;
             philj = lj1[itype][jtype] * 4.0 * epsilon[itype][jtype] * 
               (1.0/(denlj*denlj) - 1.0/denlj);
             forcelj = forcelj*switch1 + philj*switch2;
           }
         } else forcelj = 0.0;
 
         // factor_coul is already folded into forcecoul above
         fpair = forcecoul + factor_lj*forcelj;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         // apply the opposite force to j when newton_pair is on or j is a local atom
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
           if (rsq < cut_coulsq) {
             // prefactor is recomputed with a single power of denc for the energy
             prefactor = qqrd2e * lj1[itype][jtype] * qtmp*q[j] / denc;
             ecoul = prefactor*erfc;
             if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
           } else ecoul = 0.0;
 
           if (rsq < cut_ljsq) {
             evdwl = lj1[itype][jtype] * 4.0 * epsilon[itype][jtype] * 
               (1.0/(denlj*denlj) - 1.0/denlj);
             // same switching factor as applied to the force above
             if (rsq > cut_lj_innersq) {
               switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
                 (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
               evdwl *= switch1;
             }
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLongSoft::compute_inner()
 {
   // Force-only kernel over the pairs in listinner: plain soft-core Coulomb
   // (no erfc damping) plus soft-core LJ, smoothly turned off between
   // cut_respa[0] and cut_respa[1]. No energy or virial is tallied here.
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,fpair;
   double rsq,forcecoul,forcelj,factor_coul,factor_lj;
   double rsw;
   double denc, denlj, r4sig6;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = listinner->inum;
   ilist = listinner->ilist;
   numneigh = listinner->numneigh;
   firstneigh = listinner->firstneigh;
 
   // the force is at full strength below cut_out_on and vanishes at cut_out_off
   double cut_out_on = cut_respa[0];
   double cut_out_off = cut_respa[1];
 
   double cut_out_diff = cut_out_off - cut_out_on;
   double cut_out_on_sq = cut_out_on*cut_out_on;
   double cut_out_off_sq = cut_out_off*cut_out_off;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // sbmask(j) picks the special weights for this pair;
       // masking with NEIGHMASK leaves the plain atom index
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cut_out_off_sq) {
         jtype = type[j];
 
         // soft-core Coulomb distance: lj4 is added to r^2
         denc = sqrt(lj4[itype][jtype] + rsq);
         forcecoul = qqrd2e * lj1[itype][jtype] * qtmp*q[j] / (denc*denc*denc);
         // algebraically the same as forcecoul *= factor_coul
         if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*forcecoul;
 
         // soft-core LJ denominator: lj3 + r^6/lj2
         r4sig6 = rsq*rsq / lj2[itype][jtype];
         denlj = lj3[itype][jtype] + rsq*r4sig6;
         forcelj = lj1[itype][jtype] * epsilon[itype][jtype] * 
           (48.0*r4sig6/(denlj*denlj*denlj) - 24.0*r4sig6/(denlj*denlj));
 
         fpair = forcecoul + factor_lj*forcelj;
 
         // cubic switch in rsw: the factor is 1 at rsw = 0 and 0 at rsw = 1
         if (rsq > cut_out_on_sq) {
           rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff;
           fpair *= 1.0 + rsw*rsw*(2.0*rsw-3.0);
         }
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         // apply the opposite force to j when newton_pair is on or j is a local atom
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLongSoft::compute_middle()
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,fpair;
   double rsq,forcecoul,forcelj,factor_coul,factor_lj;
   double philj,switch1,switch2;
   double rsw;
   double denc, denlj, r4sig6;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = listmiddle->inum;
   ilist = listmiddle->ilist;
   numneigh = listmiddle->numneigh;
   firstneigh = listmiddle->firstneigh;
 
   double cut_in_off = cut_respa[0];
   double cut_in_on = cut_respa[1];
   double cut_out_on = cut_respa[2];
   double cut_out_off = cut_respa[3];
 
   double cut_in_diff = cut_in_on - cut_in_off;
   double cut_out_diff = cut_out_off - cut_out_on;
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
   double cut_out_on_sq = cut_out_on*cut_out_on;
   double cut_out_off_sq = cut_out_off*cut_out_off;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cut_out_off_sq && rsq > cut_in_off_sq) {
         jtype = type[j];
 
         denc = sqrt(lj4[itype][jtype] + rsq);
         forcecoul = qqrd2e * lj1[itype][jtype] * qtmp*q[j] / (denc*denc*denc);
         if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*forcecoul;
 
         r4sig6 = rsq*rsq / lj2[itype][jtype];
         denlj = lj3[itype][jtype] + rsq*r4sig6;
         forcelj = lj1[itype][jtype] * epsilon[itype][jtype] * 
           (48.0*r4sig6/(denlj*denlj*denlj) - 24.0*r4sig6/(denlj*denlj));
 
         if (rsq > cut_lj_innersq) {
           switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
             (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
           switch2 = 12.0 * (cut_ljsq-rsq) * (rsq-cut_lj_innersq) / denom_lj;
           philj = lj1[itype][jtype] * 4.0 * epsilon[itype][jtype] * 
               (1.0/(denlj*denlj) - 1.0/denlj);
           forcelj = forcelj*switch1 + philj*switch2;
         }
 
         fpair = forcecoul + factor_lj*forcelj;
 
         if (rsq < cut_in_on_sq) {
           rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff;
           fpair *= rsw*rsw*(3.0 - 2.0*rsw);
         }
         if (rsq > cut_out_on_sq) {
           rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff;
           fpair *= 1.0 + rsw*rsw*(2.0*rsw - 3.0);
         }
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLongSoft::compute_outer(int eflag, int vflag)
 {
   // Outer rRESPA level.  Applies the part of the pair force not already
   // handled by the inner/middle levels (everything beyond cut_respa[2],
   // ramped in up to cut_respa[3], plus the Ewald real-space correction at
   // all distances inside cut_coul).  When eflag/vflag are set, the full
   // pair energy and the full pair force (for the virial) are tallied.
   //   eflag : nonzero to accumulate energies
   //   vflag : nonzero to accumulate the virial
 
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double r,rsq,forcecoul,forcelj,factor_coul,factor_lj;
   double grij,expm2,fprefactor,eprefactor,t,erfc;
   double philj,switch1,switch2;
   double rsw;
   double denc, denlj, r4sig6;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = listouter->inum;
   ilist = listouter->ilist;
   numneigh = listouter->numneigh;
   firstneigh = listouter->firstneigh;
 
   // ramp-in window of the outer level
   double cut_in_off = cut_respa[2];
   double cut_in_on = cut_respa[3];
 
   double cut_in_diff = cut_in_on - cut_in_off;
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cut_bothsq) {
         jtype = type[j];
 
         if (rsq < cut_coulsq) {
           // polynomial approximation of erfc(g_ewald*r)
           r = sqrt(rsq);
           grij = g_ewald * r;
           expm2 = exp(-grij*grij);
           t = 1.0 / (1.0 + EWALD_P*grij);
           erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
 
           denc = sqrt(lj4[itype][jtype] + rsq);
           fprefactor = qqrd2e * lj1[itype][jtype] * qtmp*q[j] /
             (denc*denc*denc);
 
           // the "- 1.0" removes the bare Coulomb part; it is added back
           // below only in the range this level is responsible for
           forcecoul = fprefactor * (erfc + EWALD_F*grij*expm2 - 1.0);
 
           if (rsq > cut_in_off_sq) {
             if (rsq < cut_in_on_sq) {
               rsw = (r - cut_in_off)/cut_in_diff;
               forcecoul += fprefactor*rsw*rsw*(3.0 - 2.0*rsw);
               if (factor_coul < 1.0)
                 forcecoul -=
                   (1.0-factor_coul)*fprefactor*rsw*rsw*(3.0 - 2.0*rsw);
             } else {
               forcecoul += fprefactor;
               if (factor_coul < 1.0)
                 forcecoul -= (1.0-factor_coul)*fprefactor;
             }
           }
         } else forcecoul = 0.0;
 
         if (rsq < cut_ljsq && rsq > cut_in_off_sq) {
           r4sig6 = rsq*rsq / lj2[itype][jtype];
           denlj = lj3[itype][jtype] + rsq*r4sig6;
           forcelj = lj1[itype][jtype] * epsilon[itype][jtype] * 
             (48.0*r4sig6/(denlj*denlj*denlj) - 24.0*r4sig6/(denlj*denlj));
 
           if (rsq > cut_lj_innersq) {
             switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
               (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
             switch2 = 12.0 * (cut_ljsq-rsq) * (rsq-cut_lj_innersq) / denom_lj;
             philj = lj1[itype][jtype] * 4.0 * epsilon[itype][jtype] * 
               (1.0/(denlj*denlj) - 1.0/denlj);
             forcelj = forcelj*switch1 + philj*switch2;
           }
           if (rsq < cut_in_on_sq) {
             rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff;
             forcelj *= rsw*rsw*(3.0 - 2.0*rsw);
           }
         } else forcelj = 0.0;
 
         fpair = forcecoul + forcelj;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
           if (rsq < cut_coulsq) {
             eprefactor = qqrd2e * lj1[itype][jtype] * qtmp*q[j] / denc;
             ecoul = eprefactor*erfc;
             if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*eprefactor;
           } else ecoul = 0.0;
 
           if (rsq < cut_ljsq) {
             // recompute the soft-core denominator for this pair: the force
             // section above skips it when rsq <= cut_in_off_sq, which would
             // otherwise leave denlj stale from a previous pair
             r4sig6 = rsq*rsq / lj2[itype][jtype];
             denlj = lj3[itype][jtype] + rsq*r4sig6;
             evdwl = lj1[itype][jtype] * 4.0 * epsilon[itype][jtype] * 
               (1.0/(denlj*denlj) - 1.0/denlj);
             if (rsq > cut_lj_innersq) {
               switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
                 (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
               evdwl *= switch1;
             }
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (vflag) {
           // the virial needs the full pair force, not just the outer part
           if (rsq < cut_coulsq) {
             forcecoul = fprefactor * (erfc + EWALD_F*grij*expm2);
             if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*fprefactor;
           } else forcecoul = 0.0;
 
           if (rsq <= cut_in_off_sq) {
             r4sig6 = rsq*rsq / lj2[itype][jtype];
             denlj = lj3[itype][jtype] + rsq*r4sig6;
             forcelj = lj1[itype][jtype] * epsilon[itype][jtype] * 
               (48.0*r4sig6/(denlj*denlj*denlj) - 24.0*r4sig6/(denlj*denlj));
 
             if (rsq > cut_lj_innersq) {
               switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
                 (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
               switch2 = 12.0 * (cut_ljsq-rsq) * (rsq-cut_lj_innersq) / denom_lj;
               philj = lj1[itype][jtype] * 4.0 * epsilon[itype][jtype] * 
                 (1.0/(denlj*denlj) - 1.0/denlj);
               forcelj = forcelj*switch1 + philj*switch2;
             }
           } else if (rsq <= cut_in_on_sq) {
             // redo the LJ force without the rRESPA ramp applied above
             forcelj = lj1[itype][jtype] * epsilon[itype][jtype] * 
               (48.0*r4sig6/(denlj*denlj*denlj) - 24.0*r4sig6/(denlj*denlj));
             if (rsq > cut_lj_innersq) {
               switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
                 (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
               switch2 = 12.0 * (cut_ljsq-rsq) * (rsq-cut_lj_innersq) / denom_lj;
               philj = lj1[itype][jtype] * 4.0 * epsilon[itype][jtype] * 
                 (1.0/(denlj*denlj) - 1.0/denlj);
               forcelj = forcelj*switch1 + philj*switch2;
             }
           }
           fpair = forcecoul + factor_lj*forcelj;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLongSoft::allocate()
 {
   // every per-type-pair table is sized (ntypes+1) x (ntypes+1) so that it
   // can be indexed directly with 1-based atom types
   const int ntypes = atom->ntypes;
   const int dim = ntypes + 1;
 
   allocated = 1;
 
   // only the upper triangle (j >= i) of setflag is cleared here
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   // user-supplied coefficients
   memory->create(epsilon,dim,dim,"pair:epsilon");
   memory->create(sigma,dim,dim,"pair:sigma");
   memory->create(lambda,dim,dim,"pair:lambda");
   memory->create(eps14,dim,dim,"pair:eps14");
   memory->create(sigma14,dim,dim,"pair:sigma14");
 
   // coefficients derived in init_one()
   memory->create(lj1,dim,dim,"pair:lj1");
   memory->create(lj2,dim,dim,"pair:lj2");
   memory->create(lj3,dim,dim,"pair:lj3");
   memory->create(lj4,dim,dim,"pair:lj4");
   memory->create(lj14_1,dim,dim,"pair:lj14_1");
   memory->create(lj14_2,dim,dim,"pair:lj14_2");
   memory->create(lj14_3,dim,dim,"pair:lj14_3");
   memory->create(lj14_4,dim,dim,"pair:lj14_4");
 }
 
 /* ----------------------------------------------------------------------
    global settings
    unlike other pair styles,
      there are no individual pair settings that these override
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLongSoft::settings(int narg, char **arg)
 {
   if (narg != 5 && narg != 6) error->all(FLERR,"Illegal pair_style command");
 
   nlambda = force->numeric(FLERR,arg[0]);
   alphalj = force->numeric(FLERR,arg[1]);
   alphac  = force->numeric(FLERR,arg[2]);
 
   cut_lj_inner = force->numeric(FLERR,arg[3]);
   cut_lj = force->numeric(FLERR,arg[4]);
   if (narg == 5) cut_coul = cut_lj;
   else cut_coul = force->numeric(FLERR,arg[5]);
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLongSoft::coeff(int narg, char **arg)
 {
   if (narg != 5 && narg != 7) error->all(FLERR,"Illegal pair_coeff command");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double epsilon_one = force->numeric(FLERR,arg[2]);
   double sigma_one = force->numeric(FLERR,arg[3]);
   double lambda_one = force->numeric(FLERR,arg[4]);
 
   double eps14_one = epsilon_one;
   double sigma14_one = sigma_one;
   if (narg == 7) {
     eps14_one = force->numeric(FLERR,arg[5]);
     sigma14_one = force->numeric(FLERR,arg[6]);
   }
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       epsilon[i][j] = epsilon_one;
       sigma[i][j] = sigma_one;
       lambda[i][j] = lambda_one;
       eps14[i][j] = eps14_one;
       sigma14[i][j] = sigma14_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLongSoft::init_style()
 {
   // Style-level initialization: checks that atoms carry a charge, requests
   // the neighbor list(s) this style needs, precomputes squared cutoffs and
   // the LJ switching denominator, validates the rRESPA cutoffs, and picks
   // up g_ewald from the KSpace solver.
 
   if (!atom->q_flag)
     error->all(FLERR,
                "Pair style lj/charmm/coul/long/soft requires atom attribute q");
 
   // request regular or rRESPA neighbor lists
 
   int irequest;
 
   if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
     // respa = 0: no inner level, 1: inner + outer, 2: inner + middle + outer
     int respa = 0;
     if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
     if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
 
-    if (respa == 0) irequest = neighbor->request(this);
+    if (respa == 0) irequest = neighbor->request(this,instance_me);
     else if (respa == 1) {
       // the request ids (1 = inner, 3 = outer) match the ids that
       // init_list() uses to store the resulting lists
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 1;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respainner = 1;
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 3;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respaouter = 1;
     } else {
       // three lists: 1 = inner, 2 = middle, 3 = outer
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 1;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respainner = 1;
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 2;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respamiddle = 1;
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 3;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respaouter = 1;
     }
 
-  } else irequest = neighbor->request(this);
+  } else irequest = neighbor->request(this,instance_me);
 
   // require cut_lj_inner < cut_lj
 
   if (cut_lj_inner >= cut_lj)
     error->all(FLERR,"Pair inner cutoff >= Pair outer cutoff");
 
   // squared cutoffs used by the compute kernels; cut_bothsq is the larger
   // of the LJ and Coulomb squared cutoffs
   cut_lj_innersq = cut_lj_inner * cut_lj_inner;
   cut_ljsq = cut_lj * cut_lj;
   cut_coulsq = cut_coul * cut_coul;
   cut_bothsq = MAX(cut_ljsq,cut_coulsq);
 
   // (cut_lj^2 - cut_lj_inner^2)^3, the denominator of the switching functions
   denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) *
     (cut_ljsq-cut_lj_innersq);
 
   // set & error check interior rRESPA cutoffs
 
   if (strstr(update->integrate_style,"respa") &&
       ((Respa *) update->integrate)->level_inner >= 0) {
     cut_respa = ((Respa *) update->integrate)->cutoff;
     if (MIN(cut_lj,cut_coul) < cut_respa[3])
       error->all(FLERR,"Pair cutoff < Respa interior cutoff");
     if (cut_lj_inner < cut_respa[1])
       error->all(FLERR,"Pair inner cutoff < Respa interior cutoff");
   } else cut_respa = NULL;
 
   // ensure use of KSpace long-range solver, set g_ewald
 
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style requires a KSpace style");
   g_ewald = force->kspace->g_ewald;
 }
 
 /* ----------------------------------------------------------------------
    neighbor callback to inform pair style of neighbor list to use
    regular or rRESPA
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLongSoft::init_list(int id, NeighList *ptr)
 {
   // store the list under the slot selected by its request id;
   // any other id is silently ignored
   switch (id) {
   case 0:
     list = ptr;
     break;
   case 1:
     listinner = ptr;
     break;
   case 2:
     listmiddle = ptr;
     break;
   case 3:
     listouter = ptr;
     break;
   default:
     break;
   }
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairLJCharmmCoulLongSoft::init_one(int i, int j)
 {
   // Finalizes the coefficients for type pair (i,j), mirrors them into
   // (j,i), and returns the larger of the LJ and Coulomb cutoffs.
 
   // pair was not set explicitly: mix it from the i,i and j,j entries
   if (setflag[i][j] == 0) {
     epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j],
                                sigma[i][i],sigma[j][j]);
     sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
     // lambda cannot be mixed; both types must agree
     if (lambda[i][i] != lambda[j][j])
       error->all(FLERR,"Pair lj/charmm/coul/long/soft different lambda values in mix");
     lambda[i][j] = lambda[i][i];
     eps14[i][j] = mix_energy(eps14[i][i],eps14[j][j],
                                sigma14[i][i],sigma14[j][j]);
     sigma14[i][j] = mix_distance(sigma14[i][i],sigma14[j][j]);
   }
 
   // soft-core coefficients:
   //   lj1 = lambda^nlambda, lj2 = sigma^6,
   //   lj3 = alphalj*(1-lambda)^2, lj4 = alphac*(1-lambda)^2
   const double lam = lambda[i][j];
   const double one_minus_lam = 1.0 - lam;
 
   lj1[i][j] = pow(lam, nlambda);
   lj2[i][j] = pow(sigma[i][j], 6.0);
   lj3[i][j] = alphalj * one_minus_lam * one_minus_lam;
   lj4[i][j] = alphac  * one_minus_lam * one_minus_lam;
 
   // 1-4 interactions unaffected (they're part of the dihedral term)
   const double sig14_pow6 = pow(sigma14[i][j],6.0);
   const double sig14_pow12 = pow(sigma14[i][j],12.0);
 
   lj14_1[i][j] = 48.0 * eps14[i][j] * sig14_pow12;
   lj14_2[i][j] = 24.0 * eps14[i][j] * sig14_pow6;
   lj14_3[i][j] = 4.0 * eps14[i][j] * sig14_pow12;
   lj14_4[i][j] = 4.0 * eps14[i][j] * sig14_pow6;
 
   // mirror into the lower triangle
   epsilon[j][i] = epsilon[i][j];
   sigma[j][i] = sigma[i][j];
   lambda[j][i] = lambda[i][j];
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   lj4[j][i] = lj4[i][j];
   lj14_1[j][i] = lj14_1[i][j];
   lj14_2[j][i] = lj14_2[i][j];
   lj14_3[j][i] = lj14_3[i][j];
   lj14_4[j][i] = lj14_4[i][j];
 
   return MAX(cut_lj,cut_coul);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLongSoft::write_restart(FILE *fp)
 {
   // global settings first, then one record per type pair with j >= i:
   // the setflag value, followed by the five coefficients when it is set
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
       if (!setflag[itype][jtype]) continue;
 
       fwrite(&epsilon[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigma[itype][jtype],sizeof(double),1,fp);
       fwrite(&lambda[itype][jtype],sizeof(double),1,fp);
       fwrite(&eps14[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigma14[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLongSoft::read_restart(FILE *fp)
 {
   // mirror of write_restart(): rank 0 reads each value from the file and
   // every value is then broadcast from rank 0 to all ranks
   read_restart_settings(fp);
 
   allocate();
 
   const bool is_root = (comm->me == 0);
   const int ntypes = atom->ntypes;
 
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       if (is_root) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
 
       // coefficients are only present in the file for pairs that were set
       if (!setflag[itype][jtype]) continue;
 
       if (is_root) {
         fread(&epsilon[itype][jtype],sizeof(double),1,fp);
         fread(&sigma[itype][jtype],sizeof(double),1,fp);
         fread(&lambda[itype][jtype],sizeof(double),1,fp);
         fread(&eps14[itype][jtype],sizeof(double),1,fp);
         fread(&sigma14[itype][jtype],sizeof(double),1,fp);
       }
       MPI_Bcast(&epsilon[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&sigma[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&lambda[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&eps14[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&sigma14[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLongSoft::write_restart_settings(FILE *fp)
 {
   // the field order below is the on-disk layout and must stay in sync
   // with read_restart_settings()
 
   // soft-core parameters
   fwrite(&nlambda,sizeof(double),1,fp);
   fwrite(&alphalj,sizeof(double),1,fp);
   fwrite(&alphac,sizeof(double),1,fp);
 
   // cutoffs
   fwrite(&cut_lj_inner,sizeof(double),1,fp);
   fwrite(&cut_lj,sizeof(double),1,fp);
   fwrite(&cut_coul,sizeof(double),1,fp);
 
   // integer flags
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLongSoft::read_restart_settings(FILE *fp)
 {
   // rank 0 reads the settings in the order write_restart_settings()
   // stored them; each one is then broadcast from rank 0 to all ranks
   const bool is_root = (comm->me == 0);
 
   if (is_root) {
     // soft-core parameters
     fread(&nlambda,sizeof(double),1,fp);
     fread(&alphalj,sizeof(double),1,fp);
     fread(&alphac,sizeof(double),1,fp);
 
     // cutoffs
     fread(&cut_lj_inner,sizeof(double),1,fp);
     fread(&cut_lj,sizeof(double),1,fp);
     fread(&cut_coul,sizeof(double),1,fp);
 
     // integer flags
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
   }
 
   MPI_Bcast(&nlambda,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&alphalj,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&alphac,1,MPI_DOUBLE,0,world);
 
   MPI_Bcast(&cut_lj_inner,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_lj,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
 
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
 }
 
 
 /* ----------------------------------------------------------------------
    proc 0 writes to data file
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLongSoft::write_data(FILE *fp)
 {
   // one line per atom type with its diagonal (i,i) coefficients:
   //   type epsilon sigma lambda eps14 sigma14
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; itype++) {
     fprintf(fp,"%d %g %g %g %g %g\n",itype,
             epsilon[itype][itype],sigma[itype][itype],lambda[itype][itype],
             eps14[itype][itype],sigma14[itype][itype]);
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes all pairs to data file
 ------------------------------------------------------------------------- */
 
 void PairLJCharmmCoulLongSoft::write_data_all(FILE *fp)
 {
   // one line per type pair with j >= i:
   //   itype jtype epsilon sigma lambda eps14 sigma14
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       fprintf(fp,"%d %d %g %g %g %g %g\n",itype,jtype,
               epsilon[itype][jtype],sigma[itype][jtype],lambda[itype][jtype],
               eps14[itype][jtype],sigma14[itype][jtype]);
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLJCharmmCoulLongSoft::single(int i, int j, int itype, int jtype,
                                         double rsq,
                                         double factor_coul, double factor_lj,
                                         double &fforce)
 {
   // Energy and force for a single pair of atoms.
   //   i, j         : atom indices (used to look up charges)
   //   itype, jtype : atom types of i and j
   //   rsq          : squared distance between the two atoms
   //   factor_coul  : special-bond scaling of the Coulomb term
   //   factor_lj    : special-bond scaling of the LJ term
   //   fforce       : on return, the pair force term forcecoul + factor_lj*forcelj
   // Returns the sum of the Coulomb and LJ pair energies.
 
   double r,grij,expm2,t,erfc,prefactor;
   double switch1,switch2,forcecoul,forcelj,phicoul,philj;
   double denc, denlj, r4sig6;
 
   if (rsq < cut_coulsq) {
     // polynomial approximation of erfc(g_ewald*r)
     r = sqrt(rsq);
     grij = g_ewald * r;
     expm2 = exp(-grij*grij);
     t = 1.0 / (1.0 + EWALD_P*grij);
     erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
 
     // soft-core Coulomb: r is replaced by sqrt(lj4 + r^2)
     denc = sqrt(lj4[itype][jtype] + rsq);
     prefactor = force->qqrd2e * lj1[itype][jtype] * atom->q[i]*atom->q[j] /
       (denc*denc*denc);
 
     forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
     if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
   } else forcecoul = 0.0;
 
   if (rsq < cut_ljsq) {
     // soft-core LJ: denominator lj3 + (r/sigma)^6
     r4sig6 = rsq*rsq / lj2[itype][jtype];
     denlj = lj3[itype][jtype] + rsq*r4sig6;
     forcelj = lj1[itype][jtype] * epsilon[itype][jtype] * 
       (48.0*r4sig6/(denlj*denlj*denlj) - 24.0*r4sig6/(denlj*denlj));
     if (rsq > cut_lj_innersq) {
       switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
         (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
       switch2 = 12.0 * (cut_ljsq-rsq) * (rsq-cut_lj_innersq) / denom_lj;
       philj = lj1[itype][jtype] * 4.0 * epsilon[itype][jtype] * 
         (1.0/(denlj*denlj) - 1.0/denlj);
       forcelj = forcelj*switch1 + philj*switch2;
     }
   } else forcelj = 0.0;
   fforce = forcecoul + factor_lj*forcelj;
 
   double eng = 0.0;
   if (rsq < cut_coulsq) {
     prefactor = force->qqrd2e * lj1[itype][jtype] * atom->q[i]*atom->q[j] / denc;
     phicoul = prefactor*erfc;
     if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*prefactor;
     eng += phicoul;
   }
 
   if (rsq < cut_ljsq) {
     // No offset term: this style never allocates or fills an offset table
     // (see allocate() and init_one()), and compute_outer() tallies the same
     // LJ energy without one; the switching function already brings the
     // energy to zero at cut_lj.
     philj = lj1[itype][jtype] * 4.0 * epsilon[itype][jtype] * 
       (1.0/(denlj*denlj) - 1.0/denlj);
     if (rsq > cut_lj_innersq) {
       switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
         (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
       philj *= switch1;
     }
     eng += factor_lj*philj;
   }
 
   return eng;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void *PairLJCharmmCoulLongSoft::extract(const char *str, int &dim)
 {
   // Looks up an internal quantity by name.  dim is set to 2 for the
   // per-type-pair tables and to 0 for scalars; for an unknown name the
   // function returns NULL with dim left at 0.
 
   // per-type-pair tables
   dim = 2;
   if (!strcmp(str,"lj14_1")) return (void *) lj14_1;
   if (!strcmp(str,"lj14_2")) return (void *) lj14_2;
   if (!strcmp(str,"lj14_3")) return (void *) lj14_3;
   if (!strcmp(str,"lj14_4")) return (void *) lj14_4;
 
   if (!strcmp(str,"epsilon")) return (void *) epsilon;
   if (!strcmp(str,"sigma")) return (void *) sigma;
   if (!strcmp(str,"lambda")) return (void *) lambda;
 
   // scalars
   dim = 0;
   if (!strcmp(str,"implicit")) return (void *) &implicit;
   if (!strcmp(str,"cut_coul")) return (void *) &cut_coul;
 
   return NULL;
 }
diff --git a/src/USER-FEP/pair_lj_cut_coul_cut_soft.cpp b/src/USER-FEP/pair_lj_cut_coul_cut_soft.cpp
index ac2e4b4af..bf903072a 100644
--- a/src/USER-FEP/pair_lj_cut_coul_cut_soft.cpp
+++ b/src/USER-FEP/pair_lj_cut_coul_cut_soft.cpp
@@ -1,514 +1,514 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Soft-core version: Agilio Padua (Univ Blaise Pascal & CNRS)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_lj_cut_coul_cut_soft.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCutCoulCutSoft::PairLJCutCoulCutSoft(LAMMPS *lmp) : Pair(lmp)
 {
   // this style provides write_data() and write_data_all() (defined below)
   writedata = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCutCoulCutSoft::~PairLJCutCoulCutSoft()
 {
   // the per-type arrays only exist once allocate() has set the flag
   if (!allocated) return;
 
   // bookkeeping arrays
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   // cutoffs
   memory->destroy(cut_lj);
   memory->destroy(cut_ljsq);
   memory->destroy(cut_coul);
   memory->destroy(cut_coulsq);
 
   // user coefficients
   memory->destroy(epsilon);
   memory->destroy(sigma);
   memory->destroy(lambda);
 
   // quantities derived in init_one()
   memory->destroy(lj1);
   memory->destroy(lj2);
   memory->destroy(lj3);
   memory->destroy(lj4);
   memory->destroy(offset);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCoulCutSoft::compute(int eflag, int vflag)
 {
   // Walks the neighbor list, adds the soft-core LJ + cutoff Coulomb pair
   // force to atom->f and, when energy/virial tallying is active, passes the
   // pair energies and force to ev_tally().
   //   eflag : nonzero -> evaluate evdwl and ecoul for each pair
   //   vflag : forwarded to ev_setup(); the virial is finished either through
   //           ev_tally() or through virial_fdotr_compute() at the end
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double rsq,forcecoul,forcelj,factor_coul,factor_lj;
   double denc, denlj, r4sig6;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   // energies stay at zero when eflag is off, so ev_tally() receives 0.0
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // sbmask(j) selects the special_lj/special_coul scaling for this
       // neighbor; it is read before j is reduced to an index with NEIGHMASK
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 
         // soft-core Coulomb: the distance is replaced by sqrt(lj4 + r^2),
         // with lj4 = alphac*(1-lambda)^2 and lj1 = lambda^nlambda (init_one)
         if (rsq < cut_coulsq[itype][jtype]) {
           denc = sqrt(lj4[itype][jtype] + rsq);
           forcecoul = qqrd2e * lj1[itype][jtype] * qtmp*q[j] / (denc*denc*denc);
         } else forcecoul = 0.0;
         
         // soft-core LJ: r4sig6 = r^4/sigma^6 (lj2 = sigma^6), so that
         // denlj = lj3 + r^6/sigma^6 with lj3 = alphalj*(1-lambda)^2
         if (rsq < cut_ljsq[itype][jtype]) {
           r4sig6 = rsq*rsq / lj2[itype][jtype];
           denlj = lj3[itype][jtype] + rsq*r4sig6;
           forcelj = lj1[itype][jtype] * epsilon[itype][jtype] * 
             (48.0*r4sig6/(denlj*denlj*denlj) - 24.0*r4sig6/(denlj*denlj));
         } else forcelj = 0.0;
 
         // fpair multiplies the separation vector directly (see below)
         fpair = factor_coul*forcecoul + factor_lj*forcelj;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         // the reaction on j is skipped only when newton_pair is off and
         // j is not a local atom (j >= nlocal)
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
           // denc and denlj are reused from the force branches above, which
           // are guarded by the same cutoff tests
           if (rsq < cut_coulsq[itype][jtype])
             ecoul = factor_coul * qqrd2e * lj1[itype][jtype] * qtmp*q[j] / denc;
           else ecoul = 0.0;
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = lj1[itype][jtype] * 4.0 * epsilon[itype][jtype] * 
               (1.0/(denlj*denlj) - 1.0/denlj) - offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulCutSoft::allocate()
 {
   // Create every per-type-pair array with (ntypes+1) x (ntypes+1) entries
   // and clear setflag for all pairs i <= j in 1..ntypes.
   allocated = 1;
   const int ntypes = atom->ntypes;
   const int np1 = ntypes + 1;
 
   memory->create(setflag,np1,np1,"pair:setflag");
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,np1,np1,"pair:cutsq");
 
   // cutoffs and their squares
   memory->create(cut_lj,np1,np1,"pair:cut_lj");
   memory->create(cut_ljsq,np1,np1,"pair:cut_ljsq");
   memory->create(cut_coul,np1,np1,"pair:cut_coul");
   memory->create(cut_coulsq,np1,np1,"pair:cut_coulsq");
 
   // user coefficients
   memory->create(epsilon,np1,np1,"pair:epsilon");
   memory->create(sigma,np1,np1,"pair:sigma");
   memory->create(lambda,np1,np1,"pair:lambda");
 
   // quantities filled in by init_one()
   memory->create(lj1,np1,np1,"pair:lj1");
   memory->create(lj2,np1,np1,"pair:lj2");
   memory->create(lj3,np1,np1,"pair:lj3");
   memory->create(lj4,np1,np1,"pair:lj4");
   memory->create(offset,np1,np1,"pair:offset");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulCutSoft::settings(int narg, char **arg)
 {
   // Parse the pair_style arguments:
   //   arg[0] nlambda, arg[1] alphalj, arg[2] alphac,
   //   arg[3] global LJ cutoff, arg[4] (optional) global Coulomb cutoff.
   // With only four arguments the Coulomb cutoff equals the LJ cutoff.
   if (narg < 4 || narg > 5) error->all(FLERR,"Illegal pair_style command");
 
   nlambda = force->numeric(FLERR,arg[0]);
   alphalj = force->numeric(FLERR,arg[1]);
   alphac  = force->numeric(FLERR,arg[2]);
 
   cut_lj_global = force->numeric(FLERR,arg[3]);
   if (narg == 4) cut_coul_global = cut_lj_global;
   else cut_coul_global = force->numeric(FLERR,arg[4]);
 
   // reset cutoffs that have been explicitly set
   // the inner loop starts at j = i so that diagonal pairs (i,i), which
   // coeff() also flags in setflag, pick up the new global cutoffs too
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) {
           cut_lj[i][j] = cut_lj_global;
           cut_coul[i][j] = cut_coul_global;
         }
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulCutSoft::coeff(int narg, char **arg)
 {
   if (narg < 5 || narg > 7) 
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double epsilon_one = force->numeric(FLERR,arg[2]);
   double sigma_one = force->numeric(FLERR,arg[3]);
   double lambda_one = force->numeric(FLERR,arg[4]);
 
   double cut_lj_one = cut_lj_global;
   double cut_coul_one = cut_coul_global;
   if (narg >= 6) cut_coul_one = cut_lj_one = force->numeric(FLERR,arg[5]);
   if (narg == 7) cut_coul_one = force->numeric(FLERR,arg[6]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       epsilon[i][j] = epsilon_one;
       sigma[i][j] = sigma_one;
       lambda[i][j] = lambda_one;
       cut_lj[i][j] = cut_lj_one;
       cut_coul[i][j] = cut_coul_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulCutSoft::init_style()
 {
   // compute() and single() read atom->q, so stop early if the atom style
   // does not define per-atom charge
   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/cut/coul/cut/soft requires atom attribute q");
 
   // register this pair style's neighbor list request
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairLJCutCoulCutSoft::init_one(int i, int j)
 {
   // Finalize the coefficients of type pair (i,j): mix anything that was not
   // set explicitly, precompute the soft-core factors used by compute() and
   // single(), mirror them into (j,i), and return the larger of the LJ and
   // Coulomb cutoffs for this pair.
   if (setflag[i][j] == 0) {
     epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j],
                                sigma[i][i],sigma[j][j]);
     sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
     // lambda cannot be mixed: both diagonal entries have to agree
     if (lambda[i][i] != lambda[j][j])
       error->all(FLERR,"Pair lj/cut/coul/cut/soft different lambda values in mix");
     lambda[i][j] = lambda[i][i];
     cut_lj[i][j] = mix_distance(cut_lj[i][i],cut_lj[j][j]);
     cut_coul[i][j] = mix_distance(cut_coul[i][i],cut_coul[j][j]);
   }
 
   double cut = MAX(cut_lj[i][j],cut_coul[i][j]);
   cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
   cut_coulsq[i][j] = cut_coul[i][j] * cut_coul[i][j];
 
   // lj1: lambda^nlambda prefactor of both interactions
   // lj2: sigma^6
   // lj3: soft-core shift of the LJ denominator
   // lj4: soft-core shift added to r^2 in the Coulomb term
   lj1[i][j] = pow(lambda[i][j], nlambda);
   lj2[i][j] = pow(sigma[i][j], 6.0);
   lj3[i][j] = alphalj * (1.0 - lambda[i][j])*(1.0 - lambda[i][j]);
   lj4[i][j] = alphac  * (1.0 - lambda[i][j])*(1.0 - lambda[i][j]);
 
   // energy shift = pair energy evaluated at the LJ cutoff
   // compute()/single() use denlj = lj3 + r^6/sigma^6, so at r = cut_lj the
   // ratio must be (cut_lj/sigma)^6, not its inverse
   if (offset_flag) {
     double denlj = lj3[i][j] + pow(cut_lj[i][j] / sigma[i][j], 6.0);
     offset[i][j] = lj1[i][j] * 4.0 * epsilon[i][j] * (1.0/(denlj*denlj) - 1.0/denlj);
   } else offset[i][j] = 0.0;
 
   // mirror into the (j,i) entries
   epsilon[j][i] = epsilon[i][j];
   sigma[j][i] = sigma[i][j];
   lambda[j][i] = lambda[i][j];
   cut_ljsq[j][i] = cut_ljsq[i][j];
   cut_coulsq[j][i] = cut_coulsq[i][j];
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   lj4[j][i] = lj4[i][j];
   offset[j][i] = offset[i][j];
 
   // compute I,J contribution to long-range tail correction
   // count total # of atoms of type I and J via Allreduce
 
   if (tail_flag) {
     int *type = atom->type;
     int nlocal = atom->nlocal;
 
     double count[2],all[2];
     count[0] = count[1] = 0.0;
     for (int k = 0; k < nlocal; k++) {
       if (type[k] == i) count[0] += 1.0;
       if (type[k] == j) count[1] += 1.0;
     }
     MPI_Allreduce(count,all,2,MPI_DOUBLE,MPI_SUM,world);
 
     double sig2 = sigma[i][j]*sigma[i][j];
     double sig6 = sig2*sig2*sig2;
     double rc3 = cut_lj[i][j]*cut_lj[i][j]*cut_lj[i][j];
     double rc6 = rc3*rc3;
     double rc9 = rc3*rc6;
     // both tail terms carry the lambda^nlambda prefactor lj1
     etail_ij = 8.0*MY_PI*all[0]*all[1]* lj1[i][j] * epsilon[i][j] *
       sig6 * (sig6 - 3.0*rc6) / (9.0*rc9);
     ptail_ij = 16.0*MY_PI*all[0]*all[1]* lj1[i][j] * epsilon[i][j] *
       sig6 * (2.0*sig6 - 3.0*rc6) / (9.0*rc9);
   }
 
   return cut;
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulCutSoft::write_restart(FILE *fp)
 {
   // Layout: global settings, then for every pair i <= j the setflag value
   // followed (only when it is set) by epsilon, sigma, lambda, cut_lj, cut_coul.
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
       if (!setflag[itype][jtype]) continue;
 
       fwrite(&epsilon[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigma[itype][jtype],sizeof(double),1,fp);
       fwrite(&lambda[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut_lj[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut_coul[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulCutSoft::read_restart(FILE *fp)
 {
   // Mirror of write_restart(): rank 0 reads each record from fp and every
   // value is then broadcast from rank 0 over world.
   read_restart_settings(fp);
   allocate();
 
   const bool on_root = (comm->me == 0);
   const int ntypes = atom->ntypes;
 
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       if (on_root) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
       if (!setflag[itype][jtype]) continue;
 
       // coefficients in the order they were written
       double *coeffs[5] = {
         &epsilon[itype][jtype],
         &sigma[itype][jtype],
         &lambda[itype][jtype],
         &cut_lj[itype][jtype],
         &cut_coul[itype][jtype]
       };
 
       if (on_root) {
         for (int k = 0; k < 5; k++)
           fread(coeffs[k],sizeof(double),1,fp);
       }
       for (int k = 0; k < 5; k++)
         MPI_Bcast(coeffs[k],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulCutSoft::write_restart_settings(FILE *fp)
 {
   // Five double-sized settings first, then three int-sized flags; the order
   // has to match read_restart_settings().
   const void *real_settings[] = {
     &nlambda, &alphalj, &alphac, &cut_lj_global, &cut_coul_global
   };
   const void *int_settings[] = {
     &offset_flag, &mix_flag, &tail_flag
   };
   const int nreal = sizeof(real_settings) / sizeof(real_settings[0]);
   const int nint = sizeof(int_settings) / sizeof(int_settings[0]);
 
   for (int k = 0; k < nreal; k++)
     fwrite(real_settings[k],sizeof(double),1,fp);
   for (int k = 0; k < nint; k++)
     fwrite(int_settings[k],sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulCutSoft::read_restart_settings(FILE *fp)
 {
   // Rank 0 reads the settings in the order write_restart_settings() wrote
   // them; afterwards each one is broadcast from rank 0 over world.
   void *real_settings[] = {
     &nlambda, &alphalj, &alphac, &cut_lj_global, &cut_coul_global
   };
   void *int_settings[] = {
     &offset_flag, &mix_flag, &tail_flag
   };
   const int nreal = sizeof(real_settings) / sizeof(real_settings[0]);
   const int nint = sizeof(int_settings) / sizeof(int_settings[0]);
 
   if (comm->me == 0) {
     for (int k = 0; k < nreal; k++)
       fread(real_settings[k],sizeof(double),1,fp);
     for (int k = 0; k < nint; k++)
       fread(int_settings[k],sizeof(int),1,fp);
   }
 
   for (int k = 0; k < nreal; k++)
     MPI_Bcast(real_settings[k],1,MPI_DOUBLE,0,world);
   for (int k = 0; k < nint; k++)
     MPI_Bcast(int_settings[k],1,MPI_INT,0,world);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to data file
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulCutSoft::write_data(FILE *fp)
 {
   // one line per atom type: type index, then the diagonal (i,i) values of
   // epsilon, sigma and lambda
   const int ntypes = atom->ntypes;
   int itype = 1;
   while (itype <= ntypes) {
     fprintf(fp,"%d %g %g %g\n",
             itype,epsilon[itype][itype],sigma[itype][itype],lambda[itype][itype]);
     itype++;
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes all pairs to data file
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulCutSoft::write_data_all(FILE *fp)
 {
   for (int i = 1; i <= atom->ntypes; i++)
     for (int j = i; j <= atom->ntypes; j++)
       fprintf(fp,"%d %d %g %g %g %g\n",i,j,epsilon[i][j],sigma[i][j],
               lambda[i][j],cut_lj[i][j]);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLJCutCoulCutSoft::single(int i, int j, int itype, int jtype,
                                   double rsq,
                                   double factor_coul, double factor_lj,
                                   double &fforce)
 {
   // Energy and force factor of one pair at squared separation rsq.
   //   i, j          : atom indices, used to read atom->q
   //   itype, jtype  : atom types selecting the coefficient entries
   //   factor_coul/lj: scaling applied to the Coulomb / LJ parts
   //   fforce (out)  : factor_coul*forcecoul + factor_lj*forcelj
   // Returns the scaled Coulomb energy plus the scaled, offset-shifted LJ
   // energy; each part is zero outside its own cutoff.
   const bool in_coul = rsq < cut_coulsq[itype][jtype];
   const bool in_lj = rsq < cut_ljsq[itype][jtype];
 
   double fcoul = 0.0;
   double ecoul_pair = 0.0;
   if (in_coul) {
     // softened distance: sqrt(lj4 + r^2)
     const double soft_r = sqrt(lj4[itype][jtype] + rsq);
     fcoul = force->qqrd2e * lj1[itype][jtype] * atom->q[i]*atom->q[j] /
       (soft_r*soft_r*soft_r);
     ecoul_pair = force->qqrd2e * lj1[itype][jtype] * atom->q[i]*atom->q[j] / soft_r;
   }
 
   double flj = 0.0;
   double elj_pair = 0.0;
   if (in_lj) {
     // soft_den = lj3 + r^6/sigma^6, built from r^4/sigma^6
     const double r4_over_sig6 = rsq*rsq / lj2[itype][jtype];
     const double soft_den = lj3[itype][jtype] + rsq*r4_over_sig6;
     flj = lj1[itype][jtype] * epsilon[itype][jtype] * 
       (48.0*r4_over_sig6/(soft_den*soft_den*soft_den) - 24.0*r4_over_sig6/(soft_den*soft_den));
     elj_pair = lj1[itype][jtype] * 4.0 * epsilon[itype][jtype] * 
       (1.0/(soft_den*soft_den) - 1.0/soft_den) - offset[itype][jtype];
   }
 
   fforce = factor_coul*fcoul + factor_lj*flj;
 
   double total = 0.0;
   if (in_coul) total += factor_coul*ecoul_pair;
   if (in_lj) total += factor_lj*elj_pair;
   return total;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void *PairLJCutCoulCutSoft::extract(const char *str, int &dim)
 {
   // Look up an internal parameter by name.  epsilon, sigma and lambda are
   // returned with dim = 2; any other name returns NULL.
   //
   // "cut_coul" is deliberately not offered: in this style cut_coul is a
   // per-type-pair array (see allocate()), so handing out &cut_coul with
   // dim = 0 made a caller expecting a single double read pointer bits.
   // Returning NULL lets such a caller detect that no single cutoff exists.
   dim = 2;
   if (strcmp(str,"epsilon") == 0) return (void *) epsilon;
   if (strcmp(str,"sigma") == 0) return (void *) sigma;
   if (strcmp(str,"lambda") == 0) return (void *) lambda;
   return NULL;
 }
diff --git a/src/USER-FEP/pair_lj_cut_coul_long_soft.cpp b/src/USER-FEP/pair_lj_cut_coul_long_soft.cpp
index 5b6053521..516fdbf89 100644
--- a/src/USER-FEP/pair_lj_cut_coul_long_soft.cpp
+++ b/src/USER-FEP/pair_lj_cut_coul_long_soft.cpp
@@ -1,955 +1,955 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Paul Crozier (SNL)
    Soft-core version: Agilio Padua (Univ Blaise Pascal & CNRS)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_lj_cut_coul_long_soft.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCutCoulLongSoft::PairLJCutCoulLongSoft(LAMMPS *lmp) : Pair(lmp)
 {
   // flags: mark the style as usable with Ewald and PPPM
   // NOTE(review): meaning inferred from the flag names - confirm in pair.h
   ewaldflag = pppmflag = 1;
   // compute_inner/middle/outer are implemented below
   respa_enable = 1;
   writedata = 1;
   qdist = 0.0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCutCoulLongSoft::~PairLJCutCoulLongSoft()
 {
   // arrays are released only when the allocated flag is set
   if (!allocated) return;
 
   // bookkeeping arrays
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   // LJ cutoffs
   memory->destroy(cut_lj);
   memory->destroy(cut_ljsq);
 
   // user coefficients
   memory->destroy(epsilon);
   memory->destroy(sigma);
   memory->destroy(lambda);
 
   // precomputed factors and energy shift
   memory->destroy(lj1);
   memory->destroy(lj2);
   memory->destroy(lj3);
   memory->destroy(lj4);
   memory->destroy(offset);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCoulLongSoft::compute(int eflag, int vflag)
 {
   // Walks the neighbor list, adds the soft-core LJ force plus the
   // real-space (erfc-damped) Coulomb force to atom->f and, when tallying is
   // active, passes pair energies and force to ev_tally().
   //   eflag : nonzero -> evaluate evdwl and ecoul for each pair
   //   vflag : forwarded to ev_setup()
   int i,ii,j,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double r,rsq,forcecoul,forcelj,factor_coul,factor_lj;
   double grij,expm2,prefactor,t,erfc;
   double denc, denlj, r4sig6;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   // energies stay at zero when eflag is off, so ev_tally() receives 0.0
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // sbmask(j) selects the special_lj/special_coul scaling for this
       // neighbor; it is read before j is reduced to an index with NEIGHMASK
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 
         // here cut_coulsq is a single value, not a per-type table
         if (rsq < cut_coulsq) {
           r = sqrt(rsq);
           grij = g_ewald * r;
           expm2 = exp(-grij*grij);
           // erfc holds a polynomial-in-t approximation (constants A1..A5,
           // EWALD_P) multiplied by exp(-grij^2)
           t = 1.0 / (1.0 + EWALD_P*grij);
           erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
           
           // soft-core distance sqrt(lj4 + r^2) enters the prefactor, while
           // the damping terms above use the plain distance r
           denc = sqrt(lj4[itype][jtype] + rsq);
           prefactor = qqrd2e * lj1[itype][jtype] * qtmp*q[j] / (denc*denc*denc);
           
           forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
           // for scaled special pairs, subtract the excluded share of the
           // undamped soft-core term
           if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
         } else forcecoul = 0.0;
 
         // soft-core LJ: r4sig6 = r^4/lj2, denlj = lj3 + r^6/lj2
         if (rsq < cut_ljsq[itype][jtype]) {
           r4sig6 = rsq*rsq / lj2[itype][jtype];
           denlj = lj3[itype][jtype] + rsq*r4sig6;
           forcelj = lj1[itype][jtype] * epsilon[itype][jtype] * 
             (48.0*r4sig6/(denlj*denlj*denlj) - 24.0*r4sig6/(denlj*denlj));
         } else forcelj = 0.0;
 
         // factor_coul has already been folded into forcecoul above
         fpair = forcecoul + factor_lj*forcelj;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         // the reaction on j is skipped only when newton_pair is off and
         // j is not a local atom (j >= nlocal)
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
           // erfc, denc and denlj are reused from the force branches above,
           // which are guarded by the same cutoff tests
           if (rsq < cut_coulsq) {
             prefactor = qqrd2e * lj1[itype][jtype] * qtmp*q[j] / denc;
             ecoul = prefactor*erfc;
             if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
           } else ecoul = 0.0;
 
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = lj1[itype][jtype] * 4.0 * epsilon[itype][jtype] * 
               (1.0/(denlj*denlj) - 1.0/denlj) - offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCoulLongSoft::compute_inner()
 {
   // rRESPA inner level: adds the soft-core LJ and undamped soft-core Coulomb
   // force to atom->f for pairs of listinner closer than cut_respa[1].
   // Between cut_respa[0] and cut_respa[1] the force is smoothly scaled to
   // zero.  No energy or virial is tallied here.
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,fpair;
   double rsq,forcecoul,forcelj,factor_coul,factor_lj;
   double rsw;
   double denc, denlj, r4sig6;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = listinner->inum;
   ilist = listinner->ilist;
   numneigh = listinner->numneigh;
   firstneigh = listinner->firstneigh;
 
   // switching region: full force below cut_out_on, none at/above cut_out_off
   double cut_out_on = cut_respa[0];
   double cut_out_off = cut_respa[1];
 
   double cut_out_diff = cut_out_off - cut_out_on;
   double cut_out_on_sq = cut_out_on*cut_out_on;
   double cut_out_off_sq = cut_out_off*cut_out_off;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // special-bond scaling is read before j is masked down to an index
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cut_out_off_sq) {
         jtype = type[j];
 
         // undamped soft-core Coulomb; note there is no cut_coulsq test at
         // this level, only the respa cutoff above
         denc = sqrt(lj4[itype][jtype] + rsq);
         forcecoul = qqrd2e * lj1[itype][jtype] * qtmp*q[j] / (denc*denc*denc);
         // algebraically the same as scaling forcecoul by factor_coul
         if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*forcecoul;
 
         if (rsq < cut_ljsq[itype][jtype]) {
           r4sig6 = rsq*rsq / lj2[itype][jtype];
           denlj = lj3[itype][jtype] + rsq*r4sig6;
           forcelj = lj1[itype][jtype] * epsilon[itype][jtype] * 
             (48.0*r4sig6/(denlj*denlj*denlj) - 24.0*r4sig6/(denlj*denlj));
         } else forcelj = 0.0;
 
         fpair = forcecoul + factor_lj*forcelj;
 
         // switch-off polynomial: equals 1 at rsw = 0 and 0 at rsw = 1
         if (rsq > cut_out_on_sq) {
           rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff;
           fpair  *= 1.0 + rsw*rsw*(2.0*rsw-3.0);
         }
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCoulLongSoft::compute_middle()
 {
   // rRESPA middle level: adds the soft-core LJ and undamped soft-core
   // Coulomb force to atom->f for pairs of listmiddle whose separation lies
   // between cut_respa[0] and cut_respa[3].  The force is smoothly switched
   // on over [cut_respa[0], cut_respa[1]] and switched off over
   // [cut_respa[2], cut_respa[3]].  No energy or virial is tallied here.
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,fpair;
   double rsq,forcecoul,forcelj,factor_coul,factor_lj;
   double rsw;
   double denc, denlj, r4sig6;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = listmiddle->inum;
   ilist = listmiddle->ilist;
   numneigh = listmiddle->numneigh;
   firstneigh = listmiddle->firstneigh;
 
   // inner switching window (force turns on) and outer window (turns off)
   double cut_in_off = cut_respa[0];
   double cut_in_on = cut_respa[1];
   double cut_out_on = cut_respa[2];
   double cut_out_off = cut_respa[3];
 
   double cut_in_diff = cut_in_on - cut_in_off;
   double cut_out_diff = cut_out_off - cut_out_on;
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
   double cut_out_on_sq = cut_out_on*cut_out_on;
   double cut_out_off_sq = cut_out_off*cut_out_off;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // special-bond scaling is read before j is masked down to an index
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cut_out_off_sq && rsq > cut_in_off_sq) {
         jtype = type[j];
 
         // undamped soft-core Coulomb; no cut_coulsq test at this level
         denc = sqrt(lj4[itype][jtype] + rsq);
         forcecoul = qqrd2e * lj1[itype][jtype] * qtmp*q[j] / (denc*denc*denc);
         // algebraically the same as scaling forcecoul by factor_coul
         if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*forcecoul;
 
         if (rsq < cut_ljsq[itype][jtype]) {
           r4sig6 = rsq*rsq / lj2[itype][jtype];
           denlj = lj3[itype][jtype] + rsq*r4sig6;
           forcelj = lj1[itype][jtype] * epsilon[itype][jtype] * 
             (48.0*r4sig6/(denlj*denlj*denlj) - 24.0*r4sig6/(denlj*denlj));
         } else forcelj = 0.0;
 
         fpair = forcecoul + factor_lj*forcelj;
 
         // switch-on polynomial: equals 0 at rsw = 0 and 1 at rsw = 1
         if (rsq < cut_in_on_sq) {
           rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff;
           fpair *= rsw*rsw*(3.0 - 2.0*rsw);
         }
         // switch-off polynomial: equals 1 at rsw = 0 and 0 at rsw = 1
         if (rsq > cut_out_on_sq) {
           rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff;
           fpair *= 1.0 + rsw*rsw*(2.0*rsw - 3.0);
         }
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCoulLongSoft::compute_outer(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double r,rsq,forcecoul,forcelj,factor_coul,factor_lj;
   double grij,expm2,fprefactor,eprefactor,t,erfc;
   double rsw;
   double denc, denlj, r4sig6;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = listouter->inum;
   ilist = listouter->ilist;
   numneigh = listouter->numneigh;
   firstneigh = listouter->firstneigh;
 
   double cut_in_off = cut_respa[2];
   double cut_in_on = cut_respa[3];
 
   double cut_in_diff = cut_in_on - cut_in_off;
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 
         if (rsq < cut_coulsq) {
           r = sqrt(rsq);
           grij = g_ewald * r;
           expm2 = exp(-grij*grij);
           t = 1.0 / (1.0 + EWALD_P*grij);
           erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
           
           denc = sqrt(lj4[itype][jtype] + rsq);
           fprefactor = qqrd2e * lj1[itype][jtype] * qtmp*q[j] / (denc*denc*denc);
           
           forcecoul = fprefactor * (erfc + EWALD_F*grij*expm2 - 1.0);
 
           if (rsq > cut_in_off_sq) {
             if (rsq < cut_in_on_sq) {
               rsw = (r - cut_in_off)/cut_in_diff;
               forcecoul += fprefactor*rsw*rsw*(3.0 - 2.0*rsw);
               if (factor_coul < 1.0)
                 forcecoul -=
                   (1.0-factor_coul)*fprefactor*rsw*rsw*(3.0 - 2.0*rsw);
             } else {
               forcecoul += fprefactor;
               if (factor_coul < 1.0)
                 forcecoul -= (1.0-factor_coul)*fprefactor;
             }
           }
         } else forcecoul = 0.0;
 
         if (rsq < cut_ljsq[itype][jtype] && rsq > cut_in_off_sq) {
           r4sig6 = rsq*rsq / lj2[itype][jtype];
           denlj = lj3[itype][jtype] + rsq*r4sig6;
           forcelj = lj1[itype][jtype] * epsilon[itype][jtype] * 
             (48.0*r4sig6/(denlj*denlj*denlj) - 24.0*r4sig6/(denlj*denlj));
           if (rsq < cut_in_on_sq) {
             rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff;
             forcelj *= rsw*rsw*(3.0 - 2.0*rsw);
           }
         } else forcelj = 0.0;
 
         fpair = forcecoul + forcelj;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
           if (rsq < cut_coulsq) {
             eprefactor = qqrd2e * lj1[itype][jtype] * qtmp*q[j] / denc;
             ecoul = eprefactor*erfc;
             if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*eprefactor;
           } else ecoul = 0.0;
 
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = lj1[itype][jtype] * 4.0 * epsilon[itype][jtype] * 
               (1.0/(denlj*denlj) - 1.0/denlj) - offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (vflag) {
           if (rsq < cut_coulsq) {
             forcecoul = fprefactor * (erfc + EWALD_F*grij*expm2);
             if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*fprefactor;
           } else forcecoul = 0.0;
 
           if (rsq <= cut_in_off_sq) {
             r4sig6 = rsq*rsq / lj2[itype][jtype];
             denlj = lj3[itype][jtype] + rsq*r4sig6;
             forcelj = lj1[itype][jtype] * epsilon[itype][jtype] * 
               (48.0*r4sig6/(denlj*denlj*denlj) - 24.0*r4sig6/(denlj*denlj));
           } else if (rsq < cut_in_on_sq) {
             forcelj = lj1[itype][jtype] * epsilon[itype][jtype] * 
               (48.0*r4sig6/(denlj*denlj*denlj) - 24.0*r4sig6/(denlj*denlj));
           }
           fpair = forcecoul + factor_lj*forcelj;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    allocate all per-type-pair arrays, each (ntypes+1) x (ntypes+1)
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulLongSoft::allocate()
 {
   allocated = 1;
 
   const int ntypes = atom->ntypes;
   const int dim = ntypes + 1;
 
   // only the upper triangle (jt >= it) of setflag is cleared here
 
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int it = 1; it <= ntypes; ++it) {
     for (int jt = it; jt <= ntypes; ++jt) {
       setflag[it][jt] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   // cutoffs and coefficients filled in by coeff() / init_one()
 
   memory->create(cut_lj,dim,dim,"pair:cut_lj");
   memory->create(cut_ljsq,dim,dim,"pair:cut_ljsq");
   memory->create(epsilon,dim,dim,"pair:epsilon");
   memory->create(sigma,dim,dim,"pair:sigma");
   memory->create(lambda,dim,dim,"pair:lambda");
 
   // prefactors and energy shift filled in by init_one()
 
   memory->create(lj1,dim,dim,"pair:lj1");
   memory->create(lj2,dim,dim,"pair:lj2");
   memory->create(lj3,dim,dim,"pair:lj3");
   memory->create(lj4,dim,dim,"pair:lj4");
   memory->create(offset,dim,dim,"pair:offset");
 }
 
 /* ----------------------------------------------------------------------
    global settings
    args = nlambda alphalj alphac cut_lj_global [cut_coul]
    cut_coul defaults to cut_lj_global when only 4 args are given
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulLongSoft::settings(int narg, char **arg)
 {
   if (narg < 4 || narg > 5) error->all(FLERR,"Illegal pair_style command");
 
   nlambda = force->numeric(FLERR,arg[0]);
   alphalj = force->numeric(FLERR,arg[1]);
   alphac  = force->numeric(FLERR,arg[2]);
 
   cut_lj_global = force->numeric(FLERR,arg[3]);
   if (narg == 4) cut_coul = cut_lj_global;
   else cut_coul = force->numeric(FLERR,arg[4]);
 
   // reset cutoffs that have been explicitly set
   // the inner loop starts at j = i so that diagonal (i,i) pairs,
   // which coeff() also marks in setflag, pick up the new global cutoff
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut_lj[i][j] = cut_lj_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulLongSoft::coeff(int narg, char **arg)
 {
   if (narg < 5 || narg > 6)
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double epsilon_one = force->numeric(FLERR,arg[2]);
   double sigma_one = force->numeric(FLERR,arg[3]);
   double lambda_one = force->numeric(FLERR,arg[4]);
 
   double cut_lj_one = cut_lj_global;
   if (narg == 6) cut_lj_one = force->numeric(FLERR,arg[5]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       epsilon[i][j] = epsilon_one;
       sigma[i][j] = sigma_one;
       lambda[i][j] = lambda_one;
       cut_lj[i][j] = cut_lj_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
    checks for per-atom charge, requests neighbor lists (one regular list,
    or inner/[middle]/outer lists for rRESPA), caches the rRESPA cutoffs
    and takes g_ewald from the KSpace solver
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulLongSoft::init_style()
 {
   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/cut/coul/long/soft requires atom attribute q");
 
   // request regular or rRESPA neighbor lists
 
   int irequest;
 
   if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
     // respa = 0: no inner level, 1: inner + outer, 2: inner + middle + outer
     int respa = 0;
     if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
     if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
 
-    if (respa == 0) irequest = neighbor->request(this);
+    if (respa == 0) irequest = neighbor->request(this,instance_me);
     else if (respa == 1) {
       // request ids match the ids dispatched in init_list():
       // 1 = inner, 2 = middle, 3 = outer
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 1;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respainner = 1;
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 3;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respaouter = 1;
     } else {
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 1;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respainner = 1;
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 2;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respamiddle = 1;
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 3;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respaouter = 1;
     }
 
-  } else irequest = neighbor->request(this);
+  } else irequest = neighbor->request(this,instance_me);
 
   cut_coulsq = cut_coul * cut_coul;
 
   // set rRESPA cutoffs
   // cut_respa stays NULL unless an inner rRESPA level is defined
 
   if (strstr(update->integrate_style,"respa") &&
       ((Respa *) update->integrate)->level_inner >= 0)
     cut_respa = ((Respa *) update->integrate)->cutoff;
   else cut_respa = NULL;
 
   // insure use of KSpace long-range solver, set g_ewald
 
   if (force->kspace == NULL)
     error->all(FLERR,"Pair style requires a KSpace style");
   g_ewald = force->kspace->g_ewald;
 }
 
 /* ----------------------------------------------------------------------
    neighbor callback to inform pair style of neighbor list to use
    id 0 = regular list, ids 1/2/3 = rRESPA inner/middle/outer lists
    any other id is ignored
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulLongSoft::init_list(int id, NeighList *ptr)
 {
   switch (id) {
   case 0:
     list = ptr;
     break;
   case 1:
     listinner = ptr;
     break;
   case 2:
     listmiddle = ptr;
     break;
   case 3:
     listouter = ptr;
     break;
   default:
     break;
   }
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
    mixes coefficients if the pair was not set explicitly, precomputes
    the soft-core prefactors and energy offset, mirrors them to j,i
    returns the overall cutoff for the pair
 ------------------------------------------------------------------------- */
 
 double PairLJCutCoulLongSoft::init_one(int i, int j)
 {
   // pair not set explicitly: mix from the i,i and j,j entries
   // lambda is not mixed, so both types must carry the same value
 
   if (setflag[i][j] == 0) {
     epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j],
                                sigma[i][i],sigma[j][j]);
     sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
     if (lambda[i][i] != lambda[j][j])
       error->all(FLERR,"Pair lj/cut/coul/long/soft different lambda values in mix");
     lambda[i][j] = lambda[i][i];
     cut_lj[i][j] = mix_distance(cut_lj[i][i],cut_lj[j][j]);
   }
 
   // include TIP4P qdist in full cutoff, qdist = 0.0 if not TIP4P
 
   double cut = MAX(cut_lj[i][j],cut_coul+2.0*qdist);
   cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
 
   // prefactors used by the force/energy kernels:
   //   lj1 = lambda^nlambda, lj2 = sigma^6,
   //   lj3 = alphalj*(1-lambda)^2, lj4 = alphac*(1-lambda)^2
 
   lj1[i][j] = pow(lambda[i][j], nlambda);
   lj2[i][j] = pow(sigma[i][j], 6.0);
   lj3[i][j] = alphalj * (1.0 - lambda[i][j])*(1.0 - lambda[i][j]);
   lj4[i][j] = alphac  * (1.0 - lambda[i][j])*(1.0 - lambda[i][j]);
 
   // offset = soft-core LJ energy evaluated at r = cut_lj
   // the kernels use denlj = lj3 + rsq*rsq*rsq/lj2 = lj3 + (r/sigma)^6,
   // so the ratio here must be cut_lj/sigma, not sigma/cut_lj
 
   if (offset_flag) {
     double denlj = lj3[i][j] + pow(cut_lj[i][j] / sigma[i][j], 6.0);
     offset[i][j] = lj1[i][j] * 4.0 * epsilon[i][j] * (1.0/(denlj*denlj) - 1.0/denlj);
   } else offset[i][j] = 0.0;
 
   // mirror everything the kernels index as [itype][jtype] to j,i
 
   epsilon[j][i] = epsilon[i][j];
   sigma[j][i] = sigma[i][j];
   lambda[j][i] = lambda[i][j];
   cut_ljsq[j][i] = cut_ljsq[i][j];
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   lj4[j][i] = lj4[i][j];
   offset[j][i] = offset[i][j];
 
   // check interior rRESPA cutoff
 
   if (cut_respa && MIN(cut_lj[i][j],cut_coul) < cut_respa[3])
     error->all(FLERR,"Pair cutoff < Respa interior cutoff");
 
   // compute I,J contribution to long-range tail correction
   // count total # of atoms of type I and J via Allreduce
 
   if (tail_flag) {
     int *type = atom->type;
     int nlocal = atom->nlocal;
 
     double count[2],all[2];
     count[0] = count[1] = 0.0;
     for (int k = 0; k < nlocal; k++) {
       if (type[k] == i) count[0] += 1.0;
       if (type[k] == j) count[1] += 1.0;
     }
     MPI_Allreduce(count,all,2,MPI_DOUBLE,MPI_SUM,world);
 
     double sig2 = sigma[i][j]*sigma[i][j];
     double sig6 = sig2*sig2*sig2;
     double rc3 = cut_lj[i][j]*cut_lj[i][j]*cut_lj[i][j];
     double rc6 = rc3*rc3;
     double rc9 = rc3*rc6;
     etail_ij = 8.0*MY_PI*all[0]*all[1]* lj1[i][j] * epsilon[i][j] *
       sig6 * (sig6 - 3.0*rc6) / (9.0*rc9);
     ptail_ij = 16.0*MY_PI*all[0]*all[1]* lj1[i][j] * epsilon[i][j] *
       sig6 * (2.0*sig6 - 3.0*rc6) / (9.0*rc9);
   }
 
   return cut;
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
   global settings first, then setflag and (if set) the coefficients
   of every type pair with j >= i
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulLongSoft::write_restart(FILE *fp)
 {
   write_restart_settings(fp);
 
   for (int it = 1; it <= atom->ntypes; ++it) {
     for (int jt = it; jt <= atom->ntypes; ++jt) {
       fwrite(&setflag[it][jt],sizeof(int),1,fp);
 
       // coefficients are only stored for pairs that were set
       if (!setflag[it][jt]) continue;
 
       fwrite(&epsilon[it][jt],sizeof(double),1,fp);
       fwrite(&sigma[it][jt],sizeof(double),1,fp);
       fwrite(&lambda[it][jt],sizeof(double),1,fp);
       fwrite(&cut_lj[it][jt],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
   record layout mirrors write_restart()
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulLongSoft::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
 
   allocate();
 
   const int me = comm->me;
 
   for (int it = 1; it <= atom->ntypes; ++it) {
     for (int jt = it; jt <= atom->ntypes; ++jt) {
       if (me == 0) fread(&setflag[it][jt],sizeof(int),1,fp);
       MPI_Bcast(&setflag[it][jt],1,MPI_INT,0,world);
 
       // unset pairs carry no coefficient record
       if (!setflag[it][jt]) continue;
 
       if (me == 0) {
         fread(&epsilon[it][jt],sizeof(double),1,fp);
         fread(&sigma[it][jt],sizeof(double),1,fp);
         fread(&lambda[it][jt],sizeof(double),1,fp);
         fread(&cut_lj[it][jt],sizeof(double),1,fp);
       }
 
       MPI_Bcast(&epsilon[it][jt],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&sigma[it][jt],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&lambda[it][jt],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut_lj[it][jt],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
   global settings only: soft-core parameters, cutoffs, then flags
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulLongSoft::write_restart_settings(FILE *fp)
 {
   const size_t dbl_bytes = sizeof(double);
   const size_t int_bytes = sizeof(int);
 
   // soft-core parameters
 
   fwrite(&nlambda,dbl_bytes,1,fp);
   fwrite(&alphalj,dbl_bytes,1,fp);
   fwrite(&alphac,dbl_bytes,1,fp);
 
   // global cutoffs
 
   fwrite(&cut_lj_global,dbl_bytes,1,fp);
   fwrite(&cut_coul,dbl_bytes,1,fp);
 
   // flags
 
   fwrite(&offset_flag,int_bytes,1,fp);
   fwrite(&mix_flag,int_bytes,1,fp);
   fwrite(&tail_flag,int_bytes,1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
   field order mirrors write_restart_settings()
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulLongSoft::read_restart_settings(FILE *fp)
 {
   const size_t dbl_bytes = sizeof(double);
   const size_t int_bytes = sizeof(int);
   const bool is_reader = (comm->me == 0);
 
   if (is_reader) {
     fread(&nlambda,dbl_bytes,1,fp);
     fread(&alphalj,dbl_bytes,1,fp);
     fread(&alphac,dbl_bytes,1,fp);
 
     fread(&cut_lj_global,dbl_bytes,1,fp);
     fread(&cut_coul,dbl_bytes,1,fp);
     fread(&offset_flag,int_bytes,1,fp);
     fread(&mix_flag,int_bytes,1,fp);
     fread(&tail_flag,int_bytes,1,fp);
   }
 
   // every proc takes part in the broadcasts, in file order
 
   MPI_Bcast(&nlambda,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&alphalj,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&alphac,1,MPI_DOUBLE,0,world);
 
   MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
   MPI_Bcast(&tail_flag,1,MPI_INT,0,world);
 }
 
 
 /* ----------------------------------------------------------------------
    proc 0 writes to data file
    one line per atom type: type epsilon sigma lambda (diagonal entries)
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulLongSoft::write_data(FILE *fp)
 {
   int it = 1;
   while (it <= atom->ntypes) {
     fprintf(fp,"%d %g %g %g\n",it,epsilon[it][it],sigma[it][it],lambda[it][it]);
     ++it;
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes all pairs to data file
    one line per pair with j >= i: i j epsilon sigma lambda cut_lj
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulLongSoft::write_data_all(FILE *fp)
 {
   for (int it = 1; it <= atom->ntypes; ++it) {
     int jt = it;
     while (jt <= atom->ntypes) {
       fprintf(fp,"%d %d %g %g %g %g\n",it,jt,epsilon[it][jt],sigma[it][jt],
               lambda[it][jt],cut_lj[it][jt]);
       ++jt;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    energy and force for a single pair i,j at squared distance rsq
    fforce receives forcecoul + factor_lj*forcelj
    returns the Coulomb plus factor_lj-scaled LJ energy
 ------------------------------------------------------------------------- */
 
 double PairLJCutCoulLongSoft::single(int i, int j, int itype, int jtype,
                                  double rsq,
                                  double factor_coul, double factor_lj,
                                  double &fforce)
 {
   double fcoul = 0.0;
   double flj = 0.0;
   double energy = 0.0;
 
   // Coulomb part, only inside the Coulomb cutoff
 
   if (rsq < cut_coulsq) {
     const double r = sqrt(rsq);
     const double grij = g_ewald * r;
     const double expm2 = exp(-grij*grij);
     const double t = 1.0 / (1.0 + EWALD_P*grij);
     const double erfc_val = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
 
     // lj4 added under the square root softens the distance
     const double denc = sqrt(lj4[itype][jtype] + rsq);
 
     double pre = force->qqrd2e * lj1[itype][jtype] * atom->q[i]*atom->q[j] /
       (denc*denc*denc);
     fcoul = pre * (erfc_val + EWALD_F*grij*expm2);
     if (factor_coul < 1.0) fcoul -= (1.0-factor_coul)*pre;
 
     pre = force->qqrd2e * lj1[itype][jtype] * atom->q[i]*atom->q[j] / denc;
     double phicoul = pre*erfc_val;
     if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*pre;
     energy += phicoul;
   }
 
   // LJ part, only inside the LJ cutoff of this type pair
 
   if (rsq < cut_ljsq[itype][jtype]) {
     const double r4sig6 = rsq*rsq / lj2[itype][jtype];
     const double denlj = lj3[itype][jtype] + rsq*r4sig6;
 
     flj = lj1[itype][jtype] * epsilon[itype][jtype] * 
       (48.0*r4sig6/(denlj*denlj*denlj) - 24.0*r4sig6/(denlj*denlj));
 
     const double philj = lj1[itype][jtype] * 4.0 * epsilon[itype][jtype] * 
       (1.0/(denlj*denlj) - 1.0/denlj) - offset[itype][jtype];
     energy += factor_lj*philj;
   }
 
   fforce = fcoul + factor_lj*flj;
 
   return energy;
 }
 
 /* ----------------------------------------------------------------------
    return a pointer to the internal quantity named by str
    dim is set to 0 for the scalar cut_coul and to 2 otherwise,
    including when the name is unknown and NULL is returned
 ------------------------------------------------------------------------- */
 
 void *PairLJCutCoulLongSoft::extract(const char *str, int &dim)
 {
   if (strcmp(str,"cut_coul") == 0) {
     dim = 0;
     return (void *) &cut_coul;
   }
 
   // everything below is a per-type-pair array
 
   dim = 2;
 
   void *found = NULL;
   if (strcmp(str,"epsilon") == 0) found = (void *) epsilon;
   else if (strcmp(str,"sigma") == 0) found = (void *) sigma;
   else if (strcmp(str,"lambda") == 0) found = (void *) lambda;
 
   return found;
 }
diff --git a/src/USER-FEP/pair_lj_cut_soft.cpp b/src/USER-FEP/pair_lj_cut_soft.cpp
index 8ec4a002f..6329c2608 100644
--- a/src/USER-FEP/pair_lj_cut_soft.cpp
+++ b/src/USER-FEP/pair_lj_cut_soft.cpp
@@ -1,779 +1,779 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Paul Crozier (SNL)
    Soft-core version: Agilio Padua (Univ Blaise Pascal & CNRS)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_lj_cut_soft.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 /* ----------------------------------------------------------------------
    constructor: set the rRESPA and data-file flags, no arrays allocated yet
 ------------------------------------------------------------------------- */
 
 PairLJCutSoft::PairLJCutSoft(LAMMPS *lmp) : Pair(lmp)
 {
   // per-type arrays are created later by allocate()
   allocated = 0;
 
   writedata = 1;
   respa_enable = 1;
 }
 
 /* ----------------------------------------------------------------------
    destructor: free the per-type arrays if allocate() was called
 ------------------------------------------------------------------------- */
 
 PairLJCutSoft::~PairLJCutSoft()
 {
   // nothing to free when allocate() was never called
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   memory->destroy(cut);
   memory->destroy(epsilon);
   memory->destroy(sigma);
   memory->destroy(lambda);
   memory->destroy(lj1);
   memory->destroy(lj2);
   memory->destroy(lj3);
   memory->destroy(offset);
 
   allocated = 0;
 }
 
 /* ----------------------------------------------------------------------
    compute soft-core LJ forces (and energy/virial if requested) for all
    pairs in the regular neighbor list
 ------------------------------------------------------------------------- */
 
 void PairLJCutSoft::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,forcelj,factor_lj;
   double denlj, r4sig6;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // special-bond scaling is looked up before the index is masked
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 
         // r4sig6 = r^4/lj2 and denlj = lj3 + r^6/lj2
         r4sig6 = rsq*rsq / lj2[itype][jtype];
         denlj = lj3[itype][jtype] + rsq*r4sig6;
         forcelj = lj1[itype][jtype] * epsilon[itype][jtype] * 
           (48.0*r4sig6/(denlj*denlj*denlj) - 24.0*r4sig6/(denlj*denlj));
 
         fpair = factor_lj*forcelj;
 
         // fpair multiplies the separation vector directly
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         // update j only with newton_pair on or when j is a local atom
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
           evdwl = lj1[itype][jtype] * 4.0 * epsilon[itype][jtype] * 
             (1.0/(denlj*denlj) - 1.0/denlj) - offset[itype][jtype];
           evdwl *= factor_lj;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    rRESPA inner level: soft-core LJ forces for pairs in the inner list
    closer than cut_respa[1], switched off smoothly between
    cut_respa[0] and cut_respa[1]; no energy or virial is tallied
 ------------------------------------------------------------------------- */
 
 void PairLJCutSoft::compute_inner()
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,fpair;
   double rsq,forcelj,factor_lj,rsw;
   double denlj, r4sig6;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
 
   inum = listinner->inum;
   ilist = listinner->ilist;
   numneigh = listinner->numneigh;
   firstneigh = listinner->firstneigh;
 
   // switching region: full force below cut_out_on, zero at cut_out_off
 
   double cut_out_on = cut_respa[0];
   double cut_out_off = cut_respa[1];
 
   double cut_out_diff = cut_out_off - cut_out_on;
   double cut_out_on_sq = cut_out_on*cut_out_on;
   double cut_out_off_sq = cut_out_off*cut_out_off;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // special-bond scaling is looked up before the index is masked
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cut_out_off_sq) {
         jtype = type[j];
 
         r4sig6 = rsq*rsq / lj2[itype][jtype];
         denlj = lj3[itype][jtype] + rsq*r4sig6;
         forcelj = lj1[itype][jtype] * epsilon[itype][jtype] * 
           (48.0*r4sig6/(denlj*denlj*denlj) - 24.0*r4sig6/(denlj*denlj));
 
         fpair = factor_lj*forcelj;
 
         // scale by 1 - rsw^2*(3 - 2*rsw), rsw running 0 -> 1 over the region
         if (rsq > cut_out_on_sq) {
           rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff;
           fpair *= 1.0 - rsw*rsw*(3.0 - 2.0*rsw);
         }
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         // update j only with newton_pair on or when j is a local atom
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    rRESPA middle level: soft-core LJ forces for pairs in the middle list
    with cut_respa[0] < r < cut_respa[3], switched on between
    cut_respa[0] and cut_respa[1] and off between cut_respa[2] and
    cut_respa[3]; no energy or virial is tallied
 ------------------------------------------------------------------------- */
 
 void PairLJCutSoft::compute_middle()
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,fpair;
   double rsq,forcelj,factor_lj,rsw;
   double denlj, r4sig6;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
 
   inum = listmiddle->inum;
   ilist = listmiddle->ilist;
   numneigh = listmiddle->numneigh;
   firstneigh = listmiddle->firstneigh;
 
   // two switching regions: [cut_in_off,cut_in_on] ramps the force up,
   // [cut_out_on,cut_out_off] ramps it back down
 
   double cut_in_off = cut_respa[0];
   double cut_in_on = cut_respa[1];
   double cut_out_on = cut_respa[2];
   double cut_out_off = cut_respa[3];
 
   double cut_in_diff = cut_in_on - cut_in_off;
   double cut_out_diff = cut_out_off - cut_out_on;
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
   double cut_out_on_sq = cut_out_on*cut_out_on;
   double cut_out_off_sq = cut_out_off*cut_out_off;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // special-bond scaling is looked up before the index is masked
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cut_out_off_sq && rsq > cut_in_off_sq) {
         jtype = type[j];
 
         r4sig6 = rsq*rsq / lj2[itype][jtype];
         denlj = lj3[itype][jtype] + rsq*r4sig6;
         forcelj = lj1[itype][jtype] * epsilon[itype][jtype] * 
           (48.0*r4sig6/(denlj*denlj*denlj) - 24.0*r4sig6/(denlj*denlj));
 
         fpair = factor_lj*forcelj;
 
         // inner ramp: scale by rsw^2*(3 - 2*rsw)
         if (rsq < cut_in_on_sq) {
           rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff;
           fpair *= rsw*rsw*(3.0 - 2.0*rsw);
         }
         // outer ramp: scale by 1 + rsw^2*(2*rsw - 3)
         if (rsq > cut_out_on_sq) {
           rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff;
           fpair *= 1.0 + rsw*rsw*(2.0*rsw - 3.0);
         }
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         // update j only with newton_pair on or when j is a local atom
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    rRESPA outer level: soft-core LJ forces for pairs in the outer list
    beyond cut_respa[2], switched on between cut_respa[2] and
    cut_respa[3]; energy and virial are tallied for every pair inside
    the pair cutoff using the unswitched interaction
 ------------------------------------------------------------------------- */
 
 void PairLJCutSoft::compute_outer(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,forcelj,factor_lj,rsw;
   double denlj, r4sig6;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
 
   inum = listouter->inum;
   ilist = listouter->ilist;
   numneigh = listouter->numneigh;
   firstneigh = listouter->firstneigh;
 
   // switching region: no outer force below cut_in_off, full above cut_in_on
 
   double cut_in_off = cut_respa[2];
   double cut_in_on = cut_respa[3];
 
   double cut_in_diff = cut_in_on - cut_in_off;
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // special-bond scaling is looked up before the index is masked
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         if (rsq > cut_in_off_sq) {
 
           r4sig6 = rsq*rsq / lj2[itype][jtype];
           denlj = lj3[itype][jtype] + rsq*r4sig6;
           forcelj = lj1[itype][jtype] * epsilon[itype][jtype] * 
             (48.0*r4sig6/(denlj*denlj*denlj) - 24.0*r4sig6/(denlj*denlj));
 
           fpair = factor_lj*forcelj;
 
           // ramp the applied force up by rsw^2*(3 - 2*rsw)
           if (rsq < cut_in_on_sq) {
             rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff;
             fpair *= rsw*rsw*(3.0 - 2.0*rsw);
           }
 
           f[i][0] += delx*fpair;
           f[i][1] += dely*fpair;
           f[i][2] += delz*fpair;
           // update j only with newton_pair on or when j is a local atom
           if (newton_pair || j < nlocal) {
             f[j][0] -= delx*fpair;
             f[j][1] -= dely*fpair;
             f[j][2] -= delz*fpair;
           }
         }
 
         if (eflag) {
           // recompute the denominator for this pair: the force branch
           // above is skipped when rsq <= cut_in_off_sq, which would
           // otherwise leave denlj stale or uninitialized here
           r4sig6 = rsq*rsq / lj2[itype][jtype];
           denlj = lj3[itype][jtype] + rsq*r4sig6;
           evdwl = lj1[itype][jtype] * 4.0 * epsilon[itype][jtype] * 
             (1.0/(denlj*denlj) - 1.0/denlj) - offset[itype][jtype];
           evdwl *= factor_lj;
         }
 
         // the virial uses the unswitched force: compute it for pairs the
         // force branch skipped, restore it for pairs inside the ramp
         if (vflag) {
           if (rsq <= cut_in_off_sq) {
             r4sig6 = rsq*rsq / lj2[itype][jtype];
             denlj = lj3[itype][jtype] + rsq*r4sig6;
             forcelj = lj1[itype][jtype] * epsilon[itype][jtype] * 
               (48.0*r4sig6/(denlj*denlj*denlj) - 24.0*r4sig6/(denlj*denlj));
             fpair = factor_lj*forcelj;
           } else if (rsq < cut_in_on_sq)
             fpair = factor_lj*forcelj;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    allocate all per-type-pair arrays, each (ntypes+1) x (ntypes+1)
 ------------------------------------------------------------------------- */
 
 void PairLJCutSoft::allocate()
 {
   allocated = 1;
 
   const int ntypes = atom->ntypes;
   const int dim = ntypes + 1;
 
   // only the upper triangle (jt >= it) of setflag is cleared here
 
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int it = 1; it <= ntypes; ++it) {
     for (int jt = it; jt <= ntypes; ++jt) {
       setflag[it][jt] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   // cutoff and coefficients filled in by coeff()
 
   memory->create(cut,dim,dim,"pair:cut");
   memory->create(epsilon,dim,dim,"pair:epsilon");
   memory->create(sigma,dim,dim,"pair:sigma");
   memory->create(lambda,dim,dim,"pair:lambda");
 
   // prefactors and energy shift read by the compute kernels
 
   memory->create(lj1,dim,dim,"pair:lj1");
   memory->create(lj2,dim,dim,"pair:lj2");
   memory->create(lj3,dim,dim,"pair:lj3");
   memory->create(offset,dim,dim,"pair:offset");
 }
 
 /* ----------------------------------------------------------------------
    global settings
    args = nlambda alphalj cut_global
 ------------------------------------------------------------------------- */
 
 void PairLJCutSoft::settings(int narg, char **arg)
 {
   if (narg != 3) error->all(FLERR,"Illegal pair_style command");
 
   nlambda = force->numeric(FLERR,arg[0]);
   alphalj = force->numeric(FLERR,arg[1]);
 
   cut_global = force->numeric(FLERR,arg[2]);
 
   // reset cutoffs that have been explicitly set
   // the inner loop starts at j = i so that diagonal (i,i) pairs,
   // which coeff() also marks in setflag, pick up the new global cutoff
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut[i][j] = cut_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairLJCutSoft::coeff(int narg, char **arg)
 {
   if (narg < 5 || narg > 6)
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double epsilon_one = force->numeric(FLERR,arg[2]);
   double sigma_one = force->numeric(FLERR,arg[3]);
   double lambda_one = force->numeric(FLERR,arg[4]);
 
   double cut_one = cut_global;
   if (narg == 6) cut_one = force->numeric(FLERR,arg[5]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       epsilon[i][j] = epsilon_one;
       sigma[i][j] = sigma_one;
       lambda[i][j] = lambda_one;
       cut[i][j] = cut_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLJCutSoft::init_style()
 {
   // request regular or rRESPA neighbor lists
   // respa = 0: a single request is made and its fields are left untouched
   // respa = 1: inner (id 1) and outer (id 3) lists are requested
   // respa = 2: inner (id 1), middle (id 2) and outer (id 3) lists are requested
   // the ids assigned here are the ones init_list() dispatches on
 
   int irequest;
 
   if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
     // respa ends up 2 whenever a middle level exists, regardless of inner
     int respa = 0;
     if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
     if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
 
-    if (respa == 0) irequest = neighbor->request(this);
+    if (respa == 0) irequest = neighbor->request(this,instance_me);
     else if (respa == 1) {
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 1;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respainner = 1;
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 3;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respaouter = 1;
     } else {
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 1;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respainner = 1;
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 2;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respamiddle = 1;
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 3;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respaouter = 1;
     }
 
-  } else irequest = neighbor->request(this);
+  } else irequest = neighbor->request(this,instance_me);
 
   // set rRESPA cutoffs
   // cut_respa stays NULL unless the integrator is rRESPA with an inner level;
   // init_one() only performs its interior cutoff check when it is non-NULL
 
   if (strstr(update->integrate_style,"respa") &&
       ((Respa *) update->integrate)->level_inner >= 0)
     cut_respa = ((Respa *) update->integrate)->cutoff;
   else cut_respa = NULL;
 
 }
 
 /* ----------------------------------------------------------------------
    neighbor callback to inform pair style of neighbor list to use
    regular or rRESPA
 ------------------------------------------------------------------------- */
 
 void PairLJCutSoft::init_list(int id, NeighList *ptr)
 {
   // store the list in the slot that matches the request id;
   // any other id is silently ignored
 
   switch (id) {
   case 0:
     list = ptr;
     break;
   case 1:
     listinner = ptr;
     break;
   case 2:
     listmiddle = ptr;
     break;
   case 3:
     listouter = ptr;
     break;
   default:
     break;
   }
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairLJCutSoft::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) {
     epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j],
                                sigma[i][i],sigma[j][j]);
     sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
     cut[i][j] = mix_distance(cut[i][i],cut[j][j]);
   }
 
   lj1[i][j] = pow(lambda[i][j], nlambda);
   lj2[i][j] = pow(sigma[i][j], 6.0);
   lj3[i][j] = alphalj * (1.0 - lambda[i][j])*(1.0 - lambda[i][j]);
 
   if (offset_flag) {
     double denlj = lj3[i][j] + pow(sigma[i][j] / cut[i][j], 6.0);
     offset[i][j] = lj1[i][j] * 4.0 * epsilon[i][j] * (1.0/(denlj*denlj) - 1.0/denlj);
   } else offset[i][j] = 0.0;
 
   epsilon[j][i] = epsilon[i][j];
   sigma[j][i] = sigma[i][j];
   lambda[j][i] = lambda[i][j];
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   offset[j][i] = offset[i][j];
 
   // check interior rRESPA cutoff
 
   if (cut_respa && cut[i][j] < cut_respa[3])
     error->all(FLERR,"Pair cutoff < Respa interior cutoff");
 
   // compute I,J contribution to long-range tail correction
   // count total # of atoms of type I and J via Allreduce
 
   if (tail_flag) {
     int *type = atom->type;
     int nlocal = atom->nlocal;
 
     double count[2],all[2];
     count[0] = count[1] = 0.0;
     for (int k = 0; k < nlocal; k++) {
       if (type[k] == i) count[0] += 1.0;
       if (type[k] == j) count[1] += 1.0;
     }
     MPI_Allreduce(count,all,2,MPI_DOUBLE,MPI_SUM,world);
 
     double sig2 = sigma[i][j]*sigma[i][j];
     double sig6 = sig2*sig2*sig2;
     double rc3 = cut[i][j]*cut[i][j]*cut[i][j];
     double rc6 = rc3*rc3;
     double rc9 = rc3*rc6;
     etail_ij = 8.0*MY_PI*all[0]*all[1]* lj1[i][j] * epsilon[i][j] *
       sig6 * (sig6 - 3.0*rc6) / (9.0*rc9);
     ptail_ij = 16.0*MY_PI*all[0]*all[1]* lj1[i][j] * epsilon[i][j] *
       sig6 * (2.0*sig6 - 3.0*rc6) / (9.0*rc9);
   }
 
   return cut[i][j];
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJCutSoft::write_restart(FILE *fp)
 {
   // global settings go first, followed by one record per i <= j type pair
 
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
 
       // the flag is always written; coefficients follow only when it is set
 
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
       if (!setflag[itype][jtype]) continue;
 
       fwrite(&epsilon[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigma[itype][jtype],sizeof(double),1,fp);
       fwrite(&lambda[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJCutSoft::read_restart(FILE *fp)
 {
   // global settings are read first, then the per-pair arrays are allocated
 
   read_restart_settings(fp);
   allocate();
 
   const int rank = comm->me;
   const int ntypes = atom->ntypes;
 
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
 
       // only proc 0 touches the file; each value is then broadcast
 
       if (rank == 0) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
 
       // pairs that were not set carry no coefficients in the file
 
       if (!setflag[itype][jtype]) continue;
 
       if (rank == 0) {
         fread(&epsilon[itype][jtype],sizeof(double),1,fp);
         fread(&sigma[itype][jtype],sizeof(double),1,fp);
         fread(&lambda[itype][jtype],sizeof(double),1,fp);
         fread(&cut[itype][jtype],sizeof(double),1,fp);
       }
       MPI_Bcast(&epsilon[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&sigma[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&lambda[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJCutSoft::write_restart_settings(FILE *fp)
 {
   // the field order must mirror read_restart_settings()
 
   const size_t dbl_size = sizeof(double);
   const size_t int_size = sizeof(int);
 
   // soft-core parameters
 
   fwrite(&nlambda,dbl_size,1,fp);
   fwrite(&alphalj,dbl_size,1,fp);
 
   // global cutoff, then the offset/mix/tail flags
 
   fwrite(&cut_global,dbl_size,1,fp);
   fwrite(&offset_flag,int_size,1,fp);
   fwrite(&mix_flag,int_size,1,fp);
   fwrite(&tail_flag,int_size,1,fp);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJCutSoft::read_restart_settings(FILE *fp)
 {
   // proc 0 pulls the values from the file in the order
   // write_restart_settings() stored them
 
   if (comm->me == 0) {
     fread(&nlambda,sizeof(double),1,fp);
     fread(&alphalj,sizeof(double),1,fp);
 
     fread(&cut_global,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
     fread(&tail_flag,sizeof(int),1,fp);
   }
 
   // every proc then receives the same settings from proc 0
 
   const int root = 0;
   MPI_Bcast(&nlambda,1,MPI_DOUBLE,root,world);
   MPI_Bcast(&alphalj,1,MPI_DOUBLE,root,world);
 
   MPI_Bcast(&cut_global,1,MPI_DOUBLE,root,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,root,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,root,world);
   MPI_Bcast(&tail_flag,1,MPI_INT,root,world);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to data file
 ------------------------------------------------------------------------- */
 
 void PairLJCutSoft::write_data(FILE *fp)
 {
   // one line per atom type: the type index, then its like-type (i,i)
   // epsilon, sigma and lambda
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     fprintf(fp,"%d %g %g %g\n",itype,
             epsilon[itype][itype],sigma[itype][itype],lambda[itype][itype]);
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes all pairs to data file
 ------------------------------------------------------------------------- */
 
 void PairLJCutSoft::write_data_all(FILE *fp)
 {
   // one line per i <= j type pair: both indices, then epsilon, sigma,
   // lambda and the cutoff of that pair
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fprintf(fp,"%d %d %g %g %g %g\n",itype,jtype,
               epsilon[itype][jtype],sigma[itype][jtype],
               lambda[itype][jtype],cut[itype][jtype]);
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLJCutSoft::single(int i, int j, int itype, int jtype, double rsq,
                          double factor_coul, double factor_lj,
                          double &fforce)
 {
   // returns the scaled pair energy and stores the scaled force term in fforce
   // i, j and factor_coul are not used by this style
 
   // beyond the cutoff both contributions stay at zero
 
   double forcelj = 0.0;
   double philj = 0.0;
 
   if (rsq < cutsq[itype][jtype]) {
     const double r4sig6 = rsq*rsq / lj2[itype][jtype];
     const double denlj = lj3[itype][jtype] + rsq*r4sig6;
     const double denlj_sq = denlj*denlj;
 
     forcelj = lj1[itype][jtype] * epsilon[itype][jtype] *
       (48.0*r4sig6/(denlj_sq*denlj) - 24.0*r4sig6/denlj_sq);
     philj = lj1[itype][jtype] * 4.0 * epsilon[itype][jtype] *
       (1.0/denlj_sq - 1.0/denlj) - offset[itype][jtype];
   }
 
   fforce = factor_lj*forcelj;
   return factor_lj*philj;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void *PairLJCutSoft::extract(const char *str, int &dim)
 {
   // dim is always reported as 2, even when the name is not recognized
 
   dim = 2;
 
   // look the name up among the exported coefficient arrays
 
   void *found = NULL;
   if (strcmp(str,"epsilon") == 0) found = (void *) epsilon;
   else if (strcmp(str,"sigma") == 0) found = (void *) sigma;
   else if (strcmp(str,"lambda") == 0) found = (void *) lambda;
   return found;
 }
diff --git a/src/USER-MISC/compute_ackland_atom.cpp b/src/USER-MISC/compute_ackland_atom.cpp
index df5161e75..d768074aa 100644
--- a/src/USER-MISC/compute_ackland_atom.cpp
+++ b/src/USER-MISC/compute_ackland_atom.cpp
@@ -1,395 +1,395 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: G. Ziegenhain, gerolf@ziegenhain.com
                         Copyright (C) 2007
 ------------------------------------------------------------------------- */
 
 #include "string.h"
 #include "compute_ackland_atom.h"
 #include "atom.h"
 #include "update.h"
 #include "modify.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "force.h"
 #include "pair.h"
 #include "comm.h"
 #include "memory.h"
 #include "error.h"
 #include <math.h>
 
 using namespace LAMMPS_NS;
 
 // structure labels that compute_peratom() stores into structure[]
 enum{UNKNOWN,BCC,FCC,HCP,ICO};
 
 /* ---------------------------------------------------------------------- */
 
 ComputeAcklandAtom::ComputeAcklandAtom(LAMMPS *lmp, int narg, char **arg) :
   Compute(lmp, narg, arg)
 {
   // exactly three arguments are accepted
 
   if (narg != 3) error->all(FLERR,"Illegal compute ackland/atom command");
 
   // work arrays start out empty; compute_peratom() sizes them on demand
 
   structure = NULL;
   nmax = 0;
 
   distsq = NULL;
   nearest = NULL;
   nearest_n0 = NULL;
   nearest_n1 = NULL;
   maxneigh = 0;
 
   // per-atom output with zero columns
 
   size_peratom_cols = 0;
   peratom_flag = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 ComputeAcklandAtom::~ComputeAcklandAtom()
 {
   // neighbor scratch arrays
 
   memory->destroy(nearest_n1);
   memory->destroy(nearest_n0);
   memory->destroy(nearest);
   memory->destroy(distsq);
 
   // per-atom result array
 
   memory->destroy(structure);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeAcklandAtom::init()
 {
   // need an occasional full neighbor list
   // the request is re-flagged from a pair request to a compute request
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->compute = 1;
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
   neighbor->requests[irequest]->occasional = 1;
 
   // count computes of this style; more than one only triggers a warning,
   // printed by proc 0
 
   int count = 0;
   for (int i = 0; i < modify->ncompute; i++)
     if (strcmp(modify->compute[i]->style,"ackland/atom") == 0) count++;
   if (count > 1 && comm->me == 0)
     error->warning(FLERR,"More than one compute ackland/atom");
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeAcklandAtom::init_list(int id, NeighList *ptr)
 {
   list = ptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeAcklandAtom::compute_peratom()
 {
   // classifies the local environment of each atom in the neighbor list and
   // stores one of the UNKNOWN/BCC/FCC/HCP/ICO labels in structure[i];
   // atoms outside the compute group get 0.0
 
   int i,j,ii,jj,k,n,inum,jnum;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
   int *ilist,*jlist,*numneigh,**firstneigh;
   // chi[] = histogram of bond-angle cosines, 8 bins
   int chi[8];
 
   invoked_peratom = update->ntimestep;
 
   // grow structure array if necessary
 
   if (atom->nlocal > nmax) {
     memory->destroy(structure);
     nmax = atom->nmax;
     memory->create(structure,nmax,"compute/ackland/atom:ackland");
     vector_atom = structure;
   }
 
   // invoke full neighbor list (will copy or build if necessary)
 
   neighbor->build_one(list);
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // compute structure parameter for each atom in group
   // use full neighbor list
 
   double **x = atom->x;
   int *mask = atom->mask;
   double cutsq = force->pair->cutforce * force->pair->cutforce;
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     if (mask[i] & groupbit) {
       xtmp = x[i][0];
       ytmp = x[i][1];
       ztmp = x[i][2];
       jlist = firstneigh[i];
       jnum = numneigh[i];
 
       // ensure distsq and nearest arrays are long enough
 
       if (jnum > maxneigh) {
               memory->destroy(distsq);
               memory->destroy(nearest);
         memory->destroy(nearest_n0);
         memory->destroy(nearest_n1);
               maxneigh = jnum;
               memory->create(distsq,maxneigh,"compute/ackland/atom:distsq");
               memory->create(nearest,maxneigh,"compute/ackland/atom:nearest");
         memory->create(nearest_n0,maxneigh,"compute/ackland/atom:nearest_n0");
         memory->create(nearest_n1,maxneigh,"compute/ackland/atom:nearest_n1");
       }
 
       // loop over list of all neighbors within force cutoff
       // distsq[] = distance sq to each
       // nearest[] = atom indices of neighbors
 
       n = 0;
       for (jj = 0; jj < jnum; jj++) {
               j = jlist[jj];
         j &= NEIGHMASK;
 
               delx = xtmp - x[j][0];
               dely = ytmp - x[j][1];
               delz = ztmp - x[j][2];
               rsq = delx*delx + dely*dely + delz*delz;
               if (rsq < cutsq) {
           distsq[n] = rsq;
           nearest[n++] = j;
         }
       }
 
       // Select 6 nearest neighbors
       // NOTE(review): nothing here guards n < 6; in that case the loop below
       // still sums distsq[0..5], including slots not written for this atom
       // (and past the allocation if maxneigh < 6) - confirm callers guarantee
       // at least 6 neighbors inside the cutoff
 
       select2(6,n,distsq,nearest);
 
       // Mean squared separation
 
       double r0_sq = 0.;
       for (j = 0; j < 6; j++)
         r0_sq += distsq[j];
       r0_sq /= 6.;
 
       // n0 near neighbors with: distsq<1.45*r0_sq
       // n1 near neighbors with: distsq<1.55*r0_sq
 
       double n0_dist_sq = 1.45*r0_sq,
         n1_dist_sq = 1.55*r0_sq;
       int n0 = 0, n1 = 0;
       for (j = 0; j < n; j++) {
          if (distsq[j] < n1_dist_sq) {
             nearest_n1[n1++] = nearest[j];
             if (distsq[j] < n0_dist_sq) {
                nearest_n0[n0++] = nearest[j];
             }
          }
       }
 
       // Evaluate all angles <(r_ij,rik) forall n0 particles with:
       // distsq < 1.45*r0_sq
 
       // bond_angle holds the cosine of the angle, not the angle itself
       double bond_angle;
       double norm_j, norm_k;
       chi[0] = chi[1] = chi[2] = chi[3] = chi[4] = chi[5] = chi[6] = chi[7] = 0;
       double x_ij, y_ij, z_ij, x_ik, y_ik, z_ik;
       for (j = 0; j < n0; j++) {
         x_ij = x[i][0]-x[nearest_n0[j]][0];
         y_ij = x[i][1]-x[nearest_n0[j]][1];
         z_ij = x[i][2]-x[nearest_n0[j]][2];
         norm_j = sqrt (x_ij*x_ij + y_ij*y_ij + z_ij*z_ij);
         // skip zero-length vectors to avoid dividing by zero below
         if (norm_j <= 0.) continue;
         for (k = j+1; k < n0; k++) {
           x_ik = x[i][0]-x[nearest_n0[k]][0];
           y_ik = x[i][1]-x[nearest_n0[k]][1];
           z_ik = x[i][2]-x[nearest_n0[k]][2];
           norm_k = sqrt (x_ik*x_ik + y_ik*y_ik + z_ik*z_ik);
           if (norm_k <= 0.)
             continue;
 
           bond_angle = (x_ij*x_ik + y_ij*y_ik + z_ij*z_ik) / (norm_j*norm_k);
 
           // Histogram for identifying the relevant peaks
 
           if (bond_angle < -0.945) chi[0]++;
           else if (bond_angle < -0.915) chi[1]++;
           else if (bond_angle < -0.755) chi[2]++;
           else if (bond_angle < -0.195) chi[3]++;
           else if (bond_angle < 0.195) chi[4]++;
           else if (bond_angle < 0.245) chi[5]++;
           else if (bond_angle < 0.795) chi[6]++;
           else chi[7]++;
         }
       }
 
       // Deviations from the different lattice structures
       // NOTE(review): the delta_bcc denominator chi[5]+chi[6]-chi[4] can be
       // zero, giving a floating-point inf or NaN - confirm that is intended
 
       double delta_bcc = 0.35*chi[4]/(double)(chi[5]+chi[6]-chi[4]);
       double delta_cp = fabs(1.-(double)chi[6]/24.);
       double delta_fcc = 0.61*(fabs((double)(chi[0]+chi[1]-6.))+
                                (double)chi[2])/6.0;
       double delta_hcp = (fabs((double)chi[0]-3.)+
                           fabs((double)chi[0]+(double)chi[1]+
                                (double)chi[2]+(double)chi[3]-9.0))/12.0;
 
       // Identification of the local structure according to the reference
 
       if (chi[0] == 7)       { delta_bcc = 0.; }
       else if (chi[0] == 6)  { delta_fcc = 0.; }
       else if (chi[0] <= 3)  { delta_hcp = 0.; }
 
       // decision chain, first match wins:
       //   any count in chi[7]          -> UNKNOWN
       //   chi[4] < 3                   -> ICO if 11 <= n1 <= 13, else UNKNOWN
       //   delta_bcc <= delta_cp        -> BCC if n1 >= 11, else UNKNOWN
       //   n1 outside 11..12            -> UNKNOWN
       //   otherwise FCC or HCP, whichever has the smaller deviation
 
       if (chi[7] > 0.)
          structure[i] = UNKNOWN;
       else
       if (chi[4] < 3.)
       {
          if (n1 > 13 || n1 < 11)
             structure[i] = UNKNOWN;
          else
             structure[i] = ICO;
       } else
       if (delta_bcc <= delta_cp)
       {
          if (n1 < 11)
             structure[i] = UNKNOWN;
          else
             structure[i] = BCC;
       } else
       if (n1 > 12 || n1 < 11)
          structure[i] = UNKNOWN;
       else
       if (delta_fcc < delta_hcp)
          structure[i] = FCC;
       else
          structure[i] = HCP;
 
     } else structure[i] = 0.0;
   }
 }
 
 /* ----------------------------------------------------------------------
    2 select routines from Numerical Recipes (slightly modified)
    find k smallest values in array of length n
    2nd routine sorts auxiliary array at same time
 ------------------------------------------------------------------------- */
 
 // SWAP/ISWAP exchange two lvalues through a temporary named tmp (itmp) that
 // must already be declared in the calling scope
 // each macro expands to three separate statements with no enclosing braces,
 // so it must only be used inside a braced block
 #define SWAP(a,b)   tmp = a; a = b; b = tmp;
 #define ISWAP(a,b) itmp = a; a = b; b = itmp;
 
 void ComputeAcklandAtom::select(int k, int n, double *arr)
   {
   // in-place selection on arr[0..n-1]; per the banner comment above it finds
   // the k smallest values (presumably leaving them in the first k slots, as
   // in the Numerical Recipes original - TODO confirm)
 
   int i,ir,j,l,mid;
   double a,tmp;
 
   // shift the pointer so the code below can use 1-based indices l..ir
   arr--;
   l = 1;
   ir = n;
   for (;;) {
     // active range has at most two elements: order them and finish
     if (ir <= l+1) {
       if (ir == l+1 && arr[ir] < arr[l]) {
         SWAP(arr[l],arr[ir])
       }
       return;
     } else {
       // move the middle element to l+1, then order arr[l], arr[l+1], arr[ir]
       // so that arr[l] <= arr[l+1] <= arr[ir]
       mid=(l+ir) >> 1;
       SWAP(arr[mid],arr[l+1])
       if (arr[l] > arr[ir]) {
         SWAP(arr[l],arr[ir])
       }
       if (arr[l+1] > arr[ir]) {
         SWAP(arr[l+1],arr[ir])
       }
       if (arr[l] > arr[l+1]) {
         SWAP(arr[l],arr[l+1])
       }
       // partition around the pivot a = arr[l+1]
       i = l+1;
       j = ir;
       a = arr[l+1];
       for (;;) {
         do i++; while (arr[i] < a);
         do j--; while (arr[j] > a);
         if (j < i) break;
         SWAP(arr[i],arr[j])
       }
       // drop the pivot into its final slot j
       arr[l+1] = arr[j];
       arr[j] = a;
       // keep only the side of the partition that still contains index k
       if (j >= k) ir = j-1;
       if (j <= k) l = i;
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeAcklandAtom::select2(int k, int n, double *arr, int *iarr)
 {
   // same selection as select(), but every exchange applied to arr is also
   // applied to iarr, so iarr[m] keeps following the value stored in arr[m]
 
   int i,ir,j,l,mid,ia,itmp;
   double a,tmp;
 
   // shift both pointers so the code below can use 1-based indices l..ir
   arr--;
   iarr--;
   l = 1;
   ir = n;
   for (;;) {
     // active range has at most two elements: order them and finish
     if (ir <= l+1) {
       if (ir == l+1 && arr[ir] < arr[l]) {
         SWAP(arr[l],arr[ir])
         ISWAP(iarr[l],iarr[ir])
       }
       return;
     } else {
       // move the middle element to l+1, then order arr[l], arr[l+1], arr[ir]
       // so that arr[l] <= arr[l+1] <= arr[ir]
       mid=(l+ir) >> 1;
       SWAP(arr[mid],arr[l+1])
       ISWAP(iarr[mid],iarr[l+1])
       if (arr[l] > arr[ir]) {
         SWAP(arr[l],arr[ir])
         ISWAP(iarr[l],iarr[ir])
       }
       if (arr[l+1] > arr[ir]) {
         SWAP(arr[l+1],arr[ir])
         ISWAP(iarr[l+1],iarr[ir])
       }
       if (arr[l] > arr[l+1]) {
         SWAP(arr[l],arr[l+1])
         ISWAP(iarr[l],iarr[l+1])
       }
       // partition around the pivot value a (companion entry ia)
       i = l+1;
       j = ir;
       a = arr[l+1];
       ia = iarr[l+1];
       for (;;) {
         do i++; while (arr[i] < a);
         do j--; while (arr[j] > a);
         if (j < i) break;
         SWAP(arr[i],arr[j])
         ISWAP(iarr[i],iarr[j])
       }
       // drop the pivot and its companion into their final slot j
       arr[l+1] = arr[j];
       arr[j] = a;
       iarr[l+1] = iarr[j];
       iarr[j] = ia;
       // keep only the side of the partition that still contains index k
       if (j >= k) ir = j-1;
       if (j <= k) l = i;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based array
 ------------------------------------------------------------------------- */
 
 double ComputeAcklandAtom::memory_usage()
 {
   // reported size: nmax entries of sizeof(double) bytes each
 
   return nmax * sizeof(double);
 }
diff --git a/src/USER-MISC/compute_basal_atom.cpp b/src/USER-MISC/compute_basal_atom.cpp
index 6052642eb..85a08d183 100644
--- a/src/USER-MISC/compute_basal_atom.cpp
+++ b/src/USER-MISC/compute_basal_atom.cpp
@@ -1,542 +1,542 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: C.D. Barrett, cdb333@cavs.msstate.edu
                         Copyright (C) 2013
 ------------------------------------------------------------------------- */
 
 #include "string.h"
 #include "compute_basal_atom.h"
 #include "atom.h"
 #include "update.h"
 #include "modify.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "force.h"
 #include "pair.h"
 #include "comm.h"
 #include "memory.h"
 #include "error.h"
 #include <math.h>
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 ComputeBasalAtom::ComputeBasalAtom(LAMMPS *lmp, int narg, char **arg) :
   Compute(lmp, narg, arg)
 {
   // exactly three arguments are accepted
 
   if (narg != 3) error->all(FLERR,"Illegal compute basal/atom command");
 
   // work arrays start out empty; compute_peratom() sizes them on demand
 
   BPV = NULL;
   nmax = 0;
 
   distsq = NULL;
   nearest = NULL;
   nearest_n0 = NULL;
   nearest_n1 = NULL;
   maxneigh = 0;
 
   // per-atom output with three columns
 
   size_peratom_cols = 3;
   peratom_flag = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 ComputeBasalAtom::~ComputeBasalAtom()
 {
   // neighbor scratch arrays
 
   memory->destroy(nearest_n1);
   memory->destroy(nearest_n0);
   memory->destroy(nearest);
   memory->destroy(distsq);
 
   // per-atom result array
 
   memory->destroy(BPV);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeBasalAtom::init()
 {
   // need an occasional full neighbor list
   // the request is re-flagged from a pair request to a compute request
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->compute = 1;
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
   neighbor->requests[irequest]->occasional = 1;
 
   // count computes of this style; more than one only triggers a warning,
   // printed by proc 0
 
   int count1 = 0;
   for (int i = 0; i < modify->ncompute; i++)
     if (strcmp(modify->compute[i]->style,"basal/atom") == 0) count1++;
   if (count1 > 1 && comm->me == 0)
     error->warning(FLERR,"More than one compute basal/atom");
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeBasalAtom::init_list(int id, NeighList *ptr)
 {
   list = ptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeBasalAtom::compute_peratom()
 {
   int i,j,ii,jj,k,n,inum,jnum;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq,var5,var6,var7;
   int *ilist,*jlist,*numneigh,**firstneigh;
   int chi[8];
   int value;
   int count;
   int k2[3];
   int j1[3];
   double x4[3],y4[3],z4[3],x5[3],y5[3],z5[3],x6[3],y6[3],z6[3];
   double x7[3],y7[3],z7[3];
 
   invoked_peratom = update->ntimestep;
 
   // grow structure array if necessary
 
   if (atom->nlocal > nmax) {
     memory->destroy(BPV);
     nmax = atom->nmax;
     memory->create(BPV,nmax,3,"basal/atom:basal");
     array_atom = BPV;
   }
 
   // invoke full neighbor list (will copy or build if necessary)
 
   neighbor->build_one(list);
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // compute structure parameter for each atom in group
   // use full neighbor list
 
   double **x = atom->x;
   int *mask = atom->mask;
   double cutsq = force->pair->cutforce * force->pair->cutforce;
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     if (mask[i] & groupbit) {
       xtmp = x[i][0];
       ytmp = x[i][1];
       ztmp = x[i][2];
       jlist = firstneigh[i];
       jnum = numneigh[i];
 
       // ensure distsq and nearest arrays are long enough
 
       if (jnum > maxneigh) {
       	memory->destroy(distsq);
       	memory->destroy(nearest);
 	memory->destroy(nearest_n0);
 	memory->destroy(nearest_n1);
       	maxneigh = jnum;
       	memory->create(distsq,maxneigh,"compute/basal/atom:distsq");
       	memory->create(nearest,maxneigh,"compute/basal/atom:nearest");
 	memory->create(nearest_n0,maxneigh,"compute/basal/atom:nearest_n0");
 	memory->create(nearest_n1,maxneigh,"compute/basal/atom:nearest_n1");
       }
       // neighbor selection is identical to ackland/atom algorithm
 
       // loop over list of all neighbors within force cutoff
       // distsq[] = distance sq to each
       // nearest[] = atom indices of neighbors
 
       n = 0;
       for (jj = 0; jj < jnum; jj++) {
       	j = jlist[jj];
 	j &= NEIGHMASK;
 	
       	delx = xtmp - x[j][0];
       	dely = ytmp - x[j][1];
       	delz = ztmp - x[j][2];
       	rsq = delx*delx + dely*dely + delz*delz;
       	if (rsq < cutsq) {
 	  distsq[n] = rsq;
 	  nearest[n++] = j;
 	}  
       }
 
       // Select 6 nearest neighbors
 
       select2(6,n,distsq,nearest);
 
       // Mean squared separation
 
       double r0_sq = 0.0;
       for (j = 0; j < 6; j++) r0_sq += distsq[j];
       r0_sq /= 6.0;
 
       // n0 near neighbors with: distsq<1.45*r0_sq
       // n1 near neighbors with: distsq<1.55*r0_sq
 
       double n0_dist_sq = 1.45*r0_sq,
 	n1_dist_sq = 1.55*r0_sq;
       int n0 = 0, n1 = 0;
       for (j = 0; j < n; j++) {
          if (distsq[j] < n1_dist_sq) {
             nearest_n1[n1++] = nearest[j];
             if (distsq[j] < n0_dist_sq) {
                nearest_n0[n0++] = nearest[j];
             }
          }
       }
 
       // Evaluate all angles <(r_ij,rik) forall n0 particles with: distsq<1.45*r0_sq
       double bond_angle;
       double norm_j, norm_k;
       chi[0] = chi[1] = chi[2] = chi[3] = chi[4] = chi[5] = chi[6] = chi[7] = 0;
       double x_ij, y_ij, z_ij, x_ik, y_ik, z_ik,x3[n0],y3[n0],z3[n0],
         xmean5, ymean5, zmean5, xmean6, ymean6, zmean6, xmean7, ymean7, zmean7;
       for (j = 0; j < n0; j++) {
 	x_ij = x[i][0]-x[nearest_n0[j]][0];
 	y_ij = x[i][1]-x[nearest_n0[j]][1];
 	z_ij = x[i][2]-x[nearest_n0[j]][2];
 	norm_j = sqrt (x_ij*x_ij + y_ij*y_ij + z_ij*z_ij);
 	if (norm_j <= 0.) {continue;}
 	for (k = j+1; k < n0; k++) {
 	  x_ik = x[i][0]-x[nearest_n0[k]][0];
 	  y_ik = x[i][1]-x[nearest_n0[k]][1];
 	  z_ik = x[i][2]-x[nearest_n0[k]][2];
 	  norm_k = sqrt (x_ik*x_ik + y_ik*y_ik + z_ik*z_ik);
 	  if (norm_k <= 0.) {continue;}
 	  bond_angle = (x_ij*x_ik + y_ij*y_ik + z_ij*z_ik) / (norm_j*norm_k);
 	  //find all bond angles that are about 180 degrees
 	  if (-1. <= bond_angle && bond_angle < -0.945) { 
 		x3[chi[0]] = x_ik - x_ij;
 		y3[chi[0]] = y_ik - y_ij;
 		z3[chi[0]] = z_ik - z_ij;
                 chi[0]++;
  	  }
 	}
       }
       // for atoms that have 2 or 3 ~180 bond angles:
       if (2 == chi[0] || 3 == chi[0]) {
           count = value = 0;
       	  if (chi[0] == 2) {
             k2[0] = 0;
             j1[0] = 1;
           }
           else {
             k2[0] = 0;
             k2[1] = 0;
             k2[2] = 1;
             j1[0]=1;
             j1[1]=2;
             j1[2]=2;
           }
           xmean5 = ymean5 = zmean5 = xmean6 = ymean6 = zmean6 = xmean7 = ymean7 = zmean7 = 0;
 	  for (j = 0; j < chi[0]; j++) {
             for (k = j+1; k < chi[0]; k++) {
 	       //get cross products
                x4[count] = y3[j1[count]]*z3[k2[count]]-y3[k2[count]]*z3[j1[count]];
                y4[count] = z3[j1[count]]*x3[k2[count]]-z3[k2[count]]*x3[j1[count]];
                z4[count] = x3[j1[count]]*y3[k2[count]]-x3[k2[count]]*y3[j1[count]];
 	       //get all sign combinations of cross products
                x5[count] = x4[count]*copysign(1.0,x4[count]);
                y5[count] = y4[count]*copysign(1.0,x4[count]);
                z5[count] = z4[count]*copysign(1.0,x4[count]);
                x6[count] = x4[count]*copysign(1.0,y4[count]);
                y6[count] = y4[count]*copysign(1.0,y4[count]);
                z6[count] = z4[count]*copysign(1.0,y4[count]);
                x7[count] = x4[count]*copysign(1.0,z4[count]);
                y7[count] = y4[count]*copysign(1.0,z4[count]);
                z7[count] = z4[count]*copysign(1.0,z4[count]);
 	       //get average cross products
                xmean5 = xmean5 + x5[count];
                ymean5 = ymean5 + y5[count];
                zmean5 = zmean5 + z5[count];
                xmean6 = xmean6 + x6[count];
                ymean6 = ymean6 + y6[count];
                zmean6 = zmean6 + z6[count];
                xmean7 = xmean7 + x7[count];
                ymean7 = ymean7 + y7[count];
                zmean6 = zmean6 + z7[count];
                count++;
             }
           }
           xmean5 = xmean5/count;
           xmean6 = xmean6/count;
           xmean7 = xmean7/count;
           ymean5 = ymean5/count;
           ymean6 = ymean6/count;
           ymean7 = ymean7/count;
           zmean5 = zmean5/count;
           zmean6 = zmean6/count;
           zmean7 = zmean7/count;
           var5 = var6 = var7 = 0.0;
 	  //find standard deviations
           for (j=0;j<count;j++){
             var5 = var5 + x5[j]*x5[j]-2*x5[j]*xmean5+xmean5*xmean5+y5[j]*y5[j]-2*y5[j]*ymean5+ymean5*ymean5+z5[j]*z5[j]-2*z5[j]*zmean5+zmean5*zmean5;
             var6 = var6 + x6[j]*x6[j]-2*x6[j]*xmean6+xmean6*xmean6+y6[j]*y6[j]-2*y6[j]*ymean6+ymean6*ymean6+z6[j]*z6[j]-2*z6[j]*zmean6+zmean6*zmean6;
             var7 = var7 + x7[j]*x7[j]-2*x7[j]*xmean7+xmean7*xmean7+y7[j]*y7[j]-2*y7[j]*ymean7+ymean7*ymean7+z7[j]*z7[j]-2*z7[j]*zmean7+zmean7*zmean7;
           }
           //select sign combination with minimum standard deviation
           if (var5 < var6) {
               if (var5 < var7) { value = 0;}
               else {value = 2;}
           }
           else if (var6 < var7) {value = 1;}
           else {value = 2;}
 	  //BPV is average of cross products of all neighbor vectors which are part of 180 degree angles
           BPV[i][0] = 0;
           BPV[i][1] = 0;
           BPV[i][2] = 0;
           for (k=0;k<count;k++) {
            if (value == 0){
                BPV[i][0] = BPV[i][0]+x5[k];
                BPV[i][1] = BPV[i][1]+y5[k];
                BPV[i][2] = BPV[i][2]+z5[k];
            }
            else if (value == 1) {
                BPV[i][0] = BPV[i][0]+x6[k];
                BPV[i][1] = BPV[i][1]+y6[k];
                BPV[i][2] = BPV[i][2]+z6[k];
            }
            else {
                BPV[i][0] = BPV[i][0]+x7[k];
                BPV[i][1] = BPV[i][1]+y7[k];
                BPV[i][2] = BPV[i][2]+z7[k];
            }
           }
       }
       //for atoms with more than three 180 degree bond angles:
       else if (chi[0] > 3) {
           double x44[3], y44[3], z44[3], S0;
           int l, m;
           count = value = 0;
           S0 = 100000;
           k2[0] = 0;
           k2[1] = 0;
           k2[2] = 1;
           j1[0]=1;
           j1[1]=2;
           j1[2]=2;
 	  //algorithm is as above, but now all combinations of three 180 degree angles are compared, and the combination with minimum standard deviation is chosen
           for (j=0; j<chi[0]; j++) {
               for (k=j+1; k<chi[0]; k++) {
                   for (l=k+1; l<chi[0]; l++) {
                       if (k >= chi[0] || l >= chi[0]) continue;
 		      //get unique combination of three neighbor vectors
                       x4[0] = x3[j];
                       x4[1] = x3[k];
                       x4[2] = x3[l];
                       y4[0] = y3[j];
                       y4[1] = y3[k];
                       y4[2] = y3[l];
                       z4[0] = z3[j];
                       z4[1] = z3[k];
                       z4[2] = z3[l];
                       xmean5 = ymean5 = zmean5 = xmean6 = ymean6 = zmean6 = xmean7 = ymean7 = zmean7 = 0;
                       for (m=0;m<3;m++) {
 			//get cross products
                         x44[m] = y4[j1[m]]*z4[k2[m]]-y4[k2[m]]*z4[j1[m]];
                         y44[m] = z4[j1[m]]*x4[k2[m]]-z4[k2[m]]*x4[j1[m]];
                         z44[m] = x4[j1[m]]*y4[k2[m]]-x4[k2[m]]*y4[j1[m]];
                         x5[m] = x44[m]*copysign(1.0,x44[m]);
                         y5[m] = y44[m]*copysign(1.0,x44[m]);
                         z5[m] = z44[m]*copysign(1.0,x44[m]);
                         x6[m] = x44[m]*copysign(1.0,y44[m]);
                         y6[m] = y44[m]*copysign(1.0,y44[m]);
                         z6[m] = z44[m]*copysign(1.0,y44[m]);
                         x7[m] = x44[m]*copysign(1.0,z44[m]);
                         y7[m] = y44[m]*copysign(1.0,z44[m]);
                         z7[m] = z44[m]*copysign(1.0,z44[m]);
 			//get average cross products
                         xmean5 = xmean5 + x5[m];
                         ymean5 = ymean5 + y5[m];
                         zmean5 = zmean5 + z5[m];
                         xmean6 = xmean6 + x6[m];
                         ymean6 = ymean6 + y6[m];
                         zmean6 = zmean6 + z6[m];
                         xmean7 = xmean7 + x7[m];
                         ymean7 = ymean7 + y7[m];
                         zmean6 = zmean6 + z7[m];
                       }
                       xmean5 = xmean5/3;
                       xmean6 = xmean6/3;
                       xmean7 = xmean7/3;
                       ymean5 = ymean5/3;
                       ymean6 = ymean6/3;
                       ymean7 = ymean7/3;
                       zmean5 = zmean5/3;
                       zmean6 = zmean6/3;
                       zmean7 = zmean7/3;
                       var5 = var6 = var7 = 0;
 		      //get standard deviations
                       for (m=0;m<3;m++){
                             var5 = var5 + x5[m]*x5[m]-2*x5[m]*xmean5+xmean5*xmean5+y5[m]*y5[m]-2*y5[m]*ymean5+ymean5*ymean5+z5[m]*z5[m]-2*z5[m]*zmean5+zmean5*zmean5;
                             var6 = var6 + x6[m]*x6[m]-2*x6[m]*xmean6+xmean6*xmean6+y6[m]*y6[m]-2*y6[m]*ymean6+ymean6*ymean6+z6[m]*z6[m]-2*z6[m]*zmean6+zmean6*zmean6;
                             var7 = var7 + x7[m]*x7[m]-2*x7[m]*xmean7+xmean7*xmean7+y7[m]*y7[m]-2*y7[m]*ymean7+ymean7*ymean7+z7[m]*z7[m]-2*z7[m]*zmean7+zmean7*zmean7;
                       }
 		      //choose minimum standard deviation
                       if (var5 < S0) {
                           S0 = var5;
                           BPV[i][0] = (x5[0]+x5[1]+x5[2])/3;
                           BPV[i][1] = (y5[0]+y5[1]+x5[2])/3;
                           BPV[i][2] = (z5[0]+z5[1]+z5[2])/3;
                       }
                       if (var6 < S0) {
                           S0 = var6;
                           BPV[i][0] = (x6[0]+x6[1]+x6[2])/3;
                           BPV[i][1] = (y6[0]+y6[1]+x6[2])/3;
                           BPV[i][2] = (z6[0]+z6[1]+z6[2])/3;
                       }
                       if (var7 < S0) {
                           S0 = var7;
                           BPV[i][0] = (x7[0]+x7[1]+x7[2])/3;
                           BPV[i][1] = (y7[0]+y7[1]+x7[2])/3;
                           BPV[i][2] = (z7[0]+z7[1]+z7[2])/3;
                       }
                   }
               }
           }
       }
       //if there are less than two ~180 degree bond angles, the algorithm returns null
       else BPV[i][0] = BPV[i][1] = BPV[i][2] = 0.0;
 
       //normalize BPV:
       double Mag = sqrt(BPV[i][0]*BPV[i][0] + 
                         BPV[i][1]*BPV[i][1] + BPV[i][2]*BPV[i][2]);
       if (Mag > 0){
         BPV[i][0] = BPV[i][0]/Mag;
         BPV[i][1] = BPV[i][1]/Mag;
         BPV[i][2] = BPV[i][2]/Mag;
       }
     } else BPV[i][0] = BPV[i][1] = BPV[i][2] = 0.0;
   }
 }
 /* ----------------------------------------------------------------------
    2 select routines from Numerical Recipes (slightly modified)
    find k smallest values in array of length n
    2nd routine sorts auxiliary array at same time
 ------------------------------------------------------------------------- */
 
 #define SWAP(a,b)   tmp = a; a = b; b = tmp;
 #define ISWAP(a,b) itmp = a; a = b; b = itmp;
 
 void ComputeBasalAtom::select(int k, int n, double *arr)
   {
   // In-place partial ordering of arr (n entries): per the header comment above,
   // this moves the k smallest values to the front of the array.
   // tmp is the scratch variable expanded by the SWAP macro.
   int i,ir,j,l,mid;
   double a,tmp;
 
   // shift the pointer so the code below can index the array as arr[1..n]
   arr--;
   // [l,ir] is the active (1-based) sub-range still being partitioned
   l = 1;
   ir = n;
   for (;;) {
     if (ir <= l+1) {
       // one or two elements left: order the pair if needed and finish
       if (ir == l+1 && arr[ir] < arr[l]) {
 	SWAP(arr[l],arr[ir])
       }
       return;
     } else {
       // move the middle element to l+1, then arrange
       // arr[l] <= arr[l+1] <= arr[ir] so the scans below stay in bounds
       mid=(l+ir) >> 1;
       SWAP(arr[mid],arr[l+1])
       if (arr[l] > arr[ir]) {
 	SWAP(arr[l],arr[ir])
       }
       if (arr[l+1] > arr[ir]) {
 	SWAP(arr[l+1],arr[ir])
       }
       if (arr[l] > arr[l+1]) {
 	SWAP(arr[l],arr[l+1])
       }
       // a is the partitioning value; i scans upward, j scans downward
       i = l+1;
       j = ir;
       a = arr[l+1];
       for (;;) {
 	do i++; while (arr[i] < a);
 	do j--; while (arr[j] > a);
 	// scans crossed: partitioning of this sub-range is complete
 	if (j < i) break;
 	SWAP(arr[i],arr[j])
       }
       // drop the partitioning value into its final slot j
       arr[l+1] = arr[j];
       arr[j] = a;
       // keep only the side of the partition that still contains position k
       if (j >= k) ir = j-1;
       if (j <= k) l = i;
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeBasalAtom::select2(int k, int n, double *arr, int *iarr)
 {
   // Same in-place selection as select(), but every move applied to arr is
   // mirrored on the auxiliary integer array iarr so the two stay paired.
   // tmp and itmp are the scratch variables expanded by SWAP and ISWAP.
   int i,ir,j,l,mid,ia,itmp;
   double a,tmp;
 
   // shift both pointers so the code below can index the arrays as [1..n]
   arr--;
   iarr--;
   // [l,ir] is the active (1-based) sub-range still being partitioned
   l = 1;
   ir = n;
   for (;;) {
     if (ir <= l+1) {
       // one or two elements left: order the pair if needed and finish
       if (ir == l+1 && arr[ir] < arr[l]) {
 	SWAP(arr[l],arr[ir])
 	ISWAP(iarr[l],iarr[ir])
       }
       return;
     } else {
       // move the middle element to l+1, then arrange
       // arr[l] <= arr[l+1] <= arr[ir] so the scans below stay in bounds
       mid=(l+ir) >> 1;
       SWAP(arr[mid],arr[l+1])
       ISWAP(iarr[mid],iarr[l+1])
       if (arr[l] > arr[ir]) {
 	SWAP(arr[l],arr[ir])
 	ISWAP(iarr[l],iarr[ir])
       }
       if (arr[l+1] > arr[ir]) {
 	SWAP(arr[l+1],arr[ir])
 	ISWAP(iarr[l+1],iarr[ir])
       }
       if (arr[l] > arr[l+1]) {
 	SWAP(arr[l],arr[l+1])
 	ISWAP(iarr[l],iarr[l+1])
       }
       // a / ia are the partitioning value and its companion entry
       i = l+1;
       j = ir;
       a = arr[l+1];
       ia = iarr[l+1];
       for (;;) {
 	do i++; while (arr[i] < a);
 	do j--; while (arr[j] > a);
 	// scans crossed: partitioning of this sub-range is complete
 	if (j < i) break;
 	SWAP(arr[i],arr[j])
 	ISWAP(iarr[i],iarr[j])
       }
       // drop the partitioning value (and its companion) into final slot j
       arr[l+1] = arr[j];
       arr[j] = a;
       iarr[l+1] = iarr[j];
       iarr[j] = ia;
       // keep only the side of the partition that still contains position k
       if (j >= k) ir = j-1;
       if (j <= k) l = i;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based array
 ------------------------------------------------------------------------- */
 
 double ComputeBasalAtom::memory_usage()
 {
   // three doubles for each of the nmax per-atom slots
   return 3*nmax * sizeof(double);
 }
diff --git a/src/USER-MISC/pair_coul_diel.cpp b/src/USER-MISC/pair_coul_diel.cpp
index 67c09e9b4..a846aa53c 100644
--- a/src/USER-MISC/pair_coul_diel.cpp
+++ b/src/USER-MISC/pair_coul_diel.cpp
@@ -1,348 +1,348 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 /* ----------------------------------------------------------------------
    Contributing authors: Arben Jusufi, Axel Kohlmeyer (Temple U.)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_coul_diel.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairCoulDiel::PairCoulDiel(LAMMPS *lmp)
   : Pair(lmp)
 {
   // nothing to do beyond constructing the Pair base class
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairCoulDiel::~PairCoulDiel()
 {
   // the per-type tables only exist once allocate() has run
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(sigmae);
   memory->destroy(rme);
   memory->destroy(offset);
   memory->destroy(cutsq);
   memory->destroy(cut);
 
   allocated = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Accumulate pairwise forces (and, when requested, energy/virial tallies) for
 // every neighbor pair inside the per-type cutoff.  The pair energy is
 //   E(r) = qqrd2e*qi*qj*(eps_s/eps(r) - 1)/r - offset,
 //   eps(r) = a_eps + b_eps*tanh((r - rme)/sigmae),
 // scaled by the special-bond Coulomb factor of the pair.
 void PairCoulDiel::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
   double rsq,r,rarg,th,depsdr,epsr,forcecoul,factor_coul;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   // set up tallying when energy or virial is requested, otherwise switch it off
   ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
 
       // pick the Coulomb scaling factor from the bits sbmask() extracts from j,
       // then mask j down to the plain atom index
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r = sqrt(rsq);
         // distance-dependent permittivity eps(r) and its derivative d eps/dr
         rarg = (r-rme[itype][jtype])/sigmae[itype][jtype];
         th=tanh(rarg);
         epsr=a_eps+b_eps*th;
         depsdr=b_eps * (1.0 - th*th) / sigmae[itype][jtype];
 
         // forcecoul is -dE/dr; dividing by r lets fpair multiply the
         // displacement components directly
         forcecoul = qqrd2e*qtmp*q[j]*((eps_s*(epsr+r*depsdr)/epsr/epsr) -1.)/rsq;
         fpair = factor_coul*forcecoul/r;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         // apply the reaction force to j when newton_pair is on or j is a local atom
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
           ecoul = (qqrd2e*qtmp*q[j]*((eps_s/epsr) -1.)/r) - offset[itype][jtype];
           ecoul *= factor_coul;
         }
 
         // this style contributes only a Coulomb energy term (evdwl passed as 0.0)
         if (evflag) ev_tally(i,j,nlocal,newton_pair,0.0,
                              ecoul,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairCoulDiel::allocate()
 {
   // tables are indexed by atom type 1..ntypes, hence the extra row and column
   const int ntypes = atom->ntypes;
   const int dim = ntypes+1;
 
   allocated = 1;
 
   // mark every unique pair (i <= j) as "coefficients not yet set"
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       setflag[itype][jtype] = 0;
     }
   }
 
   // cutoff tables followed by the per-pair dielectric parameters
   memory->create(cutsq,dim,dim,"pair:cutsq");
   memory->create(cut,dim,dim,"pair:cut");
   memory->create(sigmae,dim,dim,"pair:sigmae");
   memory->create(rme,dim,dim,"pair:rme");
   memory->create(offset,dim,dim,"pair:offset");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 // Parse the pair_style arguments: exactly one value, the global cutoff.
 // If the per-type tables already exist, every pair whose coefficients were
 // set explicitly has its cutoff reset to the new global value.
 void PairCoulDiel::settings(int narg, char **arg)
 {
   if (narg != 1) error->all(FLERR,"Illegal pair_style command");
 
   cut_global = force->numeric(FLERR,arg[0]);
 
   // reset cutoffs that have been explicitly set
   // the inner loop starts at j = i so that same-type (i,i) pairs are reset too;
   // starting at i+1 silently left their old cutoff in place
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut[i][j] = cut_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairCoulDiel::coeff(int narg, char **arg)
 {
   if (narg < 5 || narg > 6) error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   eps_s = force->numeric(FLERR,arg[2]);
   double rme_one =force->numeric(FLERR,arg[3]);
   double sigmae_one = force->numeric(FLERR,arg[4]);
 
   double cut_one = cut_global;
   if (narg == 6) cut_one = force->numeric(FLERR,arg[5]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       sigmae[i][j] = sigmae_one;
       rme[i][j] = rme_one;
       cut[i][j] = cut_one;
       setflag[i][j] = 1;
       count++;
     }
   }
   a_eps = 0.5*(5.2+eps_s);
   b_eps = 0.5*(eps_s-5.2);
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairCoulDiel::init_style()
 {
   // this style reads per-atom charges, so report an error if q is not available
   if (!atom->q_flag)
     error->all(FLERR,"Pair style coul/diel requires atom attribute q");
 
   // register this pair style's neighbor list request
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 // Finalize the (i,j) type pair: compute its energy offset, mirror the
 // coefficients into (j,i), and return the pair cutoff.
 double PairCoulDiel::init_one(int i, int j)
 {
   // no mixing is performed: every pair must have been given coefficients
   if (!setflag[i][j])
     error->all(FLERR,"for pair style coul/diel, parameters need to be set explicitly for all pairs.");
 
   double *q = atom->q;
   double qqrd2e = force->qqrd2e;
 
   if (!offset_flag) {
     offset[i][j] = 0.0;
   } else {
     // value of the pair energy expression at r = cut[i][j]
     // NOTE(review): q is indexed here with the type arguments i and j, while
     // compute() indexes q by atom; confirm this is the intended charge.
     const double rarg = (cut[i][j]-rme[i][j])/sigmae[i][j];
     const double epsr = a_eps+b_eps*tanh(rarg);
     offset[i][j] = qqrd2e*q[i]*q[j]*((eps_s/epsr) -1.)/cut[i][j];
   }
 
   // mirror the upper-triangle entries into the lower triangle
   cut[j][i] = cut[i][j];
   offset[j][i] = offset[i][j];
   rme[j][i] = rme[i][j];
   sigmae[j][i] = sigmae[i][j];
 
   return cut[i][j];
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairCoulDiel::write_restart(FILE *fp)
 {
   // global settings first, then one record per unique type pair (i <= j)
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       // the flag is always written; coefficients follow only for pairs that were set
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
       if (!setflag[itype][jtype]) continue;
 
       fwrite(&rme[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigmae[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairCoulDiel::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   int i,j;
   int me = comm->me;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       if (setflag[i][j]) {
         if (me == 0) {
           fread(&rme[i][j],sizeof(double),1,fp);
           fread(&sigmae[i][j],sizeof(double),1,fp);
           fread(&cut[i][j],sizeof(double),1,fp);
         }
         MPI_Bcast(&rme[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&sigmae[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&cut[i][j],1,MPI_DOUBLE,0,world);
       }
     }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairCoulDiel::write_restart_settings(FILE *fp)
 {
   // field order must match read_restart_settings():
   // cut_global (double), offset_flag (int), mix_flag (int)
   fwrite(&cut_global, sizeof(double), 1, fp);
   fwrite(&offset_flag, sizeof(int), 1, fp);
   fwrite(&mix_flag, sizeof(int), 1, fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairCoulDiel::read_restart_settings(FILE *fp)
 {
   // only rank 0 reads from the file ...
   const bool is_reader = (comm->me == 0);
   if (is_reader) {
     fread(&cut_global, sizeof(double), 1, fp);
     fread(&offset_flag, sizeof(int), 1, fp);
     fread(&mix_flag, sizeof(int), 1, fp);
   }
 
   // ... then every rank receives the three settings from rank 0
   MPI_Bcast(&cut_global, 1, MPI_DOUBLE, 0, world);
   MPI_Bcast(&offset_flag, 1, MPI_INT, 0, world);
   MPI_Bcast(&mix_flag, 1, MPI_INT, 0, world);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Energy and force for one pair of atoms i,j at squared distance rsq.
 // Returns the scaled pair energy; fforce receives the scaled force divided
 // by r.  factor_lj is not used by this style.
 double PairCoulDiel::single(int i, int j, int itype, int jtype,
                            double rsq, double factor_coul, double factor_lj,
                            double &fforce)
 {
   double *q = atom->q;
   double qqrd2e = force->qqrd2e;
 
   // distance-dependent permittivity eps(r) and its derivative with respect to r
   const double r = sqrt(rsq);
   const double rarg = (r-rme[itype][jtype])/sigmae[itype][jtype];
   const double th = tanh(rarg);
   const double epsr = a_eps+b_eps*th;
   const double depsdr = b_eps*(1.-th*th)/sigmae[itype][jtype];
 
   // force magnitude (-dE/dr), handed back divided by r
   const double forcedielec =
     qqrd2e*q[i]*q[j]*((eps_s*(epsr+r*depsdr)/epsr/epsr) -1.)/rsq;
   fforce = factor_coul*forcedielec/r;
 
   // pair energy, shifted by the per-type offset
   const double phidielec =
     (qqrd2e*q[i]*q[j]*((eps_s/epsr) -1.)/r)- offset[itype][jtype];
   return factor_coul*phidielec;
 }
diff --git a/src/USER-MISC/pair_edip.cpp b/src/USER-MISC/pair_edip.cpp
index e9a83eded..c6eceb0ca 100755
--- a/src/USER-MISC/pair_edip.cpp
+++ b/src/USER-MISC/pair_edip.cpp
@@ -1,1056 +1,1056 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Luca Ferraro (CASPUR)
    email: luca.ferraro@caspur.it
 
    Environment Dependent Interatomic Potential
    References:
     1) J. F. Justo, M. Z. Bazant, E. Kaxiras, V. V. Bulatov, S. Yip
        Phys. Rev. B 58, 2539 (1998)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "float.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_edip.h"
 #include "atom.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "force.h"
 #include "comm.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 #define MAXLINE 1024
 #define DELTA 4
 
 #define GRIDDENSITY 8000
 #define GRIDSTART 0.1
 
 // max number of interaction per atom for f(Z) environment potential
 
 #define leadDimInteractionList 64
 
 /* ---------------------------------------------------------------------- */
 
 PairEDIP::PairEDIP(LAMMPS *lmp)
   : Pair(lmp)
 {
   // style flags
   manybody_flag = 1;
   one_coeff = 1;
   restartinfo = 0;
   single_enable = 0;
 
   // start with empty element and parameter storage
   elements = NULL;
   nelements = 0;
 
   params = NULL;
   nparams = 0;
   maxparam = 0;
 
   elem2param = NULL;
 }
 
 /* ----------------------------------------------------------------------
    check if allocated, since class can be destructed when incomplete
 ------------------------------------------------------------------------- */
 
 PairEDIP::~PairEDIP()
 {
   // element names: free each string, then the array that holds them
   if (elements != NULL) {
     for (int ielem = 0; ielem < nelements; ++ielem) {
       delete [] elements[ielem];
     }
   }
   delete [] elements;
 
   memory->destroy(params);
   memory->destroy(elem2param);
 
   // everything below exists only once the per-type arrays were allocated
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
   delete [] map;
 
   deallocateGrids();
   deallocatePreLoops();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairEDIP::compute(int eflag, int vflag)
 {
   int i,j,k,ii,inum,jnum;
   int itype,jtype,ktype,ijparam,ikparam;
   double xtmp,ytmp,ztmp,evdwl;
   int *ilist,*jlist,*numneigh,**firstneigh;
   register int preForceCoord_counter;
 
   double invR_ij;
   double invR_ik;
   double directorCos_ij_x;
   double directorCos_ij_y;
   double directorCos_ij_z;
   double directorCos_ik_x;
   double directorCos_ik_y;
   double directorCos_ik_z;
   double cosTeta;
 
   int interpolIDX;
   double interpolTMP;
   double interpolDeltaX;
   double interpolY1;
   double interpolY2;
 
   double invRMinusCutoffA;
   double sigmaInvRMinusCutoffA;
   double gammInvRMinusCutoffA;
   double cosTetaDiff;
   double cosTetaDiffCosTetaDiff;
   double cutoffFunction_ij;
   double exp2B_ij;
   double exp2BDerived_ij;
   double pow2B_ij;
   double pow2BDerived_ij;
   double exp3B_ij;
   double exp3BDerived_ij;
   double exp3B_ik;
   double exp3BDerived_ik;
   double qFunction;
   double tauFunction;
   double tauFunctionDerived;
   double expMinusBetaZeta_iZeta_i;
   double qFunctionCosTetaDiffCosTetaDiff;
   double expMinusQFunctionCosTetaDiffCosTetaDiff;
   double zeta_i;
   double zeta_iDerived;
   double zeta_iDerivedInvR_ij;
 
   double forceModCoord_factor;
   double forceModCoord;
   double forceModCoord_ij;
   double forceMod2B;
   double forceMod3B_factor1_ij;
   double forceMod3B_factor2_ij;
   double forceMod3B_factor2;
   double forceMod3B_factor1_ik;
   double forceMod3B_factor2_ik;
   double potentia3B_factor;
   double potential2B_factor;
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int newton_pair = force->newton_pair;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over full neighbor list of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     zeta_i = 0.0;
     int numForceCoordPairs = 0;
 
     i = ilist[ii];
     itype = map[type[i]];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     // pre-loop to compute environment coordination f(Z)
 
     for (int neighbor_j = 0; neighbor_j < jnum; neighbor_j++) {
         j = jlist[neighbor_j];
         j &= NEIGHMASK;
 
         double dr_ij[3], r_ij;
 
         dr_ij[0] = xtmp - x[j][0];
         dr_ij[1] = ytmp - x[j][1];
         dr_ij[2] = ztmp - x[j][2];
         r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2];
 
         jtype = map[type[j]];
         ijparam = elem2param[itype][jtype][jtype];
         if (r_ij > params[ijparam].cutsq) continue;
 
         r_ij = sqrt(r_ij);
 
         invR_ij = 1.0 / r_ij;
         preInvR_ij[neighbor_j] = invR_ij;
 
         invRMinusCutoffA =  1.0 / (r_ij - cutoffA);
         sigmaInvRMinusCutoffA = sigma * invRMinusCutoffA;
         gammInvRMinusCutoffA = gamm * invRMinusCutoffA;
 
         interpolDeltaX = r_ij - GRIDSTART;
         interpolTMP = (interpolDeltaX * GRIDDENSITY);
         interpolIDX = (int) interpolTMP;
 
         interpolY1 = exp3B[interpolIDX];
         interpolY2 = exp3B[interpolIDX+1];
         exp3B_ij = interpolY1 + (interpolY2 - interpolY1) *
           (interpolTMP-interpolIDX);
 
         exp3BDerived_ij = - exp3B_ij * gammInvRMinusCutoffA * invRMinusCutoffA;
 
         preExp3B_ij[neighbor_j] = exp3B_ij;
         preExp3BDerived_ij[neighbor_j] = exp3BDerived_ij;
 
         interpolY1 = exp2B[interpolIDX];
         interpolY2 = exp2B[interpolIDX+1];
         exp2B_ij = interpolY1 + (interpolY2 - interpolY1) *
           (interpolTMP-interpolIDX);
 
         exp2BDerived_ij = - exp2B_ij * sigmaInvRMinusCutoffA * invRMinusCutoffA;
 
         preExp2B_ij[neighbor_j] = exp2B_ij;
         preExp2BDerived_ij[neighbor_j] = exp2BDerived_ij;
 
         interpolY1 = pow2B[interpolIDX];
         interpolY2 = pow2B[interpolIDX+1];
         pow2B_ij = interpolY1 + (interpolY2 - interpolY1) *
           (interpolTMP-interpolIDX);
 
         prePow2B_ij[neighbor_j] = pow2B_ij;
 
         // zeta and its derivative
 
         if (r_ij < cutoffC) zeta_i += 1.0;
         else {
             interpolY1 = cutoffFunction[interpolIDX];
             interpolY2 = cutoffFunction[interpolIDX+1];
             cutoffFunction_ij = interpolY1 + (interpolY2 - interpolY1) *
               (interpolTMP-interpolIDX);
 
             zeta_i += cutoffFunction_ij;
 
             interpolY1 = cutoffFunctionDerived[interpolIDX];
             interpolY2 = cutoffFunctionDerived[interpolIDX+1];
             zeta_iDerived = interpolY1 + (interpolY2 - interpolY1) *
               (interpolTMP-interpolIDX);
 
             zeta_iDerivedInvR_ij = zeta_iDerived * invR_ij;
 
             preForceCoord_counter=numForceCoordPairs*5;
             preForceCoord[preForceCoord_counter+0]=zeta_iDerivedInvR_ij;
             preForceCoord[preForceCoord_counter+1]=dr_ij[0];
             preForceCoord[preForceCoord_counter+2]=dr_ij[1];
             preForceCoord[preForceCoord_counter+3]=dr_ij[2];
             preForceCoord[preForceCoord_counter+4]=j;
             numForceCoordPairs++;
         }
     }
 
     // quantities depending on zeta_i
 
     interpolDeltaX = zeta_i;
     interpolTMP = (interpolDeltaX * GRIDDENSITY);
     interpolIDX = (int) interpolTMP;
 
     interpolY1 = expMinusBetaZeta_iZeta_iGrid[interpolIDX];
     interpolY2 = expMinusBetaZeta_iZeta_iGrid[interpolIDX+1];
     expMinusBetaZeta_iZeta_i = interpolY1 + (interpolY2 - interpolY1) *
       (interpolTMP-interpolIDX);
 
     interpolY1 = qFunctionGrid[interpolIDX];
     interpolY2 = qFunctionGrid[interpolIDX+1];
     qFunction = interpolY1 + (interpolY2 - interpolY1) *
       (interpolTMP-interpolIDX);
 
     interpolY1 = tauFunctionGrid[interpolIDX];
     interpolY2 = tauFunctionGrid[interpolIDX+1];
     tauFunction = interpolY1 + (interpolY2 - interpolY1) *
       (interpolTMP-interpolIDX);
 
     interpolY1 = tauFunctionDerivedGrid[interpolIDX];
     interpolY2 = tauFunctionDerivedGrid[interpolIDX+1];
     tauFunctionDerived = interpolY1 + (interpolY2 - interpolY1) *
       (interpolTMP-interpolIDX);
 
     forceModCoord_factor = 2.0 * beta * zeta_i * expMinusBetaZeta_iZeta_i;
 
     forceModCoord = 0.0;
 
     // two-body interactions, skip half of them
 
     for (int neighbor_j = 0; neighbor_j < jnum; neighbor_j++) {
       double dr_ij[3], r_ij, f_ij[3];
 
       j = jlist[neighbor_j];
       j &= NEIGHMASK;
 
       dr_ij[0] = x[j][0] - xtmp;
       dr_ij[1] = x[j][1] - ytmp;
       dr_ij[2] = x[j][2] - ztmp;
       r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2];
 
       jtype = map[type[j]];
       ijparam = elem2param[itype][jtype][jtype];
       if (r_ij > params[ijparam].cutsq) continue;
 
       r_ij = sqrt(r_ij);
 
       invR_ij = preInvR_ij[neighbor_j];
       pow2B_ij = prePow2B_ij[neighbor_j];
 
       potential2B_factor = pow2B_ij - expMinusBetaZeta_iZeta_i;
 
       exp2B_ij = preExp2B_ij[neighbor_j];
 
       pow2BDerived_ij = - rho * invR_ij * pow2B_ij;
 
       forceModCoord += (forceModCoord_factor*exp2B_ij);
 
       exp2BDerived_ij = preExp2BDerived_ij[neighbor_j];
       forceMod2B = exp2BDerived_ij * potential2B_factor +
         exp2B_ij * pow2BDerived_ij;
 
       directorCos_ij_x = invR_ij * dr_ij[0];
       directorCos_ij_y = invR_ij * dr_ij[1];
       directorCos_ij_z = invR_ij * dr_ij[2];
 
       exp3B_ij = preExp3B_ij[neighbor_j];
       exp3BDerived_ij = preExp3BDerived_ij[neighbor_j];
 
       f_ij[0] = forceMod2B * directorCos_ij_x;
       f_ij[1] = forceMod2B * directorCos_ij_y;
       f_ij[2] = forceMod2B * directorCos_ij_z;
 
       f[i][0] += f_ij[0];
       f[i][1] += f_ij[1];
       f[i][2] += f_ij[2];
 
       f[j][0] -= f_ij[0];
       f[j][1] -= f_ij[1];
       f[j][2] -= f_ij[2];
 
       // potential energy
 
       evdwl = (exp2B_ij * potential2B_factor);
 
       if (evflag) ev_tally(i, j, nlocal, newton_pair, evdwl, 0.0,
                            -forceMod2B*invR_ij, dr_ij[0], dr_ij[1], dr_ij[2]);
 
       // three-body Forces
 
       for (int neighbor_k = neighbor_j + 1; neighbor_k < jnum; neighbor_k++) {
           double dr_ik[3], r_ik, f_ik[3];
 
           k = jlist[neighbor_k];
           k &= NEIGHMASK;
           ktype = map[type[k]];
           ikparam = elem2param[itype][ktype][ktype];
 
           dr_ik[0] = x[k][0] - xtmp;
           dr_ik[1] = x[k][1] - ytmp;
           dr_ik[2] = x[k][2] - ztmp;
           r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
 
           if (r_ik > params[ikparam].cutsq) continue;
 
           r_ik = sqrt(r_ik);
 
           invR_ik = preInvR_ij[neighbor_k];
 
           directorCos_ik_x = invR_ik * dr_ik[0];
           directorCos_ik_y = invR_ik * dr_ik[1];
           directorCos_ik_z = invR_ik * dr_ik[2];
 
           cosTeta = directorCos_ij_x * directorCos_ik_x +
             directorCos_ij_y * directorCos_ik_y +
             directorCos_ij_z * directorCos_ik_z;
 
           cosTetaDiff = cosTeta + tauFunction;
           cosTetaDiffCosTetaDiff = cosTetaDiff * cosTetaDiff;
           qFunctionCosTetaDiffCosTetaDiff = cosTetaDiffCosTetaDiff * qFunction;
           expMinusQFunctionCosTetaDiffCosTetaDiff =
             exp(-qFunctionCosTetaDiffCosTetaDiff);
 
           potentia3B_factor = lambda *
             ((1.0 - expMinusQFunctionCosTetaDiffCosTetaDiff) +
              eta * qFunctionCosTetaDiffCosTetaDiff);
 
           exp3B_ik = preExp3B_ij[neighbor_k];
           exp3BDerived_ik = preExp3BDerived_ij[neighbor_k];
 
           forceMod3B_factor1_ij = - exp3BDerived_ij * exp3B_ik *
             potentia3B_factor;
           forceMod3B_factor2 = 2.0 * lambda * exp3B_ij * exp3B_ik *
             qFunction * cosTetaDiff *
             (eta + expMinusQFunctionCosTetaDiffCosTetaDiff);
           forceMod3B_factor2_ij = forceMod3B_factor2 * invR_ij;
 
           f_ij[0] = forceMod3B_factor1_ij * directorCos_ij_x +
             forceMod3B_factor2_ij *
             (cosTeta * directorCos_ij_x - directorCos_ik_x);
           f_ij[1] = forceMod3B_factor1_ij * directorCos_ij_y +
             forceMod3B_factor2_ij *
             (cosTeta * directorCos_ij_y - directorCos_ik_y);
           f_ij[2] = forceMod3B_factor1_ij * directorCos_ij_z +
             forceMod3B_factor2_ij *
             (cosTeta * directorCos_ij_z - directorCos_ik_z);
 
           forceMod3B_factor1_ik = - exp3BDerived_ik * exp3B_ij *
             potentia3B_factor;
           forceMod3B_factor2_ik = forceMod3B_factor2 * invR_ik;
 
           f_ik[0] = forceMod3B_factor1_ik * directorCos_ik_x +
             forceMod3B_factor2_ik *
             (cosTeta * directorCos_ik_x - directorCos_ij_x);
           f_ik[1] = forceMod3B_factor1_ik * directorCos_ik_y +
             forceMod3B_factor2_ik *
             (cosTeta * directorCos_ik_y - directorCos_ij_y);
           f_ik[2] = forceMod3B_factor1_ik * directorCos_ik_z +
             forceMod3B_factor2_ik *
             (cosTeta * directorCos_ik_z - directorCos_ij_z);
 
           forceModCoord += (forceMod3B_factor2 *
                             (tauFunctionDerived -  0.5 * mu * cosTetaDiff));
 
           f[j][0] += f_ij[0];
           f[j][1] += f_ij[1];
           f[j][2] += f_ij[2];
 
           f[k][0] += f_ik[0];
           f[k][1] += f_ik[1];
           f[k][2] += f_ik[2];
 
           f[i][0] -= f_ij[0] + f_ik[0];
           f[i][1] -= f_ij[1] + f_ik[1];
           f[i][2] -= f_ij[2] + f_ik[2];
 
           // potential energy
 
           evdwl = (exp3B_ij * exp3B_ik * potentia3B_factor);
 
           if (evflag) ev_tally3(i,j,k,evdwl,0.0,f_ij,f_ik,dr_ij,dr_ik);
       }
     }
 
     // forces due to environment coordination f(Z)
 
     for (int idx = 0; idx < numForceCoordPairs; idx++) {
         double dr_ij[3],f_ij[3];
 
         preForceCoord_counter = idx * 5;
         zeta_iDerivedInvR_ij=preForceCoord[preForceCoord_counter+0];
         dr_ij[0]=preForceCoord[preForceCoord_counter+1];
         dr_ij[1]=preForceCoord[preForceCoord_counter+2];
         dr_ij[2]=preForceCoord[preForceCoord_counter+3];
         j = static_cast<int> (preForceCoord[preForceCoord_counter+4]);
 
         forceModCoord_ij = forceModCoord * zeta_iDerivedInvR_ij;
 
         f_ij[0] = forceModCoord_ij * dr_ij[0];
         f_ij[1] = forceModCoord_ij * dr_ij[1];
         f_ij[2] = forceModCoord_ij * dr_ij[2];
 
         f[i][0] -= f_ij[0];
         f[i][1] -= f_ij[1];
         f[i][2] -= f_ij[2];
 
         f[j][0] += f_ij[0];
         f[j][1] += f_ij[1];
         f[j][2] += f_ij[2];
 
         // potential energy
 
         evdwl = 0.0;
         if (evflag) ev_tally(i, j, nlocal, newton_pair, evdwl, 0.0,
                              -forceModCoord_ij, dr_ij[0], dr_ij[1], dr_ij[2]);
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairEDIP::allocateGrids(void)
 {
   // offset added to cutoffA when sizing the radial tables
   double const leftLimitToZero = -DBL_MIN * 1000.0;
 
   // the tau, exp(-beta*Z*Z) and Q tables all use leadDimInteractionList
   // as their largest argument: GRIDDENSITY points per unit plus 2 extra
 
   const double largestArgument = leadDimInteractionList;
   const int nTauPoints = (int) (largestArgument * GRIDDENSITY) + 2;
   const int nExpZetaPoints = (int) (largestArgument * GRIDDENSITY) + 2;
   const int nQPoints = (int) (largestArgument * GRIDDENSITY) + 2;
 
   // cutoff function: points in [GRIDSTART,cutoffC), points in
   // [cutoffC,cutoffA), plus 2 extra
 
   const int nFlatPoints = (int) ((cutoffC - GRIDSTART) * GRIDDENSITY);
   const int nDecayPoints = (int) ((cutoffA-cutoffC) * GRIDDENSITY);
   const int nCutoffPoints = nFlatPoints + nDecayPoints + 2;
 
   // radial tables shared by pow2B, exp2B and exp3B
 
   const int nRadialPoints =
     (int) ((cutoffA + leftLimitToZero - GRIDSTART) * GRIDDENSITY) + 2;
 
   // allocate every table
 
   memory->create(tauFunctionGrid,nTauPoints,"edip:tauFunctionGrid");
   memory->create(tauFunctionDerivedGrid,nTauPoints,
                  "edip:tauFunctionDerivedGrid");
 
   memory->create(expMinusBetaZeta_iZeta_iGrid,nExpZetaPoints,
                  "edip:expMinusBetaZeta_iZeta_iGrid");
 
   memory->create(qFunctionGrid,nQPoints,"edip:qFunctionGrid");
 
   memory->create(cutoffFunction,nCutoffPoints,"edip:cutoffFunction");
   memory->create(cutoffFunctionDerived,nCutoffPoints,
                  "edip:cutoffFunctionDerived");
 
   memory->create(pow2B,nRadialPoints,"edip:pow2B");
   memory->create(exp2B,nRadialPoints,"edip:exp2B");
   memory->create(exp3B,nRadialPoints,"edip:exp3B");
 }
 
 /* ----------------------------------------------------------------------
    pre-calculated structures
 ------------------------------------------------------------------------- */
 
 void PairEDIP::allocatePreLoops(void)
 {
   // each buffer holds nthreads * leadDimInteractionList entries;
   // preForceCoord stores 5 values per entry
 
   const int nthreads = comm->nthreads;
   const int nentries = nthreads*leadDimInteractionList;
 
   memory->create(preInvR_ij,nentries,"edip:preInvR_ij");
   memory->create(preExp3B_ij,nentries,"edip:preExp3B_ij");
   memory->create(preExp3BDerived_ij,nentries,"edip:preExp3BDerived_ij");
   memory->create(preExp2B_ij,nentries,"edip:preExp2B_ij");
   memory->create(preExp2BDerived_ij,nentries,"edip:preExp2BDerived_ij");
   memory->create(prePow2B_ij,nentries,"edip:prePow2B_ij");
   memory->create(preForceCoord,5*nentries,"edip:preForceCoord");
 }
 
 /* ----------------------------------------------------------------------
    deallocate grids
 ------------------------------------------------------------------------- */
 
 void PairEDIP::deallocateGrids(void)
 {
   // coordination-indexed tables
 
   memory->destroy(tauFunctionGrid);
   memory->destroy(tauFunctionDerivedGrid);
   memory->destroy(expMinusBetaZeta_iZeta_iGrid);
   memory->destroy(qFunctionGrid);
 
   // distance-indexed tables
 
   memory->destroy(cutoffFunction);
   memory->destroy(cutoffFunctionDerived);
   memory->destroy(pow2B);
   memory->destroy(exp2B);
   memory->destroy(exp3B);
 }
 
 /* ----------------------------------------------------------------------
    deallocate preLoops
 ------------------------------------------------------------------------- */
 
 void PairEDIP::deallocatePreLoops(void)
 {
   // release every buffer created by allocatePreLoops()
 
   memory->destroy(preForceCoord);
   memory->destroy(prePow2B_ij);
   memory->destroy(preExp2BDerived_ij);
   memory->destroy(preExp2B_ij);
   memory->destroy(preExp3BDerived_ij);
   memory->destroy(preExp3B_ij);
   memory->destroy(preInvR_ij);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairEDIP::allocate()
 {
   // per-type arrays are indexed 1..ntypes, hence the extra slot
 
   const int ntypesPlusOne = atom->ntypes + 1;
 
   allocated = 1;
 
   memory->create(setflag,ntypesPlusOne,ntypesPlusOne,"pair:setflag");
   memory->create(cutsq,ntypesPlusOne,ntypesPlusOne,"pair:cutsq");
 
   map = new int[ntypesPlusOne];
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairEDIP::settings(int narg, char **arg)
 {
   if (narg != 0) error->all(FLERR,"Illegal pair_style command");
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Fill the lookup tables created by allocateGrids().
 // Every table is sampled with a spacing of 1/GRIDDENSITY.  The table
 // sizes are recomputed here with the same expressions allocateGrids()
 // uses, so the two functions must be kept in sync.
 // Reads the class-level parameters u1..u4, beta, Q0, mu, alpha, A, B,
 // rho, sigma, gamm, cutoffA and cutoffC.
 
 void PairEDIP::initGrids(void)
 {
   int l;
   int numGridPointsOneCutoffFunction;
   int numGridPointsNotOneCutoffFunction;
   int numGridPointsCutoffFunction;
   int numGridPointsR;
   int numGridPointsQFunctionGrid;
   int numGridPointsExpMinusBetaZeta_iZeta_i;
   int numGridPointsTauFunctionGrid;
   double maxArgumentTauFunctionGrid;
   double maxArgumentQFunctionGrid;
   double maxArgumentExpMinusBetaZeta_iZeta_i;
   double r;
   double temp;
   double temp3;
   double temp4;
   double deltaArgumentR;
   double deltaArgumentCutoffFunction;
   double deltaArgumentQFunctionGrid;
   double deltaArgumentTauFunctionGrid;
   double deltaArgumentExpMinusBetaZeta_iZeta_i;
   double const leftLimitToZero = -DBL_MIN * 1000.0;
 
   // tauFunctionGrid
   // tau(r) = u1 + u2*u3*exp(-u4*r) - u2*exp(-2*u4*r) and its derivative,
   // sampled from r = 0
 
   maxArgumentTauFunctionGrid = leadDimInteractionList;
 
   numGridPointsTauFunctionGrid = (int)
     ((maxArgumentTauFunctionGrid) * GRIDDENSITY) + 2;
 
   r = 0.0;
   deltaArgumentTauFunctionGrid = 1.0 / GRIDDENSITY;
 
   for (l = 0; l < numGridPointsTauFunctionGrid; l++) {
       tauFunctionGrid[l] = u1 + u2 * u3 * exp(-u4 * r) -
         u2 * exp(-2.0 * u4 * r);
       tauFunctionDerivedGrid[l] = - u2 * u3 * u4 * exp(-u4 * r) +
         2.0 * u2 * u4 * exp(-2.0 * u4 * r);
       r += deltaArgumentTauFunctionGrid;
   }
 
   // expMinusBetaZeta_iZeta_iGrid
   // exp(-beta*r*r), sampled from r = 0
 
   maxArgumentExpMinusBetaZeta_iZeta_i = leadDimInteractionList;
 
   numGridPointsExpMinusBetaZeta_iZeta_i = (int)
     ((maxArgumentExpMinusBetaZeta_iZeta_i) * GRIDDENSITY) + 2;
 
   r = 0.0;
   deltaArgumentExpMinusBetaZeta_iZeta_i = 1.0 / GRIDDENSITY;
 
   for (l = 0; l < numGridPointsExpMinusBetaZeta_iZeta_i; l++) {
       expMinusBetaZeta_iZeta_iGrid[l] = exp(-beta * r * r);
       r += deltaArgumentExpMinusBetaZeta_iZeta_i;
   }
 
   // qFunctionGrid
   // Q0*exp(-mu*r), sampled from r = 0
 
   maxArgumentQFunctionGrid = leadDimInteractionList;
   numGridPointsQFunctionGrid =
     (int) ((maxArgumentQFunctionGrid) * GRIDDENSITY) + 2;
 
   r = 0.0;
   deltaArgumentQFunctionGrid = 1.0 / GRIDDENSITY;
 
   for (l = 0; l < numGridPointsQFunctionGrid; l++) {
       qFunctionGrid[l] = Q0 * exp(-mu * r);
       r += deltaArgumentQFunctionGrid;
   }
 
   // cutoffFunction
   // first segment (starting at GRIDSTART): value 1, derivative 0
   // second segment: exp(alpha/(1 - temp^3)) with
   // temp = (cutoffA-cutoffC)/(r-cutoffC)
 
   numGridPointsOneCutoffFunction =
     (int) ((cutoffC - GRIDSTART) * GRIDDENSITY);
   numGridPointsNotOneCutoffFunction =
     (int) ((cutoffA-cutoffC) * GRIDDENSITY);
   numGridPointsCutoffFunction =
     numGridPointsOneCutoffFunction+numGridPointsNotOneCutoffFunction+2;
 
   r = GRIDSTART;
   deltaArgumentCutoffFunction = 1.0 / GRIDDENSITY;
 
   for (l = 0; l < numGridPointsOneCutoffFunction; l++) {
       cutoffFunction[l] = 1.0;
       cutoffFunctionDerived[l] = 0.0;
       r += deltaArgumentCutoffFunction;
   }
 
   // r carries on from where the first segment stopped
   // NOTE(review): temp divides by (r - cutoffC); if the accumulated r ever
   // equals cutoffC exactly this divides by zero - confirm it cannot happen
 
   for (l = numGridPointsOneCutoffFunction;
        l < numGridPointsCutoffFunction; l++) {
       temp = (cutoffA - cutoffC)/(r - cutoffC);
       temp3 = temp * temp * temp;
       temp4 = temp3 * temp;
       cutoffFunction[l] = exp(alpha/(1.0-temp3));
       cutoffFunctionDerived[l] = (-3*alpha/(cutoffA-cutoffC)) *
         (temp4/((1-temp3)*(1-temp3)))*exp(alpha/(1.0-temp3));
       r += deltaArgumentCutoffFunction;
   }
 
   // pow2B
   // also fills exp2B and exp3B on the same radial grid starting at GRIDSTART
 
   numGridPointsR = (int)
     ((cutoffA + leftLimitToZero - GRIDSTART) * GRIDDENSITY);
 
   r = GRIDSTART;
   deltaArgumentR = 1.0 / GRIDDENSITY;
   for (l = 0; l < numGridPointsR; l++) {
       pow2B[l] = pow((B/r),rho);
       exp2B[l] = A * exp(sigma/(r-cutoffA));
       exp3B[l] = exp(gamm/(r-cutoffA));
       r += deltaArgumentR;
   }
 
   // the two extra trailing entries: the exponential tables are set to
   // zero there, pow2B keeps following (B/r)^rho
 
   pow2B[numGridPointsR] = pow((B/r),rho);
   exp2B[numGridPointsR]=0;
   exp3B[numGridPointsR]=0;
   r += deltaArgumentR;
   pow2B[numGridPointsR+1] = pow((B/r),rho);
   exp2B[numGridPointsR+1]=0;
   exp3B[numGridPointsR+1]=0;
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairEDIP::coeff(int narg, char **arg)
 {
   if (!allocated) allocate();
 
   const int ntypes = atom->ntypes;
 
   // expected arguments: * * potential-file, then one element name per type
 
   if (narg != 3 + ntypes)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // the I,J args must both be wildcards
 
   if (!(strcmp(arg[0],"*") == 0 && strcmp(arg[1],"*") == 0))
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // discard the element list left over from a previous call
 
   if (elements) {
     for (int ielem = 0; ielem < nelements; ielem++) delete [] elements[ielem];
     delete [] elements;
   }
   elements = new char*[ntypes];
   for (int ielem = 0; ielem < ntypes; ielem++) elements[ielem] = NULL;
 
   // map[type] = index of the element assigned to that atom type,
   //             -1 when the type is given as NULL
   // elements  = unique element names, nelements = how many there are
 
   nelements = 0;
   for (int iarg = 3; iarg < narg; iarg++) {
     const int itype = iarg - 2;
 
     if (strcmp(arg[iarg],"NULL") == 0) {
       map[itype] = -1;
       continue;
     }
 
     // look for the name among the elements seen so far
 
     int ielem = 0;
     while (ielem < nelements && strcmp(arg[iarg],elements[ielem]) != 0)
       ielem++;
     map[itype] = ielem;
 
     // not found: append it
 
     if (ielem == nelements) {
       const int nchars = strlen(arg[iarg]) + 1;
       elements[ielem] = new char[nchars];
       strcpy(elements[ielem],arg[iarg]);
       nelements++;
     }
   }
 
   // read potential file and initialize potential parameters
 
   read_file(arg[2]);
   setup();
 
   // setflag[i][j] = 1 only when both types are mapped to an element,
   // 0 otherwise (coeff() is called once with I,J = * *)
 
   const int n = atom->ntypes;
   int count = 0;
   for (int itype = 1; itype <= n; itype++)
     for (int jtype = itype; jtype <= n; jtype++) {
       if (map[itype] >= 0 && map[jtype] >= 0) {
         setflag[itype][jtype] = 1;
         count++;
       } else setflag[itype][jtype] = 0;
     }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 
   // allocate tables and internal structures
 
   allocatePreLoops();
   allocateGrids();
   initGrids();
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairEDIP::init_style()
 {
   // abort unless newton pair is on
 
   if (force->newton_pair == 0)
     error->all(FLERR,"Pair style EDIP requires newton pair on");
 
   // need a full neighbor list
   // the request defaults are overridden: half off, full on
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairEDIP::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   return cutmax;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Read the EDIP potential file and fill the params array.
 //   file = name of the potential file, opened on proc 0 only
 // Proc 0 reads the file line by line and broadcasts each line to all
 // procs.  An entry consists of 20 whitespace-separated words (3 element
 // names followed by 17 numeric parameters) and may span several lines;
 // '#' starts a comment.  Entries whose 3 element names are not all in
 // the elements list are skipped.  Errors out if the file cannot be
 // opened, an entry has the wrong number of words, or one of the
 // parameters checked below is negative.
 
 void PairEDIP::read_file(char *file)
 {
   int params_per_line = 20;
   char **words = new char*[params_per_line+1];
 
   memory->sfree(params);
   params = NULL;
   nparams = maxparam = 0;
 
   // open file on proc 0
   // fp stays NULL on all other procs, which never touch it
 
   FILE *fp = NULL;
   if (comm->me == 0) {
     fp = force->open_potential(file);
     if (fp == NULL) {
       // bounded write: the file name is user input and may be longer
       // than the message buffer
       char str[128];
       snprintf(str,sizeof(str),"Cannot open EDIP potential file %s",file);
       error->one(FLERR,str);
     }
   }
 
   // read each set of params from potential file
   // one set of params can span multiple lines
   // store params if all 3 element tags are in element list
 
   int n,nwords,ielement,jelement,kelement;
   char line[MAXLINE],*ptr;
   int eof = 0;
 
   while (1) {
     if (comm->me == 0) {
       ptr = fgets(line,MAXLINE,fp);
       if (ptr == NULL) {
         eof = 1;
         fclose(fp);
       } else n = strlen(line) + 1;
     }
     MPI_Bcast(&eof,1,MPI_INT,0,world);
     if (eof) break;
     MPI_Bcast(&n,1,MPI_INT,0,world);
     MPI_Bcast(line,n,MPI_CHAR,0,world);
 
     // strip comment, skip line if blank
 
     if ((ptr = strchr(line,'#'))) *ptr = '\0';
     nwords = atom->count_words(line);
     if (nwords == 0) continue;
 
     // concatenate additional lines until have params_per_line words
 
     while (nwords < params_per_line) {
       n = strlen(line);
       if (comm->me == 0) {
         ptr = fgets(&line[n],MAXLINE-n,fp);
         if (ptr == NULL) {
           eof = 1;
           fclose(fp);
         } else n = strlen(line) + 1;
       }
       MPI_Bcast(&eof,1,MPI_INT,0,world);
       if (eof) break;
       MPI_Bcast(&n,1,MPI_INT,0,world);
       MPI_Bcast(line,n,MPI_CHAR,0,world);
       if ((ptr = strchr(line,'#'))) *ptr = '\0';
       nwords = atom->count_words(line);
     }
 
     if (nwords != params_per_line)
       error->all(FLERR,"Incorrect format in EDIP potential file");
 
     // words = ptrs to all words in line
     // the loop also stores the terminating NULL, hence params_per_line+1 slots
 
     nwords = 0;
     words[nwords++] = strtok(line," \t\n\r\f");
     while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue;
 
     // ielement,jelement,kelement = 1st args
     // if all 3 args are in element list, then parse this line
     // else skip to next entry in file
 
     for (ielement = 0; ielement < nelements; ielement++)
       if (strcmp(words[0],elements[ielement]) == 0) break;
     if (ielement == nelements) continue;
     for (jelement = 0; jelement < nelements; jelement++)
       if (strcmp(words[1],elements[jelement]) == 0) break;
     if (jelement == nelements) continue;
     for (kelement = 0; kelement < nelements; kelement++)
       if (strcmp(words[2],elements[kelement]) == 0) break;
     if (kelement == nelements) continue;
 
     // load up parameter settings and error check their values
     // the params array grows in chunks of DELTA entries
 
     if (nparams == maxparam) {
       maxparam += DELTA;
       params = (Param *) memory->srealloc(params,maxparam*sizeof(Param),
                                           "pair:params");
     }
 
     params[nparams].ielement = ielement;
     params[nparams].jelement = jelement;
     params[nparams].kelement = kelement;
     params[nparams].A = atof(words[3]);
     params[nparams].B = atof(words[4]);
     params[nparams].cutoffA = atof(words[5]);
     params[nparams].cutoffC = atof(words[6]);
     params[nparams].alpha = atof(words[7]);
     params[nparams].beta = atof(words[8]);
     params[nparams].eta = atof(words[9]);
     params[nparams].gamm = atof(words[10]);
     params[nparams].lambda = atof(words[11]);
     params[nparams].mu = atof(words[12]);
     params[nparams].rho = atof(words[13]);
     params[nparams].sigma = atof(words[14]);
     params[nparams].Q0 = atof(words[15]);
     params[nparams].u1 = atof(words[16]);
     params[nparams].u2 = atof(words[17]);
     params[nparams].u3 = atof(words[18]);
     params[nparams].u4 = atof(words[19]);
 
     // Q0 and u1..u4 are not range-checked
 
     if (params[nparams].A < 0.0 || params[nparams].B < 0.0 ||
         params[nparams].cutoffA < 0.0 || params[nparams].cutoffC < 0.0 ||
         params[nparams].alpha < 0.0 || params[nparams].beta < 0.0 ||
         params[nparams].eta < 0.0 || params[nparams].gamm < 0.0 ||
         params[nparams].lambda < 0.0 || params[nparams].mu < 0.0 ||
         params[nparams].rho < 0.0 || params[nparams].sigma < 0.0)
       error->all(FLERR,"Illegal EDIP parameter");
 
     nparams++;
   }
 
   delete [] words;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairEDIP::setup()
 {
   int i,j,k,m,n;
   double rtmp;
 
   // set elem2param for all triplet combinations
   // must be a single exact match to lines read from file
   // do not allow for ACB in place of ABC
 
   memory->destroy(elem2param);
   memory->create(elem2param,nelements,nelements,nelements,"pair:elem2param");
 
   for (i = 0; i < nelements; i++)
     for (j = 0; j < nelements; j++)
       for (k = 0; k < nelements; k++) {
         n = -1;
         for (m = 0; m < nparams; m++) {
           if (i == params[m].ielement && j == params[m].jelement &&
               k == params[m].kelement) {
             if (n >= 0) error->all(FLERR,"Potential file has duplicate entry");
             n = m;
           }
         }
         if (n < 0) error->all(FLERR,"Potential file is missing an entry");
         elem2param[i][j][k] = n;
       }
 
   // set cutoff square
 
   for (m = 0; m < nparams; m++) {
     params[m].cutsq = params[m].cutoffA*params[m].cutoffA;
   }
 
   // set cutmax to max of all params
 
   cutmax = 0.0;
   for (m = 0; m < nparams; m++) {
     rtmp = sqrt(params[m].cutsq);
     if (rtmp > cutmax) cutmax = rtmp;
   }
 
   // this should be removed for multi species parametrizations
 
   A = params[0].A;
   B = params[0].B;
   rho = params[0].rho;
   cutoffA = params[0].cutoffA;
   cutoffC = params[0].cutoffC;
   sigma = params[0].sigma;
   lambda = params[0].lambda;
   gamm = params[0].gamm;
   eta = params[0].eta;
   Q0 = params[0].Q0;
   mu = params[0].mu;
   beta = params[0].beta;
   alpha = params[0].alpha;
   u1 = params[0].u1;
   u2 = params[0].u2;
   u3 = params[0].u3;
   u4 = params[0].u4;
 }
diff --git a/src/USER-MISC/pair_lj_sf_dipole_sf.cpp b/src/USER-MISC/pair_lj_sf_dipole_sf.cpp
index f81a0062a..58d7f4d61 100755
--- a/src/USER-MISC/pair_lj_sf_dipole_sf.cpp
+++ b/src/USER-MISC/pair_lj_sf_dipole_sf.cpp
@@ -1,510 +1,510 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Mario Orsi (U Southampton), orsimario@gmail.com
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdlib.h"
 #include "pair_lj_sf_dipole_sf.h"
 #include "atom.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "comm.h"
 #include "force.h"
 #include "memory.h"
 #include "error.h"
 #include "update.h"
 #include "string.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairLJSFDipoleSF::PairLJSFDipoleSF(LAMMPS *lmp) : Pair(lmp)
 {
   // NOTE(review): presumably tells the base class that this style
   // provides no single() routine - confirm against Pair
   single_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairLJSFDipoleSF::~PairLJSFDipoleSF()
 {
   // the per-type arrays only exist once allocate() has run
 
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   // cutoffs
 
   memory->destroy(cut_lj);
   memory->destroy(cut_ljsq);
   memory->destroy(cut_coul);
   memory->destroy(cut_coulsq);
 
   // LJ coefficients
 
   memory->destroy(epsilon);
   memory->destroy(sigma);
   memory->destroy(lj1);
   memory->destroy(lj2);
   memory->destroy(lj3);
   memory->destroy(lj4);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Compute forces and torques (and, when requested, energy and virial
 // tallies) for every pair in the neighbor list.
 //   eflag = non-zero to compute energies
 //   vflag = non-zero to compute the virial
 // For each pair inside cutsq the routine accumulates charge-charge,
 // dipole-dipole, dipole-charge and charge-dipole terms (inside the
 // Coulomb cutoff) plus a Lennard-Jones term (inside the LJ cutoff),
 // then adds the result to f and torque of atom i and, if newton_pair is
 // on or j is a local atom, of atom j.
 
 void PairLJSFDipoleSF::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fx,fy,fz;
   double rsq,rinv,r2inv,r6inv,r3inv,r5inv;
   double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz;
   double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul;
   double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4;
   double forcelj,factor_coul,factor_lj;
   double presf,afac,bfac,pqfac,qpfac,forceljcut,forceljsf;
   double aforcecoulx,aforcecouly,aforcecoulz;
   double bforcecoulx,bforcecouly,bforcecoulz;
   double rcutlj2inv, rcutcoul2inv,rcutlj6inv;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   double **mu = atom->mu;
   double **torque = atom->torque;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       // the special-bond scaling factors are looked up from the raw
       // neighbor entry before it is masked down to the atom index
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         rinv = sqrt(r2inv);
 
         // atom can have both a charge and dipole
         // i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole
 
         forcecoulx = forcecouly = forcecoulz = 0.0;
         tixcoul = tiycoul = tizcoul = 0.0;
         tjxcoul = tjycoul = tjzcoul = 0.0;
 
         if (rsq < cut_coulsq[itype][jtype]) {
 
           // charge-charge: the factor (r2inv - 1/cut_coulsq) is zero at
           // the Coulomb cutoff
 
           if (qtmp != 0.0 && q[j] != 0.0) {
             pre1 = qtmp*q[j]*rinv*(r2inv-1.0/cut_coulsq[itype][jtype]);
 
             forcecoulx += pre1*delx;
             forcecouly += pre1*dely;
             forcecoulz += pre1*delz;
           }
 
           // dipole-dipole: mu[][3] is tested to decide whether an atom
           // carries a dipole
           // bfac, r3inv, r5inv, pdotp, pidotr and pjdotr set here are
           // reused by the energy tally further down
 
           if (mu[i][3] > 0.0 && mu[j][3] > 0.0) {
             r3inv = r2inv*rinv;
             r5inv = r3inv*r2inv;
             rcutcoul2inv=1.0/cut_coulsq[itype][jtype];
 
             pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2];
             pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz;
             pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
 
             afac = 1.0 - rsq*rsq * rcutcoul2inv*rcutcoul2inv;
             pre1 = afac * ( pdotp - 3.0 * r2inv * pidotr * pjdotr );
             aforcecoulx = pre1*delx;
             aforcecouly = pre1*dely;
             aforcecoulz = pre1*delz;
 
             bfac = 1.0 - 4.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv) +
               3.0*rsq*rsq*rcutcoul2inv*rcutcoul2inv;
             presf = 2.0 * r2inv * pidotr * pjdotr;
             bforcecoulx = bfac * (pjdotr*mu[i][0]+pidotr*mu[j][0]-presf*delx);
             bforcecouly = bfac * (pjdotr*mu[i][1]+pidotr*mu[j][1]-presf*dely);
             bforcecoulz = bfac * (pjdotr*mu[i][2]+pidotr*mu[j][2]-presf*delz);
 
             forcecoulx += 3.0 * r5inv * ( aforcecoulx + bforcecoulx );
             forcecouly += 3.0 * r5inv * ( aforcecouly + bforcecouly );
             forcecoulz += 3.0 * r5inv * ( aforcecoulz + bforcecoulz );
 
             pre2 = 3.0 * bfac * r5inv * pjdotr;
             pre3 = 3.0 * bfac * r5inv * pidotr;
             pre4 = -bfac * r3inv;
 
             crossx = pre4 * (mu[i][1]*mu[j][2] - mu[i][2]*mu[j][1]);
             crossy = pre4 * (mu[i][2]*mu[j][0] - mu[i][0]*mu[j][2]);
             crossz = pre4 * (mu[i][0]*mu[j][1] - mu[i][1]*mu[j][0]);
 
             tixcoul += crossx + pre2 * (mu[i][1]*delz - mu[i][2]*dely);
             tiycoul += crossy + pre2 * (mu[i][2]*delx - mu[i][0]*delz);
             tizcoul += crossz + pre2 * (mu[i][0]*dely - mu[i][1]*delx);
             tjxcoul += -crossx + pre3 * (mu[j][1]*delz - mu[j][2]*dely);
             tjycoul += -crossy + pre3 * (mu[j][2]*delx - mu[j][0]*delz);
             tjzcoul += -crossz + pre3 * (mu[j][0]*dely - mu[j][1]*delx);
           }
 
           // dipole on i, charge on j: force on the pair, torque on i only
 
           if (mu[i][3] > 0.0 && q[j] != 0.0) {
             r3inv = r2inv*rinv;
             r5inv = r3inv*r2inv;
             pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz;
             rcutcoul2inv=1.0/cut_coulsq[itype][jtype];
             pre1 = 3.0 * q[j] * r5inv * pidotr * (1-rsq*rcutcoul2inv);
             pqfac = 1.0 - 3.0*rsq*rcutcoul2inv +
               2.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv);
             pre2 = q[j] * r3inv * pqfac;
 
             forcecoulx += pre2*mu[i][0] - pre1*delx;
             forcecouly += pre2*mu[i][1] - pre1*dely;
             forcecoulz += pre2*mu[i][2] - pre1*delz;
             tixcoul += pre2 * (mu[i][1]*delz - mu[i][2]*dely);
             tiycoul += pre2 * (mu[i][2]*delx - mu[i][0]*delz);
             tizcoul += pre2 * (mu[i][0]*dely - mu[i][1]*delx);
           }
 
           // charge on i, dipole on j: force on the pair, torque on j only
 
           if (mu[j][3] > 0.0 && qtmp != 0.0) {
             r3inv = r2inv*rinv;
             r5inv = r3inv*r2inv;
             pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
             rcutcoul2inv=1.0/cut_coulsq[itype][jtype];
             pre1 = 3.0 * qtmp * r5inv * pjdotr * (1-rsq*rcutcoul2inv);
             qpfac = 1.0 - 3.0*rsq*rcutcoul2inv +
               2.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv);
             pre2 = qtmp * r3inv * qpfac;
 
             forcecoulx += pre1*delx - pre2*mu[j][0];
             forcecouly += pre1*dely - pre2*mu[j][1];
             forcecoulz += pre1*delz - pre2*mu[j][2];
             tjxcoul += -pre2 * (mu[j][1]*delz - mu[j][2]*dely);
             tjycoul += -pre2 * (mu[j][2]*delx - mu[j][0]*delz);
             tjzcoul += -pre2 * (mu[j][0]*dely - mu[j][1]*delx);
           }
         }
 
         // LJ interaction
         // forceljsf is the same expression as forceljcut evaluated at the
         // LJ cutoff; subtracting it makes forcelj zero at the cutoff
 
         if (rsq < cut_ljsq[itype][jtype]) {
           r6inv = r2inv*r2inv*r2inv;
           forceljcut = r6inv*(lj1[itype][jtype]*r6inv-lj2[itype][jtype])*r2inv;
 
           rcutlj2inv = 1.0 / cut_ljsq[itype][jtype];
           rcutlj6inv = rcutlj2inv * rcutlj2inv * rcutlj2inv;
           forceljsf = (lj1[itype][jtype]*rcutlj6inv - lj2[itype][jtype]) *
             rcutlj6inv * rcutlj2inv;
 
           forcelj = factor_lj * (forceljcut - forceljsf);
         } else forcelj = 0.0;
 
         // total force
 
         fq = factor_coul*qqrd2e;
         fx = fq*forcecoulx + delx*forcelj;
         fy = fq*forcecouly + dely*forcelj;
         fz = fq*forcecoulz + delz*forcelj;
 
         // force & torque accumulation
 
         f[i][0] += fx;
         f[i][1] += fy;
         f[i][2] += fz;
         torque[i][0] += fq*tixcoul;
         torque[i][1] += fq*tiycoul;
         torque[i][2] += fq*tizcoul;
 
         if (newton_pair || j < nlocal) {
           f[j][0] -= fx;
           f[j][1] -= fy;
           f[j][2] -= fz;
           torque[j][0] += fq*tjxcoul;
           torque[j][1] += fq*tjycoul;
           torque[j][2] += fq*tjzcoul;
         }
 
         // energies: each dipole term is guarded by the same condition as
         // the force branch that set its temporaries
 
         if (eflag) {
           if (rsq < cut_coulsq[itype][jtype]) {
             ecoul = (1.0-sqrt(rsq)/sqrt(cut_coulsq[itype][jtype]));
             ecoul *= ecoul;
             ecoul *= qtmp * q[j] * rinv;
             if (mu[i][3] > 0.0 && mu[j][3] > 0.0)
               ecoul += bfac * (r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr);
             if (mu[i][3] > 0.0 && q[j] != 0.0)
               ecoul += -q[j] * r3inv * pqfac * pidotr;
             if (mu[j][3] > 0.0 && qtmp != 0.0)
               ecoul += qtmp * r3inv * qpfac * pjdotr;
             ecoul *= factor_coul*qqrd2e;
           } else ecoul = 0.0;
 
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])+
               rcutlj6inv*(6*lj3[itype][jtype]*rcutlj6inv-3*lj4[itype][jtype])*
               rsq*rcutlj2inv+
               rcutlj6inv*(-7*lj3[itype][jtype]*rcutlj6inv+4*lj4[itype][jtype]);
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair,
                                  evdwl,ecoul,fx,fy,fz,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairLJSFDipoleSF::allocate()
 {
   allocated = 1;
 
   // per-type arrays are indexed 1..ntypes, hence the extra slot
 
   const int ntypes = atom->ntypes;
   const int dim = ntypes + 1;
 
   // no type pair has coefficients yet
 
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   // cutoffs
 
   memory->create(cut_lj,dim,dim,"pair:cut_lj");
   memory->create(cut_ljsq,dim,dim,"pair:cut_ljsq");
   memory->create(cut_coul,dim,dim,"pair:cut_coul");
   memory->create(cut_coulsq,dim,dim,"pair:cut_coulsq");
 
   // LJ coefficients
 
   memory->create(epsilon,dim,dim,"pair:epsilon");
   memory->create(sigma,dim,dim,"pair:sigma");
   memory->create(lj1,dim,dim,"pair:lj1");
   memory->create(lj2,dim,dim,"pair:lj2");
   memory->create(lj3,dim,dim,"pair:lj3");
   memory->create(lj4,dim,dim,"pair:lj4");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairLJSFDipoleSF::settings(int narg, char **arg)
 {
   // pair_style args: <LJ cutoff> [<Coulomb cutoff>]
   if (narg < 1 || narg > 2)
     error->all(FLERR,"Incorrect args in pair_style command");
 
   if (strcmp(update->unit_style,"electron") == 0)
     error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles");
 
   // the Coulomb cutoff defaults to the LJ cutoff when only one value is given
   cut_lj_global = force->numeric(FLERR,arg[0]);
   if (narg == 1) cut_coul_global = cut_lj_global;
   else cut_coul_global = force->numeric(FLERR,arg[1]);
 
   // reset cutoffs that have been explicitly set
   // start at j = i so that same-type (i,i) pairs, which coeff() also flags
   // in setflag, pick up the new global cutoffs as well
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) {
           cut_lj[i][j] = cut_lj_global;
           cut_coul[i][j] = cut_coul_global;
         }
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairLJSFDipoleSF::coeff(int narg, char **arg)
 {
   if (narg < 4 || narg > 6)
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double epsilon_one = force->numeric(FLERR,arg[2]);
   double sigma_one = force->numeric(FLERR,arg[3]);
 
   double cut_lj_one = cut_lj_global;
   double cut_coul_one = cut_coul_global;
   if (narg >= 5) cut_coul_one = cut_lj_one = force->numeric(FLERR,arg[4]);
   if (narg == 6) cut_coul_one = force->numeric(FLERR,arg[5]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       epsilon[i][j] = epsilon_one;
       sigma[i][j] = sigma_one;
       cut_lj[i][j] = cut_lj_one;
       cut_coul[i][j] = cut_coul_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLJSFDipoleSF::init_style()
 {
   // abort unless the atom style provides all three of the q, mu and
   // torque flags
   if (!atom->q_flag || !atom->mu_flag || !atom->torque_flag)
     error->all(FLERR,"Pair dipole/sf requires atom attributes q, mu, torque");
 
   // register a neighbor list request for this pair style
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairLJSFDipoleSF::init_one(int i, int j)
 {
   // pair not set explicitly: mix parameters from the two like-type pairs
   if (setflag[i][j] == 0) {
     epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j],
                                sigma[i][i],sigma[j][j]);
     sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
     cut_lj[i][j] = mix_distance(cut_lj[i][i],cut_lj[j][j]);
     cut_coul[i][j] = mix_distance(cut_coul[i][i],cut_coul[j][j]);
   }
 
   const double rc_lj = cut_lj[i][j];
   const double rc_coul = cut_coul[i][j];
   cut_ljsq[i][j] = rc_lj * rc_lj;
   cut_coulsq[i][j] = rc_coul * rc_coul;
 
   // LJ prefactors: lj1/lj2 carry 48*eps*sigma^12 and 24*eps*sigma^6,
   // lj3/lj4 carry 4*eps*sigma^12 and 4*eps*sigma^6
   const double eps = epsilon[i][j];
   const double sig12 = pow(sigma[i][j],12.0);
   const double sig6 = pow(sigma[i][j],6.0);
   lj1[i][j] = 48.0 * eps * sig12;
   lj2[i][j] = 24.0 * eps * sig6;
   lj3[i][j] = 4.0 * eps * sig12;
   lj4[i][j] = 4.0 * eps * sig6;
 
   // mirror the derived quantities into the (j,i) entries
   cut_ljsq[j][i] = cut_ljsq[i][j];
   cut_coulsq[j][i] = cut_coulsq[i][j];
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   lj4[j][i] = lj4[i][j];
 
   // the returned cutoff is the larger of the LJ and Coulomb cutoffs
   return MAX(rc_lj,rc_coul);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJSFDipoleSF::write_restart(FILE *fp)
 {
   // global settings go first, then one record per upper-triangle type pair
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       // the flag is always written; coefficients follow only when it is set
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
       if (!setflag[itype][jtype]) continue;
 
       fwrite(&epsilon[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigma[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut_lj[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut_coul[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJSFDipoleSF::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
 
   allocate();
 
   // only rank 0 touches the file; every value is then broadcast from rank 0
   const bool onRoot = (comm->me == 0);
   const int ntypes = atom->ntypes;
 
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       if (onRoot) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
 
       // coefficients are present in the file only for flagged pairs
       if (!setflag[itype][jtype]) continue;
 
       if (onRoot) {
         fread(&epsilon[itype][jtype],sizeof(double),1,fp);
         fread(&sigma[itype][jtype],sizeof(double),1,fp);
         fread(&cut_lj[itype][jtype],sizeof(double),1,fp);
         fread(&cut_coul[itype][jtype],sizeof(double),1,fp);
       }
       MPI_Bcast(&epsilon[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&sigma[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut_lj[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut_coul[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJSFDipoleSF::write_restart_settings(FILE *fp)
 {
   // written in the same order that read_restart_settings() reads them:
   // global LJ cutoff, global Coulomb cutoff, mixing flag
   fwrite(&cut_lj_global,sizeof(double),1,fp);
   fwrite(&cut_coul_global,sizeof(double),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJSFDipoleSF::read_restart_settings(FILE *fp)
 {
   // rank 0 reads the three global settings in the order they were written
   const bool onRoot = (comm->me == 0);
   if (onRoot) {
     fread(&cut_lj_global,sizeof(double),1,fp);
     fread(&cut_coul_global,sizeof(double),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
   }
 
   // ... and then shares them with all other ranks
   MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
 }
diff --git a/src/USER-MISC/pair_meam_spline.cpp b/src/USER-MISC/pair_meam_spline.cpp
index be0a8fdff..26cd70509 100644
--- a/src/USER-MISC/pair_meam_spline.cpp
+++ b/src/USER-MISC/pair_meam_spline.cpp
@@ -1,739 +1,739 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Alexander Stukowski (LLNL), alex@stukowski.com
    see LLNL copyright notice at bottom of file
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
  * File history of changes:
  * 25-Oct-10 - AS: First code version.
  * 17-Feb-11 - AS: Several optimizations (introduced MEAM2Body struct).
  * 25-Mar-11 - AS: Fixed calculation of per-atom virial stress.
  * 11-Apr-11 - AS: Adapted code to new memory management of LAMMPS.
  * 24-Sep-11 - AS: Adapted code to new interface of Error::one() function.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_meam_spline.h"
 #include "atom.h"
 #include "force.h"
 #include "comm.h"
 #include "memory.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairMEAMSpline::PairMEAMSpline(LAMMPS *lmp) : Pair(lmp)
 {
   // base-class flags describing this pair style
   single_enable = 0;
   restartinfo = 0;
   one_coeff = 1;
   manybody_flag = 1;
 
   // element name table, filled in coeff()
   nelements = 0;
   elements = NULL;
 
   // scratch storage, grown on demand in compute()
   Uprime_values = NULL;
   nmax = 0;
   maxNeighbors = 0;
   twoBodyInfo = NULL;
 
   // one value per atom is packed in pack_forward_comm();
   // pack_reverse_comm() packs nothing
   comm_forward = 1;
   comm_reverse = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairMEAMSpline::~PairMEAMSpline()
 {
   // release each element name, then the table that holds them
   if (elements != NULL) {
     for (int ielem = 0; ielem < nelements; ++ielem) {
       delete [] elements[ielem];
     }
   }
   delete [] elements;
 
   // scratch buffers used by compute()
   delete[] twoBodyInfo;
   memory->destroy(Uprime_values);
 
   // per-type arrays exist only once allocate() has run
   if (allocated) {
     memory->destroy(setflag);
     memory->destroy(cutsq);
     delete [] map;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Computes forces (and, when requested, energies/virial) in two passes:
 //   1. loop over the full neighbor list: accumulate the density rho_value of
 //      each atom from rho(r) and the f(r_ij)*f(r_ik)*g(cos_theta) terms,
 //      evaluate the embedding function U and its derivative, and apply the
 //      three-body forces;
 //   2. after forwarding Uprime_values through comm, loop over the half
 //      neighbor list to apply the pair forces from phi(r) and from
 //      rho'(r) weighted by U' of both atoms.
 // eflag/vflag are passed to ev_setup() to select what gets tallied.
 void PairMEAMSpline::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag, vflag);
   else evflag = vflag_fdotr =
          eflag_global = vflag_global = eflag_atom = vflag_atom = 0;
 
   double cutforcesq = cutoff*cutoff;
 
   // Grow per-atom array if necessary
 
   if (atom->nmax > nmax) {
     memory->destroy(Uprime_values);
     nmax = atom->nmax;
     memory->create(Uprime_values,nmax,"pair:Uprime");
   }
 
   double** const x = atom->x;
   double** forces = atom->f;
   int nlocal = atom->nlocal;
   bool newton_pair = force->newton_pair;
 
   int inum_full = listfull->inum;
   int* ilist_full = listfull->ilist;
   int* numneigh_full = listfull->numneigh;
   int** firstneigh_full = listfull->firstneigh;
 
   // Determine the maximum number of neighbors a single atom has
 
   int newMaxNeighbors = 0;
   for(int ii = 0; ii < inum_full; ii++) {
     int jnum = numneigh_full[ilist_full[ii]];
     if(jnum > newMaxNeighbors) newMaxNeighbors = jnum;
   }
 
   // Allocate array for temporary bond info
   // (only ever grows; old contents are not preserved)
 
   if(newMaxNeighbors > maxNeighbors) {
     maxNeighbors = newMaxNeighbors;
     delete[] twoBodyInfo;
     twoBodyInfo = new MEAM2Body[maxNeighbors];
   }
 
   // Sum three-body contributions to charge density and
   // compute embedding energies
 
   for(int ii = 0; ii < inum_full; ii++) {
     int i = ilist_full[ii];
     double xtmp = x[i][0];
     double ytmp = x[i][1];
     double ztmp = x[i][2];
     int* jlist = firstneigh_full[i];
     int jnum = numneigh_full[i];
     double rho_value = 0;
     int numBonds = 0;
     // write cursor into twoBodyInfo; entries [0,numBonds) are the in-range
     // neighbors of atom i collected so far
     MEAM2Body* nextTwoBodyInfo = twoBodyInfo;
 
     for(int jj = 0; jj < jnum; jj++) {
       int j = jlist[jj];
       j &= NEIGHMASK;
 
       double jdelx = x[j][0] - xtmp;
       double jdely = x[j][1] - ytmp;
       double jdelz = x[j][2] - ztmp;
       double rij_sq = jdelx*jdelx + jdely*jdely + jdelz*jdelz;
 
       if(rij_sq < cutforcesq) {
         double rij = sqrt(rij_sq);
         double partial_sum = 0;
 
         // cache distance, unit vector, f(rij) and f'(rij) for this bond
         nextTwoBodyInfo->tag = j;
         nextTwoBodyInfo->r = rij;
         nextTwoBodyInfo->f = f.eval(rij, nextTwoBodyInfo->fprime);
         nextTwoBodyInfo->del[0] = jdelx / rij;
         nextTwoBodyInfo->del[1] = jdely / rij;
         nextTwoBodyInfo->del[2] = jdelz / rij;
 
         // pair the new bond with every previously stored bond, so each
         // unordered (j,k) pair is visited exactly once
         for(int kk = 0; kk < numBonds; kk++) {
           const MEAM2Body& bondk = twoBodyInfo[kk];
           double cos_theta = (nextTwoBodyInfo->del[0]*bondk.del[0] +
                               nextTwoBodyInfo->del[1]*bondk.del[1] +
                               nextTwoBodyInfo->del[2]*bondk.del[2]);
           partial_sum += bondk.f * g.eval(cos_theta);
         }
 
         rho_value += nextTwoBodyInfo->f * partial_sum;
         rho_value += rho.eval(rij);
 
         numBonds++;
         nextTwoBodyInfo++;
       }
     }
 
     // Compute embedding energy and its derivative
 
     double Uprime_i;
     double embeddingEnergy = U.eval(rho_value, Uprime_i) - zero_atom_energy;
     Uprime_values[i] = Uprime_i;
     if(eflag) {
       if(eflag_global) eng_vdwl += embeddingEnergy;
       if(eflag_atom) eatom[i] += embeddingEnergy;
     }
 
     // force on atom i accumulated from the k-side of each (j,k) pair
     double forces_i[3] = {0, 0, 0};
 
     // Compute three-body contributions to force
 
     for(int jj = 0; jj < numBonds; jj++) {
       const MEAM2Body bondj = twoBodyInfo[jj];
       double rij = bondj.r;
       int j = bondj.tag;
 
       double f_rij_prime = bondj.fprime;
       double f_rij = bondj.f;
 
       double forces_j[3] = {0, 0, 0};
 
       // kk < jj mirrors the pair enumeration used for the density above
       MEAM2Body const* bondk = twoBodyInfo;
       for(int kk = 0; kk < jj; kk++, ++bondk) {
         double rik = bondk->r;
 
         double cos_theta = (bondj.del[0]*bondk->del[0] +
                             bondj.del[1]*bondk->del[1] +
                             bondj.del[2]*bondk->del[2]);
         double g_prime;
         double g_value = g.eval(cos_theta, g_prime);
         double f_rik_prime = bondk->fprime;
         double f_rik = bondk->f;
 
         // radial parts from f'(r) along each bond
         double fij = -Uprime_i * g_value * f_rik * f_rij_prime;
         double fik = -Uprime_i * g_value * f_rij * f_rik_prime;
 
         // angular parts from g'(cos_theta)
         double prefactor = Uprime_i * f_rij * f_rik * g_prime;
         double prefactor_ij = prefactor / rij;
         double prefactor_ik = prefactor / rik;
         fij += prefactor_ij * cos_theta;
         fik += prefactor_ik * cos_theta;
 
         double fj[3], fk[3];
 
         fj[0] = bondj.del[0] * fij - bondk->del[0] * prefactor_ij;
         fj[1] = bondj.del[1] * fij - bondk->del[1] * prefactor_ij;
         fj[2] = bondj.del[2] * fij - bondk->del[2] * prefactor_ij;
         forces_j[0] += fj[0];
         forces_j[1] += fj[1];
         forces_j[2] += fj[2];
 
         fk[0] = bondk->del[0] * fik - bondj.del[0] * prefactor_ik;
         fk[1] = bondk->del[1] * fik - bondj.del[1] * prefactor_ik;
         fk[2] = bondk->del[2] * fik - bondj.del[2] * prefactor_ik;
         forces_i[0] -= fk[0];
         forces_i[1] -= fk[1];
         forces_i[2] -= fk[2];
 
         int k = bondk->tag;
         forces[k][0] += fk[0];
         forces[k][1] += fk[1];
         forces[k][2] += fk[2];
 
         if(evflag) {
           // rebuild the full displacement vectors from unit vector * length
           double delta_ij[3];
           double delta_ik[3];
           delta_ij[0] = bondj.del[0] * rij;
           delta_ij[1] = bondj.del[1] * rij;
           delta_ij[2] = bondj.del[2] * rij;
           delta_ik[0] = bondk->del[0] * rik;
           delta_ik[1] = bondk->del[1] * rik;
           delta_ik[2] = bondk->del[2] * rik;
           ev_tally3(i, j, k, 0.0, 0.0, fj, fk, delta_ij, delta_ik);
         }
       }
 
       // equal and opposite update for the summed j-side force
       forces[i][0] -= forces_j[0];
       forces[i][1] -= forces_j[1];
       forces[i][2] -= forces_j[2];
       forces[j][0] += forces_j[0];
       forces[j][1] += forces_j[1];
       forces[j][2] += forces_j[2];
     }
 
     forces[i][0] += forces_i[0];
     forces[i][1] += forces_i[1];
     forces[i][2] += forces_i[2];
   }
 
   // Communicate U'(rho) values
 
   comm->forward_comm_pair(this);
 
   int inum_half = listhalf->inum;
   int* ilist_half = listhalf->ilist;
   int* numneigh_half = listhalf->numneigh;
   int** firstneigh_half = listhalf->firstneigh;
 
   // Compute two-body pair interactions
 
   for(int ii = 0; ii < inum_half; ii++) {
     int i = ilist_half[ii];
     double xtmp = x[i][0];
     double ytmp = x[i][1];
     double ztmp = x[i][2];
     int* jlist = firstneigh_half[i];
     int jnum = numneigh_half[i];
 
     for(int jj = 0; jj < jnum; jj++) {
       int j = jlist[jj];
       j &= NEIGHMASK;
 
       double jdel[3];
       jdel[0] = x[j][0] - xtmp;
       jdel[1] = x[j][1] - ytmp;
       jdel[2] = x[j][2] - ztmp;
       double rij_sq = jdel[0]*jdel[0] + jdel[1]*jdel[1] + jdel[2]*jdel[2];
 
       if(rij_sq < cutforcesq) {
         double rij = sqrt(rij_sq);
 
         // only the derivative of rho is needed here; the value is discarded
         double rho_prime;
         rho.eval(rij, rho_prime);
         double fpair = rho_prime * (Uprime_values[i] + Uprime_values[j]);
 
         double pair_pot_deriv;
         double pair_pot = phi.eval(rij, pair_pot_deriv);
         fpair += pair_pot_deriv;
 
         // Divide by r_ij to get forces from gradient
 
         fpair /= rij;
 
         // atom j is updated unconditionally, with no newton_pair/nlocal
         // check; init_style() in this file rejects runs with newton pair off
         forces[i][0] += jdel[0]*fpair;
         forces[i][1] += jdel[1]*fpair;
         forces[i][2] += jdel[2]*fpair;
         forces[j][0] -= jdel[0]*fpair;
         forces[j][1] -= jdel[1]*fpair;
         forces[j][2] -= jdel[2]*fpair;
         if (evflag) ev_tally(i, j, nlocal, newton_pair,
                              pair_pot, 0.0, -fpair, jdel[0], jdel[1], jdel[2]);
       }
     }
   }
 
   if(vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairMEAMSpline::allocate()
 {
   // tables are sized ntypes+1; the loops in this file index types from 1
   const int dim = atom->ntypes + 1;
   allocated = 1;
 
   memory->create(setflag,dim,dim,"pair:setflag");
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   // atom type -> element index, filled in coeff()
   map = new int[dim];
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairMEAMSpline::settings(int narg, char **arg)
 {
   // this pair style accepts no pair_style arguments
   if(narg != 0) error->all(FLERR,"Illegal pair_style command");
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairMEAMSpline::coeff(int narg, char **arg)
 {
   int i,j,n;
 
   if (!allocated) allocate();
 
   if (narg != 3 + atom->ntypes)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // insure I,J args are * *
 
   if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // read args that map atom types to elements in potential file
   // map[i] = which element the Ith atom type is, -1 if NULL
   // nelements = # of unique elements
   // elements = list of element names
 
   if (elements) {
     for (i = 0; i < nelements; i++) delete [] elements[i];
     delete [] elements;
   }
   elements = new char*[atom->ntypes];
   for (i = 0; i < atom->ntypes; i++) elements[i] = NULL;
 
   nelements = 0;
   for (i = 3; i < narg; i++) {
     if (strcmp(arg[i],"NULL") == 0) {
       map[i-2] = -1;
       continue;
     }
     for (j = 0; j < nelements; j++)
       if (strcmp(arg[i],elements[j]) == 0) break;
     map[i-2] = j;
     if (j == nelements) {
       n = strlen(arg[i]) + 1;
       elements[j] = new char[n];
       strcpy(elements[j],arg[i]);
       nelements++;
     }
   }
 
   // for now, only allow single element
 
   if (nelements > 1)
     error->all(FLERR,
                "Pair meam/spline only supports single element potentials");
 
   // read potential file
 
   read_file(arg[2]);
 
   // clear setflag since coeff() called once with I,J = * *
 
   n = atom->ntypes;
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       setflag[i][j] = 0;
 
   // set setflag i,j for type pairs where both are mapped to elements
 
   int count = 0;
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       if (map[i] >= 0 && map[j] >= 0) {
         setflag[i][j] = 1;
         count++;
       }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    read spline MEAM potential file on proc 0, bcast splines to all procs
 ------------------------------------------------------------------------- */
 
 #define MAXLINE 1024
 
 void PairMEAMSpline::read_file(const char* filename)
 {
         const int me = comm->me;
 
         // Only rank 0 opens and parses the potential file.
         if(me == 0) {
                 FILE *fp = force->open_potential(filename);
                 if(fp == NULL) {
                         char str[1024];
                         sprintf(str,"Cannot open spline MEAM potential file %s", filename);
                         error->one(FLERR,str);
                 }
 
                 // The first line of the file is read and discarded.
                 char line[MAXLINE];
                 fgets(line, MAXLINE, fp);
 
                 // The five spline functions follow in this fixed order.
                 phi.parse(fp, error);
                 rho.parse(fp, error);
                 U.parse(fp, error);
                 f.parse(fp, error);
                 g.parse(fp, error);
 
                 fclose(fp);
         }
 
         // Hand the parsed splines from rank 0 to every other rank.
         phi.communicate(world, me);
         rho.communicate(world, me);
         f.communicate(world, me);
         U.communicate(world, me);
         g.communicate(world, me);
 
         // Embedding function value at zero density; compute() subtracts it.
         zero_atom_energy = U.eval(0.0);
 
         // Overall cutoff = largest cutoff among phi, rho and f (never below 0).
         const double candidates[3] = { phi.cutoff(), rho.cutoff(), f.cutoff() };
         cutoff = 0.0;
         for(int k = 0; k < 3; k++) {
                 if(candidates[k] > cutoff) cutoff = candidates[k];
         }
 
         // Mark every type pair as set and record the cutoff for it.
         const int ntypes = atom->ntypes;
         for(int itype = 1; itype <= ntypes; itype++) {
                 for(int jtype = 1; jtype <= ntypes; jtype++) {
                         setflag[itype][jtype] = 1;
                         cutsq[itype][jtype] = cutoff;
                 }
         }
 
         // Debugging aids, disabled:
         //phi.writeGnuplot("phi.gp", "Phi(r)");
         //rho.writeGnuplot("rho.gp", "Rho(r)");
         //f.writeGnuplot("f.gp", "f(r)");
         //U.writeGnuplot("U.gp", "U(rho)");
         //g.writeGnuplot("g.gp", "g(x)");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 void PairMEAMSpline::init_style()
 {
         // compute() updates forces on both atoms of every pair without a
         // newton_pair check, so newton pair must be on
         if(force->newton_pair == 0)
                 error->all(FLERR,"Pair style meam/spline requires newton pair on");
 
         // Need both full and half neighbor list.
         // Request id 1 is the full list; init_list() stores it as listfull.
-        int irequest_full = neighbor->request(this);
+        int irequest_full = neighbor->request(this,instance_me);
         neighbor->requests[irequest_full]->id = 1;
         neighbor->requests[irequest_full]->half = 0;
         neighbor->requests[irequest_full]->full = 1;
         // Request id 2 is flagged half_from_full with the full request as its
         // otherlist; init_list() stores it as listhalf.
-        int irequest_half = neighbor->request(this);
+        int irequest_half = neighbor->request(this,instance_me);
         neighbor->requests[irequest_half]->id = 2;
         neighbor->requests[irequest_half]->half = 0;
         neighbor->requests[irequest_half]->half_from_full = 1;
         neighbor->requests[irequest_half]->otherlist = irequest_full;
 }
 
 /* ----------------------------------------------------------------------
    neighbor callback to inform pair style of neighbor list to use
    half or full
 ------------------------------------------------------------------------- */
 void PairMEAMSpline::init_list(int id, NeighList *ptr)
 {
         // ids match those assigned to the requests in init_style()
         switch(id) {
         case 1:
                 listfull = ptr;
                 break;
         case 2:
                 listhalf = ptr;
                 break;
         default:
                 // any other id is ignored
                 break;
         }
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 double PairMEAMSpline::init_one(int i, int j)
 {
         // the same cutoff (set in read_file()) is returned for every type
         // pair; i and j are unused
         return cutoff;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int PairMEAMSpline::pack_forward_comm(int n, int *list, double *buf,
                                       int pbc_flag, int *pbc)
 {
         // copy U'(rho) of each listed atom into the buffer, one value per atom;
         // pbc_flag and pbc are not used
         for(int m = 0; m < n; m++)
                 buf[m] = Uprime_values[list[m]];
         return n;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairMEAMSpline::unpack_forward_comm(int n, int first, double *buf)
 {
         // received values are stored contiguously starting at index 'first'
         memcpy(&Uprime_values[first], buf, n * sizeof(buf[0]));
 }
 
 /* ---------------------------------------------------------------------- */
 
 int PairMEAMSpline::pack_reverse_comm(int n, int first, double *buf)
 {
         // nothing is packed (the constructor sets comm_reverse = 0)
         return 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairMEAMSpline::unpack_reverse_comm(int n, int *list, double *buf)
 {
         // intentionally empty: pack_reverse_comm() packs nothing
 }
 
 /* ----------------------------------------------------------------------
    Returns memory usage of local atom-based arrays
 ------------------------------------------------------------------------- */
 double PairMEAMSpline::memory_usage()
 {
         // only Uprime_values is counted; twoBodyInfo and the spline tables
         // are not included
         return nmax * sizeof(double);        // The Uprime_values array.
 }
 
 
 /// Parses the spline knots from a text file.
 void PairMEAMSpline::SplineFunction::parse(FILE* fp, Error* error)
 {
         char line[MAXLINE];
 
         // Parse number of spline knots.
         fgets(line, MAXLINE, fp);
         int n = atoi(line);
         if(n < 2)
                 error->one(FLERR,"Invalid number of spline knots in MEAM potential file");
 
         // Parse first derivatives at beginning and end of spline.
         fgets(line, MAXLINE, fp);
         double d0 = atof(strtok(line, " \t\n\r\f"));
         double dN = atof(strtok(NULL, " \t\n\r\f"));
         init(n, d0, dN);
 
         // Skip line.
         fgets(line, MAXLINE, fp);
 
         // Parse knot coordinates.
         for(int i=0; i<n; i++) {
                 fgets(line, MAXLINE, fp);
                 double x, y, y2;
                 if(sscanf(line, "%lg %lg %lg", &x, &y, &y2) != 3) {
                         error->one(FLERR,"Invalid knot line in MEAM potential file");
                 }
                 setKnot(i, x, y);
         }
 
         prepareSpline(error);
 }
 
 /// Calculates the second derivatives at the knots of the cubic spline.
 ///
 /// Reads X, Y, N, deriv0 and derivN; writes Y2, Xs, xmin, xmax,
 /// xmax_shifted, h, hsq and isGridSpline (plus Ydelta in the grid-only
 /// build). Reports an error through 'error' when the knots are not
 /// equidistant and non-grid support is compiled out.
 void PairMEAMSpline::SplineFunction::prepareSpline(Error* error)
 {
         xmin = X[0];
         xmax = X[N-1];
 
         // assume equidistant knots until a deviation is found below
         isGridSpline = true;
         h = (xmax-xmin)/(N-1);
         hsq = h*h;
 
         // u is scratch space for the forward sweep; freed before returning
         double* u = new double[N];
         // left boundary condition uses the prescribed first derivative deriv0
         Y2[0] = -0.5;
         u[0] = (3.0/(X[1]-X[0])) * ((Y[1]-Y[0])/(X[1]-X[0]) - deriv0);
         // forward sweep over the interior knots
         for(int i = 1; i <= N-2; i++) {
                 double sig = (X[i]-X[i-1]) / (X[i+1]-X[i-1]);
                 double p = sig * Y2[i-1] + 2.0;
                 Y2[i] = (sig - 1.0) / p;
                 u[i] = (Y[i+1]-Y[i]) / (X[i+1]-X[i]) - (Y[i]-Y[i-1])/(X[i]-X[i-1]);
                 u[i] = (6.0 * u[i]/(X[i+1]-X[i-1]) - sig*u[i-1])/p;
 
                 // knot i is off the uniform grid if it deviates by more than 1e-8
                 if(fabs(h*i+xmin - X[i]) > 1e-8)
                         isGridSpline = false;
         }
 
         // right boundary condition uses the prescribed first derivative derivN
         double qn = 0.5;
         double un = (3.0/(X[N-1]-X[N-2])) * (derivN - (Y[N-1]-Y[N-2])/(X[N-1]-X[N-2]));
         Y2[N-1] = (un - qn*u[N-2]) / (qn * Y2[N-2] + 1.0);
         // back substitution turns the sweep coefficients into second derivatives
         for(int k = N-2; k >= 0; k--) {
                 Y2[k] = Y2[k] * Y2[k+1] + u[k];
         }
 
         delete[] u;
 
 #if !SPLINE_MEAM_SUPPORT_NON_GRID_SPLINES
         if(!isGridSpline)
                 error->one(FLERR,"Support for MEAM potentials with non-uniform cubic splines has not been enabled in the MEAM potential code. Set SPLINE_MEAM_SUPPORT_NON_GRID_SPLINES in pair_spline_meam.h to 1 to enable it");
 #endif
 
         // Shift the spline to X=0 to speed up interpolation.
         for(int i = 0; i < N; i++) {
                 Xs[i] = X[i] - xmin;
 #if !SPLINE_MEAM_SUPPORT_NON_GRID_SPLINES
                 // grid-only build: store the slope of each interval and
                 // pre-divide the second derivatives by 6h
                 if(i < N-1) Ydelta[i] = (Y[i+1]-Y[i])/h;
                 Y2[i] /= h*6.0;
 #endif
         }
         xmax_shifted = xmax - xmin;
 }
 
 /// Broadcasts the spline function parameters to all processors.
 ///
 /// Rank 0 is the source of every broadcast. All other ranks allocate their
 /// knot arrays here, after the knot count N has arrived.
 void PairMEAMSpline::SplineFunction::communicate(MPI_Comm& world, int me)
 {
         const int root = 0;
 
         // Scalar parameters first; N is needed to size the arrays below.
         MPI_Bcast(&N, 1, MPI_INT, root, world);
         MPI_Bcast(&deriv0, 1, MPI_DOUBLE, root, world);
         MPI_Bcast(&derivN, 1, MPI_DOUBLE, root, world);
         MPI_Bcast(&xmin, 1, MPI_DOUBLE, root, world);
         MPI_Bcast(&xmax, 1, MPI_DOUBLE, root, world);
         MPI_Bcast(&xmax_shifted, 1, MPI_DOUBLE, root, world);
         // NOTE(review): isGridSpline is sent as MPI_INT but is assigned
         // true/false in prepareSpline() - confirm its declared type is int.
         MPI_Bcast(&isGridSpline, 1, MPI_INT, root, world);
         MPI_Bcast(&h, 1, MPI_DOUBLE, root, world);
         MPI_Bcast(&hsq, 1, MPI_DOUBLE, root, world);
 
         // Receiving ranks need storage for the knot data.
         const bool isReceiver = (me != root);
         if(isReceiver) {
                 X = new double[N];
                 Xs = new double[N];
                 Y = new double[N];
                 Y2 = new double[N];
                 Ydelta = new double[N];
         }
 
         // Per-knot arrays, N entries each.
         MPI_Bcast(X, N, MPI_DOUBLE, root, world);
         MPI_Bcast(Xs, N, MPI_DOUBLE, root, world);
         MPI_Bcast(Y, N, MPI_DOUBLE, root, world);
         MPI_Bcast(Y2, N, MPI_DOUBLE, root, world);
         MPI_Bcast(Ydelta, N, MPI_DOUBLE, root, world);
 }
 
 /// Writes a Gnuplot script that plots the spline function.
 ///
 /// This function is for debugging only!
 ///
 /// The script contains two inline data sets: the spline sampled at
 /// 200 points per knot over the knot range widened by 5% on each side,
 /// followed by the knots themselves. If the file cannot be opened for
 /// writing, nothing is written and the function returns silently.
 ///
 /// filename: path of the script to create (overwritten if it exists)
 /// title:    optional plot title; no title line is emitted when NULL
 void PairMEAMSpline::SplineFunction::writeGnuplot(const char* filename, const char* title) const
 {
         FILE* fp = fopen(filename, "w");
         // Writing to a NULL stream is undefined behavior; since this is only
         // a debugging aid, skip the output instead of aborting the run.
         if(fp == NULL) return;
 
         fprintf(fp, "#!/usr/bin/env gnuplot\n");
         if(title) fprintf(fp, "set title \"%s\"\n", title);
         double tmin = X[0] - (X[N-1] - X[0]) * 0.05;
         double tmax = X[N-1] + (X[N-1] - X[0]) * 0.05;
         double delta = (tmax - tmin) / (N*200);
         fprintf(fp, "set xrange [%f:%f]\n", tmin, tmax);
         fprintf(fp, "plot '-' with lines notitle, '-' with points notitle pt 3 lc 3\n");
         // First data set: the interpolated curve. The small tolerance keeps
         // the last sample despite floating-point accumulation in x.
         for(double x = tmin; x <= tmax+1e-8; x += delta) {
                 double y = eval(x);
                 fprintf(fp, "%f %f\n", x, y);
         }
         fprintf(fp, "e\n");
         // Second data set: the knots.
         for(int i = 0; i < N; i++) {
                 fprintf(fp, "%f %f\n", X[i], Y[i]);
         }
         fprintf(fp, "e\n");
         fclose(fp);
 }
 
 /* ----------------------------------------------------------------------
  * Spline-based Modified Embedded Atom method (MEAM) potential routine.
  *
  * Copyright (2011) Lawrence Livermore National Security, LLC.
  * Produced at the Lawrence Livermore National Laboratory.
  * Written by Alexander Stukowski (<alex@stukowski.com>).
  * LLNL-CODE-525797 All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it under
  * the terms of the GNU General Public License (as published by the Free
  * Software Foundation) version 2, dated June 1991.
  *
  * This program is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the IMPLIED WARRANTY OF MERCHANTABILITY
  * or FITNESS FOR A PARTICULAR PURPOSE. See the terms and conditions of the
  * GNU General Public License for more details.
  *
  * Our Preamble Notice
  * A. This notice is required to be provided under our contract with the
  * U.S. Department of Energy (DOE). This work was produced at the
  * Lawrence Livermore National Laboratory under Contract No.
  * DE-AC52-07NA27344 with the DOE.
  *
  * B. Neither the United States Government nor Lawrence Livermore National
  * Security, LLC nor any of their employees, makes any warranty, express or
  * implied, or assumes any liability or responsibility for the accuracy,
  * completeness, or usefulness of any information, apparatus, product, or
  * process disclosed, or represents that its use would not infringe
  * privately-owned rights.
  *
  * C. Also, reference herein to any specific commercial products, process,
  * or services by trade name, trademark, manufacturer or otherwise does not
  * necessarily constitute or imply its endorsement, recommendation, or
  * favoring by the United States Government or Lawrence Livermore National
  * Security, LLC. The views and opinions of authors expressed herein do not
  * necessarily state or reflect those of the United States Government or
  * Lawrence Livermore National Security, LLC, and shall not be used for
  * advertising or product endorsement purposes.
 ------------------------------------------------------------------------- */
diff --git a/src/USER-MISC/pair_meam_sw_spline.cpp b/src/USER-MISC/pair_meam_sw_spline.cpp
index 9c275b9d4..30613afab 100644
--- a/src/USER-MISC/pair_meam_sw_spline.cpp
+++ b/src/USER-MISC/pair_meam_sw_spline.cpp
@@ -1,1000 +1,1000 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Robert Rudd (LLNL), robert.rudd@llnl.gov
    Based on the spline-based MEAM routine written by
      Alexander Stukowski (LLNL), alex@stukowski.com
    see LLNL copyright notice at bottom of file
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
  * File history of changes:
  * 01-Aug-12 - RER: First code version.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_meam_sw_spline.h"
 #include "atom.h"
 #include "force.h"
 #include "comm.h"
 #include "memory.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairMEAMSWSpline::PairMEAMSWSpline(LAMMPS *lmp) : Pair(lmp)
 {
   // Capability flags inherited from Pair.
   manybody_flag = 1;
   one_coeff = 1;
   restartinfo = 0;
   single_enable = 0;
 
   // Forward/reverse communication sizes used by this style.
   comm_reverse = 0;
   comm_forward = 1;
 
   // No atom-type -> element mapping has been read yet.
   elements = NULL;
   nelements = 0;
 
   // Scratch buffers start out empty and are sized later in compute().
   twoBodyInfo = NULL;
   maxNeighbors = 0;
   Uprime_values = NULL;
   nmax = 0;
   //ESWprime_values = NULL;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairMEAMSWSpline::~PairMEAMSWSpline()
 {
   // Free every stored element name, then the table that held them.
   if (elements != NULL) {
     int ielem = 0;
     while (ielem < nelements) {
       delete [] elements[ielem];
       ++ielem;
     }
   }
   delete [] elements;
 
   // Scratch storage that compute() grows on demand.
   memory->destroy(Uprime_values);
   delete [] twoBodyInfo;
   //memory->destroy(ESWprime_values);
 
   // Per-type tables exist only if allocate() has run.
   if (allocated) {
     delete [] map;
     memory->destroy(cutsq);
     memory->destroy(setflag);
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Evaluate energies and forces in two passes.
    Pass 1 walks the full neighbor list: for every atom i it gathers the
    in-cutoff bonds, accumulates the two densities (rho_value via f/g/rho,
    rhoSW_value via F/G), stores U'(rho_i) in Uprime_values, and applies
    the three-body forces.
    Pass 2 walks the half neighbor list after the U' values have been
    forward-communicated and applies the pair forces from rho' and phi'.
    eflag / vflag are handed to ev_setup() and select energy/virial tallying.
 ------------------------------------------------------------------------- */
 void PairMEAMSWSpline::compute(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag, vflag);
   else evflag = vflag_fdotr =
          eflag_global = vflag_global = eflag_atom = vflag_atom = 0;
 
   double cutforcesq = cutoff*cutoff;
 
   // Grow per-atom array if necessary
 
   if (atom->nmax > nmax) {
     memory->destroy(Uprime_values);
     //memory->destroy(ESWprime_values);
     nmax = atom->nmax;
     memory->create(Uprime_values,nmax,"pair:Uprime");
     //memory->create(ESWprime_values,nmax,"pair:ESWprime");
   }
 
   double** const x = atom->x;
   double** forces = atom->f;
   int nlocal = atom->nlocal;
   bool newton_pair = force->newton_pair;
 
   int inum_full = listfull->inum;
   int* ilist_full = listfull->ilist;
   int* numneigh_full = listfull->numneigh;
   int** firstneigh_full = listfull->firstneigh;
 
   // Determine the maximum number of neighbors a single atom has
 
   int newMaxNeighbors = 0;
   for(int ii = 0; ii < inum_full; ii++) {
     int jnum = numneigh_full[ilist_full[ii]];
     if(jnum > newMaxNeighbors) newMaxNeighbors = jnum;
   }
 
   // Allocate array for temporary bond info
   // (only ever grown; the old contents are discarded, not copied)
 
   if(newMaxNeighbors > maxNeighbors) {
     maxNeighbors = newMaxNeighbors;
     delete[] twoBodyInfo;
     twoBodyInfo = new MEAM2Body[maxNeighbors];
   }
 
   // Sum three-body contributions to charge density and
   // compute embedding energies
 
   for(int ii = 0; ii < inum_full; ii++) {
     int i = ilist_full[ii];
     double xtmp = x[i][0];
     double ytmp = x[i][1];
     double ztmp = x[i][2];
     int* jlist = firstneigh_full[i];
     int jnum = numneigh_full[i];
     double rho_value = 0;
     double rhoSW_value = 0;
     // numBonds counts the in-cutoff neighbors stored so far for atom i;
     // nextTwoBodyInfo points at the next free slot of twoBodyInfo.
     int numBonds = 0;
     MEAM2Body* nextTwoBodyInfo = twoBodyInfo;
 
     for(int jj = 0; jj < jnum; jj++) {
       int j = jlist[jj];
       j &= NEIGHMASK;
 
       double jdelx = x[j][0] - xtmp;
       double jdely = x[j][1] - ytmp;
       double jdelz = x[j][2] - ztmp;
       double rij_sq = jdelx*jdelx + jdely*jdely + jdelz*jdelz;
 
       if(rij_sq < cutforcesq) {
         double rij = sqrt(rij_sq);
         double partial_sum = 0;
         double partial_sum2 = 0;
 
         // Record this bond: neighbor index, length, spline values with
         // their derivatives, and the unit vector from i to j.
         nextTwoBodyInfo->tag = j;
         nextTwoBodyInfo->r = rij;
         nextTwoBodyInfo->f = f.eval(rij, nextTwoBodyInfo->fprime);
         nextTwoBodyInfo->F = F.eval(rij, nextTwoBodyInfo->Fprime);
         nextTwoBodyInfo->del[0] = jdelx / rij;
         nextTwoBodyInfo->del[1] = jdely / rij;
         nextTwoBodyInfo->del[2] = jdelz / rij;
 
         // Pair the new bond with every bond stored earlier, so each
         // unordered (j,k) pair of neighbors is visited exactly once.
         for(int kk = 0; kk < numBonds; kk++) {
           const MEAM2Body& bondk = twoBodyInfo[kk];
           double cos_theta = (nextTwoBodyInfo->del[0]*bondk.del[0] +
                               nextTwoBodyInfo->del[1]*bondk.del[1] +
                               nextTwoBodyInfo->del[2]*bondk.del[2]);
           partial_sum += bondk.f * g.eval(cos_theta);
           partial_sum2 += bondk.F * G.eval(cos_theta);
         }
 
         rho_value += nextTwoBodyInfo->f * partial_sum;
         rhoSW_value += nextTwoBodyInfo->F * partial_sum2;
         rho_value += rho.eval(rij);
 
         numBonds++;
         nextTwoBodyInfo++;
       }
     }
 
     // Compute embedding energy and its derivative
 
     double Uprime_i;
     double embeddingEnergy = U.eval(rho_value, Uprime_i) - zero_atom_energy;
     // The F/G sum enters the energy directly, so its derivative
     // factor ESWprime_i is the constant 1.0.
     double SWEnergy = rhoSW_value;
     double ESWprime_i = 1.0;
     Uprime_values[i] = Uprime_i;
     // ESWprime_values[i] = ESWprime_i;
     if(eflag) {
       if(eflag_global) eng_vdwl += embeddingEnergy + SWEnergy;
       if(eflag_atom) eatom[i] += embeddingEnergy + SWEnergy;
     }
 
     double forces_i[3] = {0, 0, 0};
 
     // Compute three-body contributions to force
 
     for(int jj = 0; jj < numBonds; jj++) {
       // Note: bondj is a by-value copy of the stored bond record.
       const MEAM2Body bondj = twoBodyInfo[jj];
       double rij = bondj.r;
       int j = bondj.tag;
 
       double f_rij_prime = bondj.fprime;
       double F_rij_prime = bondj.Fprime;
       double f_rij = bondj.f;
       double F_rij = bondj.F;
 
       double forces_j[3] = {0, 0, 0};
 
       // Inner loop runs over kk < jj only, matching the pair enumeration
       // used when the densities were accumulated above.
       MEAM2Body const* bondk = twoBodyInfo;
       for(int kk = 0; kk < jj; kk++, ++bondk) {
         double rik = bondk->r;
 
         double cos_theta = (bondj.del[0]*bondk->del[0] +
                             bondj.del[1]*bondk->del[1] +
                             bondj.del[2]*bondk->del[2]);
         double g_prime;
         double g_value = g.eval(cos_theta, g_prime);
         double G_prime;
         double G_value = G.eval(cos_theta, G_prime);
         double f_rik_prime = bondk->fprime;
         double f_rik = bondk->f;
         double F_rik_prime = bondk->Fprime;
         double F_rik = bondk->F;
 
         // Radial parts: derivative of the f (resp. F) factor of one bond,
         // weighted by the value on the other bond and by g (resp. G).
         double fij = -Uprime_i * g_value * f_rik * f_rij_prime;
         double fik = -Uprime_i * g_value * f_rij * f_rik_prime;
         double Fij = -ESWprime_i * G_value * F_rik * F_rij_prime;
         double Fik = -ESWprime_i * G_value * F_rij * F_rik_prime;
 
         // Angular parts: derivative of g (resp. G) with respect to
         // cos_theta, divided by the bond length it is applied along.
         double prefactor = Uprime_i * f_rij * f_rik * g_prime;
         double prefactor2 = ESWprime_i * F_rij * F_rik * G_prime;
         double prefactor_ij = prefactor / rij;
         double prefactor_ik = prefactor / rik;
         fij += prefactor_ij * cos_theta;
         fik += prefactor_ik * cos_theta;
         double prefactor2_ij = prefactor2 / rij;
         double prefactor2_ik = prefactor2 / rik;
         Fij += prefactor2_ij * cos_theta;
         Fik += prefactor2_ik * cos_theta;
 
         double fj[3], fk[3];
 
         fj[0]  = bondj.del[0] * fij - bondk->del[0] * prefactor_ij;
         fj[1]  = bondj.del[1] * fij - bondk->del[1] * prefactor_ij;
         fj[2]  = bondj.del[2] * fij - bondk->del[2] * prefactor_ij;
         fj[0] += bondj.del[0] * Fij - bondk->del[0] * prefactor2_ij;
         fj[1] += bondj.del[1] * Fij - bondk->del[1] * prefactor2_ij;
         fj[2] += bondj.del[2] * Fij - bondk->del[2] * prefactor2_ij;
         forces_j[0] += fj[0];
         forces_j[1] += fj[1];
         forces_j[2] += fj[2];
 
         fk[0]  = bondk->del[0] * fik - bondj.del[0] * prefactor_ik;
         fk[1]  = bondk->del[1] * fik - bondj.del[1] * prefactor_ik;
         fk[2]  = bondk->del[2] * fik - bondj.del[2] * prefactor_ik;
         fk[0] += bondk->del[0] * Fik - bondj.del[0] * prefactor2_ik;
         fk[1] += bondk->del[1] * Fik - bondj.del[1] * prefactor2_ik;
         fk[2] += bondk->del[2] * Fik - bondj.del[2] * prefactor2_ik;
         forces_i[0] -= fk[0];
         forces_i[1] -= fk[1];
         forces_i[2] -= fk[2];
 
         int k = bondk->tag;
         forces[k][0] += fk[0];
         forces[k][1] += fk[1];
         forces[k][2] += fk[2];
 
         if(evflag) {
           // Rebuild the full displacement vectors from unit vector * length.
           double delta_ij[3];
           double delta_ik[3];
           delta_ij[0] = bondj.del[0] * rij;
           delta_ij[1] = bondj.del[1] * rij;
           delta_ij[2] = bondj.del[2] * rij;
           delta_ik[0] = bondk->del[0] * rik;
           delta_ik[1] = bondk->del[1] * rik;
           delta_ik[2] = bondk->del[2] * rik;
           ev_tally3(i, j, k, 0.0, 0.0, fj, fk, delta_ij, delta_ik);
         }
       }
 
       // Apply the summed j-side force to j and its opposite to i.
       forces[i][0] -= forces_j[0];
       forces[i][1] -= forces_j[1];
       forces[i][2] -= forces_j[2];
       forces[j][0] += forces_j[0];
       forces[j][1] += forces_j[1];
       forces[j][2] += forces_j[2];
     }
 
     // forces_i holds the negated sum of all k-side forces for this atom.
     forces[i][0] += forces_i[0];
     forces[i][1] += forces_i[1];
     forces[i][2] += forces_i[2];
   }
 
   // Communicate U'(rho) values
 
   comm->forward_comm_pair(this);
 
   int inum_half = listhalf->inum;
   int* ilist_half = listhalf->ilist;
   int* numneigh_half = listhalf->numneigh;
   int** firstneigh_half = listhalf->firstneigh;
 
   // Compute two-body pair interactions
 
   for(int ii = 0; ii < inum_half; ii++) {
     int i = ilist_half[ii];
     double xtmp = x[i][0];
     double ytmp = x[i][1];
     double ztmp = x[i][2];
     int* jlist = firstneigh_half[i];
     int jnum = numneigh_half[i];
 
     for(int jj = 0; jj < jnum; jj++) {
       int j = jlist[jj];
       j &= NEIGHMASK;
 
       double jdel[3];
       jdel[0] = x[j][0] - xtmp;
       jdel[1] = x[j][1] - ytmp;
       jdel[2] = x[j][2] - ztmp;
       double rij_sq = jdel[0]*jdel[0] + jdel[1]*jdel[1] + jdel[2]*jdel[2];
 
       if(rij_sq < cutforcesq) {
         double rij = sqrt(rij_sq);
 
         // Only the derivative of rho is needed here; the value is discarded.
         double rho_prime;
         rho.eval(rij, rho_prime);
         double fpair = rho_prime * (Uprime_values[i] + Uprime_values[j]);
 
         double pair_pot_deriv;
         double pair_pot = phi.eval(rij, pair_pot_deriv);
         fpair += pair_pot_deriv;
 
         // Divide by r_ij to get forces from gradient
 
         fpair /= rij;
 
         // jdel points from i to j, so +fpair*jdel goes on i and the
         // opposite on j; ev_tally is given -fpair accordingly.
         forces[i][0] += jdel[0]*fpair;
         forces[i][1] += jdel[1]*fpair;
         forces[i][2] += jdel[2]*fpair;
         forces[j][0] -= jdel[0]*fpair;
         forces[j][1] -= jdel[1]*fpair;
         forces[j][2] -= jdel[2]*fpair;
         if (evflag) ev_tally(i, j, nlocal, newton_pair,
                              pair_pot, 0.0, -fpair, jdel[0], jdel[1], jdel[2]);
       }
     }
   }
 
   if(vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairMEAMSWSpline::allocate()
 {
   allocated = 1;
 
   // One extra slot per dimension: the per-type tables are addressed
   // with indices 1..ntypes elsewhere in this file.
   const int tableSize = atom->ntypes + 1;
 
   memory->create(setflag,tableSize,tableSize,"pair:setflag");
   memory->create(cutsq,tableSize,tableSize,"pair:cutsq");
 
   map = new int[tableSize];
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairMEAMSWSpline::settings(int narg, char **arg)
 {
   if(narg != 0) error->all(FLERR,"Illegal pair_style command");
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairMEAMSWSpline::coeff(int narg, char **arg)
 {
   if (!allocated) allocate();
 
   // expected layout: "* *", the potential file, then one name per atom type
 
   if (narg != 3 + atom->ntypes)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // ensure I,J args are * *
 
   const bool firstIsWildcard = (strcmp(arg[0],"*") == 0);
   const bool secondIsWildcard = (strcmp(arg[1],"*") == 0);
   if (!(firstIsWildcard && secondIsWildcard))
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // drop any element table left over from an earlier call,
   // then start again with an empty one
 
   if (elements) {
     for (int iold = 0; iold < nelements; iold++) delete [] elements[iold];
     delete [] elements;
   }
   elements = new char*[atom->ntypes];
   for (int islot = 0; islot < atom->ntypes; islot++) elements[islot] = NULL;
 
   // read args that map atom types to elements in potential file
   // map[itype] = which element that atom type is, -1 if NULL
   // nelements = # of unique elements
   // elements = list of element names
 
   nelements = 0;
   for (int iarg = 3; iarg < narg; iarg++) {
     const int itype = iarg - 2;
     const char *name = arg[iarg];
 
     if (strcmp(name,"NULL") == 0) {
       map[itype] = -1;
       continue;
     }
 
     // linear search through the names collected so far
     int ielem = 0;
     while (ielem < nelements && strcmp(name,elements[ielem]) != 0) ielem++;
     map[itype] = ielem;
 
     // first occurrence of this name: store a private copy
     if (ielem == nelements) {
       int len = strlen(name) + 1;
       elements[ielem] = new char[len];
       strcpy(elements[ielem],name);
       nelements++;
     }
   }
 
   // for now, only allow single element
 
   if (nelements > 1)
     error->all(FLERR,
                "Pair meam/sw/spline only supports single element potentials");
 
   // read potential file
 
   read_file(arg[2]);
 
   // rebuild setflag since coeff() is called once with I,J = * *:
   // an (itype,jtype) entry is set only when both types map to an element
 
   const int ntypes = atom->ntypes;
   int count = 0;
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       if (map[itype] >= 0 && map[jtype] >= 0) {
         setflag[itype][jtype] = 1;
         count++;
       } else {
         setflag[itype][jtype] = 0;
       }
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    read the spline functions from the potential file and broadcast them
 ------------------------------------------------------------------------- */
 
 #define MAXLINE 1024
 
 void PairMEAMSWSpline::read_file(const char* filename)
 {
         if(comm->me == 0) {
                 FILE *fp = force->open_potential(filename);
                 if(fp == NULL) {
                         char str[1024];
                         sprintf(str,"Cannot open spline MEAM potential file %s", filename);
                         error->one(FLERR,str);
                 }
 
                 // Skip first line of file.
                 char line[MAXLINE];
                 fgets(line, MAXLINE, fp);
 
                 // Parse spline functions.
                 phi.parse(fp, error);
                 F.parse(fp, error);
                 G.parse(fp, error);
                 rho.parse(fp, error);
                 U.parse(fp, error);
                 f.parse(fp, error);
                 g.parse(fp, error);
 
                 fclose(fp);
         }
 
         // Transfer spline functions from master processor to all other processors.
         phi.communicate(world, comm->me);
         rho.communicate(world, comm->me);
         f.communicate(world, comm->me);
         U.communicate(world, comm->me);
         g.communicate(world, comm->me);
         F.communicate(world, comm->me);
         G.communicate(world, comm->me);
 
         // Calculate 'zero-point energy' of single atom in vacuum.
         zero_atom_energy = U.eval(0.0);
 
         // Determine maximum cutoff radius of all relevant spline functions.
         cutoff = 0.0;
         if(phi.cutoff() > cutoff) cutoff = phi.cutoff();
         if(rho.cutoff() > cutoff) cutoff = rho.cutoff();
         if(f.cutoff() > cutoff) cutoff = f.cutoff();
         if(F.cutoff() > cutoff) cutoff = F.cutoff();
 
         // Set LAMMPS pair interaction flags.
         for(int i = 1; i <= atom->ntypes; i++) {
                 for(int j = 1; j <= atom->ntypes; j++) {
                         setflag[i][j] = 1;
                         cutsq[i][j] = cutoff;
                 }
         }
 
         // phi.writeGnuplot("phi.gp", "Phi(r)");
         // rho.writeGnuplot("rho.gp", "Rho(r)");
         // f.writeGnuplot("f.gp", "f(r)");
         // U.writeGnuplot("U.gp", "U(rho)");
         // g.writeGnuplot("g.gp", "g(x)");
         // F.writeGnuplot("F.gp", "F(r)");
         // G.writeGnuplot("G.gp", "G(x)");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 void PairMEAMSWSpline::init_style()
 {
         // This style refuses to run unless newton pair is on.
         if(force->newton_pair == 0)
                 error->all(FLERR,"Pair style meam/sw/spline requires newton pair on");
 
         // Need both full and half neighbor list.
         // Request id 1 is the full list; init_list() stores it as listfull.
-        int irequest_full = neighbor->request(this);
+        int irequest_full = neighbor->request(this,instance_me);
         neighbor->requests[irequest_full]->id = 1;
         neighbor->requests[irequest_full]->half = 0;
         neighbor->requests[irequest_full]->full = 1;
         // Request id 2 is a half list derived from the full list above;
         // init_list() stores it as listhalf.
-        int irequest_half = neighbor->request(this);
+        int irequest_half = neighbor->request(this,instance_me);
         neighbor->requests[irequest_half]->id = 2;
         neighbor->requests[irequest_half]->half = 0;
         neighbor->requests[irequest_half]->half_from_full = 1;
         neighbor->requests[irequest_half]->otherlist = irequest_full;
 }
 
 /* ----------------------------------------------------------------------
    neighbor callback to inform pair style of neighbor list to use
    half or full
 ------------------------------------------------------------------------- */
 void PairMEAMSWSpline::init_list(int id, NeighList *ptr)
 {
         // The ids match those assigned to the two requests in init_style();
         // any other id is ignored.
         switch(id) {
         case 1:
                 listfull = ptr;
                 break;
         case 2:
                 listhalf = ptr;
                 break;
         default:
                 break;
         }
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 double PairMEAMSWSpline::init_one(int i, int j)
 {
         // Every type pair reports the same cutoff; i and j are not consulted.
         const double pairCutoff = cutoff;
         return pairCutoff;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int PairMEAMSWSpline::pack_forward_comm(int n, int *list, double *buf, 
                                         int pbc_flag, int *pbc)
 {
         // Copy the U' value of each listed atom into the buffer, in list
         // order. pbc_flag and pbc are not used.
         for(int m = 0; m < n; m++)
                 buf[m] = Uprime_values[list[m]];
 
         // One value was packed per listed atom.
         return n;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairMEAMSWSpline::unpack_forward_comm(int n, int first, double *buf)
 {
         // Store the n received values in the consecutive slots
         // first .. first+n-1 of the per-atom U' array.
         double* const destination = &Uprime_values[first];
         const size_t numBytes = n * sizeof(buf[0]);
         memcpy(destination, buf, numBytes);
 }
 
 /* ---------------------------------------------------------------------- */
 
 int PairMEAMSWSpline::pack_reverse_comm(int n, int first, double *buf)
 {
         // Nothing is written to buf; report zero packed values.
         const int numPacked = 0;
         return numPacked;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairMEAMSWSpline::unpack_reverse_comm(int n, int *list, double *buf)
 {
         // Intentionally empty: pack_reverse_comm() packs nothing, so there
         // is nothing to unpack.
 }
 
 /* ----------------------------------------------------------------------
    Returns memory usage of local atom-based arrays
 ------------------------------------------------------------------------- */
 double PairMEAMSWSpline::memory_usage()
 {
         // Only the Uprime_values array is counted: nmax doubles.
         const double bytes = nmax * sizeof(double);
         return bytes;
 }
 
 
 /// Parses the spline knots from a text file.
 void PairMEAMSWSpline::SplineFunction::parse(FILE* fp, Error* error)
 {
         char line[MAXLINE];
 
         // Parse number of spline knots.
         fgets(line, MAXLINE, fp);
         int n = atoi(line);
         if(n < 2)
                 error->one(FLERR,"Invalid number of spline knots in MEAM potential file");
 
         // Parse first derivatives at beginning and end of spline.
         fgets(line, MAXLINE, fp);
         double d0 = atof(strtok(line, " \t\n\r\f"));
         double dN = atof(strtok(NULL, " \t\n\r\f"));
         init(n, d0, dN);
 
         // Skip line.
         fgets(line, MAXLINE, fp);
 
         // Parse knot coordinates.
         for(int i=0; i<n; i++) {
                 fgets(line, MAXLINE, fp);
                 double x, y, y2;
                 if(sscanf(line, "%lg %lg %lg", &x, &y, &y2) != 3) {
                         error->one(FLERR,"Invalid knot line in MEAM potential file");
                 }
                 setKnot(i, x, y);
         }
 
         prepareSpline(error);
 }
 
 /// Calculates the second derivatives at the knots of the cubic spline.
 /// Fills Y2 from the knots (X, Y) and the boundary derivatives deriv0 and
 /// derivN, records xmin/xmax/h/hsq, detects whether the knots lie on a
 /// uniform grid, and builds the shifted abscissae Xs.
 ///
 /// \param error  LAMMPS error object; used when a non-uniform spline is
 ///               found but support for it is compiled out.
 void PairMEAMSWSpline::SplineFunction::prepareSpline(Error* error)
 {
         xmin = X[0];
         xmax = X[N-1];
 
         // Assume a uniform grid until a knot is found off the grid.
         isGridSpline = true;
         h = (xmax-xmin)/((double)(N-1));
         hsq = h*h;
 
         // Forward sweep: Y2 temporarily holds the elimination factors and
         // u the transformed right-hand side.
         double* u = new double[N];
         Y2[0] = -0.5;
         u[0] = (3.0/(X[1]-X[0])) * ((Y[1]-Y[0])/(X[1]-X[0]) - deriv0);
         for(int i = 1; i <= N-2; i++) {
                 double sig = (X[i]-X[i-1]) / (X[i+1]-X[i-1]);
                 double p = sig * Y2[i-1] + 2.0;
                 Y2[i] = (sig - 1.0) / p;
                 u[i] = (Y[i+1]-Y[i]) / (X[i+1]-X[i]) - (Y[i]-Y[i-1])/(X[i]-X[i-1]);
                 u[i] = (6.0 * u[i]/(X[i+1]-X[i-1]) - sig*u[i-1])/p;
 
                 // Interior knot deviates from xmin + i*h by more than 1e-8.
                 if(fabs(h*i+xmin - X[i]) > 1e-8)
                         isGridSpline = false;
         }
 
         // Upper boundary condition from derivN, then back-substitution
         // turns Y2 into the final second derivatives.
         double qn = 0.5;
         double un = (3.0/(X[N-1]-X[N-2])) * (derivN - (Y[N-1]-Y[N-2])/(X[N-1]-X[N-2]));
         Y2[N-1] = (un - qn*u[N-2]) / (qn * Y2[N-2] + 1.0);
         for(int k = N-2; k >= 0; k--) {
                 Y2[k] = Y2[k] * Y2[k+1] + u[k];
         }
 
         delete[] u;
 
 #if !SPLINE_MEAM_SUPPORT_NON_GRID_SPLINES
         if(!isGridSpline)
                 error->one(FLERR,"Support for MEAM potentials with non-uniform cubic splines has not been enabled in the MEAM potential code. Set SPLINE_MEAM_SUPPORT_NON_GRID_SPLINES in pair_spline_meam.h to 1 to enable it");
 #endif
 
         // Shift the spline to X=0 to speed up interpolation.
         for(int i = 0; i < N; i++) {
                 Xs[i] = X[i] - xmin;
 #if !SPLINE_MEAM_SUPPORT_NON_GRID_SPLINES
                 // Grid-only build: precompute the per-interval slope and
                 // fold the 1/(6h) factor into Y2.
                 if(i < N-1) Ydelta[i] = (Y[i+1]-Y[i])/h;
                 Y2[i] /= h*6.0;
 #endif
         }
         xmax_shifted = xmax - xmin;
 }
 
 /// Broadcasts the spline function parameters to all processors.
 void PairMEAMSWSpline::SplineFunction::communicate(MPI_Comm& world, int me)
 {
         const int root = 0;
 
         // Knot count first: receivers need it to size their arrays.
         MPI_Bcast(&N, 1, MPI_INT, root, world);
 
         // Scalar parameters, one broadcast each.
         double* const boundaryAndRange[] = { &deriv0, &derivN, &xmin, &xmax, &xmax_shifted };
         const int numBoundaryAndRange = sizeof(boundaryAndRange) / sizeof(boundaryAndRange[0]);
         for(int s = 0; s < numBoundaryAndRange; s++)
                 MPI_Bcast(boundaryAndRange[s], 1, MPI_DOUBLE, root, world);
         MPI_Bcast(&isGridSpline, 1, MPI_INT, root, world);
         MPI_Bcast(&h, 1, MPI_DOUBLE, root, world);
         MPI_Bcast(&hsq, 1, MPI_DOUBLE, root, world);
 
         // Every rank except the root allocates fresh storage for the knots.
         const bool isReceiver = (me != root);
         if(isReceiver) {
                 X = new double[N];
                 Xs = new double[N];
                 Y = new double[N];
                 Y2 = new double[N];
                 Ydelta = new double[N];
         }
 
         // Per-knot arrays, N entries each.
         double* const knotArrays[] = { X, Xs, Y, Y2, Ydelta };
         const int numKnotArrays = sizeof(knotArrays) / sizeof(knotArrays[0]);
         for(int a = 0; a < numKnotArrays; a++)
                 MPI_Bcast(knotArrays[a], N, MPI_DOUBLE, root, world);
 }
 
 /// Writes a Gnuplot script that plots the spline function.
 ///
 /// This function is for debugging only!
 /// Writes a Gnuplot script to filename: the interpolated curve sampled
 /// over the knot range widened by 5% on each side, followed by the knots
 /// themselves as points.
 ///
 /// \param filename  output path, opened for writing.
 /// \param title     optional plot title; omitted when NULL.
 void PairMEAMSWSpline::SplineFunction::writeGnuplot(const char* filename, const char* title) const
 {
         FILE* fp = fopen(filename, "w");
         if(fp == NULL) {
                 // Debug helper: report the failure and give up instead of
                 // writing through a NULL stream.
                 fprintf(stderr, "Cannot open Gnuplot output file %s\n", filename);
                 return;
         }
         fprintf(fp, "#!/usr/bin/env gnuplot\n");
         if(title) fprintf(fp, "set title \"%s\"\n", title);
         double tmin = X[0] - (X[N-1] - X[0]) * 0.05;
         double tmax = X[N-1] + (X[N-1] - X[0]) * 0.05;
         // 200 samples per knot across the widened range.
         double delta = (tmax - tmin) / (N*200);
         fprintf(fp, "set xrange [%f:%f]\n", tmin, tmax);
         fprintf(fp, "plot '-' with lines notitle, '-' with points notitle pt 3 lc 3\n");
         // First inline data set: the interpolated curve.
         for(double x = tmin; x <= tmax+1e-8; x += delta) {
                 double y = eval(x);
                 fprintf(fp, "%f %f\n", x, y);
         }
         fprintf(fp, "e\n");
         // Second inline data set: the raw knots.
         for(int i = 0; i < N; i++) {
                 fprintf(fp, "%f %f\n", X[i], Y[i]);
         }
         fprintf(fp, "e\n");
         fclose(fp);
 }
 
 /* ----------------------------------------------------------------------
  * Spline-based Modified Embedded Atom Method plus 
  * Stillinger-Weber (MEAM+SW) potential routine.
  *
  * Copyright (2012) Lawrence Livermore National Security, LLC.
  * Produced at the Lawrence Livermore National Laboratory.
  * Written by Robert E. Rudd (<robert.rudd@llnl.gov>).
  * Based on the spline-based MEAM routine written by 
  * Alexander Stukowski (<alex@stukowski.com>).
  * LLNL-CODE-588032 All rights reserved.
  * 
  * The spline-based MEAM+SW format was first devised and used to develop
  * potentials for bcc transition metals by Jeremy Nicklas, Michael Fellinger,
  * and Hyoungki Park at The Ohio State University.
  *
  * This program is free software; you can redistribute it and/or modify it under
  * the terms of the GNU General Public License (as published by the Free
  * Software Foundation) version 2, dated June 1991.
  *
  * This program is distributed in the hope that it will be useful, but
  * WITHOUT ANY WARRANTY; without even the IMPLIED WARRANTY OF MERCHANTABILITY
  * or FITNESS FOR A PARTICULAR PURPOSE. See the terms and conditions of the
  * GNU General Public License for more details.
  *
  * Our Preamble Notice
  * A. This notice is required to be provided under our contract with the
  * U.S. Department of Energy (DOE). This work was produced at the
  * Lawrence Livermore National Laboratory under Contract No.
  * DE-AC52-07NA27344 with the DOE.
  *
  * B. Neither the United States Government nor Lawrence Livermore National
  * Security, LLC nor any of their employees, makes any warranty, express or
  * implied, or assumes any liability or responsibility for the accuracy,
  * completeness, or usefulness of any information, apparatus, product, or
  * process disclosed, or represents that its use would not infringe
  * privately-owned rights.
  *
  * C. Also, reference herein to any specific commercial products, process,
  * or services by trade name, trademark, manufacturer or otherwise does not
  * necessarily constitute or imply its endorsement, recommendation, or
  * favoring by the United States Government or Lawrence Livermore National
  * Security, LLC. The views and opinions of authors expressed herein do not
  * necessarily state or reflect those of the United States Government or
  * Lawrence Livermore National Security, LLC, and shall not be used for
  * advertising or product endorsement purposes.
  *
  * The precise terms and conditions for copying, distribution and modification
  * follow.
  *
  * GNU Terms and Conditions for Copying, Distribution, and Modification
  *
  * 0.  This License applies to any program or other work which contains a
  * notice placed by the copyright holder saying it may be distributed under
  * the terms of this General Public License.  The "Program," below, refers to
  * any such program or work, and a "work based on the Program" means either
  * the Program or any derivative work under copyright law: that is to say, a
  * work containing the Program or a portion of it, either verbatim or with
  * modifications and/or translated into another language.  (Hereinafter,
  * translation is included without limitation in the term "modification".)
  * Each licensee is addressed as "you."
  *
  * Activities other than copying, distribution and modification are not
  * covered by this License; they are outside its scope.  The act of running
  * the Program is not restricted, and the output from the Program is covered
  * only if its contents constitute a work based on the Program (independent of
  * having been made by running the Program).  Whether that is true depends on
  * what the Program does.  
  *
  * 1.  You may copy and distribute verbatim copies of the Program's source
  * code as you receive it, in any medium, provided that you conspicuously and
  * appropriately publish on each copy an appropriate copyright notice and
  * disclaimer of warranty; keep intact all the notices that refer to this
  * License and to the absence of any warranty; and give any other recipients
  * of the Program a copy of this License along with the Program.
  *
  * You may charge a fee for the physical act of transferring a copy, and you
  * may at your option offer warranty protection in exchange for a fee.
  *
  * 2.  You may modify your copy or copies of the Program or any portion of it,
  * thus forming a work based on the Program, and copy and distribute such
  * modifications or work under the terms of Section 1 above, provided that you
  * also meet all of these conditions:
  *
  *  a)  You must cause the modified files to carry prominent notices stating
  *  that you changed the files and the date of any change.
  *
  *  b)  You must cause any work that you distribute or publish, that in whole
  *  or in part contains or is derived from the Program or any part thereof, to
  *  be licensed as a whole at no charge to all third parties under the terms
  *  of this License.
  *
  *  c)  If the modified program normally reads commands interactively when
  *  run, you must cause it, when started running for such interactive use in
  *  the most ordinary way, to print or display an announcement including an
  *  appropriate copyright notice and a notice that there is no warranty (or
  *  else, saying that you provide a warranty) and that users may redistribute
  *  the program under these conditions, and telling the user how to view a
  *  copy of this License.  (Exception: if the Program itself is interactive
  *  but does not normally print such an announcement, your work based on the
  *  Program is not required to print an announcement.)
  *
  * These requirements apply to the modified work as a whole.  If
  * identifiable sections of that work are not derived from the Program, and
  * can be reasonably considered independent and separate works in
  * themselves, then this License, and its terms, do not apply to those
  * sections when you distribute them as separate work.  But when you
  * distribute the same section as part of a whole which is a work based on
  * the Program, the distribution of the whole must be on the terms of this
  * License, whose permissions for other licensees extend to the entire
  * whole, and thus to each and every part regardless of who wrote it.
  *
  * Thus, it is not the intent of this section to claim rights or contest
  * your rights to work written entirely by you; rather, the intent is to
  * exercise the right to control the distribution of derivative or
  * collective works based on the Program.
  *
  * In addition, mere aggregation of another work not based on the Program
  * with the Program (or with a work based on the Program) on a volume of a
  * storage or distribution medium does not bring the other work under the
  * scope of this License.
  *
  * 3.  You may copy and distribute the Program (or a work based on it, under
  * Section 2) in object code or executable form under the terms of Sections
  * 1 and 2 above provided that you also do one of the following:
  *
  *  a)  Accompany it with the complete corresponding machine-readable source
  *  code, which must be distributed under the terms of Sections 1 and 2 above
  *  on a medium customarily used for software interchange; or,
  *
  *  b)  Accompany it with a written offer, valid for at least three years,
  *  to give any third party, for a charge no more than your cost of
  *  physically performing source distribution, a complete machine-readable
  *  copy of the corresponding source code, to be distributed under the terms
  *  of Sections 1 and 2 above on a medium customarily used for software
  *  interchange; or,
  *
  *  c)  Accompany it with the information you received as to the offer to
  *  distribute corresponding source code.  (This alternative is allowed only
  *  for noncommercial distribution and only if you received the program in
  *  object code or executable form with such an offer, in accord with
  *  Subsection b above.)
  *
  * The source code for a work means the preferred form of the work for making
  * modifications to it.  For an executable work, complete source code means
  * all the source code for all modules it contains, plus any associated
  * interface definition files, plus the scripts used to control compilation
  * and installation of the executable.  However, as a special exception, the
  * source code distributed need not include anything that is normally
  * distributed (in either source or binary form) with the major components
  * (compiler, kernel, and so on) of the operating system on which the
  * executable runs, unless that component itself accompanies the executable.
  *
  * If distribution of executable or object code is made by offering access to
  * copy from a designated place, then offering equivalent access to copy the
  * source code from the same place counts as distribution of the source code,
  * even though third parties are not compelled to copy the source along with
  * the object code.
  *
  * 4.  You may not copy, modify, sublicense, or distribute the Program except
  * as expressly provided under this License.  Any attempt otherwise to copy,
  * modify, sublicense or distribute the Program is void, and will
  * automatically terminate your rights under this License.  However, parties
  * who have received copies, or rights, from you under this License will not
  * have their licenses terminated so long as such parties remain in full
  * compliance.
  *
  * 5.  You are not required to accept this License, since you have not signed
  * it.  However, nothing else grants you permission to modify or distribute
  * the Program or its derivative works.  These actions are prohibited by law
  * if you do not accept this License.  Therefore, by modifying or distributing
  * the Program (or any work based on the Program), you indicate your
  * acceptance of this License to do so, and all its terms and conditions for
  * copying, distributing or modifying the Program or works based on it.
  *
  * 6.  Each time you redistribute the Program (or any work based on the
  * Program), the recipient automatically receives a license from the original
  * licensor to copy, distribute or modify the Program subject to these terms
  * and conditions.  You may not impose any further restrictions on the
  * recipients' exercise of the rights granted herein.  You are not responsible
  * for enforcing compliance by third parties to this License.
  *
  * 7.  If, as a consequence of a court judgment or allegation of patent
  * infringement or for any other reason (not limited to patent 
  * issues), conditions are imposed on you (whether by court 
  * order, agreement or otherwise) that contradict the conditions 
  * of this License, they do not excuse you from the conditions 
  * of this License.  If you cannot distribute so as to satisfy
  * simultaneously your obligations under this License and any other pertinent
  * obligations, then as a consequence you may not distribute the Program at
  * all.  For example, if a patent license would not permit royalty-free
  * redistribution of the Program by all those who receive copies directly or
  * indirectly through you, then the only way you could satisfy both it and
  * this License would be to refrain entirely from distribution of the Program.
  *
  * If any portion of this section is held invalid or unenforceable under any
  * particular circumstance, the balance of the section is intended to apply
  * and the section as a whole is intended to apply in other circumstances.
  *
  * It is not the purpose of this section to induce you to infringe any patents
  * or other property right claims or to contest validity of any such claims;
  * this section has the sole purpose of protecting the integrity of the free
  * software distribution system, which is implemented by public license
  * practices.  Many people have made generous contributions to the wide range
  * of software distributed through that system in reliance on consistent
  * application of that system; it is up to the author/donor to decide if he or
  * she is willing to distribute software through any other system and a
  * licensee cannot impose that choice.
  *
  * This section is intended to make thoroughly clear what is believed to be a
  * consequence of the rest of this License.
  *
  * 8.  If the distribution and/or use of the Program is restricted in certain
  * countries either by patents or by copyrighted interfaces, the original
  * copyright holder who places the Program under this License may add an
  * explicit geographical distribution limitation excluding those countries, so
  * that distribution is permitted only in or among countries not thus
  * excluded.  In such case, this License incorporates the limitation as if
  * written in the body of this License.
  *
  * 9.  The Free Software Foundation may publish revised and/or new versions of
  * the General Public License from time to time.  Such new versions will be
  * similar in spirit to the present version, but may differ in detail to
  * address new problems or concerns.
  *
  * Each version is given a distinguishing version number.  If the Program
  * specifies a version number of this License which applies to it and "any
  * later version," you have the option of following the terms and conditions
  * either of that version or of any later version published by the Free Software
  * Foundation.  If the Program does not specify a version number of this
  * License, you may choose any version ever published by the Free Software
  * Foundation.
  *
  * 10.  If you wish to incorporate parts of the Program into other free
  * programs whose distribution conditions are different, write to the author
  * to ask for permission.  For software which is copyrighted by the Free
  * Software Foundation, write to the Free Software Foundation; we sometimes
  * make exceptions for this.  Our decision to grant permission will be guided
  * by the two goals of preserving the free status of all derivatives of our
  * free software and of promoting the sharing and reuse of software generally.
  *
  * NO WARRANTY
  *
  * 11.  BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
  * FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
  * OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
  * PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
  * OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
  * TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
  * PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
  * REPAIR OR CORRECTION.
  *
  * 12.  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
  * WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
  * REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
  * INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
  * OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
  * TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
  * YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
  * PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGES.
  *
  * END OF TERMS AND CONDITIONS 
 ------------------------------------------------------------------------- */
diff --git a/src/USER-MISC/pair_srp.cpp b/src/USER-MISC/pair_srp.cpp
index f32dc96e0..ec7730a1f 100644
--- a/src/USER-MISC/pair_srp.cpp
+++ b/src/USER-MISC/pair_srp.cpp
@@ -1,704 +1,704 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
  
 /* ----------------------------------------------------------------------
    Contributing authors: Timothy Sirk (ARL), Pieter in't Veld (BASF)
 
 This pair style srp command calculates a segmental repulsive force
 between bonds. This is useful for preventing the crossing of bonds if
 soft non-bonded potentials are used, such as DPD polymer chains.
 
 See the doc page for pair_style srp command for usage instructions.
 
 There is an example script for this package in examples/USER/srp.
 
 Please contact Timothy Sirk for questions (tim.sirk@us.army.mil).
 ------------------------------------------------------------------------- */
 
 #include "stdlib.h" 
 #include "pair_srp.h" 
 #include "atom.h" 
 #include "comm.h" 
 #include "force.h" 
 #include "neighbor.h" 
 #include "neigh_list.h" 
 #include "memory.h" 
 #include "error.h" 
 #include "domain.h" 
 #include "modify.h"
 #include "fix.h"
 #include "fix_srp.h"
 #include "thermo.h"
 #include "output.h"
 #include "string.h"
 #include "citeme.h"
  
 using namespace LAMMPS_NS; 
  
 // threshold below which a bond-bond distance or a line-line denominator
 // is treated as zero
 #define SMALL 1.0e-10
 // magnitude substituted for the denominator in getMinDist() when the two
 // bonds are (nearly) parallel
 #define BIG 1e10
 // bit OR'ed into a neighbor-list entry by onetwoexclude() to flag a 1-2 neighbor
 #define ONETWOBIT 0x40000000
 
 // BibTeX record handed to the citation logger by the PairSRP constructor.
 // The citation key is followed by a comma so that the entry is valid BibTeX.
 static const char cite_srp[] =
   "@Article{Sirk2012,\n"
   " author = {T. Sirk and Y. Sliozberg and J. Brennan and M. Lisal and J. Andzelm},\n"
   " title = {An enhanced entangled polymer model for dissipative particle dynamics},\n"
   " journal = {J.~Chem.~Phys.},\n"
   " year =    2012,\n"
   " volume =  136,\n"
   " pages =   {134903}\n"
   "}\n\n";
 
 /* ----------------------------------------------------------------------
  set size of pair comms in constructor
  ---------------------------------------------------------------------- */
 
 PairSRP::PairSRP(LAMMPS *lmp) : Pair(lmp)
 {
   // no bond-particle endpoint table yet; remapBonds() grows it on demand
   segment = NULL;
   // flags inherited from the Pair base class
   // NOTE(review): presumably nextra sizes the extra per-pair accumulators and
   // writedata advertises write_data() support -- confirm against pair.h
   nextra = 1;
   writedata = 1;
   // register the SRP reference when citation logging is enabled
   if (lmp->citeme) {
     lmp->citeme->add(cite_srp);
   }
 }
  
 /* ----------------------------------------------------------------------
  allocate all arrays
  ------------------------------------------------------------------------- */
 
 void PairSRP::allocate()
 {
   // bond particles are inserted by fix srp and use the highest atom type
   // (bptype); the per-type tables are indexed 1..bptype, hence bptype+1
   const int ntab = bptype + 1;
   allocated = 1;
   memory->create(cutsq, ntab, ntab, "pair:cutsq");
   memory->create(cut, ntab, ntab, "pair:cut");
   memory->create(a0, ntab, ntab, "pair:a0");
   memory->create(setflag, ntab, ntab, "pair:setflag");
   // mark every i <= j type pair as "coefficients not set"
   for (int itype = 1; itype < ntab; itype++) {
     for (int jtype = itype; jtype < ntab; jtype++) {
       setflag[itype][jtype] = 0;
     }
   }
   // the segment table has no rows yet
   maxcount = 0;
 }
 
 /* ---------------------------------------------------------------------- 
  free 
  ------------------------------------------------------------------------- */ 
 
 PairSRP::~PairSRP()
 {
   // the arrays exist only once allocate() has run
   if (allocated) {
     memory->destroy(segment);
     memory->destroy(a0);
     memory->destroy(cut);
     memory->destroy(cutsq);
     memory->destroy(setflag);
   }
   // remove the internally created fix, unless the fix list is already empty
   // (all fixes may have been deleted before this destructor runs)
   if (modify->nfix) {
     modify->delete_fix("mysrp");
   }
 }
  
 /* ---------------------------------------------------------------------- 
  compute bond-bond repulsions 
  ------------------------------------------------------------------------- */ 
  
 void PairSRP::compute(int eflag, int vflag) 
 
 {
     // setup energy and virial 
     if (eflag || vflag) 
         ev_setup(eflag, vflag); 
     else 
         evflag = vflag_fdotr = 0; 
 
     double **x = atom->x; 
     double **f = atom->f; 
     int nlocal = atom->nlocal; 
     int nall = nlocal + atom->nghost; 
     int i0, i1, j0, j1; 
     int i,j,ii,jj,inum,jnum;
     double dijsq, dij;
 
     int *ilist,*jlist,*numneigh,**firstneigh;
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
 
     double dx,dy,dz,ti,tj;
     double wd, lever0, lever1, evdwl, fpair;
     double fxlever0, fylever0, fzlever0, fxlever1, fylever1, fzlever1;
     double fx, fy, fz;
 
     // mapping global to local for atoms inside bond particles 
     // exclude 1-2 neighs if requested 
     if (neighbor->ago == 0){
       remapBonds(nall);
       if(exclude) onetwoexclude(ilist, inum, jlist, numneigh, firstneigh);
     }
 
   // this pair style only used with hybrid
   // due to exclusions
   // each atom i is type bptype  
   // each neigh j is type bptype 
   
   // using midpoint distance option
   if(midpoint){
 
     for (ii = 0; ii < inum; ii++) {
 
       i = ilist[ii];
       jnum = numneigh[i];
       // two atoms inside bond particle
       i0 = segment[i][0];
       j0 = segment[i][1];
 
       for (jj = 0; jj < jnum; jj++) {
 
         jlist = firstneigh[i];
         j = jlist[jj];
 
         // enforce 1-2 exclusions
         if( (sbmask(j) & exclude) )
           continue;
 
         j &= NEIGHMASK;
         //retrieve atoms from bond particle
         i1 = segment[j][0];
         j1 = segment[j][1];
 
         // midpt dist bond 0 and 1
         dx = 0.5*(x[i0][0] - x[i1][0] + x[j0][0] - x[j1][0]);
         dy = 0.5*(x[i0][1] - x[i1][1] + x[j0][1] - x[j1][1]);  
         dz = 0.5*(x[i0][2] - x[i1][2] + x[j0][2] - x[j1][2]);  
         dijsq = dx*dx + dy*dy + dz*dz;
 
         if (dijsq < cutsq[bptype][bptype]){
         dij = sqrt(dijsq);
 
         if (dij < SMALL) 
           continue;     // dij can be 0.0 with soft potentials
 
         wd = 1.0 - dij / cut[bptype][bptype];
         fpair = 0.5 * a0[bptype][bptype] * wd / dij; // 0.5 factor for lever rule 
 
         // force for bond 0, beads 0,1
         //force between bonds
         fx = fpair * dx;
         fy = fpair * dy;
         fz = fpair * dz;
 
         f[i0][0] += fx; //keep force sign for bond 0
         f[i0][1] += fy;
         f[i0][2] += fz;
 
         f[j0][0] += fx;
         f[j0][1] += fy;
         f[j0][2] += fz;
 
         f[i1][0] -= fx; //flip force sign for bond 1
         f[i1][1] -= fy;
         f[i1][2] -= fz;
 
         f[j1][0] -= fx;
         f[j1][1] -= fy;
         f[j1][2] -= fz;
 
         // ************************************************* //
 
         if (eflag){
           evdwl = 0.5 * a0[bptype][bptype] * cut[bptype][bptype] * wd * wd;
         }
 
         if (evflag){
           ev_tally(i0,i1,nlocal,1,0.5*evdwl,0.0,fpair,dx,dy,dz);
           ev_tally(j0,j1,nlocal,1,0.5*evdwl,0.0,fpair,dx,dy,dz);
         }
 
         if (vflag_fdotr) virial_fdotr_compute();
 
         }
       }
    }
  } 
   else{
   // using min distance option
   
     for (ii = 0; ii < inum; ii++) {
 
       i = ilist[ii];
       jnum = numneigh[i];
       i0 = segment[i][0];
       j0 = segment[i][1];
 
       for (jj = 0; jj < jnum; jj++) {
 
         jlist = firstneigh[i];
         j = jlist[jj];
 
         // enforce 1-2 exclusions
         if( (sbmask(j) & exclude) )
           continue;
 
         j &= NEIGHMASK;
 
         i1 = segment[j][0];
         j1 = segment[j][1];
 
         getMinDist(x, dx, dy, dz, ti, tj, i0, j0, i1, j1);
         dijsq = dx*dx + dy*dy + dz*dz;
 
         if (dijsq < cutsq[bptype][bptype]){
 
         dij = sqrt(dijsq); 
 
         if (dij < SMALL)
  	  continue;     // dij can be 0.0 with soft potentials
 
         wd = 1.0 - dij / cut[bptype][bptype];
         fpair = a0[bptype][bptype] * wd / dij; 
 
         // force for bond 0, beads 0,1
         lever0 = 0.5 + ti; // assign force according to lever rule
         lever1 = 0.5 + tj; // assign force according to lever rule
         //force between bonds
         fx = fpair * dx;
         fy = fpair * dy;
         fz = fpair * dz;
 
         //decompose onto atoms
         fxlever0 = fx * lever0;
         fylever0 = fy * lever0;
         fzlever0 = fz * lever0;
         fxlever1 = fx * lever1;
         fylever1 = fy * lever1;
         fzlever1 = fz * lever1;
 
         f[i0][0] += fxlever0; //keep force sign for bond 0
         f[i0][1] += fylever0;
         f[i0][2] += fzlever0;
 
         f[j0][0] += (fx - fxlever0);
         f[j0][1] += (fy - fylever0);
         f[j0][2] += (fz - fzlever0);
 
         f[i1][0] -= fxlever1; //flip force sign for bond 1
         f[i1][1] -= fylever1;
         f[i1][2] -= fzlever1;
 
         f[j1][0] -= (fx - fxlever1);
         f[j1][1] -= (fy - fylever1);
         f[j1][2] -= (fz - fzlever1);
 
         // ************************************************* //
 
         if (eflag){
           evdwl = 0.5 * a0[bptype][bptype] * cut[bptype][bptype] * wd * wd;
         }
 
         if (evflag){
           ev_tally(i0,i1,nlocal,1,0.5*evdwl,0.0,0.5*fpair,dx,dy,dz);
           ev_tally(j0,j1,nlocal,1,0.5*evdwl,0.0,0.5*fpair,dx,dy,dz);
         }
 
        if (vflag_fdotr) virial_fdotr_compute();
 
       }
     }
   }
  }
 }
 
 /* ----------------------------------------------------------------------
  global settings
  ------------------------------------------------------------------------- */
 
 void PairSRP::settings(int narg, char **arg)
 {
     if (narg < 3 || narg > 5)
         error->all(FLERR,"Illegal pair_style command");
 
     cut_global = force->numeric(FLERR,arg[0]);
     btype = force->inumeric(FLERR,arg[1]);
     if (btype > atom->nbondtypes) error->all(FLERR,"Illegal pair_style command");
 
     // settings
     midpoint = 0;
     min = 0;
 
   if (strcmp(arg[2],"min") == 0) min = 1;
   else if (strcmp(arg[2],"mid") == 0) midpoint = 1;
   else
      error->all(FLERR,"Illegal pair_style command");
 
   int iarg = 3;
   // default exclude 1-2
   // scaling for 1-2, etc not supported
   exclude = 1; 
 
   while (iarg < narg) {
     if (strcmp(arg[iarg],"exclude") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal pair srp command");
       if (strcmp(arg[iarg+1],"yes") == 0)
         exclude = 1;
       if (strcmp(arg[iarg+1],"no") == 0){
         if (min) error->all(FLERR,"Illegal exclude option in pair srp command");
         exclude = 0;
       }
       iarg += 2;
     } else error->all(FLERR,"Illegal pair srp command");
   }
 
   // highest atom type is saved for bond particles
   bptype = atom->ntypes;
 
   // reset cutoffs if explicitly set
   if (allocated) {
     int i,j;
     for (i = 1; i <= bptype; i++)
       for (j = i+1; j <= bptype; j++)
         if (setflag[i][j]) cut[i][j] = cut_global;
   }
 }
 
 /* ----------------------------------------------------------------------
  set coeffs 
  ------------------------------------------------------------------------- */
 
 void PairSRP::coeff(int narg, char **arg)
 {
     if (narg < 3 || narg > 4)
         error->all(FLERR,"PairSRP: Incorrect args for pair coeff");
     if (!allocated) allocate();
 
     // set ij bond-bond cutoffs
     int ilo, ihi, jlo, jhi;
     force->bounds(arg[0], bptype, ilo, ihi);
     force->bounds(arg[1], bptype, jlo, jhi);
 
     double a0_one = force->numeric(FLERR,arg[2]);
     double cut_one = cut_global;
     if (narg == 4)  cut_one = force->numeric(FLERR,arg[3]);
 
     int count = 0;
     for (int i = ilo; i <= ihi; i++)
     {
         for (int j = MAX(jlo,i); j <= jhi; j++)
         {
             a0[i][j] = a0_one;
             cut[i][j] = cut_one;
             cutsq[i][j] = cut_one * cut_one;
             setflag[i][j] = 1;
             count++;
         }
     }
 
     if (count == 0) error->warning(FLERR,"PairSRP: No pair coefficients were set");
 }
 
 /* ----------------------------------------------------------------------
  init specific to this pair style
  ------------------------------------------------------------------------- */
 
 // Style-specific initialization: checks that newton pair is on, creates the
 // companion fix with ID "mysrp", passes the bond type to it, switches off
 // thermo normalization, and requests a neighbor list.
 void PairSRP::init_style()
 {
   if (!force->newton_pair)
     error->all(FLERR,"PairSRP: Pair srp requires newton pair on");
 
   // need fix srp
   // invoke here instead of constructor
   // to make restart possible
   // NOTE(review): the three strings presumably follow the fix command
   // layout (ID, group, style) -- confirm against Modify::add_fix
   char **fixarg = new char*[3];
   fixarg[0] = (char *) "mysrp";
   fixarg[1] = (char *) "all";
   fixarg[2] = (char *) "SRP";
   modify->add_fix(3,fixarg);
   // takes the last entry of the fix list, i.e. assumes add_fix() appended
   // the new fix at the end
   f_srp = (FixSRP *) modify->fix[modify->nfix-1];
   delete [] fixarg;
 
   // set bond type in fix srp
   // bonds of this type will be represented by bond particles
   // btype = bond type
   // bptype = bond particle type
   // btype is formatted as decimal text because modify_params() takes strings
   char c0[20];
   char* arg0[2];
   sprintf(c0, "%d", btype);
   arg0[0] = (char *) "btype";
   arg0[1] = c0;
   f_srp->modify_params(2, arg0);
 
   // bond particles do not contribute to energy or virial
   // bond particles do not belong to group all
   // but thermo normalization is by nall 
   // therefore should turn off normalization
   int me;
   MPI_Comm_rank(world,&me);
   char *arg1[2];
   arg1[0] = (char *) "norm";
   arg1[1] = (char *) "no";
   output->thermo->modify_params(2, arg1);
   // only rank 0 reports the change
   if (me == 0) 
     error->message(FLERR,"Thermo normalization turned off by pair srp");
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 }
 
 /* ----------------------------------------------------------------------
  init for one type pair i,j and corresponding j,i
  ------------------------------------------------------------------------- */
 
 double PairSRP::init_one(int i, int j)
 {
 
  if (setflag[i][j] == 0) error->all(FLERR,"PairSRP: All pair coeffs are not set");
 
   cut[j][i] = cut[i][j];
   a0[j][i] = a0[i][j];
 
   return cut[i][j];
 }
  
 /* ---------------------------------------------------------------------- 
  find min distance for bonds i0/j0 and i1/j1
  ------------------------------------------------------------------------- */ 
 inline void PairSRP::getMinDist(double** &x, double &dx, double &dy, double &dz, double &ti, double &tj, int &i0, int &j0, int &i1, int &j1)
 {
   // Inputs : x (coordinates), bond 0 = atoms i0/j0, bond 1 = atoms i1/j1.
   // Outputs: ti, tj = offsets along bond 0 / bond 1 measured from each bond
   //          midpoint, clamped to [-0.5,0.5]; (dx,dy,dz) = separation vector
   //          between the two points selected by ti and tj.
   double u[3], v[3], dP[3];
   // bond vectors: u for bond 0, v for bond 1
   u[0] = x[j0][0] - x[i0][0];
   u[1] = x[j0][1] - x[i0][1];
   u[2] = x[j0][2] - x[i0][2];
   v[0] = x[j1][0] - x[i1][0];
   v[1] = x[j1][1] - x[i1][1];
   v[2] = x[j1][2] - x[i1][2];
   // midpoint-to-midpoint separation
   dP[0] = 0.5*(u[0]-v[0]) + x[i0][0]-x[i1][0];
   dP[1] = 0.5*(u[1]-v[1]) + x[i0][1]-x[i1][1];
   dP[2] = 0.5*(u[2]-v[2]) + x[i0][2]-x[i1][2];
   // dot products of the bond vectors
   const double uu = u[0]*u[0] + u[1]*u[1] + u[2]*u[2];
   const double uv = u[0]*v[0] + u[1]*v[1] + u[2]*v[2];
   const double vv = v[0]*v[0] + v[1]*v[1] + v[2]*v[2];
   double det = uv*uv - uu*vv;
   // (nearly) parallel bonds: a huge denominator drives ti and tj towards
   // zero, which reduces the result to the midpoint distance
   if (fabs(det) < SMALL) {
     det = (det < 0) ? -BIG : BIG;
   }
   // offset along bond 0
   const double wx0 = uv*v[0] - vv*u[0];
   const double wy0 = uv*v[1] - vv*u[1];
   const double wz0 = uv*v[2] - vv*u[2];
   const double top0 = dP[0]*wx0 + dP[1]*wy0 + dP[2]*wz0;
   double s = top0 / det;
   if (s > 0.5) s = 0.5;
   if (s < -0.5) s = -0.5;
   ti = s;
   // offset along bond 1
   const double wx1 = uv*u[0] - uu*v[0];
   const double wy1 = uv*u[1] - uu*v[1];
   const double wz1 = uv*u[2] - uu*v[2];
   const double top1 = dP[0]*wx1 + dP[1]*wy1 + dP[2]*wz1;
   double t = -top1/ det;
   if (t > 0.5) t = 0.5;
   if (t < -0.5) t = -0.5;
   tj = t;
   // separation between the two selected points
   dx = dP[0] - ti*u[0] + tj*v[0];
   dy = dP[1] - ti*u[1] + tj*v[1];
   dz = dP[2] - ti*u[2] + tj*v[2];
 }
 
 /* -------------------------------------------------------- 
 map global ids of the atoms stored by each bond particle to local ids
  ------------------------------------------------------- */ 
 inline void PairSRP::remapBonds(int &nall)
 {
   // enlarge the endpoint table when there are more atoms than rows
   if (maxcount < nall) {
     memory->grow(segment, nall, 2, "pair:segment");
     maxcount = nall;
   }
   // per-atom array of fix srp: columns 0 and 1 hold the global ids of the
   // two bond atoms of each bond particle
   srp = f_srp->array_atom;
   for (int ip = 0; ip < nall; ip++) {
     // only bond particles carry endpoint ids
     if (atom->type[ip] != bptype) continue;
     for (int k = 0; k < 2; k++) {
       // local index of the bond atom; -1 is ok here
       // it might not be possible to map both bond atoms of a particle that is
       // outside neighcut; such particles are not on the neighbor list, so
       // they are not used
       const int local = atom->map((int)srp[ip][k]);
       segment[ip][k] = domain->closest_image(ip,local);
     }
   }
 }
 
 /* -------------------------------------------------------- 
 add exclusions for 1-2 neighs, if requested
 more complex exclusions or scaling probably not needed 
  ------------------------------------------------------- */ 
 inline void PairSRP::onetwoexclude(int* &ilist, int &inum, int* &jlist, int* &numneigh, int** &firstneigh)
 {
   // Tag neighbor-list entries whose bond shares an atom with the bond of the
   // owning bond particle.  Only 1-2 information is encoded; 1-3 etc. could be
   // added later if ever needed.
   for (int ii = 0; ii < inum; ii++) {
     const int ibp = ilist[ii];
     const int nneigh = numneigh[ibp];
     // two atoms inside bond particle ibp
     const int headI = segment[ibp][0];
     const int tailI = segment[ibp][1];
     for (int jj = 0; jj < nneigh; jj++) {
       jlist = firstneigh[ibp];
       // strip any existing flag bits to get the plain index
       const int jbp = jlist[jj] & NEIGHMASK;
       // two atoms inside bond particle jbp
       const int headJ = segment[jbp][0];
       const int tailJ = segment[jbp][1];
       // a shared atom index means the two bonds are 1-2 neighbors
       const bool shared = (headI == headJ) || (headI == tailJ) ||
                           (headJ == tailI) || (tailI == tailJ);
       if (shared) {
         jlist[jj] = jbp | ONETWOBIT;
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
 proc 0 writes to data file
 ------------------------------------------------------------------------- */
 
 void PairSRP::write_data(FILE *fp)
 {
   // one line per atom type: type index and its diagonal a0 coefficient
   const int ntypes = atom->ntypes;
   int itype = 1;
   while (itype <= ntypes) {
     fprintf(fp,"%d %g\n",itype,a0[itype][itype]);
     itype++;
   }
 }
 
 /* ----------------------------------------------------------------------
 proc 0 writes all pairs to data file
 ------------------------------------------------------------------------- */
 
 void PairSRP::write_data_all(FILE *fp)
 {
   // one line per i <= j type pair: both types, a0 and the cutoff
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       fprintf(fp,"%d %d %g %g\n",itype,jtype,a0[itype][jtype],cut[itype][jtype]);
     }
   }
 }
  
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairSRP::write_restart(FILE *fp)
 {
   // global settings first, then the per-pair records
   write_restart_settings(fp);
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       // the flag is always written; a0 and cut follow only when it is set
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
       if (!setflag[itype][jtype]) continue;
       fwrite(&a0[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 // Read the per-pair coefficients written by write_restart().
 //   fp : restart file, read on proc 0 only
 // Every value read on proc 0 is broadcast to all other procs.
 void PairSRP::read_restart(FILE *fp)
 {
   // settings come first in the file; the arrays are then (re)created
   read_restart_settings(fp);
   allocate();
 
   int i,j;
   int me = comm->me;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
       MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
       // a0 and cut are present in the file only for pairs that were set
       if (setflag[i][j]) {
         if (me == 0) {
           fread(&a0[i][j],sizeof(double),1,fp);
           fread(&cut[i][j],sizeof(double),1,fp);
         }
         MPI_Bcast(&a0[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&cut[i][j],1,MPI_DOUBLE,0,world);
       }
     }
 }
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairSRP::write_restart_settings(FILE *fp)
 {
   // record layout: one double (global cutoff) followed by five ints, in the
   // same order in which read_restart_settings() reads them back
   fwrite(&cut_global,sizeof(double),1,fp);
   int *fields[5] = {&bptype, &btype, &min, &midpoint, &exclude};
   for (int k = 0; k < 5; k++) {
     fwrite(fields[k],sizeof(int),1,fp);
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairSRP::read_restart_settings(FILE *fp)
 {
   if (comm->me == 0) {
     fread(&cut_global,sizeof(double),1,fp);
     fread(&bptype,sizeof(int),1,fp);
     fread(&btype,sizeof(int),1,fp);
     fread(&min,sizeof(int),1,fp);
     fread(&midpoint,sizeof(int),1,fp);
     fread(&exclude,sizeof(int),1,fp);
   }
   MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world);
 }
diff --git a/src/USER-MISC/pair_tersoff_table.cpp b/src/USER-MISC/pair_tersoff_table.cpp
index 1652385fc..69c5057ce 100644
--- a/src/USER-MISC/pair_tersoff_table.cpp
+++ b/src/USER-MISC/pair_tersoff_table.cpp
@@ -1,1021 +1,1021 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Luca Ferraro (CASPUR)
    email: luca.ferraro@caspur.it
 
    Tersoff Potential
    References: (referenced as tersoff_2 functional form in LAMMPS manual)
     1) Tersoff, Phys. Rev. B 39, 5566 (1988)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_tersoff_table.h"
 #include "atom.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "force.h"
 #include "comm.h"
 #include "memory.h"
 
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 #define MAXLINE 1024
 #define DELTA 4
 
 #define GRIDSTART 0.1
 #define GRIDDENSITY_FCUTOFF 5000
 #define GRIDDENSITY_EXP 12000
 #define GRIDDENSITY_GTETA 12000
 #define GRIDDENSITY_BIJ 7500
 
 // max number of interaction per atom for environment potential
 
 #define leadingDimensionInteractionList 64
 
 /* ---------------------------------------------------------------------- */
 
 PairTersoffTable::PairTersoffTable(LAMMPS *lmp) : Pair(lmp)
 {
   // flags inherited from Pair
   manybody_flag = 1;
   one_coeff = 1;
   single_enable = 0;
 
   // element list and parameter table start out empty
   elements = NULL;
   nelements = 0;
   params = NULL;
   nparams = 0;
   maxparam = 0;
   elem2param = NULL;
   allocated = 0;
 
   // per-atom scratch arrays are created later by allocatePreLoops()
   preGtetaFunction = NULL;
   preGtetaFunctionDerived = NULL;
   preCutoffFunction = NULL;
   preCutoffFunctionDerived = NULL;
 }
 
 /* ----------------------------------------------------------------------
    check if allocated, since class can be destructed when incomplete
 ------------------------------------------------------------------------- */
 
 PairTersoffTable::~PairTersoffTable()
 {
   // element names: each string first, then the array that holds them
   if (elements) {
     for (int ielem = 0; ielem < nelements; ++ielem)
       delete [] elements[ielem];
   }
   delete [] elements;
 
   memory->destroy(params);
   memory->destroy(elem2param);
 
   // everything below only exists once allocate() has run
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
   delete [] map;
 
   deallocateGrids();
   deallocatePreLoops();
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Compute forces (and, when eflag/vflag request it, energy and virial
 // tallies) for every atom in the neighbor list, using linear interpolation
 // on the lookup tables built by allocateGrids().
 //
 // For each atom i the work is done in two stages:
 //   1. fill the per-neighbor scratch arrays preCutoffFunction[] /
 //      preCutoffFunctionDerived[] and the per-neighbor-pair arrays
 //      preGtetaFunction[][] / preGtetaFunctionDerived[][]
 //   2. for each neighbor j: accumulate zeta over all other neighbors k,
 //      interpolate betaZetaPower, apply the i-j force, then apply the
 //      i-j-k forces in a second pass over k
 void PairTersoffTable::compute(int eflag, int vflag)
 {
   int i,j,k,ii,inum,jnum;
   int itype,jtype,ktype,ijparam,ikparam,ijkparam;
   double xtmp,ytmp,ztmp;
   double fxtmp,fytmp,fztmp;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
 
   int interpolIDX;
   double directorCos_ij_x, directorCos_ij_y, directorCos_ij_z, directorCos_ik_x, directorCos_ik_y, directorCos_ik_z;
   double invR_ij, invR_ik, cosTeta;
   double repulsivePotential, attractivePotential;
   double exponentRepulsivePotential, exponentAttractivePotential,interpolTMP,interpolDeltaX,interpolY1;
   double interpolY2, cutoffFunctionIJ, attractiveExponential, repulsiveExponential, cutoffFunctionDerivedIJ,zeta;
   double gtetaFunctionIJK,gtetaFunctionDerivedIJK,cutoffFunctionIK;
   double cutoffFunctionDerivedIK,factor_force3_ij,factor_1_force3_ik;
   double factor_2_force3_ik,betaZetaPowerIJK,betaZetaPowerDerivedIJK,factor_force_tot;
   double factor_force_ij;
   double gtetaFunctionDerived_temp,gtetaFunction_temp;
 
   double evdwl = 0.0;
 
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int newton_pair = force->newton_pair;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over full neighbor list of my atoms
   for (ii = 0; ii < inum; ii++) {
 
     i = ilist[ii];
     itype = map[type[i]];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     // force on atom i is accumulated locally and added to f[i] once per atom
     fxtmp = fytmp = fztmp = 0.0;
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     // the pre* scratch arrays are indexed by neighbor position and are
     // allocated with leadingDimensionInteractionList entries per dimension
     if (jnum > leadingDimensionInteractionList) {
       char errmsg[256];
       sprintf(errmsg,"Too many neighbors for interaction list: %d vs %d.\n"
               "Check your system or increase 'leadingDimensionInteractionList'",
               jnum, leadingDimensionInteractionList);
       error->one(FLERR,errmsg);
     }
 
     // Pre-calculate gteta and cutoff function
     for (int neighbor_j = 0; neighbor_j < jnum; neighbor_j++) {
 
       double dr_ij[3], r_ij;
 
       j = jlist[neighbor_j];
       j &= NEIGHMASK;
 
       dr_ij[0] = xtmp - x[j][0];
       dr_ij[1] = ytmp - x[j][1];
       dr_ij[2] = ztmp - x[j][2];
       // r_ij holds the squared distance until the sqrt() below
       r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2];
 
       jtype = map[type[j]];
       ijparam = elem2param[itype][jtype][jtype];
 
       // neighbors beyond the cutoff leave their scratch entries untouched;
       // the later loops skip them with the same test
       if (r_ij > params[ijparam].cutsq) continue;
 
       r_ij = sqrt(r_ij);
 
       invR_ij = 1.0 / r_ij;
 
       directorCos_ij_x = invR_ij * dr_ij[0];
       directorCos_ij_y = invR_ij * dr_ij[1];
       directorCos_ij_z = invR_ij * dr_ij[2];
 
       // preCutoffFunction
       // linear interpolation between the two grid points bracketing r_ij
       interpolDeltaX =  r_ij - GRIDSTART;
       interpolTMP = (interpolDeltaX * GRIDDENSITY_FCUTOFF);
       interpolIDX = (int) interpolTMP;
       interpolY1 = cutoffFunction[itype][jtype][interpolIDX];
       interpolY2 = cutoffFunction[itype][jtype][interpolIDX+1];
       preCutoffFunction[neighbor_j] = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
       // preCutoffFunctionDerived
       interpolY1 = cutoffFunctionDerived[itype][jtype][interpolIDX];
       interpolY2 = cutoffFunctionDerived[itype][jtype][interpolIDX+1];
       preCutoffFunctionDerived[neighbor_j] = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
 
 
       // only k > j is visited; the result is stored symmetrically below
       for (int neighbor_k = neighbor_j + 1; neighbor_k < jnum; neighbor_k++) {
         double dr_ik[3], r_ik;
 
         k = jlist[neighbor_k];
         k &= NEIGHMASK;
         ktype = map[type[k]];
         ikparam = elem2param[itype][ktype][ktype];
         ijkparam = elem2param[itype][jtype][ktype];
 
         dr_ik[0] = xtmp -x[k][0];
         dr_ik[1] = ytmp -x[k][1];
         dr_ik[2] = ztmp -x[k][2];
         r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
 
         if (r_ik > params[ikparam].cutsq) continue;
 
         r_ik = sqrt(r_ik);
 
         invR_ik = 1.0 / r_ik;
 
         directorCos_ik_x = invR_ik * dr_ik[0];
         directorCos_ik_y = invR_ik * dr_ik[1];
         directorCos_ik_z = invR_ik * dr_ik[2];
 
         // cosine of the angle between the i-j and i-k directions
         cosTeta = directorCos_ij_x * directorCos_ik_x + directorCos_ij_y * directorCos_ik_y + directorCos_ij_z * directorCos_ik_z;
 
         // preGtetaFunction
         // the gteta grid starts at cosTeta = -1, hence the +1.0 shift
         interpolDeltaX=cosTeta+1.0;
         interpolTMP = (interpolDeltaX * GRIDDENSITY_GTETA);
         interpolIDX = (int) interpolTMP;
         interpolY1 = gtetaFunction[itype][interpolIDX];
         interpolY2 = gtetaFunction[itype][interpolIDX+1];
         gtetaFunction_temp = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
         // preGtetaFunctionDerived
         interpolY1 = gtetaFunctionDerived[itype][interpolIDX];
         interpolY2 = gtetaFunctionDerived[itype][interpolIDX+1];
         gtetaFunctionDerived_temp = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
 
         // same value is stored for (j,k) and (k,j)
         preGtetaFunction[neighbor_j][neighbor_k]=params[ijkparam].gamma*gtetaFunction_temp;
         preGtetaFunctionDerived[neighbor_j][neighbor_k]=params[ijkparam].gamma*gtetaFunctionDerived_temp;
         preGtetaFunction[neighbor_k][neighbor_j]=params[ijkparam].gamma*gtetaFunction_temp;
         preGtetaFunctionDerived[neighbor_k][neighbor_j]=params[ijkparam].gamma*gtetaFunctionDerived_temp;
 
       } // loop on K
 
     } // loop on J
 
 
     // loop over neighbors of atom i
     for (int neighbor_j = 0; neighbor_j < jnum; neighbor_j++) {
 
       double dr_ij[3], r_ij, f_ij[3];
 
       j = jlist[neighbor_j];
       j &= NEIGHMASK;
 
       dr_ij[0] = xtmp - x[j][0];
       dr_ij[1] = ytmp - x[j][1];
       dr_ij[2] = ztmp - x[j][2];
       r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2];
 
       jtype = map[type[j]];
       ijparam = elem2param[itype][jtype][jtype];
 
       if (r_ij > params[ijparam].cutsq) continue;
 
       r_ij = sqrt(r_ij);
       invR_ij = 1.0 / r_ij;
 
       directorCos_ij_x = invR_ij * dr_ij[0];
       directorCos_ij_y = invR_ij * dr_ij[1];
       directorCos_ij_z = invR_ij * dr_ij[2];
 
       exponentRepulsivePotential = params[ijparam].lam1 * r_ij;
       exponentAttractivePotential = params[ijparam].lam2 * r_ij;
 
       // repulsiveExponential
       // the exponential[] table stores exp(-arg) starting at minArgumentExponential
       interpolDeltaX =  exponentRepulsivePotential - minArgumentExponential;
       interpolTMP = (interpolDeltaX * GRIDDENSITY_EXP);
       interpolIDX = (int) interpolTMP;
       interpolY1 = exponential[interpolIDX];
       interpolY2 = exponential[interpolIDX+1];
       repulsiveExponential = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
       // attractiveExponential
       interpolDeltaX =  exponentAttractivePotential - minArgumentExponential;
       interpolTMP = (interpolDeltaX * GRIDDENSITY_EXP);
       interpolIDX = (int) interpolTMP;
       interpolY1 = exponential[interpolIDX];
       interpolY2 = exponential[interpolIDX+1];
       attractiveExponential = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
 
       repulsivePotential = params[ijparam].biga * repulsiveExponential;
       attractivePotential = -params[ijparam].bigb * attractiveExponential;
 
       cutoffFunctionIJ = preCutoffFunction[neighbor_j];
       cutoffFunctionDerivedIJ = preCutoffFunctionDerived[neighbor_j];
 
       // zeta sums cutoff(ik) * gteta(ijk) over every in-range neighbor k != j
       zeta = 0.0;
 
       // first loop over neighbors of atom i except j - part 1/2
       for (int neighbor_k = 0; neighbor_k < neighbor_j; neighbor_k++) {
         double dr_ik[3], r_ik;
 
         k = jlist[neighbor_k];
         k &= NEIGHMASK;
         ktype = map[type[k]];
         ikparam = elem2param[itype][ktype][ktype];
         ijkparam = elem2param[itype][jtype][ktype];
 
         dr_ik[0] = xtmp -x[k][0];
         dr_ik[1] = ytmp -x[k][1];
         dr_ik[2] = ztmp -x[k][2];
         r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
 
         if (r_ik > params[ikparam].cutsq) continue;
 
         r_ik = sqrt(r_ik);
 
         invR_ik = 1.0 / r_ik;
 
         gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k];
 
         cutoffFunctionIK = preCutoffFunction[neighbor_k];
 
         zeta += cutoffFunctionIK * gtetaFunctionIJK;
 
       }
 
       // first loop over neighbors of atom i except j - part 2/2
       for (int neighbor_k = neighbor_j+1; neighbor_k < jnum; neighbor_k++) {
         double dr_ik[3], r_ik;
 
         k = jlist[neighbor_k];
         k &= NEIGHMASK;
         ktype = map[type[k]];
         ikparam = elem2param[itype][ktype][ktype];
         ijkparam = elem2param[itype][jtype][ktype];
 
         dr_ik[0] = xtmp -x[k][0];
         dr_ik[1] = ytmp -x[k][1];
         dr_ik[2] = ztmp -x[k][2];
         r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
 
         if (r_ik > params[ikparam].cutsq) continue;
 
         r_ik = sqrt(r_ik);
         invR_ik = 1.0 / r_ik;
 
         directorCos_ik_x = invR_ik * dr_ik[0];
         directorCos_ik_y = invR_ik * dr_ik[1];
         directorCos_ik_z = invR_ik * dr_ik[2];
 
         gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k];
 
         cutoffFunctionIK = preCutoffFunction[neighbor_k];
 
         zeta += cutoffFunctionIK * gtetaFunctionIJK;
       }
 
       // betaZetaPowerIJK
       // table is indexed by beta*zeta, starting at 0
       interpolDeltaX= params[ijparam].beta * zeta;
       interpolTMP = (interpolDeltaX * GRIDDENSITY_BIJ);
       interpolIDX = (int) interpolTMP;
       interpolY1 = betaZetaPower[itype][interpolIDX];
       interpolY2 = betaZetaPower[itype][interpolIDX+1];
       betaZetaPowerIJK = (interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX));
       // betaZetaPowerDerivedIJK
       interpolY1 = betaZetaPowerDerived[itype][interpolIDX];
       interpolY2 = betaZetaPowerDerived[itype][interpolIDX+1];
       betaZetaPowerDerivedIJK = params[ijparam].beta*(interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX));
 
       // Forces and virial
       factor_force_ij = 0.5*cutoffFunctionDerivedIJ*(repulsivePotential + attractivePotential * betaZetaPowerIJK)+0.5*cutoffFunctionIJ*(-repulsivePotential*params[ijparam].lam1-betaZetaPowerIJK*attractivePotential*params[ijparam].lam2);
 
       f_ij[0] = factor_force_ij * directorCos_ij_x;
       f_ij[1] = factor_force_ij * directorCos_ij_y;
       f_ij[2] = factor_force_ij * directorCos_ij_z;
 
       f[j][0] += f_ij[0];
       f[j][1] += f_ij[1];
       f[j][2] += f_ij[2];
 
       fxtmp -= f_ij[0];
       fytmp -= f_ij[1];
       fztmp -= f_ij[2];
 
       // potential energy
       evdwl = cutoffFunctionIJ * repulsivePotential + cutoffFunctionIJ * attractivePotential * betaZetaPowerIJK;
 
       // half of the pair energy is tallied here
       if (evflag) ev_tally(i, j, nlocal, newton_pair, 0.5 * evdwl, 0.0,
                            -factor_force_ij*invR_ij, dr_ij[0], dr_ij[1], dr_ij[2]);
 
       // common prefactor of all three-body force terms for this (i,j) pair
       factor_force_tot= 0.5*cutoffFunctionIJ*attractivePotential*betaZetaPowerDerivedIJK;
 
       // second loop over neighbors of atom i except j, forces and virial only - part 1/2
       for (int neighbor_k = 0; neighbor_k < neighbor_j; neighbor_k++) {
         double dr_ik[3], r_ik, f_ik[3];
 
         k = jlist[neighbor_k];
         k &= NEIGHMASK;
         ktype = map[type[k]];
         ikparam = elem2param[itype][ktype][ktype];
         ijkparam = elem2param[itype][jtype][ktype];
 
         dr_ik[0] = xtmp -x[k][0];
         dr_ik[1] = ytmp -x[k][1];
         dr_ik[2] = ztmp -x[k][2];
         r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
 
         if (r_ik > params[ikparam].cutsq) continue;
 
         r_ik = sqrt(r_ik);
         invR_ik = 1.0 / r_ik;
 
         directorCos_ik_x = invR_ik * dr_ik[0];
         directorCos_ik_y = invR_ik * dr_ik[1];
         directorCos_ik_z = invR_ik * dr_ik[2];
 
         cosTeta = directorCos_ij_x * directorCos_ik_x + directorCos_ij_y * directorCos_ik_y + directorCos_ij_z * directorCos_ik_z;
 
         gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k];
 
         gtetaFunctionDerivedIJK = preGtetaFunctionDerived[neighbor_j][neighbor_k];
 
         cutoffFunctionIK = preCutoffFunction[neighbor_k];
 
         cutoffFunctionDerivedIK = preCutoffFunctionDerived[neighbor_k];
 
         factor_force3_ij= cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ij *factor_force_tot;
 
         // f_ij is reused here for the three-body contribution along i-j
         f_ij[0] = factor_force3_ij * (directorCos_ij_x*cosTeta - directorCos_ik_x);
         f_ij[1] = factor_force3_ij * (directorCos_ij_y*cosTeta - directorCos_ik_y);
         f_ij[2] = factor_force3_ij * (directorCos_ij_z*cosTeta - directorCos_ik_z);
 
         factor_1_force3_ik = (cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ik)*factor_force_tot;
         factor_2_force3_ik = -(cutoffFunctionDerivedIK * gtetaFunctionIJK)*factor_force_tot;
 
         f_ik[0] = factor_1_force3_ik * (directorCos_ik_x*cosTeta - directorCos_ij_x) + factor_2_force3_ik * directorCos_ik_x;
         f_ik[1] = factor_1_force3_ik * (directorCos_ik_y*cosTeta - directorCos_ij_y) + factor_2_force3_ik * directorCos_ik_y;
         f_ik[2] = factor_1_force3_ik * (directorCos_ik_z*cosTeta - directorCos_ij_z) + factor_2_force3_ik * directorCos_ik_z;
 
         f[j][0] -= f_ij[0];
         f[j][1] -= f_ij[1];
         f[j][2] -= f_ij[2];
 
         f[k][0] -= f_ik[0];
         f[k][1] -= f_ik[1];
         f[k][2] -= f_ik[2];
 
         fxtmp += f_ij[0] + f_ik[0];
         fytmp += f_ij[1] + f_ik[1];
         fztmp += f_ij[2] + f_ik[2];
 
         // potential energy
         // zero is passed: the energy was already tallied in the pair term above
         evdwl = 0.0;
 
         if (evflag) ev_tally3(i,j,k,evdwl,0.0,f_ij,f_ik,dr_ij,dr_ik);
       }
 
       // second loop over neighbors of atom i except j, forces and virial only - part 2/2
       for (int neighbor_k = neighbor_j+1; neighbor_k < jnum; neighbor_k++) {
         double dr_ik[3], r_ik, f_ik[3];
 
         k = jlist[neighbor_k];
         k &= NEIGHMASK;
         ktype = map[type[k]];
         ikparam = elem2param[itype][ktype][ktype];
         ijkparam = elem2param[itype][jtype][ktype];
 
         dr_ik[0] = xtmp -x[k][0];
         dr_ik[1] = ytmp -x[k][1];
         dr_ik[2] = ztmp -x[k][2];
         r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
 
         if (r_ik > params[ikparam].cutsq) continue;
 
         r_ik = sqrt(r_ik);
         invR_ik = 1.0 / r_ik;
 
         directorCos_ik_x = invR_ik * dr_ik[0];
         directorCos_ik_y = invR_ik * dr_ik[1];
         directorCos_ik_z = invR_ik * dr_ik[2];
 
         cosTeta = directorCos_ij_x * directorCos_ik_x + directorCos_ij_y * directorCos_ik_y + directorCos_ij_z * directorCos_ik_z;
 
         gtetaFunctionIJK = preGtetaFunction[neighbor_j][neighbor_k];
 
         gtetaFunctionDerivedIJK = preGtetaFunctionDerived[neighbor_j][neighbor_k];
 
         cutoffFunctionIK = preCutoffFunction[neighbor_k];
 
         cutoffFunctionDerivedIK = preCutoffFunctionDerived[neighbor_k];
 
         factor_force3_ij= cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ij *factor_force_tot;
 
         f_ij[0] = factor_force3_ij * (directorCos_ij_x*cosTeta - directorCos_ik_x);
         f_ij[1] = factor_force3_ij * (directorCos_ij_y*cosTeta - directorCos_ik_y);
         f_ij[2] = factor_force3_ij * (directorCos_ij_z*cosTeta - directorCos_ik_z);
 
         factor_1_force3_ik = (cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ik)*factor_force_tot;
         factor_2_force3_ik = -(cutoffFunctionDerivedIK * gtetaFunctionIJK)*factor_force_tot;
 
         f_ik[0] = factor_1_force3_ik * (directorCos_ik_x*cosTeta - directorCos_ij_x) + factor_2_force3_ik * directorCos_ik_x;
         f_ik[1] = factor_1_force3_ik * (directorCos_ik_y*cosTeta - directorCos_ij_y) + factor_2_force3_ik * directorCos_ik_y;
         f_ik[2] = factor_1_force3_ik * (directorCos_ik_z*cosTeta - directorCos_ij_z) + factor_2_force3_ik * directorCos_ik_z;
 
         f[j][0] -= f_ij[0];
         f[j][1] -= f_ij[1];
         f[j][2] -= f_ij[2];
 
         f[k][0] -= f_ik[0];
         f[k][1] -= f_ik[1];
         f[k][2] -= f_ik[2];
 
         fxtmp += f_ij[0] + f_ik[0];
         fytmp += f_ij[1] + f_ik[1];
         fztmp += f_ij[2] + f_ik[2];
 
         // potential energy
         evdwl = 0.0;
 
         if (evflag) ev_tally3(i,j,k,evdwl,0.0,f_ij,f_ik,dr_ij,dr_ik);
 
       }
     } // loop on J
     // add the force accumulated for atom i over all its neighbors
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   } // loop on I
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairTersoffTable::deallocatePreLoops(void)
 {
   // release the per-atom scratch arrays created by allocatePreLoops()
 
   // per-neighbor cutoff values
   memory->destroy(preCutoffFunctionDerived);
   memory->destroy(preCutoffFunction);
 
   // per-neighbor-pair angular values
   memory->destroy(preGtetaFunctionDerived);
   memory->destroy(preGtetaFunction);
 }
 
 void PairTersoffTable::allocatePreLoops(void)
 {
   // every scratch array is sized by the maximum number of neighbors
   // compute() accepts for a single atom
   const int max_neigh = leadingDimensionInteractionList;
 
   // angular term and its derivative: one entry per neighbor pair
   memory->create(preGtetaFunction,max_neigh,max_neigh,
                  "tersofftable:preGtetaFunction");
   memory->create(preGtetaFunctionDerived,max_neigh,max_neigh,
                  "tersofftable:preGtetaFunctionDerived");
 
   // cutoff term and its derivative: one entry per neighbor
   memory->create(preCutoffFunction,max_neigh,
                  "tersofftable:preCutoffFunction");
   memory->create(preCutoffFunctionDerived,max_neigh,
                  "tersofftable:preCutoffFunctionDerived");
 }
 
 void PairTersoffTable::deallocateGrids()
 {
   // release the lookup tables created by allocateGrids()
 
   // bond-order tables
   memory->destroy(betaZetaPowerDerived);
   memory->destroy(betaZetaPower);
 
   // cutoff-function tables
   memory->destroy(cutoffFunctionDerived);
   memory->destroy(cutoffFunction);
 
   // angular-function tables
   memory->destroy(gtetaFunctionDerived);
   memory->destroy(gtetaFunction);
 
   // exponential table
   memory->destroy(exponential);
 }
 
 // Build the lookup tables that compute() interpolates:
 //   exponential[]                         exp(-x) on a uniform grid
 //   gtetaFunction[][] (+Derived)          angular term per element, cos(theta) in [-1,1]
 //   cutoffFunction[][][] (+Derived)       cutoff term per element pair, r >= GRIDSTART
 //   betaZetaPower[][] (+Derived)          bond-order term per element
 // Requires params, elem2param, nelements and cutmax to be set already.
 void PairTersoffTable::allocateGrids(void)
 {
   int   i, j, l;
 
   int     numGridPointsExponential, numGridPointsGtetaFunction, numGridPointsOneCutoffFunction;
   int     numGridPointsNotOneCutoffFunction, numGridPointsCutoffFunction, numGridPointsBetaZetaPower;
   // double minArgumentExponential;
   double  deltaArgumentCutoffFunction, deltaArgumentExponential, deltaArgumentBetaZetaPower;
   double  deltaArgumentGtetaFunction;
   double  r, minMu, maxLambda, maxCutoff;
   double const PI=acos(-1.0);
 
   // exponential
 
   // find min and max argument
   minMu=params[0].lam2;
   maxLambda=params[0].lam1;
   for (i=1; i<nparams; i++) {
     if (params[i].lam2 < minMu) minMu = params[i].lam2;
     if (params[i].lam1 > maxLambda) maxLambda = params[i].lam1;
   }
   maxCutoff=cutmax;
 
   // smallest argument the table must cover: smallest lam2 at r = GRIDSTART
   minArgumentExponential=minMu*GRIDSTART;
 
   // +2 leaves room for the [idx+1] read done by linear interpolation
   numGridPointsExponential=(int)((maxLambda*maxCutoff-minArgumentExponential)*GRIDDENSITY_EXP)+2;
 
   memory->create(exponential,numGridPointsExponential,"tersofftable:exponential");
 
   r = minArgumentExponential;
   deltaArgumentExponential = 1.0 / GRIDDENSITY_EXP;
   for (i = 0; i < numGridPointsExponential; i++)
     {
       exponential[i] = exp(-r);
       r += deltaArgumentExponential;
     }
 
 
   // gtetaFunction
 
   numGridPointsGtetaFunction=(int)(2.0*GRIDDENSITY_GTETA)+2;
 
   memory->create(gtetaFunction,nelements,numGridPointsGtetaFunction,"tersofftable:gtetaFunction");
   memory->create(gtetaFunctionDerived,nelements,numGridPointsGtetaFunction,"tersofftable:gtetaFunctionDerived");
 
   for (i=0; i<nelements; i++) {
     // grid variable is cos(theta), starting at -1
     r = -1.0;
     deltaArgumentGtetaFunction = 1.0 / GRIDDENSITY_GTETA;
 
     int iparam = elem2param[i][i][i];
     double c = params[iparam].c;
     double d = params[iparam].d;
     double h = params[iparam].h;
 
     for (j = 0; j < numGridPointsGtetaFunction; j++) {
       gtetaFunction[i][j]=1.0+(c*c)/(d*d)-(c*c)/(d*d+(h-r)*(h-r));
       gtetaFunctionDerived[i][j]= -2.0 * c * c * (h-r) / ((d*d+(h-r)*(h-r))*(d*d+(h-r)*(h-r)));
       r += deltaArgumentGtetaFunction;
     }
   }
 
 
   // cutoffFunction, zetaFunction, find grids.
   // all elements share one table width, so take the maximum over them
 
   int ngrid_max = -1;
   int zeta_max = -1;
 
   for (i=0; i<nelements; i++) {
 
     int iparam = elem2param[i][i][i];
     double c = params[iparam].c;
     double d = params[iparam].d;
     double beta = params[iparam].beta;
 
     numGridPointsBetaZetaPower=(int)(((1.0+(c*c)/(d*d)-(c*c)/(d*d+4))*beta*leadingDimensionInteractionList*GRIDDENSITY_BIJ))+2;
     zeta_max = MAX(zeta_max,numGridPointsBetaZetaPower);
 
     // a single loop over j: the original nested a second, identical
     // loop on the same index, which ran the body exactly once per j
     for (j=0; j<nelements; j++) {
 
       int ijparam = elem2param[i][j][j];
       double cutoffR = params[ijparam].cutoffR;
       double cutoffS = params[ijparam].cutoffS;
 
       numGridPointsOneCutoffFunction=(int) ((cutoffR-GRIDSTART)*GRIDDENSITY_FCUTOFF)+1;
       numGridPointsNotOneCutoffFunction=(int) ((cutoffS-cutoffR)*GRIDDENSITY_FCUTOFF)+2;
       numGridPointsCutoffFunction=numGridPointsOneCutoffFunction+numGridPointsNotOneCutoffFunction;
 
       ngrid_max = MAX(ngrid_max,numGridPointsCutoffFunction);
     }
   }
 
   memory->create(cutoffFunction,nelements,nelements,ngrid_max,"tersoff:cutfunc");
   memory->create(cutoffFunctionDerived,nelements,nelements,ngrid_max,"tersoff:cutfuncD");
 
   // cutoffFunction, compute.
 
   for (i=0; i<nelements; i++) {
     for (j=0; j<nelements; j++) {
       int ijparam = elem2param[i][j][j];
       double cutoffR = params[ijparam].cutoffR;
       double cutoffS = params[ijparam].cutoffS;
 
       numGridPointsOneCutoffFunction=(int) ((cutoffR-GRIDSTART)*GRIDDENSITY_FCUTOFF)+1;
       numGridPointsNotOneCutoffFunction=(int) ((cutoffS-cutoffR)*GRIDDENSITY_FCUTOFF)+2;
       numGridPointsCutoffFunction=numGridPointsOneCutoffFunction+numGridPointsNotOneCutoffFunction;
 
       r = GRIDSTART;
       deltaArgumentCutoffFunction = 1.0 / GRIDDENSITY_FCUTOFF;
 
       // first part of the grid: cutoff function is 1 with zero slope
       for (l = 0; l < numGridPointsOneCutoffFunction; l++) {
         cutoffFunction[i][j][l] = 1.0;
         cutoffFunctionDerived[i][j][l]=0.0;
         r += deltaArgumentCutoffFunction;
       }
 
       // remaining part of the grid: cosine switching function
       for (l = numGridPointsOneCutoffFunction; l < numGridPointsCutoffFunction; l++) {
         cutoffFunction[i][j][l] = 0.5 + 0.5 * cos (PI * (r - cutoffR)/(cutoffS-cutoffR)) ;
         cutoffFunctionDerived[i][j][l] =  -0.5 * PI * sin (PI * (r - cutoffR)/(cutoffS-cutoffR)) / (cutoffS-cutoffR) ;
         r += deltaArgumentCutoffFunction;
       }
     }
   }
 
   // betaZetaPower, compute
 
   memory->create(betaZetaPower,nelements,zeta_max,"tersoff:zetafunc");
   memory->create(betaZetaPowerDerived,nelements,zeta_max,"tersoff:zetafuncD");
 
   for (i=0; i<nelements; i++) {
 
     int iparam = elem2param[i][i][i];
     double c = params[iparam].c;
     double d = params[iparam].d;
     double beta = params[iparam].beta;
 
     numGridPointsBetaZetaPower=(int)(((1.0+(c*c)/(d*d)-(c*c)/(d*d+4))*beta*leadingDimensionInteractionList*GRIDDENSITY_BIJ))+2;
 
     r=0.0;
     deltaArgumentBetaZetaPower = 1.0 / GRIDDENSITY_BIJ;
 
     // grid point 0 (argument 0) is set by hand; the analytic derivative
     // there is replaced by a forward difference after the loop
     betaZetaPower[i][0]=1.0;
 
     r += deltaArgumentBetaZetaPower;
 
     for (j = 1; j < numGridPointsBetaZetaPower; j++) {
       double powern=params[iparam].powern;
       betaZetaPower[i][j]=pow((1+pow(r,powern)),-1/(2*powern));
       betaZetaPowerDerived[i][j]=-0.5*pow(r,powern-1.0)*pow((1+pow(r,powern)),-1/(2*powern)-1) ;
       r += deltaArgumentBetaZetaPower;
     }
     betaZetaPowerDerived[i][0]=(betaZetaPower[i][1]-1.0)*GRIDDENSITY_BIJ;
   }
 }
 
 void PairTersoffTable::allocate()
 {
   // per-type arrays are indexed from 1, hence ntypes+1 entries per dimension
   const int dim = atom->ntypes + 1;
 
   allocated = 1;
 
   memory->create(setflag,dim,dim,"pair:setflag");
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   map = new int[dim];
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairTersoffTable::settings(int narg, char **arg)
 {
   if (narg != 0) error->all(FLERR,"Illegal pair_style command");
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairTersoffTable::coeff(int narg, char **arg)
 {
   int i,j,n;
 
   if (!allocated) allocate();
 
   if (narg != 3 + atom->ntypes)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // insure I,J args are * *
 
   if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // read args that map atom types to elements in potential file
   // map[i] = which element the Ith atom type is, -1 if NULL
   // nelements = # of unique elements
   // elements = list of element names
 
   if (elements) {
     for (i = 0; i < nelements; i++) delete [] elements[i];
     delete [] elements;
   }
   elements = new char*[atom->ntypes];
   for (i = 0; i < atom->ntypes; i++) elements[i] = NULL;
 
   nelements = 0;
   for (i = 3; i < narg; i++) {
     if (strcmp(arg[i],"NULL") == 0) {
       map[i-2] = -1;
       continue;
     }
     for (j = 0; j < nelements; j++)
       if (strcmp(arg[i],elements[j]) == 0) break;
     map[i-2] = j;
     if (j == nelements) {
       n = strlen(arg[i]) + 1;
       elements[j] = new char[n];
       strcpy(elements[j],arg[i]);
       nelements++;
     }
   }
 
   // read potential file and initialize potential parameters
 
   read_file(arg[2]);
   setup();
 
   // clear setflag since coeff() called once with I,J = * *
 
   n = atom->ntypes;
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       setflag[i][j] = 0;
 
   // set setflag i,j for type pairs where both are mapped to elements
 
   int count = 0;
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       if (map[i] >= 0 && map[j] >= 0) {
         setflag[i][j] = 1;
         count++;
       }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 
   // allocate tables and internal structures
   allocatePreLoops();
   allocateGrids();
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 // Check the settings this pair style depends on and request its neighbor list.
 void PairTersoffTable::init_style()
 {
   // stop with an error when newton pair is off
   if (force->newton_pair == 0)
     error->all(FLERR,"Pair style Tersoff requires newton pair on");
 
   // need a full neighbor list
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   // switch the request from a half list to a full list
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairTersoffTable::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   return cutmax;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Read the Tersoff potential file and fill params[] / nparams.
 // Proc 0 does all file I/O and broadcasts each line; every proc then
 // parses the line itself. Entries whose three element tags are not all
 // in elements[] are skipped.
 void PairTersoffTable::read_file(char *file)
 {
   int params_per_line = 17;
   // one extra slot for the terminating NULL stored by the strtok loop below
   char **words = new char*[params_per_line+1];
 
   memory->sfree(params);
   params = NULL;
   nparams = maxparam = 0;
 
   // open file on proc 0
 
   FILE *fp;
   if (comm->me == 0) {
     fp = force->open_potential(file);
     if (fp == NULL) {
       char str[128];
       // bounded write: the file name is user input and may be longer
       // than the buffer
       snprintf(str,sizeof(str),"Cannot open Tersoff potential file %s",file);
       error->one(FLERR,str);
     }
   }
 
   // read each set of params from potential file
   // one set of params can span multiple lines
   // store params if all 3 element tags are in element list
 
   int n,nwords,ielement,jelement,kelement;
   char line[MAXLINE],*ptr;
   int eof = 0;
 
   while (1) {
     if (comm->me == 0) {
       ptr = fgets(line,MAXLINE,fp);
       if (ptr == NULL) {
         eof = 1;
         fclose(fp);
       } else n = strlen(line) + 1;
     }
     // eof is broadcast first so that all procs leave the loop together
     MPI_Bcast(&eof,1,MPI_INT,0,world);
     if (eof) break;
     MPI_Bcast(&n,1,MPI_INT,0,world);
     MPI_Bcast(line,n,MPI_CHAR,0,world);
 
     // strip comment, skip line if blank
 
     if ((ptr = strchr(line,'#'))) *ptr = '\0';
     nwords = atom->count_words(line);
     if (nwords == 0) continue;
 
     // concatenate additional lines until have params_per_line words
 
     while (nwords < params_per_line) {
       n = strlen(line);
       if (comm->me == 0) {
         ptr = fgets(&line[n],MAXLINE-n,fp);
         if (ptr == NULL) {
           eof = 1;
           fclose(fp);
         } else n = strlen(line) + 1;
       }
       MPI_Bcast(&eof,1,MPI_INT,0,world);
       if (eof) break;
       MPI_Bcast(&n,1,MPI_INT,0,world);
       MPI_Bcast(line,n,MPI_CHAR,0,world);
       if ((ptr = strchr(line,'#'))) *ptr = '\0';
       nwords = atom->count_words(line);
     }
 
     // also reached when the file ends in the middle of an entry
     if (nwords != params_per_line)
       error->all(FLERR,"Incorrect format in Tersoff potential file");
 
     // words = ptrs to all words in line
 
     nwords = 0;
     words[nwords++] = strtok(line," \t\n\r\f");
     while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue;
 
     // ielement,jelement,kelement = 1st args
     // if all 3 args are in element list, then parse this line
     // else skip to next entry in file
 
     for (ielement = 0; ielement < nelements; ielement++)
       if (strcmp(words[0],elements[ielement]) == 0) break;
     if (ielement == nelements) continue;
     for (jelement = 0; jelement < nelements; jelement++)
       if (strcmp(words[1],elements[jelement]) == 0) break;
     if (jelement == nelements) continue;
     for (kelement = 0; kelement < nelements; kelement++)
       if (strcmp(words[2],elements[kelement]) == 0) break;
     if (kelement == nelements) continue;
 
     // load up parameter settings and error check their values
 
     // grow the parameter table in chunks of DELTA entries
     if (nparams == maxparam) {
       maxparam += DELTA;
       params = (Param *) memory->srealloc(params,maxparam*sizeof(Param),
                                           "pair:params");
     }
 
     params[nparams].ielement = ielement;
     params[nparams].jelement = jelement;
     params[nparams].kelement = kelement;
     // only the tersoff_2 form is implemented: gamma must be 1 and lam3 must
     // be 0 (checked below); powerm is stored but not validated here
     params[nparams].powerm = atof(words[3]);
     params[nparams].gamma = atof(words[4]);
     params[nparams].lam3 = atof(words[5]);
     params[nparams].c = atof(words[6]);
     params[nparams].d = atof(words[7]);
     params[nparams].h = atof(words[8]);
     params[nparams].powern = atof(words[9]);
     params[nparams].beta = atof(words[10]);
     params[nparams].lam2 = atof(words[11]);
     params[nparams].bigb = atof(words[12]);
     // current implementation is based on functional form
     // of tersoff_2 as reported in the reference paper
     // the file gives the cutoff as center bigr and half-width bigd
     double bigr = atof(words[13]);
     double bigd = atof(words[14]);
     params[nparams].cutoffR = bigr - bigd;
     params[nparams].cutoffS = bigr + bigd;
     params[nparams].lam1 = atof(words[15]);
     params[nparams].biga = atof(words[16]);
 
     // integer copy of the m exponent
     params[nparams].powermint = int(params[nparams].powerm);
 
     if (params[nparams].c < 0.0 || params[nparams].d < 0.0 ||
         params[nparams].powern < 0.0 || params[nparams].beta < 0.0 ||
         params[nparams].lam2 < 0.0 || params[nparams].bigb < 0.0 ||
         params[nparams].cutoffR < 0.0 ||params[nparams].cutoffS < 0.0 ||
         params[nparams].cutoffR > params[nparams].cutoffS ||
         params[nparams].lam1 < 0.0 || params[nparams].biga < 0.0
     ) error->all(FLERR,"Illegal Tersoff parameter");
 
     // only tersoff_2 parametrization is implemented
     if (params[nparams].gamma != 1.0 || params[nparams].lam3 != 0.0)
       error->all(FLERR,"Current tersoff/table pair_style implements only tersoff_2 parametrization");
     nparams++;
   }
 
   delete [] words;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairTersoffTable::setup()
 {
   int i,j,k,m,n;
 
   // set elem2param for all triplet combinations
   // must be a single exact match to lines read from file
   // do not allow for ACB in place of ABC
 
   memory->destroy(elem2param);
   memory->create(elem2param,nelements,nelements,nelements,"pair:elem2param");
 
   for (i = 0; i < nelements; i++)
     for (j = 0; j < nelements; j++)
       for (k = 0; k < nelements; k++) {
         n = -1;
         for (m = 0; m < nparams; m++) {
           if (i == params[m].ielement && j == params[m].jelement &&
               k == params[m].kelement) {
             if (n >= 0) error->all(FLERR,"Potential file has duplicate entry");
             n = m;
           }
         }
         if (n < 0) error->all(FLERR,"Potential file is missing an entry");
         elem2param[i][j][k] = n;
       }
 
   // set cutoff square
   for (m = 0; m < nparams; m++) {
     params[m].cut = params[m].cutoffS;
     params[m].cutsq = params[m].cut*params[m].cut;
   }
 
   // set cutmax to max of all params
   cutmax = 0.0;
   for (m = 0; m < nparams; m++) {
     if (params[m].cut > cutmax) cutmax = params[m].cut;
   }
 }
diff --git a/src/USER-OMP/fix_peri_neigh_omp.cpp b/src/USER-OMP/fix_peri_neigh_omp.cpp
index 2aba2665b..a5f78a719 100644
--- a/src/USER-OMP/fix_peri_neigh_omp.cpp
+++ b/src/USER-OMP/fix_peri_neigh_omp.cpp
@@ -1,50 +1,50 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Mike Parks (SNL)
 ------------------------------------------------------------------------- */
 
 #include "fix_peri_neigh_omp.h"
 #include "fix_omp.h"
 #include "modify.h"
 #include "neighbor.h"
 #include "neigh_request.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
 /* ---------------------------------------------------------------------- */
 
 // Request the neighbor list used by this fix.  Returns immediately unless
 // the "first" flag is set.  The request is flagged for OpenMP when a fix
 // with ID "package_omp" exists and reports that its neighbor flag is on.
 void FixPeriNeighOMP::init()
 {
   if (!first) return;
 
   // determine status of neighbor flag of the omp package command
   int ifix = modify->find_fix("package_omp");
   int use_omp = 0;
   if (ifix >=0) {
      FixOMP * fix = static_cast<FixOMP *>(lmp->modify->fix[ifix]);
      if (fix->get_neighbor()) use_omp = 1;
   }
 
   // need a full neighbor list once
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   // the request is owned by a fix (not a pair style), asks for a full
   // (not half) list, and is marked occasional
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->fix  = 1;
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
   neighbor->requests[irequest]->omp = use_omp;
   neighbor->requests[irequest]->occasional = 1;
 }
diff --git a/src/USER-OMP/fix_qeq_comb_omp.cpp b/src/USER-OMP/fix_qeq_comb_omp.cpp
index 09a0f598a..7b2be715d 100644
--- a/src/USER-OMP/fix_qeq_comb_omp.cpp
+++ b/src/USER-OMP/fix_qeq_comb_omp.cpp
@@ -1,192 +1,192 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "mpi.h"
 #include "math.h"
 #include "fix_qeq_comb_omp.h"
 #include "fix_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "group.h"
 #include "memory.h"
 #include "modify.h"
 #include "error.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "respa.h"
 #include "update.h"
 #include "pair_comb_omp.h"
 
 #include <string.h>
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
 /* ---------------------------------------------------------------------- */
 
 FixQEQCombOMP::FixQEQCombOMP(LAMMPS *lmp, int narg, char **arg) :
   FixQEQComb(lmp, narg, arg)
 {
   // all arguments are forwarded to the FixQEQComb base constructor;
   // this class only enforces the minimum argument count
   const bool too_few_args = (narg < 5);
   if (too_few_args) {
     error->all(FLERR,"Illegal fix qeq/comb/omp command");
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Validate prerequisites and request a neighbor list.
 // Errors out when atoms have no charge attribute, when a comb3 pair style
 // is present, when neither pair style comb/omp nor comb is found, or when
 // the fix group is empty.
 void FixQEQCombOMP::init()
 {
   if (!atom->q_flag)
     error->all(FLERR,"Fix qeq/comb/omp requires atom attribute q");
 
   if (NULL != force->pair_match("comb3",0))
     error->all(FLERR,"No support for comb3 currently available in USER-OMP");
 
   // prefer the comb/omp pair style, fall back to plain comb
   comb = (PairComb *) force->pair_match("comb/omp",1);
   if (comb == NULL)
     comb = (PairComb *) force->pair_match("comb",1);
   if (comb == NULL) 
     error->all(FLERR,"Must use pair_style comb or "
                "comb/omp with fix qeq/comb/omp");
 
   // remember the number of rRESPA levels when a respa integrator is active
   if (strstr(update->integrate_style,"respa"))
     nlevels_respa = ((Respa *) update->integrate)->nlevels;
 
   ngroup = group->count(igroup);
   if (ngroup == 0) error->all(FLERR,"Fix qeq/comb group has no atoms");
 
   // determine status of neighbor flag of the omp package command
   int ifix = modify->find_fix("package_omp");
   int use_omp = 0;
   if (ifix >=0) {
      FixOMP * fix = static_cast<FixOMP *>(lmp->modify->fix[ifix]);
      if (fix->get_neighbor()) use_omp = 1;
   }
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->omp = use_omp;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Iterative charge equilibration, run on timesteps that are a multiple of
 // nevery.  Charges of atoms in the fix group are advanced with a damped
 // update driven by the charge force returned by comb->yasu_char(), until
 // the average and maximum deviations drop below thresholds derived from
 // "precision" or the iteration limit is reached.  Progress is written to
 // fp on rank 0 when fp is set.
 void FixQEQCombOMP::post_force(int vflag)
 {
   int i,ii,iloop,loopmax,inum,*ilist;
   double heatpq,qmass,dtq,dtq2;
   double enegchkall,enegmaxall;
 
   if (update->ntimestep % nevery) return;
 
   // reallocate work arrays if necessary
   // qf = charge force
   // q1 = charge displacement
   // q2 = tmp storage of charge force for next iteration
 
   if (atom->nmax > nmax) {
     memory->destroy(qf);
     memory->destroy(q1);
     memory->destroy(q2);
     nmax = atom->nmax;
     memory->create(qf,nmax,"qeq:qf");
     memory->create(q1,nmax,"qeq:q1");
     memory->create(q2,nmax,"qeq:q2");
     vector_atom = qf;
   }
 
   // more loops for first-time charge equilibrium
 
   iloop = 0;
   if (firstflag) loopmax = 500;
   else loopmax = 200;
 
   // charge-equilibration loop
 
   if (me == 0 && fp)
     fprintf(fp,"Charge equilibration on step " BIGINT_FORMAT "\n",
             update->ntimestep);
 
   // fixed parameters of the damped charge update:
   // heatpq = damping factor on q1, qmass/dtq enter only through dtq2
   heatpq = 0.05;
   qmass  = 0.016;
   dtq    = 0.01;
   dtq2   = 0.5*dtq*dtq/qmass;
 
   double enegchk = 0.0;
   double enegtot = 0.0;
   double enegmax = 0.0;
 
   double *q = atom->q;
   int *mask = atom->mask;
 
   // iterate over the atoms of the pair style's neighbor list
   inum = comb->list->inum;
   ilist = comb->list->ilist;
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     q1[i] = q2[i] = qf[i] = 0.0;
   }
 
   for (iloop = 0; iloop < loopmax; iloop ++ ) {
     // advance charge displacement and charge for atoms in the group
     for (ii = 0; ii < inum; ii++) {
       i = ilist[ii];
       if (mask[i] & groupbit) {
         q1[i] += qf[i]*dtq2 - heatpq*q1[i];
         q[i]  += q1[i];
       }
     }
     comm->forward_comm_fix(this);
 
     // yasu_char() fills qf and returns a total that is averaged over ngroup
     if(comb) enegtot = comb->yasu_char(qf,igroup);
     enegtot /= ngroup;
     enegchk = enegmax = 0.0;
 
     // deviation of each atom from the average becomes the new charge force
     for (ii = 0; ii < inum ; ii++) {
       i = ilist[ii];
       if (mask[i] & groupbit) {
         q2[i] = enegtot-qf[i];
         enegmax = MAX(enegmax,fabs(q2[i]));
         enegchk += fabs(q2[i]);
         qf[i] = q2[i];
       }
     }
 
     // global mean and global maximum of the absolute deviation
     MPI_Allreduce(&enegchk,&enegchkall,1,MPI_DOUBLE,MPI_SUM,world);
     enegchk = enegchkall/ngroup;
     MPI_Allreduce(&enegmax,&enegmaxall,1,MPI_DOUBLE,MPI_MAX,world);
     enegmax = enegmaxall;
 
     if (enegchk <= precision && enegmax <= 100.0*precision) break;
 
     if (me == 0 && fp)
       fprintf(fp,"  iteration: %d, enegtot %.6g, "
               "enegmax %.6g, fq deviation: %.6g\n",
               iloop,enegtot,enegmax,enegchk);
 
     // second update of q1 with the new charge force; q itself is only
     // changed at the top of the next iteration
     for (ii = 0; ii < inum; ii++) {
       i = ilist[ii];
       if (mask[i] & groupbit)
         q1[i] += qf[i]*dtq2 - heatpq*q1[i];
     }
   }
 
   // iloop equals loopmax only when the loop ran out without hitting break
   if (me == 0 && fp) {
     if (iloop == loopmax)
       fprintf(fp,"Charges did not converge in %d iterations\n",iloop);
     else
       fprintf(fp,"Charges converged in %d iterations to %.10f tolerance\n",
               iloop,enegchk);
   }
 }
diff --git a/src/USER-REAXC/fix_qeq_reax.cpp b/src/USER-REAXC/fix_qeq_reax.cpp
index c6045bfe5..dfcd16316 100644
--- a/src/USER-REAXC/fix_qeq_reax.cpp
+++ b/src/USER-REAXC/fix_qeq_reax.cpp
@@ -1,1039 +1,1039 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Hasan Metin Aktulga, Purdue University
    (now at Lawrence Berkeley National Laboratory, hmaktulga@lbl.gov)
 
      Hybrid and sub-group capabilities: Ray Shan (Sandia)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "fix_qeq_reax.h"
 #include "pair_reax_c.h"
 #include "atom.h"
 #include "comm.h"
 #include "domain.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "update.h"
 #include "force.h"
 #include "group.h"
 #include "pair.h"
 #include "respa.h"
 #include "memory.h"
 #include "citeme.h"
 #include "error.h"
 #include "reaxc_defs.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
 // scale factor applied to every H matrix element in calculate_H()
 // NOTE(review): the name suggests an eV -> kcal/mol conversion, but 14.4
 // looks like the Coulomb constant in eV*Angstrom -- confirm before reuse
 #define EV_TO_KCAL_PER_MOL 14.4
 //#define DANGER_ZONE     0.95
 //#define LOOSE_ZONE      0.7
 // square / cube helpers; the argument is evaluated more than once, so do
 // not pass expressions with side effects
 #define SQR(x) ((x)*(x))
 #define CUBE(x) ((x)*(x)*(x))
 // multiplied by mincap in allocate_matrix() to get the minimum number of
 // H matrix entries that is allocated
 #define MIN_NBRS 100
 
 static const char cite_fix_qeq_reax[] =
   "fix qeq/reax command:\n\n"
   "@Article{Aktulga12,\n"
   " author = {H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama},\n"
   " title = {Parallel reactive molecular dynamics: Numerical methods and algorithmic techniques},\n"
   " journal = {Parallel Computing},\n"
   " year =    2012,\n"
   " volume =  38,\n"
   " pages =   {245--259}\n"
   "}\n\n";
 
 /* ---------------------------------------------------------------------- */
 
 FixQEqReax::FixQEqReax(LAMMPS *lmp, int narg, char **arg) :
   Fix(lmp, narg, arg)
 {
   if (lmp->citeme) lmp->citeme->add(cite_fix_qeq_reax);
 
   // exactly 8 arguments are accepted; arg[3]..arg[7] supply nevery,
   // swa, swb, tolerance and the source of the per-type parameters
   if (narg != 8) error->all(FLERR,"Illegal fix qeq/reax command");
 
   nevery = force->inumeric(FLERR,arg[3]);
   swa = force->numeric(FLERR,arg[4]);
   swb = force->numeric(FLERR,arg[5]);
   tolerance = force->numeric(FLERR,arg[6]);
   pertype_parameters(arg[7]);
 
   // counters and capacities start out empty
   n = 0;
   n_cap = 0;
   N = 0;
   nmax = 0;
   m_fill = 0;
   m_cap = 0;
   pack_flag = 0;
 
   // second dimension of the s_hist/t_hist history arrays
   nprev = 5;
 
   // nothing is allocated yet: shielding table, solution vectors,
   // preconditioner and right-hand sides
   shld = NULL;
   s = NULL;
   t = NULL;
   Hdia_inv = NULL;
   b_s = NULL;
   b_t = NULL;
   b_prc = NULL;
   b_prm = NULL;
 
   // CG work vectors
   p = NULL;
   q = NULL;
   r = NULL;
   d = NULL;
 
   // sparse H matrix storage
   H.firstnbr = NULL;
   H.numnbrs = NULL;
   H.jlist = NULL;
   H.val = NULL;
 
   comm_forward = 1;
   comm_reverse = 1;
 
   // allocate the per-atom history arrays, register this fix with the
   // Atom class, then zero the histories
   s_hist = NULL;
   t_hist = NULL;
   grow_arrays(atom->nmax);
   atom->add_callback(0);
   for (int irow = 0; irow < atom->nmax; irow++) {
     for (int icol = 0; icol < nprev; ++icol) {
       s_hist[irow][icol] = 0;
       t_hist[irow][icol] = 0;
     }
   }
 
   // result of the "reax/c" pair style lookup; other methods test this
   // pointer before using it
   reaxc = (PairReaxC *) force->pair_match("reax/c",1);
 }
 
 /* ---------------------------------------------------------------------- */
 
 FixQEqReax::~FixQEqReax()
 {
   // remove the callback that the constructor registered with Atom
   atom->delete_callback(id,0);
 
   // per-atom solution histories
   memory->destroy(s_hist);
   memory->destroy(t_hist);
 
   // solver work vectors and sparse matrix storage
   deallocate_storage();
   deallocate_matrix();
 
   memory->destroy(shld);
 
   // with reaxflag set, chi/eta/gamma were obtained from the pair style via
   // extract() and were not allocated here, so they are left alone
   if (reaxflag) return;
 
   memory->destroy(chi);
   memory->destroy(eta);
   memory->destroy(gamma);
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixQEqReax::setmask()
 {
   // hooks used by this fix: pre_force in regular runs, at rRESPA
   // levels, and during minimization
   return PRE_FORCE | PRE_FORCE_RESPA | MIN_PRE_FORCE;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::pertype_parameters(char *arg)
 {
   if (strcmp(arg,"reax/c") == 0) {
     reaxflag = 1;
     Pair *pair = force->pair_match("reax/c",1);
     if (pair == NULL) error->all(FLERR,"No pair reax/c for fix qeq/reax");
     int tmp;
     chi = (double *) pair->extract("chi",tmp);
     eta = (double *) pair->extract("eta",tmp);
     gamma = (double *) pair->extract("gamma",tmp);
     if (chi == NULL || eta == NULL || gamma == NULL)
       error->all(FLERR,
                  "Fix qeq/reax could not extract params from pair reax/c");
     return;
   }
 
   int i,itype,ntypes;
   double v1,v2,v3;
   FILE *pf;
 
   reaxflag = 0;
   ntypes = atom->ntypes;
 
   memory->create(chi,ntypes+1,"qeq/reax:chi");
   memory->create(eta,ntypes+1,"qeq/reax:eta");
   memory->create(gamma,ntypes+1,"qeq/reax:gamma");
 
   if (comm->me == 0) {
     if ((pf = fopen(arg,"r")) == NULL)
       error->one(FLERR,"Fix qeq/reax parameter file could not be found");
 
     for (i = 1; i <= ntypes && !feof(pf); i++) {
       fscanf(pf,"%d %lg %lg %lg",&itype,&v1,&v2,&v3);
       if (itype < 1 || itype > ntypes)
         error->one(FLERR,"Fix qeq/reax invalid atom type in param file");
       chi[itype] = v1;
       eta[itype] = v2;
       gamma[itype] = v3;
     }
     if (i <= ntypes) error->one(FLERR,"Invalid param file for fix qeq/reax");
     fclose(pf);
   }
 
   MPI_Bcast(&chi[1],ntypes,MPI_DOUBLE,0,world);
   MPI_Bcast(&eta[1],ntypes,MPI_DOUBLE,0,world);
   MPI_Bcast(&gamma[1],ntypes,MPI_DOUBLE,0,world);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::allocate_storage()
 {
   // all work vectors are sized to the current per-atom capacity
   nmax = atom->nmax;
 
   // solution vectors
   memory->create(s,nmax,"qeq:s");
   memory->create(t,nmax,"qeq:t");
 
   // preconditioner and right-hand sides
   memory->create(Hdia_inv,nmax,"qeq:Hdia_inv");
   memory->create(b_s,nmax,"qeq:b_s");
   memory->create(b_t,nmax,"qeq:b_t");
   memory->create(b_prc,nmax,"qeq:b_prc");
   memory->create(b_prm,nmax,"qeq:b_prm");
 
   // CG work vectors
   memory->create(p,nmax,"qeq:p");
   memory->create(q,nmax,"qeq:q");
   memory->create(r,nmax,"qeq:r");
   memory->create(d,nmax,"qeq:d");
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::deallocate_storage()
 {
   // release everything that allocate_storage() creates
 
   // CG work vectors
   memory->destroy(d);
   memory->destroy(r);
   memory->destroy(q);
   memory->destroy(p);
 
   // preconditioner and right-hand sides
   memory->destroy(b_prm);
   memory->destroy(b_prc);
   memory->destroy(b_t);
   memory->destroy(b_s);
   memory->destroy(Hdia_inv);
 
   // solution vectors
   memory->destroy(t);
   memory->destroy(s);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::reallocate_storage()
 {
   // drop the old work vectors, ...
   deallocate_storage();
   // ... recreate them at the current atom->nmax, ...
   allocate_storage();
   // ... and refill them with their initial values
   init_storage();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::allocate_matrix()
 {
   int i,ii,inum,m;
   int *ilist, *numneigh;
 
   int mincap;
   double safezone;
 
   if( reaxflag ) {
     mincap = reaxc->system->mincap;
     safezone = reaxc->system->safezone;
   } else {
     mincap = MIN_CAP;
     safezone = SAFE_ZONE;
   }
 
   n = atom->nlocal;
   n_cap = MAX( (int)(n * safezone), mincap );
 
   // determine the total space for the H matrix
 
   if (reaxc) {
     inum = reaxc->list->inum;
     ilist = reaxc->list->ilist;
     numneigh = reaxc->list->numneigh;
   } else {
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
   }
 
   m = 0;
   for( ii = 0; ii < inum; ii++ ) {
     i = ilist[ii];
     m += numneigh[i];
   }
   m_cap = MAX( (int)(m * safezone), mincap * MIN_NBRS );
 
   H.n = n_cap;
   H.m = m_cap;
   memory->create(H.firstnbr,n_cap,"qeq:H.firstnbr");
   memory->create(H.numnbrs,n_cap,"qeq:H.numnbrs");
   memory->create(H.jlist,m_cap,"qeq:H.jlist");
   memory->create(H.val,m_cap,"qeq:H.val");
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::deallocate_matrix()
 {
   // release the sparse H matrix: entry arrays first, then row arrays
   memory->destroy(H.val);
   memory->destroy(H.jlist);
   memory->destroy(H.numnbrs);
   memory->destroy(H.firstnbr);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::reallocate_matrix()
 {
   // free the current H matrix, then size a new one from the
   // present atom and neighbor counts
   deallocate_matrix();
   allocate_matrix();
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Check prerequisites (charge attribute, non-empty group), request the
 // neighbor list for this fix, rebuild the shielding table and taper
 // coefficients, and record the number of rRESPA levels if applicable.
 void FixQEqReax::init()
 {
   if (!atom->q_flag) error->all(FLERR,"Fix qeq/reax requires atom attribute q");
 
   ngroup = group->count(igroup);
   if (ngroup == 0) error->all(FLERR,"Fix qeq/reax group has no atoms");
 
   /*
   if (reaxc)
     if (ngroup != reaxc->ngroup)
       error->all(FLERR,"Fix qeq/reax group and pair reax/c have "
 		       "different numbers of atoms");
   */
 
   // need a half neighbor list w/ Newton off and ghost neighbors
   // built whenever re-neighboring occurs
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->fix = 1;
   neighbor->requests[irequest]->newton = 2;
   neighbor->requests[irequest]->ghost = 1;
 
   init_shielding();
   init_taper();
 
   if (strstr(update->integrate_style,"respa"))
     nlevels_respa = ((Respa *) update->integrate)->nlevels;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::init_list(int id, NeighList *ptr)
 {
   // keep the neighbor list handed to this fix; the id argument is not used
   (void) id;
   list = ptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Build the pairwise shielding table
 //   shld[i][j] = (gamma[i]*gamma[j])^(-1.5)   for atom types 1..ntypes.
 // Called from init(), i.e. possibly several times during the lifetime of
 // the fix, so any previously built table is released first.
 void FixQEqReax::init_shielding()
 {
   int i,j;
   int ntypes;
 
   ntypes = atom->ntypes;
 
   // shld is NULL after construction and memory->destroy() accepts NULL
   // (the destructor relies on the same property); freeing here avoids
   // leaking the old table on every re-initialization
   memory->destroy(shld);
   memory->create(shld,ntypes+1,ntypes+1,"qeq:shileding");
 
   for( i = 1; i <= ntypes; ++i )
     for( j = 1; j <= ntypes; ++j )
       shld[i][j] = pow( gamma[i] * gamma[j], -1.5 );
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::init_taper()
 {
   double d7, swa2, swa3, swb2, swb3;
 
   if (fabs(swa) > 0.01 && comm->me == 0)
     error->warning(FLERR,"Fix qeq/reax has non-zero lower Taper radius cutoff");
   if (swb < 0)
     error->all(FLERR, "Fix qeq/reax has negative upper Taper radius cutoff");
   else if (swb < 5 && comm->me == 0)
     error->warning(FLERR,"Fix qeq/reax has very low Taper radius cutoff");
 
   d7 = pow( swb - swa, 7 );
   swa2 = SQR( swa );
   swa3 = CUBE( swa );
   swb2 = SQR( swb );
   swb3 = CUBE( swb );
 
   Tap[7] =  20.0 / d7;
   Tap[6] = -70.0 * (swa + swb) / d7;
   Tap[5] =  84.0 * (swa2 + 3.0*swa*swb + swb2) / d7;
   Tap[4] = -35.0 * (swa3 + 9.0*swa2*swb + 9.0*swa*swb2 + swb3 ) / d7;
   Tap[3] = 140.0 * (swa3*swb + 3.0*swa2*swb2 + swa*swb3 ) / d7;
   Tap[2] =-210.0 * (swa3*swb2 + swa2*swb3) / d7;
   Tap[1] = 140.0 * swa3 * swb3 / d7;
   Tap[0] = (-35.0*swa3*swb2*swb2 + 21.0*swa2*swb3*swb2 +
             7.0*swa*swb3*swb3 + swb3*swb3*swb ) / d7;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::setup_pre_force(int vflag)
 {
   neighbor->build_one(list);
 
   deallocate_storage();
   allocate_storage();
 
   init_storage();
 
   deallocate_matrix();
   allocate_matrix();
 
   pre_force(vflag);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::setup_pre_force_respa(int vflag, int ilevel)
 {
   // setup is performed only for levels at or above nlevels_respa-1
   if (ilevel >= nlevels_respa-1) setup_pre_force(vflag);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::min_setup_pre_force(int vflag)
 {
   // minimization uses the same setup path as a regular run
   setup_pre_force(vflag);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::init_storage()
 {
   int NN;
 
   if (reaxc) 
     NN = reaxc->list->inum + reaxc->list->gnum;
   else
     NN = list->inum + list->gnum;
 
   for( int i = 0; i < NN; i++ ) {
     Hdia_inv[i] = 1. / eta[atom->type[i]];
     b_s[i] = -chi[atom->type[i]];
     b_t[i] = -1.0;
     b_prc[i] = 0;
     b_prm[i] = 0;
     s[i] = t[i] = 0;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::pre_force(int vflag)
 {
   // act only on timesteps that are a multiple of nevery
   if (update->ntimestep % nevery) return;
 
   // wall-clock timing is taken on rank 0 only
   const bool on_root = (comm->me == 0);
   double t_start = 0.0;
   if (on_root) t_start = MPI_Wtime();
 
   n = atom->nlocal;
   N = atom->nlocal + atom->nghost;
 
   // work vectors must be atom->nmax long; regrow them when the
   // per-atom capacity has increased
   if (nmax < atom->nmax) reallocate_storage();
 
   // regrow the H matrix when rows or entries get close to capacity
   const bool rows_tight = n > n_cap*DANGER_ZONE;
   const bool entries_tight = m_fill > m_cap*DANGER_ZONE;
   if (rows_tight || entries_tight) reallocate_matrix();
 
   // build H and initial guesses, solve for s and t, update charges
   init_matvec();
   matvecs = CG(b_s, s);
   matvecs += CG(b_t, t);
   calculate_Q();
 
   if (on_root) {
     const double t_end = MPI_Wtime();
     qeq_time = t_end - t_start;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::pre_force_respa(int vflag, int ilevel, int iloop)
 {
   // charges are equilibrated only at level nlevels_respa-1
   if (ilevel != nlevels_respa-1) return;
   pre_force(vflag);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::min_pre_force(int vflag)
 {
   // minimization uses the same equilibration as a regular run
   pre_force(vflag);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Prepare one QEq solve: rebuild the H matrix, reset the preconditioner
 // and right-hand sides for atoms in the fix group, extrapolate initial
 // guesses for s and t from the stored histories, and forward-communicate
 // both guesses.
 void FixQEqReax::init_matvec()
 {
   /* fill-in H matrix */
   compute_H();
 
   int nn, ii, i;
   int *ilist;
 
   // use the pair style's neighbor list when reaxc is set, else our own
   if (reaxc) {
     nn = reaxc->list->inum;
     ilist = reaxc->list->ilist;
   } else {
     nn = list->inum;
     ilist = list->ilist;
   }
 
   for( ii = 0; ii < nn; ++ii ) {
     i = ilist[ii];
     if (atom->mask[i] & groupbit) {
     
       /* init pre-conditioner for H and init solution vectors */
       Hdia_inv[i] = 1. / eta[ atom->type[i] ];
       b_s[i]      = -chi[ atom->type[i] ];
       b_t[i]      = -1.0;
 
       /* linear extrapolation for s & t from previous solutions */
       //s[i] = 2 * s_hist[i][0] - s_hist[i][1];
       //t[i] = 2 * t_hist[i][0] - t_hist[i][1];
 
       /* quadratic extrapolation for s & t from previous solutions */
       // (active for t only)
       //s[i] = s_hist[i][2] + 3 * ( s_hist[i][0] - s_hist[i][1] );
       t[i] = t_hist[i][2] + 3 * ( t_hist[i][0] - t_hist[i][1] );
 
       /* cubic extrapolation for s & t from previous solutions */
       // (active for s only)
       s[i] = 4*(s_hist[i][0]+s_hist[i][2])-(6*s_hist[i][1]+s_hist[i][3]);
       //t[i] = 4*(t_hist[i][0]+t_hist[i][2])-(6*t_hist[i][1]+t_hist[i][3]);
     }
   }
 
   // pack_flag selects the vector that pack/unpack_forward_comm transfer:
   // 2 = s, 3 = t
   pack_flag = 2;
   comm->forward_comm_fix(this); //Dist_vector( s );
   pack_flag = 3;
   comm->forward_comm_fix(this); //Dist_vector( t );
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Fill the sparse H matrix.  For every listed atom i in the fix group the
 // neighbors j within the upper taper cutoff swb are stored as one row:
 // H.firstnbr[i] is the offset of the row, H.numnbrs[i] its length, and
 // H.jlist/H.val hold the column indices and values from calculate_H().
 // m_fill is left at the total number of stored entries.
 void FixQEqReax::compute_H()
 {
   int inum, jnum, *ilist, *jlist, *numneigh, **firstneigh;
   int i, j, ii, jj, flag;
   double **x, SMALL = 0.0001;
   double dx, dy, dz, r_sqr;
 
   int *type = atom->type;
   tagint *tag = atom->tag;
   x = atom->x;
   int *mask = atom->mask;
 
   // use the pair style's neighbor list when reaxc is set, else our own
   if (reaxc) {
     inum = reaxc->list->inum;
     ilist = reaxc->list->ilist;
     numneigh = reaxc->list->numneigh;
     firstneigh = reaxc->list->firstneigh;
   } else {
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
   }
   
   // fill in the H matrix
   m_fill = 0;
   r_sqr = 0;
   for( ii = 0; ii < inum; ii++ ) {
     i = ilist[ii];
     if (mask[i] & groupbit) {
       jlist = firstneigh[i];
       jnum = numneigh[i];
       H.firstnbr[i] = m_fill;
 
       for( jj = 0; jj < jnum; jj++ ) {
         j = jlist[jj];
 
         dx = x[j][0] - x[i][0];
         dy = x[j][1] - x[i][1];
         dz = x[j][2] - x[i][2];
         r_sqr = SQR(dx) + SQR(dy) + SQR(dz);
 
         // decide whether the pair (i,j) is stored in row i:
         // always when j < n (n = atom->nlocal as set in pre_force),
         // otherwise by comparing tags, and for equal tags by the sign
         // of the separation (z first, then y, then x)
         flag = 0;
         if (r_sqr <= SQR(swb)) {
           if (j < n) flag = 1;
           else if (tag[i] < tag[j]) flag = 1;
           else if (tag[i] == tag[j]) {
             if (dz > SMALL) flag = 1;
             else if (fabs(dz) < SMALL) {
               if (dy > SMALL) flag = 1;
               else if (fabs(dy) < SMALL && dx > SMALL)
                 flag = 1;
 	    }
 	  }
 	}
 
         if( flag ) {
           H.jlist[m_fill] = j;
           H.val[m_fill] = calculate_H( sqrt(r_sqr), shld[type[i]][type[j]] );
           m_fill++;
         }
       }
       H.numnbrs[i] = m_fill - H.firstnbr[i];
     }
   }
 
   // NOTE(review): this capacity check runs only after the fill loop, so
   // the writes to H.jlist/H.val above are not bounds-checked while
   // filling -- confirm that the DANGER_ZONE regrowth in pre_force() always
   // leaves enough headroom
   if (m_fill >= H.m) {
     char str[128];
     sprintf(str,"H matrix size has been exceeded: m_fill=%d H.m=%d\n",
              m_fill, H.m );
     error->warning(FLERR,str);
     error->all(FLERR,"Fix qeq/reax has insufficient QEq matrix size");
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Value of one off-diagonal H matrix element.
 //   r     : distance between the two atoms
 //   gamma : shielding term for the pair of atom types (see init_shielding)
 // Returns Taper(r) * EV_TO_KCAL_PER_MOL / cbrt(r^3 + gamma), where Taper
 // is the 7th-order polynomial with coefficients Tap[0..7].
 double FixQEqReax::calculate_H( double r, double gamma )
 {
   double Taper, denom;
 
   // Horner evaluation of the taper polynomial
   Taper = Tap[7] * r + Tap[6];
   Taper = Taper * r + Tap[5];
   Taper = Taper * r + Tap[4];
   Taper = Taper * r + Tap[3];
   Taper = Taper * r + Tap[2];
   Taper = Taper * r + Tap[1];
   Taper = Taper * r + Tap[0];
 
   // shielded distance: cube root taken with the exact exponent 1/3
   // rather than a truncated decimal approximation of it
   denom = r * r * r + gamma;
   denom = pow(denom,1.0/3.0);
 
   return Taper * EV_TO_KCAL_PER_MOL / denom;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Diagonally preconditioned conjugate-gradient solve of H x = b over the
 // listed atoms that belong to the fix group.
 //   b : right-hand side
 //   x : initial guess on entry, updated in place
 // Iterates while sqrt(sig_new)/|b| exceeds "tolerance", with the loop
 // counter bounded by imax.  Returns the final value of the loop counter,
 // which starts at 1.  Rank 0 issues a warning when the counter reaches
 // imax.
 int FixQEqReax::CG( double *b, double *x )
 {
   int  i, j, imax;
   double tmp, alpha, beta, b_norm;
   double sig_old, sig_new;
 
   int nn, jj;
   int *ilist;
   if (reaxc) {
     nn = reaxc->list->inum;
     ilist = reaxc->list->ilist;
   } else {
     nn = list->inum;
     ilist = list->ilist;
   }
 
   imax = 200;
 
   // pack_flag 1 makes the forward communication below transfer d;
   // reverse communication always sums contributions into q
   pack_flag = 1;
   sparse_matvec( &H, x, q );
   comm->reverse_comm_fix( this ); //Coll_Vector( q );
 
   // initial residual r = b - H x
   vector_sum( r , 1.,  b, -1., q, nn );
 
   for( jj = 0; jj < nn; ++jj ) {
     j = ilist[jj];
     if (atom->mask[j] & groupbit)
       d[j] = r[j] * Hdia_inv[j]; //pre-condition
   }
 
   b_norm = parallel_norm( b, nn );
   sig_new = parallel_dot( r, d, nn);
 
   for( i = 1; i < imax && sqrt(sig_new) / b_norm > tolerance; ++i ) {
     comm->forward_comm_fix(this); //Dist_vector( d );
     sparse_matvec( &H, d, q );
     comm->reverse_comm_fix(this); //Coll_vector( q );
 
     // step length along the search direction d
     tmp = parallel_dot( d, q, nn);
     alpha = sig_new / tmp;
 
     vector_add( x, alpha, d, nn );
     vector_add( r, -alpha, q, nn );
 
     // pre-conditioning
     for( jj = 0; jj < nn; ++jj ) {
       j = ilist[jj];
       if (atom->mask[j] & groupbit)
         p[j] = r[j] * Hdia_inv[j];
     }
 
     sig_old = sig_new;
     sig_new = parallel_dot( r, p, nn);
 
     // new search direction d = p + beta*d
     beta = sig_new / sig_old;
     vector_sum( d, 1., p, beta, d, nn );
 
   }
 
   if (i >= imax && comm->me == 0) {
     char str[128];
     sprintf(str,"Fix qeq/reax CG convergence failed after %d iterations "
             "at " BIGINT_FORMAT " step",i,update->ntimestep);
     error->warning(FLERR,str);
   }
 
   return i;
 }
 
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::sparse_matvec( sparse_matrix *A, double *x, double *b )
 {
   // b = A x for the half-stored matrix A; the diagonal is eta[type] and
   // is not stored in A.  Only atoms in the fix group are touched.
   NeighList *nl = reaxc ? reaxc->list : list;
   const int nlocal_rows = nl->inum;
   const int nall_rows = nl->inum + nl->gnum;
   const int *order = nl->ilist;
   const int *mask = atom->mask;
   const int *type = atom->type;
 
   // diagonal contribution for the locally listed atoms
   for (int ii = 0; ii < nlocal_rows; ++ii) {
     const int i = order[ii];
     if (!(mask[i] & groupbit)) continue;
     b[i] = eta[ type[i] ] * x[i];
   }
 
   // the remaining listed atoms start from zero
   for (int ii = nlocal_rows; ii < nall_rows; ++ii) {
     const int i = order[ii];
     if (!(mask[i] & groupbit)) continue;
     b[i] = 0;
   }
 
   // every stored entry (i,j) contributes to both row i and row j
   for (int ii = 0; ii < nlocal_rows; ++ii) {
     const int i = order[ii];
     if (!(mask[i] & groupbit)) continue;
     const int row_begin = A->firstnbr[i];
     const int row_end = row_begin + A->numnbrs[i];
     for (int entry = row_begin; entry < row_end; entry++) {
       const int j = A->jlist[entry];
       b[i] += A->val[entry] * x[j];
       b[j] += A->val[entry] * x[i];
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::calculate_Q()
 {
   int i, k;
   double u, s_sum, t_sum;
   double *q = atom->q;
 
   int nn, ii;
   int *ilist;
 
   if (reaxc) {
     nn = reaxc->list->inum;
     ilist = reaxc->list->ilist;
   } else {
     nn = list->inum;
     ilist = list->ilist;
   }
 
   s_sum = parallel_vector_acc( s, nn );
   t_sum = parallel_vector_acc( t, nn);
   u = s_sum / t_sum;
 
   for( ii = 0; ii < nn; ++ii ) {
     i = ilist[ii];
     if (atom->mask[i] & groupbit) {
       q[i] = s[i] - u * t[i];
 
       /* backup s & t */
       for( k = 4; k > 0; --k ) {
         s_hist[i][k] = s_hist[i][k-1];
         t_hist[i][k] = t_hist[i][k-1];
       }
       s_hist[i][0] = s[i];
       t_hist[i][0] = t[i];
     }
   }
 
   pack_flag = 4;
   comm->forward_comm_fix( this ); //Dist_vector( atom->q );
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixQEqReax::pack_forward_comm(int n, int *list, double *buf,
                                   int pbc_flag, int *pbc)
 {
   // pack_flag selects the vector to send:
   // 1 = d, 2 = s, 3 = t, 4 = atom->q; any other value packs nothing
   const double *src;
   switch (pack_flag) {
   case 1: src = d; break;
   case 2: src = s; break;
   case 3: src = t; break;
   case 4: src = atom->q; break;
   default: return n;
   }
 
   for (int k = 0; k < n; k++) buf[k] = src[list[k]];
 
   return n;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::unpack_forward_comm(int n, int first, double *buf)
 {
   // pack_flag selects the vector to receive into:
   // 1 = d, 2 = s, 3 = t, 4 = atom->q; any other value unpacks nothing
   double *dst;
   switch (pack_flag) {
   case 1: dst = d; break;
   case 2: dst = s; break;
   case 3: dst = t; break;
   case 4: dst = atom->q; break;
   default: return;
   }
 
   // received values go into consecutive slots starting at "first"
   for (int k = 0; k < n; k++) dst[first+k] = buf[k];
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixQEqReax::pack_reverse_comm(int n, int first, double *buf)
 {
   // send n consecutive entries of the work vector q, starting at "first"
   const double *src = q + first;
   for (int k = 0; k < n; k++) buf[k] = src[k];
   return n;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::unpack_reverse_comm(int n, int *list, double *buf)
 {
   // accumulate the received contributions into the work vector q
   int k = 0;
   while (k < n) {
     q[list[k]] += buf[k];
     k++;
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
 ------------------------------------------------------------------------- */
 
 double FixQEqReax::memory_usage()
 {
   double bytes = 0.0;
 
   // s_hist and t_hist: two arrays of nmax x nprev doubles
   bytes += atom->nmax*nprev*2 * sizeof(double);
   // the eleven work vectors created in allocate_storage()
   bytes += atom->nmax*11 * sizeof(double);
   // H matrix: two int row arrays, one int and one double entry array
   bytes += n_cap*2 * sizeof(int);
   bytes += m_cap * sizeof(int);
   bytes += m_cap * sizeof(double);
 
   return bytes;
 }
 
 /* ----------------------------------------------------------------------
    allocate fictitious charge arrays
 ------------------------------------------------------------------------- */
 
 void FixQEqReax::grow_arrays(int nmax)
 {
   // both history arrays hold nprev values for each of nmax atoms
   memory->grow(t_hist,nmax,nprev,"qeq:t_hist");
   memory->grow(s_hist,nmax,nprev,"qeq:s_hist");
 }
 
 /* ----------------------------------------------------------------------
    copy values within fictitious charge arrays
 ------------------------------------------------------------------------- */
 
 void FixQEqReax::copy_arrays(int i, int j, int delflag)
 {
   // duplicate the full solution history of atom i into atom j;
   // delflag is not used
   int slot = 0;
   while (slot < nprev) {
     s_hist[j][slot] = s_hist[i][slot];
     t_hist[j][slot] = t_hist[i][slot];
     ++slot;
   }
 }
 
 /* ----------------------------------------------------------------------
    pack values in local atom-based array for exchange with another proc
 ------------------------------------------------------------------------- */
 
 int FixQEqReax::pack_exchange(int i, double *buf)
 {
   // buffer layout: nprev values of s_hist followed by nprev of t_hist
   double *s_part = buf;
   double *t_part = buf + nprev;
   for (int slot = 0; slot < nprev; slot++) {
     s_part[slot] = s_hist[i][slot];
     t_part[slot] = t_hist[i][slot];
   }
   return nprev*2;
 }
 
 /* ----------------------------------------------------------------------
    unpack values in local atom-based array from exchange with another proc
 ------------------------------------------------------------------------- */
 
 int FixQEqReax::unpack_exchange(int nlocal, double *buf)
 {
   // buffer layout: nprev values of s_hist followed by nprev of t_hist
   const double *s_part = buf;
   const double *t_part = buf + nprev;
   for (int slot = 0; slot < nprev; slot++) {
     s_hist[nlocal][slot] = s_part[slot];
     t_hist[nlocal][slot] = t_part[slot];
   }
   return nprev*2;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double FixQEqReax::parallel_norm( double *v, int n )
 {
   int  i;
   double my_sum, norm_sqr;
 
   int ii;
   int *ilist;
 
   if (reaxc)
     ilist = reaxc->list->ilist;
   else
     ilist = list->ilist;
 
   my_sum = 0.0;
   norm_sqr = 0.0;
   for( ii = 0; ii < n; ++ii ) {
     i = ilist[ii];
     if (atom->mask[i] & groupbit)
       my_sum += SQR( v[i] );
   }
 
   MPI_Allreduce( &my_sum, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
 
   return sqrt( norm_sqr );
 }
 
 /* ---------------------------------------------------------------------- */
 
 double FixQEqReax::parallel_dot( double *v1, double *v2, int n)
 {
   int  i;
   double my_dot, res;
 
   int ii;
   int *ilist;
 
   if (reaxc)
     ilist = reaxc->list->ilist;
   else
     ilist = list->ilist;
 
   my_dot = 0.0;
   res = 0.0;
   for( ii = 0; ii < n; ++ii ) {
     i = ilist[ii];
     if (atom->mask[i] & groupbit)
       my_dot += v1[i] * v2[i];
   }
   
   MPI_Allreduce( &my_dot, &res, 1, MPI_DOUBLE, MPI_SUM, world );
 
   return res;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double FixQEqReax::parallel_vector_acc( double *v, int n )
 {
   int  i;
   double my_acc, res;
 
   int ii;
   int *ilist;
 
   if (reaxc)
     ilist = reaxc->list->ilist;
   else
     ilist = list->ilist;
 
   my_acc = 0.0;
   res = 0.0;
   for( ii = 0; ii < n; ++ii ) {
     i = ilist[ii];
     if (atom->mask[i] & groupbit)
       my_acc += v[i];
   }
 
   MPI_Allreduce( &my_acc, &res, 1, MPI_DOUBLE, MPI_SUM, world );
 
   return res;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::vector_sum( double* dest, double c, double* v,
                                 double d, double* y, int k )
 {
   int kk;
   int *ilist;
 
   if (reaxc)
     ilist = reaxc->list->ilist;
   else
     ilist = list->ilist;
 
   for( --k; k>=0; --k ) {
     kk = ilist[k];
     if (atom->mask[kk] & groupbit)
       dest[kk] = c * v[kk] + d * y[kk];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::vector_add( double* dest, double c, double* v, int k )
 {
   int kk;
   int *ilist;
 
   if (reaxc)
     ilist = reaxc->list->ilist;
   else
     ilist = list->ilist;
 
   for( --k; k>=0; --k ) {
     kk = ilist[k];
     if (atom->mask[kk] & groupbit)
       dest[kk] += c * v[kk];
   }
 
 }
diff --git a/src/USER-REAXC/pair_reax_c.cpp b/src/USER-REAXC/pair_reax_c.cpp
index a0f704596..277d31ccf 100644
--- a/src/USER-REAXC/pair_reax_c.cpp
+++ b/src/USER-REAXC/pair_reax_c.cpp
@@ -1,825 +1,825 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Hasan Metin Aktulga, Purdue University
    (now at Lawrence Berkeley National Laboratory, hmaktulga@lbl.gov)
    Per-atom energy/virial added by Ray Shan (Sandia)
    Fix reax/c/bonds and fix reax/c/species for pair_style reax/c added by 
    	Ray Shan (Sandia)
    Hybrid and hybrid/overlay compatibility added by Ray Shan (Sandia)
 ------------------------------------------------------------------------- */
 
 #include "pair_reax_c.h"
 #include "atom.h"
 #include "update.h"
 #include "force.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "modify.h"
 #include "fix.h"
 #include "fix_reax_c.h"
 #include "citeme.h"
 #include "memory.h"
 #include "error.h"
 
 #include "reaxc_types.h"
 #include "reaxc_allocate.h"
 #include "reaxc_control.h"
 #include "reaxc_ffield.h"
 #include "reaxc_forces.h"
 #include "reaxc_init_md.h"
 #include "reaxc_io_tools.h"
 #include "reaxc_list.h"
 #include "reaxc_lookup.h"
 #include "reaxc_reset_tools.h"
 #include "reaxc_traj.h"
 #include "reaxc_vector.h"
 #include "fix_reaxc_bonds.h"
 
 using namespace LAMMPS_NS;
 
 // BibTeX-formatted citation text for pair style reax/c; the PairReaxC
 // constructor hands this string to lmp->citeme->add() when citeme is set.
 static const char cite_pair_reax_c[] =
   "pair reax/c command:\n\n"
   "@Article{Aktulga12,\n"
   " author = {H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama},\n"
   " title = {Parallel reactive molecular dynamics: Numerical methods and algorithmic techniques},\n"
   " journal = {Parallel Computing},\n"
   " year =    2012,\n"
   " volume =  38,\n"
   " pages =   {245--259}\n"
   "}\n\n";
 
 /* ---------------------------------------------------------------------- */
 
 // Construct the reax/c pair style: register the citation, set the Pair
 // flags, allocate the ReaxC state structures with memory->smalloc and
 // zero the bookkeeping fields of `system`.  Only the fields assigned
 // below are initialized here.
 PairReaxC::PairReaxC(LAMMPS *lmp) : Pair(lmp)
 {
   if (lmp->citeme) lmp->citeme->add(cite_pair_reax_c);
 
   // flags inherited from Pair
   single_enable = 0;
   restartinfo = 0;
   one_coeff = 1;
   manybody_flag = 1;
   ghostneigh = 1;
 
   // raw storage for the ReaxC data structures (released in the destructor)
   system = (reax_system *)
     memory->smalloc(sizeof(reax_system),"reax:system");
   control = (control_params *)
     memory->smalloc(sizeof(control_params),"reax:control");
   data = (simulation_data *)
     memory->smalloc(sizeof(simulation_data),"reax:data");
   workspace = (storage *)
     memory->smalloc(sizeof(storage),"reax:storage");
   // LIST_N list descriptors, one array
   lists = (reax_list *)
     memory->smalloc(LIST_N * sizeof(reax_list),"reax:lists");
   out_control = (output_controls *)
     memory->smalloc(sizeof(output_controls),"reax:out_control");
   mpi_data = (mpi_datatypes *)
     memory->smalloc(sizeof(mpi_datatypes),"reax:mpi");
 
   MPI_Comm_rank(world,&system->my_rank);
 
   // start with empty counts/capacities and no atom storage
   system->my_coords[0] = 0;
   system->my_coords[1] = 0;
   system->my_coords[2] = 0;
   system->num_nbrs = 0;
   system->n = 0; // my atoms
   system->N = 0; // mine + ghosts
   system->bigN = 0;  // all atoms in the system
   system->local_cap = 0;
   system->total_cap = 0;
   system->gcell_cap = 0;
   system->bndry_cuts.ghost_nonb = 0;
   system->bndry_cuts.ghost_hbond = 0;
   system->bndry_cuts.ghost_bond = 0;
   system->bndry_cuts.ghost_cutoff = 0;
   system->my_atoms = NULL;
   system->pair_ptr = this;
 
   // fix_reax is created in init_style(); tmpid/tmpbo are sized in compute()
   fix_reax = NULL;
   tmpid = NULL;
   tmpbo = NULL;
 
   // pvector receives 14 energy terms in compute()
   nextra = 14;
   pvector = new double[nextra];
 
   // setup_flag becomes 1 on the first call to setup()
   setup_flag = 0;
   fixspecies_flag = 0;
 
   // current row count of tmpid/tmpbo
   nmax = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Release everything owned by the pair style: the internal "REAXC" fix,
 // the ReaxC lists/workspace (only if setup() ever ran), the structures
 // allocated in the constructor, and the per-type interface arrays.
 PairReaxC::~PairReaxC()
 {
   if (fix_reax) modify->delete_fix("REAXC");
 
   // the ReaxC internals only exist once setup() has set setup_flag
   if (setup_flag) {
     Close_Output_Files( system, control, out_control, mpi_data );
 
     // deallocate reax data-structures
 
     if( control->tabulate ) Deallocate_Lookup_Tables( system );
 
     if( control->hbond_cut > 0 )  Delete_List( lists+HBONDS, world );
     Delete_List( lists+BONDS, world );
     Delete_List( lists+THREE_BODIES, world );
     Delete_List( lists+FAR_NBRS, world );
 
     DeAllocate_Workspace( control, workspace );
     DeAllocate_System( system );
   }
 
   // storage obtained with memory->smalloc in the constructor
   memory->destroy( system );
   memory->destroy( control );
   memory->destroy( data );
   memory->destroy( workspace );
   memory->destroy( lists );
   memory->destroy( out_control );
   memory->destroy( mpi_data );
 
   // deallocate interface storage
   if( allocated ) {
     memory->destroy(setflag);
     memory->destroy(cutsq);
     memory->destroy(cutghost);
     delete [] map;
 
     delete [] chi;
     delete [] eta;
     delete [] gamma;
   }
 
   // scratch arrays filled in compute() when fixspecies_flag is set
   memory->destroy(tmpid);
   memory->destroy(tmpbo);
 
   delete [] pvector;
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Allocate the per-type arrays (all sized ntypes+1 so they can be indexed
 // by atom type 1..ntypes) and mark the pair style as allocated.
 void PairReaxC::allocate( )
 {
   allocated = 1;
   const int len = atom->ntypes + 1;
 
   // type x type tables
   memory->create(setflag,len,len,"pair:setflag");
   memory->create(cutsq,len,len,"pair:cutsq");
   memory->create(cutghost,len,len,"pair:cutghost");
 
   // per-type vectors: element map and the values handed out by extract()
   map = new int[len];
   chi = new double[len];
   eta = new double[len];
   gamma = new double[len];
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Process the pair_style arguments.
 //   arg[0]  : control file name, or "NULL" to use the built-in defaults
 //   arg[1+] : optional keyword/value pairs:
 //             checkqeq yes|no, lgvdw yes|no, safezone <value>, mincap <int>
 // Any malformed or unknown keyword aborts through error->all().
 void PairReaxC::settings(int narg, char **arg)
 {
   if (narg < 1) error->all(FLERR,"Illegal pair_style command");
 
   // read name of control file or use default controls
 
   if (strcmp(arg[0],"NULL") == 0) {
     strcpy( control->sim_name, "simulate" );
     control->ensemble = 0;
     out_control->energy_update_freq = 0;
     control->tabulate = 0;
 
     control->reneighbor = 1;
     // NOTE(review): control->nonb_cut is not assigned anywhere in this
     // function before being copied here -- confirm it is set elsewhere
     control->vlist_cut = control->nonb_cut;
     control->bond_cut = 5.;
     control->hbond_cut = 7.50;
     control->thb_cut = 0.001;
     control->thb_cutsq = 0.00001;
     control->bg_cut = 0.3;
 
     out_control->write_steps = 0;
     out_control->traj_method = 0;
     strcpy( out_control->traj_title, "default_title" );
     out_control->atom_info = 0;
     out_control->bond_info = 0;
     out_control->angle_info = 0;
   } else Read_Control_File(arg[0], control, out_control);
 
   // default values
 
   qeqflag = 1;
   control->lgflag = 0;
   system->mincap = MIN_CAP;
   system->safezone = SAFE_ZONE;
   system->saferzone = SAFER_ZONE;
 
   // process optional keywords
 
   int iarg = 1;
 
   // each keyword consumes exactly two arguments (keyword + value)
   while (iarg < narg) {
     if (strcmp(arg[iarg],"checkqeq") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal pair_style reax/c command");
       if (strcmp(arg[iarg+1],"yes") == 0) qeqflag = 1;
       else if (strcmp(arg[iarg+1],"no") == 0) qeqflag = 0;
       else error->all(FLERR,"Illegal pair_style reax/c command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"lgvdw") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal pair_style reax/c command");
       if (strcmp(arg[iarg+1],"yes") == 0) control->lgflag = 1;
       else if (strcmp(arg[iarg+1],"no") == 0) control->lgflag = 0;
       else error->all(FLERR,"Illegal pair_style reax/c command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"safezone") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal pair_style reax/c command");
       system->safezone = force->numeric(FLERR,arg[iarg+1]);
       if (system->safezone < 0.0) 
 	error->all(FLERR,"Illegal pair_style reax/c safezone command");
       // saferzone always tracks the user-supplied safezone plus 0.2
       system->saferzone = system->safezone + 0.2;
       iarg += 2;
     } else if (strcmp(arg[iarg],"mincap") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal pair_style reax/c command");
       system->mincap = force->inumeric(FLERR,arg[iarg+1]);
       if (system->mincap < 0) 
 	error->all(FLERR,"Illegal pair_style reax/c mincap command");
       iarg += 2;
     } else error->all(FLERR,"Illegal pair_style reax/c command");
   }
 
   // LAMMPS is responsible for generating nbrs
 
   // forced to 1 even if the control file requested something else
   control->reneighbor = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairReaxC::coeff( int nargs, char **args )
 {
   if (!allocated) allocate();
 
   if (nargs != 3 + atom->ntypes)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // insure I,J args are * *
 
   if (strcmp(args[0],"*") != 0 || strcmp(args[1],"*") != 0)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // read ffield file
 
   Read_Force_Field(args[2], &(system->reax_param), control);
 
   // read args that map atom types to elements in potential file
   // map[i] = which element the Ith atom type is, -1 if NULL
 
   int itmp = 0;
   int nreax_types = system->reax_param.num_atom_types;
   for (int i = 3; i < nargs; i++) {
     if (strcmp(args[i],"NULL") == 0) {
       map[i-2] = -1;
       itmp ++;
       continue;
     }
   }
 
   int n = atom->ntypes;
 
   // pair_coeff element map
   for (int i = 3; i < nargs; i++)
     for (int j = 0; j < nreax_types; j++)
       if (strcasecmp(args[i],system->reax_param.sbp[j].name) == 0) {
         map[i-2] = j;
 	itmp ++;
       }
 
   // error check
   if (itmp != n) 
     error->all(FLERR,"Non-existent ReaxFF type");
 
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       setflag[i][j] = 0;
 
   // set setflag i,j for type pairs where both are mapped to elements
 
   int count = 0;
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       if (map[i] >= 0 && map[j] >= 0) {
         setflag[i][j] = 1;
         count++;
       }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Style-specific initialization: check prerequisites (charges, atom IDs,
 // newton pair on, a qeq/reax fix unless checkqeq was disabled), refresh the
 // atom counts in `system`, request a neighbor list, compute cutmax and
 // create the internal "REAXC" fix on first use.
 void PairReaxC::init_style( )
 {
   if (!atom->q_flag)
     error->all(FLERR,"Pair style reax/c requires atom attribute q");
 
   // firstwarn = 1;
 
   // look for a fix of style qeq/reax; iqeq == nfix means none was found
   int iqeq;
   for (iqeq = 0; iqeq < modify->nfix; iqeq++)
     if (strcmp(modify->fix[iqeq]->style,"qeq/reax") == 0) break;
   if (iqeq == modify->nfix && qeqflag == 1)
     error->all(FLERR,"Pair reax/c requires use of fix qeq/reax");
 
   system->n = atom->nlocal; // my atoms
   system->N = atom->nlocal + atom->nghost; // mine + ghosts
   system->bigN = static_cast<int> (atom->natoms);  // all atoms in the system
   system->wsize = comm->nprocs;
 
   system->big_box.V = 0;
   system->big_box.box_norms[0] = 0;
   system->big_box.box_norms[1] = 0;
   system->big_box.box_norms[2] = 0;
 
   if (atom->tag_enable == 0)
     error->all(FLERR,"Pair style reax/c requires atom IDs");
   if (force->newton_pair == 0)
     error->all(FLERR,"Pair style reax/c requires newton pair on");
 
   // need a half neighbor list w/ Newton off and ghost neighbors
   // built whenever re-neighboring occurs
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->newton = 2;
   neighbor->requests[irequest]->ghost = 1;
 
   // largest of the non-bonded, hydrogen-bond and twice the bond cutoff
   cutmax = MAX3(control->nonb_cut, control->hbond_cut, 2*control->bond_cut);
 
   for( int i = 0; i < LIST_N; ++i )
     lists[i].allocated = 0;
 
   // create the internal fix once; it is the last entry after add_fix()
   if (fix_reax == NULL) {
     char **fixarg = new char*[3];
     fixarg[0] = (char *) "REAXC";
     fixarg[1] = (char *) "all";
     fixarg[2] = (char *) "REAXC";
     modify->add_fix(3,fixarg);
     delete [] fixarg;
     fix_reax = (FixReaxC *) modify->fix[modify->nfix-1];
   }
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Bring the ReaxC data structures in sync with the current LAMMPS atoms.
 // First call (setup_flag == 0): size capacities, pre-allocate, build the
 // far-neighbor list and run Initialize().  Later calls: copy the atoms
 // again and let ReAllocate() resize the structures.  Finally ngroup is set
 // to list->inum summed over all ranks.
 void PairReaxC::setup( )
 {
   int oldN;
   int mincap = system->mincap;
   double safezone = system->safezone;
 
   system->n = atom->nlocal; // my atoms
   system->N = atom->nlocal + atom->nghost; // mine + ghosts
   // NOTE(review): oldN is read after system->N was just refreshed, so it
   // equals the new N and the "new atoms" loop below never iterates --
   // confirm whether oldN was meant to capture the previous value
   oldN = system->N;
   system->bigN = static_cast<int> (atom->natoms);  // all atoms in the system
 
   if (setup_flag == 0) {
 
     setup_flag = 1;
 
     int *num_bonds = fix_reax->num_bonds;
     int *num_hbonds = fix_reax->num_hbonds;
 
     control->vlist_cut = neighbor->cutneighmax;
 
     // determine the local and total capacity
 
     system->local_cap = MAX( (int)(system->n * safezone), mincap );
     system->total_cap = MAX( (int)(system->N * safezone), mincap );
 
     // initialize my data structures
 
     PreAllocate_Space( system, control, workspace, world );
     write_reax_atoms();
 
     // size the far-neighbor list from an estimate, then fill it
     int num_nbrs = estimate_reax_lists();
     if(!Make_List(system->total_cap, num_nbrs, TYP_FAR_NEIGHBOR,
                   lists+FAR_NBRS, world))
       error->all(FLERR,"Pair reax/c problem in far neighbor list");
 
     write_reax_lists();
     Initialize( system, control, data, workspace, &lists, out_control,
                 mpi_data, world );
     // copy the per-atom bond counts back into the fix's arrays
     for( int k = 0; k < system->N; ++k ) {
       num_bonds[k] = system->my_atoms[k].num_bonds;
       num_hbonds[k] = system->my_atoms[k].num_hbonds;
     }
 
   } else {
 
     // fill in reax datastructures
 
     write_reax_atoms();
 
     // reset the bond list info for new atoms
 
     for(int k = oldN; k < system->N; ++k)
       Set_End_Index( k, Start_Index( k, lists+BONDS ), lists+BONDS );
 
     // check if I need to shrink/extend my data-structs
 
     ReAllocate( system, control, data, workspace, &lists, mpi_data );
   }
 
   // count the list->inum entries on this rank, then sum across ranks
   ngroup = 0;
   int ngroup_sum = 0;
   for (int i = 0; i < list->inum; i++) {
     ngroup ++;
   }
   MPI_Allreduce( &ngroup, &ngroup_sum, 1, MPI_INT, MPI_SUM, world );
   ngroup = ngroup_sum;
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairReaxC::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   cutghost[i][j] = cutghost[j][i] = cutmax;
   return cutmax;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Evaluate forces (and, when requested, energies/virial) for one step.
 // Sequence: sync bond counts with the fix, set energy/virial flags,
 // refresh `system`, call setup(), rebuild the far-neighbor list, run
 // Compute_Forces(), copy forces back, tally energies into pvector, and
 // optionally collect bond data for fix reax/c/species.
 //   eflag, vflag : energy / virial flags passed on to ev_setup()
 void PairReaxC::compute(int eflag, int vflag)
 {
   double evdwl,ecoul;
   double t_start, t_end;
 
   // communicate num_bonds once every reneighboring
   // 2 num arrays stored by fix, grab ptr to them
 
   if (neighbor->ago == 0) comm->forward_comm_fix(fix_reax);
   int *num_bonds = fix_reax->num_bonds;
   int *num_hbonds = fix_reax->num_hbonds;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else ev_unset();
 
   if (vflag_global) control->virial = 1;
   else control->virial = 0;
 
   system->n = atom->nlocal; // my atoms
   system->N = atom->nlocal + atom->nghost; // mine + ghosts
   system->bigN = static_cast<int> (atom->natoms);  // all atoms in the system
 
   system->big_box.V = 0;
   system->big_box.box_norms[0] = 0;
   system->big_box.box_norms[1] = 0;
   system->big_box.box_norms[2] = 0;
   // timing is only recorded on rank 0; t_start stays unset elsewhere
   if( comm->me == 0 ) t_start = MPI_Wtime();
 
   // setup data structures
 
   setup();
 
   Reset( system, control, data, workspace, &lists, world );
   workspace->realloc.num_far = write_reax_lists();
   // timing for filling in the reax lists
   if( comm->me == 0 ) {
     t_end = MPI_Wtime();
     data->timing.nbrs = t_end - t_start;
   }
 
   // forces
 
   Compute_Forces(system,control,data,workspace,&lists,out_control,mpi_data);
   read_reax_forces(vflag);
 
   // store the updated per-atom bond counts in the fix's arrays
   for(int k = 0; k < system->N; ++k) {
     num_bonds[k] = system->my_atoms[k].num_bonds;
     num_hbonds[k] = system->my_atoms[k].num_hbonds;
   }
 
   // energies and pressure
 
   if (eflag_global) {
     // evdwl/ecoul are accumulated locally; the additions into
     // eng_vdwl/eng_coul below are commented out
     evdwl += data->my_en.e_bond;
     evdwl += data->my_en.e_ov;
     evdwl += data->my_en.e_un;
     evdwl += data->my_en.e_lp;
     evdwl += data->my_en.e_ang;
     evdwl += data->my_en.e_pen;
     evdwl += data->my_en.e_coa;
     evdwl += data->my_en.e_hb;
     evdwl += data->my_en.e_tor;
     evdwl += data->my_en.e_con;
     evdwl += data->my_en.e_vdW;
 
     ecoul += data->my_en.e_ele;
     ecoul += data->my_en.e_pol;
 
     // eng_vdwl += evdwl;
     // eng_coul += ecoul;
 
     // Store the different parts of the energy
     // in a list for output by compute pair command
 
     // slots 3 and 12 are always written as 0.0
     pvector[0] = data->my_en.e_bond;
     pvector[1] = data->my_en.e_ov + data->my_en.e_un;
     pvector[2] = data->my_en.e_lp;
     pvector[3] = 0.0;
     pvector[4] = data->my_en.e_ang;
     pvector[5] = data->my_en.e_pen;
     pvector[6] = data->my_en.e_coa;
     pvector[7] = data->my_en.e_hb;
     pvector[8] = data->my_en.e_tor;
     pvector[9] = data->my_en.e_con;
     pvector[10] = data->my_en.e_vdW;
     pvector[11] = data->my_en.e_ele;
     pvector[12] = 0.0;
     pvector[13] = data->my_en.e_pol;
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 
 // Set internal timestep counter to that of LAMMPS
 
   data->step = update->ntimestep;
 
   Output_Results( system, control, data, &lists, out_control, mpi_data );
 
   // populate tmpid and tmpbo arrays for fix reax/c/species
   int i, j;
 
   if(fixspecies_flag) {
     // grow the scratch arrays when the atom count exceeds their row count
     if (system->N > nmax) {
       memory->destroy(tmpid);
       memory->destroy(tmpbo);
       nmax = system->N;
       memory->create(tmpid,nmax,MAXSPECBOND,"pair:tmpid");
       memory->create(tmpbo,nmax,MAXSPECBOND,"pair:tmpbo");
     }
    
     // clear all rows before FindBond() fills them in
     for (i = 0; i < system->N; i ++)
       for (j = 0; j < MAXSPECBOND; j ++) {
         tmpbo[i][j] = 0.0;
 	tmpid[i][j] = 0;
       }
     FindBond();
   }
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairReaxC::write_reax_atoms()
 {
   int *num_bonds = fix_reax->num_bonds;
   int *num_hbonds = fix_reax->num_hbonds;
 
   if (system->N > system->total_cap)
     error->all(FLERR,"Too many ghost atoms");
 
   for( int i = 0; i < system->N; ++i ){
     system->my_atoms[i].orig_id = atom->tag[i];
     system->my_atoms[i].type = map[atom->type[i]];
     system->my_atoms[i].x[0] = atom->x[i][0];
     system->my_atoms[i].x[1] = atom->x[i][1];
     system->my_atoms[i].x[2] = atom->x[i][2];
     system->my_atoms[i].q = atom->q[i];
     system->my_atoms[i].num_bonds = num_bonds[i];
     system->my_atoms[i].num_hbonds = num_hbonds[i];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Store the separation vector xj - xi in *dvec and its squared length
 // in *d_sqr.
 void PairReaxC::get_distance( rvec xj, rvec xi, double *d_sqr, rvec *dvec )
 {
   rvec &delta = *dvec;
   for (int d = 0; d < 3; ++d)
     delta[d] = xj[d] - xi[d];
 
   *d_sqr = SQR(delta[0]) + SQR(delta[1]) + SQR(delta[2]);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Fill one far-neighbor record: neighbor index j, distance d, a copy of
 // the separation vector dvec, and a zeroed rel_box.
 void PairReaxC::set_far_nbr( far_neighbor_data *fdest,
                               int j, double d, rvec dvec )
 {
   ivec_MakeZero( fdest->rel_box );
   rvec_Copy( fdest->dvec, dvec );
   fdest->d = d;
   fdest->nbr = j;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Estimate the capacity needed for the far-neighbor list.
 // Counts every neighbor pair in the LAMMPS list (owned + ghost entries,
 // list->inum + list->gnum) whose squared distance is within
 // control->nonb_cut squared.
 // Returns the larger of num_nbrs*safezone and mincap*MIN_NBRS, truncated
 // to int.
 int PairReaxC::estimate_reax_lists()
 {
   int itr_i, itr_j, i, j;
   int num_nbrs;
   int *ilist, *jlist, *numneigh, **firstneigh;
   double d_sqr;
   rvec dvec;
   double **x;
 
   int mincap = system->mincap;
   double safezone = system->safezone;
 
   x = atom->x;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // the cutoff does not change inside the loops
   const double cutoff_sqr = SQR(control->nonb_cut);
 
   num_nbrs = 0;
 
   int numall = list->inum + list->gnum;
 
   for( itr_i = 0; itr_i < numall; ++itr_i ){
     i = ilist[itr_i];
     jlist = firstneigh[i];
 
     for( itr_j = 0; itr_j < numneigh[i]; ++itr_j ){
       j = jlist[itr_j];
       j &= NEIGHMASK;   // strip the bits stored above the atom index
       get_distance( x[j], x[i], &d_sqr, &dvec );
 
       if( d_sqr <= cutoff_sqr )
         ++num_nbrs;
     }
   }
 
   return static_cast<int> (MAX( num_nbrs*safezone, mincap*MIN_NBRS ));
 }
 
 /* ---------------------------------------------------------------------- */
 
 int PairReaxC::write_reax_lists()
 {
   int itr_i, itr_j, i, j;
   int num_nbrs;
   int *ilist, *jlist, *numneigh, **firstneigh;
   double d_sqr;
   rvec dvec;
   double *dist, **x;
   reax_list *far_nbrs;
   far_neighbor_data *far_list;
 
   x = atom->x;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   far_nbrs = lists + FAR_NBRS;
   far_list = far_nbrs->select.far_nbr_list;
 
   num_nbrs = 0;
   dist = (double*) calloc( system->N, sizeof(double) );
 
   int numall = list->inum + list->gnum;
 
   for( itr_i = 0; itr_i < numall; ++itr_i ){
     i = ilist[itr_i];
     jlist = firstneigh[i];
     Set_Start_Index( i, num_nbrs, far_nbrs );
 
     for( itr_j = 0; itr_j < numneigh[i]; ++itr_j ){
       j = jlist[itr_j];
       j &= NEIGHMASK;
       get_distance( x[j], x[i], &d_sqr, &dvec );
 
       if( d_sqr <= (control->nonb_cut*control->nonb_cut) ){
         dist[j] = sqrt( d_sqr );
         set_far_nbr( &far_list[num_nbrs], j, dist[j], dvec );
         ++num_nbrs;
       }
     }
     Set_End_Index( i, num_nbrs, far_nbrs );
   }
 
   free( dist );
 
   return num_nbrs;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairReaxC::read_reax_forces(int vflag)
 {
   for( int i = 0; i < system->N; ++i ) {
     system->my_atoms[i].f[0] = workspace->f[i][0];
     system->my_atoms[i].f[1] = workspace->f[i][1];
     system->my_atoms[i].f[2] = workspace->f[i][2];
 
     atom->f[i][0] += -workspace->f[i][0];
     atom->f[i][1] += -workspace->f[i][1];
     atom->f[i][2] += -workspace->f[i][2];
   }
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Hand out a per-type array by name.  Recognized names are "chi", "eta"
 // and "gamma"; the matching array is refreshed from the force-field
 // parameters (0.0 for types with map[type] < 0) and returned.
 //   str : requested quantity
 //   dim : always set to 1
 // Returns NULL for any other name or when the requested array is null.
 void *PairReaxC::extract(const char *str, int &dim)
 {
   dim = 1;
   const int ntypes = atom->ntypes;
 
   if (strcmp(str,"chi") == 0 && chi) {
     for (int t = 1; t <= ntypes; t++)
       chi[t] = (map[t] >= 0) ? system->reax_param.sbp[map[t]].chi : 0.0;
     return (void *) chi;
   }
 
   if (strcmp(str,"eta") == 0 && eta) {
     for (int t = 1; t <= ntypes; t++)
       eta[t] = (map[t] >= 0) ? system->reax_param.sbp[map[t]].eta : 0.0;
     return (void *) eta;
   }
 
   if (strcmp(str,"gamma") == 0 && gamma) {
     for (int t = 1; t <= ntypes; t++)
       gamma[t] = (map[t] >= 0) ? system->reax_param.sbp[map[t]].gamma : 0.0;
     return (void *) gamma;
   }
 
   return NULL;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Estimate, in bytes, of the memory held by the pair style, assembled
 // from the atom counts/capacities in `system`, the sizes recorded in the
 // first entry of `lists`, and (when fixspecies_flag is set) the
 // tmpid/tmpbo scratch arrays.
 double PairReaxC::memory_usage()
 {
   double total = 0.0;
 
   // From pair_reax_c
   total += 1.0 * system->N * sizeof(int);
   total += 1.0 * system->N * sizeof(double);
 
   // From reaxc_allocate: BO
   total += 1.0 * system->total_cap * sizeof(reax_atom);
   total += 19.0 * system->total_cap * sizeof(real);
   total += 3.0 * system->total_cap * sizeof(int);
 
   // From reaxc_lists
   total += 2.0 * lists->n * sizeof(int);
   total += lists->num_intrs * sizeof(three_body_interaction_data);
   total += lists->num_intrs * sizeof(bond_data);
   total += lists->num_intrs * sizeof(dbond_data);
   total += lists->num_intrs * sizeof(dDelta_data);
   total += lists->num_intrs * sizeof(far_neighbor_data);
   total += lists->num_intrs * sizeof(hbond_data);
 
   // scratch arrays for fix reax/c/species
   if (fixspecies_flag) {
     total += 2 * nmax * MAXSPECBOND * sizeof(double);
   }
 
   return total;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairReaxC::FindBond()
 {
   int i, j, pj, nj;
   double bo_tmp, bo_cut;
 
   bond_data *bo_ij;
   bo_cut = 0.10;
 
   for (i = 0; i < system->n; i++) {
     nj = 0;
     for( pj = Start_Index(i, lists); pj < End_Index(i, lists); ++pj ) {
       bo_ij = &( lists->select.bond_list[pj] );
       j = bo_ij->nbr;
       if (j < i) continue;
 
       bo_tmp = bo_ij->bo_data.BO;
 
       if (bo_tmp >= bo_cut ) {
 	tmpid[i][nj] = j;
 	tmpbo[i][nj] = bo_tmp;
 	nj ++;
 	if (nj > MAXSPECBOND) error->all(FLERR,"Increase MAXSPECBOND in reaxc_defs.h");
       }
     }
   }
 }
diff --git a/src/USER-SPH/pair_sph_rhosum.cpp b/src/USER-SPH/pair_sph_rhosum.cpp
index c4b4a8e11..db0749b2a 100644
--- a/src/USER-SPH/pair_sph_rhosum.cpp
+++ b/src/USER-SPH/pair_sph_rhosum.cpp
@@ -1,313 +1,313 @@
 /* ----------------------------------------------------------------------
  LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
  http://lammps.sandia.gov, Sandia National Laboratories
  Steve Plimpton, sjplimp@sandia.gov
 
  Copyright (2003) Sandia Corporation.  Under the terms of Contract
  DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
  certain rights in this software.  This software is distributed under
  the GNU General Public License.
 
  See the README file in the top-level LAMMPS directory.
  ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdlib.h"
 #include "pair_sph_rhosum.h"
 #include "atom.h"
 #include "force.h"
 #include "comm.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "memory.h"
 #include "error.h"
 #include "neighbor.h"
 #include "update.h"
 #include "domain.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 // Construct the sph/rhosum pair style and set its flags.
 PairSPHRhoSum::PairSPHRhoSum(LAMMPS *lmp) : Pair(lmp)
 {
   // the coefficient consistency check in compute() runs while first != 0
   first = 1;
 
   // one value per atom (rho) is packed in pack_forward_comm()
   comm_forward = 1;
 
   restartinfo = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Free the per-type tables, which exist only after allocate() has run.
 PairSPHRhoSum::~PairSPHRhoSum() {
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
   memory->destroy(cut);
 }
 
 /* ----------------------------------------------------------------------
  init specific to this pair style
  ------------------------------------------------------------------------- */
 
 // Request a neighbor list from LAMMPS and switch the request from the
 // half flag to the full flag.
 void PairSPHRhoSum::init_style() {
   // need a full neighbor list
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Recompute the per-atom density rho by kernel summation and forward it
 // to ghost atoms.
 //   eflag, vflag : passed to ev_setup(); this function does not tally
 //                  energies or virials itself
 // On the first call a consistency warning is printed (rank 0 only) for
 // every interacting type pair (cutsq > 0) where either type lacks its
 // self-coefficient.  The density is recomputed only when nstep != 0 and
 // the current timestep is a multiple of nstep; rho is communicated on
 // every call.
 void PairSPHRhoSum::compute(int eflag, int vflag) {
   int i, j, ii, jj, jnum, itype, jtype;
   double xtmp, ytmp, ztmp, delx, dely, delz;
   double rsq, imass, h, ih, ihsq;
   int *jlist;
   double wf;
   // neighbor list variables
   int inum, *ilist, *numneigh, **firstneigh;
 
   if (eflag || vflag)
     ev_setup(eflag, vflag);
   else
     evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double *rho = atom->rho;
   int *type = atom->type;
   double *mass = atom->mass;
 
   // check consistency of pair coefficients
 
   if (first) {
     for (i = 1; i <= atom->ntypes; i++) {
       // the inner loop must run over j; it previously tested and
       // incremented i, so only the column j == 1 was ever examined
       for (j = 1; j <= atom->ntypes; j++) {
         if (cutsq[i][j] > 0.0) {
           if (!setflag[i][i] || !setflag[j][j]) {
             if (comm->me == 0) {
               printf(
                   "SPH particle types %d and %d interact, but not all of their single particle properties are set.\n",
                   i, j);
             }
           }
         }
       }
     }
     first = 0;
   }
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // recompute density
   // we use a full neighborlist here
 
   if (nstep != 0) {
     if ((update->ntimestep % nstep) == 0) {
 
       // initialize density with self-contribution,
       for (ii = 0; ii < inum; ii++) {
         i = ilist[ii];
         itype = type[i];
         imass = mass[itype];
 
         h = cut[itype][itype];
         if (domain->dimension == 3) {
           /*
           // Lucy kernel, 3d
           wf = 2.0889086280811262819e0 / (h * h * h);
           */
 
           // quadric kernel, 3d
           wf = 2.1541870227086614782 / (h * h * h);
         } else {
           /*
           // Lucy kernel, 2d
           wf = 1.5915494309189533576e0 / (h * h);
           */
 
           // quadric kernel, 2d
           wf = 1.5915494309189533576e0 / (h * h);
         }
 
         rho[i] = imass * wf;
       }
 
       // add density at each atom via kernel function overlap
       for (ii = 0; ii < inum; ii++) {
         i = ilist[ii];
         xtmp = x[i][0];
         ytmp = x[i][1];
         ztmp = x[i][2];
         itype = type[i];
         jlist = firstneigh[i];
         jnum = numneigh[i];
 
         for (jj = 0; jj < jnum; jj++) {
           j = jlist[jj];
           j &= NEIGHMASK;   // strip the bits stored above the atom index
 
           jtype = type[j];
           delx = xtmp - x[j][0];
           dely = ytmp - x[j][1];
           delz = ztmp - x[j][2];
           rsq = delx * delx + dely * dely + delz * delz;
 
           if (rsq < cutsq[itype][jtype]) {
             h = cut[itype][jtype];
             ih = 1.0 / h;
             ihsq = ih * ih;
 
             if (domain->dimension == 3) {
               /*
               // Lucy kernel, 3d
               r = sqrt(rsq);
               wf = (h - r) * ihsq;
               wf =  2.0889086280811262819e0 * (h + 3. * r) * wf * wf * wf * ih;
               */
 
               // quadric kernel, 3d: wf = C * (1 - r^2/h^2)^4 / h^3
               wf = 1.0 - rsq * ihsq;
               wf = wf * wf;
               wf = wf * wf;
               wf = 2.1541870227086614782e0 * wf * ihsq * ih;
             } else {
               // Lucy kernel, 2d
               //r = sqrt(rsq);
               //wf = (h - r) * ihsq;
               //wf = 1.5915494309189533576e0 * (h + 3. * r) * wf * wf * wf;
 
               // quadric kernel, 2d: wf = C * (1 - r^2/h^2)^4 / h^2
               wf = 1.0 - rsq * ihsq;
               wf = wf * wf;
               wf = wf * wf;
               wf = 1.5915494309189533576e0 * wf * ihsq;
             }
 
             rho[i] += mass[jtype] * wf;
           }
 
         }
       }
     }
   }
 
   // communicate densities
   comm->forward_comm_pair(this);
 }
 
 /* ----------------------------------------------------------------------
  allocate all arrays
  ------------------------------------------------------------------------- */
 
 // Allocate the (ntypes+1) x (ntypes+1) tables setflag, cutsq and cut, and
 // clear setflag[i][j] for all 1 <= i <= j <= ntypes.
 void PairSPHRhoSum::allocate() {
   allocated = 1;
   const int ntypes = atom->ntypes;
   const int len = ntypes + 1;
 
   memory->create(setflag, len, len, "pair:setflag");
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq, len, len, "pair:cutsq");
   memory->create(cut, len, len, "pair:cut");
 }
 
 /* ----------------------------------------------------------------------
  global settings
  ------------------------------------------------------------------------- */
 
 // Parse the single pair_style argument into nstep (the density is
 // recomputed in compute() on timesteps that are multiples of nstep).
 void PairSPHRhoSum::settings(int narg, char **arg) {
   const bool one_argument = (narg == 1);
   if (!one_argument) {
     error->all(FLERR,
         "Illegal number of setting arguments for pair_style sph/rhosum");
   }
 
   nstep = force->inumeric(FLERR,arg[0]);
 }
 
 /* ----------------------------------------------------------------------
  set coeffs for one or more type pairs
  ------------------------------------------------------------------------- */
 
 void PairSPHRhoSum::coeff(int narg, char **arg) {
   if (narg != 3)
     error->all(FLERR,"Incorrect number of args for sph/rhosum coefficients");
   if (!allocated)
     allocate();
 
   int ilo, ihi, jlo, jhi;
   force->bounds(arg[0], atom->ntypes, ilo, ihi);
   force->bounds(arg[1], atom->ntypes, jlo, jhi);
 
   double cut_one = force->numeric(FLERR,arg[2]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       //printf("setting cut[%d][%d] = %f\n", i, j, cut_one);
       cut[i][j] = cut_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0)
     error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
  init for one type pair i,j and corresponding j,i
  ------------------------------------------------------------------------- */
 
 // Per type-pair initialization: requires setflag[i][j], mirrors
 // cut[i][j] into cut[j][i] and returns the cutoff.
 double PairSPHRhoSum::init_one(int i, int j) {
   if (!setflag[i][j])
     error->all(FLERR,"All pair sph/rhosum coeffs are not set");
 
   const double cutoff = cut[i][j];
   cut[j][i] = cutoff;
 
   return cut[i][j];
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Single-pair query: this style reports no force and no energy, so
 // fforce is set to 0.0 and 0.0 is returned for any input.
 double PairSPHRhoSum::single(int i, int j, int itype, int jtype, double rsq,
     double factor_coul, double factor_lj, double &fforce) {
   const double none = 0.0;
   fforce = none;
   return none;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Pack rho of the n atoms listed in `list` into buf, one value per atom.
 //   pbc_flag, pbc : not used by this function
 // Returns the number of values written.
 int PairSPHRhoSum::pack_forward_comm(int n, int *list, double *buf, 
                                      int pbc_flag, int *pbc) {
   double *density = atom->rho;
 
   int npacked = 0;
   while (npacked < n) {
     buf[npacked] = density[list[npacked]];
     ++npacked;
   }
   return npacked;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Unpack n values from buf into rho[first] .. rho[first+n-1].
 void PairSPHRhoSum::unpack_forward_comm(int n, int first, double *buf) {
   double *density = atom->rho;
 
   for (int k = 0; k < n; k++) {
     density[first + k] = buf[k];
   }
 }
diff --git a/src/compute_centro_atom.cpp b/src/compute_centro_atom.cpp
index a7f3e9fef..0676ad0a4 100644
--- a/src/compute_centro_atom.cpp
+++ b/src/compute_centro_atom.cpp
@@ -1,333 +1,333 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Michel Perez (U Lyon) for non-fcc lattices
 ------------------------------------------------------------------------- */
 
 #include "string.h"
 #include "stdlib.h"
 #include "compute_centro_atom.h"
 #include "atom.h"
 #include "update.h"
 #include "modify.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "force.h"
 #include "pair.h"
 #include "comm.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 // Parse the compute arguments: exactly 4 are required, and arg[3] selects the
 // neighbor count nnn ("fcc" -> 12, "bcc" -> 8, otherwise an integer).
 // nnn must be positive and even.  All work arrays start out empty.
 ComputeCentroAtom::ComputeCentroAtom(LAMMPS *lmp, int narg, char **arg) :
   Compute(lmp, narg, arg)
 {
   if (narg != 4) error->all(FLERR,"Illegal compute centro/atom command");
 
   char *lattice = arg[3];
   if (!strcmp(lattice,"fcc")) nnn = 12;
   else if (!strcmp(lattice,"bcc")) nnn = 8;
   else nnn = force->inumeric(FLERR,lattice);
 
   const bool odd = (nnn % 2) != 0;
   if (nnn <= 0 || odd)
     error->all(FLERR,"Illegal neighbor value for compute centro/atom command");
 
   // per-atom vector output
   peratom_flag = 1;
   size_peratom_cols = 0;
 
   // nothing allocated until compute_peratom() runs
   nmax = 0;
   maxneigh = 0;
   centro = NULL;
   distsq = NULL;
   nearest = NULL;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Release the per-atom result vector and the two neighbor scratch arrays.
 ComputeCentroAtom::~ComputeCentroAtom()
 {
   memory->destroy(nearest);
   memory->destroy(distsq);
   memory->destroy(centro);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Check prerequisites and register a neighbor-list request for this compute.
 void ComputeCentroAtom::init()
 {
   // a pair style must exist: compute_peratom() reads force->pair->cutforce
   if (force->pair == NULL)
     error->all(FLERR,"Compute centro/atom requires a pair style be defined");
 
   // warn (on rank 0 only) if several computes of this style are defined
   int count = 0;
   for (int i = 0; i < modify->ncompute; i++)
     if (strcmp(modify->compute[i]->style,"centro/atom") == 0) count++;
   if (count > 1 && comm->me == 0)
     error->warning(FLERR,"More than one compute centro/atom");
 
   // need an occasional full neighbor list
   // the request is flagged as compute-owned (not pair), full (not half)
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->compute = 1;
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
   neighbor->requests[irequest]->occasional = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Remember the neighbor list handed to this compute.
 void ComputeCentroAtom::init_list(int id, NeighList *ptr)
 {
   // id is not used; only the list pointer is kept
   list = ptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Fill centro[] with the centro-symmetry value of every atom in the list.
 // Atoms outside the compute group, and atoms with fewer than nnn neighbors
 // inside the pair force cutoff, get 0.0.  For the others the value is the sum
 // of the nnn/2 smallest |Rj + Rk|^2 over all pairs (j,k) of the nnn nearest
 // neighbors, with Rj, Rk measured from the central atom.
 void ComputeCentroAtom::compute_peratom()
 {
   int i,j,k,ii,jj,kk,n,inum,jnum;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq,value;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   invoked_peratom = update->ntimestep;
 
   // grow centro array if necessary
 
   if (atom->nlocal > nmax) {
     memory->destroy(centro);
     nmax = atom->nmax;
     memory->create(centro,nmax,"centro/atom:centro");
     vector_atom = centro;
   }
 
   // invoke full neighbor list (will copy or build if necessary)
 
   neighbor->build_one(list);
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // npairs = number of unique pairs
   // pairs is a scratch buffer reused for every atom and freed at the end
 
   int nhalf = nnn/2;
   int npairs = nnn * (nnn-1) / 2;
   double *pairs = new double[npairs];
 
   // compute centro-symmetry parameter for each atom in group
   // use full neighbor list
 
   double **x = atom->x;
   int *mask = atom->mask;
   double cutsq = force->pair->cutforce * force->pair->cutforce;
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     if (mask[i] & groupbit) {
       xtmp = x[i][0];
       ytmp = x[i][1];
       ztmp = x[i][2];
       jlist = firstneigh[i];
       jnum = numneigh[i];
 
       // ensure distsq and nearest arrays are long enough
       // (they only ever grow; old contents are discarded)
 
       if (jnum > maxneigh) {
         memory->destroy(distsq);
         memory->destroy(nearest);
         maxneigh = jnum;
         memory->create(distsq,maxneigh,"centro/atom:distsq");
         memory->create(nearest,maxneigh,"centro/atom:nearest");
       }
 
       // loop over list of all neighbors within force cutoff
       // distsq[] = distance sq to each
       // nearest[] = atom indices of neighbors
 
       n = 0;
       for (jj = 0; jj < jnum; jj++) {
         j = jlist[jj];
         j &= NEIGHMASK;
 
         delx = xtmp - x[j][0];
         dely = ytmp - x[j][1];
         delz = ztmp - x[j][2];
         rsq = delx*delx + dely*dely + delz*delz;
         if (rsq < cutsq) {
           distsq[n] = rsq;
           nearest[n++] = j;
         }
       }
 
       // if not nnn neighbors, centro = 0.0
 
       if (n < nnn) {
         centro[i] = 0.0;
         continue;
       }
 
       // store nnn nearest neighs in 1st nnn locations of distsq and nearest
 
       select2(nnn,n,distsq,nearest);
 
       // R = Ri + Rj for each of npairs i,j pairs among nnn neighbors
       // pairs = squared length of each R
       // n is reused here as the running index into pairs[]
 
       n = 0;
       for (j = 0; j < nnn; j++) {
         jj = nearest[j];
         for (k = j+1; k < nnn; k++) {
           kk = nearest[k];
           delx = x[jj][0] + x[kk][0] - 2.0*xtmp;
           dely = x[jj][1] + x[kk][1] - 2.0*ytmp;
           delz = x[jj][2] + x[kk][2] - 2.0*ztmp;
           pairs[n++] = delx*delx + dely*dely + delz*delz;
         }
       }
 
       // store nhalf smallest pair distances in 1st nhalf locations of pairs
 
       select(nhalf,npairs,pairs);
 
       // centrosymmetry = sum of nhalf smallest squared values
 
       value = 0.0;
       for (j = 0; j < nhalf; j++) value += pairs[j];
       centro[i] = value;
     } else centro[i] = 0.0;
   }
 
   delete [] pairs;
 }
 
 /* ----------------------------------------------------------------------
    2 select routines from Numerical Recipes (slightly modified)
    find k smallest values in array of length n
    2nd routine sorts auxiliary array at same time
 ------------------------------------------------------------------------- */
 
 // Swap helpers for the select routines.  Each expands to three separate
 // statements and relies on a local named tmp (SWAP) or itmp (ISWAP) being in
 // scope, so they must only be used inside a braced block.
 #define SWAP(a,b)   tmp = a; a = b; b = tmp;
 #define ISWAP(a,b) itmp = a; a = b; b = itmp;
 
 // Partially reorder arr[0..n-1] in place so that the k smallest values end up
 // in its first k locations (see the Numerical Recipes note above).
 //   k   = number of smallest values wanted
 //   n   = length of arr
 //   arr = values, permuted in place
 void ComputeCentroAtom::select(int k, int n, double *arr)
 {
   int i,ir,j,l,mid;
   double a,tmp;
 
   // shift the pointer so the code below can use 1-based indices 1..n
   arr--;
   l = 1;
   ir = n;
   for (;;) {
     if (ir <= l+1) {
       // active range has 1 or 2 elements: order them and finish
       if (ir == l+1 && arr[ir] < arr[l]) {
         SWAP(arr[l],arr[ir])
       }
       return;
     } else {
       // move the middle element to l+1, then order the three so that
       // arr[l] <= arr[l+1] <= arr[ir]; arr[l+1] becomes the partition value
       mid=(l+ir) >> 1;
       SWAP(arr[mid],arr[l+1])
       if (arr[l] > arr[ir]) {
         SWAP(arr[l],arr[ir])
       }
       if (arr[l+1] > arr[ir]) {
         SWAP(arr[l+1],arr[ir])
       }
       if (arr[l] > arr[l+1]) {
         SWAP(arr[l],arr[l+1])
       }
       i = l+1;
       j = ir;
       a = arr[l+1];
       // scan inward from both ends, swapping out-of-place elements,
       // until the two scans cross
       for (;;) {
         do i++; while (arr[i] < a);
         do j--; while (arr[j] > a);
         if (j < i) break;
         SWAP(arr[i],arr[j])
       }
       // put the partition value at its final position j
       arr[l+1] = arr[j];
       arr[j] = a;
       // keep only the side of the partition that contains position k
       if (j >= k) ir = j-1;
       if (j <= k) l = i;
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Same in-place selection as select(), but every move applied to arr is also
 // applied to the companion integer array iarr so the two stay aligned.
 //   k    = number of smallest values wanted
 //   n    = length of arr and iarr
 //   arr  = values, permuted in place
 //   iarr = auxiliary entries, permuted identically to arr
 void ComputeCentroAtom::select2(int k, int n, double *arr, int *iarr)
 {
   int i,ir,j,l,mid,ia,itmp;
   double a,tmp;
 
   // shift both pointers so the code below can use 1-based indices 1..n
   arr--;
   iarr--;
   l = 1;
   ir = n;
   for (;;) {
     if (ir <= l+1) {
       // active range has 1 or 2 elements: order them and finish
       if (ir == l+1 && arr[ir] < arr[l]) {
         SWAP(arr[l],arr[ir])
         ISWAP(iarr[l],iarr[ir])
       }
       return;
     } else {
       // move the middle element to l+1, then order the three so that
       // arr[l] <= arr[l+1] <= arr[ir]; arr[l+1] becomes the partition value
       mid=(l+ir) >> 1;
       SWAP(arr[mid],arr[l+1])
       ISWAP(iarr[mid],iarr[l+1])
       if (arr[l] > arr[ir]) {
         SWAP(arr[l],arr[ir])
         ISWAP(iarr[l],iarr[ir])
       }
       if (arr[l+1] > arr[ir]) {
         SWAP(arr[l+1],arr[ir])
         ISWAP(iarr[l+1],iarr[ir])
       }
       if (arr[l] > arr[l+1]) {
         SWAP(arr[l],arr[l+1])
         ISWAP(iarr[l],iarr[l+1])
       }
       i = l+1;
       j = ir;
       a = arr[l+1];
       ia = iarr[l+1];
       // scan inward from both ends, swapping out-of-place elements,
       // until the two scans cross
       for (;;) {
         do i++; while (arr[i] < a);
         do j--; while (arr[j] > a);
         if (j < i) break;
         SWAP(arr[i],arr[j])
         ISWAP(iarr[i],iarr[j])
       }
       // put the partition value (and its companion) at final position j
       arr[l+1] = arr[j];
       arr[j] = a;
       iarr[l+1] = iarr[j];
       iarr[j] = ia;
       // keep only the side of the partition that contains position k
       if (j >= k) ir = j-1;
       if (j <= k) l = i;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based array
 ------------------------------------------------------------------------- */
 
 // Report the size in bytes of the nmax-length per-atom centro vector.
 double ComputeCentroAtom::memory_usage()
 {
   return static_cast<double>(nmax * sizeof(double));
 }
diff --git a/src/compute_cluster_atom.cpp b/src/compute_cluster_atom.cpp
index 993a7dee7..343a0dd1b 100644
--- a/src/compute_cluster_atom.cpp
+++ b/src/compute_cluster_atom.cpp
@@ -1,244 +1,244 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "string.h"
 #include "stdlib.h"
 #include "compute_cluster_atom.h"
 #include "atom.h"
 #include "update.h"
 #include "modify.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "force.h"
 #include "pair.h"
 #include "comm.h"
 #include "memory.h"
 #include "error.h"
 
 #include "group.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 // Parse the compute arguments: exactly 4 are required and arg[3] is the
 // cluster cutoff, stored squared.  The clusterID array starts out empty.
 ComputeClusterAtom::ComputeClusterAtom(LAMMPS *lmp, int narg, char **arg) :
   Compute(lmp, narg, arg)
 {
   if (narg != 4) error->all(FLERR,"Illegal compute cluster/atom command");
 
   const double rc = force->numeric(FLERR,arg[3]);
   cutsq = rc*rc;
 
   // per-atom vector output, one value per atom in forward communication
   peratom_flag = 1;
   size_peratom_cols = 0;
   comm_forward = 1;
 
   clusterID = NULL;
   nmax = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Release the per-atom cluster ID vector.
 ComputeClusterAtom::~ComputeClusterAtom()
 {
   // the only array allocated by this compute
   memory->destroy(clusterID);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Check prerequisites and register a neighbor-list request for this compute.
 void ComputeClusterAtom::init()
 {
   // preconditions: atom IDs (they seed the cluster IDs in compute_peratom()),
   // a pair style, and a cluster cutoff no longer than the pairwise cutoff
   if (atom->tag_enable == 0)
     error->all(FLERR,"Cannot use compute cluster/atom unless atoms have IDs");
   if (force->pair == NULL)
     error->all(FLERR,"Compute cluster/atom requires a pair style be defined");
   if (sqrt(cutsq) > force->pair->cutforce)
     error->all(FLERR,
                "Compute cluster/atom cutoff is longer than pairwise cutoff");
 
   // need an occasional full neighbor list
   // full required so that pair of atoms on 2 procs both set their clusterID
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->compute = 1;
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
   neighbor->requests[irequest]->occasional = 1;
 
   // warn (on rank 0 only) if several computes of this style are defined
   int count = 0;
   for (int i = 0; i < modify->ncompute; i++)
     if (strcmp(modify->compute[i]->style,"cluster/atom") == 0) count++;
   if (count > 1 && comm->me == 0)
     error->warning(FLERR,"More than one compute cluster/atom");
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Remember the neighbor list handed to this compute.
 void ComputeClusterAtom::init_list(int id, NeighList *ptr)
 {
   // id is not used; only the list pointer is kept
   list = ptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Assign a cluster ID to every atom in the list.  Atoms in the group start
 // with their own atom ID; atoms outside the group get 0.  IDs are then merged
 // iteratively: any two in-group atoms closer than the cutoff take the smaller
 // of their two IDs, until no processor reports a change.
 void ComputeClusterAtom::compute_peratom()
 {
   int i,j,ii,jj,inum,jnum;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   invoked_peratom = update->ntimestep;
 
   // grow clusterID array if necessary
   // sized for owned + ghost atoms since ghost entries are read and written
 
   if (atom->nlocal+atom->nghost > nmax) {
     memory->destroy(clusterID);
     nmax = atom->nmax;
     memory->create(clusterID,nmax,"cluster/atom:clusterID");
     vector_atom = clusterID;
   }
 
   // invoke full neighbor list (will copy or build if necessary)
 
   neighbor->build_one(list);
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // if group is dynamic, ensure ghost atom masks are current
   // commflag = 0 makes pack/unpack_forward_comm transfer atom->mask
 
   if (group->dynamic[igroup]) {
     commflag = 0;
     comm->forward_comm_compute(this);
   }
 
   // every atom starts in its own cluster, with clusterID = atomID
 
   tagint *tag = atom->tag;
   int *mask = atom->mask;
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     if (mask[i] & groupbit) clusterID[i] = tag[i];
     else clusterID[i] = 0;
   }
 
   // loop until no more changes on any proc:
   // acquire clusterIDs of ghost atoms
   // loop over my atoms, checking distance to neighbors
   // if both atoms are in cluster, assign lowest clusterID to both
   // iterate until no changes in my atoms
   // then check if any proc made changes
 
   // commflag = 1 makes pack/unpack_forward_comm transfer clusterID
   commflag = 1;
   double **x = atom->x;
 
   int change,done,anychange;
 
   while (1) {
     comm->forward_comm_compute(this);
 
     // change = 1 if any sweep of the inner loop modified a clusterID
     change = 0;
     while (1) {
       done = 1;
       for (ii = 0; ii < inum; ii++) {
         i = ilist[ii];
         if (!(mask[i] & groupbit)) continue;
 
         xtmp = x[i][0];
         ytmp = x[i][1];
         ztmp = x[i][2];
         jlist = firstneigh[i];
         jnum = numneigh[i];
 
         for (jj = 0; jj < jnum; jj++) {
           j = jlist[jj];
           j &= NEIGHMASK;
           if (!(mask[j] & groupbit)) continue;
           if (clusterID[i] == clusterID[j]) continue;
 
           delx = xtmp - x[j][0];
           dely = ytmp - x[j][1];
           delz = ztmp - x[j][2];
           rsq = delx*delx + dely*dely + delz*delz;
           if (rsq < cutsq) {
             clusterID[i] = clusterID[j] = MIN(clusterID[i],clusterID[j]);
             done = 0;
           }
         }
       }
       if (!done) change = 1;
       if (done) break;
     }
 
     // stop if all procs are done
 
     MPI_Allreduce(&change,&anychange,1,MPI_INT,MPI_MAX,world);
     if (!anychange) break;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Pack one value per listed atom into buf and return how many were written.
 // commflag != 0: the atom's clusterID; commflag == 0: its mask, stored
 // through ubuf.  pbc_flag and pbc are not used.
 int ComputeClusterAtom::pack_forward_comm(int n, int *list, double *buf,
                                           int pbc_flag, int *pbc)
 {
   int count = 0;
 
   if (!commflag) {
     int *mask = atom->mask;
     for (int k = 0; k < n; ++k) {
       const int idx = list[k];
       buf[count++] = ubuf(mask[idx]).d;
     }
     return count;
   }
 
   for (int k = 0; k < n; ++k) {
     const int idx = list[k];
     buf[count++] = clusterID[idx];
   }
   return count;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Unpack n values from buf into atoms first .. first+n-1.
 // commflag != 0: values are clusterIDs; commflag == 0: values are masks that
 // were stored through ubuf.
 void ComputeClusterAtom::unpack_forward_comm(int n, int first, double *buf)
 {
   const int end = first + n;
   int pos = 0;
 
   if (!commflag) {
     int *mask = atom->mask;
     for (int idx = first; idx < end; ++idx)
       mask[idx] = (int) ubuf(buf[pos++]).i;
     return;
   }
 
   for (int idx = first; idx < end; ++idx)
     clusterID[idx] = buf[pos++];
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based array
 ------------------------------------------------------------------------- */
 
 // Report the size in bytes of the nmax-length per-atom clusterID vector.
 double ComputeClusterAtom::memory_usage()
 {
   return static_cast<double>(nmax * sizeof(double));
 }
diff --git a/src/compute_cna_atom.cpp b/src/compute_cna_atom.cpp
index 405f17b02..2bf33fec3 100644
--- a/src/compute_cna_atom.cpp
+++ b/src/compute_cna_atom.cpp
@@ -1,367 +1,367 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Wan Liang (Chinese Academy of Sciences)
 ------------------------------------------------------------------------- */
 
 #include "string.h"
 #include "stdlib.h"
 #include "compute_cna_atom.h"
 #include "atom.h"
 #include "update.h"
 #include "force.h"
 #include "pair.h"
 #include "modify.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "comm.h"
 #include "memory.h"
 #include "error.h"
 #include "math.h"
 
 using namespace LAMMPS_NS;
 
 // capacity of the per-atom near-neighbor lists (nearest[i][], onenearest[])
 #define MAXNEAR 16
 // capacity of the common-neighbor scratch arrays (common[], bonds[])
 #define MAXCOMMON 8
 
 // values stored in pattern[] as the per-atom CNA result
 enum{UNKNOWN,FCC,HCP,BCC,ICOS,OTHER};
 // column indices into the local cna[MAXNEAR][4] table
 enum{NCOMMON,NBOND,MAXBOND,MINBOND};
 
 /* ---------------------------------------------------------------------- */
 
 // Parse the compute arguments: exactly 4 are required and arg[3] is a
 // non-negative cutoff, stored squared.  All work arrays start out empty.
 ComputeCNAAtom::ComputeCNAAtom(LAMMPS *lmp, int narg, char **arg) :
   Compute(lmp, narg, arg)
 {
   if (narg != 4) error->all(FLERR,"Illegal compute cna/atom command");
 
   // per-atom vector output
   peratom_flag = 1;
   size_peratom_cols = 0;
 
   const double rc = force->numeric(FLERR,arg[3]);
   if (rc < 0.0) error->all(FLERR,"Illegal compute cna/atom command");
   cutsq = rc*rc;
 
   // nothing allocated until compute_peratom() runs
   nmax = 0;
   pattern = NULL;
   nnearest = NULL;
   nearest = NULL;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Release the per-atom pattern vector and the near-neighbor tables.
 ComputeCNAAtom::~ComputeCNAAtom()
 {
   memory->destroy(pattern);
   memory->destroy(nnearest);
   memory->destroy(nearest);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Check prerequisites, emit warnings, and register a neighbor-list request.
 void ComputeCNAAtom::init()
 {
   // hard requirements: a pair style, and a CNA cutoff no longer than the
   // pairwise force cutoff
   if (force->pair == NULL)
     error->all(FLERR,"Compute cna/atom requires a pair style be defined");
   if (sqrt(cutsq) > force->pair->cutforce)
     error->all(FLERR,"Compute cna/atom cutoff is longer than pairwise cutoff");
 
   // cannot use neighbor->cutneighmax b/c neighbor has not yet been init
   // warn (rank 0 only) when twice the CNA cutoff exceeds force cutoff + skin
 
   if (2.0*sqrt(cutsq) > force->pair->cutforce + neighbor->skin &&
       comm->me == 0)
     error->warning(FLERR,"Compute cna/atom cutoff may be too large to find "
                    "ghost atom neighbors");
 
   // warn (on rank 0 only) if several computes of this style are defined
   int count = 0;
   for (int i = 0; i < modify->ncompute; i++)
     if (strcmp(modify->compute[i]->style,"cna/atom") == 0) count++;
   if (count > 1 && comm->me == 0)
     error->warning(FLERR,"More than one compute cna/atom defined");
 
   // need an occasional full neighbor list
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->compute = 1;
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
   neighbor->requests[irequest]->occasional = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Remember the neighbor list handed to this compute.
 void ComputeCNAAtom::init_list(int id, NeighList *ptr)
 {
   // id is not used; only the list pointer is kept
   list = ptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Classify every atom in the list by common neighbor analysis and store the
 // result in pattern[]: UNKNOWN for atoms outside the group, otherwise FCC,
 // HCP, ICOS, BCC or OTHER.  Two passes: the first builds each atom's list of
 // neighbors within the cutoff, the second builds the per-neighbor cna[][]
 // signature table and matches it against the known patterns.
 void ComputeCNAAtom::compute_peratom()
 {
   int i,j,k,ii,jj,kk,m,n,inum,jnum,inear,jnear;
   int firstflag,ncommon,nbonds,maxbonds,minbonds;
   int nfcc,nhcp,nbcc4,nbcc6,nico,cj,ck,cl,cm;
   int *ilist,*jlist,*numneigh,**firstneigh;
   int cna[MAXNEAR][4],onenearest[MAXNEAR];
   int common[MAXCOMMON],bonds[MAXCOMMON];
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
 
   invoked_peratom = update->ntimestep;
 
   // grow arrays if necessary
 
   if (atom->nlocal > nmax) {
     memory->destroy(nearest);
     memory->destroy(nnearest);
     memory->destroy(pattern);
     nmax = atom->nmax;
 
     memory->create(nearest,nmax,MAXNEAR,"cna:nearest");
     memory->create(nnearest,nmax,"cna:nnearest");
     memory->create(pattern,nmax,"cna:cna_pattern");
     vector_atom = pattern;
   }
 
   // invoke full neighbor list (will copy or build if necessary)
 
   neighbor->build_one(list);
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // find the neighbors of each atom within cutoff using full neighbor list
   // nearest[] = atom indices of nearest neighbors, up to MAXNEAR
   // do this for all atoms, not just compute group
   // since CNA calculation requires neighbors of neighbors
 
   double **x = atom->x;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
   // nerror = number of atoms whose neighbor list was cut off at MAXNEAR
   int nerror = 0;
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     n = 0;
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       if (rsq < cutsq) {
         if (n < MAXNEAR) nearest[i][n++] = j;
         else {
           nerror++;
           break;
         }
       }
     }
     nnearest[i] = n;
   }
 
   // warning message
 
   int nerrorall;
   MPI_Allreduce(&nerror,&nerrorall,1,MPI_INT,MPI_SUM,world);
   if (nerrorall && comm->me == 0) {
     char str[128];
     sprintf(str,"Too many neighbors in CNA for %d atoms",nerrorall);
     error->warning(FLERR,str,0);
   }
 
   // compute CNA for each atom in group
   // only performed if # of nearest neighbors = 12 (fcc,hcp,icos) or 14 (bcc)
   // nerror now counts overflows of the common[] array
 
   nerror = 0;
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
 
     if (!(mask[i] & groupbit)) {
       pattern[i] = UNKNOWN;
       continue;
     }
 
     if (nnearest[i] != 12 && nnearest[i] != 14) {
       pattern[i] = OTHER;
       continue;
     }
 
     // loop over near neighbors of I to build cna data structure
     // cna[k][NCOMMON] = # of common neighbors of I with each of its neighs
     // cna[k][NBOND] = # of bonds between those common neighbors
     // cna[k][MAXBOND] = max # of bonds of any common neighbor
     // cna[k][MINBOND] = min # of bonds of any common neighbor
 
     for (m = 0; m < nnearest[i]; m++) {
       j = nearest[i][m];
 
       // common = list of neighbors common to atom I and atom J
       // if J is an owned atom, use its near neighbor list to find them
       // if J is a ghost atom, use full neighbor list of I to find them
       // in latter case, must exclude J from I's neighbor list
 
       if (j < nlocal) {
         // firstflag limits the overflow count to one per (I,J) pair
         firstflag = 1;
         ncommon = 0;
         for (inear = 0; inear < nnearest[i]; inear++)
           for (jnear = 0; jnear < nnearest[j]; jnear++)
             if (nearest[i][inear] == nearest[j][jnear]) {
               if (ncommon < MAXCOMMON) common[ncommon++] = nearest[i][inear];
               else if (firstflag) {
                 nerror++;
                 firstflag = 0;
               }
             }
 
       } else {
         // build onenearest[] = atoms in I's full list within cutoff of J
         xtmp = x[j][0];
         ytmp = x[j][1];
         ztmp = x[j][2];
         jlist = firstneigh[i];
         jnum = numneigh[i];
 
         n = 0;
         for (kk = 0; kk < jnum; kk++) {
           k = jlist[kk];
           k &= NEIGHMASK;
           if (k == j) continue;
 
           delx = xtmp - x[k][0];
           dely = ytmp - x[k][1];
           delz = ztmp - x[k][2];
           rsq = delx*delx + dely*dely + delz*delz;
           if (rsq < cutsq) {
             if (n < MAXNEAR) onenearest[n++] = k;
             else break;
           }
         }
 
         firstflag = 1;
         ncommon = 0;
         for (inear = 0; inear < nnearest[i]; inear++)
           for (jnear = 0; jnear < n; jnear++)
             if (nearest[i][inear] == onenearest[jnear]) {
               if (ncommon < MAXCOMMON) common[ncommon++] = nearest[i][inear];
               else if (firstflag) {
                 nerror++;
                 firstflag = 0;
               }
             }
       }
 
       cna[m][NCOMMON] = ncommon;
 
       // calculate total # of bonds between common neighbor atoms
       // also max and min # of common atoms any common atom is bonded to
       // bond = pair of atoms within cutoff
 
       for (n = 0; n < ncommon; n++) bonds[n] = 0;
 
       // note: j and k are reused here as indices of common neighbors
       nbonds = 0;
       for (jj = 0; jj < ncommon; jj++) {
         j = common[jj];
         xtmp = x[j][0];
         ytmp = x[j][1];
         ztmp = x[j][2];
         for (kk = jj+1; kk < ncommon; kk++) {
           k = common[kk];
           delx = xtmp - x[k][0];
           dely = ytmp - x[k][1];
           delz = ztmp - x[k][2];
           rsq = delx*delx + dely*dely + delz*delz;
           if (rsq < cutsq) {
             nbonds++;
             bonds[jj]++;
             bonds[kk]++;
           }
         }
       }
 
       cna[m][NBOND] = nbonds;
 
       // with ncommon == 0 the loop is skipped: maxbonds stays 0 and
       // minbonds stays MAXCOMMON
       maxbonds = 0;
       minbonds = MAXCOMMON;
       for (n = 0; n < ncommon; n++) {
         maxbonds = MAX(bonds[n],maxbonds);
         minbonds = MIN(bonds[n],minbonds);
       }
       cna[m][MAXBOND] = maxbonds;
       cna[m][MINBOND] = minbonds;
     }
 
     // detect CNA pattern of the atom
     // count how many neighbors carry each known signature; the atom stays
     // OTHER unless the counts match one of the patterns exactly
 
     nfcc = nhcp = nbcc4 = nbcc6 = nico = 0;
     pattern[i] = OTHER;
 
     if (nnearest[i] == 12) {
       for (inear = 0; inear < 12; inear++) {
         cj = cna[inear][NCOMMON];
         ck = cna[inear][NBOND];
         cl = cna[inear][MAXBOND];
         cm = cna[inear][MINBOND];
         if (cj == 4 && ck == 2 && cl == 1 && cm == 1) nfcc++;
         else if (cj == 4 && ck == 2 && cl == 2 && cm == 0) nhcp++;
         else if (cj == 5 && ck == 5 && cl == 2 && cm == 2) nico++;
       }
       if (nfcc == 12) pattern[i] = FCC;
       else if (nfcc == 6 && nhcp == 6) pattern[i] = HCP;
       else if (nico == 12) pattern[i] = ICOS;
 
     } else if (nnearest[i] == 14) {
       for (inear = 0; inear < 14; inear++) {
         cj = cna[inear][NCOMMON];
         ck = cna[inear][NBOND];
         cl = cna[inear][MAXBOND];
         cm = cna[inear][MINBOND];
         if (cj == 4 && ck == 4 && cl == 2 && cm == 2) nbcc4++;
         else if (cj == 6 && ck == 6 && cl == 2 && cm == 2) nbcc6++;
       }
       if (nbcc4 == 6 && nbcc6 == 8) pattern[i] = BCC;
     }
   }
 
   // warning message
 
   MPI_Allreduce(&nerror,&nerrorall,1,MPI_INT,MPI_SUM,world);
   if (nerrorall && comm->me == 0) {
     char str[128];
     sprintf(str,"Too many common neighbors in CNA %d times",nerrorall);
     error->warning(FLERR,str);
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based array
 ------------------------------------------------------------------------- */
 
 // Report the bytes counted for this compute's per-atom storage: nmax ints,
 // nmax*MAXNEAR ints and nmax doubles.
 double ComputeCNAAtom::memory_usage()
 {
   const double per_atom_ints = nmax * sizeof(int);
   const double neighbor_table = nmax * MAXNEAR * sizeof(int);
   const double per_atom_doubles = nmax * sizeof(double);
   return per_atom_ints + neighbor_table + per_atom_doubles;
 }
diff --git a/src/compute_contact_atom.cpp b/src/compute_contact_atom.cpp
index 5104dd0c1..8db5c3e4b 100644
--- a/src/compute_contact_atom.cpp
+++ b/src/compute_contact_atom.cpp
@@ -1,197 +1,197 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "string.h"
 #include "stdlib.h"
 #include "compute_contact_atom.h"
 #include "atom.h"
 #include "update.h"
 #include "modify.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "force.h"
 #include "pair.h"
 #include "comm.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 // Parse the compute arguments (exactly 3 are required), set up per-atom
 // vector output with one value per atom in reverse communication, and
 // require an atom style that sets atom->sphere_flag.
 ComputeContactAtom::ComputeContactAtom(LAMMPS *lmp, int narg, char **arg) :
   Compute(lmp, narg, arg)
 {
   if (narg != 3) error->all(FLERR,"Illegal compute contact/atom command");
 
   comm_reverse = 1;
   size_peratom_cols = 0;
   peratom_flag = 1;
 
   // nothing allocated until compute_peratom() runs
   contact = NULL;
   nmax = 0;
 
   // error checks
 
   if (atom->sphere_flag == 0)
     error->all(FLERR,"Compute contact/atom requires atom style sphere");
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Release the per-atom contact count vector.
 ComputeContactAtom::~ComputeContactAtom()
 {
   // the only array allocated by this compute
   memory->destroy(contact);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Check prerequisites and register a neighbor-list request for this compute.
 void ComputeContactAtom::init()
 {
   if (force->pair == NULL)
     error->all(FLERR,"Compute contact/atom requires a pair style be defined");
 
   // warn (on rank 0 only) if several computes of this style are defined
   int count = 0;
   for (int i = 0; i < modify->ncompute; i++)
     if (strcmp(modify->compute[i]->style,"contact/atom") == 0) count++;
   if (count > 1 && comm->me == 0)
     error->warning(FLERR,"More than one compute contact/atom");
 
   // need an occasional neighbor list
   // the request is flagged gran (not half) and compute-owned (not pair)
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->gran = 1;
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->compute = 1;
   neighbor->requests[irequest]->occasional = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Remember the neighbor list handed to this compute.
 void ComputeContactAtom::init_list(int id, NeighList *ptr)
 {
   // id is not used; only the list pointer is kept
   list = ptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Count contacts per atom into contact[].  For every in-group atom I in the
 // list and every neighbor J, a contact is tallied for both I and J when their
 // separation is no larger than the sum of their radii.
 void ComputeContactAtom::compute_peratom()
 {
   int i,j,ii,jj,inum,jnum;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
   double radi,radsum,radsumsq;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   invoked_peratom = update->ntimestep;
 
   // grow contact array if necessary
 
   if (atom->nmax > nmax) {
     memory->destroy(contact);
     nmax = atom->nmax;
     memory->create(contact,nmax,"contact/atom:contact");
     vector_atom = contact;
   }
 
   // invoke neighbor list (will copy or build if necessary)
 
   neighbor->build_one(list);
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // compute number of contacts for each atom in group
   // contact if distance <= sum of radii
   // tally for both I and J
 
   double **x = atom->x;
   double *radius = atom->radius;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
   int nall = nlocal + atom->nghost;
 
   // zero owned and ghost entries, since ghost J atoms are tallied below
   for (i = 0; i < nall; i++) contact[i] = 0.0;
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     if (mask[i] & groupbit) {
       xtmp = x[i][0];
       ytmp = x[i][1];
       ztmp = x[i][2];
       radi = radius[i];
       jlist = firstneigh[i];
       jnum = numneigh[i];
 
       for (jj = 0; jj < jnum; jj++) {
         j = jlist[jj];
         j &= NEIGHMASK;
 
         delx = xtmp - x[j][0];
         dely = ytmp - x[j][1];
         delz = ztmp - x[j][2];
         rsq = delx*delx + dely*dely + delz*delz;
         radsum = radi + radius[j];
         radsumsq = radsum*radsum;
         // J is tallied without a group check
         if (rsq <= radsumsq) {
           contact[i] += 1.0;
           contact[j] += 1.0;
         }
       }
     }
   }
 
   // communicate ghost atom counts between neighbor procs if necessary
   // unpack_reverse_comm() adds the received counts to the owned atoms
 
   if (force->newton_pair) comm->reverse_comm_compute(this);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Copy contact[first .. first+n-1] into buf and return the number of values
 // written.
 int ComputeContactAtom::pack_reverse_comm(int n, int first, double *buf)
 {
   const int end = first + n;
   int count = 0;
 
   for (int idx = first; idx < end; ++idx)
     buf[count++] = contact[idx];
 
   return count;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Add the n values in buf to contact[] of the atoms indexed by list[].
 void ComputeContactAtom::unpack_reverse_comm(int n, int *list, double *buf)
 {
   for (int k = 0; k < n; ++k) {
     const int idx = list[k];
     contact[idx] += buf[k];
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based array
 ------------------------------------------------------------------------- */
 
 // Report the size in bytes of the nmax-length per-atom contact vector.
 double ComputeContactAtom::memory_usage()
 {
   return static_cast<double>(nmax * sizeof(double));
 }
diff --git a/src/compute_coord_atom.cpp b/src/compute_coord_atom.cpp
index 4efcae835..beb03812b 100644
--- a/src/compute_coord_atom.cpp
+++ b/src/compute_coord_atom.cpp
@@ -1,227 +1,227 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "string.h"
 #include "stdlib.h"
 #include "compute_coord_atom.h"
 #include "atom.h"
 #include "update.h"
 #include "modify.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "force.h"
 #include "pair.h"
 #include "comm.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 ComputeCoordAtom::ComputeCoordAtom(LAMMPS *lmp, int narg, char **arg) :
   Compute(lmp, narg, arg)
 {
   if (narg < 4) error->all(FLERR,"Illegal compute coord/atom command");
 
   // arg[3] is the cutoff distance, only its square is stored
 
   const double rcut = force->numeric(FLERR,arg[3]);
   cutsq = rcut*rcut;
 
   // every arg after the cutoff is one type range = one output column
   // the range tables always hold at least one entry
 
   int ntypes = atom->ntypes;
   const int nranges = narg - 4;
   typelo = new int[nranges+1];
   typehi = new int[nranges+1];
 
   if (nranges == 0) {
 
     // no range given: a single column covering types 1 to ntypes
 
     typelo[0] = 1;
     typehi[0] = ntypes;
     ncol = 1;
 
   } else {
 
     // force->bounds() fills in the lo/hi pair for each range arg
 
     for (ncol = 0; ncol < nranges; ncol++) {
       force->bounds(arg[4+ncol],ntypes,typelo[ncol],typehi[ncol]);
       if (typelo[ncol] > typehi[ncol])
         error->all(FLERR,"Illegal compute coord/atom command");
     }
   }
 
   // a single column is exposed as a per-atom vector (cols = 0),
   // several columns as a per-atom array
 
   peratom_flag = 1;
   size_peratom_cols = (ncol == 1) ? 0 : ncol;
 
   // per-atom storage is allocated on first use
 
   nmax = 0;
   cvec = NULL;
   carray = NULL;
 }
 
 /* ---------------------------------------------------------------------- */
 
 ComputeCoordAtom::~ComputeCoordAtom()
 {
   // per-atom storage
 
   memory->destroy(carray);
   memory->destroy(cvec);
 
   // type range tables created in the constructor
 
   delete [] typehi;
   delete [] typelo;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeCoordAtom::init()
 {
   // a pair style must be defined and its cutforce must be
   //   at least as large as this compute's cutoff
 
   if (force->pair == NULL)
     error->all(FLERR,"Compute coord/atom requires a pair style be defined");
   if (sqrt(cutsq) > force->pair->cutforce)
     error->all(FLERR,
                "Compute coord/atom cutoff is longer than pairwise cutoff");
 
   // need an occasional full neighbor list
   // request is flagged as coming from a compute, not a pair style
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->compute = 1;
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->full = 1;
   neighbor->requests[irequest]->occasional = 1;
 
   // count computes whose style is coord/atom (this one included)
   // warn, only where comm->me is 0, if there is more than one
 
   int count = 0;
   for (int i = 0; i < modify->ncompute; i++)
     if (strcmp(modify->compute[i]->style,"coord/atom") == 0) count++;
   if (count > 1 && comm->me == 0)
     error->warning(FLERR,"More than one compute coord/atom");
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeCoordAtom::init_list(int id, NeighList *ptr)
 {
   // remember the neighbor list passed in; the id argument is not used here
   list = ptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeCoordAtom::compute_peratom()
 {
   int i,j,m,ii,jj,inum,jnum,jtype,n;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
   int *ilist,*jlist,*numneigh,**firstneigh;
   double *count;
 
   invoked_peratom = update->ntimestep;
 
   // grow coordination array if necessary
 
   if (atom->nlocal > nmax) {
     if (ncol == 1) {
       memory->destroy(cvec);
       nmax = atom->nmax;
       memory->create(cvec,nmax,"coord/atom:cvec");
       vector_atom = cvec;
     } else {
       memory->destroy(carray);
       nmax = atom->nmax;
       memory->create(carray,nmax,ncol,"coord/atom:carray");
       array_atom = carray;
     }
   }
 
   // invoke full neighbor list (will copy or build if necessary)
 
   neighbor->build_one(list);
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // compute coordination number(s) for each atom in group
   // use full neighbor list to count atoms less than cutoff
 
   double **x = atom->x;
   int *type = atom->type;
   int *mask = atom->mask;
 
   if (ncol == 1) {
     for (ii = 0; ii < inum; ii++) {
       i = ilist[ii];
       if (mask[i] & groupbit) {
         xtmp = x[i][0];
         ytmp = x[i][1];
         ztmp = x[i][2];
         jlist = firstneigh[i];
         jnum = numneigh[i];
 
         n = 0;
         for (jj = 0; jj < jnum; jj++) {
           j = jlist[jj];
           j &= NEIGHMASK;
           
           jtype = type[j];
           delx = xtmp - x[j][0];
           dely = ytmp - x[j][1];
           delz = ztmp - x[j][2];
           rsq = delx*delx + dely*dely + delz*delz;
           if (rsq < cutsq && jtype >= typelo[0] && jtype <= typehi[0]) n++;
         }
         
         cvec[i] = n;
       } else cvec[i] = 0.0;
     }
 
   } else {
     for (ii = 0; ii < inum; ii++) {
       i = ilist[ii];
       count = carray[i];
       for (m = 0; m < ncol; m++) count[m] = 0.0;
 
       if (mask[i] & groupbit) {
         xtmp = x[i][0];
         ytmp = x[i][1];
         ztmp = x[i][2];
         jlist = firstneigh[i];
         jnum = numneigh[i];
 
 
         for (jj = 0; jj < jnum; jj++) {
           j = jlist[jj];
           j &= NEIGHMASK;
           
           jtype = type[j];
           delx = xtmp - x[j][0];
           dely = ytmp - x[j][1];
           delz = ztmp - x[j][2];
           rsq = delx*delx + dely*dely + delz*delz;
           if (rsq < cutsq) {
             for (m = 0; m < ncol; m++)
               if (jtype >= typelo[m] && jtype <= typehi[m])
                 count[m] += 1.0;
           }
         }
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based array
 ------------------------------------------------------------------------- */
 
 double ComputeCoordAtom::memory_usage()
 {
   // ncol doubles for each of nmax atoms, reported in bytes
   return static_cast<double>(ncol*nmax * sizeof(double));
 }
diff --git a/src/compute_group_group.cpp b/src/compute_group_group.cpp
index 22140f449..900af6852 100644
--- a/src/compute_group_group.cpp
+++ b/src/compute_group_group.cpp
@@ -1,401 +1,401 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Naveen Michaud-Agrawal (Johns Hopkins U)
      K-space terms added by Stan Moore (BYU)
 ------------------------------------------------------------------------- */
 
 #include "mpi.h"
 #include "string.h"
 #include "compute_group_group.h"
 #include "atom.h"
 #include "update.h"
 #include "force.h"
 #include "pair.h"
 #include "neighbor.h"
 #include "neigh_request.h"
 #include "neigh_list.h"
 #include "group.h"
 #include "kspace.h"
 #include "error.h"
 #include "math.h"
 #include "comm.h"
 #include "domain.h"
 #include "math_const.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 #define SMALL 0.00001
 
 /* ---------------------------------------------------------------------- */
 
 ComputeGroupGroup::ComputeGroupGroup(LAMMPS *lmp, int narg, char **arg) :
   Compute(lmp, narg, arg)
 {
   if (narg < 4) error->all(FLERR,"Illegal compute group/group command");
 
   // this compute provides a global scalar and a global vector of length 3
 
   scalar_flag = 1;
   vector_flag = 1;
   size_vector = 3;
   extscalar = 1;
   extvector = 1;
 
   // keep a private copy of the second group's ID (arg[3])
   // and look up its bitmask
 
   const int len = static_cast<int>(strlen(arg[3])) + 1;
   group2 = new char[len];
   strcpy(group2,arg[3]);
 
   jgroup = group->find(group2);
   if (jgroup == -1)
     error->all(FLERR,"Compute group/group group ID does not exist");
   jgroupbit = group->bitmask[jgroup];
 
   // defaults for the optional keywords
 
   pairflag = 1;
   kspaceflag = 0;
   boundaryflag = 1;
 
   // optional args come as keyword/value pairs
   // keyword = pair or kspace or boundary, value = yes or no
   // every parsing failure reports the same error
 
   for (int iarg = 4; iarg < narg; iarg += 2) {
     const char *keyword = arg[iarg];
     const bool is_pair = (strcmp(keyword,"pair") == 0);
     const bool is_kspace = !is_pair && (strcmp(keyword,"kspace") == 0);
     const bool is_boundary =
       !is_pair && !is_kspace && (strcmp(keyword,"boundary") == 0);
 
     if (!is_pair && !is_kspace && !is_boundary)
       error->all(FLERR,"Illegal compute group/group command");
     if (iarg+2 > narg)
       error->all(FLERR,"Illegal compute group/group command");
 
     int value = 0;
     if (strcmp(arg[iarg+1],"yes") == 0) value = 1;
     else if (strcmp(arg[iarg+1],"no") == 0) value = 0;
     else error->all(FLERR,"Illegal compute group/group command");
 
     if (is_pair) pairflag = value;
     else if (is_kspace) kspaceflag = value;
     else boundaryflag = value;
   }
 
   vector = new double[3];
 }
 
 /* ---------------------------------------------------------------------- */
 
 ComputeGroupGroup::~ComputeGroupGroup()
 {
   // both arrays were allocated with new [] in the constructor
 
   delete [] vector;
   delete [] group2;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeGroupGroup::init()
 {
   // pair style checks only apply when the pair contribution is enabled
   // if non-hybrid, then error if single_enable = 0
   // if hybrid, let hybrid determine if sub-style sets single_enable = 0
   // the pairflag guard on the second test keeps force->pair from being
   //   dereferenced when pair = no is used and no pair style is defined
 
   if (pairflag && force->pair == NULL)
     error->all(FLERR,"No pair style defined for compute group/group");
   if (pairflag && force->pair_match("hybrid",0) == NULL &&
       force->pair->single_enable == 0)
     error->all(FLERR,"Pair style does not support compute group/group");
 
   // error if Kspace style does not compute group/group interactions
 
   if (kspaceflag && force->kspace == NULL)
     error->all(FLERR,"No Kspace style defined for compute group/group");
   if (kspaceflag && force->kspace->group_group_enable == 0)
     error->all(FLERR,"Kspace style does not support compute group/group");
 
   // cache the pair style and its cutsq table, or NULL if pair = no
 
   if (pairflag) {
     pair = force->pair;
     cutsq = force->pair->cutsq;
   } else pair = NULL;
 
   // cache the Kspace style, or NULL if kspace = no
 
   if (kspaceflag) kspace = force->kspace;
   else kspace = NULL;
 
   // compute Kspace correction terms
   // warn, only where comm->me is 0, if e_correction exceeds SMALL in size
 
   if (kspaceflag) {
     kspace_correction();
     if (fabs(e_correction) > SMALL && comm->me == 0) {
       char str[128];
       sprintf(str,"Both groups in compute group/group have a net charge; "
               "the Kspace boundary correction to energy will be non-zero");
       error->warning(FLERR,str);
     }
   }
 
   // recheck that group 2 has not been deleted
 
   jgroup = group->find(group2);
   if (jgroup == -1)
     error->all(FLERR,"Compute group/group group ID does not exist");
   jgroupbit = group->bitmask[jgroup];
 
   // need an occasional half neighbor list
   // only requested when the pair contribution is enabled
 
   if (pairflag) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->pair = 0;
     neighbor->requests[irequest]->compute = 1;
     neighbor->requests[irequest]->occasional = 1;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeGroupGroup::init_list(int id, NeighList *ptr)
 {
   // remember the neighbor list passed in; the id argument is not used here
   list = ptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double ComputeGroupGroup::compute_scalar()
 {
   // scalar and vector are always refreshed together,
   //   so both are stamped with the current timestep
 
   invoked_vector = update->ntimestep;
   invoked_scalar = invoked_vector;
 
   // start from zero, each enabled contribution adds to the totals
 
   scalar = 0.0;
   for (int k = 0; k < 3; k++) vector[k] = 0.0;
 
   if (pairflag) pair_contribution();
   if (kspaceflag) kspace_contribution();
 
   return scalar;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeGroupGroup::compute_vector()
 {
   // scalar and vector are always refreshed together,
   //   so both are stamped with the current timestep
 
   invoked_vector = update->ntimestep;
   invoked_scalar = invoked_vector;
 
   // start from zero, each enabled contribution adds to the totals
 
   scalar = 0.0;
   for (int k = 0; k < 3; k++) vector[k] = 0.0;
 
   if (pairflag) pair_contribution();
   if (kspaceflag) kspace_contribution();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeGroupGroup::pair_contribution()
 {
   // add the pairwise energy and force between the two groups
   //   to scalar and vector, summed over all procs in world
 
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz;
   double rsq,eng,fpair,factor_coul,factor_lj;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   double **x = atom->x;
   int *type = atom->type;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
 
   // invoke half neighbor list (will copy or build if necessary)
 
   neighbor->build_one(list);
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
   // skip if I,J are not in 2 groups
   // one[0] accumulates energy, one[1],one[2],one[3] the x,y,z force
 
   double one[4];
   one[0] = one[1] = one[2] = one[3] = 0.0;
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     // skip if atom I is not in either group
     if (!(mask[i] & groupbit || mask[i] & jgroupbit)) continue; 
 
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // special-bond scale factors are looked up with sbmask(j)
       //   before j is masked down to a plain atom index
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       if (!(mask[j] & groupbit || mask[j] & jgroupbit)) continue; // skip if atom J is not in either group
 
       // ij_flag: I in this compute's group and J in group 2
       // ji_flag: J in this compute's group and I in group 2
       // both can be set when an atom belongs to both groups
 
       int ij_flag = 0;
       int ji_flag = 0;
       if (mask[i] & groupbit && mask[j] & jgroupbit) ij_flag = 1;
       if (mask[j] & groupbit && mask[i] & jgroupbit) ji_flag = 1;
       if (!ij_flag && !ji_flag) continue; // skip if atoms I,J are only in the same group
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         eng = pair->single(i,j,itype,jtype,rsq,factor_coul,factor_lj,fpair);
 
         // energy only computed once so tally full amount
         // force tally is jgroup acting on igroup
 
         if (newton_pair || j < nlocal) {
           one[0] += eng;
           if (ij_flag) {
             one[1] += delx*fpair;
             one[2] += dely*fpair;
             one[3] += delz*fpair;
           }
           if (ji_flag) {
             one[1] -= delx*fpair;
             one[2] -= dely*fpair;
             one[3] -= delz*fpair;
           }
 
         // energy computed twice so tally half amount
         // only tally force if I own igroup atom
 
         } else {
           one[0] += 0.5*eng;
           if (ij_flag) {
             one[1] += delx*fpair;
             one[2] += dely*fpair;
             one[3] += delz*fpair;
           }
         }
       }
     }
   }
 
   // sum the four local tallies across world,
   //   then add them onto scalar and vector
 
   double all[4];
   MPI_Allreduce(one,all,4,MPI_DOUBLE,MPI_SUM,world);
   scalar += all[0];
   vector[0] += all[1]; vector[1] += all[2]; vector[2] += all[3];
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeGroupGroup::kspace_contribution()
 {
   // pointer is fetched before the Kspace call, values are read after it
 
   double *fgroup = force->kspace->f2group;
 
   // first call (last arg = 0): tally twice its energy and its force
 
   force->kspace->compute_group_group(groupbit,jgroupbit,0);
   scalar += 2.0*force->kspace->e2group;
   for (int dim = 0; dim < 3; dim++) vector[dim] += fgroup[dim];
 
   // second call (last arg = 1): subtract the extra A <--> A Kspace
   //   interaction so energy matches the real-space style of this compute
 
   force->kspace->compute_group_group(groupbit,jgroupbit,1);
   scalar -= force->kspace->e2group;
 
   // self energy correction term
 
   scalar -= e_self;
 
   // k=0 boundary correction term is optional
 
   if (!boundaryflag) return;
 
   // slab_volfactor adjusts the z dimension for 2d slab Ewald
   // 3d Ewald just uses zprd since slab_volfactor = 1.0
 
   const double volume =
     domain->xprd*domain->yprd*domain->zprd*force->kspace->slab_volfactor;
   scalar -= e_correction/volume;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeGroupGroup::kspace_correction()
 {
 
   // total charge of groups A & B, needed for correction term
 
   double qsqsum_group,qsum_A,qsum_B;
   qsqsum_group = qsum_A = qsum_B = 0.0;
 
   double *q = atom->q;
   int *mask = atom->mask;
   int groupbit_A = groupbit;
   int groupbit_B = jgroupbit;
 
   for (int i = 0; i < atom->nlocal; i++) {
     if ((mask[i] & groupbit_A) && (mask[i] & groupbit_B))
       qsqsum_group += q[i]*q[i];
     if (mask[i] & groupbit_A) qsum_A += q[i];
     if (mask[i] & groupbit_B) qsum_B += q[i];
   }
 
   double tmp;
   MPI_Allreduce(&qsqsum_group,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
   qsqsum_group = tmp;
 
   MPI_Allreduce(&qsum_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
   qsum_A = tmp;
 
   MPI_Allreduce(&qsum_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
   qsum_B = tmp;
 
   double g_ewald = force->kspace->g_ewald;
 
   double scale = 1.0;
   const double qscale = force->qqrd2e * scale;
 
   // self-energy correction
 
   e_self = qscale * g_ewald*qsqsum_group/MY_PIS;
   e_correction = 2.0*qsum_A*qsum_B;
 
   // subtract extra AA terms
 
   qsum_A = qsum_B = 0.0;
 
   for (int i = 0; i < atom->nlocal; i++) {
     if (!((mask[i] & groupbit_A) && (mask[i] & groupbit_B)))
       continue;
 
     if (mask[i] & groupbit_A) qsum_A += q[i];
     if (mask[i] & groupbit_B) qsum_B += q[i];
   }
 
   MPI_Allreduce(&qsum_A,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
   qsum_A = tmp;
 
   MPI_Allreduce(&qsum_B,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
   qsum_B = tmp;
 
   // k=0 energy correction term (still need to divide by volume above)
 
   e_correction -= qsum_A*qsum_B;
   e_correction *= qscale * MY_PI2 / (g_ewald*g_ewald);
 }
diff --git a/src/compute_pair_local.cpp b/src/compute_pair_local.cpp
index 0f138d531..55a87bfb9 100644
--- a/src/compute_pair_local.cpp
+++ b/src/compute_pair_local.cpp
@@ -1,289 +1,289 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "string.h"
 #include "stdlib.h"
 #include "compute_pair_local.h"
 #include "atom.h"
 #include "update.h"
 #include "force.h"
 #include "pair.h"
 #include "neighbor.h"
 #include "neigh_request.h"
 #include "neigh_list.h"
 #include "group.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 #define DELTA 10000
 
 enum{DIST,ENG,FORCE,FX,FY,FZ,PN};
 
 /* ---------------------------------------------------------------------- */
 
 ComputePairLocal::ComputePairLocal(LAMMPS *lmp, int narg, char **arg) :
   Compute(lmp, narg, arg)
 {
   if (narg < 4) error->all(FLERR,"Illegal compute pair/local command");
 
   local_flag = 1;
 
   // every arg after the first three requests one output value
   // a single value is exposed as a local vector (cols = 0),
   //   several values as a local array
 
   const int nrequested = narg - 3;
   size_local_cols = (nrequested == 1) ? 0 : nrequested;
 
   pstyle = new int[nrequested];
   pindex = new int[nrequested];
 
   // fixed keywords and the style each one selects
 
   static const char *const names[] = {"dist","eng","force","fx","fy","fz"};
   static const int styles[] = {DIST,ENG,FORCE,FX,FY,FZ};
   const int nnames = sizeof(styles)/sizeof(styles[0]);
 
   // anything else must be pN with N > 0, stored as zero-based index N-1
 
   nvalues = 0;
   for (int iarg = 3; iarg < narg; iarg++) {
     const char *word = arg[iarg];
 
     int k = 0;
     while (k < nnames && strcmp(word,names[k]) != 0) k++;
 
     if (k < nnames) {
       pstyle[nvalues++] = styles[k];
     } else if (word[0] == 'p') {
       const int n = atoi(&word[1]);
       if (n <= 0) error->all(FLERR,
                              "Invalid keyword in compute pair/local command");
       pstyle[nvalues] = PN;
       pindex[nvalues++] = n-1;
     } else error->all(FLERR,"Invalid keyword in compute pair/local command");
   }
 
   // set singleflag if need to call pair->single()
   // only a request made up entirely of dist values avoids it
 
   singleflag = 0;
   for (int k = 0; k < nvalues; k++) {
     if (pstyle[k] == DIST) continue;
     singleflag = 1;
     break;
   }
 
   // output storage is allocated on first use
 
   nmax = 0;
   vector = NULL;
   array = NULL;
 }
 
 /* ---------------------------------------------------------------------- */
 
 ComputePairLocal::~ComputePairLocal()
 {
   // keyword tables created in the constructor
 
   delete [] pindex;
   delete [] pstyle;
 
   // output storage
 
   memory->destroy(array);
   memory->destroy(vector);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePairLocal::init()
 {
   // a pair style that supports single() is only required
   //   when singleflag is set, i.e. some value other than dist was requested
 
   if (singleflag && force->pair == NULL)
     error->all(FLERR,"No pair style is defined for compute pair/local");
   if (singleflag && force->pair->single_enable == 0)
     error->all(FLERR,"Pair style does not support compute pair/local");
 
   // each pN value must index a field below the pair style's single_extra
   // force->pair is only read here for PN entries, which imply singleflag
 
   for (int i = 0; i < nvalues; i++)
     if (pstyle[i] == PN && pindex[i] >= force->pair->single_extra)
       error->all(FLERR,"Pair style does not have extra field"
                  " requested by compute pair/local");
 
   // need an occasional half neighbor list
   // request is flagged as coming from a compute, not a pair style
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->compute = 1;
   neighbor->requests[irequest]->occasional = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePairLocal::init_list(int id, NeighList *ptr)
 {
   // remember the neighbor list passed in; the id argument is not used here
   list = ptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePairLocal::compute_local()
 {
   invoked_local = update->ntimestep;
 
   // two passes over the pairs:
   //   flag = 0 only counts the entries this proc will output
   //   flag = 1 fills the output storage, which is grown first if needed
 
   ncount = compute_pairs(0);
   if (nmax < ncount) reallocate(ncount);
   size_local_rows = ncount;
 
   compute_pairs(1);
 }
 
 /* ----------------------------------------------------------------------
    count pairs and compute pair info on this proc
    only count pair once if newton_pair is off
    both atom I,J must be in group
    if flag is set, compute requested info about pair
 ------------------------------------------------------------------------- */
 
 int ComputePairLocal::compute_pairs(int flag)
 {
   int i,j,m,n,ii,jj,inum,jnum,itype,jtype;
   tagint itag,jtag;
   double xtmp,ytmp,ztmp,delx,dely,delz;
   double rsq,eng,fpair,factor_coul,factor_lj;
   int *ilist,*jlist,*numneigh,**firstneigh;
   double *ptr;
 
   double **x = atom->x;
   tagint *tag = atom->tag;
   int *type = atom->type;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
 
   // invoke half neighbor list (will copy or build if necessary)
   // only done on the counting pass, the fill pass reuses the same list
 
   if (flag == 0) neighbor->build_one(list);
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
   // skip if I or J are not in group
   // for newton = 0 and J = ghost atom,
   //   need to ensure I,J pair is only output by one proc
   //   use same itag,jtag logic as in Neighbor::neigh_half_nsq()
   // for flag = 0, just count pair interactions within force cutoff
   // for flag = 1, calculate requested output fields
 
   // NOTE(review): force->pair is dereferenced here unconditionally,
   //   while init() only rejects a NULL pair style when singleflag is set
   //   confirm a pair style is always defined when only dist is requested
 
   Pair *pair = force->pair;
   double **cutsq = force->pair->cutsq;
 
   // m = running count of accepted pairs = output row index
 
   m = 0;
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     if (!(mask[i] & groupbit)) continue;
 
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itag = tag[i];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // special-bond scale factors are looked up with sbmask(j)
       //   before j is masked down to a plain atom index
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       if (!(mask[j] & groupbit)) continue;
 
       // itag = jtag is possible for long cutoffs that include images of self
       // tie-break: parity of itag+jtag when tags differ,
       //   comparison of z, then y, then x coords when tags are equal
 
       if (newton_pair == 0 && j >= nlocal) {
         jtag = tag[j];
         if (itag > jtag) {
           if ((itag+jtag) % 2 == 0) continue;
         } else if (itag < jtag) {
           if ((itag+jtag) % 2 == 1) continue;
         } else {
           if (x[j][2] < ztmp) continue;
           if (x[j][2] == ztmp) {
             if (x[j][1] < ytmp) continue;
             if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
           }
         }
       }
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
       if (rsq >= cutsq[itype][jtype]) continue;
 
       if (flag) {
 
         // eng and fpair are only set when singleflag is on
         // singleflag is off only if every requested value is DIST,
         //   which reads neither of them
 
         if (singleflag)
           eng = pair->single(i,j,itype,jtype,rsq,factor_coul,factor_lj,fpair);
 
         // one value per pair goes into vector, several into a row of array
 
         if (nvalues == 1) ptr = &vector[m];
         else ptr = array[m];
 
         for (n = 0; n < nvalues; n++) {
           switch (pstyle[n]) {
           case DIST:
             ptr[n] = sqrt(rsq);
             break;
           case ENG:
             ptr[n] = eng;
             break;
           case FORCE:
             ptr[n] = sqrt(rsq)*fpair;
             break;
           case FX:
             ptr[n] = delx*fpair;
             break;
           case FY:
             ptr[n] = dely*fpair;
             break;
           case FZ:
             ptr[n] = delz*fpair;
             break;
           case PN:
             ptr[n] = pair->svector[pindex[n]];
             break;
           }
         }
       }
 
       m++;
     }
   }
 
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePairLocal::reallocate(int n)
 {
   // raise capacity in steps of DELTA until it can hold n entries
 
   while (nmax < n) nmax += DELTA;
 
   // storage is destroyed and created anew at the new capacity
   // several values per pair use the array, a single value uses the vector
 
   if (nvalues != 1) {
     memory->destroy(array);
     memory->create(array,nmax,nvalues,"pair/local:array");
     array_local = array;
     return;
   }
 
   memory->destroy(vector);
   memory->create(vector,nmax,"pair/local:vector");
   vector_local = vector;
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local data
 ------------------------------------------------------------------------- */
 
 double ComputePairLocal::memory_usage()
 {
   // nvalues doubles for each of nmax entries, reported in bytes
   return static_cast<double>(nmax*nvalues * sizeof(double));
 }
diff --git a/src/compute_property_local.cpp b/src/compute_property_local.cpp
index 7746420c1..c08d723cb 100644
--- a/src/compute_property_local.cpp
+++ b/src/compute_property_local.cpp
@@ -1,937 +1,937 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "string.h"
 #include "compute_property_local.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "update.h"
 #include "force.h"
 #include "pair.h"
 #include "neighbor.h"
 #include "neigh_request.h"
 #include "neigh_list.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 // values stored in kindflag: the family of keywords the compute was given
 enum{NONE,NEIGH,PAIR,BOND,ANGLE,DIHEDRAL,IMPROPER};
 
 // step by which reallocate() increases nmax
 #define DELTA 10000
 
 /* ---------------------------------------------------------------------- */
 
 ComputePropertyLocal::ComputePropertyLocal(LAMMPS *lmp, int narg, char **arg) :
   Compute(lmp, narg, arg)
 {
   // parse the keywords in arg[3] .. arg[narg-1]
   // each keyword selects one pack method and sets kindflag
   // keywords that map to different kindflag values cannot be combined
 
   if (narg < 4) error->all(FLERR,"Illegal compute property/local command");
 
   // one keyword -> size_local_cols = 0, else one column per keyword
 
   local_flag = 1;
   nvalues = narg - 3;
   if (nvalues == 1) size_local_cols = 0;
   else size_local_cols = nvalues;
 
   pack_choice = new FnPtrPack[nvalues];
 
   kindflag = NONE;
 
   int i;
   for (int iarg = 3; iarg < narg; iarg++) {
     i = iarg-3;
 
     // neighbor keywords: reuse the patom/ptype pack methods, kindflag = NEIGH
 
     if (strcmp(arg[iarg],"natom1") == 0) {
       pack_choice[i] = &ComputePropertyLocal::pack_patom1;
       if (kindflag != NONE && kindflag != NEIGH)
         error->all(FLERR,
                    "Compute property/local cannot use these inputs together");
       kindflag = NEIGH;
     } else if (strcmp(arg[iarg],"natom2") == 0) {
       pack_choice[i] = &ComputePropertyLocal::pack_patom2;
       if (kindflag != NONE && kindflag != NEIGH)
         error->all(FLERR,
                    "Compute property/local cannot use these inputs together");
       kindflag = NEIGH;
     } else if (strcmp(arg[iarg],"ntype1") == 0) {
       pack_choice[i] = &ComputePropertyLocal::pack_ptype1;
       if (kindflag != NONE && kindflag != NEIGH)
         error->all(FLERR,
                    "Compute property/local cannot use these inputs together");
       kindflag = NEIGH;
     } else if (strcmp(arg[iarg],"ntype2") == 0) {
       pack_choice[i] = &ComputePropertyLocal::pack_ptype2;
       if (kindflag != NONE && kindflag != NEIGH)
         error->all(FLERR,
                    "Compute property/local cannot use these inputs together");
       kindflag = NEIGH;
 
     // pair keywords: kindflag = PAIR
 
     } else if (strcmp(arg[iarg],"patom1") == 0) {
       pack_choice[i] = &ComputePropertyLocal::pack_patom1;
       if (kindflag != NONE && kindflag != PAIR)
         error->all(FLERR,
                    "Compute property/local cannot use these inputs together");
       kindflag = PAIR;
     } else if (strcmp(arg[iarg],"patom2") == 0) {
       pack_choice[i] = &ComputePropertyLocal::pack_patom2;
       if (kindflag != NONE && kindflag != PAIR)
         error->all(FLERR,
                    "Compute property/local cannot use these inputs together");
       kindflag = PAIR;
     } else if (strcmp(arg[iarg],"ptype1") == 0) {
       pack_choice[i] = &ComputePropertyLocal::pack_ptype1;
       if (kindflag != NONE && kindflag != PAIR)
         error->all(FLERR,
                    "Compute property/local cannot use these inputs together");
       kindflag = PAIR;
     } else if (strcmp(arg[iarg],"ptype2") == 0) {
       pack_choice[i] = &ComputePropertyLocal::pack_ptype2;
       if (kindflag != NONE && kindflag != PAIR)
         error->all(FLERR,
                    "Compute property/local cannot use these inputs together");
       kindflag = PAIR;
 
     // bond keywords: kindflag = BOND
 
     } else if (strcmp(arg[iarg],"batom1") == 0) {
       pack_choice[i] = &ComputePropertyLocal::pack_batom1;
       if (kindflag != NONE && kindflag != BOND)
         error->all(FLERR,
                    "Compute property/local cannot use these inputs together");
       kindflag = BOND;
     } else if (strcmp(arg[iarg],"batom2") == 0) {
       pack_choice[i] = &ComputePropertyLocal::pack_batom2;
       if (kindflag != NONE && kindflag != BOND)
         error->all(FLERR,
                    "Compute property/local cannot use these inputs together");
       kindflag = BOND;
     } else if (strcmp(arg[iarg],"btype") == 0) {
       pack_choice[i] = &ComputePropertyLocal::pack_btype;
       if (kindflag != NONE && kindflag != BOND)
         error->all(FLERR,
                    "Compute property/local cannot use these inputs together");
       kindflag = BOND;
 
     // angle keywords: kindflag = ANGLE
 
     } else if (strcmp(arg[iarg],"aatom1") == 0) {
       pack_choice[i] = &ComputePropertyLocal::pack_aatom1;
       if (kindflag != NONE && kindflag != ANGLE)
         error->all(FLERR,
                    "Compute property/local cannot use these inputs together");
       kindflag = ANGLE;
     } else if (strcmp(arg[iarg],"aatom2") == 0) {
       pack_choice[i] = &ComputePropertyLocal::pack_aatom2;
       if (kindflag != NONE && kindflag != ANGLE)
         error->all(FLERR,
                    "Compute property/local cannot use these inputs together");
       kindflag = ANGLE;
     } else if (strcmp(arg[iarg],"aatom3") == 0) {
       pack_choice[i] = &ComputePropertyLocal::pack_aatom3;
       if (kindflag != NONE && kindflag != ANGLE)
         error->all(FLERR,
                    "Compute property/local cannot use these inputs together");
       kindflag = ANGLE;
     } else if (strcmp(arg[iarg],"atype") == 0) {
       pack_choice[i] = &ComputePropertyLocal::pack_atype;
       if (kindflag != NONE && kindflag != ANGLE)
         error->all(FLERR,
                    "Compute property/local cannot use these inputs together");
       kindflag = ANGLE;
 
     // dihedral keywords: kindflag = DIHEDRAL
 
     } else if (strcmp(arg[iarg],"datom1") == 0) {
       pack_choice[i] = &ComputePropertyLocal::pack_datom1;
       if (kindflag != NONE && kindflag != DIHEDRAL)
         error->all(FLERR,
                    "Compute property/local cannot use these inputs together");
       kindflag = DIHEDRAL;
     } else if (strcmp(arg[iarg],"datom2") == 0) {
       pack_choice[i] = &ComputePropertyLocal::pack_datom2;
       if (kindflag != NONE && kindflag != DIHEDRAL)
         error->all(FLERR,
                    "Compute property/local cannot use these inputs together");
       kindflag = DIHEDRAL;
     } else if (strcmp(arg[iarg],"datom3") == 0) {
       pack_choice[i] = &ComputePropertyLocal::pack_datom3;
       if (kindflag != NONE && kindflag != DIHEDRAL)
         error->all(FLERR,
                    "Compute property/local cannot use these inputs together");
       kindflag = DIHEDRAL;
     } else if (strcmp(arg[iarg],"datom4") == 0) {
       pack_choice[i] = &ComputePropertyLocal::pack_datom4;
       if (kindflag != NONE && kindflag != DIHEDRAL)
         error->all(FLERR,
                    "Compute property/local cannot use these inputs together");
       kindflag = DIHEDRAL;
     } else if (strcmp(arg[iarg],"dtype") == 0) {
       pack_choice[i] = &ComputePropertyLocal::pack_dtype;
       if (kindflag != NONE && kindflag != DIHEDRAL)
         error->all(FLERR,
                    "Compute property/local cannot use these inputs together");
       kindflag = DIHEDRAL;
 
     // improper keywords: kindflag = IMPROPER
 
     } else if (strcmp(arg[iarg],"iatom1") == 0) {
       pack_choice[i] = &ComputePropertyLocal::pack_iatom1;
       if (kindflag != NONE && kindflag != IMPROPER)
         error->all(FLERR,
                    "Compute property/local cannot use these inputs together");
       kindflag = IMPROPER;
     } else if (strcmp(arg[iarg],"iatom2") == 0) {
       pack_choice[i] = &ComputePropertyLocal::pack_iatom2;
       if (kindflag != NONE && kindflag != IMPROPER)
         error->all(FLERR,
                    "Compute property/local cannot use these inputs together");
       kindflag = IMPROPER;
     } else if (strcmp(arg[iarg],"iatom3") == 0) {
       pack_choice[i] = &ComputePropertyLocal::pack_iatom3;
       if (kindflag != NONE && kindflag != IMPROPER)
         error->all(FLERR,
                    "Compute property/local cannot use these inputs together");
       kindflag = IMPROPER;
     } else if (strcmp(arg[iarg],"iatom4") == 0) {
       pack_choice[i] = &ComputePropertyLocal::pack_iatom4;
       if (kindflag != NONE && kindflag != IMPROPER)
         error->all(FLERR,
                    "Compute property/local cannot use these inputs together");
       kindflag = IMPROPER;
     } else if (strcmp(arg[iarg],"itype") == 0) {
       pack_choice[i] = &ComputePropertyLocal::pack_itype;
       if (kindflag != NONE && kindflag != IMPROPER)
         error->all(FLERR,
                    "Compute property/local cannot use these inputs together");
       kindflag = IMPROPER;
 
     } else error->all(FLERR,
                       "Invalid keyword in compute property/local command");
   }
 
   // error check
 
   if (atom->molecular == 2 && (kindflag == BOND || kindflag == ANGLE ||
                                kindflag == DIHEDRAL || kindflag == IMPROPER))
     error->all(FLERR,"Compute property/local does not (yet) work "
                "with atom_style template");
 
   if (kindflag == BOND && atom->avec->bonds_allow == 0)
     error->all(FLERR,
                "Compute property/local for property that isn't allocated");
   if (kindflag == ANGLE && atom->avec->angles_allow == 0)
     error->all(FLERR,
                "Compute property/local for property that isn't allocated");
   if (kindflag == DIHEDRAL && atom->avec->dihedrals_allow == 0)
     error->all(FLERR,
                "Compute property/local for property that isn't allocated");
   if (kindflag == IMPROPER && atom->avec->impropers_allow == 0)
     error->all(FLERR,
                "Compute property/local for property that isn't allocated");
 
   // no storage yet; reallocate() creates it once a count is known
 
   nmax = 0;
   vector = NULL;
   array = NULL;
   indices = NULL;
 }
 
 /* ---------------------------------------------------------------------- */
 
 ComputePropertyLocal::~ComputePropertyLocal()
 {
   // release the index table and both output buffers
 
   memory->destroy(indices);
   memory->destroy(array);
   memory->destroy(vector);
 
   // release the table of pack-method pointers made in the constructor
 
   delete [] pack_choice;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyLocal::init()
 {
   // NEIGH/PAIR keywords require a pair style with single_enable set
 
   if (kindflag == NEIGH || kindflag == PAIR) {
     if (force->pair == NULL)
       error->all(FLERR,"No pair style is defined for compute property/local");
     if (force->pair->single_enable == 0)
       error->all(FLERR,"Pair style does not support compute property/local");
   }
 
   // for NEIGH/PAIR need an occasional half neighbor list
 
   if (kindflag == NEIGH || kindflag == PAIR) {
-    int irequest = neighbor->request(this);
+    int irequest = neighbor->request(this,instance_me);
     neighbor->requests[irequest]->pair = 0;
     neighbor->requests[irequest]->compute = 1;
     neighbor->requests[irequest]->occasional = 1;
   }
 
   // do initial memory allocation so that memory_usage() is correct
   // cannot be done yet for NEIGH/PAIR, since neigh list does not exist
 
   if (kindflag == NEIGH) ncount = 0;
   else if (kindflag == PAIR) ncount = 0;
   else if (kindflag == BOND) ncount = count_bonds(0);
   else if (kindflag == ANGLE) ncount = count_angles(0);
   else if (kindflag == DIHEDRAL) ncount = count_dihedrals(0);
   else if (kindflag == IMPROPER) ncount = count_impropers(0);
 
   // grow storage only if the count exceeds the current capacity
 
   if (ncount > nmax) reallocate(ncount);
   size_local_rows = ncount;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyLocal::init_list(int id, NeighList *ptr)
 {
   list = ptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyLocal::compute_local()
 {
   invoked_local = update->ntimestep;
 
   // first pass: count entries only (flag = 0)
 
   switch (kindflag) {
   case NEIGH:
     ncount = count_pairs(0,0);
     break;
   case PAIR:
     ncount = count_pairs(0,1);
     break;
   case BOND:
     ncount = count_bonds(0);
     break;
   case ANGLE:
     ncount = count_angles(0);
     break;
   case DIHEDRAL:
     ncount = count_dihedrals(0);
     break;
   case IMPROPER:
     ncount = count_impropers(0);
     break;
   }
 
   // make room for the counted entries
 
   if (nmax < ncount) reallocate(ncount);
   size_local_rows = ncount;
 
   // second pass: same traversal with flag = 1, which fills indices
 
   switch (kindflag) {
   case NEIGH:
     ncount = count_pairs(1,0);
     break;
   case PAIR:
     ncount = count_pairs(1,1);
     break;
   case BOND:
     ncount = count_bonds(1);
     break;
   case ANGLE:
     ncount = count_angles(1);
     break;
   case DIHEDRAL:
     ncount = count_dihedrals(1);
     break;
   case IMPROPER:
     ncount = count_impropers(1);
     break;
   }
 
   // run one pack method per requested keyword
   // buf points at the vector, or at the first element of the array
 
   if (nvalues != 1) {
     if (array) buf = &array[0][0];
     for (int col = 0; col < nvalues; col++)
       (this->*pack_choice[col])(col);
   } else {
     buf = vector;
     (this->*pack_choice[0])(0);
   }
 }
 
 /* ----------------------------------------------------------------------
    count pairs and compute pair info on this proc
    only count pair once if newton_pair is off
    both atom I,J must be in group
    if allflag is set, compute requested info about pair
    if forceflag = 1, pair must be within force cutoff, else neighbor cutoff
 ------------------------------------------------------------------------- */
 
 int ComputePropertyLocal::count_pairs(int allflag, int forceflag)
 {
   int i,j,m,ii,jj,inum,jnum,itype,jtype;
   tagint itag,jtag;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   double **x = atom->x;
   tagint *tag = atom->tag;
   int *type = atom->type;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
   int newton_pair = force->newton_pair;
 
   // invoke half neighbor list (will copy or build if necessary)
   // only done when allflag = 0; a call with allflag = 1 uses the list as is
 
   if (allflag == 0) neighbor->build_one(list);
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
   // skip if I or J are not in group
   // for newton = 0 and J = ghost atom,
   //   need to ensure I,J pair is only output by one proc
   //   use same itag,jtag logic as in Neighbor::neigh_half_nsq()
 
   double **cutsq = force->pair->cutsq;
 
   // m = number of pairs accepted so far, also the next row of indices
 
   m = 0;
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     if (!(mask[i] & groupbit)) continue;
 
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itag = tag[i];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       // mask the stored neighbor entry with NEIGHMASK before using it
       //   as an atom index
 
       j = jlist[jj];
       j &= NEIGHMASK;
 
       if (!(mask[j] & groupbit)) continue;
 
       // itag = jtag is possible for long cutoffs that include images of self
       // equal tags are tie-broken on the z, then y, then x coordinate
 
       if (newton_pair == 0 && j >= nlocal) {
         jtag = tag[j];
         if (itag > jtag) {
           if ((itag+jtag) % 2 == 0) continue;
         } else if (itag < jtag) {
           if ((itag+jtag) % 2 == 1) continue;
         } else {
           if (x[j][2] < ztmp) continue;
           if (x[j][2] == ztmp) {
             if (x[j][1] < ytmp) continue;
             if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
           }
         }
       }
 
       // the distance test against cutsq is applied only if forceflag is set
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
       if (forceflag && rsq >= cutsq[itype][jtype]) continue;
 
       // store local indices of both atoms, read later by the pack methods
 
       if (allflag) {
         indices[m][0] = i;
         indices[m][1] = j;
       }
       m++;
     }
   }
 
   return m;
 }
 
 /* ----------------------------------------------------------------------
    count bonds on this proc
    only count bond once if newton_bond is off
    all atoms in interaction must be in group
    all atoms in interaction must be known to proc
    if bond is deleted (type = 0), do not count
    if bond is turned off (type < 0), still count
 ------------------------------------------------------------------------- */
 
 int ComputePropertyLocal::count_bonds(int flag)
 {
   int i,atom1,atom2;
 
   int *num_bond = atom->num_bond;
   tagint **bond_atom = atom->bond_atom;
   int **bond_type = atom->bond_type;
   tagint *tag = atom->tag;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
   int newton_bond = force->newton_bond;
 
   int m = 0;
   for (atom1 = 0; atom1 < nlocal; atom1++) {
     if (!(mask[atom1] & groupbit)) continue;
     for (i = 0; i < num_bond[atom1]; i++) {
       atom2 = atom->map(bond_atom[atom1][i]);
       if (atom2 < 0 || !(mask[atom2] & groupbit)) continue;
       if (newton_bond == 0 && tag[atom1] > tag[atom2]) continue;
       if (bond_type[atom1][i] == 0) continue;
 
       if (flag) {
         indices[m][0] = atom1;
         indices[m][1] = i;
       }
       m++;
     }
   }
 
   return m;
 }
 
 /* ----------------------------------------------------------------------
    count angles on this proc
    only count if 2nd atom is the one storing the angle
    all atoms in interaction must be in group
    all atoms in interaction must be known to proc
    if angle is deleted (type = 0), do not count
    if angle is turned off (type < 0), still count
 ------------------------------------------------------------------------- */
 
 int ComputePropertyLocal::count_angles(int flag)
 {
   int i,atom1,atom2,atom3;
 
   int *num_angle = atom->num_angle;
   tagint **angle_atom1 = atom->angle_atom1;
   tagint **angle_atom2 = atom->angle_atom2;
   tagint **angle_atom3 = atom->angle_atom3;
   int **angle_type = atom->angle_type;
   tagint *tag = atom->tag;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
   int m = 0;
   for (atom2 = 0; atom2 < nlocal; atom2++) {
     if (!(mask[atom2] & groupbit)) continue;
     for (i = 0; i < num_angle[atom2]; i++) {
       if (tag[atom2] != angle_atom2[atom2][i]) continue;
       atom1 = atom->map(angle_atom1[atom2][i]);
       if (atom1 < 0 || !(mask[atom1] & groupbit)) continue;
       atom3 = atom->map(angle_atom3[atom2][i]);
       if (atom3 < 0 || !(mask[atom3] & groupbit)) continue;
       if (angle_type[atom2][i] == 0) continue;
 
       if (flag) {
         indices[m][0] = atom2;
         indices[m][1] = i;
       }
       m++;
     }
   }
 
   return m;
 }
 
 /* ----------------------------------------------------------------------
    count dihedrals on this proc
    only count if 2nd atom is the one storing the dihedral
    all atoms in interaction must be in group
    all atoms in interaction must be known to proc
 ------------------------------------------------------------------------- */
 
 int ComputePropertyLocal::count_dihedrals(int flag)
 {
   int i,atom1,atom2,atom3,atom4;
 
   int *num_dihedral = atom->num_dihedral;
   tagint **dihedral_atom1 = atom->dihedral_atom1;
   tagint **dihedral_atom2 = atom->dihedral_atom2;
   tagint **dihedral_atom3 = atom->dihedral_atom3;
   tagint **dihedral_atom4 = atom->dihedral_atom4;
   tagint *tag = atom->tag;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
   int m = 0;
   for (atom2 = 0; atom2 < nlocal; atom2++) {
     if (!(mask[atom2] & groupbit)) continue;
     for (i = 0; i < num_dihedral[atom2]; i++) {
       if (tag[atom2] != dihedral_atom2[atom2][i]) continue;
       atom1 = atom->map(dihedral_atom1[atom2][i]);
       if (atom1 < 0 || !(mask[atom1] & groupbit)) continue;
       atom3 = atom->map(dihedral_atom3[atom2][i]);
       if (atom3 < 0 || !(mask[atom3] & groupbit)) continue;
       atom4 = atom->map(dihedral_atom4[atom2][i]);
       if (atom4 < 0 || !(mask[atom4] & groupbit)) continue;
 
       if (flag) {
         indices[m][0] = atom2;
         indices[m][1] = i;
       }
       m++;
     }
   }
 
   return m;
 }
 
 /* ----------------------------------------------------------------------
    count impropers on this proc
    only count if 2nd atom is the one storing the improper
    all atoms in interaction must be in group
    all atoms in interaction must be known to proc
 ------------------------------------------------------------------------- */
 
 int ComputePropertyLocal::count_impropers(int flag)
 {
   int i,atom1,atom2,atom3,atom4;
 
   int *num_improper = atom->num_improper;
   tagint **improper_atom1 = atom->improper_atom1;
   tagint **improper_atom2 = atom->improper_atom2;
   tagint **improper_atom3 = atom->improper_atom3;
   tagint **improper_atom4 = atom->improper_atom4;
   tagint *tag = atom->tag;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
   int m = 0;
   for (atom2 = 0; atom2 < nlocal; atom2++) {
     if (!(mask[atom2] & groupbit)) continue;
     for (i = 0; i < num_improper[atom2]; i++) {
       if (tag[atom2] != improper_atom2[atom2][i]) continue;
       atom1 = atom->map(improper_atom1[atom2][i]);
       if (atom1 < 0 || !(mask[atom1] & groupbit)) continue;
       atom3 = atom->map(improper_atom3[atom2][i]);
       if (atom3 < 0 || !(mask[atom3] & groupbit)) continue;
       atom4 = atom->map(improper_atom4[atom2][i]);
       if (atom4 < 0 || !(mask[atom4] & groupbit)) continue;
 
       if (flag) {
         indices[m][0] = atom2;
         indices[m][1] = i;
       }
       m++;
     }
   }
 
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyLocal::reallocate(int n)
 {
   // raise nmax in DELTA-sized steps until it is at least n
 
   for ( ; nmax < n; nmax += DELTA) {}
 
   // replace the output buffer: array for several values, vector for one
 
   if (nvalues != 1) {
     memory->destroy(array);
     memory->create(array,nmax,nvalues,"property/local:array");
     array_local = array;
   } else {
     memory->destroy(vector);
     memory->create(vector,nmax,"property/local:vector");
     vector_local = vector;
   }
 
   // replace the index table: two ints per row
 
   memory->destroy(indices);
   memory->create(indices,nmax,2,"property/local:indices");
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local data
 ------------------------------------------------------------------------- */
 
 double ComputePropertyLocal::memory_usage()
 {
   // output buffer (nmax x nvalues doubles) plus index table (nmax x 2 ints)
   // promote to double before multiplying: nmax*nvalues and nmax*2
   //   evaluated as int*int can overflow for very large tables
 
   double bytes = (double) nmax*nvalues * sizeof(double);
   bytes += (double) nmax*2 * sizeof(int);
   return bytes;
 }
 
 /* ----------------------------------------------------------------------
    one method for every keyword compute property/local can output
    the atom property is packed into buf starting at n with stride nvalues
    customize a new keyword by adding a method
 ------------------------------------------------------------------------- */
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyLocal::pack_patom1(int n)
 {
   int i;
   tagint *tag = atom->tag;
 
   for (int m = 0; m < ncount; m++) {
     i = indices[m][0];
     buf[n] = tag[i];
     n += nvalues;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyLocal::pack_patom2(int n)
 {
   int i;
   tagint *tag = atom->tag;
 
   for (int m = 0; m < ncount; m++) {
     i = indices[m][1];
     buf[n] = tag[i];
     n += nvalues;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyLocal::pack_ptype1(int n)
 {
   // write the type of atom indices[row][0] for every row, stride nvalues
 
   int *atype = atom->type;
 
   for (int row = 0; row < ncount; row++, n += nvalues)
     buf[n] = atype[indices[row][0]];
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyLocal::pack_ptype2(int n)
 {
   // write the type of atom indices[row][1] for every row, stride nvalues
 
   int *atype = atom->type;
 
   for (int row = 0; row < ncount; row++, n += nvalues)
     buf[n] = atype[indices[row][1]];
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyLocal::pack_batom1(int n)
 {
   int i;
   tagint *tag = atom->tag;
 
   for (int m = 0; m < ncount; m++) {
     i = indices[m][0];
     buf[n] = tag[i];
     n += nvalues;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyLocal::pack_batom2(int n)
 {
   int i,j;
   tagint **bond_atom = atom->bond_atom;
 
   for (int m = 0; m < ncount; m++) {
     i = indices[m][0];
     j = indices[m][1];
     buf[n] = bond_atom[i][j];
     n += nvalues;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyLocal::pack_btype(int n)
 {
   // write bond_type[owner][slot] for every row, stride nvalues
 
   int **types = atom->bond_type;
 
   for (int row = 0; row < ncount; row++, n += nvalues) {
     const int owner = indices[row][0];
     const int slot = indices[row][1];
     buf[n] = types[owner][slot];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyLocal::pack_aatom1(int n)
 {
   int i,j;
   tagint **angle_atom1 = atom->angle_atom1;
 
   for (int m = 0; m < ncount; m++) {
     i = indices[m][0];
     j = indices[m][1];
     buf[n] = angle_atom1[i][j];
     n += nvalues;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyLocal::pack_aatom2(int n)
 {
   int i,j;
   tagint **angle_atom2 = atom->angle_atom2;
 
   for (int m = 0; m < ncount; m++) {
     i = indices[m][0];
     j = indices[m][1];
     buf[n] = angle_atom2[i][j];
     n += nvalues;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyLocal::pack_aatom3(int n)
 {
   int i,j;
   tagint **angle_atom3 = atom->angle_atom3;
 
   for (int m = 0; m < ncount; m++) {
     i = indices[m][0];
     j = indices[m][1];
     buf[n] = angle_atom3[i][j];
     n += nvalues;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyLocal::pack_atype(int n)
 {
   // write angle_type[owner][slot] for every row, stride nvalues
 
   int **types = atom->angle_type;
 
   for (int row = 0; row < ncount; row++, n += nvalues) {
     const int owner = indices[row][0];
     const int slot = indices[row][1];
     buf[n] = types[owner][slot];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyLocal::pack_datom1(int n)
 {
   int i,j;
   tagint **dihedral_atom1 = atom->dihedral_atom1;
 
   for (int m = 0; m < ncount; m++) {
     i = indices[m][0];
     j = indices[m][1];
     buf[n] = dihedral_atom1[i][j];
     n += nvalues;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyLocal::pack_datom2(int n)
 {
   int i,j;
   tagint **dihedral_atom2 = atom->dihedral_atom2;
 
   for (int m = 0; m < ncount; m++) {
     i = indices[m][0];
     j = indices[m][1];
     buf[n] = dihedral_atom2[i][j];
     n += nvalues;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyLocal::pack_datom3(int n)
 {
   int i,j;
   tagint **dihedral_atom3 = atom->dihedral_atom3;
 
   for (int m = 0; m < ncount; m++) {
     i = indices[m][0];
     j = indices[m][1];
     buf[n] = dihedral_atom3[i][j];
     n += nvalues;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyLocal::pack_datom4(int n)
 {
   int i,j;
   tagint **dihedral_atom4 = atom->dihedral_atom4;
 
   for (int m = 0; m < ncount; m++) {
     i = indices[m][0];
     j = indices[m][1];
     buf[n] = dihedral_atom4[i][j];
     n += nvalues;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyLocal::pack_dtype(int n)
 {
   // write dihedral_type[owner][slot] for every row, stride nvalues
 
   int **types = atom->dihedral_type;
 
   for (int row = 0; row < ncount; row++, n += nvalues) {
     const int owner = indices[row][0];
     const int slot = indices[row][1];
     buf[n] = types[owner][slot];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyLocal::pack_iatom1(int n)
 {
   int i,j;
   tagint **improper_atom1 = atom->improper_atom1;
 
   for (int m = 0; m < ncount; m++) {
     i = indices[m][0];
     j = indices[m][1];
     buf[n] = improper_atom1[i][j];
     n += nvalues;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyLocal::pack_iatom2(int n)
 {
   int i,j;
   tagint **improper_atom2 = atom->improper_atom2;
 
   for (int m = 0; m < ncount; m++) {
     i = indices[m][0];
     j = indices[m][1];
     buf[n] = improper_atom2[i][j];
     n += nvalues;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyLocal::pack_iatom3(int n)
 {
   int i,j;
   tagint **improper_atom3 = atom->improper_atom3;
 
   for (int m = 0; m < ncount; m++) {
     i = indices[m][0];
     j = indices[m][1];
     buf[n] = improper_atom3[i][j];
     n += nvalues;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyLocal::pack_iatom4(int n)
 {
   int i,j;
   tagint **improper_atom4 = atom->improper_atom4;
 
   for (int m = 0; m < ncount; m++) {
     i = indices[m][0];
     j = indices[m][1];
     buf[n] = improper_atom4[i][j];
     n += nvalues;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputePropertyLocal::pack_itype(int n)
 {
   // write improper_type[owner][slot] for every row, stride nvalues
 
   int **types = atom->improper_type;
 
   for (int row = 0; row < ncount; row++, n += nvalues) {
     const int owner = indices[row][0];
     const int slot = indices[row][1];
     buf[n] = types[owner][slot];
   }
 }
diff --git a/src/compute_rdf.cpp b/src/compute_rdf.cpp
index 0893d0232..7c5f433bb 100644
--- a/src/compute_rdf.cpp
+++ b/src/compute_rdf.cpp
@@ -1,310 +1,310 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Paul Crozier (SNL), Jeff Greathouse (SNL)
 ------------------------------------------------------------------------- */
 
 #include "mpi.h"
 #include "math.h"
 #include "stdlib.h"
 #include "compute_rdf.h"
 #include "atom.h"
 #include "update.h"
 #include "force.h"
 #include "pair.h"
 #include "domain.h"
 #include "neighbor.h"
 #include "neigh_request.h"
 #include "neigh_list.h"
 #include "group.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 /* ---------------------------------------------------------------------- */
 
 ComputeRDF::ComputeRDF(LAMMPS *lmp, int narg, char **arg) :
   Compute(lmp, narg, arg)
 {
   // at least 4 args are required, followed by zero or more
   // complete (itype,jtype) argument pairs
 
   const int nextra = narg - 4;
   if (narg < 4 || nextra % 2)
     error->all(FLERR,"Illegal compute rdf command");
 
   array_flag = 1;
   extarray = 0;
 
   nbin = force->inumeric(FLERR,arg[3]);
   if (nbin < 1) error->all(FLERR,"Illegal compute rdf command");
 
   // with no explicit type pairs a single histogram over all types is used
 
   const int all_types = (nextra == 0);
   npairs = all_types ? 1 : nextra/2;
 
   // output array: one row per bin, 1st column + 2 columns per histogram
 
   size_array_rows = nbin;
   size_array_cols = 1 + 2*npairs;
 
   int ntypes = atom->ntypes;
   memory->create(rdfpair,npairs,ntypes+1,ntypes+1,"rdf:rdfpair");
   memory->create(nrdfpair,ntypes+1,ntypes+1,"rdf:nrdfpair");
   ilo = new int[npairs];
   ihi = new int[npairs];
   jlo = new int[npairs];
   jhi = new int[npairs];
 
   // type ranges for each histogram
 
   if (all_types) {
     ilo[0] = jlo[0] = 1;
     ihi[0] = jhi[0] = ntypes;
   } else {
     int m = 0;
     for (int iarg = 4; iarg < narg; iarg += 2) {
       force->bounds(arg[iarg],atom->ntypes,ilo[m],ihi[m]);
       force->bounds(arg[iarg+1],atom->ntypes,jlo[m],jhi[m]);
       if (ilo[m] > ihi[m] || jlo[m] > jhi[m])
         error->all(FLERR,"Illegal compute rdf command");
       m++;
     }
   }
 
   // nrdfpair[it][jt] = # of histograms the type pair (it,jt) contributes to
   // rdfpair[k][it][jt] = index of the k-th such histogram
 
   for (int it = 1; it <= ntypes; it++)
     for (int jt = 1; jt <= ntypes; jt++)
       nrdfpair[it][jt] = 0;
 
   for (int m = 0; m < npairs; m++) {
     for (int it = ilo[m]; it <= ihi[m]; it++) {
       for (int jt = jlo[m]; jt <= jhi[m]; jt++) {
         const int slot = nrdfpair[it][jt]++;
         rdfpair[slot][it][jt] = m;
       }
     }
   }
 
   memory->create(hist,npairs,nbin,"rdf:hist");
   memory->create(histall,npairs,nbin,"rdf:histall");
   memory->create(array,nbin,1+2*npairs,"rdf:array");
   typecount = new int[ntypes+1];
   icount = new int[npairs];
   jcount = new int[npairs];
 }
 
 /* ---------------------------------------------------------------------- */
 
 ComputeRDF::~ComputeRDF()
 {
   // arrays created through memory->create()
 
   memory->destroy(rdfpair);
   memory->destroy(nrdfpair);
   memory->destroy(hist);
   memory->destroy(histall);
   memory->destroy(array);
 
   // arrays created with new[]
 
   delete [] ilo;
   delete [] ihi;
   delete [] jlo;
   delete [] jhi;
   delete [] typecount;
   delete [] icount;
   delete [] jcount;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeRDF::init()
 {
   // set the bin width from the pair cutoff, fill the bin-coordinate column,
   // count the group atoms feeding each histogram, request a neighbor list
 
   int i,m;
 
   // bin width = pair style cutoff / nbin, error if no pair style is defined
 
   if (force->pair) delr = force->pair->cutforce / nbin;
   else error->all(FLERR,"Compute rdf requires a pair style be defined");
   delrinv = 1.0/delr;
 
   // set 1st column of output array to bin coords
   // (bin midpoints; the loop-local i shadows the i declared above)
 
   for (int i = 0; i < nbin; i++)
     array[i][0] = (i+0.5) * delr;
 
   // count atoms of each type that are also in group
 
   int *mask = atom->mask;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int ntypes = atom->ntypes;
 
   for (i = 1; i <= ntypes; i++) typecount[i] = 0;
   for (i = 0; i < nlocal; i++)
     if (mask[i] & groupbit) typecount[type[i]]++;
 
   // icount = # of I atoms participating in I,J pairs for each histogram
   // jcount = # of J atoms participating in I,J pairs for each histogram
 
   for (m = 0; m < npairs; m++) {
     icount[m] = 0;
     for (i = ilo[m]; i <= ihi[m]; i++) icount[m] += typecount[i];
     jcount[m] = 0;
     for (i = jlo[m]; i <= jhi[m]; i++) jcount[m] += typecount[i];
   }
 
   // sum the per-proc counts over all procs
   // scratch is the receive buffer, its result is copied back in place
 
   int *scratch = new int[npairs];
   MPI_Allreduce(icount,scratch,npairs,MPI_INT,MPI_SUM,world);
   for (i = 0; i < npairs; i++) icount[i] = scratch[i];
   MPI_Allreduce(jcount,scratch,npairs,MPI_INT,MPI_SUM,world);
   for (i = 0; i < npairs; i++) jcount[i] = scratch[i];
   delete [] scratch;
 
   // need an occasional half neighbor list
   // request is flagged as coming from a compute rather than a pair style
 
-  int irequest = neighbor->request(this);
+  int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->compute = 1;
   neighbor->requests[irequest]->occasional = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeRDF::init_list(int id, NeighList *ptr)
 {
   // remember the neighbor list pointer handed in; compute_array() builds
   // and traverses it, the id argument is not used
 
   list = ptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeRDF::compute_array()
 {
   // build the neighbor list, histogram pair distances for each requested
   // type pair, sum over procs, then fill array with g(r) and coord(r)
 
   int i,j,m,ii,jj,inum,jnum,itype,jtype,ipair,jpair,ibin,ihisto;
   double xtmp,ytmp,ztmp,delx,dely,delz,r;
   int *ilist,*jlist,*numneigh,**firstneigh;
   double factor_lj,factor_coul;
 
   invoked_array = update->ntimestep;
 
   // invoke half neighbor list (will copy or build if necessary)
 
   neighbor->build_one(list);
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // zero the histogram counts
 
   for (i = 0; i < npairs; i++)
     for (j = 0; j < nbin; j++)
       hist[i][j] = 0;
 
   // tally the RDF
   // both atom i and j must be in compute group
   // itype,jtype must have been specified by user
   // consider I,J as one interaction even if neighbor pair is stored on 2 procs
   // tally I,J pair each time I is central atom, and each time J is central
 
   double **x = atom->x;
   int *type = atom->type;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     if (!(mask[i] & groupbit)) continue;
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
 
       // look up the special weights from the bits encoded in j,
       // then mask them off to recover the atom index
 
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       // if both weighting factors are 0, skip this pair
       // could be 0 and still be in neigh list for long-range Coulombics
       // want consistency with non-charged pairs which wouldn't be in list
 
       if (factor_lj == 0.0 && factor_coul == 0.0) continue;
 
       if (!(mask[j] & groupbit)) continue;
       jtype = type[j];
 
       // ipair,jpair = # of histograms fed by (itype,jtype) and (jtype,itype)
 
       ipair = nrdfpair[itype][jtype];
       jpair = nrdfpair[jtype][itype];
       if (!ipair && !jpair) continue;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       r = sqrt(delx*delx + dely*dely + delz*delz);
 
       // distances at or beyond nbin*delr fall outside the histogram
 
       ibin = static_cast<int> (r*delrinv);
       if (ibin >= nbin) continue;
 
       // tally with I as central atom
 
       if (ipair)
         for (ihisto = 0; ihisto < ipair; ihisto++)
           hist[rdfpair[ihisto][itype][jtype]][ibin] += 1.0;
 
       // tally with J as central atom
       // skipped when newton_pair is off and j is not a local atom
 
       if (newton_pair || j < nlocal) {
         if (jpair)
           for (ihisto = 0; ihisto < jpair; ihisto++)
             hist[rdfpair[ihisto][jtype][itype]][ibin] += 1.0;
       }
     }
   }
 
   // sum histograms across procs
 
   MPI_Allreduce(hist[0],histall[0],npairs*nbin,MPI_DOUBLE,MPI_SUM,world);
 
   // convert counts to g(r) and coord(r) and copy into output array
   // nideal = # of J atoms surrounding single I atom in a single bin
   //   assuming J atoms are at uniform density
 
   double constant,nideal,gr,ncoord,rlower,rupper;
 
   if (domain->dimension == 3) {
 
     // 3d: nideal = 4 pi/(3 V) * (rupper^3 - rlower^3) * jcount
     //   with V = xprd*yprd*zprd
 
     constant = 4.0*MY_PI / (3.0*domain->xprd*domain->yprd*domain->zprd);
 
     for (m = 0; m < npairs; m++) {
       ncoord = 0.0;
       for (ibin = 0; ibin < nbin; ibin++) {
         rlower = ibin*delr;
         rupper = (ibin+1)*delr;
         nideal = constant *
           (rupper*rupper*rupper - rlower*rlower*rlower) * jcount[m];
 
         // g(r) is set to 0 when there are no I atoms or no J atoms
 
         if (icount[m]*nideal != 0.0)
           gr = histall[m][ibin] / (icount[m]*nideal);
         else gr = 0.0;
 
         // ncoord = running sum of gr*nideal over bins up to this one
 
         ncoord += gr*nideal;
         array[ibin][1+2*m] = gr;
         array[ibin][2+2*m] = ncoord;
       }
     }
 
   } else {
 
     // otherwise: nideal = pi/A * (rupper^2 - rlower^2) * jcount
     //   with A = xprd*yprd
 
     constant = MY_PI / (domain->xprd*domain->yprd);
 
     for (m = 0; m < npairs; m++) {
       ncoord = 0.0;
       for (ibin = 0; ibin < nbin; ibin++) {
         rlower = ibin*delr;
         rupper = (ibin+1)*delr;
         nideal = constant * (rupper*rupper - rlower*rlower) * jcount[m];
         if (icount[m]*nideal != 0.0)
           gr = histall[m][ibin] / (icount[m]*nideal);
         else gr = 0.0;
         ncoord += gr*nideal;
         array[ibin][1+2*m] = gr;
         array[ibin][2+2*m] = ncoord;
       }
     }
   }
 }
diff --git a/src/pair_born_coul_wolf.cpp b/src/pair_born_coul_wolf.cpp
index 682940192..086d9ee57 100644
--- a/src/pair_born_coul_wolf.cpp
+++ b/src/pair_born_coul_wolf.cpp
@@ -1,492 +1,492 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Yongfeng Zhang (INL), yongfeng.zhang@inl.gov
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_born_coul_wolf.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 /* ---------------------------------------------------------------------- */
 
 PairBornCoulWolf::PairBornCoulWolf(LAMMPS *lmp) : Pair(lmp)
 {
   // NOTE(review): single_enable = 0 presumably tells callers not to use
   // single(), and writedata = 1 that write_data() is available -
   // confirm against the Pair base class
 
   single_enable = 0;
   writedata = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairBornCoulWolf::~PairBornCoulWolf()
 {
   // nothing to release unless the allocated flag has been set
 
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   // cutoffs
 
   memory->destroy(cut_lj);
   memory->destroy(cut_ljsq);
 
   // user-specified and derived Born coefficients
 
   memory->destroy(a);
   memory->destroy(rho);
   memory->destroy(sigma);
   memory->destroy(c);
   memory->destroy(d);
   memory->destroy(rhoinv);
   memory->destroy(born1);
   memory->destroy(born2);
   memory->destroy(born3);
   memory->destroy(offset);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBornCoulWolf::compute(int eflag, int vflag)
 {
   // accumulate Born + shifted erfc Coulombic forces for all neighbor pairs
   // eflag,vflag are passed to ev_setup() to enable energy/virial tallying
 
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double rsq,r2inv,r6inv,forcecoul,forceborn,factor_coul,factor_lj;
   double prefactor;
   double r,rexp;
   int *ilist,*jlist,*numneigh,**firstneigh;
   double erfcc,erfcd,v_sh,dvdrr,e_self,e_shift,f_shift,qisq;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   // self and shifted coulombic energy
   // e_shift = erfc(alf*rc)/rc and f_shift are evaluated at rc = cut_coul
 
   e_self = v_sh = 0.0;
   e_shift = erfc(alf*cut_coul)/cut_coul;
   f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
     cut_coul;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     // self energy of atom i, tallied once per atom as an i,i entry
 
     qisq = qtmp*qtmp;
     e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e;
     if (evflag) ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0);
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
 
       // special weights come from bits encoded in j, which are then masked off
 
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
 
         // Coulombic part, only inside cut_coul
         // v_sh = shifted pair energy, forcecoul includes the f_shift term
         // a weight < 1 subtracts (1-factor_coul)*prefactor
 
         if (rsq < cut_coulsq) {
           r = sqrt(rsq);
           prefactor = qqrd2e*qtmp*q[j]/r;
           erfcc = erfc(alf*r);
           erfcd = exp(-alf*alf*r*r);
           v_sh = (erfcc - e_shift*r) * prefactor;
           dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
           forcecoul = dvdrr*rsq*prefactor;
           if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
         } else forcecoul = 0.0;
 
         // Born part, only inside the per-pair cutoff
         // rexp = exp((sigma-r)/rho), born1,born2,born3 are set in init_one()
 
         if (rsq < cut_ljsq[itype][jtype]) {
           r6inv = r2inv*r2inv*r2inv;
           r = sqrt(rsq);
           rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
           forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv
             + born3[itype][jtype]*r2inv*r6inv;
         } else forceborn = 0.0;
 
         // fpair multiplies the separation vector to give the force on i
 
         fpair = (forcecoul + factor_lj*forceborn) * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         // opposite force on j, skipped if newton_pair is off and j is not local
 
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         // energies reuse v_sh, prefactor, rexp, r6inv from the force branches
         // which are guarded by the same cutoff tests
 
         if (eflag) {
           if (rsq < cut_coulsq) {
             ecoul = v_sh;
             if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
           } else ecoul = 0.0;
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv +
               d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairBornCoulWolf::allocate()
 {
   allocated = 1;
 
   // per-type-pair arrays are indexed from 1 to ntypes,
   // hence ntypes+1 entries per dimension
 
   const int np1 = atom->ntypes + 1;
 
   // setflag is cleared for all pairs with jtype >= itype
 
   memory->create(setflag,np1,np1,"pair:setflag");
   for (int itype = 1; itype < np1; itype++) {
     for (int jtype = itype; jtype < np1; jtype++) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,np1,np1,"pair:cutsq");
 
   memory->create(cut_lj,np1,np1,"pair:cut_lj");
   memory->create(cut_ljsq,np1,np1,"pair:cut_ljsq");
   memory->create(a,np1,np1,"pair:a");
   memory->create(rho,np1,np1,"pair:rho");
   memory->create(sigma,np1,np1,"pair:sigma");
   memory->create(c,np1,np1,"pair:c");
   memory->create(d,np1,np1,"pair:d");
   memory->create(rhoinv,np1,np1,"pair:rhoinv");
   memory->create(born1,np1,np1,"pair:born1");
   memory->create(born2,np1,np1,"pair:born2");
   memory->create(born3,np1,np1,"pair:born3");
   memory->create(offset,np1,np1,"pair:offset");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairBornCoulWolf::settings(int narg, char **arg)
 {
   // args = alf, global Born cutoff, optional Coulombic cutoff
   // the Coulombic cutoff defaults to the Born cutoff when not given
 
   if (narg < 2 || narg > 3) error->all(FLERR,"Illegal pair_style command");
 
   alf = force->numeric(FLERR,arg[0]);
   cut_lj_global = force->numeric(FLERR,arg[1]);
   if (narg == 2) cut_coul = cut_lj_global;
   else cut_coul = force->numeric(FLERR,arg[2]);
 
   // reset cutoffs that have been explicitly set
   // loop starts at j = i so the I,I pairs are reset as well,
   // since coeff() stores a cutoff for every pair with j >= i
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut_lj[i][j] = cut_lj_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairBornCoulWolf::coeff(int narg, char **arg)
 {
   // args = I range, J range, a, rho, sigma, c, d, optional Born cutoff
 
   if (narg < 7 || narg > 8)
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   // rho must be positive, its inverse is taken in init_one()
 
   const double a_one = force->numeric(FLERR,arg[2]);
   const double rho_one = force->numeric(FLERR,arg[3]);
   const double sigma_one = force->numeric(FLERR,arg[4]);
   if (rho_one <= 0) error->all(FLERR,"Incorrect args for pair coefficients");
   const double c_one = force->numeric(FLERR,arg[5]);
   const double d_one = force->numeric(FLERR,arg[6]);
 
   // per-pair cutoff falls back to the global one when not specified
 
   const double cut_lj_one =
     (narg == 8) ? force->numeric(FLERR,arg[7]) : cut_lj_global;
 
   // store values for every pair in the ranges with J >= I
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     const int jstart = MAX(jlo,i);
     for (int j = jstart; j <= jhi; j++) {
       setflag[i][j] = 1;
       a[i][j] = a_one;
       rho[i][j] = rho_one;
       sigma[i][j] = sigma_one;
       c[i][j] = c_one;
       d[i][j] = d_one;
       cut_lj[i][j] = cut_lj_one;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairBornCoulWolf::init_style()
 {
   // this style reads per-atom charges, so the atom style must provide q
 
   if (!atom->q_flag)
     error->all(FLERR,"Pair style born/coul/wolf requires atom attribute q");
 
   // request a neighbor list for this pair style
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 
   // squared Coulombic cutoff, compared against rsq in compute() and single()
 
   cut_coulsq = cut_coul * cut_coul;
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairBornCoulWolf::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   double cut = MAX(cut_lj[i][j],cut_coul);
   cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
 
   rhoinv[i][j] = 1.0/rho[i][j];
   born1[i][j] = a[i][j]/rho[i][j];
   born2[i][j] = 6.0*c[i][j];
   born3[i][j] = 8.0*d[i][j];
 
   if (offset_flag) {
     double rexp = exp((sigma[i][j]-cut_lj[i][j])*rhoinv[i][j]);
     offset[i][j] = a[i][j]*rexp - c[i][j]/pow(cut_lj[i][j],6.0)
       + d[i][j]/pow(cut_lj[i][j],8.0);
   } else offset[i][j] = 0.0;
 
   cut_ljsq[j][i] = cut_ljsq[i][j];
   a[j][i] = a[i][j];
   c[j][i] = c[i][j];
   d[j][i] = d[i][j];
   rhoinv[j][i] = rhoinv[i][j];
   sigma[j][i] = sigma[i][j];
   born1[j][i] = born1[i][j];
   born2[j][i] = born2[i][j];
   born3[j][i] = born3[i][j];
   offset[j][i] = offset[i][j];
 
   return cut;
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairBornCoulWolf::write_restart(FILE *fp)
 {
   write_restart_settings(fp);
 
   // for each pair with J >= I: the set flag,
   // followed by the 6 coefficients only if the flag is set
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
       if (!setflag[itype][jtype]) continue;
 
       fwrite(&a[itype][jtype],sizeof(double),1,fp);
       fwrite(&rho[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigma[itype][jtype],sizeof(double),1,fp);
       fwrite(&c[itype][jtype],sizeof(double),1,fp);
       fwrite(&d[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut_lj[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairBornCoulWolf::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
 
   allocate();
 
   int i,j;
   int me = comm->me;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
       MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
       if (setflag[i][j]) {
         if (me == 0) {
           fread(&a[i][j],sizeof(double),1,fp);
           fread(&rho[i][j],sizeof(double),1,fp);
           fread(&sigma[i][j],sizeof(double),1,fp);
           fread(&c[i][j],sizeof(double),1,fp);
           fread(&d[i][j],sizeof(double),1,fp);
           fread(&cut_lj[i][j],sizeof(double),1,fp);
         }
         MPI_Bcast(&a[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&rho[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&sigma[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&c[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&d[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&cut_lj[i][j],1,MPI_DOUBLE,0,world);
       }
     }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairBornCoulWolf::write_restart_settings(FILE *fp)
 {
   // global settings, in the order read_restart_settings() expects them
 
   // 3 doubles: damping parameter and the two global cutoffs
 
   fwrite(&alf,sizeof(double),1,fp);
   fwrite(&cut_lj_global,sizeof(double),1,fp);
   fwrite(&cut_coul,sizeof(double),1,fp);
 
   // 2 ints: flags
 
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairBornCoulWolf::read_restart_settings(FILE *fp)
 {
   // proc 0 reads the values in the order write_restart_settings() wrote them
 
   const int me = comm->me;
   if (me == 0) {
     fread(&alf,sizeof(double),1,fp);
     fread(&cut_lj_global,sizeof(double),1,fp);
     fread(&cut_coul,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
   }
 
   // every proc receives the values from proc 0
 
   MPI_Bcast(&alf,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to data file
 ------------------------------------------------------------------------- */
 
 void PairBornCoulWolf::write_data(FILE *fp)
 {
   // one line per atom type: type index, then a, rho, sigma, c, d of its
   // I,I pair
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; itype++) {
     fprintf(fp,"%d %g %g %g %g %g\n",itype,
             a[itype][itype],rho[itype][itype],sigma[itype][itype],
             c[itype][itype],d[itype][itype]);
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes all pairs to data file
 ------------------------------------------------------------------------- */
 
 void PairBornCoulWolf::write_data_all(FILE *fp)
 {
   // one line per pair with J >= I: both type indices, then
   // a, rho, sigma, c, d and the per-pair Born cutoff
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       fprintf(fp,"%d %d %g %g %g %g %g %g\n",itype,jtype,
               a[itype][jtype],rho[itype][jtype],sigma[itype][jtype],
               c[itype][jtype],d[itype][jtype],cut_lj[itype][jtype]);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    only the pair part is calculated here
 ------------------------------------------------------------------------- */
 
 double PairBornCoulWolf::single(int i, int j, int itype, int jtype,
                                 double rsq,
                                 double factor_coul, double factor_lj,
                                 double &fforce)
 {
   double r2inv,r6inv,r,prefactor,rexp;
   double forcecoul,forceborn,phicoul,phiborn;
   double e_shift,f_shift,dvdrr,erfcc,erfcd;
 
   r2inv = 1.0/rsq;
   e_shift = erfc(alf*cut_coul) / cut_coul;
   f_shift = -(e_shift+2*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
     cut_coul;
 
   if (rsq < cut_coulsq) {
     r = sqrt(rsq);
     prefactor = force->qqrd2e * atom->q[i]*atom->q[j]/r;
     erfcc = erfc(alf*r);
     erfcd = exp(-alf*alf*r*r);
     dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
     forcecoul = dvdrr*rsq*prefactor;
     if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
   } else forcecoul = 0.0;
 
   if (rsq < cut_ljsq[itype][jtype]) {
     r6inv = r2inv*r2inv*r2inv;
     r = sqrt(rsq);
     rexp = exp(-r*rhoinv[itype][jtype]);
     forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv
       + born3[itype][jtype]*r2inv*r6inv;
   } else forceborn = 0.0;
 
   fforce = (forcecoul + factor_lj*forceborn) * r2inv;
 
   double eng = 0.0;
   if (rsq < cut_coulsq) {
     phicoul = prefactor * (erfcc-e_shift*r);
     if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*prefactor;
     eng += phicoul;
   }
   if (rsq < cut_ljsq[itype][jtype]) {
     phiborn = a[itype][jtype]*rexp - c[itype][jtype]*r6inv
       + d[itype][jtype]*r2inv*r6inv - offset[itype][jtype];
     eng += factor_lj*phiborn;
   }
   return eng;
 }
diff --git a/src/pair_buck_coul_cut.cpp b/src/pair_buck_coul_cut.cpp
index ed5a24df0..0faf6a6f4 100644
--- a/src/pair_buck_coul_cut.cpp
+++ b/src/pair_buck_coul_cut.cpp
@@ -1,472 +1,472 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Eduardo Bringa (LLNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_buck_coul_cut.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 /* ---------------------------------------------------------------------- */
 
 PairBuckCoulCut::PairBuckCoulCut(LAMMPS *lmp) : Pair(lmp)
 {
   // NOTE(review): writedata = 1 presumably signals that this style can
   // write its coefficients to a data file - confirm against the Pair
   // base class
 
   writedata = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairBuckCoulCut::~PairBuckCoulCut()
 {
   // nothing to release unless the allocated flag has been set
 
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   // cutoffs
 
   memory->destroy(cut_lj);
   memory->destroy(cut_ljsq);
   memory->destroy(cut_coul);
   memory->destroy(cut_coulsq);
 
   // user-specified and derived Buckingham coefficients
 
   memory->destroy(a);
   memory->destroy(rho);
   memory->destroy(c);
   memory->destroy(rhoinv);
   memory->destroy(buck1);
   memory->destroy(buck2);
   memory->destroy(offset);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairBuckCoulCut::compute(int eflag, int vflag)
 {
   // accumulate Buckingham + cutoff Coulombic forces for all neighbor pairs
   // eflag,vflag are passed to ev_setup() to enable energy/virial tallying
 
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double rsq,r2inv,r6inv,forcecoul,forcebuck,factor_coul,factor_lj;
   double r,rexp;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
 
       // special weights come from bits encoded in j, which are then masked off
 
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         r = sqrt(rsq);
 
         // Coulombic part, only inside the per-pair Coulombic cutoff
 
         if (rsq < cut_coulsq[itype][jtype])
           forcecoul = qqrd2e * qtmp*q[j]/r;
         else forcecoul = 0.0;
 
         // Buckingham part, only inside the per-pair cutoff
         // rexp = exp(-r/rho)
 
         if (rsq < cut_ljsq[itype][jtype]) {
           r6inv = r2inv*r2inv*r2inv;
           rexp = exp(-r*rhoinv[itype][jtype]);
           forcebuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv;
         } else forcebuck = 0.0;
 
         // fpair multiplies the separation vector to give the force on i
 
         fpair = (factor_coul*forcecoul + factor_lj*forcebuck) * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
 
         // opposite force on j, skipped if newton_pair is off and j is not local
 
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         // energies reuse rexp and r6inv from the force branch
         // which is guarded by the same cutoff test
 
         if (eflag) {
           if (rsq < cut_coulsq[itype][jtype])
             ecoul = factor_coul * qqrd2e * qtmp*q[j]/r;
           else ecoul = 0.0;
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv -
               offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairBuckCoulCut::allocate()
 {
   allocated = 1;
 
   // per-type-pair arrays are indexed from 1 to ntypes,
   // hence ntypes+1 entries per dimension
 
   const int np1 = atom->ntypes + 1;
 
   // setflag is cleared for all pairs with jtype >= itype
 
   memory->create(setflag,np1,np1,"pair:setflag");
   for (int itype = 1; itype < np1; itype++) {
     for (int jtype = itype; jtype < np1; jtype++) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,np1,np1,"pair:cutsq");
 
   memory->create(cut_lj,np1,np1,"pair:cut_lj");
   memory->create(cut_ljsq,np1,np1,"pair:cut_ljsq");
   memory->create(cut_coul,np1,np1,"pair:cut_coul");
   memory->create(cut_coulsq,np1,np1,"pair:cut_coulsq");
   memory->create(a,np1,np1,"pair:a");
   memory->create(rho,np1,np1,"pair:rho");
   memory->create(c,np1,np1,"pair:c");
   memory->create(rhoinv,np1,np1,"pair:rhoinv");
   memory->create(buck1,np1,np1,"pair:buck1");
   memory->create(buck2,np1,np1,"pair:buck2");
   memory->create(offset,np1,np1,"pair:offset");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairBuckCoulCut::settings(int narg, char **arg)
 {
   // args = global Buckingham cutoff, optional global Coulombic cutoff
   // the Coulombic cutoff defaults to the Buckingham cutoff when not given
 
   if (narg < 1 || narg > 2) error->all(FLERR,"Illegal pair_style command");
 
   cut_lj_global = force->numeric(FLERR,arg[0]);
   if (narg == 1) cut_coul_global = cut_lj_global;
   else cut_coul_global = force->numeric(FLERR,arg[1]);
 
   // reset cutoffs that have been explicitly set
   // loop starts at j = i so the I,I pairs are reset as well,
   // since coeff() stores cutoffs for every pair with j >= i
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) {
           cut_lj[i][j] = cut_lj_global;
           cut_coul[i][j] = cut_coul_global;
         }
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairBuckCoulCut::coeff(int narg, char **arg)
 {
   if (narg < 5 || narg > 7) 
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double a_one = force->numeric(FLERR,arg[2]);
   double rho_one = force->numeric(FLERR,arg[3]);
   if (rho_one <= 0) error->all(FLERR,"Incorrect args for pair coefficients");
   double c_one = force->numeric(FLERR,arg[4]);
 
   double cut_lj_one = cut_lj_global;
   double cut_coul_one = cut_coul_global;
   if (narg >= 6) cut_coul_one = cut_lj_one = force->numeric(FLERR,arg[5]);
   if (narg == 7) cut_coul_one = force->numeric(FLERR,arg[6]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       a[i][j] = a_one;
       rho[i][j] = rho_one;
       c[i][j] = c_one;
       cut_lj[i][j] = cut_lj_one;
       cut_coul[i][j] = cut_coul_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairBuckCoulCut::init_style()
 {
   // the Coulomb term reads per-atom charges, so the atom style must provide q
   if (!atom->q_flag)
     error->all(FLERR,"Pair style buck/coul/cut requires atom attribute q");
 
   // register this pair style's neighbor list request
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairBuckCoulCut::init_one(int i, int j)
 {
   // this style does no mixing: every i,j pair must have been set via coeff()
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   // returned cutoff is the larger of the two per-pair cutoffs
   const double cutmax =
     (cut_lj[i][j] > cut_coul[i][j]) ? cut_lj[i][j] : cut_coul[i][j];
 
   cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
   cut_coulsq[i][j] = cut_coul[i][j] * cut_coul[i][j];
 
   // derived coefficients consumed by the force/energy kernels
   rhoinv[i][j] = 1.0/rho[i][j];
   buck1[i][j] = a[i][j]/rho[i][j];
   buck2[i][j] = 6.0*c[i][j];
 
   // energy shift equals the Buckingham energy evaluated at the cutoff
   offset[i][j] = 0.0;
   if (offset_flag) {
     const double exp_at_cut = exp(-cut_lj[i][j]/rho[i][j]);
     offset[i][j] = a[i][j]*exp_at_cut - c[i][j]/pow(cut_lj[i][j],6.0);
   }
 
   // mirror everything the kernels read into the j,i entries
   cut_ljsq[j][i] = cut_ljsq[i][j];
   cut_coulsq[j][i] = cut_coulsq[i][j];
   a[j][i] = a[i][j];
   c[j][i] = c[i][j];
   rhoinv[j][i] = rhoinv[i][j];
   buck1[j][i] = buck1[i][j];
   buck2[j][i] = buck2[i][j];
   offset[j][i] = offset[i][j];
 
   if (!tail_flag) return cutmax;
 
   // compute I,J contribution to long-range tail correction
   // count total # of atoms of type I and J via Allreduce
 
   const int *types = atom->type;
   const int nown = atom->nlocal;
 
   double mine[2],all[2];
   mine[0] = 0.0;
   mine[1] = 0.0;
   for (int n = 0; n < nown; ++n) {
     if (types[n] == i) mine[0] += 1.0;
     if (types[n] == j) mine[1] += 1.0;
   }
   MPI_Allreduce(mine,all,2,MPI_DOUBLE,MPI_SUM,world);
 
   const double rho1 = rho[i][j];
   const double rho2 = rho1*rho1;
   const double rho3 = rho2*rho1;
   const double rc = cut_lj[i][j];
   const double rc2 = rc*rc;
   const double rc3 = rc2*rc;
   etail_ij = 2.0*MY_PI*all[0]*all[1]*
     (a[i][j]*exp(-rc/rho1)*rho1*(rc2 + 2.0*rho1*rc + 2.0*rho2) -
      c[i][j]/(3.0*rc3));
   ptail_ij = (-1/3.0)*2.0*MY_PI*all[0]*all[1]*
     (-a[i][j]*exp(-rc/rho1)*
      (rc3 + 3.0*rho1*rc2 + 6.0*rho2*rc + 6.0*rho3) + 2.0*c[i][j]/rc3);
 
   return cutmax;
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairBuckCoulCut::write_restart(FILE *fp)
 {
   // global settings first, then one record per upper-triangle type pair
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
 
       // coefficients follow the flag only for pairs that were set
       if (!setflag[itype][jtype]) continue;
       fwrite(&a[itype][jtype],sizeof(double),1,fp);
       fwrite(&rho[itype][jtype],sizeof(double),1,fp);
       fwrite(&c[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut_lj[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut_coul[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairBuckCoulCut::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
 
   allocate();
 
   // rank 0 reads each value from the file, then broadcasts it to all ranks
   const bool is_root = (comm->me == 0);
   const int ntypes = atom->ntypes;
 
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       if (is_root) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
 
       // coefficients are present in the file only for pairs that were set
       if (!setflag[itype][jtype]) continue;
 
       if (is_root) {
         fread(&a[itype][jtype],sizeof(double),1,fp);
         fread(&rho[itype][jtype],sizeof(double),1,fp);
         fread(&c[itype][jtype],sizeof(double),1,fp);
         fread(&cut_lj[itype][jtype],sizeof(double),1,fp);
         fread(&cut_coul[itype][jtype],sizeof(double),1,fp);
       }
       MPI_Bcast(&a[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&rho[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&c[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut_lj[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut_coul[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairBuckCoulCut::write_restart_settings(FILE *fp)
 {
   // record layout: two double cutoffs followed by three int flags;
   // read_restart_settings() reads them back in the same order
   const size_t dbl_bytes = sizeof(double);
   const size_t int_bytes = sizeof(int);
 
   fwrite(&cut_lj_global,dbl_bytes,1,fp);
   fwrite(&cut_coul_global,dbl_bytes,1,fp);
 
   fwrite(&offset_flag,int_bytes,1,fp);
   fwrite(&mix_flag,int_bytes,1,fp);
   fwrite(&tail_flag,int_bytes,1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairBuckCoulCut::read_restart_settings(FILE *fp)
 {
   const size_t dbl_bytes = sizeof(double);
   const size_t int_bytes = sizeof(int);
 
   // only rank 0 touches the file
   const bool is_root = (comm->me == 0);
   if (is_root) {
     fread(&cut_lj_global,dbl_bytes,1,fp);
     fread(&cut_coul_global,dbl_bytes,1,fp);
     fread(&offset_flag,int_bytes,1,fp);
     fread(&mix_flag,int_bytes,1,fp);
     fread(&tail_flag,int_bytes,1,fp);
   }
 
   // distribute the values read on rank 0 to every rank
   MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
   MPI_Bcast(&tail_flag,1,MPI_INT,0,world);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to data file
 ------------------------------------------------------------------------- */
 
 void PairBuckCoulCut::write_data(FILE *fp)
 {
   // one line per atom type: type index, then a, rho, c of the like-type pair
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     fprintf(fp,"%d %g %g %g\n",
             itype,a[itype][itype],rho[itype][itype],c[itype][itype]);
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes all pairs to data file
 ------------------------------------------------------------------------- */
 
 void PairBuckCoulCut::write_data_all(FILE *fp)
 {
   // one line per upper-triangle pair: i j a rho c cut_lj cut_coul
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fprintf(fp,"%d %d %g %g %g %g %g\n",itype,jtype,
               a[itype][jtype],rho[itype][jtype],c[itype][jtype],
               cut_lj[itype][jtype],cut_coul[itype][jtype]);
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairBuckCoulCut::single(int i, int j, int itype, int jtype,
                                double rsq,
                                double factor_coul, double factor_lj,
                                double &fforce)
 {
   // each term contributes only inside its own cutoff
   const bool within_coul = rsq < cut_coulsq[itype][jtype];
   const bool within_buck = rsq < cut_ljsq[itype][jtype];
   const double r2inv = 1.0/rsq;
 
   // Coulomb: the force prefactor and the energy are the same qqrd2e*qi*qj/r
   double qq_over_r = 0.0;
   if (within_coul)
     qq_over_r = force->qqrd2e * atom->q[i]*atom->q[j]*sqrt(r2inv);
 
   // Buckingham: exponential repulsion minus r^-6 attraction
   double r6inv = 0.0;
   double rexp = 0.0;
   double fbuck = 0.0;
   if (within_buck) {
     r6inv = r2inv*r2inv*r2inv;
     const double r = sqrt(rsq);
     rexp = exp(-r*rhoinv[itype][jtype]);
     fbuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv;
   }
 
   // fforce is handed back through the reference argument, already
   // multiplied by 1/r^2
   fforce = (factor_coul*qq_over_r + factor_lj*fbuck) * r2inv;
 
   double energy = 0.0;
   if (within_coul) energy += factor_coul*qq_over_r;
   if (within_buck) {
     const double ebuck = a[itype][jtype]*rexp - c[itype][jtype]*r6inv -
       offset[itype][jtype];
     energy += factor_lj*ebuck;
   }
   return energy;
 }
diff --git a/src/pair_coul_cut.cpp b/src/pair_coul_cut.cpp
index cd893a0d9..4a54f2f2a 100644
--- a/src/pair_coul_cut.cpp
+++ b/src/pair_coul_cut.cpp
@@ -1,309 +1,309 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_coul_cut.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 // forwards lmp to the Pair base-class constructor; the body is empty
 PairCoulCut::PairCoulCut(LAMMPS *lmp) : Pair(lmp) {}
 
 /* ---------------------------------------------------------------------- */
 
 PairCoulCut::~PairCoulCut()
 {
   // the tables are released only when the allocated flag is set
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   memory->destroy(cut);
   memory->destroy(scale);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Accumulate cutoff Coulomb forces (and, when requested, energy/virial)
 // for every pair in the neighbor list.
 //   eflag  nonzero -> pair energies are computed and tallied
 //   vflag  nonzero -> virial bookkeeping is set up via ev_setup()
 void PairCoulCut::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
   double rsq,r2inv,rinv,forcecoul,factor_coul;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // the neighbor entry carries extra bits: sbmask() selects the
       // special_coul weight, NEIGHMASK leaves the plain atom index
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         rinv = sqrt(r2inv);
         // scale[][] is the per-type-pair multiplier exposed through extract()
         forcecoul = qqrd2e * scale[itype][jtype] * qtmp*q[j]*rinv;
         fpair = factor_coul*forcecoul * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         // the opposite force on j is applied when newton_pair is set
         // or j is below nlocal
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag)
           ecoul = factor_coul * qqrd2e * scale[itype][jtype] * qtmp*q[j]*rinv;
 
         // this style has no van der Waals term, so evdwl is passed as 0.0
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              0.0,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairCoulCut::allocate()
 {
   // per-type-pair tables are indexed 1..ntypes, hence the +1 extent
   const int dim = atom->ntypes + 1;
   allocated = 1;
 
   // clear the upper triangle (j >= i) of setflag
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype < dim; ++itype) {
     for (int jtype = itype; jtype < dim; ++jtype) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   // cut and scale are created here but not given initial values
   memory->create(cut,dim,dim,"pair:cut");
   memory->create(scale,dim,dim,"pair:scale");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairCoulCut::settings(int narg, char **arg)
 {
   // pair_style args: <global cutoff>
   if (narg != 1) error->all(FLERR,"Illegal pair_style command");
 
   cut_global = force->numeric(FLERR,arg[0]);
 
   // reset cutoffs that have been explicitly set
   // the scan starts at j = i so that like-type (i,i) pairs, which coeff()
   // flags in setflag as well, also pick up the new global cutoff
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut[i][j] = cut_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairCoulCut::coeff(int narg, char **arg)
 {
   if (narg < 2 || narg > 3) 
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double cut_one = cut_global;
   if (narg == 3) cut_one = force->numeric(FLERR,arg[2]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       cut[i][j] = cut_one;
       scale[i][j] = 1.0;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairCoulCut::init_style()
 {
   // the Coulomb term reads per-atom charges, so the atom style must provide q
   if (!atom->q_flag)
     error->all(FLERR,"Pair style coul/cut requires atom attribute q");
 
   // register this pair style's neighbor list request
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairCoulCut::init_one(int i, int j)
 {
   // pairs never set through coeff() get a mixed cutoff; coeff() is also the
   // only place that assigns scale[i][j], so give such pairs the same 1.0
   // default instead of copying an unassigned value into scale[j][i] below
   if (setflag[i][j] == 0) {
     cut[i][j] = mix_distance(cut[i][i],cut[j][j]);
     scale[i][j] = 1.0;
   }
 
   // compute() indexes scale with either type order, so mirror it
   scale[j][i] = scale[i][j];
 
   return cut[i][j];
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairCoulCut::write_restart(FILE *fp)
 {
   // global settings first, then one record per upper-triangle type pair
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
 
       // the cutoff follows the flag only for pairs that were set
       if (!setflag[itype][jtype]) continue;
       fwrite(&cut[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairCoulCut::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   int i,j;
   int me = comm->me;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
       MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
       if (setflag[i][j]) {
         if (me == 0) fread(&cut[i][j],sizeof(double),1,fp);
         MPI_Bcast(&cut[i][j],1,MPI_DOUBLE,0,world);
       }
     }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairCoulCut::write_restart_settings(FILE *fp)
 {
   // record layout: one double cutoff followed by two int flags;
   // read_restart_settings() reads them back in the same order
   const size_t dbl_bytes = sizeof(double);
   const size_t int_bytes = sizeof(int);
 
   fwrite(&cut_global,dbl_bytes,1,fp);
   fwrite(&offset_flag,int_bytes,1,fp);
   fwrite(&mix_flag,int_bytes,1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairCoulCut::read_restart_settings(FILE *fp)
 {
   const size_t dbl_bytes = sizeof(double);
   const size_t int_bytes = sizeof(int);
 
   // only rank 0 touches the file
   const bool is_root = (comm->me == 0);
   if (is_root) {
     fread(&cut_global,dbl_bytes,1,fp);
     fread(&offset_flag,int_bytes,1,fp);
     fread(&mix_flag,int_bytes,1,fp);
   }
 
   // distribute the values read on rank 0 to every rank
   MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairCoulCut::single(int i, int j, int itype, int jtype,
                            double rsq, double factor_coul, double factor_lj,
                            double &fforce)
 {
   // bare Coulomb term qqrd2e*qi*qj/r; itype, jtype and factor_lj are unused,
   // and unlike compute() no scale[][] factor or cutoff test is applied here
   const double r2inv = 1.0/rsq;
   const double rinv = sqrt(r2inv);
   const double qq_over_r = force->qqrd2e * atom->q[i]*atom->q[j]*rinv;
 
   // the force prefactor and the energy share the same qq/r value
   fforce = factor_coul*qq_over_r * r2inv;
   return factor_coul*qq_over_r;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void *PairCoulCut::extract(const char *str, int &dim)
 {
   // dim is written before the name test, so it is 2 even when NULL is returned
   dim = 2;
 
   // "scale" is the only name this style exposes
   if (strcmp(str,"scale") != 0) return NULL;
   return (void *) scale;
 }
diff --git a/src/pair_coul_dsf.cpp b/src/pair_coul_dsf.cpp
index 09550cae1..0aba5548d 100644
--- a/src/pair_coul_dsf.cpp
+++ b/src/pair_coul_dsf.cpp
@@ -1,331 +1,331 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen (ORNL)
    References: Fennell and Gezelter, JCP 124, 234104 (2006)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_coul_dsf.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "memory.h"
 #include "math_const.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 // constants of the polynomial erfc() approximation used in compute() and
 // single(): with t = 1/(1 + EWALD_P*alpha*r) the code evaluates
 //   erfcc = t*(A1 + t*(A2 + t*(A3 + t*(A4 + t*A5)))) * exp(-alpha^2 r^2)
 // NOTE(review): EWALD_F does not appear to be referenced in this file - confirm
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 /* ---------------------------------------------------------------------- */
 
 PairCoulDSF::PairCoulDSF(LAMMPS *lmp) : Pair(lmp)
 {
   // NOTE(review): presumably tells callers that single() must not be used,
   // although this file does implement PairCoulDSF::single() - confirm intent
   single_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairCoulDSF::~PairCoulDSF()
 {
   // the tables are released only when the allocated flag is set
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Accumulate damped shifted force (DSF) Coulomb forces and, when requested,
 // energy/virial for every pair in the neighbor list.
 //   eflag  nonzero -> pair and per-atom self energies are tallied
 //   vflag  nonzero -> virial bookkeeping is set up via ev_setup()
 void PairCoulDSF::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
   double r,rsq,r2inv,forcecoul,factor_coul;
   double prefactor,erfcc,erfcd,e_self,t;
   int *ilist,*jlist,*numneigh,**firstneigh;
   
   ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
   
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
   
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     
     // self-energy term, tallied once per atom i as an (i,i) entry with
     // zero force and zero separation
     if (eflag) {
       e_self = -(e_shift/2.0 + alpha/MY_PIS) * qtmp*qtmp*qqrd2e;
       ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0);
     }
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // the neighbor entry carries extra bits: sbmask() selects the
       // special_coul weight, NEIGHMASK leaves the plain atom index
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       // a single global cutoff is used; atom types are not consulted
       if (rsq < cut_coulsq) {
         r2inv = 1.0/rsq;
 
         r = sqrt(rsq);
         prefactor = factor_coul * qqrd2e*qtmp*q[j]/r;
         // erfcc: polynomial approximation of erfc(alpha*r) built from the
         // A1..A5 / EWALD_P macros; erfcd is the Gaussian factor it shares
         erfcd = exp(-alpha*alpha*rsq);
         t = 1.0 / (1.0 + EWALD_P*alpha*r);
         erfcc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * erfcd;
         // f_shift is the force-shift constant set in init_style()
         forcecoul = prefactor * (erfcc/r + 2.0*alpha/MY_PIS * erfcd + 
                                  r*f_shift) * r;
 
         fpair = forcecoul * r2inv;
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         // the opposite force on j is applied when newton_pair is set
         // or j is below nlocal
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
           ecoul = prefactor * (erfcc - r*e_shift - rsq*f_shift);
         } else ecoul = 0.0;
 
         // this style has no van der Waals term, so evdwl is passed as 0.0
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              0.0,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairCoulDSF::allocate()
 {
   // per-type-pair tables are indexed 1..ntypes, hence the +1 extent
   const int dim = atom->ntypes + 1;
   allocated = 1;
 
   // clear the upper triangle (j >= i) of setflag
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype < dim; ++itype) {
     for (int jtype = itype; jtype < dim; ++jtype) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairCoulDSF::settings(int narg, char **arg)
 {
   if (narg != 2) error->all(FLERR,"Illegal pair_style command");
 
   alpha = force->numeric(FLERR,arg[0]);
   cut_coul = force->numeric(FLERR,arg[1]);
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairCoulDSF::coeff(int narg, char **arg)
 {
   if (narg != 2) error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
   
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairCoulDSF::init_style()
 {
   // the Coulomb term reads per-atom charges, so the atom style must provide q
   if (!atom->q_flag)
     error->all(FLERR,"Pair style coul/dsf requires atom attribute q");
 
   // register this pair style's neighbor list request
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 
   // precompute the squared cutoff and the two shift constants, both
   // evaluated at r = cut_coul, that compute() and single() read
   cut_coulsq = cut_coul * cut_coul;
   double erfcc = erfc(alpha*cut_coul); 
   double erfcd = exp(-alpha*alpha*cut_coul*cut_coul);
   f_shift = -(erfcc/cut_coulsq + 2.0/MY_PIS*alpha*erfcd/cut_coul); 
   e_shift = erfcc/cut_coul - f_shift*cut_coul; 
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairCoulDSF::init_one(int i, int j)
 {
   // every type pair shares the single global cutoff; i and j are not
   // consulted and setflag is not checked here
   return cut_coul;
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairCoulDSF::write_restart(FILE *fp)
 {
   // global settings first, then the setflag value of each upper-triangle pair
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype)
     for (int jtype = itype; jtype <= ntypes; ++jtype)
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairCoulDSF::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   // rank 0 reads each flag from the file, then broadcasts it to all ranks
   const bool is_root = (comm->me == 0);
   const int ntypes = atom->ntypes;
 
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       if (is_root) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairCoulDSF::write_restart_settings(FILE *fp)
 {
   // record layout: alpha and cut_coul as doubles, then two int flags;
   // read_restart_settings() reads them back in the same order
   const size_t dbl_bytes = sizeof(double);
   const size_t int_bytes = sizeof(int);
 
   fwrite(&alpha,dbl_bytes,1,fp);
   fwrite(&cut_coul,dbl_bytes,1,fp);
 
   fwrite(&offset_flag,int_bytes,1,fp);
   fwrite(&mix_flag,int_bytes,1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairCoulDSF::read_restart_settings(FILE *fp)
 {
   const size_t dbl_bytes = sizeof(double);
   const size_t int_bytes = sizeof(int);
 
   // only rank 0 touches the file
   const bool is_root = (comm->me == 0);
   if (is_root) {
     fread(&alpha,dbl_bytes,1,fp);
     fread(&cut_coul,dbl_bytes,1,fp);
     fread(&offset_flag,int_bytes,1,fp);
     fread(&mix_flag,int_bytes,1,fp);
   }
 
   // distribute the values read on rank 0 to every rank
   MPI_Bcast(&alpha,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairCoulDSF::single(int i, int j, int itype, int jtype, double rsq,
                            double factor_coul, double factor_lj,
                            double &fforce)
 {
   // itype, jtype and factor_lj are not used by this style
   const double r2inv = 1.0/rsq;
 
   // both results stay zero when the pair lies outside the cutoff
   double fcoul = 0.0;
   double energy = 0.0;
 
   if (rsq < cut_coulsq) {
     const double r = sqrt(rsq);
     const double qqr = factor_coul * force->qqrd2e * atom->q[i]*atom->q[j]/r;
 
     // same polynomial erfc approximation as in compute()
     const double gauss = exp(-alpha*alpha*rsq);
     const double t = 1.0 / (1.0 + EWALD_P*alpha*r);
     const double erfc_ar = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * gauss;
 
     fcoul = qqr * (erfc_ar/r + 2.0*alpha/MY_PIS*gauss +
       r*f_shift) * r;
 
     const double epair = qqr * (erfc_ar - r*e_shift - rsq*f_shift);
     energy += epair;
   }
 
   // fforce is handed back through the reference argument, already
   // multiplied by 1/r^2
   fforce = fcoul * r2inv;
 
   return energy;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void *PairCoulDSF::extract(const char *str, int &dim)
 {
   // "cut_coul" is the only name exposed; dim is left untouched otherwise
   if (strcmp(str,"cut_coul") != 0) return NULL;
 
   // dim = 0 accompanies the pointer to the single cutoff value
   dim = 0;
   return (void *) &cut_coul;
 }
diff --git a/src/pair_coul_wolf.cpp b/src/pair_coul_wolf.cpp
index 250a8bb1d..799b672a4 100644
--- a/src/pair_coul_wolf.cpp
+++ b/src/pair_coul_wolf.cpp
@@ -1,323 +1,323 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Yongfeng Zhang (INL), yongfeng.zhang@inl.gov
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_coul_wolf.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 /* ---------------------------------------------------------------------- */
 
 PairCoulWolf::PairCoulWolf(LAMMPS *lmp) : Pair(lmp)
 {
   single_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairCoulWolf::~PairCoulWolf()
 {
   if (allocated) {
     memory->destroy(setflag);
     memory->destroy(cutsq);
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairCoulWolf::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
   double rsq,forcecoul,factor_coul;
   double prefactor;
   double r;
   int *ilist,*jlist,*numneigh,**firstneigh;
   double erfcc,erfcd,v_sh,dvdrr,e_self,e_shift,f_shift,qisq;
 
   ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   // self and shifted coulombic energy
 
   e_self = v_sh = 0.0;
   e_shift = erfc(alf*cut_coul)/cut_coul;
   f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
     cut_coul;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     qisq = qtmp*qtmp;
     e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e;
     if (evflag) ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0);
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cut_coulsq) {
         r = sqrt(rsq);
         prefactor = qqrd2e*qtmp*q[j]/r;
         erfcc = erfc(alf*r);
         erfcd = exp(-alf*alf*r*r);
         v_sh = (erfcc - e_shift*r) * prefactor;
         dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
         forcecoul = dvdrr*rsq*prefactor;
         if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
         fpair = forcecoul / rsq;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
           if (rsq < cut_coulsq) {
             ecoul = v_sh;
             if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
           } else ecoul = 0.0;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              0.0,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairCoulWolf::allocate()
 {
   allocated = 1;
   int n = atom->ntypes;
 
   memory->create(setflag,n+1,n+1,"pair:setflag");
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       setflag[i][j] = 0;
 
   memory->create(cutsq,n+1,n+1,"pair:cutsq");
 }
 
 /* ----------------------------------------------------------------------
    global settings
    unlike other pair styles,
      there are no individual pair settings that these override
 ------------------------------------------------------------------------- */
 
 void PairCoulWolf::settings(int narg, char **arg)
 {
   if (narg != 2) error->all(FLERR,"Illegal pair_style command");
 
   alf = force->numeric(FLERR,arg[0]);
   cut_coul = force->numeric(FLERR,arg[1]);
 }
 
 /* ----------------------------------------------------------------------
    set cutoffs for one or more type pairs, optional
 ------------------------------------------------------------------------- */
 
 void PairCoulWolf::coeff(int narg, char **arg)
 {
   if (narg != 2) error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairCoulWolf::init_style()
 {
   if (!atom->q_flag)
     error->all(FLERR,"Pair coul/wolf requires atom attribute q");
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 
   cut_coulsq = cut_coul*cut_coul;
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairCoulWolf::init_one(int i, int j)
 {
   return cut_coul;
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairCoulWolf::write_restart(FILE *fp)
 {
   write_restart_settings(fp);
 
   int i,j;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++)
       fwrite(&setflag[i][j],sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairCoulWolf::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   int i,j;
   int me = comm->me;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
       MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
     }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairCoulWolf::write_restart_settings(FILE *fp)
 {
   fwrite(&alf,sizeof(double),1,fp);
   fwrite(&cut_coul,sizeof(double),1,fp);
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairCoulWolf::read_restart_settings(FILE *fp)
 {
   if (comm->me == 0) {
     fread(&alf,sizeof(double),1,fp);
     fread(&cut_coul,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
   }
   MPI_Bcast(&alf,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
 }
 
 /* ----------------------------------------------------------------------
    only the pair part is calculated here
 ------------------------------------------------------------------------- */
 
 double PairCoulWolf::single(int i, int j, int itype, int jtype, double rsq,
                             double factor_coul, double factor_lj,
                             double &fforce)
 {
   double r,prefactor;
   double forcecoul,phicoul;
   double e_shift,f_shift,dvdrr,erfcc,erfcd;
 
   e_shift = erfc(alf*cut_coul) / cut_coul;
   f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
     cut_coul;
 
   if (rsq < cut_coulsq) {
     r = sqrt(rsq);
     prefactor = force->qqrd2e * atom->q[i]*atom->q[j]/r;
     erfcc = erfc(alf*r);
     erfcd = exp(-alf*alf*r*r);
     dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
     forcecoul = dvdrr*rsq*prefactor;
     if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
   } else forcecoul = 0.0;
   fforce = forcecoul / rsq;
 
   double eng = 0.0;
   if (rsq < cut_coulsq) {
     phicoul = prefactor * (erfcc-e_shift*r);
     if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*prefactor;
     eng += phicoul;
   }
   return eng;
 }
diff --git a/src/pair_dpd.cpp b/src/pair_dpd.cpp
index c4da8efbe..87af68d7f 100644
--- a/src/pair_dpd.cpp
+++ b/src/pair_dpd.cpp
@@ -1,406 +1,406 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Kurt Smith (U Pittsburgh)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "pair_dpd.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
 #include "update.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "random_mars.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 #define EPSILON 1.0e-10
 
 /* ---------------------------------------------------------------------- */
 
 PairDPD::PairDPD(LAMMPS *lmp) : Pair(lmp)
 {
   writedata = 1;
   random = NULL;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairDPD::~PairDPD()
 {
   if (allocated) {
     memory->destroy(setflag);
     memory->destroy(cutsq);
 
     memory->destroy(cut);
     memory->destroy(a0);
     memory->destroy(gamma);
     memory->destroy(sigma);
   }
 
   if (random) delete random;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairDPD::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double vxtmp,vytmp,vztmp,delvx,delvy,delvz;
   double rsq,r,rinv,dot,wd,randnum,factor_dpd;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **v = atom->v;
   double **f = atom->f;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double dtinvsqrt = 1.0/sqrt(update->dt);
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     vxtmp = v[i][0];
     vytmp = v[i][1];
     vztmp = v[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_dpd = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r = sqrt(rsq);
         if (r < EPSILON) continue;     // r can be 0.0 in DPD systems
         rinv = 1.0/r;
         delvx = vxtmp - v[j][0];
         delvy = vytmp - v[j][1];
         delvz = vztmp - v[j][2];
         dot = delx*delvx + dely*delvy + delz*delvz;
         wd = 1.0 - r/cut[itype][jtype];
         randnum = random->gaussian();
 
         // conservative force = a0 * wd
         // drag force = -gamma * wd^2 * (delx dot delv) / r
         // random force = sigma * wd * rnd * dtinvsqrt;
 
         fpair = a0[itype][jtype]*wd;
         fpair -= gamma[itype][jtype]*wd*wd*dot*rinv;
         fpair += sigma[itype][jtype]*wd*randnum*dtinvsqrt;
         fpair *= factor_dpd*rinv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
           // unshifted eng of conservative term:
           // evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/cut[itype][jtype]);
           // eng shifted to 0.0 at cutoff
           evdwl = 0.5*a0[itype][jtype]*cut[itype][jtype] * wd*wd;
           evdwl *= factor_dpd;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairDPD::allocate()
 {
   allocated = 1;
   int n = atom->ntypes;
 
   memory->create(setflag,n+1,n+1,"pair:setflag");
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       setflag[i][j] = 0;
 
   memory->create(cutsq,n+1,n+1,"pair:cutsq");
 
   memory->create(cut,n+1,n+1,"pair:cut");
   memory->create(a0,n+1,n+1,"pair:a0");
   memory->create(gamma,n+1,n+1,"pair:gamma");
   memory->create(sigma,n+1,n+1,"pair:sigma");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairDPD::settings(int narg, char **arg)
 {
   if (narg != 3) error->all(FLERR,"Illegal pair_style command");
 
   temperature = force->numeric(FLERR,arg[0]);
   cut_global = force->numeric(FLERR,arg[1]);
   seed = force->inumeric(FLERR,arg[2]);
 
   // initialize Marsaglia RNG with processor-unique seed
 
   if (seed <= 0) error->all(FLERR,"Illegal pair_style command");
   delete random;
   random = new RanMars(lmp,seed + comm->me);
 
   // reset cutoffs that have been explicitly set
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i+1; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut[i][j] = cut_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairDPD::coeff(int narg, char **arg)
 {
   if (narg < 4 || narg > 5) 
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double a0_one = force->numeric(FLERR,arg[2]);
   double gamma_one = force->numeric(FLERR,arg[3]);
 
   double cut_one = cut_global;
   if (narg == 5) cut_one = force->numeric(FLERR,arg[4]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       a0[i][j] = a0_one;
       gamma[i][j] = gamma_one;
       cut[i][j] = cut_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairDPD::init_style()
 {
   if (comm->ghost_velocity == 0)
     error->all(FLERR,"Pair dpd requires ghost atoms store velocity");
 
   // if newton off, forces between atoms ij will be double computed
   // using different random numbers
 
   if (force->newton_pair == 0 && comm->me == 0) error->warning(FLERR,
       "Pair dpd needs newton pair on for momentum conservation");
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairDPD::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
 
   sigma[i][j] = sqrt(2.0*force->boltz*temperature*gamma[i][j]);
 
   cut[j][i] = cut[i][j];
   a0[j][i] = a0[i][j];
   gamma[j][i] = gamma[i][j];
   sigma[j][i] = sigma[i][j];
 
   return cut[i][j];
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairDPD::write_restart(FILE *fp)
 {
   write_restart_settings(fp);
 
   int i,j;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       fwrite(&setflag[i][j],sizeof(int),1,fp);
       if (setflag[i][j]) {
         fwrite(&a0[i][j],sizeof(double),1,fp);
         fwrite(&gamma[i][j],sizeof(double),1,fp);
         fwrite(&cut[i][j],sizeof(double),1,fp);
       }
     }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairDPD::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
 
   allocate();
 
   int i,j;
   int me = comm->me;
   for (i = 1; i <= atom->ntypes; i++)
     for (j = i; j <= atom->ntypes; j++) {
       if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp);
       MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world);
       if (setflag[i][j]) {
         if (me == 0) {
           fread(&a0[i][j],sizeof(double),1,fp);
           fread(&gamma[i][j],sizeof(double),1,fp);
           fread(&cut[i][j],sizeof(double),1,fp);
         }
         MPI_Bcast(&a0[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&gamma[i][j],1,MPI_DOUBLE,0,world);
         MPI_Bcast(&cut[i][j],1,MPI_DOUBLE,0,world);
       }
     }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairDPD::write_restart_settings(FILE *fp)
 {
   fwrite(&temperature,sizeof(double),1,fp);
   fwrite(&cut_global,sizeof(double),1,fp);
   fwrite(&seed,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairDPD::read_restart_settings(FILE *fp)
 {
   if (comm->me == 0) {
     fread(&temperature,sizeof(double),1,fp);
     fread(&cut_global,sizeof(double),1,fp);
     fread(&seed,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
   }
   MPI_Bcast(&temperature,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&seed,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
 
   // initialize Marsaglia RNG with processor-unique seed
   // same seed that pair_style command initially specified
 
   if (random) delete random;
   random = new RanMars(lmp,seed + comm->me);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to data file
 ------------------------------------------------------------------------- */
 
 void PairDPD::write_data(FILE *fp)
 {
   for (int i = 1; i <= atom->ntypes; i++)
     fprintf(fp,"%d %g %g\n",i,a0[i][i],gamma[i][i]);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes all pairs to data file
 ------------------------------------------------------------------------- */
 
 void PairDPD::write_data_all(FILE *fp)
 {
   for (int i = 1; i <= atom->ntypes; i++)
     for (int j = i; j <= atom->ntypes; j++)
       fprintf(fp,"%d %d %g %g %g\n",i,j,a0[i][j],gamma[i][j],cut[i][j]);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairDPD::single(int i, int j, int itype, int jtype, double rsq,
                        double factor_coul, double factor_dpd, double &fforce)
 {
   double r,rinv,wd,phi;
 
   r = sqrt(rsq);
   if (r < EPSILON) {
     fforce = 0.0;
     return 0.0;
   }
 
   rinv = 1.0/r;
   wd = 1.0 - r/cut[itype][jtype];
   fforce = a0[itype][jtype]*wd * factor_dpd*rinv;
 
   phi = 0.5*a0[itype][jtype]*cut[itype][jtype] * wd*wd;
   return factor_dpd*phi;
 }
diff --git a/src/pair_lj96_cut.cpp b/src/pair_lj96_cut.cpp
index 7e61b0158..d5e24be73 100644
--- a/src/pair_lj96_cut.cpp
+++ b/src/pair_lj96_cut.cpp
@@ -1,727 +1,727 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Chuanfu Luo (luochuanfu@gmail.com)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_lj96_cut.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 /* ---------------------------------------------------------------------- */
 
 PairLJ96Cut::PairLJ96Cut(LAMMPS *lmp) : Pair(lmp)
 {
   respa_enable = 1;
   writedata = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairLJ96Cut::~PairLJ96Cut()
 {
   if (allocated) {
     memory->destroy(setflag);
     memory->destroy(cutsq);
 
     memory->destroy(cut);
     memory->destroy(epsilon);
     memory->destroy(sigma);
     memory->destroy(lj1);
     memory->destroy(lj2);
     memory->destroy(lj3);
     memory->destroy(lj4);
     memory->destroy(offset);
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJ96Cut::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r2inv,r3inv,r6inv,forcelj,factor_lj;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         r6inv = r2inv*r2inv*r2inv;
         r3inv = sqrt(r6inv);
         forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
         fpair = factor_lj*forcelj*r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
           evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
             offset[itype][jtype];
           evdwl *= factor_lj;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJ96Cut::compute_inner()
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,fpair;
   double rsq,r2inv,r3inv,r6inv,forcelj,factor_lj,rsw;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
 
   inum = listinner->inum;
   ilist = listinner->ilist;
   numneigh = listinner->numneigh;
   firstneigh = listinner->firstneigh;
 
   double cut_out_on = cut_respa[0];
   double cut_out_off = cut_respa[1];
 
   double cut_out_diff = cut_out_off - cut_out_on;
   double cut_out_on_sq = cut_out_on*cut_out_on;
   double cut_out_off_sq = cut_out_off*cut_out_off;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cut_out_off_sq) {
         r2inv = 1.0/rsq;
         r6inv = r2inv*r2inv*r2inv;
         r3inv = sqrt(r6inv);
         jtype = type[j];
         forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
         fpair = factor_lj*forcelj*r2inv;
         if (rsq > cut_out_on_sq) {
           rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff;
           fpair *= 1.0 - rsw*rsw*(3.0 - 2.0*rsw);
         }
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJ96Cut::compute_middle()
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,fpair;
   double rsq,r2inv,r3inv,r6inv,forcelj,factor_lj,rsw;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
 
   inum = listmiddle->inum;
   ilist = listmiddle->ilist;
   numneigh = listmiddle->numneigh;
   firstneigh = listmiddle->firstneigh;
 
   double cut_in_off = cut_respa[0];
   double cut_in_on = cut_respa[1];
   double cut_out_on = cut_respa[2];
   double cut_out_off = cut_respa[3];
 
   double cut_in_diff = cut_in_on - cut_in_off;
   double cut_out_diff = cut_out_off - cut_out_on;
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
   double cut_out_on_sq = cut_out_on*cut_out_on;
   double cut_out_off_sq = cut_out_off*cut_out_off;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cut_out_off_sq && rsq > cut_in_off_sq) {
         r2inv = 1.0/rsq;
         r6inv = r2inv*r2inv*r2inv;
         r3inv = sqrt(r6inv);
         jtype = type[j];
         forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
         fpair = factor_lj*forcelj*r2inv;
         if (rsq < cut_in_on_sq) {
           rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff;
           fpair *= rsw*rsw*(3.0 - 2.0*rsw);
         }
         if (rsq > cut_out_on_sq) {
           rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff;
           fpair *= 1.0 + rsw*rsw*(2.0*rsw - 3.0);
         }
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // rRESPA outer-level force kernel; also tallies energy/virial via ev_tally().
 // For each pair in listouter with rsq < cutsq[itype][jtype]:
 //   - a force is applied only when rsq > cut_in_off_sq (cut_respa[2] squared)
 //     and is scaled by rsw*rsw*(3.0 - 2.0*rsw) while rsq < cut_in_on_sq
 //   - if eflag, evdwl is computed from the unscaled lj3/lj4 expression
 //   - if vflag, fpair is replaced by the unscaled pair force wherever
 //     rsq < cut_in_on_sq before it is handed to ev_tally()
 // eflag, vflag: energy/virial request flags, forwarded to ev_setup()
 void PairLJ96Cut::compute_outer(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r2inv,r3inv,r6inv,forcelj,factor_lj,rsw;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
 
   inum = listouter->inum;
   ilist = listouter->ilist;
   numneigh = listouter->numneigh;
   firstneigh = listouter->firstneigh;
 
   // bounds of the inner switching window, read from cut_respa[2..3]
 
   double cut_in_off = cut_respa[2];
   double cut_in_on = cut_respa[3];
 
   double cut_in_diff = cut_in_on - cut_in_off;
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // the special-bond weight is looked up from the raw entry before
       // j is reduced to an atom index with NEIGHMASK
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         // force contribution: skipped entirely at or below cut_in_off
         if (rsq > cut_in_off_sq) {
           r2inv = 1.0/rsq;
           r6inv = r2inv*r2inv*r2inv;
           r3inv = sqrt(r6inv);
           forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
           fpair = factor_lj*forcelj*r2inv;
           // ramp the force up across the window [cut_in_off, cut_in_on)
           if (rsq < cut_in_on_sq) {
             rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff;
             fpair *= rsw*rsw*(3.0 - 2.0*rsw);
           }
 
           f[i][0] += delx*fpair;
           f[i][1] += dely*fpair;
           f[i][2] += delz*fpair;
           // reaction force on j only if newton_pair is set or j < nlocal
           if (newton_pair || j < nlocal) {
             f[j][0] -= delx*fpair;
             f[j][1] -= dely*fpair;
             f[j][2] -= delz*fpair;
           }
         }
 
         // energy uses the unscaled expression over the whole cutoff range
         if (eflag) {
           r2inv = 1.0/rsq;
           r6inv = r2inv*r2inv*r2inv;
           r3inv = sqrt(r6inv);
           evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
             offset[itype][jtype];
           evdwl *= factor_lj;
         }
 
         // virial: rebuild the unscaled fpair where the force above was
         // skipped (rsq <= cut_in_off_sq) or scaled (rsq < cut_in_on_sq);
         // the second branch reuses forcelj and r2inv from the force branch
         if (vflag) {
           if (rsq <= cut_in_off_sq) {
             r2inv = 1.0/rsq;
             r6inv = r2inv*r2inv*r2inv;
             r3inv = sqrt(r6inv);
             forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
             fpair = factor_lj*forcelj*r2inv;
           } else if (rsq < cut_in_on_sq)
             fpair = factor_lj*forcelj*r2inv;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    create the per-type-pair tables, each sized (ntypes+1) x (ntypes+1)
    sets allocated = 1 and clears setflag[i][j] for 1 <= i <= j <= ntypes
 ------------------------------------------------------------------------- */
 
 void PairLJ96Cut::allocate()
 {
   allocated = 1;
 
   const int ntypes = atom->ntypes;
   const int dim = ntypes + 1;
 
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   // user-supplied coefficients
   memory->create(cut,dim,dim,"pair:cut");
   memory->create(epsilon,dim,dim,"pair:epsilon");
   memory->create(sigma,dim,dim,"pair:sigma");
 
   // quantities filled in by init_one()
   memory->create(lj1,dim,dim,"pair:lj1");
   memory->create(lj2,dim,dim,"pair:lj2");
   memory->create(lj3,dim,dim,"pair:lj3");
   memory->create(lj4,dim,dim,"pair:lj4");
   memory->create(offset,dim,dim,"pair:offset");
 }
 
 /* ----------------------------------------------------------------------
    global settings
    arg[0] = global cutoff; it is stored in cut_global and, if the tables
    are already allocated, written to every type pair whose coefficients
    were explicitly set (setflag != 0), including same-type (i,i) pairs
 ------------------------------------------------------------------------- */
 
 void PairLJ96Cut::settings(int narg, char **arg)
 {
   if (narg != 1) error->all(FLERR,"Illegal pair_style command");
 
   cut_global = force->numeric(FLERR,arg[0]);
 
   // reset cutoffs that have been explicitly set
   // the inner loop starts at j = i so that explicitly set (i,i) cutoffs
   // are updated as well; init_one() mixes cut[i][j] from cut[i][i] and
   // cut[j][j], so stale diagonal values would leak into mixed pairs
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) cut[i][j] = cut_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairLJ96Cut::coeff(int narg, char **arg)
 {
   if (narg < 4 || narg > 5)
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double epsilon_one = force->numeric(FLERR,arg[2]);
   double sigma_one = force->numeric(FLERR,arg[3]);
 
   double cut_one = cut_global;
   if (narg == 5) cut_one = force->numeric(FLERR,arg[4]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       epsilon[i][j] = epsilon_one;
       sigma[i][j] = sigma_one;
       cut[i][j] = cut_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLJ96Cut::init_style()
 {
   // request regular or rRESPA neighbor lists
 
   int irequest;
 
   // NOTE(review): whichflag == 1 presumably selects a dynamics run (as
   // opposed to minimization) -- confirm against the Update class
   if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
     // respa = 0: no inner level, 1: inner level only, 2: middle level present
     int respa = 0;
     if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
     if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
 
     // the ids assigned below (1, 2, 3) are the ones init_list() maps to
     // listinner, listmiddle and listouter
-    if (respa == 0) irequest = neighbor->request(this);
+    if (respa == 0) irequest = neighbor->request(this,instance_me);
     else if (respa == 1) {
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 1;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respainner = 1;
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 3;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respaouter = 1;
     } else {
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 1;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respainner = 1;
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 2;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respamiddle = 1;
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 3;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respaouter = 1;
     }
 
-  } else irequest = neighbor->request(this);
+  } else irequest = neighbor->request(this,instance_me);
 
   // set rRESPA cutoffs
   // cut_respa stays NULL unless a respa integrator with an inner level is
   // active; init_one() only checks the interior cutoff when it is non-NULL
 
   if (strstr(update->integrate_style,"respa") &&
       ((Respa *) update->integrate)->level_inner >= 0)
     cut_respa = ((Respa *) update->integrate)->cutoff;
   else cut_respa = NULL;
 }
 
 /* ----------------------------------------------------------------------
    neighbor callback: store the list pointer in the slot selected by id
    0 = list, 1 = listinner, 2 = listmiddle, 3 = listouter
    any other id leaves all four pointers untouched
 ------------------------------------------------------------------------- */
 
 void PairLJ96Cut::init_list(int id, NeighList *ptr)
 {
   switch (id) {
   case 0:
     list = ptr;
     break;
   case 1:
     listinner = ptr;
     break;
   case 2:
     listmiddle = ptr;
     break;
   case 3:
     listouter = ptr;
     break;
   default:
     break;
   }
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairLJ96Cut::init_one(int i, int j)
 {
   // pair was not set explicitly: derive its coefficients from the two
   // same-type entries via the mix_energy()/mix_distance() helpers
   if (setflag[i][j] == 0) {
     epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j],
                                sigma[i][i],sigma[j][j]);
     sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
     cut[i][j] = mix_distance(cut[i][i],cut[j][j]);
   }
 
   // lj1,lj2 are the force prefactors and lj3,lj4 the energy prefactors
   // consumed by the compute kernels and single()
   lj1[i][j] = 36.0 * epsilon[i][j] * pow(sigma[i][j],9.0);
   lj2[i][j] = 24.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
   lj3[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],9.0);
   lj4[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
 
   // with offset_flag set, offset is the 9-6 energy evaluated at r = cut,
   // which the energy expressions subtract
   if (offset_flag) {
     double ratio = sigma[i][j] / cut[i][j];
     offset[i][j] = 4.0 * epsilon[i][j] * (pow(ratio,9.0) - pow(ratio,6.0));
   } else offset[i][j] = 0.0;
 
   // mirror the derived tables into the (j,i) entries
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   lj4[j][i] = lj4[i][j];
   offset[j][i] = offset[i][j];
 
   // check interior rRESPA cutoff
 
   if (cut_respa && cut[i][j] < cut_respa[3])
     error->all(FLERR,"Pair cutoff < Respa interior cutoff");
 
   // compute I,J contribution to long-range tail correction
   // count total # of atoms of type I and J via Allreduce
 
   if (tail_flag) {
     int *type = atom->type;
     int nlocal = atom->nlocal;
 
     // count[0]/count[1] = local atoms of type i/j; all[] = sums over world
     double count[2],all[2];
     count[0] = count[1] = 0.0;
     for (int k = 0; k < nlocal; k++) {
       if (type[k] == i) count[0] += 1.0;
       if (type[k] == j) count[1] += 1.0;
     }
     MPI_Allreduce(count,all,2,MPI_DOUBLE,MPI_SUM,world);
 
     double sig3 = sigma[i][j]*sigma[i][j]*sigma[i][j];
     double sig6 = sig3*sig3;
     double rc3 = cut[i][j]*cut[i][j]*cut[i][j];
     double rc6 = rc3*rc3;
 
     // results are left in the members etail_ij and ptail_ij
     etail_ij = 8.0*MY_PI*all[0]*all[1]*epsilon[i][j] *
       sig6 * (sig3 - 2.0*rc3) / (6.0*rc6);
     ptail_ij = 8.0*MY_PI*all[0]*all[1]*epsilon[i][j] *
       sig6 * (3.0*sig3 - 4.0*rc3) / (6.0*rc6);
   }
 
   // the pair cutoff is the return value
   return cut[i][j];
 }
 
 /* ----------------------------------------------------------------------
    write the global settings, then one record per type pair (j >= i):
    the setflag value, followed by epsilon, sigma and cut when it is set
 ------------------------------------------------------------------------- */
 
 void PairLJ96Cut::write_restart(FILE *fp)
 {
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
 
       // coefficients are stored only for explicitly set pairs
       if (!setflag[itype][jtype]) continue;
 
       fwrite(&epsilon[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigma[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    read the global settings, allocate the tables, then read one record
    per type pair (j >= i) in the layout produced by write_restart()
    only the proc with comm->me == 0 touches fp; every value is broadcast
 ------------------------------------------------------------------------- */
 
 void PairLJ96Cut::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   const bool reader = (comm->me == 0);
   const int ntypes = atom->ntypes;
 
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       if (reader) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
 
       // unset pairs carry no coefficient data in the file
       if (!setflag[itype][jtype]) continue;
 
       if (reader) {
         fread(&epsilon[itype][jtype],sizeof(double),1,fp);
         fread(&sigma[itype][jtype],sizeof(double),1,fp);
         fread(&cut[itype][jtype],sizeof(double),1,fp);
       }
       MPI_Bcast(&epsilon[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&sigma[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    write the global settings to the restart file
    layout: cut_global (double), then offset_flag, mix_flag, tail_flag (int)
 ------------------------------------------------------------------------- */
 
 void PairLJ96Cut::write_restart_settings(FILE *fp)
 {
   // global cutoff
   fwrite(&cut_global,sizeof(double),1,fp);
 
   // integer flags, in the order read_restart_settings() reads them back
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
   fwrite(&tail_flag,sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
    read the global settings from the restart file
    the proc with comm->me == 0 reads fp, then each value is broadcast
 ------------------------------------------------------------------------- */
 
 void PairLJ96Cut::read_restart_settings(FILE *fp)
 {
   if (comm->me == 0) {
     // same order as write_restart_settings()
     fread(&cut_global,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
     fread(&tail_flag,sizeof(int),1,fp);
   }
 
   MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
   MPI_Bcast(&tail_flag,1,MPI_INT,0,world);
 }
 
 /* ----------------------------------------------------------------------
    write one line per atom type to the data file:
    type index, epsilon[i][i], sigma[i][i]
 ------------------------------------------------------------------------- */
 
 void PairLJ96Cut::write_data(FILE *fp)
 {
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     fprintf(fp,"%d %g %g\n",itype,epsilon[itype][itype],sigma[itype][itype]);
   }
 }
 
 /* ----------------------------------------------------------------------
    write one line per type pair (j >= i) to the data file:
    i, j, epsilon[i][j], sigma[i][j], cut[i][j]
 ------------------------------------------------------------------------- */
 
 void PairLJ96Cut::write_data_all(FILE *fp)
 {
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fprintf(fp,"%d %d %g %g %g\n",itype,jtype,
               epsilon[itype][jtype],sigma[itype][jtype],cut[itype][jtype]);
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // energy and force for a single pair at squared separation rsq
 //   itype, jtype: type indices into the lj1..lj4 and offset tables
 //   factor_lj:    weight applied to both the force and the energy
 //   fforce (out): factor_lj * forcelj / rsq
 //   returns:      factor_lj * (9-6 energy minus offset[itype][jtype])
 // i, j and factor_coul are not used by this style
 double PairLJ96Cut::single(int i, int j, int itype, int jtype, double rsq,
                            double factor_coul, double factor_lj,
                            double &fforce)
 {
   const double inv_r2 = 1.0/rsq;
   const double inv_r6 = inv_r2*inv_r2*inv_r2;
   const double inv_r3 = sqrt(inv_r6);
 
   const double flj =
     inv_r6 * (lj1[itype][jtype]*inv_r3 - lj2[itype][jtype]);
   fforce = factor_lj*flj*inv_r2;
 
   const double elj =
     inv_r6*(lj3[itype][jtype]*inv_r3-lj4[itype][jtype]) -
     offset[itype][jtype];
   return factor_lj*elj;
 }
diff --git a/src/pair_lj_cut_coul_cut.cpp b/src/pair_lj_cut_coul_cut.cpp
index f3ee3d4a4..65e7aef5e 100644
--- a/src/pair_lj_cut_coul_cut.cpp
+++ b/src/pair_lj_cut_coul_cut.cpp
@@ -1,471 +1,471 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_lj_cut_coul_cut.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 /* ---------------------------------------------------------------------- */
 
 // construct the style on top of Pair and set writedata
 // NOTE(review): writedata presumably advertises that write_data() and
 // write_data_all() are implemented -- confirm against the Pair base class
 PairLJCutCoulCut::PairLJCutCoulCut(LAMMPS *lmp) : Pair(lmp)
 {
   this->writedata = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // release every table created by allocate(); nothing to do if it never ran
 PairLJCutCoulCut::~PairLJCutCoulCut()
 {
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   // cutoffs and their squares
   memory->destroy(cut_lj);
   memory->destroy(cut_ljsq);
   memory->destroy(cut_coul);
   memory->destroy(cut_coulsq);
 
   // LJ coefficients and derived prefactors
   memory->destroy(epsilon);
   memory->destroy(sigma);
   memory->destroy(lj1);
   memory->destroy(lj2);
   memory->destroy(lj3);
   memory->destroy(lj4);
   memory->destroy(offset);
 }
 
 /* ---------------------------------------------------------------------- */
 
 // pairwise force kernel: cutoff Coulomb plus cutoff 12-6 Lennard-Jones.
 // For each pair in list with rsq < cutsq[itype][jtype], the Coulomb term
 // is included when rsq < cut_coulsq and the LJ term when rsq < cut_ljsq;
 // each term is weighted by its own special-bond factor.
 // eflag, vflag: energy/virial request flags, forwarded to ev_setup()
 void PairLJCutCoulCut::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // both special-bond weights are looked up from the raw entry
       // before j is reduced to an atom index with NEIGHMASK
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
 
         // forcecoul = qqrd2e*qi*qj/r; the remaining 1/r^2 is applied in fpair
         if (rsq < cut_coulsq[itype][jtype])
           forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
         else forcecoul = 0.0;
 
         // r6inv is only assigned inside the LJ cutoff; every later use is
         // guarded by the same rsq < cut_ljsq test
         if (rsq < cut_ljsq[itype][jtype]) {
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
         } else forcelj = 0.0;
 
         fpair = (factor_coul*forcecoul + factor_lj*forcelj) * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         // reaction force on j only if newton_pair is set or j < nlocal
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
           if (rsq < cut_coulsq[itype][jtype])
             ecoul = factor_coul * qqrd2e * qtmp*q[j]*sqrt(r2inv);
           else ecoul = 0.0;
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
               offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    create the per-type-pair tables, each sized (ntypes+1) x (ntypes+1)
    sets allocated = 1 and clears setflag[i][j] for 1 <= i <= j <= ntypes
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulCut::allocate()
 {
   allocated = 1;
 
   const int ntypes = atom->ntypes;
   const int dim = ntypes + 1;
 
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   // LJ and Coulomb cutoffs, plus their squares
   memory->create(cut_lj,dim,dim,"pair:cut_lj");
   memory->create(cut_ljsq,dim,dim,"pair:cut_ljsq");
   memory->create(cut_coul,dim,dim,"pair:cut_coul");
   memory->create(cut_coulsq,dim,dim,"pair:cut_coulsq");
 
   // LJ coefficients and the prefactors derived from them in init_one()
   memory->create(epsilon,dim,dim,"pair:epsilon");
   memory->create(sigma,dim,dim,"pair:sigma");
   memory->create(lj1,dim,dim,"pair:lj1");
   memory->create(lj2,dim,dim,"pair:lj2");
   memory->create(lj3,dim,dim,"pair:lj3");
   memory->create(lj4,dim,dim,"pair:lj4");
   memory->create(offset,dim,dim,"pair:offset");
 }
 
 /* ----------------------------------------------------------------------
    global settings
    arg[0] = global LJ cutoff
    arg[1] = optional global Coulomb cutoff (defaults to the LJ cutoff)
    if the tables are already allocated, both cutoffs are written to every
    type pair whose coefficients were explicitly set (setflag != 0),
    including same-type (i,i) pairs
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulCut::settings(int narg, char **arg)
 {
   if (narg < 1 || narg > 2) error->all(FLERR,"Illegal pair_style command");
 
   cut_lj_global = force->numeric(FLERR,arg[0]);
   if (narg == 1) cut_coul_global = cut_lj_global;
   else cut_coul_global = force->numeric(FLERR,arg[1]);
 
   // reset cutoffs that have been explicitly set
   // the inner loop starts at j = i so that explicitly set (i,i) cutoffs
   // are updated as well; init_one() mixes cut_lj[i][j] and cut_coul[i][j]
   // from the diagonal entries, so stale values would leak into mixed pairs
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i; j <= atom->ntypes; j++)
         if (setflag[i][j]) {
           cut_lj[i][j] = cut_lj_global;
           cut_coul[i][j] = cut_coul_global;
         }
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulCut::coeff(int narg, char **arg)
 {
   if (narg < 4 || narg > 6) 
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double epsilon_one = force->numeric(FLERR,arg[2]);
   double sigma_one = force->numeric(FLERR,arg[3]);
 
   double cut_lj_one = cut_lj_global;
   double cut_coul_one = cut_coul_global;
   if (narg >= 5) cut_coul_one = cut_lj_one = force->numeric(FLERR,arg[4]);
   if (narg == 6) cut_coul_one = force->numeric(FLERR,arg[5]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       epsilon[i][j] = epsilon_one;
       sigma[i][j] = sigma_one;
       cut_lj[i][j] = cut_lj_one;
       cut_coul[i][j] = cut_coul_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulCut::init_style()
 {
   // compute() and single() read per-atom charges from atom->q, so abort
   // when the atom style does not provide them
   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/cut/coul/cut requires atom attribute q");
 
   // single neighbor list request, left at its default settings
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairLJCutCoulCut::init_one(int i, int j)
 {
   // pair was not set explicitly: derive its coefficients and both cutoffs
   // from the two same-type entries via mix_energy()/mix_distance()
   if (setflag[i][j] == 0) {
     epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j],
                                sigma[i][i],sigma[j][j]);
     sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
     cut_lj[i][j] = mix_distance(cut_lj[i][i],cut_lj[j][j]);
     cut_coul[i][j] = mix_distance(cut_coul[i][i],cut_coul[j][j]);
   }
 
   // the returned cutoff is the larger of the LJ and Coulomb cutoffs
   double cut = MAX(cut_lj[i][j],cut_coul[i][j]);
   cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
   cut_coulsq[i][j] = cut_coul[i][j] * cut_coul[i][j];
 
   // lj1,lj2 are the force prefactors and lj3,lj4 the energy prefactors
   // consumed by compute() and single()
   lj1[i][j] = 48.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj2[i][j] = 24.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
   lj3[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj4[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
 
   // with offset_flag set, offset is the 12-6 energy evaluated at
   // r = cut_lj, which the energy expressions subtract
   if (offset_flag) {
     double ratio = sigma[i][j] / cut_lj[i][j];
     offset[i][j] = 4.0 * epsilon[i][j] * (pow(ratio,12.0) - pow(ratio,6.0));
   } else offset[i][j] = 0.0;
 
   // mirror the derived tables into the (j,i) entries
   cut_ljsq[j][i] = cut_ljsq[i][j];
   cut_coulsq[j][i] = cut_coulsq[i][j];
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   lj4[j][i] = lj4[i][j];
   offset[j][i] = offset[i][j];
 
   // compute I,J contribution to long-range tail correction
   // count total # of atoms of type I and J via Allreduce
 
   if (tail_flag) {
     int *type = atom->type;
     int nlocal = atom->nlocal;
 
     // count[0]/count[1] = local atoms of type i/j; all[] = sums over world
     double count[2],all[2];
     count[0] = count[1] = 0.0;
     for (int k = 0; k < nlocal; k++) {
       if (type[k] == i) count[0] += 1.0;
       if (type[k] == j) count[1] += 1.0;
     }
     MPI_Allreduce(count,all,2,MPI_DOUBLE,MPI_SUM,world);
 
     // the tail terms use the LJ cutoff only
     double sig2 = sigma[i][j]*sigma[i][j];
     double sig6 = sig2*sig2*sig2;
     double rc3 = cut_lj[i][j]*cut_lj[i][j]*cut_lj[i][j];
     double rc6 = rc3*rc3;
     double rc9 = rc3*rc6;
     // results are left in the members etail_ij and ptail_ij
     etail_ij = 8.0*MY_PI*all[0]*all[1]*epsilon[i][j] *
       sig6 * (sig6 - 3.0*rc6) / (9.0*rc9);
     ptail_ij = 16.0*MY_PI*all[0]*all[1]*epsilon[i][j] *
       sig6 * (2.0*sig6 - 3.0*rc6) / (9.0*rc9);
   }
 
   return cut;
 }
 
 /* ----------------------------------------------------------------------
   write the global settings, then one record per type pair (j >= i):
   the setflag value, followed by epsilon, sigma, cut_lj and cut_coul
   when it is set
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulCut::write_restart(FILE *fp)
 {
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
 
       // coefficients are stored only for explicitly set pairs
       if (!setflag[itype][jtype]) continue;
 
       fwrite(&epsilon[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigma[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut_lj[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut_coul[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   read the global settings, allocate the tables, then read one record
   per type pair (j >= i) in the layout produced by write_restart()
   only the proc with comm->me == 0 touches fp; every value is broadcast
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulCut::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   const bool reader = (comm->me == 0);
   const int ntypes = atom->ntypes;
 
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       if (reader) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
 
       // unset pairs carry no coefficient data in the file
       if (!setflag[itype][jtype]) continue;
 
       if (reader) {
         fread(&epsilon[itype][jtype],sizeof(double),1,fp);
         fread(&sigma[itype][jtype],sizeof(double),1,fp);
         fread(&cut_lj[itype][jtype],sizeof(double),1,fp);
         fread(&cut_coul[itype][jtype],sizeof(double),1,fp);
       }
       MPI_Bcast(&epsilon[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&sigma[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut_lj[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut_coul[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   write the global settings to the restart file
   layout: cut_lj_global, cut_coul_global (double), then
   offset_flag, mix_flag, tail_flag (int)
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulCut::write_restart_settings(FILE *fp)
 {
   // global cutoffs
   fwrite(&cut_lj_global,sizeof(double),1,fp);
   fwrite(&cut_coul_global,sizeof(double),1,fp);
 
   // integer flags, in the order read_restart_settings() reads them back
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
   fwrite(&tail_flag,sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
   read the global settings from the restart file
   the proc with comm->me == 0 reads fp, then each value is broadcast
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulCut::read_restart_settings(FILE *fp)
 {
   const int me = comm->me;
 
   if (me == 0) {
     // same order as write_restart_settings()
     fread(&cut_lj_global,sizeof(double),1,fp);
     fread(&cut_coul_global,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
     fread(&tail_flag,sizeof(int),1,fp);
   }
 
   MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
   MPI_Bcast(&tail_flag,1,MPI_INT,0,world);
 }
 
 /* ----------------------------------------------------------------------
    write one line per atom type to the data file:
    type index, epsilon[i][i], sigma[i][i]
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulCut::write_data(FILE *fp)
 {
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     fprintf(fp,"%d %g %g\n",itype,epsilon[itype][itype],sigma[itype][itype]);
   }
 }
 
 /* ----------------------------------------------------------------------
    write one line per type pair (j >= i) to the data file:
    i, j, epsilon[i][j], sigma[i][j], cut_lj[i][j]
    cut_coul is not part of the output
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulCut::write_data_all(FILE *fp)
 {
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fprintf(fp,"%d %d %g %g %g\n",itype,jtype,
               epsilon[itype][jtype],sigma[itype][jtype],cut_lj[itype][jtype]);
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // energy and force for a single pair at squared separation rsq
 //   i, j:          atom indices, used to read the charges atom->q[i], q[j]
 //   itype, jtype:  type indices into the cutoff and coefficient tables
 //   factor_coul, factor_lj: weights applied to the Coulomb and LJ terms
 //   fforce (out):  (factor_coul*forcecoul + factor_lj*forcelj) / rsq
 //   returns:       weighted sum of the Coulomb and LJ energies; each term
 //                  contributes only inside its own cutoff
 double PairLJCutCoulCut::single(int i, int j, int itype, int jtype,
                                 double rsq,
                                 double factor_coul, double factor_lj,
                                 double &fforce)
 {
   const double inv_r2 = 1.0/rsq;
 
   // ----- force -----
 
   double fcoul = 0.0;
   if (rsq < cut_coulsq[itype][jtype]) {
     fcoul = force->qqrd2e * atom->q[i]*atom->q[j]*sqrt(inv_r2);
   }
 
   // inv_r6 is only assigned, and only read, inside the LJ cutoff
   double inv_r6;
   double flj = 0.0;
   if (rsq < cut_ljsq[itype][jtype]) {
     inv_r6 = inv_r2*inv_r2*inv_r2;
     flj = inv_r6 * (lj1[itype][jtype]*inv_r6 - lj2[itype][jtype]);
   }
 
   fforce = (factor_coul*fcoul + factor_lj*flj) * inv_r2;
 
   // ----- energy -----
 
   double energy = 0.0;
   if (rsq < cut_coulsq[itype][jtype]) {
     const double ecoul =
       force->qqrd2e * atom->q[i]*atom->q[j]*sqrt(inv_r2);
     energy += factor_coul*ecoul;
   }
   if (rsq < cut_ljsq[itype][jtype]) {
     const double elj =
       inv_r6*(lj3[itype][jtype]*inv_r6-lj4[itype][jtype]) -
       offset[itype][jtype];
     energy += factor_lj*elj;
   }
 
   return energy;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // look up an internal quantity by name
 //   "cut_coul" -> address of the cut_coul member, dim = 0
 //   "epsilon"  -> the epsilon table, dim = 2
 //   "sigma"    -> the sigma table, dim = 2
 //   any other name -> NULL, with dim left at 2
 void *PairLJCutCoulCut::extract(const char *str, int &dim)
 {
   if (strcmp(str,"cut_coul") == 0) {
     dim = 0;
     return (void *) &cut_coul;
   }
 
   dim = 2;
   if (strcmp(str,"epsilon") == 0) {
     return (void *) epsilon;
   }
   if (strcmp(str,"sigma") == 0) {
     return (void *) sigma;
   }
 
   return NULL;
 }
diff --git a/src/pair_lj_cut_coul_dsf.cpp b/src/pair_lj_cut_coul_dsf.cpp
index 1fac1ef44..4d3cbd9a4 100644
--- a/src/pair_lj_cut_coul_dsf.cpp
+++ b/src/pair_lj_cut_coul_dsf.cpp
@@ -1,476 +1,476 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen (ORNL)
    References: Fennell and Gezelter, JCP 124, 234104 (2006)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_lj_cut_coul_dsf.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "memory.h"
 #include "math_const.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 #define EWALD_F   1.12837917
 #define EWALD_P   0.3275911
 #define A1        0.254829592
 #define A2       -0.284496736
 #define A3        1.421413741
 #define A4       -1.453152027
 #define A5        1.061405429
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCutCoulDSF::PairLJCutCoulDSF(LAMMPS *lmp) : Pair(lmp)
 {
   // clears the inherited single_enable flag even though this class defines
   // a single() method further down
   // NOTE(review): presumably this tells callers not to invoke single() --
   // confirm against the Pair base class

   single_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCutCoulDSF::~PairLJCutCoulDSF()
 {
   // the per-type arrays exist only once allocate() has set the flag
 
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   memory->destroy(cut_lj);
   memory->destroy(cut_ljsq);
   memory->destroy(epsilon);
   memory->destroy(sigma);
   memory->destroy(lj1);
   memory->destroy(lj2);
   memory->destroy(lj3);
   memory->destroy(lj4);
   memory->destroy(offset);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutCoulDSF::compute(int eflag, int vflag)
 {
   // accumulate LJ plus erfc-damped, shifted Coulomb pair forces into
   // atom->f for every pair in the neighbor list; energy and virial are
   // tallied through ev_tally() when eflag/vflag request them

   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double r,rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double prefactor,erfcc,erfcd,t;
   int *ilist,*jlist,*numneigh,**firstneigh;
   
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
   
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   double *special_coul = force->special_coul;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
   
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     
     // per-atom self term, tallied as Coulomb energy once per atom i
     // (passed with i as both atoms and newton flag 0)

     if (eflag) {
       double e_self = -(e_shift/2.0 + alpha/MY_PIS) * qtmp*qtmp*qqrd2e;
       ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0);
     }
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];

       // NOTE(review): sbmask()/NEIGHMASK presumably split special-bond
       // bits from the atom index stored in the neighbor entry -- confirm
       // in the neighbor-list code

       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
 
         if (rsq < cut_ljsq[itype][jtype]) {
           r6inv = r2inv*r2inv*r2inv;
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
         } else forcelj = 0.0;
 
         // erfcc is a polynomial in t times exp(-alpha^2 r^2); single()
         // calls erfc(alpha*r) at the corresponding place
         // factor_coul is folded into prefactor, so it is not applied again
         // when fpair is formed

         if (rsq < cut_coulsq) {
           r = sqrt(rsq);
           prefactor = factor_coul * qqrd2e*qtmp*q[j]/r;
           erfcd = exp(-alpha*alpha*r*r);
           t = 1.0 / (1.0 + EWALD_P*alpha*r);
           erfcc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * erfcd;
           forcecoul = prefactor * (erfcc/r + 2.0*alpha/MY_PIS * erfcd + 
             r*f_shift) * r;
         } else forcecoul = 0.0;
 
         fpair = (forcecoul + factor_lj*forcelj) * r2inv;
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;

         // atom j receives the opposite force only when newton_pair is set
         // or j indexes below nlocal

         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         // energies reuse r6inv, prefactor, erfcc and r from the force
         // branches above, guarded by the same cutoff tests

         if (eflag) {
           if (rsq < cut_ljsq[itype][jtype]) {
             evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
                     offset[itype][jtype];
             evdwl *= factor_lj;
           } else evdwl = 0.0;
           
           if (rsq < cut_coulsq) {
             ecoul = prefactor * (erfcc - r*e_shift - rsq*f_shift);
           } else ecoul = 0.0;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulDSF::allocate()
 {
   allocated = 1;
 
   // arrays are indexed by 1-based atom type, hence ntypes+1 per dimension
 
   const int ntypes = atom->ntypes;
   const int dim = ntypes + 1;
 
   // only the i <= j half of setflag is cleared here
 
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   memory->create(cut_lj,dim,dim,"pair:cut_lj");
   memory->create(cut_ljsq,dim,dim,"pair:cut_ljsq");
   memory->create(epsilon,dim,dim,"pair:epsilon");
   memory->create(sigma,dim,dim,"pair:sigma");
   memory->create(lj1,dim,dim,"pair:lj1");
   memory->create(lj2,dim,dim,"pair:lj2");
   memory->create(lj3,dim,dim,"pair:lj3");
   memory->create(lj4,dim,dim,"pair:lj4");
   memory->create(offset,dim,dim,"pair:offset");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulDSF::settings(int narg, char **arg)
 {
   // expects exactly three numeric arguments: alpha, global LJ cutoff,
   // Coulomb cutoff
 
   if (narg != 3) error->all(FLERR,"Illegal pair_style command");
 
   alpha = force->numeric(FLERR,arg[0]);
   cut_lj_global = force->numeric(FLERR,arg[1]);
   cut_coul = force->numeric(FLERR,arg[2]);
 
   // reset cutoffs that have been explicitly set
   // the inner loop starts at j = i so that like-type pairs (i,i) are reset
   // too; starting at i+1 left their previous cutoff in place
 
   if (allocated) {
     int i,j;
     for (i = 1; i <= atom->ntypes; i++)
       for (j = i; j <= atom->ntypes; j++)
         if (setflag[i][j])
           cut_lj[i][j] = cut_lj_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulDSF::coeff(int narg, char **arg)
 {
   if (narg < 4 || narg > 5) 
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
   
   double epsilon_one = force->numeric(FLERR,arg[2]);
   double sigma_one = force->numeric(FLERR,arg[3]);
 
   double cut_lj_one = cut_lj_global;
   if (narg == 5) cut_lj_one = force->numeric(FLERR,arg[4]);
     
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       epsilon[i][j] = epsilon_one;
       sigma[i][j] = sigma_one;
       cut_lj[i][j] = cut_lj_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulDSF::init_style()
 {
   // refuse to run without per-atom charges, request a neighbor list, and
   // precompute the squared Coulomb cutoff plus the two shift constants

   if (!atom->q_flag)
     error->all(FLERR,"Pair style lj/cut/coul/dsf requires atom attribute q");
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 
   // f_shift and e_shift are evaluated at r = cut_coul; substituting them
   // into the bracketed force and energy expressions used in compute()
   // makes both brackets zero at r = cut_coul

   cut_coulsq = cut_coul * cut_coul;
   double erfcc = erfc(alpha*cut_coul); 
   double erfcd = exp(-alpha*alpha*cut_coul*cut_coul);
   f_shift = -(erfcc/cut_coulsq + 2.0/MY_PIS*alpha*erfcd/cut_coul); 
   e_shift = erfcc/cut_coul - f_shift*cut_coul; 
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairLJCutCoulDSF::init_one(int i, int j)
 {
   // fill the derived per-pair tables for types (i,j) and mirror them to
   // (j,i); returns the larger of the pair's LJ cutoff and the Coulomb cutoff

   // pairs never set through coeff() get mixed values from the two
   // like-type entries

   if (setflag[i][j] == 0) {
     epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j],
                                sigma[i][i],sigma[j][j]);
     sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
     cut_lj[i][j] = mix_distance(cut_lj[i][i],cut_lj[j][j]);
   }
 
   double cut = MAX(cut_lj[i][j],cut_coul);
   cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j];
   
   // lj1/lj2 are the coefficients used in the force expressions,
   // lj3/lj4 the ones used in the energy expressions

   lj1[i][j] = 48.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj2[i][j] = 24.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
   lj3[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj4[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
      
   // offset is the 12-6 energy evaluated at r = cut_lj[i][j]; the energy
   // code subtracts it

   if (offset_flag) {
     double ratio = sigma[i][j] / cut_lj[i][j];
     offset[i][j] = 4.0 * epsilon[i][j] * (pow(ratio,12.0) - pow(ratio,6.0));
   } else offset[i][j] = 0.0;
   
   cut_ljsq[j][i] = cut_ljsq[i][j];
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   lj4[j][i] = lj4[i][j];
   offset[j][i] = offset[i][j];
 
   // compute I,J contribution to long-range tail correction
   // count total # of atoms of type I and J via Allreduce
 
   if (tail_flag) {
     int *type = atom->type;
     int nlocal = atom->nlocal;
 
     // counts are kept as doubles so one MPI_DOUBLE reduction sums both

     double count[2],all[2];
     count[0] = count[1] = 0.0;
     for (int k = 0; k < nlocal; k++) {
       if (type[k] == i) count[0] += 1.0;
       if (type[k] == j) count[1] += 1.0;
     }
     MPI_Allreduce(count,all,2,MPI_DOUBLE,MPI_SUM,world);
         
     double sig2 = sigma[i][j]*sigma[i][j];
     double sig6 = sig2*sig2*sig2;
     double rc3 = cut_lj[i][j]*cut_lj[i][j]*cut_lj[i][j];
     double rc6 = rc3*rc3;
     double rc9 = rc3*rc6;
     etail_ij = 8.0*MY_PI*all[0]*all[1]*epsilon[i][j] * 
                sig6 * (sig6 - 3.0*rc6) / (9.0*rc9); 
     ptail_ij = 16.0*MY_PI*all[0]*all[1]*epsilon[i][j] * 
                sig6 * (2.0*sig6 - 3.0*rc6) / (9.0*rc9); 
   } 
 
   return cut;
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulDSF::write_restart(FILE *fp)
 {
   write_restart_settings(fp);
 
   // per pair (i <= j): the setflag value, followed by epsilon, sigma and
   // cut_lj only when the flag is nonzero
 
   for (int itype = 1; itype <= atom->ntypes; ++itype) {
     for (int jtype = itype; jtype <= atom->ntypes; ++jtype) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
       if (!setflag[itype][jtype]) continue;
 
       fwrite(&epsilon[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigma[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut_lj[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulDSF::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   // only rank 0 reads from fp; every value is then broadcast from rank 0
   // so all ranks hold the same coefficients
 
   const int me = comm->me;
 
   for (int itype = 1; itype <= atom->ntypes; ++itype) {
     for (int jtype = itype; jtype <= atom->ntypes; ++jtype) {
       if (me == 0) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
 
       // coefficients are stored only for pairs whose flag is nonzero
 
       if (!setflag[itype][jtype]) continue;
 
       if (me == 0) {
         fread(&epsilon[itype][jtype],sizeof(double),1,fp);
         fread(&sigma[itype][jtype],sizeof(double),1,fp);
         fread(&cut_lj[itype][jtype],sizeof(double),1,fp);
       }
       MPI_Bcast(&epsilon[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&sigma[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut_lj[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulDSF::write_restart_settings(FILE *fp)
 {
   // three doubles then three ints, written in the same order in which
   // read_restart_settings() reads them back

   fwrite(&alpha,sizeof(double),1,fp);
   fwrite(&cut_lj_global,sizeof(double),1,fp);
   fwrite(&cut_coul,sizeof(double),1,fp);
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
   fwrite(&tail_flag,sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJCutCoulDSF::read_restart_settings(FILE *fp)
 {
   // rank 0 reads the values in the order write_restart_settings() wrote
   // them; all ranks then take part in the broadcasts
 
   const int me = comm->me;
   if (me == 0) {
     fread(&alpha,sizeof(double),1,fp);
     fread(&cut_lj_global,sizeof(double),1,fp);
     fread(&cut_coul,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
     fread(&tail_flag,sizeof(int),1,fp);
   }
 
   // doubles
 
   MPI_Bcast(&alpha,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
 
   // integer flags
 
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
   MPI_Bcast(&tail_flag,1,MPI_INT,0,world);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLJCutCoulDSF::single(int i, int j, int itype, int jtype, double rsq,
                                 double factor_coul, double factor_lj,
                                 double &fforce)
 {
   const bool in_lj = rsq < cut_ljsq[itype][jtype];
   const bool in_coul = rsq < cut_coulsq;
   const double r2inv = 1.0/rsq;
 
   // LJ force factor; r6inv is kept for the energy below
 
   double r6inv = 0.0;
   double forcelj = 0.0;
   if (in_lj) {
     r6inv = r2inv*r2inv*r2inv;
     forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
   }
 
   // Coulomb force factor; factor_coul is folded into prefactor
 
   double r = 0.0;
   double prefactor = 0.0;
   double erfcc = 0.0;
   double forcecoul = 0.0;
   if (in_coul) {
     r = sqrt(rsq);
     prefactor = factor_coul * force->qqrd2e * atom->q[i]*atom->q[j]/r;
     erfcc = erfc(alpha*r);
     const double erfcd = exp(-alpha*alpha*r*r);
     forcecoul = prefactor * (erfcc/r + 2.0*alpha/MY_PIS * erfcd +
       r*f_shift) * r;
   }
 
   fforce = (forcecoul + factor_lj*forcelj) * r2inv;
 
   // energy: LJ part scaled by factor_lj, Coulomb part already scaled
 
   double eng = 0.0;
   if (in_lj) {
     const double philj = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
       offset[itype][jtype];
     eng += factor_lj*philj;
   }
   if (in_coul) {
     const double phicoul = prefactor * (erfcc - r*e_shift - rsq*f_shift);
     eng += phicoul;
   }
 
   return eng;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void *PairLJCutCoulDSF::extract(const char *str, int &dim)
 {
   // "cut_coul" is the only name handled; dim is left untouched otherwise
 
   if (strcmp(str,"cut_coul") != 0) return NULL;
 
   dim = 0;
   return (void *) &cut_coul;
 }
diff --git a/src/pair_lj_gromacs_coul_gromacs.cpp b/src/pair_lj_gromacs_coul_gromacs.cpp
index fb7e36f92..5a78c4e14 100644
--- a/src/pair_lj_gromacs_coul_gromacs.cpp
+++ b/src/pair_lj_gromacs_coul_gromacs.cpp
@@ -1,509 +1,509 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Mark Stevens (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_lj_gromacs_coul_gromacs.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairLJGromacsCoulGromacs::PairLJGromacsCoulGromacs(LAMMPS *lmp) : Pair(lmp)
 {
   // sets the inherited writedata flag; this class provides write_data()
   // and write_data_all()

   writedata = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairLJGromacsCoulGromacs::~PairLJGromacsCoulGromacs()
 {
   // nothing to release unless allocate() has run
 
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   memory->destroy(epsilon);
   memory->destroy(sigma);
   memory->destroy(lj1);
   memory->destroy(lj2);
   memory->destroy(lj3);
   memory->destroy(lj4);
   memory->destroy(ljsw1);
   memory->destroy(ljsw2);
   memory->destroy(ljsw3);
   memory->destroy(ljsw4);
   memory->destroy(ljsw5);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJGromacsCoulGromacs::compute(int eflag, int vflag)
 {
   // accumulate switched LJ and switched Coulomb pair forces into atom->f
   // for every pair in the neighbor list; energy and virial are tallied
   // through ev_tally() when eflag/vflag request them
 
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
   double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   double r,tlj,tc,fswitch,fswitchcoul,eswitch,ecoulswitch;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = ecoul = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
   int newton_pair = force->newton_pair;
   double qqrd2e = force->qqrd2e;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
 
       if (rsq < cut_bothsq) {
         r2inv = 1.0/rsq;
 
         // skip if qi or qj = 0.0 since this potential may be used as
         // coarse-grain model with many uncharged atoms
 
         if (rsq < cut_coulsq && qtmp != 0.0 && q[j] != 0.0) {
           forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
 
           // between the inner and outer cutoff a switching force is added
 
           if (rsq > cut_coul_innersq) {
             r = sqrt(rsq);
             tc = r - cut_coul_inner;
             fswitchcoul = qqrd2e * qtmp*q[j]*r*tc*tc*(coulsw1 + coulsw2*tc);
             forcecoul += fswitchcoul;
           }
         } else forcecoul = 0.0;
 
         // jtype is looked up only inside the LJ cutoff; the LJ energy
         // branch below is guarded by the same test
 
         if (rsq < cut_ljsq) {
           r6inv = r2inv*r2inv*r2inv;
           jtype = type[j];
           forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
           if (rsq > cut_lj_innersq) {
             r = sqrt(rsq);
             tlj = r - cut_lj_inner;
             fswitch = r*tlj*tlj*(ljsw1[itype][jtype] +
                                  ljsw2[itype][jtype]*tlj);
             forcelj += fswitch;
           }
         } else forcelj = 0.0;
 
         fpair = (factor_coul*forcecoul + factor_lj*forcelj) * r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
 
           // same charge test as the force branch above: tc is assigned
           // only there, so without the test the switching term could read
           // tc before it was set for this pair; with a zero charge the
           // Coulomb energy is zero
 
           if (rsq < cut_coulsq && qtmp != 0.0 && q[j] != 0.0) {
             ecoul = qqrd2e * qtmp*q[j] * (sqrt(r2inv) - coulsw5);
             if (rsq > cut_coul_innersq) {
               ecoulswitch = tc*tc*tc * (coulsw3 + coulsw4*tc);
               ecoul += qqrd2e*qtmp*q[j]*ecoulswitch;
             }
             ecoul *= factor_coul;
           } else ecoul = 0.0;
           if (rsq < cut_ljsq) {
             evdwl = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
             evdwl += ljsw5[itype][jtype];
             if (rsq > cut_lj_innersq) {
               eswitch = tlj*tlj*tlj *
                 (ljsw3[itype][jtype] + ljsw4[itype][jtype]*tlj);
               evdwl += eswitch;
             }
             evdwl *= factor_lj;
           } else evdwl = 0.0;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,ecoul,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairLJGromacsCoulGromacs::allocate()
 {
   allocated = 1;
 
   // arrays are indexed by 1-based atom type, hence ntypes+1 per dimension
 
   const int ntypes = atom->ntypes;
   const int dim = ntypes + 1;
 
   // only the i <= j half of setflag is cleared here
 
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   memory->create(epsilon,dim,dim,"pair:epsilon");
   memory->create(sigma,dim,dim,"pair:sigma");
   memory->create(lj1,dim,dim,"pair:lj1");
   memory->create(lj2,dim,dim,"pair:lj2");
   memory->create(lj3,dim,dim,"pair:lj3");
   memory->create(lj4,dim,dim,"pair:lj4");
   memory->create(ljsw1,dim,dim,"pair:ljsw1");
   memory->create(ljsw2,dim,dim,"pair:ljsw2");
   memory->create(ljsw3,dim,dim,"pair:ljsw3");
   memory->create(ljsw4,dim,dim,"pair:ljsw4");
   memory->create(ljsw5,dim,dim,"pair:ljsw5");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairLJGromacsCoulGromacs::settings(int narg, char **arg)
 {
   if (narg != 2 && narg != 4)
     error->all(FLERR,"Illegal pair_style command");
 
   cut_lj_inner = force->numeric(FLERR,arg[0]);
   cut_lj = force->numeric(FLERR,arg[1]);
   if (narg == 2) {
     cut_coul_inner = cut_lj_inner;
     cut_coul = cut_lj;
   } else {
     cut_coul_inner = force->numeric(FLERR,arg[2]);
     cut_coul = force->numeric(FLERR,arg[3]);
   }
 
   if (cut_lj_inner <= 0.0 || cut_coul_inner < 0.0)
     error->all(FLERR,"Illegal pair_style command");
   if (cut_lj_inner > cut_lj || cut_coul_inner > cut_coul)
     error->all(FLERR,"Illegal pair_style command");
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairLJGromacsCoulGromacs::coeff(int narg, char **arg)
 {
   if (narg != 4) error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   double epsilon_one = force->numeric(FLERR,arg[2]);
   double sigma_one = force->numeric(FLERR,arg[3]);
 
   int count = 0;
   for (int i = ilo; i <= ihi; i++) {
     for (int j = MAX(jlo,i); j <= jhi; j++) {
       epsilon[i][j] = epsilon_one;
       sigma[i][j] = sigma_one;
       setflag[i][j] = 1;
       count++;
     }
   }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 void PairLJGromacsCoulGromacs::init_style()
 {
   // refuse to run without per-atom charges, request a neighbor list, and
   // cache the squared cutoffs used by compute() and single()

   if (!atom->q_flag)
     error->all(FLERR,
                "Pair style lj/gromacs/coul/gromacs requires atom attribute q");
 
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 
   cut_lj_innersq = cut_lj_inner * cut_lj_inner;
   cut_ljsq = cut_lj * cut_lj;
   cut_coul_innersq = cut_coul_inner * cut_coul_inner;
   cut_coulsq = cut_coul * cut_coul;

   // larger of the two outer cutoffs; compute() tests it first for each pair

   cut_bothsq = MAX(cut_ljsq,cut_coulsq);
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairLJGromacsCoulGromacs::init_one(int i, int j)
 {
   // fill the LJ and switching coefficient tables for types (i,j), mirror
   // them to (j,i), and return the larger of the LJ and Coulomb outer cutoffs

   // pairs never set through coeff() get mixed values from the two
   // like-type entries

   if (setflag[i][j] == 0) {
     epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j],
                                sigma[i][i],sigma[j][j]);
     sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
   }
 
   double cut = MAX(cut_lj,cut_coul);
 
   lj1[i][j] = 48.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj2[i][j] = 24.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
   lj3[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],12.0);
   lj4[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],6.0);
 
   // a6/b6 and a12/b12 are switching coefficients for the r^-6 and r^-12
   // terms; c6/c12 are the matching constant energy shifts
   // NOTE(review): t is zero when cut_lj == cut_lj_inner, which settings()
   // accepts, so t2inv would divide by zero -- confirm whether equality
   // should be rejected

   double r6inv = 1.0/pow(cut_lj,6.0);
   double r8inv = 1.0/pow(cut_lj,8.0);
   double t = cut_lj - cut_lj_inner;
   double t2inv = 1.0/(t*t);
   double t3inv = t2inv/t;
   double t3 = 1.0/t3inv;
   double a6 = (7.0*cut_lj_inner - 10.0*cut_lj)*r8inv*t2inv;
   double b6 = (9.0*cut_lj -  7.0*cut_lj_inner)*r8inv*t3inv;
   double a12 = (13.0*cut_lj_inner - 16.0*cut_lj)*r6inv*r8inv*t2inv;
   double b12 = (15.0*cut_lj - 13.0*cut_lj_inner)*r6inv*r8inv*t3inv;
   double c6 = r6inv - t3*(6.0*a6/3.0 + 6.0*b6*t/4.0);
   double c12 = r6inv*r6inv - t3*(12.0*a12/3.0 + 12.0*b12*t/4.0);
 
   // ljsw1/ljsw2 feed the switching force, ljsw3/ljsw4 the switching
   // energy, and ljsw5 is added to the LJ energy inside the cutoff

   ljsw1[i][j] = lj1[i][j]*a12 - lj2[i][j]*a6;
   ljsw2[i][j] = lj1[i][j]*b12 - lj2[i][j]*b6;
   ljsw3[i][j] = -lj3[i][j]*12.0*a12/3.0 + lj4[i][j]*6.0*a6/3.0;
   ljsw4[i][j] = -lj3[i][j]*12.0*b12/4.0 + lj4[i][j]*6.0*b6/4.0;
   ljsw5[i][j] = -lj3[i][j]*c12 + lj4[i][j]*c6;
 
   // the Coulomb switching constants depend only on the global cutoffs and
   // are recomputed on every call
   // NOTE(review): the same t == 0 concern applies when
   // cut_coul == cut_coul_inner

   double r3inv = 1.0/pow(cut_coul,3.0);
   t = cut_coul - cut_coul_inner;
   t2inv = 1.0/(t*t);
   t3inv = t2inv/t;
   double a1 = (2.0*cut_coul_inner - 5.0*cut_coul) * r3inv*t2inv;
   double b1 = (4.0*cut_coul - 2.0*cut_coul_inner) * r3inv*t3inv;
   coulsw1 = a1;
   coulsw2 = b1;
   coulsw3 = -a1/3.0;
   coulsw4 = -b1/4.0;
   coulsw5 = 1.0/cut_coul - t*t*t*(a1/3.0 + b1*t/4.0);
 
   lj1[j][i] = lj1[i][j];
   lj2[j][i] = lj2[i][j];
   lj3[j][i] = lj3[i][j];
   lj4[j][i] = lj4[i][j];
   ljsw1[j][i] = ljsw1[i][j];
   ljsw2[j][i] = ljsw2[i][j];
   ljsw3[j][i] = ljsw3[i][j];
   ljsw4[j][i] = ljsw4[i][j];
   ljsw5[j][i] = ljsw5[i][j];
 
   return cut;
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJGromacsCoulGromacs::write_restart(FILE *fp)
 {
   write_restart_settings(fp);
 
   // per pair (i <= j): the setflag value, followed by epsilon and sigma
   // only when the flag is nonzero
 
   for (int itype = 1; itype <= atom->ntypes; ++itype) {
     for (int jtype = itype; jtype <= atom->ntypes; ++jtype) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
       if (!setflag[itype][jtype]) continue;
 
       fwrite(&epsilon[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigma[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJGromacsCoulGromacs::read_restart(FILE *fp)
 {
   read_restart_settings(fp);
   allocate();
 
   // only rank 0 reads from fp; every value is then broadcast from rank 0
   // so all ranks hold the same coefficients
 
   const int me = comm->me;
 
   for (int itype = 1; itype <= atom->ntypes; ++itype) {
     for (int jtype = itype; jtype <= atom->ntypes; ++jtype) {
       if (me == 0) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
 
       // coefficients are stored only for pairs whose flag is nonzero
 
       if (!setflag[itype][jtype]) continue;
 
       if (me == 0) {
         fread(&epsilon[itype][jtype],sizeof(double),1,fp);
         fread(&sigma[itype][jtype],sizeof(double),1,fp);
       }
       MPI_Bcast(&epsilon[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&sigma[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
   proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairLJGromacsCoulGromacs::write_restart_settings(FILE *fp)
 {
   // four doubles then two ints, written in the same order in which
   // read_restart_settings() reads them back

   fwrite(&cut_lj_inner,sizeof(double),1,fp);
   fwrite(&cut_lj,sizeof(double),1,fp);
   fwrite(&cut_coul_inner,sizeof(double),1,fp);
   fwrite(&cut_coul,sizeof(double),1,fp);
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairLJGromacsCoulGromacs::read_restart_settings(FILE *fp)
 {
   // rank 0 reads the values in the order write_restart_settings() wrote
   // them; all ranks then take part in the broadcasts
 
   const int me = comm->me;
   if (me == 0) {
     fread(&cut_lj_inner,sizeof(double),1,fp);
     fread(&cut_lj,sizeof(double),1,fp);
     fread(&cut_coul_inner,sizeof(double),1,fp);
     fread(&cut_coul,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
   }
 
   // cutoffs
 
   MPI_Bcast(&cut_lj_inner,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_lj,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul_inner,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world);
 
   // integer flags
 
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to data file
 ------------------------------------------------------------------------- */
 
 void PairLJGromacsCoulGromacs::write_data(FILE *fp)
 {
   // one line per atom type, diagonal entries only: type epsilon sigma
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     fprintf(fp,"%d %g %g\n",itype,epsilon[itype][itype],sigma[itype][itype]);
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes all pairs to data file
 ------------------------------------------------------------------------- */
 
 void PairLJGromacsCoulGromacs::write_data_all(FILE *fp)
 {
   // one line per unique type pair (diagonal included):
   // itype jtype epsilon sigma
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; ++itype) {
     for (int jtype = itype; jtype <= ntypes; ++jtype) {
       fprintf(fp,"%d %d %g %g\n",itype,jtype,
               epsilon[itype][jtype],sigma[itype][jtype]);
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLJGromacsCoulGromacs::single(int i, int j, int itype, int jtype,
                                 double rsq,
                                 double factor_coul, double factor_lj,
                                 double &fforce)
 {
   // which terms are active, and whether the pair lies in a switching
   // region (between the inner and outer cutoff)
 
   const bool in_coul = rsq < cut_coulsq;
   const bool coul_switched = in_coul && rsq > cut_coul_innersq;
   const bool in_lj = rsq < cut_ljsq;
   const bool lj_switched = in_lj && rsq > cut_lj_innersq;
 
   const double r2inv = 1.0/rsq;
 
   // tc, tlj and r6inv are set in the force section and reused for energy
 
   double tc = 0.0;
   double tlj = 0.0;
   double r6inv = 0.0;
 
   double forcecoul = 0.0;
   if (in_coul) {
     forcecoul = force->qqrd2e * atom->q[i]*atom->q[j]*sqrt(r2inv);
     if (coul_switched) {
       const double r = sqrt(rsq);
       tc = r - cut_coul_inner;
       const double fswitchcoul =  force->qqrd2e *
         atom->q[i]*atom->q[j] * r*tc*tc * (coulsw1 + coulsw2*tc);
       forcecoul += fswitchcoul;
     }
   }
 
   double forcelj = 0.0;
   if (in_lj) {
     r6inv = r2inv*r2inv*r2inv;
     forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
     if (lj_switched) {
       const double r = sqrt(rsq);
       tlj = r - cut_lj_inner;
       const double fswitch =
         r*tlj*tlj*(ljsw1[itype][jtype] + ljsw2[itype][jtype]*tlj);
       forcelj += fswitch;
     }
   }
 
   fforce = (factor_coul*forcecoul + factor_lj*forcelj) * r2inv;
 
   // energy is the return value
 
   double eng = 0.0;
 
   if (in_coul) {
     double phicoul =
       force->qqrd2e * atom->q[i]*atom->q[j] * (sqrt(r2inv)-coulsw5);
     if (coul_switched) {
       const double phiswitchcoul = force->qqrd2e * atom->q[i]*atom->q[j] *
         tc*tc*tc * (coulsw3 + coulsw4*tc);
       phicoul += phiswitchcoul;
     }
     eng += factor_coul*phicoul;
   }
 
   if (in_lj) {
     double philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
     philj += ljsw5[itype][jtype];
     if (lj_switched) {
       const double phiswitch = tlj*tlj*tlj *
         (ljsw3[itype][jtype] + ljsw4[itype][jtype]*tlj);
       philj += phiswitch;
     }
     eng += factor_lj*philj;
   }
 
   return eng;
 }
diff --git a/src/pair_mie_cut.cpp b/src/pair_mie_cut.cpp
index 7657388d1..0f7cb5eab 100644
--- a/src/pair_mie_cut.cpp
+++ b/src/pair_mie_cut.cpp
@@ -1,743 +1,743 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Cassiano Aimoli (aimoli@gmail.com)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_mie_cut.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 /* ---------------------------------------------------------------------- */
 
 PairMIECut::PairMIECut(LAMMPS *lmp) : Pair(lmp)
 {
   // this class implements compute_inner/middle/outer, so set the
   // rRESPA flag (presumably consulted by the base class or integrator)
   respa_enable = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairMIECut::~PairMIECut()
 {
   // nothing to release unless allocate() has run
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   // per-type-pair parameters and derived coefficients
   memory->destroy(cut);
   memory->destroy(epsilon);
   memory->destroy(sigma);
   memory->destroy(gamR);
   memory->destroy(gamA);
   memory->destroy(Cmie);
   memory->destroy(mie1);
   memory->destroy(mie2);
   memory->destroy(mie3);
   memory->destroy(mie4);
   memory->destroy(offset);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairMIECut::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r2inv,rgamR,rgamA,forcemie,factor_mie;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_mie = force->special_lj;
   int newton_pair = force->newton_pair;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_mie = special_mie[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         r2inv = 1.0/rsq;
         rgamA = pow(r2inv,(gamA[itype][jtype]/2.0));
         rgamR = pow(r2inv,(gamR[itype][jtype]/2.0));
         forcemie =  (mie1[itype][jtype]*rgamR - mie2[itype][jtype]*rgamA);
         fpair = factor_mie*forcemie*r2inv;
 
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
           evdwl = (mie3[itype][jtype]*rgamR - mie4[itype][jtype]*rgamA) -
             offset[itype][jtype];
           evdwl *= factor_mie;
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairMIECut::compute_inner()
 {
   // Forces from the inner rRESPA neighbor list.  The pair force is
   // applied in full up to cut_respa[0] and smoothly scaled down to
   // zero between cut_respa[0] and cut_respa[1].
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   const int nlocal = atom->nlocal;
   double *special_mie = force->special_lj;
   const int newton_pair = force->newton_pair;
 
   const int inum = listinner->inum;
   int *ilist = listinner->ilist;
   int *numneigh = listinner->numneigh;
   int **firstneigh = listinner->firstneigh;
 
   const double cut_out_on = cut_respa[0];
   const double cut_out_off = cut_respa[1];
 
   const double cut_out_diff = cut_out_off - cut_out_on;
   const double cut_out_on_sq = cut_out_on*cut_out_on;
   const double cut_out_off_sq = cut_out_off*cut_out_off;
 
   for (int ii = 0; ii < inum; ii++) {
     const int i = ilist[ii];
     const double xi = x[i][0];
     const double yi = x[i][1];
     const double zi = x[i][2];
     const int itype = type[i];
     int *jlist = firstneigh[i];
     const int jnum = numneigh[i];
 
     for (int jj = 0; jj < jnum; jj++) {
       int j = jlist[jj];
       const double factor_mie = special_mie[sbmask(j)];
       j &= NEIGHMASK;
 
       const double delx = xi - x[j][0];
       const double dely = yi - x[j][1];
       const double delz = zi - x[j][2];
       const double rsq = delx*delx + dely*dely + delz*delz;
 
       if (!(rsq < cut_out_off_sq)) continue;
 
       const int jtype = type[j];
       const double r2inv = 1.0/rsq;
       const double rgamA = pow(r2inv,(gamA[itype][jtype]/2.0));
       const double rgamR = pow(r2inv,(gamR[itype][jtype]/2.0));
       const double forcemie =
         (mie1[itype][jtype]*rgamR - mie2[itype][jtype]*rgamA);
       double fpair = factor_mie*forcemie*r2inv;
 
       // switching region: scale by 1 - (3 rsw^2 - 2 rsw^3)
       if (rsq > cut_out_on_sq) {
         const double rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff;
         fpair *= 1.0 - rsw*rsw*(3.0 - 2.0*rsw);
       }
 
       f[i][0] += delx*fpair;
       f[i][1] += dely*fpair;
       f[i][2] += delz*fpair;
       if (newton_pair || j < nlocal) {
         f[j][0] -= delx*fpair;
         f[j][1] -= dely*fpair;
         f[j][2] -= delz*fpair;
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairMIECut::compute_middle()
 {
   // Forces from the middle rRESPA neighbor list.  The pair force is
   // switched on between cut_respa[0] and cut_respa[1] and switched
   // off again between cut_respa[2] and cut_respa[3].
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   const int nlocal = atom->nlocal;
   double *special_mie = force->special_lj;
   const int newton_pair = force->newton_pair;
 
   const int inum = listmiddle->inum;
   int *ilist = listmiddle->ilist;
   int *numneigh = listmiddle->numneigh;
   int **firstneigh = listmiddle->firstneigh;
 
   const double cut_in_off = cut_respa[0];
   const double cut_in_on = cut_respa[1];
   const double cut_out_on = cut_respa[2];
   const double cut_out_off = cut_respa[3];
 
   const double cut_in_diff = cut_in_on - cut_in_off;
   const double cut_out_diff = cut_out_off - cut_out_on;
   const double cut_in_off_sq = cut_in_off*cut_in_off;
   const double cut_in_on_sq = cut_in_on*cut_in_on;
   const double cut_out_on_sq = cut_out_on*cut_out_on;
   const double cut_out_off_sq = cut_out_off*cut_out_off;
 
   for (int ii = 0; ii < inum; ii++) {
     const int i = ilist[ii];
     const double xi = x[i][0];
     const double yi = x[i][1];
     const double zi = x[i][2];
     const int itype = type[i];
     int *jlist = firstneigh[i];
     const int jnum = numneigh[i];
 
     for (int jj = 0; jj < jnum; jj++) {
       int j = jlist[jj];
       const double factor_mie = special_mie[sbmask(j)];
       j &= NEIGHMASK;
 
       const double delx = xi - x[j][0];
       const double dely = yi - x[j][1];
       const double delz = zi - x[j][2];
       const double rsq = delx*delx + dely*dely + delz*delz;
 
       // only separations strictly inside (cut_in_off, cut_out_off)
       if (!(rsq < cut_out_off_sq && rsq > cut_in_off_sq)) continue;
 
       const int jtype = type[j];
       const double r2inv = 1.0/rsq;
       const double rgamA = pow(r2inv,(gamA[itype][jtype]/2.0));
       const double rgamR = pow(r2inv,(gamR[itype][jtype]/2.0));
       const double forcemie =
         (mie1[itype][jtype]*rgamR - mie2[itype][jtype]*rgamA);
       double fpair = factor_mie*forcemie*r2inv;
 
       // lower switching region: ramp the force up
       if (rsq < cut_in_on_sq) {
         const double rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff;
         fpair *= rsw*rsw*(3.0 - 2.0*rsw);
       }
       // upper switching region: ramp the force down
       if (rsq > cut_out_on_sq) {
         const double rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff;
         fpair *= 1.0 + rsw*rsw*(2.0*rsw - 3.0);
       }
 
       f[i][0] += delx*fpair;
       f[i][1] += dely*fpair;
       f[i][2] += delz*fpair;
       if (newton_pair || j < nlocal) {
         f[j][0] -= delx*fpair;
         f[j][1] -= dely*fpair;
         f[j][2] -= delz*fpair;
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Outer rRESPA level: applies the part of the pair force beyond
 // cut_respa[2] (ramped in between cut_respa[2] and cut_respa[3]) using
 // the outer neighbor list, and tallies energy/virial when requested.
 // Energy and virial use the unswitched interaction over the whole
 // range inside cutsq, independent of the switching applied to forces.
 void PairMIECut::compute_outer(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r2inv,rgamR,rgamA,forcemie,factor_mie,rsw;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_mie = force->special_lj;
   int newton_pair = force->newton_pair;
 
   inum = listouter->inum;
   ilist = listouter->ilist;
   numneigh = listouter->numneigh;
   firstneigh = listouter->firstneigh;
 
   // force is zero below cut_in_off and fully on above cut_in_on
   double cut_in_off = cut_respa[2];
   double cut_in_on = cut_respa[3];
 
   double cut_in_diff = cut_in_on - cut_in_off;
   double cut_in_off_sq = cut_in_off*cut_in_off;
   double cut_in_on_sq = cut_in_on*cut_in_on;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       // special-bond bits of j select the scaling factor, then are masked off
       factor_mie = special_mie[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
         // force contribution only beyond the inner switch-off distance
         if (rsq > cut_in_off_sq) {
         r2inv = 1.0/rsq;
         rgamA = pow(r2inv,(gamA[itype][jtype]/2.0));
         rgamR = pow(r2inv,(gamR[itype][jtype]/2.0));
         forcemie =  (mie1[itype][jtype]*rgamR - mie2[itype][jtype]*rgamA);
           fpair = factor_mie*forcemie*r2inv;
           if (rsq < cut_in_on_sq) {
             rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff;
             fpair *= rsw*rsw*(3.0 - 2.0*rsw);
           }
 
           f[i][0] += delx*fpair;
           f[i][1] += dely*fpair;
           f[i][2] += delz*fpair;
           if (newton_pair || j < nlocal) {
             f[j][0] -= delx*fpair;
             f[j][1] -= dely*fpair;
             f[j][2] -= delz*fpair;
           }
         }
 
         // energy: recomputed from rsq so it is valid even when the
         // force branch above was skipped
         if (eflag) {
           r2inv = 1.0/rsq;
         rgamA = pow(r2inv,(gamA[itype][jtype]/2.0));
         rgamR = pow(r2inv,(gamR[itype][jtype]/2.0));
           evdwl = (mie3[itype][jtype]*rgamR - mie4[itype][jtype]*rgamA) - 
             offset[itype][jtype];
           evdwl *= factor_mie;
         }
 
         // virial: replace fpair with the unswitched pair force.  Below
         // cut_in_off it is computed from scratch; inside the switching
         // region forcemie and r2inv from the force branch are reused.
         // Above cut_in_on fpair was never scaled, so it is left as is.
         if (vflag) {
           if (rsq <= cut_in_off_sq) {
         r2inv = 1.0/rsq;
         rgamA = pow(r2inv,(gamA[itype][jtype]/2.0));
         rgamR = pow(r2inv,(gamR[itype][jtype]/2.0));
         forcemie =  (mie1[itype][jtype]*rgamR - mie2[itype][jtype]*rgamA);
             fpair = factor_mie*forcemie*r2inv;
           } else if (rsq < cut_in_on_sq)
             fpair = factor_mie*forcemie*r2inv;
         }
 
         // NOTE(review): when vflag is unset and rsq <= cut_in_off_sq,
         // fpair is whatever an earlier iteration left (or unset on the
         // first pair); presumably ev_tally ignores it without a virial
         // request -- confirm against Pair::ev_tally
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairMIECut::allocate()
 {
   // Create every per-type-pair table with (ntypes+1) x (ntypes+1)
   // entries and clear the upper triangle of setflag, indices 1..ntypes.
 
   allocated = 1;
   const int ntypes = atom->ntypes;
   const int dim = ntypes+1;
 
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   // user-supplied parameters
   memory->create(cut,dim,dim,"pair:cut");
   memory->create(epsilon,dim,dim,"pair:epsilon");
   memory->create(sigma,dim,dim,"pair:sigma");
   memory->create(gamR,dim,dim,"pair:gamR");
   memory->create(gamA,dim,dim,"pair:gamA");
 
   // coefficients derived in init_one()
   memory->create(Cmie,dim,dim,"pair:Cmie");
   memory->create(mie1,dim,dim,"pair:mie1");
   memory->create(mie2,dim,dim,"pair:mie2");
   memory->create(mie3,dim,dim,"pair:mie3");
   memory->create(mie4,dim,dim,"pair:mie4");
   memory->create(offset,dim,dim,"pair:offset");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairMIECut::settings(int narg, char **arg)
 {
   // Parse the single pair_style argument: the global cutoff.
   //   narg, arg = argument count and strings from the pair_style command
   // Aborts through error->all() when the argument count is wrong.
 
   if (narg != 1) error->all(FLERR,"Illegal pair_style command");
 
   cut_global = force->numeric(FLERR,arg[0]);
 
   // reset cutoffs that have been explicitly set
   // the inner loop starts at j = i so that the diagonal entries
   // cut[i][i], which coeff() also sets, receive the new global cutoff
 
   if (allocated) {
     const int ntypes = atom->ntypes;
     for (int i = 1; i <= ntypes; i++)
       for (int j = i; j <= ntypes; j++)
         if (setflag[i][j]) cut[i][j] = cut_global;
   }
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairMIECut::coeff(int narg, char **arg)
 {
   // Arguments: itype-range jtype-range epsilon sigma gamR gamA [cutoff].
   // Stores the values for every pair i <= j inside both ranges and
   // marks those pairs as set.
 
   if (!(narg == 6 || narg == 7))
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi,jlo,jhi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   const double epsilon_one = force->numeric(FLERR,arg[2]);
   const double sigma_one = force->numeric(FLERR,arg[3]);
   const double gamR_one = force->numeric(FLERR,arg[4]);
   const double gamA_one = force->numeric(FLERR,arg[5]);
 
   // the optional 7th argument overrides the global cutoff
   const double cut_one =
     (narg == 7) ? force->numeric(FLERR,arg[6]) : cut_global;
 
   int count = 0;
   for (int itype = ilo; itype <= ihi; itype++) {
     const int jfirst = MAX(jlo,itype);
     for (int jtype = jfirst; jtype <= jhi; jtype++) {
       epsilon[itype][jtype] = epsilon_one;
       sigma[itype][jtype] = sigma_one;
       gamR[itype][jtype] = gamR_one;
       gamA[itype][jtype] = gamA_one;
       cut[itype][jtype] = cut_one;
       setflag[itype][jtype] = 1;
       count++;
     }
   }
 
   // the ranges selected no pair with i <= j
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 // Requests the neighbor list(s) this style needs and caches the rRESPA
 // cutoff array.  With a respa integrator during a run (whichflag == 1)
 // it requests inner/outer or inner/middle/outer lists with ids 1/2/3,
 // the same ids init_list() dispatches on; otherwise one default list.
 void PairMIECut::init_style()
 {
   // request regular or rRESPA neighbor lists
 
   int irequest;
 
   if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
     // respa = 0: no inner level, 1: inner only, 2: inner and middle
     int respa = 0;
     if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
     if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
 
-    if (respa == 0) irequest = neighbor->request(this);
+    if (respa == 0) irequest = neighbor->request(this,instance_me);
     else if (respa == 1) {
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 1;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respainner = 1;
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 3;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respaouter = 1;
     } else {
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 1;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respainner = 1;
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 2;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respamiddle = 1;
-      irequest = neighbor->request(this);
+      irequest = neighbor->request(this,instance_me);
       neighbor->requests[irequest]->id = 3;
       neighbor->requests[irequest]->half = 0;
       neighbor->requests[irequest]->respaouter = 1;
     }
 
-  } else irequest = neighbor->request(this);
+  } else irequest = neighbor->request(this,instance_me);
 
   // set rRESPA cutoffs
   // cut_respa stays NULL unless a respa integrator with an inner level
   // is active; init_one() only checks the cutoff when it is non-NULL
 
   if (strstr(update->integrate_style,"respa") &&
       ((Respa *) update->integrate)->level_inner >= 0)
     cut_respa = ((Respa *) update->integrate)->cutoff;
   else cut_respa = NULL;
 }
 
 /* ----------------------------------------------------------------------
    neighbor callback to inform pair style of neighbor list to use
    regular or rRESPA
 ------------------------------------------------------------------------- */
 
 void PairMIECut::init_list(int id, NeighList *ptr)
 {
   // store the list under the slot matching the request id;
   // any other id is ignored
   switch (id) {
   case 0:
     list = ptr;
     break;
   case 1:
     listinner = ptr;
     break;
   case 2:
     listmiddle = ptr;
     break;
   case 3:
     listouter = ptr;
     break;
   }
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairMIECut::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) {
     epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j],
                                sigma[i][i],sigma[j][j]);
     sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]);
     gamR[i][j] = mix_distance(gamR[i][i],gamR[j][j]);
     gamA[i][j] = mix_distance(gamA[i][i],gamA[j][j]);
     cut[i][j] = mix_distance(cut[i][i],cut[j][j]);
   }
   
   gamA[j][i] = gamA[i][j];
   gamR[j][i] = gamR[i][j];
   Cmie[i][j] = (gamR[i][j]/(gamR[i][j]-gamA[i][j]) * 
                 pow((gamR[i][j]/gamA[i][j]),
                     (gamA[i][j]/(gamR[i][j]-gamA[i][j]))));
   mie1[i][j] = Cmie[i][j] * gamR[i][j]* epsilon[i][j] * 
     pow(sigma[i][j],gamR[i][j]);
   mie2[i][j] = Cmie[i][j] * gamA[i][j] * epsilon[i][j] * 
     pow(sigma[i][j],gamA[i][j]);
   mie3[i][j] = Cmie[i][j] * epsilon[i][j] * pow(sigma[i][j],gamR[i][j]);
   mie4[i][j] = Cmie[i][j] * epsilon[i][j] * pow(sigma[i][j],gamA[i][j]);
 
   if (offset_flag) {
     double ratio = sigma[i][j] / cut[i][j];
     offset[i][j] = Cmie[i][j] * epsilon[i][j] * 
       (pow(ratio,gamR[i][j]) - pow(ratio,gamA[i][j]));
   } else offset[i][j] = 0.0;
 
   mie1[j][i] = mie1[i][j];
   mie2[j][i] = mie2[i][j];
   mie3[j][i] = mie3[i][j];
   mie4[j][i] = mie4[i][j];
   offset[j][i] = offset[i][j];
 
   // check interior rRESPA cutoff
 
   if (cut_respa && cut[i][j] < cut_respa[3])
     error->all(FLERR,"Pair cutoff < Respa interior cutoff");
 
   // compute I,J contribution to long-range tail correction
   // count total # of atoms of type I and J via Allreduce
 
   if (tail_flag) {
     int *type = atom->type;
     int nlocal = atom->nlocal;
 
     double count[2],all[2];
     count[0] = count[1] = 0.0;
     for (int k = 0; k < nlocal; k++) {
       if (type[k] == i) count[0] += 1.0;
       if (type[k] == j) count[1] += 1.0;
     }
     MPI_Allreduce(count,all,2,MPI_DOUBLE,MPI_SUM,world);
 
     double siggamA = pow(sigma[i][j],gamA[i][j]);
     double siggamR = pow(sigma[i][j],gamR[i][j]);
     double rcgamA = pow(cut[i][j],(gamA[i][j]-3.0));
     double rcgamR = pow(cut[i][j],(gamR[i][j]-3.0));
     etail_ij = Cmie[i][j]*2.0*MY_PI*all[0]*all[1]*epsilon[i][j]*
       (siggamR/((gamR[i][j]-3.0)*rcgamR)-siggamA/((gamA[i][j]-3.0)*rcgamA));
     ptail_ij = Cmie[i][j]*2.0*MY_PI*all[0]*all[1]*epsilon[i][j]/3.0*
       ((gamR[i][j]/(gamR[i][j]-3.0))*siggamR/rcgamR-
        (gamA[i][j]/(gamA[i][j]-3.0))*siggamA/rcgamA);
   }
 
   return cut[i][j];
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairMIECut::write_restart(FILE *fp)
 {
   // global settings first, then one record per pair i <= j:
   // the setflag, followed by the five parameters when the flag is set
 
   write_restart_settings(fp);
 
   const int ntypes = atom->ntypes;
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       fwrite(&setflag[itype][jtype],sizeof(int),1,fp);
       if (!setflag[itype][jtype]) continue;
 
       fwrite(&epsilon[itype][jtype],sizeof(double),1,fp);
       fwrite(&sigma[itype][jtype],sizeof(double),1,fp);
       fwrite(&gamR[itype][jtype],sizeof(double),1,fp);
       fwrite(&gamA[itype][jtype],sizeof(double),1,fp);
       fwrite(&cut[itype][jtype],sizeof(double),1,fp);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairMIECut::read_restart(FILE *fp)
 {
   // mirror of write_restart(): rank 0 reads each value from fp and
   // every value is then broadcast from rank 0
 
   read_restart_settings(fp);
   allocate();
 
   const bool reader = (comm->me == 0);
   const int ntypes = atom->ntypes;
 
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       if (reader) fread(&setflag[itype][jtype],sizeof(int),1,fp);
       MPI_Bcast(&setflag[itype][jtype],1,MPI_INT,0,world);
       if (!setflag[itype][jtype]) continue;
 
       if (reader) {
         fread(&epsilon[itype][jtype],sizeof(double),1,fp);
         fread(&sigma[itype][jtype],sizeof(double),1,fp);
         fread(&gamR[itype][jtype],sizeof(double),1,fp);
         fread(&gamA[itype][jtype],sizeof(double),1,fp);
         fread(&cut[itype][jtype],sizeof(double),1,fp);
       }
       MPI_Bcast(&epsilon[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&sigma[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&gamR[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&gamA[itype][jtype],1,MPI_DOUBLE,0,world);
       MPI_Bcast(&cut[itype][jtype],1,MPI_DOUBLE,0,world);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    proc 0 writes to restart file
 ------------------------------------------------------------------------- */
 
 void PairMIECut::write_restart_settings(FILE *fp)
 {
   // record order must match read_restart_settings():
   // one double (global cutoff) followed by three int flags
   fwrite(&cut_global,sizeof(double),1,fp);
 
   fwrite(&offset_flag,sizeof(int),1,fp);
   fwrite(&mix_flag,sizeof(int),1,fp);
   fwrite(&tail_flag,sizeof(int),1,fp);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads from restart file, bcasts
 ------------------------------------------------------------------------- */
 
 void PairMIECut::read_restart_settings(FILE *fp)
 {
   // only rank 0 touches the file ...
   if (comm->me == 0) {
     fread(&cut_global,sizeof(double),1,fp);
     fread(&offset_flag,sizeof(int),1,fp);
     fread(&mix_flag,sizeof(int),1,fp);
     fread(&tail_flag,sizeof(int),1,fp);
   }
 
   // ... and the four values are then broadcast from rank 0
   MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world);
   MPI_Bcast(&offset_flag,1,MPI_INT,0,world);
   MPI_Bcast(&mix_flag,1,MPI_INT,0,world);
   MPI_Bcast(&tail_flag,1,MPI_INT,0,world);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairMIECut::single(int i, int j, int itype, int jtype, double rsq,
                            double factor_coul, double factor_mie,
                            double &fforce)
 {
   // Energy and force of one pair at squared separation rsq.
   // i, j and factor_coul are not used; no cutoff test is done here.
   // Returns the scaled energy; fforce receives the scaled force
   // term multiplied by 1/rsq.
 
   const double r2inv = 1.0/rsq;
   const double rgamA = pow(r2inv,(gamA[itype][jtype]/2.0));
   const double rgamR = pow(r2inv,(gamR[itype][jtype]/2.0));
 
   const double forcemie =
     (mie1[itype][jtype]*rgamR - mie2[itype][jtype]*rgamA);
   fforce = factor_mie*forcemie*r2inv;
 
   const double phimie =
     (mie3[itype][jtype]*rgamR - mie4[itype][jtype]*rgamA) -
     offset[itype][jtype];
   return factor_mie*phimie;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void *PairMIECut::extract(const char *str, int &dim)
 {
   // Look up one of the 2d parameter arrays by name.
   // dim is always set to 2; an unknown name yields NULL.
 
   struct NamedArray {
     const char *name;
     double **data;
   };
 
   const NamedArray exported[] = {
     {"epsilon", epsilon},
     {"sigma", sigma},
     {"gamR", gamR},
     {"gamA", gamA}
   };
   const int nexported = sizeof(exported)/sizeof(exported[0]);
 
   dim = 2;
   for (int k = 0; k < nexported; k++) {
     if (strcmp(str,exported[k].name) == 0) return (void *) exported[k].data;
   }
   return NULL;
 }
diff --git a/src/pair_zbl.cpp b/src/pair_zbl.cpp
index 0adef4f40..5c2f7c4fc 100644
--- a/src/pair_zbl.cpp
+++ b/src/pair_zbl.cpp
@@ -1,436 +1,436 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Stephen Foiles, Aidan Thompson (SNL)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
 #include "pair_zbl.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 // From J.F. Zeigler, J. P. Biersack and U. Littmark, 
 // "The Stopping and Range of Ions in Matter" volume 1, Pergamon, 1985.
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace PairZBLConstants;
 
 /* ---------------------------------------------------------------------- */
 
 PairZBL::PairZBL(LAMMPS *lmp) : Pair(lmp)
 {
   // nothing beyond base-class construction
 }
 
 /* ---------------------------------------------------------------------- */
 
 PairZBL::~PairZBL()
 {
   // nothing to release unless allocate() has run
   if (!allocated) return;
 
   memory->destroy(setflag);
   memory->destroy(cutsq);
 
   // per-type z values and per-pair screening coefficients
   memory->destroy(z);
   memory->destroy(d1a);
   memory->destroy(d2a);
   memory->destroy(d3a);
   memory->destroy(d4a);
   memory->destroy(zze);
 
   // switching-function coefficients
   memory->destroy(sw1);
   memory->destroy(sw2);
   memory->destroy(sw3);
   memory->destroy(sw4);
   memory->destroy(sw5);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairZBL::compute(int eflag, int vflag)
 {
   int i,j,ii,jj,inum,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r,t,fswitch,eswitch;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int newton_pair = force->newton_pair;
 
   inum = list->inum;
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cut_globalsq) {
 	r = sqrt(rsq);
         fpair = dzbldr(r, itype, jtype);
 
 	if (rsq > cut_innersq) {
 	  t = r - cut_inner;
 	  fswitch = t*t * 
 	    (sw1[itype][jtype] + sw2[itype][jtype]*t);
 	  fpair += fswitch;
 	}
 
         fpair *= -1.0/r;
         f[i][0] += delx*fpair;
         f[i][1] += dely*fpair;
         f[i][2] += delz*fpair;
         if (newton_pair || j < nlocal) {
           f[j][0] -= delx*fpair;
           f[j][1] -= dely*fpair;
           f[j][2] -= delz*fpair;
         }
 
         if (eflag) {
           evdwl = e_zbl(r, itype, jtype);
 	  evdwl += sw5[itype][jtype];
 	  if (rsq > cut_innersq) {
 	    eswitch = t*t*t * 
 	      (sw3[itype][jtype] + sw4[itype][jtype]*t);
 	    evdwl += eswitch;
 	  }
         }
 
         if (evflag) ev_tally(i,j,nlocal,newton_pair,
                              evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 /* ----------------------------------------------------------------------
    allocate all arrays
 ------------------------------------------------------------------------- */
 
 void PairZBL::allocate()
 {
   // Create the per-type z vector and every per-type-pair table with
   // ntypes+1 entries per dimension, and clear the upper triangle of
   // setflag, indices 1..ntypes.
 
   allocated = 1;
   const int ntypes = atom->ntypes;
   const int dim = ntypes+1;
 
   memory->create(setflag,dim,dim,"pair:setflag");
   for (int itype = 1; itype <= ntypes; itype++) {
     for (int jtype = itype; jtype <= ntypes; jtype++) {
       setflag[itype][jtype] = 0;
     }
   }
 
   memory->create(cutsq,dim,dim,"pair:cutsq");
 
   // z is per type, everything else is per type pair
   memory->create(z,dim,"pair:z");
   memory->create(d1a,dim,dim,"pair:d1a");
   memory->create(d2a,dim,dim,"pair:d2a");
   memory->create(d3a,dim,dim,"pair:d3a");
   memory->create(d4a,dim,dim,"pair:d4a");
   memory->create(zze,dim,dim,"pair:zze");
   memory->create(sw1,dim,dim,"pair:sw1");
   memory->create(sw2,dim,dim,"pair:sw2");
   memory->create(sw3,dim,dim,"pair:sw3");
   memory->create(sw4,dim,dim,"pair:sw4");
   memory->create(sw5,dim,dim,"pair:sw5");
 }
 
 /* ----------------------------------------------------------------------
    global settings
 ------------------------------------------------------------------------- */
 
 void PairZBL::settings(int narg, char **arg)
 {
   // Arguments: inner cutoff, global cutoff.
   // The inner cutoff must be positive and may not exceed the global one.
 
   if (narg != 2) error->all(FLERR,"Illegal pair_style command");
 
   cut_inner = force->numeric(FLERR,arg[0]);
   cut_global = force->numeric(FLERR,arg[1]);
 
   const bool bad_inner = (cut_inner <= 0.0);
   const bool inner_beyond_global = (cut_inner > cut_global);
   if (bad_inner || inner_beyond_global)
     error->all(FLERR,"Illegal pair_style command");
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 void PairZBL::coeff(int narg, char **arg)
 {
   // Arguments: itype-range jtype-range z.
   // Every pair i <= j inside both ranges is flagged as set; the z
   // value itself is stored per type and only for i == j pairs.
 
   if (narg != 3)
     error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   int ilo,ihi;
   force->bounds(arg[0],atom->ntypes,ilo,ihi);
 
   int jlo,jhi;
   force->bounds(arg[1],atom->ntypes,jlo,jhi);
 
   const double z_one = force->numeric(FLERR,arg[2]);
 
   int count = 0;
   for (int itype = ilo; itype <= ihi; itype++) {
     const int jfirst = MAX(jlo,itype);
     for (int jtype = jfirst; jtype <= jhi; jtype++) {
       if (itype == jtype) z[itype] = z_one;
       setflag[itype][jtype] = 1;
       count++;
     }
   }
 
   // the ranges selected no pair with i <= j
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style
 ------------------------------------------------------------------------- */
 
 // Requests a neighbor list for this style and caches the squares of
 // the inner and global cutoffs used by compute() and single().
 void PairZBL::init_style()
 {
-  neighbor->request(this);
+  neighbor->request(this,instance_me);
 
   cut_innersq = cut_inner * cut_inner;
   cut_globalsq = cut_global * cut_global;
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairZBL::init_one(int i, int j)
 {
 
   double ainv = (pow(z[i],pzbl) + pow(z[j],pzbl))/(a0*force->angstrom);
   d1a[i][j] = d1*ainv;
   d2a[i][j] = d2*ainv;
   d3a[i][j] = d3*ainv;
   d4a[i][j] = d4*ainv;
   zze[i][j] = z[i]*z[j]*force->qqr2e*force->qelectron*force->qelectron;
 
   d1a[j][i] = d1a[i][j];
   d2a[j][i] = d2a[i][j];
   d3a[j][i] = d3a[i][j];
   d4a[j][i] = d4a[i][j];
   zze[j][i] = zze[i][j];
 
   // e =  t^3 (sw3 + sw4*t) + sw5 
   //   = A/3*t^3 + B/4*t^4 + C
   // sw3 = A/3 
   // sw4 = B/4 
   // sw5 = C
 
   // dedr = t^2 (sw1 + sw2*t) 
   //      = A*t^2 + B*t^3
   // sw1 = A 
   // sw2 = B 
 
   // de2dr2 = 2*A*t + 3*B*t^2
 
   // Require that at t = tc:
   // e = -Fc
   // dedr = -Fc'
   // d2edr2 = -Fc'' 
 
   // Hence:
   // A = (-3Fc' + tc*Fc'')/tc^2
   // B = ( 2Fc' - tc*Fc'')/tc^3
   // C = -Fc + tc/2*Fc' - tc^2/12*Fc''
 
   double tc = cut_global - cut_inner;
   double fc = e_zbl(cut_global, i, j);
   double fcp = dzbldr(cut_global, i, j);
   double fcpp = d2zbldr2(cut_global, i, j);
 
   double swa = (-3.0*fcp + tc*fcpp)/(tc*tc);
   double swb = ( 2.0*fcp - tc*fcpp)/(tc*tc*tc);
   double swc = -fc + (tc/2.0)*fcp - (tc*tc/12.0)*fcpp; 
   
   sw1[i][j] = swa;
   sw2[i][j] = swb;
   sw3[i][j] = swa/3.0;
   sw4[i][j] = swb/4.0;
   sw5[i][j] = swc;
 
   sw1[j][i] = sw1[i][j];
   sw2[j][i] = sw2[i][j];
   sw3[j][i] = sw3[i][j];
   sw4[j][i] = sw4[i][j];
   sw5[j][i] = sw5[i][j];
 
   return cut_global;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairZBL::single(int i, int j, int itype, int jtype, double rsq,
                          double dummy1, double dummy2,
                          double &fforce)
 {
   // i, j, dummy1 and dummy2 are not used; only the type pair and the
   // squared separation enter the result
 
   const double r = sqrt(rsq);
 
   // unswitched contributions: first derivative, and energy plus the
   // constant shift sw5
 
   double dedr = dzbldr(r, itype, jtype);
   double energy = e_zbl(r, itype, jtype);
   energy += sw5[itype][jtype];
 
   // beyond the inner cutoff add the polynomial switching terms in
   // t = r - cut_inner
 
   if (rsq > cut_innersq) {
     const double t = r - cut_inner;
     const double dswitch = t*t *
       (sw1[itype][jtype] + sw2[itype][jtype]*t);
     const double uswitch = t*t*t *
       (sw3[itype][jtype] + sw4[itype][jtype]*t);
     dedr += dswitch;
     energy += uswitch;
   }
 
   // hand back the derivative scaled by -1/r; the energy is the return value
 
   fforce = dedr;
   fforce *= -1.0/r;
 
   return energy;
 }
 
 /* ----------------------------------------------------------------------
    compute ZBL pair energy
 ------------------------------------------------------------------------- */
 
 double PairZBL::e_zbl(double r, int i, int j) {
   
   double d1aij = d1a[i][j];
   double d2aij = d2a[i][j];
   double d3aij = d3a[i][j];
   double d4aij = d4a[i][j];
   double zzeij = zze[i][j];
   double rinv = 1.0/r;
 
   double sum = c1*exp(-d1aij*r);
   sum += c2*exp(-d2aij*r);
   sum += c3*exp(-d3aij*r);
   sum += c4*exp(-d4aij*r);
 
   double result = zzeij*sum*rinv;
 
   return result;
 };
 
 
 /* ----------------------------------------------------------------------
    compute ZBL first derivative
 ------------------------------------------------------------------------- */
 
 double PairZBL::dzbldr(double r, int i, int j) {
 
   double d1aij = d1a[i][j];
   double d2aij = d2a[i][j];
   double d3aij = d3a[i][j];
   double d4aij = d4a[i][j];
   double zzeij = zze[i][j];
   double rinv = 1.0/r;
 
   double e1 = exp(-d1aij*r);
   double e2 = exp(-d2aij*r);
   double e3 = exp(-d3aij*r);
   double e4 = exp(-d4aij*r);
 
   double sum = c1*e1;
   sum += c2*e2;
   sum += c3*e3;
   sum += c4*e4;
 
   double sum_p = -c1*d1aij*e1;
   sum_p -= c2*d2aij*e2;
   sum_p -= c3*d3aij*e3;
   sum_p -= c4*d4aij*e4;
   
   double result = zzeij*(sum_p - sum*rinv)*rinv;
   
   return result;
 };
 
 /* ----------------------------------------------------------------------
    compute ZBL second derivative
 ------------------------------------------------------------------------- */
 
 double PairZBL::d2zbldr2(double r, int i, int j) {
 
   double d1aij = d1a[i][j];
   double d2aij = d2a[i][j];
   double d3aij = d3a[i][j];
   double d4aij = d4a[i][j];
   double zzeij = zze[i][j];
   double rinv = 1.0/r;
 
   double e1 = exp(-d1aij*r);
   double e2 = exp(-d2aij*r);
   double e3 = exp(-d3aij*r);
   double e4 = exp(-d4aij*r);
 
   double sum = c1*e1;
   sum += c2*e2;
   sum += c3*e3;
   sum += c4*e4;
 
   double sum_p = c1*e1*d1aij;
   sum_p += c2*e2*d2aij;
   sum_p += c3*e3*d3aij;
   sum_p += c4*e4*d4aij;
 
   double sum_pp = c1*e1*d1aij*d1aij;
   sum_pp += c2*e2*d2aij*d2aij;
   sum_pp += c3*e3*d3aij*d3aij;
   sum_pp += c4*e4*d4aij*d4aij;
   
   double result = zzeij*(sum_pp + 2.0*sum_p*rinv + 
 			 2.0*sum*rinv*rinv)*rinv;
   
   return result;
 };