diff --git a/src/USER-OMP/Install.sh b/src/USER-OMP/Install.sh
index 6af859952..db0beb521 100644
--- a/src/USER-OMP/Install.sh
+++ b/src/USER-OMP/Install.sh
@@ -1,32 +1,20 @@
 # Install/unInstall package files in LAMMPS
 # do not install child files if parent does not exist
 
-if (test $1 = 1) then
-
-#  if (test -e ../pair_lj_cut_coul_long.cpp) then
-#    cp pair_lj_cut_coul_long_omp.cpp ..
-#    cp pair_lj_cut_coul_long_omp.h ..
-#  fi
-
-  cp pair_lj_cut_omp.cpp ..
-
-  cp thr_omp.cpp ..
-
-  cp pair_lj_cut_omp.h ..
-
-  cp thr_omp.h ..
-
-elif (test $1 = 0) then
-
-#  rm -f ../pair_lj_cut_coul_long_omp.cpp
-  rm -f ../pair_lj_cut_omp.cpp
-
-  rm -f ../thr_omp.cpp
-
-#  rm -f ../pair_lj_cut_coul_long_omp.h
-  rm -f ../pair_lj_cut_omp.h
-
-  rm -f ../thr_omp.h
-
-fi
-
+for file in *_omp.cpp *_omp.h; do
+  # skip the literal pattern that is left over when a glob matches nothing
+  test -e "$file" || continue
+  # name of the non-OpenMP parent file, e.g. pair_lj_cut_omp.h -> pair_lj_cut.h
+  ofile="${file%_omp.*}.${file##*.}"
+  if test "$1" = 1; then
+    case "$file" in
+      thr_omp.h|thr_omp.cpp) ;;  # always install those files.
+      *) test -e "../$ofile" || continue ;;  # parent file is not installed
+    esac
+
+    cp "$file" ..
+
+  elif test "$1" = 0; then
+    rm -f "../$file"
+  fi
+done
diff --git a/src/USER-OMP/Package.sh b/src/USER-OMP/Package.sh
index ecd2ebee1..5a004c918 100644
--- a/src/USER-OMP/Package.sh
+++ b/src/USER-OMP/Package.sh
@@ -1,21 +1,28 @@
-#/bin/sh
 # Update package files in LAMMPS
-# copy package file to src if it doesn't exists or is different
-# do not copy gayberne files if non-GPU version does not exist
+# copy package file to src if it doesn't exist or is different
+# do not copy certain files if non-OMP versions do not exist
+# remove installed OpenMP style files that have no matching
+#   non-OpenMP version installed, e.g. after a package has been removed
+
 for file in *_omp.cpp *_omp.h; do
   # let us see if the "rain man" can count the toothpicks...
   ofile=`echo $file | sed -e s,\\\\\\(.\\*\\\\\\)_omp\\\\.\\\\\\(h\\\\\\|cpp\\\\\\),\\\\1.\\\\2,`
   if (test $file = "thr_omp.h") || (test $file = "thr_omp.cpp") then
-    :  # do check for those files.
+    :  # always check for those files.
   elif (test ! -e ../$ofile) then
+    if (test -e ../$file) then
+      echo "  removing src/$file"
+      rm -f ../$file
+    fi
     continue
   fi
+  # copy the file to src if it is missing there or differs from this copy
   if (test ! -e ../$file) then
     echo "  creating src/$file"
     cp $file ..
   elif ! cmp -s $file ../$file ; then
     echo "  updating src/$file"
     cp $file ..
   fi
 done
 
diff --git a/src/USER-OMP/dihedral_charmm_omp.cpp b/src/USER-OMP/dihedral_charmm_omp.cpp
new file mode 100644
index 000000000..63bfc4327
--- /dev/null
+++ b/src/USER-OMP/dihedral_charmm_omp.cpp
@@ -0,0 +1,328 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "math.h"
+#include "dihedral_charmm_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "force.h"
+#include "pair.h"
+#include "update.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define TOLERANCE 0.05
+#define SMALL     0.001
+
+/* ---------------------------------------------------------------------- */
+
+void DihedralCharmmOMP::compute(int eflag, int vflag)
+{
+  // set up global and per-thread energy/virial tallies only when requested
+  if (!(eflag || vflag)) evflag = 0;
+  else {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  }
+
+  // ensure pair->ev_tally() will use 1-4 virial contribution
+  if (weightflag && vflag_global == 2) {
+    force->pair->vflag_global = 1;
+    force->pair->vflag_either = 1;
+  }
+
+  const int inum = neighbor->ndihedrallist;
+  const int nthreads = comm->nthreads;
+  const int nall = atom->nlocal + atom->nghost;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    // loop_setup_thr() sets ifrom, ito and tid and returns the force array
+    int ifrom, ito, tid;
+    double **f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+
+    // select the eval<EVFLAG,EFLAG,NEWTON_BOND> instantiation for this call
+    if (!evflag) {
+      if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid);
+      else eval<0,0,0>(f, ifrom, ito, tid);
+    } else if (eflag) {
+      if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid);
+      else eval<1,1,0>(f, ifrom, ito, tid);
+    } else {
+      if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid);
+      else eval<1,0,0>(f, ifrom, ito, tid);
+    }
+
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+
+  // reduce contributions to non-bonded energy terms
+  for (int t = 0; t < nthreads; ++t) {
+    force->pair->eng_vdwl += eng_vdwl_thr[t];
+    force->pair->eng_coul += eng_coul_thr[t];
+  }
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void DihedralCharmmOMP::eval(double **f, int nfrom, int nto, int tid)
+{
+  // CHARMM dihedral forces plus weighted 1-4 pair terms for dihedrals [nfrom,nto)
+  int i1,i2,i3,i4,i,m,n,type;
+  double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm;
+  double edihedral,f1[3],f2[3],f3[3],f4[3];
+  double ax,ay,az,bx,by,bz,rasq,rbsq,rgsq,rg,rginv,ra2inv,rb2inv,rabinv;
+  double df,df1,ddf1,fg,hg,fga,hgb,gaa,gbb;
+  double dtfx,dtfy,dtfz,dtgx,dtgy,dtgz,dthx,dthy,dthz;
+  double c,s,p,sx2,sy2,sz2;
+  int itype,jtype;
+  double delx,dely,delz,rsq,r2inv,r6inv;
+  double forcecoul,forcelj,fpair,ecoul,evdwl;
+  // zero all energies so the tally calls never receive indeterminate values
+  edihedral = ecoul = evdwl = 0.0;
+
+  double **x = atom->x;
+  double *q = atom->q;
+  int *atomtype = atom->type;
+  int **dihedrallist = neighbor->dihedrallist;
+  int nlocal = atom->nlocal;
+  double qqrd2e = force->qqrd2e;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = dihedrallist[n][0];
+    i2 = dihedrallist[n][1];
+    i3 = dihedrallist[n][2];
+    i4 = dihedrallist[n][3];
+    type = dihedrallist[n][4];
+
+    // 1st bond
+
+    vb1x = x[i1][0] - x[i2][0];
+    vb1y = x[i1][1] - x[i2][1];
+    vb1z = x[i1][2] - x[i2][2];
+    domain->minimum_image(vb1x,vb1y,vb1z);
+
+    // 2nd bond
+
+    vb2x = x[i3][0] - x[i2][0];
+    vb2y = x[i3][1] - x[i2][1];
+    vb2z = x[i3][2] - x[i2][2];
+    domain->minimum_image(vb2x,vb2y,vb2z);
+
+    vb2xm = -vb2x;
+    vb2ym = -vb2y;
+    vb2zm = -vb2z;
+    domain->minimum_image(vb2xm,vb2ym,vb2zm);
+
+    // 3rd bond
+
+    vb3x = x[i4][0] - x[i3][0];
+    vb3y = x[i4][1] - x[i3][1];
+    vb3z = x[i4][2] - x[i3][2];
+    domain->minimum_image(vb3x,vb3y,vb3z);
+
+    // c,s calculation
+
+    ax = vb1y*vb2zm - vb1z*vb2ym;
+    ay = vb1z*vb2xm - vb1x*vb2zm;
+    az = vb1x*vb2ym - vb1y*vb2xm;
+    bx = vb3y*vb2zm - vb3z*vb2ym;
+    by = vb3z*vb2xm - vb3x*vb2zm;
+    bz = vb3x*vb2ym - vb3y*vb2xm;
+
+    rasq = ax*ax + ay*ay + az*az;
+    rbsq = bx*bx + by*by + bz*bz;
+    rgsq = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm;
+    rg = sqrt(rgsq);
+    // inverses stay zero for degenerate (zero-length) vectors
+    rginv = ra2inv = rb2inv = 0.0;
+    if (rg > 0) rginv = 1.0/rg;
+    if (rasq > 0) ra2inv = 1.0/rasq;
+    if (rbsq > 0) rb2inv = 1.0/rbsq;
+    rabinv = sqrt(ra2inv*rb2inv);
+
+    c = (ax*bx + ay*by + az*bz)*rabinv;
+    s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z);
+
+    // error check
+
+    if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) {
+      int me = comm->me;
+      // NOTE(review): warning()/fprintf run in the threaded region - confirm safe
+      if (screen) {
+	char str[128];
+	sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
+		me,tid,update->ntimestep,
+		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+	error->warning(FLERR,str,0);
+	fprintf(screen,"  1st atom: %d %g %g %g\n",
+		me,x[i1][0],x[i1][1],x[i1][2]);
+	fprintf(screen,"  2nd atom: %d %g %g %g\n",
+		me,x[i2][0],x[i2][1],x[i2][2]);
+	fprintf(screen,"  3rd atom: %d %g %g %g\n",
+		me,x[i3][0],x[i3][1],x[i3][2]);
+	fprintf(screen,"  4th atom: %d %g %g %g\n",
+		me,x[i4][0],x[i4][1],x[i4][2]);
+      }
+    }
+    // clamp c into [-1,1]
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+    // ddf1 starts at zero as well: for m == 0 the loop never assigns it
+    m = multiplicity[type];
+    p = 1.0;
+    ddf1 = df1 = 0.0;
+
+    for (i = 0; i < m; i++) {
+      ddf1 = p*c - df1*s;
+      df1 = p*s + df1*c;
+      p = ddf1;
+    }
+
+    p = p*cos_shift[type] + df1*sin_shift[type];
+    df1 = df1*cos_shift[type] - ddf1*sin_shift[type];
+    df1 *= -m;
+    p += 1.0;
+
+    if (m == 0) {
+      p = 1.0 + cos_shift[type];
+      df1 = 0.0;
+    }
+
+    if (EFLAG) edihedral = k[type] * p;
+
+    fg = vb1x*vb2xm + vb1y*vb2ym + vb1z*vb2zm;
+    hg = vb3x*vb2xm + vb3y*vb2ym + vb3z*vb2zm;
+    fga = fg*ra2inv*rginv;
+    hgb = hg*rb2inv*rginv;
+    gaa = -ra2inv*rg;
+    gbb = rb2inv*rg;
+
+    dtfx = gaa*ax;
+    dtfy = gaa*ay;
+    dtfz = gaa*az;
+    dtgx = fga*ax - hgb*bx;
+    dtgy = fga*ay - hgb*by;
+    dtgz = fga*az - hgb*bz;
+    dthx = gbb*bx;
+    dthy = gbb*by;
+    dthz = gbb*bz;
+
+    df = -k[type] * df1;
+
+    sx2 = df*dtgx;
+    sy2 = df*dtgy;
+    sz2 = df*dtgz;
+
+    f1[0] = df*dtfx;
+    f1[1] = df*dtfy;
+    f1[2] = df*dtfz;
+
+    f2[0] = sx2 - f1[0];
+    f2[1] = sy2 - f1[1];
+    f2[2] = sz2 - f1[2];
+
+    f4[0] = df*dthx;
+    f4[1] = df*dthy;
+    f4[2] = df*dthz;
+
+    f3[0] = -sx2 - f4[0];
+    f3[1] = -sy2 - f4[1];
+    f3[2] = -sz2 - f4[2];
+
+    // apply force to each of 4 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] += f2[0];
+      f[i2][1] += f2[1];
+      f[i2][2] += f2[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (NEWTON_BOND || i4 < nlocal) {
+      f[i4][0] += f4[0];
+      f[i4][1] += f4[1];
+      f[i4][2] += f4[2];
+    }
+
+    if (EVFLAG)
+      ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4,
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+    // 1-4 LJ and Coulomb interactions
+    // tally energy/virial in pair, using newton_bond as newton flag
+
+    if (weight[type] > 0.0) {
+      itype = atomtype[i1];
+      jtype = atomtype[i4];
+
+      delx = x[i1][0] - x[i4][0];
+      dely = x[i1][1] - x[i4][1];
+      delz = x[i1][2] - x[i4][2];
+      domain->minimum_image(delx,dely,delz);
+      rsq = delx*delx + dely*dely + delz*delz;
+      r2inv = 1.0/rsq;
+      r6inv = r2inv*r2inv*r2inv;
+
+      if (implicit) forcecoul = qqrd2e * q[i1]*q[i4]*r2inv;
+      else forcecoul = qqrd2e * q[i1]*q[i4]*sqrt(r2inv);
+      forcelj = r6inv * (lj14_1[itype][jtype]*r6inv - lj14_2[itype][jtype]);
+      fpair = weight[type] * (forcelj+forcecoul)*r2inv;
+
+      if (EFLAG) {
+	ecoul = weight[type] * forcecoul;
+	evdwl = r6inv * (lj14_3[itype][jtype]*r6inv - lj14_4[itype][jtype]);
+	evdwl *= weight[type];
+      }
+
+      if (NEWTON_BOND || i1 < nlocal) {
+	f[i1][0] += delx*fpair;
+	f[i1][1] += dely*fpair;
+	f[i1][2] += delz*fpair;
+      }
+      if (NEWTON_BOND || i4 < nlocal) {
+	f[i4][0] -= delx*fpair;
+	f[i4][1] -= dely*fpair;
+	f[i4][2] -= delz*fpair;
+      }
+
+      if (EVFLAG) ev_tally_thr(force->pair,i1,i4,nlocal,NEWTON_BOND,
+			       evdwl,ecoul,fpair,delx,dely,delz,tid);
+    }
+  }
+}
+
diff --git a/src/USER-OMP/dihedral_charmm_omp.h b/src/USER-OMP/dihedral_charmm_omp.h
new file mode 100644
index 000000000..a39ad83f7
--- /dev/null
+++ b/src/USER-OMP/dihedral_charmm_omp.h
@@ -0,0 +1,48 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef DIHEDRAL_CLASS
+
+DihedralStyle(charmm/omp,DihedralCharmmOMP)
+
+#else
+
+#ifndef LMP_DIHEDRAL_CHARMM_OMP_H
+#define LMP_DIHEDRAL_CHARMM_OMP_H
+
+#include "dihedral_charmm.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class DihedralCharmmOMP : public DihedralCharmm, public ThrOMP {
+  // charmm dihedral style whose compute() calls eval() from an OpenMP region
+ public:
+    DihedralCharmmOMP(class LAMMPS *lmp) : 
+      DihedralCharmm(lmp), ThrOMP(lmp,DIHEDRAL) {};
+  // arguments are (eflag, vflag): energy and virial tally requests
+  virtual void compute(int, int);
+  // eval() handles dihedrals [ifrom,ito) for thread tid, adding forces into f
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/dihedral_class2_omp.cpp b/src/USER-OMP/dihedral_class2_omp.cpp
new file mode 100644
index 000000000..734829664
--- /dev/null
+++ b/src/USER-OMP/dihedral_class2_omp.cpp
@@ -0,0 +1,532 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "math.h"
+#include "dihedral_class2_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "force.h"
+#include "update.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define TOLERANCE 0.05
+#define SMALL     0.0000001
+
+/* ---------------------------------------------------------------------- */
+
+void DihedralClass2OMP::compute(int eflag, int vflag)
+{
+  // set up the global and per-thread energy/virial tallies only when
+  // energy or virial was requested; otherwise switch tallying off
+  if (!(eflag || vflag)) evflag = 0;
+  else {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  }
+
+  const int inum = neighbor->ndihedrallist;
+  const int nthreads = comm->nthreads;
+  const int nall = atom->nlocal + atom->nghost;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    // loop_setup_thr() sets ifrom, ito and tid and returns the force array
+    int ifrom, ito, tid;
+    double **f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+
+    // select the eval<EVFLAG,EFLAG,NEWTON_BOND> instantiation for this call
+    if (!evflag) {
+      if (force->newton_bond) eval<0,0,1>(f, ifrom, ito, tid);
+      else eval<0,0,0>(f, ifrom, ito, tid);
+    } else if (eflag) {
+      if (force->newton_bond) eval<1,1,1>(f, ifrom, ito, tid);
+      else eval<1,1,0>(f, ifrom, ito, tid);
+    } else {
+      if (force->newton_bond) eval<1,0,1>(f, ifrom, ito, tid);
+      else eval<1,0,0>(f, ifrom, ito, tid);
+    }
+
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void DihedralClass2OMP::eval(double **f, int nfrom, int nto, int tid)
+{
+  // class2 dihedral energy/forces incl. coupling terms for dihedrals [nfrom,nto)
+  int i1,i2,i3,i4,i,j,k,n,type;
+  double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm;
+  double edihedral;
+  double r1mag2,r1,r2mag2,r2,r3mag2,r3;
+  double sb1,rb1,sb2,rb2,sb3,rb3,c0,r12c1;
+  double r12c2,costh12,costh13,costh23,sc1,sc2,s1,s2,c;
+  double cosphi,phi,sinphi,a11,a22,a33,a12,a13,a23,sx1,sx2;
+  double sx12,sy1,sy2,sy12,sz1,sz2,sz12,dphi1,dphi2,dphi3;
+  double de_dihedral,t1,t2,t3,t4,cos2phi,cos3phi,bt1,bt2;
+  double bt3,sumbte,db,sumbtf,at1,at2,at3,da,da1,da2,r1_0;
+  double r3_0,dr1,dr2,tk1,tk2,s12,sin2;
+  double dcosphidr[4][3],dphidr[4][3],dbonddr[3][4][3],dthetadr[2][4][3];
+  double fabcd[4][3];
+
+  edihedral = 0.0;
+
+  double **x = atom->x;
+  int **dihedrallist = neighbor->dihedrallist;
+  int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = dihedrallist[n][0];
+    i2 = dihedrallist[n][1];
+    i3 = dihedrallist[n][2];
+    i4 = dihedrallist[n][3];
+    type = dihedrallist[n][4];
+
+    // 1st bond
+
+    vb1x = x[i1][0] - x[i2][0];
+    vb1y = x[i1][1] - x[i2][1];
+    vb1z = x[i1][2] - x[i2][2];
+    domain->minimum_image(vb1x,vb1y,vb1z);
+
+    // 2nd bond
+
+    vb2x = x[i3][0] - x[i2][0];
+    vb2y = x[i3][1] - x[i2][1];
+    vb2z = x[i3][2] - x[i2][2];
+    domain->minimum_image(vb2x,vb2y,vb2z);
+
+    vb2xm = -vb2x;
+    vb2ym = -vb2y;
+    vb2zm = -vb2z;
+    domain->minimum_image(vb2xm,vb2ym,vb2zm);
+
+    // 3rd bond
+
+    vb3x = x[i4][0] - x[i3][0];
+    vb3y = x[i4][1] - x[i3][1];
+    vb3z = x[i4][2] - x[i3][2];
+    domain->minimum_image(vb3x,vb3y,vb3z);
+
+    // distances
+
+    r1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z;
+    r1 = sqrt(r1mag2);
+    r2mag2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z;
+    r2 = sqrt(r2mag2);
+    r3mag2 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z;
+    r3 = sqrt(r3mag2);
+
+    sb1 = 1.0/r1mag2;
+    rb1 = 1.0/r1;
+    sb2 = 1.0/r2mag2;
+    rb2 = 1.0/r2;
+    sb3 = 1.0/r3mag2;
+    rb3 = 1.0/r3;
+
+    c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3;
+
+    // angles
+
+    r12c1 = rb1*rb2;
+    r12c2 = rb2*rb3;
+    costh12 = (vb1x*vb2x + vb1y*vb2y + vb1z*vb2z) * r12c1;
+    costh13 = c0;
+    costh23 = (vb2xm*vb3x + vb2ym*vb3y + vb2zm*vb3z) * r12c2;
+
+    // cos and sin of 2 angles and final c
+
+    sin2 = MAX(1.0 - costh12*costh12,0.0);
+    sc1 = sqrt(sin2);
+    if (sc1 < SMALL) sc1 = SMALL;
+    sc1 = 1.0/sc1;
+
+    sin2 = MAX(1.0 - costh23*costh23,0.0);
+    sc2 = sqrt(sin2);
+    if (sc2 < SMALL) sc2 = SMALL;
+    sc2 = 1.0/sc2;
+
+    s1 = sc1 * sc1;
+    s2 = sc2 * sc2;
+    s12 = sc1 * sc2;
+    c = (c0 + costh12*costh23) * s12;
+
+    // error check
+
+    if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) {
+      // use the cached rank: no MPI calls from inside the threaded region
+      int me = comm->me;
+      if (screen) {
+	char str[128];
+	sprintf(str,"Dihedral problem: %d " BIGINT_FORMAT " %d %d %d %d",
+		me,update->ntimestep,
+		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+	error->warning(FLERR,str,0);
+	fprintf(screen,"  1st atom: %d %g %g %g\n",
+		me,x[i1][0],x[i1][1],x[i1][2]);
+	fprintf(screen,"  2nd atom: %d %g %g %g\n",
+		me,x[i2][0],x[i2][1],x[i2][2]);
+	fprintf(screen,"  3rd atom: %d %g %g %g\n",
+		me,x[i3][0],x[i3][1],x[i3][2]);
+	fprintf(screen,"  4th atom: %d %g %g %g\n",
+		me,x[i4][0],x[i4][1],x[i4][2]);
+      }
+    }
+    // clamp c into [-1,1] before taking acos()
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+    cosphi = c;
+    phi = acos(c);
+
+    sinphi = sqrt(1.0 - c*c);
+    sinphi = MAX(sinphi,SMALL);
+
+    a11 = -c*sb1*s1;
+    a22 = sb2 * (2.0*costh13*s12 - c*(s1+s2));
+    a33 = -c*sb3*s2;
+    a12 = r12c1 * (costh12*c*s1 + costh23*s12);
+    a13 = rb1*rb3*s12;
+    a23 = r12c2 * (-costh23*c*s2 - costh12*s12);
+
+    sx1  = a11*vb1x + a12*vb2x + a13*vb3x;
+    sx2  = a12*vb1x + a22*vb2x + a23*vb3x;
+    sx12 = a13*vb1x + a23*vb2x + a33*vb3x;
+    sy1  = a11*vb1y + a12*vb2y + a13*vb3y;
+    sy2  = a12*vb1y + a22*vb2y + a23*vb3y;
+    sy12 = a13*vb1y + a23*vb2y + a33*vb3y;
+    sz1  = a11*vb1z + a12*vb2z + a13*vb3z;
+    sz2  = a12*vb1z + a22*vb2z + a23*vb3z;
+    sz12 = a13*vb1z + a23*vb2z + a33*vb3z;
+
+    // set up d(cos(phi))/d(r) and dphi/dr arrays
+
+    dcosphidr[0][0] = -sx1;
+    dcosphidr[0][1] = -sy1;
+    dcosphidr[0][2] = -sz1;
+    dcosphidr[1][0] = sx2 + sx1;
+    dcosphidr[1][1] = sy2 + sy1;
+    dcosphidr[1][2] = sz2 + sz1;
+    dcosphidr[2][0] = sx12 - sx2;
+    dcosphidr[2][1] = sy12 - sy2;
+    dcosphidr[2][2] = sz12 - sz2;
+    dcosphidr[3][0] = -sx12;
+    dcosphidr[3][1] = -sy12;
+    dcosphidr[3][2] = -sz12;
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+	dphidr[i][j] = -dcosphidr[i][j] / sinphi;
+
+    // energy
+
+    dphi1 = phi - phi1[type];
+    dphi2 = 2.0*phi - phi2[type];
+    dphi3 = 3.0*phi - phi3[type];
+
+    if (EFLAG) edihedral = k1[type]*(1.0 - cos(dphi1)) +
+		 k2[type]*(1.0 - cos(dphi2)) +
+		 k3[type]*(1.0 - cos(dphi3));
+
+    de_dihedral = k1[type]*sin(dphi1) + 2.0*k2[type]*sin(dphi2) +
+      3.0*k3[type]*sin(dphi3);
+
+    // torsion forces on all 4 atoms
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+	fabcd[i][j] = de_dihedral*dphidr[i][j];
+
+    // set up d(bond)/d(r) array
+    // dbonddr(i,j,k) = bond i, atom j, coordinate k
+
+    for (i = 0; i < 3; i++)
+      for (j = 0; j < 4; j++)
+	for (k = 0; k < 3; k++)
+	  dbonddr[i][j][k] = 0.0;
+
+    // bond1
+
+    dbonddr[0][0][0] = vb1x / r1;
+    dbonddr[0][0][1] = vb1y / r1;
+    dbonddr[0][0][2] = vb1z / r1;
+    dbonddr[0][1][0] = -vb1x / r1;
+    dbonddr[0][1][1] = -vb1y / r1;
+    dbonddr[0][1][2] = -vb1z / r1;
+
+    // bond2
+
+    dbonddr[1][1][0] = vb2x / r2;
+    dbonddr[1][1][1] = vb2y / r2;
+    dbonddr[1][1][2] = vb2z / r2;
+    dbonddr[1][2][0] = -vb2x / r2;
+    dbonddr[1][2][1] = -vb2y / r2;
+    dbonddr[1][2][2] = -vb2z / r2;
+
+    // bond3
+
+    dbonddr[2][2][0] = vb3x / r3;
+    dbonddr[2][2][1] = vb3y / r3;
+    dbonddr[2][2][2] = vb3z / r3;
+    dbonddr[2][3][0] = -vb3x / r3;
+    dbonddr[2][3][1] = -vb3y / r3;
+    dbonddr[2][3][2] = -vb3z / r3;
+
+    // set up d(theta)/d(r) array
+    // dthetadr(i,j,k) = angle i, atom j, coordinate k
+
+    for (i = 0; i < 2; i++)
+      for (j = 0; j < 4; j++)
+	for (k = 0; k < 3; k++)
+	  dthetadr[i][j][k] = 0.0;
+
+    t1 = costh12 / r1mag2;
+    t2 = costh23 / r2mag2;
+    t3 = costh12 / r2mag2;
+    t4 = costh23 / r3mag2;
+
+    // angle12
+
+    dthetadr[0][0][0] = sc1 * ((t1 * vb1x) - (vb2x * r12c1));
+    dthetadr[0][0][1] = sc1 * ((t1 * vb1y) - (vb2y * r12c1));
+    dthetadr[0][0][2] = sc1 * ((t1 * vb1z) - (vb2z * r12c1));
+
+    dthetadr[0][1][0] = sc1 * ((-t1 * vb1x) + (vb2x * r12c1) +
+			       (-t3 * vb2x) + (vb1x * r12c1));
+    dthetadr[0][1][1] = sc1 * ((-t1 * vb1y) + (vb2y * r12c1) +
+			       (-t3 * vb2y) + (vb1y * r12c1));
+    dthetadr[0][1][2] = sc1 * ((-t1 * vb1z) + (vb2z * r12c1) +
+			       (-t3 * vb2z) + (vb1z * r12c1));
+
+    dthetadr[0][2][0] = sc1 * ((t3 * vb2x) - (vb1x * r12c1));
+    dthetadr[0][2][1] = sc1 * ((t3 * vb2y) - (vb1y * r12c1));
+    dthetadr[0][2][2] = sc1 * ((t3 * vb2z) - (vb1z * r12c1));
+
+    // angle23
+
+    dthetadr[1][1][0] = sc2 * ((t2 * vb2x) + (vb3x * r12c2));
+    dthetadr[1][1][1] = sc2 * ((t2 * vb2y) + (vb3y * r12c2));
+    dthetadr[1][1][2] = sc2 * ((t2 * vb2z) + (vb3z * r12c2));
+
+    dthetadr[1][2][0] = sc2 * ((-t2 * vb2x) - (vb3x * r12c2) +
+			       (t4 * vb3x) + (vb2x * r12c2));
+    dthetadr[1][2][1] = sc2 * ((-t2 * vb2y) - (vb3y * r12c2) +
+			       (t4 * vb3y) + (vb2y * r12c2));
+    dthetadr[1][2][2] = sc2 * ((-t2 * vb2z) - (vb3z * r12c2) +
+			       (t4 * vb3z) + (vb2z * r12c2));
+
+    dthetadr[1][3][0] = -sc2 * ((t4 * vb3x) + (vb2x * r12c2));
+    dthetadr[1][3][1] = -sc2 * ((t4 * vb3y) + (vb2y * r12c2));
+    dthetadr[1][3][2] = -sc2 * ((t4 * vb3z) + (vb2z * r12c2));
+
+    // mid-bond/torsion coupling
+    // energy on bond2 (middle bond)
+
+    cos2phi = cos(2.0*phi);
+    cos3phi = cos(3.0*phi);
+
+    bt1 = mbt_f1[type] * cosphi;
+    bt2 = mbt_f2[type] * cos2phi;
+    bt3 = mbt_f3[type] * cos3phi;
+    sumbte = bt1 + bt2 + bt3;
+    db = r2 - mbt_r0[type];
+    if (EFLAG) edihedral += db * sumbte;
+
+    // force on bond2
+
+    bt1 = -mbt_f1[type] * sinphi;
+    bt2 = -2.0 * mbt_f2[type] * sin(2.0*phi);
+    bt3 = -3.0 * mbt_f3[type] * sin(3.0*phi);
+    sumbtf = bt1 + bt2 + bt3;
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+	fabcd[i][j] += db*sumbtf*dphidr[i][j] + sumbte*dbonddr[1][i][j];
+
+    // end-bond/torsion coupling
+    // energy on bond1 (first bond)
+
+    bt1 = ebt_f1_1[type] * cosphi;
+    bt2 = ebt_f2_1[type] * cos2phi;
+    bt3 = ebt_f3_1[type] * cos3phi;
+    sumbte = bt1 + bt2 + bt3;
+
+    db = r1 - ebt_r0_1[type];
+    if (EFLAG) edihedral += db * (bt1+bt2+bt3);
+
+    // force on bond1
+
+    bt1 = ebt_f1_1[type] * sinphi;
+    bt2 = 2.0 * ebt_f2_1[type] * sin(2.0*phi);
+    bt3 = 3.0 * ebt_f3_1[type] * sin(3.0*phi);
+    sumbtf = bt1 + bt2 + bt3;
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+	fabcd[i][j] -= db*sumbtf*dphidr[i][j] + sumbte*dbonddr[0][i][j];
+
+    // end-bond/torsion coupling
+    // energy on bond3 (last bond)
+
+    bt1 = ebt_f1_2[type] * cosphi;
+    bt2 = ebt_f2_2[type] * cos2phi;
+    bt3 = ebt_f3_2[type] * cos3phi;
+    sumbte = bt1 + bt2 + bt3;
+
+    db = r3 - ebt_r0_2[type];
+    if (EFLAG) edihedral += db * (bt1+bt2+bt3);
+
+    // force on bond3
+
+    bt1 = -ebt_f1_2[type] * sinphi;
+    bt2 = -2.0 * ebt_f2_2[type] * sin(2.0*phi);
+    bt3 = -3.0 * ebt_f3_2[type] * sin(3.0*phi);
+    sumbtf = bt1 + bt2 + bt3;
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+	fabcd[i][j] += db*sumbtf*dphidr[i][j] + sumbte*dbonddr[2][i][j];
+
+    // angle/torsion coupling
+    // energy on angle1
+
+    at1 = at_f1_1[type] * cosphi;
+    at2 = at_f2_1[type] * cos2phi;
+    at3 = at_f3_1[type] * cos3phi;
+    sumbte = at1 + at2 + at3;
+
+    da = acos(costh12) - at_theta0_1[type];
+    if (EFLAG) edihedral += da * (at1+at2+at3);
+
+    // force on angle1
+
+    bt1 = at_f1_1[type] * sinphi;
+    bt2 = 2.0 * at_f2_1[type] * sin(2.0*phi);
+    bt3 = 3.0 * at_f3_1[type] * sin(3.0*phi);
+    sumbtf = bt1 + bt2 + bt3;
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+	fabcd[i][j] -= da*sumbtf*dphidr[i][j] + sumbte*dthetadr[0][i][j];
+
+    // energy on angle2
+
+    at1 = at_f1_2[type] * cosphi;
+    at2 = at_f2_2[type] * cos2phi;
+    at3 = at_f3_2[type] * cos3phi;
+    sumbte = at1 + at2 + at3;
+
+    da = acos(costh23) - at_theta0_2[type];
+    if (EFLAG) edihedral += da * (at1+at2+at3);
+
+    // force on angle2
+
+    bt1 = -at_f1_2[type] * sinphi;
+    bt2 = -2.0 * at_f2_2[type] * sin(2.0*phi);
+    bt3 = -3.0 * at_f3_2[type] * sin(3.0*phi);
+    sumbtf = bt1 + bt2 + bt3;
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+	fabcd[i][j] += da*sumbtf*dphidr[i][j] + sumbte*dthetadr[1][i][j];
+
+    // angle/angle/torsion coupling
+
+    da1 = acos(costh12) - aat_theta0_1[type];
+    da2 = acos(costh23) - aat_theta0_2[type];
+
+    if (EFLAG) edihedral += aat_k[type]*da1*da2*cosphi;
+
+    for (i = 0; i < 4; i++)
+      for (j = 0; j < 3; j++)
+	fabcd[i][j] -= aat_k[type] *
+	  (cosphi * (da2*dthetadr[0][i][j] - da1*dthetadr[1][i][j]) +
+	   sinphi * da1*da2*dphidr[i][j]);
+
+    // bond1/bond3 coupling
+
+    if (fabs(bb13t_k[type]) > SMALL) {
+
+      r1_0 = bb13t_r10[type];
+      r3_0 = bb13t_r30[type];
+      dr1 = r1 - r1_0;
+      dr2 = r3 - r3_0;
+      tk1 = -bb13t_k[type] * dr1 / r3;
+      tk2 = -bb13t_k[type] * dr2 / r1;
+
+      if (EFLAG) edihedral += bb13t_k[type]*dr1*dr2;
+
+      fabcd[0][0] += tk2 * vb1x;
+      fabcd[0][1] += tk2 * vb1y;
+      fabcd[0][2] += tk2 * vb1z;
+
+      fabcd[1][0] -= tk2 * vb1x;
+      fabcd[1][1] -= tk2 * vb1y;
+      fabcd[1][2] -= tk2 * vb1z;
+
+      fabcd[2][0] -= tk1 * vb3x;
+      fabcd[2][1] -= tk1 * vb3y;
+      fabcd[2][2] -= tk1 * vb3z;
+
+      fabcd[3][0] += tk1 * vb3x;
+      fabcd[3][1] += tk1 * vb3y;
+      fabcd[3][2] += tk1 * vb3z;
+    }
+
+    // apply force to each of 4 atoms
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += fabcd[0][0];
+      f[i1][1] += fabcd[0][1];
+      f[i1][2] += fabcd[0][2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] += fabcd[1][0];
+      f[i2][1] += fabcd[1][1];
+      f[i2][2] += fabcd[1][2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += fabcd[2][0];
+      f[i3][1] += fabcd[2][1];
+      f[i3][2] += fabcd[2][2];
+    }
+
+    if (NEWTON_BOND || i4 < nlocal) {
+      f[i4][0] += fabcd[3][0];
+      f[i4][1] += fabcd[3][1];
+      f[i4][2] += fabcd[3][2];
+    }
+
+    if (EVFLAG)
+      ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,
+		   fabcd[0],fabcd[2],fabcd[3],
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+  }
+}
+
diff --git a/src/USER-OMP/dihedral_class2_omp.h b/src/USER-OMP/dihedral_class2_omp.h
new file mode 100644
index 000000000..d26f2f871
--- /dev/null
+++ b/src/USER-OMP/dihedral_class2_omp.h
@@ -0,0 +1,48 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef DIHEDRAL_CLASS
+
+DihedralStyle(class2/omp,DihedralClass2OMP)
+
+#else
+
+#ifndef LMP_DIHEDRAL_CLASS2_OMP_H
+#define LMP_DIHEDRAL_CLASS2_OMP_H
+
+#include "dihedral_class2.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class DihedralClass2OMP : public DihedralClass2, public ThrOMP {
+  // class2 dihedral style whose compute() calls eval() from an OpenMP region
+ public:
+    DihedralClass2OMP(class LAMMPS *lmp) : 
+      DihedralClass2(lmp), ThrOMP(lmp,DIHEDRAL) {};
+  // arguments are (eflag, vflag): energy and virial tally requests
+  virtual void compute(int, int);
+  // eval() handles dihedrals [ifrom,ito) for thread tid, adding forces into f
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp b/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp
new file mode 100644
index 000000000..a6c027e92
--- /dev/null
+++ b/src/USER-OMP/dihedral_cosine_shift_exp_omp.cpp
@@ -0,0 +1,263 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "math.h"
+#include "dihedral_cosine_shift_exp_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "force.h"
+#include "update.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define TOLERANCE 0.05
+#define SMALL     0.001
+
+/* ---------------------------------------------------------------------- */
+
+void DihedralCosineShiftExpOMP::compute(int eflag, int vflag)
+{
+  // Driver for the cosine/shift/exp dihedral forces: prepares tallying,
+  // runs eval<>() inside the parallel region, then reduces the results.
+  if (!(eflag || vflag)) {
+    evflag = 0;
+  } else {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  }
+
+  const int inum = neighbor->ndihedrallist;
+  const int nthreads = comm->nthreads;
+  const int nall = atom->nlocal + atom->nghost;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int nfrom, nto, tid;
+    double **f_thr = loop_setup_thr(atom->f, nfrom, nto, tid, inum, nall, nthreads);
+
+    // select the eval<EVFLAG,EFLAG,NEWTON_BOND> instantiation matching the run-time flags
+    if (!evflag) {
+      if (force->newton_bond) eval<0,0,1>(f_thr, nfrom, nto, tid);
+      else eval<0,0,0>(f_thr, nfrom, nto, tid);
+    } else if (eflag) {
+      if (force->newton_bond) eval<1,1,1>(f_thr, nfrom, nto, tid);
+      else eval<1,1,0>(f_thr, nfrom, nto, tid);
+    } else {
+      if (force->newton_bond) eval<1,0,1>(f_thr, nfrom, nto, tid);
+      else eval<1,0,0>(f_thr, nfrom, nto, tid);
+    }
+
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>  // EVFLAG: call ev_tally_thr; EFLAG: compute edihedral; NEWTON_BOND: apply forces to atoms with index >= nlocal too
+void DihedralCosineShiftExpOMP::eval(double **f, int nfrom, int nto, int tid)  // f: force array accumulated into; [nfrom,nto): range of dihedral-list entries; tid: id printed in warnings and passed to ev_tally_thr
+{
+  // Computes the forces (and, per the template flags, the energy/virial tallies) for entries nfrom..nto-1 of neighbor->dihedrallist.
+  int i1,i2,i3,i4,n,type;
+  double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm;
+  double edihedral,f1[3],f2[3],f3[3],f4[3];
+  double ax,ay,az,bx,by,bz,rasq,rbsq,rgsq,rg,rginv,ra2inv,rb2inv,rabinv;
+  double df,fg,hg,fga,hgb,gaa,gbb;
+  double dtfx,dtfy,dtfz,dtgx,dtgy,dtgz,dthx,dthy,dthz;  
+  double c,s,sx2,sy2,sz2;
+  double cccpsss,cssmscc,exp2;
+
+  edihedral = 0.0;  // stays 0.0 for the tally call when EFLAG is 0
+
+  double **x = atom->x;
+  int **dihedrallist = neighbor->dihedrallist;
+  int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = dihedrallist[n][0];  // columns 0-3: indices of the four atoms
+    i2 = dihedrallist[n][1];
+    i3 = dihedrallist[n][2];
+    i4 = dihedrallist[n][3];
+    type = dihedrallist[n][4];  // column 4: index into the per-type coefficient arrays
+
+    // 1st bond
+
+    vb1x = x[i1][0] - x[i2][0];
+    vb1y = x[i1][1] - x[i2][1];
+    vb1z = x[i1][2] - x[i2][2];
+    domain->minimum_image(vb1x,vb1y,vb1z);
+
+    // 2nd bond
+
+    vb2x = x[i3][0] - x[i2][0];
+    vb2y = x[i3][1] - x[i2][1];
+    vb2z = x[i3][2] - x[i2][2];
+    domain->minimum_image(vb2x,vb2y,vb2z);
+    // negated copy of the 2nd bond, passed through minimum_image once more
+    vb2xm = -vb2x;
+    vb2ym = -vb2y;
+    vb2zm = -vb2z;
+    domain->minimum_image(vb2xm,vb2ym,vb2zm);
+
+    // 3rd bond
+
+    vb3x = x[i4][0] - x[i3][0];
+    vb3y = x[i4][1] - x[i3][1];
+    vb3z = x[i4][2] - x[i3][2];
+    domain->minimum_image(vb3x,vb3y,vb3z);
+    
+    // c,s calculation
+    // a = vb1 x vb2m and b = vb3 x vb2m (cross products)
+    ax = vb1y*vb2zm - vb1z*vb2ym;
+    ay = vb1z*vb2xm - vb1x*vb2zm;
+    az = vb1x*vb2ym - vb1y*vb2xm;
+    bx = vb3y*vb2zm - vb3z*vb2ym;
+    by = vb3z*vb2xm - vb3x*vb2zm;
+    bz = vb3x*vb2ym - vb3y*vb2xm;
+
+    rasq = ax*ax + ay*ay + az*az;
+    rbsq = bx*bx + by*by + bz*bz;
+    rgsq = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm;
+    rg = sqrt(rgsq);
+    // inverses stay 0.0 when the corresponding length is zero, so no division by zero happens here
+    rginv = ra2inv = rb2inv = 0.0;
+    if (rg > 0) rginv = 1.0/rg;
+    if (rasq > 0) ra2inv = 1.0/rasq;
+    if (rbsq > 0) rb2inv = 1.0/rbsq;
+    rabinv = sqrt(ra2inv*rb2inv);  // 1/(|a||b|) when both are non-zero
+
+    c = (ax*bx + ay*by + az*bz)*rabinv;  // a.b/(|a||b|)
+    s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z);
+
+    // error check
+    // warn (only if a screen stream exists) when c lies outside [-1-TOLERANCE, 1+TOLERANCE]
+    if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) {
+      int me = comm->me;
+
+      if (screen) {
+	char str[128];
+	sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
+		me,tid,update->ntimestep,
+		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+	error->warning(FLERR,str,0);
+	fprintf(screen,"  1st atom: %d %g %g %g\n",
+		me,x[i1][0],x[i1][1],x[i1][2]);
+	fprintf(screen,"  2nd atom: %d %g %g %g\n",
+		me,x[i2][0],x[i2][1],x[i2][2]);
+	fprintf(screen,"  3rd atom: %d %g %g %g\n",
+		me,x[i3][0],x[i3][1],x[i3][2]);
+	fprintf(screen,"  4th atom: %d %g %g %g\n",
+		me,x[i4][0],x[i4][1],x[i4][2]);
+      }
+    }
+    // clamp c into [-1,1] regardless of whether a warning was issued
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+
+    double aa=a[type];  // a, umin, cost, sint, opt1, doExpansion are not declared in this block; presumably per-type members of the base class -- confirm
+    double uumin=umin[type];
+    
+    cccpsss = c*cost[type]+s*sint[type];
+    cssmscc = c*sint[type]-s*cost[type];
+
+    if (doExpansion[type]) { 
+      //  |a|<0.001 so use expansions relative precision <1e-5
+      if (EFLAG) edihedral = -0.125*(1+cccpsss)*(4+aa*(cccpsss-1))*uumin;
+      df=0.5*uumin*( cssmscc + 0.5*aa*cccpsss);
+    } else {
+      exp2=exp(0.5*aa*(1+cccpsss));  // exponential form, used when doExpansion[type] is false
+      if (EFLAG) edihedral = opt1[type]*(1-exp2);
+      df= 0.5*opt1[type]*aa* ( exp2*cssmscc );
+    }
+       
+    fg = vb1x*vb2xm + vb1y*vb2ym + vb1z*vb2zm;  // vb1 . vb2m
+    hg = vb3x*vb2xm + vb3y*vb2ym + vb3z*vb2zm;  // vb3 . vb2m
+    fga = fg*ra2inv*rginv;
+    hgb = hg*rb2inv*rginv;
+    gaa = -ra2inv*rg;
+    gbb = rb2inv*rg;
+    
+    dtfx = gaa*ax;
+    dtfy = gaa*ay;
+    dtfz = gaa*az;
+    dtgx = fga*ax - hgb*bx;
+    dtgy = fga*ay - hgb*by;
+    dtgz = fga*az - hgb*bz;
+    dthx = gbb*bx;
+    dthy = gbb*by;
+    dthz = gbb*bz;
+    // scale the direction vectors by df to obtain the force contributions
+    sx2 = df*dtgx;
+    sy2 = df*dtgy;
+    sz2 = df*dtgz;
+
+    f1[0] = df*dtfx;
+    f1[1] = df*dtfy;
+    f1[2] = df*dtfz;
+
+    f2[0] = sx2 - f1[0];
+    f2[1] = sy2 - f1[1];
+    f2[2] = sz2 - f1[2];
+
+    f4[0] = df*dthx;
+    f4[1] = df*dthy;
+    f4[2] = df*dthz;
+    // by construction f1 + f2 + f3 + f4 = 0 component-wise
+    f3[0] = -sx2 - f4[0];
+    f3[1] = -sy2 - f4[1];
+    f3[2] = -sz2 - f4[2];
+
+    // apply force to each of 4 atoms
+    // with NEWTON_BOND == 0 only atoms with index < nlocal receive their share
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] += f2[0];
+      f[i2][1] += f2[1];
+      f[i2][2] += f2[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (NEWTON_BOND || i4 < nlocal) {
+      f[i4][0] += f4[0];
+      f[i4][1] += f4[1];
+      f[i4][2] += f4[2];
+    }
+    // tally receives f1, f3, f4 (not f2) together with the three bond vectors and tid
+    if (EVFLAG)
+      ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4,
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+  }
+}
+
diff --git a/src/USER-OMP/dihedral_cosine_shift_exp_omp.h b/src/USER-OMP/dihedral_cosine_shift_exp_omp.h
new file mode 100644
index 000000000..eb906ab95
--- /dev/null
+++ b/src/USER-OMP/dihedral_cosine_shift_exp_omp.h
@@ -0,0 +1,48 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef DIHEDRAL_CLASS
+
+DihedralStyle(cosine/shift/exp/omp,DihedralCosineShiftExpOMP)
+
+#else
+
+#ifndef LMP_DIHEDRAL_COSINE_SHIFT_EXP_OMP_H
+#define LMP_DIHEDRAL_COSINE_SHIFT_EXP_OMP_H
+
+#include "dihedral_cosine_shift_exp.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class DihedralCosineShiftExpOMP : public DihedralCosineShiftExp, public ThrOMP {
+  // Dihedral style class named in the DihedralStyle(cosine/shift/exp/omp,...) entry; derives from DihedralCosineShiftExp and ThrOMP.
+ public:
+    DihedralCosineShiftExpOMP(class LAMMPS *lmp) : 
+      DihedralCosineShiftExp(lmp), ThrOMP(lmp,DIHEDRAL) {};  // forwards lmp to both bases; ThrOMP additionally receives the DIHEDRAL tag
+  // Force computation entry point; the .cpp definition names the arguments (eflag, vflag).
+  virtual void compute(int, int);
+  // Kernel templated on three compile-time int flags; works on the [ifrom,ito) range and receives the id tid.
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/dihedral_harmonic_omp.cpp b/src/USER-OMP/dihedral_harmonic_omp.cpp
new file mode 100644
index 000000000..0fa24090a
--- /dev/null
+++ b/src/USER-OMP/dihedral_harmonic_omp.cpp
@@ -0,0 +1,270 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "math.h"
+#include "dihedral_harmonic_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "force.h"
+#include "update.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define TOLERANCE 0.05
+#define SMALL     0.001
+
+/* ---------------------------------------------------------------------- */
+
+void DihedralHarmonicOMP::compute(int eflag, int vflag)
+{
+  // Driver for the harmonic dihedral forces: prepares tallying,
+  // runs eval<>() inside the parallel region, then reduces the results.
+  if (!(eflag || vflag)) {
+    evflag = 0;
+  } else {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  }
+
+  const int inum = neighbor->ndihedrallist;
+  const int nthreads = comm->nthreads;
+  const int nall = atom->nlocal + atom->nghost;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int nfrom, nto, tid;
+    double **f_thr = loop_setup_thr(atom->f, nfrom, nto, tid, inum, nall, nthreads);
+
+    // select the eval<EVFLAG,EFLAG,NEWTON_BOND> instantiation matching the run-time flags
+    if (!evflag) {
+      if (force->newton_bond) eval<0,0,1>(f_thr, nfrom, nto, tid);
+      else eval<0,0,0>(f_thr, nfrom, nto, tid);
+    } else if (eflag) {
+      if (force->newton_bond) eval<1,1,1>(f_thr, nfrom, nto, tid);
+      else eval<1,1,0>(f_thr, nfrom, nto, tid);
+    } else {
+      if (force->newton_bond) eval<1,0,1>(f_thr, nfrom, nto, tid);
+      else eval<1,0,0>(f_thr, nfrom, nto, tid);
+    }
+
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>  // EVFLAG: call ev_tally_thr; EFLAG: compute edihedral; NEWTON_BOND: apply forces to atoms with index >= nlocal too
+void DihedralHarmonicOMP::eval(double **f, int nfrom, int nto, int tid)  // f: force array accumulated into; [nfrom,nto): range of dihedral-list entries; tid: id printed in warnings and passed to ev_tally_thr
+{
+  // Computes the forces (and, per the template flags, the energy/virial tallies) for entries nfrom..nto-1 of neighbor->dihedrallist.
+  int i1,i2,i3,i4,i,m,n,type;
+  double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm;
+  double edihedral,f1[3],f2[3],f3[3],f4[3];
+  double ax,ay,az,bx,by,bz,rasq,rbsq,rgsq,rg,rginv,ra2inv,rb2inv,rabinv;
+  double df,df1,ddf1,fg,hg,fga,hgb,gaa,gbb;
+  double dtfx,dtfy,dtfz,dtgx,dtgy,dtgz,dthx,dthy,dthz;  
+  double c,s,p,sx2,sy2,sz2;
+
+  edihedral = 0.0;  // stays 0.0 for the tally call when EFLAG is 0
+
+  double **x = atom->x;
+  int **dihedrallist = neighbor->dihedrallist;
+  int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = dihedrallist[n][0];  // columns 0-3: indices of the four atoms
+    i2 = dihedrallist[n][1];
+    i3 = dihedrallist[n][2];
+    i4 = dihedrallist[n][3];
+    type = dihedrallist[n][4];  // column 4: index into the per-type coefficient arrays
+
+    // 1st bond
+
+    vb1x = x[i1][0] - x[i2][0];
+    vb1y = x[i1][1] - x[i2][1];
+    vb1z = x[i1][2] - x[i2][2];
+    domain->minimum_image(vb1x,vb1y,vb1z);
+
+    // 2nd bond
+
+    vb2x = x[i3][0] - x[i2][0];
+    vb2y = x[i3][1] - x[i2][1];
+    vb2z = x[i3][2] - x[i2][2];
+    domain->minimum_image(vb2x,vb2y,vb2z);
+    // negated copy of the 2nd bond, passed through minimum_image once more
+    vb2xm = -vb2x;
+    vb2ym = -vb2y;
+    vb2zm = -vb2z;
+    domain->minimum_image(vb2xm,vb2ym,vb2zm);
+
+    // 3rd bond
+
+    vb3x = x[i4][0] - x[i3][0];
+    vb3y = x[i4][1] - x[i3][1];
+    vb3z = x[i4][2] - x[i3][2];
+    domain->minimum_image(vb3x,vb3y,vb3z);
+    
+    // c,s calculation
+    // a = vb1 x vb2m and b = vb3 x vb2m (cross products)
+    ax = vb1y*vb2zm - vb1z*vb2ym;
+    ay = vb1z*vb2xm - vb1x*vb2zm;
+    az = vb1x*vb2ym - vb1y*vb2xm;
+    bx = vb3y*vb2zm - vb3z*vb2ym;
+    by = vb3z*vb2xm - vb3x*vb2zm;
+    bz = vb3x*vb2ym - vb3y*vb2xm;
+
+    rasq = ax*ax + ay*ay + az*az;
+    rbsq = bx*bx + by*by + bz*bz;
+    rgsq = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm;
+    rg = sqrt(rgsq);
+    // inverses stay 0.0 when the corresponding length is zero, so no division by zero happens here
+    rginv = ra2inv = rb2inv = 0.0;
+    if (rg > 0) rginv = 1.0/rg;
+    if (rasq > 0) ra2inv = 1.0/rasq;
+    if (rbsq > 0) rb2inv = 1.0/rbsq;
+    rabinv = sqrt(ra2inv*rb2inv);  // 1/(|a||b|) when both are non-zero
+
+    c = (ax*bx + ay*by + az*bz)*rabinv;  // a.b/(|a||b|)
+    s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z);
+
+    // error check
+    // warn (only if a screen stream exists) when c lies outside [-1-TOLERANCE, 1+TOLERANCE]
+    if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) {
+      int me = comm->me;
+
+      if (screen) {
+	char str[128];
+	sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
+		me,tid,update->ntimestep,
+		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+	error->warning(FLERR,str,0);
+	fprintf(screen,"  1st atom: %d %g %g %g\n",
+		me,x[i1][0],x[i1][1],x[i1][2]);
+	fprintf(screen,"  2nd atom: %d %g %g %g\n",
+		me,x[i2][0],x[i2][1],x[i2][2]);
+	fprintf(screen,"  3rd atom: %d %g %g %g\n",
+		me,x[i3][0],x[i3][1],x[i3][2]);
+	fprintf(screen,"  4th atom: %d %g %g %g\n",
+		me,x[i4][0],x[i4][1],x[i4][2]);
+      }
+    }
+    // clamp c into [-1,1] regardless of whether a warning was issued
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+    // ddf1 is initialised too: with m == 0 the loop below never assigns it, yet it is read right after the loop
+    m = multiplicity[type];
+    p = 1.0;
+    ddf1 = df1 = 0.0;
+    // angle-addition recurrence: treating (c,s) as (cos,sin) of one angle phi, m steps give p = cos(m*phi), df1 = sin(m*phi)
+    for (i = 0; i < m; i++) {
+      ddf1 = p*c - df1*s;
+      df1 = p*s + df1*c;
+      p = ddf1;
+    }
+    // combine with the per-type cos_shift/sin_shift pair (presumably cos and sin of a shift angle -- confirm)
+    p = p*cos_shift[type] + df1*sin_shift[type];
+    df1 = df1*cos_shift[type] - ddf1*sin_shift[type];
+    df1 *= -m;
+    p += 1.0;
+    // zero multiplicity: constant term only, no force contribution
+    if (m == 0) {
+      p = 1.0 + cos_shift[type];
+      df1 = 0.0;
+    }
+
+    if (EFLAG) edihedral = k[type] * p; 
+       
+    fg = vb1x*vb2xm + vb1y*vb2ym + vb1z*vb2zm;  // vb1 . vb2m
+    hg = vb3x*vb2xm + vb3y*vb2ym + vb3z*vb2zm;  // vb3 . vb2m
+    fga = fg*ra2inv*rginv;
+    hgb = hg*rb2inv*rginv;
+    gaa = -ra2inv*rg;
+    gbb = rb2inv*rg;
+    
+    dtfx = gaa*ax;
+    dtfy = gaa*ay;
+    dtfz = gaa*az;
+    dtgx = fga*ax - hgb*bx;
+    dtgy = fga*ay - hgb*by;
+    dtgz = fga*az - hgb*bz;
+    dthx = gbb*bx;
+    dthy = gbb*by;
+    dthz = gbb*bz;
+    // common scalar prefactor applied to all direction vectors below
+    df = -k[type] * df1;
+    
+    sx2 = df*dtgx;
+    sy2 = df*dtgy;
+    sz2 = df*dtgz;
+
+    f1[0] = df*dtfx;
+    f1[1] = df*dtfy;
+    f1[2] = df*dtfz;
+
+    f2[0] = sx2 - f1[0];
+    f2[1] = sy2 - f1[1];
+    f2[2] = sz2 - f1[2];
+
+    f4[0] = df*dthx;
+    f4[1] = df*dthy;
+    f4[2] = df*dthz;
+    // by construction f1 + f2 + f3 + f4 = 0 component-wise
+    f3[0] = -sx2 - f4[0];
+    f3[1] = -sy2 - f4[1];
+    f3[2] = -sz2 - f4[2];
+    
+    // apply force to each of 4 atoms
+    // with NEWTON_BOND == 0 only atoms with index < nlocal receive their share
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] += f2[0];
+      f[i2][1] += f2[1];
+      f[i2][2] += f2[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (NEWTON_BOND || i4 < nlocal) {
+      f[i4][0] += f4[0];
+      f[i4][1] += f4[1];
+      f[i4][2] += f4[2];
+    }
+    // tally receives f1, f3, f4 (not f2) together with the three bond vectors and tid
+    if (EVFLAG)
+      ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4,
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+  }
+}
+
diff --git a/src/USER-OMP/dihedral_harmonic_omp.h b/src/USER-OMP/dihedral_harmonic_omp.h
new file mode 100644
index 000000000..2d7bae64e
--- /dev/null
+++ b/src/USER-OMP/dihedral_harmonic_omp.h
@@ -0,0 +1,48 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef DIHEDRAL_CLASS
+
+DihedralStyle(harmonic/omp,DihedralHarmonicOMP)
+
+#else
+
+#ifndef LMP_DIHEDRAL_HARMONIC_OMP_H
+#define LMP_DIHEDRAL_HARMONIC_OMP_H
+
+#include "dihedral_harmonic.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class DihedralHarmonicOMP : public DihedralHarmonic, public ThrOMP {
+  // Dihedral style class named in the DihedralStyle(harmonic/omp,...) entry; derives from DihedralHarmonic and ThrOMP.
+ public:
+    DihedralHarmonicOMP(class LAMMPS *lmp) : 
+      DihedralHarmonic(lmp), ThrOMP(lmp,DIHEDRAL) {};  // forwards lmp to both bases; ThrOMP additionally receives the DIHEDRAL tag
+  // Force computation entry point; the .cpp definition names the arguments (eflag, vflag).
+  virtual void compute(int, int);
+  // Kernel templated on three compile-time int flags; works on the [ifrom,ito) range and receives the id tid.
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/dihedral_helix_omp.cpp b/src/USER-OMP/dihedral_helix_omp.cpp
new file mode 100644
index 000000000..a3ca969ef
--- /dev/null
+++ b/src/USER-OMP/dihedral_helix_omp.cpp
@@ -0,0 +1,280 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "math.h"
+#include "dihedral_helix_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "force.h"
+#include "update.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define TOLERANCE 0.05
+#define SMALL     0.001
+#define SMALLER   0.00001
+
+/* ---------------------------------------------------------------------- */
+
+void DihedralHelixOMP::compute(int eflag, int vflag)
+{
+  // Driver for the helix dihedral forces: prepares tallying,
+  // runs eval<>() inside the parallel region, then reduces the results.
+  if (!(eflag || vflag)) {
+    evflag = 0;
+  } else {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  }
+
+  const int inum = neighbor->ndihedrallist;
+  const int nthreads = comm->nthreads;
+  const int nall = atom->nlocal + atom->nghost;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int nfrom, nto, tid;
+    double **f_thr = loop_setup_thr(atom->f, nfrom, nto, tid, inum, nall, nthreads);
+
+    // select the eval<EVFLAG,EFLAG,NEWTON_BOND> instantiation matching the run-time flags
+    if (!evflag) {
+      if (force->newton_bond) eval<0,0,1>(f_thr, nfrom, nto, tid);
+      else eval<0,0,0>(f_thr, nfrom, nto, tid);
+    } else if (eflag) {
+      if (force->newton_bond) eval<1,1,1>(f_thr, nfrom, nto, tid);
+      else eval<1,1,0>(f_thr, nfrom, nto, tid);
+    } else {
+      if (force->newton_bond) eval<1,0,1>(f_thr, nfrom, nto, tid);
+      else eval<1,0,0>(f_thr, nfrom, nto, tid);
+    }
+
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>  // EVFLAG: call ev_tally_thr; EFLAG: compute edihedral; NEWTON_BOND: apply forces to atoms with index >= nlocal too
+void DihedralHelixOMP::eval(double **f, int nfrom, int nto, int tid)  // f: force array accumulated into; [nfrom,nto): range of dihedral-list entries; tid: id printed in warnings and passed to ev_tally_thr
+{
+  // Computes the forces (and, per the template flags, the energy/virial tallies) for entries nfrom..nto-1 of neighbor->dihedrallist.
+  int i1,i2,i3,i4,n,type;
+  double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm;
+  double edihedral,f1[3],f2[3],f3[3],f4[3];
+  double sb1,sb2,sb3,rb1,rb3,c0,b1mag2,b1mag,b2mag2;
+  double b2mag,b3mag2,b3mag,ctmp,r12c1,c1mag,r12c2;
+  double c2mag,sc1,sc2,s1,s12,c,pd,a,a11,a22;
+  double a33,a12,a13,a23,sx2,sy2,sz2;
+  double s2,cx,cy,cz,cmag,dx,phi,si,siinv,sin2;
+
+  edihedral = 0.0;  // stays 0.0 for the tally call when EFLAG is 0
+
+  double **x = atom->x;
+  int **dihedrallist = neighbor->dihedrallist;
+  int nlocal = atom->nlocal;
+
+  for (n = nfrom; n < nto; n++) {
+    i1 = dihedrallist[n][0];  // columns 0-3: indices of the four atoms
+    i2 = dihedrallist[n][1];
+    i3 = dihedrallist[n][2];
+    i4 = dihedrallist[n][3];
+    type = dihedrallist[n][4];  // column 4: index into the per-type coefficient arrays
+
+    // 1st bond
+
+    vb1x = x[i1][0] - x[i2][0];
+    vb1y = x[i1][1] - x[i2][1];
+    vb1z = x[i1][2] - x[i2][2];
+    domain->minimum_image(vb1x,vb1y,vb1z);
+
+    // 2nd bond
+
+    vb2x = x[i3][0] - x[i2][0];
+    vb2y = x[i3][1] - x[i2][1];
+    vb2z = x[i3][2] - x[i2][2];
+    domain->minimum_image(vb2x,vb2y,vb2z);
+    // negated copy of the 2nd bond, passed through minimum_image once more
+    vb2xm = -vb2x;
+    vb2ym = -vb2y;
+    vb2zm = -vb2z;
+    domain->minimum_image(vb2xm,vb2ym,vb2zm);
+
+    // 3rd bond
+
+    vb3x = x[i4][0] - x[i3][0];
+    vb3y = x[i4][1] - x[i3][1];
+    vb3z = x[i4][2] - x[i3][2];
+    domain->minimum_image(vb3x,vb3y,vb3z);
+    
+    // c0 calculation
+    // NOTE(review): the inverse squared lengths below are not guarded against zero-length bond vectors
+    sb1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z);
+    sb2 = 1.0 / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z);
+    sb3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z);
+    // rb1, rb3: inverse lengths of the 1st and 3rd bond
+    rb1 = sqrt(sb1);
+    rb3 = sqrt(sb3);
+    // c0: normalized dot product of the 1st and 3rd bond
+    c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3;
+
+    // 1st and 2nd angle
+    // squared lengths and lengths of the three bonds
+    b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z;
+    b1mag = sqrt(b1mag2);
+    b2mag2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z;
+    b2mag = sqrt(b2mag2);
+    b3mag2 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z;
+    b3mag = sqrt(b3mag2);
+
+    ctmp = vb1x*vb2x + vb1y*vb2y + vb1z*vb2z;
+    r12c1 = 1.0 / (b1mag*b2mag);
+    c1mag = ctmp * r12c1;  // normalized dot product of vb1 and vb2
+
+    ctmp = vb2xm*vb3x + vb2ym*vb3y + vb2zm*vb3z;
+    r12c2 = 1.0 / (b2mag*b3mag);
+    c2mag = ctmp * r12c2;  // normalized dot product of vb2m and vb3
+
+    // cos and sin of 2 angles and final c
+    // sc1, sc2 end up as 1/sin of each angle, with the sine floored at SMALL before inversion
+    sin2 = MAX(1.0 - c1mag*c1mag,0.0);
+    sc1 = sqrt(sin2);
+    if (sc1 < SMALL) sc1 = SMALL;
+    sc1 = 1.0/sc1;
+
+    sin2 = MAX(1.0 - c2mag*c2mag,0.0);
+    sc2 = sqrt(sin2);
+    if (sc2 < SMALL) sc2 = SMALL;
+    sc2 = 1.0/sc2;
+
+    s1 = sc1 * sc1;
+    s2 = sc2 * sc2;
+    s12 = sc1 * sc2;
+    c = (c0 + c1mag*c2mag) * s12;
+    // (cx,cy,cz) = vb1 x vb2; the sign of dx later selects the sign of phi
+    cx = vb1y*vb2z - vb1z*vb2y;
+    cy = vb1z*vb2x - vb1x*vb2z;
+    cz = vb1x*vb2y - vb1y*vb2x;
+    cmag = sqrt(cx*cx + cy*cy + cz*cz);
+    dx = (cx*vb3x + cy*vb3y + cz*vb3z)/cmag/b3mag;  // NOTE(review): no guard for cmag == 0 or b3mag == 0
+    
+    // error check
+    // warn (only if a screen stream exists) when c lies outside [-1-TOLERANCE, 1+TOLERANCE]
+    if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) {
+      int me = comm->me;
+      if (screen) {
+	char str[128];
+	sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
+		me,tid,update->ntimestep,
+		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+	error->warning(FLERR,str,0);
+	fprintf(screen,"  1st atom: %d %g %g %g\n",
+		me,x[i1][0],x[i1][1],x[i1][2]);
+	fprintf(screen,"  2nd atom: %d %g %g %g\n",
+		me,x[i2][0],x[i2][1],x[i2][2]);
+	fprintf(screen,"  3rd atom: %d %g %g %g\n",
+		me,x[i3][0],x[i3][1],x[i3][2]);
+	fprintf(screen,"  4th atom: %d %g %g %g\n",
+		me,x[i4][0],x[i4][1],x[i4][2]);
+      }
+    }
+    // clamp c into [-1,1] so that acos() below gets a valid argument
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+
+    phi = acos(c);
+    if (dx < 0.0) phi *= -1.0;
+    si = sin(phi);
+    if (fabs(si) < SMALLER) si = SMALLER;  // small |si| is replaced by +SMALLER (sign is not kept)
+    siinv = 1.0/si;
+    // aphi, bphi, cphi, PI and MAX are not defined in this block; presumably provided by the base class / included headers -- confirm
+    pd = -aphi[type] + 3.0*bphi[type]*sin(3.0*phi)*siinv +
+      cphi[type]*sin(phi + 0.25*PI)*siinv;
+
+    if (EFLAG) edihedral = aphi[type]*(1.0 - c) + bphi[type]*(1.0 + cos(3.0*phi)) +
+      cphi[type]*(1.0 + cos(phi + 0.25*PI));
+;  // NOTE(review): stray empty statement, no effect
+
+    a = pd;  // from here on c and s12 are rescaled by pd and reused as force coefficients
+    c = c * a;
+    s12 = s12 * a;
+    a11 = c*sb1*s1;
+    a22 = -sb2 * (2.0*c0*s12 - c*(s1+s2));
+    a33 = c*sb3*s2;
+    a12 = -r12c1 * (c1mag*c*s1 + c2mag*s12);
+    a13 = -rb1*rb3*s12;
+    a23 = r12c2 * (c2mag*c*s2 + c1mag*s12);
+
+    sx2  = a12*vb1x + a22*vb2x + a23*vb3x;
+    sy2  = a12*vb1y + a22*vb2y + a23*vb3y;
+    sz2  = a12*vb1z + a22*vb2z + a23*vb3z;
+
+    f1[0] = a11*vb1x + a12*vb2x + a13*vb3x;
+    f1[1] = a11*vb1y + a12*vb2y + a13*vb3y;
+    f1[2] = a11*vb1z + a12*vb2z + a13*vb3z;
+
+    f2[0] = -sx2 - f1[0];
+    f2[1] = -sy2 - f1[1];
+    f2[2] = -sz2 - f1[2];
+
+    f4[0] = a13*vb1x + a23*vb2x + a33*vb3x;
+    f4[1] = a13*vb1y + a23*vb2y + a33*vb3y;
+    f4[2] = a13*vb1z + a23*vb2z + a33*vb3z;
+    // by construction f1 + f2 + f3 + f4 = 0 component-wise
+    f3[0] = sx2 - f4[0];
+    f3[1] = sy2 - f4[1];
+    f3[2] = sz2 - f4[2];
+
+    // apply force to each of 4 atoms
+    // with NEWTON_BOND == 0 only atoms with index < nlocal receive their share
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] += f2[0];
+      f[i2][1] += f2[1];
+      f[i2][2] += f2[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (NEWTON_BOND || i4 < nlocal) {
+      f[i4][0] += f4[0];
+      f[i4][1] += f4[1];
+      f[i4][2] += f4[2];
+    }
+    // tally receives f1, f3, f4 (not f2) together with the three bond vectors and tid
+    if (EVFLAG)
+      ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4,
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+  }
+}
diff --git a/src/USER-OMP/dihedral_helix_omp.h b/src/USER-OMP/dihedral_helix_omp.h
new file mode 100644
index 000000000..792319741
--- /dev/null
+++ b/src/USER-OMP/dihedral_helix_omp.h
@@ -0,0 +1,48 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef DIHEDRAL_CLASS
+
+DihedralStyle(helix/omp,DihedralHelixOMP)
+
+#else
+
+#ifndef LMP_DIHEDRAL_HELIX_OMP_H
+#define LMP_DIHEDRAL_HELIX_OMP_H
+
+#include "dihedral_helix.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class DihedralHelixOMP : public DihedralHelix, public ThrOMP {
+  // Dihedral style class named in the DihedralStyle(helix/omp,...) entry; derives from DihedralHelix and ThrOMP.
+ public:
+    DihedralHelixOMP(class LAMMPS *lmp) : 
+      DihedralHelix(lmp), ThrOMP(lmp,DIHEDRAL) {};  // forwards lmp to both bases; ThrOMP additionally receives the DIHEDRAL tag
+  // Force computation entry point; the .cpp definition names the arguments (eflag, vflag).
+  virtual void compute(int, int);
+  // Kernel templated on three compile-time int flags; works on the [ifrom,ito) range and receives the id tid.
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/dihedral_multi_harmonic_omp.cpp b/src/USER-OMP/dihedral_multi_harmonic_omp.cpp
new file mode 100644
index 000000000..bde958984
--- /dev/null
+++ b/src/USER-OMP/dihedral_multi_harmonic_omp.cpp
@@ -0,0 +1,269 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "math.h"
+#include "dihedral_multi_harmonic_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "force.h"
+#include "update.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define TOLERANCE 0.05
+#define SMALL     0.001
+
+/* ---------------------------------------------------------------------- */
+
+void DihedralMultiHarmonicOMP::compute(int eflag, int vflag)
+{
+  // Threaded driver: every thread obtains its list range and force array from
+  // loop_setup_thr(), runs the matching eval<>() on it and joins the reduction.
+
+  if (!(eflag || vflag)) evflag = 0;
+  else {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  }
+
+  const int ndihedral = neighbor->ndihedrallist;
+  const int nthr = comm->nthreads;
+  const int ntotal = atom->nlocal + atom->nghost;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int tid, nfrom, nto;
+    double **fthr = loop_setup_thr(atom->f, nfrom, nto, tid, ndihedral, ntotal, nthr);
+
+    // template arguments are <EVFLAG,EFLAG,NEWTON_BOND>, picked from the runtime flags
+    if (!evflag) {
+      if (force->newton_bond) eval<0,0,1>(fthr, nfrom, nto, tid);
+      else eval<0,0,0>(fthr, nfrom, nto, tid);
+    } else if (eflag) {
+      if (force->newton_bond) eval<1,1,1>(fthr, nfrom, nto, tid);
+      else eval<1,1,0>(fthr, nfrom, nto, tid);
+    } else {
+      if (force->newton_bond) eval<1,0,1>(fthr, nfrom, nto, tid);
+      else eval<1,0,0>(fthr, nfrom, nto, tid);
+    }
+
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), ntotal, nthr, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void DihedralMultiHarmonicOMP::eval(double **f, int nfrom, int nto, int tid)
+{
+  // adds the forces of dihedral list entries nfrom <= n < nto to f; tid only feeds tallies and warnings
+  int i1,i2,i3,i4,n,type;
+  double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm;
+  double edihedral,f1[3],f2[3],f3[3],f4[3];
+  double sb1,sb2,sb3,rb1,rb3,c0,b1mag2,b1mag,b2mag2;
+  double b2mag,b3mag2,b3mag,ctmp,r12c1,c1mag,r12c2;
+  double c2mag,sc1,sc2,s1,s12,c,pd,a,a11,a22;
+  double a33,a12,a13,a23,sx2,sy2,sz2;
+  double s2,sin2;
+  // edihedral keeps this 0.0 in instantiations with EFLAG == 0
+  edihedral = 0.0;
+
+  double **x = atom->x;
+  int **dihedrallist = neighbor->dihedrallist;
+  int nlocal = atom->nlocal;
+  // a list entry holds four atom indices ([0]..[3]) followed by the dihedral type ([4])
+  for (n = nfrom; n < nto; n++) {
+    i1 = dihedrallist[n][0];
+    i2 = dihedrallist[n][1];
+    i3 = dihedrallist[n][2];
+    i4 = dihedrallist[n][3];
+    type = dihedrallist[n][4];
+
+    // 1st bond: x[i1] - x[i2]
+
+    vb1x = x[i1][0] - x[i2][0];
+    vb1y = x[i1][1] - x[i2][1];
+    vb1z = x[i1][2] - x[i2][2];
+    domain->minimum_image(vb1x,vb1y,vb1z);  // presumably the periodic minimum-image wrap -- confirm in Domain
+
+    // 2nd bond: x[i3] - x[i2], plus its negation kept in vb2xm,vb2ym,vb2zm
+
+    vb2x = x[i3][0] - x[i2][0];
+    vb2y = x[i3][1] - x[i2][1];
+    vb2z = x[i3][2] - x[i2][2];
+    domain->minimum_image(vb2x,vb2y,vb2z);
+
+    vb2xm = -vb2x;
+    vb2ym = -vb2y;
+    vb2zm = -vb2z;
+    domain->minimum_image(vb2xm,vb2ym,vb2zm);
+
+    // 3rd bond: x[i4] - x[i3]
+
+    vb3x = x[i4][0] - x[i3][0];
+    vb3y = x[i4][1] - x[i3][1];
+    vb3z = x[i4][2] - x[i3][2];
+    domain->minimum_image(vb3x,vb3y,vb3z);
+    // sb* = 1/|vb*|^2 and rb* = 1/|vb*|
+    // c0 calculation: cosine of the angle between vb1 and vb3
+        
+    sb1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z);
+    sb2 = 1.0 / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z);
+    sb3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z);
+        
+    rb1 = sqrt(sb1);
+    rb3 = sqrt(sb3);
+        
+    c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3;
+
+    // 1st and 2nd angle: c1mag = cos(vb1,vb2), c2mag = cos(-vb2,vb3)
+        
+    b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z;
+    b1mag = sqrt(b1mag2);
+    b2mag2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z;
+    b2mag = sqrt(b2mag2);
+    b3mag2 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z;
+    b3mag = sqrt(b3mag2);
+
+    ctmp = vb1x*vb2x + vb1y*vb2y + vb1z*vb2z;
+    r12c1 = 1.0 / (b1mag*b2mag);
+    c1mag = ctmp * r12c1;
+
+    ctmp = vb2xm*vb3x + vb2ym*vb3y + vb2zm*vb3z;
+    r12c2 = 1.0 / (b2mag*b3mag);
+    c2mag = ctmp * r12c2;
+
+    // cos and sin of 2 angles and final c
+    // sc1,sc2 end up as 1/sin, with the sine floored at SMALL before the division
+    sin2 = MAX(1.0 - c1mag*c1mag,0.0);
+    sc1 = sqrt(sin2);
+    if (sc1 < SMALL) sc1 = SMALL;
+    sc1 = 1.0/sc1;
+
+    sin2 = MAX(1.0 - c2mag*c2mag,0.0);
+    sc2 = sqrt(sin2);
+    if (sc2 < SMALL) sc2 = SMALL;
+    sc2 = 1.0/sc2;
+
+    s1 = sc1 * sc1;
+    s2 = sc2 * sc2;
+    s12 = sc1 * sc2;
+    c = (c0 + c1mag*c2mag) * s12;
+
+    // error check: report when c lies outside [-1,1] by more than TOLERANCE
+
+    if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) {
+      int me = comm->me;
+      if (screen) {  // nothing is reported when screen is null
+	char str[128];
+	sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
+		me,tid,update->ntimestep,
+		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+	error->warning(FLERR,str,0);
+	fprintf(screen,"  1st atom: %d %g %g %g\n",
+		me,x[i1][0],x[i1][1],x[i1][2]);
+	fprintf(screen,"  2nd atom: %d %g %g %g\n",
+		me,x[i2][0],x[i2][1],x[i2][2]);
+	fprintf(screen,"  3rd atom: %d %g %g %g\n",
+		me,x[i3][0],x[i3][1],x[i3][2]);
+	fprintf(screen,"  4th atom: %d %g %g %g\n",
+		me,x[i4][0],x[i4][1],x[i4][2]);
+      }
+    }
+    // from here on c is restricted to [-1,1]
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+
+    // force & energy
+    // p = sum (i=1,5) a_i * c**(i-1)
+    // pd = dp/dc
+    // a1..a5: per-type coefficients not declared in this block (presumably base-class members)
+    pd = a2[type] + c*(2.0*a3[type] + c*(3.0*a4[type] + c*4.0*a5[type]));
+
+    if (EFLAG)  // the energy polynomial is evaluated only in EFLAG instantiations
+      edihedral = a1[type] + c*(a2[type] + c*(a3[type] + c*(a4[type] + c*a5[type])));
+    // fold pd into c and s12, then form the coefficients multiplying the bond vectors
+    a = pd;
+    c = c * a;
+    s12 = s12 * a;
+    a11 = c*sb1*s1;
+    a22 = -sb2 * (2.0*c0*s12 - c*(s1+s2));
+    a33 = c*sb3*s2;
+    a12 = -r12c1*(c1mag*c*s1 + c2mag*s12);
+    a13 = -rb1*rb3*s12;
+    a23 = r12c2*(c2mag*c*s2 + c1mag*s12);
+
+    sx2  = a12*vb1x + a22*vb2x + a23*vb3x;
+    sy2  = a12*vb1y + a22*vb2y + a23*vb3y;
+    sz2  = a12*vb1z + a22*vb2z + a23*vb3z;
+
+    f1[0] = a11*vb1x + a12*vb2x + a13*vb3x;
+    f1[1] = a11*vb1y + a12*vb2y + a13*vb3y;
+    f1[2] = a11*vb1z + a12*vb2z + a13*vb3z;
+    // f2 and f3 are chosen so that f1+f2+f3+f4 cancels
+    f2[0] = -sx2 - f1[0];
+    f2[1] = -sy2 - f1[1];
+    f2[2] = -sz2 - f1[2];
+
+    f4[0] = a13*vb1x + a23*vb2x + a33*vb3x;
+    f4[1] = a13*vb1y + a23*vb2y + a33*vb3y;
+    f4[2] = a13*vb1z + a23*vb2z + a33*vb3z;
+
+    f3[0] = sx2 - f4[0];
+    f3[1] = sy2 - f4[1];
+    f3[2] = sz2 - f4[2];
+
+    // apply force to each of 4 atoms; with NEWTON_BOND == 0 only atoms with index < nlocal
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] += f2[0];
+      f[i2][1] += f2[1];
+      f[i2][2] += f2[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (NEWTON_BOND || i4 < nlocal) {
+      f[i4][0] += f4[0];
+      f[i4][1] += f4[1];
+      f[i4][2] += f4[2];
+    }
+    // EVFLAG instantiations pass the energy, f1/f3/f4 and the three bond vectors to ev_tally_thr
+    if (EVFLAG)
+      ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4,
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+  }
+}
diff --git a/src/USER-OMP/dihedral_multi_harmonic_omp.h b/src/USER-OMP/dihedral_multi_harmonic_omp.h
new file mode 100644
index 000000000..da2322f03
--- /dev/null
+++ b/src/USER-OMP/dihedral_multi_harmonic_omp.h
@@ -0,0 +1,48 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef DIHEDRAL_CLASS
+
+DihedralStyle(multi/harmonic/omp,DihedralMultiHarmonicOMP)
+
+#else
+
+#ifndef LMP_DIHEDRAL_MULTI_HARMONIC_OMP_H
+#define LMP_DIHEDRAL_MULTI_HARMONIC_OMP_H
+
+#include "dihedral_multi_harmonic.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+// Dihedral style "multi/harmonic/omp": DihedralMultiHarmonic plus ThrOMP.
+class DihedralMultiHarmonicOMP : public DihedralMultiHarmonic, public ThrOMP {
+ public:
+  // forwards lmp to both bases; ThrOMP additionally receives DIHEDRAL
+  DihedralMultiHarmonicOMP(class LAMMPS *lmp)
+    : DihedralMultiHarmonic(lmp), ThrOMP(lmp,DIHEDRAL) {}
+  virtual void compute(int, int);  // declared only, no inline body
+ private:
+  // worker templated on three compile-time int flags
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/dihedral_opls_omp.cpp b/src/USER-OMP/dihedral_opls_omp.cpp
new file mode 100644
index 000000000..9f59e26d2
--- /dev/null
+++ b/src/USER-OMP/dihedral_opls_omp.cpp
@@ -0,0 +1,286 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include "math.h"
+#include "dihedral_opls_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "force.h"
+#include "update.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define TOLERANCE 0.05
+#define SMALL     0.001
+#define SMALLER   0.00001
+
+/* ---------------------------------------------------------------------- */
+
+void DihedralOPLSOMP::compute(int eflag, int vflag)
+{
+  // Threaded driver: every thread obtains its list range and force array from
+  // loop_setup_thr(), runs the matching eval<>() on it and joins the reduction.
+
+  if (!(eflag || vflag)) evflag = 0;
+  else {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  }
+
+  const int ndihedral = neighbor->ndihedrallist;
+  const int nthr = comm->nthreads;
+  const int ntotal = atom->nlocal + atom->nghost;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int tid, nfrom, nto;
+    double **fthr = loop_setup_thr(atom->f, nfrom, nto, tid, ndihedral, ntotal, nthr);
+
+    // template arguments are <EVFLAG,EFLAG,NEWTON_BOND>, picked from the runtime flags
+    if (!evflag) {
+      if (force->newton_bond) eval<0,0,1>(fthr, nfrom, nto, tid);
+      else eval<0,0,0>(fthr, nfrom, nto, tid);
+    } else if (eflag) {
+      if (force->newton_bond) eval<1,1,1>(fthr, nfrom, nto, tid);
+      else eval<1,1,0>(fthr, nfrom, nto, tid);
+    } else {
+      if (force->newton_bond) eval<1,0,1>(fthr, nfrom, nto, tid);
+      else eval<1,0,0>(fthr, nfrom, nto, tid);
+    }
+
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), ntotal, nthr, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+void DihedralOPLSOMP::eval(double **f, int nfrom, int nto, int tid)
+{
+  // adds the forces of dihedral list entries nfrom <= n < nto to f; tid only feeds tallies and warnings
+  int i1,i2,i3,i4,n,type;
+  double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm;
+  double edihedral,f1[3],f2[3],f3[3],f4[3];
+  double sb1,sb2,sb3,rb1,rb3,c0,b1mag2,b1mag,b2mag2;
+  double b2mag,b3mag2,b3mag,ctmp,r12c1,c1mag,r12c2;
+  double c2mag,sc1,sc2,s1,s12,c,pd,a,a11,a22;
+  double a33,a12,a13,a23,sx2,sy2,sz2;
+  double s2,cx,cy,cz,cmag,dx,phi,si,siinv,sin2;
+  // edihedral keeps this 0.0 in instantiations with EFLAG == 0
+  edihedral = 0.0;
+
+  double **x = atom->x;
+  int **dihedrallist = neighbor->dihedrallist;
+  int nlocal = atom->nlocal;
+  // a list entry holds four atom indices ([0]..[3]) followed by the dihedral type ([4])
+  for (n = nfrom; n < nto; n++) {
+    i1 = dihedrallist[n][0];
+    i2 = dihedrallist[n][1];
+    i3 = dihedrallist[n][2];
+    i4 = dihedrallist[n][3];
+    type = dihedrallist[n][4];
+
+    // 1st bond: x[i1] - x[i2]
+
+    vb1x = x[i1][0] - x[i2][0];
+    vb1y = x[i1][1] - x[i2][1];
+    vb1z = x[i1][2] - x[i2][2];
+    domain->minimum_image(vb1x,vb1y,vb1z);  // presumably the periodic minimum-image wrap -- confirm in Domain
+
+    // 2nd bond: x[i3] - x[i2], plus its negation kept in vb2xm,vb2ym,vb2zm
+
+    vb2x = x[i3][0] - x[i2][0];
+    vb2y = x[i3][1] - x[i2][1];
+    vb2z = x[i3][2] - x[i2][2];
+    domain->minimum_image(vb2x,vb2y,vb2z);
+
+    vb2xm = -vb2x;
+    vb2ym = -vb2y;
+    vb2zm = -vb2z;
+    domain->minimum_image(vb2xm,vb2ym,vb2zm);
+
+    // 3rd bond: x[i4] - x[i3]
+
+    vb3x = x[i4][0] - x[i3][0];
+    vb3y = x[i4][1] - x[i3][1];
+    vb3z = x[i4][2] - x[i3][2];
+    domain->minimum_image(vb3x,vb3y,vb3z);
+    // sb* = 1/|vb*|^2 and rb* = 1/|vb*|
+    // c0 calculation: cosine of the angle between vb1 and vb3
+
+    sb1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z);
+    sb2 = 1.0 / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z);
+    sb3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z);
+
+    rb1 = sqrt(sb1);
+    rb3 = sqrt(sb3);
+
+    c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3;
+
+    // 1st and 2nd angle: c1mag = cos(vb1,vb2), c2mag = cos(-vb2,vb3)
+
+    b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z;
+    b1mag = sqrt(b1mag2);
+    b2mag2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z;
+    b2mag = sqrt(b2mag2);
+    b3mag2 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z;
+    b3mag = sqrt(b3mag2);
+
+    ctmp = vb1x*vb2x + vb1y*vb2y + vb1z*vb2z;
+    r12c1 = 1.0 / (b1mag*b2mag);
+    c1mag = ctmp * r12c1;
+
+    ctmp = vb2xm*vb3x + vb2ym*vb3y + vb2zm*vb3z;
+    r12c2 = 1.0 / (b2mag*b3mag);
+    c2mag = ctmp * r12c2;
+
+    // cos and sin of 2 angles and final c
+    // sc1,sc2 end up as 1/sin, with the sine floored at SMALL before the division
+    sin2 = MAX(1.0 - c1mag*c1mag,0.0);
+    sc1 = sqrt(sin2);
+    if (sc1 < SMALL) sc1 = SMALL;
+    sc1 = 1.0/sc1;
+
+    sin2 = MAX(1.0 - c2mag*c2mag,0.0);
+    sc2 = sqrt(sin2);
+    if (sc2 < SMALL) sc2 = SMALL;
+    sc2 = 1.0/sc2;
+
+    s1 = sc1 * sc1;
+    s2 = sc2 * sc2;
+    s12 = sc1 * sc2;
+    c = (c0 + c1mag*c2mag) * s12;
+    // (cx,cy,cz) = vb1 x vb2; dx is its dot product with vb3 divided by cmag and b3mag
+    cx = vb1y*vb2z - vb1z*vb2y;
+    cy = vb1z*vb2x - vb1x*vb2z;
+    cz = vb1x*vb2y - vb1y*vb2x;
+    cmag = sqrt(cx*cx + cy*cy + cz*cz);
+    dx = (cx*vb3x + cy*vb3y + cz*vb3z)/cmag/b3mag;
+
+    // error check: report when c lies outside [-1,1] by more than TOLERANCE
+
+    if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) {
+      int me = comm->me;
+
+      if (screen) {  // nothing is reported when screen is null
+	char str[128];
+	sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " %d %d %d %d",
+		me,tid,update->ntimestep,
+		atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
+	error->warning(FLERR,str,0);
+	fprintf(screen,"  1st atom: %d %g %g %g\n",
+		me,x[i1][0],x[i1][1],x[i1][2]);
+	fprintf(screen,"  2nd atom: %d %g %g %g\n",
+		me,x[i2][0],x[i2][1],x[i2][2]);
+	fprintf(screen,"  3rd atom: %d %g %g %g\n",
+		me,x[i3][0],x[i3][1],x[i3][2]);
+	fprintf(screen,"  4th atom: %d %g %g %g\n",
+		me,x[i4][0],x[i4][1],x[i4][2]);
+      }
+    }
+    // from here on c is restricted to [-1,1]
+    if (c > 1.0) c = 1.0;
+    if (c < -1.0) c = -1.0;
+
+    // force & energy
+    // p = sum (i=1,4) k_i * (1 + (-1)**(i+1)*cos(i*phi) )
+    // pd = dp/dc
+    // phi = acos(c), negated when dx < 0; a |sin(phi)| below SMALLER is replaced by +SMALLER
+    phi = acos(c);
+    if (dx < 0.0) phi *= -1.0;
+    si = sin(phi);
+    if (fabs(si) < SMALLER) si = SMALLER;
+    siinv = 1.0/si;
+    // k1..k4: per-type coefficients not declared in this block (presumably base-class members)
+    pd = k1[type] - 2.0*k2[type]*sin(2.0*phi)*siinv + 
+      3.0*k3[type]*sin(3.0*phi)*siinv - 4.0*k4[type]*sin(4.0*phi)*siinv;
+    // the energy is evaluated only in EFLAG instantiations
+    if (EFLAG) edihedral = k1[type]*(1.0 + c) + k2[type]*(1.0 - cos(2.0*phi))
+      + k3[type]*(1.0 + cos(3.0*phi)) + k4[type]*(1.0 - cos(4.0*phi));
+
+    // fold pd into c and s12, then form the coefficients multiplying the bond vectors
+    a = pd;
+    c = c * a;
+    s12 = s12 * a;
+    a11 = c*sb1*s1;
+    a22 = -sb2 * (2.0*c0*s12 - c*(s1+s2));
+    a33 = c*sb3*s2;
+    a12 = -r12c1 * (c1mag*c*s1 + c2mag*s12);
+    a13 = -rb1*rb3*s12;
+    a23 = r12c2 * (c2mag*c*s2 + c1mag*s12);
+
+    sx2  = a12*vb1x + a22*vb2x + a23*vb3x;
+    sy2  = a12*vb1y + a22*vb2y + a23*vb3y;
+    sz2  = a12*vb1z + a22*vb2z + a23*vb3z;
+
+    f1[0] = a11*vb1x + a12*vb2x + a13*vb3x;
+    f1[1] = a11*vb1y + a12*vb2y + a13*vb3y;
+    f1[2] = a11*vb1z + a12*vb2z + a13*vb3z;
+    // f2 and f3 are chosen so that f1+f2+f3+f4 cancels
+    f2[0] = -sx2 - f1[0];
+    f2[1] = -sy2 - f1[1];
+    f2[2] = -sz2 - f1[2];
+
+    f4[0] = a13*vb1x + a23*vb2x + a33*vb3x;
+    f4[1] = a13*vb1y + a23*vb2y + a33*vb3y;
+    f4[2] = a13*vb1z + a23*vb2z + a33*vb3z;
+
+    f3[0] = sx2 - f4[0];
+    f3[1] = sy2 - f4[1];
+    f3[2] = sz2 - f4[2];
+
+    // apply force to each of 4 atoms; with NEWTON_BOND == 0 only atoms with index < nlocal
+
+    if (NEWTON_BOND || i1 < nlocal) {
+      f[i1][0] += f1[0];
+      f[i1][1] += f1[1];
+      f[i1][2] += f1[2];
+    }
+
+    if (NEWTON_BOND || i2 < nlocal) {
+      f[i2][0] += f2[0];
+      f[i2][1] += f2[1];
+      f[i2][2] += f2[2];
+    }
+
+    if (NEWTON_BOND || i3 < nlocal) {
+      f[i3][0] += f3[0];
+      f[i3][1] += f3[1];
+      f[i3][2] += f3[2];
+    }
+
+    if (NEWTON_BOND || i4 < nlocal) {
+      f[i4][0] += f4[0];
+      f[i4][1] += f4[1];
+      f[i4][2] += f4[2];
+    }
+    // EVFLAG instantiations pass the energy, f1/f3/f4 and the three bond vectors to ev_tally_thr
+    if (EVFLAG)
+      ev_tally_thr(this,i1,i2,i3,i4,nlocal,NEWTON_BOND,edihedral,f1,f3,f4,
+		   vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,tid);
+  }
+}
+
diff --git a/src/USER-OMP/dihedral_opls_omp.h b/src/USER-OMP/dihedral_opls_omp.h
new file mode 100644
index 000000000..58b992053
--- /dev/null
+++ b/src/USER-OMP/dihedral_opls_omp.h
@@ -0,0 +1,48 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef DIHEDRAL_CLASS
+
+DihedralStyle(opls/omp,DihedralOPLSOMP)
+
+#else
+
+#ifndef LMP_DIHEDRAL_OPLS_OMP_H
+#define LMP_DIHEDRAL_OPLS_OMP_H
+
+#include "dihedral_opls.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+// Dihedral style "opls/omp": derives from DihedralOPLS and from ThrOMP.
+class DihedralOPLSOMP : public DihedralOPLS, public ThrOMP {
+ public:
+  // forwards lmp to both bases; ThrOMP additionally receives DIHEDRAL
+  DihedralOPLSOMP(class LAMMPS *lmp)
+    : DihedralOPLS(lmp), ThrOMP(lmp,DIHEDRAL) {}
+  virtual void compute(int, int);  // declared only, no inline body
+ private:
+  // worker templated on three compile-time int flags
+  template <int EVFLAG, int EFLAG, int NEWTON_BOND>
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/fix_gravity_omp.cpp b/src/USER-OMP/fix_gravity_omp.cpp
new file mode 100644
index 000000000..c4f4b39b6
--- /dev/null
+++ b/src/USER-OMP/fix_gravity_omp.cpp
@@ -0,0 +1,114 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "fix_gravity_omp.h"
+#include "atom.h"
+#include "update.h"
+#include "domain.h"
+#include "respa.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+enum{CHUTE,SPHERICAL,GRADIENT,VECTOR};
+
+/* ---------------------------------------------------------------------- */
+
+FixGravityOMP::FixGravityOMP(LAMMPS *lmp, int narg, char **arg)
+  : FixGravity(lmp, narg, arg) {}  // all arguments go straight to FixGravity
+
+/* ---------------------------------------------------------------------- */
+
+void FixGravityOMP::post_force(int vflag)
+{
+  // Adds massone*(xacc,yacc,zacc) to the force of every local atom in the fix
+  // group and stores the sum of -massone*(acc . x) in egrav; vflag is unused.
+  if (style == GRADIENT) {  // update direction of gravity vector first
+    if (domain->dimension == 3) {
+      double phi_current = degree2rad *
+        (phi + (update->ntimestep - time_origin)*dt*phigrad*360.0);
+      double theta_current = degree2rad *
+        (theta + (update->ntimestep - time_origin)*dt*thetagrad*360.0);
+      xgrav = sin(theta_current) * cos(phi_current);
+      ygrav = sin(theta_current) * sin(phi_current);
+      zgrav = cos(theta_current);
+    } else {
+      double theta_current = degree2rad *
+        (theta + (update->ntimestep - time_origin)*dt*thetagrad*360.0);
+      xgrav = sin(theta_current);
+      ygrav = cos(theta_current);
+    }
+    xacc = magnitude*xgrav;
+    yacc = magnitude*ygrav;
+    zacc = magnitude*zgrav;
+  }
+
+  const double * const * const x = atom->x;
+  double * const * const f = atom->f;
+  double * const rmass = atom->rmass;
+  double * const mass = atom->mass;
+  int * const mask = atom->mask;
+  int * const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double xacc_thr = xacc;
+  const double yacc_thr = yacc;
+  const double zacc_thr = zacc;
+
+  eflag = 0;  // NOTE(review): presumably flags egrav as not yet summed over processors -- confirm in FixGravity
+  double grav = 0.0;
+  // default(shared), not default(none): since OpenMP 4.0 const locals are no
+  // longer predetermined shared, so default(none) fails to compile (GCC >= 9).
+  // reduction(+) combines exactly like the deprecated reduction(-) form.
+  if (rmass) {  // per-atom masses
+#if defined(_OPENMP)
+#pragma omp parallel for default(shared) reduction(+:grav)
+#endif
+    for (int i = 0; i < nlocal; i++)
+      if (mask[i] & groupbit) {
+        const double massone = rmass[i];
+        f[i][0] += massone*xacc_thr;
+        f[i][1] += massone*yacc_thr;
+        f[i][2] += massone*zacc_thr;
+        grav -= massone * (xacc_thr*x[i][0] + yacc_thr*x[i][1] + zacc_thr*x[i][2]);
+      }
+  } else {  // per-type masses looked up through type[i]
+#if defined(_OPENMP)
+#pragma omp parallel for default(shared) reduction(+:grav)
+#endif
+    for (int i = 0; i < nlocal; i++)
+      if (mask[i] & groupbit) {
+        const double massone = mass[type[i]];
+        f[i][0] += massone*xacc_thr;
+        f[i][1] += massone*yacc_thr;
+        f[i][2] += massone*zacc_thr;
+        grav -= massone * (xacc_thr*x[i][0] + yacc_thr*x[i][1] + zacc_thr*x[i][2]);
+      }
+  }
+  egrav = grav;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixGravityOMP::post_force_respa(int vflag, int ilevel, int iloop)
+{
+  if (ilevel != nlevels_respa-1) return;  // every other level is a no-op; iloop is unused
+  post_force(vflag);
+}
diff --git a/src/USER-OMP/fix_gravity_omp.h b/src/USER-OMP/fix_gravity_omp.h
new file mode 100644
index 000000000..dd0144410
--- /dev/null
+++ b/src/USER-OMP/fix_gravity_omp.h
@@ -0,0 +1,38 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(gravity/omp,FixGravityOMP)
+
+#else
+
+#ifndef LMP_FIX_GRAVITY_OMP_H
+#define LMP_FIX_GRAVITY_OMP_H
+
+#include "fix_gravity.h"
+
+namespace LAMMPS_NS {
+
+class FixGravityOMP : public FixGravity {
+  // OpenMP variant of FixGravity; declares no data members of its own
+ public:
+  FixGravityOMP(class LAMMPS *, int, char **);   // forwards its arguments to FixGravity
+  virtual void post_force(int);                  // force update with OpenMP-parallel atom loops
+  virtual void post_force_respa(int, int, int);  // calls post_force() for one ilevel value only
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/fix_nve_sphere_omp.cpp b/src/USER-OMP/fix_nve_sphere_omp.cpp
new file mode 100644
index 000000000..a642b21f2
--- /dev/null
+++ b/src/USER-OMP/fix_nve_sphere_omp.cpp
@@ -0,0 +1,140 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "string.h"
+#include "fix_nve_sphere_omp.h"
+#include "atom.h"
+#include "atom_vec.h"
+#include "update.h"
+#include "respa.h"
+#include "force.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define INERTIA 0.4          // moment of inertia prefactor for sphere
+
+enum{NONE,DIPOLE};
+
+/* ---------------------------------------------------------------------- */
+
+/* ---------------------------------------------------------------------- */
+
+void FixNVESphereOMP::initial_integrate(int vflag)
+{
+  // advances v, x and omega of the atoms in this fix group and, when extra
+  // equals DIPOLE, also rotates and rescales their dipole vectors mu;
+  // vflag is not used
+
+  double **pos = atom->x;
+  double **vel = atom->v;
+  double **frc = atom->f;
+  double **angvel = atom->omega;
+  double **trq = atom->torque;
+  double *rad = atom->radius;
+  double *m = atom->rmass;
+  int *grpmask = atom->mask;
+
+  // loop bound: atom->nfirst when igroup equals atom->firstgroup,
+  // atom->nlocal otherwise
+  const int nmax = (igroup == atom->firstgroup) ? atom->nfirst : atom->nlocal;
+
+  // dtf divided by the sphere inertia prefactor INERTIA
+  const double dtfrotate = dtf / INERTIA;
+
+  // update v,x,omega for all particles
+  // d_omega/dt = torque / inertia, inertia = INERTIA * radius^2 * rmass
+  // every iteration writes only row ia of v, x and omega
+#if defined(_OPENMP)
+#pragma omp parallel for default(shared)
+#endif
+  for (int ia = 0; ia < nmax; ia++) {
+    if (!(grpmask[ia] & groupbit)) continue;
+
+    const double dtfm = dtf / m[ia];
+    const double dtirotate = dtfrotate / (rad[ia]*rad[ia]*m[ia]);
+    for (int d = 0; d < 3; d++) {
+      vel[ia][d] += dtfm * frc[ia][d];
+      pos[ia][d] += dtv * vel[ia][d];
+      angvel[ia][d] += dtirotate * trq[ia][d];
+    }
+  }
+
+  // nothing left to do unless dipoles have to be updated
+  if (extra != DIPOLE) return;
+
+  // update mu for dipoles
+  // d_mu/dt = omega cross mu
+  // renormalize mu to dipole length (stored in mu[ia][3])
+
+  double **dip = atom->mu;
+#if defined(_OPENMP)
+#pragma omp parallel for default(shared)
+#endif
+  for (int ia = 0; ia < nmax; ia++) {
+    // skip atoms outside the group and atoms whose mu[ia][3] is not positive
+    if (!(grpmask[ia] & groupbit) || !(dip[ia][3] > 0.0)) continue;
+
+    const double *w = angvel[ia];
+    double *mu_i = dip[ia];
+    const double gx = mu_i[0] + dtv * (w[1]*mu_i[2]-w[2]*mu_i[1]);
+    const double gy = mu_i[1] + dtv * (w[2]*mu_i[0]-w[0]*mu_i[2]);
+    const double gz = mu_i[2] + dtv * (w[0]*mu_i[1]-w[1]*mu_i[0]);
+    const double rescale = mu_i[3]/sqrt(gx*gx + gy*gy + gz*gz);
+    mu_i[0] = gx*rescale;
+    mu_i[1] = gy*rescale;
+    mu_i[2] = gz*rescale;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixNVESphereOMP::final_integrate()
+{
+  // adds dtf*f/rmass to v and the scaled torque to omega for every atom of
+  // the fix group; positions and dipoles are left untouched here
+
+  double **vel = atom->v;
+  double **frc = atom->f;
+  double **angvel = atom->omega;
+  double **trq = atom->torque;
+  double *m = atom->rmass;
+  double *rad = atom->radius;
+  int *grpmask = atom->mask;
+
+  // loop bound: atom->nfirst when igroup equals atom->firstgroup,
+  // atom->nlocal otherwise
+  const int nmax = (igroup == atom->firstgroup) ? atom->nfirst : atom->nlocal;
+
+  // dtf divided by the sphere inertia prefactor INERTIA
+  const double dtfrotate = dtf / INERTIA;
+
+  // update v,omega for all particles
+  // d_omega/dt = torque / inertia, inertia = INERTIA * radius^2 * rmass
+  // every iteration writes only row ia of v and omega
+#if defined(_OPENMP)
+#pragma omp parallel for default(shared)
+#endif
+  for (int ia = 0; ia < nmax; ia++) {
+    if (!(grpmask[ia] & groupbit)) continue;
+
+    const double dtfm = dtf / m[ia];
+    const double dtirotate = dtfrotate / (rad[ia]*rad[ia]*m[ia]);
+    for (int d = 0; d < 3; d++) {
+      vel[ia][d] += dtfm * frc[ia][d];
+      angvel[ia][d] += dtirotate * trq[ia][d];
+    }
+  }
+}
diff --git a/src/USER-OMP/fix_nve_sphere_omp.h b/src/USER-OMP/fix_nve_sphere_omp.h
new file mode 100644
index 000000000..fe86039b1
--- /dev/null
+++ b/src/USER-OMP/fix_nve_sphere_omp.h
@@ -0,0 +1,39 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(nve/sphere/omp,FixNVESphereOMP)
+
+#else
+
+#ifndef LMP_FIX_NVE_SPHERE_OMP_H
+#define LMP_FIX_NVE_SPHERE_OMP_H
+
+#include "fix_nve_sphere.h"
+
+namespace LAMMPS_NS {
+
+// Fix style "nve/sphere/omp": FixNVESphere with its two integrate methods overridden.
+class FixNVESphereOMP : public FixNVESphere {
+ public:
+  FixNVESphereOMP(class LAMMPS *lmp, int narg, char **arg)
+    : FixNVESphere(lmp, narg, arg) {}  // nothing beyond base construction
+  virtual void initial_integrate(int);
+  virtual void final_integrate();
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/fix_qeq_comb_omp.cpp b/src/USER-OMP/fix_qeq_comb_omp.cpp
new file mode 100644
index 000000000..175bab898
--- /dev/null
+++ b/src/USER-OMP/fix_qeq_comb_omp.cpp
@@ -0,0 +1,166 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "mpi.h"
+#include <math.h>
+#include "fix_qeq_comb_omp.h"
+#include "atom.h"
+#include "force.h"
+#include "group.h"
+#include "memory.h"
+#include "error.h"
+#include "respa.h"
+#include "update.h"
+#include "pair_comb_omp.h"
+
+#include <string.h>
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+// Build the fix through the FixQEQComb constructor, then reject any
+// command line that carries fewer than 5 arguments.
+
+FixQEQCombOMP::FixQEQCombOMP(LAMMPS *lmp, int narg, char **arg) :
+  FixQEQComb(lmp, narg, arg)
+{
+  const bool enough_args = (narg >= 5);
+  if (!enough_args)
+    error->all(FLERR,"Illegal fix qeq/comb/omp command");
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Check the prerequisites of the fix and cache what post_force() needs:
+// the atom attribute q, a matching comb pair style, the number of respa
+// levels (respa integrators only) and the atom count of the fix group.
+
+void FixQEQCombOMP::init()
+{
+  const bool have_charge = (atom->q_flag != 0);
+  if (!have_charge)
+    error->all(FLERR,"Fix qeq/comb/omp requires atom attribute q");
+
+  // try the comb/omp pair style first, plain comb second
+
+  comb = (PairComb *) force->pair_match("comb/omp",1);
+  if (!comb) comb = (PairComb *) force->pair_match("comb",1);
+  if (!comb)
+    error->all(FLERR,"Must use pair_style comb or comb/omp with fix qeq/comb");
+
+  // nlevels_respa is only assigned when the integrate style mentions respa
+
+  const bool using_respa = (strstr(update->integrate_style,"respa") != NULL);
+  if (using_respa) {
+    Respa *respa = (Respa *) update->integrate;
+    nlevels_respa = respa->nlevels;
+  }
+
+  // an empty group is an error
+
+  ngroup = group->count(igroup);
+  if (ngroup == 0)
+    error->all(FLERR,"Fix qeq/comb group has no atoms");
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   charge equilibration, executed when ntimestep is a multiple of nevery:
+   damped iteration of the charges q of the atoms in the fix group,
+   driven by the charge forces qf filled in by comb->yasu_char(),
+   until the mean and the maximum deviation of qf from its group
+   average satisfy the precision-based thresholds, or until loopmax
+   iterations have been performed
+   vflag is not used
+------------------------------------------------------------------------- */
+
+void FixQEQCombOMP::post_force(int vflag)
+{
+  int i,iloop,loopmax;
+  double heatpq,qmass,dtq,dtq2;
+  double enegchkall,enegmaxall;
+
+  if (update->ntimestep % nevery) return;
+
+  // reallocate work arrays if necessary
+  // qf = charge force
+  // q1 = charge displacement
+  // q2 = tmp storage of charge force for next iteration
+
+  if (atom->nmax > nmax) {
+    memory->destroy(qf);
+    memory->destroy(q1);
+    memory->destroy(q2);
+    nmax = atom->nmax;
+    memory->create(qf,nmax,"qeq:qf");
+    memory->create(q1,nmax,"qeq:q1");
+    memory->create(q2,nmax,"qeq:q2");
+    vector_atom = qf;
+  }
+
+  // more loops for first-time charge equilibrium
+
+  iloop = 0;
+  if (firstflag) loopmax = 5000;
+  else loopmax = 2000;
+
+  // charge-equilibration loop
+
+  if (me == 0 && fp)
+    fprintf(fp,"Charge equilibration on step " BIGINT_FORMAT "\n",
+            update->ntimestep);
+
+  // parameters of the damped charge dynamics
+
+  heatpq = 0.05;
+  qmass  = 0.000548580;
+  dtq    = 0.0006;
+  dtq2   = 0.5*dtq*dtq/qmass;
+
+  double enegchk = 0.0;
+  double enegtot = 0.0;
+  double enegmax = 0.0;
+
+  double *q = atom->q;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+
+  for (i = 0; i < nlocal; i++)
+    q1[i] = q2[i] = qf[i] = 0.0;
+
+  for (iloop = 0; iloop < loopmax; iloop ++ ) {
+
+    // first half of the charge update, then move the charges
+
+    for (i = 0; i < nlocal; i++)
+      if (mask[i] & groupbit) {
+        q1[i] += qf[i]*dtq2 - heatpq*q1[i];
+        q[i]  += q1[i];
+      }
+
+    // new charge forces; enegtot becomes the average over the group
+
+    enegtot = comb->yasu_char(qf,igroup);
+    enegtot /= ngroup;
+    enegchk = enegmax = 0.0;
+
+    // deviation of each charge force from the group average.
+    // every thread collects a private partial sum and partial maximum,
+    // which are merged into enegchk/enegmax inside a critical section:
+    // updating those two shared variables directly from the threaded
+    // loop would be a data race and could corrupt the convergence test.
+    // the order in which partial sums are added depends on thread timing.
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+    {
+      double chk_thr = 0.0;
+      double max_thr = 0.0;
+
+#if defined(_OPENMP)
+#pragma omp for nowait
+#endif
+      for (int ii = 0; ii < nlocal; ii++)
+        if (mask[ii] & groupbit) {
+          const double dq = enegtot-qf[ii];
+          q2[ii] = dq;
+          max_thr = MAX(max_thr,fabs(dq));
+          chk_thr += fabs(dq);
+          qf[ii] = dq;
+        }
+
+#if defined(_OPENMP)
+#pragma omp critical
+#endif
+      {
+        enegchk += chk_thr;
+        enegmax = MAX(enegmax,max_thr);
+      }
+    }
+
+    // combine the per-process results: mean and maximum deviation
+
+    MPI_Allreduce(&enegchk,&enegchkall,1,MPI_DOUBLE,MPI_SUM,world);
+    enegchk = enegchkall/ngroup;
+    MPI_Allreduce(&enegmax,&enegmaxall,1,MPI_DOUBLE,MPI_MAX,world);
+    enegmax = enegmaxall;
+
+    if (enegchk <= precision && enegmax <= 100.0*precision) break;
+
+    if (me == 0 && fp)
+      fprintf(fp,"  iteration: %d, enegtot %.6g, "
+              "enegmax %.6g, fq deviation: %.6g\n",
+              iloop,enegtot,enegmax,enegchk);
+
+    // second half of the charge update; each iteration touches only
+    // its own q1[i], so a plain parallel loop is sufficient here
+
+#if defined(_OPENMP)
+#pragma omp parallel for private(i) default(shared)
+#endif
+    for (i = 0; i < nlocal; i++)
+      if (mask[i] & groupbit)
+        q1[i] += qf[i]*dtq2 - heatpq*q1[i];
+  }
+
+  // iloop only reaches loopmax when the loop ended without the break
+
+  if (me == 0 && fp) {
+    if (iloop == loopmax)
+      fprintf(fp,"Charges did not converge in %d iterations\n",iloop);
+    else
+      fprintf(fp,"Charges converged in %d iterations to %.10f tolerance\n",
+              iloop,enegchk);
+  }
+}
+
diff --git a/src/USER-OMP/fix_qeq_comb_omp.h b/src/USER-OMP/fix_qeq_comb_omp.h
new file mode 100644
index 000000000..0febe6b0a
--- /dev/null
+++ b/src/USER-OMP/fix_qeq_comb_omp.h
@@ -0,0 +1,32 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(qeq/comb/omp,FixQEQCombOMP)
+
+#else
+
+#ifndef LMP_FIX_QEQ_COMB_OMP_H
+#define LMP_FIX_QEQ_COMB_OMP_H
+
+#include "fix_qeq_comb.h"
+
+namespace LAMMPS_NS {
+
+class FixQEQCombOMP : public FixQEQComb {
+ public:
+  FixQEQCombOMP(class LAMMPS *, int, char **);
+  virtual void init();
+  virtual void post_force(int);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/fix_shear_history_omp.cpp b/src/USER-OMP/fix_shear_history_omp.cpp
new file mode 100644
index 000000000..40781cb40
--- /dev/null
+++ b/src/USER-OMP/fix_shear_history_omp.cpp
@@ -0,0 +1,150 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "string.h"
+#include "stdio.h"
+#include "fix_shear_history_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "force.h"
+#include "pair.h"
+#include "update.h"
+#include "modify.h"
+#include "error.h"
+
+#if defined(_OPENMP)
+#include <omp.h>
+#endif
+
+using namespace LAMMPS_NS;
+
+#define MAXTOUCH 15
+
+/* ----------------------------------------------------------------------
+   copy shear partner info from neighbor lists to atom arrays
+   so can be exchanged with atoms
+------------------------------------------------------------------------- */
+
+void FixShearHistoryOMP::pre_exchange()
+{
+  const int nthreads = comm->nthreads;
+  const int nlocal = atom->nlocal;
+  const int nghost = atom->nghost;
+  const int nall = nlocal + nghost;
+
+  // number of threads that found an atom with too many partners
+
+  int flag = 0;
+#if defined(_OPENMP)
+#pragma omp parallel shared(flag)
+#endif
+  {
+
+#if defined(_OPENMP)
+    const int tid = omp_get_thread_num();
+#else
+    const int tid = 0;
+#endif
+
+    // every thread owns one contiguous slice of the local atoms and one
+    // of the ghost atoms, and only ever writes entries inside its slices.
+    // NOTE(review): the slice width is derived from comm->nthreads while
+    // tid comes from the OpenMP runtime - confirm both always agree.
+
+    const int lchunk = nlocal/nthreads + 1;
+    const int lbegin = tid*lchunk;
+    const int lend = (lbegin + lchunk < nlocal) ? lbegin + lchunk : nlocal;
+
+    const int gchunk = nghost/nthreads + 1;
+    const int gbegin = nlocal + tid*gchunk;
+    const int gend = (gbegin + gchunk < nall) ? gbegin + gchunk : nall;
+
+    // reset the partner counters of the owned local atoms
+
+    for (int k = lbegin; k < lend; k++) npartner[k] = 0;
+
+    // neighbor list data and the touch/shear history attached to it
+
+    int *tag = atom->tag;
+
+    NeighList *list = pair->list;
+    const int inum = list->inum;
+    int *ilist = list->ilist;
+    int *numneigh = list->numneigh;
+    int **firstneigh = list->firstneigh;
+    int **firsttouch = list->listgranhistory->firstneigh;
+    double **firstshear = list->listgranhistory->firstdouble;
+
+    // every thread walks the complete list, but records a touching pair
+    // only for those of its two atoms that fall into the thread's slices
+
+    for (int ii = 0; ii < inum; ii++) {
+      const int i = ilist[ii];
+      int *jlist = firstneigh[i];
+      double *allshear = firstshear[i];
+      const int jnum = numneigh[i];
+      int *touch = firsttouch[i];
+      const bool own_i = (i >= lbegin) && (i < lend);
+
+      for (int jj = 0; jj < jnum; jj++) {
+        if (!touch[jj]) continue;
+
+        const int j = jlist[jj] & NEIGHMASK;
+        double *shear = allshear + 3*jj;
+
+        // atom i stores the shear values as they are
+
+        if (own_i) {
+          if (npartner[i] < MAXTOUCH) {
+            const int m = npartner[i];
+            partner[i][m] = tag[j];
+            shearpartner[i][m][0] = shear[0];
+            shearpartner[i][m][1] = shear[1];
+            shearpartner[i][m][2] = shear[2];
+          }
+          npartner[i]++;
+        }
+
+        // a local atom j stores the same values with the sign flipped
+
+        if ((j >= lbegin) && (j < lend)) {
+          if (npartner[j] < MAXTOUCH) {
+            const int m = npartner[j];
+            partner[j][m] = tag[i];
+            shearpartner[j][m][0] = -shear[0];
+            shearpartner[j][m][1] = -shear[1];
+            shearpartner[j][m][2] = -shear[2];
+          }
+          npartner[j]++;
+        }
+
+        // a ghost atom j only has its counter incremented.
+        // NOTE(review): ghost counters are not reset in this function -
+        // confirm they are initialized elsewhere.
+
+        if ((j >= gbegin) && (j < gend)) {
+          npartner[j]++;
+        }
+      }
+    }
+
+    // test for too many touching neighbors among the owned local atoms
+
+    bool overflow = false;
+    for (int k = lbegin; k < lend; k++)
+      if (npartner[k] >= MAXTOUCH) overflow = true;
+
+    if (overflow)
+#if defined(_OPENMP)
+#pragma omp atomic
+#endif
+      ++flag;
+  }
+
+  // any hit on any MPI rank aborts the run
+
+  int flag_all;
+  MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
+  if (flag_all) error->all(FLERR,"Too many touching neighbors - boost MAXTOUCH");
+}
diff --git a/src/USER-OMP/fix_shear_history_omp.h b/src/USER-OMP/fix_shear_history_omp.h
new file mode 100644
index 000000000..9a360b792
--- /dev/null
+++ b/src/USER-OMP/fix_shear_history_omp.h
@@ -0,0 +1,38 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(SHEAR_HISTORY/omp,FixShearHistoryOMP)
+
+#else
+
+#ifndef LMP_FIX_SHEAR_HISTORY_OMP_H
+#define LMP_FIX_SHEAR_HISTORY_OMP_H
+
+#include "fix_shear_history.h"
+
+namespace LAMMPS_NS {
+
+// Variant of FixShearHistory registered as fix style SHEAR_HISTORY/omp.
+// Construction is forwarded unchanged to FixShearHistory; the class only
+// declares an override of pre_exchange().
+
+class FixShearHistoryOMP : public FixShearHistory {
+ public:
+  FixShearHistoryOMP(class LAMMPS *lmp, int narg, char **argv) :
+    FixShearHistory(lmp,narg,argv) {}
+
+  virtual void pre_exchange();
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_adp_omp.cpp b/src/USER-OMP/pair_adp_omp.cpp
new file mode 100644
index 000000000..e91642e6b
--- /dev/null
+++ b/src/USER-OMP/pair_adp_omp.cpp
@@ -0,0 +1,404 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "string.h"
+
+#include "pair_adp_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+// Initialize the PairADP and ThrOMP bases (ThrOMP is tagged with PAIR)
+// and clear respa_enable.
+
+PairADPOMP::PairADPOMP(LAMMPS *lmp)
+  : PairADP(lmp),
+    ThrOMP(lmp, PAIR)
+{
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Threaded driver of the ADP pair style: sets up energy/virial tallying,
+// grows the work arrays when atom->nmax has increased, runs the matching
+// eval<> instantiation in every thread and reduces the per-thread results.
+
+void PairADPOMP::compute(int eflag, int vflag)
+{
+  if (!(eflag || vflag)) {
+    evflag = vflag_fdotr = eflag_global = eflag_atom = 0;
+  } else {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  }
+
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+  const int nall = atom->nlocal + atom->nghost;
+
+  // grow energy and fp arrays if necessary
+  // rho, mu and lambda are sized nthreads*nmax so that each thread can
+  // work on a section of its own; fp is sized nmax
+
+  if (nmax < atom->nmax) {
+    memory->destroy(rho);
+    memory->destroy(fp);
+    memory->destroy(mu);
+    memory->destroy(lambda);
+    nmax = atom->nmax;
+    memory->create(rho,nthreads*nmax,"pair:rho");
+    memory->create(fp,nmax,"pair:fp");
+    memory->create(mu,nthreads*nmax,3,"pair:mu");
+    memory->create(lambda,nthreads*nmax,6,"pair:lambda");
+  }
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+
+    // sections are spaced by nall entries with newton_pair set and
+    // by nlocal entries otherwise
+
+    const int stride = (force->newton_pair) ? nall : atom->nlocal;
+    double *rho_t = rho + tid*stride;
+    double **mu_t = mu + tid*stride;
+    double **lambda_t = lambda + tid*stride;
+
+    // pick the eval<EVFLAG,EFLAG,NEWTON_PAIR> instantiation
+
+    if (force->newton_pair) {
+      if (!evflag)    eval<0,0,1>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid);
+      else if (eflag) eval<1,1,1>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid);
+      else            eval<1,0,1>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid);
+    } else {
+      if (!evflag)    eval<0,0,0>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid);
+      else if (eflag) eval<1,1,0>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid);
+      else            eval<1,0,0>(f, rho_t, mu_t, lambda_t, ifrom, ito, tid);
+    }
+
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+/* ----------------------------------------------------------------------
+   per-thread ADP kernel, called from the parallel region of compute()
+   template flags:
+     EVFLAG      = if set, tally each pair through ev_tally_xyz_thr()
+     EFLAG       = if set, evaluate the embedding and pair energies
+     NEWTON_PAIR = if set, accumulate on every neighbor j and perform the
+                   reverse communication; otherwise only on j < nlocal
+   arguments:
+     f                   = force array this thread adds its forces to
+     rho_t,mu_t,lambda_t = accumulation buffers of this thread, zeroed here
+     iifrom,iito         = range [iifrom,iito) of ilist entries to process
+     tid                 = thread id handed to the *_thr helpers
+   three passes separated by sync_threads(): density accumulation,
+   embedding derivative fp (plus energy), and the force loop
+------------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairADPOMP::eval(double **f, double *rho_t, double **mu_t, 
+		      double **lambda_t, int iifrom, int iito, int tid)
+{
+  int i,j,ii,jj,m,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
+  double rsq,r,p,rhoip,rhojp,z2,z2p,recip,phip,psip,phi;
+  double u2,u2p,w2,w2p,nu;
+  double *coeff;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  double delmux,delmuy,delmuz,trdelmu,tradellam;
+  double adpx,adpy,adpz,fx,fy,fz;
+  double sumlamxx,sumlamyy,sumlamzz,sumlamyz,sumlamxz,sumlamxy;
+
+  evdwl = 0.0;
+
+  double **x = atom->x;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  int nall = nlocal + atom->nghost;
+
+  // force on atom i is summed in these and added to f[i] once per atom
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // zero out this thread's rho, mu and lambda buffers
+  // nall entries with NEWTON_PAIR, nlocal entries otherwise
+
+  if (NEWTON_PAIR) {
+    memset(rho_t, 0, nall*sizeof(double));
+    memset(&(mu_t[0][0]), 0, 3*nall*sizeof(double));
+    memset(&(lambda_t[0][0]), 0, 6*nall*sizeof(double));
+  } else {
+    memset(rho_t, 0, nlocal*sizeof(double));
+    memset(&(mu_t[0][0]), 0, 3*nlocal*sizeof(double));
+    memset(&(lambda_t[0][0]), 0, 6*nlocal*sizeof(double));
+  }
+
+  // rho = density at each atom
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ii++) {
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+
+      if (rsq < cutforcesq) {
+	jtype = type[j];
+	// m = spline table index (capped at nr-1), p = offset within it
+	p = sqrt(rsq)*rdr + 1.0;
+	m = static_cast<int> (p);
+	m = MIN(m,nr-1);
+	p -= m;
+	p = MIN(p,1.0);
+	coeff = rhor_spline[type2rhor[jtype][itype]][m];
+	rho_t[i] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
+        coeff = u2r_spline[type2u2r[jtype][itype]][m];
+	u2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
+        mu_t[i][0] += u2*delx;
+        mu_t[i][1] += u2*dely;
+        mu_t[i][2] += u2*delz;
+        coeff = w2r_spline[type2w2r[jtype][itype]][m];
+	w2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
+        // lambda components are stored in the order xx,yy,zz,yz,xz,xy
+        lambda_t[i][0] += w2*delx*delx;
+        lambda_t[i][1] += w2*dely*dely;
+        lambda_t[i][2] += w2*delz*delz;
+        lambda_t[i][3] += w2*dely*delz;
+        lambda_t[i][4] += w2*delx*delz;
+        lambda_t[i][5] += w2*delx*dely;
+
+	if (NEWTON_PAIR || j < nlocal) {
+          // verify sign difference for mu and lambda
+	  coeff = rhor_spline[type2rhor[itype][jtype]][m];
+	  rho_t[j] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
+          coeff = u2r_spline[type2u2r[itype][jtype]][m];
+          u2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
+          mu_t[j][0] -= u2*delx;
+          mu_t[j][1] -= u2*dely;
+          mu_t[j][2] -= u2*delz;
+          coeff = w2r_spline[type2w2r[itype][jtype]][m];
+	  w2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
+          lambda_t[j][0] += w2*delx*delx;
+          lambda_t[j][1] += w2*dely*dely;
+          lambda_t[j][2] += w2*delz*delz;
+          lambda_t[j][3] += w2*dely*delz;
+          lambda_t[j][4] += w2*delx*delz;
+          lambda_t[j][5] += w2*delx*dely;
+	}
+      }
+    }
+  }
+
+  // wait until all threads are done with computation
+  sync_threads();
+
+  // communicate and sum densities
+
+  if (NEWTON_PAIR) {
+    // reduce per thread density
+    data_reduce_thr(&(rho[0]), nall, comm->nthreads, 1, tid);
+    data_reduce_thr(&(mu[0][0]), nall, comm->nthreads, 3, tid);
+    data_reduce_thr(&(lambda[0][0]), nall, comm->nthreads, 6, tid);
+
+    // wait until reduction is complete
+    sync_threads();
+
+    // reverse communication is issued by the master thread only
+#if defined(_OPENMP)
+#pragma omp master
+#endif
+    { comm->reverse_comm_pair(this); }
+
+    // wait until master thread is done with communication
+    sync_threads();
+  
+  } else {
+    // reduce per thread density
+    data_reduce_thr(&(rho[0]), nlocal, comm->nthreads, 1, tid);
+    data_reduce_thr(&(mu[0][0]), nlocal, comm->nthreads, 3, tid);
+    data_reduce_thr(&(lambda[0][0]), nlocal, comm->nthreads, 6, tid);
+
+    // wait until reduction is complete
+    sync_threads();
+  }
+  
+  // fp = derivative of embedding energy at each atom
+  // phi = embedding energy at each atom
+  // from here on the reduced rho, mu and lambda arrays are read
+
+  for (ii = iifrom; ii < iito; ii++) {
+    i = ilist[ii];
+    p = rho[i]*rdrho + 1.0;
+    m = static_cast<int> (p);
+    m = MAX(1,MIN(m,nrho-1));
+    p -= m;
+    p = MIN(p,1.0);
+    coeff = frho_spline[type2frho[type[i]]][m];
+    fp[i] = (coeff[0]*p + coeff[1])*p + coeff[2];
+    if (EFLAG) {
+      phi = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
+      // add the mu and lambda dependent energy terms of atom i
+      phi += 0.5*(mu[i][0]*mu[i][0]+mu[i][1]*mu[i][1]+mu[i][2]*mu[i][2]);
+      phi += 0.5*(lambda[i][0]*lambda[i][0]+lambda[i][1]*
+		  lambda[i][1]+lambda[i][2]*lambda[i][2]);
+      phi += 1.0*(lambda[i][3]*lambda[i][3]+lambda[i][4]*
+		  lambda[i][4]+lambda[i][5]*lambda[i][5]);
+      phi -= 1.0/6.0*(lambda[i][0]+lambda[i][1]+lambda[i][2])*
+	(lambda[i][0]+lambda[i][1]+lambda[i][2]);
+      if (eflag_global) eng_vdwl_thr[tid] += phi;
+      if (eflag_atom) eatom_thr[tid][i] += phi;
+    }
+  }
+
+  // wait until all threads are done with computation
+  sync_threads();
+
+  // communicate derivative of embedding function
+  // MPI communication only on master thread
+#if defined(_OPENMP)
+#pragma omp master
+#endif
+  { comm->forward_comm_pair(this); }
+
+  // wait until master thread is done with communication
+  sync_threads();
+
+  // compute forces on each atom
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ii++) {
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    fxtmp = fytmp = fztmp = 0.0;
+
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+
+      if (rsq < cutforcesq) {
+	jtype = type[j];
+	r = sqrt(rsq);
+	p = r*rdr + 1.0;
+	m = static_cast<int> (p);
+	m = MIN(m,nr-1);
+	p -= m;
+	p = MIN(p,1.0);
+
+	// rhoip = derivative of (density at atom j due to atom i)
+	// rhojp = derivative of (density at atom i due to atom j)
+	// phi = pair potential energy
+	// phip = phi'
+	// z2 = phi * r
+	// z2p = (phi * r)' = (phi' r) + phi
+        // u2 = u
+        // u2p = u'
+        // w2 = w
+        // w2p = w'
+	// psip needs both fp[i] and fp[j] terms since r_ij appears in two
+	//   terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji)
+	//   hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip
+
+	coeff = rhor_spline[type2rhor[itype][jtype]][m];
+	rhoip = (coeff[0]*p + coeff[1])*p + coeff[2];
+	coeff = rhor_spline[type2rhor[jtype][itype]][m];
+	rhojp = (coeff[0]*p + coeff[1])*p + coeff[2];
+	coeff = z2r_spline[type2z2r[itype][jtype]][m];
+	z2p = (coeff[0]*p + coeff[1])*p + coeff[2];
+	z2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
+        coeff = u2r_spline[type2u2r[itype][jtype]][m];
+	u2p = (coeff[0]*p + coeff[1])*p + coeff[2];
+	u2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
+        coeff = w2r_spline[type2w2r[itype][jtype]][m];
+	w2p = (coeff[0]*p + coeff[1])*p + coeff[2];
+	w2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
+
+	recip = 1.0/r;
+	phi = z2*recip;
+	phip = z2p*recip - phi*recip;
+	psip = fp[i]*rhojp + fp[j]*rhoip + phip;
+	fpair = -psip*recip;
+
+        // differences of mu and sums of lambda for the pair i,j
+        delmux = mu[i][0]-mu[j][0];
+	delmuy = mu[i][1]-mu[j][1];
+	delmuz = mu[i][2]-mu[j][2];
+        trdelmu = delmux*delx+delmuy*dely+delmuz*delz;
+        sumlamxx = lambda[i][0]+lambda[j][0];
+        sumlamyy = lambda[i][1]+lambda[j][1];
+        sumlamzz = lambda[i][2]+lambda[j][2];
+        sumlamyz = lambda[i][3]+lambda[j][3];
+        sumlamxz = lambda[i][4]+lambda[j][4];
+        sumlamxy = lambda[i][5]+lambda[j][5];
+        tradellam = sumlamxx*delx*delx+sumlamyy*dely*dely+
+	  sumlamzz*delz*delz+2.0*sumlamxy*delx*dely+
+	  2.0*sumlamxz*delx*delz+2.0*sumlamyz*dely*delz;
+        nu = sumlamxx+sumlamyy+sumlamzz;
+
+        // adpx,adpy,adpz = force contribution of the mu and lambda terms
+        adpx = delmux*u2 + trdelmu*u2p*delx*recip + 
+	  2.0*w2*(sumlamxx*delx+sumlamxy*dely+sumlamxz*delz) + 
+	  w2p*delx*recip*tradellam - 1.0/3.0*nu*(w2p*r+2.0*w2)*delx;
+        adpy = delmuy*u2 + trdelmu*u2p*dely*recip + 
+	  2.0*w2*(sumlamxy*delx+sumlamyy*dely+sumlamyz*delz) + 
+	  w2p*dely*recip*tradellam - 1.0/3.0*nu*(w2p*r+2.0*w2)*dely;
+        adpz = delmuz*u2 + trdelmu*u2p*delz*recip + 
+	  2.0*w2*(sumlamxz*delx+sumlamyz*dely+sumlamzz*delz) + 
+	  w2p*delz*recip*tradellam - 1.0/3.0*nu*(w2p*r+2.0*w2)*delz;
+        adpx*=-1.0; adpy*=-1.0; adpz*=-1.0;
+
+        fx = delx*fpair+adpx;
+	fy = dely*fpair+adpy;
+	fz = delz*fpair+adpz;
+
+	fxtmp += fx;
+	fytmp += fy;
+	fztmp += fz;
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= fx;
+	  f[j][1] -= fy;
+	  f[j][2] -= fz;
+	}
+
+	// evdwl keeps its initial 0.0 when EFLAG is not set
+	if (EFLAG) evdwl = phi;
+	if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0,
+				     fx,fy,fz,delx,dely,delz,tid);
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Sum of the values reported by memory_usage_thr() and
+// PairADP::memory_usage().
+
+double PairADPOMP::memory_usage()
+{
+  const double thr_bytes = memory_usage_thr();
+  const double base_bytes = PairADP::memory_usage();
+
+  return thr_bytes + base_bytes;
+}
diff --git a/src/USER-OMP/pair_adp_omp.h b/src/USER-OMP/pair_adp_omp.h
new file mode 100644
index 000000000..f7d2509cd
--- /dev/null
+++ b/src/USER-OMP/pair_adp_omp.h
@@ -0,0 +1,49 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(adp/omp,PairADPOMP)
+
+#else
+
+#ifndef LMP_PAIR_ADP_OMP_H
+#define LMP_PAIR_ADP_OMP_H
+
+#include "pair_adp.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+// Pair style adp/omp: combines PairADP with the ThrOMP helper base.
+
+class PairADPOMP : public PairADP, public ThrOMP {
+ public:
+  PairADPOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  // kernel run by each thread on ilist entries [iifrom,iito);
+  // rho_t, mu_t and lambda_t are that thread's accumulation buffers
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(double **f,
+            double *rho_t, double **mu_t, double **lambda_t,
+            int iifrom, int iito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_born_coul_long_omp.cpp
similarity index 61%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_born_coul_long_omp.cpp
index 8ed82c5e5..c277a080c 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_born_coul_long_omp.cpp
@@ -1,163 +1,199 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_born_coul_long_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
+#define EWALD_F   1.12837917
+#define EWALD_P   0.3275911
+#define A1        0.254829592
+#define A2       -0.284496736
+#define A3        1.421413741
+#define A4       -1.453152027
+#define A5        1.061405429
+
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+// Initialize the PairBornCoulLong and ThrOMP bases (ThrOMP is tagged
+// with PAIR) and clear respa_enable.
+
+PairBornCoulLongOMP::PairBornCoulLongOMP(LAMMPS *lmp) :
+  PairBornCoulLong(lmp), ThrOMP(lmp, PAIR)
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+// Threaded driver: sets up energy/virial tallying, runs the matching
+// eval<EVFLAG,EFLAG,NEWTON_PAIR> instantiation in every thread and
+// reduces the per-thread forces, energies and virials.
+
+void PairBornCoulLongOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
+/* ---------------------------------------------------------------------- */
+
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairBornCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
 {
   int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double rsq,r2inv,r6inv,r,rexp,forcecoul,forceborn,factor_coul,factor_lj;
+  double grij,expm2,prefactor,t,erfc;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
-  evdwl = 0.0;
+  evdwl = ecoul = 0.0;
 
   double **x = atom->x;
+  double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
+  double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
+  double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
+    qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 	r2inv = 1.0/rsq;
-	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-	fpair = factor_lj*forcelj*r2inv;
+	r = sqrt(rsq);
+
+	if (rsq < cut_coulsq) {
+	  grij = g_ewald * r;
+	  expm2 = exp(-grij*grij);
+	  t = 1.0 / (1.0 + EWALD_P*grij);
+	  erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+	  prefactor = qqrd2e * qtmp*q[j]/r;
+	  forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+	  if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+	} else forcecoul = 0.0;
+
+	if (rsq < cut_ljsq[itype][jtype]) {
+	  r6inv = r2inv*r2inv*r2inv;
+	  rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
+	  forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv
+	    + born3[itype][jtype]*r2inv*r6inv;
+	} else forceborn = 0.0;
+	
+	fpair = (forcecoul + factor_lj*forceborn)*r2inv;
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
-	  evdwl *= factor_lj;
-	}
+	  if (rsq < cut_coulsq) {
+	    ecoul = prefactor*erfc;
+	    if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
+	  } else ecoul = 0.0;
+	  if (rsq < cut_ljsq[itype][jtype]) {
+	    evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv 
+	      + d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
+	    evdwl *= factor_lj;
+	  }
+	} else evdwl = 0.0;
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+// Sum of the values reported by memory_usage_thr() and
+// PairBornCoulLong::memory_usage().
+
+double PairBornCoulLongOMP::memory_usage()
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairBornCoulLong::memory_usage();
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_born_coul_long_omp.h b/src/USER-OMP/pair_born_coul_long_omp.h
new file mode 100644
index 000000000..d6ccbfc68
--- /dev/null
+++ b/src/USER-OMP/pair_born_coul_long_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(born/coul/long/omp,PairBornCoulLongOMP)
+
+#else
+
+#ifndef LMP_PAIR_BORN_COUL_LONG_OMP_H
+#define LMP_PAIR_BORN_COUL_LONG_OMP_H
+
+#include "pair_born_coul_long.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairBornCoulLongOMP : public PairBornCoulLong, public ThrOMP {
+  // listed as born/coul/long/omp in the PairStyle() entry of this header
+ public:
+  PairBornCoulLongOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();  // memory_usage_thr() + PairBornCoulLong::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // flags are compile-time constants
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_born_omp.cpp
similarity index 83%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_born_omp.cpp
index 8ed82c5e5..c39d205c9 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_born_omp.cpp
@@ -1,163 +1,163 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_born_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairBornOMP::PairBornOMP(LAMMPS *lmp) :
+  PairBorn(lmp), ThrOMP(lmp, PAIR) // both bases are built from the same LAMMPS pointer
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairBornOMP::compute(int eflag, int vflag) // driver: picks the eval<> instantiation inside the parallel region
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); // NOTE(review): 3 is presumably values per atom -- confirm in ThrOMP
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairBornOMP::eval(double **f, int iifrom, int iito, int tid) // handles ilist[iifrom..iito), forces go into f
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double rsq,r2inv,r6inv,r,rexp,forceborn,factor_lj;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
 
   double **x = atom->x;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 	r2inv = 1.0/rsq;
 	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-	fpair = factor_lj*forcelj*r2inv;
+	r = sqrt(rsq);
+	rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]); // reused by the energy expression below
+	forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv
+	  + born3[itype][jtype]*r2inv*r6inv;
+	fpair = factor_lj*forceborn*r2inv; // multiplied by delx/dely/delz to get force components
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
+	  evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv 
+	    + d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
 	  evdwl *= factor_lj;
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
 				 evdwl,0.0,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairBornOMP::memory_usage() // memory_usage_thr() plus the base style's figure
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairBorn::memory_usage(); // contribution reported by the PairBorn base
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_born_omp.h b/src/USER-OMP/pair_born_omp.h
new file mode 100644
index 000000000..b24de4a57
--- /dev/null
+++ b/src/USER-OMP/pair_born_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(born/omp,PairBornOMP)
+
+#else
+
+#ifndef LMP_PAIR_BORN_OMP_H
+#define LMP_PAIR_BORN_OMP_H
+
+#include "pair_born.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairBornOMP : public PairBorn, public ThrOMP {
+  // listed as born/omp in the PairStyle() entry of this header
+ public:
+  PairBornOMP(class LAMMPS *);
+
+  virtual void compute(int, int);  // arguments are (eflag, vflag)
+  virtual double memory_usage();  // memory_usage_thr() + PairBorn::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // flags are compile-time constants
+  void eval(double **f, int ifrom, int ito, int tid);  // processes list entries [ifrom, ito)
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_buck_coul_cut_omp.cpp
similarity index 69%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_buck_coul_cut_omp.cpp
index 8ed82c5e5..ac47d478a 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_buck_coul_cut_omp.cpp
@@ -1,163 +1,182 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_buck_coul_cut_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairBuckCoulCutOMP::PairBuckCoulCutOMP(LAMMPS *lmp) :
+  PairBuckCoulCut(lmp), ThrOMP(lmp, PAIR) // both bases are built from the same LAMMPS pointer
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairBuckCoulCutOMP::compute(int eflag, int vflag) // driver: picks the eval<> instantiation inside the parallel region
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); // NOTE(review): 3 is presumably values per atom -- confirm in ThrOMP
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
+/* ---------------------------------------------------------------------- */
+// per-thread kernel: Buckingham + cutoff Coulomb for ilist[iifrom..iito), forces into f
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairBuckCoulCutOMP::eval(double **f, int iifrom, int iito, int tid)
 {
   int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double rsq,r2inv,r6inv,r,rexp,forcecoul,forcebuck,factor_coul,factor_lj;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
-  evdwl = 0.0;
+  evdwl = ecoul = 0.0; // both stay 0.0 for the whole call when EFLAG is 0
 
   double **x = atom->x;
+  double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
+  double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
+  double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
+    qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)]; // must be read before j is masked
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 	r2inv = 1.0/rsq;
-	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-	fpair = factor_lj*forcelj*r2inv;
+	r = sqrt(rsq);
+	// Coulomb and Buckingham terms each have their own per-type cutoff
+	if (rsq < cut_coulsq[itype][jtype])
+	  forcecoul = qqrd2e * qtmp*q[j]/r;
+	else forcecoul = 0.0;
+	// r6inv and rexp are only defined inside the LJ cutoff; the energy code repeats this test
+	if (rsq < cut_ljsq[itype][jtype]) {
+	  r6inv = r2inv*r2inv*r2inv;
+	  rexp = exp(-r*rhoinv[itype][jtype]);
+	  forcebuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv;
+	} else forcebuck = 0.0;
+	// factor_coul has to scale the force as well, consistent with ecoul below
+	fpair = (factor_coul*forcecoul + factor_lj*forcebuck)*r2inv;
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
-	  evdwl *= factor_lj;
-	}
+	  if (rsq < cut_coulsq[itype][jtype])
+	    ecoul = factor_coul * qqrd2e * qtmp*q[j]/r;
+	  else ecoul = 0.0;
+	  if (rsq < cut_ljsq[itype][jtype]) {
+	    evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv -
+	      offset[itype][jtype];
+	    evdwl *= factor_lj;
+	  } else evdwl = 0.0; // beyond the LJ cutoff: never tally the previous pair's value
+	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairBuckCoulCutOMP::memory_usage() // memory_usage_thr() plus the base style's figure
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairBuckCoulCut::memory_usage(); // contribution reported by the PairBuckCoulCut base
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_buck_coul_cut_omp.h b/src/USER-OMP/pair_buck_coul_cut_omp.h
new file mode 100644
index 000000000..a77f3bad2
--- /dev/null
+++ b/src/USER-OMP/pair_buck_coul_cut_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(buck/coul/cut/omp,PairBuckCoulCutOMP)
+
+#else
+
+#ifndef LMP_PAIR_BUCK_COUL_CUT_OMP_H
+#define LMP_PAIR_BUCK_COUL_CUT_OMP_H
+
+#include "pair_buck_coul_cut.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairBuckCoulCutOMP : public PairBuckCoulCut, public ThrOMP {
+  // listed as buck/coul/cut/omp in the PairStyle() entry of this header
+ public:
+  PairBuckCoulCutOMP(class LAMMPS *);
+
+  virtual void compute(int, int);  // arguments are (eflag, vflag)
+  virtual double memory_usage();  // memory_usage_thr() + PairBuckCoulCut::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // flags are compile-time constants
+  void eval(double **f, int ifrom, int ito, int tid);  // processes list entries [ifrom, ito)
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_buck_coul_long_omp.cpp
similarity index 62%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_buck_coul_long_omp.cpp
index 8ed82c5e5..6e7398ca4 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_buck_coul_long_omp.cpp
@@ -1,163 +1,198 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_buck_coul_long_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
+#define EWALD_F   1.12837917
+#define EWALD_P   0.3275911
+#define A1        0.254829592
+#define A2       -0.284496736
+#define A3        1.421413741
+#define A4       -1.453152027
+#define A5        1.061405429
+
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairBuckCoulLongOMP::PairBuckCoulLongOMP(LAMMPS *lmp) :
+  PairBuckCoulLong(lmp), ThrOMP(lmp, PAIR) // both bases are built from the same LAMMPS pointer
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairBuckCoulLongOMP::compute(int eflag, int vflag) // driver: picks the eval<> instantiation inside the parallel region
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); // NOTE(review): 3 is presumably values per atom -- confirm in ThrOMP
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
+/* ---------------------------------------------------------------------- */
+// per-thread kernel: Buckingham + real-space Ewald Coulomb for ilist[iifrom..iito), forces into f
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairBuckCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)
 {
   int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double rsq,r2inv,r6inv,r,rexp,forcecoul,forcebuck,factor_coul,factor_lj;
+  double grij,expm2,prefactor,t,erfc;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
-  evdwl = 0.0;
+  evdwl = ecoul = 0.0; // both stay 0.0 for the whole call when EFLAG is 0
 
   double **x = atom->x;
+  double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
+  double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
+  double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
+    qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)]; // must be read before j is masked
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 	r2inv = 1.0/rsq;
-	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-	fpair = factor_lj*forcelj*r2inv;
+	r = sqrt(rsq);
+	// erfc is evaluated as the A1..A5 polynomial in t times expm2; prefactor/erfc are reused for ecoul
+	if (rsq < cut_coulsq) {
+	  grij = g_ewald * r;
+	  expm2 = exp(-grij*grij);
+	  t = 1.0 / (1.0 + EWALD_P*grij);
+	  erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+	  prefactor = qqrd2e * qtmp*q[j]/r;
+	  forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+	  if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+	} else forcecoul = 0.0;
+	// r6inv and rexp are only defined inside the LJ cutoff; the energy code repeats this test
+	if (rsq < cut_ljsq[itype][jtype]) {
+	  r6inv = r2inv*r2inv*r2inv;
+	  rexp = exp(-r*rhoinv[itype][jtype]);
+	  forcebuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv;
+	} else forcebuck = 0.0;
+	
+	fpair = (forcecoul + factor_lj*forcebuck)*r2inv;
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
-	  evdwl *= factor_lj;
-	}
+	  if (rsq < cut_coulsq) {
+	    ecoul = prefactor*erfc;
+	    if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
+	  } else ecoul = 0.0;
+	  if (rsq < cut_ljsq[itype][jtype]) {
+	    evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv -
+	      offset[itype][jtype];
+	    evdwl *= factor_lj;
+	  } else evdwl = 0.0; // beyond the LJ cutoff: never tally the previous pair's value
+	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairBuckCoulLongOMP::memory_usage() // memory_usage_thr() plus the base style's figure
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairBuckCoulLong::memory_usage(); // contribution reported by the PairBuckCoulLong base
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_buck_coul_long_omp.h b/src/USER-OMP/pair_buck_coul_long_omp.h
new file mode 100644
index 000000000..2c87904de
--- /dev/null
+++ b/src/USER-OMP/pair_buck_coul_long_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(buck/coul/long/omp,PairBuckCoulLongOMP)
+
+#else
+
+#ifndef LMP_PAIR_BUCK_COUL_LONG_OMP_H
+#define LMP_PAIR_BUCK_COUL_LONG_OMP_H
+
+#include "pair_buck_coul_long.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairBuckCoulLongOMP : public PairBuckCoulLong, public ThrOMP {
+  // listed as buck/coul/long/omp in the PairStyle() entry of this header
+ public:
+  PairBuckCoulLongOMP(class LAMMPS *);
+
+  virtual void compute(int, int);  // arguments are (eflag, vflag)
+  virtual double memory_usage();  // memory_usage_thr() + PairBuckCoulLong::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // flags are compile-time constants
+  void eval(double **f, int ifrom, int ito, int tid);  // processes list entries [ifrom, ito)
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_buck_coul_omp.cpp b/src/USER-OMP/pair_buck_coul_omp.cpp
new file mode 100644
index 000000000..bd171f628
--- /dev/null
+++ b/src/USER-OMP/pair_buck_coul_omp.cpp
@@ -0,0 +1,230 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_buck_coul_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "math_vector.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+#define EWALD_F   1.12837917
+#define EWALD_P   0.3275911
+#define A1        0.254829592
+#define A2       -0.284496736
+#define A3        1.421413741
+#define A4       -1.453152027
+#define A5        1.061405429
+
+/* ---------------------------------------------------------------------- */
+
+PairBuckCoulOMP::PairBuckCoulOMP(LAMMPS *lmp) :
+  PairBuckCoul(lmp), ThrOMP(lmp, PAIR) // both bases are built from the same LAMMPS pointer
+{
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairBuckCoulOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  } else evflag = vflag_fdotr = 0;
+  // values read once here and shared by all threads in the region below
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f;
+    // loop_setup_thr() fills ifrom/ito/tid and returns the force array this thread writes to
+    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    // template arguments are <EVFLAG, EFLAG, NEWTON_PAIR>
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
+	else eval<1,1,0>(f, ifrom, ito, tid);
+      } else {
+	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
+	else eval<1,0,0>(f, ifrom, ito, tid);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
+      else eval<0,0,0>(f, ifrom, ito, tid);
+    }
+
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid); // NOTE(review): 3 is presumably values per atom -- confirm in ThrOMP
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairBuckCoulOMP::eval(double **f, int iifrom, int iito, int tid)
+{
+  double evdwl,ecoul,fpair;
+  evdwl = ecoul = 0.0;
+
+  double **x = atom->x;
+  double *q = atom->q;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  double *special_coul = force->special_coul;
+  double *special_lj = force->special_lj;
+  double qqrd2e = force->qqrd2e;
+  // flat views: atom k is addressed as x0+3*k / f0+3*k (also written k+(k<<1) below)
+  double *x0 = x[0];
+  double *f0 = f[0], *fi = f0;
+
+  int *ilist = list->ilist;
+
+  // loop over neighbors of my atoms
+  // order1/order6: bits 1 and 6 of ewald_order; they gate the Coulomb and long-range Buckingham paths
+  int i, ii, j, order1 = ewald_order&(1<<1), order6 = ewald_order&(1<<6);
+  int *jneigh, *jneighn, typei, typej, ni;
+  double qi, qri, *cutsqi, *cut_bucksqi,
+	 *buck1i, *buck2i, *buckai, *buckci, *rhoinvi, *offseti;
+  double r, rsq, r2inv, force_coul, force_buck;
+  double g2 = g_ewald*g_ewald, g6 = g2*g2*g2, g8 = g6*g2;
+  vector xi, d;
+
+  for (ii = iifrom; ii < iito; ++ii) {			// loop over my atoms
+    i = ilist[ii]; fi = f0+3*i;
+    if (order1) qri = (qi = q[i])*qqrd2e;		// initialize constants
+    offseti = offset[typei = type[i]];
+    buck1i = buck1[typei]; buck2i = buck2[typei];
+    buckai = buck_a[typei]; buckci = buck_c[typei], rhoinvi = rhoinv[typei];
+    cutsqi = cutsq[typei]; cut_bucksqi = cut_bucksq[typei];
+    memcpy(xi, x0+(i+(i<<1)), sizeof(vector));
+    jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i];
+
+    for (; jneigh<jneighn; ++jneigh) {			// loop over neighbors
+      j = *jneigh;
+      ni = sbmask(j); // nonzero ni selects the "special case" branches below
+      j &= NEIGHMASK;
+      
+      { register double *xj = x0+(j+(j<<1));
+	d[0] = xi[0] - xj[0];				// pair vector
+	d[1] = xi[1] - xj[1];
+	d[2] = xi[2] - xj[2]; }
+
+      if ((rsq = vec_dot(d, d)) >= cutsqi[typej = type[j]]) continue;
+      r2inv = 1.0/rsq;
+      r = sqrt(rsq);
+
+      if (order1 && (rsq < cut_coulsq)) {		// coulombic
+	if (!ncoultablebits || rsq <= tabinnersq) {	// series real space
+	  register double x = g_ewald*r;
+	  register double s = qri*q[j], t = 1.0/(1.0+EWALD_P*x);
+	  if (ni == 0) {
+	    s *= g_ewald*exp(-x*x);
+	    force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s;
+	    if (EFLAG) ecoul = t;
+	  } else {					// special case
+	    register double f = s*(1.0-special_coul[ni])/r;
+	    s *= g_ewald*exp(-x*x);
+	    force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s-f;
+	    if (EFLAG) ecoul = t-f;
+	  }					// table real space
+	} else {
+	  register union_int_float_t t;
+	  t.f = rsq; // k below comes from the masked/shifted t.i after storing rsq in t.f
+	  register const int k = (t.i & ncoulmask) >> ncoulshiftbits;
+	  register double f = (rsq-rtable[k])*drtable[k], qiqj = qi*q[j];
+	  if (ni == 0) {
+	    force_coul = qiqj*(ftable[k]+f*dftable[k]);
+	    if (EFLAG) ecoul = qiqj*(etable[k]+f*detable[k]);
+	  }
+	  else {					// special case
+	    t.f = (1.0-special_coul[ni])*(ctable[k]+f*dctable[k]); // t is reused as scratch storage here
+	    force_coul = qiqj*(ftable[k]+f*dftable[k]-t.f);
+	    if (EFLAG) ecoul = qiqj*(etable[k]+f*detable[k]-t.f);
+	  }
+	}
+      } else force_coul = ecoul = 0.0;
+
+      if (rsq < cut_bucksqi[typej]) {			// buckingham
+	register double rn = r2inv*r2inv*r2inv, 
+			expr = exp(-r*rhoinvi[typej]);
+	if (order6) {					// long-range
+	  register double x2 = g2*rsq, a2 = 1.0/x2;
+	  x2 = a2*exp(-x2)*buckci[typej];
+	  if (ni == 0) {
+	    force_buck =
+	      r*expr*buck1i[typej]-g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq;
+	    if (EFLAG) evdwl = expr*buckai[typej]-g6*((a2+1.0)*a2+0.5)*x2;
+	  } else {					// special case
+	    register double f = special_lj[ni], t = rn*(1.0-f);
+	    force_buck = f*r*expr*buck1i[typej]-
+	      g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*buck2i[typej];
+	    if (EFLAG) evdwl = f*expr*buckai[typej] - 
+			 g6*((a2+1.0)*a2+0.5)*x2+t*buckci[typej];
+	  }
+	} else {						// cut
+	  if (ni == 0) {
+	    force_buck = r*expr*buck1i[typej]-rn*buck2i[typej];
+	    if (EFLAG) evdwl = expr*buckai[typej] - 
+			 rn*buckci[typej]-offseti[typej];
+	  } else {					// special case
+	    register double f = special_lj[ni];
+	    force_buck = f*(r*expr*buck1i[typej]-rn*buck2i[typej]);
+	    if (EFLAG) 
+	      evdwl = f*(expr*buckai[typej]-rn*buckci[typej]-offseti[typej]);
+	  }
+	}
+      } else force_buck = evdwl = 0.0;
+
+      fpair = (force_coul+force_buck)*r2inv; // special-bond scaling is already folded into both terms
+
+      if (NEWTON_PAIR || j < nlocal) {
+	register double *fj = f0+(j+(j<<1)), f;
+	fi[0] += f = d[0]*fpair; fj[0] -= f; // equal and opposite on i and j
+	fi[1] += f = d[1]*fpair; fj[1] -= f;
+	fi[2] += f = d[2]*fpair; fj[2] -= f;
+      } else {
+	fi[0] += d[0]*fpair;
+	fi[1] += d[1]*fpair;
+	fi[2] += d[2]*fpair;
+      }
+
+      if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+			       evdwl,ecoul,fpair,d[0],d[1],d[2],tid);
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairBuckCoulOMP::memory_usage() // memory_usage_thr() plus the base style's figure
+{
+  double bytes = memory_usage_thr();
+  bytes += PairBuckCoul::memory_usage(); // contribution reported by the PairBuckCoul base
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_buck_coul_omp.h b/src/USER-OMP/pair_buck_coul_omp.h
new file mode 100644
index 000000000..dbff9b419
--- /dev/null
+++ b/src/USER-OMP/pair_buck_coul_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(buck/coul/omp,PairBuckCoulOMP)
+
+#else
+
+#ifndef LMP_PAIR_BUCK_COUL_OMP_H
+#define LMP_PAIR_BUCK_COUL_OMP_H
+
+#include "pair_buck_coul.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairBuckCoulOMP : public PairBuckCoul, public ThrOMP {
+  // PairBuckCoul variant that additionally derives from the ThrOMP helper class.
+ public:
+  PairBuckCoulOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();  // memory_usage_thr() + PairBuckCoul::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // compile-time switches
+  void eval(double **f, int ifrom, int ito, int tid);  // tid: presumably the calling thread's id
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_buck_omp.cpp
similarity index 83%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_buck_omp.cpp
index 8ed82c5e5..66d8730ab 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_buck_omp.cpp
@@ -1,163 +1,165 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_buck_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairBuckOMP::PairBuckOMP(LAMMPS *lmp) :
+  PairBuck(lmp), ThrOMP(lmp, PAIR)  // PairBuck base, plus ThrOMP set up with PAIR
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairBuckOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);  // 3: presumably x,y,z per atom
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairBuckOMP::eval(double **f, int iifrom, int iito, int tid)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double rsq,r2inv,r6inv,r,rexp,forcebuck,factor_lj;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
 
   double **x = atom->x;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 	r2inv = 1.0/rsq;
 	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-	fpair = factor_lj*forcelj*r2inv;
+	r = sqrt(rsq);
+	r2inv = 1.0/rsq;
+	r6inv = r2inv*r2inv*r2inv;
+	r = sqrt(rsq);
+	rexp = exp(-r*rhoinv[itype][jtype]);
+	forcebuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv;
+	fpair = factor_lj*forcebuck*r2inv;
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
+	  evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv -
+	    offset[itype][jtype];
 	  evdwl *= factor_lj;
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
 				 evdwl,0.0,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairBuckOMP::memory_usage()
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairBuck::memory_usage();  // add what the PairBuck base class reports
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_buck_omp.h b/src/USER-OMP/pair_buck_omp.h
new file mode 100644
index 000000000..40b6702e6
--- /dev/null
+++ b/src/USER-OMP/pair_buck_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(buck/omp,PairBuckOMP)
+
+#else
+
+#ifndef LMP_PAIR_BUCK_OMP_H
+#define LMP_PAIR_BUCK_OMP_H
+
+#include "pair_buck.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairBuckOMP : public PairBuck, public ThrOMP {
+  // PairBuck whose compute() runs eval<> inside an OpenMP parallel region.
+ public:
+  PairBuckOMP(class LAMMPS *);
+
+  virtual void compute(int, int);  // arguments are (eflag, vflag)
+  virtual double memory_usage();   // memory_usage_thr() + PairBuck::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // compile-time switches
+  void eval(double **f, int ifrom, int ito, int tid);  // handles list entries ifrom..ito-1
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_cdeam_omp.cpp b/src/USER-OMP/pair_cdeam_omp.cpp
new file mode 100644
index 000000000..01bd5f6ea
--- /dev/null
+++ b/src/USER-OMP/pair_cdeam_omp.cpp
@@ -0,0 +1,545 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "string.h"
+
+#include "pair_cdeam_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+// This is for debugging purposes. The ASSERT() macro is used in the code to check
+// if everything runs as expected. It is compiled out below; use #if 1 to enable it.
+#if 0
+	#define ASSERT(cond) ((!(cond)) ? my_failure(error,__FILE__,__LINE__) : my_noop())
+
+	inline void my_noop() {}
+	inline void my_failure(Error* error, const char* file, int line) {
+		char str[1024];
+		sprintf(str,"Assertion failure: File %s, line %i", file, line);
+		error->one(FLERR,str);
+	}
+#else
+	#define ASSERT(cond)
+#endif
+
+/* ---------------------------------------------------------------------- */
+
+PairCDEAMOMP::PairCDEAMOMP(LAMMPS *lmp, int _cdeamVersion)
+  : PairEAM(lmp), PairCDEAM(lmp,_cdeamVersion), ThrOMP(lmp, PAIR)
+{
+  respa_enable = 0;  // same setting as the buck/omp and colloid/omp constructors
+}
+
+/* ---------------------------------------------------------------------- */
+// threaded driver: every thread runs the eval<> matching the flags and cdeamVersion
+void PairCDEAMOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+  // grow density, D_values and fp arrays if necessary
+  // rho, rhoB, D_values get nthreads*nmax entries (sliced per thread); fp gets nmax
+
+  if (atom->nmax > nmax) {
+    memory->destroy(rho);
+    memory->destroy(rhoB);
+    memory->destroy(D_values);
+    memory->destroy(fp);
+    nmax = atom->nmax;
+    memory->create(rho,nthreads*nmax,"pair:rho");
+    memory->create(rhoB,nthreads*nmax,"pair:rhoB");
+    memory->create(D_values,nthreads*nmax,"pair:D_values");
+    memory->create(fp,nmax,"pair:fp");
+  }
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f, *rho_t, *rhoB_t, *D_values_t;
+
+    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    if (force->newton_pair) {  // slice stride equals the length eval() zeroes and reduces
+      rho_t = rho + tid*nall;
+      rhoB_t = rhoB + tid*nall;
+      D_values_t = D_values + tid*nall;
+    } else {
+      rho_t = rho + tid*atom->nlocal;
+      rhoB_t = rhoB + tid*atom->nlocal;
+      D_values_t = D_values + tid*atom->nlocal;
+    }
+
+    switch (cdeamVersion) {  // last template argument of eval<> is the version
+
+    case 1:
+  
+      if (evflag) {
+	if (eflag) {
+	  if (force->newton_pair) eval<1,1,1,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
+	  else eval<1,1,0,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
+	} else {
+	  if (force->newton_pair) eval<1,0,1,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
+	  else eval<1,0,0,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
+	}
+      } else {
+	if (force->newton_pair) eval<0,0,1,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
+	else eval<0,0,0,1>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
+      }
+      break;
+
+    case 2:
+
+      if (evflag) {
+	if (eflag) {
+	  if (force->newton_pair) eval<1,1,1,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
+	  else eval<1,1,0,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
+	} else {
+	  if (force->newton_pair) eval<1,0,1,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
+	  else eval<1,0,0,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
+	}
+      } else {
+	if (force->newton_pair) eval<0,0,1,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
+	else eval<0,0,0,2>(f, rho_t, rhoB_t, D_values_t, ifrom, ito, tid);
+      }
+      break;
+
+    default:
+#if defined(_OPENMP)
+#pragma omp master
+#endif
+    error->all(FLERR,"unsupported eam/cd pair style variant");
+    }
+    
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+// per-thread kernel: densities (Stage I), D values (Stage II, version 2 only), forces (Stage III)
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR, int CDEAMVERSION>
+void PairCDEAMOMP::eval(double **f, double *rho_t, double *rhoB_t, 
+		      double *D_values_t, int iifrom, int iito, int tid)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
+  double rsq,rhoip,rhojp,recip,phi;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  evdwl = 0.0;
+
+  double **x = atom->x;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  int nall = nlocal + atom->nghost;
+
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // zero out density 
+
+  if (NEWTON_PAIR) {
+    memset(rho_t, 0, nall*sizeof(double));
+    memset(rhoB_t, 0, nall*sizeof(double));
+    memset(D_values_t, 0, nall*sizeof(double));
+  } else {
+    memset(rho_t, 0, nlocal*sizeof(double));
+    memset(rhoB_t, 0, nlocal*sizeof(double));
+    memset(D_values_t, 0, nlocal*sizeof(double));
+  }
+
+  // Stage I
+
+  // Compute rho and rhoB at each local atom site.
+  // Additionally calculate the D_i values here if we are using the one-site formulation.
+  // For the two-site formulation we have to calculate the D values in an extra loop (Stage II).
+
+  for (ii = iifrom; ii < iito; ii++) {
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+
+      if(rsq < cutforcesq) {
+	jtype = type[j];
+	double r = sqrt(rsq);
+	const EAMTableIndex index = radiusToTableIndex(r);
+	double localrho = RhoOfR(index, jtype, itype);
+	rho_t[i] += localrho;
+	if(jtype == speciesB) rhoB_t[i] += localrho;
+	if(NEWTON_PAIR || j < nlocal) {
+	  localrho = RhoOfR(index, itype, jtype);
+	  rho_t[j] += localrho;
+	  if(itype == speciesB) rhoB_t[j] += localrho;
+	}
+
+	if(CDEAMVERSION == 1 && itype != jtype) {
+	  // Note: if the i-j interaction is not concentration dependent (because either
+	  // i or j are not species A or B) then its contribution to D_i and D_j should
+	  // be ignored.
+	  // This if-clause is only required for a ternary.
+	  if((itype == speciesA && jtype == speciesB) 
+	     || (jtype == speciesA && itype == speciesB)) {
+	    double Phi_AB = PhiOfR(index, itype, jtype, 1.0 / r);
+	    D_values_t[i] += Phi_AB;
+	    if(NEWTON_PAIR || j < nlocal)
+	      D_values_t[j] += Phi_AB;
+	  }
+	}
+      }
+    }
+  }
+
+  // wait until all threads are done with computation
+  sync_threads();
+
+  // communicate and sum densities
+
+  if (NEWTON_PAIR) {
+    // reduce per thread density
+    data_reduce_thr(&(rho[0]), nall, comm->nthreads, 1, tid);  // 1: presumably values per atom
+    data_reduce_thr(&(rhoB[0]), nall, comm->nthreads, 1, tid);
+    if (CDEAMVERSION==1)
+      data_reduce_thr(&(D_values[0]), nall, comm->nthreads, 1, tid);
+
+    // wait until reduction is complete
+    sync_threads();
+
+#if defined(_OPENMP)
+#pragma omp master
+#endif
+    { communicationStage = 1;
+      comm->reverse_comm_pair(this); }
+
+    // wait until master thread is done with communication
+    sync_threads();
+  
+  } else {
+    // reduce per thread density
+    data_reduce_thr(&(rho[0]), nlocal, comm->nthreads, 1, tid);
+    data_reduce_thr(&(rhoB[0]), nlocal, comm->nthreads, 1, tid);
+    if (CDEAMVERSION==1)
+      data_reduce_thr(&(D_values[0]), nlocal, comm->nthreads, 1, tid);
+
+    // wait until reduction is complete
+    sync_threads();
+  }
+  
+  // fp = derivative of embedding energy at each atom
+  // phi = embedding energy at each atom
+
+  for (ii = iifrom; ii < iito; ii++) {
+    i = ilist[ii];
+    EAMTableIndex index = rhoToTableIndex(rho[i]);
+    fp[i] = FPrimeOfRho(index, type[i]);
+    if(EFLAG) {
+      phi = FofRho(index, type[i]);
+      if (eflag_global) eng_vdwl_thr[tid] += phi;
+      if (eflag_atom) eatom_thr[tid][i] += phi;
+    }
+  }
+
+  // wait until all threads are done with computation
+  sync_threads();
+
+  // Communicate derivative of embedding function and densities
+  // and D_values (this for one-site formulation only).
+#if defined(_OPENMP)
+#pragma omp master
+#endif
+  { communicationStage = 2;
+    comm->forward_comm_pair(this); }
+
+  // wait until master thread is done with communication
+  sync_threads();
+
+
+  // The electron densities may not drop to zero because then the concentration would no longer be defined.
+  // But the concentration is not needed anyway if there is no interaction with another atom, which is the case
+  // if the electron density is exactly zero. That's why the following lines have been commented out.
+  //
+  //for(i = 0; i < nlocal + atom->nghost; i++) {
+  //	if(rho[i] == 0 && (type[i] == speciesA || type[i] == speciesB))
+  //		error->one(FLERR,"CD-EAM potential routine: Detected atom with zero electron density.");
+  //}
+
+  // Stage II
+  // This is only required for the original two-site formulation of the CD-EAM potential.
+
+  if(CDEAMVERSION == 2) {
+    // Compute intermediate value D_i for each atom.
+    for (ii = iifrom; ii < iito; ii++) {
+      i = ilist[ii];
+      xtmp = x[i][0];
+      ytmp = x[i][1];
+      ztmp = x[i][2];
+      itype = type[i];
+      jlist = firstneigh[i];
+      jnum = numneigh[i];
+
+      // This code line is required for ternary alloys.
+      if(itype != speciesA && itype != speciesB) continue;
+
+      double x_i = rhoB[i] / rho[i];	// Concentration at atom i.
+
+      for(jj = 0; jj < jnum; jj++) {
+	j = jlist[jj];
+	j &= NEIGHMASK;
+	jtype = type[j];
+	if(itype == jtype) continue;
+
+	// This code line is required for ternary alloys.
+	if(jtype != speciesA && jtype != speciesB) continue;
+
+	delx = xtmp - x[j][0];
+	dely = ytmp - x[j][1];
+	delz = ztmp - x[j][2];
+	rsq = delx*delx + dely*dely + delz*delz;
+
+	if(rsq < cutforcesq) {
+	  double r = sqrt(rsq);
+	  const EAMTableIndex index = radiusToTableIndex(r);
+
+	  // The concentration independent part of the cross pair potential.
+	  double Phi_AB = PhiOfR(index, itype, jtype, 1.0 / r);
+
+	  // Average concentration of two sites
+	  double x_ij = 0.5 * (x_i + rhoB[j]/rho[j]);
+
+	  // Calculate derivative of h(x_ij) polynomial function.
+	  double h_prime = evalHprime(x_ij);
+
+	  D_values_t[i] += h_prime * Phi_AB / (2.0 * rho[i] * rho[i]);
+	  if(NEWTON_PAIR || j < nlocal)
+	    D_values_t[j] += h_prime * Phi_AB / (2.0 * rho[j] * rho[j]);
+	}
+      }
+    }
+
+    if (NEWTON_PAIR) {
+      data_reduce_thr(&(D_values[0]), nall, comm->nthreads, 1, tid);
+
+      // wait until reduction is complete
+      sync_threads();
+
+#if defined(_OPENMP)
+#pragma omp master
+#endif
+      { communicationStage = 3;
+	comm->reverse_comm_pair(this); }
+
+      // wait until master thread is done with communication
+      sync_threads();
+  
+  } else {
+      data_reduce_thr(&(D_values[0]), nlocal, comm->nthreads, 1, tid);
+
+    // wait until reduction is complete
+    sync_threads();
+  }
+
+#if defined(_OPENMP)
+#pragma omp master
+#endif
+    { communicationStage = 4;
+      comm->forward_comm_pair(this); }
+    
+    // wait until master thread is done with communication
+    sync_threads();
+  }
+
+  // Stage III
+
+  // Compute force acting on each atom.
+  for (ii = iifrom; ii < iito; ii++) {
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    fxtmp = fytmp = fztmp = 0.0;
+
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    // Concentration at site i
+    double x_i = -1.0;		// The value -1 indicates: no concentration dependence for all interactions of atom i.
+    // It will be replaced by the concentration at site i if atom i is either A or B.
+
+    double D_i, h_prime_i;  // D_i is assigned only in the branch that sets x_i, and read only if x_i != -1.0
+
+    // This if-clause is only required for ternary alloys.
+    if((itype == speciesA || itype == speciesB) && rho[i] != 0.0) {
+
+      // Compute local concentration at site i.
+      x_i = rhoB[i]/rho[i];
+      ASSERT(x_i >= 0 && x_i<=1.0);
+
+      if(CDEAMVERSION == 1) {
+	// Calculate derivative of h(x_i) polynomial function.
+	h_prime_i = evalHprime(x_i);
+	D_i = D_values[i] * h_prime_i / (2.0 * rho[i] * rho[i]);
+      } else if(CDEAMVERSION == 2) {
+	D_i = D_values[i];
+      } else ASSERT(false);
+    }
+
+    for(jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+
+      if(rsq < cutforcesq) {
+	jtype = type[j];
+	double r = sqrt(rsq);
+	const EAMTableIndex index = radiusToTableIndex(r);
+
+	// rhoip = derivative of (density at atom j due to atom i)
+	// rhojp = derivative of (density at atom i due to atom j)
+	// psip needs both fp[i] and fp[j] terms since r_ij appears in two
+	//   terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji)
+	//   hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip
+	rhoip = RhoPrimeOfR(index, itype, jtype);
+	rhojp = RhoPrimeOfR(index, jtype, itype);
+	fpair = fp[i]*rhojp + fp[j]*rhoip;
+	recip = 1.0/r;
+
+	double x_j = -1;  // The value -1 indicates: no concentration dependence for this i-j pair
+	// because atom j is not of species A nor B.
+
+	// This code line is required for ternary alloy.
+	if(jtype == speciesA || jtype == speciesB) {
+	  ASSERT(rho[i] != 0.0);
+	  ASSERT(rho[j] != 0.0);
+
+	  // Compute local concentration at site j.
+	  x_j = rhoB[j]/rho[j];
+	  ASSERT(x_j >= 0 && x_j<=1.0);
+
+	  double D_j;
+	  if(CDEAMVERSION == 1) {
+	    // Calculate derivative of h(x_j) polynomial function.
+	    double h_prime_j = evalHprime(x_j);
+	    D_j = D_values[j] * h_prime_j / (2.0 * rho[j] * rho[j]);
+	  } else if(CDEAMVERSION == 2) {
+	    D_j = D_values[j];
+	  } else ASSERT(false);
+
+	  double t2 = -rhoB[j];
+	  if(itype == speciesB) t2 += rho[j];
+	  fpair += D_j * rhoip * t2;
+	}
+
+	// This if-clause is only required for a ternary alloy.
+	// Actually we don't need it at all because D_i should be zero anyway if
+	// atom i has no concentration dependent interactions (because it is not species A or B).
+	if(x_i != -1.0) {
+	  double t1 = -rhoB[i];
+	  if(jtype == speciesB) t1 += rho[i];
+	  fpair += D_i * rhojp * t1;
+	}
+
+	double phip;
+	double phi = PhiOfR(index, itype, jtype, recip, phip);
+	if(itype == jtype || x_i == -1.0 || x_j == -1.0) {
+	  // Case of no concentration dependence.
+	  fpair += phip;
+	} else {
+	  // We have a concentration dependence for the i-j interaction.
+	  double h;
+	  if(CDEAMVERSION == 1) {
+	    // Calculate h(x_i) polynomial function.
+	    double h_i = evalH(x_i);
+	    // Calculate h(x_j) polynomial function.
+	    double h_j = evalH(x_j);
+	    h = 0.5 * (h_i + h_j);
+	  } else if(CDEAMVERSION == 2) {
+	    // Average concentration.
+	    double x_ij = 0.5 * (x_i + x_j);
+	    // Calculate h(x_ij) polynomial function.
+	    h = evalH(x_ij);
+	  } else ASSERT(false);
+
+	  fpair += h * phip;
+	  phi *= h;
+	}
+
+	// Divide by r_ij and negate to get forces from gradient.
+	fpair /= -r;
+
+	fxtmp += delx*fpair;
+	fytmp += dely*fpair;
+	fztmp += delz*fpair;
+	if(NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= delx*fpair;
+	  f[j][1] -= dely*fpair;
+	  f[j][2] -= delz*fpair;
+	}
+
+	if(EFLAG) evdwl = phi;
+	if(EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0,
+				fpair,delx,dely,delz,tid);
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairCDEAMOMP::memory_usage()
+{
+  // total = memory_usage_thr() + what the PairCDEAM base class reports
+  const double thr_bytes = memory_usage_thr();
+  const double base_bytes = PairCDEAM::memory_usage();
+  return thr_bytes + base_bytes;
+}
diff --git a/src/USER-OMP/pair_cdeam_omp.h b/src/USER-OMP/pair_cdeam_omp.h
new file mode 100644
index 000000000..85b124cb1
--- /dev/null
+++ b/src/USER-OMP/pair_cdeam_omp.h
@@ -0,0 +1,66 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(eam/cd/omp,PairCDEAM_OneSiteOMP)
+PairStyle(eam/cd/old/omp,PairCDEAM_TwoSiteOMP)
+
+#else
+
+#ifndef LMP_PAIR_CDEAM_OMP_H
+#define LMP_PAIR_CDEAM_OMP_H
+
+#include "pair_cdeam.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairCDEAMOMP : public PairCDEAM, public ThrOMP {
+  // Shared implementation behind the eam/cd/omp and eam/cd/old/omp styles.
+ public:
+  PairCDEAMOMP(class LAMMPS *, int);  // int: cdeamVersion; compute() accepts 1 or 2
+
+  virtual void compute(int, int);  // arguments are (eflag, vflag)
+  virtual double memory_usage();   // memory_usage_thr() + PairCDEAM::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR, int CDEAMVERSION>
+  void eval(double **f, double *rho_t, double *rhoB_t, double *D_values_t, 
+	    int iifrom, int iito, int tid);  // *_t: this thread's slices of rho/rhoB/D_values
+};
+
+  /// The one-site concentration formulation of CD-EAM (registered as eam/cd/omp).
+  class PairCDEAM_OneSiteOMP : public PairCDEAMOMP
+  {
+  public:
+    /// Constructor: selects cdeamVersion 1 in the shared PairCDEAMOMP code.
+    PairCDEAM_OneSiteOMP(class LAMMPS* lmp) : PairEAM(lmp), PairCDEAMOMP(lmp, 1) {}
+  };
+  
+  /// The two-site concentration formulation of CD-EAM (registered as eam/cd/old/omp).
+  class PairCDEAM_TwoSiteOMP : public PairCDEAMOMP
+  {
+  public:
+    /// Constructor: selects cdeamVersion 2 in the shared PairCDEAMOMP code.
+    PairCDEAM_TwoSiteOMP(class LAMMPS* lmp) : PairEAM(lmp), PairCDEAMOMP(lmp, 2) {}
+  };
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_colloid_omp.cpp
similarity index 54%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_colloid_omp.cpp
index 8ed82c5e5..c8bc74407 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_colloid_omp.cpp
@@ -1,163 +1,223 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_colloid_omp.h"
 #include "atom.h"
 #include "comm.h"
+#include "error.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairColloidOMP::PairColloidOMP(LAMMPS *lmp) :
+  PairColloid(lmp), ThrOMP(lmp, PAIR)  // PairColloid base, plus ThrOMP set up with PAIR
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairColloidOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);  // 3: presumably x,y,z per atom
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairColloidOMP::eval(double **f, int iifrom, int iito, int tid)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double rsq,r,r2inv,r6inv,forcelj,factor_lj;
+  double c1,c2,fR,dUR,dUA,K[9],h[4],g[4];  // scratch values for the two colloid forms
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
 
   double **x = atom->x;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
-      if (rsq < cutsq[itype][jtype]) {
+      if (rsq >= cutsq[itype][jtype]) continue;
+      // each case sets fpair (and evdwl); both are applied after the switch
+      switch(form[itype][jtype]) {
+      case SMALL_SMALL:
 	r2inv = 1.0/rsq;
 	r6inv = r2inv*r2inv*r2inv;
 	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
 	fpair = factor_lj*forcelj*r2inv;
-
-	fxtmp += delx*fpair;
-	fytmp += dely*fpair;
-	fztmp += delz*fpair;
-	if (NEWTON_PAIR || j < nlocal) {
-	  f[j][0] -= delx*fpair;
-	  f[j][1] -= dely*fpair;
-	  f[j][2] -= delz*fpair;
-	}
-
-	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
-	  evdwl *= factor_lj;
-	}
-
-	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+	if (EFLAG) 
+	  evdwl = r6inv*(r6inv*lj3[itype][jtype]-lj4[itype][jtype]) -
+	    offset[itype][jtype];
+	break;
+
+      case SMALL_LARGE:
+	c2 = a2[itype][jtype];
+	K[1] = c2*c2;
+	K[2] = rsq;
+	K[0] = K[1] - rsq;
+	K[4] = rsq*rsq;
+	K[3] = K[1] - K[2];
+	K[3] *= K[3]*K[3];
+	K[6] = K[3]*K[3];
+	fR = sigma3[itype][jtype]*a12[itype][jtype]*c2*K[1]/K[3];
+	fpair = 4.0/15.0*fR*factor_lj * 
+	  (2.0*(K[1]+K[2]) * (K[1]*(5.0*K[1]+22.0*K[2])+5.0*K[4]) * 
+	   sigma6[itype][jtype]/K[6]-5.0) / K[0];
+	if (EFLAG) 
+	  evdwl = 2.0/9.0*fR * 
+	    (1.0-(K[1]*(K[1]*(K[1]/3.0+3.0*K[2])+4.2*K[4])+K[2]*K[4]) *
+	     sigma6[itype][jtype]/K[6]) - offset[itype][jtype];
+	if (rsq <= K[1]) error->one(FLERR,"Overlapping small/large in pair colloid");
+	break;
+
+      case LARGE_LARGE:
+	r = sqrt(rsq);
+	c1 = a1[itype][jtype];
+	c2 = a2[itype][jtype];
+	K[0] = c1*c2;
+	K[1] = c1+c2;
+	K[2] = c1-c2;
+	K[3] = K[1]+r;
+	K[4] = K[1]-r;
+	K[5] = K[2]+r;
+	K[6] = K[2]-r;
+	K[7] = 1.0/(K[3]*K[4]);
+	K[8] = 1.0/(K[5]*K[6]);
+	g[0] = pow(K[3],-7.0);
+	g[1] = pow(K[4],-7.0);
+	g[2] = pow(K[5],-7.0);
+	g[3] = pow(K[6],-7.0);
+	h[0] = ((K[3]+5.0*K[1])*K[3]+30.0*K[0])*g[0];
+	h[1] = ((K[4]+5.0*K[1])*K[4]+30.0*K[0])*g[1];
+	h[2] = ((K[5]+5.0*K[2])*K[5]-30.0*K[0])*g[2];
+	h[3] = ((K[6]+5.0*K[2])*K[6]-30.0*K[0])*g[3];
+	g[0] *= 42.0*K[0]/K[3]+6.0*K[1]+K[3];
+	g[1] *= 42.0*K[0]/K[4]+6.0*K[1]+K[4];
+	g[2] *= -42.0*K[0]/K[5]+6.0*K[2]+K[5];
+	g[3] *= -42.0*K[0]/K[6]+6.0*K[2]+K[6];
+	
+	fR = a12[itype][jtype]*sigma6[itype][jtype]/r/37800.0;
+	evdwl = fR * (h[0]-h[1]-h[2]+h[3]);  // set even without EFLAG: dUR below reads it
+	dUR = evdwl/r + 5.0*fR*(g[0]+g[1]-g[2]-g[3]);
+	dUA = -a12[itype][jtype]/3.0*r*((2.0*K[0]*K[7]+1.0)*K[7] + 
+					(2.0*K[0]*K[8]-1.0)*K[8]);
+	fpair = factor_lj * (dUR+dUA)/r;
+	if (EFLAG)
+	  evdwl += a12[itype][jtype]/6.0 * 
+	    (2.0*K[0]*(K[7]+K[8])-log(K[8]/K[7])) - offset[itype][jtype];
+	if (r <= K[1]) error->one(FLERR,"Overlapping large/large in pair colloid");
+	break;
       }
+      
+      if (EFLAG) evdwl *= factor_lj;  // fpair already carries factor_lj from its case
+    
+      fxtmp += delx*fpair;
+      fytmp += dely*fpair;
+      fztmp += delz*fpair;
+      if (NEWTON_PAIR || j < nlocal) {
+	f[j][0] -= delx*fpair;
+	f[j][1] -= dely*fpair;
+	f[j][2] -= delz*fpair;
+      }
+
+      if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
+			       evdwl,0.0,fpair,delx,dely,delz,tid);
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairColloidOMP::memory_usage()
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairColloid::memory_usage();  // add what the PairColloid base class reports
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_colloid_omp.h b/src/USER-OMP/pair_colloid_omp.h
new file mode 100644
index 000000000..a0be13cbb
--- /dev/null
+++ b/src/USER-OMP/pair_colloid_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(colloid/omp,PairColloidOMP)
+
+#else
+
+#ifndef LMP_PAIR_COLLOID_OMP_H
+#define LMP_PAIR_COLLOID_OMP_H
+
+#include "pair_colloid.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairColloidOMP : public PairColloid, public ThrOMP {  // PairColloid combined with the ThrOMP helper base; registered above as pair style colloid/omp
+
+ public:
+  PairColloidOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();  // memory_usage_thr() + PairColloid::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // compile-time switches: EVFLAG gates ev_tally_thr, EFLAG gates energy terms, NEWTON_PAIR lets f[j] be updated even when j >= nlocal
+  void eval(double **f, int ifrom, int ito, int tid);  // presumably: force array, list range and thread id of the caller — confirm against compute()
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_comb_omp.cpp b/src/USER-OMP/pair_comb_omp.cpp
new file mode 100644
index 000000000..207c122e4
--- /dev/null
+++ b/src/USER-OMP/pair_comb_omp.cpp
@@ -0,0 +1,540 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_comb_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "group.h"
+#include "force.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairCombOMP::PairCombOMP(LAMMPS *lmp) :  // constructs both bases, then clears respa_enable
+  PairComb(lmp), ThrOMP(lmp, PAIR)  // ThrOMP is told this object is of kind PAIR
+{
+  respa_enable = 0;  // NOTE(review): presumably marks the style as unusable with rRESPA — confirm in Pair
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairCombOMP::compute(int eflag, int vflag)  // threaded driver: set up tallies, run eval<> per thread, reduce forces and energy/virial
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);  // additional setup provided by ThrOMP, done whenever ev_setup runs
+  } else evflag = vflag_fdotr = vflag_atom = 0;  // nothing requested: clear all tally flags
+
+  const int nall = atom->nlocal + atom->nghost;  // local plus ghost atoms
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+  // grow coordination array if necessary
+
+  if (atom->nmax > nmax) {
+    memory->destroy(NCo);  // old contents are discarded, not copied; eval() resets NCo[i] before use
+    nmax = atom->nmax;
+    memory->create(NCo,nmax,"pair:NCo");
+  }
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;  // declared inside the parallel region, so every thread has its own copy
+    double **f;
+
+    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);  // sets ifrom/ito/tid and returns the force array this thread accumulates into
+
+    if (evflag) {  // choose the eval<EVFLAG,EFLAG,VFLAG_ATOM> instantiation matching the runtime flags
+      if (eflag) {
+	if (vflag_atom) eval<1,1,1>(f, ifrom, ito, tid);
+	else eval<1,1,0>(f, ifrom, ito, tid);
+      } else {
+	if (vflag_atom) eval<1,0,1>(f, ifrom, ito, tid);
+	else eval<1,0,0>(f, ifrom, ito, tid);
+      }
+    } else eval<0,0,0>(f, ifrom, ito, tid);
+
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);  // called by every thread; the 3 is presumably values per atom (x,y,z) — confirm in ThrOMP
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+template <int EVFLAG, int EFLAG, int VFLAG_ATOM>  // EVFLAG gates ev_tally_thr, EFLAG is forwarded to repulsive()/force_zeta(), VFLAG_ATOM gates v_tally3_thr
+void PairCombOMP::eval(double **f, int iifrom, int iito, int tid)  // handles list entries ii in [iifrom,iito) for thread tid, accumulating forces into f
+{
+  int i,j,k,ii,jj,kk,jnum,iparam_i;
+  int itag,jtag,itype,jtype,ktype,iparam_ij,iparam_ijk;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double rsq,rsq1,rsq2;
+  double delr1[3],delr2[3],fi[3],fj[3],fk[3];
+  double zeta_ij,prefactor;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  int mr1,mr2,mr3;
+  int rsc,inty;
+  double elp_ij,filp[3],fjlp[3],fklp[3];
+  double iq,jq; 
+  double yaself;
+  double potal,fac11,fac11e;
+  double vionij,fvionij,sr1,sr2,sr3,Eov,Fov;
+
+  evdwl = ecoul = 0.0;
+
+  double **x = atom->x;
+  double *q = atom->q;
+  int *tag = atom->tag;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  yaself = vionij = fvionij = Eov = Fov = 0.0; 
+
+  double fxtmp,fytmp,fztmp;  // force on atom i, accumulated locally and added to f[i] once per i
+  double fjxtmp,fjytmp,fjztmp;  // same idea for atom j inside the three-body section
+
+  // self energy correction term: potal
+
+  potal_calc(potal,fac11,fac11e);
+
+  // loop over full neighbor list of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    itag = tag[i];
+    itype = map[type[i]];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    fxtmp = fytmp = fztmp = 0.0;
+
+    iq = q[i];
+    NCo[i] = 0;  // coordination count for i starts from zero on every call
+    iparam_i = elem2param[itype][itype][itype];
+
+    // self energy, only on i atom
+
+    yaself = self(&params[iparam_i],iq,potal);
+
+    if (EVFLAG) ev_tally_thr(this,i,i,nlocal,0,yaself,
+			     0.0,0.0,0.0,0.0,0.0,tid);
+
+    // two-body interactions (long and short repulsive)
+
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+      jtag = tag[j];
+
+      if (itag > jtag) {  // skip rule based on tag parity; presumably so each i-j pair of the full list is handled only once
+	if ((itag+jtag) % 2 == 0) continue;
+      } else if (itag < jtag) {
+	if ((itag+jtag) % 2 == 1) continue;
+      } else {  // equal tags: decide by comparing z, then y, then x coordinates
+	if (x[j][2] < ztmp) continue;
+	if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
+	if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
+      }
+
+      // Qj calculates 2-body Coulombic 
+
+      jtype = map[type[j]];
+      jq = q[j];
+
+      delx = xtmp - x[j][0];  // separation vector points from j to i here
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+
+      iparam_ij = elem2param[itype][jtype][jtype];
+
+      // long range q-dependent
+
+      if (rsq > params[iparam_ij].lcutsq) continue;
+
+      inty = intype[itype][jtype];
+
+      // polynomial three-point interpolation
+
+      tri_point(rsq, mr1, mr2, mr3, sr1, sr2, sr3, itype);
+
+      // 1/r energy and forces
+
+      direct(inty,mr1,mr2,mr3,rsq,sr1,sr2,sr3,iq,jq,
+	     potal,fac11,fac11e,vionij,fvionij);
+
+      // field correction to self energy
+
+      field(&params[iparam_ij],rsq,iq,jq,vionij,fvionij);
+
+      // polarization field
+      // sums up long range forces
+
+      fxtmp += delx*fvionij;
+      fytmp += dely*fvionij;
+      fztmp += delz*fvionij;
+      f[j][0] -= delx*fvionij;  // reaction on j is applied unconditionally (no j < nlocal test)
+      f[j][1] -= dely*fvionij;
+      f[j][2] -= delz*fvionij;
+
+      if (EVFLAG) 
+	ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,
+		     0.0,vionij,fvionij,delx,dely,delz,tid);
+
+      // short range q-independent
+
+      if (rsq > params[iparam_ij].cutsq) continue;
+
+      repulsive(&params[iparam_ij],rsq,fpair,EFLAG,evdwl,iq,jq);
+
+      // repulsion is pure two-body, sums up pair repulsive forces
+
+      fxtmp += delx*fpair;
+      fytmp += dely*fpair;
+      fztmp += delz*fpair;
+      f[j][0] -= delx*fpair;
+      f[j][1] -= dely*fpair;
+      f[j][2] -= delz*fpair;
+
+      if (EVFLAG)
+	ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,
+		     evdwl,0.0,fpair,delx,dely,delz,tid);
+    }
+
+    // accumulate coordination number information
+
+    if (cor_flag) {
+      int numcoor = 0;      
+      for (jj = 0; jj < jnum; jj++) {
+        j = jlist[jj];
+	j &= NEIGHMASK;
+	jtype = map[type[j]];
+	iparam_ij = elem2param[itype][jtype][jtype];
+	
+	if(params[iparam_ij].hfocor > 0.0 ) {  // only neighbors whose pair parameters have hfocor > 0 are counted
+	  delr1[0] = x[j][0] - xtmp;
+	  delr1[1] = x[j][1] - ytmp;
+	  delr1[2] = x[j][2] - ztmp;
+	  rsq1 = vec3_dot(delr1,delr1);
+	  
+	  if (rsq1 > params[iparam_ij].cutsq) continue;
+	  ++numcoor;
+	}
+	NCo[i] = numcoor; // stored on every pass that reaches this line; numcoor is unchanged on the passes that 'continue'
+      }
+    }
+
+    // three-body interactions
+    // skip immediately if I-J is not within cutoff
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+      jtype = map[type[j]];
+      iparam_ij = elem2param[itype][jtype][jtype];
+
+      // this Qj for q-dependent BSi
+
+      jq = q[j];
+
+      delr1[0] = x[j][0] - xtmp;  // note: delr1 points from i to j, opposite to delx/dely/delz above
+      delr1[1] = x[j][1] - ytmp;
+      delr1[2] = x[j][2] - ztmp;
+      rsq1 = vec3_dot(delr1,delr1);
+
+      if (rsq1 > params[iparam_ij].cutsq) continue;
+
+      // accumulate bondorder zeta for each i-j interaction via loop over k
+
+      fjxtmp = fjytmp = fjztmp = 0.0;
+      zeta_ij = 0.0;
+      cuo_flag1 = 0; cuo_flag2 = 0;  // NOTE(review): cuo_flag1/cuo_flag2/cuo_flag are not declared in this function, so every thread writes the same variables — looks like a data race; confirm
+
+      for (kk = 0; kk < jnum; kk++) {
+	if (jj == kk) continue;
+	k = jlist[kk];
+	k &= NEIGHMASK;
+	ktype = map[type[k]];
+	iparam_ijk = elem2param[itype][jtype][ktype];
+
+	delr2[0] = x[k][0] - xtmp;
+	delr2[1] = x[k][1] - ytmp;
+	delr2[2] = x[k][2] - ztmp;
+	rsq2 = vec3_dot(delr2,delr2);
+
+	if (rsq2 > params[iparam_ijk].cutsq) continue;
+
+	zeta_ij += zeta(&params[iparam_ijk],rsq1,rsq2,delr1,delr2);
+
+	if (params[iparam_ijk].hfocor == -2.0) cuo_flag1 = 1;  // hfocor values -2.0 / -1.0 act as markers here
+	if (params[iparam_ijk].hfocor == -1.0) cuo_flag2 = 1;
+      }
+
+      if (cuo_flag1 && cuo_flag2) cuo_flag = 1;  // set only when both markers were seen among the k neighbors
+      else cuo_flag = 0;
+
+      // pairwise force due to zeta
+
+      force_zeta(&params[iparam_ij],rsq1,zeta_ij,fpair,
+		 prefactor,EFLAG,evdwl,iq,jq);
+
+      // over-coordination correction for HfO2
+
+      if (cor_flag && NCo[i] != 0)
+	Over_cor(&params[iparam_ij],rsq1,NCo[i],Eov, Fov);
+      evdwl +=  Eov;  // NOTE(review): added even when Over_cor() was skipped, so Eov/Fov from an earlier pair may be reused — confirm intended
+      fpair +=  Fov;
+
+      fxtmp += delr1[0]*fpair;
+      fytmp += delr1[1]*fpair;
+      fztmp += delr1[2]*fpair;
+      fjxtmp -= delr1[0]*fpair;
+      fjytmp -= delr1[1]*fpair;
+      fjztmp -= delr1[2]*fpair;
+
+      if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,evdwl,0.0,
+			       -fpair,-delr1[0],-delr1[1],-delr1[2],tid);  // fpair and delr1 are both negated relative to the two-body tallies
+
+      // attractive term via loop over k (3-body forces)
+
+      for (kk = 0; kk < jnum; kk++) {
+	if (jj == kk) continue;
+	k = jlist[kk];
+	k &= NEIGHMASK;
+	ktype = map[type[k]];
+	iparam_ijk = elem2param[itype][jtype][ktype];
+
+	delr2[0] = x[k][0] - xtmp;
+	delr2[1] = x[k][1] - ytmp;
+	delr2[2] = x[k][2] - ztmp;
+	rsq2 = vec3_dot(delr2,delr2);
+	if (rsq2 > params[iparam_ijk].cutsq) continue;
+
+	for (rsc = 0; rsc < 3; rsc++)
+	  fi[rsc] = fj[rsc] = fk[rsc] = 0.0;
+
+	attractive(&params[iparam_ijk],prefactor,
+		   rsq1,rsq2,delr1,delr2,fi,fj,fk);
+
+	// 3-body LP and BB correction and forces
+
+	elp_ij = elp(&params[iparam_ijk],rsq1,rsq2,delr1,delr2);
+	flp(&params[iparam_ijk],rsq1,rsq2,delr1,delr2,filp,fjlp,fklp); 
+
+	fxtmp += fi[0] + filp[0];
+	fytmp += fi[1] + filp[1];
+	fztmp += fi[2] + filp[2];
+	fjxtmp += fj[0] + fjlp[0];
+	fjytmp += fj[1] + fjlp[1];
+	fjztmp += fj[2] + fjlp[2];
+	f[k][0] += fk[0] + fklp[0];  // k gets its share directly; i and j use the local accumulators
+	f[k][1] += fk[1] + fklp[1];
+	f[k][2] += fk[2] + fklp[2];
+
+        if (EVFLAG) 
+	  ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,
+		       elp_ij,0.0,0.0,0.0,0.0,0.0, tid);
+	if (VFLAG_ATOM) v_tally3_thr(i,j,k,fj,fk,delr1,delr2,tid);  // receives fj/fk from attractive() only, without the flp() parts
+      }
+      f[j][0] += fjxtmp;
+      f[j][1] += fjytmp;
+      f[j][2] += fjztmp;
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+
+    if (cuo_flag) params[iparam_i].cutsq *= 0.65;  // NOTE(review): scales a params[] entry in place from inside the threaded loop and nothing here restores it — confirm
+  }
+  cuo_flag = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairCombOMP::yasu_char(double *qf_fix, int &igroup)  // accumulates per-atom charge forces for group igroup into qf_fix and returns their sum over all ranks
+{
+  int ii;
+  double potal,fac11,fac11e;
+
+  const double * const * const x = atom->x;
+  const double * const q = atom->q;
+  const int * const type = atom->type;
+
+  const int inum = list->inum;
+  const int * const ilist = list->ilist;
+  const int * const numneigh = list->numneigh;
+  const int * const * const firstneigh = list->firstneigh;
+
+  const int * const mask = atom->mask;
+  const int groupbit = group->bitmask[igroup];
+
+  qf = qf_fix;  // qf is not declared in this function (presumably a PairComb member); it now points at the caller's buffer
+  for (ii = 0; ii < inum; ii++) {
+    const int i = ilist[ii];
+    if (mask[i] & groupbit)
+      qf[i] = 0.0;  // only entries of atoms in the group are cleared
+  }
+
+  // communicating charge force to all nodes, first forward then reverse
+
+  comm->forward_comm_pair(this);
+
+  // self energy correction term: potal
+
+  potal_calc(potal,fac11,fac11e);
+
+  // loop over full neighbor list of my atoms; iterations are divided among threads, hence the atomic updates of qf[] below
+#if defined(_OPENMP)
+#pragma omp parallel for private(ii) default(none) shared(potal,fac11e)
+#endif
+  for (ii = 0; ii < inum; ii ++) {
+    double fqi,fqj,fqij,fqji,fqjj,delr1[3],delr2[3];  // declared inside the loop body, so each iteration has its own scratch values
+    double sr1,sr2,sr3;
+    int mr1,mr2,mr3;
+
+    const int i = ilist[ii];
+
+    if (mask[i] & groupbit) {
+      fqi = fqj = fqij = fqji = fqjj = 0.0; // should not be needed.
+      int itype = map[type[i]];
+      const double xtmp = x[i][0];
+      const double ytmp = x[i][1];
+      const double ztmp = x[i][2];
+      const double iq = q[i];
+      const int iparam_i = elem2param[itype][itype][itype];
+
+      // charge force from self energy
+
+      fqi = qfo_self(&params[iparam_i],iq,potal);
+
+      // two-body interactions
+
+      const int * const jlist = firstneigh[i];
+      const int jnum = numneigh[i];
+
+      for (int jj = 0; jj < jnum; jj++) {
+        const int j = jlist[jj] & NEIGHMASK;
+        const int jtype = map[type[j]];
+        double jq = q[j];
+
+        delr1[0] = x[j][0] - xtmp;
+        delr1[1] = x[j][1] - ytmp;
+        delr1[2] = x[j][2] - ztmp;
+        double rsq1 = vec3_dot(delr1,delr1);
+
+        const int iparam_ij = elem2param[itype][jtype][jtype];
+
+        // long range q-dependent
+
+        if (rsq1 > params[iparam_ij].lcutsq) continue;
+
+        const int inty = intype[itype][jtype];
+
+        // polynomial three-point interpolation
+
+        tri_point(rsq1,mr1,mr2,mr3,sr1,sr2,sr3,itype);
+
+        // 1/r charge forces
+
+        qfo_direct(inty,mr1,mr2,mr3,rsq1,sr1,sr2,sr3,fac11e,fqij);
+
+        // field correction to self energy and charge force
+
+        qfo_field(&params[iparam_ij],rsq1,iq,jq,fqji,fqjj);
+        fqi   += jq * fqij + fqji;  // i's share stays in the local fqi until the j loop ends
+#if defined(_OPENMP)
+#pragma omp atomic
+#endif
+        qf[j] += (iq * fqij + fqjj);  // j may be touched by several threads, so the update is atomic
+
+        // polarization field charge force
+        // three-body interactions
+
+        if (rsq1 > params[iparam_ij].cutsq) continue;
+
+        double zeta_ij = 0.0;
+
+        for (int kk = 0; kk < jnum; kk++) {
+	  if (jj == kk) continue;
+	  const int k = jlist[kk] & NEIGHMASK;
+	  const int ktype = map[type[k]];
+	  const int iparam_ijk = elem2param[itype][jtype][ktype];
+	 
+	  delr2[0] = x[k][0] - xtmp;
+	  delr2[1] = x[k][1] - ytmp;
+	  delr2[2] = x[k][2] - ztmp;
+	  const double rsq2 = vec3_dot(delr2,delr2);
+	 
+	  if (rsq2 > params[iparam_ijk].cutsq) continue;
+	  zeta_ij += zeta(&params[iparam_ijk],rsq1,rsq2,delr1,delr2);
+        }
+
+        // charge force in Aij and Bij
+
+        qfo_short(&params[iparam_ij],rsq1,zeta_ij,iq,jq,fqij,fqjj);
+        fqi += fqij;  
+#if defined(_OPENMP)
+#pragma omp atomic
+#endif
+	qf[j] += fqjj;
+      }
+
+#if defined(_OPENMP)
+#pragma omp atomic
+#endif
+      qf[i] += fqi;  // atomic as well: other iterations may add to this entry through their j index
+
+    }
+  }
+
+  comm->reverse_comm_pair(this);
+
+  // sum charge force on each node and return it
+
+  double eneg = 0.0;
+  for (ii = 0; ii < inum; ii++) {
+    const int i = ilist[ii];
+    if (mask[i] & groupbit)
+      eneg += qf[i];
+  }
+  double enegtot;
+  MPI_Allreduce(&eneg,&enegtot,1,MPI_DOUBLE,MPI_SUM,world);  // add the per-rank sums over the communicator 'world'
+  return enegtot;
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairCombOMP::memory_usage()  // returns memory_usage_thr() plus the PairComb base-class figure
+{
+  double bytes = memory_usage_thr();
+  bytes += PairComb::memory_usage();  // add what the non-threaded base style reports for itself
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_comb_omp.h b/src/USER-OMP/pair_comb_omp.h
new file mode 100644
index 000000000..6f020ea9a
--- /dev/null
+++ b/src/USER-OMP/pair_comb_omp.h
@@ -0,0 +1,45 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(comb/omp,PairCombOMP)
+
+#else
+
+#ifndef LMP_PAIR_COMB_OMP_H
+#define LMP_PAIR_COMB_OMP_H
+
+#include "pair_comb.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairCombOMP : public PairComb, public ThrOMP {  // PairComb combined with the ThrOMP helper base; registered above as pair style comb/omp
+
+ public:
+  PairCombOMP(class LAMMPS *);
+
+  virtual void compute(int, int);  // arguments are eflag and vflag in the definition
+  virtual double memory_usage();  // memory_usage_thr() + PairComb::memory_usage()
+
+  virtual double yasu_char(double *, int &);  // charge-force buffer and group index; returns the summed charge force
+
+ private:
+  template <int EVFLAG, int EFLAG, int VFLAG_ATOM>  // compile-time switches chosen by compute() from evflag / eflag / vflag_atom
+  void eval(double **f, int ifrom, int ito, int tid);  // called by compute() with the values obtained from loop_setup_thr()
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_coul_cut_omp.cpp
similarity index 77%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_coul_cut_omp.cpp
index 8ed82c5e5..bb19db3d2 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_coul_cut_omp.cpp
@@ -1,163 +1,162 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_coul_cut_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairCoulCutOMP::PairCoulCutOMP(LAMMPS *lmp) :  // constructs both bases, then clears respa_enable
+  PairCoulCut(lmp), ThrOMP(lmp, PAIR)  // ThrOMP is told this object is of kind PAIR
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairCoulCutOMP::compute(int eflag, int vflag)  // threaded driver: set up tallies, run eval<> per thread, reduce forces and energy/virial
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);  // replaces force_reduce_thr; the extra 3 is presumably values per atom (x,y,z) — confirm in ThrOMP
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
+/* ---------------------------------------------------------------------- */
+
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairCoulCutOMP::eval(double **f, int iifrom, int iito, int tid)  // cut-off Coulomb kernel for list entries [iifrom,iito) of thread tid; forces go into f
 {
   int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
+  double rsq,r2inv,rinv,forcecoul,factor_coul;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
-  evdwl = 0.0;
+  ecoul = 0.0;  // stays 0.0 in the tally call when EFLAG is off
 
   double **x = atom->x;
+  double *q = atom->q;  // per-atom charges
   int *type = atom->type;
   int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  double *special_coul = force->special_coul;  // table indexed by sbmask(j) below
+  double qqrd2e = force->qqrd2e;  // prefactor multiplied into every force and energy term below
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
+    qtmp = q[i];  // charge of atom i, cached for the inner loop
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
-      factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];  // must be read before the NEIGHMASK strip on the next line
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 	r2inv = 1.0/rsq;
-	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-	fpair = factor_lj*forcelj*r2inv;
+	rinv = sqrt(r2inv);  // 1/r
+	forcecoul = qqrd2e * scale[itype][jtype] * qtmp*q[j]*rinv;  // qi*qj/r times qqrd2e and the per-type-pair scale[][]
+	fpair = factor_coul*forcecoul * r2inv;  // extra 1/r^2 so that fpair*del{x,y,z} are the force components
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
-	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
-	  evdwl *= factor_lj;
-	}
+	if (EFLAG)
+	  ecoul = factor_coul * qqrd2e * scale[itype][jtype] * qtmp*q[j]*rinv;  // same expression as forcecoul, times factor_coul
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 0.0,ecoul,fpair,delx,dely,delz,tid);  // energy now goes in the second energy slot; the first is 0.0
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairCoulCutOMP::memory_usage()  // returns memory_usage_thr() plus the PairCoulCut base-class figure
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairCoulCut::memory_usage();  // add what the non-threaded base style reports for itself
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_coul_cut_omp.h b/src/USER-OMP/pair_coul_cut_omp.h
new file mode 100644
index 000000000..eca9958ff
--- /dev/null
+++ b/src/USER-OMP/pair_coul_cut_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(coul/cut/omp,PairCoulCutOMP)
+
+#else
+
+#ifndef LMP_PAIR_COUL_CUT_OMP_H
+#define LMP_PAIR_COUL_CUT_OMP_H
+
+#include "pair_coul_cut.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairCoulCutOMP : public PairCoulCut, public ThrOMP {  // PairCoulCut combined with the ThrOMP helper base; registered above as pair style coul/cut/omp
+
+ public:
+  PairCoulCutOMP(class LAMMPS *);
+
+  virtual void compute(int, int);  // arguments are eflag and vflag in the definition
+  virtual double memory_usage();  // memory_usage_thr() + PairCoulCut::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // compile-time switches: EVFLAG gates ev_tally_thr, EFLAG gates ecoul, NEWTON_PAIR lets f[j] be updated even when j >= nlocal
+  void eval(double **f, int ifrom, int ito, int tid);  // called by compute() with the values obtained from loop_setup_thr()
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_coul_debye_omp.cpp
similarity index 76%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_coul_debye_omp.cpp
index 8ed82c5e5..1c2e7b8e0 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_coul_debye_omp.cpp
@@ -1,163 +1,163 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_coul_debye_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairCoulDebyeOMP::PairCoulDebyeOMP(LAMMPS *lmp) :  // constructs both bases, then clears respa_enable
+  PairCoulDebye(lmp), ThrOMP(lmp, PAIR)  // ThrOMP is told this object is of kind PAIR
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairCoulDebyeOMP::compute(int eflag, int vflag)  // threaded driver: set up tallies, run eval<> per thread, reduce forces and energy/virial
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);  // replaces force_reduce_thr; the extra 3 is presumably values per atom (x,y,z) — confirm in ThrOMP
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
+/* ---------------------------------------------------------------------- */
+
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid)  // screened-Coulomb kernel for list entries [iifrom,iito) of thread tid; forces go into f
 {
   int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
+  double rsq,r2inv,r,rinv,forcecoul,factor_coul,screening;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
-  evdwl = 0.0;
+  ecoul = 0.0;  // stays 0.0 in the tally call when EFLAG is off
 
   double **x = atom->x;
+  double *q = atom->q;  // per-atom charges
   int *type = atom->type;
   int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  double *special_coul = force->special_coul;  // table indexed by sbmask(j) below
+  double qqrd2e = force->qqrd2e;  // prefactor multiplied into every force and energy term below
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
+    qtmp = q[i];  // charge of atom i, cached for the inner loop
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
-      factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];  // must be read before the NEIGHMASK strip on the next line
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 	r2inv = 1.0/rsq;
-	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-	fpair = factor_lj*forcelj*r2inv;
+	r = sqrt(rsq);
+	rinv = 1.0/r;
+	screening = exp(-kappa*r);  // kappa is not declared in this function (presumably a PairCoulDebye member)
+	forcecoul = qqrd2e * qtmp*q[j] * screening * (kappa + rinv);  // equals -r*dE/dr for E = qqrd2e*qi*qj*exp(-kappa*r)/r
+	fpair = factor_coul*forcecoul * r2inv;  // extra 1/r^2 so that fpair*del{x,y,z} are the force components
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
-	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
-	  evdwl *= factor_lj;
-	}
+	if (EFLAG)
+	  ecoul = factor_coul * qqrd2e * qtmp*q[j] * rinv * screening;  // screened pair energy, scaled by factor_coul
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 0.0,ecoul,fpair,delx,dely,delz,tid);  // energy now goes in the second energy slot; the first is 0.0
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
-
-double PairLJCutOMP::memory_usage()
+double PairCoulDebyeOMP::memory_usage()  // returns memory_usage_thr() plus the PairCoulDebye base-class figure
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairCoulDebye::memory_usage();  // add what the non-threaded base style reports for itself
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_coul_debye_omp.h b/src/USER-OMP/pair_coul_debye_omp.h
new file mode 100644
index 000000000..7ad599bb1
--- /dev/null
+++ b/src/USER-OMP/pair_coul_debye_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(coul/debye/omp,PairCoulDebyeOMP)
+
+#else
+
+#ifndef LMP_PAIR_COUL_DEBYE_OMP_H
+#define LMP_PAIR_COUL_DEBYE_OMP_H
+
+#include "pair_coul_debye.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairCoulDebyeOMP : public PairCoulDebye, public ThrOMP {  // PairCoulDebye combined with the ThrOMP helper base; registered above as pair style coul/debye/omp
+
+ public:
+  PairCoulDebyeOMP(class LAMMPS *);
+
+  virtual void compute(int, int);  // arguments are eflag and vflag in the definition
+  virtual double memory_usage();  // memory_usage_thr() + PairCoulDebye::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // compile-time switches: EVFLAG gates ev_tally_thr, EFLAG gates ecoul, NEWTON_PAIR lets f[j] be updated even when j >= nlocal
+  void eval(double **f, int ifrom, int ito, int tid);  // called by compute() with the values obtained from loop_setup_thr()
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_coul_long_omp.cpp
similarity index 58%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_coul_long_omp.cpp
index 8ed82c5e5..3a2e05159 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_coul_long_omp.cpp
@@ -1,163 +1,201 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_coul_long_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
+#define EWALD_F   1.12837917   // 2/sqrt(pi)
+#define EWALD_P   0.3275911    // p in t = 1/(1 + p*x) of the erfc() approximation used in eval()
+#define A1        0.254829592  // A1..A5: polynomial coefficients of that approximation
+#define A2       -0.284496736  //   erfc(x) ~= t*(A1+t*(A2+t*(A3+t*(A4+t*A5)))) * exp(-x*x)
+#define A3        1.421413741  //   (values match Abramowitz & Stegun eq. 7.1.26 — verify if changed)
+#define A4       -1.453152027
+#define A5        1.061405429
+
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairCoulLongOMP::PairCoulLongOMP(LAMMPS *lmp) :  // forwards lmp to both base classes
+  PairCoulLong(lmp), ThrOMP(lmp, PAIR)  // PAIR presumably tells ThrOMP which kind of style it serves — confirm in thr_omp.h
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairCoulLongOMP::compute(int eflag, int vflag)  // driver: set up tallies, run eval<> in each thread, then reduce
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);  // 3 is presumably the number of doubles per atom (fx,fy,fz) — confirm in ThrOMP
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
+/* ---------------------------------------------------------------------- */
+
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)  // Coulomb kernel for neighbor-list entries [iifrom,iito), called once per thread
 {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  int i,j,ii,jj,jnum,itable,itype,jtype;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
+  double fraction,table;
+  double r,r2inv,rsq,forcecoul,factor_coul;
+  double grij,expm2,prefactor,t,erfc;  // note: the local 'erfc' is a plain double holding the approximated value
   int *ilist,*jlist,*numneigh,**firstneigh;
 
-  evdwl = 0.0;
+  ecoul = 0.0;  // stays 0 when EFLAG is off; still handed to ev_tally_thr below
 
   double **x = atom->x;
+  double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
-  double *special_lj = force->special_lj;
+  double *special_coul = force->special_coul;
+  double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
+    qtmp = q[i];  // cache the charge of atom i for the inner loop
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
+      factor_coul = special_coul[sbmask(j)];  // weight selected by sbmask(j), read before j is masked on the next line
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
-      if (rsq < cutsq[itype][jtype]) {
+      if (rsq < cut_coulsq) {  // single global cutoff, not a per-type-pair one
 	r2inv = 1.0/rsq;
-	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-	fpair = factor_lj*forcelj*r2inv;
+	if (!ncoultablebits || rsq <= tabinnersq) {  // analytic path: no table, or rsq at/below the table's inner bound
+	  r = sqrt(rsq);
+	  grij = g_ewald * r;
+	  expm2 = exp(-grij*grij);
+	  t = 1.0 / (1.0 + EWALD_P*grij);
+	  erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;  // polynomial approximation of erfc(grij)
+	  prefactor = qqrd2e * scale[itype][jtype] * qtmp*q[j]/r;
+	  forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+	  if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;  // subtract the excluded share of the bare q_i*q_j/r term
+	} else {  // tabulated path: index derived from the bit pattern of rsq stored as float
+	  union_int_float_t rsq_lookup;
+	  rsq_lookup.f = rsq;
+	  itable = rsq_lookup.i & ncoulmask;
+	  itable >>= ncoulshiftbits;
+	  fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];  // position inside the table bin for linear interpolation
+	  table = ftable[itable] + fraction*dftable[itable];
+	  forcecoul = scale[itype][jtype] * qtmp*q[j] * table;
+	  if (factor_coul < 1.0) {
+	    table = ctable[itable] + fraction*dctable[itable];
+	    prefactor = scale[itype][jtype] * qtmp*q[j] * table;  // only set on this path when factor_coul < 1; the energy branch relies on that
+	    forcecoul -= (1.0-factor_coul)*prefactor;
+	  }
+	}
+
+	fpair = forcecoul * r2inv;
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
-	  evdwl *= factor_lj;
+	  if (!ncoultablebits || rsq <= tabinnersq)  // same path selection as for the force above
+	    ecoul = prefactor*erfc;
+	  else {
+	    table = etable[itable] + fraction*detable[itable];
+	    ecoul = scale[itype][jtype] * qtmp*q[j] * table;
+	  }
+	  if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 0.0,ecoul,fpair,delx,dely,delz,tid);  // the slot that carried evdwl in the LJ original is 0.0 here
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairCoulLongOMP::memory_usage()  // returns memory_usage_thr() plus the PairCoulLong estimate
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairCoulLong::memory_usage();  // qualified call: adds the base-class figure
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_coul_long_omp.h b/src/USER-OMP/pair_coul_long_omp.h
new file mode 100644
index 000000000..7b63f762f
--- /dev/null
+++ b/src/USER-OMP/pair_coul_long_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(coul/long/omp,PairCoulLongOMP)
+
+#else
+
+#ifndef LMP_PAIR_COUL_LONG_OMP_H
+#define LMP_PAIR_COUL_LONG_OMP_H
+
+#include "pair_coul_long.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairCoulLongOMP : public PairCoulLong, public ThrOMP {
+  // PairCoulLong combined with the ThrOMP helper base; exposed as pair style coul/long/omp.
+ public:
+  PairCoulLongOMP(class LAMMPS *);
+
+  virtual void compute(int, int);   // (eflag, vflag): runs eval<> inside an OpenMP parallel region
+  virtual double memory_usage();    // memory_usage_thr() plus PairCoulLong::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>    // flags fixed at compile time; compute() picks the instantiation
+  void eval(double **f, int ifrom, int ito, int tid);  // worker: force array, list range [ifrom,ito), thread id
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_dipole_cut_omp.cpp b/src/USER-OMP/pair_dipole_cut_omp.cpp
new file mode 100644
index 000000000..9ba93b19b
--- /dev/null
+++ b/src/USER-OMP/pair_dipole_cut_omp.cpp
@@ -0,0 +1,288 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_dipole_cut_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairDipoleCutOMP::PairDipoleCutOMP(LAMMPS *lmp) :  // forwards lmp to both base classes
+  PairDipoleCut(lmp), ThrOMP(lmp, PAIR)  // PAIR presumably tells ThrOMP which kind of style it serves — confirm in thr_omp.h
+{
+  respa_enable = 0;  // presumably opts this style out of rRESPA — confirm against the Pair base class
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairDipoleCutOMP::compute(int eflag, int vflag)  // driver: set up tallies, run eval<> in each thread, then reduce
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  } else evflag = vflag_fdotr = 0;  // neither energy nor virial requested: clear both flags
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f, **torque;
+
+    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);  // presumably fills ifrom/ito/tid and returns this thread's force rows — confirm in ThrOMP
+    torque = atom->torque + tid*nall;  // row offset tid*nall; assumes atom->torque holds nthreads*nall rows — TODO confirm
+    // choose the eval<EVFLAG,EFLAG,NEWTON_PAIR> instantiation that matches the runtime flags
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_pair) eval<1,1,1>(f, torque, ifrom, ito, tid);
+	else eval<1,1,0>(f, torque, ifrom, ito, tid);
+      } else {
+	if (force->newton_pair) eval<1,0,1>(f, torque, ifrom, ito, tid);
+	else eval<1,0,0>(f, torque, ifrom, ito, tid);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(f, torque, ifrom, ito, tid);
+      else eval<0,0,0>(f, torque, ifrom, ito, tid);
+    }
+
+    // reduce per thread forces and torques into global arrays.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairDipoleCutOMP::eval(double **f, double **torque, int iifrom, int iito, int tid)
+{
+  // Per-thread kernel: point-charge/point-dipole Coulomb plus LJ terms for the
+  // neighbor-list entries [iifrom,iito); results are accumulated into f and torque.
+  int i,j,ii,jj,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul;
+  double rsq,rinv,r2inv,r6inv,r3inv,r5inv,r7inv,fx,fy,fz;
+  double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz;
+  double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul;
+  double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4;
+  double forcelj,factor_coul,factor_lj;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  evdwl = ecoul = 0.0;  // both are handed to ev_tally_xyz_thr even when EFLAG == 0
+
+  double **x = atom->x;
+  double *q = atom->q;
+  double **mu = atom->mu;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  double *special_coul = force->special_coul;
+  double *special_lj = force->special_lj;
+  double qqrd2e = force->qqrd2e;
+  double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+  for (ii = iifrom; ii < iito; ++ii) {
+    i = ilist[ii];
+    qtmp = q[i];  // charge of atom i; every charge term below reads it
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0;
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_coul = special_coul[sbmask(j)];
+      factor_lj = special_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+	r2inv = 1.0/rsq;
+	rinv = sqrt(r2inv);
+
+	// atom can have both a charge and dipole
+	// i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole
+
+	forcecoulx = forcecouly = forcecoulz = 0.0;
+	tixcoul = tiycoul = tizcoul = 0.0;
+	tjxcoul = tjycoul = tjzcoul = 0.0;
+
+	if (rsq < cut_coulsq[itype][jtype]) {
+
+	  if (qtmp != 0.0 && q[j] != 0.0) {
+            r3inv = r2inv*rinv;
+	    pre1 = qtmp*q[j]*r3inv;
+
+	    forcecoulx += pre1*delx;
+	    forcecouly += pre1*dely;
+	    forcecoulz += pre1*delz;
+	  }
+
+	  if (mu[i][3] > 0.0 && mu[j][3] > 0.0) {
+            r3inv = r2inv*rinv;
+            r5inv = r3inv*r2inv;
+	    r7inv = r5inv*r2inv;
+
+            pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2];
+            pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz;
+            pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
+
+	    pre1 = 3.0*r5inv*pdotp - 15.0*r7inv*pidotr*pjdotr;
+	    pre2 = 3.0*r5inv*pjdotr;
+	    pre3 = 3.0*r5inv*pidotr;
+	    pre4 = -1.0*r3inv;
+
+	    forcecoulx += pre1*delx + pre2*mu[i][0] + pre3*mu[j][0];
+	    forcecouly += pre1*dely + pre2*mu[i][1] + pre3*mu[j][1];
+	    forcecoulz += pre1*delz + pre2*mu[i][2] + pre3*mu[j][2];
+
+	    crossx = pre4 * (mu[i][1]*mu[j][2] - mu[i][2]*mu[j][1]);
+	    crossy = pre4 * (mu[i][2]*mu[j][0] - mu[i][0]*mu[j][2]);
+	    crossz = pre4 * (mu[i][0]*mu[j][1] - mu[i][1]*mu[j][0]);
+
+	    tixcoul += crossx + pre2 * (mu[i][1]*delz - mu[i][2]*dely);
+	    tiycoul += crossy + pre2 * (mu[i][2]*delx - mu[i][0]*delz);
+	    tizcoul += crossz + pre2 * (mu[i][0]*dely - mu[i][1]*delx);
+	    tjxcoul += -crossx + pre3 * (mu[j][1]*delz - mu[j][2]*dely);
+	    tjycoul += -crossy + pre3 * (mu[j][2]*delx - mu[j][0]*delz);
+	    tjzcoul += -crossz + pre3 * (mu[j][0]*dely - mu[j][1]*delx);
+	  }
+
+	  if (mu[i][3] > 0.0 && q[j] != 0.0) {
+            r3inv = r2inv*rinv;
+            r5inv = r3inv*r2inv;
+            pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz;
+	    pre1 = 3.0*q[j]*r5inv * pidotr;
+	    pre2 = q[j]*r3inv;
+
+	    forcecoulx += pre2*mu[i][0] - pre1*delx;
+            forcecouly += pre2*mu[i][1] - pre1*dely;
+            forcecoulz += pre2*mu[i][2] - pre1*delz;
+	    tixcoul += pre2 * (mu[i][1]*delz - mu[i][2]*dely);
+	    tiycoul += pre2 * (mu[i][2]*delx - mu[i][0]*delz);
+	    tizcoul += pre2 * (mu[i][0]*dely - mu[i][1]*delx);
+	  }
+
+	  if (mu[j][3] > 0.0 && qtmp != 0.0) {
+            r3inv = r2inv*rinv;
+            r5inv = r3inv*r2inv;
+            pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
+	    pre1 = 3.0*qtmp*r5inv * pjdotr;
+	    pre2 = qtmp*r3inv;
+
+	    forcecoulx += pre1*delx - pre2*mu[j][0];
+            forcecouly += pre1*dely - pre2*mu[j][1];
+            forcecoulz += pre1*delz - pre2*mu[j][2];
+	    tjxcoul += -pre2 * (mu[j][1]*delz - mu[j][2]*dely);
+	    tjycoul += -pre2 * (mu[j][2]*delx - mu[j][0]*delz);
+	    tjzcoul += -pre2 * (mu[j][0]*dely - mu[j][1]*delx);
+	  }
+	}
+
+	// LJ interaction
+
+	if (rsq < cut_ljsq[itype][jtype]) {
+	  r6inv = r2inv*r2inv*r2inv;
+	  forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+	  forcelj *= factor_lj * r2inv;
+	} else forcelj = 0.0;
+
+	// total force
+
+	fq = factor_coul*qqrd2e;
+	fx = fq*forcecoulx + delx*forcelj;
+	fy = fq*forcecouly + dely*forcelj;
+	fz = fq*forcecoulz + delz*forcelj;
+
+	// force & torque accumulation
+
+	fxtmp += fx;
+	fytmp += fy;
+	fztmp += fz;
+	t1tmp += fq*tixcoul;
+	t2tmp += fq*tiycoul;
+	t3tmp += fq*tizcoul;
+
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= fx;
+	  f[j][1] -= fy;
+	  f[j][2] -= fz;
+	  torque[j][0] += fq*tjxcoul;
+	  torque[j][1] += fq*tjycoul;
+	  torque[j][2] += fq*tjzcoul;
+	}
+
+	if (EFLAG) {
+	  if (rsq < cut_coulsq[itype][jtype]) {
+	    ecoul = qtmp*q[j]*rinv;
+	    if (mu[i][3] > 0.0 && mu[j][3] > 0.0)
+	      ecoul += r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr;
+	    if (mu[i][3] > 0.0 && q[j] != 0.0)
+	      ecoul += -q[j]*r3inv*pidotr;
+	    if (mu[j][3] > 0.0 && qtmp != 0.0)
+	      ecoul += qtmp*r3inv*pjdotr;
+	    ecoul *= factor_coul*qqrd2e;
+	  } else ecoul = 0.0;
+
+	  if (rsq < cut_ljsq[itype][jtype]) {
+	    evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
+	      offset[itype][jtype];
+	    evdwl *= factor_lj;
+	  } else evdwl = 0.0;
+	}
+
+	if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,
+				     evdwl,ecoul,fx,fy,fz,delx,dely,delz,tid);
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+    torque[i][0] += t1tmp;
+    torque[i][1] += t2tmp;
+    torque[i][2] += t3tmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairDipoleCutOMP::memory_usage()  // returns memory_usage_thr() plus the PairDipoleCut estimate
+{
+  double bytes = memory_usage_thr();
+  bytes += PairDipoleCut::memory_usage();  // qualified call: adds the base-class figure
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_dipole_cut_omp.h b/src/USER-OMP/pair_dipole_cut_omp.h
new file mode 100644
index 000000000..832bd4d3b
--- /dev/null
+++ b/src/USER-OMP/pair_dipole_cut_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(dipole/cut/omp,PairDipoleCutOMP)
+
+#else
+
+#ifndef LMP_PAIR_DIPOLE_CUT_OMP_H
+#define LMP_PAIR_DIPOLE_CUT_OMP_H
+
+#include "pair_dipole_cut.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairDipoleCutOMP : public PairDipoleCut, public ThrOMP {
+  // PairDipoleCut combined with the ThrOMP helper base; exposed as pair style dipole/cut/omp.
+ public:
+  PairDipoleCutOMP(class LAMMPS *);
+
+  virtual void compute(int, int);   // (eflag, vflag): runs eval<> inside an OpenMP parallel region
+  virtual double memory_usage();    // memory_usage_thr() plus PairDipoleCut::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>    // flags fixed at compile time; compute() picks the instantiation
+  void eval(double **f, double **torque, int ifrom, int ito, int tid);  // worker: force and torque arrays, list range, thread id
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_dipole_sf_omp.cpp b/src/USER-OMP/pair_dipole_sf_omp.cpp
new file mode 100644
index 000000000..9ebc72d41
--- /dev/null
+++ b/src/USER-OMP/pair_dipole_sf_omp.cpp
@@ -0,0 +1,320 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_dipole_sf_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairDipoleSFOMP::PairDipoleSFOMP(LAMMPS *lmp) :  // forwards lmp to both base classes
+  PairDipoleSF(lmp), ThrOMP(lmp, PAIR)  // PAIR presumably tells ThrOMP which kind of style it serves — confirm in thr_omp.h
+{
+  respa_enable = 0;  // presumably opts this style out of rRESPA — confirm against the Pair base class
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairDipoleSFOMP::compute(int eflag, int vflag)  // driver: set up tallies, run eval<> in each thread, then reduce
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  } else evflag = vflag_fdotr = 0;  // neither energy nor virial requested: clear both flags
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f, **torque;
+
+    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);  // presumably fills ifrom/ito/tid and returns this thread's force rows — confirm in ThrOMP
+    torque = atom->torque + tid*nall;  // row offset tid*nall; assumes atom->torque holds nthreads*nall rows — TODO confirm
+    // choose the eval<EVFLAG,EFLAG,NEWTON_PAIR> instantiation that matches the runtime flags
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_pair) eval<1,1,1>(f, torque, ifrom, ito, tid);
+	else eval<1,1,0>(f, torque, ifrom, ito, tid);
+      } else {
+	if (force->newton_pair) eval<1,0,1>(f, torque, ifrom, ito, tid);
+	else eval<1,0,0>(f, torque, ifrom, ito, tid);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(f, torque, ifrom, ito, tid);
+      else eval<0,0,0>(f, torque, ifrom, ito, tid);
+    }
+
+    // reduce per thread forces and torques into global arrays.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairDipoleSFOMP::eval(double **f, double **torque, int iifrom, int iito, int tid)
+{
+  // Per-thread kernel: cutoff-shifted charge/dipole Coulomb plus LJ terms for the
+  // neighbor-list entries [iifrom,iito); results are accumulated into f and torque.
+  int i,j,ii,jj,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul;
+  double rsq,rinv,r2inv,r6inv,r3inv,r5inv,fx,fy,fz;
+  double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz;
+  double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul;
+  double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4;
+  double forcelj,factor_coul,factor_lj;
+  double presf,afac,bfac,pqfac,qpfac,forceljcut,forceljsf;
+  double aforcecoulx,aforcecouly,aforcecoulz;
+  double bforcecoulx,bforcecouly,bforcecoulz;
+  double rcutlj2inv, rcutcoul2inv,rcutlj6inv;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  evdwl = ecoul = 0.0;  // both are handed to ev_tally_xyz_thr even when EFLAG == 0
+
+  double **x = atom->x;
+  double *q = atom->q;
+  double **mu = atom->mu;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  double *special_coul = force->special_coul;
+  double *special_lj = force->special_lj;
+  double qqrd2e = force->qqrd2e;
+  double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+    i = ilist[ii];
+    qtmp = q[i];  // charge of atom i; every charge term below reads it
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0;
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_coul = special_coul[sbmask(j)];
+      factor_lj = special_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+	r2inv = 1.0/rsq;
+	rinv = sqrt(r2inv);
+
+	// atom can have both a charge and dipole
+	// i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole
+
+	forcecoulx = forcecouly = forcecoulz = 0.0;
+	tixcoul = tiycoul = tizcoul = 0.0;
+	tjxcoul = tjycoul = tjzcoul = 0.0;
+
+	if (rsq < cut_coulsq[itype][jtype]) {
+
+	  if (qtmp != 0.0 && q[j] != 0.0) {
+            pre1 = qtmp*q[j]*rinv*(r2inv-1.0/cut_coulsq[itype][jtype]);
+
+	    forcecoulx += pre1*delx;
+	    forcecouly += pre1*dely;
+	    forcecoulz += pre1*delz;
+	  }
+
+	  if (mu[i][3] > 0.0 && mu[j][3] > 0.0) {
+            r3inv = r2inv*rinv;
+            r5inv = r3inv*r2inv;
+	    rcutcoul2inv=1.0/cut_coulsq[itype][jtype];
+
+            pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2];
+            pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz;
+            pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
+
+	    afac = 1.0 - rsq*rsq * rcutcoul2inv*rcutcoul2inv;
+	    pre1 = afac * ( pdotp - 3.0 * r2inv * pidotr * pjdotr );
+	    aforcecoulx = pre1*delx;
+	    aforcecouly = pre1*dely;
+	    aforcecoulz = pre1*delz;
+
+	    bfac = 1.0 - 4.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv) +
+	      3.0*rsq*rsq*rcutcoul2inv*rcutcoul2inv;
+	    presf = 2.0 * r2inv * pidotr * pjdotr;
+	    bforcecoulx = bfac * (pjdotr*mu[i][0]+pidotr*mu[j][0]-presf*delx);
+	    bforcecouly = bfac * (pjdotr*mu[i][1]+pidotr*mu[j][1]-presf*dely);
+	    bforcecoulz = bfac * (pjdotr*mu[i][2]+pidotr*mu[j][2]-presf*delz);
+
+	    forcecoulx += 3.0 * r5inv * ( aforcecoulx + bforcecoulx );
+	    forcecouly += 3.0 * r5inv * ( aforcecouly + bforcecouly );
+	    forcecoulz += 3.0 * r5inv * ( aforcecoulz + bforcecoulz );
+
+	    pre2 = 3.0 * bfac * r5inv * pjdotr;
+	    pre3 = 3.0 * bfac * r5inv * pidotr;
+	    pre4 = -bfac * r3inv;
+
+	    crossx = pre4 * (mu[i][1]*mu[j][2] - mu[i][2]*mu[j][1]);
+	    crossy = pre4 * (mu[i][2]*mu[j][0] - mu[i][0]*mu[j][2]);
+	    crossz = pre4 * (mu[i][0]*mu[j][1] - mu[i][1]*mu[j][0]);
+
+	    tixcoul += crossx + pre2 * (mu[i][1]*delz - mu[i][2]*dely);
+	    tiycoul += crossy + pre2 * (mu[i][2]*delx - mu[i][0]*delz);
+	    tizcoul += crossz + pre2 * (mu[i][0]*dely - mu[i][1]*delx);
+	    tjxcoul += -crossx + pre3 * (mu[j][1]*delz - mu[j][2]*dely);
+	    tjycoul += -crossy + pre3 * (mu[j][2]*delx - mu[j][0]*delz);
+	    tjzcoul += -crossz + pre3 * (mu[j][0]*dely - mu[j][1]*delx);
+	  }
+
+	  if (mu[i][3] > 0.0 && q[j] != 0.0) {
+            r3inv = r2inv*rinv;
+            r5inv = r3inv*r2inv;
+            pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz;
+	    rcutcoul2inv=1.0/cut_coulsq[itype][jtype];
+	    pre1 = 3.0 * q[j] * r5inv * pidotr * (1-rsq*rcutcoul2inv);
+	    pqfac = 1.0 - 3.0*rsq*rcutcoul2inv +
+	      2.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv);
+	    pre2 = q[j] * r3inv * pqfac;
+
+	    forcecoulx += pre2*mu[i][0] - pre1*delx;
+            forcecouly += pre2*mu[i][1] - pre1*dely;
+            forcecoulz += pre2*mu[i][2] - pre1*delz;
+	    tixcoul += pre2 * (mu[i][1]*delz - mu[i][2]*dely);
+	    tiycoul += pre2 * (mu[i][2]*delx - mu[i][0]*delz);
+	    tizcoul += pre2 * (mu[i][0]*dely - mu[i][1]*delx);
+	  }
+
+	  if (mu[j][3] > 0.0 && qtmp != 0.0) {
+            r3inv = r2inv*rinv;
+            r5inv = r3inv*r2inv;
+            pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
+	    rcutcoul2inv=1.0/cut_coulsq[itype][jtype];
+	    pre1 = 3.0 * qtmp * r5inv * pjdotr * (1-rsq*rcutcoul2inv);
+	    qpfac = 1.0 - 3.0*rsq*rcutcoul2inv +
+	      2.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv);
+	    pre2 = qtmp * r3inv * qpfac;
+
+	    forcecoulx += pre1*delx - pre2*mu[j][0];
+            forcecouly += pre1*dely - pre2*mu[j][1];
+            forcecoulz += pre1*delz - pre2*mu[j][2];
+	    tjxcoul += -pre2 * (mu[j][1]*delz - mu[j][2]*dely);
+	    tjycoul += -pre2 * (mu[j][2]*delx - mu[j][0]*delz);
+	    tjzcoul += -pre2 * (mu[j][0]*dely - mu[j][1]*delx);
+	  }
+	}
+
+	// LJ interaction
+
+	if (rsq < cut_ljsq[itype][jtype]) {
+	  r6inv = r2inv*r2inv*r2inv;
+	  forceljcut = r6inv*(lj1[itype][jtype]*r6inv-lj2[itype][jtype])*r2inv;
+
+	  rcutlj2inv = 1.0 / cut_ljsq[itype][jtype];
+	  rcutlj6inv = rcutlj2inv * rcutlj2inv * rcutlj2inv;
+	  forceljsf = (lj1[itype][jtype]*rcutlj6inv - lj2[itype][jtype]) *
+	    rcutlj6inv * rcutlj2inv;
+
+	  forcelj = factor_lj * (forceljcut - forceljsf);
+	} else forcelj = 0.0;
+
+	// total force
+
+	fq = factor_coul*qqrd2e;
+	fx = fq*forcecoulx + delx*forcelj;
+	fy = fq*forcecouly + dely*forcelj;
+	fz = fq*forcecoulz + delz*forcelj;
+
+	// force & torque accumulation
+
+	fxtmp += fx;
+	fytmp += fy;
+	fztmp += fz;
+	t1tmp += fq*tixcoul;
+	t2tmp += fq*tiycoul;
+	t3tmp += fq*tizcoul;
+
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= fx;
+	  f[j][1] -= fy;
+	  f[j][2] -= fz;
+	  torque[j][0] += fq*tjxcoul;
+	  torque[j][1] += fq*tjycoul;
+	  torque[j][2] += fq*tjzcoul;
+	}
+
+	if (EFLAG) {
+	  if (rsq < cut_coulsq[itype][jtype]) {
+	    ecoul = qtmp * q[j] * rinv *
+	      pow((1.0-sqrt(rsq)/sqrt(cut_coulsq[itype][jtype])),2);
+	    if (mu[i][3] > 0.0 && mu[j][3] > 0.0)
+	      ecoul += bfac * (r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr);
+	    if (mu[i][3] > 0.0 && q[j] != 0.0)
+	      ecoul += -q[j] * r3inv * pqfac * pidotr;
+	    if (mu[j][3] > 0.0 && qtmp != 0.0)
+	      ecoul += qtmp * r3inv * qpfac * pjdotr;
+	    ecoul *= factor_coul*qqrd2e;
+	  } else ecoul = 0.0;
+
+	  if (rsq < cut_ljsq[itype][jtype]) {
+	    evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])+
+	      rcutlj6inv*(6*lj3[itype][jtype]*rcutlj6inv-3*lj4[itype][jtype])*
+	      rsq*rcutlj2inv+
+	      rcutlj6inv*(-7*lj3[itype][jtype]*rcutlj6inv+4*lj4[itype][jtype]);
+	    evdwl *= factor_lj;
+	  } else evdwl = 0.0;
+	}
+
+	if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,
+				     evdwl,ecoul,fx,fy,fz,delx,dely,delz,tid);
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+    torque[i][0] += t1tmp;
+    torque[i][1] += t2tmp;
+    torque[i][2] += t3tmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairDipoleSFOMP::memory_usage()  // returns memory_usage_thr() plus the PairDipoleSF estimate
+{
+  double bytes = memory_usage_thr();
+  bytes += PairDipoleSF::memory_usage();  // qualified call: adds the base-class figure
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_dipole_sf_omp.h b/src/USER-OMP/pair_dipole_sf_omp.h
new file mode 100644
index 000000000..e601e2d56
--- /dev/null
+++ b/src/USER-OMP/pair_dipole_sf_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(dipole/sf/omp,PairDipoleSFOMP)
+
+#else
+
+#ifndef LMP_PAIR_DIPOLE_SF_OMP_H
+#define LMP_PAIR_DIPOLE_SF_OMP_H
+
+#include "pair_dipole_sf.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairDipoleSFOMP : public PairDipoleSF, public ThrOMP {
+  // PairDipoleSF combined with the ThrOMP helper base; exposed as pair style dipole/sf/omp.
+ public:
+  PairDipoleSFOMP(class LAMMPS *);
+
+  virtual void compute(int, int);   // (eflag, vflag): runs eval<> inside an OpenMP parallel region
+  virtual double memory_usage();    // memory_usage_thr() plus PairDipoleSF::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>    // flags fixed at compile time; compute() picks the instantiation
+  void eval(double **f, double **torque, int ifrom, int ito, int tid);  // worker: force and torque arrays, list range, thread id
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_dpd_omp.cpp
similarity index 62%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_dpd_omp.cpp
index 8ed82c5e5..be1e32f37 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_dpd_omp.cpp
@@ -1,163 +1,212 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_dpd_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
+#include "update.h"
+#include "random_mars.h"
 
 using namespace LAMMPS_NS;
 
+#define EPSILON 1.0e-10
+
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairDPDOMP::PairDPDOMP(LAMMPS *lmp) :
+  PairDPD(lmp), ThrOMP(lmp, PAIR), random_thr(NULL)
 {
+  // the per-thread generator table stays empty until compute() creates it
   respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairDPDOMP::~PairDPDOMP()
+{
+  // compute() may never have run, in which case there is no table to free
+  if (random_thr == NULL) return;
+
+  // slot 0 is skipped: compute() stores the pre-existing 'random' pointer there
+  const int nslots = comm->nthreads;
+  for (int n = 1; n < nslots; ++n) delete random_thr[n];
+  delete[] random_thr; random_thr = NULL;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairDPDOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
+  if (!random_thr) {   // first call only: build the table with empty slots
+    random_thr = new RanMars*[nthreads];
+    for (int i=1; i < nthreads; ++i) random_thr[i] = NULL;
+  }
+  random_thr[0] = random;   // slot 0 shares the existing 'random' generator
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
+    // threads != 0: create a uniquely seeded private generator once and keep
+    // it; a fresh 'new' per call leaked memory and replayed the same sequence.
+    if (tid > 0 && !random_thr[tid])
+      random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me + comm->nprocs*tid);
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairDPDOMP::eval(double **f, int iifrom, int iito, int tid)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double vxtmp,vytmp,vztmp,delvx,delvy,delvz;
+  double rsq,r,rinv,dot,wd,randnum,factor_dpd;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
 
   double **x = atom->x;
+  double **v = atom->v;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
+  double dtinvsqrt = 1.0/sqrt(update->dt);   // random force is scaled by 1/sqrt(dt)
   double fxtmp,fytmp,fztmp;
+  RanMars &rng = *random_thr[tid];   // generator stored in slot tid of random_thr
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
+    vxtmp = v[i][0];
+    vytmp = v[i][1];
+    vztmp = v[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
-      factor_lj = special_lj[sbmask(j)];
+      factor_dpd = special_lj[sbmask(j)];   // scaling factor looked up in special_lj
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
-	r2inv = 1.0/rsq;
-	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-	fpair = factor_lj*forcelj*r2inv;
+	r = sqrt(rsq);
+	if (r < EPSILON) continue;     // r can be 0.0 in DPD systems
+	rinv = 1.0/r;
+	delvx = vxtmp - v[j][0];
+	delvy = vytmp - v[j][1];
+	delvz = vztmp - v[j][2];
+	dot = delx*delvx + dely*delvy + delz*delvz;
+	wd = 1.0 - r/cut[itype][jtype];   // weight: 1 at r=0, linear down to 0 at r=cut
+	randnum = rng.gaussian();   // one draw per pair inside the cutoff
+
+	// conservative force = a0 * wd
+	// drag force = -gamma * wd^2 * (delx dot delv) / r
+	// random force = sigma * wd * rnd * dtinvsqrt;
+
+	fpair = a0[itype][jtype]*wd;
+	fpair -= gamma[itype][jtype]*wd*wd*dot*rinv;
+	fpair += sigma[itype][jtype]*wd*randnum*dtinvsqrt;
+	fpair *= factor_dpd*rinv;	// extra 1/r: fpair multiplies delx/dely/delz below
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
-	  evdwl *= factor_lj;
+          // unshifted eng of conservative term:
+	  // evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/cut[itype][jtype]);
+	  // eng shifted to 0.0 at cutoff
+	  evdwl = 0.5*a0[itype][jtype]*cut[itype][jtype] * wd*wd;
+	  evdwl *= factor_dpd;
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
 				 evdwl,0.0,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairDPDOMP::memory_usage()   // byte estimate: thread data + base class + RNGs
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairDPD::memory_usage();
+  bytes += comm->nthreads * sizeof(RanMars*);   // the random_thr pointer table
+  bytes += comm->nthreads * sizeof(RanMars);    // one generator counted per thread
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_dpd_omp.h b/src/USER-OMP/pair_dpd_omp.h
new file mode 100644
index 000000000..9385e5444
--- /dev/null
+++ b/src/USER-OMP/pair_dpd_omp.h
@@ -0,0 +1,52 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(dpd/omp,PairDPDOMP)
+
+#else
+
+#ifndef LMP_PAIR_DPD_OMP_H
+#define LMP_PAIR_DPD_OMP_H
+
+#include "pair_dpd.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairDPDOMP : public PairDPD, public ThrOMP {
+  // registered above as pair style "dpd/omp"
+ public:
+  PairDPDOMP(class LAMMPS *);
+  virtual ~PairDPDOMP();   // frees random_thr and the generators in slots >= 1
+
+  virtual void compute(int, int);   // runs eval() inside an OpenMP parallel region
+  virtual double memory_usage();
+
+ protected:
+  class RanMars **random_thr;   // one generator pointer per thread; NULL until compute()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(double **f, int ifrom, int ito, int tid);   // handles ilist[ifrom..ito-1]
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_dpd_tstat_omp.cpp
similarity index 59%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_dpd_tstat_omp.cpp
index 8ed82c5e5..7e3fb8b39 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_dpd_tstat_omp.cpp
@@ -1,163 +1,214 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_dpd_tstat_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
+#include "update.h"
+#include "random_mars.h"
 
 using namespace LAMMPS_NS;
 
+#define EPSILON 1.0e-10
+
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairDPDTstatOMP::PairDPDTstatOMP(LAMMPS *lmp) :
+  PairDPDTstat(lmp), ThrOMP(lmp, PAIR), random_thr(NULL)
 {
+  // the per-thread generator table stays empty until compute() creates it
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+PairDPDTstatOMP::~PairDPDTstatOMP()
+{
+  // compute() may never have run, in which case there is no table to free
+  if (random_thr == NULL) return;
+
+  // slot 0 is skipped: compute() stores the pre-existing 'random' pointer there
+  const int nslots = comm->nthreads;
+  for (int n = 1; n < nslots; ++n) delete random_thr[n];
+  delete[] random_thr; random_thr = NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairDPDTstatOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
+  if (!random_thr) {   // first call only: build the table with empty slots
+    random_thr = new RanMars*[nthreads];
+    for (int i=1; i < nthreads; ++i) random_thr[i] = NULL;
+  }
+  random_thr[0] = random;   // slot 0 shares the existing 'random' generator
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
+    // threads != 0: create a uniquely seeded private generator once and keep
+    // it; a fresh 'new' per call leaked memory and replayed the same sequence.
+    if (tid > 0 && !random_thr[tid])
+      random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me + comm->nprocs*tid);
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairDPDTstatOMP::eval(double **f, int iifrom, int iito, int tid)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double vxtmp,vytmp,vztmp,delvx,delvy,delvz;
+  double rsq,r,rinv,dot,wd,randnum,factor_dpd;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
 
   double **x = atom->x;
+  double **v = atom->v;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
+  double dtinvsqrt = 1.0/sqrt(update->dt);   // random force is scaled by 1/sqrt(dt)
   double fxtmp,fytmp,fztmp;
+  RanMars &rng = *random_thr[tid];   // generator stored in slot tid of random_thr
+  // adjust sigma if target T is changing: T is interpolated linearly between
+  // t_start and t_stop over the run.  NOTE(review): every thread entering
+  // eval() rewrites the shared sigma[][] with identical values - confirm OK.
+  if (t_start != t_stop) {
+    double delta = update->ntimestep - update->beginstep;
+    if (delta != 0.0) delta /= update->endstep - update->beginstep;  // avoid 0/0 when endstep == beginstep
+    temperature = t_start + delta * (t_stop-t_start);
+    double boltz = force->boltz;
+    for (i = 1; i <= atom->ntypes; i++)
+      for (j = i; j <= atom->ntypes; j++)
+	sigma[i][j] = sigma[j][i] = sqrt(2.0*boltz*temperature*gamma[i][j]);
+  }
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
+    vxtmp = v[i][0];
+    vytmp = v[i][1];
+    vztmp = v[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
-      factor_lj = special_lj[sbmask(j)];
+      factor_dpd = special_lj[sbmask(j)];   // scaling factor looked up in special_lj
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
-	r2inv = 1.0/rsq;
-	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-	fpair = factor_lj*forcelj*r2inv;
+	r = sqrt(rsq);
+	if (r < EPSILON) continue;     // r can be 0.0 in DPD systems
+	rinv = 1.0/r;
+	delvx = vxtmp - v[j][0];
+	delvy = vytmp - v[j][1];
+	delvz = vztmp - v[j][2];
+	dot = delx*delvx + dely*delvy + delz*delvz;
+	wd = 1.0 - r/cut[itype][jtype];   // weight: 1 at r=0, linear down to 0 at r=cut
+	randnum = rng.gaussian();   // one draw per pair inside the cutoff
+
+	// drag force = -gamma * wd^2 * (delx dot delv) / r
+	// random force = sigma * wd * rnd * dtinvsqrt;
+
+	fpair = -gamma[itype][jtype]*wd*wd*dot*rinv;
+	fpair += sigma[itype][jtype]*wd*randnum*dtinvsqrt;
+	fpair *= factor_dpd*rinv;	// extra 1/r: fpair multiplies delx/dely/delz below
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
-	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
-	  evdwl *= factor_lj;
-	}
-
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 0.0,0.0,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairDPDTstatOMP::memory_usage()   // byte estimate: thread data + base class + RNGs
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairDPDTstat::memory_usage();
+  bytes += comm->nthreads * sizeof(RanMars*);   // the random_thr pointer table
+  bytes += comm->nthreads * sizeof(RanMars);    // one generator counted per thread
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_dpd_tstat_omp.h b/src/USER-OMP/pair_dpd_tstat_omp.h
new file mode 100644
index 000000000..14f640a92
--- /dev/null
+++ b/src/USER-OMP/pair_dpd_tstat_omp.h
@@ -0,0 +1,52 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(dpd/tstat/omp,PairDPDTstatOMP)
+
+#else
+
+#ifndef LMP_PAIR_DPD_TSTAT_OMP_H
+#define LMP_PAIR_DPD_TSTAT_OMP_H
+
+#include "pair_dpd_tstat.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairDPDTstatOMP : public PairDPDTstat, public ThrOMP {
+  // registered above as pair style "dpd/tstat/omp"
+ public:
+  PairDPDTstatOMP(class LAMMPS *);
+  virtual ~PairDPDTstatOMP();   // frees random_thr and the generators in slots >= 1
+
+  virtual void compute(int, int);   // runs eval() inside an OpenMP parallel region
+  virtual double memory_usage();
+
+ protected:
+  class RanMars **random_thr;   // one generator pointer per thread; NULL until compute()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(double **f, int ifrom, int ito, int tid);   // handles ilist[ifrom..ito-1]
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_eam_alloy_omp.cpp b/src/USER-OMP/pair_eam_alloy_omp.cpp
new file mode 100644
index 000000000..54be571b7
--- /dev/null
+++ b/src/USER-OMP/pair_eam_alloy_omp.cpp
@@ -0,0 +1,323 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Stephen Foiles (SNL), Murray Daw (SNL)
+------------------------------------------------------------------------- */
+
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "pair_eam_alloy_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define MAXLINE 1024
+
+/* ---------------------------------------------------------------------- */
+
+PairEAMAlloyOMP::PairEAMAlloyOMP(LAMMPS *lmp) : PairEAMOMP(lmp)
+{
+  one_coeff = 1;   // presumably: style takes a single "* *" coeff call - confirm in Pair
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more type pairs
+   read DYNAMO setfl file
+------------------------------------------------------------------------- */
+
+void PairEAMAlloyOMP::coeff(int narg, char **arg)
+{
+  int i,j;
+
+  if (!allocated) allocate();
+
+  if (narg != 3 + atom->ntypes)   // "*", "*", file name, then one entry per atom type
+    error->all(FLERR,"Incorrect args for pair coefficients");
+
+  // ensure I,J args are * *
+
+  if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
+    error->all(FLERR,"Incorrect args for pair coefficients");
+
+  // read EAM setfl file
+
+  if (setfl) {   // release the tables of a previously read file first
+    for (i = 0; i < setfl->nelements; i++) delete [] setfl->elements[i];
+    delete [] setfl->elements;
+    delete [] setfl->mass;
+    memory->destroy(setfl->frho);
+    memory->destroy(setfl->rhor);
+    memory->destroy(setfl->z2r);
+    delete setfl;
+  }
+  setfl = new Setfl();
+  read_file(arg[2]);
+
+  // read args that map atom types to elements in potential file
+  // map[i] = which element the Ith atom type is, -1 if NULL
+
+  for (i = 3; i < narg; i++) {
+    if (strcmp(arg[i],"NULL") == 0) {
+      map[i-2] = -1;   // arg[3] belongs to atom type 1, hence the i-2 offset
+      continue;
+    }
+    for (j = 0; j < setfl->nelements; j++)
+      if (strcmp(arg[i],setfl->elements[j]) == 0) break;
+    if (j < setfl->nelements) map[i-2] = j;
+    else error->all(FLERR,"No matching element in EAM potential file");
+  }
+
+  // clear setflag since coeff() called once with I,J = * *
+
+  int n = atom->ntypes;
+  for (i = 1; i <= n; i++)
+    for (j = i; j <= n; j++)
+      setflag[i][j] = 0;
+
+  // set setflag i,j for type pairs where both are mapped to elements
+  // set mass of atom type if i = j
+
+  int count = 0;
+  for (i = 1; i <= n; i++) {
+    for (j = i; j <= n; j++) {
+      if (map[i] >= 0 && map[j] >= 0) {
+	setflag[i][j] = 1;
+	if (i == j) atom->set_mass(i,setfl->mass[map[i]]);
+	count++;
+      }
+    }
+  }
+
+  if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");   // no type mapped to an element
+}
+
+/* ----------------------------------------------------------------------
+   read a multi-element DYNAMO setfl file
+------------------------------------------------------------------------- */
+
+void PairEAMAlloyOMP::read_file(char *filename)
+{
+  Setfl *file = setfl;   // all parsed data is stored in the current setfl object
+
+  // open potential file
+
+  int me = comm->me;
+  FILE *fptr;   // only opened and used on rank 0
+  char line[MAXLINE];
+
+  if (me == 0) {
+    fptr = fopen(filename,"r");
+    if (fptr == NULL) {
+      char str[128];
+      snprintf(str,sizeof(str),"Cannot open EAM potential file %s",filename);  // bounded: long names are truncated
+      error->one(FLERR,str);
+    }
+  }
+
+  // read and broadcast header
+  // extract element names from nelements line
+
+  int n;
+  if (me == 0) {
+    fgets(line,MAXLINE,fptr);   // the first three lines are read and overwritten,
+    fgets(line,MAXLINE,fptr);   // i.e. skipped
+    fgets(line,MAXLINE,fptr);
+    fgets(line,MAXLINE,fptr);   // fourth line: element count followed by the names
+    n = strlen(line) + 1;
+  }
+  MPI_Bcast(&n,1,MPI_INT,0,world);
+  MPI_Bcast(line,n,MPI_CHAR,0,world);
+
+  sscanf(line,"%d",&file->nelements);
+  int nwords = atom->count_words(line);
+  if (nwords != file->nelements + 1)
+    error->all(FLERR,"Incorrect element names in EAM potential file");
+  
+  char **words = new char*[file->nelements+1];   // +1 slot for the terminating NULL token
+  nwords = 0;
+  strtok(line," \t\n\r\f");   // first token is the element count, already parsed
+  while (words[nwords++] = strtok(NULL," \t\n\r\f")) continue;
+
+  file->elements = new char*[file->nelements];
+  for (int i = 0; i < file->nelements; i++) {
+    n = strlen(words[i]) + 1;
+    file->elements[i] = new char[n];
+    strcpy(file->elements[i],words[i]);
+  }
+  delete [] words;
+
+  if (me == 0) {
+    fgets(line,MAXLINE,fptr);
+    sscanf(line,"%d %lg %d %lg %lg",
+	   &file->nrho,&file->drho,&file->nr,&file->dr,&file->cut);
+  }
+
+  MPI_Bcast(&file->nrho,1,MPI_INT,0,world);
+  MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&file->nr,1,MPI_INT,0,world);
+  MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world);
+
+  file->mass = new double[file->nelements];
+  memory->create(file->frho,file->nelements,file->nrho+1,"pair:frho");
+  memory->create(file->rhor,file->nelements,file->nr+1,"pair:rhor");
+  memory->create(file->z2r,file->nelements,file->nelements,file->nr+1,
+		 "pair:z2r");
+
+  int i,j,tmp;
+  for (i = 0; i < file->nelements; i++) {
+    if (me == 0) {
+      fgets(line,MAXLINE,fptr);
+      sscanf(line,"%d %lg",&tmp,&file->mass[i]);   // leading integer is parsed but not used
+    }
+    MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world);
+
+    if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]);   // tables are filled from index 1
+    MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world);
+    if (me == 0) grab(fptr,file->nr,&file->rhor[i][1]);
+    MPI_Bcast(&file->rhor[i][1],file->nr,MPI_DOUBLE,0,world);
+  }
+
+  for (i = 0; i < file->nelements; i++)
+    for (j = 0; j <= i; j++) {   // only the lower triangle j <= i is read
+      if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]);
+      MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world);
+    }
+
+  // close the potential file
+
+  if (me == 0) fclose(fptr);
+}
+
+/* ----------------------------------------------------------------------
+   copy read-in setfl potential to standard array format
+------------------------------------------------------------------------- */
+
+void PairEAMAlloyOMP::file2array()
+{
+  int i,j,m,n;
+  int ntypes = atom->ntypes;
+
+  // set function params directly from setfl file
+
+  nrho = setfl->nrho;
+  nr = setfl->nr;
+  drho = setfl->drho;
+  dr = setfl->dr;
+
+  // ------------------------------------------------------------------
+  // setup frho arrays
+  // ------------------------------------------------------------------
+
+  // allocate frho arrays
+  // nfrho = # of setfl elements + 1 for zero array
+  
+  nfrho = setfl->nelements + 1;
+  memory->destroy(frho);   // drop any table left from an earlier call
+  memory->create(frho,nfrho,nrho+1,"pair:frho");
+
+  // copy each element's frho to global frho
+
+  for (i = 0; i < setfl->nelements; i++)
+    for (m = 1; m <= nrho; m++) frho[i][m] = setfl->frho[i][m];   // index 0 is left unset
+
+  // add extra frho of zeroes for non-EAM types to point to (pair hybrid)
+  // this is necessary b/c fp is still computed for non-EAM atoms
+
+  for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0;
+
+  // type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to
+  // if atom type doesn't point to element (non-EAM atom in pair hybrid)
+  // then map it to last frho array of zeroes
+
+  for (i = 1; i <= ntypes; i++)
+    if (map[i] >= 0) type2frho[i] = map[i];
+    else type2frho[i] = nfrho-1;
+
+  // ------------------------------------------------------------------
+  // setup rhor arrays
+  // ------------------------------------------------------------------
+
+  // allocate rhor arrays
+  // nrhor = # of setfl elements
+
+  nrhor = setfl->nelements;
+  memory->destroy(rhor);
+  memory->create(rhor,nrhor,nr+1,"pair:rhor");
+
+  // copy each element's rhor to global rhor
+
+  for (i = 0; i < setfl->nelements; i++)
+    for (m = 1; m <= nr; m++) rhor[i][m] = setfl->rhor[i][m];
+
+  // type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to
+  // for setfl files, I,J mapping only depends on I
+  // OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used
+
+  for (i = 1; i <= ntypes; i++)
+    for (j = 1; j <= ntypes; j++)
+      type2rhor[i][j] = map[i];
+
+  // ------------------------------------------------------------------
+  // setup z2r arrays
+  // ------------------------------------------------------------------
+
+  // allocate z2r arrays
+  // nz2r = N*(N+1)/2 where N = # of setfl elements
+
+  nz2r = setfl->nelements * (setfl->nelements+1) / 2;
+  memory->destroy(z2r);
+  memory->create(z2r,nz2r,nr+1,"pair:z2r");
+
+  // copy each element pair z2r to global z2r, only for I >= J
+
+  n = 0;   // running index into the packed lower triangle
+  for (i = 0; i < setfl->nelements; i++)
+    for (j = 0; j <= i; j++) {
+      for (m = 1; m <= nr; m++) z2r[n][m] = setfl->z2r[i][j][m];
+      n++;
+    }
+
+  // type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to
+  // set of z2r arrays only fill lower triangular Nelement matrix
+  // value = n = sum over rows of lower-triangular matrix until reach irow,icol
+  // swap indices when irow < icol to stay lower triangular
+  // if map = -1 (non-EAM atom in pair hybrid):
+  //   type2z2r is not used by non-opt
+  //   but set type2z2r to 0 since accessed by opt
+
+  int irow,icol;
+  for (i = 1; i <= ntypes; i++) {
+    for (j = 1; j <= ntypes; j++) {
+      irow = map[i];
+      icol = map[j];
+      if (irow == -1 || icol == -1) {
+	type2z2r[i][j] = 0;
+	continue;
+      }
+      if (irow < icol) {
+	irow = map[j];
+	icol = map[i];
+      }
+      n = 0;
+      for (m = 0; m < irow; m++) n += m + 1;   // rows 0..irow-1 hold irow*(irow+1)/2 entries
+      n += icol;
+      type2z2r[i][j] = n;
+    }
+  }
+}
diff --git a/src/USER-OMP/pair_eam_alloy_omp.h b/src/USER-OMP/pair_eam_alloy_omp.h
new file mode 100644
index 000000000..7a71fbc17
--- /dev/null
+++ b/src/USER-OMP/pair_eam_alloy_omp.h
@@ -0,0 +1,43 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(eam/alloy/omp,PairEAMAlloyOMP)
+
+#else
+
+#ifndef LMP_PAIR_EAM_ALLOY_OMP_H
+#define LMP_PAIR_EAM_ALLOY_OMP_H
+
+#include "pair_eam_omp.h"
+
+namespace LAMMPS_NS {
+
+// need virtual public b/c of how eam/alloy/opt inherits from it
+
+class PairEAMAlloyOMP : virtual public PairEAMOMP {
+ public:
+  PairEAMAlloyOMP(class LAMMPS *);
+  virtual ~PairEAMAlloyOMP() {}
+  void coeff(int, char **);   // parses "* * file elem..." and maps atom types to elements
+
+ protected:
+  void read_file(char *);     // rank 0 reads the setfl file; values are broadcast to all ranks
+  void file2array();          // copies the setfl tables into the frho/rhor/z2r arrays
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_eam_fs_omp.cpp b/src/USER-OMP/pair_eam_fs_omp.cpp
new file mode 100644
index 000000000..d0963fa62
--- /dev/null
+++ b/src/USER-OMP/pair_eam_fs_omp.cpp
@@ -0,0 +1,332 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Tim Lau (MIT)
+------------------------------------------------------------------------- */
+
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "pair_eam_fs_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define MAXLINE 1024
+
+/* ---------------------------------------------------------------------- */
+
+PairEAMFSOMP::PairEAMFSOMP(LAMMPS *lmp) : PairEAMOMP(lmp)
+{
+  one_coeff = 1;  // NOTE(review): presumably marks that coeff() is given once, as "* *" -- confirm in Pair
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for all type pairs from a single "* *" specification
+   arg = * *, Finnis-Sinclair file name, then element name or NULL per type
+------------------------------------------------------------------------- */
+
+void PairEAMFSOMP::coeff(int narg, char **arg)
+{
+  if (!allocated) allocate();
+
+  const int ntypes = atom->ntypes;
+  if (narg != 3 + ntypes)
+    error->all(FLERR,"Incorrect args for pair coefficients");
+
+  // both type selectors have to be the wildcard
+
+  const bool wildcards = !strcmp(arg[0],"*") && !strcmp(arg[1],"*");
+  if (!wildcards)
+    error->all(FLERR,"Incorrect args for pair coefficients");
+
+  // throw away tables kept from an earlier call, then read the file anew
+
+  if (fs) {
+    for (int e = 0; e < fs->nelements; e++) delete [] fs->elements[e];
+    delete [] fs->elements;
+    delete [] fs->mass;
+    memory->destroy(fs->frho);
+    memory->destroy(fs->rhor);
+    memory->destroy(fs->z2r);
+    delete fs;
+  }
+  fs = new Fs();
+  read_file(arg[2]);
+
+  // remaining args name the element for atom types 1,2,...
+  // map[itype] = index into fs->elements, -1 if the arg is NULL
+
+  for (int iarg = 3; iarg < narg; iarg++) {
+    const int itype = iarg - 2;
+    if (strcmp(arg[iarg],"NULL") == 0) {
+      map[itype] = -1;
+      continue;
+    }
+    int ielem = 0;
+    while (ielem < fs->nelements &&
+           strcmp(arg[iarg],fs->elements[ielem]) != 0) ielem++;
+    if (ielem < fs->nelements) map[itype] = ielem;
+    else error->all(FLERR,"No matching element in EAM potential file");
+  }
+
+  // coeff() is called once with I,J = * *, so start from a clean setflag
+
+  for (int i = 1; i <= ntypes; i++)
+    for (int j = i; j <= ntypes; j++)
+      setflag[i][j] = 0;
+
+  // flag each pair i <= j whose types both map to an element
+  // diagonal pairs also take the type's mass from the file
+
+  int npairs = 0;
+  for (int i = 1; i <= ntypes; i++) {
+    for (int j = i; j <= ntypes; j++) {
+      if (map[i] < 0 || map[j] < 0) continue;
+      setflag[i][j] = 1;
+      if (i == j) atom->set_mass(i,fs->mass[map[i]]);
+      npairs++;
+    }
+  }
+
+  if (npairs == 0) error->all(FLERR,"Incorrect args for pair coefficients");
+}
+
+/* ----------------------------------------------------------------------
+   read a multi-element DYNAMO setfl file into the Fs struct fs
+   proc 0 does all file I/O, every value read is broadcast to all procs
+------------------------------------------------------------------------- */
+
+void PairEAMFSOMP::read_file(char *filename)
+{
+  Fs *file = fs;
+
+  // open potential file (proc 0 only)
+
+  int me = comm->me;
+  FILE *fptr;
+  char line[MAXLINE];
+
+  if (me == 0) {
+    fptr = fopen(filename,"r");
+    if (fptr == NULL) {
+      char str[128];
+      snprintf(str,sizeof(str),"Cannot open EAM potential file %s",filename);  // bounded: filename may exceed str
+      error->one(FLERR,str);
+    }
+  }
+
+  // read and broadcast header: 3 lines are read and dropped, the 4th is kept
+  // extract element names from nelements line
+
+  int n;
+  if (me == 0) {
+    fgets(line,MAXLINE,fptr);
+    fgets(line,MAXLINE,fptr);
+    fgets(line,MAXLINE,fptr);
+    fgets(line,MAXLINE,fptr);
+    n = strlen(line) + 1;
+  }
+  MPI_Bcast(&n,1,MPI_INT,0,world);
+  MPI_Bcast(line,n,MPI_CHAR,0,world);
+
+  sscanf(line,"%d",&file->nelements);
+  int nwords = atom->count_words(line);
+  if (nwords != file->nelements + 1)
+    error->all(FLERR,"Incorrect element names in EAM potential file");
+  // tokenize in place: skip the count, then names; extra slot takes the final NULL
+  char **words = new char*[file->nelements+1];
+  nwords = 0;
+  strtok(line," \t\n\r\f");
+  while ((words[nwords++] = strtok(NULL," \t\n\r\f")) != NULL) continue;
+
+  file->elements = new char*[file->nelements];
+  for (int i = 0; i < file->nelements; i++) {
+    n = strlen(words[i]) + 1;
+    file->elements[i] = new char[n];
+    strcpy(file->elements[i],words[i]);
+  }
+  delete [] words;
+
+  if (me == 0) {
+    fgets(line,MAXLINE,fptr);
+    sscanf(line,"%d %lg %d %lg %lg",
+           &file->nrho,&file->drho,&file->nr,&file->dr,&file->cut);
+  }
+
+  MPI_Bcast(&file->nrho,1,MPI_INT,0,world);
+  MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&file->nr,1,MPI_INT,0,world);
+  MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world);
+  // tables are filled starting at index 1, hence the +1 in the last dimension
+  file->mass = new double[file->nelements];
+  memory->create(file->frho,file->nelements,file->nrho+1,"pair:frho");
+  memory->create(file->rhor,file->nelements,file->nelements,
+                 file->nr+1,"pair:rhor");
+  memory->create(file->z2r,file->nelements,file->nelements,
+                 file->nr+1,"pair:z2r");
+  // per element: a mass line, its frho table, then one rhor table per element
+  int i,j,tmp;
+  for (i = 0; i < file->nelements; i++) {
+    if (me == 0) {
+      fgets(line,MAXLINE,fptr);
+      sscanf(line,"%d %lg",&tmp,&file->mass[i]);
+    }
+    MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world);
+
+    if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]);
+    MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world);
+
+    for (j = 0; j < file->nelements; j++) {
+      if (me == 0) grab(fptr,file->nr,&file->rhor[i][j][1]);
+      MPI_Bcast(&file->rhor[i][j][1],file->nr,MPI_DOUBLE,0,world);
+    }
+  }
+  // z2r tables follow, for the lower triangle only (j <= i)
+  for (i = 0; i < file->nelements; i++)
+    for (j = 0; j <= i; j++) {
+      if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]);
+      MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world);
+    }
+
+  // close the potential file
+
+  if (me == 0) fclose(fptr);
+}
+
+/* ----------------------------------------------------------------------
+   copy read-in setfl potential to standard array format
+------------------------------------------------------------------------- */
+
+void PairEAMFSOMP::file2array()
+{
+  const int ntypes = atom->ntypes;
+  const int nelem = fs->nelements;
+  int ielem,jelem,itype,jtype,k,slot;
+
+  // tabulation sizes and spacings come straight from the fs file
+
+  nrho = fs->nrho;
+  nr = fs->nr;
+  drho = fs->drho;
+  dr = fs->dr;
+
+  // ------------------------------------------------------------------
+  // embedding functions (frho)
+  // ------------------------------------------------------------------
+
+  // one table per fs element, plus a trailing table of zeroes
+
+  nfrho = nelem + 1;
+  memory->destroy(frho);
+  memory->create(frho,nfrho,nrho+1,"pair:frho");
+
+  // element tables come first, entries 1..nrho of each
+
+  for (ielem = 0; ielem < nelem; ielem++) {
+    for (k = 1; k <= nrho; k++)
+      frho[ielem][k] = fs->frho[ielem][k];
+  }
+
+  // the last table is all zeroes, for non-EAM types (pair hybrid)
+  // it is needed b/c fp is still computed for non-EAM atoms
+
+  const int zero_table = nfrho - 1;
+  for (k = 1; k <= nrho; k++) frho[zero_table][k] = 0.0;
+
+  // type2frho[itype] = frho table (0 to nfrho-1) used by that atom type
+  // a type without an element (map < 0) gets the table of zeroes
+
+  for (itype = 1; itype <= ntypes; itype++)
+    type2frho[itype] = (map[itype] >= 0) ? map[itype] : zero_table;
+
+  // ------------------------------------------------------------------
+  // density functions (rhor)
+  // ------------------------------------------------------------------
+
+  // fs files provide a full nelem x nelem set of tables
+
+  nrhor = nelem * nelem;
+  memory->destroy(rhor);
+  memory->create(rhor,nrhor,nr+1,"pair:rhor");
+
+  // element pair (ielem,jelem) lands in slot ielem*nelem + jelem
+
+  for (ielem = 0; ielem < nelem; ielem++) {
+    for (jelem = 0; jelem < nelem; jelem++) {
+      slot = ielem*nelem + jelem;
+      for (k = 1; k <= nr; k++)
+        rhor[slot][k] = fs->rhor[ielem][jelem][k];
+    }
+  }
+
+  // type2rhor[itype][jtype] = rhor slot (0 to nrhor-1) for that type pair
+  // OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used
+
+  for (itype = 1; itype <= ntypes; itype++) {
+    const int row = map[itype] * nelem;
+    for (jtype = 1; jtype <= ntypes; jtype++)
+      type2rhor[itype][jtype] = row + map[jtype];
+  }
+
+  // ------------------------------------------------------------------
+  // pair functions (z2r)
+  // ------------------------------------------------------------------
+
+  // only the lower triangle is stored: nelem*(nelem+1)/2 tables
+
+  nz2r = nelem * (nelem+1) / 2;
+  memory->destroy(z2r);
+  memory->create(z2r,nz2r,nr+1,"pair:z2r");
+
+  // walk the lower triangle (jelem <= ielem) row by row
+
+  slot = 0;
+  for (ielem = 0; ielem < nelem; ielem++) {
+    for (jelem = 0; jelem <= ielem; jelem++, slot++) {
+      for (k = 1; k <= nr; k++)
+        z2r[slot][k] = fs->z2r[ielem][jelem][k];
+    }
+  }
+
+  // type2z2r[itype][jtype] = z2r slot (0 to nz2r-1) for that type pair
+  // slot = (# of entries in the triangle rows above) + column
+  // the larger element index is the row, so (i,j) and (j,i) share a slot
+  // if map = -1 (non-EAM atom in pair hybrid):
+  //   type2z2r is not used by non-opt
+  //   but set type2z2r to 0 since accessed by opt
+
+  for (itype = 1; itype <= ntypes; itype++) {
+    for (jtype = 1; jtype <= ntypes; jtype++) {
+      const int ei = map[itype];
+      const int ej = map[jtype];
+      if (ei == -1 || ej == -1) {
+        type2z2r[itype][jtype] = 0;
+        continue;
+      }
+      // order the pair so that it stays in the lower triangle
+      const int hi = (ei < ej) ? ej : ei;
+      const int lo = (ei < ej) ? ei : ej;
+
+      // rows 0..hi-1 hold 1,2,...,hi entries
+      slot = lo;
+      for (k = 1; k <= hi; k++) slot += k;
+      type2z2r[itype][jtype] = slot;
+    }
+  }
+}
diff --git a/src/USER-OMP/pair_eam_fs_omp.h b/src/USER-OMP/pair_eam_fs_omp.h
new file mode 100644
index 000000000..bee6cef76
--- /dev/null
+++ b/src/USER-OMP/pair_eam_fs_omp.h
@@ -0,0 +1,43 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(eam/fs/omp,PairEAMFSOMP)
+
+#else
+
+#ifndef LMP_PAIR_EAM_FS_OMP_H
+#define LMP_PAIR_EAM_FS_OMP_H
+
+#include "pair_eam_omp.h"
+
+namespace LAMMPS_NS {
+
+// OpenMP-package class registered above as pair style eam/fs/omp.
+// need virtual public b/c of how eam/fs/opt inherits from it
+class PairEAMFSOMP : virtual public PairEAMOMP {
+ public:
+  PairEAMFSOMP(class LAMMPS *);
+  virtual ~PairEAMFSOMP() {}  // empty body
+  void coeff(int, char **);   // (narg, arg): "* *", potential file, then element name or NULL per type
+
+ protected:
+  void read_file(char *);     // proc 0 reads the setfl file into fs; values are broadcast
+  void file2array();          // copies fs tables into frho/rhor/z2r and fills the type2* maps
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_eam_omp.cpp b/src/USER-OMP/pair_eam_omp.cpp
new file mode 100644
index 000000000..0ae4d54fb
--- /dev/null
+++ b/src/USER-OMP/pair_eam_omp.cpp
@@ -0,0 +1,303 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "string.h"
+
+#include "pair_eam_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairEAMOMP::PairEAMOMP(LAMMPS *lmp) :
+  PairEAM(lmp), ThrOMP(lmp, PAIR)  // NOTE(review): PAIR presumably tells ThrOMP it serves a pair style -- confirm
+{
+  respa_enable = 0;  // NOTE(review): presumably switches off rRESPA support for this style -- confirm in Pair
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairEAMOMP::compute(int eflag, int vflag)
+{
+  // set up (or switch off) energy/virial tallying, serial and per-thread
+  if (!(eflag || vflag)) evflag = vflag_fdotr = eflag_global = eflag_atom = 0;
+  else {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  }
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+  // rho holds one density slice per thread, fp a single array;
+  // reallocate both whenever atom->nmax has outgrown our nmax
+
+  if (nmax < atom->nmax) {
+    memory->destroy(rho);
+    memory->destroy(fp);
+    nmax = atom->nmax;
+    memory->create(rho,nthreads*nmax,"pair:rho");
+    memory->create(fp,nmax,"pair:fp");
+  }
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    double **f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    // slice stride matches the atom count that eval() zeroes and reduces
+    const int stride = (force->newton_pair) ? nall : atom->nlocal;
+    double *rho_t = rho + tid*stride;
+
+    // pick the eval<EVFLAG,EFLAG,NEWTON_PAIR> instantiation
+    if (!evflag) {
+      if (force->newton_pair) eval<0,0,1>(f, rho_t, ifrom, ito, tid);
+      else eval<0,0,0>(f, rho_t, ifrom, ito, tid);
+    } else if (eflag) {
+      if (force->newton_pair) eval<1,1,1>(f, rho_t, ifrom, ito, tid);
+      else eval<1,1,0>(f, rho_t, ifrom, ito, tid);
+    } else {
+      if (force->newton_pair) eval<1,0,1>(f, rho_t, ifrom, ito, tid);
+      else eval<1,0,0>(f, rho_t, ifrom, ito, tid);
+    }
+
+    // fold the per-thread force copies into atom->f
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // combine per-thread energy/virial tallies, then the F dot r virial
+  if (evflag) ev_reduce_thr(this);
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+// one thread's share of EAM: density, embedding, then force pass over ilist[iifrom..iito)
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairEAMOMP::eval(double **f, double *rho_t,
+		      int iifrom, int iito, int tid)
+{
+  int i,j,ii,jj,m,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
+  double rsq,r,p,rhoip,rhojp,z2,z2p,recip,phip,psip,phi;
+  double *coeff;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  // f and rho_t are the force array and density slice handed to this thread by compute()
+  evdwl = 0.0;
+
+  double **x = atom->x;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  int nall = nlocal + atom->nghost;
+
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // zero this thread's density slice: nall entries with newton on, nlocal otherwise
+
+  if (NEWTON_PAIR) memset(rho_t, 0, nall*sizeof(double));
+  else memset(rho_t, 0, nlocal*sizeof(double));
+
+  // rho = density at each atom
+  // loop over neighbors of my atoms
+  // each pair adds to rho_t[i], and to rho_t[j] when newton is on or j is local
+  for (ii = iifrom; ii < iito; ii++) {
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+
+      if (rsq < cutforcesq) {
+	jtype = type[j];
+	p = sqrt(rsq)*rdr + 1.0;
+	m = static_cast<int> (p);
+	m = MIN(m,nr-1);  // cap table index at nr-1
+	p -= m;
+	p = MIN(p,1.0);  // cap fractional part at 1.0
+	coeff = rhor_spline[type2rhor[jtype][itype]][m];
+	rho_t[i] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
+	if (NEWTON_PAIR || j < nlocal) {
+	  coeff = rhor_spline[type2rhor[itype][jtype]][m];
+	  rho_t[j] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
+	}
+      }
+    }
+  }
+
+  // wait until all threads are done with computation
+  sync_threads();
+
+  // communicate and sum densities
+
+  if (NEWTON_PAIR) {
+    // reduce per thread density
+    data_reduce_thr(&(rho[0]), nall, comm->nthreads, 1, tid);
+
+    // wait until reduction is complete
+    sync_threads();
+
+#if defined(_OPENMP)
+#pragma omp master
+#endif
+    { comm->reverse_comm_pair(this); }
+
+    // wait until master thread is done with communication
+    sync_threads();
+  
+  } else {
+    data_reduce_thr(&(rho[0]), nlocal, comm->nthreads, 1, tid);
+
+    // wait until reduction is complete
+    sync_threads();
+  }
+  
+  // fp = derivative of embedding energy at each atom
+  // phi = embedding energy at each atom
+  // reads the reduced rho[i], not the per-thread slice rho_t
+  for (ii = iifrom; ii < iito; ii++) {
+    i = ilist[ii];
+    p = rho[i]*rdrho + 1.0;
+    m = static_cast<int> (p);
+    m = MAX(1,MIN(m,nrho-1));  // keep table index within 1..nrho-1
+    p -= m;
+    p = MIN(p,1.0);
+    coeff = frho_spline[type2frho[type[i]]][m];
+    fp[i] = (coeff[0]*p + coeff[1])*p + coeff[2];
+    if (EFLAG) {
+      phi = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
+      if (eflag_global) eng_vdwl_thr[tid] += phi;  // tallied into this thread's own accumulators
+      if (eflag_atom) eatom_thr[tid][i] += phi;
+    }
+  }
+
+  // wait until all threads are done with computation
+  sync_threads();
+
+  // communicate derivative of embedding function
+  // MPI communication only on master thread
+#if defined(_OPENMP)
+#pragma omp master
+#endif
+  { comm->forward_comm_pair(this); }
+
+  // wait until master thread is done with communication
+  sync_threads();
+
+  // compute forces on each atom
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ii++) {
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    fxtmp = fytmp = fztmp = 0.0;  // force on i, added to f[i] once after the j loop
+
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+
+      if (rsq < cutforcesq) {
+	jtype = type[j];
+	r = sqrt(rsq);
+	p = r*rdr + 1.0;
+	m = static_cast<int> (p);
+	m = MIN(m,nr-1);
+	p -= m;
+	p = MIN(p,1.0);
+
+	// rhoip = derivative of (density at atom j due to atom i)
+	// rhojp = derivative of (density at atom i due to atom j)
+	// phi = pair potential energy
+	// phip = phi'
+	// z2 = phi * r
+	// z2p = (phi * r)' = (phi' r) + phi
+	// psip needs both fp[i] and fp[j] terms since r_ij appears in two
+	//   terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji)
+	//   hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip
+
+	coeff = rhor_spline[type2rhor[itype][jtype]][m];
+	rhoip = (coeff[0]*p + coeff[1])*p + coeff[2];
+	coeff = rhor_spline[type2rhor[jtype][itype]][m];
+	rhojp = (coeff[0]*p + coeff[1])*p + coeff[2];
+	coeff = z2r_spline[type2z2r[itype][jtype]][m];
+	z2p = (coeff[0]*p + coeff[1])*p + coeff[2];
+	z2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
+
+	recip = 1.0/r;
+	phi = z2*recip;
+	phip = z2p*recip - phi*recip;
+	psip = fp[i]*rhojp + fp[j]*rhoip + phip;
+	fpair = -psip*recip;
+
+	fxtmp += delx*fpair;
+	fytmp += dely*fpair;
+	fztmp += delz*fpair;
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= delx*fpair;
+	  f[j][1] -= dely*fpair;
+	  f[j][2] -= delz*fpair;
+	}
+
+	if (EFLAG) evdwl = phi;  // pair energy handed to the tally below
+	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,tid);
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairEAMOMP::memory_usage()
+{
+  // total = what the thread helper reports + what the serial EAM style reports
+  const double thr_bytes = memory_usage_thr();
+  const double eam_bytes = PairEAM::memory_usage();
+  return thr_bytes + eam_bytes;
+}
diff --git a/src/USER-OMP/pair_eam_omp.h b/src/USER-OMP/pair_eam_omp.h
new file mode 100644
index 000000000..1184cb34b
--- /dev/null
+++ b/src/USER-OMP/pair_eam_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(eam/omp,PairEAMOMP)
+
+#else
+
+#ifndef LMP_PAIR_EAM_OMP_H
+#define LMP_PAIR_EAM_OMP_H
+
+#include "pair_eam.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairEAMOMP : public PairEAM, public ThrOMP {
+  // registered above as pair style eam/omp; combines PairEAM with the ThrOMP helper
+ public:
+  PairEAMOMP(class LAMMPS *);
+
+  virtual void compute(int, int);   // (eflag, vflag)
+  virtual double memory_usage();    // memory_usage_thr() plus PairEAM::memory_usage()
+
+ private:   // eval<> is instantiated per flag combination and selected in compute()
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(double **f, double *rho_t, int iifrom, int iito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_edip_omp.cpp b/src/USER-OMP/pair_edip_omp.cpp
new file mode 100644
index 000000000..65b05c814
--- /dev/null
+++ b/src/USER-OMP/pair_edip_omp.cpp
@@ -0,0 +1,485 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_edip_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairEDIPOMP::PairEDIPOMP(LAMMPS *lmp) :
+  PairEDIP(lmp), ThrOMP(lmp, PAIR)  // NOTE(review): PAIR presumably tells ThrOMP it serves a pair style -- confirm
+{
+  respa_enable = 0;  // NOTE(review): presumably switches off rRESPA support for this style -- confirm in Pair
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairEDIPOMP::compute(int eflag, int vflag)
+{
+  // set up (or switch off) energy/virial tallying, serial and per-thread
+  if (!(eflag || vflag)) evflag = vflag_fdotr = vflag_atom = 0;
+  else {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  }
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+
+    // pick the eval<EVFLAG,EFLAG,VFLAG_ATOM> instantiation
+    if (!evflag) eval<0,0,0>(f, ifrom, ito, tid);
+    else if (eflag) {
+      if (vflag_atom) eval<1,1,1>(f, ifrom, ito, tid);
+      else eval<1,1,0>(f, ifrom, ito, tid);
+    } else {
+      if (vflag_atom) eval<1,0,1>(f, ifrom, ito, tid);
+      else eval<1,0,0>(f, ifrom, ito, tid);
+    }
+
+    // fold the per-thread force copies into atom->f
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // combine per-thread energy/virial tallies, then the F dot r virial
+  if (evflag) ev_reduce_thr(this);
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+template <int EVFLAG, int EFLAG, int VFLAG_ATOM>
+void PairEDIPOMP::eval(double **f, int iifrom, int iito, int tid)
+{
+  int i,j,k,ii,inum,jnum;
+  int itype,jtype,ktype,ijparam,ikparam,ijkparam;
+  double xtmp,ytmp,ztmp,evdwl;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  register int preForceCoord_counter;
+
+  double invR_ij;
+  double invR_ik;
+  double directorCos_ij_x;
+  double directorCos_ij_y;
+  double directorCos_ij_z;
+  double directorCos_ik_x;
+  double directorCos_ik_y;
+  double directorCos_ik_z;
+  double cosTeta;
+
+  int interpolIDX;
+  double interpolTMP;
+  double interpolDeltaX;
+  double interpolY1;
+  double interpolY2;
+
+  double invRMinusCutoffA;
+  double sigmaInvRMinusCutoffA;
+  double gammInvRMinusCutoffA;
+  double cosTetaDiff;
+  double cosTetaDiffCosTetaDiff;
+  double cutoffFunction_ij;
+  double exp2B_ij;
+  double exp2BDerived_ij;
+  double pow2B_ij;
+  double pow2BDerived_ij;
+  double exp3B_ij;
+  double exp3BDerived_ij;
+  double exp3B_ik;
+  double exp3BDerived_ik;
+  double qFunction;
+  double qFunctionDerived;
+  double tauFunction;
+  double tauFunctionDerived;
+  double expMinusBetaZeta_iZeta_i;
+  double qFunctionCosTetaDiffCosTetaDiff;
+  double expMinusQFunctionCosTetaDiffCosTetaDiff;
+  double zeta_i;
+  double zeta_iDerived;
+  double zeta_iDerivedInvR_ij;
+
+  double forceModCoord_factor;
+  double forceModCoord;
+  double forceModCoord_ij;
+  double forceMod2B;
+  double forceMod3B_factor1_ij;
+  double forceMod3B_factor2_ij;
+  double forceMod3B_factor2;
+  double forceMod3B_factor1_ik;
+  double forceMod3B_factor2_ik;
+  double potentia3B_factor;
+  double potential2B_factor;
+
+  double *pre_thrInvR_ij = preInvR_ij + tid * leadDimInteractionList;
+  double *pre_thrExp3B_ij = preExp3B_ij + tid * leadDimInteractionList;
+  double *pre_thrExp3BDerived_ij = preExp3BDerived_ij + tid * leadDimInteractionList;
+  double *pre_thrExp2B_ij = preExp2B_ij + tid * leadDimInteractionList;
+  double *pre_thrExp2BDerived_ij = preExp2BDerived_ij + tid * leadDimInteractionList;
+  double *pre_thrPow2B_ij = prePow2B_ij + tid * leadDimInteractionList;
+  double *pre_thrForceCoord = preForceCoord + tid * leadDimInteractionList;
+
+  double **x = atom->x;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over full neighbor list of my atoms
+
+  for (ii = iifrom; ii < iito; ii++) {
+    zeta_i = 0.0;
+    int numForceCoordPairs = 0;
+
+    i = ilist[ii];
+    itype = map[type[i]];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    // pre-loop to compute environment coordination f(Z)
+
+    for (int neighbor_j = 0; neighbor_j < jnum; neighbor_j++) {
+        j = jlist[neighbor_j];
+        j &= NEIGHMASK;
+
+        double dr_ij[3], r_ij;
+
+        dr_ij[0] = xtmp - x[j][0];
+        dr_ij[1] = ytmp - x[j][1];
+        dr_ij[2] = ztmp - x[j][2];
+        r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2];
+
+        jtype = map[type[j]];
+        ijparam = elem2param[itype][jtype][jtype];
+        if (r_ij > params[ijparam].cutsq) continue;
+
+        r_ij = sqrt(r_ij);
+
+        invR_ij = 1.0 / r_ij;
+        pre_thrInvR_ij[neighbor_j] = invR_ij;
+
+        invRMinusCutoffA =  1.0 / (r_ij - cutoffA);
+        sigmaInvRMinusCutoffA = sigma * invRMinusCutoffA;
+        gammInvRMinusCutoffA = gamm * invRMinusCutoffA;
+
+        interpolDeltaX = r_ij - GRIDSTART;
+        interpolTMP = (interpolDeltaX * GRIDDENSITY);
+        interpolIDX = (int) interpolTMP;
+
+        interpolY1 = exp3B[interpolIDX];
+        interpolY2 = exp3B[interpolIDX+1];
+        exp3B_ij = interpolY1 + (interpolY2 - interpolY1) * 
+	  (interpolTMP-interpolIDX);
+
+        exp3BDerived_ij = - exp3B_ij * gammInvRMinusCutoffA * invRMinusCutoffA;
+
+        pre_thrExp3B_ij[neighbor_j] = exp3B_ij;
+        pre_thrExp3BDerived_ij[neighbor_j] = exp3BDerived_ij;
+
+        interpolY1 = exp2B[interpolIDX];
+        interpolY2 = exp2B[interpolIDX+1];
+        exp2B_ij = interpolY1 + (interpolY2 - interpolY1) * 
+	  (interpolTMP-interpolIDX);
+
+        exp2BDerived_ij = - exp2B_ij * sigmaInvRMinusCutoffA * invRMinusCutoffA;
+
+        pre_thrExp2B_ij[neighbor_j] = exp2B_ij;
+        pre_thrExp2BDerived_ij[neighbor_j] = exp2BDerived_ij;
+
+        interpolY1 = pow2B[interpolIDX];
+        interpolY2 = pow2B[interpolIDX+1];
+        pow2B_ij = interpolY1 + (interpolY2 - interpolY1) * 
+	  (interpolTMP-interpolIDX);
+
+        pre_thrPow2B_ij[neighbor_j] = pow2B_ij;
+
+        // zeta and its derivative
+
+        if (r_ij < cutoffC) zeta_i += 1.0;
+        else {
+            interpolY1 = cutoffFunction[interpolIDX];
+            interpolY2 = cutoffFunction[interpolIDX+1];
+            cutoffFunction_ij = interpolY1 + (interpolY2 - interpolY1) * 
+	      (interpolTMP-interpolIDX);
+
+            zeta_i += cutoffFunction_ij;
+
+            interpolY1 = cutoffFunctionDerived[interpolIDX];
+            interpolY2 = cutoffFunctionDerived[interpolIDX+1];
+            zeta_iDerived = interpolY1 + (interpolY2 - interpolY1) * 
+	      (interpolTMP-interpolIDX);
+
+            zeta_iDerivedInvR_ij = zeta_iDerived * invR_ij;
+
+            preForceCoord_counter=numForceCoordPairs*5;
+            pre_thrForceCoord[preForceCoord_counter+0]=zeta_iDerivedInvR_ij;
+            pre_thrForceCoord[preForceCoord_counter+1]=dr_ij[0];
+            pre_thrForceCoord[preForceCoord_counter+2]=dr_ij[1];
+            pre_thrForceCoord[preForceCoord_counter+3]=dr_ij[2];
+            pre_thrForceCoord[preForceCoord_counter+4]=j;
+            numForceCoordPairs++;
+        }
+    }
+
+    // quantities depending on zeta_i
+
+    interpolDeltaX = zeta_i;
+    interpolTMP = (interpolDeltaX * GRIDDENSITY);
+    interpolIDX = (int) interpolTMP;
+
+    interpolY1 = expMinusBetaZeta_iZeta_iGrid[interpolIDX];
+    interpolY2 = expMinusBetaZeta_iZeta_iGrid[interpolIDX+1];
+    expMinusBetaZeta_iZeta_i = interpolY1 + (interpolY2 - interpolY1) * 
+      (interpolTMP-interpolIDX);
+
+    interpolY1 = qFunctionGrid[interpolIDX];
+    interpolY2 = qFunctionGrid[interpolIDX+1];
+    qFunction = interpolY1 + (interpolY2 - interpolY1) * 
+      (interpolTMP-interpolIDX);
+
+    interpolY1 = tauFunctionGrid[interpolIDX];
+    interpolY2 = tauFunctionGrid[interpolIDX+1];
+    tauFunction = interpolY1 + (interpolY2 - interpolY1) * 
+      (interpolTMP-interpolIDX);
+
+    interpolY1 = tauFunctionDerivedGrid[interpolIDX];
+    interpolY2 = tauFunctionDerivedGrid[interpolIDX+1];
+    tauFunctionDerived = interpolY1 + (interpolY2 - interpolY1) * 
+      (interpolTMP-interpolIDX);
+
+    qFunctionDerived = -mu * qFunction;
+
+    forceModCoord_factor = 2.0 * beta * zeta_i * expMinusBetaZeta_iZeta_i;
+
+    forceModCoord = 0.0;
+
+    // two-body interactions, skip half of them
+
+    for (int neighbor_j = 0; neighbor_j < jnum; neighbor_j++) {
+      double dr_ij[3], r_ij, f_ij[3];
+
+      j = jlist[neighbor_j];
+      j &= NEIGHMASK;
+
+      dr_ij[0] = x[j][0] - xtmp;
+      dr_ij[1] = x[j][1] - ytmp;
+      dr_ij[2] = x[j][2] - ztmp;
+      r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2];
+
+      jtype = map[type[j]];
+      ijparam = elem2param[itype][jtype][jtype];
+      if (r_ij > params[ijparam].cutsq) continue;
+
+      r_ij = sqrt(r_ij);
+
+      invR_ij = pre_thrInvR_ij[neighbor_j];
+      pow2B_ij = pre_thrPow2B_ij[neighbor_j];
+
+      potential2B_factor = pow2B_ij - expMinusBetaZeta_iZeta_i;
+
+      exp2B_ij = pre_thrExp2B_ij[neighbor_j];
+
+      pow2BDerived_ij = - rho * invR_ij * pow2B_ij;
+
+      forceModCoord += (forceModCoord_factor*exp2B_ij);
+
+      exp2BDerived_ij = pre_thrExp2BDerived_ij[neighbor_j];
+      forceMod2B = exp2BDerived_ij * potential2B_factor + 
+	exp2B_ij * pow2BDerived_ij;
+
+      directorCos_ij_x = invR_ij * dr_ij[0];
+      directorCos_ij_y = invR_ij * dr_ij[1];
+      directorCos_ij_z = invR_ij * dr_ij[2];
+
+      exp3B_ij = pre_thrExp3B_ij[neighbor_j];
+      exp3BDerived_ij = pre_thrExp3BDerived_ij[neighbor_j];
+
+      f_ij[0] = forceMod2B * directorCos_ij_x;
+      f_ij[1] = forceMod2B * directorCos_ij_y;
+      f_ij[2] = forceMod2B * directorCos_ij_z;
+
+      f[j][0] -= f_ij[0];
+      f[j][1] -= f_ij[1];
+      f[j][2] -= f_ij[2];
+
+      f[i][0] += f_ij[0];
+      f[i][1] += f_ij[1];
+      f[i][2] += f_ij[2];
+
+      // potential energy
+
+      evdwl = (exp2B_ij * potential2B_factor);
+
+      if (EVFLAG) ev_tally_thr(this,i, j, nlocal, /* newton_pair */ 1, evdwl, 0.0,
+			       -forceMod2B*invR_ij, dr_ij[0], dr_ij[1], dr_ij[2],tid);
+
+      // three-body Forces
+
+      for (int neighbor_k = neighbor_j + 1; neighbor_k < jnum; neighbor_k++) {
+          double dr_ik[3], r_ik, f_ik[3];
+
+          k = jlist[neighbor_k];
+          k &= NEIGHMASK;
+          ktype = map[type[k]];
+          ikparam = elem2param[itype][ktype][ktype];
+          ijkparam = elem2param[itype][jtype][ktype];
+
+          dr_ik[0] = x[k][0] - xtmp;
+          dr_ik[1] = x[k][1] - ytmp;
+          dr_ik[2] = x[k][2] - ztmp;
+          r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
+
+          if (r_ik > params[ikparam].cutsq) continue;
+
+          r_ik = sqrt(r_ik);
+
+          invR_ik = pre_thrInvR_ij[neighbor_k];
+
+          directorCos_ik_x = invR_ik * dr_ik[0];
+          directorCos_ik_y = invR_ik * dr_ik[1];
+          directorCos_ik_z = invR_ik * dr_ik[2];
+
+          cosTeta = directorCos_ij_x * directorCos_ik_x + 
+	    directorCos_ij_y * directorCos_ik_y +
+	    directorCos_ij_z * directorCos_ik_z;
+
+          cosTetaDiff = cosTeta + tauFunction;
+          cosTetaDiffCosTetaDiff = cosTetaDiff * cosTetaDiff;
+          qFunctionCosTetaDiffCosTetaDiff = cosTetaDiffCosTetaDiff * qFunction;
+          expMinusQFunctionCosTetaDiffCosTetaDiff = 
+	    exp(-qFunctionCosTetaDiffCosTetaDiff);
+
+          potentia3B_factor = lambda * 
+	    ((1.0 - expMinusQFunctionCosTetaDiffCosTetaDiff) + 
+	     eta * qFunctionCosTetaDiffCosTetaDiff);
+
+          exp3B_ik = pre_thrExp3B_ij[neighbor_k];
+          exp3BDerived_ik = pre_thrExp3BDerived_ij[neighbor_k];
+
+          forceMod3B_factor1_ij = - exp3BDerived_ij * exp3B_ik * 
+	    potentia3B_factor;
+          forceMod3B_factor2 = 2.0 * lambda * exp3B_ij * exp3B_ik * 
+	    qFunction * cosTetaDiff *
+	    (eta + expMinusQFunctionCosTetaDiffCosTetaDiff);
+          forceMod3B_factor2_ij = forceMod3B_factor2 * invR_ij;
+
+          f_ij[0] = forceMod3B_factor1_ij * directorCos_ij_x + 
+	    forceMod3B_factor2_ij *
+	    (cosTeta * directorCos_ij_x - directorCos_ik_x);
+          f_ij[1] = forceMod3B_factor1_ij * directorCos_ij_y + 
+	    forceMod3B_factor2_ij *
+	    (cosTeta * directorCos_ij_y - directorCos_ik_y);
+          f_ij[2] = forceMod3B_factor1_ij * directorCos_ij_z + 
+	    forceMod3B_factor2_ij *
+	    (cosTeta * directorCos_ij_z - directorCos_ik_z);
+
+          forceMod3B_factor1_ik = - exp3BDerived_ik * exp3B_ij * 
+	    potentia3B_factor;
+          forceMod3B_factor2_ik = forceMod3B_factor2 * invR_ik;
+
+          f_ik[0] = forceMod3B_factor1_ik * directorCos_ik_x + 
+	    forceMod3B_factor2_ik *
+	    (cosTeta * directorCos_ik_x - directorCos_ij_x);
+          f_ik[1] = forceMod3B_factor1_ik * directorCos_ik_y + 
+	    forceMod3B_factor2_ik *
+	    (cosTeta * directorCos_ik_y - directorCos_ij_y);
+          f_ik[2] = forceMod3B_factor1_ik * directorCos_ik_z + 
+	    forceMod3B_factor2_ik *
+	    (cosTeta * directorCos_ik_z - directorCos_ij_z);
+
+          forceModCoord += (forceMod3B_factor2 * 
+			    (tauFunctionDerived -  0.5 * mu * cosTetaDiff));
+
+          f[j][0] += f_ij[0];
+          f[j][1] += f_ij[1];
+          f[j][2] += f_ij[2];
+
+          f[k][0] += f_ik[0];
+          f[k][1] += f_ik[1];
+          f[k][2] += f_ik[2];
+
+          f[i][0] -= f_ij[0] + f_ik[0];
+          f[i][1] -= f_ij[1] + f_ik[1];
+          f[i][2] -= f_ij[2] + f_ik[2];
+
+          // potential energy
+
+          evdwl = (exp3B_ij * exp3B_ik * potentia3B_factor);
+
+          if (evflag) ev_tally3(i,j,k,evdwl,0.0,f_ij,f_ik,dr_ij,dr_ik);
+      }
+    }
+
+    // forces due to environment coordination f(Z)
+
+    for (int idx = 0; idx < numForceCoordPairs; idx++) {
+        double dr_ij[3], f_ij[3];
+
+        preForceCoord_counter = idx * 5;
+        zeta_iDerivedInvR_ij=pre_thrForceCoord[preForceCoord_counter+0];
+        dr_ij[0]=pre_thrForceCoord[preForceCoord_counter+1];
+        dr_ij[1]=pre_thrForceCoord[preForceCoord_counter+2];
+        dr_ij[2]=pre_thrForceCoord[preForceCoord_counter+3];
+        j = static_cast<int> (pre_thrForceCoord[preForceCoord_counter+4]);
+
+        forceModCoord_ij = forceModCoord * zeta_iDerivedInvR_ij;
+
+        f_ij[0] = forceModCoord_ij * dr_ij[0];
+        f_ij[1] = forceModCoord_ij * dr_ij[1];
+        f_ij[2] = forceModCoord_ij * dr_ij[2];
+
+        f[j][0] -= f_ij[0];
+        f[j][1] -= f_ij[1];
+        f[j][2] -= f_ij[2];
+
+        f[i][0] += f_ij[0];
+        f[i][1] += f_ij[1];
+        f[i][2] += f_ij[2];
+
+        // potential energy
+
+        evdwl = 0.0;
+        if (EVFLAG) ev_tally_thr(this,i, j, nlocal, /* newton_pair */ 1, 0.0, 0.0,
+				 forceModCoord_ij, dr_ij[0], dr_ij[1], dr_ij[2],tid);
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairEDIPOMP::memory_usage()
+{
+  // threading buffers (ThrOMP) first, then whatever the serial PairEDIP reports
+  const double thr_bytes = memory_usage_thr();
+  const double base_bytes = PairEDIP::memory_usage();
+  return thr_bytes + base_bytes;
+}
diff --git a/src/USER-OMP/pair_edip_omp.h b/src/USER-OMP/pair_edip_omp.h
new file mode 100644
index 000000000..55c34db34
--- /dev/null
+++ b/src/USER-OMP/pair_edip_omp.h
@@ -0,0 +1,43 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(edip/omp,PairEDIPOMP)
+
+#else
+
+#ifndef LMP_PAIR_EDIP_OMP_H
+#define LMP_PAIR_EDIP_OMP_H
+
+#include "pair_edip.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairEDIPOMP : public PairEDIP, public ThrOMP {
+  // EDIP pair style combined with the ThrOMP threading helper base class
+ public:
+  PairEDIPOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();   // memory_usage_thr() + PairEDIP::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int VFLAG_ATOM>
+  void eval(double **f, int ifrom, int ito, int tid);   // f: force array updated in place; tid is handed on to ev_tally_thr()
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_eim_omp.cpp b/src/USER-OMP/pair_eim_omp.cpp
new file mode 100644
index 000000000..d31ad2012
--- /dev/null
+++ b/src/USER-OMP/pair_eim_omp.cpp
@@ -0,0 +1,365 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "string.h"
+
+#include "pair_eim_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairEIMOMP::PairEIMOMP(LAMMPS *lmp) :
+  PairEIM(lmp), ThrOMP(lmp, PAIR)   // both bases are built from the same LAMMPS instance
+{
+  respa_enable = 0;   // NOTE(review): presumably marks this style as not usable with rRESPA -- confirm in Pair
+}
+
+/* threaded driver for EIM: (re)sizes the per-thread rho/fp scratch arrays,
+   runs eval<>() inside an OpenMP region and reduces the per-thread results */
+void PairEIMOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+  // grow density (rho) and fp arrays if necessary
+  // each holds nthreads*atom->nmax doubles so every thread gets its own slab
+
+  if (atom->nmax > nmax) {
+    memory->destroy(rho);
+    memory->destroy(fp);
+    nmax = atom->nmax;
+    memory->create(rho,nthreads*nmax,"pair:rho");
+    memory->create(fp,nthreads*nmax,"pair:fp");
+  }
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f, *rho_t, *fp_t;
+
+    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    if (force->newton_pair) {
+      rho_t = rho + tid*nall;   // with newton_pair each thread's slab is nall entries long
+      fp_t = fp + tid*nall;
+    } else {
+      rho_t = rho + tid*atom->nlocal;   // otherwise the slabs are only nlocal entries apart
+      fp_t = fp + tid*atom->nlocal;
+    }
+    // pick the eval<EVFLAG,EFLAG,NEWTON_PAIR> instance matching the run-time flags
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_pair) eval<1,1,1>(f, rho_t, fp_t, ifrom, ito, tid);
+	else eval<1,1,0>(f, rho_t, fp_t, ifrom, ito, tid);
+      } else {
+	if (force->newton_pair) eval<1,0,1>(f, rho_t, fp_t, ifrom, ito, tid);
+	else eval<1,0,0>(f, rho_t, fp_t, ifrom, ito, tid);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(f, rho_t, fp_t, ifrom, ito, tid);
+      else eval<0,0,0>(f, rho_t, fp_t, ifrom, ito, tid);
+    }
+
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+// three passes over ilist[iifrom..iito): density, fp, forces; sync_threads() separates them
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairEIMOMP::eval(double **f, double *rho_t, double *fp_t,
+		      int iifrom, int iito, int tid)
+{
+  int i,j,ii,jj,m,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
+  double rsq,r,p,rhoip,rhojp,phip,phi,coul,coulp,recip,psip;
+  double *coeff;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  evdwl = 0.0;
+
+  double **x = atom->x;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  int nall = nlocal + atom->nghost;
+
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // zero out this thread's density and fp slabs (rho_t, fp_t)
+
+  if (NEWTON_PAIR) {
+    memset(rho_t, 0, nall*sizeof(double));
+    memset(fp_t, 0, nall*sizeof(double));
+  } else {
+    memset(rho_t, 0, nlocal*sizeof(double));
+    memset(fp_t, 0, nlocal*sizeof(double));
+  }
+
+  // rho = density at each atom
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ii++) {
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+      jtype = type[j];
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+
+      if (rsq < cutforcesq[itype][jtype]) {
+	p = sqrt(rsq)*rdr + 1.0;   // position in the spline table
+	m = static_cast<int> (p);   // m = table interval, capped at nr-1
+	m = MIN(m,nr-1);
+	p -= m;   // p = offset inside interval m, capped at 1.0
+	p = MIN(p,1.0);
+	coeff = Fij_spline[type2Fij[itype][jtype]][m];
+	rho_t[i] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
+	if (NEWTON_PAIR || j < nlocal) {   // ghost j only gets its share when NEWTON_PAIR is set
+	  coeff = Fij_spline[type2Fij[jtype][itype]][m];
+	  rho_t[j] += ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
+	}
+      }
+    }
+  }
+
+  // wait until all threads are done with computation
+  sync_threads();
+
+  // communicate and sum densities
+  if (NEWTON_PAIR) {
+    // reduce per thread density
+    data_reduce_thr(&(rho[0]), nall, comm->nthreads, 1, tid);
+
+    // wait until reduction is complete
+    sync_threads();
+
+#if defined(_OPENMP)
+#pragma omp master
+#endif
+    { 
+      rhofp = 1;   // NOTE(review): presumably tells the comm callbacks to exchange rho -- confirm in PairEIM
+      comm->reverse_comm_pair(this); 
+    }
+
+  } else {
+    data_reduce_thr(&(rho[0]), nlocal, comm->nthreads, 1, tid);
+
+    // wait until reduction is complete
+    sync_threads();
+  }
+
+#if defined(_OPENMP)
+#pragma omp master
+#endif
+  { 
+    rhofp = 1;
+    comm->forward_comm_pair(this); 
+  }
+
+  // wait until master is finished communicating
+  sync_threads();
+  // second pass: accumulate fp for each atom from the summed rho of its neighbors
+  for (ii = iifrom; ii < iito; ii++) {
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+ 
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+      jtype = type[j];
+ 
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+ 
+      if (rsq < cutforcesq[itype][jtype]) {
+        p = sqrt(rsq)*rdr + 1.0;
+        m = static_cast<int> (p);
+        m = MIN(m,nr-1);
+        p -= m;
+        p = MIN(p,1.0);
+        coeff = Gij_spline[type2Gij[itype][jtype]][m];
+        fp_t[i] += rho[j]*(((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6]);
+        if (NEWTON_PAIR || j < nlocal) {
+          fp_t[j] += rho[i]*(((coeff[3]*p + coeff[4])*p + coeff[5])*p + 
+			   coeff[6]);
+        }
+      }
+    }
+  }
+
+  // wait until all threads are done with computation
+  sync_threads();
+
+  // communicate and sum modified densities
+  if (NEWTON_PAIR) {
+    // reduce per thread modified density (fp)
+    data_reduce_thr(&(fp[0]), nall, comm->nthreads, 1, tid);
+
+    // wait until reduction is complete
+    sync_threads();
+
+#if defined(_OPENMP)
+#pragma omp master
+#endif
+    { 
+      rhofp = 2;   // NOTE(review): presumably switches the comm callbacks over to fp -- confirm in PairEIM
+      comm->reverse_comm_pair(this); 
+    }
+
+  } else {
+    data_reduce_thr(&(fp[0]), nlocal, comm->nthreads, 1, tid);
+
+    // wait until reduction is complete
+    sync_threads();
+  }
+
+#if defined(_OPENMP)
+#pragma omp master
+#endif
+  { 
+    rhofp = 2;
+    comm->forward_comm_pair(this); 
+  }
+
+  // wait until master is finished communicating
+  sync_threads();
+  // per-atom energy term 0.5*rho*fp, tallied into this thread's accumulators when EFLAG is set
+  for (ii = iifrom; ii < iito; ii++) {
+    i = ilist[ii];
+    itype = type[i];
+    if (EFLAG) {
+      phi = 0.5*rho[i]*fp[i];
+      if (eflag_global) eng_vdwl_thr[tid] += phi;
+      if (eflag_atom) eatom_thr[tid][i] += phi;
+    }
+  }
+
+  // compute forces on each atom
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ii++) {
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    fxtmp = fytmp = fztmp = 0.0;
+
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+      jtype = type[j];
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+
+      if (rsq < cutforcesq[itype][jtype]) {
+	r = sqrt(rsq);
+	p = r*rdr + 1.0;
+	m = static_cast<int> (p);
+	m = MIN(m,nr-1);
+	p -= m;
+	p = MIN(p,1.0);
+
+        // rhoip = derivative of (density at atom j due to atom i)
+        // rhojp = derivative of (density at atom i due to atom j)
+        // phi = pair potential energy
+        // phip = phi'
+
+        coeff = Fij_spline[type2Fij[jtype][itype]][m];
+        rhoip = (coeff[0]*p + coeff[1])*p + coeff[2];
+        coeff = Fij_spline[type2Fij[itype][jtype]][m];
+        rhojp = (coeff[0]*p + coeff[1])*p + coeff[2];
+        coeff = phiij_spline[type2phiij[itype][jtype]][m];
+        phip = (coeff[0]*p + coeff[1])*p + coeff[2];
+        phi = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
+        coeff = Gij_spline[type2Gij[itype][jtype]][m];
+        coul = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
+        coulp = (coeff[0]*p + coeff[1])*p + coeff[2];
+        psip = phip + (rho[i]*rho[j]-q0[itype]*q0[jtype])*coulp +
+               fp[i]*rhojp + fp[j]*rhoip;
+        recip = 1.0/r;
+        fpair = -psip*recip;
+	fxtmp += delx*fpair;   // force on i is summed locally and stored once after the j loop
+	fytmp += dely*fpair;
+	fztmp += delz*fpair;
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= delx*fpair;
+	  f[j][1] -= dely*fpair;
+	  f[j][2] -= delz*fpair;
+	}
+
+	if (EFLAG) evdwl = phi-q0[itype]*q0[jtype]*coul;
+	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
+				 evdwl,0.0,fpair,delx,dely,delz,tid);
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairEIMOMP::memory_usage()
+{
+  // sum of the ThrOMP threading buffers and the serial PairEIM storage
+  const double thr_bytes = memory_usage_thr();
+  const double base_bytes = PairEIM::memory_usage();
+  return thr_bytes + base_bytes;
+}
diff --git a/src/USER-OMP/pair_eim_omp.h b/src/USER-OMP/pair_eim_omp.h
new file mode 100644
index 000000000..3693492e0
--- /dev/null
+++ b/src/USER-OMP/pair_eim_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(eim/omp,PairEIMOMP)
+
+#else
+
+#ifndef LMP_PAIR_EIM_OMP_H
+#define LMP_PAIR_EIM_OMP_H
+
+#include "pair_eim.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairEIMOMP : public PairEIM, public ThrOMP {
+  // EIM pair style combined with the ThrOMP threading helper base class
+ public:
+  PairEIMOMP(class LAMMPS *);
+
+  virtual void compute(int, int);   // arguments are (eflag, vflag)
+  virtual double memory_usage();   // memory_usage_thr() + PairEIM::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(double **f, double *rho_t, double *fp_t, int iifrom, int iito, int tid);   // rho_t/fp_t: the calling thread's scratch slabs inside rho/fp
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_gauss_omp.cpp
similarity index 80%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_gauss_omp.cpp
index 8ed82c5e5..e8b255d0b 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_gauss_omp.cpp
@@ -1,163 +1,186 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_gauss_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
+#define EPSILON 1.0e-10
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairGaussOMP::PairGaussOMP(LAMMPS *lmp) :
+  PairGauss(lmp), ThrOMP(lmp, PAIR)
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
+// threaded driver: runs eval<>() on every OpenMP thread, then reduces the
+// per-thread forces, energy/virial and the occupancy count in pvector[0].
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairGaussOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
+  // eval() adds each thread's occupancy count to pvector[0], so the
+  // total has to start from zero before the threads run.
+  if (eflag_global) pvector[0] = 0.0;
+
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
+// force/energy kernel for atoms ilist[iifrom..iito) of thread tid; f is the
+// force array that loop_setup_thr() returned for this thread.
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairGaussOMP::eval(double **f, int iifrom, int iito, int tid)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double rsq,r2inv,forcelj,factor_lj;
   int *ilist,*jlist,*numneigh,**firstneigh;
+  int occ = 0;   // wells counted as occupied among this thread's pairs
 
   evdwl = 0.0;
 
   double **x = atom->x;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
+      // define a Gaussian well to be occupied if
+      // the site it interacts with is within the force maximum
+
+      if (EFLAG)
+	if (eflag_global && rsq < 0.5/b[itype][jtype]) occ++;
+
       if (rsq < cutsq[itype][jtype]) {
 	r2inv = 1.0/rsq;
-	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+	forcelj = - 2.0*a[itype][jtype]*b[itype][jtype] * rsq * 
+	  exp(-b[itype][jtype]*rsq); 
 	fpair = factor_lj*forcelj*r2inv;
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
+	  evdwl = -(a[itype][jtype]*exp(-b[itype][jtype]*rsq) -
+		    offset[itype][jtype]);
 	  evdwl *= factor_lj;
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
 				 evdwl,0.0,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
+
+  // each thread counted only its own atoms: add the partial count to the
+  // total instead of overwriting it (a plain store from every thread is a
+  // data race that keeps just one thread's value).
+  if (eflag_global) {
+#if defined(_OPENMP)
+#pragma omp atomic
+#endif
+    pvector[0] += occ;
+  }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairGaussOMP::memory_usage()
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairGauss::memory_usage();
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_gauss_omp.h b/src/USER-OMP/pair_gauss_omp.h
new file mode 100644
index 000000000..7f8fc9a85
--- /dev/null
+++ b/src/USER-OMP/pair_gauss_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(gauss/omp,PairGaussOMP)
+
+#else
+
+#ifndef LMP_PAIR_GAUSS_OMP_H
+#define LMP_PAIR_GAUSS_OMP_H
+
+#include "pair_gauss.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairGaussOMP : public PairGauss, public ThrOMP {
+  // Gauss pair style combined with the ThrOMP threading helper base class
+ public:
+  PairGaussOMP(class LAMMPS *);
+
+  virtual void compute(int, int);   // arguments are (eflag, vflag)
+  virtual double memory_usage();   // memory_usage_thr() + PairGauss::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(double **f, int ifrom, int ito, int tid);   // handles atoms ilist[ifrom..ito) on behalf of thread tid
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_gayberne_omp.cpp b/src/USER-OMP/pair_gayberne_omp.cpp
new file mode 100644
index 000000000..ff115e8ef
--- /dev/null
+++ b/src/USER-OMP/pair_gayberne_omp.cpp
@@ -0,0 +1,227 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_gayberne_omp.h"
+#include "math_extra.h"
+#include "atom.h"
+#include "comm.h"
+#include "atom_vec_ellipsoid.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairGayBerneOMP::PairGayBerneOMP(LAMMPS *lmp) :
+  PairGayBerne(lmp), ThrOMP(lmp, PAIR)   // both bases are built from the same LAMMPS instance
+{
+  respa_enable = 0;   // NOTE(review): presumably marks this style as not usable with rRESPA -- confirm in Pair
+}
+
+/* threaded driver for Gay-Berne: runs eval<>() on thread-offset views of the
+   force and torque arrays, then reduces both plus energy and virial */
+void PairGayBerneOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f, **torque;
+
+    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    torque = atom->torque + tid*nall;   // this thread's torque rows start tid*nall rows into atom->torque
+    // pick the eval<EVFLAG,EFLAG,NEWTON_PAIR> instance matching the run-time flags
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_pair) eval<1,1,1>(f, torque, ifrom, ito, tid);
+	else eval<1,1,0>(f, torque, ifrom, ito, tid);
+      } else {
+	if (force->newton_pair) eval<1,0,1>(f, torque, ifrom, ito, tid);
+	else eval<1,0,0>(f, torque, ifrom, ito, tid);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(f, torque, ifrom, ito, tid);
+      else eval<0,0,0>(f, torque, ifrom, ito, tid);
+    }
+
+    // reduce per thread forces and torques into global arrays.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+// force/torque kernel for atoms ilist[iifrom..iito) of thread tid; f and tor are updated in place
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairGayBerneOMP::eval(double **f, double **tor, int iifrom, int iito, int tid)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  double one_eng,rsq,r2inv,r6inv,forcelj,factor_lj,evdwl = 0.0;   // evdwl reaches the tally call even when EFLAG is 0, so it must not stay uninitialized
+  double fforce[3],ttor[3],rtor[3],r12[3];
+  double a1[3][3],b1[3][3],g1[3][3],a2[3][3],b2[3][3],g2[3][3],temp[3][3];
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  double *iquat,*jquat;
+
+  double **x = atom->x;
+  int *ellipsoid = atom->ellipsoid;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  double *special_lj = force->special_lj;
+
+  AtomVecEllipsoid::Bonus *bonus = avec->bonus;
+
+  double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    itype = type[i];
+    // a1/b1/g1 of atom i are only prepared when its own type pairing is ELLIPSE_ELLIPSE
+    if (form[itype][itype] == ELLIPSE_ELLIPSE) {
+      iquat = bonus[ellipsoid[i]].quat;
+      MathExtra::quat_to_mat_trans(iquat,a1);
+      MathExtra::diag_times3(well[itype],a1,temp);
+      MathExtra::transpose_times3(a1,temp,b1);
+      MathExtra::diag_times3(shape2[itype],a1,temp);
+      MathExtra::transpose_times3(a1,temp,g1);
+    }
+
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      // r12 = center to center vector
+
+      r12[0] = x[j][0]-x[i][0];
+      r12[1] = x[j][1]-x[i][1];
+      r12[2] = x[j][2]-x[i][2];
+      rsq = MathExtra::dot3(r12,r12);
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+
+	switch (form[itype][jtype]) {
+	case SPHERE_SPHERE:
+	  r2inv = 1.0/rsq;
+	  r6inv = r2inv*r2inv*r2inv;
+	  forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+	  forcelj *= -r2inv;
+	  if (EFLAG) 
+	    one_eng = r6inv*(r6inv*lj3[itype][jtype]-lj4[itype][jtype]) -
+	      offset[itype][jtype];
+	  fforce[0] = r12[0]*forcelj;
+	  fforce[1] = r12[1]*forcelj;
+	  fforce[2] = r12[2]*forcelj;
+	  ttor[0] = ttor[1] = ttor[2] = 0.0;
+	  rtor[0] = rtor[1] = rtor[2] = 0.0;
+	  break;
+
+        case SPHERE_ELLIPSE:
+	  jquat = bonus[ellipsoid[j]].quat;
+	  MathExtra::quat_to_mat_trans(jquat,a2);
+	  MathExtra::diag_times3(well[jtype],a2,temp);
+	  MathExtra::transpose_times3(a2,temp,b2);
+	  MathExtra::diag_times3(shape2[jtype],a2,temp);
+	  MathExtra::transpose_times3(a2,temp,g2);
+	  one_eng = gayberne_lj(j,i,a2,b2,g2,r12,rsq,fforce,rtor);
+	  ttor[0] = ttor[1] = ttor[2] = 0.0;
+	  break;
+
+        case ELLIPSE_SPHERE:
+	  one_eng = gayberne_lj(i,j,a1,b1,g1,r12,rsq,fforce,ttor);
+	  rtor[0] = rtor[1] = rtor[2] = 0.0;
+	  break;
+
+	default:   // NOTE(review): presumably the ELLIPSE_ELLIPSE pairing -- confirm against PairGayBerne
+	  jquat = bonus[ellipsoid[j]].quat;
+	  MathExtra::quat_to_mat_trans(jquat,a2);
+	  MathExtra::diag_times3(well[jtype],a2,temp);
+	  MathExtra::transpose_times3(a2,temp,b2);
+	  MathExtra::diag_times3(shape2[jtype],a2,temp);
+	  MathExtra::transpose_times3(a2,temp,g2);
+	  one_eng = gayberne_analytic(i,j,a1,a2,b1,b2,g1,g2,r12,rsq,
+				      fforce,ttor,rtor);
+	  break;
+	}
+
+        fforce[0] *= factor_lj;
+	fforce[1] *= factor_lj;
+	fforce[2] *= factor_lj;
+        ttor[0] *= factor_lj;
+	ttor[1] *= factor_lj;
+	ttor[2] *= factor_lj;
+
+        f[i][0] += fforce[0];
+	f[i][1] += fforce[1];
+	f[i][2] += fforce[2];
+        tor[i][0] += ttor[0];
+	tor[i][1] += ttor[1];
+	tor[i][2] += ttor[2];
+
+        if (NEWTON_PAIR || j < nlocal) {   // ghost j only receives force/torque when NEWTON_PAIR is set
+          rtor[0] *= factor_lj;
+	  rtor[1] *= factor_lj;
+	  rtor[2] *= factor_lj;
+          f[j][0] -= fforce[0];
+	  f[j][1] -= fforce[1];
+	  f[j][2] -= fforce[2];
+          tor[j][0] += rtor[0];
+	  tor[j][1] += rtor[1];
+	  tor[j][2] += rtor[2];
+        }
+
+        if (EFLAG) evdwl = factor_lj*one_eng;
+	// r12 points from i to j, so the tally is handed -r12 (x[i]-x[j])
+	if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,
+				     evdwl,0.0,fforce[0],fforce[1],fforce[2],
+				     -r12[0],-r12[1],-r12[2],tid);
+      }
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairGayBerneOMP::memory_usage()
+{
+  // ThrOMP threading buffers plus the storage reported by serial PairGayBerne
+  const double thr_bytes = memory_usage_thr();
+  const double base_bytes = PairGayBerne::memory_usage();
+  return thr_bytes + base_bytes;
+}
diff --git a/src/USER-OMP/pair_gayberne_omp.h b/src/USER-OMP/pair_gayberne_omp.h
new file mode 100644
index 000000000..737b4ec67
--- /dev/null
+++ b/src/USER-OMP/pair_gayberne_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(gayberne/omp,PairGayBerneOMP)
+
+#else
+
+#ifndef LMP_PAIR_GAYBERNE_OMP_H
+#define LMP_PAIR_GAYBERNE_OMP_H
+
+#include "pair_gayberne.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairGayBerneOMP : public PairGayBerne, public ThrOMP {
+  // variant of PairGayBerne that also derives from ThrOMP (style gayberne/omp)
+ public:
+  PairGayBerneOMP(class LAMMPS *lmp);
+
+  virtual void compute(int eflag, int vflag);
+  virtual double memory_usage();
+
+ private:  // force kernel; its three flags are template (compile-time) parameters
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(double **f, double **torque, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_gran_hertz_history_omp.cpp b/src/USER-OMP/pair_gran_hertz_history_omp.cpp
new file mode 100644
index 000000000..1866833af
--- /dev/null
+++ b/src/USER-OMP/pair_gran_hertz_history_omp.cpp
@@ -0,0 +1,298 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_gran_hertz_history_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "update.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairGranHertzHistoryOMP::PairGranHertzHistoryOMP(LAMMPS *lmp)
+  : PairGranHertzHistory(lmp), ThrOMP(lmp, PAIR)
+{
+  respa_enable = 0;  // presumably marks the style as unavailable for rRESPA -- confirm
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairGranHertzHistoryOMP::compute(int eflag, int vflag)
+{
+  // prepare energy/virial accumulation only if the caller asked for it
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  } else {
+    evflag = vflag_fdotr = 0;
+  }
+  // shear history is advanced only when the timestep has moved past the last call
+  const int shearupdate = (update->ntimestep > laststep) ? 1 : 0;
+  const int inum = list->inum;
+  const int nthreads = comm->nthreads;
+  const int nall = atom->nlocal + atom->nghost;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    double **torque = atom->torque + tid*nall;
+
+    if (evflag) {
+      if (shearupdate) eval<1,1>(f, torque, ifrom, ito, tid);
+      else eval<1,0>(f, torque, ifrom, ito, tid);
+    } else {
+      if (shearupdate) eval<0,1>(f, torque, ifrom, ito, tid);
+      else eval<0,0>(f, torque, ifrom, ito, tid);
+    }
+
+    // reduce per thread forces and torque into global arrays.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+  laststep = update->ntimestep;
+}
+
+template <int EVFLAG, int SHEARUPDATE>  // EVFLAG: call ev_tally_xyz_thr per contact; SHEARUPDATE: advance stored shear
+void PairGranHertzHistoryOMP::eval(double **f, double **torque, int iifrom, int iito, int tid)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz;
+  double radi,radj,radsum,rsq,r,rinv,rsqinv;
+  double vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3;
+  double wr1,wr2,wr3;
+  double vtr1,vtr2,vtr3,vrel;
+  double meff,damp,ccel,tor1,tor2,tor3;
+  double fn,fs,fs1,fs2,fs3;
+  double shrmag,rsht,polyhertz;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  int *touch,**firsttouch;
+  double *shear,*allshear,**firstshear;
+  // local shortcuts to the per-atom arrays read below
+  double **x = atom->x;
+  double **v = atom->v;
+  double **omega = atom->omega;
+  double *radius = atom->radius;
+  double *rmass = atom->rmass;
+  double *mass = atom->mass;
+  int *type = atom->type;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+  double fxtmp,fytmp,fztmp;
+  double t1tmp,t2tmp,t3tmp;
+  // neighbor list plus per-pair contact flags (touch) and shear history (3 doubles per pair)
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+  firsttouch = list->listgranhistory->firstneigh;
+  firstshear = list->listgranhistory->firstdouble;
+
+  // loop over the atoms ilist[iifrom..iito) handled by this call and their neighbors
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    radi = radius[i];
+    touch = firsttouch[i];
+    allshear = firstshear[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0;  // force/torque on i are summed locally, stored after the jj loop
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      radj = radius[j];
+      radsum = radi + radj;
+
+      if (rsq >= radsum*radsum) {
+
+	// no overlap: clear the contact flag and zero the stored shear
+
+        touch[jj] = 0;
+	shear = &allshear[3*jj];
+        shear[0] = 0.0;
+        shear[1] = 0.0;
+        shear[2] = 0.0;
+
+      } else {
+	r = sqrt(rsq);
+	rinv = 1.0/r;
+	rsqinv = 1.0/rsq;
+
+	// relative translational velocity
+
+	vr1 = v[i][0] - v[j][0];
+	vr2 = v[i][1] - v[j][1];
+	vr3 = v[i][2] - v[j][2];
+
+	// normal component
+
+	vnnr = vr1*delx + vr2*dely + vr3*delz;
+	vn1 = delx*vnnr * rsqinv;
+	vn2 = dely*vnnr * rsqinv;
+	vn3 = delz*vnnr * rsqinv;
+
+	// tangential component
+
+	vt1 = vr1 - vn1;
+	vt2 = vr2 - vn2;
+	vt3 = vr3 - vn3;
+
+	// relative rotational velocity
+
+	wr1 = (radi*omega[i][0] + radj*omega[j][0]) * rinv;
+	wr2 = (radi*omega[i][1] + radj*omega[j][1]) * rinv;
+	wr3 = (radi*omega[i][2] + radj*omega[j][2]) * rinv;
+
+	// normal force = Hertzian contact + normal velocity damping
+
+	if (rmass) {
+	  meff = rmass[i]*rmass[j] / (rmass[i]+rmass[j]);
+	  if (mask[i] & freeze_group_bit) meff = rmass[j];  // i in freeze group: use j's mass
+	  if (mask[j] & freeze_group_bit) meff = rmass[i];  // j in freeze group: use i's mass
+	} else {
+	  itype = type[i];
+	  jtype = type[j];
+	  meff = mass[itype]*mass[jtype] / (mass[itype]+mass[jtype]);
+	  if (mask[i] & freeze_group_bit) meff = mass[jtype];
+	  if (mask[j] & freeze_group_bit) meff = mass[itype];
+	}
+
+	damp = meff*gamman*vnnr*rsqinv;
+	ccel = kn*(radsum-r)*rinv - damp;
+	polyhertz = sqrt((radsum-r)*radi*radj / radsum);  // Hertz factor: sqrt(overlap * radi*radj/radsum)
+	ccel *= polyhertz;
+
+	// relative tangential velocity including the rotational contribution
+
+	vtr1 = vt1 - (delz*wr2-dely*wr3);
+	vtr2 = vt2 - (delx*wr3-delz*wr1);
+	vtr3 = vt3 - (dely*wr1-delx*wr2);
+	vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3;  // NOTE(review): vrel is not read again in this function
+	vrel = sqrt(vrel);
+
+	// shear history effects: mark the pair as touching, accumulate vtr*dt if SHEARUPDATE
+
+	touch[jj] = 1;
+	shear = &allshear[3*jj];
+
+	if (SHEARUPDATE) {
+	  shear[0] += vtr1*dt;
+	  shear[1] += vtr2*dt;
+	  shear[2] += vtr3*dt;
+	}
+        shrmag = sqrt(shear[0]*shear[0] + shear[1]*shear[1] +
+		      shear[2]*shear[2]);
+
+	// rotate shear displacements: remove the component along (delx,dely,delz)
+
+	rsht = shear[0]*delx + shear[1]*dely + shear[2]*delz;
+	rsht *= rsqinv;
+	if (SHEARUPDATE) {
+	  shear[0] -= rsht*delx;
+	  shear[1] -= rsht*dely;
+	  shear[2] -= rsht*delz;
+	}
+
+	// tangential forces = shear + tangential velocity damping
+
+        fs1 = -polyhertz * (kt*shear[0] + meff*gammat*vtr1);
+        fs2 = -polyhertz * (kt*shear[1] + meff*gammat*vtr2);
+        fs3 = -polyhertz * (kt*shear[2] + meff*gammat*vtr3);
+
+	// rescale frictional displacements and forces if |fs| exceeds xmu*|normal force|
+
+	fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3);
+	fn = xmu * fabs(ccel*r);
+
+	if (fs > fn) {
+	  if (shrmag != 0.0) {
+	    const double fnfs = fn/fs;
+	    const double mgkt = meff*gammat/kt;
+	    shear[0] = fnfs * (shear[0] + mgkt*vtr1) - mgkt*vtr1;
+	    shear[1] = fnfs * (shear[1] + mgkt*vtr2) - mgkt*vtr2;
+	    shear[2] = fnfs * (shear[2] + mgkt*vtr3) - mgkt*vtr3;
+	    fs1 *= fnfs;
+	    fs2 *= fnfs;
+	    fs3 *= fnfs;
+	  } else fs1 = fs2 = fs3 = 0.0;
+	}
+
+	// forces & torques
+
+	fx = delx*ccel + fs1;
+	fy = dely*ccel + fs2;
+	fz = delz*ccel + fs3;
+	fxtmp  += fx;
+	fytmp  += fy;
+	fztmp  += fz;
+
+	tor1 = rinv * (dely*fs3 - delz*fs2);
+	tor2 = rinv * (delz*fs1 - delx*fs3);
+	tor3 = rinv * (delx*fs2 - dely*fs1);
+	t1tmp -= radi*tor1;
+	t2tmp -= radi*tor2;
+	t3tmp -= radi*tor3;
+
+	if (j < nlocal) {  // reaction force/torque on j only when j is a local atom
+	  f[j][0] -= fx;
+	  f[j][1] -= fy;
+	  f[j][2] -= fz;
+	  torque[j][0] -= radj*tor1;
+	  torque[j][1] -= radj*tor2;
+	  torque[j][2] -= radj*tor3;
+	}
+
+	if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,/* newton_pair */ 0,
+				     0.0,0.0,fx,fy,fz,delx,dely,delz,tid);
+
+      }
+    }
+    f[i][0] += fxtmp;  // store the sums accumulated over all neighbors of i
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+    torque[i][0] += t1tmp;
+    torque[i][1] += t2tmp;
+    torque[i][2] += t3tmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairGranHertzHistoryOMP::memory_usage()
+{
+  // memory_usage_thr() share first, then the PairGranHertzHistory base-class share
+  const double thr_bytes = memory_usage_thr();
+  const double base_bytes = PairGranHertzHistory::memory_usage();
+  return thr_bytes + base_bytes;
+}
diff --git a/src/USER-OMP/pair_gran_hertz_history_omp.h b/src/USER-OMP/pair_gran_hertz_history_omp.h
new file mode 100644
index 000000000..66d7bc0fa
--- /dev/null
+++ b/src/USER-OMP/pair_gran_hertz_history_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(gran/hertz/history/omp,PairGranHertzHistoryOMP)
+
+#else
+
+#ifndef LMP_PAIR_GRAN_HERTZ_HISTORY_OMP_H
+#define LMP_PAIR_GRAN_HERTZ_HISTORY_OMP_H
+
+#include "pair_gran_hertz_history.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairGranHertzHistoryOMP : public PairGranHertzHistory, public ThrOMP {
+  // variant of PairGranHertzHistory that also derives from ThrOMP (style gran/hertz/history/omp)
+ public:
+  PairGranHertzHistoryOMP(class LAMMPS *lmp);
+
+  virtual void compute(int eflag, int vflag);
+  virtual double memory_usage();
+
+ private:  // force kernel; both flags are template (compile-time) parameters
+  template <int EVFLAG, int SHEARUPDATE>
+  void eval(double **f, double **torque, int iifrom, int iito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_gran_hooke_history_omp.cpp b/src/USER-OMP/pair_gran_hooke_history_omp.cpp
new file mode 100644
index 000000000..ad0537b51
--- /dev/null
+++ b/src/USER-OMP/pair_gran_hooke_history_omp.cpp
@@ -0,0 +1,301 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_gran_hooke_history_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "update.h"
+
+#include "string.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairGranHookeHistoryOMP::PairGranHookeHistoryOMP(LAMMPS *lmp)
+  : PairGranHookeHistory(lmp), ThrOMP(lmp, PAIR)
+{
+  respa_enable = 0;
+  // the "omp" suffix is what triggers use of the OpenMP version of FixShearHistory
+  suffix = new char[sizeof("omp")];
+  memcpy(suffix,"omp",sizeof("omp"));
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairGranHookeHistoryOMP::compute(int eflag, int vflag)
+{
+  // prepare energy/virial accumulation only if the caller asked for it
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  } else {
+    evflag = vflag_fdotr = 0;
+  }
+  // shear history is advanced only when the timestep has moved past the last call
+  const int shearupdate = (update->ntimestep > laststep) ? 1 : 0;
+  const int inum = list->inum;
+  const int nthreads = comm->nthreads;
+  const int nall = atom->nlocal + atom->nghost;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    double **torque = atom->torque + tid*nall;
+
+    if (evflag) {
+      if (shearupdate) eval<1,1>(f, torque, ifrom, ito, tid);
+      else eval<1,0>(f, torque, ifrom, ito, tid);
+    } else {
+      if (shearupdate) eval<0,1>(f, torque, ifrom, ito, tid);
+      else eval<0,0>(f, torque, ifrom, ito, tid);
+    }
+
+    // reduce per thread forces and torque into global arrays.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+  laststep = update->ntimestep;
+}
+
+template <int EVFLAG, int SHEARUPDATE>  // EVFLAG: call ev_tally_xyz_thr per contact; SHEARUPDATE: advance stored shear
+void PairGranHookeHistoryOMP::eval(double **f, double **torque, int iifrom, int iito, int tid)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz;
+  double radi,radj,radsum,rsq,r,rinv,rsqinv;
+  double vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3;
+  double wr1,wr2,wr3;
+  double vtr1,vtr2,vtr3,vrel;
+  double meff,damp,ccel,tor1,tor2,tor3;
+  double fn,fs,fs1,fs2,fs3;
+  double shrmag,rsht;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  int *touch,**firsttouch;
+  double *shear,*allshear,**firstshear;
+  // local shortcuts to the per-atom arrays read below
+  double **x = atom->x;
+  double **v = atom->v;
+  double **omega = atom->omega;
+  double *radius = atom->radius;
+  double *rmass = atom->rmass;
+  double *mass = atom->mass;
+  int *type = atom->type;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+  double fxtmp,fytmp,fztmp;
+  double t1tmp,t2tmp,t3tmp;
+  // neighbor list plus per-pair contact flags (touch) and shear history (3 doubles per pair)
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+  firsttouch = listgranhistory->firstneigh;
+  firstshear = listgranhistory->firstdouble;
+
+  // loop over the atoms ilist[iifrom..iito) handled by this call and their neighbors
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    radi = radius[i];
+    touch = firsttouch[i];
+    allshear = firstshear[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0;  // force/torque on i are summed locally, stored after the jj loop
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      radj = radius[j];
+      radsum = radi + radj;
+
+      if (rsq >= radsum*radsum) {
+
+	// no overlap: clear the contact flag and zero the stored shear
+
+        touch[jj] = 0;
+	shear = &allshear[3*jj];
+        shear[0] = 0.0;
+        shear[1] = 0.0;
+        shear[2] = 0.0;
+
+      } else {
+	r = sqrt(rsq);
+	rinv = 1.0/r;
+	rsqinv = 1.0/rsq;
+
+	// relative translational velocity
+
+	vr1 = v[i][0] - v[j][0];
+	vr2 = v[i][1] - v[j][1];
+	vr3 = v[i][2] - v[j][2];
+
+	// normal component
+
+	vnnr = vr1*delx + vr2*dely + vr3*delz;
+	vn1 = delx*vnnr * rsqinv;
+	vn2 = dely*vnnr * rsqinv;
+	vn3 = delz*vnnr * rsqinv;
+
+	// tangential component
+
+	vt1 = vr1 - vn1;
+	vt2 = vr2 - vn2;
+	vt3 = vr3 - vn3;
+
+	// relative rotational velocity
+
+	wr1 = (radi*omega[i][0] + radj*omega[j][0]) * rinv;
+	wr2 = (radi*omega[i][1] + radj*omega[j][1]) * rinv;
+	wr3 = (radi*omega[i][2] + radj*omega[j][2]) * rinv;
+
+	// normal forces = Hookian contact + normal velocity damping
+
+	if (rmass) {
+	  meff = rmass[i]*rmass[j] / (rmass[i]+rmass[j]);
+	  if (mask[i] & freeze_group_bit) meff = rmass[j];  // i in freeze group: use j's mass
+	  if (mask[j] & freeze_group_bit) meff = rmass[i];  // j in freeze group: use i's mass
+	} else {
+	  itype = type[i];
+	  jtype = type[j];
+	  meff = mass[itype]*mass[jtype] / (mass[itype]+mass[jtype]);
+	  if (mask[i] & freeze_group_bit) meff = mass[jtype];
+	  if (mask[j] & freeze_group_bit) meff = mass[itype];
+	}
+
+	damp = meff*gamman*vnnr*rsqinv;
+	ccel = kn*(radsum-r)*rinv - damp;
+
+	// relative tangential velocity including the rotational contribution
+
+	vtr1 = vt1 - (delz*wr2-dely*wr3);
+	vtr2 = vt2 - (delx*wr3-delz*wr1);
+	vtr3 = vt3 - (dely*wr1-delx*wr2);
+	vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3;  // NOTE(review): vrel is not read again in this function
+	vrel = sqrt(vrel);
+
+	// shear history effects: mark the pair as touching, accumulate vtr*dt if SHEARUPDATE
+
+	touch[jj] = 1;
+	shear = &allshear[3*jj];
+
+	if (SHEARUPDATE) {
+	  shear[0] += vtr1*dt;
+	  shear[1] += vtr2*dt;
+	  shear[2] += vtr3*dt;
+	}
+        shrmag = sqrt(shear[0]*shear[0] + shear[1]*shear[1] +
+		      shear[2]*shear[2]);
+
+	// rotate shear displacements: remove the component along (delx,dely,delz)
+
+	rsht = shear[0]*delx + shear[1]*dely + shear[2]*delz;
+	rsht *= rsqinv;
+	if (SHEARUPDATE) {
+	  shear[0] -= rsht*delx;
+	  shear[1] -= rsht*dely;
+	  shear[2] -= rsht*delz;
+	}
+
+	// tangential forces = shear + tangential velocity damping
+
+	fs1 = - (kt*shear[0] + meff*gammat*vtr1);
+	fs2 = - (kt*shear[1] + meff*gammat*vtr2);
+	fs3 = - (kt*shear[2] + meff*gammat*vtr3);
+
+	// rescale frictional displacements and forces if |fs| exceeds xmu*|normal force|
+
+	fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3);
+	fn = xmu * fabs(ccel*r);
+
+	if (fs > fn) {
+	  if (shrmag != 0.0) {
+	    const double fnfs = fn/fs;
+	    const double mgkt = meff*gammat/kt;
+	    shear[0] = fnfs * (shear[0] + mgkt*vtr1) - mgkt*vtr1;
+	    shear[1] = fnfs * (shear[1] + mgkt*vtr2) - mgkt*vtr2;
+	    shear[2] = fnfs * (shear[2] + mgkt*vtr3) - mgkt*vtr3;
+	    fs1 *= fnfs;
+	    fs2 *= fnfs;
+	    fs3 *= fnfs;
+	  } else fs1 = fs2 = fs3 = 0.0;
+	}
+
+	// forces & torques
+
+	fx = delx*ccel + fs1;
+	fy = dely*ccel + fs2;
+	fz = delz*ccel + fs3;
+	fxtmp  += fx;
+	fytmp  += fy;
+	fztmp  += fz;
+
+	tor1 = rinv * (dely*fs3 - delz*fs2);
+	tor2 = rinv * (delz*fs1 - delx*fs3);
+	tor3 = rinv * (delx*fs2 - dely*fs1);
+	t1tmp -= radi*tor1;
+	t2tmp -= radi*tor2;
+	t3tmp -= radi*tor3;
+
+	if (j < nlocal) {  // reaction force/torque on j only when j is a local atom
+	  f[j][0] -= fx;
+	  f[j][1] -= fy;
+	  f[j][2] -= fz;
+	  torque[j][0] -= radj*tor1;
+	  torque[j][1] -= radj*tor2;
+	  torque[j][2] -= radj*tor3;
+	}
+
+	if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,/* newton_pair */ 0,
+				     0.0,0.0,fx,fy,fz,delx,dely,delz,tid);
+
+      }
+    }
+    f[i][0] += fxtmp;  // store the sums accumulated over all neighbors of i
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+    torque[i][0] += t1tmp;
+    torque[i][1] += t2tmp;
+    torque[i][2] += t3tmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairGranHookeHistoryOMP::memory_usage()
+{
+  // memory_usage_thr() share first, then the PairGranHookeHistory base-class share
+  const double thr_bytes = memory_usage_thr();
+  const double base_bytes = PairGranHookeHistory::memory_usage();
+  return thr_bytes + base_bytes;
+}
diff --git a/src/USER-OMP/pair_gran_hooke_history_omp.h b/src/USER-OMP/pair_gran_hooke_history_omp.h
new file mode 100644
index 000000000..33325025f
--- /dev/null
+++ b/src/USER-OMP/pair_gran_hooke_history_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(gran/hooke/history/omp,PairGranHookeHistoryOMP)
+
+#else
+
+#ifndef LMP_PAIR_GRAN_HOOKE_HISTORY_OMP_H
+#define LMP_PAIR_GRAN_HOOKE_HISTORY_OMP_H
+
+#include "pair_gran_hooke_history.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairGranHookeHistoryOMP : public PairGranHookeHistory, public ThrOMP {
+  // variant of PairGranHookeHistory that also derives from ThrOMP (style gran/hooke/history/omp)
+ public:
+  PairGranHookeHistoryOMP(class LAMMPS *lmp);
+
+  virtual void compute(int eflag, int vflag);
+  virtual double memory_usage();
+
+ private:  // force kernel; both flags are template (compile-time) parameters
+  template <int EVFLAG, int SHEARUPDATE>
+  void eval(double **f, double **torque, int iifrom, int iito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_gran_hooke_omp.cpp b/src/USER-OMP/pair_gran_hooke_omp.cpp
new file mode 100644
index 000000000..d6991fa45
--- /dev/null
+++ b/src/USER-OMP/pair_gran_hooke_omp.cpp
@@ -0,0 +1,240 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_gran_hooke_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairGranHookeOMP::PairGranHookeOMP(LAMMPS *lmp)
+  : PairGranHooke(lmp), ThrOMP(lmp, PAIR)
+{
+  respa_enable = 0;  // presumably marks the style as unavailable for rRESPA -- confirm
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairGranHookeOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  } else {
+    evflag = vflag_fdotr = 0;
+  }
+  const int inum = list->inum;
+  const int nthreads = comm->nthreads;
+  const int nall = atom->nlocal + atom->nghost;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    double **torque = atom->torque + tid*nall;
+
+    if (evflag) {
+      if (force->newton_pair) eval<1,1>(f, torque, ifrom, ito, tid);
+      else eval<1,0>(f, torque, ifrom, ito, tid);
+    } else {
+      if (force->newton_pair) eval<0,1>(f, torque, ifrom, ito, tid);
+      else eval<0,0>(f, torque, ifrom, ito, tid);
+    }
+
+    // reduce per thread forces and torque into global arrays.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+}
+
+template <int EVFLAG, int NEWTON_PAIR>  // EVFLAG: call ev_tally_xyz_thr per contact; NEWTON_PAIR: also update non-local j
+void PairGranHookeOMP::eval(double **f, double **torque, int iifrom, int iito, int tid)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz;
+  double radi,radj,radsum,rsq,r,rinv,rsqinv;
+  double vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3;
+  double wr1,wr2,wr3;
+  double vtr1,vtr2,vtr3,vrel;
+  double meff,damp,ccel,tor1,tor2,tor3;
+  double fn,fs,ft,fs1,fs2,fs3;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  // local shortcuts to the per-atom arrays read below
+  double **x = atom->x;
+  double **v = atom->v;
+  double **omega = atom->omega;
+  double *radius = atom->radius;
+  double *rmass = atom->rmass;
+  double *mass = atom->mass;
+  int *type = atom->type;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+  double fxtmp,fytmp,fztmp;
+  double t1tmp,t2tmp,t3tmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over the atoms ilist[iifrom..iito) handled by this call and their neighbors
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    radi = radius[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0;  // force/torque on i are summed locally, stored after the jj loop
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      radj = radius[j];
+      radsum = radi + radj;
+
+      if (rsq < radsum*radsum) {  // only overlapping pairs contribute
+	r = sqrt(rsq);
+	rinv = 1.0/r;
+	rsqinv = 1.0/rsq;
+
+	// relative translational velocity
+
+	vr1 = v[i][0] - v[j][0];
+	vr2 = v[i][1] - v[j][1];
+	vr3 = v[i][2] - v[j][2];
+
+	// normal component
+
+	vnnr = vr1*delx + vr2*dely + vr3*delz;
+	vn1 = delx*vnnr * rsqinv;
+	vn2 = dely*vnnr * rsqinv;
+	vn3 = delz*vnnr * rsqinv;
+
+	// tangential component
+
+	vt1 = vr1 - vn1;
+	vt2 = vr2 - vn2;
+	vt3 = vr3 - vn3;
+
+	// relative rotational velocity
+
+	wr1 = (radi*omega[i][0] + radj*omega[j][0]) * rinv;
+	wr2 = (radi*omega[i][1] + radj*omega[j][1]) * rinv;
+	wr3 = (radi*omega[i][2] + radj*omega[j][2]) * rinv;
+
+	// normal forces = Hookian contact + normal velocity damping
+
+	if (rmass) {
+	  meff = rmass[i]*rmass[j] / (rmass[i]+rmass[j]);
+	  if (mask[i] & freeze_group_bit) meff = rmass[j];  // i in freeze group: use j's mass
+	  if (mask[j] & freeze_group_bit) meff = rmass[i];  // j in freeze group: use i's mass
+	} else {
+	  itype = type[i];
+	  jtype = type[j];
+	  meff = mass[itype]*mass[jtype] / (mass[itype]+mass[jtype]);
+	  if (mask[i] & freeze_group_bit) meff = mass[jtype];
+	  if (mask[j] & freeze_group_bit) meff = mass[itype];
+	}
+
+	damp = meff*gamman*vnnr*rsqinv;
+	ccel = kn*(radsum-r)*rinv - damp;
+
+	// relative tangential velocity including the rotational contribution
+
+	vtr1 = vt1 - (delz*wr2-dely*wr3);
+	vtr2 = vt2 - (delx*wr3-delz*wr1);
+	vtr3 = vt3 - (dely*wr1-delx*wr2);
+	vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3;
+	vrel = sqrt(vrel);
+
+	// force normalization: ft = min(xmu*|normal force|, meff*gammat*vrel) / vrel
+
+	fn = xmu * fabs(ccel*r);
+	fs = meff*gammat*vrel;
+	if (vrel != 0.0) ft = MIN(fn,fs) / vrel;
+	else ft = 0.0;  // avoids dividing by a zero vrel
+
+	// tangential force due to tangential velocity damping
+
+	fs1 = -ft*vtr1;
+	fs2 = -ft*vtr2;
+	fs3 = -ft*vtr3;
+
+	// forces & torques
+
+	fx = delx*ccel + fs1;
+	fy = dely*ccel + fs2;
+	fz = delz*ccel + fs3;
+	fxtmp  += fx;
+	fytmp  += fy;
+	fztmp  += fz;
+
+	tor1 = rinv * (dely*fs3 - delz*fs2);
+	tor2 = rinv * (delz*fs1 - delx*fs3);
+	tor3 = rinv * (delx*fs2 - dely*fs1);
+	t1tmp -= radi*tor1;
+	t2tmp -= radi*tor2;
+	t3tmp -= radi*tor3;
+
+	if (NEWTON_PAIR || j < nlocal) {  // reaction on j: always with NEWTON_PAIR, else only for local j
+	  f[j][0] -= fx;
+	  f[j][1] -= fy;
+	  f[j][2] -= fz;
+	  torque[j][0] -= radj*tor1;
+	  torque[j][1] -= radj*tor2;
+	  torque[j][2] -= radj*tor3;
+	}
+
+	if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,
+				     0.0,0.0,fx,fy,fz,delx,dely,delz,tid);
+
+      }
+    }
+    f[i][0] += fxtmp;  // store the sums accumulated over all neighbors of i
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+    torque[i][0] += t1tmp;
+    torque[i][1] += t2tmp;
+    torque[i][2] += t3tmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairGranHookeOMP::memory_usage()
+{
+  // memory_usage_thr() share first, then the PairGranHooke base-class share
+  const double thr_bytes = memory_usage_thr();
+  const double base_bytes = PairGranHooke::memory_usage();
+  return thr_bytes + base_bytes;
+}
diff --git a/src/USER-OMP/pair_gran_hooke_omp.h b/src/USER-OMP/pair_gran_hooke_omp.h
new file mode 100644
index 000000000..f2b093778
--- /dev/null
+++ b/src/USER-OMP/pair_gran_hooke_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(gran/hooke/omp,PairGranHookeOMP)
+
+#else
+
+#ifndef LMP_PAIR_GRAN_HOOKE_OMP_H
+#define LMP_PAIR_GRAN_HOOKE_OMP_H
+
+#include "pair_gran_hooke.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairGranHookeOMP : public PairGranHooke, public ThrOMP {
+  // variant of PairGranHooke that also derives from ThrOMP (style gran/hooke/omp)
+ public:
+  PairGranHookeOMP(class LAMMPS *lmp);
+
+  virtual void compute(int eflag, int vflag);
+  virtual double memory_usage();
+
+ private:  // force kernel; both flags are template (compile-time) parameters
+  template <int EVFLAG, int NEWTON_PAIR>
+  void eval(double **f, double **torque, int iifrom, int iito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp b/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp
new file mode 100644
index 000000000..012fd596b
--- /dev/null
+++ b/src/USER-OMP/pair_hbond_dreiding_lj_omp.cpp
@@ -0,0 +1,299 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_hbond_dreiding_lj_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "domain.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+#include "math_const.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALL 0.001
+
+/* ---------------------------------------------------------------------- */
+
+PairHbondDreidingLJOMP::PairHbondDreidingLJOMP(LAMMPS *lmp) :
+  PairHbondDreidingLJ(lmp), ThrOMP(lmp, PAIR)
+{
+  hbeng_thr = hbcount_thr = NULL;  // both buffers get allocated in compute()
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairHbondDreidingLJOMP::~PairHbondDreidingLJOMP()
+{
+  respa_enable = 0;
+  // hbcount_thr and hbeng_thr are always set together (both NULL or both
+  // allocated), and delete[] on a NULL pointer does nothing.
+  delete[] hbcount_thr;
+  delete[] hbeng_thr;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Runs eval<>() inside an OpenMP parallel region with the ifrom/ito/tid
+// values filled in by loop_setup_thr(), then reduces the per-thread results.
+void PairHbondDreidingLJOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+  // allocate the per-thread hbond accumulators on the first call only
+  if (!hbcount_thr) {
+    hbcount_thr = new double[nthreads];
+    hbeng_thr = new double[nthreads];
+  }
+  // zero the per-thread hbond accumulators before every evaluation
+  for (int i=0; i < nthreads; ++i) {
+    hbcount_thr[i] = 0.0;
+    hbeng_thr[i] = 0.0;
+  }
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f;
+    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    // select the eval<EVFLAG,EFLAG,NEWTON_PAIR> instantiation matching the flags
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
+	else eval<1,1,0>(f, ifrom, ito, tid);
+      } else {
+	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
+	else eval<1,0,0>(f, ifrom, ito, tid);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
+      else eval<0,0,0>(f, ifrom, ito, tid);
+    }
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+  if (vflag_fdotr) virial_fdotr_compute();
+
+  // reduce per thread hbond data
+  if (eflag_global) {
+    pvector[0] = 0.0;  // summed hbond count
+    pvector[1] = 0.0;  // summed hbond energy
+    for (int i=0; i < nthreads; ++i) {
+      pvector[0] += hbcount_thr[i];
+      pvector[1] += hbeng_thr[i];
+    }
+  }
+}
+
+// Per-thread kernel for the LJ-type hydrogen-bond term: loops over the
+// ilist entries iifrom..iito-1 and accumulates forces into f; the hbond
+// count and energy of this call are stored in hbcount_thr/hbeng_thr[tid].
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairHbondDreidingLJOMP::eval(double **f, int iifrom, int iito, int tid)
+{
+  int i,j,k,m,ii,jj,kk,jnum,knum,itype,jtype,ktype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,rsq,rsq1,rsq2,r1,r2;
+  double factor_hb,force_angle,force_kernel,evdwl,eng_lj;
+  double c,s,a,b,ac,a11,a12,a22,vx1,vx2,vy1,vy2,vz1,vz2;
+  double fi[3],fj[3],delr1[3],delr2[3];
+  double r2inv,r10inv;
+  double switch1,switch2;
+  int *ilist,*jlist,*klist,*numneigh,**firstneigh;
+  Param *pm;
+
+  evdwl = 0.0;
+
+  double **x = atom->x;
+  int *type = atom->type;
+  int **special = atom->special;
+  int **nspecial = atom->nspecial;
+  double *special_lj = force->special_lj;
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // ii = loop over donors
+  // jj = loop over acceptors
+  // kk = loop over hydrogens bonded to donor
+
+  int hbcount = 0;
+  double hbeng = 0.0;
+
+  for (ii = iifrom; ii < iito; ++ii) {
+    i = ilist[ii];
+    itype = type[i];
+    if (!donor[itype]) continue;  // skip atoms whose type is not flagged in donor[]
+
+    klist = special[i];
+    knum = nspecial[i][0];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=0.0;  // force on i is summed locally and stored after the jj loop
+
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_hb = special_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      jtype = type[j];
+      if (!acceptor[jtype]) continue;  // skip neighbors whose type is not flagged in acceptor[]
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+
+      for (kk = 0; kk < knum; kk++) {
+	k = atom->map(klist[kk]);
+	if (k < 0) continue;  // negative map() result: skip this entry
+	ktype = type[k];
+	m = type2param[itype][jtype][ktype];
+	if (m < 0) continue;  // negative table entry: no parameters for this type triple
+	pm = &params[m];
+
+	if (rsq < pm->cut_outersq) {
+	  delr1[0] = xtmp - x[k][0];
+	  delr1[1] = ytmp - x[k][1];
+	  delr1[2] = ztmp - x[k][2];
+	  domain->minimum_image(delr1);
+	  rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];
+	  r1 = sqrt(rsq1);
+	  
+	  delr2[0] = x[j][0] - x[k][0];
+	  delr2[1] = x[j][1] - x[k][1];
+	  delr2[2] = x[j][2] - x[k][2];
+	  domain->minimum_image(delr2);
+	  rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2];
+	  r2 = sqrt(rsq2);
+	  
+	  // angle (cos and sin)
+	  
+	  c = delr1[0]*delr2[0] + delr1[1]*delr2[1] + delr1[2]*delr2[2];
+	  c /= r1*r2;
+	  if (c > 1.0) c = 1.0;
+	  if (c < -1.0) c = -1.0;
+	  ac = acos(c);
+	  // NOTE(review): acos() yields [0,pi], so the upper bound below only matters if cut_angle > pi
+	  if (ac > pm->cut_angle && ac < (2.0*MY_PI - pm->cut_angle)) {
+	    s = sqrt(1.0 - c*c);
+	    if (s < SMALL) s = SMALL;  // s is a divisor further down (a = .../s)
+
+	    // LJ-specific kernel
+	    r2inv = 1.0/rsq;
+	    r10inv = r2inv*r2inv*r2inv*r2inv*r2inv;
+	    force_kernel = r10inv*(pm->lj1*r2inv - pm->lj2)*r2inv * 
+	      pow(c,pm->ap);
+	    force_angle = pm->ap * r10inv*(pm->lj3*r2inv - pm->lj4) * 
+	      pow(c,pm->ap-1)*s;
+
+	    eng_lj = r10inv*(pm->lj3*r2inv - pm->lj4);
+	    if (rsq > pm->cut_innersq) {  // between cut_innersq and cut_outersq: apply switching
+	      switch1 = (pm->cut_outersq-rsq) * (pm->cut_outersq-rsq) * 
+			(pm->cut_outersq + 2.0*rsq - 3.0*pm->cut_innersq) /
+			pm->denom_vdw;
+	      switch2 = 12.0*rsq * (pm->cut_outersq-rsq) *
+			(rsq-pm->cut_innersq) / pm->denom_vdw;
+	      force_kernel = force_kernel*switch1 + eng_lj*switch2;
+	      eng_lj *= switch1;
+	    }
+
+	    if (EFLAG) {
+	      evdwl = eng_lj * pow(c,pm->ap);
+	      evdwl *= factor_hb;
+	    }
+
+	    a = factor_hb*force_angle/s;
+	    b = factor_hb*force_kernel;
+	    
+	    a11 = a*c / rsq1;
+	    a12 = -a / (r1*r2);
+	    a22 = a*c / rsq2;
+	    
+	    vx1 = a11*delr1[0] + a12*delr2[0];
+	    vx2 = a22*delr2[0] + a12*delr1[0];
+	    vy1 = a11*delr1[1] + a12*delr2[1];
+	    vy2 = a22*delr2[1] + a12*delr1[1];
+	    vz1 = a11*delr1[2] + a12*delr2[2];
+	    vz2 = a22*delr2[2] + a12*delr1[2];
+	    
+	    fi[0] = vx1 + b*delx;
+	    fi[1] = vy1 + b*dely;
+	    fi[2] = vz1 + b*delz;
+	    fj[0] = vx2 - b*delx;
+	    fj[1] = vy2 - b*dely;
+	    fj[2] = vz2 - b*delz;
+
+	    fxtmp += fi[0];
+	    fytmp += fi[1];
+	    fztmp += fi[2];
+
+	    f[j][0] += fj[0];
+	    f[j][1] += fj[1];
+	    f[j][2] += fj[2];
+	    
+	    f[k][0] -= vx1 + vx2;
+	    f[k][1] -= vy1 + vy2;
+	    f[k][2] -= vz1 + vz2;
+
+	    // KIJ instead of IJK b/c delr1/delr2 are both with respect to k
+
+	    if (EVFLAG) ev_tally3_thr(this,k,i,j,evdwl,0.0,fi,fj,delr1,delr2,tid);
+	    if (EFLAG) {
+	      hbcount++;
+	      hbeng += evdwl;
+	    }
+	  }
+	}
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+  hbcount_thr[tid] = static_cast<double>(hbcount);  // stays 0 unless EFLAG is set
+  hbeng_thr[tid] = hbeng;
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairHbondDreidingLJOMP::memory_usage()
+{
+  // memory_usage_thr() value + hbcount_thr/hbeng_thr + base pair style
+  const double thr_bytes = memory_usage_thr();
+  const double hb_bytes = comm->nthreads * 2 * sizeof(double);
+  const double base_bytes = PairHbondDreidingLJ::memory_usage();
+  return thr_bytes + hb_bytes + base_bytes;
+}
diff --git a/src/USER-OMP/pair_hbond_dreiding_lj_omp.h b/src/USER-OMP/pair_hbond_dreiding_lj_omp.h
new file mode 100644
index 000000000..1aef78490
--- /dev/null
+++ b/src/USER-OMP/pair_hbond_dreiding_lj_omp.h
@@ -0,0 +1,52 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(hbond/dreiding/lj/omp,PairHbondDreidingLJOMP)
+
+#else
+
+#ifndef LMP_PAIR_HBOND_DREIDING_LJ_OMP_H
+#define LMP_PAIR_HBOND_DREIDING_LJ_OMP_H
+
+#include "pair_hbond_dreiding_lj.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairHbondDreidingLJOMP : public PairHbondDreidingLJ, public ThrOMP {
+  // pair style class deriving from both PairHbondDreidingLJ and ThrOMP
+ public:
+  PairHbondDreidingLJOMP(class LAMMPS *);
+  virtual ~PairHbondDreidingLJOMP();
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ protected:
+  double *hbcount_thr, *hbeng_thr;  // hbond count / energy accumulators, indexed by thread id
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(double **f, int ifrom, int ito, int tid);  // kernel invoked from compute()
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp b/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp
new file mode 100644
index 000000000..b6c966f8c
--- /dev/null
+++ b/src/USER-OMP/pair_hbond_dreiding_morse_omp.cpp
@@ -0,0 +1,297 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_hbond_dreiding_morse_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "domain.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+#include "math_const.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define SMALL 0.001
+
+/* ---------------------------------------------------------------------- */
+
+PairHbondDreidingMorseOMP::PairHbondDreidingMorseOMP(LAMMPS *lmp) :
+  PairHbondDreidingMorse(lmp), ThrOMP(lmp, PAIR)
+{
+  hbeng_thr = hbcount_thr = NULL;  // both buffers get allocated in compute()
+  respa_enable = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairHbondDreidingMorseOMP::~PairHbondDreidingMorseOMP()
+{
+  respa_enable = 0;
+  // hbcount_thr and hbeng_thr are always set together (both NULL or both
+  // allocated), and delete[] on a NULL pointer does nothing.
+  delete[] hbcount_thr;
+  delete[] hbeng_thr;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Runs eval<>() inside an OpenMP parallel region with the ifrom/ito/tid
+// values filled in by loop_setup_thr(), then reduces the per-thread results.
+void PairHbondDreidingMorseOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+  // allocate the per-thread hbond accumulators on the first call only
+  if (!hbcount_thr) {
+    hbcount_thr = new double[nthreads];
+    hbeng_thr = new double[nthreads];
+  }
+  // zero the per-thread hbond accumulators before every evaluation
+  for (int i=0; i < nthreads; ++i) {
+    hbcount_thr[i] = 0.0;
+    hbeng_thr[i] = 0.0;
+  }
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f;
+    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    // select the eval<EVFLAG,EFLAG,NEWTON_PAIR> instantiation matching the flags
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
+	else eval<1,1,0>(f, ifrom, ito, tid);
+      } else {
+	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
+	else eval<1,0,0>(f, ifrom, ito, tid);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
+      else eval<0,0,0>(f, ifrom, ito, tid);
+    }
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+  if (vflag_fdotr) virial_fdotr_compute();
+
+  // reduce per thread hbond data
+  if (eflag_global) {
+    pvector[0] = 0.0;  // summed hbond count
+    pvector[1] = 0.0;  // summed hbond energy
+    for (int i=0; i < nthreads; ++i) {
+      pvector[0] += hbcount_thr[i];
+      pvector[1] += hbeng_thr[i];
+    }
+  }
+}
+
+// Per-thread kernel for the Morse-type hydrogen-bond term: loops over the
+// ilist entries iifrom..iito-1 and accumulates forces into f; the hbond
+// count and energy of this call are stored in hbcount_thr/hbeng_thr[tid].
+// The NEWTON_PAIR template argument is not referenced in this body.
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairHbondDreidingMorseOMP::eval(double **f, int iifrom, int iito, int tid)
+{
+  int i,j,k,m,ii,jj,kk,jnum,knum,itype,jtype,ktype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,rsq,rsq1,rsq2,r1,r2;
+  double factor_hb,force_angle,force_kernel,evdwl;
+  double c,s,a,b,ac,a11,a12,a22,vx1,vx2,vy1,vy2,vz1,vz2;
+  double fi[3],fj[3],delr1[3],delr2[3];
+  double r,dr,dexp,eng_morse,switch1,switch2;
+  int *ilist,*jlist,*klist,*numneigh,**firstneigh;
+  Param *pm;
+
+  evdwl = 0.0;
+
+  double **x = atom->x;
+  int *type = atom->type;
+  int **special = atom->special;
+  int **nspecial = atom->nspecial;
+  double *special_lj = force->special_lj;
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // ii = loop over donors
+  // jj = loop over acceptors
+  // kk = loop over hydrogens bonded to donor
+
+  int hbcount = 0;
+  double hbeng = 0.0;
+
+  for (ii = iifrom; ii < iito; ++ii) {
+    i = ilist[ii];
+    itype = type[i];
+    if (!donor[itype]) continue;  // skip atoms whose type is not flagged in donor[]
+
+    klist = special[i];
+    knum = nspecial[i][0];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=0.0;  // force on i is summed locally and stored after the jj loop
+
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_hb = special_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      jtype = type[j];
+      if (!acceptor[jtype]) continue;  // skip neighbors whose type is not flagged in acceptor[]
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+
+      for (kk = 0; kk < knum; kk++) {
+	k = atom->map(klist[kk]);
+	if (k < 0) continue;  // negative map() result: skip this entry
+	ktype = type[k];
+	m = type2param[itype][jtype][ktype];
+	if (m < 0) continue;  // negative table entry: no parameters for this type triple
+	pm = &params[m];
+
+	if (rsq < pm->cut_outersq) {
+	  delr1[0] = xtmp - x[k][0];
+	  delr1[1] = ytmp - x[k][1];
+	  delr1[2] = ztmp - x[k][2];
+	  domain->minimum_image(delr1);
+	  rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];
+	  r1 = sqrt(rsq1);
+
+	  delr2[0] = x[j][0] - x[k][0];
+	  delr2[1] = x[j][1] - x[k][1];
+	  delr2[2] = x[j][2] - x[k][2];
+	  domain->minimum_image(delr2);
+	  rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2];
+	  r2 = sqrt(rsq2);
+
+	  // angle (cos and sin)
+
+	  c = delr1[0]*delr2[0] + delr1[1]*delr2[1] + delr1[2]*delr2[2];
+	  c /= r1*r2;
+	  if (c > 1.0) c = 1.0;
+	  if (c < -1.0) c = -1.0;
+	  ac = acos(c);
+
+	  if (ac > pm->cut_angle && ac < (2.0*MY_PI - pm->cut_angle)) {
+	    s = sqrt(1.0 - c*c);
+	    if (s < SMALL) s = SMALL;  // s is a divisor further down (a = .../s)
+
+	    // Morse-specific kernel; eng_morse has to be evaluated first
+	    // because force_angle below is computed from it.
+	    r = sqrt(rsq);
+	    dr = r - pm->r0;
+	    dexp = exp(-pm->alpha * dr);
+	    eng_morse = pm->d0 * (dexp*dexp - 2.0*dexp);
+	    force_kernel = pm->morse1*(dexp*dexp - dexp)/r * pow(c,pm->ap);
+	    force_angle = pm->ap * eng_morse * pow(c,pm->ap-1)*s;
+
+	    if (rsq > pm->cut_innersq) {  // between cut_innersq and cut_outersq: apply switching
+	      switch1 = (pm->cut_outersq-rsq) * (pm->cut_outersq-rsq) *
+			(pm->cut_outersq + 2.0*rsq - 3.0*pm->cut_innersq) /
+			pm->denom_vdw;
+	      switch2 = 12.0*rsq * (pm->cut_outersq-rsq) *
+			(rsq-pm->cut_innersq) / pm->denom_vdw;
+	      force_kernel = force_kernel*switch1 + eng_morse*switch2;
+	      eng_morse *= switch1;
+	    }
+
+	    if (EFLAG) {
+	      evdwl = eng_morse * pow(c,pm->ap);
+	      evdwl *= factor_hb;
+	    }
+
+	    a = factor_hb*force_angle/s;
+	    b = factor_hb*force_kernel;
+
+	    a11 = a*c / rsq1;
+	    a12 = -a / (r1*r2);
+	    a22 = a*c / rsq2;
+
+	    vx1 = a11*delr1[0] + a12*delr2[0];
+	    vx2 = a22*delr2[0] + a12*delr1[0];
+	    vy1 = a11*delr1[1] + a12*delr2[1];
+	    vy2 = a22*delr2[1] + a12*delr1[1];
+	    vz1 = a11*delr1[2] + a12*delr2[2];
+	    vz2 = a22*delr2[2] + a12*delr1[2];
+
+	    fi[0] = vx1 + b*delx;
+	    fi[1] = vy1 + b*dely;
+	    fi[2] = vz1 + b*delz;
+	    fj[0] = vx2 - b*delx;
+	    fj[1] = vy2 - b*dely;
+	    fj[2] = vz2 - b*delz;
+	    fxtmp += fi[0];
+	    fytmp += fi[1];
+	    fztmp += fi[2];
+
+	    f[j][0] += fj[0];
+	    f[j][1] += fj[1];
+	    f[j][2] += fj[2];
+
+	    f[k][0] -= vx1 + vx2;
+	    f[k][1] -= vy1 + vy2;
+	    f[k][2] -= vz1 + vz2;
+
+	    // KIJ instead of IJK b/c delr1/delr2 are both with respect to k
+	    if (EVFLAG) ev_tally3_thr(this,k,i,j,evdwl,0.0,fi,fj,delr1,delr2,tid);
+	    if (EFLAG) {
+	      hbcount++;
+	      hbeng += evdwl;
+	    }
+	  }
+	}
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+  hbcount_thr[tid] = static_cast<double>(hbcount);  // stays 0 unless EFLAG is set
+  hbeng_thr[tid] = hbeng;
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairHbondDreidingMorseOMP::memory_usage()
+{
+  // memory_usage_thr() value + hbcount_thr/hbeng_thr + base pair style
+  const double thr_bytes = memory_usage_thr();
+  const double hb_bytes = comm->nthreads * 2 * sizeof(double);
+  const double base_bytes = PairHbondDreidingMorse::memory_usage();
+  return thr_bytes + hb_bytes + base_bytes;
+}
diff --git a/src/USER-OMP/pair_hbond_dreiding_morse_omp.h b/src/USER-OMP/pair_hbond_dreiding_morse_omp.h
new file mode 100644
index 000000000..2a13c618c
--- /dev/null
+++ b/src/USER-OMP/pair_hbond_dreiding_morse_omp.h
@@ -0,0 +1,52 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(hbond/dreiding/morse/omp,PairHbondDreidingMorseOMP)
+
+#else
+
+#ifndef LMP_PAIR_HBOND_DREIDING_MORSE_OMP_H
+#define LMP_PAIR_HBOND_DREIDING_MORSE_OMP_H
+
+#include "pair_hbond_dreiding_morse.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairHbondDreidingMorseOMP : public PairHbondDreidingMorse, public ThrOMP {
+  // pair style class deriving from both PairHbondDreidingMorse and ThrOMP
+ public:
+  PairHbondDreidingMorseOMP(class LAMMPS *);
+  virtual ~PairHbondDreidingMorseOMP();
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ protected:
+  double *hbcount_thr, *hbeng_thr;  // hbond count / energy accumulators, indexed by thread id
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(double **f, int ifrom, int ito, int tid);  // kernel invoked from compute()
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj96_cut_omp.cpp
similarity index 86%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_lj96_cut_omp.cpp
index 8ed82c5e5..f0998363e 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj96_cut_omp.cpp
@@ -1,163 +1,162 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_lj96_cut_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairLJ96CutOMP::PairLJ96CutOMP(LAMMPS *lmp) :
+  PairLJ96Cut(lmp), ThrOMP(lmp, PAIR)
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairLJ96CutOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJ96CutOMP::eval(double **f, int iifrom, int iito, int tid)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double rsq,r2inv,r3inv,r6inv,forcelj,factor_lj;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
 
   double **x = atom->x;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 	r2inv = 1.0/rsq;
 	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+	r3inv = sqrt(r6inv);
+
+	forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
 	fpair = factor_lj*forcelj*r2inv;
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
+	  evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype])
 	    - offset[itype][jtype];
 	  evdwl *= factor_lj;
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
 				 evdwl,0.0,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairLJ96CutOMP::memory_usage()
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairLJ96Cut::memory_usage();
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_lj96_cut_omp.h b/src/USER-OMP/pair_lj96_cut_omp.h
new file mode 100644
index 000000000..333212303
--- /dev/null
+++ b/src/USER-OMP/pair_lj96_cut_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj96/cut/omp,PairLJ96CutOMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ96_CUT_OMP_H
+#define LMP_PAIR_LJ96_CUT_OMP_H
+
+#include "pair_lj96_cut.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJ96CutOMP : public PairLJ96Cut, public ThrOMP {
+  // pair style class deriving from both PairLJ96Cut and ThrOMP
+ public:
+  PairLJ96CutOMP(class LAMMPS *);
+  // virtual entry points declared by this class
+  virtual void compute(int, int);
+  virtual double memory_usage();  // adds memory_usage_thr() to the PairLJ96Cut value
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(double **f, int ifrom, int ito, int tid);  // kernel invoked from compute()
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp
similarity index 54%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp
index 8ed82c5e5..32ad05acd 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.cpp
@@ -1,163 +1,213 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_lj_charmm_coul_charmm_implicit_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairLJCharmmCoulCharmmImplicitOMP::PairLJCharmmCoulCharmmImplicitOMP(LAMMPS *lmp) :
+  PairLJCharmmCoulCharmmImplicit(lmp), ThrOMP(lmp, PAIR)
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairLJCharmmCoulCharmmImplicitOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
+/* ---------------------------------------------------------------------- */
+
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCharmmCoulCharmmImplicitOMP::eval(double **f, int iifrom, int iito, int tid)
 {
   int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
+  double philj,switch1,switch2;
+  double invdenom_coul,invdenom_lj;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
-  evdwl = 0.0;
+  evdwl = ecoul = 0.0;
 
   double **x = atom->x;
+  double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
+  double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
+  double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
+  invdenom_coul = (denom_coul != 0.0) ? 1.0/denom_coul : 0.0;
+  invdenom_lj   = (denom_lj   != 0.0) ? 1.0/denom_lj   : 0.0;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
+    qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
-      if (rsq < cutsq[itype][jtype]) {
+      if (rsq < cut_bothsq) {
 	r2inv = 1.0/rsq;
-	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-	fpair = factor_lj*forcelj*r2inv;
+
+	if (rsq < cut_coulsq) {
+	  forcecoul = 2.0 * qqrd2e * qtmp*q[j]*r2inv;
+	  if (rsq > cut_coul_innersq) {
+	    switch1 = (cut_coulsq-rsq) * (cut_coulsq-rsq) *
+	      (cut_coulsq + 2.0*rsq - 3.0*cut_coul_innersq) * invdenom_coul;
+	    switch2 = 12.0*rsq * (cut_coulsq-rsq) * 
+	      (rsq-cut_coul_innersq) * invdenom_coul;
+	    forcecoul *= switch1 + switch2;
+	  }
+	  forcecoul *= factor_coul;
+	} else forcecoul = 0.0;
+
+	if (rsq < cut_ljsq) {
+	  r6inv = r2inv*r2inv*r2inv;
+	  jtype = type[j];
+	  forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+	  if (rsq > cut_lj_innersq) {
+	    switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
+	      (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * invdenom_lj;
+	    switch2 = 12.0*rsq * (cut_ljsq-rsq) * 
+	      (rsq-cut_lj_innersq) * invdenom_lj;
+	    philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
+	    forcelj = forcelj*switch1 + philj*switch2;
+	  }
+	  forcelj *= factor_lj;
+	} else forcelj = 0.0;
+
+	fpair = (forcecoul + forcelj) * r2inv;
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
-	  evdwl *= factor_lj;
+	  if (rsq < cut_coulsq) {
+	    ecoul = qqrd2e * qtmp*q[j]*r2inv;
+	    if (rsq > cut_coul_innersq) {
+	      switch1 = (cut_coulsq-rsq) * (cut_coulsq-rsq) *
+		(cut_coulsq + 2.0*rsq - 3.0*cut_coul_innersq) *
+		invdenom_coul;
+	      ecoul *= switch1;
+	    }
+	    ecoul *= factor_coul;
+	  } else ecoul = 0.0;
+	  if (rsq < cut_ljsq) {
+	    evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
+	    if (rsq > cut_lj_innersq) {
+	      switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
+		(cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * invdenom_lj;
+	      evdwl *= switch1;
+	    }
+	    evdwl *= factor_lj;
+	  } else evdwl = 0.0;
 	}
-
+	
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairLJCharmmCoulCharmmImplicitOMP::memory_usage()
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairLJCharmmCoulCharmmImplicit::memory_usage();
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.h b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.h
new file mode 100644
index 000000000..ba016d7d3
--- /dev/null
+++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_implicit_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/charmm/coul/charmm/implicit/omp,PairLJCharmmCoulCharmmImplicitOMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_CHARMM_COUL_CHARMM_IMPLICIT_OMP_H
+#define LMP_PAIR_LJ_CHARMM_COUL_CHARMM_IMPLICIT_OMP_H
+
+#include "pair_lj_charmm_coul_charmm_implicit.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJCharmmCoulCharmmImplicitOMP : public PairLJCharmmCoulCharmmImplicit, public ThrOMP {  // non-OMP pair style combined with the ThrOMP helper class
+
+ public:
+  PairLJCharmmCoulCharmmImplicitOMP(class LAMMPS *);
+
+  virtual void compute(int, int);  // presumably overrides the base pair style's compute - confirm against pair.h
+  virtual double memory_usage();  // sums memory_usage_thr() and the base style's memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // compile-time switches: tally call, energy evaluation, skip of the j < nlocal test
+  void eval(double **f, int ifrom, int ito, int tid);  // pair kernel; takes the force array, a list index range and a thread id
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp
similarity index 55%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp
index 8ed82c5e5..6dac7a17f 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.cpp
@@ -1,163 +1,213 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_lj_charmm_coul_charmm_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairLJCharmmCoulCharmmOMP::PairLJCharmmCoulCharmmOMP(LAMMPS *lmp) :  // constructor: forwards lmp to both bases, then clears respa_enable
+  PairLJCharmmCoulCharmm(lmp), ThrOMP(lmp, PAIR)  // PAIR is ThrOMP's second constructor argument (its meaning is defined outside this file)
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairLJCharmmCoulCharmmOMP::compute(int eflag, int vflag)  // sets up tallies if eflag||vflag, runs one eval<> per thread, then reduces forces and tallies
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);  // 3 is presumably the number of values per atom (f[i][0..2]) - confirm in thr_omp.h
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
+/* ---------------------------------------------------------------------- */
+
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCharmmCoulCharmmOMP::eval(double **f, int iifrom, int iito, int tid)  // pair kernel for ilist entries [iifrom,iito); forces accumulate into f
 {
   int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
+  double philj,switch1,switch2;  // philj: unswitched LJ energy term; switch1/switch2: switching polynomials in rsq
+  double invdenom_coul,invdenom_lj;  // reciprocals of denom_coul/denom_lj, computed once before the loops
   int *ilist,*jlist,*numneigh,**firstneigh;
 
-  evdwl = 0.0;
+  evdwl = ecoul = 0.0;  // these zeros are what ev_tally_thr receives when EFLAG is 0
 
   double **x = atom->x;
+  double *q = atom->q;  // indexed by atom index; enters the Coulomb terms as qtmp*q[j]
   int *type = atom->type;
   int nlocal = atom->nlocal;
+  double *special_coul = force->special_coul;  // Coulomb scale factors, looked up with sbmask(j)
   double *special_lj = force->special_lj;
+  double qqrd2e = force->qqrd2e;  // constant prefactor multiplying every qtmp*q[j] term
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
+  invdenom_coul = (denom_coul != 0.0) ? 1.0/denom_coul : 0.0;  // the 0.0 fallback avoids a division by zero
+  invdenom_lj   = (denom_lj   != 0.0) ? 1.0/denom_lj   : 0.0;  // denom_* are not defined in this function (presumably base-class members)
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
+    qtmp = q[i];  // q[i] cached for the inner loop
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];  // read before the next line overwrites j with j & NEIGHMASK
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
-      if (rsq < cutsq[itype][jtype]) {
+      if (rsq < cut_bothsq) {  // one scalar cutoff for every pair; not indexed by itype/jtype
 	r2inv = 1.0/rsq;
-	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-	fpair = factor_lj*forcelj*r2inv;
+
+	if (rsq < cut_coulsq) {
+	  forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);  // qqrd2e*qi*qj/r; the remaining 1/r^2 is applied through fpair
+	  if (rsq > cut_coul_innersq) {  // between inner and outer Coulomb cutoff: apply the switching terms
+	    switch1 = (cut_coulsq-rsq) * (cut_coulsq-rsq) *
+	      (cut_coulsq + 2.0*rsq - 3.0*cut_coul_innersq) * invdenom_coul;  // vanishes at rsq == cut_coulsq
+	    switch2 = 12.0*rsq * (cut_coulsq-rsq) * 
+	      (rsq-cut_coul_innersq) * invdenom_coul;  // vanishes at both cut_coul_innersq and cut_coulsq
+	    forcecoul *= switch1 + switch2;
+	  }
+	  forcecoul *= factor_coul;
+	} else forcecoul = 0.0;
+
+	if (rsq < cut_ljsq) {
+	  r6inv = r2inv*r2inv*r2inv;
+	  jtype = type[j];  // NOTE(review): redundant - jtype was already loaded from type[j] before the cutoff test
+	  forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+	  if (rsq > cut_lj_innersq) {
+	    switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
+	      (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * invdenom_lj;
+	    switch2 = 12.0*rsq * (cut_ljsq-rsq) * 
+	      (rsq-cut_lj_innersq) * invdenom_lj;
+	    philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);  // same expression as evdwl below, before switching
+	    forcelj = forcelj*switch1 + philj*switch2;
+	  }
+	  forcelj *= factor_lj;
+	} else forcelj = 0.0;
+
+	fpair = (forcecoul + forcelj) * r2inv;  // scalar that multiplies delx/dely/delz to give the force components
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
-	  evdwl *= factor_lj;
+	  if (rsq < cut_coulsq) {
+	    ecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
+	    if (rsq > cut_coul_innersq) {
+	      switch1 = (cut_coulsq-rsq) * (cut_coulsq-rsq) *
+		(cut_coulsq + 2.0*rsq - 3.0*cut_coul_innersq) *
+		invdenom_coul;  // recomputed: the LJ force section may have overwritten switch1
+	      ecoul *= switch1;
+	    }
+	    ecoul *= factor_coul;
+	  } else ecoul = 0.0;
+	  if (rsq < cut_ljsq) {
+	    evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);  // r6inv was set in the force section under the same rsq < cut_ljsq test
+	    if (rsq > cut_lj_innersq) {
+	      switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
+		(cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * invdenom_lj;
+	      evdwl *= switch1;
+	    }
+	    evdwl *= factor_lj;
+	  } else evdwl = 0.0;
 	}
-
+	
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,tid);  // ecoul is handed over as its own argument, separate from evdwl
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairLJCharmmCoulCharmmOMP::memory_usage()  // returns memory_usage_thr() plus the base style's memory_usage()
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairLJCharmmCoulCharmm::memory_usage();  // explicit base-class call: adds what the non-OMP parent reports
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h
new file mode 100644
index 000000000..f2889b05f
--- /dev/null
+++ b/src/USER-OMP/pair_lj_charmm_coul_charmm_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/charmm/coul/charmm/omp,PairLJCharmmCoulCharmmOMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_CHARMM_COUL_CHARMM_OMP_H
+#define LMP_PAIR_LJ_CHARMM_COUL_CHARMM_OMP_H
+
+#include "pair_lj_charmm_coul_charmm.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJCharmmCoulCharmmOMP : public PairLJCharmmCoulCharmm, public ThrOMP {  // non-OMP pair style combined with the ThrOMP helper class
+
+ public:
+  PairLJCharmmCoulCharmmOMP(class LAMMPS *);  // forwards the LAMMPS pointer to both bases and clears respa_enable
+
+  virtual void compute(int, int);  // arguments are (eflag, vflag); dispatches to eval<> inside an OpenMP parallel region
+  virtual double memory_usage();  // sums memory_usage_thr() and the base style's memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // compile-time switches: tally call, energy evaluation, skip of the j < nlocal test
+  void eval(double **f, int ifrom, int ito, int tid);  // pair kernel over ilist entries [ifrom,ito), accumulating into f
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp
similarity index 50%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp
index 8ed82c5e5..c99f27f2e 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_charmm_coul_long_omp.cpp
@@ -1,163 +1,234 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_lj_charmm_coul_long_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
+#define EWALD_F   1.12837917  // 2/sqrt(pi)
+#define EWALD_P   0.3275911  // p in t = 1/(1+p*x) of the Abramowitz-Stegun 7.1.26 erfc approximation
+#define A1        0.254829592  // A1..A5: polynomial coefficients of that approximation, used in eval()
+#define A2       -0.284496736
+#define A3        1.421413741
+#define A4       -1.453152027
+#define A5        1.061405429
+
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairLJCharmmCoulLongOMP::PairLJCharmmCoulLongOMP(LAMMPS *lmp) :  // constructor: forwards lmp to both bases, then clears respa_enable
+  PairLJCharmmCoulLong(lmp), ThrOMP(lmp, PAIR)  // PAIR is ThrOMP's second constructor argument (its meaning is defined outside this file)
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairLJCharmmCoulLongOMP::compute(int eflag, int vflag)  // sets up tallies if eflag||vflag, runs one eval<> per thread, then reduces forces and tallies
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);  // 3 is presumably the number of values per atom (f[i][0..2]) - confirm in thr_omp.h
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
+/* ---------------------------------------------------------------------- */
+
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCharmmCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)  // pair kernel for ilist entries [iifrom,iito); forces accumulate into f
 {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  int i,j,ii,jj,jnum,itype,jtype,itable;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double fraction,table;  // interpolation weight and interpolated table value for the tabulated path
+  double r,rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
+  double grij,expm2,prefactor,t,erfc;  // NOTE(review): the local named erfc hides any erfc() declared by math.h within this function
+  double philj,switch1,switch2;  // philj: unswitched LJ energy term; switch1/switch2: switching polynomials in rsq
   int *ilist,*jlist,*numneigh,**firstneigh;
 
-  evdwl = 0.0;
+  evdwl = ecoul = 0.0;  // these zeros are what ev_tally_thr receives when EFLAG is 0
 
   double **x = atom->x;
+  double *q = atom->q;  // indexed by atom index; enters the Coulomb terms as qtmp*q[j]
   int *type = atom->type;
   int nlocal = atom->nlocal;
+  double *special_coul = force->special_coul;  // Coulomb scale factors, looked up with sbmask(j)
   double *special_lj = force->special_lj;
+  double qqrd2e = force->qqrd2e;  // constant prefactor of the analytic Coulomb term
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
+    qtmp = q[i];  // q[i] cached for the inner loop
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];  // read before the next line overwrites j with j & NEIGHMASK
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 	r2inv = 1.0/rsq;
-	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-	fpair = factor_lj*forcelj*r2inv;
+
+	if (rsq < cut_coulsq) {
+	  if (!ncoultablebits || rsq <= tabinnersq) {  // analytic path: ncoultablebits is zero or rsq is at/below tabinnersq
+	    r = sqrt(rsq);
+	    grij = g_ewald * r;
+	    expm2 = exp(-grij*grij);
+	    t = 1.0 / (1.0 + EWALD_P*grij);
+	    erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;  // polynomial approximation of erfc(grij)
+	    prefactor = qqrd2e * qtmp*q[j]/r;
+	    forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+	    if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;  // remove the (1-factor_coul) share of the plain Coulomb term
+	  } else {
+	    union_int_float_t rsq_lookup;  // declared outside this file; presumably .f and .i alias the same bits - confirm
+	    rsq_lookup.f = rsq;
+	    itable = rsq_lookup.i & ncoulmask;
+	    itable >>= ncoulshiftbits;  // masked and shifted bit pattern of rsq becomes the table index
+	    fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
+	    table = ftable[itable] + fraction*dftable[itable];  // table entry plus fraction times its delta
+	    forcecoul = qtmp*q[j] * table;
+	    if (factor_coul < 1.0) {
+	      table = ctable[itable] + fraction*dctable[itable];
+	      prefactor = qtmp*q[j] * table;  // on the table path prefactor is assigned only when factor_coul < 1.0
+	      forcecoul -= (1.0-factor_coul)*prefactor;
+	    }
+	  }
+	} else forcecoul = 0.0;
+
+	if (rsq < cut_ljsq) {
+	  r6inv = r2inv*r2inv*r2inv;
+	  jtype = type[j];  // NOTE(review): redundant - jtype was already loaded from type[j] before the cutoff test
+	  forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+	  if (rsq > cut_lj_innersq) {
+	    switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
+	      (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;  // NOTE(review): no guard for denom_lj == 0; confirm it is nonzero whenever this branch is reachable
+	    switch2 = 12.0*rsq * (cut_ljsq-rsq) * 
+	      (rsq-cut_lj_innersq) / denom_lj;  // vanishes at both cut_lj_innersq and cut_ljsq
+	    philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);  // same expression as evdwl below, before switching
+	    forcelj = forcelj*switch1 + philj*switch2;
+	  }
+	  forcelj *= factor_lj;
+	} else forcelj = 0.0;
+
+	fpair = (forcecoul + forcelj) * r2inv;  // scalar that multiplies delx/dely/delz to give the force components
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
-	  evdwl *= factor_lj;
+	  if (rsq < cut_coulsq) {
+	    if (!ncoultablebits || rsq <= tabinnersq)
+	      ecoul = prefactor*erfc;  // reuses prefactor and erfc from the analytic force branch (same condition)
+	    else {
+	      table = etable[itable] + fraction*detable[itable];  // itable and fraction carry over from the force lookup
+	      ecoul = qtmp*q[j] * table;
+	    }
+	    if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;  // prefactor was assigned on both force paths when factor_coul < 1.0
+	  } else ecoul = 0.0;
+
+	  if (rsq < cut_ljsq) {
+	    evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);  // r6inv was set in the force section under the same rsq < cut_ljsq test
+	    if (rsq > cut_lj_innersq) {
+	      switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
+		(cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
+	      evdwl *= switch1;
+	    }
+	    evdwl *= factor_lj;
+	  } else evdwl = 0.0;
 	}
-
+	
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,tid);  // ecoul is handed over as its own argument, separate from evdwl
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairLJCharmmCoulLongOMP::memory_usage()  // returns memory_usage_thr() plus the base style's memory_usage()
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairLJCharmmCoulLong::memory_usage();  // explicit base-class call: adds what the non-OMP parent reports
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_lj_charmm_coul_long_omp.h b/src/USER-OMP/pair_lj_charmm_coul_long_omp.h
new file mode 100644
index 000000000..b14e4c1fe
--- /dev/null
+++ b/src/USER-OMP/pair_lj_charmm_coul_long_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/charmm/coul/long/omp,PairLJCharmmCoulLongOMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_CHARMM_COUL_LONG_OMP_H
+#define LMP_PAIR_LJ_CHARMM_COUL_LONG_OMP_H
+
+#include "pair_lj_charmm_coul_long.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJCharmmCoulLongOMP : public PairLJCharmmCoulLong, public ThrOMP {  // non-OMP pair style combined with the ThrOMP helper class
+
+ public:
+  PairLJCharmmCoulLongOMP(class LAMMPS *);  // forwards the LAMMPS pointer to both bases and clears respa_enable
+
+  virtual void compute(int, int);  // arguments are (eflag, vflag); dispatches to eval<> inside an OpenMP parallel region
+  virtual double memory_usage();  // sums memory_usage_thr() and the base style's memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // compile-time switches: tally call, energy evaluation, skip of the j < nlocal test
+  void eval(double **f, int ifrom, int ito, int tid);  // pair kernel over ilist entries [ifrom,ito), accumulating into f
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp
similarity index 68%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp
index 8ed82c5e5..032188279 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_class2_coul_cut_omp.cpp
@@ -1,163 +1,185 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_lj_class2_coul_cut_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairLJClass2CoulCutOMP::PairLJClass2CoulCutOMP(LAMMPS *lmp) :  // constructor: forwards lmp to both bases, then clears respa_enable
+  PairLJClass2CoulCut(lmp), ThrOMP(lmp, PAIR)  // PAIR is ThrOMP's second constructor argument (its meaning is defined outside this file)
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairLJClass2CoulCutOMP::compute(int eflag, int vflag)  // sets up tallies if eflag||vflag, runs one eval<> per thread, then reduces forces and tallies
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);  // 3 is presumably the number of values per atom (f[i][0..2]) - confirm in thr_omp.h
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
+/* ---------------------------------------------------------------------- */
+
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJClass2CoulCutOMP::eval(double **f, int iifrom, int iito, int tid)  // pair kernel for ilist entries [iifrom,iito); forces accumulate into f
 {
   int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double rsq,rinv,r2inv,r3inv,r6inv,forcecoul,forcelj;  // rinv, r2inv, r3inv, r6inv: inverse powers of the pair distance
+  double factor_coul,factor_lj;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
-  evdwl = 0.0;
+  evdwl = ecoul = 0.0;  // these zeros are what ev_tally_thr receives when EFLAG is 0
 
   double **x = atom->x;
+  double *q = atom->q;  // indexed by atom index; enters the Coulomb terms as qtmp*q[j]
   int *type = atom->type;
   int nlocal = atom->nlocal;
+  double *special_coul = force->special_coul;  // Coulomb scale factors, looked up with sbmask(j)
   double *special_lj = force->special_lj;
+  double qqrd2e = force->qqrd2e;  // constant prefactor multiplying every qtmp*q[j] term
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
+    qtmp = q[i];  // q[i] cached for the inner loop
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];  // read before the next line overwrites j with j & NEIGHMASK
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 	r2inv = 1.0/rsq;
-	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-	fpair = factor_lj*forcelj*r2inv;
+	rinv = sqrt(r2inv);  // 1/r
+
+	if (rsq < cut_coulsq[itype][jtype]) {  // Coulomb cutoff is looked up per (itype,jtype)
+	  forcecoul = qqrd2e * qtmp*q[j]*rinv;  // qqrd2e*qi*qj/r; the remaining 1/r^2 is applied through fpair
+	  forcecoul *= factor_coul;
+	} else forcecoul = 0.0;
+
+	if (rsq < cut_ljsq[itype][jtype]) {  // LJ cutoff is looked up per (itype,jtype)
+	  r3inv = r2inv*rinv;
+	  r6inv = r3inv*r3inv;
+	  forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);  // an r^-9 term minus an r^-6 term
+	  forcelj *= factor_lj;
+	} else forcelj = 0.0;
+
+	fpair = (forcecoul + forcelj) * r2inv;  // scalar that multiplies delx/dely/delz to give the force components
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
-	  evdwl *= factor_lj;
+	  if (rsq < cut_coulsq[itype][jtype])
+	    ecoul = factor_coul * qqrd2e * qtmp*q[j]*rinv;
+	  else ecoul = 0.0;
+	  if (rsq < cut_ljsq[itype][jtype]) {
+	    evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
+	      offset[itype][jtype];  // offset is subtracted before the factor_lj scaling on the next line
+	    evdwl *= factor_lj;
+	  } else evdwl = 0.0;
 	}
-
+	
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,tid);  // ecoul is handed over as its own argument, separate from evdwl
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairLJClass2CoulCutOMP::memory_usage()  // returns memory_usage_thr() plus the base style's memory_usage()
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairLJClass2CoulCut::memory_usage();  // explicit base-class call: adds what the non-OMP parent reports
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_lj_class2_coul_cut_omp.h b/src/USER-OMP/pair_lj_class2_coul_cut_omp.h
new file mode 100644
index 000000000..5fe489569
--- /dev/null
+++ b/src/USER-OMP/pair_lj_class2_coul_cut_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/class2/coul/cut/omp,PairLJClass2CoulCutOMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_CLASS2_COUL_CUT_OMP_H
+#define LMP_PAIR_LJ_CLASS2_COUL_CUT_OMP_H
+
+#include "pair_lj_class2_coul_cut.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJClass2CoulCutOMP : public PairLJClass2CoulCut, public ThrOMP {  // non-OMP pair style combined with the ThrOMP helper class
+
+ public:
+  PairLJClass2CoulCutOMP(class LAMMPS *);  // forwards the LAMMPS pointer to both bases and clears respa_enable
+
+  virtual void compute(int, int);  // arguments are (eflag, vflag); dispatches to eval<> inside an OpenMP parallel region
+  virtual double memory_usage();  // sums memory_usage_thr() and the base style's memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // compile-time switches: tally call, energy evaluation, skip of the j < nlocal test
+  void eval(double **f, int ifrom, int ito, int tid);  // pair kernel over ilist entries [ifrom,ito), accumulating into f
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp
similarity index 62%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_lj_class2_coul_long_omp.cpp
index 8ed82c5e5..84d26ceb1 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_class2_coul_long_omp.cpp
@@ -1,163 +1,201 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_lj_class2_coul_long_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
+#define EWALD_F   1.12837917  // 2/sqrt(pi)
+#define EWALD_P   0.3275911  // p in t = 1/(1+p*x) of the erfc(x) approximation used in eval()
+#define A1        0.254829592  // A1..A5: coefficients of the degree-5 polynomial in t of that approximation
+#define A2       -0.284496736  // (values match Abramowitz & Stegun formula 7.1.26)
+#define A3        1.421413741
+#define A4       -1.453152027
+#define A5        1.061405429
+
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairLJClass2CoulLongOMP::PairLJClass2CoulLongOMP(LAMMPS *lmp) :  // constructor: forwards lmp to both base classes
+  PairLJClass2CoulLong(lmp), ThrOMP(lmp, PAIR)  // PAIR is handed to ThrOMP; NOTE(review): presumably identifies the style category -- confirm in thr_omp.h
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairLJClass2CoulLongOMP::compute(int eflag, int vflag)  // runs eval<EVFLAG,EFLAG,NEWTON_PAIR> in an OpenMP parallel region, then reduces the results
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);  // NOTE(review): the 3 is presumably the number of values per atom (x,y,z) -- confirm in thr_omp
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
+/* ---------------------------------------------------------------------- */
+
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJClass2CoulLongOMP::eval(double **f, int iifrom, int iito, int tid)  // pair loop over ilist[iifrom..iito) for thread tid; forces go into f
 {
   int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double r,rsq,rinv,r2inv,r3inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
+  double grij,expm2,prefactor,t,erfc;  // note: the local 'erfc' hides the math.h erfc() inside this function
   int *ilist,*jlist,*numneigh,**firstneigh;
 
-  evdwl = 0.0;
+  evdwl = ecoul = 0.0;
 
   double **x = atom->x;
+  double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
+  double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
+  double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
+    qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];  // must be read before j is masked with NEIGHMASK on the next line
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 	r2inv = 1.0/rsq;
-	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-	fpair = factor_lj*forcelj*r2inv;
+
+	if (rsq < cut_coulsq) {  // erfc-screened Coulomb term, evaluated only inside the Coulomb cutoff
+	  r = sqrt(rsq);
+	  grij = g_ewald * r;
+	  expm2 = exp(-grij*grij);
+	  t = 1.0 / (1.0 + EWALD_P*grij);
+	  erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;  // polynomial approximation of erfc(grij)
+	  prefactor = qqrd2e * qtmp*q[j]/r;
+	  forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+	  if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;  // remove the (1-factor_coul) share of the unscreened term
+	} else forcecoul = 0.0;
+
+	if (rsq < cut_ljsq[itype][jtype]) {
+	  rinv = sqrt(r2inv);
+	  r3inv = r2inv*rinv;
+	  r6inv = r3inv*r3inv;
+	  forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);  // r^-9 and r^-6 terms
+	  forcelj *= factor_lj;
+	} else forcelj = 0.0;
+
+	fpair = (forcecoul + forcelj) * r2inv;  // extra 1/r^2 so that fpair*del{x,y,z} gives the force components
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
-	  evdwl *= factor_lj;
+	  if (rsq < cut_coulsq) {  // same condition as the force branch, so prefactor and erfc are already set
+	    ecoul = prefactor*erfc;
+	    if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
+	  } else ecoul = 0.0;
+
+	  if (rsq < cut_ljsq[itype][jtype]) {  // same condition as the force branch, so r3inv and r6inv are already set
+	    evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
+	      offset[itype][jtype];
+	    evdwl *= factor_lj;
+	  } else evdwl = 0.0;
 	}
-
+	
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairLJClass2CoulLongOMP::memory_usage()  // returns memory_usage_thr() plus the base class's memory_usage()
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairLJClass2CoulLong::memory_usage();  // qualified call into the non-threaded parent class
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_lj_class2_coul_long_omp.h b/src/USER-OMP/pair_lj_class2_coul_long_omp.h
new file mode 100644
index 000000000..da4ac3680
--- /dev/null
+++ b/src/USER-OMP/pair_lj_class2_coul_long_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/class2/coul/long/omp,PairLJClass2CoulLongOMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_CLASS2_COUL_LONG_OMP_H
+#define LMP_PAIR_LJ_CLASS2_COUL_LONG_OMP_H
+
+#include "pair_lj_class2_coul_long.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJClass2CoulLongOMP : public PairLJClass2CoulLong, public ThrOMP {  // class registered as pair style lj/class2/coul/long/omp
+
+ public:
+  PairLJClass2CoulLongOMP(class LAMMPS *);
+
+  virtual void compute(int, int);  // (eflag, vflag); calls eval<> inside an OpenMP parallel region
+  virtual double memory_usage();  // memory_usage_thr() plus PairLJClass2CoulLong::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // the three flags are template parameters, i.e. fixed at compile time
+  void eval(double **f, int ifrom, int ito, int tid);  // pair loop over ilist[ifrom..ito) for thread tid, accumulating into f
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_class2_omp.cpp
similarity index 86%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_lj_class2_omp.cpp
index 8ed82c5e5..4f5d2550f 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_class2_omp.cpp
@@ -1,163 +1,162 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_lj_class2_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairLJClass2OMP::PairLJClass2OMP(LAMMPS *lmp) :  // constructor: forwards lmp to both base classes
+  PairLJClass2(lmp), ThrOMP(lmp, PAIR)  // PAIR is handed to ThrOMP; NOTE(review): presumably identifies the style category -- confirm in thr_omp.h
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairLJClass2OMP::compute(int eflag, int vflag)  // runs eval<EVFLAG,EFLAG,NEWTON_PAIR> in an OpenMP parallel region, then reduces the results
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);  // NOTE(review): the 3 is presumably the number of values per atom (x,y,z) -- confirm in thr_omp
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJClass2OMP::eval(double **f, int iifrom, int iito, int tid)  // pair loop over ilist[iifrom..iito) for thread tid; forces go into f
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double rsq,r2inv,r3inv,r6inv,forcelj,factor_lj;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
 
   double **x = atom->x;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 	r2inv = 1.0/rsq;
 	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+	r3inv = sqrt(r6inv);  // r^-3, obtained as the square root of r^-6
+
+	forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);  // r^-9 and r^-6 terms (the removed lj/cut line had r^-12 and r^-6)
 	fpair = factor_lj*forcelj*r2inv;
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
+	  evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype])  // energy uses the same r^-9 / r^-6 pair of powers
 	    - offset[itype][jtype];
 	  evdwl *= factor_lj;
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
 				 evdwl,0.0,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairLJClass2OMP::memory_usage()  // returns memory_usage_thr() plus the base class's memory_usage()
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairLJClass2::memory_usage();  // qualified call into the non-threaded parent class
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_lj_class2_omp.h b/src/USER-OMP/pair_lj_class2_omp.h
new file mode 100644
index 000000000..cfe24bb71
--- /dev/null
+++ b/src/USER-OMP/pair_lj_class2_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/class2/omp,PairLJClass2OMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_CLASS2_OMP_H
+#define LMP_PAIR_LJ_CLASS2_OMP_H
+
+#include "pair_lj_class2.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJClass2OMP : public PairLJClass2, public ThrOMP {  // class registered as pair style lj/class2/omp
+
+ public:
+  PairLJClass2OMP(class LAMMPS *);
+
+  virtual void compute(int, int);  // (eflag, vflag); calls eval<> inside an OpenMP parallel region
+  virtual double memory_usage();  // memory_usage_thr() plus PairLJClass2::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // the three flags are template parameters, i.e. fixed at compile time
+  void eval(double **f, int ifrom, int ito, int tid);  // pair loop over ilist[ifrom..ito) for thread tid, accumulating into f
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_coul_omp.cpp b/src/USER-OMP/pair_lj_coul_omp.cpp
new file mode 100644
index 000000000..23e2a8d90
--- /dev/null
+++ b/src/USER-OMP/pair_lj_coul_omp.cpp
@@ -0,0 +1,234 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_lj_coul_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "math_vector.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+#define EWALD_F   1.12837917  // 2/sqrt(pi)
+#define EWALD_P   0.3275911  // p in t = 1/(1+p*x) of the erfc(x) approximation used in eval()
+#define A1        0.254829592  // A1..A5: coefficients of the degree-5 polynomial in t of that approximation
+#define A2       -0.284496736  // (values match Abramowitz & Stegun formula 7.1.26)
+#define A3        1.421413741
+#define A4       -1.453152027
+#define A5        1.061405429
+
+/* ---------------------------------------------------------------------- */
+
+PairLJCoulOMP::PairLJCoulOMP(LAMMPS *lmp) :  // constructor: forwards lmp to both base classes
+  PairLJCoul(lmp), ThrOMP(lmp, PAIR)  // PAIR is handed to ThrOMP; NOTE(review): presumably identifies the style category -- confirm in thr_omp.h
+{
+  respa_enable = 0;  // NOTE(review): presumably switches off rRESPA support for this threaded variant -- confirm against Pair
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJCoulOMP::compute(int eflag, int vflag)  // runs eval<EVFLAG,EFLAG,NEWTON_PAIR> in an OpenMP parallel region, then reduces the results
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;  // declared without values; they are first used after the loop_setup_thr() call below
+    double **f;
+
+    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+
+    if (evflag) {  // pick the eval<> instantiation matching evflag, eflag and force->newton_pair
+      if (eflag) {
+	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
+	else eval<1,1,0>(f, ifrom, ito, tid);
+      } else {
+	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
+	else eval<1,0,0>(f, ifrom, ito, tid);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
+      else eval<0,0,0>(f, ifrom, ito, tid);
+    }
+
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);  // NOTE(review): the 3 is presumably the number of values per atom (x,y,z) -- confirm in thr_omp
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairLJCoulOMP::eval(double **f, int iifrom, int iito, int tid)  // pair loop over ilist[iifrom..iito) for thread tid; forces go into f
+{
+  double evdwl,ecoul,fpair;
+  evdwl = ecoul = 0.0;
+
+  double **x = atom->x;
+  double *q = atom->q;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  double *special_coul = force->special_coul;
+  double *special_lj = force->special_lj;
+  double qqrd2e = force->qqrd2e;
+
+  double *x0 = x[0];  // x and f are addressed below as flat arrays with 3 doubles per atom
+  double *f0 = f[0], *fi = f0;
+
+  int *ilist = list->ilist;
+
+  // loop over neighbors of my atoms
+
+  int i, ii, j, order1 = ewald_order&(1<<1), order6 = ewald_order&(1<<6);  // bits 1 and 6 of ewald_order gate the coulombic and long-range lj branches below
+  int *jneigh, *jneighn, typei, typej, ni;
+  double qi, qri, *cutsqi, *cut_ljsqi, *lj1i, *lj2i, *lj3i, *lj4i, *offseti;
+  double rsq, r2inv, force_coul, force_lj;
+  double g2 = g_ewald*g_ewald, g6 = g2*g2*g2, g8 = g6*g2;
+  vector xi, d;
+
+  for (ii = iifrom; ii < iito; ++ii) {			// loop over my atoms
+    i = ilist[ii]; fi = f0+3*i;
+    if (order1) qri = (qi = q[i])*qqrd2e;		// initialize constants
+    offseti = offset[typei = type[i]];
+    lj1i = lj1[typei]; lj2i = lj2[typei]; lj3i = lj3[typei]; lj4i = lj4[typei];
+    cutsqi = cutsq[typei]; cut_ljsqi = cut_ljsq[typei];
+    memcpy(xi, x0+(i+(i<<1)), sizeof(vector));  // copy the position of atom i into xi; i+(i<<1) equals 3*i
+    jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i];  // jneigh..jneighn: begin/end of atom i's neighbor entries
+
+    for (; jneigh<jneighn; ++jneigh) {			// loop over neighbors
+      j = *jneigh;
+      ni = sbmask(j);  // index into special_coul/special_lj; 0 selects the unscaled branches below
+      j &= NEIGHMASK;
+      
+      { register double *xj = x0+(j+(j<<1));
+	d[0] = xi[0] - xj[0];				// pair vector
+	d[1] = xi[1] - xj[1];
+	d[2] = xi[2] - xj[2]; }
+
+      if ((rsq = vec_dot(d, d)) >= cutsqi[typej = type[j]]) continue;  // also assigns rsq and typej as a side effect
+      r2inv = 1.0/rsq;
+
+      if (order1 && (rsq < cut_coulsq)) {		// coulombic
+	if (!ncoultablebits || rsq <= tabinnersq) {	// series real space
+	  register double r = sqrt(rsq), x = g_ewald*r;  // this x hides the outer position array x within the block
+	  register double s = qri*q[j], t = 1.0/(1.0+EWALD_P*x);
+	  if (ni == 0) {
+	    s *= g_ewald*exp(-x*x);
+	    force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s;  // t is overwritten in place and then used as the energy term
+	    if (EFLAG) ecoul = t;
+	  }
+	  else {					// special case
+	    r = s*(1.0-special_coul[ni])/r; s *= g_ewald*exp(-x*x);  // r is reused to hold the correction subtracted below
+	    force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s-r;
+	    if (EFLAG) ecoul = t-r;
+	  }
+	}						// table real space
+	else {
+	  register union_int_float_t t;  // union used to read the bit pattern of rsq stored as a float
+	  t.f = rsq;
+	  register const int k = (t.i & ncoulmask)>>ncoulshiftbits;  // table index taken from masked and shifted bits of rsq
+	  register double f = (rsq-rtable[k])*drtable[k], qiqj = qi*q[j];  // this f is an interpolation fraction; it hides the force-array parameter f
+	  if (ni == 0) {
+	    force_coul = qiqj*(ftable[k]+f*dftable[k]);
+	    if (EFLAG) ecoul = qiqj*(etable[k]+f*detable[k]);
+	  }
+	  else {					// special case
+	    t.f = (1.0-special_coul[ni])*(ctable[k]+f*dctable[k]);
+	    force_coul = qiqj*(ftable[k]+f*dftable[k]-t.f);
+	    if (EFLAG) ecoul = qiqj*(etable[k]+f*detable[k]-t.f);
+	  }
+	}
+      }
+      else force_coul = ecoul = 0.0;
+
+      if (rsq < cut_ljsqi[typej]) {			// lj
+       	if (order6) {					// long-range lj
+	  register double rn = r2inv*r2inv*r2inv;  // r^-6
+	  register double x2 = g2*rsq, a2 = 1.0/x2;
+	  x2 = a2*exp(-x2)*lj4i[typej];
+	  if (ni == 0) {
+	    force_lj =
+	      (rn*=rn)*lj1i[typej]-g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq;  // rn is squared in place here, so it is r^-12 afterwards
+	    if (EFLAG)
+	      evdwl = rn*lj3i[typej]-g6*((a2+1.0)*a2+0.5)*x2;
+	  }
+	  else {					// special case
+	    register double f = special_lj[ni], t = rn*(1.0-f);  // t is computed from rn before rn is squared on the next line
+	    force_lj = f*(rn *= rn)*lj1i[typej]-
+	      g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*lj2i[typej];
+	    if (EFLAG) 
+	      evdwl = f*rn*lj3i[typej]-g6*((a2+1.0)*a2+0.5)*x2+t*lj4i[typej];
+	  }
+	}
+	else {						// cut lj
+	  register double rn = r2inv*r2inv*r2inv;  // r^-6
+	  if (ni == 0) {
+	    force_lj = rn*(rn*lj1i[typej]-lj2i[typej]);
+	    if (EFLAG) evdwl = rn*(rn*lj3i[typej]-lj4i[typej])-offseti[typej];
+	  }
+	  else {					// special case
+	    register double f = special_lj[ni];
+	    force_lj = f*rn*(rn*lj1i[typej]-lj2i[typej]);
+	    if (EFLAG)
+	      evdwl = f * (rn*(rn*lj3i[typej]-lj4i[typej])-offseti[typej]);
+	  }
+	}
+      }
+      else force_lj = evdwl = 0.0;
+
+      fpair = (force_coul+force_lj)*r2inv;  // extra 1/r^2 so that fpair*d[k] gives the force components
+
+      if (NEWTON_PAIR || j < nlocal) {  // apply the force to atom i and the opposite force to atom j
+	register double *fj = f0+(j+(j<<1)), f;
+	fi[0] += f = d[0]*fpair; fj[0] -= f;
+	fi[1] += f = d[1]*fpair; fj[1] -= f;
+	fi[2] += f = d[2]*fpair; fj[2] -= f;
+      }
+      else {  // only atom i receives the force in this case
+	fi[0] += d[0]*fpair;
+	fi[1] += d[1]*fpair;
+	fi[2] += d[2]*fpair;
+      }
+      
+      if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+			       evdwl,ecoul,fpair,d[0],d[1],d[2],tid);
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairLJCoulOMP::memory_usage()  // returns memory_usage_thr() plus the base class's memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairLJCoul::memory_usage();  // qualified call into the non-threaded parent class
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_lj_coul_omp.h b/src/USER-OMP/pair_lj_coul_omp.h
new file mode 100644
index 000000000..619e609ba
--- /dev/null
+++ b/src/USER-OMP/pair_lj_coul_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/coul/omp,PairLJCoulOMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_COUL_OMP_H
+#define LMP_PAIR_LJ_COUL_OMP_H
+
+#include "pair_lj_coul.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJCoulOMP : public PairLJCoul, public ThrOMP {  // class registered as pair style lj/coul/omp
+
+ public:
+  PairLJCoulOMP(class LAMMPS *);
+
+  virtual void compute(int, int);  // (eflag, vflag); calls eval<> inside an OpenMP parallel region
+  virtual double memory_usage();  // memory_usage_thr() plus PairLJCoul::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // the three flags are template parameters, i.e. fixed at compile time
+  void eval(double **f, int ifrom, int ito, int tid);  // pair loop over ilist[ifrom..ito) for thread tid, accumulating into f
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_cubic_omp.cpp
similarity index 79%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_lj_cubic_omp.cpp
index 8ed82c5e5..4f806bd71 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_cubic_omp.cpp
@@ -1,163 +1,173 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_lj_cubic_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
+using namespace PairLJCubicConstants;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairLJCubicOMP::PairLJCubicOMP(LAMMPS *lmp) :  // constructor: forwards lmp to both base classes
+  PairLJCubic(lmp), ThrOMP(lmp, PAIR)  // PAIR is handed to ThrOMP; NOTE(review): presumably identifies the style category -- confirm in thr_omp.h
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairLJCubicOMP::compute(int eflag, int vflag)  // runs eval<EVFLAG,EFLAG,NEWTON_PAIR> in an OpenMP parallel region, then reduces the results
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);  // NOTE(review): the 3 is presumably the number of values per atom (x,y,z) -- confirm in thr_omp
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCubicOMP::eval(double **f, int iifrom, int iito, int tid)  // pair loop over ilist[iifrom..iito) for thread tid; forces go into f
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double r,t,rmin;  // only assigned in the branch beyond the inner cutoff
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
 
   double **x = atom->x;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 	r2inv = 1.0/rsq;
-	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+        if (rsq <= cut_inner_sq[itype][jtype]) {  // up to the inner cutoff: r^-12 / r^-6 expression
+	  r6inv = r2inv*r2inv*r2inv;
+	  forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+	} else {  // beyond the inner cutoff: polynomial in t
+	  r = sqrt(rsq); 
+	  rmin = sigma[itype][jtype]*RT6TWO;  // NOTE(review): RT6TWO is presumably 2^(1/6) -- confirm in PairLJCubicConstants
+	  t = (r - cut_inner[itype][jtype])/rmin;  // distance past the inner cutoff, in units of rmin
+	  forcelj = epsilon[itype][jtype]*(-DPHIDS + A3*t*t/2.0)*r/rmin;
+        }
 	fpair = factor_lj*forcelj*r2inv;
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
+          if (rsq <= cut_inner_sq[itype][jtype])  // same condition as the force branch, so r6inv (or t) is already set
+	    evdwl = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); 
+	  else
+	    evdwl = epsilon[itype][jtype]*
+	      (PHIS + DPHIDS*t - A3*t*t*t/6.0);  // cubic polynomial in t; unlike the removed lj/cut line, no offset[][] is subtracted
+
 	  evdwl *= factor_lj;
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
 				 evdwl,0.0,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairLJCubicOMP::memory_usage()  // returns memory_usage_thr() plus the base class's memory_usage()
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairLJCubic::memory_usage();  // qualified call into the non-threaded parent class
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_lj_cubic_omp.h b/src/USER-OMP/pair_lj_cubic_omp.h
new file mode 100644
index 000000000..559a6125a
--- /dev/null
+++ b/src/USER-OMP/pair_lj_cubic_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/cubic/omp,PairLJCubicOMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_CUBIC_OMP_H
+#define LMP_PAIR_LJ_CUBIC_OMP_H
+
+#include "pair_lj_cubic.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJCubicOMP : public PairLJCubic, public ThrOMP {  // class registered as pair style lj/cubic/omp
+
+ public:
+  PairLJCubicOMP(class LAMMPS *);
+
+  virtual void compute(int, int);  // (eflag, vflag); calls eval<> inside an OpenMP parallel region
+  virtual double memory_usage();  // memory_usage_thr() plus PairLJCubic::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // the three flags are template parameters, i.e. fixed at compile time
+  void eval(double **f, int ifrom, int ito, int tid);  // pair loop over ilist[ifrom..ito) for thread tid, accumulating into f
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp
similarity index 69%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp
index 8ed82c5e5..be98ec38f 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_coul_cut_omp.cpp
@@ -1,163 +1,183 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_lj_cut_coul_cut_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairLJCutCoulCutOMP::PairLJCutCoulCutOMP(LAMMPS *lmp) :  // builds both bases, then clears respa_enable
+  PairLJCutCoulCut(lmp), ThrOMP(lmp, PAIR)  // PAIR: presumably marks the helper as serving a pair style -- confirm in thr_omp.h
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairLJCutCoulCutOMP::compute(int eflag, int vflag)  // threaded driver: each thread runs eval<> on its slice, then results are reduced
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);  // 3: presumably values per atom (fx,fy,fz) -- confirm in thr_omp.h
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
+/* ---------------------------------------------------------------------- */
+
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCutCoulCutOMP::eval(double **f, int iifrom, int iito, int tid)  // per-thread LJ + cut-off Coulomb kernel over ilist[iifrom..iito); forces accumulate into f
 {
   int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double rsq,rinv,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
-  evdwl = 0.0;
+  evdwl = ecoul = 0.0;  // both stay zero when EFLAG is off; with EFLAG on they are reset for every pair
 
   double **x = atom->x;
+  double *q = atom->q;  // per-atom charges
   int *type = atom->type;
   int nlocal = atom->nlocal;
+  double *special_coul = force->special_coul;  // Coulomb scaling factors, indexed by sbmask(j)
   double *special_lj = force->special_lj;
+  double qqrd2e = force->qqrd2e;  // constant multiplying every q_i*q_j term below
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
+    qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];  // read from j before "j &= NEIGHMASK" clears the extra bits
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 	r2inv = 1.0/rsq;
-	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-	fpair = factor_lj*forcelj*r2inv;
+
+	if (rsq < cut_coulsq[itype][jtype]) {
+	  rinv = sqrt(r2inv);
+	  forcecoul = qqrd2e * qtmp*q[j]*rinv;
+	  forcecoul *= factor_coul;
+	} else forcecoul = 0.0;  // outside the Coulomb cutoff for this type pair
+
+	if (rsq < cut_ljsq[itype][jtype]) {
+	  r6inv = r2inv*r2inv*r2inv;
+	  forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+	  forcelj *= factor_lj;
+	} else forcelj = 0.0;  // outside the LJ cutoff for this type pair
+
+	fpair = (forcecoul + forcelj) * r2inv;  // scalar that multiplies delx/dely/delz below
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
-	  evdwl *= factor_lj;
-	}
+	  if (rsq < cut_coulsq[itype][jtype])
+	    ecoul = factor_coul * qqrd2e * qtmp*q[j]*rinv;
+	  else ecoul = 0.0;
+	  if (rsq < cut_ljsq[itype][jtype]) {
+	    evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
+	      offset[itype][jtype];
+	    evdwl *= factor_lj;
+	  } else evdwl = 0.0;  // beyond the LJ cutoff: never tally the previous pair's evdwl again
+	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairLJCutCoulCutOMP::memory_usage()  // returns memory_usage_thr() plus the base style's memory_usage()
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairLJCutCoulCut::memory_usage();
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_lj_cut_coul_cut_omp.h b/src/USER-OMP/pair_lj_cut_coul_cut_omp.h
new file mode 100644
index 000000000..c8c34e259
--- /dev/null
+++ b/src/USER-OMP/pair_lj_cut_coul_cut_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/cut/coul/cut/omp,PairLJCutCoulCutOMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_CUT_COUL_CUT_OMP_H
+#define LMP_PAIR_LJ_CUT_COUL_CUT_OMP_H
+
+#include "pair_lj_cut_coul_cut.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJCutCoulCutOMP : public PairLJCutCoulCut, public ThrOMP {  // PairLJCutCoulCut plus the ThrOMP threading helper
+
+ public:
+  PairLJCutCoulCutOMP(class LAMMPS *);
+
+  virtual void compute(int, int);  // (eflag, vflag); runs eval<> inside an OpenMP parallel region
+  virtual double memory_usage();  // memory_usage_thr() + PairLJCutCoulCut::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // compile-time switches selected in compute()
+  void eval(double **f, int ifrom, int ito, int tid);  // per-thread kernel over ilist[ifrom..ito), accumulating into f
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp
similarity index 67%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp
index 8ed82c5e5..13a4a1906 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_coul_debye_omp.cpp
@@ -1,163 +1,186 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_lj_cut_coul_debye_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairLJCutCoulDebyeOMP::PairLJCutCoulDebyeOMP(LAMMPS *lmp) :  // builds both bases, then clears respa_enable
+  PairLJCutCoulDebye(lmp), ThrOMP(lmp, PAIR)  // PAIR: presumably marks the helper as serving a pair style -- confirm in thr_omp.h
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairLJCutCoulDebyeOMP::compute(int eflag, int vflag)  // threaded driver: each thread runs eval<> on its slice, then results are reduced
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);  // 3: presumably values per atom (fx,fy,fz) -- confirm in thr_omp.h
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
+/* ---------------------------------------------------------------------- */
+
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCutCoulDebyeOMP::eval(double **f, int iifrom, int iito, int tid)  // per-thread LJ + screened-Coulomb kernel over ilist[iifrom..iito); forces accumulate into f
 {
   int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
+  double r,rinv,screening;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
-  evdwl = 0.0;
+  evdwl = ecoul = 0.0;  // both stay zero when EFLAG is off; with EFLAG on they are reset for every pair
 
   double **x = atom->x;
+  double *q = atom->q;  // per-atom charges
   int *type = atom->type;
   int nlocal = atom->nlocal;
+  double *special_coul = force->special_coul;  // Coulomb scaling factors, indexed by sbmask(j)
   double *special_lj = force->special_lj;
+  double qqrd2e = force->qqrd2e;  // constant multiplying every q_i*q_j term below
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
+    qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];  // read from j before "j &= NEIGHMASK" clears the extra bits
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 	r2inv = 1.0/rsq;
-	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-	fpair = factor_lj*forcelj*r2inv;
+
+
+	if (rsq < cut_coulsq[itype][jtype]) {
+	  r = sqrt(rsq);
+	  rinv = 1.0/r;
+	  screening = exp(-kappa*r);  // exponential damping factor, reused for ecoul below
+	  forcecoul = qqrd2e * qtmp*q[j] * screening * (kappa + rinv);
+	  forcecoul *= factor_coul;
+	} else forcecoul = 0.0;  // outside the Coulomb cutoff for this type pair
+
+	if (rsq < cut_ljsq[itype][jtype]) {
+	  r6inv = r2inv*r2inv*r2inv;
+	  forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+	  forcelj *= factor_lj;
+	} else forcelj = 0.0;  // outside the LJ cutoff for this type pair
+
+	fpair = (forcecoul + forcelj) * r2inv;  // scalar that multiplies delx/dely/delz below
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
-	  evdwl *= factor_lj;
+	  if (rsq < cut_coulsq[itype][jtype])
+	    ecoul = factor_coul * qqrd2e * qtmp*q[j] * rinv * screening;  // rinv/screening were set in the matching force branch
+	  else ecoul = 0.0;
+	  if (rsq < cut_ljsq[itype][jtype]) {
+	    evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
+	      offset[itype][jtype];
+	    evdwl *= factor_lj;
+	  } else evdwl = 0.0;  // reset so a previous pair's LJ energy is not tallied again
 	}
-
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairLJCutCoulDebyeOMP::memory_usage()  // returns memory_usage_thr() plus the base style's memory_usage()
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairLJCutCoulDebye::memory_usage();
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_lj_cut_coul_debye_omp.h b/src/USER-OMP/pair_lj_cut_coul_debye_omp.h
new file mode 100644
index 000000000..00cf540be
--- /dev/null
+++ b/src/USER-OMP/pair_lj_cut_coul_debye_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/cut/coul/debye/omp,PairLJCutCoulDebyeOMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_CUT_COUL_DEBYE_OMP_H
+#define LMP_PAIR_LJ_CUT_COUL_DEBYE_OMP_H
+
+#include "pair_lj_cut_coul_debye.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJCutCoulDebyeOMP : public PairLJCutCoulDebye, public ThrOMP {  // PairLJCutCoulDebye plus the ThrOMP threading helper
+
+ public:
+  PairLJCutCoulDebyeOMP(class LAMMPS *);
+
+  virtual void compute(int, int);  // (eflag, vflag); runs eval<> inside an OpenMP parallel region
+  virtual double memory_usage();  // memory_usage_thr() + PairLJCutCoulDebye::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // compile-time switches selected in compute()
+  void eval(double **f, int ifrom, int ito, int tid);  // per-thread kernel over ilist[ifrom..ito), accumulating into f
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp
similarity index 54%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_lj_cut_coul_long_omp.cpp
index 8ed82c5e5..1d8f977c9 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_coul_long_omp.cpp
@@ -1,163 +1,220 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_lj_cut_coul_long_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
+#define EWALD_F   1.12837917
+#define EWALD_P   0.3275911
+#define A1        0.254829592
+#define A2       -0.284496736
+#define A3        1.421413741
+#define A4       -1.453152027
+#define A5        1.061405429
+
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairLJCutCoulLongOMP::PairLJCutCoulLongOMP(LAMMPS *lmp) :  // builds both bases, then clears respa_enable
+  PairLJCutCoulLong(lmp), ThrOMP(lmp, PAIR)  // PAIR: presumably marks the helper as serving a pair style -- confirm in thr_omp.h
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairLJCutCoulLongOMP::compute(int eflag, int vflag)  // threaded driver: each thread runs eval<> on its slice, then results are reduced
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);  // 3: presumably values per atom (fx,fy,fz) -- confirm in thr_omp.h
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
+/* ---------------------------------------------------------------------- */
+
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJCutCoulLongOMP::eval(double **f, int iifrom, int iito, int tid)  // per-thread LJ + coul/long kernel over ilist[iifrom..iito); forces accumulate into f
 {
-  int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  int i,j,ii,jj,jnum,itype,jtype,itable;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double fraction,table;
+  double r,rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
+  double grij,expm2,prefactor,t,erfc;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
-  evdwl = 0.0;
+  evdwl = ecoul = 0.0;  // both stay zero when EFLAG is off; with EFLAG on they are reset for every pair
 
   double **x = atom->x;
+  double *q = atom->q;  // per-atom charges
   int *type = atom->type;
   int nlocal = atom->nlocal;
+  double *special_coul = force->special_coul;  // Coulomb scaling factors, indexed by sbmask(j)
   double *special_lj = force->special_lj;
+  double qqrd2e = force->qqrd2e;  // constant multiplying q_i*q_j in the analytic branch
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
+    qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];  // read from j before "j &= NEIGHMASK" clears the extra bits
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 	r2inv = 1.0/rsq;
-	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-	fpair = factor_lj*forcelj*r2inv;
+
+	if (rsq < cut_coulsq) {  // one Coulomb cutoff here, not indexed by type pair
+	  if (!ncoultablebits || rsq <= tabinnersq) {  // analytic branch: no table bits set, or rsq at/below tabinnersq
+	    r = sqrt(rsq);
+	    grij = g_ewald * r;
+	    expm2 = exp(-grij*grij);
+	    t = 1.0 / (1.0 + EWALD_P*grij);
+	    erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;  // degree-5 polynomial in t times exp(-grij^2)
+	    prefactor = qqrd2e * qtmp*q[j]/r;
+	    forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+	    if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;  // remove the excluded fraction for special pairs
+	  } else {
+	    union_int_float_t rsq_lookup;
+	    rsq_lookup.f = rsq;  // the .i member of the same union is masked and shifted into a table index
+	    itable = rsq_lookup.i & ncoulmask;
+	    itable >>= ncoulshiftbits;
+	    fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];  // weight used for every table interpolation below
+	    table = ftable[itable] + fraction*dftable[itable];
+	    forcecoul = qtmp*q[j] * table;
+	    if (factor_coul < 1.0) {
+	      table = ctable[itable] + fraction*dctable[itable];
+	      prefactor = qtmp*q[j] * table;  // only computed for special pairs; reused by the energy block
+	      forcecoul -= (1.0-factor_coul)*prefactor;
+	    }
+	  }
+	} else forcecoul = 0.0;  // outside the Coulomb cutoff
+
+	if (rsq < cut_ljsq[itype][jtype]) {
+	  r6inv = r2inv*r2inv*r2inv;
+	  forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+	  forcelj *= factor_lj;
+	} else forcelj = 0.0;  // outside the LJ cutoff for this type pair
+
+	fpair = (forcecoul + forcelj) * r2inv;  // scalar that multiplies delx/dely/delz below
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
-	  evdwl *= factor_lj;
+	  if (rsq < cut_coulsq) {
+	    if (!ncoultablebits || rsq <= tabinnersq)  // same branch condition as the force computation above
+	      ecoul = prefactor*erfc;
+	    else {
+	      table = etable[itable] + fraction*detable[itable];  // itable/fraction carry over from the force branch
+	      ecoul = qtmp*q[j] * table;
+	    }
+	    if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;  // prefactor was set by whichever force branch ran
+	  } else ecoul = 0.0;
+
+	  if (rsq < cut_ljsq[itype][jtype]) {
+	    evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
+	      offset[itype][jtype];
+	    evdwl *= factor_lj;
+	  } else evdwl = 0.0;  // reset so a previous pair's LJ energy is not tallied again
 	}
-
+	
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairLJCutCoulLongOMP::memory_usage()  // returns memory_usage_thr() plus the base style's memory_usage()
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairLJCutCoulLong::memory_usage();
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_lj_cut_coul_long_omp.h b/src/USER-OMP/pair_lj_cut_coul_long_omp.h
new file mode 100644
index 000000000..ac408ba88
--- /dev/null
+++ b/src/USER-OMP/pair_lj_cut_coul_long_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/cut/coul/long/omp,PairLJCutCoulLongOMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_CUT_COUL_LONG_OMP_H
+#define LMP_PAIR_LJ_CUT_COUL_LONG_OMP_H
+
+#include "pair_lj_cut_coul_long.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJCutCoulLongOMP : public PairLJCutCoulLong, public ThrOMP {  // PairLJCutCoulLong plus the ThrOMP threading helper
+
+ public:
+  PairLJCutCoulLongOMP(class LAMMPS *);
+
+  virtual void compute(int, int);  // (eflag, vflag); runs eval<> inside an OpenMP parallel region
+  virtual double memory_usage();  // memory_usage_thr() + PairLJCutCoulLong::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // compile-time switches selected in compute()
+  void eval(double **f, int ifrom, int ito, int tid);  // per-thread kernel over ilist[ifrom..ito), accumulating into f
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.cpp
new file mode 100644
index 000000000..6ada944c5
--- /dev/null
+++ b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.cpp
@@ -0,0 +1,462 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_lj_cut_coul_long_tip4p_omp.h"
+#include "atom.h"
+#include "domain.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "error.h"
+#include "memory.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+#define EWALD_F   1.12837917
+#define EWALD_P   0.3275911
+#define A1        0.254829592
+#define A2       -0.284496736
+#define A3        1.421413741
+#define A4       -1.453152027
+#define A5        1.061405429
+
+/* ---------------------------------------------------------------------- */
+
+PairLJCutCoulLongTIP4POMP::PairLJCutCoulLongTIP4POMP(LAMMPS *lmp) :  // builds both bases and starts with empty per-atom caches
+  PairLJCutCoulLongTIP4P(lmp), ThrOMP(lmp, PAIR)
+{
+  respa_enable = 0;
+
+  // for caching m-shift corrected positions
+  maxmpos = 0;  // current capacity of mpos/h1idx/h2idx; raised in compute() when nall exceeds it
+  h1idx = h2idx = NULL;  // allocated later through memory->grow() in compute()
+  mpos = NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairLJCutCoulLongTIP4POMP::~PairLJCutCoulLongTIP4POMP()  // releases the three per-atom caches grown in compute()
+{
+  memory->destroy(h1idx);
+  memory->destroy(h2idx);
+  memory->destroy(mpos);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJCutCoulLongTIP4POMP::compute(int eflag, int vflag)  // fills the mpos/h1idx/h2idx caches serially, then runs eval<> in an OpenMP parallel region
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nlocal = atom->nlocal;
+  const int nall = nlocal + atom->nghost;
+
+  // reallocate per-atom arrays, if necessary
+  if (nall > maxmpos) {  // grow-only: capacity is raised to nall and never reduced here
+    maxmpos = nall;
+    memory->grow(mpos,maxmpos,3,"pair:mpos");
+    memory->grow(h1idx,maxmpos,"pair:h1idx");
+    memory->grow(h2idx,maxmpos,"pair:h2idx");
+  }
+
+  // cache corrected M positions in mpos[]
+  double **x = atom->x;
+  int *type = atom->type;
+  for (int i = 0; i < nlocal; i++) {  // local atoms
+    if (type[i] == typeO) {
+      find_M(i,h1idx[i],h2idx[i],mpos[i]);  // presumably fills both hydrogen indices and mpos[i] -- confirm in the TIP4P base class
+    } else {
+      mpos[i][0] = x[i][0];  // non-O atoms: mpos is the atom position; h1idx[i]/h2idx[i] are not written
+      mpos[i][1] = x[i][1];
+      mpos[i][2] = x[i][2];
+    }
+  }
+  for (int i = nlocal; i < nall; i++) {  // ghost atoms (indices nlocal..nall-1)
+    if (type[i] == typeO) {
+      find_M_permissive(i,h1idx[i],h2idx[i],mpos[i]);  // NOTE(review): eval() rejects negative hydrogen indices, so this presumably may return them -- confirm
+    } else {
+      mpos[i][0] = x[i][0];  // non-O atoms: mpos is the atom position; h1idx[i]/h2idx[i] are not written
+      mpos[i][1] = x[i][1];
+      mpos[i][2] = x[i][2];
+    }
+  }
+
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f;
+
+    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+
+    if (evflag) {
+      if (eflag) {
+	if (vflag) eval<1,1,1>(f, ifrom, ito, tid);  // third template argument is VFLAG in this class, not NEWTON_PAIR
+	else eval<1,1,0>(f, ifrom, ito, tid);
+      } else {
+	if (vflag) eval<1,0,1>(f, ifrom, ito, tid);
+	else eval<1,0,0>(f, ifrom, ito, tid);
+      }
+    } else {
+      eval<0,0,0>(f, ifrom, ito, tid);
+    }
+
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);  // 3: presumably values per atom (fx,fy,fz) -- confirm in thr_omp.h
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int VFLAG>
+void PairLJCutCoulLongTIP4POMP::eval(double **f, int iifrom, int iito, int tid)
+{
+  int i,j,ii,jj,jnum,itype,jtype,itable;
+  int n,vlist[6];
+  int iH1,iH2,jH1,jH2;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul;
+  double fraction,table;
+  double delxOM,delyOM,delzOM;
+  double r,rsq,r2inv,r6inv,forcecoul,forcelj,cforce;
+  double factor_coul,factor_lj;
+  double grij,expm2,prefactor,t,erfc,ddotf;
+  double v[6],xH1[3],xH2[3];
+  double fdx,fdy,fdz,f1x,f1y,f1z,fOx,fOy,fOz,fHx,fHy,fHz;
+  double *x1,*x2;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  evdwl = ecoul = 0.0;
+
+  double **x = atom->x;
+  double *q = atom->q;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  double *special_coul = force->special_coul;
+  double *special_lj = force->special_lj;
+  double qqrd2e = force->qqrd2e;
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    qtmp = q[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=0.0;
+    x1 = mpos[i];
+    iH1 = h1idx[i];
+    iH2 = h2idx[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+
+	r2inv = 1.0/rsq;
+	if (rsq < cut_ljsq[itype][jtype]) {
+	  r6inv = r2inv*r2inv*r2inv;
+	  forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+	  forcelj *= factor_lj * r2inv;
+
+	  fxtmp += delx*forcelj;
+	  fytmp += dely*forcelj;
+	  fztmp += delz*forcelj;
+	  f[j][0] -= delx*forcelj;
+	  f[j][1] -= dely*forcelj;
+	  f[j][2] -= delz*forcelj;
+
+	  if (EFLAG) {
+	    evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
+	      offset[itype][jtype];
+	    evdwl *= factor_lj;
+	  } else evdwl = 0.0;
+
+	  if (EVFLAG) ev_tally_thr(this,i,j,nlocal, /* newton_pair = */ 1,
+				   evdwl,0.0,forcelj,delx,dely,delz,tid);
+	}
+
+	// adjust rsq and delxyz for off-site O charge(s)
+
+	if (itype == typeO || jtype == typeO) { 
+	  x2 = mpos[j];
+	  jH1 = h1idx[j];
+	  jH2 = h2idx[j];
+	  if (jtype == typeO  && ( jH1 < 0 || jH2 < 0 ))
+	    error->one(FLERR,"TIP4P hydrogen is missing");
+	  delx = x1[0] - x2[0];
+	  dely = x1[1] - x2[1];
+	  delz = x1[2] - x2[2];
+	  rsq = delx*delx + dely*dely + delz*delz;
+	}
+
+	// test current rsq against cutoff and compute Coulombic force
+
+	if (rsq < cut_coulsq) {
+	  r2inv = 1 / rsq;
+	  if (!ncoultablebits || rsq <= tabinnersq) {
+	    r = sqrt(rsq);
+	    grij = g_ewald * r;
+	    expm2 = exp(-grij*grij);
+	    t = 1.0 / (1.0 + EWALD_P*grij);
+	    erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+	    prefactor = qqrd2e * qtmp*q[j]/r;
+	    forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+	    if (factor_coul < 1.0) {
+	      forcecoul -= (1.0-factor_coul)*prefactor; 
+	    }
+	  } else {
+	    union_int_float_t rsq_lookup;
+	    rsq_lookup.f = rsq;
+	    itable = rsq_lookup.i & ncoulmask;
+	    itable >>= ncoulshiftbits;
+	    fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
+	    table = ftable[itable] + fraction*dftable[itable];
+	    forcecoul = qtmp*q[j] * table;
+	    if (factor_coul < 1.0) {
+	      table = ctable[itable] + fraction*dctable[itable];
+	      prefactor = qtmp*q[j] * table;
+	      forcecoul -= (1.0-factor_coul)*prefactor;
+	    }
+	  }
+
+	  cforce = forcecoul * r2inv;
+
+	  // if i,j are not O atoms, force is applied directly
+	  // if i or j are O atoms, force is on fictitious atom & partitioned
+	  // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999)
+	  // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f
+	  // preserves total force and torque on water molecule
+	  // virial = sum(r x F) where each water's atoms are near xi and xj
+	  // vlist stores 2,4,6 atoms whose forces contribute to virial
+
+	  n = 0;
+
+	  if (itype != typeO) {
+	    fxtmp += delx * cforce;
+	    fytmp += dely * cforce;
+	    fztmp += delz * cforce;
+
+	    if (VFLAG) {
+	      v[0] = x[i][0] * delx * cforce;
+	      v[1] = x[i][1] * dely * cforce;
+	      v[2] = x[i][2] * delz * cforce;
+	      v[3] = x[i][0] * dely * cforce;
+	      v[4] = x[i][0] * delz * cforce;
+	      v[5] = x[i][1] * delz * cforce;
+	      vlist[n++] = i;
+	    }
+
+	  } else {
+
+            fdx = delx*cforce;
+            fdy = dely*cforce;
+            fdz = delz*cforce;
+
+            delxOM = x[i][0] - x1[0];
+            delyOM = x[i][1] - x1[1];
+            delzOM = x[i][2] - x1[2];
+
+            ddotf = (delxOM * fdx + delyOM * fdy + delzOM * fdz) /
+	      (qdist*qdist);
+
+	    f1x = alpha * (fdx - ddotf * delxOM);
+	    f1y = alpha * (fdy - ddotf * delyOM);
+	    f1z = alpha * (fdz - ddotf * delzOM);
+
+            fOx = fdx - f1x;
+            fOy = fdy - f1y;
+            fOz = fdz - f1z;
+
+            fHx = 0.5 * f1x;
+            fHy = 0.5 * f1y;
+            fHz = 0.5 * f1z;
+
+            fxtmp += fOx;
+            fytmp += fOy;
+            fztmp += fOz;
+
+            f[iH1][0] += fHx;
+            f[iH1][1] += fHy;
+            f[iH1][2] += fHz;
+
+            f[iH2][0] += fHx;
+            f[iH2][1] += fHy;
+            f[iH2][2] += fHz;
+
+	    if (VFLAG) {
+	      domain->closest_image(x[i],x[iH1],xH1);
+	      domain->closest_image(x[i],x[iH2],xH2);
+
+	      v[0] = x[i][0]*fOx + xH1[0]*fHx + xH2[0]*fHx;
+	      v[1] = x[i][1]*fOy + xH1[1]*fHy + xH2[1]*fHy;
+	      v[2] = x[i][2]*fOz + xH1[2]*fHz + xH2[2]*fHz;
+	      v[3] = x[i][0]*fOy + xH1[0]*fHy + xH2[0]*fHy;
+	      v[4] = x[i][0]*fOz + xH1[0]*fHz + xH2[0]*fHz;
+	      v[5] = x[i][1]*fOz + xH1[1]*fHz + xH2[1]*fHz;
+
+	      vlist[n++] = i;
+	      vlist[n++] = iH1;
+	      vlist[n++] = iH2;
+	    }
+	  }
+
+	  if (jtype != typeO) {
+	    f[j][0] -= delx * cforce;
+	    f[j][1] -= dely * cforce;
+	    f[j][2] -= delz * cforce;
+
+	    if (VFLAG) {
+	      v[0] -= x[j][0] * delx * cforce;
+	      v[1] -= x[j][1] * dely * cforce;
+	      v[2] -= x[j][2] * delz * cforce;
+	      v[3] -= x[j][0] * dely * cforce;
+	      v[4] -= x[j][0] * delz * cforce;
+	      v[5] -= x[j][1] * delz * cforce;
+	      vlist[n++] = j;
+	    }
+
+	  } else {
+
+	    fdx = -delx*cforce;
+	    fdy = -dely*cforce;
+	    fdz = -delz*cforce;
+
+	    delxOM = x[j][0] - x2[0];
+	    delyOM = x[j][1] - x2[1];
+	    delzOM = x[j][2] - x2[2];
+
+            ddotf = (delxOM * fdx + delyOM * fdy + delzOM * fdz) /
+	      (qdist*qdist);
+
+	    f1x = alpha * (fdx - ddotf * delxOM);
+	    f1y = alpha * (fdy - ddotf * delyOM);
+	    f1z = alpha * (fdz - ddotf * delzOM);
+
+            fOx = fdx - f1x;
+            fOy = fdy - f1y;
+            fOz = fdz - f1z;
+
+            fHx = 0.5 * f1x;
+            fHy = 0.5 * f1y;
+            fHz = 0.5 * f1z;
+
+	    f[j][0] += fOx;
+	    f[j][1] += fOy;
+	    f[j][2] += fOz;
+
+            f[jH1][0] += fHx;
+            f[jH1][1] += fHy;
+            f[jH1][2] += fHz;
+
+            f[jH2][0] += fHx;
+            f[jH2][1] += fHy;
+            f[jH2][2] += fHz;
+
+	    if (VFLAG) {
+	      domain->closest_image(x[j],x[jH1],xH1);
+	      domain->closest_image(x[j],x[jH2],xH2);
+
+	      v[0] += x[j][0]*fOx + xH1[0]*fHx + xH2[0]*fHx;
+	      v[1] += x[j][1]*fOy + xH1[1]*fHy + xH2[1]*fHy;
+	      v[2] += x[j][2]*fOz + xH1[2]*fHz + xH2[2]*fHz;
+	      v[3] += x[j][0]*fOy + xH1[0]*fHy + xH2[0]*fHy;
+	      v[4] += x[j][0]*fOz + xH1[0]*fHz + xH2[0]*fHz;
+	      v[5] += x[j][1]*fOz + xH1[1]*fHz + xH2[1]*fHz;
+
+	      vlist[n++] = j;
+	      vlist[n++] = jH1;
+	      vlist[n++] = jH2;
+	    }
+	  }
+
+	  if (EFLAG) {
+	    if (!ncoultablebits || rsq <= tabinnersq)
+	      ecoul = prefactor*erfc;
+	    else {
+	      table = etable[itable] + fraction*detable[itable];
+	      ecoul = qtmp*q[j] * table;
+	    }
+	    if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
+	  } else ecoul = 0.0;
+
+	  if (EVFLAG) ev_tally_list_thr(this,n,vlist,ecoul,v,tid);
+	}
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJCutCoulLongTIP4POMP::find_M_permissive(int i, int &iH1, int &iH2, double *xM)
+{
+  // look up the local indices of the atoms whose tags are tag[i]+1 and
+  // tag[i]+2, i.e. the two successive H atoms expected to follow the O.
+  // if either lookup yields -1 this is not treated as an error here:
+  // xM is left untouched and iH1/iH2 carry the -1 back to the caller.
+
+  iH1 = atom->map(atom->tag[i] + 1);
+  iH2 = atom->map(atom->tag[i] + 2);
+
+  if (iH1 != -1 && iH2 != -1) find_M(i,iH1,iH2,xM);
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairLJCutCoulLongTIP4POMP::memory_usage()
+{
+  // start from what ThrOMP reports, then add the non-threaded parent style
+  double total = memory_usage_thr();
+  total += PairLJCutCoulLongTIP4P::memory_usage();
+
+  // cached TIP4P data: h1idx/h2idx, 3 doubles per mpos row, mpos row pointers
+  total += maxmpos * (2*sizeof(int) + 3*sizeof(double) + sizeof(double *));
+  return total;
+}
diff --git a/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.h b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.h
new file mode 100644
index 000000000..093fc0216
--- /dev/null
+++ b/src/USER-OMP/pair_lj_cut_coul_long_tip4p_omp.h
@@ -0,0 +1,57 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/cut/coul/long/tip4p/omp,PairLJCutCoulLongTIP4POMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_CUT_COUL_LONG_TIP4P_OMP_H
+#define LMP_PAIR_LJ_CUT_COUL_LONG_TIP4P_OMP_H
+
+#include "pair_lj_cut_coul_long_tip4p.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJCutCoulLongTIP4POMP : public PairLJCutCoulLongTIP4P, public ThrOMP {
+  // threaded variant of PairLJCutCoulLongTIP4P, using the ThrOMP helpers.
+ public:
+  PairLJCutCoulLongTIP4POMP(class LAMMPS *);
+  virtual ~PairLJCutCoulLongTIP4POMP();
+
+  virtual void compute(int, int);
+  virtual double memory_usage(); // also counts h1idx, h2idx and mpos
+
+ protected:
+  // per-atom cache read by eval() as mpos[i], h1idx[i] and h2idx[i].
+  // this is to cache m-shift corrected positions.
+  int maxmpos;        // size of the following arrays
+  int *h1idx, *h2idx; // local index of hydrogen atoms
+  double **mpos;      // coordinates corrected for m-shift.
+  void find_M_permissive(int, int &, int &, double *);
+
+ private:
+  template <int EVFLAG, int EFLAG, int VFLAG> // no NEWTON_PAIR flag here
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_omp.cpp
index 8ed82c5e5..3d82149fe 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_cut_omp.cpp
@@ -1,163 +1,160 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "pair_lj_cut_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
   PairLJCut(lmp), ThrOMP(lmp, PAIR)
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairLJCutOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
 void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r2inv,r6inv,forcelj,factor_lj;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
 
   double **x = atom->x;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 	r2inv = 1.0/rsq;
 	r6inv = r2inv*r2inv*r2inv;
 	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
 	fpair = factor_lj*forcelj*r2inv;
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
 	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
 	    - offset[itype][jtype];
 	  evdwl *= factor_lj;
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
 				 evdwl,0.0,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double PairLJCutOMP::memory_usage()
 {
   double bytes = memory_usage_thr();
   bytes += PairLJCut::memory_usage();
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_expand_omp.cpp
similarity index 87%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_lj_expand_omp.cpp
index 8ed82c5e5..7b06503ee 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_expand_omp.cpp
@@ -1,163 +1,164 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_lj_expand_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairLJExpandOMP::PairLJExpandOMP(LAMMPS *lmp) :
+  PairLJExpand(lmp), ThrOMP(lmp, PAIR)
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairLJExpandOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJExpandOMP::eval(double **f, int iifrom, int iito, int tid)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double r,rshift,rshiftsq;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
 
   double **x = atom->x;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
-	r2inv = 1.0/rsq;
+	r = sqrt(rsq);
+	rshift = r - shift[itype][jtype];
+	rshiftsq = rshift*rshift;
+	r2inv = 1.0/rshiftsq;
 	r6inv = r2inv*r2inv*r2inv;
 	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-	fpair = factor_lj*forcelj*r2inv;
+	fpair = factor_lj*forcelj/rshift/r;
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
 	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
 	    - offset[itype][jtype];
 	  evdwl *= factor_lj;
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
 				 evdwl,0.0,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairLJExpandOMP::memory_usage()
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairLJExpand::memory_usage();
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_lj_expand_omp.h b/src/USER-OMP/pair_lj_expand_omp.h
new file mode 100644
index 000000000..29488deae
--- /dev/null
+++ b/src/USER-OMP/pair_lj_expand_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/expand/omp,PairLJExpandOMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_EXPAND_OMP_H
+#define LMP_PAIR_LJ_EXPAND_OMP_H
+
+#include "pair_lj_expand.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJExpandOMP : public PairLJExpand, public ThrOMP {
+  // OpenMP-threaded variant of PairLJExpand, using the ThrOMP helpers.
+ public:
+  PairLJExpandOMP(class LAMMPS *);
+
+  virtual void compute(int, int);   // args: eflag, vflag
+  virtual double memory_usage();    // ThrOMP + PairLJExpand usage
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(double **f, int ifrom, int ito, int tid); // per-thread kernel
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp
similarity index 57%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp
index 8ed82c5e5..2e97fa1b5 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.cpp
@@ -1,163 +1,210 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_lj_gromacs_coul_gromacs_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairLJGromacsCoulGromacsOMP::PairLJGromacsCoulGromacsOMP(LAMMPS *lmp) :
+  PairLJGromacsCoulGromacs(lmp), ThrOMP(lmp, PAIR)
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairLJGromacsCoulGromacsOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
+/* ---------------------------------------------------------------------- */
+// per-thread kernel: pair forces (and tallies) for list entries [iifrom,iito)
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJGromacsCoulGromacsOMP::eval(double **f, int iifrom, int iito, int tid)
 {
   int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
+  double r,tlj,tc,fswitch,fswitchcoul,eswitch,ecoulswitch;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
-  evdwl = 0.0;
+  evdwl = ecoul = 0.0;
 
   double **x = atom->x;
+  double *q = atom->q;
   int *type = atom->type;
   int nlocal = atom->nlocal;
+  double *special_coul = force->special_coul;
   double *special_lj = force->special_lj;
+  double qqrd2e = force->qqrd2e;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
+    qtmp = q[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 	r2inv = 1.0/rsq;
-	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-	fpair = factor_lj*forcelj*r2inv;
+
+	// skip if qi or qj = 0.0 since this potential may be used as
+	// coarse-grain model with many uncharged atoms. the energy branch
+	// further down repeats this test because tc is only assigned here.
+	if (rsq < cut_coulsq && qtmp != 0.0 && q[j] != 0.0) {
+	  forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
+          if (rsq > cut_coul_innersq) {
+            r = sqrt(rsq);
+	    tc = r - cut_coul_inner;
+            fswitchcoul = qqrd2e * qtmp*q[j]*r*tc*tc*(coulsw1 + coulsw2*tc);
+            forcecoul += fswitchcoul;
+          }
+	  forcecoul *= factor_coul;
+	} else forcecoul = 0.0;
+	// LJ term; a switching correction is added beyond cut_lj_innersq
+	if (rsq < cut_ljsq) {
+	  r6inv = r2inv*r2inv*r2inv;
+	  // (jtype was already looked up before the cutoff test)
+	  forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+          if (rsq > cut_lj_innersq) {
+            r = sqrt(rsq);
+	    tlj = r - cut_lj_inner;
+	    fswitch = r*tlj*tlj*(ljsw1[itype][jtype] +
+				 ljsw2[itype][jtype]*tlj);
+	    forcelj += fswitch;
+          }
+	  forcelj *= factor_lj;
+	} else forcelj = 0.0;
+
+	fpair = (forcecoul + forcelj) * r2inv;
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
-	  evdwl *= factor_lj;
+	  if (rsq < cut_coulsq && qtmp != 0.0 && q[j] != 0.0) {
+	    ecoul = qqrd2e * qtmp*q[j] * (sqrt(r2inv) - coulsw5);
+            if (rsq > cut_coul_innersq) {
+              ecoulswitch = tc*tc*tc * (coulsw3 + coulsw4*tc);
+              ecoul += qqrd2e*qtmp*q[j]*ecoulswitch;
+            }
+	    ecoul *= factor_coul;
+	  } else ecoul = 0.0;
+	  if (rsq < cut_ljsq) {
+	    evdwl = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
+	    evdwl += ljsw5[itype][jtype];
+            if (rsq > cut_lj_innersq) {
+              eswitch = tlj*tlj*tlj *
+		(ljsw3[itype][jtype] + ljsw4[itype][jtype]*tlj);
+              evdwl += eswitch;
+            }
+	    evdwl *= factor_lj;
+	  } else evdwl = 0.0;
 	}
-
+
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
-				 evdwl,0.0,fpair,delx,dely,delz,tid);
+				 evdwl,ecoul,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairLJGromacsCoulGromacsOMP::memory_usage()
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairLJGromacsCoulGromacs::memory_usage();
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.h b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.h
new file mode 100644
index 000000000..d789bd679
--- /dev/null
+++ b/src/USER-OMP/pair_lj_gromacs_coul_gromacs_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/gromacs/coul/gromacs/omp,PairLJGromacsCoulGromacsOMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_GROMACS_COUL_GROMACS_OMP_H
+#define LMP_PAIR_LJ_GROMACS_COUL_GROMACS_OMP_H
+
+#include "pair_lj_gromacs_coul_gromacs.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJGromacsCoulGromacsOMP : public PairLJGromacsCoulGromacs, public ThrOMP {
+  // OpenMP-threaded variant of PairLJGromacsCoulGromacs, using ThrOMP helpers.
+ public:
+  PairLJGromacsCoulGromacsOMP(class LAMMPS *);
+
+  virtual void compute(int, int);   // args: eflag, vflag
+  virtual double memory_usage();    // ThrOMP + PairLJGromacsCoulGromacs usage
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(double **f, int ifrom, int ito, int tid); // per-thread kernel
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_gromacs_omp.cpp
similarity index 80%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_lj_gromacs_omp.cpp
index 8ed82c5e5..f1c7d2faf 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_gromacs_omp.cpp
@@ -1,163 +1,172 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_lj_gromacs_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairLJGromacsOMP::PairLJGromacsOMP(LAMMPS *lmp) :
+  PairLJGromacs(lmp), ThrOMP(lmp, PAIR)
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairLJGromacsOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJGromacsOMP::eval(double **f, int iifrom, int iito, int tid)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double r,t,fswitch,eswitch;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
 
   double **x = atom->x;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 	r2inv = 1.0/rsq;
 	r6inv = r2inv*r2inv*r2inv;
 	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+        if (rsq > cut_inner_sq[itype][jtype]) {
+          r = sqrt(rsq); 
+	  t = r - cut_inner[itype][jtype];
+	  fswitch = r*t*t*(ljsw1[itype][jtype] + ljsw2[itype][jtype]*t);
+	  forcelj += fswitch;
+        }
+
 	fpair = factor_lj*forcelj*r2inv;
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
+	  evdwl = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]); 
+	  evdwl += ljsw5[itype][jtype];
+          if (rsq > cut_inner_sq[itype][jtype]) {
+            eswitch = t*t*t*(ljsw3[itype][jtype] + ljsw4[itype][jtype]*t);
+            evdwl += eswitch;
+          }
 	  evdwl *= factor_lj;
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
 				 evdwl,0.0,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairLJGromacsOMP::memory_usage()
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairLJGromacs::memory_usage();
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_lj_gromacs_omp.h b/src/USER-OMP/pair_lj_gromacs_omp.h
new file mode 100644
index 000000000..d192a414e
--- /dev/null
+++ b/src/USER-OMP/pair_lj_gromacs_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/gromacs/omp,PairLJGromacsOMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_GROMACS_OMP_H
+#define LMP_PAIR_LJ_GROMACS_OMP_H
+
+#include "pair_lj_gromacs.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJGromacsOMP : public PairLJGromacs, public ThrOMP {
+  // pair style lj/gromacs/omp: PairLJGromacs evaluated inside an OpenMP parallel region
+ public:
+  PairLJGromacsOMP(class LAMMPS *);
+
+  virtual void compute(int, int);   // (eflag, vflag): runs eval<>() per thread, then reduces the per-thread forces
+  virtual double memory_usage();    // memory_usage_thr() + PairLJGromacs::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>   // compile-time flags chosen by compute()
+  void eval(double **f, int ifrom, int ito, int tid);  // handles neighbor list entries ifrom <= ii < ito
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_sf_omp.cpp
similarity index 83%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_lj_sf_omp.cpp
index 8ed82c5e5..55ee908e4 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_sf_omp.cpp
@@ -1,163 +1,163 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_lj_sf_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairLJShiftedForceOMP::PairLJShiftedForceOMP(LAMMPS *lmp) :
+  PairLJShiftedForce(lmp), ThrOMP(lmp, PAIR)
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairLJShiftedForceOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJShiftedForceOMP::eval(double **f, int iifrom, int iito, int tid)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double t,rsq,r2inv,r6inv,forcelj,factor_lj;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
 
   double **x = atom->x;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 	r2inv = 1.0/rsq;
 	r6inv = r2inv*r2inv*r2inv;
+	t = sqrt(r2inv*cutsq[itype][jtype]);  // t = sqrt(cutsq/rsq)
 	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+	// shifted force: subtract the scaled force offset AFTER the plain LJ term
+	forcelj -= t*foffset[itype][jtype];
 	fpair = factor_lj*forcelj*r2inv;
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
+	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) +
+	    (t-1.0)*foffset[itype][jtype] - offset[itype][jtype];
 	  evdwl *= factor_lj;
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
 				 evdwl,0.0,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairLJShiftedForceOMP::memory_usage()
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairLJShiftedForce::memory_usage();
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_lj_sf_omp.h b/src/USER-OMP/pair_lj_sf_omp.h
new file mode 100644
index 000000000..6fba43fb8
--- /dev/null
+++ b/src/USER-OMP/pair_lj_sf_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/sf/omp,PairLJShiftedForceOMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_SF_OMP_H
+#define LMP_PAIR_LJ_SF_OMP_H
+
+#include "pair_lj_sf.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJShiftedForceOMP : public PairLJShiftedForce, public ThrOMP {
+  // pair style lj/sf/omp: PairLJShiftedForce evaluated inside an OpenMP parallel region
+ public:
+  PairLJShiftedForceOMP(class LAMMPS *);
+
+  virtual void compute(int, int);   // (eflag, vflag): runs eval<>() per thread, then reduces the per-thread forces
+  virtual double memory_usage();    // memory_usage_thr() + PairLJShiftedForce::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>   // compile-time flags chosen by compute()
+  void eval(double **f, int ifrom, int ito, int tid);  // handles neighbor list entries ifrom <= ii < ito
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_lj_smooth_omp.cpp
similarity index 76%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_lj_smooth_omp.cpp
index 8ed82c5e5..1ad88044a 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_lj_smooth_omp.cpp
@@ -1,163 +1,176 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_lj_smooth_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairLJSmoothOMP::PairLJSmoothOMP(LAMMPS *lmp) :
+  PairLJSmooth(lmp), ThrOMP(lmp, PAIR)
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairLJSmoothOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairLJSmoothOMP::eval(double **f, int iifrom, int iito, int tid)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
   double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double r,t,tsq,fskin;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
 
   double **x = atom->x;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 	r2inv = 1.0/rsq;
-	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+	if (rsq < cut_inner_sq[itype][jtype]) {
+	  r6inv = r2inv*r2inv*r2inv;
+	  forcelj = r6inv * (lj1[itype][jtype]*r6inv-lj2[itype][jtype]);
+	} else {
+	  r = sqrt(rsq); 
+	  t = r - cut_inner[itype][jtype];
+	  tsq = t*t;
+	  fskin = ljsw1[itype][jtype] + ljsw2[itype][jtype]*t +
+	    ljsw3[itype][jtype]*tsq + ljsw4[itype][jtype]*tsq*t; 
+	  forcelj = fskin*r;
+	}
+        
 	fpair = factor_lj*forcelj*r2inv;
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
+	  if (rsq < cut_inner_sq[itype][jtype])
+	    evdwl = r6inv * (lj3[itype][jtype]*r6inv - 
+			     lj4[itype][jtype]) - offset[itype][jtype];
+	  else
+	    evdwl = ljsw0[itype][jtype] - ljsw1[itype][jtype]*t -
+	      ljsw2[itype][jtype]*tsq/2.0 - ljsw3[itype][jtype]*tsq*t/3.0 -
+	      ljsw4[itype][jtype]*tsq*tsq/4.0 - offset[itype][jtype];
 	  evdwl *= factor_lj;
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
 				 evdwl,0.0,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairLJSmoothOMP::memory_usage()
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairLJSmooth::memory_usage();
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_lj_smooth_omp.h b/src/USER-OMP/pair_lj_smooth_omp.h
new file mode 100644
index 000000000..de27a4008
--- /dev/null
+++ b/src/USER-OMP/pair_lj_smooth_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/smooth/omp,PairLJSmoothOMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_SMOOTH_OMP_H
+#define LMP_PAIR_LJ_SMOOTH_OMP_H
+
+#include "pair_lj_smooth.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJSmoothOMP : public PairLJSmooth, public ThrOMP {
+  // pair style lj/smooth/omp: PairLJSmooth evaluated inside an OpenMP parallel region
+ public:
+  PairLJSmoothOMP(class LAMMPS *);
+
+  virtual void compute(int, int);   // (eflag, vflag): runs eval<>() per thread, then reduces the per-thread forces
+  virtual double memory_usage();    // memory_usage_thr() + PairLJSmooth::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>   // compile-time flags chosen by compute()
+  void eval(double **f, int ifrom, int ito, int tid);  // handles neighbor list entries ifrom <= ii < ito
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lubricate_omp.cpp b/src/USER-OMP/pair_lubricate_omp.cpp
new file mode 100644
index 000000000..d45e0bf1b
--- /dev/null
+++ b/src/USER-OMP/pair_lubricate_omp.cpp
@@ -0,0 +1,328 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_lubricate_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "update.h"
+#include "neighbor.h"
+#include "random_mars.h"
+#include "neigh_list.h"
+
+#include "math_const.h"
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+/* ---------------------------------------------------------------------- */
+
+PairLubricateOMP::PairLubricateOMP(LAMMPS *lmp) :
+  PairLubricate(lmp), ThrOMP(lmp, PAIR)
+{
+  respa_enable = 0;
+  random_thr = NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairLubricateOMP::~PairLubricateOMP()   // frees the per-thread random number generators
+{
+  if (random_thr) {
+    for (int i=1; i < comm->nthreads; ++i)   // slot 0 is skipped: compute() stores the inherited `random` pointer there
+      delete random_thr[i];
+
+    delete[] random_thr;   // the pointer array itself
+    random_thr = NULL;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLubricateOMP::compute(int eflag, int vflag)   // threaded force and torque evaluation
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+  // the trailing "()" value-initializes all slots to NULL, so that the
+  // per-thread generators below are created only on first use.
+  if (!random_thr) random_thr = new RanMars*[nthreads]();
+  random_thr[0] = random;   // thread 0 reuses the inherited generator
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f, **torque;
+
+    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    torque = atom->torque + tid*nall;
+    // create each extra generator once; allocating on every call would leak
+    if (tid > 0 && !random_thr[tid])
+      random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me
+				    + comm->nprocs*tid);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_pair) eval<1,1,1>(f, torque, ifrom, ito, tid);
+	else eval<1,1,0>(f, torque, ifrom, ito, tid);
+      } else {
+	if (force->newton_pair) eval<1,0,1>(f, torque, ifrom, ito, tid);
+	else eval<1,0,0>(f, torque, ifrom, ito, tid);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(f, torque, ifrom, ito, tid);
+      else eval<0,0,0>(f, torque, ifrom, ito, tid);
+    }
+
+    // reduce per thread forces and torques into global arrays.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairLubricateOMP::eval(double **f, double **torque, int iifrom, int iito, int tid)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz,fpair,fx,fy,fz,tx,ty,tz;
+  double rsq,r,h_sep,radi,tfmag;
+  double vr1,vr2,vr3,vnnr,vn1,vn2,vn3;
+  double vt1,vt2,vt3,w1,w2,w3,v_shear1,v_shear2,v_shear3;
+  double omega_t_1,omega_t_2,omega_t_3;
+  double n_cross_omega_t_1,n_cross_omega_t_2,n_cross_omega_t_3;
+  double wr1,wr2,wr3,wnnr,wn1,wn2,wn3;
+  double P_dot_wrel_1,P_dot_wrel_2,P_dot_wrel_3;
+  double a_squeeze,a_shear,a_pump,a_twist;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  double **x = atom->x;
+  double **v = atom->v;
+  double **omega = atom->omega;
+  double *radius = atom->radius;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  double vxmu2f = force->vxmu2f;
+  RanMars &rng = *random_thr[tid];
+
+  double prethermostat = sqrt(2.0 * force->boltz * t_target / update->dt);
+  prethermostat *= sqrt(force->vxmu2f/force->ftm2v/force->mvv2e);
+
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  a_squeeze = a_shear = a_pump = a_twist = 0.0;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    radi = radius[i];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=0.0;
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+
+	r = sqrt(rsq);
+
+        // relative translational velocity 
+
+        vr1 = v[i][0] - v[j][0];
+        vr2 = v[i][1] - v[j][1];
+        vr3 = v[i][2] - v[j][2];
+
+        // normal component N.(v1-v2) = nn.(v1-v2)
+
+        vnnr = vr1*delx + vr2*dely + vr3*delz;
+	vnnr /= r;
+	vn1 = delx*vnnr / r;
+        vn2 = dely*vnnr / r;
+        vn3 = delz*vnnr / r;
+
+        // tangential component -P.(v1-v2)
+	// P = (I - nn) where n is vector between centers
+     
+        vt1 = vr1 - vn1;
+        vt2 = vr2 - vn2;
+        vt3 = vr3 - vn3;
+
+        // additive rotational velocity = omega_1 + omega_2
+
+	w1 = omega[i][0] + omega[j][0];
+	w2 = omega[i][1] + omega[j][1];
+	w3 = omega[i][2] + omega[j][2];
+
+        // relative velocities n X P . (v1-v2) = n X (I-nn) . (v1-v2)
+
+        v_shear1 = (dely*vt3 - delz*vt2) / r;
+        v_shear2 = -(delx*vt3 - delz*vt1) / r;
+        v_shear3 = (delx*vt2 - dely*vt1) / r;
+
+        // relative rotation rate P.(omega1 + omega2)
+
+	omega_t_1 = w1 - delx*(delx*w1) / rsq;
+	omega_t_2 = w2 - dely*(dely*w2) / rsq;
+	omega_t_3 = w3 - delz*(delz*w3) / rsq;
+
+        // n X omega_t
+
+        n_cross_omega_t_1 =  (dely*omega_t_3 - delz*omega_t_2) / r;
+        n_cross_omega_t_2 =  -(delx*omega_t_3 - delz*omega_t_1) / r;
+        n_cross_omega_t_3 =  (delx*omega_t_2 - dely*omega_t_1) / r;
+
+        // N.(w1-w2) and P.(w1-w2)
+
+	wr1 = omega[i][0] - omega[j][0];
+	wr2 = omega[i][1] - omega[j][1];
+	wr3 = omega[i][2] - omega[j][2];
+ 
+	wnnr = wr1*delx + wr2*dely + wr3*delz;
+	wn1 = delx*wnnr / rsq;
+	wn2 = dely*wnnr / rsq;
+	wn3 = delz*wnnr / rsq;
+
+        P_dot_wrel_1 = wr1 - delx*(delx*wr1)/rsq; 
+        P_dot_wrel_2 = wr2 - dely*(dely*wr2)/rsq; 
+        P_dot_wrel_3 = wr3 - delz*(delz*wr3)/rsq; 
+
+        // compute components of pair-hydro
+
+        h_sep = r - 2.0*radi;
+
+	if (flag1)
+	  a_squeeze = (3.0*MY_PI*mu*2.0*radi/2.0) * (2.0*radi/4.0/h_sep);
+	if (flag2) 
+	  a_shear = (MY_PI*mu*2.*radi/2.0) *
+	    log(2.0*radi/2.0/h_sep)*(2.0*radi+h_sep)*(2.0*radi+h_sep)/4.0;
+	if (flag3) 
+	  a_pump = (MY_PI*mu*pow(2.0*radi,4)/8.0) *
+	    ((3.0/20.0) * log(2.0*radi/2.0/h_sep) + 
+	     (63.0/250.0) * (h_sep/2.0/radi) * log(2.0*radi/2.0/h_sep));
+	if (flag4)
+	  a_twist = (MY_PI*mu*pow(2.0*radi,4)/4.0) *
+	    (h_sep/2.0/radi) * log(2.0/(2.0*h_sep));
+
+        if (h_sep >= cut_inner[itype][jtype]) {
+          fx = -a_squeeze*vn1 - a_shear*(2.0/r)*(2.0/r)*vt1 + 
+	    (2.0/r)*a_shear*n_cross_omega_t_1;
+          fy = -a_squeeze*vn2 - a_shear*(2.0/r)*(2.0/r)*vt2 + 
+	    (2.0/r)*a_shear*n_cross_omega_t_2;
+          fz = -a_squeeze*vn3 - a_shear*(2.0/r)*(2.0/r)*vt3 +
+	    (2.0/r)*a_shear*n_cross_omega_t_3;
+	  fx *= vxmu2f;
+	  fy *= vxmu2f;
+	  fz *= vxmu2f;
+
+	  // add in thermostat force
+
+	  tfmag = prethermostat*sqrt(a_squeeze)*(rng.uniform()-0.5);
+	  fx -= tfmag * delx/r;
+	  fy -= tfmag * dely/r;
+	  fz -= tfmag * delz/r;
+	  
+	  tx = -(2.0/r)*a_shear*v_shear1 - a_shear*omega_t_1 - 
+	    a_pump*P_dot_wrel_1 - a_twist*wn1;
+	  ty = -(2.0/r)*a_shear*v_shear2 - a_shear*omega_t_2 - 
+	    a_pump*P_dot_wrel_2 - a_twist*wn2;
+	  tz = -(2.0/r)*a_shear*v_shear3 - a_shear*omega_t_3 - 
+	    a_pump*P_dot_wrel_3 - a_twist*wn3;
+	  torque[i][0] += vxmu2f * tx;
+	  torque[i][1] += vxmu2f * ty;
+	  torque[i][2] += vxmu2f * tz;
+
+        } else {
+	  a_squeeze = (3.0*MY_PI*mu*2.0*radi/2.0) * 
+	    (2.0*radi/4.0/cut_inner[itype][jtype]);
+	  fpair = -a_squeeze*vnnr;
+	  fpair *= vxmu2f;
+
+	  // add in thermostat force
+
+	  fpair -= prethermostat*sqrt(a_squeeze)*(rng.uniform()-0.5);
+
+	  fx = fpair * delx/r;
+	  fy = fpair * dely/r;
+	  fz = fpair * delz/r;
+	}
+
+    	f[i][0] += fx;
+	f[i][1] += fy;
+	f[i][2] += fz;
+
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= fx;
+	  f[j][1] -= fy;
+	  f[j][2] -= fz;
+
+	  if (h_sep >= cut_inner[itype][jtype]) {
+	    tx = -(2.0/r)*a_shear*v_shear1 - a_shear*omega_t_1 + 
+	      a_pump*P_dot_wrel_1 + a_twist*wn1;
+	    ty = -(2.0/r)*a_shear*v_shear2 - a_shear*omega_t_2 + 
+	      a_pump*P_dot_wrel_2 + a_twist*wn2;
+	    tz = -(2.0/r)*a_shear*v_shear3 - a_shear*omega_t_3 + 
+	      a_pump*P_dot_wrel_3 + a_twist*wn3;
+	    torque[j][0] += vxmu2f * tx;
+	    torque[j][1] += vxmu2f * ty;
+	    torque[j][2] += vxmu2f * tz;
+	  }
+	}
+
+	if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,
+				     0.0,0.0,fx,fy,fz,delx,dely,delz,tid);
+      }
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairLubricateOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairLubricate::memory_usage();
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_lubricate_omp.h b/src/USER-OMP/pair_lubricate_omp.h
new file mode 100644
index 000000000..d36d19046
--- /dev/null
+++ b/src/USER-OMP/pair_lubricate_omp.h
@@ -0,0 +1,52 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lubricate/omp,PairLubricateOMP)
+
+#else
+
+#ifndef LMP_PAIR_LUBRICATE_OMP_H
+#define LMP_PAIR_LUBRICATE_OMP_H
+
+#include "pair_lubricate.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLubricateOMP : public PairLubricate, public ThrOMP {  // registered above as pair style lubricate/omp
+
+ public:
+  PairLubricateOMP(class LAMMPS *);
+  virtual ~PairLubricateOMP();
+
+  virtual void compute(int, int);
+  virtual double memory_usage();  // memory_usage_thr() + PairLubricate::memory_usage()
+
+ protected:
+  class RanMars **random_thr;  // array of RanMars pointers; NOTE(review): presumably one generator per thread -- confirm in the .cpp
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // compile-time flags: EVFLAG gates tallying, NEWTON_PAIR gates updates of non-local j
+  void eval(double **f, double **torque, int ifrom, int ito, int tid);  // accumulates into the f and torque arrays passed in
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_morse_omp.cpp
similarity index 83%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_morse_omp.cpp
index 8ed82c5e5..a53e35a97 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_morse_omp.cpp
@@ -1,163 +1,160 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_morse_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairMorseOMP::PairMorseOMP(LAMMPS *lmp) :
+  PairMorse(lmp), ThrOMP(lmp, PAIR)  // construct both bases; ThrOMP is given the PAIR tag
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairMorseOMP::compute(int eflag, int vflag)  // threaded driver: each thread runs one eval<> variant, then results are reduced
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);  // NOTE(review): the 3 is presumably the number of values per atom (fx,fy,fz) -- confirm in thr_omp
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairMorseOMP::eval(double **f, int iifrom, int iito, int tid)  // Morse kernel for neighbor-list entries iifrom <= ii < iito, accumulating into f
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double rsq,r,dr,dexp,factor_lj;  // r: distance, dr: r - r0, dexp: exp(-alpha*dr)
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
 
   double **x = atom->x;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
-	r2inv = 1.0/rsq;
-	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-	fpair = factor_lj*forcelj*r2inv;
+	r = sqrt(rsq);
+	dr = r - r0[itype][jtype];  // displacement from r0 for this type pair
+	dexp = exp(-alpha[itype][jtype] * dr);  // exp(-alpha*(r - r0))
+	fpair = factor_lj * morse1[itype][jtype] * (dexp*dexp - dexp) / r;  // already divided by r, so del*fpair gives Cartesian components
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
+	  evdwl = d0[itype][jtype] * (dexp*dexp - 2.0*dexp) -  // d0*(dexp^2 - 2*dexp) ...
+	    offset[itype][jtype];  // ... minus the per-type-pair offset
 	  evdwl *= factor_lj;
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
 				 evdwl,0.0,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairMorseOMP::memory_usage()  // memory_usage_thr() plus the base-class value
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairMorse::memory_usage();  // add what the PairMorse base class reports
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_morse_omp.h b/src/USER-OMP/pair_morse_omp.h
new file mode 100644
index 000000000..a966e6f11
--- /dev/null
+++ b/src/USER-OMP/pair_morse_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(morse/omp,PairMorseOMP)
+
+#else
+
+#ifndef LMP_PAIR_MORSE_OMP_H
+#define LMP_PAIR_MORSE_OMP_H
+
+#include "pair_morse.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairMorseOMP : public PairMorse, public ThrOMP {  // registered above as pair style morse/omp
+
+ public:
+  PairMorseOMP(class LAMMPS *);
+
+  virtual void compute(int, int);  // arguments are (eflag, vflag) in the .cpp definition
+  virtual double memory_usage();  // memory_usage_thr() + PairMorse::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // compile-time flags chosen by compute() from evflag, eflag and force->newton_pair
+  void eval(double **f, int ifrom, int ito, int tid);  // force kernel over neighbor-list entries [ifrom,ito), accumulating into f
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_peri_lps_omp.cpp b/src/USER-OMP/pair_peri_lps_omp.cpp
new file mode 100644
index 000000000..7cb1e8308
--- /dev/null
+++ b/src/USER-OMP/pair_peri_lps_omp.cpp
@@ -0,0 +1,456 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "float.h"
+#include "pair_peri_lps_omp.h"
+#include "fix.h"
+#include "fix_peri_neigh.h"
+#include "atom.h"
+#include "comm.h"
+#include "domain.h"
+#include "force.h"
+#include "memory.h"
+#include "lattice.h"
+#include "modify.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairPeriLPSOMP::PairPeriLPSOMP(LAMMPS *lmp) :
+  PairPeriLPS(lmp), ThrOMP(lmp, PAIR)  // construct both bases; ThrOMP is given the PAIR tag
+{
+  respa_enable = 0;  // NOTE(review): presumably marks the style as not usable with rRESPA -- confirm in Pair
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairPeriLPSOMP::compute(int eflag, int vflag)
+{  // threaded driver: set up tallying, grow work arrays, run eval<> per thread, reduce results
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;  // nothing requested: clear all four flags
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+  // grow the per-atom s0_new and theta work arrays when atom->nmax has increased
+
+  if (atom->nmax > nmax) {
+    memory->destroy(s0_new);				
+    memory->destroy(theta);				
+    nmax = atom->nmax;
+    memory->create(s0_new,nmax,"pair:s0_new");
+    memory->create(theta,nmax,"pair:theta");
+  }
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f;
+
+    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
+	else eval<1,1,0>(f, ifrom, ito, tid);
+      } else {
+	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
+	else eval<1,0,0>(f, ifrom, ito, tid);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
+      else eval<0,0,0>(f, ifrom, ito, tid);
+    }
+
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);  // NOTE(review): the 3 is presumably the number of values per atom (fx,fy,fz) -- confirm in thr_omp
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairPeriLPSOMP::eval(double **f, int iifrom, int iito, int tid)
+{  // phases: short-range contact forces, dilatation (theta), bond forces with bond breaking, s0 update
+  int i,j,ii,jj,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz;
+  double xtmp0,ytmp0,ztmp0,delx0,dely0,delz0,rsq0;
+  double rsq,r,dr,rk,evdwl,fpair,fbond;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  double d_ij,delta,stretch;
+
+  evdwl = 0.0;
+
+  double **x = atom->x;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  double fxtmp,fytmp,fztmp;
+
+  double *vfrac = atom->vfrac;
+  double *s0 = atom->s0;
+  double **x0 = atom->x0;
+  double **r0   = ((FixPeriNeigh *) modify->fix[ifix_peri])->r0;
+  int **partner = ((FixPeriNeigh *) modify->fix[ifix_peri])->partner;
+  int *npartner = ((FixPeriNeigh *) modify->fix[ifix_peri])->npartner;
+  double *wvolume = ((FixPeriNeigh *) modify->fix[ifix_peri])->wvolume;
+
+  // lc = lattice constant
+  // init_style guarantees it's the same in x, y, and z
+
+  double lc = domain->lattice->xlattice;
+  double half_lc = 0.5*lc;
+  double vfrac_scale = 1.0;
+
+  // short-range forces; periodic is nonzero if any dimension is periodic
+
+  int periodic = (domain->xperiodic || domain->yperiodic || domain->zperiodic);
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+  // need minimg() for x0 difference since not ghosted
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    xtmp0 = x0[i][0];
+    ytmp0 = x0[i][1];
+    ztmp0 = x0[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=0.0;
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+ 
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+
+      rsq = delx*delx + dely*dely + delz*delz;
+      delx0 = xtmp0 - x0[j][0];
+      dely0 = ytmp0 - x0[j][1];
+      delz0 = ztmp0 - x0[j][2];
+      if (periodic) domain->minimum_image(delx0,dely0,delz0);
+      rsq0 = delx0*delx0 + dely0*dely0 + delz0*delz0;
+      jtype = type[j];
+ 
+      r = sqrt(rsq);
+
+      // short-range interaction distance based on initial particle position
+      // 0.9 and 1.35 are constants
+
+      d_ij = MIN(0.9*sqrt(rsq0),1.35*lc);
+
+      // short-range contact forces
+      // 15 is constant taken from the EMU Theory Manual
+      // Silling, 12 May 2005, p 18
+
+      if (r < d_ij) {
+        dr = r - d_ij;
+
+        // kshort based upon short-range force constant
+	// of the bond-based theory used in PMB model
+
+        double kshort = (15.0 * 18.0 * bulkmodulus[itype][itype]) /
+	  (3.141592653589793 * cutsq[itype][jtype] * cutsq[itype][jtype]);
+        rk = (kshort * vfrac[j]) * (dr / cut[itype][jtype]);
+
+        if (r > 0.0) fpair = -(rk/r);  // guard against division by zero when r == 0
+        else fpair = 0.0;
+
+	fxtmp += delx*fpair;
+	fytmp += dely*fpair;
+	fztmp += delz*fpair;
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= delx*fpair;
+	  f[j][1] -= dely*fpair;
+	  f[j][2] -= delz*fpair;
+	}
+
+        if (EFLAG) evdwl = 0.5*rk*dr;
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0,
+				 fpair*vfrac[i],delx,dely,delz,tid);
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+
+  // wait until all threads are done since we
+  // need to distribute the work differently.
+  sync_threads();
+
+#if defined(_OPENMP)
+  // each thread works on a fixed chunk of atoms.
+  const int idelta = 1 + nlocal/comm->nthreads;  // chunk length per thread
+  iifrom = tid*idelta;
+  iito   = iifrom + idelta;
+  if (iito > nlocal)
+    iito = nlocal;  // clamp the chunk end to the number of local atoms
+#else 
+  iifrom = 0;
+  iito = nlocal;
+#endif
+
+  // Compute the dilatation on each particle
+  compute_dilatation_thr(iifrom, iito);
+
+  // wait until all threads are done before communication
+  sync_threads();
+
+#if defined(_OPENMP)
+#pragma omp master
+#endif
+  { // communicate dilatation (theta) of each particle	
+    comm->forward_comm_pair(this);
+    // communicate weighted volume (wvolume) upon every reneighbor
+    if (neighbor->ago == 0)
+      comm->forward_comm_fix(modify->fix[ifix_peri]);
+  }
+
+  sync_threads();  // the other threads wait here until the communication above has finished
+
+  // Volume-dependent part of the energy
+  if (EFLAG) {
+    for (i = iifrom; i < iito; i++) {   
+      itype = type[i];
+      if (eflag_global)
+	eng_vdwl_thr[tid] += 0.5 * bulkmodulus[itype][itype] * (theta[i] * theta[i]);
+      if (eflag_atom)
+	eatom_thr[tid][i] += 0.5 * bulkmodulus[itype][itype] * (theta[i] * theta[i]);
+    }
+  }
+
+  // loop over my particles and their partners
+  // partner list contains all bond partners, so I-J appears twice
+  // if bond already broken, skip this partner
+  // first = true if this is first neighbor of particle i
+
+  bool first;
+  double omega_minus, omega_plus;
+
+  for (i = iifrom; i < iito; ++i) {
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    xtmp0 = x0[i][0];			
+    ytmp0 = x0[i][1];		
+    ztmp0 = x0[i][2];			
+    itype = type[i];
+    jnum = npartner[i];
+    first = true;
+
+    for (jj = 0; jj < jnum; jj++) {
+      if (partner[i][jj] == 0) continue;  // 0 marks a broken bond
+      j = atom->map(partner[i][jj]);
+
+      // check if lost a partner without first breaking bond
+
+      if (j < 0) {
+        partner[i][jj] = 0;
+        continue;
+      }
+
+      // compute force density, add to PD equation of motion
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      if (periodic) domain->minimum_image(delx,dely,delz);
+      rsq = delx*delx + dely*dely + delz*delz;
+      delx0 = xtmp0 - x0[j][0];						
+      dely0 = ytmp0 - x0[j][1];						
+      delz0 = ztmp0 - x0[j][2];						
+      if (periodic) domain->minimum_image(delx0,dely0,delz0);   
+      jtype = type[j];
+      delta = cut[itype][jtype];
+      r = sqrt(rsq);
+      dr = r - r0[i][jj];
+
+      // avoid roundoff errors
+
+      if (fabs(dr) < 2.2204e-016) dr = 0.0;  // threshold is approximately double-precision machine epsilon
+
+      // scale vfrac[j] if particle j near the horizon
+
+      if ((fabs(r0[i][jj] - delta)) <= half_lc)
+        vfrac_scale = (-1.0/(2*half_lc))*(r0[i][jj]) + 
+	  (1.0 + ((delta - half_lc)/(2*half_lc) ) );
+      else vfrac_scale = 1.0;
+
+      omega_plus  = influence_function(-1.0*delx0,-1.0*dely0,-1.0*delz0);
+      omega_minus = influence_function(delx0,dely0,delz0);
+      rk = ( (3.0 * bulkmodulus[itype][itype]) - 
+	     (5.0 * shearmodulus[itype][itype]) ) * vfrac[j] * vfrac_scale * 
+	( (omega_plus * theta[i] / wvolume[i]) + 
+	  ( omega_minus * theta[j] / wvolume[j] ) ) * r0[i][jj]; 
+      rk +=  15.0 * ( shearmodulus[itype][itype] * vfrac[j] * vfrac_scale ) *
+	( (omega_plus / wvolume[i]) + (omega_minus / wvolume[j]) ) * dr; 
+
+      if (r > 0.0) fbond = -(rk/r);  // guard against division by zero when r == 0
+      else fbond = 0.0;
+
+      f[i][0] += delx*fbond;
+      f[i][1] += dely*fbond;
+      f[i][2] += delz*fbond;
+
+      // since I-J is double counted, set newton off & use 1/2 factor and I,I 
+
+      double deviatoric_extension = dr - (theta[i]* r0[i][jj] / 3.0);
+      if (EFLAG) evdwl = 0.5 * 15 * (shearmodulus[itype][itype]/wvolume[i]) * 
+		   omega_plus*(deviatoric_extension * deviatoric_extension) *
+		   vfrac[j] * vfrac_scale;
+      if (EVFLAG) ev_tally_thr(this,i,i,nlocal,0,0.5*evdwl,0.0,
+			       0.5*fbond*vfrac[i],delx,dely,delz,tid);
+
+      // find stretch in bond I-J and break if necessary
+      // use s0 from previous timestep
+
+      stretch = dr / r0[i][jj];
+      if (stretch > MIN(s0[i],s0[j])) partner[i][jj] = 0;  // bond breaks: clear the partner entry
+
+      // update s0 for next timestep
+
+      if (first)
+         s0_new[i] = s00[itype][jtype] - (alpha[itype][jtype] * stretch);
+      else
+         s0_new[i] = MAX(s0_new[i],s00[itype][jtype] - (alpha[itype][jtype] * stretch));
+
+      first = false;
+    }
+  }
+
+  sync_threads();  // the loop above reads s0[j] for other atoms, so wait before s0 is overwritten below
+
+  // store new s0 (each thread copies its own chunk)
+  for (i = iifrom; i < iito; i++) s0[i] = s0_new[i]; 
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairPeriLPSOMP::compute_dilatation_thr(int ifrom, int ito)
+{  // fills theta[i] for atoms ifrom <= i < ito from their intact bonds
+  int i,j,jj,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz;
+  double xtmp0,ytmp0,ztmp0,delx0,dely0,delz0;
+  double rsq,r,dr;
+  double delta;
+
+  double **x = atom->x;
+  int *type = atom->type;
+  double **x0 = atom->x0;
+  double *vfrac = atom->vfrac;
+  double vfrac_scale = 1.0;
+  
+  double lc = domain->lattice->xlattice;
+  double half_lc = 0.5*lc;
+
+  double **r0   = ((FixPeriNeigh *) modify->fix[ifix_peri])->r0;
+  int **partner = ((FixPeriNeigh *) modify->fix[ifix_peri])->partner;
+  int *npartner = ((FixPeriNeigh *) modify->fix[ifix_peri])->npartner;
+  double *wvolume = ((FixPeriNeigh *) modify->fix[ifix_peri])->wvolume;
+
+  int periodic = domain->xperiodic || domain->yperiodic || domain->zperiodic;
+
+  // compute the dilatation theta
+
+  for (i = ifrom; i < ito; i++) {
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    xtmp0 = x0[i][0];
+    ytmp0 = x0[i][1];
+    ztmp0 = x0[i][2];
+    jnum = npartner[i];
+    theta[i] = 0.0;
+    itype = type[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+
+      // if bond already broken, skip this partner
+      if (partner[i][jj] == 0) continue;
+
+      // Look up local index of this partner particle
+      j = atom->map(partner[i][jj]);
+
+      // Skip if particle is "lost"
+      if (j < 0) continue;
+
+      // current (x) and reference (x0) separations, minimum image if periodic
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      if (periodic) domain->minimum_image(delx,dely,delz);
+      rsq = delx*delx + dely*dely + delz*delz;
+      delx0 = xtmp0 - x0[j][0];
+      dely0 = ytmp0 - x0[j][1];
+      delz0 = ztmp0 - x0[j][2];
+      if (periodic) domain->minimum_image(delx0,dely0,delz0);
+
+      r = sqrt(rsq);
+      dr = r - r0[i][jj];
+      if (fabs(dr) < 2.2204e-016) dr = 0.0;  // avoid roundoff; threshold is approximately double-precision machine epsilon
+
+      jtype = type[j];
+      delta = cut[itype][jtype];
+
+      // scale vfrac[j] if particle j near the horizon
+
+      if ((fabs(r0[i][jj] - delta)) <= half_lc)
+        vfrac_scale = (-1.0/(2*half_lc))*(r0[i][jj]) +
+          (1.0 + ((delta - half_lc)/(2*half_lc) ) );
+      else vfrac_scale = 1.0;
+
+      theta[i] += influence_function(delx0, dely0, delz0) * r0[i][jj] * dr *
+	vfrac[j] * vfrac_scale;
+    }
+
+    // if wvolume[i] is zero, then particle i has no bonds
+    // therefore, the dilatation is set to zero
+
+    if (wvolume[i] != 0.0) theta[i] = (3.0/wvolume[i]) * theta[i];
+    else theta[i] = 0;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairPeriLPSOMP::memory_usage()
+{  // reports the sum of the two contributions below
+  double bytes = memory_usage_thr();  // contribution reported by memory_usage_thr()
+  bytes += PairPeriLPS::memory_usage();  // plus what the PairPeriLPS base class reports
+
+  return bytes;
+}
+
diff --git a/src/USER-OMP/pair_peri_lps_omp.h b/src/USER-OMP/pair_peri_lps_omp.h
new file mode 100644
index 000000000..2068830ca
--- /dev/null
+++ b/src/USER-OMP/pair_peri_lps_omp.h
@@ -0,0 +1,52 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(peri/lps/omp,PairPeriLPSOMP)
+
+#else
+
+#ifndef LMP_PAIR_PERI_LPS_OMP_H
+#define LMP_PAIR_PERI_LPS_OMP_H
+
+#include "pair_peri_lps.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairPeriLPSOMP : public PairPeriLPS, public ThrOMP {  // registered above as pair style peri/lps/omp
+
+ public:
+  PairPeriLPSOMP(class LAMMPS *);
+
+  virtual void compute(int, int);  // arguments are (eflag, vflag) in the .cpp definition
+  virtual double memory_usage();  // memory_usage_thr() + PairPeriLPS::memory_usage()
+
+ protected:
+  void compute_dilatation_thr(int ifrom, int ito);  // fills theta[i] for atoms ifrom <= i < ito
+
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // compile-time flags chosen by compute() from evflag, eflag and force->newton_pair
+  void eval(double **f, int ifrom, int ito, int tid);  // per-thread kernel; accumulates into the f array passed in
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_peri_pmb_omp.cpp b/src/USER-OMP/pair_peri_pmb_omp.cpp
new file mode 100644
index 000000000..4e46d142d
--- /dev/null
+++ b/src/USER-OMP/pair_peri_pmb_omp.cpp
@@ -0,0 +1,312 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "float.h"
+#include "pair_peri_pmb_omp.h"
+#include "fix.h"
+#include "fix_peri_neigh.h"
+#include "atom.h"
+#include "comm.h"
+#include "domain.h"
+#include "force.h"
+#include "memory.h"
+#include "lattice.h"
+#include "modify.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairPeriPMBOMP::PairPeriPMBOMP(LAMMPS *lmp) :
+  PairPeriPMB(lmp), ThrOMP(lmp, PAIR)  // construct both bases; ThrOMP is given the PAIR tag
+{
+  respa_enable = 0;  // NOTE(review): presumably marks the style as not usable with rRESPA -- confirm in Pair
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairPeriPMBOMP::compute(int eflag, int vflag)
+{  // threaded driver: set up tallying, grow the work array, run eval<> per thread, reduce results
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+  // grow the per-atom s0_new work array when atom->nmax has increased
+
+  if (atom->nmax > nmax) {
+    memory->destroy(s0_new);
+    nmax = atom->nmax;
+    memory->create(s0_new,nmax,"pair:s0_new");
+  }
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f;
+
+    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+
+    if (evflag) {
+      if (eflag) {
+	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
+	else eval<1,1,0>(f, ifrom, ito, tid);
+      } else {
+	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
+	else eval<1,0,0>(f, ifrom, ito, tid);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
+      else eval<0,0,0>(f, ifrom, ito, tid);
+    }
+
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);  // NOTE(review): the 3 is presumably the number of values per atom (fx,fy,fz) -- confirm in thr_omp
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairPeriPMBOMP::eval(double **f, int iifrom, int iito, int tid)
+{  // phases: short-range contact forces, bond forces with bond breaking, s0 update
+  int i,j,ii,jj,jnum,itype,jtype;
+  double xtmp,ytmp,ztmp,delx,dely,delz;
+  double xtmp0,ytmp0,ztmp0,delx0,dely0,delz0,rsq0;
+  double rsq,r,dr,rk,evdwl,fpair,fbond;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  double d_ij,delta,stretch;
+
+  evdwl = 0.0;
+
+  double **x = atom->x;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  double fxtmp,fytmp,fztmp;
+
+  double *vfrac = atom->vfrac;
+  double *s0 = atom->s0;
+  double **x0 = atom->x0;
+  double **r0   = ((FixPeriNeigh *) modify->fix[ifix_peri])->r0;
+  int **partner = ((FixPeriNeigh *) modify->fix[ifix_peri])->partner;
+  int *npartner = ((FixPeriNeigh *) modify->fix[ifix_peri])->npartner;
+
+  // lc = lattice constant
+  // init_style guarantees it's the same in x, y, and z
+
+  double lc = domain->lattice->xlattice;
+  double half_lc = 0.5*lc;
+  double vfrac_scale = 1.0;
+
+  // short-range forces; periodic is nonzero if any dimension is periodic
+
+  int periodic = (domain->xperiodic || domain->yperiodic || domain->zperiodic);
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+  // need minimg() for x0 difference since not ghosted
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    xtmp0 = x0[i][0];
+    ytmp0 = x0[i][1];
+    ztmp0 = x0[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=0.0;
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+ 
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      delx0 = xtmp0 - x0[j][0];
+      dely0 = ytmp0 - x0[j][1];
+      delz0 = ztmp0 - x0[j][2];
+      if (periodic) domain->minimum_image(delx0,dely0,delz0);
+      rsq0 = delx0*delx0 + dely0*dely0 + delz0*delz0;
+      jtype = type[j];
+ 
+      r = sqrt(rsq);
+
+      // short-range interaction distance based on initial particle position
+      // 0.9 and 1.35 are constants
+
+      d_ij = MIN(0.9*sqrt(rsq0),1.35*lc);
+
+      // short-range contact forces
+      // 15 is constant taken from the EMU Theory Manual
+      // Silling, 12 May 2005, p 18
+
+      if (r < d_ij) {
+        dr = r - d_ij;
+
+        rk = (15.0 * kspring[itype][jtype] * vfrac[j]) * 
+	  (dr / cut[itype][jtype]);
+        if (r > 0.0) fpair = -(rk/r);  // guard against division by zero when r == 0
+        else fpair = 0.0;
+
+	fxtmp += delx*fpair;
+	fytmp += dely*fpair;
+	fztmp += delz*fpair;
+	if (NEWTON_PAIR || j < nlocal) {
+	  f[j][0] -= delx*fpair;
+	  f[j][1] -= dely*fpair;
+	  f[j][2] -= delz*fpair;
+	}
+
+        if (EFLAG) evdwl = 0.5*rk*dr;
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0,
+				 fpair*vfrac[i],delx,dely,delz,tid);
+      }
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+
+  // wait until all threads are done since we
+  // need to distribute the work differently.
+  sync_threads();
+
+#if defined(_OPENMP)
+  // each thread works on a fixed chunk of atoms.
+  const int idelta = 1 + nlocal/comm->nthreads;  // chunk length per thread
+  iifrom = tid*idelta;
+  iito   = iifrom + idelta;
+  if (iito > nlocal)
+    iito = nlocal;  // clamp the chunk end to the number of local atoms
+#else 
+  iifrom = 0;
+  iito = nlocal;
+#endif
+
+  // loop over my particles and their partners
+  // partner list contains all bond partners, so I-J appears twice
+  // if bond already broken, skip this partner
+  // first = true if this is first neighbor of particle i
+
+  bool first;
+
+  for (i = iifrom; i < iito; ++i) {
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jnum = npartner[i];
+    s0_new[i] = DBL_MAX;  // value kept if no partner of atom i is processed below
+    first = true;
+
+    for (jj = 0; jj < jnum; jj++) {
+      if (partner[i][jj] == 0) continue;  // 0 marks a broken bond
+      j = atom->map(partner[i][jj]);
+
+      // check if lost a partner without first breaking bond
+
+      if (j < 0) {
+        partner[i][jj] = 0;
+        continue;
+      }
+
+      // compute force density, add to PD equation of motion
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      if (periodic) domain->minimum_image(delx,dely,delz);
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+      delta = cut[itype][jtype];
+      r = sqrt(rsq);
+      dr = r - r0[i][jj];
+
+      // avoid roundoff errors
+
+      if (fabs(dr) < 2.2204e-016) dr = 0.0;  // threshold is approximately double-precision machine epsilon
+
+      // scale vfrac[j] if particle j near the horizon
+
+      if ((fabs(r0[i][jj] - delta)) <= half_lc)
+        vfrac_scale = (-1.0/(2*half_lc))*(r0[i][jj]) + 
+	  (1.0 + ((delta - half_lc)/(2*half_lc) ) );
+      else vfrac_scale = 1.0;
+
+      stretch = dr / r0[i][jj];
+      rk = (kspring[itype][jtype] * vfrac[j]) * vfrac_scale * stretch;
+      if (r > 0.0) fbond = -(rk/r);  // guard against division by zero when r == 0
+      else fbond = 0.0;
+
+      f[i][0] += delx*fbond;
+      f[i][1] += dely*fbond;
+      f[i][2] += delz*fbond;
+
+      // since I-J is double counted, set newton off & use 1/2 factor and I,I 
+
+      if (EFLAG) evdwl = 0.5*rk*dr;
+      if (EVFLAG) 
+	ev_tally_thr(this,i,i,nlocal,0,0.5*evdwl,0.0,
+		     0.5*fbond*vfrac[i],delx,dely,delz,tid);
+
+      // find stretch in bond I-J and break if necessary
+      // use s0 from previous timestep
+
+      if (stretch > MIN(s0[i],s0[j])) partner[i][jj] = 0;  // bond breaks: clear the partner entry
+
+      // update s0 for next timestep
+
+      if (first)
+         s0_new[i] = s00[itype][jtype] - (alpha[itype][jtype] * stretch);
+      else
+         s0_new[i] = MAX(s0_new[i],s00[itype][jtype] - (alpha[itype][jtype] * stretch));
+      first = false;
+    }
+  }
+
+  sync_threads();  // the loop above reads s0[j] for other atoms, so wait before s0 is overwritten below
+
+  // store new s0 (each thread copies its own chunk)
+  for (i = iifrom; i < iito; i++) s0[i] = s0_new[i]; 
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairPeriPMBOMP::memory_usage()
+{  // reports the sum of the two contributions below
+  double bytes = memory_usage_thr();  // contribution reported by memory_usage_thr()
+  bytes += PairPeriPMB::memory_usage();  // plus what the PairPeriPMB base class reports
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_peri_pmb_omp.h b/src/USER-OMP/pair_peri_pmb_omp.h
new file mode 100644
index 000000000..9940e5ed1
--- /dev/null
+++ b/src/USER-OMP/pair_peri_pmb_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(peri/pmb/omp,PairPeriPMBOMP)
+
+#else
+
+#ifndef LMP_PAIR_PERI_PMB_OMP_H
+#define LMP_PAIR_PERI_PMB_OMP_H
+
+#include "pair_peri_pmb.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairPeriPMBOMP : public PairPeriPMB, public ThrOMP {
+  // Pair style peri/pmb/omp: PairPeriPMB plus the ThrOMP helper base class.
+ public:
+  PairPeriPMBOMP(class LAMMPS *);
+  // virtual entry points; memory_usage() adds the ThrOMP figure to PairPeriPMB's
+  virtual void compute(int, int);
+  virtual double memory_usage();
+  // worker over the index range [ifrom,ito) for thread id tid; flags are compile-time
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_rebo_omp.cpp b/src/USER-OMP/pair_rebo_omp.cpp
new file mode 100644
index 000000000..70b5c4e8a
--- /dev/null
+++ b/src/USER-OMP/pair_rebo_omp.cpp
@@ -0,0 +1,33 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "pair_rebo_omp.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairREBOOMP::PairREBOOMP(LAMMPS *lmp) : PairAIREBOOMP(lmp) {} // adds no state
+
+/* ----------------------------------------------------------------------
+   global settings
+------------------------------------------------------------------------- */
+
+void PairREBOOMP::settings(int narg, char **arg)
+{
+  // no arguments are accepted; afterwards ljflag, torflag and cutlj are zeroed
+  if (narg) error->all(FLERR,"Illegal pair_style command");
+  ljflag = 0; torflag = 0;
+  cutlj = 0.0;
+}
diff --git a/src/USER-OMP/pair_rebo_omp.h b/src/USER-OMP/pair_rebo_omp.h
new file mode 100644
index 000000000..4606e56ae
--- /dev/null
+++ b/src/USER-OMP/pair_rebo_omp.h
@@ -0,0 +1,36 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(rebo/omp,PairREBOOMP)
+
+#else
+
+#ifndef LMP_PAIR_REBO_OMP_H
+#define LMP_PAIR_REBO_OMP_H
+
+#include "pair_airebo_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairREBOOMP : public PairAIREBOOMP { // rebo/omp style built on PairAIREBOOMP
+ public:
+  PairREBOOMP(class LAMMPS *);          // forwards to the PairAIREBOOMP constructor
+  virtual void settings(int, char **);  // rejects arguments, zeroes ljflag/torflag/cutlj
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_resquared_omp.cpp b/src/USER-OMP/pair_resquared_omp.cpp
new file mode 100644
index 000000000..487055305
--- /dev/null
+++ b/src/USER-OMP/pair_resquared_omp.cpp
@@ -0,0 +1,210 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_resquared_omp.h"
+#include "math_extra.h"
+#include "atom.h"
+#include "comm.h"
+#include "atom_vec_ellipsoid.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairRESquaredOMP::PairRESquaredOMP(LAMMPS *lmp) :
+  PairRESquared(lmp), ThrOMP(lmp, PAIR)  // ThrOMP is constructed with the PAIR tag
+{
+  respa_enable = 0;  // flag cleared; presumably turns off rRESPA support -- confirm
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairRESquaredOMP::compute(int eflag, int vflag)
+{
+  // prepare energy/virial tallying only if the caller asked for either
+  if (!(eflag || vflag)) {
+    evflag = vflag_fdotr = 0;
+  } else {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  }
+
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+  const int nall = atom->nlocal + atom->nghost;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int tid, ifrom, ito;
+    double **f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    double **torque = atom->torque + tid*nall;  // torque rows shifted by tid*nall
+    // map the run-time flags onto eval<EVFLAG,EFLAG,NEWTON_PAIR>
+    const int newton = force->newton_pair;
+    if (!evflag) {
+      if (newton) eval<0,0,1>(f, torque, ifrom, ito, tid);
+      else eval<0,0,0>(f, torque, ifrom, ito, tid);
+    } else if (eflag) {
+      if (newton) eval<1,1,1>(f, torque, ifrom, ito, tid);
+      else eval<1,1,0>(f, torque, ifrom, ito, tid);
+    } else {
+      if (newton) eval<1,0,1>(f, torque, ifrom, ito, tid);
+      else eval<1,0,0>(f, torque, ifrom, ito, tid);
+    }
+
+    // fold each thread's force and torque contributions into the global arrays
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+    data_reduce_thr(&(atom->torque[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // merge per-thread energy/virial tallies when tallying is active
+  if (evflag) ev_reduce_thr(this);
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairRESquaredOMP::eval(double **f, double **tor, int iifrom, int iito, int tid)
+{
+  // Evaluate the pairs of neighbor-list entries ii in [iifrom,iito), adding
+  // forces to f and torques to tor; tid is handed on to the tally helper.
+  int i,j,ii,jj,jnum,itype,jtype;
+  double one_eng,rsq,r2inv,r6inv,forcelj,factor_lj;
+  double fforce[3],ttor[3],rtor[3],r12[3];
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  RE2Vars wi,wj;
+
+  // evdwl reaches ev_tally_xyz_thr() whenever EVFLAG is set, even if EFLAG
+  // is 0 and no energy was computed, so it must start with a defined value.
+  double evdwl = 0.0;
+
+  double **x = atom->x;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+  double *special_lj = force->special_lj;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+  for (ii = iifrom; ii < iito; ++ii) {
+    i = ilist[ii];
+    itype = type[i];
+
+    // not a LJ sphere: set up wi once per atom i
+    if (lshape[itype] != 0.0) precompute_i(i,wi);
+
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];  // scale factor selected by the bits above NEIGHMASK
+      j &= NEIGHMASK;
+
+      // r12 = center to center vector
+
+      r12[0] = x[j][0]-x[i][0];
+      r12[1] = x[j][1]-x[i][1];
+      r12[2] = x[j][2]-x[i][2];
+      rsq = MathExtra::dot3(r12,r12);
+      jtype = type[j];
+
+      // compute if less than cutoff
+
+      if (rsq < cutsq[itype][jtype]) {
+        switch (form[itype][jtype]) {
+
+         case SPHERE_SPHERE:  // plain 12-6 form, no torque
+          r2inv = 1.0/rsq;
+          r6inv = r2inv*r2inv*r2inv;
+          forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+          forcelj *= -r2inv;
+          if (EFLAG) one_eng =
+              r6inv*(r6inv*lj3[itype][jtype]-lj4[itype][jtype]) -
+              offset[itype][jtype];
+          fforce[0] = r12[0]*forcelj;
+          fforce[1] = r12[1]*forcelj;
+          fforce[2] = r12[2]*forcelj;
+          break;
+
+         case SPHERE_ELLIPSE:  // roles of i and j are swapped in the helper call
+          precompute_i(j,wj);
+          if (NEWTON_PAIR || j < nlocal) {
+            one_eng = resquared_lj(j,i,wj,r12,rsq,fforce,rtor,true);
+            tor[j][0] += rtor[0]*factor_lj;
+            tor[j][1] += rtor[1]*factor_lj;
+            tor[j][2] += rtor[2]*factor_lj;
+          } else
+            one_eng = resquared_lj(j,i,wj,r12,rsq,fforce,rtor,false);
+          break;
+
+         case ELLIPSE_SPHERE:  // torque goes to i only
+          one_eng = resquared_lj(i,j,wi,r12,rsq,fforce,ttor,true);
+          tor[i][0] += ttor[0]*factor_lj;
+          tor[i][1] += ttor[1]*factor_lj;
+          tor[i][2] += ttor[2]*factor_lj;
+          break;
+
+         default:  // remaining form: torque on i always, on j if it is updated here
+          precompute_i(j,wj);
+          one_eng = resquared_analytic(i,j,wi,wj,r12,rsq,fforce,ttor,rtor);
+          tor[i][0] += ttor[0]*factor_lj;
+          tor[i][1] += ttor[1]*factor_lj;
+          tor[i][2] += ttor[2]*factor_lj;
+          if (NEWTON_PAIR || j < nlocal) {
+            tor[j][0] += rtor[0]*factor_lj;
+            tor[j][1] += rtor[1]*factor_lj;
+            tor[j][2] += rtor[2]*factor_lj;
+          }
+          break;
+        }
+
+        fforce[0] *= factor_lj;
+        fforce[1] *= factor_lj;
+        fforce[2] *= factor_lj;
+        f[i][0] += fforce[0];
+        f[i][1] += fforce[1];
+        f[i][2] += fforce[2];
+
+        if (NEWTON_PAIR || j < nlocal) {
+          f[j][0] -= fforce[0];
+          f[j][1] -= fforce[1];
+          f[j][2] -= fforce[2];
+        }
+
+        if (EFLAG) evdwl = factor_lj*one_eng;
+
+        if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR,
+                                     evdwl,0.0,fforce[0],fforce[1],fforce[2],
+                                     -r12[0],-r12[1],-r12[2],tid);
+      }
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairRESquaredOMP::memory_usage()
+{
+  // sum of the ThrOMP helper's figure and the serial PairRESquared figure
+  const double thr_bytes = memory_usage_thr();
+  const double base_bytes = PairRESquared::memory_usage();
+  return thr_bytes + base_bytes;
+}
diff --git a/src/USER-OMP/pair_resquared_omp.h b/src/USER-OMP/pair_resquared_omp.h
new file mode 100644
index 000000000..2a50bb6dd
--- /dev/null
+++ b/src/USER-OMP/pair_resquared_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(resquared/omp,PairRESquaredOMP)
+
+#else
+
+#ifndef LMP_PAIR_RESQUARED_OMP_H
+#define LMP_PAIR_RESQUARED_OMP_H
+
+#include "pair_resquared.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairRESquaredOMP : public PairRESquared, public ThrOMP {
+  // Pair style resquared/omp: PairRESquared plus the ThrOMP helper base class.
+ public:
+  PairRESquaredOMP(class LAMMPS *);
+  // compute() runs eval() inside an OpenMP parallel region; memory_usage() sums both bases
+  virtual void compute(int, int);
+  virtual double memory_usage();
+  // kernel for list entries [ifrom,ito); writes forces to f and torques to torque
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(double **f, double **torque, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_soft_omp.cpp
similarity index 82%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_soft_omp.cpp
index 8ed82c5e5..7667efa98 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_soft_omp.cpp
@@ -1,163 +1,160 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_soft_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
+#define SMALL 1.0e-4
+
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairSoftOMP::PairSoftOMP(LAMMPS *lmp) :
+  PairSoft(lmp), ThrOMP(lmp, PAIR)
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairSoftOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairSoftOMP::eval(double **f, int iifrom, int iito, int tid)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double r,rsq,arg,factor_lj;  // r: pair distance, arg: PI/cut for the type pair
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
 
   double **x = atom->x;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
-	r2inv = 1.0/rsq;
-	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-	fpair = factor_lj*forcelj*r2inv;
+	r = sqrt(rsq);
+	arg = PI/cut[itype][jtype];
+	if (r > SMALL) fpair = factor_lj * prefactor[itype][jtype] * 
+		       sin(arg*r) * arg/r;  // already divided by r: multiplies delx/dely/delz below
+	else fpair = 0.0;  // r <= SMALL (1.0e-4): no force, avoids dividing by a tiny r
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
-	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
-	  evdwl *= factor_lj;
-	}
+	if (EFLAG)  // energy is evaluated even when r <= SMALL
+	  evdwl = factor_lj * prefactor[itype][jtype] * (1.0+cos(arg*r));
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
 				 evdwl,0.0,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairSoftOMP::memory_usage()
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairSoft::memory_usage();
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_soft_omp.h b/src/USER-OMP/pair_soft_omp.h
new file mode 100644
index 000000000..840d87460
--- /dev/null
+++ b/src/USER-OMP/pair_soft_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(soft/omp,PairSoftOMP)
+
+#else
+
+#ifndef LMP_PAIR_SOFT_OMP_H
+#define LMP_PAIR_SOFT_OMP_H
+
+#include "pair_soft.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairSoftOMP : public PairSoft, public ThrOMP {
+  // Pair style soft/omp: PairSoft plus the ThrOMP helper base class.
+ public:
+  PairSoftOMP(class LAMMPS *);
+  // compute() runs eval() inside an OpenMP parallel region; memory_usage() sums both bases
+  virtual void compute(int, int);
+  virtual double memory_usage();
+  // kernel for list entries [ifrom,ito) and thread id tid; flags are compile-time
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_sw_omp.cpp b/src/USER-OMP/pair_sw_omp.cpp
new file mode 100644
index 000000000..5d7f1a60d
--- /dev/null
+++ b/src/USER-OMP/pair_sw_omp.cpp
@@ -0,0 +1,212 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_sw_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairSWOMP::PairSWOMP(LAMMPS *lmp) :
+  PairSW(lmp), ThrOMP(lmp, PAIR)  // ThrOMP is constructed with the PAIR tag
+{
+  respa_enable = 0;  // flag cleared; presumably turns off rRESPA support -- confirm
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairSWOMP::compute(int eflag, int vflag)
+{
+  // prepare energy/virial tallying only if the caller asked for either
+  if (!(eflag || vflag)) {
+    evflag = vflag_fdotr = 0;
+  } else {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  }
+
+  // sizes read by every thread in the parallel region below
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+  const int nall = atom->nlocal + atom->nghost;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int tid, ifrom, ito;
+    double **f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+
+    // map the run-time flags onto eval<EVFLAG,EFLAG>
+    if (!evflag) eval<0,0>(f, ifrom, ito, tid);
+    else if (eflag) eval<1,1>(f, ifrom, ito, tid);
+    else eval<1,0>(f, ifrom, ito, tid);
+
+    // fold each thread's force contributions into the global force array
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
+  } // end of omp parallel region
+
+  // merge per-thread energy/virial tallies when tallying is active;
+  // virial_fdotr_compute() runs only if vflag_fdotr is set
+  if (evflag) ev_reduce_thr(this);
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+template <int EVFLAG, int EFLAG>
+void PairSWOMP::eval(double **f, int iifrom, int iito, int tid)
+{
+  int i,j,k,ii,jj,kk,jnum,jnumm1,itag,jtag;
+  int itype,jtype,ktype,ijparam,ikparam,ijkparam;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
+  double rsq,rsq1,rsq2;
+  double delr1[3],delr2[3],fj[3],fk[3];
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  // Two-body then three-body terms for list entries ii in [iifrom,iito), summed into f.
+  evdwl = 0.0;
+
+  double **x = atom->x;
+  int *tag = atom->tag;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  double fxtmp,fytmp,fztmp;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    itag = tag[i];
+    itype = map[type[i]];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    fxtmp = fytmp = fztmp = 0.0;  // force on i is accumulated locally, stored after both loops
+
+    // two-body interactions, skip half of them
+    // (tag parity, or coordinate order when the tags are equal, decides which are skipped)
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+      jtag = tag[j];
+
+      if (itag > jtag) {
+	if ((itag+jtag) % 2 == 0) continue;
+      } else if (itag < jtag) {
+	if ((itag+jtag) % 2 == 1) continue;
+      } else {
+	if (x[j][2] < ztmp) continue;
+	if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
+	if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
+      }
+
+      jtype = map[type[j]];
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+
+      ijparam = elem2param[itype][jtype][jtype];
+      if (rsq > params[ijparam].cutsq) continue;
+
+      twobody(&params[ijparam],rsq,fpair,EFLAG,evdwl);
+
+      fxtmp += delx*fpair;
+      fytmp += dely*fpair;
+      fztmp += delz*fpair;
+      f[j][0] -= delx*fpair;  // applied to j unconditionally (no nlocal test here)
+      f[j][1] -= dely*fpair;
+      f[j][2] -= delz*fpair;
+
+      if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,
+			       evdwl,0.0,fpair,delx,dely,delz,tid);
+    }
+    // three-body terms: every neighbor pair (j,k) with kk > jj, vectors taken from i
+    jnumm1 = jnum - 1;
+
+    for (jj = 0; jj < jnumm1; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+      jtype = map[type[j]];
+      ijparam = elem2param[itype][jtype][jtype];
+      delr1[0] = x[j][0] - xtmp;
+      delr1[1] = x[j][1] - ytmp;
+      delr1[2] = x[j][2] - ztmp;
+      rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];
+      if (rsq1 > params[ijparam].cutsq) continue;
+      // force on j is accumulated locally and added to f[j] once after the k loop
+      double fjxtmp,fjytmp,fjztmp;
+      fjxtmp = fjytmp = fjztmp = 0.0;
+
+      for (kk = jj+1; kk < jnum; kk++) {
+	k = jlist[kk];
+	k &= NEIGHMASK;
+	ktype = map[type[k]];
+	ikparam = elem2param[itype][ktype][ktype];
+	ijkparam = elem2param[itype][jtype][ktype];
+
+	delr2[0] = x[k][0] - xtmp;
+	delr2[1] = x[k][1] - ytmp;
+	delr2[2] = x[k][2] - ztmp;
+	rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2];
+	if (rsq2 > params[ikparam].cutsq) continue;
+
+	threebody(&params[ijparam],&params[ikparam],&params[ijkparam],
+		  rsq1,rsq2,delr1,delr2,fj,fk,EFLAG,evdwl);
+
+	fxtmp -= fj[0] + fk[0];  // i receives the negative of the forces put on j and k
+	fytmp -= fj[1] + fk[1];
+	fztmp -= fj[2] + fk[2];
+	fjxtmp += fj[0];
+	fjytmp += fj[1];
+	fjztmp += fj[2];
+	f[k][0] += fk[0];
+	f[k][1] += fk[1];
+	f[k][2] += fk[2];
+
+	if (EVFLAG) ev_tally3_thr(this,i,j,k,evdwl,0.0,fj,fk,delr1,delr2,tid);
+      }
+      f[j][0] += fjxtmp;
+      f[j][1] += fjytmp;
+      f[j][2] += fjztmp;
+    }
+    f[i][0] += fxtmp;
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairSWOMP::memory_usage()
+{
+  // sum of the ThrOMP helper's figure and the serial PairSW figure
+  const double thr_bytes = memory_usage_thr();
+  const double base_bytes = PairSW::memory_usage();
+  return thr_bytes + base_bytes;
+}
diff --git a/src/USER-OMP/pair_sw_omp.h b/src/USER-OMP/pair_sw_omp.h
new file mode 100644
index 000000000..40052d7d4
--- /dev/null
+++ b/src/USER-OMP/pair_sw_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(sw/omp,PairSWOMP)
+
+#else
+
+#ifndef LMP_PAIR_SW_OMP_H
+#define LMP_PAIR_SW_OMP_H
+
+#include "pair_sw.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairSWOMP : public PairSW, public ThrOMP {
+  // Pair style sw/omp: PairSW plus the ThrOMP helper base class.
+ public:
+  PairSWOMP(class LAMMPS *);
+  // compute() runs eval() inside an OpenMP parallel region; memory_usage() sums both bases
+  virtual void compute(int, int);
+  virtual double memory_usage();
+  // kernel for list entries [ifrom,ito) and thread id tid; no NEWTON_PAIR flag here
+ private:
+  template <int EVFLAG, int EFLAG>
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_table_omp.cpp
similarity index 61%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_table_omp.cpp
index 8ed82c5e5..6b14d4c98 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_table_omp.cpp
@@ -1,163 +1,202 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_table_omp.h"
 #include "atom.h"
 #include "comm.h"
+#include "error.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairTableOMP::PairTableOMP(LAMMPS *lmp) :
+  PairTable(lmp), ThrOMP(lmp, PAIR)
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairTableOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairTableOMP::eval(double **f, int iifrom, int iito, int tid)
 {
-  int i,j,ii,jj,jnum,itype,jtype;
+  int i,j,ii,jj,jnum,itype,jtype,itable;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double rsq,factor_lj,fraction,value,a,b;
   int *ilist,*jlist,*numneigh,**firstneigh;
+  Table *tb;
+  // rsq_lookup lets the bitmap branch read rsq's float bits as an integer
+  union_int_float_t rsq_lookup;
+  int tlm1 = tablength - 1;  // an itable at or above this is reported as an error
 
   evdwl = 0.0;
 
   double **x = atom->x;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       factor_lj = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
-
+      // tables are indexed by rsq, so no square root is taken in this kernel
       if (rsq < cutsq[itype][jtype]) {
-	r2inv = 1.0/rsq;
-	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-	fpair = factor_lj*forcelj*r2inv;
+	tb = &tables[tabindex[itype][jtype]];
+	if (rsq < tb->innersq)
+	  error->one(FLERR,"Pair distance < table inner cutoff");
+	// NOTE(review): error->one() is called from inside the threaded kernel -- confirm that is safe
+	if (tabstyle == LOOKUP) {  // nearest lower table entry, no interpolation
+	  itable = static_cast<int> ((rsq - tb->innersq) * tb->invdelta);
+	  if (itable >= tlm1)
+	    error->one(FLERR,"Pair distance > table outer cutoff");
+	  fpair = factor_lj * tb->f[itable];
+	} else if (tabstyle == LINEAR) {  // f[itable] + fraction*df[itable]
+	  itable = static_cast<int> ((rsq - tb->innersq) * tb->invdelta);
+	  if (itable >= tlm1)
+	    error->one(FLERR,"Pair distance > table outer cutoff");
+	  fraction = (rsq - tb->rsq[itable]) * tb->invdelta;
+	  value = tb->f[itable] + fraction*tb->df[itable];
+	  fpair = factor_lj * value;
+	} else if (tabstyle == SPLINE) {  // uses entries itable and itable+1 plus the f2 terms
+	  itable = static_cast<int> ((rsq - tb->innersq) * tb->invdelta);
+	  if (itable >= tlm1)
+	    error->one(FLERR,"Pair distance > table outer cutoff");
+	  b = (rsq - tb->rsq[itable]) * tb->invdelta;
+	  a = 1.0 - b;
+	  value = a * tb->f[itable] + b * tb->f[itable+1] + 
+	    ((a*a*a-a)*tb->f2[itable] + (b*b*b-b)*tb->f2[itable+1]) * 
+            tb->deltasq6;
+	  fpair = factor_lj * value;
+	} else {  // remaining style (BITMAP, per the energy branch below)
+	  rsq_lookup.f = rsq;
+	  itable = rsq_lookup.i & tb->nmask;  // index from masked, shifted float bits
+	  itable >>= tb->nshiftbits;
+	  fraction = (rsq_lookup.f - tb->rsq[itable]) * tb->drsq[itable];
+	  value = tb->f[itable] + fraction*tb->df[itable];
+	  fpair = factor_lj * value;
+	}
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
+	  if (tabstyle == LOOKUP)  // itable/fraction/a/b are reused from the force branch above
+	    evdwl = tb->e[itable];
+	  else if (tabstyle == LINEAR || tabstyle == BITMAP)
+	    evdwl = tb->e[itable] + fraction*tb->de[itable];
+	  else  // SPLINE
+	    evdwl = a * tb->e[itable] + b * tb->e[itable+1] + 
+	      ((a*a*a-a)*tb->e2[itable] + (b*b*b-b)*tb->e2[itable+1]) * 
+	      tb->deltasq6;
 	  evdwl *= factor_lj;
 	}
 
-	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
+	if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
 				 evdwl,0.0,fpair,delx,dely,delz,tid);
       }
     }
+
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairTableOMP::memory_usage()
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairTable::memory_usage();
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_table_omp.h b/src/USER-OMP/pair_table_omp.h
new file mode 100644
index 000000000..6fd1ce74a
--- /dev/null
+++ b/src/USER-OMP/pair_table_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(table/omp,PairTableOMP)
+
+#else
+
+#ifndef LMP_PAIR_TABLE_OMP_H
+#define LMP_PAIR_TABLE_OMP_H
+
+#include "pair_table.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairTableOMP : public PairTable, public ThrOMP {
+  // registered above as pair style "table/omp"; derives from PairTable and ThrOMP
+ public:
+  PairTableOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();  // memory_usage_thr() + PairTable::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // compile-time switches tested inside the kernel
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_tersoff_omp.cpp b/src/USER-OMP/pair_tersoff_omp.cpp
new file mode 100644
index 000000000..f59a8488f
--- /dev/null
+++ b/src/USER-OMP/pair_tersoff_omp.cpp
@@ -0,0 +1,252 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_tersoff_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairTersoffOMP::PairTersoffOMP(LAMMPS *lmp) :
+  PairTersoff(lmp), ThrOMP(lmp, PAIR)  // both bases are built from the same LAMMPS pointer
+{
+  respa_enable = 0;  // NOTE(review): presumably marks this style as not usable with rRESPA -- confirm
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairTersoffOMP::compute(int eflag, int vflag)
+{  // driver: tally setup, eval<>() inside the (optional) OpenMP region, then reductions
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+    ev_setup_thr(this);
+  } else evflag = vflag_fdotr = vflag_atom = 0;  // nothing to tally: clear all three flags
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(shared)
+#endif
+  {
+    int ifrom, ito, tid;
+    double **f;
+    // loop_setup_thr() returns the force array to write and sets ifrom, ito and tid
+    f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
+    // choose the eval<EVFLAG,EFLAG,VFLAG_ATOM> instantiation matching the runtime flags
+    if (evflag) {
+      if (eflag) {
+	if (vflag_atom) eval<1,1,1>(f, ifrom, ito, tid);
+	else eval<1,1,0>(f, ifrom, ito, tid);
+      } else {
+	if (vflag_atom) eval<1,0,1>(f, ifrom, ito, tid);
+	else eval<1,0,0>(f, ifrom, ito, tid);
+      }
+    } else eval<0,0,0>(f, ifrom, ito, tid);
+
+    // reduce per thread forces into global force array.
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);  // NOTE(review): 3 is presumably values per atom -- confirm
+  } // end of omp parallel region
+
+  // reduce per thread energy and virial, if requested.
+  if (evflag) ev_reduce_thr(this);
+  if (vflag_fdotr) virial_fdotr_compute();
+}
+
+template <int EVFLAG, int EFLAG, int VFLAG_ATOM>
+void PairTersoffOMP::eval(double **f, int iifrom, int iito, int tid)
+{  // force kernel for ilist entries [iifrom,iito); writes forces to f, tallies with tid
+  int i,j,k,ii,jj,kk,jnum;
+  int itag,jtag,itype,jtype,ktype,iparam_ij,iparam_ijk;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
+  double rsq,rsq1,rsq2;
+  double delr1[3],delr2[3],fi[3],fj[3],fk[3];
+  double zeta_ij,prefactor;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  // evdwl is handed by reference to repulsive()/force_zeta() together with EFLAG
+  evdwl = 0.0;
+
+  double **x = atom->x;
+  int *tag = atom->tag;
+  int *type = atom->type;
+  int nlocal = atom->nlocal;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  double fxtmp,fytmp,fztmp;
+
+  // loop over full neighbor list of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+    // the force on i is collected in fxtmp/fytmp/fztmp and stored once at the end
+    i = ilist[ii];
+    itag = tag[i];
+    itype = map[type[i]];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    fxtmp = fytmp = fztmp = 0.0;
+
+    // two-body interactions, skip half of them
+    // (tag parity, or coordinates when the tags are equal, selects which copy of a pair is kept)
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+      jtag = tag[j];
+
+      if (itag > jtag) {
+	if ((itag+jtag) % 2 == 0) continue;
+      } else if (itag < jtag) {
+	if ((itag+jtag) % 2 == 1) continue;
+      } else {
+	if (x[j][2] < ztmp) continue;
+	if (x[j][2] == ztmp && x[j][1] < ytmp) continue;
+	if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue;
+      }
+
+      jtype = map[type[j]];
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+
+      iparam_ij = elem2param[itype][jtype][jtype];
+      if (rsq > params[iparam_ij].cutsq) continue;
+
+      repulsive(&params[iparam_ij],rsq,fpair,EFLAG,evdwl);
+
+      fxtmp += delx*fpair;
+      fytmp += dely*fpair;
+      fztmp += delz*fpair;
+      f[j][0] -= delx*fpair;
+      f[j][1] -= dely*fpair;
+      f[j][2] -= delz*fpair;
+
+      if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,
+			       evdwl,0.0,fpair,delx,dely,delz,tid);
+    }
+
+    // three-body interactions
+    // skip immediately if I-J is not within cutoff
+    double fjxtmp,fjytmp,fjztmp;  // force on j gathered over the k loop below
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+      jtype = map[type[j]];
+      iparam_ij = elem2param[itype][jtype][jtype];
+
+      delr1[0] = x[j][0] - xtmp;
+      delr1[1] = x[j][1] - ytmp;
+      delr1[2] = x[j][2] - ztmp;
+      rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];
+      if (rsq1 > params[iparam_ij].cutsq) continue;
+
+      // accumulate bondorder zeta for each i-j interaction via loop over k
+
+      fjxtmp = fjytmp = fjztmp = 0.0;
+      zeta_ij = 0.0;
+
+      for (kk = 0; kk < jnum; kk++) {
+	if (jj == kk) continue;
+	k = jlist[kk];
+	k &= NEIGHMASK;
+	ktype = map[type[k]];
+	iparam_ijk = elem2param[itype][jtype][ktype];
+
+	delr2[0] = x[k][0] - xtmp;
+	delr2[1] = x[k][1] - ytmp;
+	delr2[2] = x[k][2] - ztmp;
+	rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2];
+	if (rsq2 > params[iparam_ijk].cutsq) continue;
+
+	zeta_ij += zeta(&params[iparam_ijk],rsq1,rsq2,delr1,delr2);
+      }
+
+      // pairwise force due to zeta
+      // (force_zeta also returns prefactor, which the attractive() loop below consumes)
+      force_zeta(&params[iparam_ij],rsq1,zeta_ij,fpair,prefactor,EFLAG,evdwl);
+
+      fxtmp += delr1[0]*fpair;
+      fytmp += delr1[1]*fpair;
+      fztmp += delr1[2]*fpair;
+      fjxtmp -= delr1[0]*fpair;
+      fjytmp -= delr1[1]*fpair;
+      fjztmp -= delr1[2]*fpair;
+      // delr1 points from i to j, hence the negated fpair and components in the tally
+      if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,evdwl,0.0,
+			       -fpair,-delr1[0],-delr1[1],-delr1[2],tid);
+
+      // attractive term via loop over k
+
+      for (kk = 0; kk < jnum; kk++) {
+	if (jj == kk) continue;
+	k = jlist[kk];
+	k &= NEIGHMASK;
+	ktype = map[type[k]];
+	iparam_ijk = elem2param[itype][jtype][ktype];
+
+	delr2[0] = x[k][0] - xtmp;
+	delr2[1] = x[k][1] - ytmp;
+	delr2[2] = x[k][2] - ztmp;
+	rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2];
+	if (rsq2 > params[iparam_ijk].cutsq) continue;
+
+	attractive(&params[iparam_ijk],prefactor,
+		   rsq1,rsq2,delr1,delr2,fi,fj,fk);
+
+	fxtmp += fi[0];
+	fytmp += fi[1];
+	fztmp += fi[2];
+	fjxtmp += fj[0];
+	fjytmp += fj[1];
+	fjztmp += fj[2];
+	f[k][0] += fk[0];
+	f[k][1] += fk[1];
+	f[k][2] += fk[2];
+
+	if (VFLAG_ATOM) v_tally3_thr(i,j,k,fj,fk,delr1,delr2,tid);
+      }
+      f[j][0] += fjxtmp;  // one store per j after its k loop
+      f[j][1] += fjytmp;
+      f[j][2] += fjztmp;
+    }
+    f[i][0] += fxtmp;  // one store per i after both passes
+    f[i][1] += fytmp;
+    f[i][2] += fztmp;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairTersoffOMP::memory_usage()  // returns memory_usage_thr() + PairTersoff::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairTersoff::memory_usage();  // add the PairTersoff base class's own figure
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_tersoff_omp.h b/src/USER-OMP/pair_tersoff_omp.h
new file mode 100644
index 000000000..5e5dc066d
--- /dev/null
+++ b/src/USER-OMP/pair_tersoff_omp.h
@@ -0,0 +1,43 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(tersoff/omp,PairTersoffOMP)
+
+#else
+
+#ifndef LMP_PAIR_TERSOFF_OMP_H
+#define LMP_PAIR_TERSOFF_OMP_H
+
+#include "pair_tersoff.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairTersoffOMP : public PairTersoff, public ThrOMP {
+  // registered above as pair style "tersoff/omp"; derives from PairTersoff and ThrOMP
+ public:
+  PairTersoffOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();  // memory_usage_thr() + PairTersoff::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int VFLAG_ATOM>  // gate ev_tally_thr / energy output / v_tally3_thr
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_tersoff_zbl_omp.cpp b/src/USER-OMP/pair_tersoff_zbl_omp.cpp
new file mode 100644
index 000000000..4265d84fb
--- /dev/null
+++ b/src/USER-OMP/pair_tersoff_zbl_omp.cpp
@@ -0,0 +1,296 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Aidan Thompson (SNL) - original Tersoff implementation
+                        David Farrell (NWU) - ZBL addition
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "pair_tersoff_zbl_omp.h"
+#include "atom.h"
+#include "update.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "neigh_request.h"
+#include "force.h"
+#include "comm.h"
+#include "memory.h"
+#include "error.h"
+
+#include "math_const.h"
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define MAXLINE 1024
+#define DELTA 4
+
+/* ----------------------------------------------------------------------
+   Fermi-like smoothing function
+------------------------------------------------------------------------- */
+
+static double F_fermi(const double r, const double expsc, const double cut)
+{
+  return 1.0 / (1.0 + exp(-expsc*(r-cut)));  // logistic in r: exactly 0.5 at r == cut, approaches 1 as expsc*(r-cut) grows
+}
+
+/* ----------------------------------------------------------------------
+   Fermi-like smoothing function derivative with respect to r
+------------------------------------------------------------------------- */
+
+static double F_fermi_d(const double r, const double expsc, const double cut)
+{
+  return expsc*exp(-expsc*(r-cut)) / pow(1.0 + exp(-expsc*(r-cut)),2.0);  // expsc*e/(1+e)^2 with e = exp(-expsc*(r-cut))
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairTersoffZBLOMP::PairTersoffZBLOMP(LAMMPS *lmp) : PairTersoffOMP(lmp)
+{
+  // hard-wired constants in metal or real units
+  // a0 = Bohr radius
+  // epsilon0 = permittivity of vacuum = q / energy-distance units
+  // e = unit charge
+  // 1 Kcal/mole = 0.043365121 eV
+  // any unit style other than "metal" or "real" is rejected with error->all() below
+  if (strcmp(update->unit_style,"metal") == 0) {
+    global_a_0 = 0.529;
+    global_epsilon_0 = 0.00552635;
+    global_e = 1.0;
+  } else if (strcmp(update->unit_style,"real") == 0) {
+    global_a_0 = 0.529;
+    global_epsilon_0 = 0.00552635 * 0.043365121;  // metal value times the Kcal/mole-to-eV factor quoted above
+    global_e = 1.0;
+  } else error->all(FLERR,"Pair tersoff/zbl requires metal or real units");
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairTersoffZBLOMP::read_file(char *file)
+{  // parse a Tersoff/ZBL potential file into params[]; rank 0 reads, each line is broadcast
+  int params_per_line = 21;  // 3 element names + 18 numeric fields
+  char **words = new char*[params_per_line+1];  // +1: the strtok() loop also stores its final NULL
+  // start from an empty table on every call
+  delete [] params;  // NOTE(review): params is grown with memory->srealloc() below -- confirm delete [] matches that allocator
+  params = NULL;
+  nparams = maxparam = 0;  // capacity is reset with the pointer so the growth test below reallocates
+
+  // open file on proc 0
+
+  FILE *fp;
+  if (comm->me == 0) {
+    fp = fopen(file,"r");
+    if (fp == NULL) {
+      char str[128];
+      snprintf(str,sizeof(str),"Cannot open Tersoff potential file %s",file);  // bounded: long paths are truncated
+      error->one(FLERR,str);
+    }
+  }
+
+  // read each line out of file, skipping blank lines or leading '#'
+  // store line of params if all 3 element tags are in element list
+
+  int n,nwords,ielement,jelement,kelement;
+  char line[MAXLINE],*ptr;
+  int eof = 0;
+
+  while (1) {
+    if (comm->me == 0) {
+      ptr = fgets(line,MAXLINE,fp);
+      if (ptr == NULL) {
+	eof = 1;
+	fclose(fp);
+      } else n = strlen(line) + 1;
+    }
+    MPI_Bcast(&eof,1,MPI_INT,0,world);
+    if (eof) break;
+    MPI_Bcast(&n,1,MPI_INT,0,world);
+    MPI_Bcast(line,n,MPI_CHAR,0,world);
+
+    // strip comment, skip line if blank
+
+    if ((ptr = strchr(line,'#'))) *ptr = '\0';
+    nwords = atom->count_words(line);
+    if (nwords == 0) continue;
+
+    // concatenate additional lines until have params_per_line words
+
+    while (nwords < params_per_line) {
+      n = strlen(line);
+      if (comm->me == 0) {
+        ptr = fgets(&line[n],MAXLINE-n,fp);
+        if (ptr == NULL) {
+	  eof = 1;
+	  fclose(fp);
+        } else n = strlen(line) + 1;
+      }
+      MPI_Bcast(&eof,1,MPI_INT,0,world);
+      if (eof) break;
+      MPI_Bcast(&n,1,MPI_INT,0,world);
+      MPI_Bcast(line,n,MPI_CHAR,0,world);
+      if ((ptr = strchr(line,'#'))) *ptr = '\0';
+      nwords = atom->count_words(line);
+    }
+
+    if (nwords != params_per_line)
+      error->all(FLERR,"Incorrect format in Tersoff potential file");
+
+    // words = ptrs to all words in line
+
+    nwords = 0;
+    words[nwords++] = strtok(line," \t\n\r\f");
+    while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue;
+
+    // ielement,jelement,kelement = 1st args
+    // if all 3 args are in element list, then parse this line
+    // else skip to next line
+
+    for (ielement = 0; ielement < nelements; ielement++)
+      if (strcmp(words[0],elements[ielement]) == 0) break;
+    if (ielement == nelements) continue;
+    for (jelement = 0; jelement < nelements; jelement++)
+      if (strcmp(words[1],elements[jelement]) == 0) break;
+    if (jelement == nelements) continue;
+    for (kelement = 0; kelement < nelements; kelement++)
+      if (strcmp(words[2],elements[kelement]) == 0) break;
+    if (kelement == nelements) continue;
+
+    // load up parameter settings and error check their values
+
+    if (nparams == maxparam) {
+      maxparam += DELTA;
+      params = (Param *) memory->srealloc(params,maxparam*sizeof(Param),
+					  "pair:params");
+    }
+
+    params[nparams].ielement = ielement;
+    params[nparams].jelement = jelement;
+    params[nparams].kelement = kelement;
+    params[nparams].powerm = atof(words[3]);
+    params[nparams].gamma = atof(words[4]);
+    params[nparams].lam3 = atof(words[5]);
+    params[nparams].c = atof(words[6]);
+    params[nparams].d = atof(words[7]);
+    params[nparams].h = atof(words[8]);
+    params[nparams].powern = atof(words[9]);
+    params[nparams].beta = atof(words[10]);
+    params[nparams].lam2 = atof(words[11]);
+    params[nparams].bigb = atof(words[12]);
+    params[nparams].bigr = atof(words[13]);
+    params[nparams].bigd = atof(words[14]);
+    params[nparams].lam1 = atof(words[15]);
+    params[nparams].biga = atof(words[16]);
+    params[nparams].Z_i = atof(words[17]);
+    params[nparams].Z_j = atof(words[18]);
+    params[nparams].ZBLcut = atof(words[19]);
+    params[nparams].ZBLexpscale = atof(words[20]);
+
+    // currently only allow m exponent of 1 or 3
+
+    params[nparams].powermint = int(params[nparams].powerm);
+    // lam1 is validated here; the original tested lam3 a second time in its place
+    if (
+	params[nparams].lam3 < 0.0 || params[nparams].c < 0.0 || 
+	params[nparams].d < 0.0 || params[nparams].powern < 0.0 || 
+	params[nparams].beta < 0.0 || params[nparams].lam2 < 0.0 || 
+	params[nparams].bigb < 0.0 || params[nparams].bigr < 0.0 ||
+	params[nparams].bigd < 0.0 ||
+	params[nparams].bigd > params[nparams].bigr ||
+	params[nparams].lam1 < 0.0 || params[nparams].biga < 0.0 ||
+	params[nparams].powerm - params[nparams].powermint != 0.0 ||
+        (params[nparams].powermint != 3 && params[nparams].powermint != 1) ||
+	params[nparams].gamma < 0.0 ||
+	params[nparams].Z_i < 1.0 || params[nparams].Z_j < 1.0 ||
+	params[nparams].ZBLcut < 0.0 || params[nparams].ZBLexpscale < 0.0)
+      error->all(FLERR,"Illegal Tersoff parameter");
+
+    nparams++;
+  }
+
+  delete [] words;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairTersoffZBLOMP::force_zeta(Param *param, double rsq, double zeta_ij,
+				   double &fforce, double &prefactor,
+				   int eflag, double &eng)
+{
+  double r,fa,fa_d,bij;
+  // outputs: fforce and prefactor always, eng only when eflag is nonzero
+  r = sqrt(rsq);
+  // fa = -bigb*exp(-lam2*r)*ters_fc(r)*F_fermi(r), forced to 0 beyond bigr+bigd
+  fa = (r > param->bigr + param->bigd) ? 0.0 :
+    -param->bigb * exp(-param->lam2 * r) * ters_fc(r,param) * 
+    F_fermi(r,param->ZBLexpscale,param->ZBLcut);
+  // fa_d = d(fa)/dr: product rule over the exponential, ters_fc and F_fermi factors
+  fa_d = (r > param->bigr + param->bigd) ? 0.0 :
+    param->bigb * exp(-param->lam2 * r) *
+    (param->lam2 * ters_fc(r,param) * 
+     F_fermi(r,param->ZBLexpscale,param->ZBLcut) - 
+     ters_fc_d(r,param) * F_fermi(r,param->ZBLexpscale,param->ZBLcut)
+     - ters_fc(r,param) * F_fermi_d(r,param->ZBLexpscale,param->ZBLcut));
+
+  bij = ters_bij(zeta_ij,param);
+  fforce = 0.5*bij*fa_d / r;  // already divided by r
+  prefactor = -0.5*fa * ters_bij_d(zeta_ij,param);
+  if (eflag) eng = 0.5*bij*fa;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairTersoffZBLOMP::repulsive(Param *param, double rsq, double &fforce,
+			       int eflag, double &eng)
+{
+  double r,tmp_fc,tmp_fc_d,tmp_exp;
+  // outputs: fforce always, eng only when eflag is nonzero
+  // Tersoff repulsive portion
+
+  r = sqrt(rsq);
+  tmp_fc = ters_fc(r,param);
+  tmp_fc_d = ters_fc_d(r,param);
+  tmp_exp = exp(-param->lam1 * r);
+  double fforce_ters = param->biga * tmp_exp * (tmp_fc_d - tmp_fc*param->lam1);  // d(eng_ters)/dr
+  double eng_ters = tmp_fc * param->biga * tmp_exp;
+	
+  // ZBL repulsive portion
+
+  double esq = pow(global_e,2.0);
+  double a_ij = (0.8854*global_a_0) / 
+    (pow(param->Z_i,0.23) + pow(param->Z_j,0.23));
+  double premult = (param->Z_i * param->Z_j * esq)/(4.0*MY_PI*global_epsilon_0);
+  double r_ov_a = r/a_ij;
+  double phi = 0.1818*exp(-3.2*r_ov_a) + 0.5099*exp(-0.9423*r_ov_a) + 
+    0.2802*exp(-0.4029*r_ov_a) + 0.02817*exp(-0.2016*r_ov_a);
+  double dphi = (1.0/a_ij) * (-3.2*0.1818*exp(-3.2*r_ov_a) - 
+			      0.9423*0.5099*exp(-0.9423*r_ov_a) - 
+			      0.4029*0.2802*exp(-0.4029*r_ov_a) - 
+			      0.2016*0.02817*exp(-0.2016*r_ov_a));
+  double fforce_ZBL = premult*-pow(r,-2.0)* phi + premult*pow(r,-1.0)*dphi;  // d(eng_ZBL)/dr
+  double eng_ZBL = premult*(1.0/r)*phi;
+  
+  // combine two parts with smoothing by Fermi-like function
+  // fforce = -(dE/dr)/r for E = (1-F)*eng_ZBL + F*eng_ters, F = F_fermi(r)
+  fforce = -(-F_fermi_d(r,param->ZBLexpscale,param->ZBLcut) * eng_ZBL +
+	     (1.0 - F_fermi(r,param->ZBLexpscale,param->ZBLcut))*fforce_ZBL +
+	     F_fermi_d(r,param->ZBLexpscale,param->ZBLcut)*eng_ters +
+	     F_fermi(r,param->ZBLexpscale,param->ZBLcut)*fforce_ters) / r;
+  
+  if (eflag)
+    eng = (1.0 - F_fermi(r,param->ZBLexpscale,param->ZBLcut))*eng_ZBL +
+      F_fermi(r,param->ZBLexpscale,param->ZBLcut)*eng_ters;
+}
+
diff --git a/src/USER-OMP/pair_tersoff_zbl_omp.h b/src/USER-OMP/pair_tersoff_zbl_omp.h
new file mode 100644
index 000000000..84d6ef113
--- /dev/null
+++ b/src/USER-OMP/pair_tersoff_zbl_omp.h
@@ -0,0 +1,45 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(tersoff/zbl/omp,PairTersoffZBLOMP)
+
+#else
+
+#ifndef LMP_PAIR_TERSOFF_ZBL_OMP_H
+#define LMP_PAIR_TERSOFF_ZBL_OMP_H
+
+#include "pair_tersoff_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairTersoffZBLOMP : public PairTersoffOMP {  // registered above as pair style "tersoff/zbl/omp"
+ public:
+  PairTersoffZBLOMP(class LAMMPS *);
+  virtual ~PairTersoffZBLOMP() {}
+
+ protected:
+  double global_a_0;		// Bohr radius for Coulomb repulsion
+  double global_epsilon_0;	// permittivity of vacuum for Coulomb repulsion
+  double global_e;		// proton charge (negative of electron charge)
+  // 21-column file reader, plus the two pair hooks that PairTersoffOMP::eval() calls
+  virtual void read_file(char *);
+  virtual void repulsive(Param *, double, double &, int, double &);
+  virtual void force_zeta(Param *, double, double, double &, double &, int, double &);
+
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_yukawa_colloid_omp.cpp
similarity index 79%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_yukawa_colloid_omp.cpp
index 8ed82c5e5..710ad9df1 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_yukawa_colloid_omp.cpp
@@ -1,163 +1,164 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_yukawa_colloid_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairYukawaColloidOMP::PairYukawaColloidOMP(LAMMPS *lmp) :
+  PairYukawaColloid(lmp), ThrOMP(lmp, PAIR)  // both bases are built from the same LAMMPS pointer
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairYukawaColloidOMP::compute(int eflag, int vflag)  // driver: tally setup, eval<>() per thread, reductions
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);  // NOTE(review): 3 is presumably values per atom -- confirm
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairYukawaColloidOMP::eval(double **f, int iifrom, int iito, int tid)  // force kernel for ilist entries [iifrom,iito)
 {
   int i,j,ii,jj,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair,radi,radj;
+  double rsq,r,rinv,r2inv,screening,forceyukawa,factor;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
 
   double **x = atom->x;
+  double *radius = atom->radius;  // read per atom below as radius[i] and radius[j]
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
+    radi = radius[i];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
-      factor_lj = special_lj[sbmask(j)];
+      factor = special_lj[sbmask(j)];  // must be read before j is masked on the next line
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
+      radj = radius[j];
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 	r2inv = 1.0/rsq;
-	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-	fpair = factor_lj*forcelj*r2inv;
+	r = sqrt(rsq);  // NOTE(review): r2inv above is assigned but never read in this kernel
+	rinv = 1.0/r;
+	screening = exp(-kappa*(r-(radi+radj)));  // decays with r minus the sum of the two radii
+	forceyukawa = a[itype][jtype] * screening;
+	// fpair is the force magnitude divided by r, so delx*fpair etc. are Cartesian components
+	fpair = factor*forceyukawa * rinv;
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
-	  evdwl *= factor_lj;
+	  evdwl = a[itype][jtype]/kappa * screening - offset[itype][jtype];  // -d/dr of the first term equals forceyukawa
+	  evdwl *= factor;
 	}
-
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
 				 evdwl,0.0,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairYukawaColloidOMP::memory_usage()  // returns memory_usage_thr() + PairYukawaColloid::memory_usage()
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairYukawaColloid::memory_usage();  // add the PairYukawaColloid base class's own figure
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_yukawa_colloid_omp.h b/src/USER-OMP/pair_yukawa_colloid_omp.h
new file mode 100644
index 000000000..9483cd15c
--- /dev/null
+++ b/src/USER-OMP/pair_yukawa_colloid_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(yukawa/colloid/omp,PairYukawaColloidOMP)
+
+#else
+
+#ifndef LMP_PAIR_YUKAWA_COLLOID_OMP_H
+#define LMP_PAIR_YUKAWA_COLLOID_OMP_H
+
+#include "pair_yukawa_colloid.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairYukawaColloidOMP : public PairYukawaColloid, public ThrOMP {
+  // registered above as pair style "yukawa/colloid/omp"; derives from PairYukawaColloid and ThrOMP
+ public:
+  PairYukawaColloidOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();  // memory_usage_thr() + PairYukawaColloid::memory_usage()
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>  // compile-time switches selected in compute()
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_omp.cpp b/src/USER-OMP/pair_yukawa_omp.cpp
similarity index 82%
copy from src/USER-OMP/pair_lj_cut_omp.cpp
copy to src/USER-OMP/pair_yukawa_omp.cpp
index 8ed82c5e5..1380e2239 100644
--- a/src/USER-OMP/pair_lj_cut_omp.cpp
+++ b/src/USER-OMP/pair_yukawa_omp.cpp
@@ -1,163 +1,162 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    This software is distributed under the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "math.h"
-#include "pair_lj_cut_omp.h"
+#include "pair_yukawa_omp.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
-#include "neigh_request.h"
-#include "memory.h"
-#include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-PairLJCutOMP::PairLJCutOMP(LAMMPS *lmp) :
-  PairLJCut(lmp), ThrOMP(lmp, PAIR)
+PairYukawaOMP::PairYukawaOMP(LAMMPS *lmp) :
+  PairYukawa(lmp), ThrOMP(lmp, PAIR)
 {
   respa_enable = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void PairLJCutOMP::compute(int eflag, int vflag)
+void PairYukawaOMP::compute(int eflag, int vflag)
 {
   if (eflag || vflag) {
     ev_setup(eflag,vflag);
     ev_setup_thr(this);
   } else evflag = vflag_fdotr = 0;
 
   const int nall = atom->nlocal + atom->nghost;
   const int nthreads = comm->nthreads;
   const int inum = list->inum;
 
 #if defined(_OPENMP)
 #pragma omp parallel default(shared)
 #endif
   {
     int ifrom, ito, tid;
     double **f;
 
     f = loop_setup_thr(atom->f, ifrom, ito, tid, inum, nall, nthreads);
 
     if (evflag) {
       if (eflag) {
 	if (force->newton_pair) eval<1,1,1>(f, ifrom, ito, tid);
 	else eval<1,1,0>(f, ifrom, ito, tid);
       } else {
 	if (force->newton_pair) eval<1,0,1>(f, ifrom, ito, tid);
 	else eval<1,0,0>(f, ifrom, ito, tid);
       }
     } else {
       if (force->newton_pair) eval<0,0,1>(f, ifrom, ito, tid);
       else eval<0,0,0>(f, ifrom, ito, tid);
     }
 
     // reduce per thread forces into global force array.
-    force_reduce_thr(&(atom->f[0][0]), nall, nthreads, tid);
+    data_reduce_thr(&(atom->f[0][0]), nall, nthreads, 3, tid);
   } // end of omp parallel region
 
   // reduce per thread energy and virial, if requested.
   if (evflag) ev_reduce_thr(this);
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
-void PairLJCutOMP::eval(double **f, int iifrom, int iito, int tid)
+void PairYukawaOMP::eval(double **f, int iifrom, int iito, int tid)
 {
   int i,j,ii,jj,jnum,itype,jtype;
   double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
-  double rsq,r2inv,r6inv,forcelj,factor_lj;
+  double rsq,r2inv,r,rinv,screening,forceyukawa,factor;
   int *ilist,*jlist,*numneigh,**firstneigh;
 
   evdwl = 0.0;
 
   double **x = atom->x;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   double *special_lj = force->special_lj;
   double fxtmp,fytmp,fztmp;
 
   ilist = list->ilist;
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   // loop over neighbors of my atoms
 
   for (ii = iifrom; ii < iito; ++ii) {
 
     i = ilist[ii];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
     itype = type[i];
     jlist = firstneigh[i];
     jnum = numneigh[i];
     fxtmp=fytmp=fztmp=0.0;
 
     for (jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
-      factor_lj = special_lj[sbmask(j)];
+      factor = special_lj[sbmask(j)];
       j &= NEIGHMASK;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
       rsq = delx*delx + dely*dely + delz*delz;
       jtype = type[j];
 
       if (rsq < cutsq[itype][jtype]) {
 	r2inv = 1.0/rsq;
-	r6inv = r2inv*r2inv*r2inv;
-	forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
-	fpair = factor_lj*forcelj*r2inv;
+	r = sqrt(rsq);
+	rinv = 1.0/r;
+	screening = exp(-kappa*r);
+	forceyukawa = a[itype][jtype] * screening * (kappa + rinv);
+
+	fpair = factor*forceyukawa * r2inv;
 
 	fxtmp += delx*fpair;
 	fytmp += dely*fpair;
 	fztmp += delz*fpair;
 	if (NEWTON_PAIR || j < nlocal) {
 	  f[j][0] -= delx*fpair;
 	  f[j][1] -= dely*fpair;
 	  f[j][2] -= delz*fpair;
 	}
 
 	if (EFLAG) {
-	  evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])
-	    - offset[itype][jtype];
-	  evdwl *= factor_lj;
+	  evdwl = a[itype][jtype] * screening * rinv - offset[itype][jtype];
+	  evdwl *= factor;
 	}
 
 	if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
 				 evdwl,0.0,fpair,delx,dely,delz,tid);
       }
     }
     f[i][0] += fxtmp;
     f[i][1] += fytmp;
     f[i][2] += fztmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-double PairLJCutOMP::memory_usage()
+double PairYukawaOMP::memory_usage()
 {
   double bytes = memory_usage_thr();
-  bytes += PairLJCut::memory_usage();
+  bytes += PairYukawa::memory_usage();
 
   return bytes;
 }
diff --git a/src/USER-OMP/pair_yukawa_omp.h b/src/USER-OMP/pair_yukawa_omp.h
new file mode 100644
index 000000000..e363ac6d1
--- /dev/null
+++ b/src/USER-OMP/pair_yukawa_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(yukawa/omp,PairYukawaOMP)
+
+#else
+
+#ifndef LMP_PAIR_YUKAWA_OMP_H
+#define LMP_PAIR_YUKAWA_OMP_H
+
+#include "pair_yukawa.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairYukawaOMP : public PairYukawa, public ThrOMP {
+
+ public:
+  PairYukawaOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(double **f, int ifrom, int ito, int tid);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/thr_omp.cpp b/src/USER-OMP/thr_omp.cpp
index d05fae5b3..37ce1f198 100644
--- a/src/USER-OMP/thr_omp.cpp
+++ b/src/USER-OMP/thr_omp.cpp
@@ -1,392 +1,833 @@
 /* -------------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    OpenMP based threading support for LAMMPS
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #include "thr_omp.h"
 
 #include "memory.h"
 
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 
 #include "pair.h"
 #include "dihedral.h"
 
 #if defined(_OPENMP)
 #include <omp.h>
 #endif
 
+#include "math_const.h"
+
 using namespace LAMMPS_NS;
+using namespace MathConst;
 
 /* ---------------------------------------------------------------------- */
 
 ThrOMP::ThrOMP(LAMMPS *ptr, int style) : thr_style(style), lmp(ptr)
 {
   // initialize fixed size per thread storage
   eng_vdwl_thr = eng_coul_thr = eng_bond_thr = NULL;
   virial_thr = NULL;
+
   lmp->memory->create(eng_vdwl_thr,lmp->comm->nthreads,"thr_omp:eng_vdwl_thr");
   lmp->memory->create(eng_coul_thr,lmp->comm->nthreads,"thr_omp:eng_coul_thr");
   lmp->memory->create(eng_bond_thr,lmp->comm->nthreads,"thr_omp:eng_bond_thr");
   lmp->memory->create(virial_thr,lmp->comm->nthreads,6,"thr_omp:virial_thr");
 
   // variable size per thread, per atom storage
-  // the actually allocation happens via memory->grow() in ev_steup_thr()
+  // the actual allocation happens via memory->grow() in ev_setup_thr()
   maxeatom_thr = maxvatom_thr = 0;
+  evflag_global = evflag_atom = 0;
   eatom_thr = NULL;
   vatom_thr = NULL;
 }
 
 /* ---------------------------------------------------------------------- */
 
 ThrOMP::~ThrOMP()
 {
   lmp->memory->destroy(eng_vdwl_thr);
   lmp->memory->destroy(eng_coul_thr);
   lmp->memory->destroy(eng_bond_thr);
   lmp->memory->destroy(virial_thr);
   lmp->memory->destroy(eatom_thr);
   lmp->memory->destroy(vatom_thr);
 }
 
 /* ---------------------------------------------------------------------- */
 
-void ThrOMP::ev_zero_acc_thr(int ntotal, int eflag_global, int vflag_global,
+void ThrOMP::ev_setup_acc_thr(int ntotal, int eflag_global, int vflag_global,
 			     int eflag_atom, int vflag_atom, int nthreads)
 {
   int t,i;
+
+  evflag_global = (eflag_global || vflag_global);
+  evflag_atom = (eflag_atom || vflag_atom);
   
   for (t = 0; t < nthreads; ++t) {
 
     if (eflag_global) 
       eng_vdwl_thr[t] = eng_coul_thr[t] = eng_bond_thr[t] = 0.0;
 
     if (vflag_global) 
       for (i = 0; i < 6; ++i)
 	virial_thr[t][i] = 0.0;
 
     if (eflag_atom)
       for (i = 0; i < ntotal; ++i)
 	eatom_thr[t][i] = 0.0;
     
     if (vflag_atom)
       for (i = 0; i < ntotal; ++i) {
         vatom_thr[t][i][0] = 0.0;
         vatom_thr[t][i][1] = 0.0;
         vatom_thr[t][i][2] = 0.0;
         vatom_thr[t][i][3] = 0.0;
         vatom_thr[t][i][4] = 0.0;
         vatom_thr[t][i][5] = 0.0;
       }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ThrOMP::ev_setup_thr(Dihedral *dihed)
 {
   int nthreads = lmp->comm->nthreads;
 
-  // reallocate per-atom arrays if necessary
+  // reallocate per-atom arrays if necessary; each array tracks its own capacity
   if (dihed->eflag_atom && lmp->atom->nmax > maxeatom_thr) {
     maxeatom_thr = lmp->atom->nmax;
     lmp->memory->grow(eatom_thr,nthreads,maxeatom_thr,"thr_omp:eatom_thr");
   }
   if (dihed->vflag_atom && lmp->atom->nmax > maxvatom_thr) {
     maxvatom_thr = lmp->atom->nmax;
-    lmp->memory->grow(vatom_thr,nthreads,maxeatom_thr,6,"thr_omp:vatom_thr");
+    lmp->memory->grow(vatom_thr,nthreads,maxvatom_thr,6,"thr_omp:vatom_thr");
   }
 
   int ntotal = (lmp->force->newton_bond) ? 
     (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal;
 
-  // zero per thread accumulators
-  ev_zero_acc_thr(ntotal, dihed->eflag_global, dihed->vflag_global,
-		  dihed->eflag_atom, dihed->vflag_atom, nthreads);
+  // set up per thread accumulators
+  ev_setup_acc_thr(ntotal, dihed->eflag_global, dihed->vflag_global,
+		   dihed->eflag_atom, dihed->vflag_atom, nthreads);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ThrOMP::ev_setup_thr(Pair *pair)
 {
   int nthreads = lmp->comm->nthreads;
 
-  // reallocate per-atom arrays if necessary
+  // reallocate per-atom arrays if necessary; each array tracks its own capacity
   if (pair->eflag_atom && lmp->atom->nmax > maxeatom_thr) {
     maxeatom_thr = lmp->atom->nmax;
     lmp->memory->grow(eatom_thr,nthreads,maxeatom_thr,"thr_omp:eatom_thr");
   }
   if (pair->vflag_atom && lmp->atom->nmax > maxvatom_thr) {
     maxvatom_thr = lmp->atom->nmax;
-    lmp->memory->grow(vatom_thr,nthreads,maxeatom_thr,6,"thr_omp:vatom_thr");
+    lmp->memory->grow(vatom_thr,nthreads,maxvatom_thr,6,"thr_omp:vatom_thr");
   }
 
   int ntotal = (lmp->force->newton) ?
     (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal;
 
-  // zero per thread accumulators
-  ev_zero_acc_thr(ntotal, pair->eflag_global, pair->vflag_global,
-		  pair->eflag_atom, pair->vflag_atom, nthreads);
+  // set up per thread accumulators
+  ev_setup_acc_thr(ntotal, pair->eflag_global, pair->vflag_global,
+		   pair->eflag_atom, pair->vflag_atom, nthreads);
 }
 
 /* ----------------------------------------------------------------------
    reduce the per thread accumulated E/V data into the canonical accumulators.
 ------------------------------------------------------------------------- */
 void ThrOMP::ev_reduce_thr(Dihedral *dihed)
 {
   int nthreads = lmp->comm->nthreads;
   int ntotal = (lmp->force->newton_bond) ?
     (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal;
 
   for (int n = 0; n < nthreads; ++n) {
     dihed->energy += eng_bond_thr[n];
     if (dihed->vflag_either) {
       dihed->virial[0] += virial_thr[n][0];
       dihed->virial[1] += virial_thr[n][1];
       dihed->virial[2] += virial_thr[n][2];
       dihed->virial[3] += virial_thr[n][3];
       dihed->virial[4] += virial_thr[n][4];
       dihed->virial[5] += virial_thr[n][5];
       if (dihed->vflag_atom) {
         for (int i = 0; i < ntotal; ++i) {
           dihed->vatom[i][0] += vatom_thr[n][i][0];
           dihed->vatom[i][1] += vatom_thr[n][i][1];
           dihed->vatom[i][2] += vatom_thr[n][i][2];
           dihed->vatom[i][3] += vatom_thr[n][i][3];
           dihed->vatom[i][4] += vatom_thr[n][i][4];
           dihed->vatom[i][5] += vatom_thr[n][i][5];
         }
       }
     }
     if (dihed->eflag_atom) {
       for (int i = 0; i < ntotal; ++i) {
         dihed->eatom[i] += eatom_thr[n][i];
       }
     }
   }
 }
 
+/* ----------------------------------------------------------------------
+   reduce the per thread accumulated E/V data into the canonical accumulators.
+------------------------------------------------------------------------- */
+void ThrOMP::ev_reduce_thr(Pair *pair)
+{
+  const int nthreads = lmp->comm->nthreads;
+  const int ntotal = (lmp->force->newton) ? 
+    (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal;
+
+  for (int n = 0; n < nthreads; ++n) {
+    pair->eng_vdwl += eng_vdwl_thr[n];
+    pair->eng_coul += eng_coul_thr[n];
+    if (pair->vflag_either) {
+      pair->virial[0] += virial_thr[n][0];
+      pair->virial[1] += virial_thr[n][1];
+      pair->virial[2] += virial_thr[n][2];
+      pair->virial[3] += virial_thr[n][3];
+      pair->virial[4] += virial_thr[n][4];
+      pair->virial[5] += virial_thr[n][5];
+      if (pair->vflag_atom) {
+        for (int i = 0; i < ntotal; ++i) {
+          pair->vatom[i][0] += vatom_thr[n][i][0];
+          pair->vatom[i][1] += vatom_thr[n][i][1];
+          pair->vatom[i][2] += vatom_thr[n][i][2];
+          pair->vatom[i][3] += vatom_thr[n][i][3];
+          pair->vatom[i][4] += vatom_thr[n][i][4];
+          pair->vatom[i][5] += vatom_thr[n][i][5];
+        }
+      }
+    }
+    if (pair->eflag_atom) {
+      for (int i = 0; i < ntotal; ++i) {
+        pair->eatom[i] += eatom_thr[n][i];
+      }
+    }
+  }
+}
+
 /* ----------------------------------------------------------------------
    tally eng_vdwl and virial into per thread global and per-atom accumulators
    need i < nlocal test since called by bond_quartic and dihedral_charmm
 ------------------------------------------------------------------------- */
 
 void ThrOMP::ev_tally_thr(Pair *pair, int i, int j, int nlocal,
 			  int newton_pair, double evdwl, double ecoul,
 			  double fpair, double delx, double dely,
 			  double delz, int tid)
 {
   double evdwlhalf,ecoulhalf,epairhalf,v[6];
 
   if (pair->eflag_either) {
     if (pair->eflag_global) {
       if (newton_pair) {
 	eng_vdwl_thr[tid] += evdwl;
 	eng_coul_thr[tid] += ecoul;
       } else {
 	evdwlhalf = 0.5*evdwl;
 	ecoulhalf = 0.5*ecoul;
 	if (i < nlocal) {
 	  eng_vdwl_thr[tid] += evdwlhalf;
 	  eng_coul_thr[tid] += ecoulhalf;
 	}
 	if (j < nlocal) {
 	  eng_vdwl_thr[tid] += evdwlhalf;
 	  eng_coul_thr[tid] += ecoulhalf;
 	}
       }
     }
     if (pair->eflag_atom) {
       epairhalf = 0.5 * (evdwl + ecoul);
       if (newton_pair || i < nlocal) eatom_thr[tid][i] += epairhalf;
       if (newton_pair || j < nlocal) eatom_thr[tid][j] += epairhalf;
     }
   }
 
   if (pair->vflag_either) {
     v[0] = delx*delx*fpair;
     v[1] = dely*dely*fpair;
     v[2] = delz*delz*fpair;
     v[3] = delx*dely*fpair;
     v[4] = delx*delz*fpair;
     v[5] = dely*delz*fpair;
 
     if (pair->vflag_global) {
       if (newton_pair) {
 	virial_thr[tid][0] += v[0];
 	virial_thr[tid][1] += v[1];
 	virial_thr[tid][2] += v[2];
 	virial_thr[tid][3] += v[3];
 	virial_thr[tid][4] += v[4];
 	virial_thr[tid][5] += v[5];
       } else {
 	if (i < nlocal) {
 	  virial_thr[tid][0] += 0.5*v[0];
 	  virial_thr[tid][1] += 0.5*v[1];
 	  virial_thr[tid][2] += 0.5*v[2];
 	  virial_thr[tid][3] += 0.5*v[3];
 	  virial_thr[tid][4] += 0.5*v[4];
 	  virial_thr[tid][5] += 0.5*v[5];
 	}
 	if (j < nlocal) {
 	  virial_thr[tid][0] += 0.5*v[0];
 	  virial_thr[tid][1] += 0.5*v[1];
 	  virial_thr[tid][2] += 0.5*v[2];
 	  virial_thr[tid][3] += 0.5*v[3];
 	  virial_thr[tid][4] += 0.5*v[4];
 	  virial_thr[tid][5] += 0.5*v[5];
 	}
       }
     }
 
     if (pair->vflag_atom) {
       if (newton_pair || i < nlocal) {
 	vatom_thr[tid][i][0] += 0.5*v[0];
 	vatom_thr[tid][i][1] += 0.5*v[1];
 	vatom_thr[tid][i][2] += 0.5*v[2];
 	vatom_thr[tid][i][3] += 0.5*v[3];
 	vatom_thr[tid][i][4] += 0.5*v[4];
 	vatom_thr[tid][i][5] += 0.5*v[5];
       }
       if (newton_pair || j < nlocal) {
 	vatom_thr[tid][j][0] += 0.5*v[0];
 	vatom_thr[tid][j][1] += 0.5*v[1];
 	vatom_thr[tid][j][2] += 0.5*v[2];
 	vatom_thr[tid][j][3] += 0.5*v[3];
 	vatom_thr[tid][j][4] += 0.5*v[4];
 	vatom_thr[tid][j][5] += 0.5*v[5];
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
-   reduce the per thread accumulated E/V data into the canonical accumulators.
+   tally eng_vdwl and virial into per thread global and per-atom accumulators
+   for virial, have delx,dely,delz and fx,fy,fz
 ------------------------------------------------------------------------- */
-void ThrOMP::ev_reduce_thr(Pair *pair)
+
+void ThrOMP::ev_tally_xyz_thr(Pair *pair, int i, int j, int nlocal,
+			      int newton_pair, double evdwl, double ecoul,
+			      double fx, double fy, double fz,
+			      double delx, double dely, double delz, int tid)
 {
-  const int nthreads = lmp->comm->nthreads;
-  const int ntotal = (lmp->force->newton) ? 
-    (lmp->atom->nlocal + lmp->atom->nghost) : lmp->atom->nlocal;
+  double evdwlhalf,ecoulhalf,epairhalf,v[6];
 
-  for (int n = 0; n < nthreads; ++n) {
-    pair->eng_vdwl += eng_vdwl_thr[n];
-    pair->eng_coul += eng_coul_thr[n];
-    if (pair->vflag_either) {
-      pair->virial[0] += virial_thr[n][0];
-      pair->virial[1] += virial_thr[n][1];
-      pair->virial[2] += virial_thr[n][2];
-      pair->virial[3] += virial_thr[n][3];
-      pair->virial[4] += virial_thr[n][4];
-      pair->virial[5] += virial_thr[n][5];
-      if (pair->vflag_atom) {
-        for (int i = 0; i < ntotal; ++i) {
-          pair->vatom[i][0] += vatom_thr[n][i][0];
-          pair->vatom[i][1] += vatom_thr[n][i][1];
-          pair->vatom[i][2] += vatom_thr[n][i][2];
-          pair->vatom[i][3] += vatom_thr[n][i][3];
-          pair->vatom[i][4] += vatom_thr[n][i][4];
-          pair->vatom[i][5] += vatom_thr[n][i][5];
-        }
+  if (pair->eflag_either) {
+    if (pair->eflag_global) {
+      if (newton_pair) {
+	eng_vdwl_thr[tid] += evdwl;
+	eng_coul_thr[tid] += ecoul;
+      } else {
+	evdwlhalf = 0.5*evdwl;
+	ecoulhalf = 0.5*ecoul;
+	if (i < nlocal) {
+	  eng_vdwl_thr[tid] += evdwlhalf;
+	  eng_coul_thr[tid] += ecoulhalf;
+	}
+	if (j < nlocal) {
+	  eng_vdwl_thr[tid] += evdwlhalf;
+	  eng_coul_thr[tid] += ecoulhalf;
+	}
       }
     }
     if (pair->eflag_atom) {
-      for (int i = 0; i < ntotal; ++i) {
-        pair->eatom[i] += eatom_thr[n][i];
+      epairhalf = 0.5 * (evdwl + ecoul);
+      if (newton_pair || i < nlocal) eatom_thr[tid][i] += epairhalf;
+      if (newton_pair || j < nlocal) eatom_thr[tid][j] += epairhalf;
+    }
+  }
+
+  if (pair->vflag_either) {
+    v[0] = delx*fx;
+    v[1] = dely*fy;
+    v[2] = delz*fz;
+    v[3] = delx*fy;
+    v[4] = delx*fz;
+    v[5] = dely*fz;
+
+    if (pair->vflag_global) {
+      if (newton_pair) {
+	virial_thr[tid][0] += v[0];
+	virial_thr[tid][1] += v[1];
+	virial_thr[tid][2] += v[2];
+	virial_thr[tid][3] += v[3];
+	virial_thr[tid][4] += v[4];
+	virial_thr[tid][5] += v[5];
+      } else {
+	if (i < nlocal) {
+	  virial_thr[tid][0] += 0.5*v[0];
+	  virial_thr[tid][1] += 0.5*v[1];
+	  virial_thr[tid][2] += 0.5*v[2];
+	  virial_thr[tid][3] += 0.5*v[3];
+	  virial_thr[tid][4] += 0.5*v[4];
+	  virial_thr[tid][5] += 0.5*v[5];
+	}
+	if (j < nlocal) {
+	  virial_thr[tid][0] += 0.5*v[0];
+	  virial_thr[tid][1] += 0.5*v[1];
+	  virial_thr[tid][2] += 0.5*v[2];
+	  virial_thr[tid][3] += 0.5*v[3];
+	  virial_thr[tid][4] += 0.5*v[4];
+	  virial_thr[tid][5] += 0.5*v[5];
+	}
+      }
+    }
+
+    if (pair->vflag_atom) {
+      if (newton_pair || i < nlocal) {
+	vatom_thr[tid][i][0] += 0.5*v[0];
+	vatom_thr[tid][i][1] += 0.5*v[1];
+	vatom_thr[tid][i][2] += 0.5*v[2];
+	vatom_thr[tid][i][3] += 0.5*v[3];
+	vatom_thr[tid][i][4] += 0.5*v[4];
+	vatom_thr[tid][i][5] += 0.5*v[5];
+      }
+      if (newton_pair || j < nlocal) {
+	vatom_thr[tid][j][0] += 0.5*v[0];
+	vatom_thr[tid][j][1] += 0.5*v[1];
+	vatom_thr[tid][j][2] += 0.5*v[2];
+	vatom_thr[tid][j][3] += 0.5*v[3];
+	vatom_thr[tid][j][4] += 0.5*v[4];
+	vatom_thr[tid][j][5] += 0.5*v[5];
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   tally eng_vdwl and virial into per thread global and per-atom accumulators
+   called by SW and hbond potentials, newton_pair is always on
+   virial = riFi + rjFj + rkFk = (rj-ri) Fj + (rk-ri) Fk = drji*fj + drki*fk
+ ------------------------------------------------------------------------- */
+
+void ThrOMP::ev_tally3_thr(Pair *pair, int i, int j, int k, double evdwl, double ecoul,
+			   double *fj, double *fk, double *drji, double *drki, int tid)
+{
+  double epairthird,v[6];
+
+  if (pair->eflag_either) {
+    if (pair->eflag_global) {
+      eng_vdwl_thr[tid] += evdwl;
+      eng_coul_thr[tid] += ecoul;
+    }
+    if (pair->eflag_atom) {
+      epairthird = THIRD * (evdwl + ecoul);
+      eatom_thr[tid][i] += epairthird;
+      eatom_thr[tid][j] += epairthird;
+      eatom_thr[tid][k] += epairthird;
+    }
+  }
+
+  if (pair->vflag_either) {
+    v[0] = drji[0]*fj[0] + drki[0]*fk[0];
+    v[1] = drji[1]*fj[1] + drki[1]*fk[1];
+    v[2] = drji[2]*fj[2] + drki[2]*fk[2];
+    v[3] = drji[0]*fj[1] + drki[0]*fk[1];
+    v[4] = drji[0]*fj[2] + drki[0]*fk[2];
+    v[5] = drji[1]*fj[2] + drki[1]*fk[2];
+      
+    if (pair->vflag_global) {
+      virial_thr[tid][0] += v[0];
+      virial_thr[tid][1] += v[1];
+      virial_thr[tid][2] += v[2];
+      virial_thr[tid][3] += v[3];
+      virial_thr[tid][4] += v[4];
+      virial_thr[tid][5] += v[5];
+    }
+
+    if (pair->vflag_atom) {
+      for (int n=0; n < 6; ++n) {
+	vatom_thr[tid][i][n] += THIRD*v[n];
+	vatom_thr[tid][j][n] += THIRD*v[n];
+	vatom_thr[tid][k][n] += THIRD*v[n];
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   tally eng_vdwl into per thread global/per-atom accumulators, virial into per-atom only
+   called by AIREBO potential, newton_pair is always on
+ ------------------------------------------------------------------------- */
+
+void ThrOMP::ev_tally4_thr(Pair *pair, int i, int j, int k, int m, double evdwl,
+			   double *fi, double *fj, double *fk,
+			   double *drim, double *drjm, double *drkm,int tid)
+{
+  double epairfourth,v[6];
+
+  if (pair->eflag_either) {
+    if (pair->eflag_global) eng_vdwl_thr[tid] += evdwl;
+    if (pair->eflag_atom) {
+      epairfourth = 0.25 * evdwl;
+      eatom_thr[tid][i] += epairfourth;
+      eatom_thr[tid][j] += epairfourth;
+      eatom_thr[tid][k] += epairfourth;
+      eatom_thr[tid][m] += epairfourth;
+    }
+  }
+
+  if (pair->vflag_atom) {
+    v[0] = 0.25 * (drim[0]*fi[0] + drjm[0]*fj[0] + drkm[0]*fk[0]);
+    v[1] = 0.25 * (drim[1]*fi[1] + drjm[1]*fj[1] + drkm[1]*fk[1]);
+    v[2] = 0.25 * (drim[2]*fi[2] + drjm[2]*fj[2] + drkm[2]*fk[2]);
+    v[3] = 0.25 * (drim[0]*fi[1] + drjm[0]*fj[1] + drkm[0]*fk[1]);
+    v[4] = 0.25 * (drim[0]*fi[2] + drjm[0]*fj[2] + drkm[0]*fk[2]);
+    v[5] = 0.25 * (drim[1]*fi[2] + drjm[1]*fj[2] + drkm[1]*fk[2]);
+    
+    vatom_thr[tid][i][0] += v[0]; vatom_thr[tid][i][1] += v[1]; vatom_thr[tid][i][2] += v[2];
+    vatom_thr[tid][i][3] += v[3]; vatom_thr[tid][i][4] += v[4]; vatom_thr[tid][i][5] += v[5];
+    vatom_thr[tid][j][0] += v[0]; vatom_thr[tid][j][1] += v[1]; vatom_thr[tid][j][2] += v[2];
+    vatom_thr[tid][j][3] += v[3]; vatom_thr[tid][j][4] += v[4]; vatom_thr[tid][j][5] += v[5];
+    vatom_thr[tid][k][0] += v[0]; vatom_thr[tid][k][1] += v[1]; vatom_thr[tid][k][2] += v[2];
+    vatom_thr[tid][k][3] += v[3]; vatom_thr[tid][k][4] += v[4]; vatom_thr[tid][k][5] += v[5];
+    vatom_thr[tid][m][0] += v[0]; vatom_thr[tid][m][1] += v[1]; vatom_thr[tid][m][2] += v[2];
+    vatom_thr[tid][m][3] += v[3]; vatom_thr[tid][m][4] += v[4]; vatom_thr[tid][m][5] += v[5];
+  }
+}
+
+/* ----------------------------------------------------------------------
+   tally ecoul and virial into each of n atoms in list
+   called by TIP4P potential, newton_pair is always on
+   changes v values by dividing by n
+ ------------------------------------------------------------------------- */
+
+void ThrOMP::ev_tally_list_thr(Pair *pair, int n, int *list, double ecoul, double *v, int tid)
+{
+  int i,j;
+
+  if (pair->eflag_either) {
+    if (pair->eflag_global) eng_coul_thr[tid] += ecoul;
+    if (pair->eflag_atom) {
+      double epairatom = ecoul/n;
+      for (i = 0; i < n; i++) eatom_thr[tid][list[i]] += epairatom;
+    }
+  }
+
+  if (pair->vflag_either) {
+    if (pair->vflag_global) {
+      virial_thr[tid][0] += v[0];
+      virial_thr[tid][1] += v[1];
+      virial_thr[tid][2] += v[2];
+      virial_thr[tid][3] += v[3];
+      virial_thr[tid][4] += v[4];
+      virial_thr[tid][5] += v[5];
+    }
+
+    if (pair->vflag_atom) {
+      v[0] /= n;
+      v[1] /= n;
+      v[2] /= n;
+      v[3] /= n;
+      v[4] /= n;
+      v[5] /= n;
+      for (i = 0; i < n; i++) {
+	j = list[i];
+	vatom_thr[tid][j][0] += v[0];
+	vatom_thr[tid][j][1] += v[1];
+	vatom_thr[tid][j][2] += v[2];
+	vatom_thr[tid][j][3] += v[3];
+	vatom_thr[tid][j][4] += v[4];
+	vatom_thr[tid][j][5] += v[5];
       }
     }
   }
 }
 
+/* ----------------------------------------------------------------------
+   tally energy and virial into global and per-atom accumulators
+   virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4
+          = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4
+	  = vb1*f1 + vb2*f3 + (vb3+vb2)*f4
+------------------------------------------------------------------------- */
+
+void ThrOMP::ev_tally_thr(Dihedral *dihed, int i1, int i2, int i3, int i4,
+			  int nlocal, int newton_bond,
+			  double edihedral, double *f1, double *f3, double *f4,
+			  double vb1x, double vb1y, double vb1z,
+			  double vb2x, double vb2y, double vb2z,
+			  double vb3x, double vb3y, double vb3z, int tid)
+{
+  double edihedralquarter,v[6];
+  int cnt;
+
+  if (dihed->eflag_either) {
+    if (dihed->eflag_global) {
+      if (newton_bond) {
+	eng_bond_thr[tid] += edihedral;
+      } else {
+	edihedralquarter = 0.25*edihedral;
+	cnt = 0;
+	if (i1 < nlocal) ++cnt;
+	if (i2 < nlocal) ++cnt;
+	if (i3 < nlocal) ++cnt;
+	if (i4 < nlocal) ++cnt;
+	eng_bond_thr[tid] += static_cast<double>(cnt) * edihedralquarter;
+      }
+    }
+    if (dihed->eflag_atom) {
+      edihedralquarter = 0.25*edihedral;
+      if (newton_bond || i1 < nlocal) eatom_thr[tid][i1] += edihedralquarter;
+      if (newton_bond || i2 < nlocal) eatom_thr[tid][i2] += edihedralquarter;
+      if (newton_bond || i3 < nlocal) eatom_thr[tid][i3] += edihedralquarter;
+      if (newton_bond || i4 < nlocal) eatom_thr[tid][i4] += edihedralquarter;
+    }
+  }
+
+  if (dihed->vflag_either) {
+    v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0];
+    v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1];
+    v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2];
+    v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1];
+    v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2];
+    v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2];
+
+    if (dihed->vflag_global) {
+      if (newton_bond) {
+	virial_thr[tid][0] += v[0];
+	virial_thr[tid][1] += v[1];
+	virial_thr[tid][2] += v[2];
+	virial_thr[tid][3] += v[3];
+	virial_thr[tid][4] += v[4];
+	virial_thr[tid][5] += v[5];
+      } else {
+	if (i1 < nlocal) {
+	  virial_thr[tid][0] += 0.25*v[0];
+	  virial_thr[tid][1] += 0.25*v[1];
+	  virial_thr[tid][2] += 0.25*v[2];
+	  virial_thr[tid][3] += 0.25*v[3];
+	  virial_thr[tid][4] += 0.25*v[4];
+	  virial_thr[tid][5] += 0.25*v[5];
+	}
+	if (i2 < nlocal) {
+	  virial_thr[tid][0] += 0.25*v[0];
+	  virial_thr[tid][1] += 0.25*v[1];
+	  virial_thr[tid][2] += 0.25*v[2];
+	  virial_thr[tid][3] += 0.25*v[3];
+	  virial_thr[tid][4] += 0.25*v[4];
+	  virial_thr[tid][5] += 0.25*v[5];
+	}
+	if (i3 < nlocal) {
+	  virial_thr[tid][0] += 0.25*v[0];
+	  virial_thr[tid][1] += 0.25*v[1];
+	  virial_thr[tid][2] += 0.25*v[2];
+	  virial_thr[tid][3] += 0.25*v[3];
+	  virial_thr[tid][4] += 0.25*v[4];
+	  virial_thr[tid][5] += 0.25*v[5];
+	}
+	if (i4 < nlocal) {
+	  virial_thr[tid][0] += 0.25*v[0];
+	  virial_thr[tid][1] += 0.25*v[1];
+	  virial_thr[tid][2] += 0.25*v[2];
+	  virial_thr[tid][3] += 0.25*v[3];
+	  virial_thr[tid][4] += 0.25*v[4];
+	  virial_thr[tid][5] += 0.25*v[5];
+	}
+      }
+    }
+
+    if (dihed->vflag_atom) {
+      if (newton_bond || i1 < nlocal) {
+	vatom_thr[tid][i1][0] += 0.25*v[0];
+	vatom_thr[tid][i1][1] += 0.25*v[1];
+	vatom_thr[tid][i1][2] += 0.25*v[2];
+	vatom_thr[tid][i1][3] += 0.25*v[3];
+	vatom_thr[tid][i1][4] += 0.25*v[4];
+	vatom_thr[tid][i1][5] += 0.25*v[5];
+      }
+      if (newton_bond || i2 < nlocal) {
+	vatom_thr[tid][i2][0] += 0.25*v[0];
+	vatom_thr[tid][i2][1] += 0.25*v[1];
+	vatom_thr[tid][i2][2] += 0.25*v[2];
+	vatom_thr[tid][i2][3] += 0.25*v[3];
+	vatom_thr[tid][i2][4] += 0.25*v[4];
+	vatom_thr[tid][i2][5] += 0.25*v[5];
+      }
+      if (newton_bond || i3 < nlocal) {
+	vatom_thr[tid][i3][0] += 0.25*v[0];
+	vatom_thr[tid][i3][1] += 0.25*v[1];
+	vatom_thr[tid][i3][2] += 0.25*v[2];
+	vatom_thr[tid][i3][3] += 0.25*v[3];
+	vatom_thr[tid][i3][4] += 0.25*v[4];
+	vatom_thr[tid][i3][5] += 0.25*v[5];
+      }
+      if (newton_bond || i4 < nlocal) {
+	vatom_thr[tid][i4][0] += 0.25*v[0];
+	vatom_thr[tid][i4][1] += 0.25*v[1];
+	vatom_thr[tid][i4][2] += 0.25*v[2];
+	vatom_thr[tid][i4][3] += 0.25*v[3];
+	vatom_thr[tid][i4][4] += 0.25*v[4];
+	vatom_thr[tid][i4][5] += 0.25*v[5];
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   tally a pair virial into the per-atom accumulators of thread tid
+   called by AIREBO potential, newton_pair is always on
+   fpair is magnitude of force on atom I
+------------------------------------------------------------------------- */
+
+void ThrOMP::v_tally2_thr(int i, int j, double fpair, double *drij, int tid)
+{
+  // the 0.5-scaled products of drij and fpair are credited to both atoms
+  const double v[6] = {
+    0.5 * drij[0]*drij[0]*fpair,
+    0.5 * drij[1]*drij[1]*fpair,
+    0.5 * drij[2]*drij[2]*fpair,
+    0.5 * drij[0]*drij[1]*fpair,
+    0.5 * drij[0]*drij[2]*fpair,
+    0.5 * drij[1]*drij[2]*fpair };
+
+  double * const vi = vatom_thr[tid][i];
+  double * const vj = vatom_thr[tid][j];
+  for (int n = 0; n < 6; ++n) { vi[n] += v[n]; }
+  for (int n = 0; n < 6; ++n) { vj[n] += v[n]; }
+}
+
+/* ----------------------------------------------------------------------
+   tally a three-body virial into the per-atom accumulators of thread tid
+   called by AIREBO and Tersoff potential, newton_pair is always on
+------------------------------------------------------------------------- */
+
+void ThrOMP::v_tally3_thr(int i, int j, int k, double *fi, double *fj,
+			  double *drik, double *drjk, int tid)
+{
+  // one common THIRD-scaled value is credited to each of the three atoms
+  const double v[6] = {
+    THIRD * (drik[0]*fi[0] + drjk[0]*fj[0]),
+    THIRD * (drik[1]*fi[1] + drjk[1]*fj[1]),
+    THIRD * (drik[2]*fi[2] + drjk[2]*fj[2]),
+    THIRD * (drik[0]*fi[1] + drjk[0]*fj[1]),
+    THIRD * (drik[0]*fi[2] + drjk[0]*fj[2]),
+    THIRD * (drik[1]*fi[2] + drjk[1]*fj[2]) };
+
+  // update order: atom i, then j, then k
+  const int atoms[3] = { i, j, k };
+  for (int a = 0; a < 3; ++a) {
+    double * const va = vatom_thr[tid][atoms[a]];
+    for (int n = 0; n < 6; ++n) va[n] += v[n];
+  }
+}
+
+/* ----------------------------------------------------------------------
+   tally a four-body virial into the per-atom accumulators of thread tid
+   called by AIREBO potential, newton_pair is always on
+------------------------------------------------------------------------- */
+
+void ThrOMP::v_tally4_thr(int i, int j, int k, int m,
+			  double *fi, double *fj, double *fk,
+			  double *drim, double *drjm, double *drkm, int tid)
+{
+  // one common 6-component value, built from the three force/distance
+  // pairs and scaled by 0.25, is credited to each of the four atoms
+  const double v[6] = {
+    0.25 * (drim[0]*fi[0] + drjm[0]*fj[0] + drkm[0]*fk[0]),
+    0.25 * (drim[1]*fi[1] + drjm[1]*fj[1] + drkm[1]*fk[1]),
+    0.25 * (drim[2]*fi[2] + drjm[2]*fj[2] + drkm[2]*fk[2]),
+    0.25 * (drim[0]*fi[1] + drjm[0]*fj[1] + drkm[0]*fk[1]),
+    0.25 * (drim[0]*fi[2] + drjm[0]*fj[2] + drkm[0]*fk[2]),
+    0.25 * (drim[1]*fi[2] + drjm[1]*fj[2] + drkm[1]*fk[2]) };
+
+  // update order: atom i, then j, then k, then m
+  const int atoms[4] = { i, j, k, m };
+  for (int a = 0; a < 4; ++a) {
+    double * const va = vatom_thr[tid][atoms[a]];
+    for (int n = 0; n < 6; ++n)
+      va[n] += v[n];
+  }
+}
+
 /* ---------------------------------------------------------------------- */
 
 // set loop range thread id, and force array offset for threaded runs.
 double **ThrOMP::loop_setup_thr(double **f, int &ifrom, int &ito, int &tid,
 				int inum, int nall, int nthreads)
 {
 #if defined(_OPENMP)
-  if (nthreads > 1) {
-    tid = omp_get_thread_num();
+  tid = omp_get_thread_num();  // id of the calling thread within its team
 
-    // each thread works on a fixed chunk of atoms.
-    const int idelta = 1 + inum/nthreads;
-    ifrom = tid*idelta;
-    ito   = ifrom + idelta;
-    if (ito > inum)
-      ito = inum;
-
-    return f + nall*tid;
-
-  } else {
-#endif
-    tid = 0;
-    ifrom = 0;
+  // each thread works on one fixed chunk [ifrom,ito) of at most idelta atoms.
+  const int idelta = 1 + inum/nthreads;
+  ifrom = tid*idelta;
+  ito   = ifrom + idelta;
+  if (ito > inum)  // clamp the chunk end to the number of entries
     ito = inum;
-    return f;
-#if defined(_OPENMP)
-  }
+  // shift f by nall entries per thread id (the per-thread array offset)
+  return f + nall*tid;
+#else
+  tid = 0;  // built without OpenMP: a single thread covers all of [0,inum)
+  ifrom = 0;
+  ito = inum;
+  return f;
 #endif
 }
 
 /* ---------------------------------------------------------------------- */
 
-// reduce per thread forces into the first part of the force
+// reduce per thread data into the first part of the data
 // array that is used for the non-threaded parts and reset
-// the temporary storage to 0.0. this routine depends on the
-// forces arrays stored in this order x1,y1,z1,x2,y2,z2,...
+// the temporary storage to 0.0. this routine depends on
+// multi-dimensional arrays like force being stored in this order:
+// x1,y1,z1,x2,y2,z2,... (ndim values per entry, nall entries per thread)
 // we need to post a barrier to wait until all threads are done
-// with computing forces.
-void ThrOMP::force_reduce_thr(double *fall, int nall,
-			      int nthreads, int tid)
+// with writing to the array.
+void ThrOMP::data_reduce_thr(double *dall, int nall, int nthreads,
+			     int ndim, int tid)
 {
 #if defined(_OPENMP)
   // NOOP in non-threaded execution.
   if (nthreads == 1) return;
 #pragma omp barrier
   {
-    double *f;
-    const int idelta = 1 + nall/nthreads;
-    const int ifrom = 3*tid*idelta;
-    const int ito   = 3*(((ifrom + idelta) > nall) ? nall : (ifrom + idelta));
-
-    for (int n = 1; n < nthreads; ++n) {
-      const int toffs = 3*n*nall;
-      f = fall + toffs;
-      for (int m = ifrom; m < ito; ++m) {
-	fall[m] += f[m];
-	f[m] = 0.0;
+    const int nvals = ndim*nall;  // number of doubles in one thread's copy
+    const int idelta = nvals/nthreads + 1;
+    const int ifrom = tid*idelta;
+    const int ito   = ((ifrom + idelta) > nvals) ? nvals : (ifrom + idelta);
+    // each thread folds slice [ifrom,ito) of copies 1..nthreads-1 into copy 0
+    for (int m = ifrom; m < ito; ++m) {
+      for (int n = 1; n < nthreads; ++n) {
+	dall[m] += dall[n*nvals + m];
+	dall[n*nvals + m] = 0.0;  // reset the temporary storage
       }
     }
   }
 #else
   // NOOP in non-threaded execution.
   return;
 #endif
 }
 
 /* ---------------------------------------------------------------------- */
 
 double ThrOMP::memory_usage_thr() 
 {
   const int nthreads=lmp->comm->nthreads;
 
   double bytes = nthreads * (3 + 7) * sizeof(double);
   bytes += nthreads * maxeatom_thr * sizeof(double);
   bytes += nthreads * maxvatom_thr * 6 * sizeof(double);
   return bytes;
 }
diff --git a/src/USER-OMP/thr_omp.h b/src/USER-OMP/thr_omp.h
index 24963e91d..9966c9de0 100644
--- a/src/USER-OMP/thr_omp.h
+++ b/src/USER-OMP/thr_omp.h
@@ -1,79 +1,114 @@
 /* -*- c++ -*- -------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under 
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Axel Kohlmeyer (Temple U)
 ------------------------------------------------------------------------- */
 
 #ifndef LMP_THR_OMP_H
 #define LMP_THR_OMP_H
 
 #include "pointers.h"
 
 namespace LAMMPS_NS {
 
 // forward declarations
 class Pair;
 class Dihedral;
 
 class ThrOMP {
+ public:
+  struct global {       // plain bundle of energy and virial values; its use is not visible here
+    double eng_vdwl;    // presumably the vdw energy (cf. eng_vdwl_thr) -- confirm
+    double eng_coul;    // presumably the coulomb energy (cf. eng_coul_thr) -- confirm
+    double eng_bond;    // presumably the bonded energy (cf. eng_bond_thr) -- confirm
+    double virial[6];   // six virial components
+  };
 
  protected:
   const int thr_style;
   enum {PAIR=1, BOND, ANGLE, DIHEDRAL, IMPROPER, KSPACE, FIX, COMPUTE};
 
   LAMMPS *lmp;           // reference to base lammps object.
 
   double *eng_vdwl_thr;  // per thread accumulated vdw energy
   double *eng_coul_thr;  // per thread accumulated coulomb energies
   double *eng_bond_thr;  // per thread accumlated bonded energy
 
   double **virial_thr;   // per thread virial
   double **eatom_thr;    // per thread per atom energy
   double ***vatom_thr;   // per thread per atom virial
 
   int maxeatom_thr, maxvatom_thr;
+  int evflag_global, evflag_atom;
   
  public:
   ThrOMP(LAMMPS *, int);
   virtual ~ThrOMP();
 
   double memory_usage_thr();
 
+  inline void sync_threads() {  // post an OpenMP barrier; does nothing without OpenMP
+#if defined(_OPENMP)
+#pragma omp barrier
+#endif
+      { ; }  // empty block: all that remains of the body without OpenMP
+    };
+
  protected:
   // extra ev_tally work for threaded styles
   void ev_setup_thr(Pair *);
   void ev_setup_thr(Dihedral *);
 
   void ev_reduce_thr(Pair *);
   void ev_reduce_thr(Dihedral *);
 
  private:
   // internal method to be used by multiple ev_setup_thr() methods
-  void ev_zero_acc_thr(int, int, int, int, int, int);
+  void ev_setup_acc_thr(int, int, int, int, int, int);
 
  protected:
   // threading adapted versions of the ev_tally infrastructure
+  // style specific versions (need access to style class flags)
   void ev_tally_thr(Pair *, int, int, int, int, double, double,
 		    double, double, double, double, int);
+  void ev_tally_xyz_thr(Pair *, int, int, int, int, double, double,
+			double, double, double, double, double, double, int);
+  void ev_tally3_thr(Pair *, int, int, int, double, double,
+		     double *, double *, double *, double *, int);
+  void ev_tally4_thr(Pair *, int, int, int, int, double, 
+		     double *, double *, double *,
+		     double *, double *, double *, int);
+  void ev_tally_list_thr(Pair *, int, int *, double , double *, int);
+
+  void ev_tally_thr(Dihedral *, int, int, int, int, int, int, double,
+		    double *, double *, double *, double, double, double,
+		    double, double, double, double, double, double, int);
+
+  // style independent versions (per-atom virial only; last argument is the thread id)
+  void v_tally2_thr(int, int, double, double *, int);  // i, j, fpair, drij, tid
+  void v_tally3_thr(int, int, int, double *, double *, double *, double *, int);  // i, j, k, fi, fj, drik, drjk, tid
+  void v_tally4_thr(int, int, int, int, double *, double *, double *,  // i, j, k, m, fi, fj, fk,
+		    double *, double *, double *, int);                // drim, drjm, drkm, tid
 
  protected:
   // set loop range, thread id, and force array offset for threaded runs.
   double **loop_setup_thr(double **, int &, int &, int &, int, int, int);
 
-  // reduce per thread forces into the first part of the force array
-  void force_reduce_thr(double *, int, int, int);
+  // reduce per thread data into the first part of the array
+  void data_reduce_thr(double *, int, int, int, int);
+
 };
 
 }
 #endif