diff --git a/examples/USER/openmp/in.gauss-diel-omp b/examples/USER/openmp/in.gauss-diel-omp
index 2119fb700..851caf6ef 100644
--- a/examples/USER/openmp/in.gauss-diel-omp
+++ b/examples/USER/openmp/in.gauss-diel-omp
@@ -1,96 +1,98 @@
 # Ionic surfactant system: S12S
 
 units           lj
 dimension       3
 atom_style      full
 
 read_data       data.gauss-diel
 
 pair_style      hybrid/overlay          &
                 lj/cut/omp 3.5          &
-                coul/long 18.0          &
+                coul/long/omp 25.0      &
                 gauss/cut/omp 3.4       &
-                coul/diel 2.5
+                coul/diel/omp 2.5
 pair_modify    shift yes
 
 dielectric      0.4255
 kspace_style    pppm/cg 0.0001
+#kspace_style     ewald/omp 0.0001
 kspace_modify   mesh 12 12 12 order 3
 
 bond_style      harmonic
 angle_style     harmonic
 dihedral_style  opls
 
 pair_coeff  1     1     lj/cut/omp       0.5 1.775 3.268        # HG   HG  
-pair_coeff  1     1     coul/long                               # HG   HG  
+pair_coeff  1     1     coul/long/omp                           # HG   HG  
 pair_coeff  1     1     gauss/cut/omp    0.1 2.549 0.1525           
 pair_coeff  1     2     lj/cut/omp       0.31623 1.5329 1.7206  # HG   CM  
 pair_coeff  1     3     lj/cut/omp       0.31623 1.5329 1.7206  # HG   CT  
 pair_coeff  1     4     lj/cut/omp       0.05 1.75 4.375        # HG   CI  
-pair_coeff  1     4     coul/long                               # HG   CI  
+pair_coeff  1     4     coul/long/omp                           # HG   CI  
 pair_coeff  1     4     gauss/cut/omp    0.2805 1.45 0.112 
-pair_coeff  1     4     coul/diel        78. 1.375 0.112 
+pair_coeff  1     4     coul/diel/omp    78. 1.375 0.112 
 pair_coeff  2     2     lj/cut/omp       0.2000 1.2910 3.2275   # CM   CM  
 pair_coeff  2     3     lj/cut/omp       0.2000 1.2910 3.2275   # CM   CT  
 pair_coeff  2     4     lj/cut/omp       0.4472 1.1455 1.28585  # CM   CI  
 pair_coeff  3     3     lj/cut/omp       1.95 1.291 3.2275      # CT   CT  
 pair_coeff  3     4     lj/cut/omp       0.4472 1.1455 1.28585  # CT   CI  
 pair_coeff  4     4     lj/cut/omp       1.0 10. 1.12246        # CI   CI 
-pair_coeff  4     4     coul/long                               # CI   CI 
+pair_coeff  4     4     coul/long/omp                           # CI   CI 
 
 bond_coeff  1      12650.0000   0.7500 # HG CM FROM TOP
 bond_coeff  2      12650.0000   0.5000 # CM CM FROM TOP
 bond_coeff  3      12650.0000   0.5000 # CM CM FROM TOP
 bond_coeff  4      12650.0000   0.5000 # CM CM FROM TOP
 bond_coeff  5      12650.0000   0.5000 # CM CM FROM TOP
 bond_coeff  6      12650.0000   0.5000 # CM CM FROM TOP
 bond_coeff  7      12650.0000   0.5000 # CM CM FROM TOP
 bond_coeff  8      12650.0000   0.5000 # CM CM FROM TOP
 bond_coeff  9      12650.0000   0.5000 # CM CM FROM TOP
 bond_coeff  10     12650.0000   0.5000 # CM CM FROM TOP
 bond_coeff  11     12650.0000   0.5000 # CM CM FROM TOP
 bond_coeff  12     12650.0000   0.5000 # CM CT FROM TOP
 
 angle_coeff 1           85.7600 109.5000 # HG CM CM FROM TOP
 angle_coeff 2           85.7600 111.0000 # CM CM CM FROM TOP
 angle_coeff 3           85.7600 111.0000 # CM CM CM FROM TOP
 angle_coeff 4           85.7600 111.0000 # CM CM CM FROM TOP
 angle_coeff 5           85.7600 111.0000 # CM CM CM FROM TOP
 angle_coeff 6           85.7600 111.0000 # CM CM CM FROM TOP
 angle_coeff 7           85.7600 111.0000 # CM CM CM FROM TOP
 angle_coeff 8           85.7600 111.0000 # CM CM CM FROM TOP
 angle_coeff 9           85.7600 111.0000 # CM CM CM FROM TOP
 angle_coeff 10          85.7600 111.0000 # CM CM CM FROM TOP
 angle_coeff 11          85.7600 111.0000 # CM CM CT FROM TOP
 
 dihedral_coeff 1     5.7431 -2.53241 5.0742 0.0 # HG CM CM CM FROM TOP
 dihedral_coeff 2     5.7431 -2.53241 5.0742 0.0 # CM CM CM CM FROM TOP
 dihedral_coeff 3     5.7431 -2.53241 5.0742 0.0 # CM CM CM CM FROM TOP
 dihedral_coeff 4     5.7431 -2.53241 5.0742 0.0 # CM CM CM CM FROM TOP
 dihedral_coeff 5     5.7431 -2.53241 5.0742 0.0 # CM CM CM CM FROM TOP
 dihedral_coeff 6     5.7431 -2.53241 5.0742 0.0 # CM CM CM CM FROM TOP
 dihedral_coeff 7     5.7431 -2.53241 5.0742 0.0 # CM CM CM CM FROM TOP
 dihedral_coeff 8     5.7431 -2.53241 5.0742 0.0 # CM CM CM CM FROM TOP
 dihedral_coeff 9     5.7431 -2.53241 5.0742 0.0 # CM CM CM CM FROM TOP
 dihedral_coeff 10    5.7431 -2.53241 5.0742 0.0 # CM CM CM CT FROM TOP
 
 timestep        0.002  
 
 reset_timestep  0
 
 group           cions type 4
 group           sds subtract all cions
 
 velocity        all create 1. 87287 dist gaussian
 
 neighbor        1.5 multi
+communicate     multi
 neigh_modify    exclude molecule sds
 neigh_modify    every 5 delay 0 check yes
 
 fix             1 all nve/limit 0.2
 fix             2 all langevin 1.0 1.0 0.05 18273
 
 thermo_style    multi
 thermo          500
 
 run             2000
diff --git a/src/USER-OPENMP/Install.sh b/src/USER-OPENMP/Install.sh
index 0d0cc0195..c7be34c10 100644
--- a/src/USER-OPENMP/Install.sh
+++ b/src/USER-OPENMP/Install.sh
@@ -1,159 +1,162 @@
 # Install/unInstall package files in LAMMPS
 
 if (test $1 = 1) then
 
   cp pair_omp.h ..
   cp pair_omp.cpp ..
 
   cp pair_buck_omp.h ..
   cp pair_buck_omp.cpp ..
   cp pair_buck_coul_cut_omp.h ..
   cp pair_buck_coul_cut_omp.cpp ..
   cp pair_coul_cut_omp.h ..
   cp pair_coul_cut_omp.cpp ..
   cp pair_coul_debye_omp.h ..
   cp pair_coul_debye_omp.cpp ..
   cp pair_dpd_omp.h ..
   cp pair_dpd_omp.cpp ..
   cp pair_dpd_tstat_omp.h ..
   cp pair_dpd_tstat_omp.cpp ..
   cp pair_lj_charmm_coul_charmm_implicit_omp.h ..
   cp pair_lj_charmm_coul_charmm_implicit_omp.cpp ..
   cp pair_lj_charmm_coul_charmm_omp.h ..
   cp pair_lj_charmm_coul_charmm_omp.cpp ..
   cp pair_lj_cut_coul_cut_omp.h ..
   cp pair_lj_cut_coul_cut_omp.cpp ..
   cp pair_lj_cut_coul_debye_omp.h ..
   cp pair_lj_cut_coul_debye_omp.cpp ..
   cp pair_lj_cut_omp.h ..
   cp pair_lj_cut_omp.cpp ..
   cp pair_lj_expand_omp.h ..
   cp pair_lj_expand_omp.cpp ..
   cp pair_lj_gromacs_coul_gromacs_omp.h ..
   cp pair_lj_gromacs_coul_gromacs_omp.cpp ..
   cp pair_lj96_cut_omp.h ..
   cp pair_lj96_cut_omp.cpp ..
   cp pair_lj_gromacs_omp.h ..
   cp pair_lj_gromacs_omp.cpp ..
   cp pair_lj_smooth_omp.h ..
   cp pair_lj_smooth_omp.cpp ..
   cp pair_morse_omp.h ..
   cp pair_morse_omp.cpp ..
   cp pair_soft_omp.h ..
   cp pair_soft_omp.cpp ..
   cp pair_table_omp.h ..
   cp pair_table_omp.cpp ..
   cp pair_yukawa_omp.h ..
   cp pair_yukawa_omp.cpp ..
 
   if (test -e ../pair_cg_cmm.cpp) then
     cp pair_cg_cmm_omp.h ..
     cp pair_cg_cmm_omp.cpp ..
   fi
 
   if (test -e ../pair_airebo.cpp) then
     cp pair_airebo_omp.h ..
     cp pair_airebo_omp.cpp ..
     cp pair_sw_omp.h ..
     cp pair_sw_omp.cpp ..
     cp pair_tersoff_omp.h ..
     cp pair_tersoff_omp.cpp ..
     cp pair_tersoff_zbl_omp.h ..
     cp pair_tersoff_zbl_omp.cpp ..
   fi
 
   if (test -e ../pair_gauss.cpp) then
     cp pair_coul_diel_omp.h ..
     cp pair_coul_diel_omp.cpp ..
     cp pair_gauss_cut_omp.h ..
     cp pair_gauss_cut_omp.cpp ..
   fi
 
   if (test -e ../pair_lj_charmm_coul_long.cpp) then
     cp pair_born_coul_long_omp.h ..
     cp pair_born_coul_long_omp.cpp ..
     cp pair_buck_coul_long_omp.h ..
     cp pair_buck_coul_long_omp.cpp ..
     cp pair_coul_long_omp.h ..
     cp pair_coul_long_omp.cpp ..
     cp pair_lj_charmm_coul_long_omp.h ..
     cp pair_lj_charmm_coul_long_omp.cpp ..
     cp pair_lj_cut_coul_long_omp.h ..
     cp pair_lj_cut_coul_long_omp.cpp ..
+    cp ewald_omp.h ..
+    cp ewald_omp.cpp ..
   fi
 
 elif (test $1 = 0) then
 
   rm ../pair_omp.h
   rm ../pair_omp.cpp
 
   rm ../pair_buck_omp.h
   rm ../pair_buck_omp.cpp
   rm ../pair_buck_coul_cut_omp.h
   rm ../pair_buck_coul_cut_omp.cpp
   rm ../pair_coul_cut_omp.h
   rm ../pair_coul_cut_omp.cpp
   rm ../pair_coul_debye_omp.h
   rm ../pair_coul_debye_omp.cpp
   rm ../pair_dpd_omp.h
   rm ../pair_dpd_omp.cpp
   rm ../pair_dpd_tstat_omp.h
   rm ../pair_dpd_tstat_omp.cpp
   rm ../pair_lj_charmm_coul_charmm_implicit_omp.h
   rm ../pair_lj_charmm_coul_charmm_implicit_omp.cpp
   rm ../pair_lj_charmm_coul_charmm_omp.h
   rm ../pair_lj_charmm_coul_charmm_omp.cpp
   rm ../pair_lj_cut_coul_cut_omp.h
   rm ../pair_lj_cut_coul_cut_omp.cpp
   rm ../pair_lj_cut_coul_debye_omp.h
   rm ../pair_lj_cut_coul_debye_omp.cpp
   rm ../pair_lj_cut_omp.h
   rm ../pair_lj_cut_omp.cpp
   rm ../pair_lj_expand_omp.h
   rm ../pair_lj_expand_omp.cpp
   rm ../pair_lj_gromacs_coul_gromacs_omp.h
   rm ../pair_lj_gromacs_coul_gromacs_omp.cpp
   rm ../pair_lj96_cut_omp.h
   rm ../pair_lj96_cut_omp.cpp
   rm ../pair_lj_gromacs_omp.h
   rm ../pair_lj_gromacs_omp.cpp
   rm ../pair_lj_smooth_omp.h
   rm ../pair_lj_smooth_omp.cpp
   rm ../pair_morse_omp.h
   rm ../pair_morse_omp.cpp
   rm ../pair_soft_omp.h
   rm ../pair_soft_omp.cpp
   rm ../pair_table_omp.h
   rm ../pair_table_omp.cpp
   rm ../pair_yukawa_omp.h
   rm ../pair_yukawa_omp.cpp
 
   rm -f ../pair_cg_cmm_omp.h
   rm -f ../pair_cg_cmm_omp.cpp
 
   rm -f ../pair_airebo_omp.h
   rm -f ../pair_airebo_omp.cpp
   rm -f ../pair_sw_omp.h
   rm -f ../pair_sw_omp.cpp
   rm -f ../pair_tersoff_omp.h
   rm -f ../pair_tersoff_omp.cpp
   rm -f ../pair_tersoff_zbl_omp.h
   rm -f ../pair_tersoff_zbl_omp.cpp
 
   rm -f ../pair_coul_diel_omp.h
   rm -f ../pair_coul_diel_omp.cpp
   rm -f ../pair_gauss_cut_omp.h
   rm -f ../pair_gauss_cut_omp.cpp
 
   rm -f ../pair_born_coul_long_omp.h
   rm -f ../pair_born_coul_long_omp.cpp
   rm -f ../pair_buck_coul_long_omp.h
   rm -f ../pair_buck_coul_long_omp.cpp
   rm -f ../pair_coul_long_omp.h
   rm -f ../pair_coul_long_omp.cpp
   rm -f ../pair_lj_charmm_coul_long_omp.h
   rm -f ../pair_lj_charmm_coul_long_omp.cpp
   rm -f ../pair_lj_cut_coul_long_omp.h
   rm -f ../pair_lj_cut_coul_long_omp.cpp
-
+  rm -f ../ewald_omp.h
+  rm -f ../ewald_omp.cpp
 fi
diff --git a/src/USER-OPENMP/ewald_omp.cpp b/src/USER-OPENMP/ewald_omp.cpp
new file mode 100644
index 000000000..31d27a405
--- /dev/null
+++ b/src/USER-OPENMP/ewald_omp.cpp
@@ -0,0 +1,869 @@
+
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL)
+------------------------------------------------------------------------- */
+
+#include "mpi.h"
+#include "stdlib.h"
+#include "stdio.h"
+#include "math.h"
+#include "ewald_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "pair.h"
+#include "domain.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define SMALL 0.00001
+#define SMALLQ 0.01
+
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+
+/* ---------------------------------------------------------------------- */
+
+EwaldOMP::EwaldOMP(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
+{
+  if (narg != 1) error->all("Illegal kspace_style ewald command");
+
+  precision = atof(arg[0]);
+  PI = 4.0*atan(1.0);
+
+  kmax = 0;
+  kxvecs = kyvecs = kzvecs = NULL;
+  ug = NULL;
+  eg = vg = NULL;
+  sfacrl = sfacim = sfacrl_all = sfacim_all = NULL;
+
+  nmax = 0;
+  ek = NULL;
+  cs = sn = NULL;
+  
+  is_charged = NULL;
+  num_charged = 0;
+
+  kcount = 0;
+}
+
+/* ----------------------------------------------------------------------
+   free all memory 
+------------------------------------------------------------------------- */
+
+EwaldOMP::~EwaldOMP()
+{
+  deallocate();
+  memory->destroy_2d_double_array(ek);
+  memory->destroy_3d_double_array(cs,-kmax_created);
+  memory->destroy_3d_double_array(sn,-kmax_created);
+  memory->sfree(is_charged);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void EwaldOMP::init()
+{
+  if (comm->me == 0) {
+    if (screen) fprintf(screen,"Ewald initialization ...\n");
+    if (logfile) fprintf(logfile,"Ewald initialization ...\n");
+  }
+
+  // error check
+
+  if (domain->triclinic) error->all("Cannot use Ewald with triclinic box");
+  if (domain->dimension == 2) 
+    error->all("Cannot use Ewald with 2d simulation");
+
+  if (!atom->q_flag) error->all("Kspace style requires atom attribute q");
+
+  if (slabflag == 0 && domain->nonperiodic > 0)
+    error->all("Cannot use nonperiodic boundaries with Ewald");
+  if (slabflag == 1) {
+    if (domain->xperiodic != 1 || domain->yperiodic != 1 || 
+	domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
+      error->all("Incorrect boundaries with slab Ewald");
+  }
+
+  // extract short-range Coulombic cutoff from pair style
+
+  qqrd2e = force->qqrd2e;
+
+  if (force->pair == NULL)
+    error->all("KSpace style is incompatible with Pair style");
+  double *p_cutoff = (double *) force->pair->extract("cut_coul");
+  if (p_cutoff == NULL)
+    error->all("KSpace style is incompatible with Pair style");
+  double cutoff = *p_cutoff;
+
+  qsum = qsqsum = 0.0;
+  for (int i = 0; i < atom->nlocal; i++) {
+    qsum += atom->q[i];
+    qsqsum += atom->q[i]*atom->q[i];
+  }
+
+  double tmp;
+  MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+  qsum = tmp;
+  MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
+  qsqsum = tmp;
+
+  if (qsqsum == 0.0)
+    error->all("Cannot use kspace solver on system with no charge");
+  if (fabs(qsum) > SMALL && comm->me == 0) {
+    char str[128];
+    sprintf(str,"System is not charge neutral, net charge = %g",qsum);
+    error->warning(str);
+  }
+
+  // setup K-space resolution
+
+  g_ewald = (1.35 - 0.15*log(precision))/cutoff;
+  gsqmx = -4.0*g_ewald*g_ewald*log(precision);
+
+  if (comm->me == 0) {
+    if (screen) fprintf(screen,"  G vector = %g\n",g_ewald);
+    if (logfile) fprintf(logfile,"  G vector = %g\n",g_ewald);
+  }
+
+  // setup Ewald coefficients so can print stats
+
+  setup();
+
+  if (comm->me == 0) {
+    if (screen) fprintf(screen,"  vectors: actual 1d max = %d %d %d\n",
+			kcount,kmax,kmax3d);
+    if (logfile) fprintf(logfile,"  vectors: actual 1d max = %d %d %d\n",
+			 kcount,kmax,kmax3d);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   adjust Ewald coeffs, called initially and whenever volume has changed 
+------------------------------------------------------------------------- */
+
+void EwaldOMP::setup()
+{
+  // volume-dependent factors
+
+  double xprd = domain->xprd;
+  double yprd = domain->yprd;
+  double zprd = domain->zprd;
+  
+  // adjustment of z dimension for 2d slab Ewald
+  // 3d Ewald just uses zprd since slab_volfactor = 1.0
+
+  double zprd_slab = zprd*slab_volfactor;
+  volume = xprd * yprd * zprd_slab;
+
+  unitk[0] = 2.0*PI/xprd;
+  unitk[1] = 2.0*PI/yprd;
+  unitk[2] = 2.0*PI/zprd_slab;
+
+  // determine kmax
+  // function of current box size, precision, G_ewald (short-range cutoff)
+
+  int nkxmx = static_cast<int> ((g_ewald*xprd/PI) * sqrt(-log(precision)));
+  int nkymx = static_cast<int> ((g_ewald*yprd/PI) * sqrt(-log(precision)));
+  int nkzmx = 
+    static_cast<int> ((g_ewald*zprd_slab/PI) * sqrt(-log(precision)));
+
+  int kmax_old = kmax;
+  kmax = MAX(nkxmx,nkymx);
+  kmax = MAX(kmax,nkzmx);
+  kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax;
+
+  // if size has grown, reallocate k-dependent and nlocal-dependent arrays
+
+  if (kmax > kmax_old) {
+    deallocate();
+    allocate();
+
+    memory->destroy_2d_double_array(ek);
+    memory->destroy_3d_double_array(cs,-kmax_created);
+    memory->destroy_3d_double_array(sn,-kmax_created);
+    nmax = atom->nmax;
+    ek = memory->create_2d_double_array(nmax,3,"ewald:ek");
+    cs = memory->create_3d_double_array(-kmax,kmax,3,nmax,"ewald:cs");
+    sn = memory->create_3d_double_array(-kmax,kmax,3,nmax,"ewald:sn");
+    is_charged = static_cast<int *>(memory->smalloc(nmax*sizeof(int),"ewald:is_charged"));
+    kmax_created = kmax;
+  }
+
+  // pre-compute Ewald coefficients
+
+  coeffs();
+}
+
+/* ----------------------------------------------------------------------
+   compute the Ewald long-range force, energy, virial 
+------------------------------------------------------------------------- */
+
+void EwaldOMP::compute(int eflag, int vflag)
+{
+  int i,j,k,n;
+
+  energy = 0.0;
+  if (vflag) for (n = 0; n < 6; n++) virial[n] = 0.0;
+
+  // extend size of per-atom arrays if necessary
+
+  if (atom->nlocal > nmax) {
+    memory->destroy_2d_double_array(ek);
+    memory->destroy_3d_double_array(cs,-kmax_created);
+    memory->destroy_3d_double_array(sn,-kmax_created);
+    memory->sfree(is_charged);
+    nmax = atom->nmax;
+    ek = memory->create_2d_double_array(nmax,3,"ewald:ek");
+    cs = memory->create_3d_double_array(-kmax,kmax,3,nmax,"ewald:cs");
+    sn = memory->create_3d_double_array(-kmax,kmax,3,nmax,"ewald:sn");
+    is_charged = static_cast<int *>(memory->smalloc(nmax*sizeof(int),"ewald:is_charged"));
+    kmax_created = kmax;
+  }
+
+  num_charged=0;
+  for (i=0; i < atom->nlocal; ++i)
+    if (fabs(atom->q[i]) > SMALLQ) {
+      is_charged[num_charged] = i;
+      ++num_charged;
+    }
+
+  // partial structure factors on each processor
+  // total structure factor by summing over procs
+
+  eik_dot_r();
+  MPI_Allreduce(sfacrl,sfacrl_all,kcount,MPI_DOUBLE,MPI_SUM,world);
+  MPI_Allreduce(sfacim,sfacim_all,kcount,MPI_DOUBLE,MPI_SUM,world);
+
+  // K-space portion of electric field
+  // double loop over K-vectors and local atoms
+
+  double **f = atom->f;
+  double *q = atom->q;
+  int nlocal = atom->nlocal;
+
+  for (j = 0; j < num_charged; j++) {
+    int kx,ky,kz;
+    double cypz,sypz,exprl,expim,partial;
+    double ek0, ek1, ek2;
+
+    int i = is_charged[j];
+    ek0=ek1=ek2=0.0;
+
+    for (k = 0; k < kcount; k++) {
+      kx = kxvecs[k];
+      ky = kyvecs[k];
+      kz = kzvecs[k];
+
+      cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i];
+      sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i];
+      exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz;
+      expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz;
+      partial = expim*sfacrl_all[k] - exprl*sfacim_all[k];
+
+      ek0 += partial*eg[k][0];
+      ek1 += partial*eg[k][1];
+      ek2 += partial*eg[k][2];
+    }
+
+    // convert E-field to force
+
+    f[i][0] += qqrd2e*q[i]*ek0;
+    f[i][1] += qqrd2e*q[i]*ek1;
+    f[i][2] += qqrd2e*q[i]*ek2;
+  }
+ 
+  // energy if requested
+
+  if (eflag) {
+    for (k = 0; k < kcount; k++)
+      energy += ug[k] * (sfacrl_all[k]*sfacrl_all[k] + 
+			 sfacim_all[k]*sfacim_all[k]);
+    PI = 4.0*atan(1.0);
+    energy -= g_ewald*qsqsum/1.772453851 + 
+      0.5*PI*qsum*qsum / (g_ewald*g_ewald*volume);
+    energy *= qqrd2e;
+  }
+
+  // virial if requested
+
+  if (vflag) {
+    double uk;
+    for (k = 0; k < kcount; k++) {
+      uk = ug[k] * (sfacrl_all[k]*sfacrl_all[k] + sfacim_all[k]*sfacim_all[k]);
+      for (n = 0; n < 6; n++) virial[n] += uk*vg[k][n];
+    }
+    for (n = 0; n < 6; n++) virial[n] *= qqrd2e;
+  }
+
+  if (slabflag) slabcorr(eflag);
+  
+}
+
+/* ---------------------------------------------------------------------- */
+
+void EwaldOMP::eik_dot_r()
+{
+  int i,j,k,l,m,n,ic;
+  double cstr1,sstr1,cstr2,sstr2,cstr3,sstr3,cstr4,sstr4;
+  double sqk,clpm,slpm;
+
+  double **x = atom->x;
+  double *q = atom->q;
+  int nlocal = atom->nlocal;
+
+  n = 0;
+
+  // (k,0,0), (0,l,0), (0,0,m)
+
+  for (ic = 0; ic < 3; ic++) {
+    sqk = unitk[ic]*unitk[ic];
+    if (sqk <= gsqmx) {
+      cstr1 = 0.0;
+      sstr1 = 0.0;
+      for (j = 0; j < num_charged; j++) {
+        i = is_charged[j];
+	cs[0][ic][i] = 1.0;
+	sn[0][ic][i] = 0.0;
+	cs[1][ic][i] = cos(unitk[ic]*x[i][ic]);
+	sn[1][ic][i] = sin(unitk[ic]*x[i][ic]);
+	cs[-1][ic][i] = cs[1][ic][i];
+	sn[-1][ic][i] = -sn[1][ic][i];
+	cstr1 += q[i]*cs[1][ic][i];
+	sstr1 += q[i]*sn[1][ic][i];
+      }
+      sfacrl[n] = cstr1;
+      sfacim[n++] = sstr1;
+    }
+  }
+
+  for (m = 2; m <= kmax; m++) {
+    for (ic = 0; ic < 3; ic++) {
+      sqk = m*unitk[ic] * m*unitk[ic];
+      if (sqk <= gsqmx) {
+	cstr1 = 0.0;
+	sstr1 = 0.0;
+        for (j = 0; j < num_charged; j++) {
+          i = is_charged[j];
+	  cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] - 
+	    sn[m-1][ic][i]*sn[1][ic][i];
+	  sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] + 
+	    cs[m-1][ic][i]*sn[1][ic][i];
+	  cs[-m][ic][i] = cs[m][ic][i];
+	  sn[-m][ic][i] = -sn[m][ic][i];
+	  cstr1 += q[i]*cs[m][ic][i];
+	  sstr1 += q[i]*sn[m][ic][i];
+	}
+	sfacrl[n] = cstr1;
+	sfacim[n++] = sstr1;
+      }
+    }
+  }
+
+  // 1 = (k,l,0), 2 = (k,-l,0)
+
+  for (k = 1; k <= kmax; k++) {
+    for (l = 1; l <= kmax; l++) {
+      sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]);
+      if (sqk <= gsqmx) {
+	cstr1 = 0.0;
+	sstr1 = 0.0;
+	cstr2 = 0.0;
+	sstr2 = 0.0;
+        for (j = 0; j < num_charged; j++) {
+          i = is_charged[j];
+	  cstr1 += q[i]*(cs[k][0][i]*cs[l][1][i] - sn[k][0][i]*sn[l][1][i]);
+	  sstr1 += q[i]*(sn[k][0][i]*cs[l][1][i] + cs[k][0][i]*sn[l][1][i]);
+	  cstr2 += q[i]*(cs[k][0][i]*cs[l][1][i] + sn[k][0][i]*sn[l][1][i]);
+	  sstr2 += q[i]*(sn[k][0][i]*cs[l][1][i] - cs[k][0][i]*sn[l][1][i]);
+	}
+	sfacrl[n] = cstr1;
+	sfacim[n++] = sstr1;
+	sfacrl[n] = cstr2;
+	sfacim[n++] = sstr2;
+      }
+    }
+  }
+
+  // 1 = (0,l,m), 2 = (0,l,-m)
+
+  for (l = 1; l <= kmax; l++) {
+    for (m = 1; m <= kmax; m++) {
+      sqk = (l*unitk[1] * l*unitk[1]) + (m*unitk[2] * m*unitk[2]);
+      if (sqk <= gsqmx) {
+	cstr1 = 0.0;
+	sstr1 = 0.0;
+	cstr2 = 0.0;
+	sstr2 = 0.0;
+        for (j = 0; j < num_charged; j++) {
+          i = is_charged[j];
+	  cstr1 += q[i]*(cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]);
+	  sstr1 += q[i]*(sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]);
+	  cstr2 += q[i]*(cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]);
+	  sstr2 += q[i]*(sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]);
+	}
+	sfacrl[n] = cstr1;
+	sfacim[n++] = sstr1;
+	sfacrl[n] = cstr2;
+	sfacim[n++] = sstr2;
+      }
+    }
+  }
+
+  // 1 = (k,0,m), 2 = (k,0,-m)
+
+  for (k = 1; k <= kmax; k++) {
+    for (m = 1; m <= kmax; m++) {
+      sqk = (k*unitk[0] * k*unitk[0]) + (m*unitk[2] * m*unitk[2]);
+      if (sqk <= gsqmx) {
+	cstr1 = 0.0;
+	sstr1 = 0.0;
+	cstr2 = 0.0;
+	sstr2 = 0.0;
+        for (j = 0; j < num_charged; j++) {
+          i = is_charged[j];
+	  cstr1 += q[i]*(cs[k][0][i]*cs[m][2][i] - sn[k][0][i]*sn[m][2][i]);
+	  sstr1 += q[i]*(sn[k][0][i]*cs[m][2][i] + cs[k][0][i]*sn[m][2][i]);
+	  cstr2 += q[i]*(cs[k][0][i]*cs[m][2][i] + sn[k][0][i]*sn[m][2][i]);
+	  sstr2 += q[i]*(sn[k][0][i]*cs[m][2][i] - cs[k][0][i]*sn[m][2][i]);
+	}
+	sfacrl[n] = cstr1;
+	sfacim[n++] = sstr1;
+	sfacrl[n] = cstr2;
+	sfacim[n++] = sstr2;
+      }
+    }
+  }
+
+  // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m)
+
+  for (k = 1; k <= kmax; k++) {
+    for (l = 1; l <= kmax; l++) {
+      for (m = 1; m <= kmax; m++) {
+	sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]) +
+	  (m*unitk[2] * m*unitk[2]);
+	if (sqk <= gsqmx) {
+	  cstr1 = 0.0;
+	  sstr1 = 0.0;
+	  cstr2 = 0.0;
+	  sstr2 = 0.0;
+	  cstr3 = 0.0;
+	  sstr3 = 0.0;
+	  cstr4 = 0.0;
+	  sstr4 = 0.0;
+          for (j = 0; j < num_charged; j++) {
+            i = is_charged[j];
+	    clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i];
+	    slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i];
+	    cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
+	    sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
+	    
+	    clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i];
+	    slpm = -sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i];
+	    cstr2 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
+	    sstr2 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
+	    
+	    clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i];
+	    slpm = sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i];
+	    cstr3 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
+	    sstr3 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
+	    
+	    clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i];
+	    slpm = -sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i];
+	    cstr4 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
+	    sstr4 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
+	  }
+	  sfacrl[n] = cstr1;
+	  sfacim[n++] = sstr1;
+	  sfacrl[n] = cstr2;
+	  sfacim[n++] = sstr2;
+	  sfacrl[n] = cstr3;
+	  sfacim[n++] = sstr3;
+	  sfacrl[n] = cstr4;
+	  sfacim[n++] = sstr4;
+	}
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   pre-compute coefficients for each Ewald K-vector 
+------------------------------------------------------------------------- */
+
+void EwaldOMP::coeffs()
+{
+  int k,l,m;
+  double sqk,vterm;
+
+  double unitkx = unitk[0];
+  double unitky = unitk[1];
+  double unitkz = unitk[2];
+  double g_ewald_sq_inv = 1.0 / (g_ewald*g_ewald);
+  double preu = 4.0*PI/volume;
+
+  kcount = 0;
+
+  // (k,0,0), (0,l,0), (0,0,m)
+
+  for (m = 1; m <= kmax; m++) {
+    sqk = (m*unitkx) * (m*unitkx);
+    if (sqk <= gsqmx) {
+      kxvecs[kcount] = m;
+      kyvecs[kcount] = 0;
+      kzvecs[kcount] = 0;
+      ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+      eg[kcount][0] = 2.0*unitkx*m*ug[kcount];
+      eg[kcount][1] = 0.0;
+      eg[kcount][2] = 0.0;
+      vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+      vg[kcount][0] = 1.0 + vterm*(unitkx*m)*(unitkx*m);
+      vg[kcount][1] = 1.0;
+      vg[kcount][2] = 1.0;
+      vg[kcount][3] = 0.0;
+      vg[kcount][4] = 0.0;
+      vg[kcount][5] = 0.0;
+      kcount++;
+    }
+    sqk = (m*unitky) * (m*unitky);
+    if (sqk <= gsqmx) {
+      kxvecs[kcount] = 0;
+      kyvecs[kcount] = m;
+      kzvecs[kcount] = 0;
+      ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+      eg[kcount][0] = 0.0;
+      eg[kcount][1] = 2.0*unitky*m*ug[kcount];
+      eg[kcount][2] = 0.0;
+      vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+      vg[kcount][0] = 1.0;
+      vg[kcount][1] = 1.0 + vterm*(unitky*m)*(unitky*m);
+      vg[kcount][2] = 1.0;
+      vg[kcount][3] = 0.0;
+      vg[kcount][4] = 0.0;
+      vg[kcount][5] = 0.0;
+      kcount++;
+    }
+    sqk = (m*unitkz) * (m*unitkz);
+    if (sqk <= gsqmx) {
+      kxvecs[kcount] = 0;
+      kyvecs[kcount] = 0;
+      kzvecs[kcount] = m;
+      ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+      eg[kcount][0] = 0.0;
+      eg[kcount][1] = 0.0;
+      eg[kcount][2] = 2.0*unitkz*m*ug[kcount];
+      vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+      vg[kcount][0] = 1.0;
+      vg[kcount][1] = 1.0;
+      vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m);
+      vg[kcount][3] = 0.0;
+      vg[kcount][4] = 0.0;
+      vg[kcount][5] = 0.0;
+      kcount++;
+    }
+  }
+
+  // 1 = (k,l,0), 2 = (k,-l,0)
+
+  for (k = 1; k <= kmax; k++) {
+    for (l = 1; l <= kmax; l++) {
+      sqk = (unitkx*k) * (unitkx*k) + (unitky*l) * (unitky*l);
+      if (sqk <= gsqmx) {
+	kxvecs[kcount] = k;
+	kyvecs[kcount] = l;
+	kzvecs[kcount] = 0;
+	ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+	eg[kcount][0] = 2.0*unitkx*k*ug[kcount];
+	eg[kcount][1] = 2.0*unitky*l*ug[kcount];
+	eg[kcount][2] = 0.0;
+	vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+	vg[kcount][0] = 1.0 + vterm*(unitkx*k)*(unitkx*k);
+	vg[kcount][1] = 1.0 + vterm*(unitky*l)*(unitky*l);
+	vg[kcount][2] = 1.0;
+	vg[kcount][3] = vterm*unitkx*k*unitky*l;
+	vg[kcount][4] = 0.0;
+	vg[kcount][5] = 0.0;
+	kcount++;
+
+	kxvecs[kcount] = k;
+	kyvecs[kcount] = -l;
+	kzvecs[kcount] = 0;
+	ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+	eg[kcount][0] = 2.0*unitkx*k*ug[kcount];
+	eg[kcount][1] = -2.0*unitky*l*ug[kcount];
+	eg[kcount][2] = 0.0;
+	vg[kcount][0] = 1.0 + vterm*(unitkx*k)*(unitkx*k);
+	vg[kcount][1] = 1.0 + vterm*(unitky*l)*(unitky*l);
+	vg[kcount][2] = 1.0;
+	vg[kcount][3] = -vterm*unitkx*k*unitky*l;
+	vg[kcount][4] = 0.0;
+	vg[kcount][5] = 0.0;
+	kcount++;;
+      }
+    }
+  }
+
+  // 1 = (0,l,m), 2 = (0,l,-m)
+
+  for (l = 1; l <= kmax; l++) {
+    for (m = 1; m <= kmax; m++) {
+      sqk = (unitky*l) * (unitky*l) + (unitkz*m) * (unitkz*m);
+      if (sqk <= gsqmx) {
+	kxvecs[kcount] = 0;
+	kyvecs[kcount] = l;
+	kzvecs[kcount] = m;
+	ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+	eg[kcount][0] =  0.0;
+	eg[kcount][1] =  2.0*unitky*l*ug[kcount];
+	eg[kcount][2] =  2.0*unitkz*m*ug[kcount];
+	vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+	vg[kcount][0] = 1.0;
+	vg[kcount][1] = 1.0 + vterm*(unitky*l)*(unitky*l);
+	vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m);
+	vg[kcount][3] = 0.0;
+	vg[kcount][4] = 0.0;
+	vg[kcount][5] = vterm*unitky*l*unitkz*m;
+	kcount++;
+
+	kxvecs[kcount] = 0;
+	kyvecs[kcount] = l;
+	kzvecs[kcount] = -m;
+	ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+	eg[kcount][0] =  0.0;
+	eg[kcount][1] =  2.0*unitky*l*ug[kcount];
+	eg[kcount][2] = -2.0*unitkz*m*ug[kcount];
+	vg[kcount][0] = 1.0;
+	vg[kcount][1] = 1.0 + vterm*(unitky*l)*(unitky*l);
+	vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m);
+	vg[kcount][3] = 0.0;
+	vg[kcount][4] = 0.0;
+	vg[kcount][5] = -vterm*unitky*l*unitkz*m;
+	kcount++;
+      }
+    }
+  }
+
+  // 1 = (k,0,m), 2 = (k,0,-m)
+
+  for (k = 1; k <= kmax; k++) {
+    for (m = 1; m <= kmax; m++) {
+      sqk = (unitkx*k) * (unitkx*k) + (unitkz*m) * (unitkz*m);
+      if (sqk <= gsqmx) {
+	kxvecs[kcount] = k;
+	kyvecs[kcount] = 0;
+	kzvecs[kcount] = m;
+	ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+	eg[kcount][0] =  2.0*unitkx*k*ug[kcount];
+	eg[kcount][1] =  0.0;
+	eg[kcount][2] =  2.0*unitkz*m*ug[kcount];
+	vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+	vg[kcount][0] = 1.0 + vterm*(unitkx*k)*(unitkx*k);
+	vg[kcount][1] = 1.0;
+	vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m);
+	vg[kcount][3] = 0.0;
+	vg[kcount][4] = vterm*unitkx*k*unitkz*m;
+	vg[kcount][5] = 0.0;
+	kcount++;
+
+	kxvecs[kcount] = k;
+	kyvecs[kcount] = 0;
+	kzvecs[kcount] = -m;
+	ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+	eg[kcount][0] =  2.0*unitkx*k*ug[kcount];
+	eg[kcount][1] =  0.0;
+	eg[kcount][2] = -2.0*unitkz*m*ug[kcount];
+	vg[kcount][0] = 1.0 + vterm*(unitkx*k)*(unitkx*k);
+	vg[kcount][1] = 1.0;
+	vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m);
+	vg[kcount][3] = 0.0;
+	vg[kcount][4] = -vterm*unitkx*k*unitkz*m;
+	vg[kcount][5] = 0.0;
+	kcount++;
+      }
+    }
+  }
+
+  // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m)
+
+  for (k = 1; k <= kmax; k++) {
+    for (l = 1; l <= kmax; l++) {
+      for (m = 1; m <= kmax; m++) {
+	sqk = (unitkx*k) * (unitkx*k) + (unitky*l) * (unitky*l) + 
+	  (unitkz*m) * (unitkz*m);
+	if (sqk <= gsqmx) {
+	  kxvecs[kcount] = k;
+	  kyvecs[kcount] = l;
+	  kzvecs[kcount] = m;
+	  ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+	  eg[kcount][0] = 2.0*unitkx*k*ug[kcount];
+	  eg[kcount][1] = 2.0*unitky*l*ug[kcount];
+	  eg[kcount][2] = 2.0*unitkz*m*ug[kcount];
+	  vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv);
+	  vg[kcount][0] = 1.0 + vterm*(unitkx*k)*(unitkx*k);
+	  vg[kcount][1] = 1.0 + vterm*(unitky*l)*(unitky*l);
+	  vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m);
+	  vg[kcount][3] = vterm*unitkx*k*unitky*l;
+	  vg[kcount][4] = vterm*unitkx*k*unitkz*m;
+	  vg[kcount][5] = vterm*unitky*l*unitkz*m;
+	  kcount++;
+
+	  kxvecs[kcount] = k;
+	  kyvecs[kcount] = -l;
+	  kzvecs[kcount] = m;
+	  ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+	  eg[kcount][0] = 2.0*unitkx*k*ug[kcount];
+	  eg[kcount][1] = -2.0*unitky*l*ug[kcount];
+	  eg[kcount][2] = 2.0*unitkz*m*ug[kcount];
+	  vg[kcount][0] = 1.0 + vterm*(unitkx*k)*(unitkx*k);
+	  vg[kcount][1] = 1.0 + vterm*(unitky*l)*(unitky*l);
+	  vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m);
+	  vg[kcount][3] = -vterm*unitkx*k*unitky*l;
+	  vg[kcount][4] = vterm*unitkx*k*unitkz*m;
+	  vg[kcount][5] = -vterm*unitky*l*unitkz*m;
+	  kcount++;
+
+	  kxvecs[kcount] = k;
+	  kyvecs[kcount] = l;
+	  kzvecs[kcount] = -m;
+	  ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+	  eg[kcount][0] = 2.0*unitkx*k*ug[kcount];
+	  eg[kcount][1] = 2.0*unitky*l*ug[kcount];
+	  eg[kcount][2] = -2.0*unitkz*m*ug[kcount];
+	  vg[kcount][0] = 1.0 + vterm*(unitkx*k)*(unitkx*k);
+	  vg[kcount][1] = 1.0 + vterm*(unitky*l)*(unitky*l);
+	  vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m);
+	  vg[kcount][3] = vterm*unitkx*k*unitky*l;
+	  vg[kcount][4] = -vterm*unitkx*k*unitkz*m;
+	  vg[kcount][5] = -vterm*unitky*l*unitkz*m;
+	  kcount++;
+
+	  kxvecs[kcount] = k;
+	  kyvecs[kcount] = -l;
+	  kzvecs[kcount] = -m;
+	  ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk;
+	  eg[kcount][0] = 2.0*unitkx*k*ug[kcount];
+	  eg[kcount][1] = -2.0*unitky*l*ug[kcount];
+	  eg[kcount][2] = -2.0*unitkz*m*ug[kcount];
+	  vg[kcount][0] = 1.0 + vterm*(unitkx*k)*(unitkx*k);
+	  vg[kcount][1] = 1.0 + vterm*(unitky*l)*(unitky*l);
+	  vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m);
+	  vg[kcount][3] = -vterm*unitkx*k*unitky*l;
+	  vg[kcount][4] = -vterm*unitkx*k*unitkz*m;
+	  vg[kcount][5] = vterm*unitky*l*unitkz*m;
+	  kcount++;;
+	}
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   allocate memory that depends on # of K-vectors 
+------------------------------------------------------------------------- */
+
+void EwaldOMP::allocate()
+{
+  kxvecs = new int[kmax3d];
+  kyvecs = new int[kmax3d];
+  kzvecs = new int[kmax3d];
+
+  ug = new double[kmax3d];
+  eg = memory->create_2d_double_array(kmax3d,3,"ewald:eg");
+  vg = memory->create_2d_double_array(kmax3d,6,"ewald:vg");
+
+  sfacrl = new double[kmax3d];
+  sfacim = new double[kmax3d];
+  sfacrl_all = new double[kmax3d];
+  sfacim_all = new double[kmax3d];
+}
+
+/* ----------------------------------------------------------------------
+   deallocate memory that depends on # of K-vectors 
+------------------------------------------------------------------------- */
+
+void EwaldOMP::deallocate()
+{
+  delete [] kxvecs;
+  delete [] kyvecs;
+  delete [] kzvecs;
+  
+  delete [] ug;
+  memory->destroy_2d_double_array(eg);
+  memory->destroy_2d_double_array(vg);
+
+  delete [] sfacrl;
+  delete [] sfacim;
+  delete [] sfacrl_all;
+  delete [] sfacim_all;
+}
+
+/* ----------------------------------------------------------------------
+   Slab-geometry correction term to dampen inter-slab interactions between
+   periodically repeating slabs.  Yields good approximation to 2-D Ewald if 
+   adequate empty space is left between repeating slabs (J. Chem. Phys. 
+   111, 3155).  Slabs defined here to be parallel to the xy plane. 
+------------------------------------------------------------------------- */
+
+void EwaldOMP::slabcorr(int eflag)
+{
+  // compute local contribution to global dipole moment
+  
+  double *q = atom->q;
+  double **x = atom->x;
+  int nlocal = atom->nlocal;
+
+  double dipole = 0.0;
+  int i,j;
+  for (j = 0; j < num_charged; j++) {
+    i = is_charged[j];
+    dipole += q[i]*x[i][2];
+  }
+
+  // sum local contributions to get global dipole moment
+
+  double dipole_all;
+  MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
+
+  // compute corrections
+  
+  double e_slabcorr = 2.0*PI*dipole_all*dipole_all/volume;
+  
+  if (eflag) energy += qqrd2e*e_slabcorr;
+
+  // add on force corrections
+
+  double ffact = -4.0*PI*dipole_all/volume; 
+  double **f = atom->f;
+
+  for (int i = 0; i < nlocal; i++) f[i][2] += qqrd2e*q[i]*ffact;
+}
+
+/* ----------------------------------------------------------------------
+   memory usage of local arrays 
+------------------------------------------------------------------------- */
+
+double EwaldOMP::memory_usage()
+{
+  double bytes = 3 * kmax3d * sizeof(int);
+  bytes += (1 + 3 + 6) * kmax3d * sizeof(double);
+  bytes += 4 * kmax3d * sizeof(double);
+  bytes += nmax*3 * sizeof(double);
+  bytes += 2 * (2*kmax+1)*3*nmax * sizeof(double);
+  bytes += nmax * sizeof(int);
+  return bytes;
+}
diff --git a/src/USER-OPENMP/ewald_omp.h b/src/USER-OPENMP/ewald_omp.h
new file mode 100644
index 000000000..4892c922f
--- /dev/null
+++ b/src/USER-OPENMP/ewald_omp.h
@@ -0,0 +1,63 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef KSPACE_CLASS
+
+KSpaceStyle(ewald/omp,EwaldOMP)
+
+#else
+
+#ifndef LMP_EWALD_OMP_H
+#define LMP_EWALD_OMP_H
+
+#include "kspace.h"
+
+namespace LAMMPS_NS {
+
+class EwaldOMP : public KSpace {
+ public:
+  EwaldOMP(class LAMMPS *, int, char **);
+  ~EwaldOMP();
+  void init();
+  void setup();
+  void compute(int, int);
+  double memory_usage();
+
+ private:
+  double PI;
+  double precision;
+  int kcount,kmax,kmax3d,kmax_created;
+  double qqrd2e;
+  double gsqmx,qsum,qsqsum,volume;
+  int nmax,num_charged;
+
+  double unitk[3];
+  int *kxvecs,*kyvecs,*kzvecs;
+  int *is_charged;
+  double *ug;
+  double **eg,**vg;
+  double **ek;
+  double *sfacrl,*sfacim,*sfacrl_all,*sfacim_all;
+  double ***cs,***sn;
+
+  void eik_dot_r();
+  void coeffs();
+  void allocate();
+  void deallocate();
+  void slabcorr(int);
+};
+
+}
+
+#endif
+#endif