diff --git a/src/angle.cpp b/src/angle.cpp
index b668171d7..99fd5c21a 100644
--- a/src/angle.cpp
+++ b/src/angle.cpp
@@ -1,234 +1,238 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "angle.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "math_const.h"
 #include "suffix.h"
 #include "atom_masks.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 /* ---------------------------------------------------------------------- */
 
 Angle::Angle(LAMMPS *lmp) : Pointers(lmp)
 {
   energy = 0.0;
   writedata = 1;
 
   allocated = 0;
   suffix_flag = Suffix::NONE;
 
   maxeatom = maxvatom = 0;
   eatom = NULL;
   vatom = NULL;
   setflag = NULL;
 
   datamask = ALL_MASK;
   datamask_ext = ALL_MASK;
+
+  execution_space = Host;
+  datamask_read = ALL_MASK;
+  datamask_modify = ALL_MASK;
 }
 
 /* ---------------------------------------------------------------------- */
 
 Angle::~Angle()
 {
   memory->destroy(eatom);
   memory->destroy(vatom);
 }
 
 /* ----------------------------------------------------------------------
    check if all coeffs are set
 ------------------------------------------------------------------------- */
 
 void Angle::init()
 {
   if (!allocated && atom->nangletypes) 
     error->all(FLERR,"Angle coeffs are not set");
   for (int i = 1; i <= atom->nangletypes; i++)
     if (setflag[i] == 0) error->all(FLERR,"All angle coeffs are not set");
 
   init_style();
 }
 
 /* ----------------------------------------------------------------------
    setup for energy, virial computation
    see integrate::ev_set() for values of eflag (0-3) and vflag (0-6)
 ------------------------------------------------------------------------- */
 
 void Angle::ev_setup(int eflag, int vflag)
 {
   int i,n;
 
   evflag = 1;
 
   eflag_either = eflag;
   eflag_global = eflag % 2;
   eflag_atom = eflag / 2;
 
   vflag_either = vflag;
   vflag_global = vflag % 4;
   vflag_atom = vflag / 4;
 
   // reallocate per-atom arrays if necessary
 
   if (eflag_atom && atom->nmax > maxeatom) {
     maxeatom = atom->nmax;
     memory->destroy(eatom);
     memory->create(eatom,comm->nthreads*maxeatom,"bond:eatom");
   }
   if (vflag_atom && atom->nmax > maxvatom) {
     maxvatom = atom->nmax;
     memory->destroy(vatom);
     memory->create(vatom,comm->nthreads*maxvatom,6,"bond:vatom");
   }
 
   // zero accumulators
 
   if (eflag_global) energy = 0.0;
   if (vflag_global) for (i = 0; i < 6; i++) virial[i] = 0.0;
   if (eflag_atom) {
     n = atom->nlocal;
     if (force->newton_bond) n += atom->nghost;
     for (i = 0; i < n; i++) eatom[i] = 0.0;
   }
   if (vflag_atom) {
     n = atom->nlocal;
     if (force->newton_bond) n += atom->nghost;
     for (i = 0; i < n; i++) {
       vatom[i][0] = 0.0;
       vatom[i][1] = 0.0;
       vatom[i][2] = 0.0;
       vatom[i][3] = 0.0;
       vatom[i][4] = 0.0;
       vatom[i][5] = 0.0;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    tally energy and virial into global and per-atom accumulators
    virial = r1F1 + r2F2 + r3F3 = (r1-r2) F1 + (r3-r2) F3 = del1*f1 + del2*f3
 ------------------------------------------------------------------------- */
 
 void Angle::ev_tally(int i, int j, int k, int nlocal, int newton_bond,
                      double eangle, double *f1, double *f3,
                      double delx1, double dely1, double delz1,
                      double delx2, double dely2, double delz2)
 {
   double eanglethird,v[6];
 
   if (eflag_either) {
     if (eflag_global) {
       if (newton_bond) energy += eangle;
       else {
         eanglethird = THIRD*eangle;
         if (i < nlocal) energy += eanglethird;
         if (j < nlocal) energy += eanglethird;
         if (k < nlocal) energy += eanglethird;
       }
     }
     if (eflag_atom) {
       eanglethird = THIRD*eangle;
       if (newton_bond || i < nlocal) eatom[i] += eanglethird;
       if (newton_bond || j < nlocal) eatom[j] += eanglethird;
       if (newton_bond || k < nlocal) eatom[k] += eanglethird;
     }
   }
 
   if (vflag_either) {
     v[0] = delx1*f1[0] + delx2*f3[0];
     v[1] = dely1*f1[1] + dely2*f3[1];
     v[2] = delz1*f1[2] + delz2*f3[2];
     v[3] = delx1*f1[1] + delx2*f3[1];
     v[4] = delx1*f1[2] + delx2*f3[2];
     v[5] = dely1*f1[2] + dely2*f3[2];
 
     if (vflag_global) {
       if (newton_bond) {
         virial[0] += v[0];
         virial[1] += v[1];
         virial[2] += v[2];
         virial[3] += v[3];
         virial[4] += v[4];
         virial[5] += v[5];
       } else {
         if (i < nlocal) {
           virial[0] += THIRD*v[0];
           virial[1] += THIRD*v[1];
           virial[2] += THIRD*v[2];
           virial[3] += THIRD*v[3];
           virial[4] += THIRD*v[4];
           virial[5] += THIRD*v[5];
         }
         if (j < nlocal) {
           virial[0] += THIRD*v[0];
           virial[1] += THIRD*v[1];
           virial[2] += THIRD*v[2];
           virial[3] += THIRD*v[3];
           virial[4] += THIRD*v[4];
           virial[5] += THIRD*v[5];
         }
         if (k < nlocal) {
           virial[0] += THIRD*v[0];
           virial[1] += THIRD*v[1];
           virial[2] += THIRD*v[2];
           virial[3] += THIRD*v[3];
           virial[4] += THIRD*v[4];
           virial[5] += THIRD*v[5];
         }
       }
     }
 
     if (vflag_atom) {
       if (newton_bond || i < nlocal) {
         vatom[i][0] += THIRD*v[0];
         vatom[i][1] += THIRD*v[1];
         vatom[i][2] += THIRD*v[2];
         vatom[i][3] += THIRD*v[3];
         vatom[i][4] += THIRD*v[4];
         vatom[i][5] += THIRD*v[5];
       }
       if (newton_bond || j < nlocal) {
         vatom[j][0] += THIRD*v[0];
         vatom[j][1] += THIRD*v[1];
         vatom[j][2] += THIRD*v[2];
         vatom[j][3] += THIRD*v[3];
         vatom[j][4] += THIRD*v[4];
         vatom[j][5] += THIRD*v[5];
       }
       if (newton_bond || k < nlocal) {
         vatom[k][0] += THIRD*v[0];
         vatom[k][1] += THIRD*v[1];
         vatom[k][2] += THIRD*v[2];
         vatom[k][3] += THIRD*v[3];
         vatom[k][4] += THIRD*v[4];
         vatom[k][5] += THIRD*v[5];
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double Angle::memory_usage()
 {
   double bytes = comm->nthreads*maxeatom * sizeof(double);
   bytes += comm->nthreads*maxvatom*6 * sizeof(double);
   return bytes;
 }
diff --git a/src/angle.h b/src/angle.h
index e29cfb880..3f05739c8 100644
--- a/src/angle.h
+++ b/src/angle.h
@@ -1,81 +1,85 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifndef LMP_ANGLE_H
 #define LMP_ANGLE_H
 
 #include "stdio.h"
 #include "pointers.h"
 
 namespace LAMMPS_NS {
 
 class Angle : protected Pointers {
   friend class ThrOMP;
   friend class FixOMP;
  public:
   int allocated;
   int *setflag;
   int writedata;                  // 1 if writes coeffs to data file
   double energy;                  // accumulated energies
   double virial[6];               // accumlated virial
   double *eatom,**vatom;          // accumulated per-atom energy/virial
   unsigned int datamask;
   unsigned int datamask_ext;
 
+  // KOKKOS host/device flag and data masks
+  ExecutionSpace execution_space;
+  unsigned int datamask_read,datamask_modify;
+
   Angle(class LAMMPS *);
   virtual ~Angle();
   virtual void init();
   virtual void compute(int, int) = 0;
   virtual void settings(int, char **) {}
   virtual void coeff(int, char **) = 0;
   virtual void init_style() {};
   virtual double equilibrium_angle(int) = 0;
   virtual void write_restart(FILE *) = 0;
   virtual void read_restart(FILE *) = 0;
   virtual void write_data(FILE *) {}
   virtual double single(int, int, int, int) = 0;
   virtual double memory_usage();
 
   virtual unsigned int data_mask() {return datamask;}
   virtual unsigned int data_mask_ext() {return datamask_ext;}
 
  protected:
   int suffix_flag;             // suffix compatibility flag
 
   int evflag;
   int eflag_either,eflag_global,eflag_atom;
   int vflag_either,vflag_global,vflag_atom;
   int maxeatom,maxvatom;
 
   void ev_setup(int, int);
   void ev_tally(int, int, int, int, int, double, double *, double *,
                 double, double, double, double, double, double);
 };
 
 }
 
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Angle coeffs are not set
 
 No angle coefficients have been assigned in the data file or via the
 angle_coeff command.
 
 E: All angle coeffs are not set
 
 All angle coefficients must be set in the data file or by the
 angle_coeff command before running a simulation.
 
 */
diff --git a/src/bond.cpp b/src/bond.cpp
index ec560b719..f4104a7b6 100644
--- a/src/bond.cpp
+++ b/src/bond.cpp
@@ -1,214 +1,218 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "string.h"
 #include "bond.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "suffix.h"
 #include "atom_masks.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* -----------------------------------------------------------------------
    set bond contribution to Vdwl energy to 0.0
    a particular bond style can override this
 ------------------------------------------------------------------------- */
 
 Bond::Bond(LAMMPS *lmp) : Pointers(lmp)
 {
   energy = 0.0;
   writedata = 1;
 
   allocated = 0;
   suffix_flag = Suffix::NONE;
 
   maxeatom = maxvatom = 0;
   eatom = NULL;
   vatom = NULL;
   setflag = NULL;
 
   datamask = ALL_MASK;
   datamask_ext = ALL_MASK;
+
+  execution_space = Host;
+  datamask_read = ALL_MASK;
+  datamask_modify = ALL_MASK;
 }
 
 /* ---------------------------------------------------------------------- */
 
 Bond::~Bond()
 {
   memory->destroy(eatom);
   memory->destroy(vatom);
 }
 
 /* ----------------------------------------------------------------------
    check if all coeffs are set
 ------------------------------------------------------------------------- */
 
 void Bond::init()
 {
   if (!allocated && atom->nbondtypes) 
     error->all(FLERR,"Bond coeffs are not set");
   for (int i = 1; i <= atom->nbondtypes; i++)
     if (setflag[i] == 0) error->all(FLERR,"All bond coeffs are not set");
   init_style();
 }
 
 /* ----------------------------------------------------------------------
    setup for energy, virial computation
    see integrate::ev_set() for values of eflag (0-3) and vflag (0-6)
 ------------------------------------------------------------------------- */
 
 void Bond::ev_setup(int eflag, int vflag)
 {
   int i,n;
 
   evflag = 1;
 
   eflag_either = eflag;
   eflag_global = eflag % 2;
   eflag_atom = eflag / 2;
 
   vflag_either = vflag;
   vflag_global = vflag % 4;
   vflag_atom = vflag / 4;
 
   // reallocate per-atom arrays if necessary
 
   if (eflag_atom && atom->nmax > maxeatom) {
     maxeatom = atom->nmax;
     memory->destroy(eatom);
     memory->create(eatom,comm->nthreads*maxeatom,"bond:eatom");
   }
   if (vflag_atom && atom->nmax > maxvatom) {
     maxvatom = atom->nmax;
     memory->destroy(vatom);
     memory->create(vatom,comm->nthreads*maxvatom,6,"bond:vatom");
   }
 
   // zero accumulators
 
   if (eflag_global) energy = 0.0;
   if (vflag_global) for (i = 0; i < 6; i++) virial[i] = 0.0;
   if (eflag_atom) {
     n = atom->nlocal;
     if (force->newton_bond) n += atom->nghost;
     for (i = 0; i < n; i++) eatom[i] = 0.0;
   }
   if (vflag_atom) {
     n = atom->nlocal;
     if (force->newton_bond) n += atom->nghost;
     for (i = 0; i < n; i++) {
       vatom[i][0] = 0.0;
       vatom[i][1] = 0.0;
       vatom[i][2] = 0.0;
       vatom[i][3] = 0.0;
       vatom[i][4] = 0.0;
       vatom[i][5] = 0.0;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    tally energy and virial into global and per-atom accumulators
 ------------------------------------------------------------------------- */
 
 void Bond::ev_tally(int i, int j, int nlocal, int newton_bond,
                     double ebond, double fbond,
                     double delx, double dely, double delz)
 {
   double ebondhalf,v[6];
 
   if (eflag_either) {
     if (eflag_global) {
       if (newton_bond) energy += ebond;
       else {
         ebondhalf = 0.5*ebond;
         if (i < nlocal) energy += ebondhalf;
         if (j < nlocal) energy += ebondhalf;
       }
     }
     if (eflag_atom) {
       ebondhalf = 0.5*ebond;
       if (newton_bond || i < nlocal) eatom[i] += ebondhalf;
       if (newton_bond || j < nlocal) eatom[j] += ebondhalf;
     }
   }
 
   if (vflag_either) {
     v[0] = delx*delx*fbond;
     v[1] = dely*dely*fbond;
     v[2] = delz*delz*fbond;
     v[3] = delx*dely*fbond;
     v[4] = delx*delz*fbond;
     v[5] = dely*delz*fbond;
 
     if (vflag_global) {
       if (newton_bond) {
         virial[0] += v[0];
         virial[1] += v[1];
         virial[2] += v[2];
         virial[3] += v[3];
         virial[4] += v[4];
         virial[5] += v[5];
       } else {
         if (i < nlocal) {
           virial[0] += 0.5*v[0];
           virial[1] += 0.5*v[1];
           virial[2] += 0.5*v[2];
           virial[3] += 0.5*v[3];
           virial[4] += 0.5*v[4];
           virial[5] += 0.5*v[5];
         }
         if (j < nlocal) {
           virial[0] += 0.5*v[0];
           virial[1] += 0.5*v[1];
           virial[2] += 0.5*v[2];
           virial[3] += 0.5*v[3];
           virial[4] += 0.5*v[4];
           virial[5] += 0.5*v[5];
         }
       }
     }
 
     if (vflag_atom) {
       if (newton_bond || i < nlocal) {
         vatom[i][0] += 0.5*v[0];
         vatom[i][1] += 0.5*v[1];
         vatom[i][2] += 0.5*v[2];
         vatom[i][3] += 0.5*v[3];
         vatom[i][4] += 0.5*v[4];
         vatom[i][5] += 0.5*v[5];
       }
       if (newton_bond || j < nlocal) {
         vatom[j][0] += 0.5*v[0];
         vatom[j][1] += 0.5*v[1];
         vatom[j][2] += 0.5*v[2];
         vatom[j][3] += 0.5*v[3];
         vatom[j][4] += 0.5*v[4];
         vatom[j][5] += 0.5*v[5];
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double Bond::memory_usage()
 {
   double bytes = comm->nthreads*maxeatom * sizeof(double);
   bytes += comm->nthreads*maxvatom*6 * sizeof(double);
   return bytes;
 }
diff --git a/src/bond.h b/src/bond.h
index 988a505d8..a802142f4 100644
--- a/src/bond.h
+++ b/src/bond.h
@@ -1,80 +1,84 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifndef LMP_BOND_H
 #define LMP_BOND_H
 
 #include "stdio.h"
 #include "pointers.h"
 
 namespace LAMMPS_NS {
 
 class Bond : protected Pointers {
   friend class ThrOMP;
   friend class FixOMP;
  public:
   int allocated;
   int *setflag;
   int writedata;                  // 1 if writes coeffs to data file
   double energy;                  // accumulated energies
   double virial[6];               // accumlated virial
   double *eatom,**vatom;          // accumulated per-atom energy/virial
   unsigned int datamask;
   unsigned int datamask_ext;
 
+  // KOKKOS host/device flag and data masks
+  ExecutionSpace execution_space;
+  unsigned int datamask_read,datamask_modify;
+
   Bond(class LAMMPS *);
   virtual ~Bond();
   virtual void init();
   virtual void init_style() {}
   virtual void compute(int, int) = 0;
   virtual void settings(int, char **) {}
   virtual void coeff(int, char **) = 0;
   virtual double equilibrium_distance(int) = 0;
   virtual void write_restart(FILE *) = 0;
   virtual void read_restart(FILE *) = 0;
   virtual void write_data(FILE *) {}
   virtual double single(int, double, int, int, double &) = 0;
   virtual double memory_usage();
 
   virtual unsigned int data_mask() {return datamask;}
   virtual unsigned int data_mask_ext() {return datamask_ext;}
 
  protected:
   int suffix_flag;             // suffix compatibility flag
 
   int evflag;
   int eflag_either,eflag_global,eflag_atom;
   int vflag_either,vflag_global,vflag_atom;
   int maxeatom,maxvatom;
 
   void ev_setup(int, int);
   void ev_tally(int, int, int, int, double, double, double, double, double);
 };
 
 }
 
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Bond coeffs are not set
 
 No bond coefficients have been assigned in the data file or via the
 bond_coeff command.
 
 E: All bond coeffs are not set
 
 All bond coefficients must be set in the data file or by the
 bond_coeff command before running a simulation.
 
 */
diff --git a/src/dihedral.cpp b/src/dihedral.cpp
index 26976d580..d6e24c6ef 100644
--- a/src/dihedral.cpp
+++ b/src/dihedral.cpp
@@ -1,256 +1,260 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "dihedral.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "pair.h"
 #include "suffix.h"
 #include "atom_masks.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ----------------------------------------------------------------------
    set dihedral contribution to Vdwl and Coulombic energy to 0.0
    DihedralCharmm will override this
 ------------------------------------------------------------------------- */
 
 Dihedral::Dihedral(LAMMPS *lmp) : Pointers(lmp)
 {
   energy = 0.0;
   writedata = 0;
 
   allocated = 0;
 
   maxeatom = maxvatom = 0;
   eatom = NULL;
   vatom = NULL;
   setflag = NULL;
 
   datamask = ALL_MASK;
   datamask_ext = ALL_MASK;
+
+  execution_space = Host;
+  datamask_read = ALL_MASK;
+  datamask_modify = ALL_MASK;
 }
 
 /* ---------------------------------------------------------------------- */
 
 Dihedral::~Dihedral()
 {
   memory->destroy(eatom);
   memory->destroy(vatom);
 }
 
 /* ----------------------------------------------------------------------
    check if all coeffs are set
 ------------------------------------------------------------------------- */
 
 void Dihedral::init()
 {
   if (!allocated && atom->ndihedraltypes) 
     error->all(FLERR,"Dihedral coeffs are not set");
   for (int i = 1; i <= atom->ndihedraltypes; i++)
     if (setflag[i] == 0) error->all(FLERR,"All dihedral coeffs are not set");
   init_style();
 }
 
 /* ----------------------------------------------------------------------
    setup for energy, virial computation
    see integrate::ev_set() for values of eflag (0-3) and vflag (0-6)
 ------------------------------------------------------------------------- */
 
 void Dihedral::ev_setup(int eflag, int vflag)
 {
   int i,n;
 
   evflag = 1;
 
   eflag_either = eflag;
   eflag_global = eflag % 2;
   eflag_atom = eflag / 2;
 
   vflag_either = vflag;
   vflag_global = vflag % 4;
   vflag_atom = vflag / 4;
 
   // reallocate per-atom arrays if necessary
 
   if (eflag_atom && atom->nmax > maxeatom) {
     maxeatom = atom->nmax;
     memory->destroy(eatom);
     memory->create(eatom,comm->nthreads*maxeatom,"bond:eatom");
   }
   if (vflag_atom && atom->nmax > maxvatom) {
     maxvatom = atom->nmax;
     memory->destroy(vatom);
     memory->create(vatom,comm->nthreads*maxvatom,6,"bond:vatom");
   }
 
   // zero accumulators
 
   if (eflag_global) energy = 0.0;
   if (vflag_global) for (i = 0; i < 6; i++) virial[i] = 0.0;
   if (eflag_atom) {
     n = atom->nlocal;
     if (force->newton_bond) n += atom->nghost;
     for (i = 0; i < n; i++) eatom[i] = 0.0;
   }
   if (vflag_atom) {
     n = atom->nlocal;
     if (force->newton_bond) n += atom->nghost;
     for (i = 0; i < n; i++) {
       vatom[i][0] = 0.0;
       vatom[i][1] = 0.0;
       vatom[i][2] = 0.0;
       vatom[i][3] = 0.0;
       vatom[i][4] = 0.0;
       vatom[i][5] = 0.0;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    tally energy and virial into global and per-atom accumulators
    virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4
           = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4
           = vb1*f1 + vb2*f3 + (vb3+vb2)*f4
 ------------------------------------------------------------------------- */
 
 void Dihedral::ev_tally(int i1, int i2, int i3, int i4,
                         int nlocal, int newton_bond,
                         double edihedral, double *f1, double *f3, double *f4,
                         double vb1x, double vb1y, double vb1z,
                         double vb2x, double vb2y, double vb2z,
                         double vb3x, double vb3y, double vb3z)
 {
   double edihedralquarter,v[6];
 
   if (eflag_either) {
     if (eflag_global) {
       if (newton_bond) energy += edihedral;
       else {
         edihedralquarter = 0.25*edihedral;
         if (i1 < nlocal) energy += edihedralquarter;
         if (i2 < nlocal) energy += edihedralquarter;
         if (i3 < nlocal) energy += edihedralquarter;
         if (i4 < nlocal) energy += edihedralquarter;
       }
     }
     if (eflag_atom) {
       edihedralquarter = 0.25*edihedral;
       if (newton_bond || i1 < nlocal) eatom[i1] += edihedralquarter;
       if (newton_bond || i2 < nlocal) eatom[i2] += edihedralquarter;
       if (newton_bond || i3 < nlocal) eatom[i3] += edihedralquarter;
       if (newton_bond || i4 < nlocal) eatom[i4] += edihedralquarter;
     }
   }
 
   if (vflag_either) {
     v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0];
     v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1];
     v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2];
     v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1];
     v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2];
     v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2];
 
     if (vflag_global) {
       if (newton_bond) {
         virial[0] += v[0];
         virial[1] += v[1];
         virial[2] += v[2];
         virial[3] += v[3];
         virial[4] += v[4];
         virial[5] += v[5];
       } else {
         if (i1 < nlocal) {
           virial[0] += 0.25*v[0];
           virial[1] += 0.25*v[1];
           virial[2] += 0.25*v[2];
           virial[3] += 0.25*v[3];
           virial[4] += 0.25*v[4];
           virial[5] += 0.25*v[5];
         }
         if (i2 < nlocal) {
           virial[0] += 0.25*v[0];
           virial[1] += 0.25*v[1];
           virial[2] += 0.25*v[2];
           virial[3] += 0.25*v[3];
           virial[4] += 0.25*v[4];
           virial[5] += 0.25*v[5];
         }
         if (i3 < nlocal) {
           virial[0] += 0.25*v[0];
           virial[1] += 0.25*v[1];
           virial[2] += 0.25*v[2];
           virial[3] += 0.25*v[3];
           virial[4] += 0.25*v[4];
           virial[5] += 0.25*v[5];
         }
         if (i4 < nlocal) {
           virial[0] += 0.25*v[0];
           virial[1] += 0.25*v[1];
           virial[2] += 0.25*v[2];
           virial[3] += 0.25*v[3];
           virial[4] += 0.25*v[4];
           virial[5] += 0.25*v[5];
         }
       }
     }
 
     if (vflag_atom) {
       if (newton_bond || i1 < nlocal) {
         vatom[i1][0] += 0.25*v[0];
         vatom[i1][1] += 0.25*v[1];
         vatom[i1][2] += 0.25*v[2];
         vatom[i1][3] += 0.25*v[3];
         vatom[i1][4] += 0.25*v[4];
         vatom[i1][5] += 0.25*v[5];
       }
       if (newton_bond || i2 < nlocal) {
         vatom[i2][0] += 0.25*v[0];
         vatom[i2][1] += 0.25*v[1];
         vatom[i2][2] += 0.25*v[2];
         vatom[i2][3] += 0.25*v[3];
         vatom[i2][4] += 0.25*v[4];
         vatom[i2][5] += 0.25*v[5];
       }
       if (newton_bond || i3 < nlocal) {
         vatom[i3][0] += 0.25*v[0];
         vatom[i3][1] += 0.25*v[1];
         vatom[i3][2] += 0.25*v[2];
         vatom[i3][3] += 0.25*v[3];
         vatom[i3][4] += 0.25*v[4];
         vatom[i3][5] += 0.25*v[5];
       }
       if (newton_bond || i4 < nlocal) {
         vatom[i4][0] += 0.25*v[0];
         vatom[i4][1] += 0.25*v[1];
         vatom[i4][2] += 0.25*v[2];
         vatom[i4][3] += 0.25*v[3];
         vatom[i4][4] += 0.25*v[4];
         vatom[i4][5] += 0.25*v[5];
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double Dihedral::memory_usage()
 {
   double bytes = comm->nthreads*maxeatom * sizeof(double);
   bytes += comm->nthreads*maxvatom*6 * sizeof(double);
   return bytes;
 }
diff --git a/src/dihedral.h b/src/dihedral.h
index 07c2b3b55..5d8c25060 100644
--- a/src/dihedral.h
+++ b/src/dihedral.h
@@ -1,80 +1,84 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifndef LMP_DIHEDRAL_H
 #define LMP_DIHEDRAL_H
 
 #include "stdio.h"
 #include "pointers.h"
 
 namespace LAMMPS_NS {
 
 class Dihedral : protected Pointers {
   friend class ThrOMP;
   friend class FixOMP;
  public:
   int allocated;
   int *setflag;
   int writedata;                     // 1 if writes coeffs to data file
   double energy;                     // accumulated energy
   double virial[6];                  // accumlated virial
   double *eatom,**vatom;             // accumulated per-atom energy/virial
   unsigned int datamask;
   unsigned int datamask_ext;
 
+  // KOKKOS host/device flag and data masks
+  ExecutionSpace execution_space;
+  unsigned int datamask_read,datamask_modify;
+
   Dihedral(class LAMMPS *);
   virtual ~Dihedral();
   virtual void init();
   virtual void init_style() {}
   virtual void compute(int, int) = 0;
   virtual void settings(int, char **) {}
   virtual void coeff(int, char **) = 0;
   virtual void write_restart(FILE *) = 0;
   virtual void read_restart(FILE *) = 0;
   virtual void write_data(FILE *) {}
   virtual double memory_usage();
 
   virtual unsigned int data_mask() {return datamask;}
   virtual unsigned int data_mask_ext() {return datamask_ext;}
 
  protected:
   int suffix_flag;             // suffix compatibility flag
 
   int evflag;
   int eflag_either,eflag_global,eflag_atom;
   int vflag_either,vflag_global,vflag_atom;
   int maxeatom,maxvatom;
 
   void ev_setup(int, int);
   void ev_tally(int, int, int, int, int, int, double,
                 double *, double *, double *, double, double, double,
                 double, double, double, double, double, double);
 };
 
 }
 
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Dihedral coeffs are not set
 
 No dihedral coefficients have been assigned in the data file or via
 the dihedral_coeff command.
 
 E: All dihedral coeffs are not set
 
 All dihedral coefficients must be set in the data file or by the
 dihedral_coeff command before running a simulation.
 
 */
diff --git a/src/fix_langevin.cpp b/src/fix_langevin.cpp
index bf14e5a54..0bc920666 100644
--- a/src/fix_langevin.cpp
+++ b/src/fix_langevin.cpp
@@ -1,916 +1,916 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Carolyn Phillips (U Mich), reservoir energy tally
                          Aidan Thompson (SNL) GJF formulation
 ------------------------------------------------------------------------- */
 
 #include "mpi.h"
 #include "math.h"
 #include "string.h"
 #include "stdlib.h"
 #include "fix_langevin.h"
 #include "math_extra.h"
 #include "atom.h"
 #include "atom_vec_ellipsoid.h"
 #include "force.h"
 #include "update.h"
 #include "modify.h"
 #include "compute.h"
 #include "domain.h"
 #include "region.h"
 #include "respa.h"
 #include "comm.h"
 #include "input.h"
 #include "variable.h"
 #include "random_mars.h"
 #include "memory.h"
 #include "error.h"
 #include "group.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
 enum{NOBIAS,BIAS};
 enum{CONSTANT,EQUAL,ATOM};
 
 #define SINERTIA 0.4          // moment of inertia prefactor for sphere
 #define EINERTIA 0.2          // moment of inertia prefactor for ellipsoid
 
 /* ---------------------------------------------------------------------- */
 
 FixLangevin::FixLangevin(LAMMPS *lmp, int narg, char **arg) :
   Fix(lmp, narg, arg)
 {
   if (narg < 7) error->all(FLERR,"Illegal fix langevin command");
 
   scalar_flag = 1;
   global_freq = 1;
   extscalar = 1;
   nevery = 1;
 
   tstr = NULL;
   if (strstr(arg[3],"v_") == arg[3]) {
     int n = strlen(&arg[3][2]) + 1;
     tstr = new char[n];
     strcpy(tstr,&arg[3][2]);
   } else {
     t_start = force->numeric(FLERR,arg[3]);
     t_target = t_start;
     tstyle = CONSTANT;
   }
 
   t_stop = force->numeric(FLERR,arg[4]);
   t_period = force->numeric(FLERR,arg[5]);
-  int seed = force->inumeric(FLERR,arg[6]);
+  seed = force->inumeric(FLERR,arg[6]);
 
   if (t_period <= 0.0) error->all(FLERR,"Fix langevin period must be > 0.0");
   if (seed <= 0) error->all(FLERR,"Illegal fix langevin command");
 
   // initialize Marsaglia RNG with processor-unique seed
 
   random = new RanMars(lmp,seed + comm->me);
 
   // allocate per-type arrays for force prefactors
 
   gfactor1 = new double[atom->ntypes+1];
   gfactor2 = new double[atom->ntypes+1];
   ratio = new double[atom->ntypes+1];
 
   // optional args
 
   for (int i = 1; i <= atom->ntypes; i++) ratio[i] = 1.0;
   ascale = 0.0;
   gjfflag = 0;
   oflag = 0;
   tallyflag = 0;
   zeroflag = 0;
 
   int iarg = 7;
   while (iarg < narg) {
     if (strcmp(arg[iarg],"angmom") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix langevin command");
       if (strcmp(arg[iarg+1],"no") == 0) ascale = 0.0;
       else ascale = force->numeric(FLERR,arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"gjf") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix langevin command");
       if (strcmp(arg[iarg+1],"no") == 0) gjfflag = 0;
       else if (strcmp(arg[iarg+1],"yes") == 0) gjfflag = 1;
       else error->all(FLERR,"Illegal fix langevin command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"omega") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix langevin command");
       if (strcmp(arg[iarg+1],"no") == 0) oflag = 0;
       else if (strcmp(arg[iarg+1],"yes") == 0) oflag = 1;
       else error->all(FLERR,"Illegal fix langevin command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"scale") == 0) {
       if (iarg+3 > narg) error->all(FLERR,"Illegal fix langevin command");
       int itype = force->inumeric(FLERR,arg[iarg+1]);
       double scale = force->numeric(FLERR,arg[iarg+2]);
       if (itype <= 0 || itype > atom->ntypes)
         error->all(FLERR,"Illegal fix langevin command");
       ratio[itype] = scale;
       iarg += 3;
     } else if (strcmp(arg[iarg],"tally") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix langevin command");
       if (strcmp(arg[iarg+1],"no") == 0) tallyflag = 0;
       else if (strcmp(arg[iarg+1],"yes") == 0) tallyflag = 1;
       else error->all(FLERR,"Illegal fix langevin command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"zero") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix langevin command");
       if (strcmp(arg[iarg+1],"no") == 0) zeroflag = 0;
       else if (strcmp(arg[iarg+1],"yes") == 0) zeroflag = 1;
       else error->all(FLERR,"Illegal fix langevin command");
       iarg += 2;
     } else error->all(FLERR,"Illegal fix langevin command");
   }
 
   // set temperature = NULL, user can override via fix_modify if wants bias
 
   id_temp = NULL;
   temperature = NULL;
 
   // flangevin is unallocated until first call to setup()
   // compute_scalar checks for this and returns 0.0 if flangevin is NULL
 
   energy = 0.0;
   flangevin = NULL;
   franprev = NULL;
   tforce = NULL;
   maxatom1 = maxatom2 = 0;
 
   // Setup atom-based array for franprev
   // register with Atom class
   // No need to set peratom_flag
   // as this data is for internal use only
 
   if (gjfflag) {
     nvalues = 3;
     grow_arrays(atom->nmax);
     atom->add_callback(0);
 
   // initialize franprev to zero
 
     int nlocal = atom->nlocal;
     for (int i = 0; i < nlocal; i++) {
       franprev[i][0] = 0.0;
       franprev[i][1] = 0.0;
       franprev[i][2] = 0.0;
     }
   }
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 FixLangevin::~FixLangevin()
 {
   delete random;
   delete [] tstr;
   delete [] gfactor1;
   delete [] gfactor2;
   delete [] ratio;
   delete [] id_temp;
   memory->destroy(flangevin);
   memory->destroy(tforce);
 
   if (gjfflag) {
     memory->destroy(franprev);
     atom->delete_callback(id,0);
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixLangevin::setmask()
 {
   int mask = 0;
   mask |= POST_FORCE;
   mask |= POST_FORCE_RESPA;
   mask |= END_OF_STEP;
   mask |= THERMO_ENERGY;
   return mask;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixLangevin::init()
 {
   if (oflag && !atom->sphere_flag)
     error->all(FLERR,"Fix langevin omega requires atom style sphere");
   if (ascale && !atom->ellipsoid_flag)
     error->all(FLERR,"Fix langevin angmom requires atom style ellipsoid");
 
   // check variable
 
   if (tstr) {
     tvar = input->variable->find(tstr);
     if (tvar < 0)
       error->all(FLERR,"Variable name for fix langevin does not exist");
     if (input->variable->equalstyle(tvar)) tstyle = EQUAL;
     else if (input->variable->atomstyle(tvar)) tstyle = ATOM;
     else error->all(FLERR,"Variable for fix langevin is invalid style");
   }
 
   // if oflag or ascale set, check that all group particles are finite-size
 
   if (oflag) {
     double *radius = atom->radius;
     int *mask = atom->mask;
     int nlocal = atom->nlocal;
 
     for (int i = 0; i < nlocal; i++)
       if (mask[i] & groupbit)
         if (radius[i] == 0.0)
           error->one(FLERR,"Fix langevin omega requires extended particles");
   }
 
   if (ascale) {
     avec = (AtomVecEllipsoid *) atom->style_match("ellipsoid");
     if (!avec)
       error->all(FLERR,"Fix langevin angmom requires atom style ellipsoid");
 
     int *ellipsoid = atom->ellipsoid;
     int *mask = atom->mask;
     int nlocal = atom->nlocal;
 
     for (int i = 0; i < nlocal; i++)
       if (mask[i] & groupbit)
         if (ellipsoid[i] < 0)
           error->one(FLERR,"Fix langevin angmom requires extended particles");
   }
 
   // set force prefactors
 
   if (!atom->rmass) {
     for (int i = 1; i <= atom->ntypes; i++) {
       gfactor1[i] = -atom->mass[i] / t_period / force->ftm2v;
       gfactor2[i] = sqrt(atom->mass[i]) *
         sqrt(24.0*force->boltz/t_period/update->dt/force->mvv2e) /
         force->ftm2v;
       gfactor1[i] *= 1.0/ratio[i];
       gfactor2[i] *= 1.0/sqrt(ratio[i]);
     }
   }
 
   if (temperature && temperature->tempbias) tbiasflag = BIAS;
   else tbiasflag = NOBIAS;
 
   if (strstr(update->integrate_style,"respa"))
     nlevels_respa = ((Respa *) update->integrate)->nlevels;
 
   if (gjfflag) gjffac = 1.0/(1.0+update->dt/2.0/t_period);
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixLangevin::setup(int vflag)
 {
   if (strstr(update->integrate_style,"verlet"))
     post_force(vflag);
   else {
     ((Respa *) update->integrate)->copy_flevel_f(nlevels_respa-1);
     post_force_respa(vflag,nlevels_respa-1,0);
     ((Respa *) update->integrate)->copy_f_flevel(nlevels_respa-1);
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixLangevin::post_force(int vflag)
 {
   double *rmass = atom->rmass;
 
   // enumerate all 2^6 possibilities for template parameters
   // this avoids testing them inside inner loop: 
   // TSTYLEATOM, GJF, TALLY, BIAS, RMASS, ZERO
 
 #ifdef TEMPLATED_FIX_LANGEVIN
   if (tstyle == ATOM)
     if (gjfflag)
       if (tallyflag)
 	if (tbiasflag == BIAS)
 	  if (rmass)
 	    if (zeroflag) post_force_templated<1,1,1,1,1,1>();
             else          post_force_templated<1,1,1,1,1,0>();
 	  else
 	    if (zeroflag) post_force_templated<1,1,1,1,0,1>();
             else          post_force_templated<1,1,1,1,0,0>();
 	else
 	  if (rmass)
 	    if (zeroflag) post_force_templated<1,1,1,0,1,1>();
 	    else          post_force_templated<1,1,1,0,1,0>();
 	  else
 	    if (zeroflag) post_force_templated<1,1,1,0,0,1>();
 	    else          post_force_templated<1,1,1,0,0,0>();
       else
 	if (tbiasflag == BIAS)
 	  if (rmass)
 	    if (zeroflag) post_force_templated<1,1,0,1,1,1>();
 	    else          post_force_templated<1,1,0,1,1,0>();
 	  else
 	    if (zeroflag) post_force_templated<1,1,0,1,0,1>();
 	    else          post_force_templated<1,1,0,1,0,0>();
 	else
 	  if (rmass)
 	    if (zeroflag) post_force_templated<1,1,0,0,1,1>();
 	    else          post_force_templated<1,1,0,0,1,0>();
 	  else
 	    if (zeroflag) post_force_templated<1,1,0,0,0,1>();
 	    else          post_force_templated<1,1,0,0,0,0>();
     else
       if (tallyflag)
 	if (tbiasflag == BIAS)
 	  if (rmass)
 	    if (zeroflag) post_force_templated<1,0,1,1,1,1>();
 	    else          post_force_templated<1,0,1,1,1,0>();
 	  else
 	    if (zeroflag) post_force_templated<1,0,1,1,0,1>();
 	    else          post_force_templated<1,0,1,1,0,0>();
 	else
 	  if (rmass)
 	    if (zeroflag) post_force_templated<1,0,1,0,1,1>();
 	    else          post_force_templated<1,0,1,0,1,0>();
 	  else
 	    if (zeroflag) post_force_templated<1,0,1,0,0,1>();
 	    else          post_force_templated<1,0,1,0,0,0>();
       else
 	if (tbiasflag == BIAS)
 	  if (rmass)
 	    if (zeroflag) post_force_templated<1,0,0,1,1,1>();
 	    else          post_force_templated<1,0,0,1,1,0>();
 	  else
 	    if (zeroflag) post_force_templated<1,0,0,1,0,1>();
 	    else          post_force_templated<1,0,0,1,0,0>();
 	else
 	  if (rmass)
 	    if (zeroflag) post_force_templated<1,0,0,0,1,1>();
 	    else          post_force_templated<1,0,0,0,1,0>();
 	  else
 	    if (zeroflag) post_force_templated<1,0,0,0,0,1>();
 	    else          post_force_templated<1,0,0,0,0,0>();
   else
     if (gjfflag)
       if (tallyflag)
 	if (tbiasflag == BIAS)
 	  if (rmass)
 	    if (zeroflag) post_force_templated<0,1,1,1,1,1>();
 	    else          post_force_templated<0,1,1,1,1,0>();
 	  else
 	    if (zeroflag) post_force_templated<0,1,1,1,0,1>();
 	    else          post_force_templated<0,1,1,1,0,0>();
 	else
 	  if (rmass)
 	    if (zeroflag) post_force_templated<0,1,1,0,1,1>();
 	    else          post_force_templated<0,1,1,0,1,0>();
 	  else
 	    if (zeroflag) post_force_templated<0,1,1,0,0,1>();
 	    else          post_force_templated<0,1,1,0,0,0>();
       else
 	if (tbiasflag == BIAS)
 	  if (rmass)
 	    if (zeroflag) post_force_templated<0,1,0,1,1,1>();
 	    else          post_force_templated<0,1,0,1,1,0>();
 	  else
 	    if (zeroflag) post_force_templated<0,1,0,1,0,1>();
 	    else          post_force_templated<0,1,0,1,0,0>();
 	else
 	  if (rmass)
 	    if (zeroflag) post_force_templated<0,1,0,0,1,1>();
 	    else          post_force_templated<0,1,0,0,1,0>();
 	  else
 	    if (zeroflag) post_force_templated<0,1,0,0,0,1>();
 	    else          post_force_templated<0,1,0,0,0,0>();
     else
       if (tallyflag)
 	if (tbiasflag == BIAS)
 	  if (rmass)
 	    if (zeroflag) post_force_templated<0,0,1,1,1,1>();
 	    else          post_force_templated<0,0,1,1,1,0>();
 	  else
 	    if (zeroflag) post_force_templated<0,0,1,1,0,1>();
 	    else          post_force_templated<0,0,1,1,0,0>();
 	else
 	  if (rmass)
 	    if (zeroflag) post_force_templated<0,0,1,0,1,1>();
 	    else          post_force_templated<0,0,1,0,1,0>();
 	  else
 	    if (zeroflag) post_force_templated<0,0,1,0,0,1>();
 	    else          post_force_templated<0,0,1,0,0,0>();
       else
 	if (tbiasflag == BIAS)
 	  if (rmass)
 	    if (zeroflag) post_force_templated<0,0,0,1,1,1>();
 	    else          post_force_templated<0,0,0,1,1,0>();
 	  else
 	    if (zeroflag) post_force_templated<0,0,0,1,0,1>();
 	    else          post_force_templated<0,0,0,1,0,0>();
 	else
 	  if (rmass)
 	    if (zeroflag) post_force_templated<0,0,0,0,1,1>();
 	    else          post_force_templated<0,0,0,0,1,0>();
 	  else
 	    if (zeroflag) post_force_templated<0,0,0,0,0,1>();
 	    else          post_force_templated<0,0,0,0,0,0>();
 #else
   post_force_untemplated(int(tstyle==ATOM), gjfflag, tallyflag, 
 			 int(tbiasflag==BIAS), int(rmass!=NULL), zeroflag);
 #endif
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixLangevin::post_force_respa(int vflag, int ilevel, int iloop)
 {
   if (ilevel == nlevels_respa-1) post_force(vflag);
 }
 
 /* ----------------------------------------------------------------------
    modify forces using one of the many Langevin styles
 ------------------------------------------------------------------------- */
 
 #ifdef TEMPLATED_FIX_LANGEVIN
 template < int Tp_TSTYLEATOM, int Tp_GJF, int Tp_TALLY, 
 	   int Tp_BIAS, int Tp_RMASS, int Tp_ZERO >
 void FixLangevin::post_force_templated()
 #else
 void FixLangevin::post_force_untemplated
   (int Tp_TSTYLEATOM, int Tp_GJF, int Tp_TALLY, 
    int Tp_BIAS, int Tp_RMASS, int Tp_ZERO)
 #endif
 {
   double gamma1,gamma2;
 
   double **v = atom->v;
   double **f = atom->f;
   double *rmass = atom->rmass;
   int *type = atom->type;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
   // apply damping and thermostat to atoms in group
 
   // for Tp_TSTYLEATOM:
   //   use per-atom per-coord target temperature
   // for Tp_GJF:
   //   use Gronbech-Jensen/Farago algorithm
   //   else use regular algorithm
   // for Tp_TALLY:
   //   store drag plus random forces in flangevin[nlocal][3]
   // for Tp_BIAS:
   //   calculate temperature since some computes require temp
   //   computed on current nlocal atoms to remove bias
   //   test v = 0 since some computes mask non-participating atoms via v = 0
   //   and added force has extra term not multiplied by v = 0
   // for Tp_RMASS:
   //   use per-atom masses
   //   else use per-type masses
   // for Tp_ZERO:
   //   sum random force over all atoms in group
   //   subtract sum/count from each atom in group
 
   double fdrag[3],fran[3],fsum[3],fsumall[3];
   bigint count;
   double fswap;
 
   double boltz = force->boltz;
   double dt = update->dt;
   double mvv2e = force->mvv2e;
   double ftm2v = force->ftm2v;
 
   compute_target();
 
   if (Tp_ZERO) {
     fsum[0] = fsum[1] = fsum[2] = 0.0;
     count = group->count(igroup);
     if (count == 0)
       error->all(FLERR,"Cannot zero Langevin force of 0 atoms");
   }
 
   // reallocate flangevin if necessary
 
   if (Tp_TALLY) {
     if (atom->nlocal > maxatom1) {
       memory->destroy(flangevin);
       maxatom1 = atom->nmax;
       memory->create(flangevin,maxatom1,3,"langevin:flangevin");
     }
   }
 
   if (Tp_BIAS) temperature->compute_scalar();
 
   for (int i = 0; i < nlocal; i++) {
     if (mask[i] & groupbit) {
       if (Tp_TSTYLEATOM) tsqrt = sqrt(tforce[i]);
       if (Tp_RMASS) {
 	gamma1 = -rmass[i] / t_period / ftm2v;
 	gamma2 = sqrt(rmass[i]) * sqrt(24.0*boltz/t_period/dt/mvv2e) / ftm2v;
 	gamma1 *= 1.0/ratio[type[i]];
 	gamma2 *= 1.0/sqrt(ratio[type[i]]) * tsqrt;
       } else {
 	gamma1 = gfactor1[type[i]];
 	gamma2 = gfactor2[type[i]] * tsqrt;
       }
 
       fran[0] = gamma2*(random->uniform()-0.5);
       fran[1] = gamma2*(random->uniform()-0.5);
       fran[2] = gamma2*(random->uniform()-0.5);
 
       if (Tp_BIAS) {
 	temperature->remove_bias(i,v[i]);
 	fdrag[0] = gamma1*v[i][0];
 	fdrag[1] = gamma1*v[i][1];
 	fdrag[2] = gamma1*v[i][2];
 	if (v[i][0] == 0.0) fran[0] = 0.0;
 	if (v[i][1] == 0.0) fran[1] = 0.0;
 	if (v[i][2] == 0.0) fran[2] = 0.0;
 	temperature->restore_bias(i,v[i]);
       } else {
 	fdrag[0] = gamma1*v[i][0];
 	fdrag[1] = gamma1*v[i][1];
 	fdrag[2] = gamma1*v[i][2];
       }
 
       if (Tp_GJF) {
 	fswap = 0.5*(fran[0]+franprev[i][0]);
 	franprev[i][0] = fran[0];
 	fran[0] = fswap;
 	fswap = 0.5*(fran[1]+franprev[i][1]);
 	franprev[i][1] = fran[1];
 	fran[1] = fswap;
 	fswap = 0.5*(fran[2]+franprev[i][2]);
 	franprev[i][2] = fran[2];
 	fran[2] = fswap;
 
 	fdrag[0] *= gjffac;
 	fdrag[1] *= gjffac;
 	fdrag[2] *= gjffac;
 	fran[0] *= gjffac;
 	fran[1] *= gjffac;
 	fran[2] *= gjffac;
 	f[i][0] *= gjffac;
 	f[i][1] *= gjffac;
 	f[i][2] *= gjffac;
       }
 
       f[i][0] += fdrag[0] + fran[0];
       f[i][1] += fdrag[1] + fran[1];
       f[i][2] += fdrag[2] + fran[2];
 
       if (Tp_TALLY) {
 	flangevin[i][0] = fdrag[0] + fran[0];
 	flangevin[i][1] = fdrag[1] + fran[1];
 	flangevin[i][2] = fdrag[2] + fran[2];
       }
 
       if (Tp_ZERO) {
 	fsum[0] += fran[0];
 	fsum[1] += fran[1];
 	fsum[2] += fran[2];
       }
     }
   }
 
   // set total force to zero
 
   if (Tp_ZERO) {
     MPI_Allreduce(fsum,fsumall,3,MPI_DOUBLE,MPI_SUM,world);
     fsumall[0] /= count;
     fsumall[1] /= count;
     fsumall[2] /= count;
     for (int i = 0; i < nlocal; i++) {
       if (mask[i] & groupbit) {
         f[i][0] -= fsumall[0];
         f[i][1] -= fsumall[1];
         f[i][2] -= fsumall[2];
       }
     }
   }
 
   // thermostat omega and angmom
 
   if (oflag) omega_thermostat();
   if (ascale) angmom_thermostat();
 }
 
 /* ----------------------------------------------------------------------
    set current t_target and t_sqrt
 ------------------------------------------------------------------------- */
 
 void FixLangevin::compute_target()
 {
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
   double delta = update->ntimestep - update->beginstep;
   if (delta != 0.0) delta /= update->endstep - update->beginstep;
 
   // if variable temp, evaluate variable, wrap with clear/add
   // reallocate tforce array if necessary
 
   if (tstyle == CONSTANT) {
     t_target = t_start + delta * (t_stop-t_start);
     tsqrt = sqrt(t_target);
   } else {
     modify->clearstep_compute();
     if (tstyle == EQUAL) {
       t_target = input->variable->compute_equal(tvar);
       if (t_target < 0.0)
         error->one(FLERR,"Fix langevin variable returned negative temperature");
       tsqrt = sqrt(t_target);
     } else {
       if (nlocal > maxatom2) {
         maxatom2 = atom->nmax;
         memory->destroy(tforce);
         memory->create(tforce,maxatom2,"langevin:tforce");
       }
       input->variable->compute_atom(tvar,igroup,tforce,1,0);
       for (int i = 0; i < nlocal; i++)
         if (mask[i] & groupbit)
             if (tforce[i] < 0.0)
               error->one(FLERR,
                          "Fix langevin variable returned negative temperature");
     }
     modify->addstep_compute(update->ntimestep + 1);
   }
 }
 
 /* ----------------------------------------------------------------------
    thermostat rotational dof via omega
 ------------------------------------------------------------------------- */
 
 void FixLangevin::omega_thermostat()
 {
   double gamma1,gamma2;
 
   double boltz = force->boltz;
   double dt = update->dt;
   double mvv2e = force->mvv2e;
   double ftm2v = force->ftm2v;
 
   double **torque = atom->torque;
   double **omega = atom->omega;
   double *radius = atom->radius;
   double *rmass = atom->rmass;
   int *mask = atom->mask;
   int *type = atom->type;
   int nlocal = atom->nlocal;
 
   // rescale gamma1/gamma2 by 10/3 & sqrt(10/3) for spherical particles
   // does not affect rotational thermosatting
   // gives correct rotational diffusivity behavior
 
   double tendivthree = 10.0/3.0;
   double tran[3];
   double inertiaone;
   
   for (int i = 0; i < nlocal; i++) {
     if (mask[i] & groupbit) {
       inertiaone = SINERTIA*radius[i]*radius[i]*rmass[i];
       if (tstyle == ATOM) tsqrt = sqrt(tforce[i]);
       gamma1 = -tendivthree*inertiaone / t_period / ftm2v;
       gamma2 = sqrt(inertiaone) * sqrt(80.0*boltz/t_period/dt/mvv2e) / ftm2v;
       gamma1 *= 1.0/ratio[type[i]];
       gamma2 *= 1.0/sqrt(ratio[type[i]]) * tsqrt;
       tran[0] = gamma2*(random->uniform()-0.5);
       tran[1] = gamma2*(random->uniform()-0.5);
       tran[2] = gamma2*(random->uniform()-0.5);
       torque[i][0] += gamma1*omega[i][0] + tran[0];
       torque[i][1] += gamma1*omega[i][1] + tran[1];
       torque[i][2] += gamma1*omega[i][2] + tran[2];
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    thermostat rotational dof via angmom
 ------------------------------------------------------------------------- */
 
 void FixLangevin::angmom_thermostat()
 {
   double gamma1,gamma2;
 
   double boltz = force->boltz;
   double dt = update->dt;
   double mvv2e = force->mvv2e;
   double ftm2v = force->ftm2v;
 
   AtomVecEllipsoid::Bonus *bonus = avec->bonus;
   double **torque = atom->torque;
   double **angmom = atom->angmom;
   double *rmass = atom->rmass;
   int *ellipsoid = atom->ellipsoid;
   int *mask = atom->mask;
   int *type = atom->type;
   int nlocal = atom->nlocal;
 
   // rescale gamma1/gamma2 by ascale for aspherical particles
   // does not affect rotational thermosatting
   // gives correct rotational diffusivity behavior if (nearly) spherical
   // any value will be incorrect for rotational diffusivity if aspherical
 
   double inertia[3],omega[3],tran[3];
   double *shape,*quat;
 
   for (int i = 0; i < nlocal; i++) {
     if (mask[i] & groupbit) {
       shape = bonus[ellipsoid[i]].shape;
       inertia[0] = EINERTIA*rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]);
       inertia[1] = EINERTIA*rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]);
       inertia[2] = EINERTIA*rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]);
       quat = bonus[ellipsoid[i]].quat;
       MathExtra::mq_to_omega(angmom[i],quat,inertia,omega);
 
       if (tstyle == ATOM) tsqrt = sqrt(tforce[i]);
       gamma1 = -ascale / t_period / ftm2v;
       gamma2 = sqrt(ascale*24.0*boltz/t_period/dt/mvv2e) / ftm2v;
       gamma1 *= 1.0/ratio[type[i]];
       gamma2 *= 1.0/sqrt(ratio[type[i]]) * tsqrt;
       tran[0] = sqrt(inertia[0])*gamma2*(random->uniform()-0.5);
       tran[1] = sqrt(inertia[1])*gamma2*(random->uniform()-0.5);
       tran[2] = sqrt(inertia[2])*gamma2*(random->uniform()-0.5);
       torque[i][0] += inertia[0]*gamma1*omega[0] + tran[0];
       torque[i][1] += inertia[1]*gamma1*omega[1] + tran[1];
       torque[i][2] += inertia[2]*gamma1*omega[2] + tran[2];
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    tally energy transfer to thermal reservoir
 ------------------------------------------------------------------------- */
 
 void FixLangevin::end_of_step()
 {
   if (!tallyflag) return;
 
   double **v = atom->v;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
   energy_onestep = 0.0;
 
   for (int i = 0; i < nlocal; i++)
     if (mask[i] & groupbit)
       energy_onestep += flangevin[i][0]*v[i][0] + flangevin[i][1]*v[i][1] +
         flangevin[i][2]*v[i][2];
 
   energy += energy_onestep*update->dt;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixLangevin::reset_target(double t_new)
 {
   t_target = t_start = t_stop = t_new;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixLangevin::reset_dt()
 {
   if (atom->mass) {
     for (int i = 1; i <= atom->ntypes; i++) {
       gfactor2[i] = sqrt(atom->mass[i]) *
         sqrt(24.0*force->boltz/t_period/update->dt/force->mvv2e) /
         force->ftm2v;
       gfactor2[i] *= 1.0/sqrt(ratio[i]);
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixLangevin::modify_param(int narg, char **arg)
 {
   if (strcmp(arg[0],"temp") == 0) {
     if (narg < 2) error->all(FLERR,"Illegal fix_modify command");
     delete [] id_temp;
     int n = strlen(arg[1]) + 1;
     id_temp = new char[n];
     strcpy(id_temp,arg[1]);
 
     int icompute = modify->find_compute(id_temp);
     if (icompute < 0)
       error->all(FLERR,"Could not find fix_modify temperature ID");
     temperature = modify->compute[icompute];
 
     if (temperature->tempflag == 0)
       error->all(FLERR,
                  "Fix_modify temperature ID does not compute temperature");
     if (temperature->igroup != igroup && comm->me == 0)
       error->warning(FLERR,"Group for fix_modify temp != fix group");
     return 2;
   }
   return 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double FixLangevin::compute_scalar()
 {
   if (!tallyflag || flangevin == NULL) return 0.0;
 
   // capture the very first energy transfer to thermal reservoir
 
   double **v = atom->v;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
   if (update->ntimestep == update->beginstep) {
     energy_onestep = 0.0;
     for (int i = 0; i < nlocal; i++)
       if (mask[i] & groupbit)
         energy_onestep += flangevin[i][0]*v[i][0] + flangevin[i][1]*v[i][1] +
           flangevin[i][2]*v[i][2];
     energy = 0.5*energy_onestep*update->dt;
   }
 
   // convert midstep energy back to previous fullstep energy
 
   double energy_me = energy - 0.5*energy_onestep*update->dt;
 
   double energy_all;
   MPI_Allreduce(&energy_me,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
   return -energy_all;
 }
 
 /* ----------------------------------------------------------------------
    extract thermostat properties
 ------------------------------------------------------------------------- */
 
 void *FixLangevin::extract(const char *str, int &dim)
 {
   dim = 0;
   if (strcmp(str,"t_target") == 0) {
     return &t_target;
   }
   return NULL;
 }
 
 /* ----------------------------------------------------------------------
    memory usage of tally array
 ------------------------------------------------------------------------- */
 
 double FixLangevin::memory_usage()
 {
   double bytes = 0.0;
   if (gjfflag) bytes += atom->nmax*3 * sizeof(double);
   if (tallyflag) bytes += atom->nmax*3 * sizeof(double);
   if (tforce) bytes += atom->nmax * sizeof(double);
   return bytes;
 }
 
 /* ----------------------------------------------------------------------
    allocate atom-based array for franprev 
 ------------------------------------------------------------------------- */
 
 void FixLangevin::grow_arrays(int nmax)
 {
   memory->grow(franprev,nmax,3,"fix_langevin:franprev");
 }
 
 /* ----------------------------------------------------------------------
    copy values within local atom-based array
 ------------------------------------------------------------------------- */
 
 void FixLangevin::copy_arrays(int i, int j, int delflag)
 {
   for (int m = 0; m < nvalues; m++)
     franprev[j][m] = franprev[i][m];
 }
 
 /* ----------------------------------------------------------------------
    pack values in local atom-based array for exchange with another proc
 ------------------------------------------------------------------------- */
 
 int FixLangevin::pack_exchange(int i, double *buf)
 {
   for (int m = 0; m < nvalues; m++) buf[m] = franprev[i][m];
   return nvalues;
 }
 
 /* ----------------------------------------------------------------------
    unpack values in local atom-based array from exchange with another proc
 ------------------------------------------------------------------------- */
 
 int FixLangevin::unpack_exchange(int nlocal, double *buf)
 {
   for (int m = 0; m < nvalues; m++) franprev[nlocal][m] = buf[m];
   return nvalues;
 }
diff --git a/src/fix_langevin.h b/src/fix_langevin.h
index bba551663..ecfca919a 100644
--- a/src/fix_langevin.h
+++ b/src/fix_langevin.h
@@ -1,152 +1,153 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef FIX_CLASS
 
 FixStyle(langevin,FixLangevin)
 
 #else
 
 #ifndef LMP_FIX_LANGEVIN_H
 #define LMP_FIX_LANGEVIN_H
 
 #include "fix.h"
 
 namespace LAMMPS_NS {
 
 class FixLangevin : public Fix {
  public:
   FixLangevin(class LAMMPS *, int, char **);
   virtual ~FixLangevin();
   int setmask();
   void init();
   void setup(int);
   virtual void post_force(int);
   void post_force_respa(int, int, int);
   virtual void end_of_step();
   void reset_target(double);
   void reset_dt();
   int modify_param(int, char **);
   virtual double compute_scalar();
   double memory_usage();
   virtual void *extract(const char *, int &);
   void grow_arrays(int);
   void copy_arrays(int, int, int);
   int pack_exchange(int, double *);
   int unpack_exchange(int, double *);
 
  protected:
   int gjfflag,oflag,tallyflag,zeroflag,tbiasflag;
   double ascale;
   double t_start,t_stop,t_period,t_target;
   double *gfactor1,*gfactor2,*ratio;
   double energy,energy_onestep;
   double tsqrt;
   int tstyle,tvar;
   double gjffac;
   char *tstr;
 
   class AtomVecEllipsoid *avec;
 
   int maxatom1,maxatom2;
   double **flangevin;
   double *tforce;
   double **franprev;
   int nvalues;
 
   char *id_temp;
   class Compute *temperature;
 
   int nlevels_respa;
   class RanMars *random;
+  int seed;
 
   // comment next line to turn off templating
 #define TEMPLATED_FIX_LANGEVIN
 #ifdef TEMPLATED_FIX_LANGEVIN
   template < int Tp_TSTYLEATOM, int Tp_GJF, int Tp_TALLY, 
 	     int Tp_BIAS, int Tp_RMASS, int Tp_ZERO > 
   void post_force_templated();
 #else
   void post_force_untemplated(int, int, int, 
 			      int, int, int);
 #endif
   void omega_thermostat();
   void angmom_thermostat();
   void compute_target();
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Fix langevin period must be > 0.0
 
 The time window for temperature relaxation must be > 0
 
 E: Fix langevin omega requires atom style sphere
 
 Self-explanatory.
 
 E: Fix langevin angmom requires atom style ellipsoid
 
 Self-explanatory.
 
 E: Variable name for fix langevin does not exist
 
 Self-explanatory.
 
 E: Variable for fix langevin is invalid style
 
 It must be an equal-style variable.
 
 E: Fix langevin omega requires extended particles
 
 One of the particles has radius 0.0.
 
 E: Fix langevin angmom requires extended particles
 
 This fix option cannot be used with point paritlces.
 
 E: Cannot zero Langevin force of 0 atoms
 
 The group has zero atoms, so you cannot request its force
 be zeroed.
 
 E: Fix langevin variable returned negative temperature
 
 Self-explanatory.
 
 E: Could not find fix_modify temperature ID
 
 The compute ID for computing temperature does not exist.
 
 E: Fix_modify temperature ID does not compute temperature
 
 The compute ID assigned to the fix must compute temperature.
 
 W: Group for fix_modify temp != fix group
 
 The fix_modify command is specifying a temperature computation that
 computes a temperature on a different group of atoms than the fix
 itself operates on.  This is probably not what you want to do.
 
 */
diff --git a/src/improper.cpp b/src/improper.cpp
index 91753cb9f..90460d76c 100644
--- a/src/improper.cpp
+++ b/src/improper.cpp
@@ -1,252 +1,256 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "math.h"
 #include "improper.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "suffix.h"
 #include "atom_masks.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
 Improper::Improper(LAMMPS *lmp) : Pointers(lmp)
 {
   energy = 0.0;
   writedata = 0;
 
   allocated = 0;
   suffix_flag = Suffix::NONE;
 
   maxeatom = maxvatom = 0;
   eatom = NULL;
   vatom = NULL;
   setflag = NULL;
 
   datamask = ALL_MASK;
   datamask_ext = ALL_MASK;
+
+  execution_space = Host;
+  datamask_read = ALL_MASK;
+  datamask_modify = ALL_MASK;
 }
 
 /* ---------------------------------------------------------------------- */
 
 Improper::~Improper()
 {
   memory->destroy(eatom);
   memory->destroy(vatom);
 }
 
 /* ----------------------------------------------------------------------
    check if all coeffs are set
 ------------------------------------------------------------------------- */
 
 void Improper::init()
 {
   if (!allocated && atom->nimpropertypes) 
     error->all(FLERR,"Improper coeffs are not set");
   for (int i = 1; i <= atom->nimpropertypes; i++)
     if (setflag[i] == 0) error->all(FLERR,"All improper coeffs are not set");
 }
 
 /* ----------------------------------------------------------------------
    setup for energy, virial computation
    see integrate::ev_set() for values of eflag (0-3) and vflag (0-6)
 ------------------------------------------------------------------------- */
 
 void Improper::ev_setup(int eflag, int vflag)
 {
   int i,n;
 
   evflag = 1;
 
   eflag_either = eflag;
   eflag_global = eflag % 2;
   eflag_atom = eflag / 2;
 
   vflag_either = vflag;
   vflag_global = vflag % 4;
   vflag_atom = vflag / 4;
 
   // reallocate per-atom arrays if necessary
 
   if (eflag_atom && atom->nmax > maxeatom) {
     maxeatom = atom->nmax;
     memory->destroy(eatom);
     memory->create(eatom,comm->nthreads*maxeatom,"bond:eatom");
   }
   if (vflag_atom && atom->nmax > maxvatom) {
     maxvatom = atom->nmax;
     memory->destroy(vatom);
     memory->create(vatom,comm->nthreads*maxvatom,6,"bond:vatom");
   }
 
   // zero accumulators
 
   if (eflag_global) energy = 0.0;
   if (vflag_global) for (i = 0; i < 6; i++) virial[i] = 0.0;
   if (eflag_atom) {
     n = atom->nlocal;
     if (force->newton_bond) n += atom->nghost;
     for (i = 0; i < n; i++) eatom[i] = 0.0;
   }
   if (vflag_atom) {
     n = atom->nlocal;
     if (force->newton_bond) n += atom->nghost;
     for (i = 0; i < n; i++) {
       vatom[i][0] = 0.0;
       vatom[i][1] = 0.0;
       vatom[i][2] = 0.0;
       vatom[i][3] = 0.0;
       vatom[i][4] = 0.0;
       vatom[i][5] = 0.0;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    tally energy and virial into global and per-atom accumulators
    virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4
           = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4
           = vb1*f1 + vb2*f3 + (vb3+vb2)*f4
 ------------------------------------------------------------------------- */
 
 void Improper::ev_tally(int i1, int i2, int i3, int i4,
                         int nlocal, int newton_bond,
                         double eimproper, double *f1, double *f3, double *f4,
                         double vb1x, double vb1y, double vb1z,
                         double vb2x, double vb2y, double vb2z,
                         double vb3x, double vb3y, double vb3z)
 {
   double eimproperquarter,v[6];
 
   if (eflag_either) {
     if (eflag_global) {
       if (newton_bond) energy += eimproper;
       else {
         eimproperquarter = 0.25*eimproper;
         if (i1 < nlocal) energy += eimproperquarter;
         if (i2 < nlocal) energy += eimproperquarter;
         if (i3 < nlocal) energy += eimproperquarter;
         if (i4 < nlocal) energy += eimproperquarter;
       }
     }
     if (eflag_atom) {
       eimproperquarter = 0.25*eimproper;
       if (newton_bond || i1 < nlocal) eatom[i1] += eimproperquarter;
       if (newton_bond || i2 < nlocal) eatom[i2] += eimproperquarter;
       if (newton_bond || i3 < nlocal) eatom[i3] += eimproperquarter;
       if (newton_bond || i4 < nlocal) eatom[i4] += eimproperquarter;
     }
   }
 
   if (vflag_either) {
     v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0];
     v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1];
     v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2];
     v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1];
     v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2];
     v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2];
 
     if (vflag_global) {
       if (newton_bond) {
         virial[0] += v[0];
         virial[1] += v[1];
         virial[2] += v[2];
         virial[3] += v[3];
         virial[4] += v[4];
         virial[5] += v[5];
       } else {
         if (i1 < nlocal) {
           virial[0] += 0.25*v[0];
           virial[1] += 0.25*v[1];
           virial[2] += 0.25*v[2];
           virial[3] += 0.25*v[3];
           virial[4] += 0.25*v[4];
           virial[5] += 0.25*v[5];
         }
         if (i2 < nlocal) {
           virial[0] += 0.25*v[0];
           virial[1] += 0.25*v[1];
           virial[2] += 0.25*v[2];
           virial[3] += 0.25*v[3];
           virial[4] += 0.25*v[4];
           virial[5] += 0.25*v[5];
         }
         if (i3 < nlocal) {
           virial[0] += 0.25*v[0];
           virial[1] += 0.25*v[1];
           virial[2] += 0.25*v[2];
           virial[3] += 0.25*v[3];
           virial[4] += 0.25*v[4];
           virial[5] += 0.25*v[5];
         }
         if (i4 < nlocal) {
           virial[0] += 0.25*v[0];
           virial[1] += 0.25*v[1];
           virial[2] += 0.25*v[2];
           virial[3] += 0.25*v[3];
           virial[4] += 0.25*v[4];
           virial[5] += 0.25*v[5];
         }
       }
     }
 
     if (vflag_atom) {
       if (newton_bond || i1 < nlocal) {
         vatom[i1][0] += 0.25*v[0];
         vatom[i1][1] += 0.25*v[1];
         vatom[i1][2] += 0.25*v[2];
         vatom[i1][3] += 0.25*v[3];
         vatom[i1][4] += 0.25*v[4];
         vatom[i1][5] += 0.25*v[5];
       }
       if (newton_bond || i2 < nlocal) {
         vatom[i2][0] += 0.25*v[0];
         vatom[i2][1] += 0.25*v[1];
         vatom[i2][2] += 0.25*v[2];
         vatom[i2][3] += 0.25*v[3];
         vatom[i2][4] += 0.25*v[4];
         vatom[i2][5] += 0.25*v[5];
       }
       if (newton_bond || i3 < nlocal) {
         vatom[i3][0] += 0.25*v[0];
         vatom[i3][1] += 0.25*v[1];
         vatom[i3][2] += 0.25*v[2];
         vatom[i3][3] += 0.25*v[3];
         vatom[i3][4] += 0.25*v[4];
         vatom[i3][5] += 0.25*v[5];
       }
       if (newton_bond || i4 < nlocal) {
         vatom[i4][0] += 0.25*v[0];
         vatom[i4][1] += 0.25*v[1];
         vatom[i4][2] += 0.25*v[2];
         vatom[i4][3] += 0.25*v[3];
         vatom[i4][4] += 0.25*v[4];
         vatom[i4][5] += 0.25*v[5];
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double Improper::memory_usage()
 {
   double bytes = comm->nthreads*maxeatom * sizeof(double);
   bytes += comm->nthreads*maxvatom*6 * sizeof(double);
   return bytes;
 }
diff --git a/src/improper.h b/src/improper.h
index ac61dabba..77c38af76 100644
--- a/src/improper.h
+++ b/src/improper.h
@@ -1,79 +1,83 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifndef LMP_IMPROPER_H
 #define LMP_IMPROPER_H
 
 #include "stdio.h"
 #include "pointers.h"
 
 namespace LAMMPS_NS {
 
 class Improper : protected Pointers {
   friend class ThrOMP;
   friend class FixOMP;
  public:
   int allocated;
   int *setflag;
   int writedata;                  // 1 if writes coeffs to data file
   double energy;                  // accumulated energies
   double virial[6];               // accumlated virial
   double *eatom,**vatom;          // accumulated per-atom energy/virial
   unsigned int datamask;
   unsigned int datamask_ext;
 
+  // KOKKOS host/device flag and data masks
+  ExecutionSpace execution_space;
+  unsigned int datamask_read,datamask_modify;
+
   Improper(class LAMMPS *);
   virtual ~Improper();
   virtual void init();
   virtual void compute(int, int) = 0;
   virtual void settings(int, char **) {}
   virtual void coeff(int, char **) = 0;
   virtual void write_restart(FILE *) = 0;
   virtual void read_restart(FILE *) = 0;
   virtual void write_data(FILE *) {}
   virtual double memory_usage();
 
   virtual unsigned int data_mask() {return datamask;}
   virtual unsigned int data_mask_ext() {return datamask_ext;}
 
  protected:
   int suffix_flag;             // suffix compatibility flag
 
   int evflag;
   int eflag_either,eflag_global,eflag_atom;
   int vflag_either,vflag_global,vflag_atom;
   int maxeatom,maxvatom;
 
   void ev_setup(int, int);
   void ev_tally(int, int, int, int, int, int, double,
                 double *, double *, double *, double, double, double,
                 double, double, double, double, double, double);
 };
 
 }
 
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Improper coeffs are not set
 
 No improper coefficients have been assigned in the data file or via
 the improper_coeff command.
 
 E: All improper coeffs are not set
 
 All improper coefficients must be set in the data file or by the
 improper_coeff command before running a simulation.
 
 */
diff --git a/src/kspace.cpp b/src/kspace.cpp
index c12af7c84..ce01081cb 100644
--- a/src/kspace.cpp
+++ b/src/kspace.cpp
@@ -1,553 +1,557 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include "stdlib.h"
 #include "string.h"
 #include "kspace.h"
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
 #include "pair.h"
 #include "memory.h"
 #include "atom_masks.h"
 #include "error.h"
 #include "suffix.h"
 #include "domain.h"
 
 using namespace LAMMPS_NS;
 
 #define SMALL 0.00001
 
 /* ---------------------------------------------------------------------- */
 
 KSpace::KSpace(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
 {
   energy = 0.0;
   virial[0] = virial[1] = virial[2] = virial[3] = virial[4] = virial[5] = 0.0;
 
   triclinic_support = 1;
   ewaldflag = pppmflag = msmflag = dispersionflag = tip4pflag = dipoleflag = 0;
   compute_flag = 1;
   group_group_enable = 0;
   stagger_flag = 0;
 
   order = 5;
   gridflag = 0;
   gewaldflag = 0;
   minorder = 2;
   overlap_allowed = 1;
   fftbench = 0;
 
   // default to using MPI collectives for FFT/remap only on IBM BlueGene
 
 #ifdef __bg__
   collective_flag = 1;
 #else
   collective_flag = 0;
 #endif
 
   kewaldflag = 0;
 
   order_6 = 5;
   gridflag_6 = 0;
   gewaldflag_6 = 0;
     
   slabflag = 0;
   differentiation_flag = 0;
   slab_volfactor = 1;
   suffix_flag = Suffix::NONE;
   adjust_cutoff_flag = 1;
   scalar_pressure_flag = 0;
 
   accuracy_absolute = -1.0;
   accuracy_real_6 = -1.0;
   accuracy_kspace_6 = -1.0;
   two_charge_force = force->qqr2e *
     (force->qelectron * force->qelectron) /
     (force->angstrom * force->angstrom);
 
   neighrequest_flag = 1;
   mixflag = 0;
 
   splittol = 1.0e-6;
 
   maxeatom = maxvatom = 0;
   eatom = NULL;
   vatom = NULL;
 
   datamask = ALL_MASK;
   datamask_ext = ALL_MASK;
 
+  execution_space = Host;
+  datamask_read = ALL_MASK;
+  datamask_modify = ALL_MASK;
+
   memory->create(gcons,7,7,"kspace:gcons");
   gcons[2][0] = 15.0 / 8.0;
   gcons[2][1] = -5.0 / 4.0;
   gcons[2][2] = 3.0 / 8.0;
   gcons[3][0] = 35.0 / 16.0;
   gcons[3][1] = -35.0 / 16.0;
   gcons[3][2] = 21.0 / 16.0;
   gcons[3][3] = -5.0 / 16.0;
   gcons[4][0] = 315.0 / 128.0;
   gcons[4][1] = -105.0 / 32.0;
   gcons[4][2] = 189.0 / 64.0;
   gcons[4][3] = -45.0 / 32.0;
   gcons[4][4] = 35.0 / 128.0;
   gcons[5][0] = 693.0 / 256.0;
   gcons[5][1] = -1155.0 / 256.0;
   gcons[5][2] = 693.0 / 128.0;
   gcons[5][3] = -495.0 / 128.0;
   gcons[5][4] = 385.0 / 256.0;
   gcons[5][5] = -63.0 / 256.0;
   gcons[6][0] = 3003.0 / 1024.0;
   gcons[6][1] = -3003.0 / 512.0;
   gcons[6][2] = 9009.0 / 1024.0;
   gcons[6][3] = -2145.0 / 256.0;
   gcons[6][4] = 5005.0 / 1024.0;
   gcons[6][5] = -819.0 / 512.0;
   gcons[6][6] = 231.0 / 1024.0;
 
   memory->create(dgcons,7,6,"kspace:dgcons");
   dgcons[2][0] = -5.0 / 2.0;
   dgcons[2][1] = 3.0 / 2.0;
   dgcons[3][0] = -35.0 / 8.0;
   dgcons[3][1] = 21.0 / 4.0;
   dgcons[3][2] = -15.0 / 8.0;
   dgcons[4][0] = -105.0 / 16.0;
   dgcons[4][1] = 189.0 / 16.0;
   dgcons[4][2] = -135.0 / 16.0;
   dgcons[4][3] = 35.0 / 16.0;
   dgcons[5][0] = -1155.0 / 128.0;
   dgcons[5][1] = 693.0 / 32.0;
   dgcons[5][2] = -1485.0 / 64.0;
   dgcons[5][3] = 385.0 / 32.0;
   dgcons[5][4] = -315.0 / 128.0;
   dgcons[6][0] = -3003.0 / 256.0;
   dgcons[6][1] = 9009.0 / 256.0;
   dgcons[6][2] = -6435.0 / 128.0;
   dgcons[6][3] = 5005.0 / 128.0;
   dgcons[6][4] = -4095.0 / 256.0;
   dgcons[6][5] = 693.0 / 256.0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 KSpace::~KSpace()
 {
   memory->destroy(eatom);
   memory->destroy(vatom);
   memory->destroy(gcons);
   memory->destroy(dgcons);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void KSpace::triclinic_check()
 {
   if (domain->triclinic && triclinic_support != 1)
     error->all(FLERR,"KSpace style does not yet support triclinic geometries");
 }
 
 /* ---------------------------------------------------------------------- */
 
 void KSpace::compute_dummy(int eflag, int vflag)
 {
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = evflag_atom = eflag_global = vflag_global =
          eflag_atom = vflag_atom = 0;
 }
 
 /* ----------------------------------------------------------------------
    check that pair style is compatible with long-range solver
 ------------------------------------------------------------------------- */
 
 void KSpace::pair_check()
 {
   if (force->pair == NULL)
     error->all(FLERR,"KSpace solver requires a pair style");
 
   if (ewaldflag && !force->pair->ewaldflag)
     error->all(FLERR,"KSpace style is incompatible with Pair style");
   if (pppmflag && !force->pair->pppmflag)
     error->all(FLERR,"KSpace style is incompatible with Pair style");
   if (msmflag && !force->pair->msmflag)
     error->all(FLERR,"KSpace style is incompatible with Pair style");
   if (dispersionflag && !force->pair->dispersionflag)
     error->all(FLERR,"KSpace style is incompatible with Pair style");
   if (dipoleflag && !force->pair->dipoleflag)
     error->all(FLERR,"KSpace style is incompatible with Pair style");
   if (tip4pflag && !force->pair->tip4pflag)
     error->all(FLERR,"KSpace style is incompatible with Pair style");
 
   if (force->pair->dispersionflag && !dispersionflag)
     error->all(FLERR,"KSpace style is incompatible with Pair style");
   if (force->pair->tip4pflag && !tip4pflag)
     error->all(FLERR,"KSpace style is incompatible with Pair style");
 }
 
 /* ----------------------------------------------------------------------
    setup for energy, virial computation
    see integrate::ev_set() for values of eflag (0-3) and vflag (0-6)
 ------------------------------------------------------------------------- */
 
 void KSpace::ev_setup(int eflag, int vflag)
 {
   int i,n;
 
   evflag = 1;
 
   eflag_either = eflag;
   eflag_global = eflag % 2;
   eflag_atom = eflag / 2;
 
   vflag_either = vflag;
   vflag_global = vflag % 4;
   vflag_atom = vflag / 4;
 
   if (eflag_atom || vflag_atom) evflag_atom = 1;
   else evflag_atom = 0;
 
   // reallocate per-atom arrays if necessary
 
   if (eflag_atom && atom->nmax > maxeatom) {
     maxeatom = atom->nmax;
     memory->destroy(eatom);
     memory->create(eatom,maxeatom,"kspace:eatom");
   }
   if (vflag_atom && atom->nmax > maxvatom) {
     maxvatom = atom->nmax;
     memory->destroy(vatom);
     memory->create(vatom,maxvatom,6,"kspace:vatom");
   }
 
   // zero accumulators
 
   if (eflag_global) energy = 0.0;
   if (vflag_global) for (i = 0; i < 6; i++) virial[i] = 0.0;
   if (eflag_atom) {
     n = atom->nlocal;
     if (tip4pflag) n += atom->nghost;
     for (i = 0; i < n; i++) eatom[i] = 0.0;
   }
   if (vflag_atom) {
     n = atom->nlocal;
     if (tip4pflag) n += atom->nghost;
     for (i = 0; i < n; i++) {
       vatom[i][0] = 0.0;
       vatom[i][1] = 0.0;
       vatom[i][2] = 0.0;
       vatom[i][3] = 0.0;
       vatom[i][4] = 0.0;
       vatom[i][5] = 0.0;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    compute qsum,qsqsum,q2 and give error/warning if not charge neutral
    only called initially or when particle count changes
 ------------------------------------------------------------------------- */
 
 void KSpace::qsum_qsq(int flag)
 {
   qsum = qsqsum = 0.0;
   for (int i = 0; i < atom->nlocal; i++) {
     qsum += atom->q[i];
     qsqsum += atom->q[i]*atom->q[i];
   }
 
   double tmp;
   MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
   qsum = tmp;
   MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
   qsqsum = tmp;
 
   if (qsqsum == 0.0)
     error->all(FLERR,"Cannot use kspace solver on system with no charge");
 
   q2 = qsqsum * force->qqrd2e;
 
   // not yet sure of the correction needed for non-neutral systems
 
   if (fabs(qsum) > SMALL) {
     char str[128];
     sprintf(str,"System is not charge neutral, net charge = %g",qsum);
     if (flag) error->all(FLERR,str);
     else if (comm->me == 0) error->warning(FLERR,str);
   }
 }
 
 /* ----------------------------------------------------------------------
    estimate the accuracy of the short-range coulomb tables
 ------------------------------------------------------------------------- */
 
 double KSpace::estimate_table_accuracy(double q2_over_sqrt, double spr)
 {
   double table_accuracy = 0.0;
   int nctb = force->pair->ncoultablebits;
   if (nctb) {
     double empirical_precision[17];
     empirical_precision[6] =  6.99E-03;
     empirical_precision[7] =  1.78E-03;
     empirical_precision[8] =  4.72E-04;
     empirical_precision[9] =  1.17E-04;
     empirical_precision[10] = 2.95E-05;
     empirical_precision[11] = 7.41E-06;
     empirical_precision[12] = 1.76E-06;
     empirical_precision[13] = 9.28E-07;
     empirical_precision[14] = 7.46E-07;
     empirical_precision[15] = 7.32E-07;
     empirical_precision[16] = 7.30E-07;
     if (nctb <= 6) table_accuracy = empirical_precision[6];
     else if (nctb <= 16) table_accuracy = empirical_precision[nctb];
     else table_accuracy = empirical_precision[16];
     table_accuracy *= q2_over_sqrt;
     if ((table_accuracy > spr) && (comm->me == 0))
       error->warning(FLERR,"For better accuracy use 'pair_modify table 0'");
   }
 
   return table_accuracy;
 }
 
 /* ----------------------------------------------------------------------
    convert box coords vector to transposed triclinic lamda (0-1) coords
    vector, lamda = [(H^-1)^T] * v, does not preserve vector magnitude
    v and lamda can point to same 3-vector
 ------------------------------------------------------------------------- */
 
 void KSpace::x2lamdaT(double *v, double *lamda)
 {
   double *h_inv = domain->h_inv;
   double lamda_tmp[3];
 
   lamda_tmp[0] = h_inv[0]*v[0];
   lamda_tmp[1] = h_inv[5]*v[0] + h_inv[1]*v[1];
   lamda_tmp[2] = h_inv[4]*v[0] + h_inv[3]*v[1] + h_inv[2]*v[2];
 
   lamda[0] = lamda_tmp[0];
   lamda[1] = lamda_tmp[1];
   lamda[2] = lamda_tmp[2];
 }
 
 /* ----------------------------------------------------------------------
    convert lamda (0-1) coords vector to transposed box coords vector
    lamda = (H^T) * v, does not preserve vector magnitude
    v and lamda can point to same 3-vector
 ------------------------------------------------------------------------- */
 
 void KSpace::lamda2xT(double *lamda, double *v)
 {
   double *h = domain->h;
   double v_tmp[3];
 
   v_tmp[0] = h[0]*lamda[0];
   v_tmp[1] = h[5]*lamda[0] + h[1]*lamda[1];
   v_tmp[2] = h[4]*lamda[0] + h[3]*lamda[1] + h[2]*lamda[2];
 
   v[0] = v_tmp[0];
   v[1] = v_tmp[1];
   v[2] = v_tmp[2];
 }
 
 /* ----------------------------------------------------------------------
    convert triclinic lamda (0-1) coords vector to box coords vector
    v = H * lamda, does not preserve vector magnitude
    lamda and v can point to same 3-vector
 ------------------------------------------------------------------------- */
 
 void KSpace::lamda2xvector(double *lamda, double *v)
 {
   double *h = domain->h;
 
   v[0] = h[0]*lamda[0] + h[5]*lamda[1] + h[4]*lamda[2];
   v[1] = h[1]*lamda[1] + h[3]*lamda[2];
   v[2] = h[2]*lamda[2];
 }
 
 /* ----------------------------------------------------------------------
    convert a sphere in box coords to an ellipsoid in lamda (0-1)
    coords and return the tight (axis-aligned) bounding box, does not 
    preserve vector magnitude
    see http://www.loria.fr/~shornus/ellipsoid-bbox.html and
    http://yiningkarlli.blogspot.com/2013/02/
      bounding-boxes-for-ellipsoidsfigure.html
 ------------------------------------------------------------------------- */
 
 void KSpace::kspacebbox(double r, double *b)
 {
   double *h = domain->h;
   double lx,ly,lz,xy,xz,yz;
   lx = h[0];
   ly = h[1];
   lz = h[2];
   yz = h[3];
   xz = h[4];
   xy = h[5];
 
   b[0] = r*sqrt(ly*ly*lz*lz + ly*ly*xz*xz - 2.0*ly*xy*xz*yz + lz*lz*xy*xy + 
          xy*xy*yz*yz)/(lx*ly*lz);
   b[1] = r*sqrt(lz*lz + yz*yz)/(ly*lz);
   b[2] = r/lz;
 }
 
 /* ----------------------------------------------------------------------
    modify parameters of the KSpace style
 ------------------------------------------------------------------------- */
 
 void KSpace::modify_params(int narg, char **arg)
 {
   int iarg = 0;
   while (iarg < narg) {
     if (strcmp(arg[iarg],"mesh") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal kspace_modify command");
       nx_pppm = nx_msm_max = force->inumeric(FLERR,arg[iarg+1]);
       ny_pppm = ny_msm_max = force->inumeric(FLERR,arg[iarg+2]);
       nz_pppm = nz_msm_max = force->inumeric(FLERR,arg[iarg+3]);
       if (nx_pppm == 0 && ny_pppm == 0 && nz_pppm == 0) gridflag = 0;
       else gridflag = 1;
       iarg += 4;
     } else if (strcmp(arg[iarg],"mesh/disp") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal kspace_modify command");
       nx_pppm_6 = force->inumeric(FLERR,arg[iarg+1]);
       ny_pppm_6 = force->inumeric(FLERR,arg[iarg+2]);
       nz_pppm_6 = force->inumeric(FLERR,arg[iarg+3]);
       if (nx_pppm_6 == 0 || ny_pppm_6 == 0 || nz_pppm_6 == 0) gridflag_6 = 0;
       else gridflag_6 = 1;
       iarg += 4;
     } else if (strcmp(arg[iarg],"order") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal kspace_modify command");
       order = force->inumeric(FLERR,arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"order/disp") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal kspace_modify command");
       order_6 = force->inumeric(FLERR,arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"minorder") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal kspace_modify command");
       minorder = force->inumeric(FLERR,arg[iarg+1]);
       if (minorder < 2) error->all(FLERR,"Illegal kspace_modify command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"overlap") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal kspace_modify command");
       if (strcmp(arg[iarg+1],"yes") == 0) overlap_allowed = 1;
       else if (strcmp(arg[iarg+1],"no") == 0) overlap_allowed = 0;
       else error->all(FLERR,"Illegal kspace_modify command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"force") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal kspace_modify command");
       accuracy_absolute = force->numeric(FLERR,arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"gewald") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal kspace_modify command");
       g_ewald = force->numeric(FLERR,arg[iarg+1]);
       if (g_ewald == 0.0) gewaldflag = 0;
       else gewaldflag = 1;
       iarg += 2;
     } else if (strcmp(arg[iarg],"gewald/disp") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal kspace_modify command");
       g_ewald_6 = force->numeric(FLERR,arg[iarg+1]);
       if (g_ewald_6 == 0.0) gewaldflag_6 = 0;
       else gewaldflag_6 = 1;
       iarg += 2;
     } else if (strcmp(arg[iarg],"slab") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal kspace_modify command");
       if (strcmp(arg[iarg+1],"nozforce") == 0) {
         slabflag = 2;
       } else {
         slabflag = 1;
         slab_volfactor = force->numeric(FLERR,arg[iarg+1]);
         if (slab_volfactor <= 1.0)
           error->all(FLERR,"Bad kspace_modify slab parameter");
         if (slab_volfactor < 2.0 && comm->me == 0)
           error->warning(FLERR,"Kspace_modify slab param < 2.0 may "
                          "cause unphysical behavior");
       }
       iarg += 2;
     } else if (strcmp(arg[iarg],"compute") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal kspace_modify command");
       if (strcmp(arg[iarg+1],"yes") == 0) compute_flag = 1;
       else if (strcmp(arg[iarg+1],"no") == 0) compute_flag = 0;
       else error->all(FLERR,"Illegal kspace_modify command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"fftbench") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal kspace_modify command");
       if (strcmp(arg[iarg+1],"yes") == 0) fftbench = 1;
       else if (strcmp(arg[iarg+1],"no") == 0) fftbench = 0;
       else error->all(FLERR,"Illegal kspace_modify command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"collective") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal kspace_modify command");
       if (strcmp(arg[iarg+1],"yes") == 0) collective_flag = 1;
       else if (strcmp(arg[iarg+1],"no") == 0) collective_flag = 0;
       else error->all(FLERR,"Illegal kspace_modify command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"diff") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal kspace_modify command");
       if (strcmp(arg[iarg+1],"ad") == 0) differentiation_flag = 1;
       else if (strcmp(arg[iarg+1],"ik") == 0) differentiation_flag = 0;
       else error->all(FLERR, "Illegal kspace_modify command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"cutoff/adjust") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal kspace_modify command");
       if (strcmp(arg[iarg+1],"yes") == 0) adjust_cutoff_flag = 1;
       else if (strcmp(arg[iarg+1],"no") == 0) adjust_cutoff_flag = 0;
       else error->all(FLERR,"Illegal kspace_modify command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"kmax/ewald") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal kspace_modify command");
       kx_ewald = atoi(arg[iarg+1]);
       ky_ewald = atoi(arg[iarg+2]);
       kz_ewald = atoi(arg[iarg+3]);
       if (kx_ewald < 0 || ky_ewald < 0 || kz_ewald < 0)
 	error->all(FLERR,"Bad kspace_modify kmax/ewald parameter");
       if (kx_ewald > 0 && ky_ewald > 0 && kz_ewald > 0)
 	kewaldflag = 1;
       else
 	kewaldflag = 0;
       iarg += 4;
     } else if (strcmp(arg[iarg],"mix/disp") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal kspace_modify command");
       if (strcmp(arg[iarg+1],"pair") == 0) mixflag = 0;
       else if (strcmp(arg[iarg+1],"geom") == 0) mixflag = 1;
       else if (strcmp(arg[iarg+1],"none") == 0) mixflag = 2;
       else error->all(FLERR,"Illegal kspace_modify command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"force/disp/real") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal kspace_modify command");
       accuracy_real_6 = atof(arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"force/disp/kspace") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal kspace_modify command");
       accuracy_kspace_6 = atof(arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"eigtol") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal kspace_modify command");
       splittol = atof(arg[iarg+1]);
       if (splittol >= 1.0) 
         error->all(FLERR,"Kspace_modify eigtol must be smaller than one");
       iarg += 2;
     } else if (strcmp(arg[iarg],"pressure/scalar") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal kspace_modify command");
       if (strcmp(arg[iarg+1],"yes") == 0) scalar_pressure_flag = 1;
       else if (strcmp(arg[iarg+1],"no") == 0) scalar_pressure_flag = 0;
       else error->all(FLERR,"Illegal kspace_modify command");
       iarg += 2;
     } else error->all(FLERR,"Illegal kspace_modify command");
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void *KSpace::extract(const char *str)
 {
   if (strcmp(str,"scale") == 0) return (void *) &scale;
   return NULL;
 }
diff --git a/src/kspace.h b/src/kspace.h
index 967ae46b3..6d8ae294f 100644
--- a/src/kspace.h
+++ b/src/kspace.h
@@ -1,234 +1,238 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifndef LMP_KSPACE_H
 #define LMP_KSPACE_H
 
 #include "pointers.h"
 
 #ifdef FFT_SINGLE
 typedef float FFT_SCALAR;
 #define MPI_FFT_SCALAR MPI_FLOAT
 #else
 typedef double FFT_SCALAR;
 #define MPI_FFT_SCALAR MPI_DOUBLE
 #endif
 
 namespace LAMMPS_NS {
 
 class KSpace : protected Pointers {
   friend class ThrOMP;
   friend class FixOMP;
  public:
   double energy;                 // accumulated energies
   double energy_1,energy_6;
   double virial[6];              // accumlated virial
   double *eatom,**vatom;         // accumulated per-atom energy/virial
   double e2group;                // accumulated group-group energy
   double f2group[3];             // accumulated group-group force
   int triclinic_support;         // 1 if supports triclinic geometries
 
   int ewaldflag;                 // 1 if a Ewald solver
   int pppmflag;                  // 1 if a PPPM solver
   int msmflag;                   // 1 if a MSM solver
   int dispersionflag;            // 1 if a LJ/dispersion solver
   int tip4pflag;                 // 1 if a TIP4P solver
   int dipoleflag;                // 1 if a dipole solver
   int differentiation_flag;
   int neighrequest_flag;         // used to avoid obsolete construction 
                                  // of neighbor lists
   int mixflag;                   // 1 if geometric mixing rules are enforced 
                                  // for LJ coefficients
   int slabflag;
   int scalar_pressure_flag;      // 1 if using MSM fast scalar pressure
   double slab_volfactor;
 
 
   int order,order_6,order_allocated;
   double accuracy;                  // accuracy of KSpace solver (force units)
   double accuracy_absolute;         // user-specifed accuracy in force units
   double accuracy_relative;         // user-specified dimensionless accuracy
                                     // accurary = acc_rel * two_charge_force
   double accuracy_real_6;           // real space accuracy for 
                                     // dispersion solver (force units)
   double accuracy_kspace_6;         // reciprocal space accuracy for 
                                     // dispersion solver (force units)
   double two_charge_force;          // force in user units of two point
                                     // charges separated by 1 Angstrom
 
   double g_ewald,g_ewald_6;
   int nx_pppm,ny_pppm,nz_pppm;           // global FFT grid for Coulombics
   int nx_pppm_6,ny_pppm_6,nz_pppm_6;     // global FFT grid for dispersion
   int nx_msm_max,ny_msm_max,nz_msm_max;
 
   int group_group_enable;         // 1 if style supports group/group calculation
 
   unsigned int datamask;
   unsigned int datamask_ext;
 
+  // KOKKOS host/device flag and data masks
+  ExecutionSpace execution_space;
+  unsigned int datamask_read,datamask_modify;
+
   int compute_flag;               // 0 if skip compute()
   int fftbench;                   // 0 if skip FFT timing
   int collective_flag;            // 1 if use MPI collectives for FFT/remap
   int stagger_flag;               // 1 if using staggered PPPM grids
 
   double splittol;                // tolerance for when to truncate splitting
 
   KSpace(class LAMMPS *, int, char **);
   virtual ~KSpace();
   void triclinic_check();
   void modify_params(int, char **);
   void *extract(const char *);
   void compute_dummy(int, int);
 
   // triclinic
 
   void x2lamdaT(double *, double *);
   void lamda2xT(double *, double *);
   void lamda2xvector(double *, double *);
   void kspacebbox(double, double *);
 
   // general child-class methods
 
   virtual void init() = 0;
   virtual void setup() = 0;
   virtual void setup_grid() {};
   virtual void compute(int, int) = 0;
   virtual void compute_group_group(int, int, int) {};
 
   virtual void pack_forward(int, FFT_SCALAR *, int, int *) {};
   virtual void unpack_forward(int, FFT_SCALAR *, int, int *) {};
   virtual void pack_reverse(int, FFT_SCALAR *, int, int *) {};
   virtual void unpack_reverse(int, FFT_SCALAR *, int, int *) {};
 
   virtual int timing(int, double &, double &) {return 0;}
   virtual int timing_1d(int, double &) {return 0;}
   virtual int timing_3d(int, double &) {return 0;}
   virtual double memory_usage() {return 0.0;}
 
 /* ----------------------------------------------------------------------
    compute gamma for MSM and pair styles
    see Eq 4 from Parallel Computing 35 (2009) 164–177
 ------------------------------------------------------------------------- */
 
   double gamma(const double &rho) const 
   {
     if (rho <= 1.0) {
       const int split_order = order/2;
       const double rho2 = rho*rho;
       double g = gcons[split_order][0];
       double rho_n = rho2;
       for (int n = 1; n <= split_order; n++) {
         g += gcons[split_order][n]*rho_n;
         rho_n *= rho2;
       }
       return g;
     } else return (1.0/rho);
   }
 
 /* ----------------------------------------------------------------------
    compute the derivative of gamma for MSM and pair styles
    see Eq 4 from Parallel Computing 35 (2009) 164-177
 ------------------------------------------------------------------------- */
 
   double dgamma(const double &rho) const 
   {
     if (rho <= 1.0) {
       const int split_order = order/2;
       const double rho2 = rho*rho;
       double dg = dgcons[split_order][0]*rho;
       double rho_n = rho*rho2;
       for (int n = 1; n < split_order; n++) {
         dg += dgcons[split_order][n]*rho_n;
         rho_n *= rho2;
       }
       return dg;
     } else return (-1.0/rho/rho);
   }
   
   double **get_gcons() { return gcons; }
   double **get_dgcons() { return dgcons; }
 
  protected:
   int gridflag,gridflag_6;
   int gewaldflag,gewaldflag_6;
   int minorder,overlap_allowed;
   int adjust_cutoff_flag;
   int suffix_flag;                  // suffix compatibility flag
   bigint natoms_original;
   double scale,qqrd2e;
   double qsum,qsqsum,q2;
   double **gcons,**dgcons;          // accumulated per-atom energy/virial
 
   int evflag,evflag_atom;
   int eflag_either,eflag_global,eflag_atom;
   int vflag_either,vflag_global,vflag_atom;
   int maxeatom,maxvatom;
 
   int kewaldflag;                   // 1 if kspace range set for Ewald sum
   int kx_ewald,ky_ewald,kz_ewald;   // kspace settings for Ewald sum
 
   void qsum_qsq(int);
   void pair_check();
   void ev_setup(int, int);
   double estimate_table_accuracy(double, double);
 };
 
 }
 
 #endif
 
 /* ERROR/WARNING messages:
 
 E: KSpace style does not yet support triclinic geometries
 
 The specified kspace style does not allow for non-orthogonal
 simulation boxes.
 
 E: KSpace solver requires a pair style
 
 No pair style is defined.
 
 E: KSpace style is incompatible with Pair style
 
 Setting a kspace style requires that a pair style with a long-range
 Coulombic or dispersion component be used.
 
 W: For better accuracy use 'pair_modify table 0'
 
 The user-specified force accuracy cannot be achieved unless the table
 feature is disabled by using 'pair_modify table 0'.
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Bad kspace_modify slab parameter
 
 Kspace_modify value for the slab/volume keyword must be >= 2.0.
 
 W: Kspace_modify slab param < 2.0 may cause unphysical behavior
 
 The kspace_modify slab parameter should be larger to insure periodic
 grids padded with empty space do not overlap.
 
 E: Bad kspace_modify kmax/ewald parameter
 
 Kspace_modify values for the kmax/ewald keyword must be integers > 0
 
 E: Kspace_modify eigtol must be smaller than one
 
 Self-explanatory.
 
 */
diff --git a/src/memory.h b/src/memory.h
index b14d0590c..1cc3b9ccb 100644
--- a/src/memory.h
+++ b/src/memory.h
@@ -1,624 +1,628 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifndef LMP_MEMORY_H
 #define LMP_MEMORY_H
 
 #include "lmptype.h"
 #include "pointers.h"
 #ifdef LMP_KOKKOS
 #include "kokkos_type.h"
 #endif
 
 namespace LAMMPS_NS {
 
 class Memory : protected Pointers {
  public:
   Memory(class LAMMPS *);
 
   void *smalloc(bigint n, const char *);
   void *srealloc(void *, bigint n, const char *);
   void sfree(void *);
   void fail(const char *);
 
   // Kokkos memory allocation functions
 
 #ifdef LMP_KOKKOS
 #include "memory_kokkos.h"
 #endif
 
 /* ----------------------------------------------------------------------
    create/grow/destroy vecs and multidim arrays with contiguous memory blocks
    only use with primitive data types, e.g. 1d vec of ints, 2d array of doubles
    fail() prevents use with pointers, 
      e.g. 1d vec of int*, due to mismatched destroy
    avoid use with non-primitive data types to avoid code bloat
    for these other cases, use smalloc/srealloc/sfree directly
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    create a 1d array
 ------------------------------------------------------------------------- */
 
   template <typename TYPE>
   TYPE *create(TYPE *&array, int n, const char *name)
   {
     bigint nbytes = ((bigint) sizeof(TYPE)) * n;
     array = (TYPE *) smalloc(nbytes,name);
     return array;
   }
 
   template <typename TYPE>
   TYPE **create(TYPE **&array, int n, const char *name) 
   {fail(name); return NULL;}
 
 /* ----------------------------------------------------------------------
    grow or shrink 1d array
 ------------------------------------------------------------------------- */
 
   template <typename TYPE>
   TYPE *grow(TYPE *&array, int n, const char *name)
   {
     if (array == NULL) return create(array,n,name);
 
     bigint nbytes = ((bigint) sizeof(TYPE)) * n;
     array = (TYPE *) srealloc(array,nbytes,name);
     return array;
   }
 
   template <typename TYPE>
   TYPE **grow(TYPE **&array, int n, const char *name)
   {fail(name); return NULL;}
 
 /* ----------------------------------------------------------------------
    destroy a 1d array
 ------------------------------------------------------------------------- */
 
   template <typename TYPE>
   void destroy(TYPE *array)
   {sfree(array);}
 
 /* ----------------------------------------------------------------------
    create a 1d array with index from nlo to nhi inclusive
    cannot grow it
 ------------------------------------------------------------------------- */
 
   template <typename TYPE>
   TYPE *create1d_offset(TYPE *&array, int nlo, int nhi, const char *name)
   {
     bigint nbytes = ((bigint) sizeof(TYPE)) * (nhi-nlo+1);
     array = (TYPE *) smalloc(nbytes,name);
     array -= nlo;
     return array;
   }
 
   template <typename TYPE>
   TYPE **create1d_offset(TYPE **&array, int nlo, int nhi, const char *name)
   {fail(name); return NULL;}
 
 /* ----------------------------------------------------------------------
    destroy a 1d array with index offset
 ------------------------------------------------------------------------- */
 
   template <typename TYPE>
   void destroy1d_offset(TYPE *array, int offset)
   {
     if (array) sfree(&array[offset]);
   }
 
 /* ----------------------------------------------------------------------
    create a 2d array
 ------------------------------------------------------------------------- */
 
   template <typename TYPE>
   TYPE **create(TYPE **&array, int n1, int n2, const char *name)
   {
     bigint nbytes = ((bigint) sizeof(TYPE)) * n1*n2;
     TYPE *data = (TYPE *) smalloc(nbytes,name);
     nbytes = ((bigint) sizeof(TYPE *)) * n1;
     array = (TYPE **) smalloc(nbytes,name);
     
     bigint n = 0;
     for (int i = 0; i < n1; i++) {
       array[i] = &data[n];
       n += n2;
     }
     return array;
   }
 
   template <typename TYPE>
   TYPE ***create(TYPE ***&array, int n1, int n2, const char *name)
   {fail(name); return NULL;}
 
 /* ----------------------------------------------------------------------
    grow or shrink 1st dim of a 2d array
    last dim must stay the same
 ------------------------------------------------------------------------- */
 
   template <typename TYPE>
   TYPE **grow(TYPE **&array, int n1, int n2, const char *name)
   {
     if (array == NULL) return create(array,n1,n2,name);
     
     bigint nbytes = ((bigint) sizeof(TYPE)) * n1*n2;
     TYPE *data = (TYPE *) srealloc(array[0],nbytes,name);
     nbytes = ((bigint) sizeof(TYPE *)) * n1;
     array = (TYPE **) srealloc(array,nbytes,name);
     
     bigint n = 0;
     for (int i = 0; i < n1; i++) {
       array[i] = &data[n];
       n += n2;
     }
     return array;
   }
   
   template <typename TYPE>
   TYPE ***grow(TYPE ***&array, int n1, int n2, const char *name)
   {fail(name); return NULL;}
   
 /* ----------------------------------------------------------------------
    destroy a 2d array
 ------------------------------------------------------------------------- */
 
   template <typename TYPE>
   void destroy(TYPE **array)
   {
     if (array == NULL) return;
     sfree(array[0]);
     sfree(array);
   }
 
 /* ----------------------------------------------------------------------
    create a 2d array with a ragged 2nd dimension
 ------------------------------------------------------------------------- */
 
   template <typename TYPE>
   TYPE **create_ragged(TYPE **&array, int n1, int *n2, const char *name)
   {
     bigint n2sum = 0;
     for (int i = 0; i < n1; i++) n2sum += n2[i];
     
     bigint nbytes = ((bigint) sizeof(TYPE)) * n2sum;
     TYPE *data = (TYPE *) smalloc(nbytes,name);
     nbytes = ((bigint) sizeof(TYPE *)) * n1;
     array = (TYPE **) smalloc(nbytes,name);
     
     bigint n = 0;
     for (int i = 0; i < n1; i++) {
       array[i] = &data[n];
       n += n2[i];
     }
     return array;
   }
   
   template <typename TYPE>
   TYPE ***create_ragged(TYPE ***&array, int n1, int *n2, const char *name)
   {fail(name); return NULL;}
   
 /* ----------------------------------------------------------------------
    create a 2d array with 2nd index from n2lo to n2hi inclusive
    cannot grow it
 ------------------------------------------------------------------------- */
 
   template <typename TYPE>
   TYPE **create2d_offset(TYPE **&array, int n1, int n2lo, int n2hi,
                          const char *name)
   {
     int n2 = n2hi - n2lo + 1;
     create(array,n1,n2,name);
     for (int i = 0; i < n1; i++) array[i] -= n2lo;
     return array;
   }
   
   template <typename TYPE>
   TYPE ***create2d_offset(TYPE ***&array, int n1, int n2lo, int n2hi,
                           const char *name) {fail(name); return NULL;}
   
 /* ----------------------------------------------------------------------
    destroy a 2d array with 2nd index offset
 ------------------------------------------------------------------------- */
   
   template <typename TYPE>
   void destroy2d_offset(TYPE **array, int offset)
   {
     if (array == NULL) return;
     sfree(&array[0][offset]);
     sfree(array);
   }
   
 /* ----------------------------------------------------------------------
    create a 3d array
 ------------------------------------------------------------------------- */
 
   template <typename TYPE>
   TYPE ***create(TYPE ***&array, int n1, int n2, int n3, const char *name)
   {
     bigint nbytes = ((bigint) sizeof(TYPE)) * n1*n2*n3;
     TYPE *data = (TYPE *) smalloc(nbytes,name);
     nbytes = ((bigint) sizeof(TYPE *)) * n1*n2;
     TYPE **plane = (TYPE **) smalloc(nbytes,name);
     nbytes = ((bigint) sizeof(TYPE **)) * n1;
     array = (TYPE ***) smalloc(nbytes,name);
     
     int i,j;
     bigint m;
     bigint n = 0;
     for (i = 0; i < n1; i++) {
       m = ((bigint) i) * n2;
       array[i] = &plane[m];
       for (j = 0; j < n2; j++) {
         plane[m+j] = &data[n];
         n += n3;
       }
     }
     return array;
   }
 
   template <typename TYPE>
   TYPE ****create(TYPE ****&array, int n1, int n2, int n3, const char *name)
   {fail(name); return NULL;}
 
 /* ----------------------------------------------------------------------
    grow or shrink 1st dim of a 3d array
    last 2 dims must stay the same
 ------------------------------------------------------------------------- */
 
   template <typename TYPE>
   TYPE ***grow(TYPE ***&array, int n1, int n2, int n3, const char *name)
   {
     if (array == NULL) return create(array,n1,n2,n3,name);
     
     bigint nbytes = ((bigint) sizeof(TYPE)) * n1*n2*n3;
     TYPE *data = (TYPE *) srealloc(array[0][0],nbytes,name);
     nbytes = ((bigint) sizeof(TYPE *)) * n1*n2;
     TYPE **plane = (TYPE **) srealloc(array[0],nbytes,name);
     nbytes = ((bigint) sizeof(TYPE **)) * n1;
     array = (TYPE ***) srealloc(array,nbytes,name);
     
     int i,j;
     bigint m;
     bigint n = 0;
     for (i = 0; i < n1; i++) {
       m = ((bigint) i) * n2;
       array[i] = &plane[m];
       for (j = 0; j < n2; j++) {
         plane[m+j] = &data[n];
         n += n3;
       }
     }
     return array;
   }
   
   template <typename TYPE>
   TYPE ****grow(TYPE ****&array, int n1, int n2, int n3, const char *name)
   {fail(name); return NULL;}
 
 /* ----------------------------------------------------------------------
    destroy a 3d array
 ------------------------------------------------------------------------- */
 
   template <typename TYPE>
   void destroy(TYPE ***array)
   {
     if (array == NULL) return;
     sfree(array[0][0]);
     sfree(array[0]);
     sfree(array);
   }
 
 /* ----------------------------------------------------------------------
    create a 3d array with 1st index from n1lo to n1hi inclusive
    cannot grow it
 ------------------------------------------------------------------------- */
 
   template <typename TYPE>
   TYPE ***create3d_offset(TYPE ***&array, int n1lo, int n1hi,
                           int n2, int n3, const char *name)
   {
     int n1 = n1hi - n1lo + 1;
     create(array,n1,n2,n3,name);
     array -= n1lo;
     return array;
   }
   
   template <typename TYPE>
   TYPE ****create3d_offset(TYPE ****&array, int n1lo, int n1hi,
                            int n2, int n3, const char *name)
   {fail(name); return NULL;}
 
 /* ----------------------------------------------------------------------
    free a 3d array with 1st index offset
 ------------------------------------------------------------------------- */
 
   template <typename TYPE>
   void destroy3d_offset(TYPE ***array, int offset)
   {
     if (array) destroy(&array[offset]);
   }
 
 /* ----------------------------------------------------------------------
    create a 3d array with
    1st index from n1lo to n1hi inclusive,
    2nd index from n2lo to n2hi inclusive,
    3rd index from n3lo to n3hi inclusive
    cannot grow it
 ------------------------------------------------------------------------- */
 
   template <typename TYPE>
   TYPE ***create3d_offset(TYPE ***&array, int n1lo, int n1hi,
                           int n2lo, int n2hi, int n3lo, int n3hi,
                           const char *name)
   {
     int n1 = n1hi - n1lo + 1;
     int n2 = n2hi - n2lo + 1;
     int n3 = n3hi - n3lo + 1;
     create(array,n1,n2,n3,name);
     
     bigint m = ((bigint) n1) * n2;
     for (bigint i = 0; i < m; i++) array[0][i] -= n3lo;
     for (int i = 0; i < n1; i++) array[i] -= n2lo;
     array -= n1lo;
     return array;
   }
   
   template <typename TYPE>
   TYPE ****create3d_offset(TYPE ****&array, int n1lo, int n1hi,
                            int n2lo, int n2hi, int n3lo, int n3hi,
                            const char *name)
   {fail(name); return NULL;}
 
 /* ----------------------------------------------------------------------
    free a 3d array with all 3 indices offset
 ------------------------------------------------------------------------- */
 
   template <typename TYPE>
   void destroy3d_offset(TYPE ***array,
                         int n1_offset, int n2_offset, int n3_offset)
   {
     if (array == NULL) return;
     sfree(&array[n1_offset][n2_offset][n3_offset]);
     sfree(&array[n1_offset][n2_offset]);
     sfree(&array[n1_offset]);
   }
   
 /* ----------------------------------------------------------------------
    create a 4d array
 ------------------------------------------------------------------------- */
 
   template <typename TYPE>
   TYPE ****create(TYPE ****&array, int n1, int n2, int n3, int n4,
                   const char *name)
   {
     bigint nbytes = ((bigint) sizeof(TYPE)) * n1*n2*n3*n4;
     TYPE *data = (TYPE *) smalloc(nbytes,name);
     nbytes = ((bigint) sizeof(TYPE *)) * n1*n2*n3;
     TYPE **cube = (TYPE **) smalloc(nbytes,name);
     nbytes = ((bigint) sizeof(TYPE **)) * n1*n2;
     TYPE ***plane = (TYPE ***) smalloc(nbytes,name);
     nbytes = ((bigint) sizeof(TYPE ***)) * n1;
     array = (TYPE ****) smalloc(nbytes,name);
 
     int i,j,k;
     bigint m1,m2;
     bigint n = 0;
     for (i = 0; i < n1; i++) {
       m2 = ((bigint) i) * n2;
       array[i] = &plane[m2];
       for (j = 0; j < n2; j++) {
         m1 = ((bigint) i) * n2 + j;
         m2 = ((bigint) i) * n2*n3 + j*n3;
         plane[m1] = &cube[m2];
         for (k = 0; k < n3; k++) {
           m1 = ((bigint) i) * n2*n3 + j*n3 + k;
           cube[m1] = &data[n];
           n += n4;
         }
       }
     }
     return array;
   }
   
   template <typename TYPE>
   TYPE *****create(TYPE *****&array, int n1, int n2, int n3, int n4,
                    const char *name)
   {fail(name); return NULL;}
 
 /* ----------------------------------------------------------------------
    destroy a 4d array
 ------------------------------------------------------------------------- */
 
   template <typename TYPE>
   void destroy(TYPE ****array)
   {
     if (array == NULL) return;
     sfree(array[0][0][0]);
     sfree(array[0][0]);
     sfree(array[0]);
     sfree(array);
   }
 
 /* ----------------------------------------------------------------------
    create a 4d array with indices
    2nd index from n2lo to n2hi inclusive
    3rd index from n3lo to n3hi inclusive
    4th index from n4lo to n4hi inclusive
    cannot grow it
 ------------------------------------------------------------------------- */
 
   template <typename TYPE>
   TYPE ****create4d_offset(TYPE ****&array, int n1, int n2lo, int n2hi,
                            int n3lo, int n3hi, int n4lo, int n4hi,
                            const char *name)
   {
     int n2 = n2hi - n2lo + 1;
     int n3 = n3hi - n3lo + 1;
     int n4 = n4hi - n4lo + 1;
     create(array,n1,n2,n3,n4,name);
     
     bigint m = ((bigint) n1) * n2 * n3;
     for (bigint i = 0; i < m; i++) array[0][0][i] -= n4lo;
     m = ((bigint) n1) * n2;
     for (bigint i = 0; i < m; i++) array [0][i] -= n3lo;
     for (int i = 0; i < n1; i++) array[i] -= n2lo;
     return array;
   }
   
   template <typename TYPE>
   TYPE ****create4d_offset(TYPE *****&array, int n1, int n2lo, int n2hi,
                            int n3lo, int n3hi, int n4lo, int n4hi,
                            const char *name)
   {fail(name); return NULL;}
   
 /* ----------------------------------------------------------------------
    free a 4d array with indices 2,3, and 4 offset
 ------------------------------------------------------------------------- */
 
   template <typename TYPE>
   void destroy4d_offset(TYPE ****array, 
                         int n2_offset, int n3_offset, int n4_offset)
   {
     if (array == NULL) return;
     sfree(&array[0][n2_offset][n3_offset][n4_offset]);
     sfree(&array[0][n2_offset][n3_offset]);
     sfree(&array[0][n2_offset]);
     sfree(array);
   }
 
 /* ----------------------------------------------------------------------
    create a 5d array
 ------------------------------------------------------------------------- */
 
   template <typename TYPE>
   TYPE *****create(TYPE *****&array, int n1, int n2, int n3, int n4,
                    int n5, const char *name)
   {
     bigint nbytes = ((bigint) sizeof(TYPE)) * n1*n2*n3*n4*n5;
     TYPE *data = (TYPE *) smalloc(nbytes,name);
     nbytes = ((bigint) sizeof(TYPE *)) * n1*n2*n3*n4;
     TYPE **level4 = (TYPE **) smalloc(nbytes,name);
     nbytes = ((bigint) sizeof(TYPE **)) * n1*n2*n3;
     TYPE ***level3 = (TYPE ***) smalloc(nbytes,name);
     nbytes = ((bigint) sizeof(TYPE ***)) * n1*n2;
     TYPE ****level2 = (TYPE ****) smalloc(nbytes,name);
     nbytes = ((bigint) sizeof(TYPE ****)) * n1;
     array = (TYPE *****) smalloc(nbytes,name);
     
     int i,j,k,l;
     bigint m1,m2;
     bigint n = 0;
     for (i = 0; i < n1; i++) {
       m2 = ((bigint) i) * n2;
       array[i] = &level2[m2];
       for (j = 0; j < n2; j++) {
         m1 = ((bigint) i) * n2 + j;
         m2 = ((bigint) i) * n2*n3 +  ((bigint) j) * n3;
         level2[m1] = &level3[m2];
         for (k = 0; k < n3; k++) {
           m1 = ((bigint) i) * n2*n3 +  ((bigint) j) * n3 + k;
           m2 = ((bigint) i) * n2*n3*n4 +
             ((bigint) j) * n3*n4 + ((bigint) k) * n4;
           level3[m1] = &level4[m2];
           for (l = 0; l < n4; l++) {
             m1 = ((bigint) i) * n2*n3*n4 +
               ((bigint) j) * n3*n4 + ((bigint) k) * n4 + l;
             level4[m1] = &data[n];
             n += n5;
           }
         }
       }
     }
     return array;
   }
   
   template <typename TYPE>
   TYPE ******create(TYPE ******&array, int n1, int n2, int n3, int n4,
                     int n5, const char *name)
   {fail(name); return NULL;}
 
 /* ----------------------------------------------------------------------
    destroy a 5d array
 ------------------------------------------------------------------------- */
 
   template <typename TYPE>
   void destroy(TYPE *****array)
   {
     if (array == NULL) return;
     sfree(array[0][0][0][0]);
     sfree(array[0][0][0]);
     sfree(array[0][0]);
     sfree(array[0]);
     sfree(array);
   }
   
 /* ----------------------------------------------------------------------
    memory usage of arrays, including pointers
 ------------------------------------------------------------------------- */
 
   template <typename TYPE>
   bigint usage(TYPE *array, int n)
   {
+    (void) array;
     bigint bytes = ((bigint) sizeof(TYPE)) * n;
     return bytes;
   }
   
   template <typename TYPE>
   bigint usage(TYPE **array, int n1, int n2)
   {
+    (void) array;
     bigint bytes = ((bigint) sizeof(TYPE)) * n1*n2;
     bytes += ((bigint) sizeof(TYPE *)) * n1;
     return bytes;
   }
   
   template <typename TYPE>
   bigint usage(TYPE ***array, int n1, int n2, int n3)
   {
+    (void) array;
     bigint bytes = ((bigint) sizeof(TYPE)) * n1*n2*n3;
     bytes += ((bigint) sizeof(TYPE *)) * n1*n2;
     bytes += ((bigint) sizeof(TYPE **)) * n1;
     return bytes;
   }
   
   template <typename TYPE>
   bigint usage(TYPE ****array, int n1, int n2, int n3, int n4)
   {
+    (void) array;
     bigint bytes = ((bigint) sizeof(TYPE)) * n1*n2*n3*n4;
     bytes += ((bigint) sizeof(TYPE *)) * n1*n2*n3;
     bytes += ((bigint) sizeof(TYPE **)) * n1*n2;
     bytes += ((bigint) sizeof(TYPE ***)) * n1;
     return bytes;
   }
 };
 
 }
 
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Failed to allocate %ld bytes for array %s
 
 Your LAMMPS simulation has run out of memory.  You need to run a
 smaller simulation or on more processors.
 
 E: Failed to reallocate %ld bytes for array %s
 
 Your LAMMPS simulation has run out of memory.  You need to run a
 smaller simulation or on more processors.
 
 E: Cannot create/grow a vector/array of pointers for %s
 
 LAMMPS code is making an illegal call to the templated memory
 allocaters, to create a vector or array of pointers.
 
 */