diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp b/src/KOKKOS/fix_qeq_reax_kokkos.cpp
index e54b53ae8..5a1d3c7f1 100644
--- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp
+++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp
@@ -1,1192 +1,1193 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Ray Shan (SNL), Stan Moore (SNL)
 ------------------------------------------------------------------------- */
 
 #include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include "fix_qeq_reax_kokkos.h"
 #include "kokkos.h"
 #include "atom.h"
 #include "atom_masks.h"
 #include "atom_kokkos.h"
 #include "comm.h"
 #include "force.h"
 #include "group.h"
 #include "modify.h"
 #include "neighbor.h"
 #include "neigh_list_kokkos.h"
 #include "neigh_request.h"
 #include "update.h"
 #include "integrate.h"
 #include "respa.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 #include "pair_reaxc_kokkos.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
 #define SMALL 0.0001
 #define EV_TO_KCAL_PER_MOL 14.4
 
 #define TEAMSIZE 128
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 FixQEqReaxKokkos<DeviceType>::
 FixQEqReaxKokkos(LAMMPS *lmp, int narg, char **arg) :
   FixQEqReax(lmp, narg, arg)
 {
   kokkosable = 1;
   atomKK = (AtomKokkos *) atom;
   execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
 
   datamask_read = X_MASK | V_MASK | F_MASK | MASK_MASK | Q_MASK | TYPE_MASK;
   datamask_modify = Q_MASK | X_MASK;
 
   nmax = nmax = m_cap = 0;
   allocated_flag = 0;
+  nprev = 4;
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 FixQEqReaxKokkos<DeviceType>::~FixQEqReaxKokkos()
 {
   if (copymode) return;
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 void FixQEqReaxKokkos<DeviceType>::init()
 {
   atomKK->k_q.modify<LMPHostType>();
   atomKK->k_q.sync<DeviceType>();
 
   FixQEqReax::init();
 
   neighflag = lmp->kokkos->neighflag_qeq;
   int irequest = neighbor->nrequest - 1;
   
   neighbor->requests[irequest]->
     kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value &&
     !Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
   neighbor->requests[irequest]->
     kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
     
   if (neighflag == FULL) {
     neighbor->requests[irequest]->fix = 1;
     neighbor->requests[irequest]->pair = 0;
     neighbor->requests[irequest]->full = 1;
     neighbor->requests[irequest]->half = 0;
   } else { //if (neighflag == HALF || neighflag == HALFTHREAD)
     neighbor->requests[irequest]->fix = 1;
     neighbor->requests[irequest]->pair = 0;
     neighbor->requests[irequest]->full = 0;
     neighbor->requests[irequest]->half = 1;
     neighbor->requests[irequest]->ghost = 1;
   }
 
   int ntypes = atom->ntypes;
   k_params = Kokkos::DualView<params_qeq*,Kokkos::LayoutRight,DeviceType>
     ("FixQEqReax::params",ntypes+1);
   params = k_params.template view<DeviceType>();
 
   for (n = 1; n <= ntypes; n++) {
     k_params.h_view(n).chi = chi[n];
     k_params.h_view(n).eta = eta[n];
     k_params.h_view(n).gamma = gamma[n];
   }
   k_params.template modify<LMPHostType>();
 
   cutsq = swb * swb;
 
   init_shielding_k();
   init_hist();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 void FixQEqReaxKokkos<DeviceType>::init_shielding_k()
 {
   int i,j;
   int ntypes = atom->ntypes;
 
   k_shield = DAT::tdual_ffloat_2d("qeq/kk:shield",ntypes+1,ntypes+1);
   d_shield = k_shield.template view<DeviceType>();
 
   for( i = 1; i <= ntypes; ++i )
     for( j = 1; j <= ntypes; ++j )
       k_shield.h_view(i,j) = pow( gamma[i] * gamma[j], -1.5 );
 
   k_shield.template modify<LMPHostType>();
   k_shield.template sync<DeviceType>();
 
   k_tap = DAT::tdual_ffloat_1d("qeq/kk:tap",8);
   d_tap = k_tap.template view<DeviceType>();
 
   for (i = 0; i < 8; i ++)
     k_tap.h_view(i) = Tap[i];
 
   k_tap.template modify<LMPHostType>();
   k_tap.template sync<DeviceType>();
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 void FixQEqReaxKokkos<DeviceType>::init_hist()
 {
   int i,j;
 
-  k_s_hist = DAT::tdual_ffloat_2d("qeq/kk:s_hist",atom->nmax,5);
+  k_s_hist = DAT::tdual_ffloat_2d("qeq/kk:s_hist",atom->nmax,nprev);
   d_s_hist = k_s_hist.template view<DeviceType>();
   h_s_hist = k_s_hist.h_view;
-  k_t_hist = DAT::tdual_ffloat_2d("qeq/kk:t_hist",atom->nmax,5);
+  k_t_hist = DAT::tdual_ffloat_2d("qeq/kk:t_hist",atom->nmax,nprev);
   d_t_hist = k_t_hist.template view<DeviceType>();
   h_t_hist = k_t_hist.h_view;
 
   for( i = 0; i < atom->nmax; i++ )
-    for( j = 0; j < 5; j++ )
+    for( j = 0; j < nprev; j++ )
       k_s_hist.h_view(i,j) = k_t_hist.h_view(i,j) = 0.0;
 
   k_s_hist.template modify<LMPHostType>();
   k_s_hist.template sync<DeviceType>();
 
   k_t_hist.template modify<LMPHostType>();
   k_t_hist.template sync<DeviceType>();
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 void FixQEqReaxKokkos<DeviceType>::setup_pre_force(int vflag)
 {
   //neighbor->build_one(list);
 
   pre_force(vflag);
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 void FixQEqReaxKokkos<DeviceType>::pre_force(int vflag)
 {
   if (update->ntimestep % nevery) return;
 
   atomKK->sync(execution_space,datamask_read);
   atomKK->modified(execution_space,datamask_modify);
 
   x = atomKK->k_x.view<DeviceType>();
   v = atomKK->k_v.view<DeviceType>();
   f = atomKK->k_f.view<DeviceType>();
   q = atomKK->k_q.view<DeviceType>();
   tag = atomKK->k_tag.view<DeviceType>();
   type = atomKK->k_type.view<DeviceType>();
   mask = atomKK->k_mask.view<DeviceType>();
   nlocal = atomKK->nlocal;
   nall = atom->nlocal + atom->nghost;
   newton_pair = force->newton_pair;
 
   k_params.template sync<DeviceType>();
   k_shield.template sync<DeviceType>();
   k_tap.template sync<DeviceType>();
 
   NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
   d_numneigh = k_list->d_numneigh;
   d_neighbors = k_list->d_neighbors;
   d_ilist = k_list->d_ilist;
   inum = list->inum;
 
   copymode = 1;
 
   int teamsize = TEAMSIZE;
 
   // allocate
   allocate_array();
 
   // get max number of neighbor
   if (!allocated_flag || update->ntimestep == neighbor->lastcall)
     allocate_matrix();
 
   // compute_H
   FixQEqReaxKokkosComputeHFunctor<DeviceType> computeH_functor(this);
   Kokkos::parallel_scan(inum,computeH_functor);
 
   // init_matvec
   FixQEqReaxKokkosMatVecFunctor<DeviceType> matvec_functor(this);
   Kokkos::parallel_for(inum,matvec_functor);
 
   // comm->forward_comm_fix(this); //Dist_vector( s );
   pack_flag = 2;
   k_s.template modify<DeviceType>();
   k_s.template sync<LMPHostType>();
   comm->forward_comm_fix(this);
   k_s.template modify<LMPHostType>();
   k_s.template sync<DeviceType>();
 
   // comm->forward_comm_fix(this); //Dist_vector( t );
   pack_flag = 3;
   k_t.template modify<DeviceType>();
   k_t.template sync<LMPHostType>();
   comm->forward_comm_fix(this);
   k_t.template modify<LMPHostType>();
   k_t.template sync<DeviceType>();
 
   // 1st cg solve over b_s, s
   cg_solve1();
 
   // 2nd cg solve over b_t, t
   cg_solve2();
 
   // calculate_Q();
   calculate_q();
 
   copymode = 0;
 
   if (!allocated_flag)
     allocated_flag = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void FixQEqReaxKokkos<DeviceType>::num_neigh_item(int ii, int &maxneigh) const
 {
   const int i = d_ilist[ii];
   maxneigh += d_numneigh[i];
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 void FixQEqReaxKokkos<DeviceType>::allocate_matrix()
 {
   int i,ii,m;
   const int inum = list->inum;
 
   nmax = atom->nmax;
 
   // determine the total space for the H matrix
 
   m_cap = 0;
   FixQEqReaxKokkosNumNeighFunctor<DeviceType> neigh_functor(this);
   Kokkos::parallel_reduce(inum,neigh_functor,m_cap);
 
   d_firstnbr = typename AT::t_int_1d("qeq/kk:firstnbr",nmax);
   d_numnbrs = typename AT::t_int_1d("qeq/kk:numnbrs",nmax);
   d_jlist = typename AT::t_int_1d("qeq/kk:jlist",m_cap);
   d_val = typename AT::t_ffloat_1d("qeq/kk:val",m_cap);
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 void FixQEqReaxKokkos<DeviceType>::allocate_array()
 {
   if (atom->nmax > nmax) {
     nmax = atom->nmax;
 
     k_o = DAT::tdual_ffloat_1d("qeq/kk:h_o",nmax);
     d_o = k_o.template view<DeviceType>();
     h_o = k_o.h_view;
 
     d_Hdia_inv = typename AT::t_ffloat_1d("qeq/kk:h_Hdia_inv",nmax);
 
     d_b_s = typename AT::t_ffloat_1d("qeq/kk:h_b_s",nmax);
 
     d_b_t = typename AT::t_ffloat_1d("qeq/kk:h_b_t",nmax);
 
     k_s = DAT::tdual_ffloat_1d("qeq/kk:h_s",nmax);
     d_s = k_s.template view<DeviceType>();
     h_s = k_s.h_view;
 
     k_t = DAT::tdual_ffloat_1d("qeq/kk:h_t",nmax);
     d_t = k_t.template view<DeviceType>();
     h_t = k_t.h_view;
 
     d_p = typename AT::t_ffloat_1d("qeq/kk:h_p",nmax);
 
     d_r = typename AT::t_ffloat_1d("qeq/kk:h_r",nmax);
 
     k_d = DAT::tdual_ffloat_1d("qeq/kk:h_d",nmax);
     d_d = k_d.template view<DeviceType>();
     h_d = k_d.h_view;
 
-    k_s_hist = DAT::tdual_ffloat_2d("qeq/kk:s_hist",nmax,5);
+    k_s_hist = DAT::tdual_ffloat_2d("qeq/kk:s_hist",nmax,nprev);
     d_s_hist = k_s_hist.template view<DeviceType>();
     h_s_hist = k_s_hist.h_view;
 
-    k_t_hist = DAT::tdual_ffloat_2d("qeq/kk:t_hist",nmax,5);
+    k_t_hist = DAT::tdual_ffloat_2d("qeq/kk:t_hist",nmax,nprev);
     d_t_hist = k_t_hist.template view<DeviceType>();
     h_t_hist = k_t_hist.h_view;
   }
 
   // init_storage
   const int ignum = atom->nlocal + atom->nghost;
   FixQEqReaxKokkosZeroFunctor<DeviceType> zero_functor(this);
   Kokkos::parallel_for(ignum,zero_functor);
 
 }
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void FixQEqReaxKokkos<DeviceType>::zero_item(int ii) const
 {
   const int i = d_ilist[ii];
   const int itype = type(i);
 
   if (mask[i] & groupbit) {
     d_Hdia_inv[i] = 1.0 / params(itype).eta;
     d_b_s[i] = -params(itype).chi;
     d_b_t[i] = -1.0;
     d_s[i] = 0.0;
     d_t[i] = 0.0;
     d_p[i] = 0.0;
     d_o[i] = 0.0;
     d_r[i] = 0.0;
     d_d[i] = 0.0;
-    //for( int j = 0; j < 5; j++ )
+    //for( int j = 0; j < nprev; j++ )
       //d_s_hist(i,j) = d_t_hist(i,j) = 0.0;
   }
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void FixQEqReaxKokkos<DeviceType>::compute_h_item(int ii, int &m_fill, const bool &final) const
 {
   const int i = d_ilist[ii];
   int j,jj,jtype,flag;
 
   if (mask[i] & groupbit) {
 
     const X_FLOAT xtmp = x(i,0);
     const X_FLOAT ytmp = x(i,1);
     const X_FLOAT ztmp = x(i,2);
     const int itype = type(i);
     const tagint itag = tag(i);
     const int jnum = d_numneigh[i];
     if (final)
       d_firstnbr[i] = m_fill;
 
     for (jj = 0; jj < jnum; jj++) {
       j = d_neighbors(i,jj);
       j &= NEIGHMASK;
       jtype = type(j);
 
       const X_FLOAT delx = x(j,0) - xtmp;
       const X_FLOAT dely = x(j,1) - ytmp;
       const X_FLOAT delz = x(j,2) - ztmp;
 
       if (neighflag != FULL) {
         const tagint jtag = tag(j);
         flag = 0;
         if (j < nlocal) flag = 1;
         else if (itag < jtag) flag = 1;
         else if (itag == jtag) {
           if (delz > SMALL) flag = 1;
           else if (fabs(delz) < SMALL) {
             if (dely > SMALL) flag = 1;
             else if (fabs(dely) < SMALL && delx > SMALL)
               flag = 1;
           }
         }
         if (!flag) continue;
       }
 
       const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
       if (rsq > cutsq) continue;
 
       if (final) {
         const F_FLOAT r = sqrt(rsq);
         d_jlist(m_fill) = j;
         const F_FLOAT shldij = d_shield(itype,jtype);
         d_val(m_fill) = calculate_H_k(r,shldij);
       }
       m_fill++;
     }
     if (final)
       d_numnbrs[i] = m_fill - d_firstnbr[i];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 double FixQEqReaxKokkos<DeviceType>::calculate_H_k(const F_FLOAT &r, const F_FLOAT &shld) const
 {
   F_FLOAT taper, denom;
 
   taper = d_tap[7] * r + d_tap[6];
   taper = taper * r + d_tap[5];
   taper = taper * r + d_tap[4];
   taper = taper * r + d_tap[3];
   taper = taper * r + d_tap[2];
   taper = taper * r + d_tap[1];
   taper = taper * r + d_tap[0];
 
   denom = r * r * r + shld;
   denom = pow(denom,0.3333333333333);
 
   return taper * EV_TO_KCAL_PER_MOL / denom;
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void FixQEqReaxKokkos<DeviceType>::mat_vec_item(int ii) const
 {
   const int i = d_ilist[ii];
   const int itype = type(i);
 
   if (mask[i] & groupbit) {
     d_Hdia_inv[i] = 1.0 / params(itype).eta;
     d_b_s[i] = -params(itype).chi;
     d_b_t[i] = -1.0;
     d_t[i] = d_t_hist(i,2) + 3*(d_t_hist(i,0) - d_t_hist(i,1));
     d_s[i] = 4*(d_s_hist(i,0)+d_s_hist(i,2))-(6*d_s_hist(i,1)+d_s_hist(i,3));
   }
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 void FixQEqReaxKokkos<DeviceType>::cg_solve1()
 // b = b_s, x = s;
 {
   const int inum = list->inum;
   const int ignum = inum + list->gnum;
   F_FLOAT tmp, sig_old, b_norm;
 
   const int teamsize = TEAMSIZE;
 
   // sparse_matvec( &H, x, q );
   FixQEqReaxKokkosSparse12Functor<DeviceType> sparse12_functor(this);
   Kokkos::parallel_for(inum,sparse12_functor);
   if (neighflag != FULL) {
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this);
     if (neighflag == HALF) {
       FixQEqReaxKokkosSparse13Functor<DeviceType,HALF> sparse13_functor(this);
       Kokkos::parallel_for(inum,sparse13_functor);
     } else {
       FixQEqReaxKokkosSparse13Functor<DeviceType,HALFTHREAD> sparse13_functor(this);
       Kokkos::parallel_for(inum,sparse13_functor);
     }
   } else {
     Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec1> (inum, teamsize), *this);
   }
 
   if (neighflag != FULL) {
     k_o.template modify<DeviceType>();
     k_o.template sync<LMPHostType>();
     comm->reverse_comm_fix(this); //Coll_vector( q );
     k_o.template modify<LMPHostType>();
     k_o.template sync<DeviceType>();
   }
 
   // vector_sum( r , 1.,  b, -1., q, nn );
   // preconditioning: d[j] = r[j] * Hdia_inv[j];
   // b_norm = parallel_norm( b, nn );
   F_FLOAT my_norm = 0.0;
   FixQEqReaxKokkosNorm1Functor<DeviceType> norm1_functor(this);
   Kokkos::parallel_reduce(inum,norm1_functor,my_norm);
   F_FLOAT norm_sqr = 0.0;
   MPI_Allreduce( &my_norm, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
   b_norm = sqrt(norm_sqr);
 
   // sig_new = parallel_dot( r, d, nn);
   F_FLOAT my_dot = 0.0;
   FixQEqReaxKokkosDot1Functor<DeviceType> dot1_functor(this);
   Kokkos::parallel_reduce(inum,dot1_functor,my_dot);
   F_FLOAT dot_sqr = 0.0;
   MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
   F_FLOAT sig_new = dot_sqr;
 
   int loop;
   const int loopmax = 200;
   for (loop = 1; loop < loopmax & sqrt(sig_new)/b_norm > tolerance; loop++) {
 
     // comm->forward_comm_fix(this); //Dist_vector( d );
     pack_flag = 1;
     k_d.template modify<DeviceType>();
     k_d.template sync<LMPHostType>();
     comm->forward_comm_fix(this);
     k_d.template modify<LMPHostType>();
     k_d.template sync<DeviceType>();
 
     // sparse_matvec( &H, d, q );
     FixQEqReaxKokkosSparse22Functor<DeviceType> sparse22_functor(this);
     Kokkos::parallel_for(inum,sparse22_functor);
     if (neighflag != FULL) {
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this);
       if (neighflag == HALF) {
         FixQEqReaxKokkosSparse23Functor<DeviceType,HALF> sparse23_functor(this);
         Kokkos::parallel_for(inum,sparse23_functor);
       } else {
         FixQEqReaxKokkosSparse23Functor<DeviceType,HALFTHREAD> sparse23_functor(this);
         Kokkos::parallel_for(inum,sparse23_functor);
       }
     } else {
       Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec2> (inum, teamsize), *this);
     }
 
 
     if (neighflag != FULL) {
       k_o.template modify<DeviceType>();
       k_o.template sync<LMPHostType>();
       comm->reverse_comm_fix(this); //Coll_vector( q );
       k_o.template modify<LMPHostType>();
       k_o.template sync<DeviceType>();
     }
 
     // tmp = parallel_dot( d, q, nn);
     my_dot = dot_sqr = 0.0;
     FixQEqReaxKokkosDot2Functor<DeviceType> dot2_functor(this);
     Kokkos::parallel_reduce(inum,dot2_functor,my_dot);
     MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
     tmp = dot_sqr;
 
     alpha = sig_new / tmp;
 
     sig_old = sig_new;
 
     // vector_add( s, alpha, d, nn );
     // vector_add( r, -alpha, q, nn );
     my_dot = dot_sqr = 0.0;
     FixQEqReaxKokkosPrecon1Functor<DeviceType> precon1_functor(this);
     Kokkos::parallel_for(inum,precon1_functor);
     // preconditioning: p[j] = r[j] * Hdia_inv[j];
     // sig_new = parallel_dot( r, p, nn);
     FixQEqReaxKokkosPreconFunctor<DeviceType> precon_functor(this);
     Kokkos::parallel_reduce(inum,precon_functor,my_dot);
     MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
     sig_new = dot_sqr;
 
     beta = sig_new / sig_old;
 
     // vector_sum( d, 1., p, beta, d, nn );
     FixQEqReaxKokkosVecSum2Functor<DeviceType> vecsum2_functor(this);
     Kokkos::parallel_for(inum,vecsum2_functor);
   }
 
   if (loop >= loopmax && comm->me == 0) {
     char str[128];
     sprintf(str,"Fix qeq/reax cg_solve1 convergence failed after %d iterations "
             "at " BIGINT_FORMAT " step: %f",loop,update->ntimestep,sqrt(sig_new)/b_norm);
     error->warning(FLERR,str);
     //error->all(FLERR,str);
   }
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 void FixQEqReaxKokkos<DeviceType>::cg_solve2()
 // b = b_t, x = t;
 {
   const int inum = list->inum;
   const int ignum = inum + list->gnum;
   F_FLOAT tmp, sig_old, b_norm;
 
   const int teamsize = TEAMSIZE;
 
   // sparse_matvec( &H, x, q );
   FixQEqReaxKokkosSparse32Functor<DeviceType> sparse32_functor(this);
   Kokkos::parallel_for(inum,sparse32_functor);
   if (neighflag != FULL) {
     Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this);
     if (neighflag == HALF) {
       FixQEqReaxKokkosSparse33Functor<DeviceType,HALF> sparse33_functor(this);
       Kokkos::parallel_for(inum,sparse33_functor);
     } else {
       FixQEqReaxKokkosSparse33Functor<DeviceType,HALFTHREAD> sparse33_functor(this);
       Kokkos::parallel_for(inum,sparse33_functor);
     }
   } else {
     Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec3> (inum, teamsize), *this);
   }
 
   if (neighflag != FULL) {
     k_o.template modify<DeviceType>();
     k_o.template sync<LMPHostType>();
     comm->reverse_comm_fix(this); //Coll_vector( q );
     k_o.template modify<LMPHostType>();
     k_o.template sync<DeviceType>();
   }
 
   // vector_sum( r , 1.,  b, -1., q, nn );
   // preconditioning: d[j] = r[j] * Hdia_inv[j];
   // b_norm = parallel_norm( b, nn );
   F_FLOAT my_norm = 0.0;
   FixQEqReaxKokkosNorm2Functor<DeviceType> norm2_functor(this);
   Kokkos::parallel_reduce(inum,norm2_functor,my_norm);
   F_FLOAT norm_sqr = 0.0;
   MPI_Allreduce( &my_norm, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
   b_norm = sqrt(norm_sqr);
 
   // sig_new = parallel_dot( r, d, nn);
   F_FLOAT my_dot = 0.0;
   FixQEqReaxKokkosDot1Functor<DeviceType> dot1_functor(this);
   Kokkos::parallel_reduce(inum,dot1_functor,my_dot);
   F_FLOAT dot_sqr = 0.0;
   MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
   F_FLOAT sig_new = dot_sqr;
 
   int loop;
   const int loopmax = 200;
   for (loop = 1; loop < loopmax & sqrt(sig_new)/b_norm > tolerance; loop++) {
 
     // comm->forward_comm_fix(this); //Dist_vector( d );
     pack_flag = 1;
     k_d.template modify<DeviceType>();
     k_d.template sync<LMPHostType>();
     comm->forward_comm_fix(this);
     k_d.template modify<LMPHostType>();
     k_d.template sync<DeviceType>();
 
     // sparse_matvec( &H, d, q );
     FixQEqReaxKokkosSparse22Functor<DeviceType> sparse22_functor(this);
     Kokkos::parallel_for(inum,sparse22_functor);
     if (neighflag != FULL) {
       Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this);
       if (neighflag == HALF) {
         FixQEqReaxKokkosSparse23Functor<DeviceType,HALF> sparse23_functor(this);
         Kokkos::parallel_for(inum,sparse23_functor);
       } else {
         FixQEqReaxKokkosSparse23Functor<DeviceType,HALFTHREAD> sparse23_functor(this);
         Kokkos::parallel_for(inum,sparse23_functor);
       }
     } else {
       Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec2> (inum, teamsize), *this);
     }
 
     if (neighflag != FULL) {
       k_o.template modify<DeviceType>();
       k_o.template sync<LMPHostType>();
       comm->reverse_comm_fix(this); //Coll_vector( q );
       k_o.template modify<LMPHostType>();
       k_o.template sync<DeviceType>();
     }
 
     // tmp = parallel_dot( d, q, nn);
     my_dot = dot_sqr = 0.0;
     FixQEqReaxKokkosDot2Functor<DeviceType> dot2_functor(this);
     Kokkos::parallel_reduce(inum,dot2_functor,my_dot);
     MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
     tmp = dot_sqr;
 
     alpha = sig_new / tmp;
 
     sig_old = sig_new;
 
     // vector_add( t, alpha, d, nn );
     // vector_add( r, -alpha, q, nn );
     my_dot = dot_sqr = 0.0;
     FixQEqReaxKokkosPrecon2Functor<DeviceType> precon2_functor(this);
     Kokkos::parallel_for(inum,precon2_functor);
     // preconditioning: p[j] = r[j] * Hdia_inv[j];
     // sig_new = parallel_dot( r, p, nn);
     FixQEqReaxKokkosPreconFunctor<DeviceType> precon_functor(this);
     Kokkos::parallel_reduce(inum,precon_functor,my_dot);
     MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world );
     sig_new = dot_sqr;
 
     beta = sig_new / sig_old;
 
     // vector_sum( d, 1., p, beta, d, nn );
     FixQEqReaxKokkosVecSum2Functor<DeviceType> vecsum2_functor(this);
     Kokkos::parallel_for(inum,vecsum2_functor);
   }
 
   if (loop >= loopmax && comm->me == 0) {
     char str[128];
     sprintf(str,"Fix qeq/reax cg_solve2 convergence failed after %d iterations "
             "at " BIGINT_FORMAT " step: %f",loop,update->ntimestep,sqrt(sig_new)/b_norm);
     error->warning(FLERR,str);
     //error->all(FLERR,str);
   }
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 void FixQEqReaxKokkos<DeviceType>::calculate_q()
 {
   F_FLOAT sum, sum_all;
   const int inum = list->inum;
 
   // s_sum = parallel_vector_acc( s, nn );
   sum = sum_all = 0.0;
   FixQEqReaxKokkosVecAcc1Functor<DeviceType> vecacc1_functor(this);
   Kokkos::parallel_reduce(inum,vecacc1_functor,sum);
   MPI_Allreduce(&sum, &sum_all, 1, MPI_DOUBLE, MPI_SUM, world );
   const F_FLOAT s_sum = sum_all;
 
   // t_sum = parallel_vector_acc( t, nn);
   sum = sum_all = 0.0;
   FixQEqReaxKokkosVecAcc2Functor<DeviceType> vecacc2_functor(this);
   Kokkos::parallel_reduce(inum,vecacc2_functor,sum);
   MPI_Allreduce(&sum, &sum_all, 1, MPI_DOUBLE, MPI_SUM, world );
   const F_FLOAT t_sum = sum_all;
 
   // u = s_sum / t_sum;
   delta = s_sum/t_sum;
 
   // q[i] = s[i] - u * t[i];
   FixQEqReaxKokkosCalculateQFunctor<DeviceType> calculateQ_functor(this);
   Kokkos::parallel_for(inum,calculateQ_functor);
 
   pack_flag = 4;
   //comm->forward_comm_fix( this ); //Dist_vector( atom->q );
   atomKK->k_q.modify<DeviceType>();
   atomKK->k_q.sync<LMPHostType>();
   comm->forward_comm_fix(this);
   atomKK->k_q.modify<LMPHostType>();
   atomKK->k_q.sync<DeviceType>();
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void FixQEqReaxKokkos<DeviceType>::sparse12_item(int ii) const
 {
   const int i = d_ilist[ii];
   const int itype = type(i);
   if (mask[i] & groupbit) {
     d_o[i] = params(itype).eta * d_s[i];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 template<int NEIGHFLAG>
 KOKKOS_INLINE_FUNCTION
 void FixQEqReaxKokkos<DeviceType>::sparse13_item(int ii) const
 {
   // The q array is atomic for Half/Thread neighbor style
   Kokkos::View<F_FLOAT*, typename DAT::t_float_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_o = d_o;
 
   const int i = d_ilist[ii];
   if (mask[i] & groupbit) {
     F_FLOAT tmp = 0.0;
     for(int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) {
       const int j = d_jlist(jj);
       tmp += d_val(jj) * d_s[j];
       a_o[j] += d_val(jj) * d_s[i];
     }
     a_o[i] += tmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void FixQEqReaxKokkos<DeviceType>::operator() (TagSparseMatvec1, const membertype1 &team) const
 {
   const int i = d_ilist[team.league_rank()];
   if (mask[i] & groupbit) {
     F_FLOAT doitmp;
     Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const int &jj, F_FLOAT &doi) {
       const int j = d_jlist(jj);
       doi += d_val(jj) * d_s[j];
     }, doitmp);
     Kokkos::single(Kokkos::PerTeam(team), [&] () {d_o[i] += doitmp;});
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void FixQEqReaxKokkos<DeviceType>::sparse22_item(int ii) const
 {
   const int i = d_ilist[ii];
   const int itype = type(i);
   if (mask[i] & groupbit) {
     d_o[i] = params(itype).eta * d_d[i];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 template<int NEIGHFLAG>
 KOKKOS_INLINE_FUNCTION
 void FixQEqReaxKokkos<DeviceType>::sparse23_item(int ii) const
 {
   // The q array is atomic for Half/Thread neighbor style
   Kokkos::View<F_FLOAT*, typename DAT::t_float_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_o = d_o;
 
   const int i = d_ilist[ii];
   if (mask[i] & groupbit) {
     F_FLOAT tmp = 0.0;
     for(int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) {
       const int j = d_jlist(jj);
       tmp += d_val(jj) * d_d[j];
       a_o[j] += d_val(jj) * d_d[i];
     }
     a_o[i] += tmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void FixQEqReaxKokkos<DeviceType>::operator() (TagSparseMatvec2, const membertype2 &team) const
 {
   const int i = d_ilist[team.league_rank()];
   if (mask[i] & groupbit) {
     F_FLOAT doitmp;
     Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const int &jj, F_FLOAT &doi) {
       const int j = d_jlist(jj);
       doi += d_val(jj) * d_d[j];
     }, doitmp);
     Kokkos::single(Kokkos::PerTeam(team), [&] () {d_o[i] += doitmp; });
   }
 }
 
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void FixQEqReaxKokkos<DeviceType>::operator() (TagZeroQGhosts, const int &i) const
 {
   if (mask[i] & groupbit)
     d_o[i] = 0.0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void FixQEqReaxKokkos<DeviceType>::sparse32_item(int ii) const
 {
   const int i = d_ilist[ii];
   const int itype = type(i);
   if (mask[i] & groupbit)
     d_o[i] = params(itype).eta * d_t[i];
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 template<int NEIGHFLAG>
 KOKKOS_INLINE_FUNCTION
 void FixQEqReaxKokkos<DeviceType>::sparse33_item(int ii) const
 {
   // The q array is atomic for Half/Thread neighbor style
   Kokkos::View<F_FLOAT*, typename DAT::t_float_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_o = d_o;
 
   const int i = d_ilist[ii];
   if (mask[i] & groupbit) {
     F_FLOAT tmp = 0.0;
     for(int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) {
       const int j = d_jlist(jj);
       tmp += d_val(jj) * d_t[j];
       a_o[j] += d_val(jj) * d_t[i];
     }
     a_o[i] += tmp;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void FixQEqReaxKokkos<DeviceType>::operator() (TagSparseMatvec3, const membertype3 &team) const
 {
   const int i = d_ilist[team.league_rank()];
   if (mask[i] & groupbit) {
     F_FLOAT doitmp;
     Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const int &jj, F_FLOAT &doi) {
       const int j = d_jlist(jj);
       doi += d_val(jj) * d_t[j];
     }, doitmp);
     Kokkos::single(Kokkos::PerTeam(team), [&] () {d_o[i] += doitmp;});
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void FixQEqReaxKokkos<DeviceType>::vecsum2_item(int ii) const
 {
   const int i = d_ilist[ii];
   if (mask[i] & groupbit)
     d_d[i] = 1.0 * d_p[i] + beta * d_d[i];
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 double FixQEqReaxKokkos<DeviceType>::norm1_item(int ii) const
 {
   F_FLOAT tmp = 0;
   const int i = d_ilist[ii];
   if (mask[i] & groupbit) {
     d_r[i] = 1.0*d_b_s[i] + -1.0*d_o[i];
     d_d[i] = d_r[i] * d_Hdia_inv[i];
     tmp = d_b_s[i] * d_b_s[i];
   }
   return tmp;
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 double FixQEqReaxKokkos<DeviceType>::norm2_item(int ii) const
 {
   F_FLOAT tmp = 0;
   const int i = d_ilist[ii];
   if (mask[i] & groupbit) {
     d_r[i] = 1.0*d_b_t[i] + -1.0*d_o[i];
     d_d[i] = d_r[i] * d_Hdia_inv[i];
     tmp = d_b_t[i] * d_b_t[i];
   }
   return tmp;
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 double FixQEqReaxKokkos<DeviceType>::dot1_item(int ii) const
 {
   F_FLOAT tmp = 0.0;
   const int i = d_ilist[ii];
   if (mask[i] & groupbit)
     tmp = d_r[i] * d_d[i];
   return tmp;
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 double FixQEqReaxKokkos<DeviceType>::dot2_item(int ii) const
 {
   double tmp = 0.0;
   const int i = d_ilist[ii];
   if (mask[i] & groupbit) {
     tmp = d_d[i] * d_o[i];
   }
   return tmp;
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void FixQEqReaxKokkos<DeviceType>::precon1_item(int ii) const
 {
   const int i = d_ilist[ii];
   if (mask[i] & groupbit) {
     d_s[i] += alpha * d_d[i];
     d_r[i] += -alpha * d_o[i];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void FixQEqReaxKokkos<DeviceType>::precon2_item(int ii) const
 {
   const int i = d_ilist[ii];
   if (mask[i] & groupbit) {
     d_t[i] += alpha * d_d[i];
     d_r[i] += -alpha * d_o[i];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 double FixQEqReaxKokkos<DeviceType>::precon_item(int ii) const
 {
   F_FLOAT tmp = 0.0;
   const int i = d_ilist[ii];
   if (mask[i] & groupbit) {
     d_p[i] = d_r[i] * d_Hdia_inv[i];
     tmp = d_r[i] * d_p[i];
   }
   return tmp;
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 double FixQEqReaxKokkos<DeviceType>::vecacc1_item(int ii) const
 {
   F_FLOAT tmp = 0.0;
   const int i = d_ilist[ii];
   if (mask[i] & groupbit)
     tmp = d_s[i];
   return tmp;
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 double FixQEqReaxKokkos<DeviceType>::vecacc2_item(int ii) const
 {
   F_FLOAT tmp = 0.0;
   const int i = d_ilist[ii];
   if (mask[i] & groupbit) {
     tmp = d_t[i];
   }
   return tmp;
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void FixQEqReaxKokkos<DeviceType>::calculate_q_item(int ii) const
 {
   const int i = d_ilist[ii];
   if (mask[i] & groupbit) {
     q(i) = d_s[i] - delta * d_t[i];
 
     for (int k = 4; k > 0; --k) {
       d_s_hist(i,k) = d_s_hist(i,k-1);
       d_t_hist(i,k) = d_t_hist(i,k-1);
     }
     d_s_hist(i,0) = d_s[i];
     d_t_hist(i,0) = d_t[i];
   }
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 int FixQEqReaxKokkos<DeviceType>::pack_forward_comm(int n, int *list, double *buf,
                                int pbc_flag, int *pbc)
 {
   int m;
 
   if( pack_flag == 1)
     for(m = 0; m < n; m++) buf[m] = h_d[list[m]];
   else if( pack_flag == 2 )
     for(m = 0; m < n; m++) buf[m] = h_s[list[m]];
   else if( pack_flag == 3 )
     for(m = 0; m < n; m++) buf[m] = h_t[list[m]];
   else if( pack_flag == 4 )
     for(m = 0; m < n; m++) buf[m] = atom->q[list[m]];
 
   return n;
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 void FixQEqReaxKokkos<DeviceType>::unpack_forward_comm(int n, int first, double *buf)
 {
   int i, m;
 
   if( pack_flag == 1)
     for(m = 0, i = first; m < n; m++, i++) h_d[i] = buf[m];
   else if( pack_flag == 2)
     for(m = 0, i = first; m < n; m++, i++) h_s[i] = buf[m];
   else if( pack_flag == 3)
     for(m = 0, i = first; m < n; m++, i++) h_t[i] = buf[m];
   else if( pack_flag == 4)
     for(m = 0, i = first; m < n; m++, i++) atom->q[i] = buf[m];
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 int FixQEqReaxKokkos<DeviceType>::pack_reverse_comm(int n, int first, double *buf)
 {
   int i, m;
   for(m = 0, i = first; m < n; m++, i++) {
     buf[m] = h_o[i];
   }
   return n;
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 void FixQEqReaxKokkos<DeviceType>::unpack_reverse_comm(int n, int *list, double *buf)
 {
   for(int m = 0; m < n; m++) {
     h_o[list[m]] += buf[m];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
 void FixQEqReaxKokkos<DeviceType>::cleanup_copy()
 {
   id = style = NULL;
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
 ------------------------------------------------------------------------- */
 
 template<class DeviceType>
 double FixQEqReaxKokkos<DeviceType>::memory_usage()
 {
   double bytes;
 
-  bytes = atom->nmax*5*2 * sizeof(F_FLOAT); // s_hist & t_hist
+  bytes = atom->nmax*nprev*2 * sizeof(F_FLOAT); // s_hist & t_hist
   bytes += atom->nmax*8 * sizeof(F_FLOAT); // storage
   bytes += n_cap*2 * sizeof(int); // matrix...
   bytes += m_cap * sizeof(int);
   bytes += m_cap * sizeof(F_FLOAT);
 
   return bytes;
 }
 
 /* ---------------------------------------------------------------------- */\
 
 namespace LAMMPS_NS {
 template class FixQEqReaxKokkos<LMPDeviceType>;
 #ifdef KOKKOS_HAVE_CUDA
 template class FixQEqReaxKokkos<LMPHostType>;
 #endif
 }
diff --git a/src/USER-REAXC/fix_qeq_reax.cpp b/src/USER-REAXC/fix_qeq_reax.cpp
index 9d165f3fd..33b70c972 100644
--- a/src/USER-REAXC/fix_qeq_reax.cpp
+++ b/src/USER-REAXC/fix_qeq_reax.cpp
@@ -1,1106 +1,1106 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Hasan Metin Aktulga, Purdue University
    (now at Lawrence Berkeley National Laboratory, hmaktulga@lbl.gov)
 
      Hybrid and sub-group capabilities: Ray Shan (Sandia)
 ------------------------------------------------------------------------- */
 
 #include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include "fix_qeq_reax.h"
 #include "pair_reaxc.h"
 #include "atom.h"
 #include "comm.h"
 #include "domain.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "update.h"
 #include "force.h"
 #include "group.h"
 #include "pair.h"
 #include "respa.h"
 #include "memory.h"
 #include "citeme.h"
 #include "error.h"
 #include "reaxc_defs.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
 #define EV_TO_KCAL_PER_MOL 14.4
 //#define DANGER_ZONE     0.95
 //#define LOOSE_ZONE      0.7
 #define SQR(x) ((x)*(x))
 #define CUBE(x) ((x)*(x)*(x))
 #define MIN_NBRS 100
 
 static const char cite_fix_qeq_reax[] =
   "fix qeq/reax command:\n\n"
   "@Article{Aktulga12,\n"
   " author = {H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama},\n"
   " title = {Parallel reactive molecular dynamics: Numerical methods and algorithmic techniques},\n"
   " journal = {Parallel Computing},\n"
   " year =    2012,\n"
   " volume =  38,\n"
   " pages =   {245--259}\n"
   "}\n\n";
 
 /* ---------------------------------------------------------------------- */
 
 FixQEqReax::FixQEqReax(LAMMPS *lmp, int narg, char **arg) :
   Fix(lmp, narg, arg), pertype_option(NULL)
 {
   if (lmp->citeme) lmp->citeme->add(cite_fix_qeq_reax);
 
   if (narg<8 || narg>9) error->all(FLERR,"Illegal fix qeq/reax command");
 
   nevery = force->inumeric(FLERR,arg[3]);
   if (nevery <= 0) error->all(FLERR,"Illegal fix qeq/reax command");
 
   swa = force->numeric(FLERR,arg[4]);
   swb = force->numeric(FLERR,arg[5]);
   tolerance = force->numeric(FLERR,arg[6]);
   int len = strlen(arg[7]) + 1;
   pertype_option = new char[len];
   strcpy(pertype_option,arg[7]);
 
   // dual CG support only available for USER-OMP variant
   // check for compatibility is in Fix::post_constructor()
   dual_enabled = 0;
   if (narg == 9) {
     if (strcmp(arg[8],"dual") == 0) dual_enabled = 1;
     else error->all(FLERR,"Illegal fix qeq/reax command");
   }
   shld = NULL;
 
   n = n_cap = 0;
   N = nmax = 0;
   m_fill = m_cap = 0;
   pack_flag = 0;
   s = NULL;
   t = NULL;
-  nprev = 5;
+  nprev = 4;
 
   Hdia_inv = NULL;
   b_s = NULL;
   b_t = NULL;
   b_prc = NULL;
   b_prm = NULL;
 
   // CG
   p = NULL;
   q = NULL;
   r = NULL;
   d = NULL;
 
   // H matrix
   H.firstnbr = NULL;
   H.numnbrs = NULL;
   H.jlist = NULL;
   H.val = NULL;
 
   // dual CG support
   // Update comm sizes for this fix
   if (dual_enabled) comm_forward = comm_reverse = 2;
   else comm_forward = comm_reverse = 1;
 
   // perform initial allocation of atom-based arrays
   // register with Atom class
 
   reaxc = NULL;
   reaxc = (PairReaxC *) force->pair_match("reax/c",0);
 
   if (reaxc) {
     s_hist = t_hist = NULL;
     grow_arrays(atom->nmax);
     atom->add_callback(0);
     for (int i = 0; i < atom->nmax; i++)
       for (int j = 0; j < nprev; ++j)
         s_hist[i][j] = t_hist[i][j] = 0;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 FixQEqReax::~FixQEqReax()
 {
   if (copymode) return;
 
   delete[] pertype_option;
 
   // unregister callbacks to this fix from Atom class
 
   atom->delete_callback(id,0);
 
   memory->destroy(s_hist);
   memory->destroy(t_hist);
 
   deallocate_storage();
   deallocate_matrix();
 
   memory->destroy(shld);
 
   if (!reaxflag) {
     memory->destroy(chi);
     memory->destroy(eta);
     memory->destroy(gamma);
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::post_constructor()
 {
   pertype_parameters(pertype_option);
   if (dual_enabled)
     error->all(FLERR,"Dual keyword only supported with fix qeq/reax/omp");
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixQEqReax::setmask()
 {
   int mask = 0;
   mask |= PRE_FORCE;
   mask |= PRE_FORCE_RESPA;
   mask |= MIN_PRE_FORCE;
   return mask;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::pertype_parameters(char *arg)
 {
   if (strcmp(arg,"reax/c") == 0) {
     reaxflag = 1;
     Pair *pair = force->pair_match("reax/c",0);
     if (pair == NULL) error->all(FLERR,"No pair reax/c for fix qeq/reax");
 
     int tmp;
     chi = (double *) pair->extract("chi",tmp);
     eta = (double *) pair->extract("eta",tmp);
     gamma = (double *) pair->extract("gamma",tmp);
     if (chi == NULL || eta == NULL || gamma == NULL)
       error->all(FLERR,
                  "Fix qeq/reax could not extract params from pair reax/c");
     return;
   }
 
   int i,itype,ntypes;
   double v1,v2,v3;
   FILE *pf;
 
   reaxflag = 0;
   ntypes = atom->ntypes;
 
   memory->create(chi,ntypes+1,"qeq/reax:chi");
   memory->create(eta,ntypes+1,"qeq/reax:eta");
   memory->create(gamma,ntypes+1,"qeq/reax:gamma");
 
   if (comm->me == 0) {
     if ((pf = fopen(arg,"r")) == NULL)
       error->one(FLERR,"Fix qeq/reax parameter file could not be found");
 
     for (i = 1; i <= ntypes && !feof(pf); i++) {
       fscanf(pf,"%d %lg %lg %lg",&itype,&v1,&v2,&v3);
       if (itype < 1 || itype > ntypes)
         error->one(FLERR,"Fix qeq/reax invalid atom type in param file");
       chi[itype] = v1;
       eta[itype] = v2;
       gamma[itype] = v3;
     }
     if (i <= ntypes) error->one(FLERR,"Invalid param file for fix qeq/reax");
     fclose(pf);
   }
 
   MPI_Bcast(&chi[1],ntypes,MPI_DOUBLE,0,world);
   MPI_Bcast(&eta[1],ntypes,MPI_DOUBLE,0,world);
   MPI_Bcast(&gamma[1],ntypes,MPI_DOUBLE,0,world);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::allocate_storage()
 {
   nmax = atom->nmax;
 
   memory->create(s,nmax,"qeq:s");
   memory->create(t,nmax,"qeq:t");
 
   memory->create(Hdia_inv,nmax,"qeq:Hdia_inv");
   memory->create(b_s,nmax,"qeq:b_s");
   memory->create(b_t,nmax,"qeq:b_t");
   memory->create(b_prc,nmax,"qeq:b_prc");
   memory->create(b_prm,nmax,"qeq:b_prm");
 
   // dual CG support
   int size = nmax;
   if (dual_enabled) size*= 2;
 
   memory->create(p,size,"qeq:p");
   memory->create(q,size,"qeq:q");
   memory->create(r,size,"qeq:r");
   memory->create(d,size,"qeq:d");
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::deallocate_storage()
 {
   memory->destroy(s);
   memory->destroy(t);
 
   memory->destroy( Hdia_inv );
   memory->destroy( b_s );
   memory->destroy( b_t );
   memory->destroy( b_prc );
   memory->destroy( b_prm );
 
   memory->destroy( p );
   memory->destroy( q );
   memory->destroy( r );
   memory->destroy( d );
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::reallocate_storage()
 {
   deallocate_storage();
   allocate_storage();
   init_storage();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::allocate_matrix()
 {
   int i,ii,inum,m;
   int *ilist, *numneigh;
 
   int mincap;
   double safezone;
 
   if (reaxflag) {
     mincap = reaxc->system->mincap;
     safezone = reaxc->system->safezone;
   } else {
     mincap = MIN_CAP;
     safezone = SAFE_ZONE;
   }
 
   n = atom->nlocal;
   n_cap = MAX( (int)(n * safezone), mincap);
 
   // determine the total space for the H matrix
 
   if (reaxc) {
     inum = reaxc->list->inum;
     ilist = reaxc->list->ilist;
     numneigh = reaxc->list->numneigh;
   } else {
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
   }
 
   m = 0;
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     m += numneigh[i];
   }
   m_cap = MAX( (int)(m * safezone), mincap * MIN_NBRS);
 
   H.n = n_cap;
   H.m = m_cap;
   memory->create(H.firstnbr,n_cap,"qeq:H.firstnbr");
   memory->create(H.numnbrs,n_cap,"qeq:H.numnbrs");
   memory->create(H.jlist,m_cap,"qeq:H.jlist");
   memory->create(H.val,m_cap,"qeq:H.val");
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::deallocate_matrix()
 {
   memory->destroy( H.firstnbr );
   memory->destroy( H.numnbrs );
   memory->destroy( H.jlist );
   memory->destroy( H.val );
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::reallocate_matrix()
 {
   deallocate_matrix();
   allocate_matrix();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::init()
 {
   if (!atom->q_flag)
     error->all(FLERR,"Fix qeq/reax requires atom attribute q");
 
   ngroup = group->count(igroup);
   if (ngroup == 0) error->all(FLERR,"Fix qeq/reax group has no atoms");
 
   // need a half neighbor list w/ Newton off and ghost neighbors
   // built whenever re-neighboring occurs
 
   int irequest = neighbor->request(this,instance_me);
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->fix = 1;
   neighbor->requests[irequest]->newton = 2;
   neighbor->requests[irequest]->ghost = 1;
 
   init_shielding();
   init_taper();
 
   if (strstr(update->integrate_style,"respa"))
     nlevels_respa = ((Respa *) update->integrate)->nlevels;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::init_list(int id, NeighList *ptr)
 {
   list = ptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::init_shielding()
 {
   int i,j;
   int ntypes;
 
   ntypes = atom->ntypes;
   if (shld == NULL)
     memory->create(shld,ntypes+1,ntypes+1,"qeq:shielding");
 
   for (i = 1; i <= ntypes; ++i)
     for (j = 1; j <= ntypes; ++j)
       shld[i][j] = pow( gamma[i] * gamma[j], -1.5);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::init_taper()
 {
   double d7, swa2, swa3, swb2, swb3;
 
   if (fabs(swa) > 0.01 && comm->me == 0)
     error->warning(FLERR,"Fix qeq/reax has non-zero lower Taper radius cutoff");
   if (swb < 0)
     error->all(FLERR, "Fix qeq/reax has negative upper Taper radius cutoff");
   else if (swb < 5 && comm->me == 0)
     error->warning(FLERR,"Fix qeq/reax has very low Taper radius cutoff");
 
   d7 = pow( swb - swa, 7);
   swa2 = SQR( swa);
   swa3 = CUBE( swa);
   swb2 = SQR( swb);
   swb3 = CUBE( swb);
 
   Tap[7] =  20.0 / d7;
   Tap[6] = -70.0 * (swa + swb) / d7;
   Tap[5] =  84.0 * (swa2 + 3.0*swa*swb + swb2) / d7;
   Tap[4] = -35.0 * (swa3 + 9.0*swa2*swb + 9.0*swa*swb2 + swb3) / d7;
   Tap[3] = 140.0 * (swa3*swb + 3.0*swa2*swb2 + swa*swb3) / d7;
   Tap[2] =-210.0 * (swa3*swb2 + swa2*swb3) / d7;
   Tap[1] = 140.0 * swa3 * swb3 / d7;
   Tap[0] = (-35.0*swa3*swb2*swb2 + 21.0*swa2*swb3*swb2 +
             7.0*swa*swb3*swb3 + swb3*swb3*swb) / d7;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::setup_pre_force(int vflag)
 {
   deallocate_storage();
   allocate_storage();
 
   init_storage();
 
   deallocate_matrix();
   allocate_matrix();
 
   pre_force(vflag);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::setup_pre_force_respa(int vflag, int ilevel)
 {
   if (ilevel < nlevels_respa-1) return;
   setup_pre_force(vflag);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::min_setup_pre_force(int vflag)
 {
   setup_pre_force(vflag);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::init_storage()
 {
   int NN;
 
   if (reaxc)
     NN = reaxc->list->inum + reaxc->list->gnum;
   else
     NN = list->inum + list->gnum;
 
   for (int i = 0; i < NN; i++) {
     Hdia_inv[i] = 1. / eta[atom->type[i]];
     b_s[i] = -chi[atom->type[i]];
     b_t[i] = -1.0;
     b_prc[i] = 0;
     b_prm[i] = 0;
     s[i] = t[i] = 0;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::pre_force(int vflag)
 {
   double t_start, t_end;
 
   if (update->ntimestep % nevery) return;
   if (comm->me == 0) t_start = MPI_Wtime();
 
   n = atom->nlocal;
   N = atom->nlocal + atom->nghost;
 
   // grow arrays if necessary
   // need to be atom->nmax in length
 
   if (atom->nmax > nmax) reallocate_storage();
   if (n > n_cap*DANGER_ZONE || m_fill > m_cap*DANGER_ZONE)
     reallocate_matrix();
 
   init_matvec();
 
   matvecs_s = CG(b_s, s);    	// CG on s - parallel
   matvecs_t = CG(b_t, t);       // CG on t - parallel
   matvecs = matvecs_s + matvecs_t;
 
   calculate_Q();
 
   if (comm->me == 0) {
     t_end = MPI_Wtime();
     qeq_time = t_end - t_start;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::pre_force_respa(int vflag, int ilevel, int iloop)
 {
   if (ilevel == nlevels_respa-1) pre_force(vflag);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::min_pre_force(int vflag)
 {
   pre_force(vflag);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::init_matvec()
 {
   /* fill-in H matrix */
   compute_H();
 
   int nn, ii, i;
   int *ilist;
 
   if (reaxc) {
     nn = reaxc->list->inum;
     ilist = reaxc->list->ilist;
   } else {
     nn = list->inum;
     ilist = list->ilist;
   }
 
   for (ii = 0; ii < nn; ++ii) {
     i = ilist[ii];
     if (atom->mask[i] & groupbit) {
 
       /* init pre-conditioner for H and init solution vectors */
       Hdia_inv[i] = 1. / eta[ atom->type[i] ];
       b_s[i]      = -chi[ atom->type[i] ];
       b_t[i]      = -1.0;
 
       /* linear extrapolation for s & t from previous solutions */
       //s[i] = 2 * s_hist[i][0] - s_hist[i][1];
       //t[i] = 2 * t_hist[i][0] - t_hist[i][1];
 
       /* quadratic extrapolation for s & t from previous solutions */
       //s[i] = s_hist[i][2] + 3 * ( s_hist[i][0] - s_hist[i][1] );
       t[i] = t_hist[i][2] + 3 * ( t_hist[i][0] - t_hist[i][1]);
 
       /* cubic extrapolation for s & t from previous solutions */
       s[i] = 4*(s_hist[i][0]+s_hist[i][2])-(6*s_hist[i][1]+s_hist[i][3]);
       //t[i] = 4*(t_hist[i][0]+t_hist[i][2])-(6*t_hist[i][1]+t_hist[i][3]);
     }
   }
 
   pack_flag = 2;
   comm->forward_comm_fix(this); //Dist_vector( s );
   pack_flag = 3;
   comm->forward_comm_fix(this); //Dist_vector( t );
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::compute_H()
 {
   int inum, jnum, *ilist, *jlist, *numneigh, **firstneigh;
   int i, j, ii, jj, flag;
   double dx, dy, dz, r_sqr;
   const double SMALL = 0.0001;
 
   int *type = atom->type;
   tagint *tag = atom->tag;
   double **x = atom->x;
   int *mask = atom->mask;
 
   if (reaxc) {
     inum = reaxc->list->inum;
     ilist = reaxc->list->ilist;
     numneigh = reaxc->list->numneigh;
     firstneigh = reaxc->list->firstneigh;
   } else {
     inum = list->inum;
     ilist = list->ilist;
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
   }
 
   // fill in the H matrix
   m_fill = 0;
   r_sqr = 0;
   for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     if (mask[i] & groupbit) {
       jlist = firstneigh[i];
       jnum = numneigh[i];
       H.firstnbr[i] = m_fill;
 
       for (jj = 0; jj < jnum; jj++) {
         j = jlist[jj];
 
         dx = x[j][0] - x[i][0];
         dy = x[j][1] - x[i][1];
         dz = x[j][2] - x[i][2];
         r_sqr = SQR(dx) + SQR(dy) + SQR(dz);
 
         flag = 0;
         if (r_sqr <= SQR(swb)) {
           if (j < n) flag = 1;
           else if (tag[i] < tag[j]) flag = 1;
           else if (tag[i] == tag[j]) {
             if (dz > SMALL) flag = 1;
             else if (fabs(dz) < SMALL) {
               if (dy > SMALL) flag = 1;
               else if (fabs(dy) < SMALL && dx > SMALL)
                 flag = 1;
 	    }
 	  }
 	}
 
         if (flag) {
           H.jlist[m_fill] = j;
           H.val[m_fill] = calculate_H( sqrt(r_sqr), shld[type[i]][type[j]]);
           m_fill++;
         }
       }
       H.numnbrs[i] = m_fill - H.firstnbr[i];
     }
   }
 
   if (m_fill >= H.m) {
     char str[128];
     sprintf(str,"H matrix size has been exceeded: m_fill=%d H.m=%d\n",
              m_fill, H.m);
     error->warning(FLERR,str);
     error->all(FLERR,"Fix qeq/reax has insufficient QEq matrix size");
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double FixQEqReax::calculate_H( double r, double gamma)
 {
   double Taper, denom;
 
   Taper = Tap[7] * r + Tap[6];
   Taper = Taper * r + Tap[5];
   Taper = Taper * r + Tap[4];
   Taper = Taper * r + Tap[3];
   Taper = Taper * r + Tap[2];
   Taper = Taper * r + Tap[1];
   Taper = Taper * r + Tap[0];
 
   denom = r * r * r + gamma;
   denom = pow(denom,0.3333333333333);
 
   return Taper * EV_TO_KCAL_PER_MOL / denom;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixQEqReax::CG( double *b, double *x)
 {
   int  i, j, imax;
   double tmp, alpha, beta, b_norm;
   double sig_old, sig_new;
 
   int nn, jj;
   int *ilist;
   if (reaxc) {
     nn = reaxc->list->inum;
     ilist = reaxc->list->ilist;
   } else {
     nn = list->inum;
     ilist = list->ilist;
   }
 
   imax = 200;
 
   pack_flag = 1;
   sparse_matvec( &H, x, q);
   comm->reverse_comm_fix(this); //Coll_Vector( q );
 
   vector_sum( r , 1.,  b, -1., q, nn);
 
   for (jj = 0; jj < nn; ++jj) {
     j = ilist[jj];
     if (atom->mask[j] & groupbit)
       d[j] = r[j] * Hdia_inv[j]; //pre-condition
   }
 
   b_norm = parallel_norm( b, nn);
   sig_new = parallel_dot( r, d, nn);
 
   for (i = 1; i < imax && sqrt(sig_new) / b_norm > tolerance; ++i) {
     comm->forward_comm_fix(this); //Dist_vector( d );
     sparse_matvec( &H, d, q );
     comm->reverse_comm_fix(this); //Coll_vector( q );
 
     tmp = parallel_dot( d, q, nn);
     alpha = sig_new / tmp;
 
     vector_add( x, alpha, d, nn );
     vector_add( r, -alpha, q, nn );
 
     // pre-conditioning
     for (jj = 0; jj < nn; ++jj) {
       j = ilist[jj];
       if (atom->mask[j] & groupbit)
         p[j] = r[j] * Hdia_inv[j];
     }
 
     sig_old = sig_new;
     sig_new = parallel_dot( r, p, nn);
 
     beta = sig_new / sig_old;
     vector_sum( d, 1., p, beta, d, nn );
   }
 
   if (i >= imax && comm->me == 0) {
     char str[128];
     sprintf(str,"Fix qeq/reax CG convergence failed after %d iterations "
             "at " BIGINT_FORMAT " step",i,update->ntimestep);
     error->warning(FLERR,str);
   }
 
   return i;
 }
 
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::sparse_matvec( sparse_matrix *A, double *x, double *b)
 {
   int i, j, itr_j;
   int nn, NN, ii;
   int *ilist;
 
   if (reaxc) {
     nn = reaxc->list->inum;
     NN = reaxc->list->inum + reaxc->list->gnum;
     ilist = reaxc->list->ilist;
   } else {
     nn = list->inum;
     NN = list->inum + list->gnum;
     ilist = list->ilist;
   }
 
   for (ii = 0; ii < nn; ++ii) {
     i = ilist[ii];
     if (atom->mask[i] & groupbit)
       b[i] = eta[ atom->type[i] ] * x[i];
   }
 
   for (ii = nn; ii < NN; ++ii) {
     i = ilist[ii];
     if (atom->mask[i] & groupbit)
       b[i] = 0;
   }
 
   for (ii = 0; ii < nn; ++ii) {
     i = ilist[ii];
     if (atom->mask[i] & groupbit) {
       for (itr_j=A->firstnbr[i]; itr_j<A->firstnbr[i]+A->numnbrs[i]; itr_j++) {
         j = A->jlist[itr_j];
         b[i] += A->val[itr_j] * x[j];
         b[j] += A->val[itr_j] * x[i];
       }
     }
   }
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::calculate_Q()
 {
   int i, k;
   double u, s_sum, t_sum;
   double *q = atom->q;
 
   int nn, ii;
   int *ilist;
 
   if (reaxc) {
     nn = reaxc->list->inum;
     ilist = reaxc->list->ilist;
   } else {
     nn = list->inum;
     ilist = list->ilist;
   }
 
   s_sum = parallel_vector_acc( s, nn);
   t_sum = parallel_vector_acc( t, nn);
   u = s_sum / t_sum;
 
   for (ii = 0; ii < nn; ++ii) {
     i = ilist[ii];
     if (atom->mask[i] & groupbit) {
       q[i] = s[i] - u * t[i];
 
       /* backup s & t */
       for (k = 4; k > 0; --k) {
         s_hist[i][k] = s_hist[i][k-1];
         t_hist[i][k] = t_hist[i][k-1];
       }
       s_hist[i][0] = s[i];
       t_hist[i][0] = t[i];
     }
   }
 
   pack_flag = 4;
   comm->forward_comm_fix(this); //Dist_vector( atom->q );
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixQEqReax::pack_forward_comm(int n, int *list, double *buf,
                                   int pbc_flag, int *pbc)
 {
   int m;
 
   if (pack_flag == 1)
     for(m = 0; m < n; m++) buf[m] = d[list[m]];
   else if (pack_flag == 2)
     for(m = 0; m < n; m++) buf[m] = s[list[m]];
   else if (pack_flag == 3)
     for(m = 0; m < n; m++) buf[m] = t[list[m]];
   else if (pack_flag == 4)
     for(m = 0; m < n; m++) buf[m] = atom->q[list[m]];
   else if (pack_flag == 5) {
     m = 0;
     for(int i = 0; i < n; i++) {
       int j = 2 * list[i];
       buf[m++] = d[j  ];
       buf[m++] = d[j+1];
     }
     return m;
   }
   return n;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::unpack_forward_comm(int n, int first, double *buf)
 {
   int i, m;
 
   if (pack_flag == 1)
     for(m = 0, i = first; m < n; m++, i++) d[i] = buf[m];
   else if (pack_flag == 2)
     for(m = 0, i = first; m < n; m++, i++) s[i] = buf[m];
   else if (pack_flag == 3)
     for(m = 0, i = first; m < n; m++, i++) t[i] = buf[m];
   else if (pack_flag == 4)
     for(m = 0, i = first; m < n; m++, i++) atom->q[i] = buf[m];
   else if (pack_flag == 5) {
     int last = first + n;
     m = 0;
     for(i = first; i < last; i++) {
       int j = 2 * i;
       d[j  ] = buf[m++];
       d[j+1] = buf[m++];
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixQEqReax::pack_reverse_comm(int n, int first, double *buf)
 {
   int i, m;
   if (pack_flag == 5) {
     m = 0;
     int last = first + n;
     for(i = first; i < last; i++) {
       int indxI = 2 * i;
       buf[m++] = q[indxI  ];
       buf[m++] = q[indxI+1];
     }
     return m;
   } else {
     for (m = 0, i = first; m < n; m++, i++) buf[m] = q[i];
     return n;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::unpack_reverse_comm(int n, int *list, double *buf)
 {
   if (pack_flag == 5) {
     int m = 0;
     for(int i = 0; i < n; i++) {
       int indxI = 2 * list[i];
       q[indxI  ] += buf[m++];
       q[indxI+1] += buf[m++];
     }
   } else {
     for (int m = 0; m < n; m++) q[list[m]] += buf[m];
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
 ------------------------------------------------------------------------- */
 
 double FixQEqReax::memory_usage()
 {
   double bytes;
 
   bytes = atom->nmax*nprev*2 * sizeof(double); // s_hist & t_hist
   bytes += atom->nmax*11 * sizeof(double); // storage
   bytes += n_cap*2 * sizeof(int); // matrix...
   bytes += m_cap * sizeof(int);
   bytes += m_cap * sizeof(double);
 
   if (dual_enabled)
     bytes += atom->nmax*4 * sizeof(double); // double size for q, d, r, and p
 
   return bytes;
 }
 
 /* ----------------------------------------------------------------------
    allocate fictitious charge arrays
 ------------------------------------------------------------------------- */
 
 void FixQEqReax::grow_arrays(int nmax)
 {
   memory->grow(s_hist,nmax,nprev,"qeq:s_hist");
   memory->grow(t_hist,nmax,nprev,"qeq:t_hist");
 }
 
 /* ----------------------------------------------------------------------
    copy values within fictitious charge arrays
 ------------------------------------------------------------------------- */
 
 void FixQEqReax::copy_arrays(int i, int j, int delflag)
 {
   for (int m = 0; m < nprev; m++) {
     s_hist[j][m] = s_hist[i][m];
     t_hist[j][m] = t_hist[i][m];
   }
 }
 
 /* ----------------------------------------------------------------------
    pack values in local atom-based array for exchange with another proc
 ------------------------------------------------------------------------- */
 
 int FixQEqReax::pack_exchange(int i, double *buf)
 {
   for (int m = 0; m < nprev; m++) buf[m] = s_hist[i][m];
   for (int m = 0; m < nprev; m++) buf[nprev+m] = t_hist[i][m];
   return nprev*2;
 }
 
 /* ----------------------------------------------------------------------
    unpack values in local atom-based array from exchange with another proc
 ------------------------------------------------------------------------- */
 
 int FixQEqReax::unpack_exchange(int nlocal, double *buf)
 {
   for (int m = 0; m < nprev; m++) s_hist[nlocal][m] = buf[m];
   for (int m = 0; m < nprev; m++) t_hist[nlocal][m] = buf[nprev+m];
   return nprev*2;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double FixQEqReax::parallel_norm( double *v, int n)
 {
   int  i;
   double my_sum, norm_sqr;
 
   int ii;
   int *ilist;
 
   if (reaxc)
     ilist = reaxc->list->ilist;
   else
     ilist = list->ilist;
 
   my_sum = 0.0;
   norm_sqr = 0.0;
   for (ii = 0; ii < n; ++ii) {
     i = ilist[ii];
     if (atom->mask[i] & groupbit)
       my_sum += SQR( v[i]);
   }
 
   MPI_Allreduce( &my_sum, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, world);
 
   return sqrt( norm_sqr);
 }
 
 /* ---------------------------------------------------------------------- */
 
 double FixQEqReax::parallel_dot( double *v1, double *v2, int n)
 {
   int  i;
   double my_dot, res;
 
   int ii;
   int *ilist;
 
   if (reaxc)
     ilist = reaxc->list->ilist;
   else
     ilist = list->ilist;
 
   my_dot = 0.0;
   res = 0.0;
   for (ii = 0; ii < n; ++ii) {
     i = ilist[ii];
     if (atom->mask[i] & groupbit)
       my_dot += v1[i] * v2[i];
   }
 
   MPI_Allreduce( &my_dot, &res, 1, MPI_DOUBLE, MPI_SUM, world);
 
   return res;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double FixQEqReax::parallel_vector_acc( double *v, int n)
 {
   int  i;
   double my_acc, res;
 
   int ii;
   int *ilist;
 
   if (reaxc)
     ilist = reaxc->list->ilist;
   else
     ilist = list->ilist;
 
   my_acc = 0.0;
   res = 0.0;
   for (ii = 0; ii < n; ++ii) {
     i = ilist[ii];
     if (atom->mask[i] & groupbit)
       my_acc += v[i];
   }
 
   MPI_Allreduce( &my_acc, &res, 1, MPI_DOUBLE, MPI_SUM, world);
 
   return res;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::vector_sum( double* dest, double c, double* v,
                                 double d, double* y, int k)
 {
   int kk;
   int *ilist;
 
   if (reaxc)
     ilist = reaxc->list->ilist;
   else
     ilist = list->ilist;
 
   for (--k; k>=0; --k) {
     kk = ilist[k];
     if (atom->mask[kk] & groupbit)
       dest[kk] = c * v[kk] + d * y[kk];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixQEqReax::vector_add( double* dest, double c, double* v, int k)
 {
   int kk;
   int *ilist;
 
   if (reaxc)
     ilist = reaxc->list->ilist;
   else
     ilist = list->ilist;
 
   for (--k; k>=0; --k) {
     kk = ilist[k];
     if (atom->mask[kk] & groupbit)
       dest[kk] += c * v[kk];
   }
 }