Page MenuHomec4science

pair_reax_c_kokkos.cpp
No OneTemporary

File Metadata

Created
Sat, Nov 9, 22:00

pair_reax_c_kokkos.cpp

/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Ray Shan (SNL), Stan Moore (SNL)
------------------------------------------------------------------------- */
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "pair_reax_c_kokkos.h"
#include "kokkos.h"
#include "atom_kokkos.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_request.h"
#include "neigh_list_kokkos.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "math_const.h"
#include "math_special.h"
#include "memory.h"
#include "error.h"
#include "atom_masks.h"
#include "reaxc_defs.h"
#include "reaxc_lookup.h"
#include "reaxc_tool_box.h"
#define TEAMSIZE 128
/* ---------------------------------------------------------------------- */
namespace LAMMPS_NS{
using namespace MathConst;
using namespace MathSpecial;
template<class DeviceType>
PairReaxCKokkos<DeviceType>::PairReaxCKokkos(LAMMPS *lmp) : PairReaxC(lmp)
{
respa_enable = 0;
cut_nbsq = cut_hbsq = cut_bosq = 0.0;
atomKK = (AtomKokkos *) atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = X_MASK | Q_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK;
datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
k_resize_bo = DAT::tdual_int_scalar("pair:resize_bo");
d_resize_bo = k_resize_bo.view<DeviceType>();
k_resize_hb = DAT::tdual_int_scalar("pair:resize_hb");
d_resize_hb = k_resize_hb.view<DeviceType>();
nmax = 0;
maxbo = 1;
maxhb = 1;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
PairReaxCKokkos<DeviceType>::~PairReaxCKokkos()
{
if (!copymode) {
memory->destroy_kokkos(k_eatom,eatom);
memory->destroy_kokkos(k_vatom,vatom);
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void PairReaxCKokkos<DeviceType>::allocate()
{
int n = atom->ntypes;
k_params_sing = Kokkos::DualView<params_sing*,typename DeviceType::array_layout,DeviceType>
("PairReaxC::params_sing",n+1);
paramssing = k_params_sing.d_view;
k_params_twbp = Kokkos::DualView<params_twbp**,typename DeviceType::array_layout,DeviceType>
("PairReaxC::params_twbp",n+1,n+1);
paramstwbp = k_params_twbp.d_view;
k_params_thbp = Kokkos::DualView<params_thbp***,typename DeviceType::array_layout,DeviceType>
("PairReaxC::params_thbp",n+1,n+1,n+1);
paramsthbp = k_params_thbp.d_view;
k_params_fbp = Kokkos::DualView<params_fbp****,typename DeviceType::array_layout,DeviceType>
("PairReaxC::params_fbp",n+1,n+1,n+1,n+1);
paramsfbp = k_params_fbp.d_view;
k_params_hbp = Kokkos::DualView<params_hbp***,typename DeviceType::array_layout,DeviceType>
("PairReaxC::params_hbp",n+1,n+1,n+1);
paramshbp = k_params_hbp.d_view;
k_tap = DAT::tdual_ffloat_1d("pair:tap",8);
d_tap = k_tap.d_view;
h_tap = k_tap.h_view;
}
/* ----------------------------------------------------------------------
init specific to this pair style
------------------------------------------------------------------------- */
template<class DeviceType>
void PairReaxCKokkos<DeviceType>::init_style()
{
PairReaxC::init_style();
// irequest = neigh request made by parent class
neighflag = lmp->kokkos->neighflag;
int irequest = neighbor->nrequest - 1;
neighbor->requests[irequest]->
kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value &&
!Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
neighbor->requests[irequest]->
kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
neighbor->requests[irequest]->ghost = 1;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
neighbor->requests[irequest]->ghost = 1;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with reax/c/kk");
}
allocate();
setup();
init_md();
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void PairReaxCKokkos<DeviceType>::setup()
{
int i,j,k,m;
int n = atom->ntypes;
// general parameters
for (i = 0; i < 39; i ++)
gp[i] = system->reax_param.gp.l[i];
p_boc1 = gp[0];
p_boc2 = gp[1];
// vdw parameters
vdwflag = system->reax_param.gp.vdw_type;
lgflag = control->lgflag;
// atom, bond, angle, dihedral, H-bond specific parameters
two_body_parameters *twbp;
// valence angle (3-body) parameters
three_body_header *thbh;
three_body_parameters *thbp;
// torsion angle (4-body) parameters
four_body_header *fbh;
four_body_parameters *fbp;
// hydrogen bond parameters
hbond_parameters *hbp;
for (i = 1; i <= n; i++) {
// general
k_params_sing.h_view(i).mass = system->reax_param.sbp[map[i]].mass;
// polarization
k_params_sing.h_view(i).chi = system->reax_param.sbp[map[i]].chi;
k_params_sing.h_view(i).eta = system->reax_param.sbp[map[i]].eta;
// bond order
k_params_sing.h_view(i).r_s = system->reax_param.sbp[map[i]].r_s;
k_params_sing.h_view(i).r_pi = system->reax_param.sbp[map[i]].r_pi;
k_params_sing.h_view(i).r_pi2 = system->reax_param.sbp[map[i]].r_pi_pi;
k_params_sing.h_view(i).valency = system->reax_param.sbp[map[i]].valency;
k_params_sing.h_view(i).valency_val = system->reax_param.sbp[map[i]].valency_val;
k_params_sing.h_view(i).valency_boc = system->reax_param.sbp[map[i]].valency_boc;
k_params_sing.h_view(i).valency_e = system->reax_param.sbp[map[i]].valency_e;
k_params_sing.h_view(i).nlp_opt = system->reax_param.sbp[map[i]].nlp_opt;
// multibody
k_params_sing.h_view(i).p_lp2 = system->reax_param.sbp[map[i]].p_lp2;
k_params_sing.h_view(i).p_ovun2 = system->reax_param.sbp[map[i]].p_ovun2;
k_params_sing.h_view(i).p_ovun5 = system->reax_param.sbp[map[i]].p_ovun5;
// angular
k_params_sing.h_view(i).p_val3 = system->reax_param.sbp[map[i]].p_val3;
k_params_sing.h_view(i).p_val5 = system->reax_param.sbp[map[i]].p_val5;
// hydrogen bond
k_params_sing.h_view(i).p_hbond = system->reax_param.sbp[map[i]].p_hbond;
for (j = 1; j <= n; j++) {
twbp = &(system->reax_param.tbp[map[i]][map[j]]);
// vdW
k_params_twbp.h_view(i,j).gamma = twbp->gamma;
k_params_twbp.h_view(i,j).gamma_w = twbp->gamma_w;
k_params_twbp.h_view(i,j).alpha = twbp->alpha;
k_params_twbp.h_view(i,j).r_vdw = twbp->r_vdW;
k_params_twbp.h_view(i,j).epsilon = twbp->D;
k_params_twbp.h_view(i,j).acore = twbp->acore;
k_params_twbp.h_view(i,j).ecore = twbp->ecore;
k_params_twbp.h_view(i,j).rcore = twbp->rcore;
k_params_twbp.h_view(i,j).lgre = twbp->lgre;
k_params_twbp.h_view(i,j).lgcij = twbp->lgcij;
// bond order
k_params_twbp.h_view(i,j).r_s = twbp->r_s;
k_params_twbp.h_view(i,j).r_pi = twbp->r_p;
k_params_twbp.h_view(i,j).r_pi2 = twbp->r_pp;
k_params_twbp.h_view(i,j).p_bo1 = twbp->p_bo1;
k_params_twbp.h_view(i,j).p_bo2 = twbp->p_bo2;
k_params_twbp.h_view(i,j).p_bo3 = twbp->p_bo3;
k_params_twbp.h_view(i,j).p_bo4 = twbp->p_bo4;
k_params_twbp.h_view(i,j).p_bo5 = twbp->p_bo5;
k_params_twbp.h_view(i,j).p_bo6 = twbp->p_bo6;
k_params_twbp.h_view(i,j).p_boc3 = twbp->p_boc3;
k_params_twbp.h_view(i,j).p_boc4 = twbp->p_boc4;
k_params_twbp.h_view(i,j).p_boc5 = twbp->p_boc5;
k_params_twbp.h_view(i,j).ovc = twbp->ovc;
k_params_twbp.h_view(i,j).v13cor = twbp->v13cor;
// bond energy
k_params_twbp.h_view(i,j).p_be1 = twbp->p_be1;
k_params_twbp.h_view(i,j).p_be2 = twbp->p_be2;
k_params_twbp.h_view(i,j).De_s = twbp->De_s;
k_params_twbp.h_view(i,j).De_p = twbp->De_p;
k_params_twbp.h_view(i,j).De_pp = twbp->De_pp;
// multibody
k_params_twbp.h_view(i,j).p_ovun1 = twbp->p_ovun1;
for (k = 1; k <= n; k++) {
// Angular
thbh = &(system->reax_param.thbp[map[i]][map[j]][map[k]]);
thbp = &(thbh->prm[0]);
k_params_thbp.h_view(i,j,k).cnt = thbh->cnt;
k_params_thbp.h_view(i,j,k).theta_00 = thbp->theta_00;
k_params_thbp.h_view(i,j,k).p_val1 = thbp->p_val1;
k_params_thbp.h_view(i,j,k).p_val2 = thbp->p_val2;
k_params_thbp.h_view(i,j,k).p_val4 = thbp->p_val4;
k_params_thbp.h_view(i,j,k).p_val7 = thbp->p_val7;
k_params_thbp.h_view(i,j,k).p_pen1 = thbp->p_pen1;
k_params_thbp.h_view(i,j,k).p_coa1 = thbp->p_coa1;
// Hydrogen Bond
hbp = &(system->reax_param.hbp[map[i]][map[j]][map[k]]);
k_params_hbp.h_view(i,j,k).p_hb1 = hbp->p_hb1;
k_params_hbp.h_view(i,j,k).p_hb2 = hbp->p_hb2;
k_params_hbp.h_view(i,j,k).p_hb3 = hbp->p_hb3;
k_params_hbp.h_view(i,j,k).r0_hb = hbp->r0_hb;
for (m = 1; m <= n; m++) {
// Torsion
fbh = &(system->reax_param.fbp[map[i]][map[j]][map[k]][map[m]]);
fbp = &(fbh->prm[0]);
k_params_fbp.h_view(i,j,k,m).p_tor1 = fbp->p_tor1;
k_params_fbp.h_view(i,j,k,m).p_cot1 = fbp->p_cot1;
k_params_fbp.h_view(i,j,k,m).V1 = fbp->V1;
k_params_fbp.h_view(i,j,k,m).V2 = fbp->V2;
k_params_fbp.h_view(i,j,k,m).V3 = fbp->V3;
}
}
}
}
k_params_sing.template modify<LMPHostType>();
k_params_twbp.template modify<LMPHostType>();
k_params_thbp.template modify<LMPHostType>();
k_params_fbp.template modify<LMPHostType>();
k_params_hbp.template modify<LMPHostType>();
// cutoffs
cut_nbsq = control->nonb_cut * control->nonb_cut;
cut_hbsq = control->hbond_cut * control->hbond_cut;
cut_bosq = control->bond_cut * control->bond_cut;
// bond order cutoffs
bo_cut = 0.01 * gp[29];
thb_cut = control->thb_cut;
thb_cutsq = 0.000010; //thb_cut*thb_cut;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void PairReaxCKokkos<DeviceType>::init_md()
{
// init_taper()
F_FLOAT d1, d7, swa, swa2, swa3, swb, swb2, swb3;
swa = control->nonb_low;
swb = control->nonb_cut;
if (fabs(swa) > 0.01 )
error->warning(FLERR,"Warning: non-zero lower Taper-radius cutoff");
if (swb < 0)
error->one(FLERR,"Negative upper Taper-radius cutoff");
else if (swb < 5) {
char str[128];
sprintf(str,"Warning: very low Taper-radius cutoff: %f\n", swb);
error->one(FLERR,str);
}
d1 = swb - swa;
d7 = powint(d1,7);
swa2 = swa * swa;
swa3 = swa * swa2;
swb2 = swb * swb;
swb3 = swb * swb2;
k_tap.h_view(7) = 20.0/d7;
k_tap.h_view(6) = -70.0 * (swa + swb) / d7;
k_tap.h_view(5) = 84.0 * (swa2 + 3.0*swa*swb + swb2) / d7;
k_tap.h_view(4) = -35.0 * (swa3 + 9.0*swa2*swb + 9.0*swa*swb2 + swb3 ) / d7;
k_tap.h_view(3) = 140.0 * (swa3*swb + 3.0*swa2*swb2 + swa*swb3 ) / d7;
k_tap.h_view(2) =-210.0 * (swa3*swb2 + swa2*swb3) / d7;
k_tap.h_view(1) = 140.0 * swa3 * swb3 / d7;
k_tap.h_view(0) = (-35.0*swa3*swb2*swb2 + 21.0*swa2*swb3*swb2 +
7.0*swa*swb3*swb3 + swb3*swb3*swb ) / d7;
k_tap.template modify<LMPHostType>();
k_tap.template sync<DeviceType>();
if ( control->tabulate ) {
int ntypes = atom->ntypes;
Init_Lookup_Tables();
k_LR = tdual_LR_lookup_table_kk_2d("lookup:LR",ntypes+1,ntypes+1);
d_LR = k_LR.d_view;
for (int i = 1; i <= ntypes; ++i) {
for (int j = i; j <= ntypes; ++j) {
int n = LR[i][j].n;
if (n == 0) continue;
k_LR.h_view(i,j).xmin = LR[i][j].xmin;
k_LR.h_view(i,j).xmax = LR[i][j].xmax;
k_LR.h_view(i,j).n = LR[i][j].n;
k_LR.h_view(i,j).dx = LR[i][j].dx;
k_LR.h_view(i,j).inv_dx = LR[i][j].inv_dx;
k_LR.h_view(i,j).a = LR[i][j].a;
k_LR.h_view(i,j).m = LR[i][j].m;
k_LR.h_view(i,j).c = LR[i][j].c;
k_LR.h_view(i,j).k_y = tdual_LR_data_1d("lookup:LR[i,j].y",n);
k_LR.h_view(i,j).k_H = tdual_cubic_spline_coef_1d("lookup:LR[i,j].H",n);
k_LR.h_view(i,j).k_vdW = tdual_cubic_spline_coef_1d("lookup:LR[i,j].vdW",n);
k_LR.h_view(i,j).k_CEvd = tdual_cubic_spline_coef_1d("lookup:LR[i,j].CEvd",n);
k_LR.h_view(i,j).k_ele = tdual_cubic_spline_coef_1d("lookup:LR[i,j].ele",n);
k_LR.h_view(i,j).k_CEclmb = tdual_cubic_spline_coef_1d("lookup:LR[i,j].CEclmb",n);
k_LR.h_view(i,j).d_y = k_LR.h_view(i,j).k_y.d_view;
k_LR.h_view(i,j).d_H = k_LR.h_view(i,j).k_H.d_view;
k_LR.h_view(i,j).d_vdW = k_LR.h_view(i,j).k_vdW.d_view;
k_LR.h_view(i,j).d_CEvd = k_LR.h_view(i,j).k_CEvd.d_view;
k_LR.h_view(i,j).d_ele = k_LR.h_view(i,j).k_ele.d_view;
k_LR.h_view(i,j).d_CEclmb = k_LR.h_view(i,j).k_CEclmb.d_view;
for (int k = 0; k < n; k++) {
k_LR.h_view(i,j).k_y.h_view(k) = LR[i][j].y[k];
k_LR.h_view(i,j).k_H.h_view(k) = LR[i][j].H[k];
k_LR.h_view(i,j).k_vdW.h_view(k) = LR[i][j].vdW[k];
k_LR.h_view(i,j).k_CEvd.h_view(k) = LR[i][j].CEvd[k];
k_LR.h_view(i,j).k_ele.h_view(k) = LR[i][j].ele[k];
k_LR.h_view(i,j).k_CEclmb.h_view(k) = LR[i][j].CEclmb[k];
}
k_LR.h_view(i,j).k_y.template modify<LMPHostType>();
k_LR.h_view(i,j).k_H.template modify<LMPHostType>();
k_LR.h_view(i,j).k_vdW.template modify<LMPHostType>();
k_LR.h_view(i,j).k_CEvd.template modify<LMPHostType>();
k_LR.h_view(i,j).k_ele.template modify<LMPHostType>();
k_LR.h_view(i,j).k_CEclmb.template modify<LMPHostType>();
k_LR.h_view(i,j).k_y.template sync<DeviceType>();
k_LR.h_view(i,j).k_H.template sync<DeviceType>();
k_LR.h_view(i,j).k_vdW.template sync<DeviceType>();
k_LR.h_view(i,j).k_CEvd.template sync<DeviceType>();
k_LR.h_view(i,j).k_ele.template sync<DeviceType>();
k_LR.h_view(i,j).k_CEclmb.template sync<DeviceType>();
}
}
k_LR.template modify<LMPHostType>();
k_LR.template sync<DeviceType>();
Deallocate_Lookup_Tables();
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
int PairReaxCKokkos<DeviceType>::Init_Lookup_Tables()
{
int i, j, r;
int num_atom_types;
double dr;
double *h, *fh, *fvdw, *fele, *fCEvd, *fCEclmb;
double v0_vdw, v0_ele, vlast_vdw, vlast_ele;
/* initializations */
v0_vdw = 0;
v0_ele = 0;
vlast_vdw = 0;
vlast_ele = 0;
num_atom_types = atom->ntypes;
dr = control->nonb_cut / control->tabulate;
h = (double*)
smalloc( (control->tabulate+2) * sizeof(double), "lookup:h", world );
fh = (double*)
smalloc( (control->tabulate+2) * sizeof(double), "lookup:fh", world );
fvdw = (double*)
smalloc( (control->tabulate+2) * sizeof(double), "lookup:fvdw", world );
fCEvd = (double*)
smalloc( (control->tabulate+2) * sizeof(double), "lookup:fCEvd", world );
fele = (double*)
smalloc( (control->tabulate+2) * sizeof(double), "lookup:fele", world );
fCEclmb = (double*)
smalloc( (control->tabulate+2) * sizeof(double), "lookup:fCEclmb", world );
LR = (LR_lookup_table**)
scalloc( num_atom_types+1, sizeof(LR_lookup_table*), "lookup:LR", world );
for( i = 0; i < num_atom_types+1; ++i )
LR[i] = (LR_lookup_table*)
scalloc( num_atom_types+1, sizeof(LR_lookup_table), "lookup:LR[i]", world );
for( i = 1; i <= num_atom_types; ++i ) {
for( j = i; j <= num_atom_types; ++j ) {
LR[i][j].xmin = 0;
LR[i][j].xmax = control->nonb_cut;
LR[i][j].n = control->tabulate + 2;
LR[i][j].dx = dr;
LR[i][j].inv_dx = control->tabulate / control->nonb_cut;
LR[i][j].y = (LR_data*)
smalloc( LR[i][j].n * sizeof(LR_data), "lookup:LR[i,j].y", world );
LR[i][j].H = (cubic_spline_coef*)
smalloc( LR[i][j].n*sizeof(cubic_spline_coef),"lookup:LR[i,j].H" ,
world );
LR[i][j].vdW = (cubic_spline_coef*)
smalloc( LR[i][j].n*sizeof(cubic_spline_coef),"lookup:LR[i,j].vdW",
world);
LR[i][j].CEvd = (cubic_spline_coef*)
smalloc( LR[i][j].n*sizeof(cubic_spline_coef),"lookup:LR[i,j].CEvd",
world);
LR[i][j].ele = (cubic_spline_coef*)
smalloc( LR[i][j].n*sizeof(cubic_spline_coef),"lookup:LR[i,j].ele",
world );
LR[i][j].CEclmb = (cubic_spline_coef*)
smalloc( LR[i][j].n*sizeof(cubic_spline_coef),
"lookup:LR[i,j].CEclmb", world );
for( r = 1; r <= control->tabulate; ++r ) {
LR_vdW_Coulomb(i, j, r * dr, &(LR[i][j].y[r]) );
h[r] = LR[i][j].dx;
fh[r] = LR[i][j].y[r].H;
fvdw[r] = LR[i][j].y[r].e_vdW;
fCEvd[r] = LR[i][j].y[r].CEvd;
fele[r] = LR[i][j].y[r].e_ele;
fCEclmb[r] = LR[i][j].y[r].CEclmb;
}
// init the start-end points
h[r] = LR[i][j].dx;
v0_vdw = LR[i][j].y[1].CEvd;
v0_ele = LR[i][j].y[1].CEclmb;
fh[r] = fh[r-1];
fvdw[r] = fvdw[r-1];
fCEvd[r] = fCEvd[r-1];
fele[r] = fele[r-1];
fCEclmb[r] = fCEclmb[r-1];
vlast_vdw = fCEvd[r-1];
vlast_ele = fele[r-1];
Natural_Cubic_Spline( &h[1], &fh[1],
&(LR[i][j].H[1]), control->tabulate+1, world );
Complete_Cubic_Spline( &h[1], &fvdw[1], v0_vdw, vlast_vdw,
&(LR[i][j].vdW[1]), control->tabulate+1,
world );
Natural_Cubic_Spline( &h[1], &fCEvd[1],
&(LR[i][j].CEvd[1]), control->tabulate+1,
world );
Complete_Cubic_Spline( &h[1], &fele[1], v0_ele, vlast_ele,
&(LR[i][j].ele[1]), control->tabulate+1,
world );
Natural_Cubic_Spline( &h[1], &fCEclmb[1],
&(LR[i][j].CEclmb[1]), control->tabulate+1,
world );
}// else{
// LR[i][j].n = 0;
//}//
}
free(h);
free(fh);
free(fvdw);
free(fCEvd);
free(fele);
free(fCEclmb);
return 1;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void PairReaxCKokkos<DeviceType>::Deallocate_Lookup_Tables()
{
int i, j;
int ntypes;
ntypes = atom->ntypes;
for( i = 0; i < ntypes; ++i ) {
for( j = i; j < ntypes; ++j )
if( LR[i][j].n ) {
sfree( LR[i][j].y, "LR[i,j].y" );
sfree( LR[i][j].H, "LR[i,j].H" );
sfree( LR[i][j].vdW, "LR[i,j].vdW" );
sfree( LR[i][j].CEvd, "LR[i,j].CEvd" );
sfree( LR[i][j].ele, "LR[i,j].ele" );
sfree( LR[i][j].CEclmb, "LR[i,j].CEclmb" );
}
sfree( LR[i], "LR[i]" );
}
sfree( LR, "LR" );
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void PairReaxCKokkos<DeviceType>::LR_vdW_Coulomb( int i, int j, double r_ij, LR_data *lr )
{
double p_vdW1 = system->reax_param.gp.l[28];
double p_vdW1i = 1.0 / p_vdW1;
double powr_vdW1, powgi_vdW1;
double tmp, fn13, exp1, exp2;
double Tap, dTap, dfn13;
double dr3gamij_1, dr3gamij_3;
double e_core, de_core;
double e_lg, de_lg, r_ij5, r_ij6, re6;
two_body_parameters *twbp;
twbp = &(system->reax_param.tbp[map[i]][map[j]]);
e_core = 0;
de_core = 0;
e_lg = de_lg = 0.0;
/* calculate taper and its derivative */
Tap = k_tap.h_view[7] * r_ij + k_tap.h_view[6];
Tap = Tap * r_ij + k_tap.h_view[5];
Tap = Tap * r_ij + k_tap.h_view[4];
Tap = Tap * r_ij + k_tap.h_view[3];
Tap = Tap * r_ij + k_tap.h_view[2];
Tap = Tap * r_ij + k_tap.h_view[1];
Tap = Tap * r_ij + k_tap.h_view[0];
dTap = 7*k_tap.h_view[7] * r_ij + 6*k_tap.h_view[6];
dTap = dTap * r_ij + 5*k_tap.h_view[5];
dTap = dTap * r_ij + 4*k_tap.h_view[4];
dTap = dTap * r_ij + 3*k_tap.h_view[3];
dTap = dTap * r_ij + 2*k_tap.h_view[2];
dTap += k_tap.h_view[1]/r_ij;
/*vdWaals Calculations*/
if(system->reax_param.gp.vdw_type==1 || system->reax_param.gp.vdw_type==3)
{ // shielding
powr_vdW1 = pow(r_ij, p_vdW1);
powgi_vdW1 = pow( 1.0 / twbp->gamma_w, p_vdW1);
fn13 = pow( powr_vdW1 + powgi_vdW1, p_vdW1i );
exp1 = exp( twbp->alpha * (1.0 - fn13 / twbp->r_vdW) );
exp2 = exp( 0.5 * twbp->alpha * (1.0 - fn13 / twbp->r_vdW) );
lr->e_vdW = Tap * twbp->D * (exp1 - 2.0 * exp2);
dfn13 = pow( powr_vdW1 + powgi_vdW1, p_vdW1i-1.0) * pow(r_ij, p_vdW1-2.0);
lr->CEvd = dTap * twbp->D * (exp1 - 2.0 * exp2) -
Tap * twbp->D * (twbp->alpha / twbp->r_vdW) * (exp1 - exp2) * dfn13;
}
else{ // no shielding
exp1 = exp( twbp->alpha * (1.0 - r_ij / twbp->r_vdW) );
exp2 = exp( 0.5 * twbp->alpha * (1.0 - r_ij / twbp->r_vdW) );
lr->e_vdW = Tap * twbp->D * (exp1 - 2.0 * exp2);
lr->CEvd = dTap * twbp->D * (exp1 - 2.0 * exp2) -
Tap * twbp->D * (twbp->alpha / twbp->r_vdW) * (exp1 - exp2) / r_ij;
}
if(system->reax_param.gp.vdw_type==2 || system->reax_param.gp.vdw_type==3)
{ // innner wall
e_core = twbp->ecore * exp(twbp->acore * (1.0-(r_ij/twbp->rcore)));
lr->e_vdW += Tap * e_core;
de_core = -(twbp->acore/twbp->rcore) * e_core;
lr->CEvd += dTap * e_core + Tap * de_core / r_ij;
// lg correction, only if lgvdw is yes
if (control->lgflag) {
r_ij5 = powint( r_ij, 5 );
r_ij6 = powint( r_ij, 6 );
re6 = powint( twbp->lgre, 6 );
e_lg = -(twbp->lgcij/( r_ij6 + re6 ));
lr->e_vdW += Tap * e_lg;
de_lg = -6.0 * e_lg * r_ij5 / ( r_ij6 + re6 ) ;
lr->CEvd += dTap * e_lg + Tap * de_lg/r_ij;
}
}
/* Coulomb calculations */
dr3gamij_1 = ( r_ij * r_ij * r_ij + twbp->gamma );
dr3gamij_3 = pow( dr3gamij_1 , 0.33333333333333 );
tmp = Tap / dr3gamij_3;
lr->H = EV_to_KCALpMOL * tmp;
lr->e_ele = C_ele * tmp;
lr->CEclmb = C_ele * ( dTap - Tap * r_ij / dr3gamij_1 ) / dr3gamij_3;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
{
bocnt = hbcnt = 0;
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
// reallocate per-atom arrays if necessary
if (eflag_atom) {
if (k_eatom.dimension_0()<maxeatom) {
memory->destroy_kokkos(k_eatom,eatom);
memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
d_eatom = k_eatom.d_view;
h_eatom = k_eatom.h_view;
v_eatom = k_eatom.view<DeviceType>();
}
}
if (vflag_atom) {
if (k_vatom.dimension_0()<maxvatom) {
memory->destroy_kokkos(k_vatom,vatom);
memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom");
d_vatom = k_vatom.d_view;
h_vatom = k_vatom.h_view;
v_vatom = k_vatom.view<DeviceType>();
}
}
atomKK->sync(execution_space,datamask_read);
k_params_sing.template sync<DeviceType>();
k_params_twbp.template sync<DeviceType>();
k_params_thbp.template sync<DeviceType>();
k_params_fbp.template sync<DeviceType>();
k_params_hbp.template sync<DeviceType>();
if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
else atomKK->modified(execution_space,F_MASK);
x = atomKK->k_x.view<DeviceType>();
f = atomKK->k_f.view<DeviceType>();
q = atomKK->k_q.view<DeviceType>();
tag = atomKK->k_tag.view<DeviceType>();
type = atomKK->k_type.view<DeviceType>();
nlocal = atomKK->nlocal;
nall = atom->nlocal + atom->nghost;
newton_pair = force->newton_pair;
const int inum = list->inum;
const int ignum = inum + list->gnum;
NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
d_numneigh = k_list->d_numneigh;
d_neighbors = k_list->d_neighbors;
d_ilist = k_list->d_ilist;
k_list->clean_copy();
if (eflag_global) {
for (int i = 0; i < 14; i++)
pvector[i] = 0.0;
}
copymode = 1;
EV_FLOAT_REAX ev;
EV_FLOAT_REAX ev_all;
const int teamsize = TEAMSIZE;
int maxtmp = 0;
// Polarization (self)
if (neighflag == HALF) {
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputePolar<HALF,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputePolar<HALF,0> >(0,inum),*this);
} else { //if (neighflag == HALFTHREAD) {
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputePolar<HALFTHREAD,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputePolar<HALFTHREAD,0> >(0,inum),*this);
}
DeviceType::fence();
ev_all += ev;
pvector[13] = ev.ecoul;
// LJ + Coulomb
if (control->tabulate) {
if (neighflag == HALF) {
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeTabulatedLJCoulomb<HALF,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeTabulatedLJCoulomb<HALF,0> >(0,inum),*this);
} else if (neighflag == HALFTHREAD) {
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeTabulatedLJCoulomb<HALFTHREAD,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeTabulatedLJCoulomb<HALFTHREAD,0> >(0,inum),*this);
} else if (neighflag == FULL) {
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeTabulatedLJCoulomb<FULL,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeTabulatedLJCoulomb<FULL,0> >(0,inum),*this);
}
} else {
if (neighflag == HALF) {
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeLJCoulomb<HALF,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeLJCoulomb<HALF,0> >(0,inum),*this);
} else if (neighflag == HALFTHREAD) {
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeLJCoulomb<HALFTHREAD,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeLJCoulomb<HALFTHREAD,0> >(0,inum),*this);
} else if (neighflag == FULL) {
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeLJCoulomb<FULL,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeLJCoulomb<FULL,0> >(0,inum),*this);
}
}
DeviceType::fence();
ev_all += ev;
pvector[10] = ev.evdwl;
pvector[11] = ev.ecoul;
if (atom->nmax > nmax) {
nmax = atom->nmax;
allocate_array();
}
// Neighbor lists for bond and hbond
// try, resize if necessary
int resize = 1;
while (resize) {
resize = 0;
k_resize_bo.h_view() = 0;
k_resize_bo.modify<LMPHostType>();
k_resize_bo.sync<DeviceType>();
k_resize_hb.h_view() = 0;
k_resize_hb.modify<LMPHostType>();
k_resize_hb.sync<DeviceType>();
// zero
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxZero>(0,nmax),*this);
DeviceType::fence();
if (neighflag == HALF)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBuildListsHalf<HALF> >(0,ignum),*this);
else if (neighflag == HALFTHREAD)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBuildListsHalf_LessAtomics<HALFTHREAD> >(0,ignum),*this);
else //(neighflag == FULL)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBuildListsFull>(0,ignum),*this);
DeviceType::fence();
k_resize_bo.modify<DeviceType>();
k_resize_bo.sync<LMPHostType>();
int resize_bo = k_resize_bo.h_view();
if (resize_bo) maxbo++;
k_resize_hb.modify<DeviceType>();
k_resize_hb.sync<LMPHostType>();
int resize_hb = k_resize_hb.h_view();
if (resize_hb) maxhb++;
resize = resize_bo || resize_hb;
if (resize) allocate_array();
}
// Bond order
if (neighflag == HALF) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder1>(0,ignum),*this);
DeviceType::fence();
} else if (neighflag == HALFTHREAD) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder1_LessAtomics>(0,ignum),*this);
DeviceType::fence();
}
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder2>(0,ignum),*this);
DeviceType::fence();
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder3>(0,ignum),*this);
DeviceType::fence();
// Bond energy
if (neighflag == HALF) {
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond1<HALF,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond1<HALF,0> >(0,inum),*this);
DeviceType::fence();
ev_all += ev;
pvector[0] = ev.evdwl;
} else { //if (neighflag == HALFTHREAD) {
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond1<HALFTHREAD,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond1<HALFTHREAD,0> >(0,inum),*this);
DeviceType::fence();
ev_all += ev;
pvector[0] = ev.evdwl;
}
// Multi-body corrections
if (neighflag == HALF) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti1<HALF,0> >(0,inum),*this);
DeviceType::fence();
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti2<HALF,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti2<HALF,0> >(0,inum),*this);
DeviceType::fence();
ev_all += ev;
} else { //if (neighflag == HALFTHREAD) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti1<HALFTHREAD,0> >(0,inum),*this);
DeviceType::fence();
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti2<HALFTHREAD,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti2<HALFTHREAD,0> >(0,inum),*this);
DeviceType::fence();
ev_all += ev;
}
pvector[2] = ev.ereax[0];
pvector[1] = ev.ereax[1]+ev.ereax[2];
pvector[3] = 0.0;
ev_all.evdwl += ev.ereax[0] + ev.ereax[1] + ev.ereax[2];
// Angular
if (neighflag == HALF) {
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeAngular<HALF,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeAngular<HALF,0> >(0,inum),*this);
DeviceType::fence();
ev_all += ev;
} else { //if (neighflag == HALFTHREAD) {
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeAngular<HALFTHREAD,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeAngular<HALFTHREAD,0> >(0,inum),*this);
DeviceType::fence();
ev_all += ev;
}
pvector[4] = ev.ereax[3];
pvector[5] = ev.ereax[4];
pvector[6] = ev.ereax[5];
ev_all.evdwl += ev.ereax[3] + ev.ereax[4] + ev.ereax[5];
// Torsion
if (neighflag == HALF) {
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeTorsion<HALF,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeTorsion<HALF,0> >(0,inum),*this);
DeviceType::fence();
ev_all += ev;
} else { //if (neighflag == HALFTHREAD) {
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeTorsion<HALFTHREAD,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeTorsion<HALFTHREAD,0> >(0,inum),*this);
DeviceType::fence();
ev_all += ev;
}
pvector[8] = ev.ereax[6];
pvector[9] = ev.ereax[7];
ev_all.evdwl += ev.ereax[6] + ev.ereax[7];
// Hydrogen Bond
if (cut_hbsq > 0.0) {
if (neighflag == HALF) {
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeHydrogen<HALF,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeHydrogen<HALF,0> >(0,inum),*this);
DeviceType::fence();
ev_all += ev;
} else { //if (neighflag == HALFTHREAD) {
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeHydrogen<HALFTHREAD,1> >(0,inum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeHydrogen<HALFTHREAD,0> >(0,inum),*this);
DeviceType::fence();
ev_all += ev;
}
}
pvector[7] = ev.ereax[8];
ev_all.evdwl += ev.ereax[8];
// Bond force
if (neighflag == HALF) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxUpdateBond<HALF> >(0,ignum),*this);
DeviceType::fence();
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond2<HALF,1> >(0,ignum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond2<HALF,0> >(0,ignum),*this);
DeviceType::fence();
ev_all += ev;
pvector[0] += ev.evdwl;
} else { //if (neighflag == HALFTHREAD) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxUpdateBond<HALFTHREAD> >(0,ignum),*this);
DeviceType::fence();
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond2<HALFTHREAD,1> >(0,ignum),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond2<HALFTHREAD,0> >(0,ignum),*this);
DeviceType::fence();
ev_all += ev;
pvector[0] += ev.evdwl;
}
if (eflag_global) {
eng_vdwl += ev_all.evdwl;
eng_coul += ev_all.ecoul;
}
if (vflag_global) {
virial[0] += ev_all.v[0];
virial[1] += ev_all.v[1];
virial[2] += ev_all.v[2];
virial[3] += ev_all.v[3];
virial[4] += ev_all.v[4];
virial[5] += ev_all.v[5];
}
if (vflag_fdotr) pair_virial_fdotr_compute(this);
if (eflag_atom) {
k_eatom.template modify<DeviceType>();
k_eatom.template sync<LMPHostType>();
}
if (vflag_atom) {
k_vatom.template modify<DeviceType>();
k_vatom.template sync<LMPHostType>();
}
copymode = 0;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputePolar<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT_REAX& ev) const {
const int i = d_ilist[ii];
const int itype = type(i);
const F_FLOAT qi = q(i);
const F_FLOAT chi = paramssing(itype).chi;
const F_FLOAT eta = paramssing(itype).eta;
const F_FLOAT epol = KCALpMOL_to_EV*(chi*qi+(eta/2.0)*qi*qi);
if (eflag) ev.ecoul += epol;
//if (eflag_atom) this->template ev_tally<NEIGHFLAG>(ev,i,i,epol,0.0,0.0,0.0,0.0);
if (eflag_atom) this->template e_tally_single<NEIGHFLAG>(ev,i,epol);
}
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputePolar<NEIGHFLAG,EVFLAG>, const int &ii) const {
EV_FLOAT_REAX ev;
this->template operator()<NEIGHFLAG,EVFLAG>(PairReaxComputePolar<NEIGHFLAG,EVFLAG>(), ii, ev);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeLJCoulomb<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT_REAX& ev) const {
// The f array is atomic for Half/Thread neighbor style
Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
F_FLOAT powr_vdw, powgi_vdw, fn13, dfn13, exp1, exp2, etmp;
F_FLOAT evdwl, fvdwl;
evdwl = fvdwl = 0.0;
const int i = d_ilist[ii];
const X_FLOAT xtmp = x(i,0);
const X_FLOAT ytmp = x(i,1);
const X_FLOAT ztmp = x(i,2);
const F_FLOAT qi = q(i);
const int itype = type(i);
const int itag = tag(i);
const int jnum = d_numneigh[i];
F_FLOAT fxtmp, fytmp, fztmp;
fxtmp = fytmp = fztmp = 0.0;
for (int jj = 0; jj < jnum; jj++) {
int j = d_neighbors(i,jj);
j &= NEIGHMASK;
const int jtype = type(j);
const int jtag = tag(j);
const F_FLOAT qj = q(j);
if (NEIGHFLAG != FULL) {
// skip half of the interactions
if (j >= nlocal) {
if (itag > jtag) {
if ((itag+jtag) % 2 == 0) continue;
} else if (itag < jtag) {
if ((itag+jtag) % 2 == 1) continue;
} else {
if (x(j,2) < ztmp) continue;
if (x(j,2) == ztmp && x(j,1) < ytmp) continue;
if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp) continue;
}
}
}
const X_FLOAT delx = x(j,0) - xtmp;
const X_FLOAT dely = x(j,1) - ytmp;
const X_FLOAT delz = x(j,2) - ztmp;
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
if (rsq > cut_nbsq) continue;
const F_FLOAT rij = sqrt(rsq);
// LJ energy/force
F_FLOAT Tap = d_tap[7] * rij + d_tap[6];
Tap = Tap * rij + d_tap[5];
Tap = Tap * rij + d_tap[4];
Tap = Tap * rij + d_tap[3];
Tap = Tap * rij + d_tap[2];
Tap = Tap * rij + d_tap[1];
Tap = Tap * rij + d_tap[0];
F_FLOAT dTap = 7*d_tap[7] * rij + 6*d_tap[6];
dTap = dTap * rij + 5*d_tap[5];
dTap = dTap * rij + 4*d_tap[4];
dTap = dTap * rij + 3*d_tap[3];
dTap = dTap * rij + 2*d_tap[2];
dTap += d_tap[1]/rij;
const F_FLOAT gamma_w = paramstwbp(itype,jtype).gamma_w;
const F_FLOAT alpha = paramstwbp(itype,jtype).alpha;
const F_FLOAT r_vdw = paramstwbp(itype,jtype).r_vdw;
const F_FLOAT epsilon = paramstwbp(itype,jtype).epsilon;
// shielding
if (vdwflag == 1 || vdwflag == 3) {
powr_vdw = pow(rij,gp[28]);
powgi_vdw = pow(1.0/gamma_w,gp[28]);
fn13 = pow(powr_vdw+powgi_vdw,1.0/gp[28]);
exp1 = exp(alpha*(1.0-fn13/r_vdw));
exp2 = exp(0.5*alpha*(1.0-fn13/r_vdw));
dfn13 = pow(powr_vdw+powgi_vdw,1.0/gp[28]-1.0)*pow(rij,gp[28]-2.0);
etmp = epsilon*(exp1-2.0*exp2);
evdwl = Tap*etmp;
fvdwl = dTap*etmp-Tap*epsilon*(alpha/r_vdw)*(exp1-exp2)*dfn13;
} else {
exp1 = exp(alpha*(1.0-rij/r_vdw));
exp2 = exp(0.5*alpha*(1.0-rij/r_vdw));
etmp = epsilon*(exp1-2.0*exp2);
evdwl = Tap*etmp;
fvdwl = dTap*etmp-Tap*epsilon*(alpha/r_vdw)*(exp1-exp2)*rij;
}
// inner wall
if (vdwflag == 2 || vdwflag == 3) {
const F_FLOAT ecore = paramstwbp(itype,jtype).ecore;
const F_FLOAT acore = paramstwbp(itype,jtype).acore;
const F_FLOAT rcore = paramstwbp(itype,jtype).rcore;
const F_FLOAT e_core = ecore*exp(acore*(1.0-(rij/rcore)));
const F_FLOAT de_core = -(acore/rcore)*e_core;
evdwl += Tap*e_core;
fvdwl += dTap*e_core+Tap*de_core/rij;
if (lgflag) {
const F_FLOAT lgre = paramstwbp(itype,jtype).lgre;
const F_FLOAT lgcij = paramstwbp(itype,jtype).lgcij;
const F_FLOAT rij5 = rsq*rsq*rij;
const F_FLOAT rij6 = rij5*rij;
const F_FLOAT re6 = lgre*lgre*lgre*lgre*lgre*lgre;
const F_FLOAT elg = -lgcij/(rij6+re6);
const F_FLOAT delg = -6.0*elg*rij5/(rij6+re6);
evdwl += Tap*elg;
fvdwl += dTap*elg+Tap*delg/rij;
}
}
// Coulomb energy/force
const F_FLOAT shld = paramstwbp(itype,jtype).gamma;
const F_FLOAT denom1 = rij * rij * rij + shld;
const F_FLOAT denom3 = pow(denom1,0.3333333333333);
const F_FLOAT ecoul = C_ele * qi*qj*Tap/denom3;
const F_FLOAT fcoul = C_ele * qi*qj*(dTap-Tap*rij/denom1)/denom3;
const F_FLOAT ftotal = fvdwl + fcoul;
fxtmp += delx*ftotal;
fytmp += dely*ftotal;
fztmp += delz*ftotal;
if (NEIGHFLAG != FULL) {
a_f(j,0) -= delx*ftotal;
a_f(j,1) -= dely*ftotal;
a_f(j,2) -= delz*ftotal;
}
if (NEIGHFLAG == FULL) {
if (eflag) ev.evdwl += 0.5*evdwl;
if (eflag) ev.ecoul += 0.5*ecoul;
} else {
if (eflag) ev.evdwl += evdwl;
if (eflag) ev.ecoul += ecoul;
}
if (vflag_either || eflag_atom) this->template ev_tally<NEIGHFLAG>(ev,i,j,evdwl+ecoul,-ftotal,delx,dely,delz);
}
a_f(i,0) += fxtmp;
a_f(i,1) += fytmp;
a_f(i,2) += fztmp;
}
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeLJCoulomb<NEIGHFLAG,EVFLAG>, const int &ii) const {
EV_FLOAT_REAX ev;
this->template operator()<NEIGHFLAG,EVFLAG>(PairReaxComputeLJCoulomb<NEIGHFLAG,EVFLAG>(), ii, ev);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeTabulatedLJCoulomb<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT_REAX& ev) const {
// The f array is atomic for Half/Thread neighbor style
Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
F_FLOAT powr_vdw, powgi_vdw, fn13, dfn13, exp1, exp2, etmp;
F_FLOAT evdwl, fvdwl;
evdwl = fvdwl = 0.0;
const int i = d_ilist[ii];
const X_FLOAT xtmp = x(i,0);
const X_FLOAT ytmp = x(i,1);
const X_FLOAT ztmp = x(i,2);
const F_FLOAT qi = q(i);
const int itype = type(i);
const int itag = tag(i);
const int jnum = d_numneigh[i];
F_FLOAT fxtmp, fytmp, fztmp;
fxtmp = fytmp = fztmp = 0.0;
for (int jj = 0; jj < jnum; jj++) {
int j = d_neighbors(i,jj);
j &= NEIGHMASK;
const int jtype = type(j);
const int jtag = tag(j);
const F_FLOAT qj = q(j);
if (NEIGHFLAG != FULL) {
// skip half of the interactions
if (j >= nlocal) {
if (itag > jtag) {
if ((itag+jtag) % 2 == 0) continue;
} else if (itag < jtag) {
if ((itag+jtag) % 2 == 1) continue;
} else {
if (x(j,2) < ztmp) continue;
if (x(j,2) == ztmp && x(j,1) < ytmp) continue;
if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp) continue;
}
}
}
const X_FLOAT delx = x(j,0) - xtmp;
const X_FLOAT dely = x(j,1) - ytmp;
const X_FLOAT delz = x(j,2) - ztmp;
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
if (rsq > cut_nbsq) continue;
const F_FLOAT rij = sqrt(rsq);
const int tmin = MIN( itype, jtype );
const int tmax = MAX( itype, jtype );
const LR_lookup_table_kk t = d_LR(tmin,tmax);
/* Cubic Spline Interpolation */
int r = (int)(rij * t.inv_dx);
if( r == 0 ) ++r;
const F_FLOAT base = (double)(r+1) * t.dx;
const F_FLOAT dif = rij - base;
const cubic_spline_coef vdW = t.d_vdW[r];
const cubic_spline_coef ele = t.d_ele[r];
const cubic_spline_coef CEvd = t.d_CEvd[r];
const cubic_spline_coef CEclmb = t.d_CEclmb[r];
const F_FLOAT evdwl = ((vdW.d*dif + vdW.c)*dif + vdW.b)*dif +
vdW.a;
const F_FLOAT ecoul = (((ele.d*dif + ele.c)*dif + ele.b)*dif +
ele.a)*qi*qj;
const F_FLOAT fvdwl = ((CEvd.d*dif + CEvd.c)*dif + CEvd.b)*dif +
CEvd.a;
const F_FLOAT fcoul = (((CEclmb.d*dif+CEclmb.c)*dif+CEclmb.b)*dif +
CEclmb.a)*qi*qj;
const F_FLOAT ftotal = fvdwl + fcoul;
fxtmp += delx*ftotal;
fytmp += dely*ftotal;
fztmp += delz*ftotal;
if (NEIGHFLAG != FULL) {
a_f(j,0) -= delx*ftotal;
a_f(j,1) -= dely*ftotal;
a_f(j,2) -= delz*ftotal;
}
if (NEIGHFLAG == FULL) {
if (eflag) ev.evdwl += 0.5*evdwl;
if (eflag) ev.ecoul += 0.5*ecoul;
} else {
if (eflag) ev.evdwl += evdwl;
if (eflag) ev.ecoul += ecoul;
}
if (vflag_either || eflag_atom) this->template ev_tally<NEIGHFLAG>(ev,i,j,evdwl+ecoul,-ftotal,delx,dely,delz);
}
a_f(i,0) += fxtmp;
a_f(i,1) += fytmp;
a_f(i,2) += fztmp;
}
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeTabulatedLJCoulomb<NEIGHFLAG,EVFLAG>, const int &ii) const {
EV_FLOAT_REAX ev;
this->template operator()<NEIGHFLAG,EVFLAG>(PairReaxComputeTabulatedLJCoulomb<NEIGHFLAG,EVFLAG>(), ii, ev);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void PairReaxCKokkos<DeviceType>::allocate_array()
{
if (cut_hbsq > 0.0) {
d_hb_first = typename AT::t_int_1d("reax/c/kk:hb_first",nmax);
d_hb_num = typename AT::t_int_1d("reax/c/kk:hb_num",nmax);
d_hb_list = typename AT::t_int_1d("reax/c/kk:hb_list",nmax*maxhb);
}
d_bo_first = typename AT::t_int_1d("reax/c/kk:bo_first",nmax);
d_bo_num = typename AT::t_int_1d("reax/c/kk:bo_num",nmax);
d_bo_list = typename AT::t_int_1d("reax/c/kk:bo_list",nmax*maxbo);
d_BO = typename AT::t_ffloat_2d_dl("reax/c/kk:BO",nmax,maxbo);
d_BO_s = typename AT::t_ffloat_2d_dl("reax/c/kk:BO",nmax,maxbo);
d_BO_pi = typename AT::t_ffloat_2d_dl("reax/c/kk:BO_pi",nmax,maxbo);
d_BO_pi2 = typename AT::t_ffloat_2d_dl("reax/c/kk:BO_pi2",nmax,maxbo);
d_dln_BOp_pix = typename AT::t_ffloat_2d_dl("reax/c/kk:d_dln_BOp_pix",nmax,maxbo);
d_dln_BOp_piy = typename AT::t_ffloat_2d_dl("reax/c/kk:d_dln_BOp_piy",nmax,maxbo);
d_dln_BOp_piz = typename AT::t_ffloat_2d_dl("reax/c/kk:d_dln_BOp_piz",nmax,maxbo);
d_dln_BOp_pi2x = typename AT::t_ffloat_2d_dl("reax/c/kk:d_dln_BOp_pi2x",nmax,maxbo);
d_dln_BOp_pi2y = typename AT::t_ffloat_2d_dl("reax/c/kk:d_dln_BOp_pi2y",nmax,maxbo);
d_dln_BOp_pi2z = typename AT::t_ffloat_2d_dl("reax/c/kk:d_dln_BOp_pi2z",nmax,maxbo);
d_C1dbo = typename AT::t_ffloat_2d_dl("reax/c/kk:d_C1dbo",nmax,maxbo);
d_C2dbo = typename AT::t_ffloat_2d_dl("reax/c/kk:d_C2dbo",nmax,maxbo);
d_C3dbo = typename AT::t_ffloat_2d_dl("reax/c/kk:d_C3dbo",nmax,maxbo);
d_C1dbopi = typename AT::t_ffloat_2d_dl("reax/c/kk:d_C1dbopi",nmax,maxbo);
d_C2dbopi = typename AT::t_ffloat_2d_dl("reax/c/kk:d_C2dbopi",nmax,maxbo);
d_C3dbopi = typename AT::t_ffloat_2d_dl("reax/c/kk:d_C3dbopi",nmax,maxbo);
d_C4dbopi = typename AT::t_ffloat_2d_dl("reax/c/kk:d_C4dbopi",nmax,maxbo);
d_C1dbopi2 = typename AT::t_ffloat_2d_dl("reax/c/kk:d_C1dbopi2",nmax,maxbo);
d_C2dbopi2 = typename AT::t_ffloat_2d_dl("reax/c/kk:d_C2dbopi2",nmax,maxbo);
d_C3dbopi2 = typename AT::t_ffloat_2d_dl("reax/c/kk:d_C3dbopi2",nmax,maxbo);
d_C4dbopi2 = typename AT::t_ffloat_2d_dl("reax/c/kk:d_C4dbopi2",nmax,maxbo);
d_dBOpx = typename AT::t_ffloat_2d_dl("reax/c/kk:dBOpx",nmax,maxbo);
d_dBOpy = typename AT::t_ffloat_2d_dl("reax/c/kk:dBOpy",nmax,maxbo);
d_dBOpz = typename AT::t_ffloat_2d_dl("reax/c/kk:dBOpz",nmax,maxbo);
d_dDeltap_self = typename AT::t_ffloat_2d_dl("reax/c/kk:dDeltap_self",nmax,3);
d_Deltap_boc = typename AT::t_ffloat_1d("reax/c/kk:Deltap_boc",nmax);
d_Deltap = typename AT::t_ffloat_1d("reax/c/kk:Deltap",nmax);
d_total_bo = typename AT::t_ffloat_1d("reax/c/kk:total_bo",nmax);
d_Cdbo = typename AT::t_ffloat_2d_dl("reax/c/kk:Cdbo",nmax,3*maxbo);
d_Cdbopi = typename AT::t_ffloat_2d_dl("reax/c/kk:Cdbopi",nmax,3*maxbo);
d_Cdbopi2 = typename AT::t_ffloat_2d_dl("reax/c/kk:Cdbopi2",nmax,3*maxbo);
d_Delta = typename AT::t_ffloat_1d("reax/c/kk:Delta",nmax);
d_Delta_boc = typename AT::t_ffloat_1d("reax/c/kk:Delta_boc",nmax);
d_dDelta_lp = typename AT::t_ffloat_1d("reax/c/kk:dDelta_lp",nmax);
d_Delta_lp = typename AT::t_ffloat_1d("reax/c/kk:Delta_lp",nmax);
d_Delta_lp_temp = typename AT::t_ffloat_1d("reax/c/kk:Delta_lp_temp",nmax);
d_CdDelta = typename AT::t_ffloat_1d("reax/c/kk:CdDelta",nmax);
d_sum_ovun = typename AT::t_ffloat_2d_dl("reax/c/kk:sum_ovun",nmax,3);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxZero, const int &n) const {
d_total_bo(n) = 0.0;
d_CdDelta(n) = 0.0;
if (neighflag != FULL) {
d_bo_num(n) = 0.0;
d_hb_num(n) = 0.0;
}
for (int j = 0; j < 3; j++)
d_dDeltap_self(n,j) = 0.0;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxBuildListsFull, const int &ii) const {
if (d_resize_bo() || d_resize_hb())
return;
const int i = d_ilist[ii];
const X_FLOAT xtmp = x(i,0);
const X_FLOAT ytmp = x(i,1);
const X_FLOAT ztmp = x(i,2);
const int itype = type(i);
const int jnum = d_numneigh[i];
F_FLOAT C12, C34, C56, BO_s, BO_pi, BO_pi2, BO, delij[3], dBOp_i[3], dln_BOp_pi_i[3], dln_BOp_pi2_i[3];
F_FLOAT total_bo = 0.0;
int j_index = i*maxbo;
d_bo_first[i] = j_index;
const int bo_first_i = j_index;
int ihb = -1;
int jhb = -1;
int jhb_top = -1;
int hb_index = i*maxhb;
int hb_first_i;
if (cut_hbsq > 0.0) {
ihb = paramssing(itype).p_hbond;
if (ihb == 1) {
d_hb_first[i] = hb_index;
hb_first_i = hb_index;
}
}
for (int jj = 0; jj < jnum; jj++) {
int j = d_neighbors(i,jj);
j &= NEIGHMASK;
delij[0] = x(j,0) - xtmp;
delij[1] = x(j,1) - ytmp;
delij[2] = x(j,2) - ztmp;
const F_FLOAT rsq = delij[0]*delij[0] + delij[1]*delij[1] + delij[2]*delij[2];
double cutoffsq;
if(i < nlocal) cutoffsq = MAX(cut_bosq,cut_hbsq);
else cutoffsq = cut_bosq;
if (rsq > cutoffsq) continue;
const int jtype = type(j);
// hbond list
if (i < nlocal && cut_hbsq > 0.0 && (ihb == 1 || ihb == 2) && rsq <= cut_hbsq) {
jhb = paramssing(jtype).p_hbond;
if( ihb == 1 && jhb == 2) {
const int jj_index = hb_index - hb_first_i;
if (jj_index >= maxhb) {
d_resize_hb() = 1;
return;
}
d_hb_list[hb_index] = j;
hb_index++;
}
}
// bond_list
const F_FLOAT rij = sqrt(rsq);
const F_FLOAT p_bo1 = paramstwbp(itype,jtype).p_bo1;
const F_FLOAT p_bo2 = paramstwbp(itype,jtype).p_bo2;
const F_FLOAT p_bo3 = paramstwbp(itype,jtype).p_bo3;
const F_FLOAT p_bo4 = paramstwbp(itype,jtype).p_bo4;
const F_FLOAT p_bo5 = paramstwbp(itype,jtype).p_bo5;
const F_FLOAT p_bo6 = paramstwbp(itype,jtype).p_bo6;
const F_FLOAT r_s = paramstwbp(itype,jtype).r_s;
const F_FLOAT r_pi = paramstwbp(itype,jtype).r_pi;
const F_FLOAT r_pi2 = paramstwbp(itype,jtype).r_pi2;
if (paramssing(itype).r_s > 0.0 && paramssing(jtype).r_s > 0.0) {
C12 = p_bo1*pow(rij/r_s,p_bo2);
BO_s = (1.0+bo_cut)*exp(C12);
}
else BO_s = C12 = 0.0;
if (paramssing(itype).r_pi > 0.0 && paramssing(jtype).r_pi > 0.0) {
C34 = p_bo3*pow(rij/r_pi,p_bo4);
BO_pi = exp(C34);
}
else BO_pi = C34 = 0.0;
if (paramssing(itype).r_pi2 > 0.0 && paramssing(jtype).r_pi2 > 0.0) {
C56 = p_bo5*pow(rij/r_pi2,p_bo6);
BO_pi2 = exp(C56);
}
else BO_pi2 = C56 = 0.0;
BO = BO_s + BO_pi + BO_pi2;
if (BO < bo_cut) continue;
const int jj_index = j_index - bo_first_i;
if (jj_index >= maxbo) {
d_resize_bo() = 1;
return;
}
d_bo_list[j_index] = j;
// from BondOrder1
d_BO(i,jj_index) = BO;
d_BO_s(i,jj_index) = BO_s;
d_BO_pi(i,jj_index) = BO_pi;
d_BO_pi2(i,jj_index) = BO_pi2;
F_FLOAT Cln_BOp_s = p_bo2 * C12 / rij / rij;
F_FLOAT Cln_BOp_pi = p_bo4 * C34 / rij / rij;
F_FLOAT Cln_BOp_pi2 = p_bo6 * C56 / rij / rij;
if (nlocal == 0)
Cln_BOp_s = Cln_BOp_pi = Cln_BOp_pi2 = 0.0;
for (int d = 0; d < 3; d++) dln_BOp_pi_i[d] = -(BO_pi*Cln_BOp_pi)*delij[d];
for (int d = 0; d < 3; d++) dln_BOp_pi2_i[d] = -(BO_pi2*Cln_BOp_pi2)*delij[d];
for (int d = 0; d < 3; d++) dBOp_i[d] = -(BO_s*Cln_BOp_s+BO_pi*Cln_BOp_pi+BO_pi2*Cln_BOp_pi2)*delij[d];
for (int d = 0; d < 3; d++) d_dDeltap_self(i,d) += dBOp_i[d];
d_dln_BOp_pix(i,jj_index) = dln_BOp_pi_i[0];
d_dln_BOp_piy(i,jj_index) = dln_BOp_pi_i[1];
d_dln_BOp_piz(i,jj_index) = dln_BOp_pi_i[2];
d_dln_BOp_pi2x(i,jj_index) = dln_BOp_pi2_i[0];
d_dln_BOp_pi2y(i,jj_index) = dln_BOp_pi2_i[1];
d_dln_BOp_pi2z(i,jj_index) = dln_BOp_pi2_i[2];
d_dBOpx(i,jj_index) = dBOp_i[0];
d_dBOpy(i,jj_index) = dBOp_i[1];
d_dBOpz(i,jj_index) = dBOp_i[2];
d_BO(i,jj_index) -= bo_cut;
d_BO_s(i,jj_index) -= bo_cut;
total_bo += d_BO(i,jj_index);
j_index++;
}
d_bo_num[i] = j_index - d_bo_first[i];
if (cut_hbsq > 0.0 && ihb == 1) d_hb_num[i] = hb_index - d_hb_first[i];
d_total_bo[i] += total_bo;
const F_FLOAT val_i = paramssing(itype).valency;
d_Deltap[i] = d_total_bo[i] - val_i;
d_Deltap_boc[i] = d_total_bo[i] - paramssing(itype).valency_val;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxBuildListsHalf<NEIGHFLAG>, const int &ii) const {
if (d_resize_bo() || d_resize_hb())
return;
Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_dDeltap_self = d_dDeltap_self;
Kokkos::View<F_FLOAT*, typename DAT::t_float_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_total_bo = d_total_bo;
const int i = d_ilist[ii];
const X_FLOAT xtmp = x(i,0);
const X_FLOAT ytmp = x(i,1);
const X_FLOAT ztmp = x(i,2);
const int itype = type(i);
const int itag = tag(i);
const int jnum = d_numneigh[i];
F_FLOAT C12, C34, C56, BO_s, BO_pi, BO_pi2, BO, delij[3], dBOp_i[3], dln_BOp_pi_i[3], dln_BOp_pi2_i[3];
F_FLOAT total_bo = 0.0;
int j_index,i_index;
d_bo_first[i] = i*maxbo;
const int bo_first_i = d_bo_first[i];
int ihb = -1;
int jhb = -1;
int jhb_top = -1;
int hb_first_i;
if (cut_hbsq > 0.0) {
ihb = paramssing(itype).p_hbond;
if (ihb == 1) {
d_hb_first[i] = i*maxhb;
hb_first_i = d_hb_first[i];
}
}
for (int jj = 0; jj < jnum; jj++) {
int j = d_neighbors(i,jj);
j &= NEIGHMASK;
const int jtag = tag(j);
d_bo_first[j] = j*maxbo;
d_hb_first[j] = j*maxhb;
const int jtype = type(j);
delij[0] = x(j,0) - xtmp;
delij[1] = x(j,1) - ytmp;
delij[2] = x(j,2) - ztmp;
const F_FLOAT rsq = delij[0]*delij[0] + delij[1]*delij[1] + delij[2]*delij[2];
double cutoffsq;
if(i < nlocal) cutoffsq = MAX(cut_bosq,cut_hbsq);
else cutoffsq = cut_bosq;
if (rsq > cutoffsq) continue;
// hbond list
if (i < nlocal && cut_hbsq > 0.0 && (ihb == 1 || ihb == 2) && rsq <= cut_hbsq) {
jhb = paramssing(jtype).p_hbond;
if( ihb == 1 && jhb == 2) {
if (NEIGHFLAG == HALF) {
j_index = hb_first_i + d_hb_num[i];
d_hb_num[i]++;
} else {
j_index = hb_first_i + Kokkos::atomic_fetch_add(&d_hb_num[i],1);
}
const int jj_index = j_index - hb_first_i;
if (jj_index >= maxhb) {
d_resize_hb() = 1;
return;
}
d_hb_list[j_index] = j;
} else if ( j < nlocal && ihb == 2 && jhb == 1) {
if (NEIGHFLAG == HALF) {
i_index = d_hb_first[j] + d_hb_num[j];
d_hb_num[j]++;
} else {
i_index = d_hb_first[j] + Kokkos::atomic_fetch_add(&d_hb_num[j],1);
}
const int ii_index = i_index - d_hb_first[j];
if (ii_index >= maxhb) {
d_resize_hb() = 1;
return;
}
d_hb_list[i_index] = i;
}
}
// bond_list
const F_FLOAT rij = sqrt(rsq);
const F_FLOAT p_bo1 = paramstwbp(itype,jtype).p_bo1;
const F_FLOAT p_bo2 = paramstwbp(itype,jtype).p_bo2;
const F_FLOAT p_bo3 = paramstwbp(itype,jtype).p_bo3;
const F_FLOAT p_bo4 = paramstwbp(itype,jtype).p_bo4;
const F_FLOAT p_bo5 = paramstwbp(itype,jtype).p_bo5;
const F_FLOAT p_bo6 = paramstwbp(itype,jtype).p_bo6;
const F_FLOAT r_s = paramstwbp(itype,jtype).r_s;
const F_FLOAT r_pi = paramstwbp(itype,jtype).r_pi;
const F_FLOAT r_pi2 = paramstwbp(itype,jtype).r_pi2;
if (paramssing(itype).r_s > 0.0 && paramssing(jtype).r_s > 0.0) {
C12 = p_bo1*pow(rij/r_s,p_bo2);
BO_s = (1.0+bo_cut)*exp(C12);
}
else BO_s = C12 = 0.0;
if (paramssing(itype).r_pi > 0.0 && paramssing(jtype).r_pi > 0.0) {
C34 = p_bo3*pow(rij/r_pi,p_bo4);
BO_pi = exp(C34);
}
else BO_pi = C34 = 0.0;
if (paramssing(itype).r_pi2 > 0.0 && paramssing(jtype).r_pi2 > 0.0) {
C56 = p_bo5*pow(rij/r_pi2,p_bo6);
BO_pi2 = exp(C56);
}
else BO_pi2 = C56 = 0.0;
BO = BO_s + BO_pi + BO_pi2;
if (BO < bo_cut) continue;
if (NEIGHFLAG == HALF) {
j_index = bo_first_i + d_bo_num[i];
i_index = d_bo_first[j] + d_bo_num[j];
d_bo_num[i]++;
d_bo_num[j]++;
} else {
j_index = bo_first_i + Kokkos::atomic_fetch_add(&d_bo_num[i],1);
i_index = d_bo_first[j] + Kokkos::atomic_fetch_add(&d_bo_num[j],1);
}
const int jj_index = j_index - bo_first_i;
const int ii_index = i_index - d_bo_first[j];
if (jj_index >= maxbo || ii_index >= maxbo) {
d_resize_bo() = 1;
return;
}
d_bo_list[j_index] = j;
d_bo_list[i_index] = i;
// from BondOrder1
d_BO(i,jj_index) = BO;
d_BO_s(i,jj_index) = BO_s;
d_BO_pi(i,jj_index) = BO_pi;
d_BO_pi2(i,jj_index) = BO_pi2;
d_BO(j,ii_index) = BO;
d_BO_s(j,ii_index) = BO_s;
d_BO_pi(j,ii_index) = BO_pi;
d_BO_pi2(j,ii_index) = BO_pi2;
F_FLOAT Cln_BOp_s = p_bo2 * C12 / rij / rij;
F_FLOAT Cln_BOp_pi = p_bo4 * C34 / rij / rij;
F_FLOAT Cln_BOp_pi2 = p_bo6 * C56 / rij / rij;
if (nlocal == 0)
Cln_BOp_s = Cln_BOp_pi = Cln_BOp_pi2 = 0.0;
for (int d = 0; d < 3; d++) dln_BOp_pi_i[d] = -(BO_pi*Cln_BOp_pi)*delij[d];
for (int d = 0; d < 3; d++) dln_BOp_pi2_i[d] = -(BO_pi2*Cln_BOp_pi2)*delij[d];
for (int d = 0; d < 3; d++) dBOp_i[d] = -(BO_s*Cln_BOp_s+BO_pi*Cln_BOp_pi+BO_pi2*Cln_BOp_pi2)*delij[d];
for (int d = 0; d < 3; d++) a_dDeltap_self(i,d) += dBOp_i[d];
for (int d = 0; d < 3; d++) a_dDeltap_self(j,d) += -dBOp_i[d];
d_dln_BOp_pix(i,jj_index) = dln_BOp_pi_i[0];
d_dln_BOp_piy(i,jj_index) = dln_BOp_pi_i[1];
d_dln_BOp_piz(i,jj_index) = dln_BOp_pi_i[2];
d_dln_BOp_pix(j,ii_index) = -dln_BOp_pi_i[0];
d_dln_BOp_piy(j,ii_index) = -dln_BOp_pi_i[1];
d_dln_BOp_piz(j,ii_index) = -dln_BOp_pi_i[2];
d_dln_BOp_pi2x(i,jj_index) = dln_BOp_pi2_i[0];
d_dln_BOp_pi2y(i,jj_index) = dln_BOp_pi2_i[1];
d_dln_BOp_pi2z(i,jj_index) = dln_BOp_pi2_i[2];
d_dln_BOp_pi2x(j,ii_index) = -dln_BOp_pi2_i[0];
d_dln_BOp_pi2y(j,ii_index) = -dln_BOp_pi2_i[1];
d_dln_BOp_pi2z(j,ii_index) = -dln_BOp_pi2_i[2];
d_dBOpx(i,jj_index) = dBOp_i[0];
d_dBOpy(i,jj_index) = dBOp_i[1];
d_dBOpz(i,jj_index) = dBOp_i[2];
d_dBOpx(j,ii_index) = -dBOp_i[0];
d_dBOpy(j,ii_index) = -dBOp_i[1];
d_dBOpz(j,ii_index) = -dBOp_i[2];
d_BO(i,jj_index) -= bo_cut;
d_BO(j,ii_index) -= bo_cut;
d_BO_s(i,jj_index) -= bo_cut;
d_BO_s(j,ii_index) -= bo_cut;
total_bo += d_BO(i,jj_index);
a_total_bo[j] += d_BO(j,ii_index);
}
a_total_bo[i] += total_bo;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxBondOrder1, const int &ii) const {
const int i = d_ilist[ii];
const int itype = type(i);
const F_FLOAT val_i = paramssing(itype).valency;
d_Deltap[i] = d_total_bo[i] - val_i;
d_Deltap_boc[i] = d_total_bo[i] - paramssing(itype).valency_val;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxBuildListsHalf_LessAtomics<NEIGHFLAG>, const int &ii) const {
if (d_resize_bo() || d_resize_hb())
return;
const int i = d_ilist[ii];
const X_FLOAT xtmp = x(i,0);
const X_FLOAT ytmp = x(i,1);
const X_FLOAT ztmp = x(i,2);
const int itype = type(i);
const int itag = tag(i);
const int jnum = d_numneigh[i];
F_FLOAT C12, C34, C56, BO_s, BO_pi, BO_pi2, BO, delij[3], dBOp_i[3];
F_FLOAT total_bo = 0.0;
int j_index,i_index;
d_bo_first[i] = i*maxbo;
const int bo_first_i = d_bo_first[i];
int ihb = -1;
int jhb = -1;
int jhb_top = -1;
int hb_first_i;
if (cut_hbsq > 0.0) {
ihb = paramssing(itype).p_hbond;
if (ihb == 1) {
d_hb_first[i] = i*maxhb;
hb_first_i = d_hb_first[i];
}
}
for (int jj = 0; jj < jnum; jj++) {
int j = d_neighbors(i,jj);
j &= NEIGHMASK;
const int jtag = tag(j);
d_bo_first[j] = j*maxbo;
d_hb_first[j] = j*maxhb;
const int jtype = type(j);
delij[0] = x(j,0) - xtmp;
delij[1] = x(j,1) - ytmp;
delij[2] = x(j,2) - ztmp;
const F_FLOAT rsq = delij[0]*delij[0] + delij[1]*delij[1] + delij[2]*delij[2];
double cutoffsq;
if(i < nlocal) cutoffsq = MAX(cut_bosq,cut_hbsq);
else cutoffsq = cut_bosq;
if (rsq > cutoffsq) continue;
// hbond list
if (i < nlocal && cut_hbsq > 0.0 && (ihb == 1 || ihb == 2) && rsq <= cut_hbsq) {
jhb = paramssing(jtype).p_hbond;
if( ihb == 1 && jhb == 2) {
if (NEIGHFLAG == HALF) {
j_index = hb_first_i + d_hb_num[i];
d_hb_num[i]++;
} else {
j_index = hb_first_i + Kokkos::atomic_fetch_add(&d_hb_num[i],1);
}
const int jj_index = j_index - hb_first_i;
if (jj_index >= maxhb) {
d_resize_hb() = 1;
return;
}
d_hb_list[j_index] = j;
} else if ( j < nlocal && ihb == 2 && jhb == 1) {
if (NEIGHFLAG == HALF) {
i_index = d_hb_first[j] + d_hb_num[j];
d_hb_num[j]++;
} else {
i_index = d_hb_first[j] + Kokkos::atomic_fetch_add(&d_hb_num[j],1);
}
const int ii_index = i_index - d_hb_first[j];
if (ii_index >= maxhb) {
d_resize_hb() = 1;
return;
}
d_hb_list[i_index] = i;
}
}
// bond_list
const F_FLOAT rij = sqrt(rsq);
const F_FLOAT p_bo1 = paramstwbp(itype,jtype).p_bo1;
const F_FLOAT p_bo2 = paramstwbp(itype,jtype).p_bo2;
const F_FLOAT p_bo3 = paramstwbp(itype,jtype).p_bo3;
const F_FLOAT p_bo4 = paramstwbp(itype,jtype).p_bo4;
const F_FLOAT p_bo5 = paramstwbp(itype,jtype).p_bo5;
const F_FLOAT p_bo6 = paramstwbp(itype,jtype).p_bo6;
const F_FLOAT r_s = paramstwbp(itype,jtype).r_s;
const F_FLOAT r_pi = paramstwbp(itype,jtype).r_pi;
const F_FLOAT r_pi2 = paramstwbp(itype,jtype).r_pi2;
if (paramssing(itype).r_s > 0.0 && paramssing(jtype).r_s > 0.0) {
C12 = p_bo1*pow(rij/r_s,p_bo2);
BO_s = (1.0+bo_cut)*exp(C12);
}
else BO_s = C12 = 0.0;
if (paramssing(itype).r_pi > 0.0 && paramssing(jtype).r_pi > 0.0) {
C34 = p_bo3*pow(rij/r_pi,p_bo4);
BO_pi = exp(C34);
}
else BO_pi = C34 = 0.0;
if (paramssing(itype).r_pi2 > 0.0 && paramssing(jtype).r_pi2 > 0.0) {
C56 = p_bo5*pow(rij/r_pi2,p_bo6);
BO_pi2 = exp(C56);
}
else BO_pi2 = C56 = 0.0;
BO = BO_s + BO_pi + BO_pi2;
if (BO < bo_cut) continue;
if (NEIGHFLAG == HALF) {
j_index = bo_first_i + d_bo_num[i];
i_index = d_bo_first[j] + d_bo_num[j];
d_bo_num[i]++;
d_bo_num[j]++;
} else {
j_index = bo_first_i + Kokkos::atomic_fetch_add(&d_bo_num[i],1);
i_index = d_bo_first[j] + Kokkos::atomic_fetch_add(&d_bo_num[j],1);
}
const int jj_index = j_index - bo_first_i;
const int ii_index = i_index - d_bo_first[j];
if (jj_index >= maxbo || ii_index >= maxbo) {
d_resize_bo() = 1;
return;
}
d_bo_list[j_index] = j;
d_bo_list[i_index] = i;
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxBondOrder1_LessAtomics, const int &ii) const {
F_FLOAT C12, C34, C56, BO_s, BO_pi, BO_pi2, BO, delij[3], dBOp_i[3], dln_BOp_pi_i[3], dln_BOp_pi2_i[3];
const int i = d_ilist[ii];
const X_FLOAT xtmp = x(i,0);
const X_FLOAT ytmp = x(i,1);
const X_FLOAT ztmp = x(i,2);
const int itype = type(i);
const int j_start = d_bo_first[i];
const int j_end = j_start + d_bo_num[i];
F_FLOAT total_bo = 0.0;
for (int jj = j_start; jj < j_end; jj++) {
int j = d_bo_list[jj];
j &= NEIGHMASK;
delij[0] = x(j,0) - xtmp;
delij[1] = x(j,1) - ytmp;
delij[2] = x(j,2) - ztmp;
const F_FLOAT rsq = delij[0]*delij[0] + delij[1]*delij[1] + delij[2]*delij[2];
const F_FLOAT rij = sqrt(rsq);
const int jtype = type(j);
const int j_index = jj - j_start;
// calculate uncorrected BO and total bond order
const F_FLOAT p_bo1 = paramstwbp(itype,jtype).p_bo1;
const F_FLOAT p_bo2 = paramstwbp(itype,jtype).p_bo2;
const F_FLOAT p_bo3 = paramstwbp(itype,jtype).p_bo3;
const F_FLOAT p_bo4 = paramstwbp(itype,jtype).p_bo4;
const F_FLOAT p_bo5 = paramstwbp(itype,jtype).p_bo5;
const F_FLOAT p_bo6 = paramstwbp(itype,jtype).p_bo6;
const F_FLOAT r_s = paramstwbp(itype,jtype).r_s;
const F_FLOAT r_pi = paramstwbp(itype,jtype).r_pi;
const F_FLOAT r_pi2 = paramstwbp(itype,jtype).r_pi2;
if (paramssing(itype).r_s > 0.0 && paramssing(jtype).r_s > 0.0) {
C12 = p_bo1*pow(rij/r_s,p_bo2);
BO_s = (1.0+bo_cut)*exp(C12);
}
else BO_s = C12 = 0.0;
if (paramssing(itype).r_pi > 0.0 && paramssing(jtype).r_pi > 0.0) {
C34 = p_bo3*pow(rij/r_pi,p_bo4);
BO_pi = exp(C34);
}
else BO_pi = C34 = 0.0;
if (paramssing(itype).r_pi2 > 0.0 && paramssing(jtype).r_pi2 > 0.0) {
C56 = p_bo5*pow(rij/r_pi2,p_bo6);
BO_pi2 = exp(C56);
}
else BO_pi2 = C56 = 0.0;
BO = BO_s + BO_pi + BO_pi2;
if (BO < bo_cut) continue;
d_BO(i,j_index) = BO;
d_BO_s(i,j_index) = BO;
d_BO_pi(i,j_index) = BO_pi;
d_BO_pi2(i,j_index) = BO_pi2;
F_FLOAT Cln_BOp_s = p_bo2 * C12 / rij / rij;
F_FLOAT Cln_BOp_pi = p_bo4 * C34 / rij / rij;
F_FLOAT Cln_BOp_pi2 = p_bo6 * C56 / rij / rij;
if (nlocal == 0)
Cln_BOp_s = Cln_BOp_pi = Cln_BOp_pi2 = 0.0;
for (int d = 0; d < 3; d++) dln_BOp_pi_i[d] = -(BO_pi*Cln_BOp_pi)*delij[d];
for (int d = 0; d < 3; d++) dln_BOp_pi2_i[d] = -(BO_pi2*Cln_BOp_pi2)*delij[d];
for (int d = 0; d < 3; d++) dBOp_i[d] = -(BO_s*Cln_BOp_s+BO_pi*Cln_BOp_pi+BO_pi2*Cln_BOp_pi2)*delij[d];
for (int d = 0; d < 3; d++) d_dDeltap_self(i,d) += dBOp_i[d];
d_dln_BOp_pix(i,j_index) = dln_BOp_pi_i[0];
d_dln_BOp_piy(i,j_index) = dln_BOp_pi_i[1];
d_dln_BOp_piz(i,j_index) = dln_BOp_pi_i[2];
d_dln_BOp_pi2x(i,j_index) = dln_BOp_pi2_i[0];
d_dln_BOp_pi2y(i,j_index) = dln_BOp_pi2_i[1];
d_dln_BOp_pi2z(i,j_index) = dln_BOp_pi2_i[2];
d_dBOpx(i,j_index) = dBOp_i[0];
d_dBOpy(i,j_index) = dBOp_i[1];
d_dBOpz(i,j_index) = dBOp_i[2];
d_BO(i,j_index) -= bo_cut;
d_BO_s(i,j_index) -= bo_cut;
total_bo += d_BO(i,j_index);
}
d_total_bo[i] += total_bo;
const F_FLOAT val_i = paramssing(itype).valency;
d_Deltap[i] = d_total_bo[i] - val_i;
d_Deltap_boc[i] = d_total_bo[i] - paramssing(itype).valency_val;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxBondOrder2, const int &ii) const {
F_FLOAT delij[3];
F_FLOAT exp_p1i, exp_p2i, exp_p1j, exp_p2j, f1, f2, f3, u1_ij, u1_ji, Cf1A_ij, Cf1B_ij, Cf1_ij, Cf1_ji;
F_FLOAT f4, f5, exp_f4, exp_f5, f4f5, Cf45_ij, Cf45_ji;
F_FLOAT A0_ij, A1_ij, A2_ij, A3_ij, A2_ji, A3_ji;
const int i = d_ilist[ii];
const int itype = type(i);
const int j_start = d_bo_first[i];
const int j_end = j_start + d_bo_num[i];
const X_FLOAT xtmp = x(i,0);
const X_FLOAT ytmp = x(i,1);
const X_FLOAT ztmp = x(i,2);
const F_FLOAT val_i = paramssing(itype).valency;
d_total_bo[i] = 0.0;
F_FLOAT total_bo = 0.0;
for (int jj = j_start; jj < j_end; jj++) {
int j = d_bo_list[jj];
j &= NEIGHMASK;
delij[0] = x(j,0) - xtmp;
delij[1] = x(j,1) - ytmp;
delij[2] = x(j,2) - ztmp;
const F_FLOAT rsq = delij[0]*delij[0] + delij[1]*delij[1] + delij[2]*delij[2];
const F_FLOAT rij = sqrt(rsq);
const int jtype = type(j);
const int j_index = jj - j_start;
const int i_index = maxbo+j_index;
// calculate corrected BO and total bond order
const F_FLOAT val_j = paramssing(jtype).valency;
const F_FLOAT ovc = paramstwbp(itype,jtype).ovc;
const F_FLOAT v13cor = paramstwbp(itype,jtype).v13cor;
const F_FLOAT p_boc3 = paramstwbp(itype,jtype).p_boc3;
const F_FLOAT p_boc4 = paramstwbp(itype,jtype).p_boc4;
const F_FLOAT p_boc5 = paramstwbp(itype,jtype).p_boc5;
if (ovc < 0.001 && v13cor < 0.001) {
d_C1dbo(i,j_index) = 1.0;
d_C2dbo(i,j_index) = 0.0;
d_C3dbo(i,j_index) = 0.0;
d_C1dbopi(i,j_index) = d_BO_pi(i,j_index);
d_C2dbopi(i,j_index) = 0.0;
d_C3dbopi(i,j_index) = 0.0;
d_C4dbopi(i,j_index) = 0.0;
d_C1dbopi2(i,j_index) = d_BO_pi(i,j_index);
d_C2dbopi2(i,j_index) = 0.0;
d_C3dbopi2(i,j_index) = 0.0;
d_C4dbopi2(i,j_index) = 0.0;
} else {
if (ovc >= 0.001) {
exp_p1i = exp(-p_boc1 * d_Deltap[i]);
exp_p2i = exp(-p_boc2 * d_Deltap[i]);
exp_p1j = exp(-p_boc1 * d_Deltap[j]);
exp_p2j = exp(-p_boc2 * d_Deltap[j]);
f2 = exp_p1i + exp_p1j;
f3 = -1.0/p_boc2*log(0.5*(exp_p2i+exp_p2j));
f1 = 0.5 * ((val_i + f2)/(val_i + f2 + f3) + (val_j + f2)/(val_j + f2 + f3));
u1_ij = val_i + f2 + f3;
u1_ji = val_j + f2 + f3;
Cf1A_ij = 0.5 * f3 * (1.0/(u1_ij*u1_ij)+1.0/(u1_ji*u1_ji));
Cf1B_ij = -0.5 * ((u1_ij - f3)/(u1_ij*u1_ij)+(u1_ji - f3)/(u1_ji*u1_ji));
Cf1_ij = 0.5 * (-p_boc1 * exp_p1i / u1_ij - ((val_i+f2) / (u1_ij*u1_ij)) *
(-p_boc1 * exp_p1i + exp_p2i / (exp_p2i + exp_p2j)) +
-p_boc1 * exp_p1i / u1_ji - ((val_j+f2) / (u1_ji*u1_ji)) *
(-p_boc1 * exp_p1i + exp_p2i / (exp_p2i + exp_p2j)));
Cf1_ji = -Cf1A_ij * p_boc1 * exp_p1j + Cf1B_ij * exp_p2j / ( exp_p2i + exp_p2j );
} else {
f1 = 1.0;
Cf1_ij = Cf1_ji = 0.0;
}
if (v13cor >= 0.001) {
exp_f4 =exp(-(p_boc4*(d_BO(i,j_index)*d_BO(i,j_index))-d_Deltap_boc[i])*p_boc3+p_boc5);
exp_f5 =exp(-(p_boc4*(d_BO(i,j_index)*d_BO(i,j_index))-d_Deltap_boc[j])*p_boc3+p_boc5);
f4 = 1. / (1. + exp_f4);
f5 = 1. / (1. + exp_f5);
f4f5 = f4 * f5;
Cf45_ij = -f4 * exp_f4;
Cf45_ji = -f5 * exp_f5;
} else {
f4 = f5 = f4f5 = 1.0;
Cf45_ij = Cf45_ji = 0.0;
}
A0_ij = f1 * f4f5;
A1_ij = -2 * p_boc3 * p_boc4 * d_BO(i,j_index) * (Cf45_ij + Cf45_ji);
A2_ij = Cf1_ij / f1 + p_boc3 * Cf45_ij;
A2_ji = Cf1_ji / f1 + p_boc3 * Cf45_ji;
A3_ij = A2_ij + Cf1_ij / f1;
A3_ji = A2_ji + Cf1_ji / f1;
d_BO(i,j_index) = d_BO(i,j_index) * A0_ij;
d_BO_pi(i,j_index) = d_BO_pi(i,j_index) * A0_ij * f1;
d_BO_pi2(i,j_index) = d_BO_pi2(i,j_index) * A0_ij * f1;
d_BO_s(i,j_index) = d_BO(i,j_index)-(d_BO_pi(i,j_index)+d_BO_pi2(i,j_index));
d_C1dbo(i,j_index) = A0_ij + d_BO(i,j_index) * A1_ij;
d_C2dbo(i,j_index) = d_BO(i,j_index) * A2_ij;
d_C3dbo(i,j_index) = d_BO(i,j_index) * A2_ji;
d_C1dbopi(i,j_index) = f1*f1*f4*f5;
d_C2dbopi(i,j_index) = d_BO_pi(i,j_index) * A1_ij;
d_C3dbopi(i,j_index) = d_BO_pi(i,j_index) * A3_ij;
d_C4dbopi(i,j_index) = d_BO_pi(i,j_index) * A3_ji;
d_C1dbopi2(i,j_index) = f1*f1*f4*f5;
d_C2dbopi2(i,j_index) = d_BO_pi2(i,j_index) * A1_ij;
d_C3dbopi2(i,j_index) = d_BO_pi2(i,j_index) * A3_ij;
d_C4dbopi2(i,j_index) = d_BO_pi2(i,j_index) * A3_ji;
}
if(d_BO(i,j_index) < 1e-10) d_BO(i,j_index) = 0.0;
if(d_BO_s(i,j_index) < 1e-10) d_BO_s(i,j_index) = 0.0;
if(d_BO_pi(i,j_index) < 1e-10) d_BO_pi(i,j_index) = 0.0;
if(d_BO_pi2(i,j_index) < 1e-10) d_BO_pi2(i,j_index) = 0.0;
total_bo += d_BO(i,j_index);
d_Cdbo(i,j_index) = 0.0;
d_Cdbopi(i,j_index) = 0.0;
d_Cdbopi2(i,j_index) = 0.0;
d_Cdbo(j,i_index) = 0.0;
d_Cdbopi(j,i_index) = 0.0;
d_Cdbopi2(j,i_index) = 0.0;
d_CdDelta[j] = 0.0;
}
d_CdDelta[i] = 0.0;
d_total_bo[i] += total_bo;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxBondOrder3, const int &ii) const {
// bot part of BO()
const int i = d_ilist[ii];
const int itype = type(i);
F_FLOAT nlp_temp;
d_Delta[i] = d_total_bo[i] - paramssing(itype).valency;
const F_FLOAT Delta_e = d_total_bo[i] - paramssing(itype).valency_e;
d_Delta_boc[i] = d_total_bo[i] - paramssing(itype).valency_boc;
const F_FLOAT vlpex = Delta_e - 2.0 * (int)(Delta_e/2.0);
const F_FLOAT explp1 = exp(-gp[15] * SQR(2.0 + vlpex));
const F_FLOAT nlp = explp1 - (int)(Delta_e / 2.0);
d_Delta_lp[i] = paramssing(itype).nlp_opt - nlp;
const F_FLOAT Clp = 2.0 * gp[15] * explp1 * (2.0 + vlpex);
d_dDelta_lp[i] = Clp;
if( paramssing(itype).mass > 21.0 ) {
nlp_temp = 0.5 * (paramssing(itype).valency_e - paramssing(itype).valency);
d_Delta_lp_temp[i] = paramssing(itype).nlp_opt - nlp_temp;
} else {
nlp_temp = nlp;
d_Delta_lp_temp[i] = paramssing(itype).nlp_opt - nlp_temp;
}
d_sum_ovun(i,1) = 0.0;
d_sum_ovun(i,2) = 0.0;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeMulti1<NEIGHFLAG,EVFLAG>, const int &ii) const {
const int i = d_ilist[ii];
const int itype = type(i);
const F_FLOAT imass = paramssing(itype).mass;
F_FLOAT dfvl;
if (imass > 21.0) dfvl = 0.0;
else dfvl = 1.0;
const int j_start = d_bo_first[i];
const int j_end = j_start + d_bo_num[i];
F_FLOAT sum_ovun1 = 0.0;
F_FLOAT sum_ovun2 = 0.0;
for (int jj = j_start; jj < j_end; jj++) {
int j = d_bo_list[jj];
j &= NEIGHMASK;
const int jtype = type(j);
const int j_index = jj - j_start;
sum_ovun1 += paramstwbp(itype,jtype).p_ovun1 * paramstwbp(itype,jtype).De_s * d_BO(i,j_index);
sum_ovun2 += (d_Delta[j] - dfvl * d_Delta_lp_temp[j]) * (d_BO_pi(i,j_index) + d_BO_pi2(i,j_index));
}
d_sum_ovun(i,1) += sum_ovun1;
d_sum_ovun(i,2) += sum_ovun2;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeMulti2<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT_REAX& ev) const {
Kokkos::View<F_FLOAT*, typename DAT::t_float_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_CdDelta = d_CdDelta;
Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_Cdbo = d_Cdbo;
Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_Cdbopi = d_Cdbopi;
Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_Cdbopi2 = d_Cdbopi2;
const int i = d_ilist[ii];
const int itype = type(i);
const F_FLOAT imass = paramssing(itype).mass;
const F_FLOAT val_i = paramssing(itype).valency;
F_FLOAT dfvl;
if (imass > 21.0) dfvl = 0.0;
else dfvl = 1.0;
F_FLOAT e_lp, e_ov, e_un;
F_FLOAT CEover1, CEover2, CEover3, CEover4;
F_FLOAT CEunder1, CEunder2, CEunder3, CEunder4;
const F_FLOAT p_lp3 = gp[5];
const F_FLOAT p_ovun2 = paramssing(itype).p_ovun2;
const F_FLOAT p_ovun3 = gp[32];
const F_FLOAT p_ovun4 = gp[31];
const F_FLOAT p_ovun5 = paramssing(itype).p_ovun5;
const F_FLOAT p_ovun6 = gp[6];
const F_FLOAT p_ovun7 = gp[8];
const F_FLOAT p_ovun8 = gp[9];
// lone pair
const F_FLOAT p_lp2 = paramssing(itype).p_lp2;
const F_FLOAT expvd2 = exp( -75 * d_Delta_lp[i]);
const F_FLOAT inv_expvd2 = 1.0 / (1.0+expvd2);
int numbonds = d_bo_num[i];
e_lp = 0.0;
if (numbonds > 0)
e_lp = p_lp2 * d_Delta_lp[i] * inv_expvd2;
const F_FLOAT dElp = p_lp2 * inv_expvd2 + 75.0 * p_lp2 * d_Delta_lp[i] * expvd2 * inv_expvd2*inv_expvd2;
const F_FLOAT CElp = dElp * d_dDelta_lp[i];
if (numbonds > 0)
a_CdDelta[i] += CElp;
if (eflag) ev.ereax[0] += e_lp;
//if (vflag_either || eflag_atom) this->template ev_tally<NEIGHFLAG>(ev,i,i,e_lp,0.0,0.0,0.0,0.0);
//if (eflag_atom) this->template e_tally<NEIGHFLAG>(ev,i,i,e_lp);
// over coordination
const F_FLOAT exp_ovun1 = p_ovun3 * exp( p_ovun4 * d_sum_ovun(i,2) );
const F_FLOAT inv_exp_ovun1 = 1.0 / (1 + exp_ovun1);
const F_FLOAT Delta_lpcorr = d_Delta[i] - (dfvl * d_Delta_lp_temp[i]) * inv_exp_ovun1;
const F_FLOAT exp_ovun2 = exp( p_ovun2 * Delta_lpcorr );
const F_FLOAT inv_exp_ovun2 = 1.0 / (1.0 + exp_ovun2);
const F_FLOAT DlpVi = 1.0 / (Delta_lpcorr + val_i + 1e-8);
CEover1 = Delta_lpcorr * DlpVi * inv_exp_ovun2;
e_ov = d_sum_ovun(i,1) * CEover1;
if (eflag) ev.ereax[1] += e_ov;
//if (eflag_atom) this->template ev_tally<NEIGHFLAG>(ev,i,i,e_ov,0.0,0.0,0.0,0.0);
//if (eflag_atom) this->template e_tally<NEIGHFLAG>(ev,i,i,e_ov);
CEover2 = d_sum_ovun(i,1) * DlpVi * inv_exp_ovun2 *
(1.0 - Delta_lpcorr * ( DlpVi + p_ovun2 * exp_ovun2 * inv_exp_ovun2 ));
CEover3 = CEover2 * (1.0 - dfvl * d_dDelta_lp[i] * inv_exp_ovun1 );
CEover4 = CEover2 * (dfvl * d_Delta_lp_temp[i]) * p_ovun4 * exp_ovun1 * SQR(inv_exp_ovun1);
// under coordination
const F_FLOAT exp_ovun2n = 1.0 / exp_ovun2;
const F_FLOAT exp_ovun6 = exp( p_ovun6 * Delta_lpcorr );
const F_FLOAT exp_ovun8 = p_ovun7 * exp(p_ovun8 * d_sum_ovun(i,2));
const F_FLOAT inv_exp_ovun2n = 1.0 / (1.0 + exp_ovun2n);
const F_FLOAT inv_exp_ovun8 = 1.0 / (1.0 + exp_ovun8);
e_un = 0;
if (numbonds > 0)
e_un = -p_ovun5 * (1.0 - exp_ovun6) * inv_exp_ovun2n * inv_exp_ovun8;
if (eflag) ev.ereax[2] += e_un;
//if (eflag_atom) this->template ev_tally<NEIGHFLAG>(ev,i,i,e_un,0.0,0.0,0.0,0.0);
//if (eflag_atom) this->template e_tally<NEIGHFLAG>(ev,i,i,e_un);
CEunder1 = inv_exp_ovun2n *
( p_ovun5 * p_ovun6 * exp_ovun6 * inv_exp_ovun8 + p_ovun2 * e_un * exp_ovun2n );
CEunder2 = -e_un * p_ovun8 * exp_ovun8 * inv_exp_ovun8;
CEunder3 = CEunder1 * (1.0 - dfvl * d_dDelta_lp[i] * inv_exp_ovun1);
CEunder4 = CEunder1 * (dfvl * d_Delta_lp_temp[i]) *
p_ovun4 * exp_ovun1 * inv_exp_ovun1 * inv_exp_ovun1 + CEunder2;
const F_FLOAT eng_tmp = e_lp + e_ov + e_un;
if (eflag_atom) this->template e_tally_single<NEIGHFLAG>(ev,i,eng_tmp);
// multibody forces
a_CdDelta[i] += CEover3;
if (numbonds > 0)
a_CdDelta[i] += CEunder3;
const int j_start = d_bo_first[i];
const int j_end = j_start + d_bo_num[i];
F_FLOAT CdDelta_i = 0.0;
for (int jj = j_start; jj < j_end; jj++) {
int j = d_bo_list[jj];
j &= NEIGHMASK;
const int jtype = type(j);
const F_FLOAT jmass = paramssing(jtype).mass;
const int j_index = jj - j_start;
const F_FLOAT De_s = paramstwbp(itype,jtype).De_s;
// multibody lone pair: correction for C2
if (p_lp3 > 0.001 && imass == 12.0 && jmass == 12.0) {
const F_FLOAT Di = d_Delta[i];
const F_FLOAT vov3 = d_BO(i,j_index) - Di - 0.040*pow(Di,4.0);
if (vov3 > 3.0) {
const F_FLOAT e_lph = p_lp3 * (vov3-3.0)*(vov3-3.0);
const F_FLOAT deahu2dbo = 2.0 * p_lp3 * (vov3 - 3.0);
const F_FLOAT deahu2dsbo = 2.0 * p_lp3 * (vov3 - 3.0) * (-1.0 - 0.16 * pow(Di,3.0));
d_Cdbo(i,j_index) += deahu2dbo;
CdDelta_i += deahu2dsbo;
if (eflag) ev.ereax[0] += e_lph;
if (eflag_atom) this->template e_tally<NEIGHFLAG>(ev,i,j,e_lph);
}
}
// over/under coordination forces merged together
const F_FLOAT p_ovun1 = paramstwbp(itype,jtype).p_ovun1;
a_CdDelta[j] += (CEover4 + CEunder4) * (1.0 - dfvl * d_dDelta_lp[j]) * (d_BO_pi(i,j_index) + d_BO_pi2(i,j_index));
d_Cdbo(i,j_index) += CEover1 * p_ovun1 * De_s;
d_Cdbopi(i,j_index) += (CEover4 + CEunder4) * (d_Delta[j] - dfvl*d_Delta_lp_temp[j]);
d_Cdbopi2(i,j_index) += (CEover4 + CEunder4) * (d_Delta[j] - dfvl*d_Delta_lp_temp[j]);
}
a_CdDelta[i] += CdDelta_i;
}
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeMulti2<NEIGHFLAG,EVFLAG>, const int &ii) const {
EV_FLOAT_REAX ev;
this->template operator()<NEIGHFLAG,EVFLAG>(PairReaxComputeMulti2<NEIGHFLAG,EVFLAG>(), ii, ev);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeAngular<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT_REAX& ev) const {
Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_Cdbo = d_Cdbo;
Kokkos::View<F_FLOAT*, typename DAT::t_float_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_CdDelta = d_CdDelta;
const int i = d_ilist[ii];
const int itype = type(i);
const X_FLOAT xtmp = x(i,0);
const X_FLOAT ytmp = x(i,1);
const X_FLOAT ztmp = x(i,2);
F_FLOAT temp, temp_bo_jt, pBOjt7;
F_FLOAT p_val1, p_val2, p_val3, p_val4, p_val5;
F_FLOAT p_val6, p_val7, p_val8, p_val9, p_val10;
F_FLOAT p_pen1, p_pen2, p_pen3, p_pen4;
F_FLOAT p_coa1, p_coa2, p_coa3, p_coa4;
F_FLOAT trm8, expval6, expval7, expval2theta, expval12theta, exp3ij, exp3jk;
F_FLOAT exp_pen2ij, exp_pen2jk, exp_pen3, exp_pen4, trm_pen34, exp_coa2;
F_FLOAT dSBO1, dSBO2, SBO, SBO2, CSBO2, SBOp, prod_SBO, vlpadj;
F_FLOAT CEval1, CEval2, CEval3, CEval4, CEval5, CEval6, CEval7, CEval8;
F_FLOAT CEpen1, CEpen2, CEpen3;
F_FLOAT e_ang, e_coa, e_pen;
F_FLOAT CEcoa1, CEcoa2, CEcoa3, CEcoa4, CEcoa5;
F_FLOAT Cf7ij, Cf7jk, Cf8j, Cf9j;
F_FLOAT f7_ij, f7_jk, f8_Dj, f9_Dj;
F_FLOAT Ctheta_0, theta_0, theta_00, theta, cos_theta, sin_theta;
F_FLOAT BOA_ij, BOA_ik, rij, bo_ij, bo_ik;
F_FLOAT dcos_theta_di[3], dcos_theta_dj[3], dcos_theta_dk[3];
F_FLOAT eng_tmp, fi_tmp[3], fj_tmp[3], fk_tmp[3];
F_FLOAT delij[3], delik[3];
p_val6 = gp[14];
p_val8 = gp[33];
p_val9 = gp[16];
p_val10 = gp[17];
p_pen2 = gp[19];
p_pen3 = gp[20];
p_pen4 = gp[21];
p_coa2 = gp[2];
p_coa3 = gp[38];
p_coa4 = gp[30];
p_val3 = paramssing(itype).p_val3;
p_val5 = paramssing(itype).p_val5;
const int j_start = d_bo_first[i];
const int j_end = j_start + d_bo_num[i];
const F_FLOAT Delta_val = d_total_bo[i] - paramssing(itype).valency_val;
SBOp = 0.0, prod_SBO = 1.0;
for (int jj = j_start; jj < j_end; jj++) {
int j = d_bo_list[jj];
j &= NEIGHMASK;
const int j_index = jj - j_start;
bo_ij = d_BO(i,j_index);
SBOp += (d_BO_pi(i,j_index) + d_BO_pi2(i,j_index));
temp = SQR(bo_ij);
temp *= temp;
temp *= temp;
prod_SBO *= exp( -temp );
}
const F_FLOAT Delta_e = d_total_bo[i] - paramssing(itype).valency_e;
const F_FLOAT vlpex = Delta_e - 2.0 * (int)(Delta_e/2.0);
const F_FLOAT explp1 = exp(-gp[15] * SQR(2.0 + vlpex));
const F_FLOAT nlp = explp1 - (int)(Delta_e / 2.0);
if( vlpex >= 0.0 ){
vlpadj = 0.0;
dSBO2 = prod_SBO - 1.0;
} else{
vlpadj = nlp;
dSBO2 = (prod_SBO - 1.0) * (1.0 - p_val8 * d_dDelta_lp[i]);
}
SBO = SBOp + (1.0 - prod_SBO) * (-d_Delta_boc[i] - p_val8 * vlpadj);
dSBO1 = -8.0 * prod_SBO * ( d_Delta_boc[i] + p_val8 * vlpadj );
if( SBO <= 0.0 ) {
SBO2 = 0.0;
CSBO2 = 0.0;
} else if( SBO > 0.0 && SBO <= 1.0 ) {
SBO2 = pow( SBO, p_val9 );
CSBO2 = p_val9 * pow( SBO, p_val9 - 1.0 );
} else if( SBO > 1.0 && SBO < 2.0 ) {
SBO2 = 2.0 - pow( 2.0-SBO, p_val9 );
CSBO2 = p_val9 * pow( 2.0 - SBO, p_val9 - 1.0 );
} else {
SBO2 = 2.0;
CSBO2 = 0.0;
}
expval6 = exp( p_val6 * d_Delta_boc[i] );
F_FLOAT CdDelta_i = 0.0;
F_FLOAT fitmp[3],fjtmp[3];
for (int j = 0; j < 3; j++) fitmp[j] = 0.0;
for (int jj = j_start; jj < j_end; jj++) {
int j = d_bo_list[jj];
j &= NEIGHMASK;
const int j_index = jj - j_start;
delij[0] = x(j,0) - xtmp;
delij[1] = x(j,1) - ytmp;
delij[2] = x(j,2) - ztmp;
const F_FLOAT rsqij = delij[0]*delij[0] + delij[1]*delij[1] + delij[2]*delij[2];
rij = sqrt(rsqij);
bo_ij = d_BO(i,j_index);
const int i_index = maxbo+j_index;
BOA_ij = bo_ij - thb_cut;
if (BOA_ij <= 0.0) continue;
if (i >= nlocal && j >= nlocal) continue;
const int jtype = type(j);
F_FLOAT CdDelta_j = 0.0;
for (int k = 0; k < 3; k++) fjtmp[k] = 0.0;
for (int kk = jj+1; kk < j_end; kk++ ) {
//for (int kk = j_start; kk < j_end; kk++ ) {
int k = d_bo_list[kk];
k &= NEIGHMASK;
if (k == j) continue;
const int k_index = kk - j_start;
delik[0] = x(k,0) - xtmp;
delik[1] = x(k,1) - ytmp;
delik[2] = x(k,2) - ztmp;
const F_FLOAT rsqik = delik[0]*delik[0] + delik[1]*delik[1] + delik[2]*delik[2];
const F_FLOAT rik = sqrt(rsqik);
bo_ik = d_BO(i,k_index);
BOA_ik = bo_ik - thb_cut;
if (BOA_ik <= 0.0 || bo_ij <= thb_cut || bo_ik <= thb_cut || bo_ij * bo_ik <= thb_cutsq) continue;
const int ktype = type(k);
// theta and derivatives
cos_theta = (delij[0]*delik[0]+delij[1]*delik[1]+delij[2]*delik[2])/(rij*rik);
if( cos_theta > 1.0 ) cos_theta = 1.0;
if( cos_theta < -1.0 ) cos_theta = -1.0;
theta = acos(cos_theta);
const F_FLOAT inv_dists = 1.0 / (rij * rik);
const F_FLOAT Cdot_inv3 = cos_theta * inv_dists * inv_dists;
for( int t = 0; t < 3; t++ ) {
dcos_theta_di[t] = -(delik[t] + delij[t]) * inv_dists + Cdot_inv3 * (rsqik * delij[t] + rsqij * delik[t]);
dcos_theta_dj[t] = delik[t] * inv_dists - Cdot_inv3 * rsqik * delij[t];
dcos_theta_dk[t] = delij[t] * inv_dists - Cdot_inv3 * rsqij * delik[t];
}
sin_theta = sin(theta);
if (sin_theta < 1.0e-5) sin_theta = 1.0e-5;
p_val1 = paramsthbp(jtype,itype,ktype).p_val1;
if (fabs(p_val1) <= 0.001) continue;
// ANGLE ENERGY
p_val1 = paramsthbp(jtype,itype,ktype).p_val1;
p_val2 = paramsthbp(jtype,itype,ktype).p_val2;
p_val4 = paramsthbp(jtype,itype,ktype).p_val4;
p_val7 = paramsthbp(jtype,itype,ktype).p_val7;
theta_00 = paramsthbp(jtype,itype,ktype).theta_00;
exp3ij = exp( -p_val3 * pow( BOA_ij, p_val4 ) );
f7_ij = 1.0 - exp3ij;
Cf7ij = p_val3 * p_val4 * pow( BOA_ij, p_val4 - 1.0 ) * exp3ij;
exp3jk = exp( -p_val3 * pow( BOA_ik, p_val4 ) );
f7_jk = 1.0 - exp3jk;
Cf7jk = p_val3 * p_val4 * pow( BOA_ik, p_val4 - 1.0 ) * exp3jk;
expval7 = exp( -p_val7 * d_Delta_boc[i] );
trm8 = 1.0 + expval6 + expval7;
f8_Dj = p_val5 - ( (p_val5 - 1.0) * (2.0 + expval6) / trm8 );
Cf8j = ((1.0 - p_val5) / (trm8*trm8)) *
(p_val6 * expval6 * trm8 - (2.0 + expval6) * ( p_val6*expval6 - p_val7*expval7));
theta_0 = 180.0 - theta_00 * (1.0 - exp(-p_val10 * (2.0 - SBO2)));
theta_0 = theta_0*constPI/180.0;
expval2theta = exp( -p_val2 * (theta_0-theta)*(theta_0-theta) );
if( p_val1 >= 0 )
expval12theta = p_val1 * (1.0 - expval2theta);
else // To avoid linear Me-H-Me angles (6/6/06)
expval12theta = p_val1 * -expval2theta;
CEval1 = Cf7ij * f7_jk * f8_Dj * expval12theta;
CEval2 = Cf7jk * f7_ij * f8_Dj * expval12theta;
CEval3 = Cf8j * f7_ij * f7_jk * expval12theta;
CEval4 = -2.0 * p_val1 * p_val2 * f7_ij * f7_jk * f8_Dj * expval2theta * (theta_0 - theta);
Ctheta_0 = p_val10 * theta_00*constPI/180.0 * exp( -p_val10 * (2.0 - SBO2) );
CEval5 = -CEval4 * Ctheta_0 * CSBO2;
CEval6 = CEval5 * dSBO1;
CEval7 = CEval5 * dSBO2;
CEval8 = -CEval4 / sin_theta;
e_ang = f7_ij * f7_jk * f8_Dj * expval12theta;
if (eflag) ev.ereax[3] += e_ang;
// Penalty energy
p_pen1 = paramsthbp(jtype,itype,ktype).p_pen1;
exp_pen2ij = exp( -p_pen2 * (BOA_ij - 2.0)*(BOA_ij - 2.0) );
exp_pen2jk = exp( -p_pen2 * (BOA_ik - 2.0)*(BOA_ik - 2.0) );
exp_pen3 = exp( -p_pen3 * d_Delta[i] );
exp_pen4 = exp( p_pen4 * d_Delta[i] );
trm_pen34 = 1.0 + exp_pen3 + exp_pen4;
f9_Dj = (2.0 + exp_pen3 ) / trm_pen34;
Cf9j = (-p_pen3 * exp_pen3 * trm_pen34 - (2.0 + exp_pen3) *
(-p_pen3 * exp_pen3 + p_pen4 * exp_pen4 ) )/(trm_pen34*trm_pen34);
e_pen = p_pen1 * f9_Dj * exp_pen2ij * exp_pen2jk;
if (eflag) ev.ereax[4] += e_pen;
CEpen1 = e_pen * Cf9j / f9_Dj;
temp = -2.0 * p_pen2 * e_pen;
CEpen2 = temp * (BOA_ij - 2.0);
CEpen3 = temp * (BOA_ik - 2.0);
// ConjAngle energy
p_coa1 = paramsthbp(jtype,itype,ktype).p_coa1;
exp_coa2 = exp( p_coa2 * Delta_val );
e_coa = p_coa1 / (1. + exp_coa2) *
exp( -p_coa3 * SQR(d_total_bo[j]-BOA_ij) ) *
exp( -p_coa3 * SQR(d_total_bo[k]-BOA_ik) ) *
exp( -p_coa4 * SQR(BOA_ij - 1.5) ) *
exp( -p_coa4 * SQR(BOA_ik - 1.5) );
CEcoa1 = -2 * p_coa4 * (BOA_ij - 1.5) * e_coa;
CEcoa2 = -2 * p_coa4 * (BOA_ik - 1.5) * e_coa;
CEcoa3 = -p_coa2 * exp_coa2 * e_coa / (1 + exp_coa2);
CEcoa4 = -2 * p_coa3 * (d_total_bo[j]-BOA_ij) * e_coa;
CEcoa5 = -2 * p_coa3 * (d_total_bo[k]-BOA_ik) * e_coa;
if (eflag) ev.ereax[5] += e_coa;
// Forces
a_Cdbo(i,j_index) += (CEval1 + CEpen2 + (CEcoa1 - CEcoa4));
a_Cdbo(j,i_index) += (CEval1 + CEpen2 + (CEcoa1 - CEcoa4));
a_Cdbo(i,k_index) += (CEval2 + CEpen3 + (CEcoa2 - CEcoa5));
a_Cdbo(k,i_index) += (CEval2 + CEpen3 + (CEcoa2 - CEcoa5));
CdDelta_i += ((CEval3 + CEval7) + CEpen1 + CEcoa3);
CdDelta_j += CEcoa4;
a_CdDelta[k] += CEcoa5;
for (int ll = j_start; ll < j_end; ll++) {
int l = d_bo_list[ll];
l &= NEIGHMASK;
const int l_index = ll - j_start;
temp_bo_jt = d_BO(i,l_index);
temp = temp_bo_jt * temp_bo_jt * temp_bo_jt;
pBOjt7 = temp * temp * temp_bo_jt;
a_Cdbo(i,l_index) += (CEval6 * pBOjt7);
d_Cdbopi(i,l_index) += CEval5;
d_Cdbopi2(i,l_index) += CEval5;
}
for (int d = 0; d < 3; d++) fi_tmp[d] = CEval8 * dcos_theta_di[d];
for (int d = 0; d < 3; d++) fj_tmp[d] = CEval8 * dcos_theta_dj[d];
for (int d = 0; d < 3; d++) fk_tmp[d] = CEval8 * dcos_theta_dk[d];
for (int d = 0; d < 3; d++) fitmp[d] -= fi_tmp[d];
for (int d = 0; d < 3; d++) fjtmp[d] -= fj_tmp[d];
for (int d = 0; d < 3; d++) a_f(k,d) -= fk_tmp[d];
// energy/virial tally
if (EVFLAG) {
eng_tmp = e_ang + e_pen + e_coa;
//if (eflag_atom) this->template ev_tally<NEIGHFLAG>(ev,i,j,eng_tmp,0.0,0.0,0.0,0.0);
if (eflag_atom) this->template e_tally<NEIGHFLAG>(ev,i,j,eng_tmp);
if (vflag_either) this->template v_tally3<NEIGHFLAG>(ev,i,j,k,fj_tmp,fk_tmp,delij,delik);
}
}
a_CdDelta[j] += CdDelta_j;
for (int d = 0; d < 3; d++) a_f(j,d) += fjtmp[d];
}
a_CdDelta[i] += CdDelta_i;
for (int d = 0; d < 3; d++) a_f(i,d) += fitmp[d];
}
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeAngular<NEIGHFLAG,EVFLAG>, const int &ii) const {
EV_FLOAT_REAX ev;
this->template operator()<NEIGHFLAG,EVFLAG>(PairReaxComputeAngular<NEIGHFLAG,EVFLAG>(), ii, ev);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeTorsion<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT_REAX& ev) const {
Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
Kokkos::View<F_FLOAT*, typename DAT::t_float_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_CdDelta = d_CdDelta;
Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_Cdbo = d_Cdbo;
// in reaxc_torsion_angles: j = i, k = j, i = k;
F_FLOAT Delta_i, Delta_j, bo_ij, bo_ik, bo_jl, BOA_ij, BOA_ik, BOA_jl;
F_FLOAT p_tor1, p_cot1, V1, V2, V3;
F_FLOAT exp_tor2_ij, exp_tor2_ik, exp_tor2_jl, exp_tor1, exp_tor3_DiDj, exp_tor4_DiDj, exp_tor34_inv;
F_FLOAT exp_cot2_ij, exp_cot2_ik, exp_cot2_jl, fn10, f11_DiDj, dfn11, fn12;
F_FLOAT theta_ijk, theta_jil, sin_ijk, sin_jil, cos_ijk, cos_jil, tan_ijk_i, tan_jil_i;
F_FLOAT cos_omega, cos2omega, cos3omega;
F_FLOAT CV, cmn, CEtors1, CEtors2, CEtors3, CEtors4;
F_FLOAT CEtors5, CEtors6, CEtors7, CEtors8, CEtors9;
F_FLOAT Cconj, CEconj1, CEconj2, CEconj3, CEconj4, CEconj5, CEconj6;
F_FLOAT e_tor, e_con, eng_tmp;
F_FLOAT delij[3], delik[3], deljl[3], dellk[3], delil[3], delkl[3];
F_FLOAT fi_tmp[3], fj_tmp[3], fk_tmp[3], fl_tmp[3];
F_FLOAT dcos_omega_di[3], dcos_omega_dj[3], dcos_omega_dk[3], dcos_omega_dl[3];
F_FLOAT dcos_ijk_di[3], dcos_ijk_dj[3], dcos_ijk_dk[3], dcos_jil_di[3], dcos_jil_dj[3], dcos_jil_dk[3];
F_FLOAT p_tor2 = gp[23];
F_FLOAT p_tor3 = gp[24];
F_FLOAT p_tor4 = gp[25];
F_FLOAT p_cot2 = gp[27];
const int i = d_ilist[ii];
const int itype = type(i);
const int itag = tag(i);
const X_FLOAT xtmp = x(i,0);
const X_FLOAT ytmp = x(i,1);
const X_FLOAT ztmp = x(i,2);
Delta_i = d_Delta_boc[i];
const int j_start = d_bo_first[i];
const int j_end = j_start + d_bo_num[i];
F_FLOAT fitmp[3], fjtmp[3], fktmp[3];
for(int j = 0; j < 3; j++) fitmp[j] = 0.0;
F_FLOAT CdDelta_i = 0.0;
for (int jj = j_start; jj < j_end; jj++) {
int j = d_bo_list[jj];
j &= NEIGHMASK;
const int jtag = tag(j);
const int jtype = type(j);
const int j_index = jj - j_start;
// skip half of the interactions
if (itag > jtag) {
if ((itag+jtag) % 2 == 0) continue;
} else if (itag < jtag) {
if ((itag+jtag) % 2 == 1) continue;
} else {
if (x(j,2) < ztmp) continue;
if (x(j,2) == ztmp && x(j,1) < ytmp) continue;
if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp) continue;
}
bo_ij = d_BO(i,j_index);
if (bo_ij < thb_cut) continue;
delij[0] = x(j,0) - xtmp;
delij[1] = x(j,1) - ytmp;
delij[2] = x(j,2) - ztmp;
const F_FLOAT rsqij = delij[0]*delij[0] + delij[1]*delij[1] + delij[2]*delij[2];
const F_FLOAT rij = sqrt(rsqij);
BOA_ij = bo_ij - thb_cut;
Delta_j = d_Delta_boc[j];
exp_tor2_ij = exp( -p_tor2 * BOA_ij );
exp_cot2_ij = exp( -p_cot2 * SQR(BOA_ij - 1.5) );
exp_tor3_DiDj = exp( -p_tor3 * (Delta_i + Delta_j) );
exp_tor4_DiDj = exp( p_tor4 * (Delta_i + Delta_j) );
exp_tor34_inv = 1.0 / (1.0 + exp_tor3_DiDj + exp_tor4_DiDj);
f11_DiDj = (2.0 + exp_tor3_DiDj) * exp_tor34_inv;
const int l_start = d_bo_first[j];
const int l_end = l_start + d_bo_num[j];
for(int k = 0; k < 3; k++) fjtmp[k] = 0.0;
F_FLOAT CdDelta_j = 0.0;
for (int kk = j_start; kk < j_end; kk++) {
int k = d_bo_list[kk];
k &= NEIGHMASK;
if (k == j) continue;
const int ktype = type(k);
const int k_index = kk - j_start;
bo_ik = d_BO(i,k_index);
if (bo_ik < thb_cut) continue;
BOA_ik = bo_ik - thb_cut;
for (int d = 0; d < 3; d ++) delik[d] = x(k,d) - x(i,d);
const F_FLOAT rsqik = delik[0]*delik[0] + delik[1]*delik[1] + delik[2]*delik[2];
const F_FLOAT rik = sqrt(rsqik);
cos_ijk = (delij[0]*delik[0]+delij[1]*delik[1]+delij[2]*delik[2])/(rij*rik);
if( cos_ijk > 1.0 ) cos_ijk = 1.0;
if( cos_ijk < -1.0 ) cos_ijk = -1.0;
theta_ijk = acos(cos_ijk);
// dcos_ijk
const F_FLOAT inv_dists = 1.0 / (rij * rik);
const F_FLOAT cos_ijk_tmp = cos_ijk / ((rij*rik)*(rij*rik));
for( int d = 0; d < 3; d++ ) {
dcos_ijk_di[d] = -(delik[d] + delij[d]) * inv_dists + cos_ijk_tmp * (rsqik * delij[d] + rsqij * delik[d]);
dcos_ijk_dj[d] = delik[d] * inv_dists - cos_ijk_tmp * rsqik * delij[d];
dcos_ijk_dk[d] = delij[d] * inv_dists - cos_ijk_tmp * rsqij * delik[d];
}
sin_ijk = sin( theta_ijk );
if( sin_ijk >= 0 && sin_ijk <= 1e-10 )
tan_ijk_i = cos_ijk / 1e-10;
else if( sin_ijk <= 0 && sin_ijk >= -1e-10 )
tan_ijk_i = -cos_ijk / 1e-10;
else tan_ijk_i = cos_ijk / sin_ijk;
exp_tor2_ik = exp( -p_tor2 * BOA_ik );
exp_cot2_ik = exp( -p_cot2 * SQR(BOA_ik -1.5) );
for(int l = 0; l < 3; l++) fktmp[l] = 0.0;
for (int ll = l_start; ll < l_end; ll++) {
int l = d_bo_list[ll];
l &= NEIGHMASK;
if (l == i) continue;
const int ltype = type(l);
const int l_index = ll - l_start;
bo_jl = d_BO(j,l_index);
if (l == k || bo_jl < thb_cut || bo_ij*bo_ik*bo_jl < thb_cut) continue;
for (int d = 0; d < 3; d ++) deljl[d] = x(l,d) - x(j,d);
const F_FLOAT rsqjl = deljl[0]*deljl[0] + deljl[1]*deljl[1] + deljl[2]*deljl[2];
const F_FLOAT rjl = sqrt(rsqjl);
BOA_jl = bo_jl - thb_cut;
cos_jil = -(delij[0]*deljl[0]+delij[1]*deljl[1]+delij[2]*deljl[2])/(rij*rjl);
if( cos_jil > 1.0 ) cos_jil = 1.0;
if( cos_jil < -1.0 ) cos_jil = -1.0;
theta_jil = acos(cos_jil);
// dcos_jil
const F_FLOAT inv_distjl = 1.0 / (rij * rjl);
const F_FLOAT inv_distjl3 = pow( inv_distjl, 3.0 );
const F_FLOAT cos_jil_tmp = cos_jil / ((rij*rjl)*(rij*rjl));
for( int d = 0; d < 3; d++ ) {
dcos_jil_di[d] = deljl[d] * inv_distjl - cos_jil_tmp * rsqjl * -delij[d];
dcos_jil_dj[d] = (-deljl[d] + delij[d]) * inv_distjl - cos_jil_tmp * (rsqjl * delij[d] + rsqij * -deljl[d]);
dcos_jil_dk[d] = -delij[d] * inv_distjl - cos_jil_tmp * rsqij * deljl[d];
}
sin_jil = sin( theta_jil );
if( sin_jil >= 0 && sin_jil <= 1e-10 )
tan_jil_i = cos_jil / 1e-10;
else if( sin_jil <= 0 && sin_jil >= -1e-10 )
tan_jil_i = -cos_jil / 1e-10;
else tan_jil_i = cos_jil / sin_jil;
for (int d = 0; d < 3; d ++) dellk[d] = x(k,d) - x(l,d);
const F_FLOAT rsqlk = dellk[0]*dellk[0] + dellk[1]*dellk[1] + dellk[2]*dellk[2];
const F_FLOAT rlk = sqrt(rsqlk);
F_FLOAT unnorm_cos_omega, unnorm_sin_omega, omega;
F_FLOAT htra, htrb, htrc, hthd, hthe, hnra, hnrc, hnhd, hnhe;
F_FLOAT arg, poem, tel;
F_FLOAT cross_ij_jl[3];
// omega
F_FLOAT dot_ij_jk = -(delij[0]*delik[0]+delij[1]*delik[1]+delij[2]*delik[2]);
F_FLOAT dot_ij_lj = delij[0]*deljl[0]+delij[1]*deljl[1]+delij[2]*deljl[2];
F_FLOAT dot_ik_jl = delik[0]*deljl[0]+delik[1]*deljl[1]+delik[2]*deljl[2];
unnorm_cos_omega = dot_ij_jk * dot_ij_lj + rsqij * dot_ik_jl;
cross_ij_jl[0] = delij[1]*deljl[2] - delij[2]*deljl[1];
cross_ij_jl[1] = delij[2]*deljl[0] - delij[0]*deljl[2];
cross_ij_jl[2] = delij[0]*deljl[1] - delij[1]*deljl[0];
unnorm_sin_omega = -rij*(delik[0]*cross_ij_jl[0]+delik[1]*cross_ij_jl[1]+delik[2]*cross_ij_jl[2]);
omega = atan2( unnorm_sin_omega, unnorm_cos_omega );
htra = rik + cos_ijk * ( rjl * cos_jil - rij );
htrb = rij - rik * cos_ijk - rjl * cos_jil;
htrc = rjl + cos_jil * ( rik * cos_ijk - rij );
hthd = rik * sin_ijk * ( rij - rjl * cos_jil );
hthe = rjl * sin_jil * ( rij - rik * cos_ijk );
hnra = rjl * sin_ijk * sin_jil;
hnrc = rik * sin_ijk * sin_jil;
hnhd = rik * rjl * cos_ijk * sin_jil;
hnhe = rik * rjl * sin_ijk * cos_jil;
poem = 2.0 * rik * rjl * sin_ijk * sin_jil;
if( poem < 1e-20 ) poem = 1e-20;
tel = SQR(rik) + SQR(rij) + SQR(rjl) - SQR(rlk) -
2.0 * (rik * rij * cos_ijk - rik * rjl * cos_ijk * cos_jil + rij * rjl * cos_jil);
arg = tel / poem;
if( arg > 1.0 ) arg = 1.0;
if( arg < -1.0 ) arg = -1.0;
if( sin_ijk >= 0 && sin_ijk <= 1e-10 ) sin_ijk = 1e-10;
else if( sin_ijk <= 0 && sin_ijk >= -1e-10 ) sin_ijk = -1e-10;
if( sin_jil >= 0 && sin_jil <= 1e-10 ) sin_jil = 1e-10;
else if( sin_jil <= 0 && sin_jil >= -1e-10 ) sin_jil = -1e-10;
// dcos_omega_di
for (int d = 0; d < 3; d++) dcos_omega_dk[d] = ((htra-arg*hnra)/rik) * delik[d] - dellk[d];
for (int d = 0; d < 3; d++) dcos_omega_dk[d] += (hthd-arg*hnhd)/sin_ijk * -dcos_ijk_dk[d];
for (int d = 0; d < 3; d++) dcos_omega_dk[d] *= 2.0/poem;
// dcos_omega_dj
for (int d = 0; d < 3; d++) dcos_omega_di[d] = -((htra-arg*hnra)/rik) * delik[d] - htrb/rij * delij[d];
for (int d = 0; d < 3; d++) dcos_omega_di[d] += -(hthd-arg*hnhd)/sin_ijk * dcos_ijk_di[d];
for (int d = 0; d < 3; d++) dcos_omega_di[d] += -(hthe-arg*hnhe)/sin_jil * dcos_jil_di[d];
for (int d = 0; d < 3; d++) dcos_omega_di[d] *= 2.0/poem;
// dcos_omega_dk
for (int d = 0; d < 3; d++) dcos_omega_dj[d] = -((htrc-arg*hnrc)/rjl) * deljl[d] + htrb/rij * delij[d];
for (int d = 0; d < 3; d++) dcos_omega_dj[d] += -(hthd-arg*hnhd)/sin_ijk * dcos_ijk_dj[d];
for (int d = 0; d < 3; d++) dcos_omega_dj[d] += -(hthe-arg*hnhe)/sin_jil * dcos_jil_dj[d];
for (int d = 0; d < 3; d++) dcos_omega_dj[d] *= 2.0/poem;
// dcos_omega_dl
for (int d = 0; d < 3; d++) dcos_omega_dl[d] = ((htrc-arg*hnrc)/rjl) * deljl[d] + dellk[d];
for (int d = 0; d < 3; d++) dcos_omega_dl[d] += (hthe-arg*hnhe)/sin_jil * -dcos_jil_dk[d];
for (int d = 0; d < 3; d++) dcos_omega_dl[d] *= 2.0/poem;
cos_omega = cos( omega );
cos2omega = cos( 2. * omega );
cos3omega = cos( 3. * omega );
// torsion energy
p_tor1 = paramsfbp(ktype,itype,jtype,ltype).p_tor1;
p_cot1 = paramsfbp(ktype,itype,jtype,ltype).p_cot1;
V1 = paramsfbp(ktype,itype,jtype,ltype).V1;
V2 = paramsfbp(ktype,itype,jtype,ltype).V2;
V3 = paramsfbp(ktype,itype,jtype,ltype).V3;
exp_tor1 = exp(p_tor1 * SQR(2.0 - d_BO_pi(i,j_index) - f11_DiDj));
exp_tor2_jl = exp(-p_tor2 * BOA_jl);
exp_cot2_jl = exp(-p_cot2 * SQR(BOA_jl - 1.5) );
fn10 = (1.0 - exp_tor2_ik) * (1.0 - exp_tor2_ij) * (1.0 - exp_tor2_jl);
CV = 0.5 * (V1 * (1.0 + cos_omega) + V2 * exp_tor1 * (1.0 - cos2omega) + V3 * (1.0 + cos3omega) );
e_tor = fn10 * sin_ijk * sin_jil * CV;
if (eflag) ev.ereax[6] += e_tor;
dfn11 = (-p_tor3 * exp_tor3_DiDj + (p_tor3 * exp_tor3_DiDj - p_tor4 * exp_tor4_DiDj) *
(2.0 + exp_tor3_DiDj) * exp_tor34_inv) * exp_tor34_inv;
CEtors1 = sin_ijk * sin_jil * CV;
CEtors2 = -fn10 * 2.0 * p_tor1 * V2 * exp_tor1 * (2.0 - d_BO_pi(i,j_index) - f11_DiDj) *
(1.0 - SQR(cos_omega)) * sin_ijk * sin_jil;
CEtors3 = CEtors2 * dfn11;
CEtors4 = CEtors1 * p_tor2 * exp_tor2_ik * (1.0 - exp_tor2_ij) * (1.0 - exp_tor2_jl);
CEtors5 = CEtors1 * p_tor2 * (1.0 - exp_tor2_ik) * exp_tor2_ij * (1.0 - exp_tor2_jl);
CEtors6 = CEtors1 * p_tor2 * (1.0 - exp_tor2_ik) * (1.0 - exp_tor2_ij) * exp_tor2_jl;
cmn = -fn10 * CV;
CEtors7 = cmn * sin_jil * tan_ijk_i;
CEtors8 = cmn * sin_ijk * tan_jil_i;
CEtors9 = fn10 * sin_ijk * sin_jil *
(0.5 * V1 - 2.0 * V2 * exp_tor1 * cos_omega + 1.5 * V3 * (cos2omega + 2.0 * SQR(cos_omega)));
// 4-body conjugation energy
fn12 = exp_cot2_ik * exp_cot2_ij * exp_cot2_jl;
e_con = p_cot1 * fn12 * (1.0 + (SQR(cos_omega) - 1.0) * sin_ijk * sin_jil);
if (eflag) ev.ereax[7] += e_con;
Cconj = -2.0 * fn12 * p_cot1 * p_cot2 * (1.0 + (SQR(cos_omega) - 1.0) * sin_ijk * sin_jil);
CEconj1 = Cconj * (BOA_ik - 1.5e0);
CEconj2 = Cconj * (BOA_ij - 1.5e0);
CEconj3 = Cconj * (BOA_jl - 1.5e0);
CEconj4 = -p_cot1 * fn12 * (SQR(cos_omega) - 1.0) * sin_jil * tan_ijk_i;
CEconj5 = -p_cot1 * fn12 * (SQR(cos_omega) - 1.0) * sin_ijk * tan_jil_i;
CEconj6 = 2.0 * p_cot1 * fn12 * cos_omega * sin_ijk * sin_jil;
// forces
// contribution to bond order
d_Cdbopi(i,j_index) += CEtors2;
CdDelta_i += CEtors3;
CdDelta_j += CEtors3;
a_Cdbo(i,k_index) += CEtors4 + CEconj1;
a_Cdbo(i,j_index) += CEtors5 + CEconj2;
a_Cdbo(j,l_index) += CEtors6 + CEconj3; // trouble
// dcos_theta_ijk
const F_FLOAT coeff74 = CEtors7 + CEconj4;
for (int d = 0; d < 3; d++) fi_tmp[d] = (coeff74) * dcos_ijk_di[d];
for (int d = 0; d < 3; d++) fj_tmp[d] = (coeff74) * dcos_ijk_dj[d];
for (int d = 0; d < 3; d++) fk_tmp[d] = (coeff74) * dcos_ijk_dk[d];
const F_FLOAT coeff85 = CEtors8 + CEconj5;
// dcos_theta_jil
for (int d = 0; d < 3; d++) fi_tmp[d] += (coeff85) * dcos_jil_di[d];
for (int d = 0; d < 3; d++) fj_tmp[d] += (coeff85) * dcos_jil_dj[d];
for (int d = 0; d < 3; d++) fl_tmp[d] = (coeff85) * dcos_jil_dk[d];
// dcos_omega
const F_FLOAT coeff96 = CEtors9 + CEconj6;
for (int d = 0; d < 3; d++) fi_tmp[d] += (coeff96) * dcos_omega_di[d];
for (int d = 0; d < 3; d++) fj_tmp[d] += (coeff96) * dcos_omega_dj[d];
for (int d = 0; d < 3; d++) fk_tmp[d] += (coeff96) * dcos_omega_dk[d];
for (int d = 0; d < 3; d++) fl_tmp[d] += (coeff96) * dcos_omega_dl[d];
// total forces
for (int d = 0; d < 3; d++) fitmp[d] -= fi_tmp[d];
for (int d = 0; d < 3; d++) fjtmp[d] -= fj_tmp[d];
for (int d = 0; d < 3; d++) fktmp[d] -= fk_tmp[d];
for (int d = 0; d < 3; d++) a_f(l,d) -= fl_tmp[d];
// per-atom energy/virial tally
if (EVFLAG) {
eng_tmp = e_tor + e_con;
//if (eflag_atom) this->template ev_tally<NEIGHFLAG>(ev,i,j,eng_tmp,0.0,0.0,0.0,0.0);
if (eflag_atom) this->template e_tally<NEIGHFLAG>(ev,i,j,eng_tmp);
if (vflag_either) {
for (int d = 0; d < 3; d ++) delil[d] = x(l,d) - x(i,d);
for (int d = 0; d < 3; d ++) delkl[d] = x(l,d) - x(k,d);
this->template v_tally4<NEIGHFLAG>(ev,k,i,j,l,fk_tmp,fi_tmp,fj_tmp,delkl,delil,deljl);
}
}
}
for (int d = 0; d < 3; d++) a_f(k,d) += fktmp[d];
}
a_CdDelta[j] += CdDelta_j;
for (int d = 0; d < 3; d++) a_f(j,d) += fjtmp[d];
}
a_CdDelta[i] += CdDelta_i;
for (int d = 0; d < 3; d++) a_f(i,d) += fitmp[d];
}
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeTorsion<NEIGHFLAG,EVFLAG>, const int &ii) const {
EV_FLOAT_REAX ev;
this->template operator()<NEIGHFLAG,EVFLAG>(PairReaxComputeTorsion<NEIGHFLAG,EVFLAG>(), ii, ev);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeHydrogen<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT_REAX& ev) const {
Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
int hblist[MAX_BONDS];
F_FLOAT theta, cos_theta, sin_xhz4, cos_xhz1, sin_theta2;
F_FLOAT e_hb, exp_hb2, exp_hb3, CEhb1, CEhb2, CEhb3;
F_FLOAT dcos_theta_di[3], dcos_theta_dj[3], dcos_theta_dk[3];
// tally variables
F_FLOAT fi_tmp[3], fj_tmp[3], fk_tmp[3], delij[3], delji[3], delik[3], delki[3];
for (int d = 0; d < 3; d++) fi_tmp[d] = fj_tmp[d] = fk_tmp[d] = 0.0;
const int i = d_ilist[ii];
const int itype = type(i);
if( paramssing(itype).p_hbond != 1 ) return;
const X_FLOAT xtmp = x(i,0);
const X_FLOAT ytmp = x(i,1);
const X_FLOAT ztmp = x(i,2);
const int itag = tag(i);
const int j_start = d_bo_first[i];
const int j_end = j_start + d_bo_num[i];
const int k_start = d_hb_first[i];
const int k_end = k_start + d_hb_num[i];
int top = 0;
for (int jj = j_start; jj < j_end; jj++) {
int j = d_bo_list[jj];
j &= NEIGHMASK;
const int jtype = type(j);
const int j_index = jj - j_start;
const F_FLOAT bo_ij = d_BO(i,j_index);
if( paramssing(jtype).p_hbond == 2 && bo_ij >= HB_THRESHOLD ) {
hblist[top] = jj;
top ++;
}
}
F_FLOAT fitmp[3], fktmp[3];
for (int d = 0; d < 3; d++) fitmp[d] = 0.0;
for (int kk = k_start; kk < k_end; kk++) {
int k = d_hb_list[kk];
k &= NEIGHMASK;
const int ktag = tag(k);
const int ktype = type(k);
delik[0] = x(k,0) - xtmp;
delik[1] = x(k,1) - ytmp;
delik[2] = x(k,2) - ztmp;
const F_FLOAT rsqik = delik[0]*delik[0] + delik[1]*delik[1] + delik[2]*delik[2];
const F_FLOAT rik = sqrt(rsqik);
for (int d = 0; d < 3; d++) fktmp[d] = 0.0;
for (int itr = 0; itr < top; itr++) {
const int jj = hblist[itr];
int j = d_bo_list[jj];
j &= NEIGHMASK;
const int jtag = tag(j);
if (jtag == ktag) continue;
const int jtype = type(j);
const int j_index = jj - j_start;
const F_FLOAT bo_ij = d_BO(i,j_index);
delij[0] = x(j,0) - xtmp;
delij[1] = x(j,1) - ytmp;
delij[2] = x(j,2) - ztmp;
const F_FLOAT rsqij = delij[0]*delij[0] + delij[1]*delij[1] + delij[2]*delij[2];
const F_FLOAT rij = sqrt(rsqij);
// theta and derivatives
cos_theta = (delij[0]*delik[0]+delij[1]*delik[1]+delij[2]*delik[2])/(rij*rik);
if( cos_theta > 1.0 ) cos_theta = 1.0;
if( cos_theta < -1.0 ) cos_theta = -1.0;
theta = acos(cos_theta);
const F_FLOAT inv_dists = 1.0 / (rij * rik);
const F_FLOAT Cdot_inv3 = cos_theta * inv_dists * inv_dists;
for( int d = 0; d < 3; d++ ) {
dcos_theta_di[d] = -(delik[d] + delij[d]) * inv_dists + Cdot_inv3 * (rsqik * delij[d] + rsqij * delik[d]);
dcos_theta_dj[d] = delik[d] * inv_dists - Cdot_inv3 * rsqik * delij[d];
dcos_theta_dk[d] = delij[d] * inv_dists - Cdot_inv3 * rsqij * delik[d];
}
// hydrogen bond energy
const F_FLOAT p_hb1 = paramshbp(jtype,itype,ktype).p_hb1;
const F_FLOAT p_hb2 = paramshbp(jtype,itype,ktype).p_hb2;
const F_FLOAT p_hb3 = paramshbp(jtype,itype,ktype).p_hb3;
const F_FLOAT r0_hb = paramshbp(jtype,itype,ktype).r0_hb;
sin_theta2 = sin(theta/2.0);
sin_xhz4 = SQR(sin_theta2);
sin_xhz4 *= sin_xhz4;
cos_xhz1 = (1.0 - cos_theta);
exp_hb2 = exp(-p_hb2 * bo_ij);
exp_hb3 = exp(-p_hb3 * (r0_hb/rik + rik/r0_hb - 2.0));
e_hb = p_hb1 * (1.0 - exp_hb2) * exp_hb3 * sin_xhz4;
if (eflag) ev.ereax[8] += e_hb;
// hydrogen bond forces
CEhb1 = p_hb1 * p_hb2 * exp_hb2 * exp_hb3 * sin_xhz4;
CEhb2 = -p_hb1/2.0 * (1.0 - exp_hb2) * exp_hb3 * cos_xhz1;
CEhb3 = -p_hb3 * (-r0_hb/SQR(rik) + 1.0/r0_hb) * e_hb;
d_Cdbo(i,j_index) += CEhb1; // dbo term
// dcos terms
for (int d = 0; d < 3; d++) fi_tmp[d] = CEhb2 * dcos_theta_di[d];
for (int d = 0; d < 3; d++) fj_tmp[d] = CEhb2 * dcos_theta_dj[d];
for (int d = 0; d < 3; d++) fk_tmp[d] = CEhb2 * dcos_theta_dk[d];
// dr terms
for (int d = 0; d < 3; d++) fi_tmp[d] -= CEhb3/rik * delik[d];
for (int d = 0; d < 3; d++) fk_tmp[d] += CEhb3/rik * delik[d];
for (int d = 0; d < 3; d++) fitmp[d] -= fi_tmp[d];
for (int d = 0; d < 3; d++) a_f(j,d) -= fj_tmp[d];
for (int d = 0; d < 3; d++) a_f(k,d) -= fk_tmp[d];
for (int d = 0; d < 3; d++) delki[d] = -1.0 * delik[d];
for (int d = 0; d < 3; d++) delji[d] = -1.0 * delij[d];
if (eflag_atom) this->template e_tally<NEIGHFLAG>(ev,i,j,e_hb);
if (vflag_either) this->template v_tally3<NEIGHFLAG>(ev,i,j,k,fj_tmp,fk_tmp,delji,delki);
}
}
for (int d = 0; d < 3; d++) a_f(i,d) += fitmp[d];
}
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeHydrogen<NEIGHFLAG,EVFLAG>, const int &ii) const {
EV_FLOAT_REAX ev;
this->template operator()<NEIGHFLAG,EVFLAG>(PairReaxComputeHydrogen<NEIGHFLAG,EVFLAG>(), ii, ev);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxUpdateBond<NEIGHFLAG>, const int &ii) const {
Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_Cdbo = d_Cdbo;
Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_Cdbopi = d_Cdbopi;
Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_Cdbopi2 = d_Cdbopi2;
const int i = d_ilist[ii];
const int itag = tag(i);
const int j_start = d_bo_first[i];
const int j_end = j_start + d_bo_num[i];
for (int jj = j_start; jj < j_end; jj++) {
int j = d_bo_list[jj];
j &= NEIGHMASK;
const int jtag = tag(j);
const int j_index = jj - j_start;
const F_FLOAT Cdbo_i = d_Cdbo(i,j_index);
const F_FLOAT Cdbopi_i = d_Cdbopi(i,j_index);
const F_FLOAT Cdbopi2_i = d_Cdbopi2(i,j_index);
const int k_start = d_bo_first[j];
const int k_end = k_start + d_bo_num[j];
for (int kk = k_start; kk < k_end; kk++) {
int k = d_bo_list[kk];
k &= NEIGHMASK;
if (k != i) continue;
const int k_index = kk - k_start;
int flag = 0;
if (itag > jtag) {
if ((itag+jtag) % 2 == 0) flag = 1;
} else if (itag < jtag) {
if ((itag+jtag) % 2 == 1) flag = 1;
}
if (flag) {
a_Cdbo(j,k_index) += Cdbo_i;
a_Cdbopi(j,k_index) += Cdbopi_i;
a_Cdbopi2(j,k_index) += Cdbopi2_i;
}
}
}
}
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeBond1<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT_REAX& ev) const {
Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
Kokkos::View<F_FLOAT*, typename DAT::t_ffloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_CdDelta = d_CdDelta;
F_FLOAT delij[3];
F_FLOAT p_be1, p_be2, De_s, De_p, De_pp, pow_BOs_be2, exp_be12, CEbo, ebond;
const int i = d_ilist[ii];
const X_FLOAT xtmp = x(i,0);
const X_FLOAT ytmp = x(i,1);
const X_FLOAT ztmp = x(i,2);
const int itype = type(i);
const int itag = tag(i);
const F_FLOAT imass = paramssing(itype).mass;
const F_FLOAT val_i = paramssing(itype).valency;
const int j_start = d_bo_first[i];
const int j_end = j_start + d_bo_num[i];
F_FLOAT CdDelta_i = 0.0;
F_FLOAT fitmp[3];
for (int j = 0; j < 3; j++) fitmp[j] = 0.0;
for (int jj = j_start; jj < j_end; jj++) {
int j = d_bo_list[jj];
j &= NEIGHMASK;
const int jtag = tag(j);
if (itag > jtag) {
if ((itag+jtag) % 2 == 0) continue;
} else if (itag < jtag) {
if ((itag+jtag) % 2 == 1) continue;
} else {
if (x(j,2) < ztmp) continue;
if (x(j,2) == ztmp && x(j,1) < ytmp) continue;
if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp) continue;
}
const int jtype = type(j);
const int j_index = jj - j_start;
const F_FLOAT jmass = paramssing(jtype).mass;
delij[0] = x(j,0) - xtmp;
delij[1] = x(j,1) - ytmp;
delij[2] = x(j,2) - ztmp;
const F_FLOAT rsq = delij[0]*delij[0] + delij[1]*delij[1] + delij[2]*delij[2];
const F_FLOAT rij = sqrt(rsq);
const int k_start = d_bo_first[j];
const int k_end = k_start + d_bo_num[j];
const F_FLOAT p_bo1 = paramstwbp(itype,jtype).p_bo1;
const F_FLOAT p_bo2 = paramstwbp(itype,jtype).p_bo2;
const F_FLOAT p_bo3 = paramstwbp(itype,jtype).p_bo3;
const F_FLOAT p_bo4 = paramstwbp(itype,jtype).p_bo4;
const F_FLOAT p_bo5 = paramstwbp(itype,jtype).p_bo5;
const F_FLOAT p_bo6 = paramstwbp(itype,jtype).p_bo6;
const F_FLOAT r_s = paramstwbp(itype,jtype).r_s;
const F_FLOAT r_pi = paramstwbp(itype,jtype).r_pi;
const F_FLOAT r_pi2 = paramstwbp(itype,jtype).r_pi2;
// bond energy (nlocal only)
p_be1 = paramstwbp(itype,jtype).p_be1;
p_be2 = paramstwbp(itype,jtype).p_be2;
De_s = paramstwbp(itype,jtype).De_s;
De_p = paramstwbp(itype,jtype).De_p;
De_pp = paramstwbp(itype,jtype).De_pp;
const F_FLOAT BO_i = d_BO(i,j_index);
const F_FLOAT BO_s_i = d_BO_s(i,j_index);
const F_FLOAT BO_pi_i = d_BO_pi(i,j_index);
const F_FLOAT BO_pi2_i = d_BO_pi2(i,j_index);
pow_BOs_be2 = pow(BO_s_i,p_be2);
exp_be12 = exp(p_be1*(1.0-pow_BOs_be2));
CEbo = -De_s*exp_be12*(1.0-p_be1*p_be2*pow_BOs_be2);
ebond = -De_s*BO_s_i*exp_be12
-De_p*BO_pi_i
-De_pp*BO_pi2_i;
if (eflag) ev.evdwl += ebond;
//if (eflag_atom) this->template ev_tally<NEIGHFLAG>(ev,i,j,ebond,0.0,0.0,0.0,0.0);
//if (eflag_atom) this->template e_tally<NEIGHFLAG>(ev,i,j,ebond);
// calculate derivatives of Bond Orders
d_Cdbo(i,j_index) += CEbo;
d_Cdbopi(i,j_index) -= (CEbo + De_p);
d_Cdbopi2(i,j_index) -= (CEbo + De_pp);
// Stabilisation terminal triple bond
F_FLOAT estriph = 0.0;
if( BO_i >= 1.00 ) {
if( gp[37] == 2 || (imass == 12.0000 && jmass == 15.9990) ||
(jmass == 12.0000 && imass == 15.9990) ) {
const F_FLOAT exphu = exp(-gp[7] * SQR(BO_i - 2.50) );
const F_FLOAT exphua1 = exp(-gp[3] * (d_total_bo[i]-BO_i));
const F_FLOAT exphub1 = exp(-gp[3] * (d_total_bo[j]-BO_i));
const F_FLOAT exphuov = exp(gp[4] * (d_Delta[i] + d_Delta[j]));
const F_FLOAT hulpov = 1.0 / (1.0 + 25.0 * exphuov);
estriph = gp[10] * exphu * hulpov * (exphua1 + exphub1);
if (eflag) ev.evdwl += estriph;
//if (eflag_atom) this->template ev_tally<NEIGHFLAG>(ev,i,j,estriph,0.0,0.0,0.0,0.0);
//if (eflag_atom) this->template e_tally<NEIGHFLAG>(ev,i,j,estriph);
const F_FLOAT decobdbo = gp[10] * exphu * hulpov * (exphua1 + exphub1) *
( gp[3] - 2.0 * gp[7] * (BO_i-2.50) );
const F_FLOAT decobdboua = -gp[10] * exphu * hulpov *
(gp[3]*exphua1 + 25.0*gp[4]*exphuov*hulpov*(exphua1+exphub1));
const F_FLOAT decobdboub = -gp[10] * exphu * hulpov *
(gp[3]*exphub1 + 25.0*gp[4]*exphuov*hulpov*(exphua1+exphub1));
d_Cdbo(i,j_index) += decobdbo;
CdDelta_i += decobdboua;
a_CdDelta[j] += decobdboub;
}
}
const F_FLOAT eng_tmp = ebond + estriph;
if (eflag_atom) this->template e_tally<NEIGHFLAG>(ev,i,j,eng_tmp);
}
a_CdDelta[i] += CdDelta_i;
}
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeBond1<NEIGHFLAG,EVFLAG>, const int &ii) const {
EV_FLOAT_REAX ev;
this->template operator()<NEIGHFLAG,EVFLAG>(PairReaxComputeBond1<NEIGHFLAG,EVFLAG>(), ii, ev);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeBond2<NEIGHFLAG,EVFLAG>, const int &ii, EV_FLOAT_REAX& ev) const {
Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
F_FLOAT C12, C34, C56, BO_s, BO_pi, BO_pi2, BO, delij[3], delik[3], deljk[3], tmpvec[3];
F_FLOAT dBOp_i[3], dBOp_k[3], dln_BOp_pi[3], dln_BOp_pi2[3];
const int i = d_ilist[ii];
const X_FLOAT xtmp = x(i,0);
const X_FLOAT ytmp = x(i,1);
const X_FLOAT ztmp = x(i,2);
const int itype = type(i);
const int itag = tag(i);
const F_FLOAT imass = paramssing(itype).mass;
const F_FLOAT val_i = paramssing(itype).valency;
const int j_start = d_bo_first[i];
const int j_end = j_start + d_bo_num[i];
F_FLOAT CdDelta_i = d_CdDelta[i];
F_FLOAT fitmp[3];
for (int j = 0; j < 3; j++) fitmp[j] = 0.0;
for (int jj = j_start; jj < j_end; jj++) {
int j = d_bo_list[jj];
j &= NEIGHMASK;
const int jtag = tag(j);
if (itag > jtag) {
if ((itag+jtag) % 2 == 0) continue;
} else if (itag < jtag) {
if ((itag+jtag) % 2 == 1) continue;
} else {
if (x(j,2) < ztmp) continue;
if (x(j,2) == ztmp && x(j,1) < ytmp) continue;
if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp) continue;
}
const int jtype = type(j);
const int j_index = jj - j_start;
const F_FLOAT jmass = paramssing(jtype).mass;
F_FLOAT CdDelta_j = d_CdDelta[j];
delij[0] = x(j,0) - xtmp;
delij[1] = x(j,1) - ytmp;
delij[2] = x(j,2) - ztmp;
const F_FLOAT rsq = delij[0]*delij[0] + delij[1]*delij[1] + delij[2]*delij[2];
const F_FLOAT rij = sqrt(rsq);
const int k_start = d_bo_first[j];
const int k_end = k_start + d_bo_num[j];
F_FLOAT coef_C1dbo, coef_C2dbo, coef_C3dbo, coef_C1dbopi, coef_C2dbopi, coef_C3dbopi, coef_C4dbopi;
F_FLOAT coef_C1dbopi2, coef_C2dbopi2, coef_C3dbopi2, coef_C4dbopi2, coef_C1dDelta, coef_C2dDelta, coef_C3dDelta;
coef_C1dbo = coef_C2dbo = coef_C3dbo = 0.0;
coef_C1dbopi = coef_C2dbopi = coef_C3dbopi = coef_C4dbopi = 0.0;
coef_C1dbopi2 = coef_C2dbopi2 = coef_C3dbopi2 = coef_C4dbopi2 = 0.0;
coef_C1dDelta = coef_C2dDelta = coef_C3dDelta = 0.0;
// total forces on i, j, k (nlocal + nghost, from Add_dBond_to_Forces))
const F_FLOAT Cdbo_ij = d_Cdbo(i,j_index);
coef_C1dbo = d_C1dbo(i,j_index) * (Cdbo_ij);
coef_C2dbo = d_C2dbo(i,j_index) * (Cdbo_ij);
coef_C3dbo = d_C3dbo(i,j_index) * (Cdbo_ij);
const F_FLOAT Cdbopi_ij = d_Cdbopi(i,j_index);
coef_C1dbopi = d_C1dbopi(i,j_index) * (Cdbopi_ij);
coef_C2dbopi = d_C2dbopi(i,j_index) * (Cdbopi_ij);
coef_C3dbopi = d_C3dbopi(i,j_index) * (Cdbopi_ij);
coef_C4dbopi = d_C4dbopi(i,j_index) * (Cdbopi_ij);
const F_FLOAT Cdbopi2_ij = d_Cdbopi2(i,j_index);
coef_C1dbopi2 = d_C1dbopi2(i,j_index) * (Cdbopi2_ij);
coef_C2dbopi2 = d_C2dbopi2(i,j_index) * (Cdbopi2_ij);
coef_C3dbopi2 = d_C3dbopi2(i,j_index) * (Cdbopi2_ij);
coef_C4dbopi2 = d_C4dbopi2(i,j_index) * (Cdbopi2_ij);
const F_FLOAT coeff_CdDelta_ij = CdDelta_i + CdDelta_j;
coef_C1dDelta = d_C1dbo(i,j_index) * (coeff_CdDelta_ij);
coef_C2dDelta = d_C2dbo(i,j_index) * (coeff_CdDelta_ij);
coef_C3dDelta = d_C3dbo(i,j_index) * (coeff_CdDelta_ij);
F_FLOAT temp[3];
dln_BOp_pi[0] = d_dln_BOp_pix(i,j_index);
dln_BOp_pi[1] = d_dln_BOp_piy(i,j_index);
dln_BOp_pi[2] = d_dln_BOp_piz(i,j_index);
dln_BOp_pi2[0] = d_dln_BOp_pi2x(i,j_index);
dln_BOp_pi2[1] = d_dln_BOp_pi2y(i,j_index);
dln_BOp_pi2[2] = d_dln_BOp_pi2z(i,j_index);
dBOp_i[0] = d_dBOpx(i,j_index);
dBOp_i[1] = d_dBOpy(i,j_index);
dBOp_i[2] = d_dBOpz(i,j_index);
// forces on i
for (int d = 0; d < 3; d++) temp[d] = coef_C1dbo * dBOp_i[d];
for (int d = 0; d < 3; d++) temp[d] += coef_C2dbo * d_dDeltap_self(i,d);
for (int d = 0; d < 3; d++) temp[d] += coef_C1dDelta * dBOp_i[d];
for (int d = 0; d < 3; d++) temp[d] += coef_C2dDelta * d_dDeltap_self(i,d);
for (int d = 0; d < 3; d++) temp[d] += coef_C1dbopi * dln_BOp_pi[d];
for (int d = 0; d < 3; d++) temp[d] += coef_C2dbopi * dBOp_i[d];
for (int d = 0; d < 3; d++) temp[d] += coef_C3dbopi * d_dDeltap_self(i,d);
for (int d = 0; d < 3; d++) temp[d] += coef_C1dbopi2 * dln_BOp_pi2[d];
for (int d = 0; d < 3; d++) temp[d] += coef_C2dbopi2 * dBOp_i[d];
for (int d = 0; d < 3; d++) temp[d] += coef_C3dbopi2 * d_dDeltap_self(i,d);
if (EVFLAG)
if (vflag_either) this->template v_tally<NEIGHFLAG>(ev,i,temp,delij);
fitmp[0] -= temp[0];
fitmp[1] -= temp[1];
fitmp[2] -= temp[2];
// forces on j
for (int d = 0; d < 3; d++) temp[d] = -coef_C1dbo * dBOp_i[d];
for (int d = 0; d < 3; d++) temp[d] += coef_C3dbo * d_dDeltap_self(j,d);
for (int d = 0; d < 3; d++) temp[d] -= coef_C1dDelta * dBOp_i[d];
for (int d = 0; d < 3; d++) temp[d] += coef_C3dDelta * d_dDeltap_self(j,d);
for (int d = 0; d < 3; d++) temp[d] -= coef_C1dbopi * dln_BOp_pi[d];
for (int d = 0; d < 3; d++) temp[d] -= coef_C2dbopi * dBOp_i[d];
for (int d = 0; d < 3; d++) temp[d] += coef_C4dbopi * d_dDeltap_self(j,d);
for (int d = 0; d < 3; d++) temp[d] -= coef_C1dbopi2 * dln_BOp_pi2[d];
for (int d = 0; d < 3; d++) temp[d] -= coef_C2dbopi2 * dBOp_i[d];
for (int d = 0; d < 3; d++) temp[d] += coef_C4dbopi2 * d_dDeltap_self(j,d);
a_f(j,0) -= temp[0];
a_f(j,1) -= temp[1];
a_f(j,2) -= temp[2];
if (EVFLAG)
if (vflag_either) {
for (int d = 0; d < 3; d++) tmpvec[d] = -delij[d];
this->template v_tally<NEIGHFLAG>(ev,j,temp,tmpvec);
}
// forces on k: i neighbor
for (int kk = j_start; kk < j_end; kk++) {
int k = d_bo_list[kk];
k &= NEIGHMASK;
const int k_index = kk - j_start;
dBOp_k[0] = d_dBOpx(i,k_index);
dBOp_k[1] = d_dBOpy(i,k_index);
dBOp_k[2] = d_dBOpz(i,k_index);
const F_FLOAT coef_all = -coef_C2dbo - coef_C2dDelta - coef_C3dbopi - coef_C3dbopi2;
for (int d = 0; d < 3; d++) temp[d] = coef_all * dBOp_k[d];
a_f(k,0) -= temp[0];
a_f(k,1) -= temp[1];
a_f(k,2) -= temp[2];
if (EVFLAG)
if (vflag_either) {
delik[0] = x(k,0) - xtmp;
delik[1] = x(k,1) - ytmp;
delik[2] = x(k,2) - ztmp;
for (int d = 0; d < 3; d++) tmpvec[d] = x(j,d) - x(k,d) - delik[d];
this->template v_tally<NEIGHFLAG>(ev,k,temp,tmpvec);
}
}
// forces on k: j neighbor
for (int kk = k_start; kk < k_end; kk++) {
int k = d_bo_list[kk];
k &= NEIGHMASK;
const int k_index = kk - k_start;
dBOp_k[0] = d_dBOpx(j,k_index);
dBOp_k[1] = d_dBOpy(j,k_index);
dBOp_k[2] = d_dBOpz(j,k_index);
const F_FLOAT coef_all = -coef_C3dbo - coef_C3dDelta - coef_C4dbopi - coef_C4dbopi2;
for (int d = 0; d < 3; d++) temp[d] = coef_all * dBOp_k[d];
a_f(k,0) -= temp[0];
a_f(k,1) -= temp[1];
a_f(k,2) -= temp[2];
if (EVFLAG) {
if (vflag_either) {
for (int d = 0; d < 3; d++) deljk[d] = x(k,d) - x(j,d);
const F_FLOAT rsqjk = deljk[0]*deljk[0] + deljk[1]*deljk[1] + deljk[2]*deljk[2];
for (int d = 0; d < 3; d++) tmpvec[d] = x(i,d) - x(k,d) - deljk[d];
this->template v_tally<NEIGHFLAG>(ev,k,temp,tmpvec);
}
}
}
}
for (int d = 0; d < 3; d++) a_f(i,d) += fitmp[d];
}
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeBond2<NEIGHFLAG,EVFLAG>, const int &ii) const {
EV_FLOAT_REAX ev;
this->template operator()<NEIGHFLAG,EVFLAG>(PairReaxComputeBond2<NEIGHFLAG,EVFLAG>(), ii, ev);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::ev_tally(EV_FLOAT_REAX &ev, const int &i, const int &j,
const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
const F_FLOAT &dely, const F_FLOAT &delz) const
{
const int VFLAG = vflag_either;
// The eatom and vatom arrays are atomic for Half/Thread neighbor style
Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_eatom = v_eatom;
Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_vatom = v_vatom;
if (eflag_atom) {
const E_FLOAT epairhalf = 0.5 * epair;
a_eatom[i] += epairhalf;
if (NEIGHFLAG != FULL) a_eatom[j] += epairhalf;
}
if (VFLAG) {
const E_FLOAT v0 = delx*delx*fpair;
const E_FLOAT v1 = dely*dely*fpair;
const E_FLOAT v2 = delz*delz*fpair;
const E_FLOAT v3 = delx*dely*fpair;
const E_FLOAT v4 = delx*delz*fpair;
const E_FLOAT v5 = dely*delz*fpair;
if (vflag_global) {
if (NEIGHFLAG != FULL) {
ev.v[0] += v0;
ev.v[1] += v1;
ev.v[2] += v2;
ev.v[3] += v3;
ev.v[4] += v4;
ev.v[5] += v5;
} else {
ev.v[0] += 0.5*v0;
ev.v[1] += 0.5*v1;
ev.v[2] += 0.5*v2;
ev.v[3] += 0.5*v3;
ev.v[4] += 0.5*v4;
ev.v[5] += 0.5*v5;
}
}
if (vflag_atom) {
a_vatom(i,0) += 0.5*v0;
a_vatom(i,1) += 0.5*v1;
a_vatom(i,2) += 0.5*v2;
a_vatom(i,3) += 0.5*v3;
a_vatom(i,4) += 0.5*v4;
a_vatom(i,5) += 0.5*v5;
if (NEIGHFLAG != FULL) {
a_vatom(j,0) += 0.5*v0;
a_vatom(j,1) += 0.5*v1;
a_vatom(j,2) += 0.5*v2;
a_vatom(j,3) += 0.5*v3;
a_vatom(j,4) += 0.5*v4;
a_vatom(j,5) += 0.5*v5;
}
}
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::e_tally(EV_FLOAT_REAX &ev, const int &i, const int &j,
const F_FLOAT &epair) const
{
// The eatom array is atomic for Half/Thread neighbor style
if (eflag_atom) {
Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_eatom = v_eatom;
const E_FLOAT epairhalf = 0.5 * epair;
a_eatom[i] += epairhalf;
a_eatom[j] += epairhalf;
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::e_tally_single(EV_FLOAT_REAX &ev, const int &i,
const F_FLOAT &epair) const
{
// The eatom array is atomic for Half/Thread neighbor style
Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_eatom = v_eatom;
a_eatom[i] += epair;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::v_tally(EV_FLOAT_REAX &ev, const int &i,
F_FLOAT *fi, F_FLOAT *drij) const
{
F_FLOAT v[6];
v[0] = 0.5*drij[0]*fi[0];
v[1] = 0.5*drij[1]*fi[1];
v[2] = 0.5*drij[2]*fi[2];
v[3] = 0.5*drij[0]*fi[1];
v[4] = 0.5*drij[0]*fi[2];
v[5] = 0.5*drij[1]*fi[2];
if (vflag_global) {
ev.v[0] += v[0];
ev.v[1] += v[1];
ev.v[2] += v[2];
ev.v[3] += v[3];
ev.v[4] += v[4];
ev.v[5] += v[5];
}
if (vflag_atom) {
Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_vatom = v_vatom;
a_vatom(i,0) += v[0]; a_vatom(i,1) += v[1]; a_vatom(i,2) += v[2];
a_vatom(i,3) += v[3]; a_vatom(i,4) += v[4]; a_vatom(i,5) += v[5];
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::v_tally3(EV_FLOAT_REAX &ev, const int &i, const int &j, const int &k,
F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const
{
// The eatom and vatom arrays are atomic for Half/Thread neighbor style
Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_vatom = v_vatom;
F_FLOAT v[6];
v[0] = (drij[0]*fj[0] + drik[0]*fk[0]);
v[1] = (drij[1]*fj[1] + drik[1]*fk[1]);
v[2] = (drij[2]*fj[2] + drik[2]*fk[2]);
v[3] = (drij[0]*fj[1] + drik[0]*fk[1]);
v[4] = (drij[0]*fj[2] + drik[0]*fk[2]);
v[5] = (drij[1]*fj[2] + drik[1]*fk[2]);
if (vflag_global) {
ev.v[0] += v[0];
ev.v[1] += v[1];
ev.v[2] += v[2];
ev.v[3] += v[3];
ev.v[4] += v[4];
ev.v[5] += v[5];
}
if (vflag_atom) {
a_vatom(i,0) += THIRD*v[0]; a_vatom(i,1) += THIRD*v[1]; a_vatom(i,2) += THIRD*v[2];
a_vatom(i,3) += THIRD*v[3]; a_vatom(i,4) += THIRD*v[4]; a_vatom(i,5) += THIRD*v[5];
a_vatom(j,0) += THIRD*v[0]; a_vatom(j,1) += THIRD*v[1]; a_vatom(j,2) += THIRD*v[2];
a_vatom(j,3) += THIRD*v[3]; a_vatom(j,4) += THIRD*v[4]; a_vatom(j,5) += THIRD*v[5];
a_vatom(k,0) += THIRD*v[0]; a_vatom(k,1) += THIRD*v[1]; a_vatom(k,2) += THIRD*v[2];
a_vatom(k,3) += THIRD*v[3]; a_vatom(k,4) += THIRD*v[4]; a_vatom(k,5) += THIRD*v[5];
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::v_tally4(EV_FLOAT_REAX &ev, const int &i, const int &j, const int &k,
const int &l, F_FLOAT *fi, F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *dril, F_FLOAT *drjl, F_FLOAT *drkl) const
{
// The vatom array is atomic for Half/Thread neighbor style
F_FLOAT v[6];
v[0] = 0.25 * (dril[0]*fi[0] + drjl[0]*fj[0] + drkl[0]*fk[0]);
v[1] = 0.25 * (dril[1]*fi[1] + drjl[1]*fj[1] + drkl[1]*fk[1]);
v[2] = 0.25 * (dril[2]*fi[2] + drjl[2]*fj[2] + drkl[2]*fk[2]);
v[3] = 0.25 * (dril[0]*fi[1] + drjl[0]*fj[1] + drkl[0]*fk[1]);
v[4] = 0.25 * (dril[0]*fi[2] + drjl[0]*fj[2] + drkl[0]*fk[2]);
v[5] = 0.25 * (dril[1]*fi[2] + drjl[1]*fj[2] + drkl[1]*fk[2]);
if (vflag_global) {
ev.v[0] += v[0];
ev.v[1] += v[1];
ev.v[2] += v[2];
ev.v[3] += v[3];
ev.v[4] += v[4];
ev.v[5] += v[5];
}
if (vflag_atom) {
Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_vatom = v_vatom;
a_vatom(i,0) += v[0]; a_vatom(i,1) += v[1]; a_vatom(i,2) += v[2];
a_vatom(i,3) += v[3]; a_vatom(i,4) += v[4]; a_vatom(i,5) += v[5];
a_vatom(j,0) += v[0]; a_vatom(j,1) += v[1]; a_vatom(j,2) += v[2];
a_vatom(j,3) += v[3]; a_vatom(j,4) += v[4]; a_vatom(j,5) += v[5];
a_vatom(k,0) += v[0]; a_vatom(k,1) += v[1]; a_vatom(k,2) += v[2];
a_vatom(k,3) += v[3]; a_vatom(k,4) += v[4]; a_vatom(k,5) += v[5];
a_vatom(l,0) += v[0]; a_vatom(l,1) += v[1]; a_vatom(l,2) += v[2];
a_vatom(l,3) += v[3]; a_vatom(l,4) += v[4]; a_vatom(l,5) += v[5];
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::v_tally3_atom(EV_FLOAT_REAX &ev, const int &i, const int &j, const int &k,
F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drjk) const
{
F_FLOAT v[6];
v[0] = THIRD * (drji[0]*fj[0] + drjk[0]*fk[0]);
v[1] = THIRD * (drji[1]*fj[1] + drjk[1]*fk[1]);
v[2] = THIRD * (drji[2]*fj[2] + drjk[2]*fk[2]);
v[3] = THIRD * (drji[0]*fj[1] + drjk[0]*fk[1]);
v[4] = THIRD * (drji[0]*fj[2] + drjk[0]*fk[2]);
v[5] = THIRD * (drji[1]*fj[2] + drjk[1]*fk[2]);
if (vflag_global) {
ev.v[0] += v[0];
ev.v[1] += v[1];
ev.v[2] += v[2];
ev.v[3] += v[3];
ev.v[4] += v[4];
ev.v[5] += v[5];
}
if (vflag_atom) {
d_vatom(i,0) += v[0]; d_vatom(i,1) += v[1]; d_vatom(i,2) += v[2];
d_vatom(i,3) += v[3]; d_vatom(i,4) += v[4]; d_vatom(i,5) += v[5];
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void *PairReaxCKokkos<DeviceType>::extract(const char *str, int &dim)
{
dim = 1;
if (strcmp(str,"chi") == 0 && chi) {
for (int i = 1; i <= atom->ntypes; i++)
if (map[i] >= 0) chi[i] = system->reax_param.sbp[map[i]].chi;
else chi[i] = 0.0;
return (void *) chi;
}
if (strcmp(str,"eta") == 0 && eta) {
for (int i = 1; i <= atom->ntypes; i++)
if (map[i] >= 0) eta[i] = system->reax_param.sbp[map[i]].eta;
else eta[i] = 0.0;
return (void *) eta;
}
if (strcmp(str,"gamma") == 0 && gamma) {
for (int i = 1; i <= atom->ntypes; i++)
if (map[i] >= 0) gamma[i] = system->reax_param.sbp[map[i]].gamma;
else gamma[i] = 0.0;
return (void *) gamma;
}
return NULL;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
double PairReaxCKokkos<DeviceType>::memory_usage()
{
double bytes = 0.0;
if (cut_hbsq > 0.0) {
bytes += nmax*3*sizeof(int);
bytes += maxhb*nmax*sizeof(int);
}
bytes += nmax*2*sizeof(int);
bytes += maxbo*nmax*sizeof(int);
bytes += nmax*17*sizeof(F_FLOAT);
bytes += maxbo*nmax*34*sizeof(F_FLOAT);
return bytes;
}
/* ---------------------------------------------------------------------- */
template class PairReaxCKokkos<LMPDeviceType>;
#ifdef KOKKOS_HAVE_CUDA
template class PairReaxCKokkos<LMPHostType>;
#endif
}

Event Timeline