Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F106906037
pair_coul_wolf_kokkos.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Wed, Apr 2, 09:00
Size
13 KB
Mime Type
text/x-c
Expires
Fri, Apr 4, 09:00 (2 d)
Engine
blob
Format
Raw Data
Handle
25302504
Attached To
rLAMMPS lammps
pair_coul_wolf_kokkos.cpp
View Options
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Stan Moore (SNL)
------------------------------------------------------------------------- */
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "pair_coul_wolf_kokkos.h"
#include "kokkos.h"
#include "atom_kokkos.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list_kokkos.h"
#include "neigh_request.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "math_const.h"
#include "memory.h"
#include "error.h"
#include "atom_masks.h"
using namespace LAMMPS_NS;
using namespace MathConst;
#define KOKKOS_CUDA_MAX_THREADS 256
#define KOKKOS_CUDA_MIN_BLOCKS 8
/* ---------------------------------------------------------------------- */
template<class DeviceType>
PairCoulWolfKokkos<DeviceType>::PairCoulWolfKokkos(LAMMPS *lmp) : PairCoulWolf(lmp)
{
respa_enable = 0;
atomKK = (AtomKokkos *) atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = X_MASK | F_MASK | TYPE_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK;
datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
PairCoulWolfKokkos<DeviceType>::~PairCoulWolfKokkos()
{
if (!copymode) {
memory->destroy_kokkos(k_eatom,eatom);
memory->destroy_kokkos(k_vatom,vatom);
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void PairCoulWolfKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
{
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
// reallocate per-atom arrays if necessary
if (eflag_atom) {
memory->destroy_kokkos(k_eatom,eatom);
memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
d_eatom = k_eatom.d_view;
}
if (vflag_atom) {
memory->destroy_kokkos(k_vatom,vatom);
memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom");
d_vatom = k_vatom.d_view;
}
atomKK->sync(execution_space,datamask_read);
if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
else atomKK->modified(execution_space,F_MASK);
// shifted coulombic energy
e_shift = erfc(alf*cut_coul)/cut_coul;
f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
cut_coul;
x = atomKK->k_x.view<DeviceType>();
f = atomKK->k_f.view<DeviceType>();
q = atomKK->k_q.view<DeviceType>();
nlocal = atom->nlocal;
nall = atom->nlocal + atom->nghost;
newton_pair = force->newton_pair;
NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
d_numneigh = k_list->d_numneigh;
d_neighbors = k_list->d_neighbors;
d_ilist = k_list->d_ilist;
special_coul[0] = force->special_coul[0];
special_coul[1] = force->special_coul[1];
special_coul[2] = force->special_coul[2];
special_coul[3] = force->special_coul[3];
qqrd2e = force->qqrd2e;
int inum = list->inum;
// Call cleanup_copy which sets allocations NULL which are destructed by the PairStyle
k_list->clean_copy();
copymode = 1;
// loop over neighbors of my atoms
EV_FLOAT ev;
// compute kernel A
if (evflag) {
if (neighflag == HALF) {
if (newton_pair) {
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<HALF,1,1> >(0,inum),*this,ev);
} else {
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<HALF,0,1> >(0,inum),*this,ev);
}
} else if (neighflag == HALFTHREAD) {
if (newton_pair) {
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<HALFTHREAD,1,1> >(0,inum),*this,ev);
} else {
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<HALFTHREAD,0,1> >(0,inum),*this,ev);
}
} else if (neighflag == FULL) {
if (newton_pair) {
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<FULL,1,1> >(0,inum),*this,ev);
} else {
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<FULL,0,1> >(0,inum),*this,ev);
}
}
} else {
if (neighflag == HALF) {
if (newton_pair) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<HALF,1,0> >(0,inum),*this);
} else {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<HALF,0,0> >(0,inum),*this);
}
} else if (neighflag == HALFTHREAD) {
if (newton_pair) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<HALFTHREAD,1,0> >(0,inum),*this);
} else {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<HALFTHREAD,0,0> >(0,inum),*this);
}
} else if (neighflag == FULL) {
if (newton_pair) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<FULL,1,0> >(0,inum),*this);
} else {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<FULL,0,0> >(0,inum),*this);
}
}
}
if (eflag_global) eng_coul += ev.ecoul;
if (vflag_global) {
virial[0] += ev.v[0];
virial[1] += ev.v[1];
virial[2] += ev.v[2];
virial[3] += ev.v[3];
virial[4] += ev.v[4];
virial[5] += ev.v[5];
}
if (vflag_fdotr) pair_virial_fdotr_compute(this);
if (eflag_atom) {
k_eatom.template modify<DeviceType>();
k_eatom.template sync<LMPHostType>();
}
if (vflag_atom) {
k_vatom.template modify<DeviceType>();
k_vatom.template sync<LMPHostType>();
}
copymode = 0;
}
/* ----------------------------------------------------------------------
init specific to this pair style
------------------------------------------------------------------------- */
template<class DeviceType>
void PairCoulWolfKokkos<DeviceType>::init_style()
{
PairCoulWolf::init_style();
// irequest = neigh request made by parent class
neighflag = lmp->kokkos->neighflag;
int irequest = neighbor->nrequest - 1;
neighbor->requests[irequest]->
kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value &&
!Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
neighbor->requests[irequest]->
kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with coul/wolf/kk");
}
}
/* ---------------------------------------------------------------------- */
////Specialisation for Neighborlist types Half, HalfThread, Full
template<class DeviceType>
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void PairCoulWolfKokkos<DeviceType>::operator()(TagPairCoulWolfKernelA<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii, EV_FLOAT& ev) const {
// The f array is atomic for Half/Thread neighbor style
Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_eatom = k_eatom.view<DeviceType>();
const int i = d_ilist[ii];
const X_FLOAT xtmp = x(i,0);
const X_FLOAT ytmp = x(i,1);
const X_FLOAT ztmp = x(i,2);
const F_FLOAT qtmp = q[i];
if (eflag) {
const F_FLOAT qisq = qtmp*qtmp;
const F_FLOAT e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e;
if (eflag_global)
ev.ecoul += e_self;
if (eflag_atom)
v_eatom[i] += e_self;
}
//const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i);
const int jnum = d_numneigh[i];
F_FLOAT fxtmp = 0.0;
F_FLOAT fytmp = 0.0;
F_FLOAT fztmp = 0.0;
for (int jj = 0; jj < jnum; jj++) {
//int j = d_neighbors_i(jj);
int j = d_neighbors(i,jj);
const F_FLOAT factor_coul = special_coul[sbmask(j)];
j &= NEIGHMASK;
const X_FLOAT delx = xtmp - x(j,0);
const X_FLOAT dely = ytmp - x(j,1);
const X_FLOAT delz = ztmp - x(j,2);
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
if (rsq < cut_coulsq) {
const F_FLOAT r = sqrt(rsq);
const F_FLOAT prefactor = qqrd2e*qtmp*q[j]/r;
const F_FLOAT erfcc = erfc(alf*r);
const F_FLOAT erfcd = exp(-alf*alf*r*r);
const F_FLOAT v_sh = (erfcc - e_shift*r) * prefactor;
const F_FLOAT dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
F_FLOAT forcecoul = dvdrr*rsq*prefactor;
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
const F_FLOAT fpair = forcecoul / rsq;
fxtmp += delx*fpair;
fytmp += dely*fpair;
fztmp += delz*fpair;
if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) {
a_f(j,0) -= delx*fpair;
a_f(j,1) -= dely*fpair;
a_f(j,2) -= delz*fpair;
}
if (EVFLAG) {
F_FLOAT ecoul = v_sh;
if (eflag) {
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
ev.ecoul += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<nlocal)))?1.0:0.5)*ecoul;
}
if (vflag_either || eflag_atom) this->template ev_tally<NEIGHFLAG,NEWTON_PAIR>(ev,i,j,ecoul,fpair,delx,dely,delz);
}
}
}
a_f(i,0) += fxtmp;
a_f(i,1) += fytmp;
a_f(i,2) += fztmp;
}
template<class DeviceType>
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void PairCoulWolfKokkos<DeviceType>::operator()(TagPairCoulWolfKernelA<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii) const {
EV_FLOAT ev;
this->template operator()<NEIGHFLAG,NEWTON_PAIR,EVFLAG>(TagPairCoulWolfKernelA<NEIGHFLAG,NEWTON_PAIR,EVFLAG>(), ii, ev);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG, int NEWTON_PAIR>
KOKKOS_INLINE_FUNCTION
void PairCoulWolfKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &j,
const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
const F_FLOAT &dely, const F_FLOAT &delz) const
{
const int EFLAG = eflag;
const int VFLAG = vflag_either;
// The eatom and vatom arrays are atomic for Half/Thread neighbor style
Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_eatom = k_eatom.view<DeviceType>();
Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>();
if (EFLAG) {
if (eflag_atom) {
const E_FLOAT epairhalf = 0.5 * epair;
if (NEIGHFLAG!=FULL) {
if (NEWTON_PAIR || i < nlocal) v_eatom[i] += epairhalf;
if (NEWTON_PAIR || j < nlocal) v_eatom[j] += epairhalf;
} else {
v_eatom[i] += epairhalf;
}
}
}
if (VFLAG) {
const E_FLOAT v0 = delx*delx*fpair;
const E_FLOAT v1 = dely*dely*fpair;
const E_FLOAT v2 = delz*delz*fpair;
const E_FLOAT v3 = delx*dely*fpair;
const E_FLOAT v4 = delx*delz*fpair;
const E_FLOAT v5 = dely*delz*fpair;
if (vflag_global) {
if (NEIGHFLAG!=FULL) {
if (NEWTON_PAIR || i < nlocal) {
ev.v[0] += 0.5*v0;
ev.v[1] += 0.5*v1;
ev.v[2] += 0.5*v2;
ev.v[3] += 0.5*v3;
ev.v[4] += 0.5*v4;
ev.v[5] += 0.5*v5;
}
if (NEWTON_PAIR || j < nlocal) {
ev.v[0] += 0.5*v0;
ev.v[1] += 0.5*v1;
ev.v[2] += 0.5*v2;
ev.v[3] += 0.5*v3;
ev.v[4] += 0.5*v4;
ev.v[5] += 0.5*v5;
}
} else {
ev.v[0] += 0.5*v0;
ev.v[1] += 0.5*v1;
ev.v[2] += 0.5*v2;
ev.v[3] += 0.5*v3;
ev.v[4] += 0.5*v4;
ev.v[5] += 0.5*v5;
}
}
if (vflag_atom) {
if (NEIGHFLAG!=FULL) {
if (NEWTON_PAIR || i < nlocal) {
v_vatom(i,0) += 0.5*v0;
v_vatom(i,1) += 0.5*v1;
v_vatom(i,2) += 0.5*v2;
v_vatom(i,3) += 0.5*v3;
v_vatom(i,4) += 0.5*v4;
v_vatom(i,5) += 0.5*v5;
}
if (NEWTON_PAIR || j < nlocal) {
v_vatom(j,0) += 0.5*v0;
v_vatom(j,1) += 0.5*v1;
v_vatom(j,2) += 0.5*v2;
v_vatom(j,3) += 0.5*v3;
v_vatom(j,4) += 0.5*v4;
v_vatom(j,5) += 0.5*v5;
}
} else {
v_vatom(i,0) += 0.5*v0;
v_vatom(i,1) += 0.5*v1;
v_vatom(i,2) += 0.5*v2;
v_vatom(i,3) += 0.5*v3;
v_vatom(i,4) += 0.5*v4;
v_vatom(i,5) += 0.5*v5;
}
}
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
int PairCoulWolfKokkos<DeviceType>::sbmask(const int& j) const {
return j >> SBBITS & 3;
}
namespace LAMMPS_NS {
template class PairCoulWolfKokkos<LMPDeviceType>;
#ifdef KOKKOS_HAVE_CUDA
template class PairCoulWolfKokkos<LMPHostType>;
#endif
}
Event Timeline
Log In to Comment