Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F90746615
pair_tersoff_table_omp.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, Nov 4, 09:42
Size
19 KB
Mime Type
text/x-c++
Expires
Wed, Nov 6, 09:42 (2 d)
Engine
blob
Format
Raw Data
Handle
22128363
Attached To
rLAMMPS lammps
pair_tersoff_table_omp.cpp
View Options
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
This software is distributed under the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#include "math.h"
#include "pair_tersoff_table_omp.h"
#include "atom.h"
#include "comm.h"
#include "error.h"
#include "force.h"
#include "memory.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "suffix.h"
using namespace LAMMPS_NS;
#define GRIDSTART 0.1
#define GRIDDENSITY_FCUTOFF 5000
#define GRIDDENSITY_EXP 12000
#define GRIDDENSITY_GTETA 12000
#define GRIDDENSITY_BIJ 7500
// max number of interaction per atom for environment potential
#define leadingDimensionInteractionList 64
/* ---------------------------------------------------------------------- */
PairTersoffTableOMP::PairTersoffTableOMP(LAMMPS *lmp) :
PairTersoffTable(lmp), ThrOMP(lmp, THR_PAIR)
{
suffix_flag |= Suffix::OMP;
respa_enable = 0;
thrGtetaFunction = thrGtetaFunctionDerived = NULL;
thrCutoffFunction = thrCutoffFunctionDerived = NULL;
}
/* ---------------------------------------------------------------------- */
PairTersoffTableOMP::~PairTersoffTableOMP()
{
if (allocated) {
deallocatePreLoops();
}
}
/* ---------------------------------------------------------------------- */
void PairTersoffTableOMP::compute(int eflag, int vflag)
{
if (eflag || vflag) {
ev_setup(eflag,vflag);
} else evflag = vflag_fdotr = vflag_atom = 0;
const int nall = atom->nlocal + atom->nghost;
const int nthreads = comm->nthreads;
const int inum = list->inum;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
{
int ifrom, ito, tid;
loop_setup_thr(ifrom, ito, tid, inum, nthreads);
ThrData *thr = fix->get_thr(tid);
ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
if (evflag)
if (vflag_atom) eval<1,1>(ifrom, ito, thr);
else eval<1,0>(ifrom, ito, thr);
else eval<0,0>(ifrom, ito, thr);
reduce_thr(this, eflag, vflag, thr);
} // end of omp parallel region
}
template <int EVFLAG, int VFLAG_ATOM>
void PairTersoffTableOMP::eval(int iifrom, int iito, ThrData * const thr)
{
int i,j,k,ii,jnum;
int itype,jtype,ktype,ijparam,ikparam,ijkparam;
double xtmp,ytmp,ztmp;
double fxtmp,fytmp,fztmp;
int *ilist,*jlist,*numneigh,**firstneigh;
int interpolIDX;
double directorCos_ij_x, directorCos_ij_y, directorCos_ij_z, directorCos_ik_x, directorCos_ik_y, directorCos_ik_z;
double invR_ij, invR_ik, cosTeta;
double repulsivePotential, attractivePotential;
double exponentRepulsivePotential, exponentAttractivePotential,interpolTMP,interpolDeltaX,interpolY1;
double interpolY2, cutoffFunctionIJ, attractiveExponential, repulsiveExponential, cutoffFunctionDerivedIJ,zeta;
double gtetaFunctionIJK,gtetaFunctionDerivedIJK,cutoffFunctionIK;
double cutoffFunctionDerivedIK,factor_force3_ij,factor_1_force3_ik;
double factor_2_force3_ik,betaZetaPowerIJK,betaZetaPowerDerivedIJK,factor_force_tot;
double factor_force_ij;
double gtetaFunctionDerived_temp,gtetaFunction_temp;
double evdwl = 0.0;
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
const int * _noalias const type = atom->type;
const int nlocal = atom->nlocal;
const int tid = thr->get_tid();
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
// loop over full neighbor list of my atoms
for (ii = iifrom; ii < iito; ii++) {
i = ilist[ii];
itype = map[type[i]];
xtmp = x[i].x;
ytmp = x[i].y;
ztmp = x[i].z;
fxtmp = fytmp = fztmp = 0.0;
jlist = firstneigh[i];
jnum = numneigh[i];
if (check_error_thr((jnum > leadingDimensionInteractionList), tid,
FLERR,"Too many neighbors for interaction list.\n"
"Check your system or increase 'leadingDimension"
"InteractionList'"))
return;
// Pre-calculate gteta and cutoff function
for (int neighbor_j = 0; neighbor_j < jnum; neighbor_j++) {
double dr_ij[3], r_ij;
j = jlist[neighbor_j];
j &= NEIGHMASK;
dr_ij[0] = xtmp - x[j].x;
dr_ij[1] = ytmp - x[j].y;
dr_ij[2] = ztmp - x[j].z;
r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2];
jtype = map[type[j]];
ijparam = elem2param[itype][jtype][jtype];
if (r_ij > params[ijparam].cutsq) continue;
r_ij = sqrt(r_ij);
invR_ij = 1.0 / r_ij;
directorCos_ij_x = invR_ij * dr_ij[0];
directorCos_ij_y = invR_ij * dr_ij[1];
directorCos_ij_z = invR_ij * dr_ij[2];
// preCutoffFunction
interpolDeltaX = r_ij - GRIDSTART;
interpolTMP = (interpolDeltaX * GRIDDENSITY_FCUTOFF);
interpolIDX = (int) interpolTMP;
interpolY1 = cutoffFunction[itype][jtype][interpolIDX];
interpolY2 = cutoffFunction[itype][jtype][interpolIDX+1];
thrCutoffFunction[tid][neighbor_j] = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
// preCutoffFunctionDerived
interpolY1 = cutoffFunctionDerived[itype][jtype][interpolIDX];
interpolY2 = cutoffFunctionDerived[itype][jtype][interpolIDX+1];
thrCutoffFunctionDerived[tid][neighbor_j] = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
for (int neighbor_k = neighbor_j + 1; neighbor_k < jnum; neighbor_k++) {
double dr_ik[3], r_ik;
k = jlist[neighbor_k];
k &= NEIGHMASK;
ktype = map[type[k]];
ikparam = elem2param[itype][ktype][ktype];
ijkparam = elem2param[itype][jtype][ktype];
dr_ik[0] = xtmp -x[k].x;
dr_ik[1] = ytmp -x[k].y;
dr_ik[2] = ztmp -x[k].z;
r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
if (r_ik > params[ikparam].cutsq) continue;
r_ik = sqrt(r_ik);
invR_ik = 1.0 / r_ik;
directorCos_ik_x = invR_ik * dr_ik[0];
directorCos_ik_y = invR_ik * dr_ik[1];
directorCos_ik_z = invR_ik * dr_ik[2];
cosTeta = directorCos_ij_x * directorCos_ik_x + directorCos_ij_y * directorCos_ik_y + directorCos_ij_z * directorCos_ik_z;
// preGtetaFunction
interpolDeltaX=cosTeta+1.0;
interpolTMP = (interpolDeltaX * GRIDDENSITY_GTETA);
interpolIDX = (int) interpolTMP;
interpolY1 = gtetaFunction[itype][interpolIDX];
interpolY2 = gtetaFunction[itype][interpolIDX+1];
gtetaFunction_temp = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
// preGtetaFunctionDerived
interpolY1 = gtetaFunctionDerived[itype][interpolIDX];
interpolY2 = gtetaFunctionDerived[itype][interpolIDX+1];
gtetaFunctionDerived_temp = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
thrGtetaFunction[tid][neighbor_j][neighbor_k]=params[ijkparam].gamma*gtetaFunction_temp;
thrGtetaFunctionDerived[tid][neighbor_j][neighbor_k]=params[ijkparam].gamma*gtetaFunctionDerived_temp;
thrGtetaFunction[tid][neighbor_k][neighbor_j]=params[ijkparam].gamma*gtetaFunction_temp;
thrGtetaFunctionDerived[tid][neighbor_k][neighbor_j]=params[ijkparam].gamma*gtetaFunctionDerived_temp;
} // loop on K
} // loop on J
// loop over neighbours of atom i
for (int neighbor_j = 0; neighbor_j < jnum; neighbor_j++) {
double dr_ij[3], r_ij, f_ij[3];
j = jlist[neighbor_j];
j &= NEIGHMASK;
dr_ij[0] = xtmp - x[j].x;
dr_ij[1] = ytmp - x[j].y;
dr_ij[2] = ztmp - x[j].z;
r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2];
jtype = map[type[j]];
ijparam = elem2param[itype][jtype][jtype];
if (r_ij > params[ijparam].cutsq) continue;
r_ij = sqrt(r_ij);
invR_ij = 1.0 / r_ij;
directorCos_ij_x = invR_ij * dr_ij[0];
directorCos_ij_y = invR_ij * dr_ij[1];
directorCos_ij_z = invR_ij * dr_ij[2];
exponentRepulsivePotential = params[ijparam].lam1 * r_ij;
exponentAttractivePotential = params[ijparam].lam2 * r_ij;
// repulsiveExponential
interpolDeltaX = exponentRepulsivePotential - minArgumentExponential;
interpolTMP = (interpolDeltaX * GRIDDENSITY_EXP);
interpolIDX = (int) interpolTMP;
interpolY1 = exponential[interpolIDX];
interpolY2 = exponential[interpolIDX+1];
repulsiveExponential = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
// attractiveExponential
interpolDeltaX = exponentAttractivePotential - minArgumentExponential;
interpolTMP = (interpolDeltaX * GRIDDENSITY_EXP);
interpolIDX = (int) interpolTMP;
interpolY1 = exponential[interpolIDX];
interpolY2 = exponential[interpolIDX+1];
attractiveExponential = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX);
repulsivePotential = params[ijparam].biga * repulsiveExponential;
attractivePotential = -params[ijparam].bigb * attractiveExponential;
cutoffFunctionIJ = thrCutoffFunction[tid][neighbor_j];
cutoffFunctionDerivedIJ = thrCutoffFunctionDerived[tid][neighbor_j];
zeta = 0.0;
// first loop over neighbours of atom i except j - part 1/2
for (int neighbor_k = 0; neighbor_k < neighbor_j; neighbor_k++) {
double dr_ik[3], r_ik;
k = jlist[neighbor_k];
k &= NEIGHMASK;
ktype = map[type[k]];
ikparam = elem2param[itype][ktype][ktype];
ijkparam = elem2param[itype][jtype][ktype];
dr_ik[0] = xtmp -x[k].x;
dr_ik[1] = ytmp -x[k].y;
dr_ik[2] = ztmp -x[k].z;
r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
if (r_ik > params[ikparam].cutsq) continue;
r_ik = sqrt(r_ik);
invR_ik = 1.0 / r_ik;
gtetaFunctionIJK = thrGtetaFunction[tid][neighbor_j][neighbor_k];
cutoffFunctionIK = thrCutoffFunction[tid][neighbor_k];
zeta += cutoffFunctionIK * gtetaFunctionIJK;
}
// first loop over neighbours of atom i except j - part 2/2
for (int neighbor_k = neighbor_j+1; neighbor_k < jnum; neighbor_k++) {
double dr_ik[3], r_ik;
k = jlist[neighbor_k];
k &= NEIGHMASK;
ktype = map[type[k]];
ikparam = elem2param[itype][ktype][ktype];
ijkparam = elem2param[itype][jtype][ktype];
dr_ik[0] = xtmp -x[k].x;
dr_ik[1] = ytmp -x[k].y;
dr_ik[2] = ztmp -x[k].z;
r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
if (r_ik > params[ikparam].cutsq) continue;
r_ik = sqrt(r_ik);
invR_ik = 1.0 / r_ik;
directorCos_ik_x = invR_ik * dr_ik[0];
directorCos_ik_y = invR_ik * dr_ik[1];
directorCos_ik_z = invR_ik * dr_ik[2];
gtetaFunctionIJK = thrGtetaFunction[tid][neighbor_j][neighbor_k];
cutoffFunctionIK = thrCutoffFunction[tid][neighbor_k];
zeta += cutoffFunctionIK * gtetaFunctionIJK;
}
// betaZetaPowerIJK
interpolDeltaX= params[ijparam].beta * zeta;
interpolTMP = (interpolDeltaX * GRIDDENSITY_BIJ);
interpolIDX = (int) interpolTMP;
interpolY1 = betaZetaPower[itype][interpolIDX];
interpolY2 = betaZetaPower[itype][interpolIDX+1];
betaZetaPowerIJK = (interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX));
// betaZetaPowerDerivedIJK
interpolY1 = betaZetaPowerDerived[itype][interpolIDX];
interpolY2 = betaZetaPowerDerived[itype][interpolIDX+1];
betaZetaPowerDerivedIJK = params[ijparam].beta*(interpolY1 + (interpolY2 - interpolY1) * (interpolTMP - interpolIDX));
// Forces and virial
factor_force_ij = 0.5*cutoffFunctionDerivedIJ*(repulsivePotential + attractivePotential * betaZetaPowerIJK)+0.5*cutoffFunctionIJ*(-repulsivePotential*params[ijparam].lam1-betaZetaPowerIJK*attractivePotential*params[ijparam].lam2);
f_ij[0] = factor_force_ij * directorCos_ij_x;
f_ij[1] = factor_force_ij * directorCos_ij_y;
f_ij[2] = factor_force_ij * directorCos_ij_z;
f[j].x += f_ij[0];
f[j].y += f_ij[1];
f[j].z += f_ij[2];
fxtmp -= f_ij[0];
fytmp -= f_ij[1];
fztmp -= f_ij[2];
// potential energy
evdwl = cutoffFunctionIJ * repulsivePotential
+ cutoffFunctionIJ * attractivePotential * betaZetaPowerIJK;
if (EVFLAG) ev_tally_thr(this,i, j, nlocal, /* newton_pair */ 1, 0.5 * evdwl, 0.0,
-factor_force_ij*invR_ij, dr_ij[0], dr_ij[1], dr_ij[2],thr);
factor_force_tot= 0.5*cutoffFunctionIJ*attractivePotential*betaZetaPowerDerivedIJK;
// second loop over neighbours of atom i except j, forces and virial only - part 1/2
for (int neighbor_k = 0; neighbor_k < neighbor_j; neighbor_k++) {
double dr_ik[3], r_ik, f_ik[3];
k = jlist[neighbor_k];
k &= NEIGHMASK;
ktype = map[type[k]];
ikparam = elem2param[itype][ktype][ktype];
ijkparam = elem2param[itype][jtype][ktype];
dr_ik[0] = xtmp -x[k].x;
dr_ik[1] = ytmp -x[k].y;
dr_ik[2] = ztmp -x[k].z;
r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
if (r_ik > params[ikparam].cutsq) continue;
r_ik = sqrt(r_ik);
invR_ik = 1.0 / r_ik;
directorCos_ik_x = invR_ik * dr_ik[0];
directorCos_ik_y = invR_ik * dr_ik[1];
directorCos_ik_z = invR_ik * dr_ik[2];
cosTeta = directorCos_ij_x * directorCos_ik_x + directorCos_ij_y * directorCos_ik_y
+ directorCos_ij_z * directorCos_ik_z;
gtetaFunctionIJK = thrGtetaFunction[tid][neighbor_j][neighbor_k];
gtetaFunctionDerivedIJK = thrGtetaFunctionDerived[tid][neighbor_j][neighbor_k];
cutoffFunctionIK = thrCutoffFunction[tid][neighbor_k];
cutoffFunctionDerivedIK = thrCutoffFunctionDerived[tid][neighbor_k];
factor_force3_ij= cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ij *factor_force_tot;
f_ij[0] = factor_force3_ij * (directorCos_ij_x*cosTeta - directorCos_ik_x);
f_ij[1] = factor_force3_ij * (directorCos_ij_y*cosTeta - directorCos_ik_y);
f_ij[2] = factor_force3_ij * (directorCos_ij_z*cosTeta - directorCos_ik_z);
factor_1_force3_ik = (cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ik)*factor_force_tot;
factor_2_force3_ik = -(cutoffFunctionDerivedIK * gtetaFunctionIJK)*factor_force_tot;
f_ik[0] = factor_1_force3_ik * (directorCos_ik_x*cosTeta - directorCos_ij_x)
+ factor_2_force3_ik * directorCos_ik_x;
f_ik[1] = factor_1_force3_ik * (directorCos_ik_y*cosTeta - directorCos_ij_y)
+ factor_2_force3_ik * directorCos_ik_y;
f_ik[2] = factor_1_force3_ik * (directorCos_ik_z*cosTeta - directorCos_ij_z)
+ factor_2_force3_ik * directorCos_ik_z;
f[j].x -= f_ij[0];
f[j].y -= f_ij[1];
f[j].z -= f_ij[2];
f[k].x -= f_ik[0];
f[k].y -= f_ik[1];
f[k].z -= f_ik[2];
fxtmp += f_ij[0] + f_ik[0];
fytmp += f_ij[1] + f_ik[1];
fztmp += f_ij[2] + f_ik[2];
if (VFLAG_ATOM) v_tally3_thr(i,j,k,f_ij,f_ik,dr_ij,dr_ik,thr);
}
// second loop over neighbours of atom i except j, forces and virial only - part 2/2
for (int neighbor_k = neighbor_j+1; neighbor_k < jnum; neighbor_k++) {
double dr_ik[3], r_ik, f_ik[3];
k = jlist[neighbor_k];
k &= NEIGHMASK;
ktype = map[type[k]];
ikparam = elem2param[itype][ktype][ktype];
ijkparam = elem2param[itype][jtype][ktype];
dr_ik[0] = xtmp -x[k].x;
dr_ik[1] = ytmp -x[k].y;
dr_ik[2] = ztmp -x[k].z;
r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2];
if (r_ik > params[ikparam].cutsq) continue;
r_ik = sqrt(r_ik);
invR_ik = 1.0 / r_ik;
directorCos_ik_x = invR_ik * dr_ik[0];
directorCos_ik_y = invR_ik * dr_ik[1];
directorCos_ik_z = invR_ik * dr_ik[2];
cosTeta = directorCos_ij_x * directorCos_ik_x + directorCos_ij_y * directorCos_ik_y
+ directorCos_ij_z * directorCos_ik_z;
gtetaFunctionIJK = thrGtetaFunction[tid][neighbor_j][neighbor_k];
gtetaFunctionDerivedIJK = thrGtetaFunctionDerived[tid][neighbor_j][neighbor_k];
cutoffFunctionIK = thrCutoffFunction[tid][neighbor_k];
cutoffFunctionDerivedIK = thrCutoffFunctionDerived[tid][neighbor_k];
factor_force3_ij= cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ij *factor_force_tot;
f_ij[0] = factor_force3_ij * (directorCos_ij_x*cosTeta - directorCos_ik_x);
f_ij[1] = factor_force3_ij * (directorCos_ij_y*cosTeta - directorCos_ik_y);
f_ij[2] = factor_force3_ij * (directorCos_ij_z*cosTeta - directorCos_ik_z);
factor_1_force3_ik = (cutoffFunctionIK * gtetaFunctionDerivedIJK * invR_ik)*factor_force_tot;
factor_2_force3_ik = -(cutoffFunctionDerivedIK * gtetaFunctionIJK)*factor_force_tot;
f_ik[0] = factor_1_force3_ik * (directorCos_ik_x*cosTeta - directorCos_ij_x)
+ factor_2_force3_ik * directorCos_ik_x;
f_ik[1] = factor_1_force3_ik * (directorCos_ik_y*cosTeta - directorCos_ij_y)
+ factor_2_force3_ik * directorCos_ik_y;
f_ik[2] = factor_1_force3_ik * (directorCos_ik_z*cosTeta - directorCos_ij_z)
+ factor_2_force3_ik * directorCos_ik_z;
f[j].x -= f_ij[0];
f[j].y -= f_ij[1];
f[j].z -= f_ij[2];
f[k].x -= f_ik[0];
f[k].y -= f_ik[1];
f[k].z -= f_ik[2];
fxtmp += f_ij[0] + f_ik[0];
fytmp += f_ij[1] + f_ik[1];
fztmp += f_ij[2] + f_ik[2];
if (VFLAG_ATOM) v_tally3_thr(i,j,k,f_ij,f_ik,dr_ij,dr_ik,thr);
}
} // loop on J
f[i].x += fxtmp;
f[i].y += fytmp;
f[i].z += fztmp;
} // loop on I
}
void PairTersoffTableOMP::deallocatePreLoops(void)
{
memory->destroy(thrGtetaFunction);
memory->destroy(thrGtetaFunctionDerived);
memory->destroy(thrCutoffFunction);
memory->destroy(thrCutoffFunctionDerived);
}
void PairTersoffTableOMP::allocatePreLoops(void)
{
const int nthreads = comm->nthreads;
memory->create(thrGtetaFunction,nthreads,leadingDimensionInteractionList,leadingDimensionInteractionList,"tersofftable:thrGtetaFunction");
memory->create(thrGtetaFunctionDerived,nthreads,leadingDimensionInteractionList,leadingDimensionInteractionList,"tersofftable:thrGtetaFunctionDerived");
memory->create(thrCutoffFunction,nthreads,leadingDimensionInteractionList,"tersofftable:thrCutoffFunction");
memory->create(thrCutoffFunctionDerived,nthreads,leadingDimensionInteractionList,"tersofftable:thrCutoffFunctionDerived");
}
/* ---------------------------------------------------------------------- */
double PairTersoffTableOMP::memory_usage()
{
double bytes = memory_usage_thr();
bytes += PairTersoffTable::memory_usage();
return bytes;
}
Event Timeline
Log In to Comment