Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F78041148
pair_eam_intel.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Aug 18, 01:05
Size
24 KB
Mime Type
text/x-c++
Expires
Tue, Aug 20, 01:05 (1 d, 21 h)
Engine
blob
Format
Raw Data
Handle
19943450
Attached To
rLAMMPS lammps
pair_eam_intel.cpp
View Options
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "pair_eam_intel.h"
#include "atom.h"
#include "force.h"
#include "comm.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "memory.h"
#include "error.h"
#include "suffix.h"
using namespace LAMMPS_NS;
#define MAXLINE 1024
#define FC_PACKED1_T typename ForceConst<flt_t>::fc_packed1
#define FC_PACKED2_T typename ForceConst<flt_t>::fc_packed2
/* ---------------------------------------------------------------------- */
PairEAMIntel::PairEAMIntel(LAMMPS *lmp) : PairEAM(lmp)
{
suffix_flag |= Suffix::INTEL;
fp_float = 0;
}
/* ---------------------------------------------------------------------- */
PairEAMIntel::~PairEAMIntel()
{
memory->destroy(fp_float);
}
/* ---------------------------------------------------------------------- */
void PairEAMIntel::compute(int eflag, int vflag)
{
if (fix->precision() == FixIntel::PREC_MODE_MIXED)
compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
force_const_single);
else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
compute<double,double>(eflag, vflag, fix->get_double_buffers(),
force_const_double);
else
compute<float,float>(eflag, vflag, fix->get_single_buffers(),
force_const_single);
fix->balance_stamp();
vflag_fdotr = 0;
}
/* ---------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void PairEAMIntel::compute(int eflag, int vflag,
IntelBuffers<flt_t,acc_t> *buffers,
const ForceConst<flt_t> &fc)
{
if (eflag || vflag) {
ev_setup(eflag, vflag);
} else evflag = vflag_fdotr = 0;
const int inum = list->inum;
const int nthreads = comm->nthreads;
const int host_start = fix->host_start_pair();
const int offload_end = fix->offload_end_pair();
const int ago = neighbor->ago;
if (ago != 0 && fix->separate_buffers() == 0) {
fix->start_watch(TIME_PACK);
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag,buffers,fc)
#endif
{
int ifrom, ito, tid;
IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost,
nthreads, sizeof(ATOM_T));
buffers->thr_pack(ifrom,ito,ago);
}
fix->stop_watch(TIME_PACK);
}
if (_onetype) {
if (evflag || vflag_fdotr) {
int ovflag = 0;
if (vflag_fdotr) ovflag = 2;
else if (vflag) ovflag = 1;
if (eflag) {
if (force->newton_pair) {
eval<1,1,1,1>(1, ovflag, buffers, fc, 0, offload_end);
eval<1,1,1,1>(0, ovflag, buffers, fc, host_start, inum);
} else {
eval<1,1,1,0>(1, ovflag, buffers, fc, 0, offload_end);
eval<1,1,1,0>(0, ovflag, buffers, fc, host_start, inum);
}
} else {
if (force->newton_pair) {
eval<1,1,0,1>(1, ovflag, buffers, fc, 0, offload_end);
eval<1,1,0,1>(0, ovflag, buffers, fc, host_start, inum);
} else {
eval<1,1,0,0>(1, ovflag, buffers, fc, 0, offload_end);
eval<1,1,0,0>(0, ovflag, buffers, fc, host_start, inum);
}
}
} else {
if (force->newton_pair) {
eval<0,0,0,1>(1, 0, buffers, fc, 0, offload_end);
eval<0,0,0,1>(0, 0, buffers, fc, host_start, inum);
} else {
eval<0,0,0,0>(1, 0, buffers, fc, 0, offload_end);
eval<0,0,0,0>(0, 0, buffers, fc, host_start, inum);
}
}
} else {
if (evflag || vflag_fdotr) {
int ovflag = 0;
if (vflag_fdotr) ovflag = 2;
else if (vflag) ovflag = 1;
if (eflag) {
if (force->newton_pair) {
eval<0,1,1,1>(1, ovflag, buffers, fc, 0, offload_end);
eval<0,1,1,1>(0, ovflag, buffers, fc, host_start, inum);
} else {
eval<0,1,1,0>(1, ovflag, buffers, fc, 0, offload_end);
eval<0,1,1,0>(0, ovflag, buffers, fc, host_start, inum);
}
} else {
if (force->newton_pair) {
eval<0,1,0,1>(1, ovflag, buffers, fc, 0, offload_end);
eval<0,1,0,1>(0, ovflag, buffers, fc, host_start, inum);
} else {
eval<0,1,0,0>(1, ovflag, buffers, fc, 0, offload_end);
eval<0,1,0,0>(0, ovflag, buffers, fc, host_start, inum);
}
}
} else {
if (force->newton_pair) {
eval<0,0,0,1>(1, 0, buffers, fc, 0, offload_end);
eval<0,0,0,1>(0, 0, buffers, fc, host_start, inum);
} else {
eval<0,0,0,0>(1, 0, buffers, fc, 0, offload_end);
eval<0,0,0,0>(0, 0, buffers, fc, host_start, inum);
}
}
}
}
/* ---------------------------------------------------------------------- */
template <int ONETYPE, int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t,
class acc_t>
void PairEAMIntel::eval(const int offload, const int vflag,
IntelBuffers<flt_t,acc_t> *buffers,
const ForceConst<flt_t> &fc,
const int astart, const int aend)
{
const int inum = aend - astart;
if (inum == 0) return;
flt_t *fp_f;
if (atom->nmax > nmax) {
memory->destroy(rho);
memory->destroy(fp);
nmax = atom->nmax;
int edge = (nmax * sizeof(acc_t)) % INTEL_DATA_ALIGN;
if (edge) nmax += (INTEL_DATA_ALIGN - edge) / sizeof(acc_t);
memory->create(rho,nmax*comm->nthreads,"pair:rho");
memory->create(fp,nmax,"pair:fp");
// Use single precision allocation for single/mixed mode
// Keep double version for single and swap_eam
if (sizeof(flt_t)==sizeof(float)) {
memory->destroy(fp_float);
memory->create(fp_float,nmax,"pair::fp_float");
}
}
if (sizeof(flt_t)==sizeof(float))
fp_f = (flt_t *)fp_float;
else
fp_f = (flt_t *)fp;
int nlocal, nall, minlocal;
fix->get_buffern(offload, nlocal, nall, minlocal);
const int ago = neighbor->ago;
IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall);
ATOM_T * _noalias const x = buffers->get_x(offload);
const int * _noalias const numneigh = list->numneigh;
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
const int * _noalias const firstneigh = buffers->firstneigh(list);
const FC_PACKED1_T * _noalias const rhor_spline_f = fc.rhor_spline_f;
const FC_PACKED1_T * _noalias const rhor_spline_e = fc.rhor_spline_e;
const FC_PACKED2_T * _noalias const z2r_spline_t = fc.z2r_spline_t;
const FC_PACKED1_T * _noalias const frho_spline_f = fc.frho_spline_f;
const FC_PACKED1_T * _noalias const frho_spline_e = fc.frho_spline_e;
const flt_t * _noalias const scale_f = fc.scale_f[0];
const int ntypes = atom->ntypes + 1;
const int eatom = this->eflag_atom;
// Determine how much data to transfer
int x_size, q_size, f_stride, ev_size, separate_flag;
IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag,
buffers, offload, fix, separate_flag,
x_size, q_size, ev_size, f_stride);
int tc;
FORCE_T * _noalias f_start;
acc_t * _noalias ev_global;
IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
const int nthreads = tc;
int *overflow = fix->get_off_overflow_flag();
const flt_t frdr = rdr;
const flt_t frdrho = rdrho;
const flt_t frhomax = rhomax;
const flt_t fcutforcesq = cutforcesq;
const int istride = fc.rhor_istride();
const int jstride = fc.rhor_jstride();
const int fstride = fc.frho_stride();
{
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
*timer_compute = MIC_Wtime();
#endif
IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall,
f_stride, x, 0);
acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5;
if (EVFLAG) {
oevdwl = (acc_t)0;
if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
}
// loop over neighbors of my atoms
#if defined(_OPENMP)
#pragma omp parallel default(none) \
shared(fp_f, f_start,f_stride,nlocal,nall,minlocal) \
reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5)
#endif
{
int iifrom, iito, tid;
IP_PRE_omp_range_id_vec(iifrom, iito, tid, inum, nthreads,
INTEL_VECTOR_WIDTH);
iifrom += astart;
iito += astart;
FORCE_T * _noalias const f = f_start - minlocal + (tid * f_stride);
double * _noalias const trho = rho + tid*nmax;
if (NEWTON_PAIR)
memset(trho, 0, nall * sizeof(double));
else
memset(trho, 0, nlocal * sizeof(double));
flt_t oscale;
int rhor_joff, frho_ioff;
if (ONETYPE) {
const int ptr_off=_onetype * ntypes + _onetype;
oscale = scale_f[ptr_off];
int rhor_ioff = istride * _onetype;
rhor_joff = rhor_ioff + _onetype * jstride;
frho_ioff = fstride * _onetype;
}
for (int i = iifrom; i < iito; ++i) {
int itype, rhor_ioff;
if (!ONETYPE) {
itype = x[i].w;
rhor_ioff = istride * itype;
}
const int * _noalias const jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
const flt_t xtmp = x[i].x;
const flt_t ytmp = x[i].y;
const flt_t ztmp = x[i].z;
acc_t rhoi = (acc_t)0.0;
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd reduction(+:rhoi)
#endif
for (int jj = 0; jj < jnum; jj++) {
int j, jtype;
j = jlist[jj] & NEIGHMASK;
const flt_t delx = xtmp - x[j].x;
const flt_t dely = ytmp - x[j].y;
const flt_t delz = ztmp - x[j].z;
const flt_t rsq = delx*delx + dely*dely + delz*delz;
if (rsq < fcutforcesq) {
if (!ONETYPE) jtype = x[j].w;
flt_t p = sqrt(rsq)*frdr + (flt_t)1.0;
int m = static_cast<int> (p);
m = MIN(m,nr-1);
p -= m;
p = MIN(p,(flt_t)1.0);
if (!ONETYPE)
rhor_joff = rhor_ioff + jtype * jstride;
const int joff = rhor_joff + m;
flt_t ra;
ra = ((rhor_spline_e[joff].a*p + rhor_spline_e[joff].b) * p +
rhor_spline_e[joff].c) * p + rhor_spline_e[joff].d;
rhoi += ra;
if (NEWTON_PAIR || j < nlocal) {
if (!ONETYPE) {
const int ioff = jtype * istride + itype * jstride + m;
ra = ((rhor_spline_e[ioff].a*p + rhor_spline_e[ioff].b)*p +
rhor_spline_e[ioff].c) * p + rhor_spline_e[ioff].d;
}
trho[j] += ra;
}
}
} // for jj
trho[i] += rhoi;
} // for i
#if defined(_OPENMP)
if (nthreads > 1) {
#pragma omp barrier
if (tid == 0) {
int rcount;
if (NEWTON_PAIR) rcount = nall;
else rcount = nlocal;
if (nthreads == 2) {
double *trho2 = rho + nmax;
#pragma vector aligned
#pragma simd
for (int n = 0; n < rcount; n++)
rho[n] += trho2[n];
} else if (nthreads == 4) {
double *trho2 = rho + nmax;
double *trho3 = trho2 + nmax;
double *trho4 = trho3 + nmax;
#pragma vector aligned
#pragma simd
for (int n = 0; n < rcount; n++)
rho[n] += trho2[n] + trho3[n] + trho4[n];
} else {
double *trhon = rho + nmax;
for (int t = 1; t < nthreads; t++) {
#pragma vector aligned
#pragma simd
for (int n = 0; n < rcount; n++)
rho[n] += trhon[n];
trhon += nmax;
}
}
}
}
#endif
// communicate and sum densities
if (NEWTON_PAIR) {
if (tid == 0)
comm->reverse_comm_pair(this);
}
#if defined(_OPENMP)
#pragma omp barrier
#endif
// fp = derivative of embedding energy at each atom
// phi = embedding energy at each atom
// if rho > rhomax (e.g. due to close approach of two atoms),
// will exceed table, so add linear term to conserve energy
acc_t tevdwl;
if (EFLAG) tevdwl = (acc_t)0.0;
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd reduction(+:tevdwl)
#endif
for (int i = iifrom; i < iito; ++i) {
int itype;
if (!ONETYPE) itype = x[i].w;
flt_t p = rho[i]*frdrho + (flt_t)1.0;
int m = static_cast<int> (p);
m = MAX(1,MIN(m,nrho-1));
p -= m;
p = MIN(p,(flt_t)1.0);
if (!ONETYPE) frho_ioff = itype * fstride;
const int ioff = frho_ioff + m;
fp_f[i] = (frho_spline_f[ioff].a*p + frho_spline_f[ioff].b)*p +
frho_spline_f[ioff].c;
if (EFLAG) {
flt_t phi = ((frho_spline_e[ioff].a*p + frho_spline_e[ioff].b)*p +
frho_spline_e[ioff].c)*p + frho_spline_e[ioff].d;
if (rho[i] > frhomax) phi += fp_f[i] * (rho[i]-frhomax);
if (!ONETYPE) {
const int ptr_off=itype*ntypes + itype;
oscale = scale_f[ptr_off];
}
phi *= oscale;
tevdwl += phi;
if (eatom) f[i].w += phi;
}
}
if (EFLAG) oevdwl += tevdwl;
// communicate derivative of embedding function
#if defined(_OPENMP)
#pragma omp barrier
#endif
if (tid == 0) {
comm->forward_comm_pair(this);
memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
} else
memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
#if defined(_OPENMP)
#pragma omp barrier
#endif
// compute forces on each atom
// loop over neighbors of my atoms
for (int i = iifrom; i < iito; ++i) {
int itype, rhor_ioff;
const flt_t * _noalias scale_fi;
if (!ONETYPE) {
itype = x[i].w;
rhor_ioff = istride * itype;
scale_fi = scale_f + itype*ntypes;
}
const int * _noalias const jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
acc_t fxtmp, fytmp, fztmp, fwtmp;
acc_t sevdwl, sv0, sv1, sv2, sv3, sv4, sv5;
const flt_t xtmp = x[i].x;
const flt_t ytmp = x[i].y;
const flt_t ztmp = x[i].z;
fxtmp = fytmp = fztmp = (acc_t)0;
if (EVFLAG) {
if (EFLAG) fwtmp = sevdwl = (acc_t)0;
if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
}
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int jj = 0; jj < jnum; jj++) {
int j, jtype;
j = jlist[jj] & NEIGHMASK;
const flt_t delx = xtmp - x[j].x;
const flt_t dely = ytmp - x[j].y;
const flt_t delz = ztmp - x[j].z;
const flt_t rsq = delx*delx + dely*dely + delz*delz;
if (rsq < fcutforcesq) {
if (!ONETYPE) jtype = x[j].w;
const flt_t r = sqrt(rsq);
flt_t p = r*frdr + (flt_t)1.0;
int m = static_cast<int> (p);
m = MIN(m,nr-1);
p -= m;
p = MIN(p,(flt_t)1.0);
if (!ONETYPE)
rhor_joff = rhor_ioff + jtype * jstride;
const int joff = rhor_joff + m;
const flt_t rhojp = (rhor_spline_f[joff].a*p +
rhor_spline_f[joff].b)*p +
rhor_spline_f[joff].c;
flt_t rhoip;
if (!ONETYPE) {
const int ioff = jtype * istride + itype * jstride + m;
rhoip = (rhor_spline_f[ioff].a*p + rhor_spline_f[ioff].b)*p +
rhor_spline_f[ioff].c;
} else
rhoip = rhojp;
const flt_t z2p = (z2r_spline_t[joff].a*p +
z2r_spline_t[joff].b)*p +
z2r_spline_t[joff].c;
const flt_t z2 = ((z2r_spline_t[joff].d*p +
z2r_spline_t[joff].e)*p +
z2r_spline_t[joff].f)*p +
z2r_spline_t[joff].g;
const flt_t recip = (flt_t)1.0/r;
const flt_t phi = z2*recip;
const flt_t phip = z2p*recip - phi*recip;
const flt_t psip = fp_f[i]*rhojp + fp_f[j]*rhoip + phip;
if (!ONETYPE)
oscale = scale_fi[jtype];
const flt_t fpair = -oscale*psip*recip;
fxtmp += delx*fpair;
fytmp += dely*fpair;
fztmp += delz*fpair;
if (NEWTON_PAIR || j < nlocal) {
f[j].x -= delx*fpair;
f[j].y -= dely*fpair;
f[j].z -= delz*fpair;
}
if (EVFLAG) {
flt_t ev_pre = (flt_t)0;
if (NEWTON_PAIR || i<nlocal)
ev_pre += (flt_t)0.5;
if (NEWTON_PAIR || j<nlocal)
ev_pre += (flt_t)0.5;
if (EFLAG) {
const flt_t evdwl = oscale*phi;
sevdwl += ev_pre * evdwl;
if (eatom) {
if (NEWTON_PAIR || i < nlocal)
fwtmp += (flt_t)0.5 * evdwl;
if (NEWTON_PAIR || j < nlocal)
f[j].w += (flt_t)0.5 * evdwl;
}
}
IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair,
delx, dely, delz);
}
} // if rsq
} // for jj
f[i].x += fxtmp;
f[i].y += fytmp;
f[i].z += fztmp;
IP_PRE_ev_tally_atom(EVFLAG, EFLAG, vflag, f, fwtmp);
} // for i
if (vflag == 2) {
#if defined(_OPENMP)
#pragma omp barrier
#endif
IP_PRE_fdotr_acc_force(NEWTON_PAIR, EVFLAG, EFLAG, vflag, eatom, nall,
nlocal, minlocal, nthreads, f_start, f_stride,
x, offload);
}
} /// omp
if (EVFLAG) {
if (EFLAG) {
ev_global[0] = oevdwl;
ev_global[1] = (acc_t)0.0;
}
if (vflag) {
ev_global[2] = ov0;
ev_global[3] = ov1;
ev_global[4] = ov2;
ev_global[5] = ov3;
ev_global[6] = ov4;
ev_global[7] = ov5;
}
}
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
*timer_compute = MIC_Wtime() - *timer_compute;
#endif
}
if (offload)
fix->stop_watch(TIME_OFFLOAD_LATENCY);
else
fix->stop_watch(TIME_HOST_PAIR);
if (EVFLAG)
fix->add_result_array(f_start, ev_global, offload, eatom, 0, vflag);
else
fix->add_result_array(f_start, 0, offload);
}
/* ----------------------------------------------------------------------
init specific to this pair style
------------------------------------------------------------------------- */
void PairEAMIntel::init_style()
{
PairEAM::init_style();
neighbor->requests[neighbor->nrequest-1]->intel = 1;
int ifix = modify->find_fix("package_intel");
if (ifix < 0)
error->all(FLERR,
"The 'package intel' command is required for /intel styles");
fix = static_cast<FixIntel *>(modify->fix[ifix]);
fix->pair_init_check();
#ifdef _LMP_INTEL_OFFLOAD
if (fix->offload_balance() != 0.0)
error->all(FLERR,
"Offload for eam/intel is not yet available. Set balance to 0.");
#endif
if (fix->precision() == FixIntel::PREC_MODE_MIXED)
pack_force_const(force_const_single, fix->get_mixed_buffers());
else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
pack_force_const(force_const_double, fix->get_double_buffers());
else
pack_force_const(force_const_single, fix->get_single_buffers());
}
/* ---------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void PairEAMIntel::pack_force_const(ForceConst<flt_t> &fc,
IntelBuffers<flt_t,acc_t> *buffers)
{
int tp1 = atom->ntypes + 1;
fc.set_ntypes(tp1,nr,nrho,memory,_cop);
buffers->set_ntypes(tp1);
flt_t **cutneighsq = buffers->get_cutneighsq();
// Repeat cutsq calculation because done after call to init_style
double cut, cutneigh;
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cutneigh = cut + neighbor->skin;
cutsq[i][j] = cutsq[j][i] = cut*cut;
cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
}
}
}
_onetype=-1;
double oldscale=-1;
for (int i = 1; i < tp1; i++) {
int ioff = i * fc.frho_stride();
for (int k = 0; k < nrho + 1; k++) {
fc.frho_spline_f[ioff + k].a = frho_spline[type2frho[i]][k][0];
fc.frho_spline_f[ioff + k].b = frho_spline[type2frho[i]][k][1];
fc.frho_spline_f[ioff + k].c = frho_spline[type2frho[i]][k][2];
fc.frho_spline_e[ioff + k].a = frho_spline[type2frho[i]][k][3];
fc.frho_spline_e[ioff + k].b = frho_spline[type2frho[i]][k][4];
fc.frho_spline_e[ioff + k].c = frho_spline[type2frho[i]][k][5];
fc.frho_spline_e[ioff + k].d = frho_spline[type2frho[i]][k][6];
}
ioff = i * fc.rhor_istride();
for (int j = 1; j < tp1; j++) {
fc.scale_f[i][j] = scale[i][j];
if (type2rhor[i][j] >= 0) {
const int joff = ioff + j * fc.rhor_jstride();
for (int k = 0; k < nr + 1; k++) {
if (type2rhor[j][i] != type2rhor[i][j])
_onetype = 0;
else if (_onetype < 0)
_onetype = i;
if (oldscale < 0)
oldscale = scale[i][j];
else
if (oldscale != scale[i][j])
_onetype = 0;
fc.rhor_spline_f[joff + k].a=rhor_spline[type2rhor[j][i]][k][0];
fc.rhor_spline_f[joff + k].b=rhor_spline[type2rhor[j][i]][k][1];
fc.rhor_spline_f[joff + k].c=rhor_spline[type2rhor[j][i]][k][2];
fc.rhor_spline_e[joff + k].a=rhor_spline[type2rhor[j][i]][k][3];
fc.rhor_spline_e[joff + k].b=rhor_spline[type2rhor[j][i]][k][4];
fc.rhor_spline_e[joff + k].c=rhor_spline[type2rhor[j][i]][k][5];
fc.rhor_spline_e[joff + k].d=rhor_spline[type2rhor[j][i]][k][6];
fc.z2r_spline_t[joff + k].a=z2r_spline[type2rhor[j][i]][k][0];
fc.z2r_spline_t[joff + k].b=z2r_spline[type2rhor[j][i]][k][1];
fc.z2r_spline_t[joff + k].c=z2r_spline[type2rhor[j][i]][k][2];
fc.z2r_spline_t[joff + k].d=z2r_spline[type2rhor[j][i]][k][3];
fc.z2r_spline_t[joff + k].e=z2r_spline[type2rhor[j][i]][k][4];
fc.z2r_spline_t[joff + k].f=z2r_spline[type2rhor[j][i]][k][5];
fc.z2r_spline_t[joff + k].g=z2r_spline[type2rhor[j][i]][k][6];
}
}
}
}
if (_onetype < 0) _onetype = 0;
}
/* ---------------------------------------------------------------------- */
template <class flt_t>
void PairEAMIntel::ForceConst<flt_t>::set_ntypes(const int ntypes,
const int nr, const int nrho,
Memory *memory,
const int cop) {
if (ntypes != _ntypes || nr > _nr || nrho > _nrho) {
if (_ntypes > 0) {
_memory->destroy(rhor_spline_f);
_memory->destroy(rhor_spline_e);
_memory->destroy(frho_spline_f);
_memory->destroy(frho_spline_e);
_memory->destroy(z2r_spline_t);
_memory->destroy(scale_f);
}
if (ntypes > 0) {
_cop = cop;
_nr = nr + 1;
int edge = (_nr * sizeof(flt_t)) % INTEL_DATA_ALIGN;
if (edge) _nr += (INTEL_DATA_ALIGN - edge) / sizeof(flt_t);
memory->create(rhor_spline_f,ntypes*ntypes*_nr,"fc.rhor_spline_f");
memory->create(rhor_spline_e,ntypes*ntypes*_nr,"fc.rhor_spline_e");
memory->create(z2r_spline_t,ntypes*ntypes*_nr,"fc.z2r_spline_t");
_nrho = nrho + 1;
edge = (_nrho * sizeof(flt_t)) % INTEL_DATA_ALIGN;
if (edge) _nrho += (INTEL_DATA_ALIGN - edge) / sizeof(flt_t);
memory->create(frho_spline_f,ntypes*_nrho,"fc.frho_spline_f");
memory->create(frho_spline_e,ntypes*_nrho,"fc.frho_spline_e");
memory->create(scale_f,ntypes,ntypes,"fc.scale_f");
}
}
_ntypes = ntypes;
_memory = memory;
}
/* ---------------------------------------------------------------------- */
int PairEAMIntel::pack_forward_comm(int n, int *list, double *buf,
int pbc_flag, int *pbc)
{
if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
return pack_forward_comm(n, list, buf, fp);
else
return pack_forward_comm(n, list, buf, fp_float);
}
/* ---------------------------------------------------------------------- */
void PairEAMIntel::unpack_forward_comm(int n, int first, double *buf)
{
if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
unpack_forward_comm(n, first, buf, fp);
else
unpack_forward_comm(n, first, buf, fp_float);
}
/* ---------------------------------------------------------------------- */
template<class flt_t>
int PairEAMIntel::pack_forward_comm(int n, int *list, double *buf,
flt_t *fp_f)
{
int i,j,m;
m = 0;
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = fp_f[j];
}
return m;
}
/* ---------------------------------------------------------------------- */
template<class flt_t>
void PairEAMIntel::unpack_forward_comm(int n, int first, double *buf,
flt_t *fp_f)
{
int i,m,last;
m = 0;
last = first + n;
for (i = first; i < last; i++) fp_f[i] = buf[m++];
}
Event Timeline
Log In to Comment