pair_eam_intel.cpp
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Sun, Sep 15, 14:11

pair_eam_intel.cpp
View Options

	/* ----------------------------------------------------------------------
	LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
	http://lammps.sandia.gov, Sandia National Laboratories
	Steve Plimpton, sjplimp@sandia.gov

	Copyright (2003) Sandia Corporation. Under the terms of Contract
	DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
	certain rights in this software. This software is distributed under
	the GNU General Public License.

	See the README file in the top-level LAMMPS directory.
	------------------------------------------------------------------------- */

	/* ----------------------------------------------------------------------
	Contributing authors: W. Michael Brown (Intel)
	------------------------------------------------------------------------- */

	#include <math.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include "pair_eam_intel.h"
	#include "atom.h"
	#include "force.h"
	#include "comm.h"
	#include "neighbor.h"
	#include "neigh_list.h"
	#include "neigh_request.h"
	#include "memory.h"
	#include "error.h"
	#include "suffix.h"

	using namespace LAMMPS_NS;

	#define MAXLINE 1024

	#define FC_PACKED1_T typename ForceConst<flt_t>::fc_packed1
	#define FC_PACKED2_T typename ForceConst<flt_t>::fc_packed2

	/* ---------------------------------------------------------------------- */

	// Construct the Intel-accelerated EAM pair style on top of PairEAM.
	PairEAMIntel::PairEAMIntel(LAMMPS *lmp) : PairEAM(lmp)
	{
	  // Mark this style as carrying the INTEL suffix.
	  suffix_flag |= Suffix::INTEL;
	  // Single-precision fp buffer is created lazily in eval(); start empty so
	  // the destructor's destroy() call is always given a defined pointer.
	  fp_float = 0;
	}

	/* ---------------------------------------------------------------------- */

	// Release the single-precision fp buffer owned by this class.
	PairEAMIntel::~PairEAMIntel()
	{
	  memory->destroy(fp_float);
	}

	/* ---------------------------------------------------------------------- */

	// Entry point for the force computation: selects the templated compute()
	// instantiation and buffer set that match the precision mode reported by
	// the Intel fix, then records a balance time stamp.
	void PairEAMIntel::compute(int eflag, int vflag)
	{
	  if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
	    // mixed: float arithmetic, double accumulation
	    compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
	                          force_const_single);
	  } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
	    compute<double,double>(eflag, vflag, fix->get_double_buffers(),
	                           force_const_double);
	  } else {
	    // any other mode is treated as pure single precision
	    compute<float,float>(eflag, vflag, fix->get_single_buffers(),
	                         force_const_single);
	  }

	  fix->balance_stamp();
	  vflag_fdotr = 0;
	}

	/* ---------------------------------------------------------------------- */

	template <class flt_t, class acc_t>
	void PairEAMIntel::compute(int eflag, int vflag,
	IntelBuffers<flt_t,acc_t> *buffers,
	const ForceConst<flt_t> &fc)
	{
	if (eflag \|\| vflag) {
	ev_setup(eflag, vflag);
	} else evflag = vflag_fdotr = 0;

	const int inum = list->inum;
	const int nthreads = comm->nthreads;
	const int host_start = fix->host_start_pair();
	const int offload_end = fix->offload_end_pair();
	const int ago = neighbor->ago;

	if (ago != 0 && fix->separate_buffers() == 0) {
	fix->start_watch(TIME_PACK);

	#if defined(_OPENMP)
	#pragma omp parallel default(none) shared(eflag,vflag,buffers,fc)
	#endif
	{
	int ifrom, ito, tid;
	IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost,
	nthreads, sizeof(ATOM_T));
	buffers->thr_pack(ifrom,ito,ago);
	}
	fix->stop_watch(TIME_PACK);
	}

	if (_onetype) {
	if (evflag \|\| vflag_fdotr) {
	int ovflag = 0;
	if (vflag_fdotr) ovflag = 2;
	else if (vflag) ovflag = 1;
	if (eflag) {
	if (force->newton_pair) {
	eval<1,1,1,1>(1, ovflag, buffers, fc, 0, offload_end);
	eval<1,1,1,1>(0, ovflag, buffers, fc, host_start, inum);
	} else {
	eval<1,1,1,0>(1, ovflag, buffers, fc, 0, offload_end);
	eval<1,1,1,0>(0, ovflag, buffers, fc, host_start, inum);
	}
	} else {
	if (force->newton_pair) {
	eval<1,1,0,1>(1, ovflag, buffers, fc, 0, offload_end);
	eval<1,1,0,1>(0, ovflag, buffers, fc, host_start, inum);
	} else {
	eval<1,1,0,0>(1, ovflag, buffers, fc, 0, offload_end);
	eval<1,1,0,0>(0, ovflag, buffers, fc, host_start, inum);
	}
	}
	} else {
	if (force->newton_pair) {
	eval<0,0,0,1>(1, 0, buffers, fc, 0, offload_end);
	eval<0,0,0,1>(0, 0, buffers, fc, host_start, inum);
	} else {
	eval<0,0,0,0>(1, 0, buffers, fc, 0, offload_end);
	eval<0,0,0,0>(0, 0, buffers, fc, host_start, inum);
	}
	}
	} else {
	if (evflag \|\| vflag_fdotr) {
	int ovflag = 0;
	if (vflag_fdotr) ovflag = 2;
	else if (vflag) ovflag = 1;
	if (eflag) {
	if (force->newton_pair) {
	eval<0,1,1,1>(1, ovflag, buffers, fc, 0, offload_end);
	eval<0,1,1,1>(0, ovflag, buffers, fc, host_start, inum);
	} else {
	eval<0,1,1,0>(1, ovflag, buffers, fc, 0, offload_end);
	eval<0,1,1,0>(0, ovflag, buffers, fc, host_start, inum);
	}
	} else {
	if (force->newton_pair) {
	eval<0,1,0,1>(1, ovflag, buffers, fc, 0, offload_end);
	eval<0,1,0,1>(0, ovflag, buffers, fc, host_start, inum);
	} else {
	eval<0,1,0,0>(1, ovflag, buffers, fc, 0, offload_end);
	eval<0,1,0,0>(0, ovflag, buffers, fc, host_start, inum);
	}
	}
	} else {
	if (force->newton_pair) {
	eval<0,0,0,1>(1, 0, buffers, fc, 0, offload_end);
	eval<0,0,0,1>(0, 0, buffers, fc, host_start, inum);
	} else {
	eval<0,0,0,0>(1, 0, buffers, fc, 0, offload_end);
	eval<0,0,0,0>(0, 0, buffers, fc, host_start, inum);
	}
	}
	}
	}

	/* ---------------------------------------------------------------------- */

	template <int ONETYPE, int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t,
	class acc_t>
	void PairEAMIntel::eval(const int offload, const int vflag,
	IntelBuffers<flt_t,acc_t> *buffers,
	const ForceConst<flt_t> &fc,
	const int astart, const int aend)
	{
	const int inum = aend - astart;
	if (inum == 0) return;

	flt_t *fp_f;
	if (atom->nmax > nmax) {
	memory->destroy(rho);
	memory->destroy(fp);
	nmax = atom->nmax;
	int edge = (nmax * sizeof(acc_t)) % INTEL_DATA_ALIGN;
	if (edge) nmax += (INTEL_DATA_ALIGN - edge) / sizeof(acc_t);
	memory->create(rho,nmax*comm->nthreads,"pair:rho");
	memory->create(fp,nmax,"pair:fp");
	// Use single precision allocation for single/mixed mode
	// Keep double version for single and swap_eam
	if (sizeof(flt_t)==sizeof(float)) {
	memory->destroy(fp_float);
	memory->create(fp_float,nmax,"pair::fp_float");
	}
	}
	if (sizeof(flt_t)==sizeof(float))
	fp_f = (flt_t *)fp_float;
	else
	fp_f = (flt_t *)fp;


	int nlocal, nall, minlocal;
	fix->get_buffern(offload, nlocal, nall, minlocal);

	const int ago = neighbor->ago;
	IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall);

	ATOM_T * _noalias const x = buffers->get_x(offload);

	const int * _noalias const numneigh = list->numneigh;
	const int * _noalias const cnumneigh = buffers->cnumneigh(list);
	const int * _noalias const firstneigh = buffers->firstneigh(list);
	const FC_PACKED1_T * _noalias const rhor_spline_f = fc.rhor_spline_f;
	const FC_PACKED1_T * _noalias const rhor_spline_e = fc.rhor_spline_e;
	const FC_PACKED2_T * _noalias const z2r_spline_t = fc.z2r_spline_t;
	const FC_PACKED1_T * _noalias const frho_spline_f = fc.frho_spline_f;
	const FC_PACKED1_T * _noalias const frho_spline_e = fc.frho_spline_e;
	const flt_t * _noalias const scale_f = fc.scale_f[0];

	const int ntypes = atom->ntypes + 1;
	const int eatom = this->eflag_atom;

	// Determine how much data to transfer
	int x_size, q_size, f_stride, ev_size, separate_flag;
	IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag,
	buffers, offload, fix, separate_flag,
	x_size, q_size, ev_size, f_stride);

	int tc;
	FORCE_T * _noalias f_start;
	acc_t * _noalias ev_global;
	IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
	const int nthreads = tc;
	int *overflow = fix->get_off_overflow_flag();

	const flt_t frdr = rdr;
	const flt_t frdrho = rdrho;
	const flt_t frhomax = rhomax;
	const flt_t fcutforcesq = cutforcesq;
	const int istride = fc.rhor_istride();
	const int jstride = fc.rhor_jstride();
	const int fstride = fc.frho_stride();

	{
	#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
	*timer_compute = MIC_Wtime();
	#endif

	IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall,
	f_stride, x, 0);

	acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5;
	if (EVFLAG) {
	oevdwl = (acc_t)0;
	if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
	}

	// loop over neighbors of my atoms
	#if defined(_OPENMP)
	#pragma omp parallel default(none) \
	shared(fp_f, f_start,f_stride,nlocal,nall,minlocal) \
	reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5)
	#endif
	{
	int iifrom, iito, tid;
	IP_PRE_omp_range_id_vec(iifrom, iito, tid, inum, nthreads,
	INTEL_VECTOR_WIDTH);
	iifrom += astart;
	iito += astart;

	FORCE_T * _noalias const f = f_start - minlocal + (tid * f_stride);
	double * _noalias const trho = rho + tid*nmax;
	if (NEWTON_PAIR)
	memset(trho, 0, nall * sizeof(double));
	else
	memset(trho, 0, nlocal * sizeof(double));

	flt_t oscale;
	int rhor_joff, frho_ioff;
	if (ONETYPE) {
	const int ptr_off=_onetype * ntypes + _onetype;
	oscale = scale_f[ptr_off];
	int rhor_ioff = istride * _onetype;
	rhor_joff = rhor_ioff + _onetype * jstride;
	frho_ioff = fstride * _onetype;
	}
	for (int i = iifrom; i < iito; ++i) {
	int itype, rhor_ioff;
	if (!ONETYPE) {
	itype = x[i].w;
	rhor_ioff = istride * itype;
	}
	const int * _noalias const jlist = firstneigh + cnumneigh[i];
	const int jnum = numneigh[i];

	const flt_t xtmp = x[i].x;
	const flt_t ytmp = x[i].y;
	const flt_t ztmp = x[i].z;

	acc_t rhoi = (acc_t)0.0;
	#if defined(LMP_SIMD_COMPILER)
	#pragma vector aligned
	#pragma simd reduction(+:rhoi)
	#endif
	for (int jj = 0; jj < jnum; jj++) {
	int j, jtype;
	j = jlist[jj] & NEIGHMASK;

	const flt_t delx = xtmp - x[j].x;
	const flt_t dely = ytmp - x[j].y;
	const flt_t delz = ztmp - x[j].z;
	const flt_t rsq = delxdelx + delydely + delz*delz;

	if (rsq < fcutforcesq) {
	if (!ONETYPE) jtype = x[j].w;
	flt_t p = sqrt(rsq)*frdr + (flt_t)1.0;
	int m = static_cast<int> (p);
	m = MIN(m,nr-1);
	p -= m;
	p = MIN(p,(flt_t)1.0);
	if (!ONETYPE)
	rhor_joff = rhor_ioff + jtype * jstride;
	const int joff = rhor_joff + m;
	flt_t ra;
	ra = ((rhor_spline_e[joff].ap + rhor_spline_e[joff].b) p +
	rhor_spline_e[joff].c) * p + rhor_spline_e[joff].d;
	rhoi += ra;
	if (NEWTON_PAIR \|\| j < nlocal) {
	if (!ONETYPE) {
	const int ioff = jtype * istride + itype * jstride + m;
	ra = ((rhor_spline_e[ioff].ap + rhor_spline_e[ioff].b)p +
	rhor_spline_e[ioff].c) * p + rhor_spline_e[ioff].d;
	}
	trho[j] += ra;
	}
	}
	} // for jj
	trho[i] += rhoi;
	} // for i

	#if defined(_OPENMP)
	if (nthreads > 1) {
	#pragma omp barrier
	if (tid == 0) {
	int rcount;
	if (NEWTON_PAIR) rcount = nall;
	else rcount = nlocal;
	if (nthreads == 2) {
	double *trho2 = rho + nmax;
	#pragma vector aligned
	#pragma simd
	for (int n = 0; n < rcount; n++)
	rho[n] += trho2[n];
	} else if (nthreads == 4) {
	double *trho2 = rho + nmax;
	double *trho3 = trho2 + nmax;
	double *trho4 = trho3 + nmax;
	#pragma vector aligned
	#pragma simd
	for (int n = 0; n < rcount; n++)
	rho[n] += trho2[n] + trho3[n] + trho4[n];
	} else {
	double *trhon = rho + nmax;
	for (int t = 1; t < nthreads; t++) {
	#pragma vector aligned
	#pragma simd
	for (int n = 0; n < rcount; n++)
	rho[n] += trhon[n];
	trhon += nmax;
	}
	}
	}
	}
	#endif

	// communicate and sum densities

	if (NEWTON_PAIR) {
	if (tid == 0)
	comm->reverse_comm_pair(this);
	}
	#if defined(_OPENMP)
	#pragma omp barrier
	#endif

	// fp = derivative of embedding energy at each atom
	// phi = embedding energy at each atom
	// if rho > rhomax (e.g. due to close approach of two atoms),
	// will exceed table, so add linear term to conserve energy

	acc_t tevdwl;
	if (EFLAG) tevdwl = (acc_t)0.0;

	#if defined(LMP_SIMD_COMPILER)
	#pragma vector aligned
	#pragma simd reduction(+:tevdwl)
	#endif
	for (int i = iifrom; i < iito; ++i) {
	int itype;
	if (!ONETYPE) itype = x[i].w;
	flt_t p = rho[i]*frdrho + (flt_t)1.0;
	int m = static_cast<int> (p);
	m = MAX(1,MIN(m,nrho-1));
	p -= m;
	p = MIN(p,(flt_t)1.0);
	if (!ONETYPE) frho_ioff = itype * fstride;
	const int ioff = frho_ioff + m;
	fp_f[i] = (frho_spline_f[ioff].ap + frho_spline_f[ioff].b)p +
	frho_spline_f[ioff].c;
	if (EFLAG) {
	flt_t phi = ((frho_spline_e[ioff].ap + frho_spline_e[ioff].b)p +
	frho_spline_e[ioff].c)*p + frho_spline_e[ioff].d;
	if (rho[i] > frhomax) phi += fp_f[i] * (rho[i]-frhomax);
	if (!ONETYPE) {
	const int ptr_off=itype*ntypes + itype;
	oscale = scale_f[ptr_off];
	}
	phi *= oscale;
	tevdwl += phi;
	if (eatom) f[i].w += phi;
	}
	}
	if (EFLAG) oevdwl += tevdwl;


	// communicate derivative of embedding function

	#if defined(_OPENMP)
	#pragma omp barrier
	#endif

	if (tid == 0) {
	comm->forward_comm_pair(this);
	memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
	} else
	memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));

	#if defined(_OPENMP)
	#pragma omp barrier
	#endif

	// compute forces on each atom
	// loop over neighbors of my atoms

	for (int i = iifrom; i < iito; ++i) {
	int itype, rhor_ioff;
	const flt_t * _noalias scale_fi;
	if (!ONETYPE) {
	itype = x[i].w;
	rhor_ioff = istride * itype;
	scale_fi = scale_f + itype*ntypes;
	}
	const int * _noalias const jlist = firstneigh + cnumneigh[i];
	const int jnum = numneigh[i];

	acc_t fxtmp, fytmp, fztmp, fwtmp;
	acc_t sevdwl, sv0, sv1, sv2, sv3, sv4, sv5;

	const flt_t xtmp = x[i].x;
	const flt_t ytmp = x[i].y;
	const flt_t ztmp = x[i].z;
	fxtmp = fytmp = fztmp = (acc_t)0;
	if (EVFLAG) {
	if (EFLAG) fwtmp = sevdwl = (acc_t)0;
	if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
	}

	#if defined(LMP_SIMD_COMPILER)
	#pragma vector aligned
	#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
	sv0, sv1, sv2, sv3, sv4, sv5)
	#endif
	for (int jj = 0; jj < jnum; jj++) {
	int j, jtype;
	j = jlist[jj] & NEIGHMASK;

	const flt_t delx = xtmp - x[j].x;
	const flt_t dely = ytmp - x[j].y;
	const flt_t delz = ztmp - x[j].z;
	const flt_t rsq = delxdelx + delydely + delz*delz;


	if (rsq < fcutforcesq) {
	if (!ONETYPE) jtype = x[j].w;
	const flt_t r = sqrt(rsq);
	flt_t p = r*frdr + (flt_t)1.0;
	int m = static_cast<int> (p);
	m = MIN(m,nr-1);
	p -= m;
	p = MIN(p,(flt_t)1.0);
	if (!ONETYPE)
	rhor_joff = rhor_ioff + jtype * jstride;
	const int joff = rhor_joff + m;
	const flt_t rhojp = (rhor_spline_f[joff].a*p +
	rhor_spline_f[joff].b)*p +
	rhor_spline_f[joff].c;
	flt_t rhoip;
	if (!ONETYPE) {
	const int ioff = jtype * istride + itype * jstride + m;
	rhoip = (rhor_spline_f[ioff].ap + rhor_spline_f[ioff].b)p +
	rhor_spline_f[ioff].c;
	} else
	rhoip = rhojp;
	const flt_t z2p = (z2r_spline_t[joff].a*p +
	z2r_spline_t[joff].b)*p +
	z2r_spline_t[joff].c;
	const flt_t z2 = ((z2r_spline_t[joff].d*p +
	z2r_spline_t[joff].e)*p +
	z2r_spline_t[joff].f)*p +
	z2r_spline_t[joff].g;

	const flt_t recip = (flt_t)1.0/r;
	const flt_t phi = z2*recip;
	const flt_t phip = z2precip - phirecip;
	const flt_t psip = fp_f[i]rhojp + fp_f[j]rhoip + phip;
	if (!ONETYPE)
	oscale = scale_fi[jtype];
	const flt_t fpair = -oscalepsiprecip;

	fxtmp += delx*fpair;
	fytmp += dely*fpair;
	fztmp += delz*fpair;
	if (NEWTON_PAIR \|\| j < nlocal) {
	f[j].x -= delx*fpair;
	f[j].y -= dely*fpair;
	f[j].z -= delz*fpair;
	}

	if (EVFLAG) {
	flt_t ev_pre = (flt_t)0;
	if (NEWTON_PAIR \|\| i<nlocal)
	ev_pre += (flt_t)0.5;
	if (NEWTON_PAIR \|\| j<nlocal)
	ev_pre += (flt_t)0.5;

	if (EFLAG) {
	const flt_t evdwl = oscale*phi;
	sevdwl += ev_pre * evdwl;
	if (eatom) {
	if (NEWTON_PAIR \|\| i < nlocal)
	fwtmp += (flt_t)0.5 * evdwl;
	if (NEWTON_PAIR \|\| j < nlocal)
	f[j].w += (flt_t)0.5 * evdwl;
	}
	}
	IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair,
	delx, dely, delz);
	}
	} // if rsq
	} // for jj
	f[i].x += fxtmp;
	f[i].y += fytmp;
	f[i].z += fztmp;

	IP_PRE_ev_tally_atom(EVFLAG, EFLAG, vflag, f, fwtmp);
	} // for i

	if (vflag == 2) {
	#if defined(_OPENMP)
	#pragma omp barrier
	#endif
	IP_PRE_fdotr_acc_force(NEWTON_PAIR, EVFLAG, EFLAG, vflag, eatom, nall,
	nlocal, minlocal, nthreads, f_start, f_stride,
	x, offload);
	}

	} /// omp
	if (EVFLAG) {
	if (EFLAG) {
	ev_global[0] = oevdwl;
	ev_global[1] = (acc_t)0.0;
	}
	if (vflag) {
	ev_global[2] = ov0;
	ev_global[3] = ov1;
	ev_global[4] = ov2;
	ev_global[5] = ov3;
	ev_global[6] = ov4;
	ev_global[7] = ov5;
	}
	}
	#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
	timer_compute = MIC_Wtime() - timer_compute;
	#endif
	}

	if (offload)
	fix->stop_watch(TIME_OFFLOAD_LATENCY);
	else
	fix->stop_watch(TIME_HOST_PAIR);

	if (EVFLAG)
	fix->add_result_array(f_start, ev_global, offload, eatom, 0, vflag);
	else
	fix->add_result_array(f_start, 0, offload);
	}

	/* ----------------------------------------------------------------------
	init specific to this pair style
	------------------------------------------------------------------------- */

	void PairEAMIntel::init_style()
	{
	  // Run the base-class setup, then tag the most recently added neighbor
	  // request as an intel request.
	  PairEAM::init_style();
	  neighbor->requests[neighbor->nrequest-1]->intel = 1;

	  // The Intel fix must already exist; it supplies buffers and precision.
	  const int ifix = modify->find_fix("package_intel");
	  if (ifix < 0) {
	    error->all(FLERR,
	               "The 'package intel' command is required for /intel styles");
	  }
	  fix = static_cast<FixIntel *>(modify->fix[ifix]);

	  fix->pair_init_check();
	  #ifdef _LMP_INTEL_OFFLOAD
	  if (fix->offload_balance() != 0.0) {
	    error->all(FLERR,
	               "Offload for eam/intel is not yet available. Set balance to 0.");
	  }
	  #endif

	  // Pack the spline tables into the layout matching the precision mode.
	  if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
	    pack_force_const(force_const_single, fix->get_mixed_buffers());
	  } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
	    pack_force_const(force_const_double, fix->get_double_buffers());
	  } else {
	    pack_force_const(force_const_single, fix->get_single_buffers());
	  }
	}

	/* ---------------------------------------------------------------------- */

	// Copy the EAM spline tables and scale factors into the packed,
	// precision-specific layout held by fc, refresh the cutoff tables, and
	// decide whether the single-type fast path (_onetype) can be used.
	//   fc      : destination tables for working precision flt_t
	//   buffers : Intel buffers whose cutneighsq table is refreshed
	template <class flt_t, class acc_t>
	void PairEAMIntel::pack_force_const(ForceConst<flt_t> &fc,
	                                    IntelBuffers<flt_t,acc_t> *buffers)
	{
	  int tp1 = atom->ntypes + 1;
	  fc.set_ntypes(tp1,nr,nrho,memory,_cop);
	  buffers->set_ntypes(tp1);
	  flt_t **cutneighsq = buffers->get_cutneighsq();

	  // Repeat cutsq calculation because done after call to init_style
	  double cut, cutneigh;
	  for (int i = 1; i <= atom->ntypes; i++) {
	    for (int j = i; j <= atom->ntypes; j++) {
	      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
	        cut = init_one(i,j);
	        cutneigh = cut + neighbor->skin;
	        cutsq[i][j] = cutsq[j][i] = cut*cut;
	        cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
	      }
	    }
	  }

	  // _onetype: -1 = undecided, 0 = general path, >0 = type used for the
	  // single-type fast path. oldscale < 0 means no scale factor seen yet.
	  _onetype=-1;
	  double oldscale=-1;
	  for (int i = 1; i < tp1; i++) {
	    int ioff = i * fc.frho_stride();
	    const int fidx = type2frho[i];
	    for (int k = 0; k < nrho + 1; k++) {
	      // coefficients 0-2 feed the derivative table, 3-6 the energy table
	      fc.frho_spline_f[ioff + k].a = frho_spline[fidx][k][0];
	      fc.frho_spline_f[ioff + k].b = frho_spline[fidx][k][1];
	      fc.frho_spline_f[ioff + k].c = frho_spline[fidx][k][2];
	      fc.frho_spline_e[ioff + k].a = frho_spline[fidx][k][3];
	      fc.frho_spline_e[ioff + k].b = frho_spline[fidx][k][4];
	      fc.frho_spline_e[ioff + k].c = frho_spline[fidx][k][5];
	      fc.frho_spline_e[ioff + k].d = frho_spline[fidx][k][6];
	    }
	    ioff = i * fc.rhor_istride();
	    for (int j = 1; j < tp1; j++) {
	      fc.scale_f[i][j] = scale[i][j];
	      if (type2rhor[i][j] >= 0) {
	        const int joff = ioff + j * fc.rhor_jstride();
	        // note: the tables stored at (i,j) are read via type2rhor[j][i]
	        const int ridx = type2rhor[j][i];
	        for (int k = 0; k < nr + 1; k++) {
	          // any asymmetric mapping or differing scale disables the
	          // single-type fast path
	          if (type2rhor[j][i] != type2rhor[i][j])
	            _onetype = 0;
	          else if (_onetype < 0)
	            _onetype = i;
	          if (oldscale < 0)
	            oldscale = scale[i][j];
	          else
	            if (oldscale != scale[i][j])
	              _onetype = 0;
	          fc.rhor_spline_f[joff + k].a=rhor_spline[ridx][k][0];
	          fc.rhor_spline_f[joff + k].b=rhor_spline[ridx][k][1];
	          fc.rhor_spline_f[joff + k].c=rhor_spline[ridx][k][2];
	          fc.rhor_spline_e[joff + k].a=rhor_spline[ridx][k][3];
	          fc.rhor_spline_e[joff + k].b=rhor_spline[ridx][k][4];
	          fc.rhor_spline_e[joff + k].c=rhor_spline[ridx][k][5];
	          fc.rhor_spline_e[joff + k].d=rhor_spline[ridx][k][6];
	          fc.z2r_spline_t[joff + k].a=z2r_spline[ridx][k][0];
	          fc.z2r_spline_t[joff + k].b=z2r_spline[ridx][k][1];
	          fc.z2r_spline_t[joff + k].c=z2r_spline[ridx][k][2];
	          fc.z2r_spline_t[joff + k].d=z2r_spline[ridx][k][3];
	          fc.z2r_spline_t[joff + k].e=z2r_spline[ridx][k][4];
	          fc.z2r_spline_t[joff + k].f=z2r_spline[ridx][k][5];
	          fc.z2r_spline_t[joff + k].g=z2r_spline[ridx][k][6];
	        }
	      }
	    }
	  }
	  if (_onetype < 0) _onetype = 0;
	}

	/* ---------------------------------------------------------------------- */

	// (Re)allocate the packed tables when the type count changes or the
	// requested table lengths exceed what is currently allocated.
	//   ntypes : number of type slots (0 releases without reallocating)
	//   nr     : radial table length requested by the pair style
	//   nrho   : density table length requested by the pair style
	//   memory : allocator used for create(); remembered for later destroy()
	//   cop    : stored in _cop
	template <class flt_t>
	void PairEAMIntel::ForceConst<flt_t>::set_ntypes(const int ntypes,
	                                                 const int nr, const int nrho,
	                                                 Memory *memory,
	                                                 const int cop) {
	  if (ntypes != _ntypes || nr > _nr || nrho > _nrho) {
	    if (_ntypes > 0) {
	      // free with the allocator remembered from the previous call
	      _memory->destroy(rhor_spline_f);
	      _memory->destroy(rhor_spline_e);
	      _memory->destroy(frho_spline_f);
	      _memory->destroy(frho_spline_e);
	      _memory->destroy(z2r_spline_t);
	      _memory->destroy(scale_f);
	    }
	    if (ntypes > 0) {
	      _cop = cop;
	      // one extra entry, then pad the row length up to the alignment unit
	      _nr = nr + 1;
	      int edge = (_nr * sizeof(flt_t)) % INTEL_DATA_ALIGN;
	      if (edge) _nr += (INTEL_DATA_ALIGN - edge) / sizeof(flt_t);
	      memory->create(rhor_spline_f,ntypes*ntypes*_nr,"fc.rhor_spline_f");
	      memory->create(rhor_spline_e,ntypes*ntypes*_nr,"fc.rhor_spline_e");
	      memory->create(z2r_spline_t,ntypes*ntypes*_nr,"fc.z2r_spline_t");
	      _nrho = nrho + 1;
	      edge = (_nrho * sizeof(flt_t)) % INTEL_DATA_ALIGN;
	      if (edge) _nrho += (INTEL_DATA_ALIGN - edge) / sizeof(flt_t);
	      memory->create(frho_spline_f,ntypes*_nrho,"fc.frho_spline_f");
	      memory->create(frho_spline_e,ntypes*_nrho,"fc.frho_spline_e");
	      memory->create(scale_f,ntypes,ntypes,"fc.scale_f");
	    }
	  }
	  _ntypes = ntypes;
	  _memory = memory;
	}

	/* ---------------------------------------------------------------------- */

	// Forward-communication pack hook: copies fp for the n atoms indexed by
	// list into buf, reading from the array that matches the precision mode.
	// pbc_flag and pbc are not used here. Returns the number of values packed.
	int PairEAMIntel::pack_forward_comm(int n, int *list, double *buf,
	                                    int pbc_flag, int *pbc)
	{
	  if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
	    return pack_forward_comm(n, list, buf, fp);
	  else
	    return pack_forward_comm(n, list, buf, fp_float);
	}

	/* ---------------------------------------------------------------------- */

	// Forward-communication unpack hook: stores n received values starting at
	// atom index first, writing to the array that matches the precision mode.
	void PairEAMIntel::unpack_forward_comm(int n, int first, double *buf)
	{
	  const bool double_mode = (fix->precision() == FixIntel::PREC_MODE_DOUBLE);
	  if (!double_mode) {
	    unpack_forward_comm(n, first, buf, fp_float);
	    return;
	  }
	  unpack_forward_comm(n, first, buf, fp);
	}

	/* ---------------------------------------------------------------------- */

	// Typed pack helper: gathers fp_f[list[i]] for i in [0, n) into buf.
	// Returns the number of values written.
	template<class flt_t>
	int PairEAMIntel::pack_forward_comm(int n, int *list, double *buf,
	                                    flt_t *fp_f)
	{
	  int m = 0;
	  for (int i = 0; i < n; i++) {
	    const int j = list[i];
	    buf[m++] = fp_f[j];
	  }
	  return m;
	}

	/* ---------------------------------------------------------------------- */

	// Typed unpack helper: copies buf[0..n) into fp_f[first..first+n),
	// converting each value to flt_t on assignment.
	template<class flt_t>
	void PairEAMIntel::unpack_forward_comm(int n, int first, double *buf,
	                                       flt_t *fp_f)
	{
	  flt_t *dst = fp_f + first;
	  for (int k = 0; k < n; ++k)
	    dst[k] = buf[k];
	}

pair_eam_intel.cppNo OneTemporaryActions

File Metadata

pair_eam_intel.cppView Options

Event Timeline

pair_eam_intel.cpp
No OneTemporary
Actions

pair_eam_intel.cpp
View Options