lal_eam.cu
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Fri, Oct 11, 13:38

lal_eam.cu
View Options

	// **************************************************************************
	// lal_eam.cu
	// -------------------
	// Trung Dac Nguyen, W. Michael Brown (ORNL)
	//
	// Device code for acceleration of the eam pair style
	//
	// __________________________________________________________________________
	// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
	// __________________________________________________________________________
	//
	// begin :
	// email : brownw@ornl.gov nguyentd@ornl.gov
	// ***************************************************************************/

	// This section is compiled only when NV_KERNEL is defined.
	#ifdef NV_KERNEL
	#include "lal_aux_fun1.h"
	// Texture references read by the fetch helpers below.
	texture<float4> pos_tex;
	texture<float> fp_tex;
	// The helpers below are defined here only when _DOUBLE_DOUBLE is not set.
	// NOTE(review): presumably the included header supplies the alternatives
	// for the other precision modes -- confirm.
	#ifndef _DOUBLE_DOUBLE
	// Return element i read through pos_tex; the pos argument is not used.
	ucl_inline float4 fetch_pos(const int& i, const float4 *pos)
	{ return tex1Dfetch(pos_tex, i); }
	// Return element i read through fp_tex; the fp argument is not used.
	ucl_inline float fetch_q(const int& i, const float *fp)
	{ return tex1Dfetch(fp_tex, i); }
	#endif
	#endif

	// Function-like minimum / maximum. Each argument appears twice in the
	// expansion, so an argument with side effects may be evaluated more than
	// once; pass plain values only.
	#define MIN(A,B) ((A) < (B) ? (A) : (B))
	#define MAX(A,B) ((A) > (B) ? (A) : (B))

	// Combine the per-thread partial results for atom ii and write them out.
	//
	// When t_per_atom>1, the force components and the energy (and, if vflag>0,
	// the six virial terms) held by the threads that share an atom are summed
	// through the __local array red_acc, indexed by tid. The thread with
	// offset==0 then:
	//   - adds energy to the value already stored at engv[ii]   (eflag>0),
	//   - stores the six virial terms, each inum entries apart   (vflag>0),
	//   - stores f in ans[ii].
	// The energy is accumulated with += rather than assigned. kernel_energy in
	// this file writes engv[ii] with a plain assignment; presumably it runs
	// before the kernels that use this macro -- confirm against the host code.
	// The macro advances its engv argument as it writes.
	//
	// NOTE(review): the reduction loops contain no barrier, so they appear to
	// rely on the threads that share an atom executing in lockstep -- confirm.
	//
	// The ninth parameter used to be spelled "elag"; the body tested "eflag",
	// so the argument was ignored and eflag was taken from the caller's scope.
	// It is now named eflag so the argument that is passed is the one tested.
	#define store_answers_eam(f, energy, virial, ii, inum, tid, t_per_atom,     \
	                          offset, eflag, vflag, ans, engv)                  \
	  if (t_per_atom>1) {                                                       \
	    __local acctyp red_acc[6][BLOCK_PAIR];                                  \
	    red_acc[0][tid]=f.x;                                                    \
	    red_acc[1][tid]=f.y;                                                    \
	    red_acc[2][tid]=f.z;                                                    \
	    red_acc[3][tid]=energy;                                                 \
	    for (unsigned int s=t_per_atom/2; s>0; s>>=1) {                         \
	      if (offset < s) {                                                     \
	        for (int r=0; r<4; r++)                                             \
	          red_acc[r][tid] += red_acc[r][tid+s];                             \
	      }                                                                     \
	    }                                                                       \
	    f.x=red_acc[0][tid];                                                    \
	    f.y=red_acc[1][tid];                                                    \
	    f.z=red_acc[2][tid];                                                    \
	    energy=red_acc[3][tid];                                                 \
	    if (vflag>0) {                                                          \
	      for (int r=0; r<6; r++)                                               \
	        red_acc[r][tid]=virial[r];                                          \
	      for (unsigned int s=t_per_atom/2; s>0; s>>=1) {                       \
	        if (offset < s) {                                                   \
	          for (int r=0; r<6; r++)                                           \
	            red_acc[r][tid] += red_acc[r][tid+s];                           \
	        }                                                                   \
	      }                                                                     \
	      for (int r=0; r<6; r++)                                               \
	        virial[r]=red_acc[r][tid];                                          \
	    }                                                                       \
	  }                                                                         \
	  if (offset==0) {                                                          \
	    engv+=ii;                                                               \
	    if (eflag>0) {                                                          \
	      *engv+=energy;                                                        \
	      engv+=inum;                                                           \
	    }                                                                       \
	    if (vflag>0) {                                                          \
	      for (int i=0; i<6; i++) {                                             \
	        *engv=virial[i];                                                    \
	        engv+=inum;                                                         \
	      }                                                                     \
	    }                                                                       \
	    ans[ii]=f;                                                              \
	  }

	// Density / embedding kernel for the eam pair style.
	//
	// For each atom ii<inum, the threads assigned to it walk its neighbor list
	// and, for every neighbor with rsq<cutforcesq, add a cubic evaluated from
	// rhor_spline2 to the density rho. The partial densities are then summed
	// across the t_per_atom threads, and the thread with offset==0:
	//   - evaluates a quadratic from frho_spline1 and stores it in fp_[ii],
	//   - when eflag>0, evaluates a cubic from frho_spline2 and stores twice
	//     that value in engv[ii].
	//
	// Parameters:
	//   x_             per-atom float4; .w is read as the atom type
	//   type2rhor_z2r  per type pair; .x selects the rhor table row
	//   type2frho      per type; selects the frho table row
	//   rhor_spline2, frho_spline1, frho_spline2   spline coefficient tables
	//   dev_nbor, dev_packed, nbor_pitch           neighbor data for nbor_info
	//   fp_            output, one value per atom
	//   engv           output, written at engv[ii] when eflag>0
	//   rdr, rdrho     scale factors applied to r and rho before table lookup
	//   nr, nrho       upper clamps for the r and rho table positions
	//   t_per_atom     number of threads cooperating on one atom
	//
	// This version restores the '*' pointer declarators and multiplication
	// operators that had been stripped from this copy of the file.
	// NOTE(review): the reduction has no barrier, so it appears to rely on the
	// cooperating threads running in lockstep -- confirm.
	__kernel void kernel_energy(__global numtyp4 *x_,
	                    __global numtyp2 *type2rhor_z2r, __global numtyp *type2frho,
	                    __global numtyp4 *rhor_spline2, __global numtyp4 *frho_spline1,
	                    __global numtyp4 *frho_spline2,
	                    __global int *dev_nbor, __global int *dev_packed,
	                    __global numtyp *fp_,
	                    __global acctyp *engv, const int eflag,
	                    const int inum,
	                    const int nbor_pitch,
	                    const int ntypes, const numtyp cutforcesq,
	                    const numtyp rdr, const numtyp rdrho,
	                    const int nrho, const int nr,
	                    const int t_per_atom) {
	  int tid, ii, offset;
	  atom_info(t_per_atom,ii,tid,offset);

	  acctyp rho = (acctyp)0;
	  acctyp energy = (acctyp)0;

	  if (ii<inum) {
	    __global int *nbor, *list_end;
	    int i, numj, n_stride;
	    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
	              n_stride,list_end,nbor);

	    numtyp4 ix=fetch_pos(i,x_); //x_[i];
	    int itype=ix.w;

	    for ( ; nbor<list_end; nbor+=n_stride) {
	      int j=*nbor;
	      j &= NEIGHMASK;

	      numtyp4 jx=fetch_pos(j,x_); //x_[j];
	      int jtype=jx.w;

	      // Compute r12
	      numtyp delx = ix.x-jx.x;
	      numtyp dely = ix.y-jx.y;
	      numtyp delz = ix.z-jx.z;
	      numtyp rsq = delx*delx+dely*dely+delz*delz;

	      if (rsq<cutforcesq) {
	        // Table position: integer part m (clamped to nr-1) and
	        // fractional part p (clamped to 1).
	        numtyp p = ucl_sqrt(rsq)*rdr + (numtyp)1.0;
	        int m=p;
	        m = MIN(m,nr-1);
	        p -= m;
	        p = MIN(p,(numtyp)1.0);

	        int mtype = jtype*ntypes+itype;
	        int index = type2rhor_z2r[mtype].x*(nr+1)+m;
	        numtyp4 coeff = rhor_spline2[index];
	        rho += ((coeff.x*p + coeff.y)*p + coeff.z)*p + coeff.w;
	      }
	    } // for nbor

	    // reduce to get the density at atom ii

	    if (t_per_atom>1) {
	      __local acctyp red_acc[BLOCK_PAIR];
	      red_acc[tid]=rho;
	      for (unsigned int s=t_per_atom/2; s>0; s>>=1) {
	        if (offset < s)
	          red_acc[tid] += red_acc[tid+s];
	      }
	      rho=red_acc[tid];
	    }

	    // calculate the embedded force for ii
	    if (offset==0) {
	      numtyp p = rho*rdrho + (numtyp)1.0;
	      int m=p;
	      m = MAX(1,MIN(m,nrho-1));
	      p -= m;
	      p = MIN(p,(numtyp)1.0);

	      // NOTE(review): the frho tables are indexed with a row length of
	      // (nr+1) although m is clamped against nrho -- confirm that this
	      // matches the table layout built on the host.
	      int index = type2frho[itype]*(nr+1)+m;
	      numtyp4 coeff = frho_spline1[index];
	      numtyp fp = (coeff.x*p + coeff.y)*p + coeff.z;

	      fp_[ii]=fp;

	      engv+=ii;
	      if (eflag>0) {
	        coeff = frho_spline2[index];
	        energy = ((coeff.x*p + coeff.y)*p + coeff.z)*p + coeff.w;
	        // Stored doubled; presumably the host halves the accumulated
	        // per-atom energy later -- confirm.
	        *engv=(acctyp)2.0*energy;
	      }
	    }
	  } // if ii
	}


	// Pair force kernel for the eam pair style (general type indexing).
	//
	// For each atom ii<inum and each neighbor j with rsq<cutforcesq, the spline
	// tables are evaluated at r to obtain rhoip, rhojp, z2p and z2. From these:
	//   phi   = z2/r
	//   phip  = z2p/r - phi/r
	//   psip  = ifp*rhojp + jfp*rhoip + phip
	//   force = -psip/r
	// force scales (delx,dely,delz) into f; phi is added to energy when
	// eflag>0 and the six virial terms are accumulated when vflag>0. The
	// results are combined across threads and stored by store_answers_eam.
	//
	// Parameters:
	//   x_             per-atom float4; .w is read as the atom type
	//   fp_            per-atom value read for both i and j
	//   type2rhor_z2r  per type pair (itype*ntypes+jtype); .x selects the rhor
	//                  table row, .y the z2r table row
	//   rhor_spline1   numtyp4 coefficient table, rows of (nr+1) entries
	//   z2r_spline     scalar coefficient table, 8 values per table position,
	//                  of which the first 7 are read here
	//   dev_nbor, dev_packed, nbor_pitch   neighbor data for nbor_info
	//   ans, engv      outputs written by store_answers_eam
	//   t_per_atom     number of threads cooperating on one atom
	//
	// This version restores the '*' pointer declarators and multiplication
	// operators that had been stripped from this copy of the file.
	__kernel void kernel_pair(__global numtyp4 *x_, __global numtyp *fp_,
	                    __global numtyp2 *type2rhor_z2r,
	                    __global numtyp4 *rhor_spline1, __global numtyp *z2r_spline,
	                    __global int *dev_nbor, __global int *dev_packed,
	                    __global acctyp4 *ans, __global acctyp *engv,
	                    const int eflag, const int vflag,
	                    const int inum, const int nbor_pitch,
	                    const int ntypes, const numtyp cutforcesq,
	                    const numtyp rdr, const int nr,
	                    const int t_per_atom) {
	  int tid, ii, offset;
	  atom_info(t_per_atom,ii,tid,offset);

	  acctyp energy=(acctyp)0;
	  acctyp4 f;
	  f.x=(acctyp)0;
	  f.y=(acctyp)0;
	  f.z=(acctyp)0;
	  acctyp virial[6];
	  for (int i=0; i<6; i++)
	    virial[i]=(acctyp)0;

	  if (ii<inum) {
	    __global int *nbor, *list_end;
	    int i, numj, n_stride;
	    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
	              n_stride,list_end,nbor);

	    numtyp4 ix=fetch_pos(i,x_); //x_[i];
	    numtyp ifp=fetch_q(i,fp_); //fp_[i];
	    int itype=ix.w;

	    for ( ; nbor<list_end; nbor+=n_stride) {
	      int j=*nbor;
	      j &= NEIGHMASK;

	      numtyp4 jx=fetch_pos(j,x_); //x_[j];
	      numtyp jfp=fetch_q(j,fp_); //fp_[j];
	      int jtype=jx.w;

	      // Compute r12
	      numtyp delx = ix.x-jx.x;
	      numtyp dely = ix.y-jx.y;
	      numtyp delz = ix.z-jx.z;
	      numtyp rsq = delx*delx+dely*dely+delz*delz;

	      if (rsq<cutforcesq) {
	        // Table position: integer part m (clamped to nr-1) and
	        // fractional part p (clamped to 1).
	        numtyp r = ucl_sqrt(rsq);
	        numtyp p = r*rdr + (numtyp)1.0;
	        int m=p;
	        m = MIN(m,nr-1);
	        p -= m;
	        p = MIN(p,(numtyp)1.0);

	        int mtype,index;
	        numtyp coeff0,coeff1,coeff2,coeff3,coeff4,coeff5,coeff6;
	        numtyp4 coeff;

	        // rhor table row for the (i,j) type pair
	        mtype = itype*ntypes+jtype;
	        index = type2rhor_z2r[mtype].x*(nr+1)+m;
	        coeff = rhor_spline1[index];
	        numtyp rhoip = (coeff.x*p + coeff.y)*p + coeff.z;

	        // rhor table row for the (j,i) type pair
	        mtype = jtype*ntypes+itype;
	        index = type2rhor_z2r[mtype].x*(nr+1)+m;
	        coeff = rhor_spline1[index];
	        numtyp rhojp = (coeff.x*p + coeff.y)*p + coeff.z;

	        // z2r table row for the (i,j) type pair; 8 scalars per position
	        mtype = itype*ntypes+jtype;
	        index = type2rhor_z2r[mtype].y*(nr+1)*8+m*8;
	        coeff0 = z2r_spline[index+0];
	        coeff1 = z2r_spline[index+1];
	        coeff2 = z2r_spline[index+2];
	        coeff3 = z2r_spline[index+3];
	        coeff4 = z2r_spline[index+4];
	        coeff5 = z2r_spline[index+5];
	        coeff6 = z2r_spline[index+6];

	        numtyp z2p = (coeff0*p + coeff1)*p + coeff2;
	        numtyp z2 = ((coeff3*p + coeff4)*p + coeff5)*p + coeff6;

	        numtyp recip = (numtyp)1.0/r;
	        numtyp phi = z2*recip;
	        numtyp phip = z2p*recip - phi*recip;
	        numtyp psip = ifp*rhojp + jfp*rhoip + phip;
	        numtyp force = -psip*recip;

	        f.x+=delx*force;
	        f.y+=dely*force;
	        f.z+=delz*force;

	        if (eflag>0) {
	          energy += phi;
	        }
	        if (vflag>0) {
	          virial[0] += delx*delx*force;
	          virial[1] += dely*dely*force;
	          virial[2] += delz*delz*force;
	          virial[3] += delx*dely*force;
	          virial[4] += delx*delz*force;
	          virial[5] += dely*delz*force;
	        }
	      }

	    } // for nbor
	    store_answers_eam(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag,
	                      ans,engv);
	  } // if ii

	}

	// Pair force kernel for the eam pair style, using a fixed type stride.
	//
	// Performs the same computation as kernel_pair, but the type-pair table is
	// indexed with a row stride of MAX_SHARED_TYPES instead of a runtime
	// ntypes argument:
	//   (i,j) entry: MAX_SHARED_TYPES*iw + jw
	//   (j,i) entry: MAX_SHARED_TYPES*jw + iw
	// where iw and jw are the integer types read from the .w component of the
	// positions.
	//
	// Parameters:
	//   x_             per-atom float4; .w is read as the atom type
	//   fp_            per-atom value read for both i and j
	//   type2rhor_z2r  per type pair; .x selects the rhor table row, .y the
	//                  z2r table row
	//   rhor_spline1   numtyp4 coefficient table, rows of (nr+1) entries
	//   z2r_spline     scalar coefficient table, 8 values per table position,
	//                  of which the first 7 are read here
	//   dev_nbor, dev_packed, nbor_pitch   neighbor data for nbor_info
	//   ans, engv      outputs written by store_answers_eam
	//   t_per_atom     number of threads cooperating on one atom
	//
	// Changes in this version:
	//   - restores the '*' pointer declarators and multiplication operators
	//     that had been stripped from this copy of the file;
	//   - the (j,i) index used to be jtype+ix.w with jtype equal to the raw
	//     type of j, i.e. jw+iw, which does not address the (j,i) entry of a
	//     table strided by MAX_SHARED_TYPES. jtype is now scaled the same way
	//     as itype.
	__kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp *fp_,
	                    __global numtyp2 *type2rhor_z2r,
	                    __global numtyp4 *rhor_spline1, __global numtyp *z2r_spline,
	                    __global int *dev_nbor, __global int *dev_packed,
	                    __global acctyp4 *ans, __global acctyp *engv,
	                    const int eflag, const int vflag, const int inum,
	                    const int nbor_pitch,
	                    const numtyp cutforcesq,
	                    const numtyp rdr, const int nr,
	                    const int t_per_atom) {
	  int tid, ii, offset;
	  atom_info(t_per_atom,ii,tid,offset);

	  acctyp energy=(acctyp)0;
	  acctyp4 f;
	  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
	  acctyp virial[6];
	  for (int i=0; i<6; i++)
	    virial[i]=(acctyp)0;

	  if (ii<inum) {
	    __global int *nbor, *list_end;
	    int i, numj, n_stride;
	    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
	              n_stride,list_end,nbor);

	    numtyp4 ix=fetch_pos(i,x_); //x_[i];
	    numtyp ifp=fetch_q(i,fp_); //fp_[i];
	    int iw=ix.w;
	    int itype=fast_mul((int)MAX_SHARED_TYPES,iw);

	    for ( ; nbor<list_end; nbor+=n_stride) {
	      int j=*nbor;
	      j &= NEIGHMASK;

	      numtyp4 jx=fetch_pos(j,x_); //x_[j];
	      numtyp jfp=fetch_q(j,fp_); //fp_[j];
	      int jw=jx.w;
	      // Row offset for type j, scaled like itype.
	      int jtype=fast_mul((int)MAX_SHARED_TYPES,jw);

	      // Compute r12
	      numtyp delx = ix.x-jx.x;
	      numtyp dely = ix.y-jx.y;
	      numtyp delz = ix.z-jx.z;
	      numtyp rsq = delx*delx+dely*dely+delz*delz;

	      if (rsq<cutforcesq) {
	        // Table position: integer part m (clamped to nr-1) and
	        // fractional part p (clamped to 1).
	        numtyp r = ucl_sqrt(rsq);
	        numtyp p = r*rdr + (numtyp)1.0;
	        int m=p;
	        m = MIN(m,nr-1);
	        p -= m;
	        p = MIN(p,(numtyp)1.0);

	        numtyp coeff0,coeff1,coeff2,coeff3,coeff4,coeff5,coeff6;
	        numtyp4 coeff;
	        int mtype,index;

	        // rhor table row for the (i,j) type pair
	        mtype = itype+jw;
	        index = type2rhor_z2r[mtype].x*(nr+1)+m;
	        coeff = rhor_spline1[index];
	        numtyp rhoip = (coeff.x*p + coeff.y)*p + coeff.z;

	        // rhor table row for the (j,i) type pair
	        mtype = jtype+iw;
	        index = type2rhor_z2r[mtype].x*(nr+1)+m;
	        coeff = rhor_spline1[index];
	        numtyp rhojp = (coeff.x*p + coeff.y)*p + coeff.z;

	        // z2r table row for the (i,j) type pair; 8 scalars per position
	        mtype = itype+jw;
	        index = type2rhor_z2r[mtype].y*(nr+1)*8+m*8;
	        coeff0 = z2r_spline[index+0];
	        coeff1 = z2r_spline[index+1];
	        coeff2 = z2r_spline[index+2];
	        coeff3 = z2r_spline[index+3];
	        coeff4 = z2r_spline[index+4];
	        coeff5 = z2r_spline[index+5];
	        coeff6 = z2r_spline[index+6];

	        numtyp z2p = (coeff0*p + coeff1)*p + coeff2;
	        numtyp z2 = ((coeff3*p + coeff4)*p + coeff5)*p + coeff6;

	        numtyp recip = (numtyp)1.0/r;
	        numtyp phi = z2*recip;
	        numtyp phip = z2p*recip - phi*recip;
	        numtyp psip = ifp*rhojp + jfp*rhoip + phip;
	        numtyp force = -psip*recip;

	        f.x+=delx*force;
	        f.y+=dely*force;
	        f.z+=delz*force;

	        if (eflag>0) {
	          energy += phi;
	        }
	        if (vflag>0) {
	          virial[0] += delx*delx*force;
	          virial[1] += dely*dely*force;
	          virial[2] += delz*delz*force;
	          virial[3] += delx*dely*force;
	          virial[4] += delx*delz*force;
	          virial[5] += dely*delz*force;
	        }
	      }

	    } // for nbor
	    store_answers_eam(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag,
	                      ans,engv);
	  } // if ii
	}

lal_eam.cuNo OneTemporaryActions

File Metadata

lal_eam.cuView Options

Event Timeline

lal_eam.cu
No OneTemporary
Actions

lal_eam.cu
View Options