pair_omp_gpu.cpp
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Thu, Jul 3, 01:58

pair_omp_gpu.cpp
View Options

	/* ----------------------------------------------------------------------
	LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
	http://lammps.sandia.gov, Sandia National Laboratories
	Steve Plimpton, sjplimp@sandia.gov

	Copyright (2003) Sandia Corporation. Under the terms of Contract
	DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
	certain rights in this software. This software is distributed under
	the GNU General Public License.

	See the README file in the top-level LAMMPS directory.
	------------------------------------------------------------------------- */

	/* ----------------------------------------------------------------------
	Contributing author: Axel Kohlmeyer (Temple U)
	Modified by Mike for use with GPU library
	------------------------------------------------------------------------- */

	#if defined(_OPENMP)

	#include "atom.h"
	#include "comm.h"
	#include "force.h"
	#include "pair_omp_gpu.h"
	#include "memory.h"

	using namespace LAMMPS_NS;

	/* ---------------------------------------------------------------------- */

	PairOMPGPU::PairOMPGPU(LAMMPS *lmp) : Pointers(lmp)
	{
	eng_vdwl_thr = NULL;
	eng_coul_thr = NULL;
	virial_thr = NULL;
	eatom_thr = NULL;
	vatom_thr = NULL;
	f_thr = NULL;
	}

	/* ---------------------------------------------------------------------- */

	PairOMPGPU::~PairOMPGPU()
	{
	free_mem();
	}

	/* ----------------------------------------------------------------------
	free any allocated memory
	------------------------------------------------------------------------- */

	void PairOMPGPU::free_mem() {
	memory->sfree(eng_vdwl_thr);
	memory->sfree(eng_coul_thr);
	memory->destroy(virial_thr);
	memory->destroy(eatom_thr);
	memory->destroy(vatom_thr);
	memory->destroy(f_thr);
	eng_vdwl_thr = NULL;
	eng_coul_thr = NULL;
	virial_thr = NULL;
	eatom_thr = NULL;
	vatom_thr = NULL;
	f_thr = NULL;
	_nmax = 0;
	}

	/* ----------------------------------------------------------------------
	init specific to this pair style
	------------------------------------------------------------------------- */

	void PairOMPGPU::init_style()
	{
	free_mem();

	#pragma omp parallel
	{
	int th_id = omp_get_thread_num();
	#pragma omp barrier
	if (th_id == 0)
	_nthreads = omp_get_num_threads();
	}

	// for hybrid OpenMP/MPI we need multiple copies
	// of some accumulators to avoid race conditions

	eng_vdwl_thr = (double )memory->smalloc(_nthreadssizeof(double),
	"pair:eng_vdwl_thr");
	eng_coul_thr = (double )memory->smalloc(_nthreadssizeof(double),
	"pair:eng_coul_thr");
	memory->create(virial_thr,_nthreads,6,"pair:virial_thr");

	maxeatom_thr = maxvatom_thr = 0;
	}

	/* ----------------------------------------------------------------------
	setup for energy, virial computation. additional code for multi-threading
	see integrate::ev_set() for values of eflag (0-3) and vflag (0-6)
	------------------------------------------------------------------------- */

	void PairOMPGPU::ev_setup_thr(int eflag, int vflag, int _eflag_either,
	int _eflag_global, int _eflag_atom,
	int _vflag_either, int _vflag_global,
	int _vflag_atom)
	{
	eflag_either=_eflag_either;
	eflag_global=_eflag_global;
	eflag_atom=_eflag_atom;
	vflag_either=_vflag_either;
	vflag_global=_vflag_global;
	vflag_atom=_vflag_atom;
	int i,n,t;

	// reallocate per-atom arrays if necessary
	if (eflag_atom && atom->nmax > maxeatom_thr) {
	maxeatom_thr = atom->nmax;
	memory->destroy(eatom_thr);
	memory->create(eatom_thr,_nthreads,maxeatom_thr,"pair:eatom_thr");
	}
	if (vflag_atom && atom->nmax > maxvatom_thr) {
	maxvatom_thr = atom->nmax;
	memory->destroy(vatom_thr);
	memory->create(vatom_thr,_nthreads,maxvatom_thr,6,"pair:vatom_thr");
	}

	// zero per thread accumulators
	// use force->newton instead of newton_pair
	// b/c some bonds/dihedrals call pair::ev_tally with pairwise info
	const int ntotal = (force->newton) ?
	(atom->nlocal + atom->nghost) : atom->nlocal;
	for (t = 0; t < _nthreads; ++t) {
	if (eflag_global) eng_vdwl_thr[t] = eng_coul_thr[t] = 0.0;
	if (vflag_global) for (i = 0; i < 6; ++i) virial_thr[t][i] = 0.0;
	if (eflag_atom) {
	for (i = 0; i < ntotal; ++i) eatom_thr[t][i] = 0.0;
	}
	if (vflag_atom) {
	for (i = 0; i < ntotal; ++i) {
	vatom_thr[t][i][0] = 0.0;
	vatom_thr[t][i][1] = 0.0;
	vatom_thr[t][i][2] = 0.0;
	vatom_thr[t][i][3] = 0.0;
	vatom_thr[t][i][4] = 0.0;
	vatom_thr[t][i][5] = 0.0;
	}
	}
	}
	}

	/* ----------------------------------------------------------------------
	tally eng_vdwl and virial into per thread global and per-atom accumulators
	need i < nlocal test since called by bond_quartic and dihedral_charmm
	------------------------------------------------------------------------- */

	void PairOMPGPU::ev_tally_thr(int i, int j, int nlocal, int newton_pair,
	double evdwl, double ecoul, double fpair,
	double delx, double dely, double delz, int tid)
	{
	double evdwlhalf,ecoulhalf,epairhalf,v[6];

	if (eflag_either) {
	if (eflag_global) {
	if (newton_pair) {
	eng_vdwl_thr[tid] += evdwl;
	eng_coul_thr[tid] += ecoul;
	} else {
	evdwlhalf = 0.5*evdwl;
	ecoulhalf = 0.5*ecoul;
	if (i < nlocal) {
	eng_vdwl_thr[tid] += evdwlhalf;
	eng_coul_thr[tid] += ecoulhalf;
	}
	if (j < nlocal) {
	eng_vdwl_thr[tid] += evdwlhalf;
	eng_coul_thr[tid] += ecoulhalf;
	}
	}
	}
	if (eflag_atom) {
	epairhalf = 0.5 * (evdwl + ecoul);
	if (newton_pair \|\| i < nlocal) eatom_thr[tid][i] += epairhalf;
	if (newton_pair \|\| j < nlocal) eatom_thr[tid][j] += epairhalf;
	}
	}

	if (vflag_either) {
	v[0] = delxdelxfpair;
	v[1] = delydelyfpair;
	v[2] = delzdelzfpair;
	v[3] = delxdelyfpair;
	v[4] = delxdelzfpair;
	v[5] = delydelzfpair;

	if (vflag_global) {
	if (newton_pair) {
	virial_thr[tid][0] += v[0];
	virial_thr[tid][1] += v[1];
	virial_thr[tid][2] += v[2];
	virial_thr[tid][3] += v[3];
	virial_thr[tid][4] += v[4];
	virial_thr[tid][5] += v[5];
	} else {
	if (i < nlocal) {
	virial_thr[tid][0] += 0.5*v[0];
	virial_thr[tid][1] += 0.5*v[1];
	virial_thr[tid][2] += 0.5*v[2];
	virial_thr[tid][3] += 0.5*v[3];
	virial_thr[tid][4] += 0.5*v[4];
	virial_thr[tid][5] += 0.5*v[5];
	}
	if (j < nlocal) {
	virial_thr[tid][0] += 0.5*v[0];
	virial_thr[tid][1] += 0.5*v[1];
	virial_thr[tid][2] += 0.5*v[2];
	virial_thr[tid][3] += 0.5*v[3];
	virial_thr[tid][4] += 0.5*v[4];
	virial_thr[tid][5] += 0.5*v[5];
	}
	}
	}

	if (vflag_atom) {
	if (newton_pair \|\| i < nlocal) {
	vatom_thr[tid][i][0] += 0.5*v[0];
	vatom_thr[tid][i][1] += 0.5*v[1];
	vatom_thr[tid][i][2] += 0.5*v[2];
	vatom_thr[tid][i][3] += 0.5*v[3];
	vatom_thr[tid][i][4] += 0.5*v[4];
	vatom_thr[tid][i][5] += 0.5*v[5];
	}
	if (newton_pair \|\| j < nlocal) {
	vatom_thr[tid][j][0] += 0.5*v[0];
	vatom_thr[tid][j][1] += 0.5*v[1];
	vatom_thr[tid][j][2] += 0.5*v[2];
	vatom_thr[tid][j][3] += 0.5*v[3];
	vatom_thr[tid][j][4] += 0.5*v[4];
	vatom_thr[tid][j][5] += 0.5*v[5];
	}
	}
	}
	}

	/* ----------------------------------------------------------------------
	tally eng_vdwl and virial into per thread global and per-atom accumulators
	need i < nlocal test since called by bond_quartic and dihedral_charmm
	------------------------------------------------------------------------- */

	void PairOMPGPU::ev_tally_full_thr(int i, double evdwl,
	double ecoul, double fpair, double delx,
	double dely, double delz, int tid)
	{
	double evdwlhalf,ecoulhalf,epairhalf,v[6];

	if (eflag_either) {
	if (eflag_global) {
	evdwlhalf = 0.5*evdwl;
	ecoulhalf = 0.5*ecoul;
	eng_vdwl_thr[tid] += evdwlhalf;
	eng_coul_thr[tid] += ecoulhalf;
	}
	if (eflag_atom) {
	epairhalf = 0.5 * (evdwl + ecoul);
	eatom_thr[tid][i] += epairhalf;
	}
	}

	if (vflag_either) {
	v[0] = delxdelxfpair;
	v[1] = delydelyfpair;
	v[2] = delzdelzfpair;
	v[3] = delxdelyfpair;
	v[4] = delxdelzfpair;
	v[5] = delydelzfpair;

	if (vflag_global) {
	virial_thr[tid][0] += 0.5*v[0];
	virial_thr[tid][1] += 0.5*v[1];
	virial_thr[tid][2] += 0.5*v[2];
	virial_thr[tid][3] += 0.5*v[3];
	virial_thr[tid][4] += 0.5*v[4];
	virial_thr[tid][5] += 0.5*v[5];
	}

	if (vflag_atom) {
	vatom_thr[tid][i][0] += 0.5*v[0];
	vatom_thr[tid][i][1] += 0.5*v[1];
	vatom_thr[tid][i][2] += 0.5*v[2];
	vatom_thr[tid][i][3] += 0.5*v[3];
	vatom_thr[tid][i][4] += 0.5*v[4];
	vatom_thr[tid][i][5] += 0.5*v[5];
	}
	}
	}

	/* ----------------------------------------------------------------------
	tally eng_vdwl and virial into global and per-atom accumulators
	for virial, have delx,dely,delz and fx,fy,fz
	------------------------------------------------------------------------- */

	void PairOMPGPU::ev_tally_xyz_thr(int i, int j, int nlocal, int newton_pair,
	double evdwl, double ecoul,
	double fx, double fy, double fz,
	double delx, double dely, double delz, int tid)
	{
	double evdwlhalf,ecoulhalf,epairhalf,v[6];

	if (eflag_either) {
	if (eflag_global) {
	if (newton_pair) {
	eng_vdwl_thr[tid] += evdwl;
	eng_coul_thr[tid] += ecoul;
	} else {
	evdwlhalf = 0.5*evdwl;
	ecoulhalf = 0.5*ecoul;
	if (i < nlocal) {
	eng_vdwl_thr[tid] += evdwlhalf;
	eng_coul_thr[tid] += ecoulhalf;
	}
	if (j < nlocal) {
	eng_vdwl_thr[tid] += evdwlhalf;
	eng_coul_thr[tid] += ecoulhalf;
	}
	}
	}
	if (eflag_atom) {
	epairhalf = 0.5 * (evdwl + ecoul);
	if (newton_pair \|\| i < nlocal) eatom_thr[tid][i] += epairhalf;
	if (newton_pair \|\| j < nlocal) eatom_thr[tid][j] += epairhalf;
	}
	}

	if (vflag_either) {
	v[0] = delx*fx;
	v[1] = dely*fy;
	v[2] = delz*fz;
	v[3] = delx*fy;
	v[4] = delx*fz;
	v[5] = dely*fz;

	if (vflag_global) {
	if (newton_pair) {
	virial_thr[tid][0] += v[0];
	virial_thr[tid][1] += v[1];
	virial_thr[tid][2] += v[2];
	virial_thr[tid][3] += v[3];
	virial_thr[tid][4] += v[4];
	virial_thr[tid][5] += v[5];
	} else {
	if (i < nlocal) {
	virial_thr[tid][0] += 0.5*v[0];
	virial_thr[tid][1] += 0.5*v[1];
	virial_thr[tid][2] += 0.5*v[2];
	virial_thr[tid][3] += 0.5*v[3];
	virial_thr[tid][4] += 0.5*v[4];
	virial_thr[tid][5] += 0.5*v[5];
	}
	if (j < nlocal) {
	virial_thr[tid][0] += 0.5*v[0];
	virial_thr[tid][1] += 0.5*v[1];
	virial_thr[tid][2] += 0.5*v[2];
	virial_thr[tid][3] += 0.5*v[3];
	virial_thr[tid][4] += 0.5*v[4];
	virial_thr[tid][5] += 0.5*v[5];
	}
	}
	}

	if (vflag_atom) {
	if (newton_pair \|\| i < nlocal) {
	vatom_thr[tid][i][0] += 0.5*v[0];
	vatom_thr[tid][i][1] += 0.5*v[1];
	vatom_thr[tid][i][2] += 0.5*v[2];
	vatom_thr[tid][i][3] += 0.5*v[3];
	vatom_thr[tid][i][4] += 0.5*v[4];
	vatom_thr[tid][i][5] += 0.5*v[5];
	}
	if (newton_pair \|\| j < nlocal) {
	vatom_thr[tid][j][0] += 0.5*v[0];
	vatom_thr[tid][j][1] += 0.5*v[1];
	vatom_thr[tid][j][2] += 0.5*v[2];
	vatom_thr[tid][j][3] += 0.5*v[3];
	vatom_thr[tid][j][4] += 0.5*v[4];
	vatom_thr[tid][j][5] += 0.5*v[5];
	}
	}
	}
	}

	/* ----------------------------------------------------------------------
	tally eng_vdwl and virial into global and per-atom accumulators
	called by SW potential, newton_pair is always on
	virial = riFi + rjFj + rkFk = (rj-ri) Fj + (rk-ri) Fk = drjifj + drkifk
	------------------------------------------------------------------------- */

	void PairOMPGPU::ev_tally3_thr(int i, int j, int k, double evdwl, double ecoul,
	double fj, double fk, double drji, double drki, int tid,
	double THIRD)
	{
	double epairthird,v[6];

	if (eflag_either) {
	if (eflag_global) {
	eng_vdwl_thr[tid] += evdwl;
	eng_coul_thr[tid] += ecoul;
	}
	if (eflag_atom) {
	epairthird = THIRD * (evdwl + ecoul);
	eatom_thr[tid][i] += epairthird;
	eatom_thr[tid][j] += epairthird;
	eatom_thr[tid][k] += epairthird;
	}
	}

	if (vflag_atom) {
	v[0] = THIRD * (drji[0]fj[0] + drki[0]fk[0]);
	v[1] = THIRD * (drji[1]fj[1] + drki[1]fk[1]);
	v[2] = THIRD * (drji[2]fj[2] + drki[2]fk[2]);
	v[3] = THIRD * (drji[0]fj[1] + drki[0]fk[1]);
	v[4] = THIRD * (drji[0]fj[2] + drki[0]fk[2]);
	v[5] = THIRD * (drji[1]fj[2] + drki[1]fk[2]);

	vatom_thr[tid][i][0] += v[0]; vatom_thr[tid][i][1] += v[1];
	vatom_thr[tid][i][2] += v[2]; vatom_thr[tid][i][3] += v[3];
	vatom_thr[tid][i][4] += v[4]; vatom_thr[tid][i][5] += v[5];
	vatom_thr[tid][j][0] += v[0]; vatom_thr[tid][j][1] += v[1];
	vatom_thr[tid][j][2] += v[2]; vatom_thr[tid][j][3] += v[3];
	vatom_thr[tid][j][4] += v[4]; vatom_thr[tid][j][5] += v[5];
	vatom_thr[tid][k][0] += v[0]; vatom_thr[tid][k][1] += v[1];
	vatom_thr[tid][k][2] += v[2]; vatom_thr[tid][k][3] += v[3];
	vatom_thr[tid][k][4] += v[4]; vatom_thr[tid][k][5] += v[5];
	}
	}

	/* ----------------------------------------------------------------------
	tally eng_vdwl and virial into global and per-atom accumulators
	called by AIREBO potential, newton_pair is always on
	------------------------------------------------------------------------- */

	void PairOMPGPU::ev_tally4_thr(int i, int j, int k, int m, double evdwl,
	double fi, double fj, double *fk,
	double drim, double drjm, double *drkm, int tid)
	{
	double epairfourth,v[6];

	if (eflag_either) {
	if (eflag_global) eng_vdwl_thr[tid] += evdwl;
	if (eflag_atom) {
	epairfourth = 0.25 * evdwl;
	eatom_thr[tid][i] += epairfourth;
	eatom_thr[tid][j] += epairfourth;
	eatom_thr[tid][k] += epairfourth;
	eatom_thr[tid][m] += epairfourth;
	}
	}

	if (vflag_atom) {
	v[0] = 0.25 * (drim[0]fi[0] + drjm[0]fj[0] + drkm[0]*fk[0]);
	v[1] = 0.25 * (drim[1]fi[1] + drjm[1]fj[1] + drkm[1]*fk[1]);
	v[2] = 0.25 * (drim[2]fi[2] + drjm[2]fj[2] + drkm[2]*fk[2]);
	v[3] = 0.25 * (drim[0]fi[1] + drjm[0]fj[1] + drkm[0]*fk[1]);
	v[4] = 0.25 * (drim[0]fi[2] + drjm[0]fj[2] + drkm[0]*fk[2]);
	v[5] = 0.25 * (drim[1]fi[2] + drjm[1]fj[2] + drkm[1]*fk[2]);

	vatom_thr[tid][i][0] += v[0]; vatom_thr[tid][i][1] += v[1];
	vatom_thr[tid][i][2] += v[2]; vatom_thr[tid][i][3] += v[3];
	vatom_thr[tid][i][4] += v[4]; vatom_thr[tid][i][5] += v[5];
	vatom_thr[tid][j][0] += v[0]; vatom_thr[tid][j][1] += v[1];
	vatom_thr[tid][j][2] += v[2]; vatom_thr[tid][j][3] += v[3];
	vatom_thr[tid][j][4] += v[4]; vatom_thr[tid][j][5] += v[5];
	vatom_thr[tid][k][0] += v[0]; vatom_thr[tid][k][1] += v[1];
	vatom_thr[tid][k][2] += v[2]; vatom_thr[tid][k][3] += v[3];
	vatom_thr[tid][k][4] += v[4]; vatom_thr[tid][k][5] += v[5];
	vatom_thr[tid][m][0] += v[0]; vatom_thr[tid][m][1] += v[1];
	vatom_thr[tid][m][2] += v[2]; vatom_thr[tid][m][3] += v[3];
	vatom_thr[tid][m][4] += v[4]; vatom_thr[tid][m][5] += v[5];
	}
	}

	/* ----------------------------------------------------------------------
	tally ecoul and virial into each of n atoms in list
	called by TIP4P potential, newton_pair is always on
	changes v values by dividing by n
	------------------------------------------------------------------------- */

	void PairOMPGPU::ev_tally_list_thr(int n, int list, double ecoul, double v, int tid)
	{
	int i,j;

	if (eflag_either) {
	if (eflag_global) eng_coul_thr[tid] += ecoul;
	if (eflag_atom) {
	double epairatom = ecoul/n;
	for (i = 0; i < n; i++) eatom_thr[tid][list[i]] += epairatom;
	}
	}

	if (vflag_either) {
	if (vflag_global) {
	virial_thr[tid][0] += v[0];
	virial_thr[tid][1] += v[1];
	virial_thr[tid][2] += v[2];
	virial_thr[tid][3] += v[3];
	virial_thr[tid][4] += v[4];
	virial_thr[tid][5] += v[5];
	}

	if (vflag_atom) {
	v[0] /= n;
	v[1] /= n;
	v[2] /= n;
	v[3] /= n;
	v[4] /= n;
	v[5] /= n;
	for (i = 0; i < n; i++) {
	j = list[i];
	vatom_thr[tid][j][0] += v[0];
	vatom_thr[tid][j][1] += v[1];
	vatom_thr[tid][j][2] += v[2];
	vatom_thr[tid][j][3] += v[3];
	vatom_thr[tid][j][4] += v[4];
	vatom_thr[tid][j][5] += v[5];
	}
	}
	}
	}

	/* ----------------------------------------------------------------------
	tally virial into per-atom accumulators
	called by AIREBO potential, newton_pair is always on
	fpair is magnitude of force on atom I
	------------------------------------------------------------------------- */

	void PairOMPGPU::v_tally2_thr(int i, int j, double fpair, double *drij, int tid)
	{
	double v[6];

	v[0] = 0.5 * drij[0]drij[0]fpair;
	v[1] = 0.5 * drij[1]drij[1]fpair;
	v[2] = 0.5 * drij[2]drij[2]fpair;
	v[3] = 0.5 * drij[0]drij[1]fpair;
	v[4] = 0.5 * drij[0]drij[2]fpair;
	v[5] = 0.5 * drij[1]drij[2]fpair;

	vatom_thr[tid][i][0] += v[0]; vatom_thr[tid][i][1] += v[1];
	vatom_thr[tid][i][2] += v[2]; vatom_thr[tid][i][3] += v[3];
	vatom_thr[tid][i][4] += v[4]; vatom_thr[tid][i][5] += v[5];
	vatom_thr[tid][j][0] += v[0]; vatom_thr[tid][j][1] += v[1];
	vatom_thr[tid][j][2] += v[2]; vatom_thr[tid][j][3] += v[3];
	vatom_thr[tid][j][4] += v[4]; vatom_thr[tid][j][5] += v[5];
	}

	/* ----------------------------------------------------------------------
	tally virial into per-atom accumulators
	called by AIREBO and Tersoff potential, newton_pair is always on
	------------------------------------------------------------------------- */

	void PairOMPGPU::v_tally3_thr(int i, int j, int k, double fi, double fj,
	double drik, double drjk, int tid,
	double THIRD)
	{
	double v[6];

	v[0] = THIRD * (drik[0]fi[0] + drjk[0]fj[0]);
	v[1] = THIRD * (drik[1]fi[1] + drjk[1]fj[1]);
	v[2] = THIRD * (drik[2]fi[2] + drjk[2]fj[2]);
	v[3] = THIRD * (drik[0]fi[1] + drjk[0]fj[1]);
	v[4] = THIRD * (drik[0]fi[2] + drjk[0]fj[2]);
	v[5] = THIRD * (drik[1]fi[2] + drjk[1]fj[2]);

	vatom_thr[tid][i][0] += v[0]; vatom_thr[tid][i][1] += v[1];
	vatom_thr[tid][i][2] += v[2]; vatom_thr[tid][i][3] += v[3];
	vatom_thr[tid][i][4] += v[4]; vatom_thr[tid][i][5] += v[5];
	vatom_thr[tid][j][0] += v[0]; vatom_thr[tid][j][1] += v[1];
	vatom_thr[tid][j][2] += v[2]; vatom_thr[tid][j][3] += v[3];
	vatom_thr[tid][j][4] += v[4]; vatom_thr[tid][j][5] += v[5];
	vatom_thr[tid][k][0] += v[0]; vatom_thr[tid][k][1] += v[1];
	vatom_thr[tid][k][2] += v[2]; vatom_thr[tid][k][3] += v[3];
	vatom_thr[tid][k][4] += v[4]; vatom_thr[tid][k][5] += v[5];
	}

	/* ----------------------------------------------------------------------
	tally virial into per-atom accumulators
	called by AIREBO potential, newton_pair is always on
	------------------------------------------------------------------------- */

	void PairOMPGPU::v_tally4_thr(int i, int j, int k, int m,
	double fi, double fj, double *fk,
	double drim, double drjm, double *drkm, int tid)
	{
	double v[6];

	v[0] = 0.25 * (drim[0]fi[0] + drjm[0]fj[0] + drkm[0]*fk[0]);
	v[1] = 0.25 * (drim[1]fi[1] + drjm[1]fj[1] + drkm[1]*fk[1]);
	v[2] = 0.25 * (drim[2]fi[2] + drjm[2]fj[2] + drkm[2]*fk[2]);
	v[3] = 0.25 * (drim[0]fi[1] + drjm[0]fj[1] + drkm[0]*fk[1]);
	v[4] = 0.25 * (drim[0]fi[2] + drjm[0]fj[2] + drkm[0]*fk[2]);
	v[5] = 0.25 * (drim[1]fi[2] + drjm[1]fj[2] + drkm[1]*fk[2]);

	vatom_thr[tid][i][0] += v[0]; vatom_thr[tid][i][1] += v[1];
	vatom_thr[tid][i][2] += v[2]; vatom_thr[tid][i][3] += v[3];
	vatom_thr[tid][i][4] += v[4]; vatom_thr[tid][i][5] += v[5];
	vatom_thr[tid][j][0] += v[0]; vatom_thr[tid][j][1] += v[1];
	vatom_thr[tid][j][2] += v[2]; vatom_thr[tid][j][3] += v[3];
	vatom_thr[tid][j][4] += v[4]; vatom_thr[tid][j][5] += v[5];
	vatom_thr[tid][k][0] += v[0]; vatom_thr[tid][k][1] += v[1];
	vatom_thr[tid][k][2] += v[2]; vatom_thr[tid][k][3] += v[3];
	vatom_thr[tid][k][4] += v[4]; vatom_thr[tid][k][5] += v[5];
	vatom_thr[tid][m][0] += v[0]; vatom_thr[tid][m][1] += v[1];
	vatom_thr[tid][m][2] += v[2]; vatom_thr[tid][m][3] += v[3];
	vatom_thr[tid][m][4] += v[4]; vatom_thr[tid][m][5] += v[5];
	}

	/* ----------------------------------------------------------------------
	reduce the per thread accumulated E/V data into the canonical accumulators.
	------------------------------------------------------------------------- */
	void PairOMPGPU::ev_reduce_thr(Pair &p)
	{
	const int ntotal = (force->newton) ?
	(atom->nlocal + atom->nghost) : atom->nlocal;

	for (int n = 0; n < _nthreads; ++n) {
	p.eng_vdwl += eng_vdwl_thr[n];
	p.eng_coul += eng_coul_thr[n];
	if (vflag_either) {
	p.virial[0] += virial_thr[n][0];
	p.virial[1] += virial_thr[n][1];
	p.virial[2] += virial_thr[n][2];
	p.virial[3] += virial_thr[n][3];
	p.virial[4] += virial_thr[n][4];
	p.virial[5] += virial_thr[n][5];
	if (vflag_atom) {
	for (int i = 0; i < ntotal; ++i) {
	p.vatom[i][0] += vatom_thr[n][i][0];
	p.vatom[i][1] += vatom_thr[n][i][1];
	p.vatom[i][2] += vatom_thr[n][i][2];
	p.vatom[i][3] += vatom_thr[n][i][3];
	p.vatom[i][4] += vatom_thr[n][i][4];
	p.vatom[i][5] += vatom_thr[n][i][5];
	}
	}
	}
	if (eflag_atom) {
	for (int i = 0; i < ntotal; ++i) {
	p.eatom[i] += eatom_thr[n][i];
	}
	}
	}
	}

	/* ---------------------------------------------------------------------- */

	double PairOMPGPU::memory_usage()
	{
	double bytes = 0.0;

	bytes += _nthreads * (2 + 7) * sizeof(double);
	bytes += _nthreads * maxeatom_thr * sizeof(double);
	bytes += _nthreads * maxvatom_thr * 6 * sizeof(double);

	if (f_thr != NULL)
	bytes += _nthreads * _nmax * sizeof(double);
	return bytes;
	}

	#endif

pair_omp_gpu.cppNo OneTemporaryActions

File Metadata

pair_omp_gpu.cppView Options

Event Timeline

pair_omp_gpu.cpp
No OneTemporary
Actions

pair_omp_gpu.cpp
View Options