Page MenuHomec4science

pair_gpu_ans.cpp
No OneTemporary

File Metadata

Created
Sun, Sep 1, 18:12

pair_gpu_ans.cpp

/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
#include "pair_gpu_ans.h"
// Shorthand for the full template class name; it keeps the out-of-class
// member definitions below readable.
#define PairGPUAnsT PairGPUAns<numtyp,acctyp>
// Construct an empty container: nothing is allocated, no energy/virial
// flags are set, the local atom count is zero and no index list is attached.
//
// NOTE(review): members such as _charge, _rot, _e_fields, _ev_fields and
// _max_local are not given values here; in this file they are first assigned
// by init()/alloc().
template <class numtyp, class acctyp>
PairGPUAnsT::PairGPUAns()
  : _allocated(false),
    _eflag(false),
    _vflag(false),
    _inum(0),
    _ilist(NULL),
    _newton(false)
{
}
// Return the per-atom storage size in bytes.
//
// The count is 11 acctyp values, plus 4 more when _rot is set and 1 more
// when _charge is set.
template <class numtyp, class acctyp>
int PairGPUAnsT::bytes_per_atom() const {
  int n_values=11;
  if (_rot)
    n_values+=4;
  if (_charge)
    n_values+=1;
  return static_cast<int>(n_values*sizeof(acctyp));
}
// Allocate the host and device answer buffers.
//
// inum is the requested number of local atoms; the capacity actually stored
// in _max_local is inum padded by 10%.
//
// Returns true only if every attempted allocation reported UCL_SUCCESS.
// Once one allocation fails, the remaining allocations are not attempted.
// _allocated is set to true in either case.
template <class numtyp, class acctyp>
bool PairGPUAnsT::alloc(const int inum) {
  // Capacity with 10% headroom over the requested atom count
  _max_local=static_cast<int>(static_cast<double>(inum)*1.10);

  // 4 answer values per atom, 8 when _rot is set
  const int ans_elements=_rot ? 8 : 4;

  // On a CPU device the device containers are made views of the host ones
  const bool cpuview=(dev->device_type()==UCL_CPU);

  // -------------------------- Host allocations
  bool success=(host_ans.alloc(ans_elements*_max_local,*dev)==UCL_SUCCESS);
  if (success)
    success=(host_engv.alloc(_ev_fields*_max_local,*dev)==UCL_SUCCESS);

  // --------------------------- Device allocations
  if (!cpuview) {
    if (success)
      success=(dev_engv.alloc(_ev_fields*_max_local,*dev,
                              UCL_WRITE_ONLY)==UCL_SUCCESS);
    if (success)
      success=(dev_ans.alloc(ans_elements*_max_local,*dev,
                             UCL_WRITE_ONLY)==UCL_SUCCESS);
  } else {
    dev_engv.view(host_engv);
    dev_ans.view(host_ans);
  }

  _gpu_bytes=dev_engv.row_bytes()+dev_ans.row_bytes();
  _allocated=true;
  return success;
}
// (Re)initialize the container for a device.
//
// inum   - number of local atoms to size the buffers for; 0 selects a
//          default of 1000
// charge - enables the additional energy field (_e_fields becomes 2)
// rot    - stored in _rot; alloc() doubles the per-atom answer storage
// devi   - device used for allocations and timers; its address is stored
//
// Any existing state is released with clear() first. Returns the result of
// alloc().
template <class numtyp, class acctyp>
bool PairGPUAnsT::init(const int inum, const bool charge, const bool rot,
                       UCL_Device &devi) {
  clear();

  dev=&devi;
  _charge=charge;
  _rot=rot;
  _other=_charge || _rot;

  // One energy field, two with charge; the virial always adds six fields
  _e_fields=_charge ? 2 : 1;
  _ev_fields=6+_e_fields;

  // Initialize timers for the selected device
  time_answer.init(*dev);
  time_answer.zero();
  _time_cast=0.0;
  _time_cpu_idle=0.0;

  // Fall back to a default size when no atom count is given
  const int ef_inum=(inum==0) ? 1000 : inum;
  return alloc(ef_inum);
}
// Enable charge and/or rotation storage after initialization.
//
// A field is only ever switched on, never off. If nothing new is requested
// the call returns true without touching the buffers. Otherwise the buffers
// are released and reallocated, and the result of alloc() is returned.
//
// NOTE(review): alloc() is called with the current _max_local and pads it by
// 10% again, so the capacity grows on every reallocation made here.
template <class numtyp, class acctyp>
bool PairGPUAnsT::add_fields(const bool charge, const bool rot) {
  const bool enable_charge=charge && !_charge;
  const bool enable_rot=rot && !_rot;
  if (!enable_charge && !enable_rot)
    return true;

  if (enable_charge) {
    _charge=true;
    // Charge adds one energy field and therefore one energy/virial field
    _e_fields++;
    _ev_fields++;
  }
  if (enable_rot)
    _rot=true;

  _other=_charge || _rot;
  const int prev_capacity=_max_local;
  clear_resize();
  return alloc(prev_capacity);
}
// Release the host and device buffers so they can be reallocated.
// Does nothing when no buffers are currently allocated.
template <class numtyp, class acctyp>
void PairGPUAnsT::clear_resize() {
  if (_allocated) {
    _allocated=false;

    dev_ans.clear();
    dev_engv.clear();
    host_ans.clear();
    host_engv.clear();
  }
}
// Release all storage and reset the per-step state.
//
// _gpu_bytes is always zeroed. The timer, buffers, atom count, index list
// and energy/virial flags are reset only if buffers were allocated.
template <class numtyp, class acctyp>
void PairGPUAnsT::clear() {
  _gpu_bytes=0;

  if (_allocated) {
    time_answer.clear();
    clear_resize();

    _inum=0;
    _ilist=NULL;
    _eflag=false;
    _vflag=false;
  }
}
template <class numtyp, class acctyp>
double PairGPUAnsT::host_memory_usage() const {
int atom_bytes=4;
if (_charge)
atom_bytes+=1;
if (_rot)
atom_bytes+=4;
int ans_bytes=atom_bytes+_ev_fields;
return ans_bytes*(_max_local)*sizeof(acctyp)+
sizeof(PairGPUAns<numtyp,acctyp>);
}
// Copy the answers from the device buffers into the host buffers.
//
// eflag/vflag     - whether energy / virial fields are to be copied
// ef_atom/vf_atom - stored for later use by energy_virial()
//
// The four flags are stored in the object. From dev_engv only the requested
// fields are copied (_inum entries each); from dev_ans 4 values per atom are
// copied, 8 when _rot is set. The whole transfer is bracketed by time_answer.
template <class numtyp, class acctyp>
void PairGPUAnsT::copy_answers(const bool eflag, const bool vflag,
                               const bool ef_atom, const bool vf_atom) {
  time_answer.start();

  _eflag=eflag;
  _vflag=vflag;
  _ef_atom=ef_atom;
  _vf_atom=vf_atom;

  // Drop the energy and/or virial fields that were not requested
  const int csize=_ev_fields-(eflag ? 0 : _e_fields)-(vflag ? 0 : 6);
  if (csize>0)
    ucl_copy(host_engv,dev_engv,_inum*csize,true);

  const int ans_per_atom=_rot ? 8 : 4;
  ucl_copy(host_ans,dev_ans,_inum*ans_per_atom,true);

  time_answer.stop();
}
template <class numtyp, class acctyp>
void PairGPUAnsT::copy_answers(const bool eflag, const bool vflag,
const bool ef_atom, const bool vf_atom,
int *ilist) {
_ilist=ilist;
copy_answers(eflag,vflag,ef_atom,vf_atom);
}
// Accumulate the energy and virial held in host_engv.
//
// Layout read here: field-major, each field occupying _inum consecutive
// entries. The energy field comes first when _eflag is set, followed by six
// virial fields when _vflag is set.
//
// eatom  - incremented by half of each atom's energy when _ef_atom is set
// vatom  - incremented by half of each atom's virial when _vf_atom is set
// virial - its 6 entries are incremented by half of the summed virial
//
// Output rows are indexed by i, or by _ilist[i] when an index list is set.
// Returns half of the summed energy field, or 0.0 when neither the energy
// nor the virial flag is set.
template <class numtyp, class acctyp>
double PairGPUAnsT::energy_virial(double *eatom, double **vatom,
                                  double *virial) {
  if (!_eflag && !_vflag)
    return 0.0;

  double energy_sum=0.0;
  double virial_acc[6]={0.0,0.0,0.0,0.0,0.0,0.0};

  const bool identity_map=(_ilist==NULL);
  for (int i=0; i<_inum; i++) {
    // Output index for this answer
    const int ii=identity_map ? i : _ilist[i];
    // First field of atom i; later fields are _inum entries apart
    acctyp *ap=host_engv.begin()+i;

    if (_eflag) {
      energy_sum+=*ap;
      if (_ef_atom)
        eatom[ii]+=*ap*0.5;
      ap+=_inum;
    }

    if (_vflag) {
      for (int j=0; j<6; j++) {
        if (_vf_atom)
          vatom[ii][j]+=*ap*0.5;
        virial_acc[j]+=*ap;
        ap+=_inum;
      }
    }
  }

  for (int j=0; j<6; j++)
    virial[j]+=virial_acc[j]*0.5;

  return energy_sum*0.5;
}
// Accumulate the energies and virial held in host_engv, with a separate
// second energy term returned through ecoul.
//
// Layout read here: field-major, each field occupying _inum consecutive
// entries. When _eflag is set the first field is summed into the return
// value and the second into ecoul; six virial fields follow when _vflag is
// set.
//
// eatom  - incremented by half of both energy terms when _ef_atom is set
// vatom  - incremented by half of each atom's virial when _vf_atom is set
// virial - its 6 entries are incremented by half of the summed virial
// ecoul  - incremented by half of the summed second energy field
//
// Output rows are indexed by i, or by _ilist[i] when an index list is set.
// Returns 0.0 when neither flag is set (ecoul is left untouched). When
// _charge is not set, the three-argument overload is used instead and ecoul
// is likewise left untouched. Otherwise returns half of the summed first
// energy field.
template <class numtyp, class acctyp>
double PairGPUAnsT::energy_virial(double *eatom, double **vatom,
                                  double *virial, double &ecoul) {
  if (!_eflag && !_vflag)
    return 0.0;
  if (!_charge)
    return energy_virial(eatom,vatom,virial);

  double vdwl_sum=0.0;
  double coul_sum=0.0;
  double virial_acc[6]={0.0,0.0,0.0,0.0,0.0,0.0};

  const bool identity_map=(_ilist==NULL);
  for (int i=0; i<_inum; i++) {
    // Output index for this answer
    const int ii=identity_map ? i : _ilist[i];
    // First field of atom i; later fields are _inum entries apart
    acctyp *ap=host_engv.begin()+i;

    if (_eflag) {
      // First energy field
      vdwl_sum+=*ap;
      if (_ef_atom)
        eatom[ii]+=*ap*0.5;
      ap+=_inum;

      // Second energy field
      coul_sum+=*ap;
      if (_ef_atom)
        eatom[ii]+=*ap*0.5;
      ap+=_inum;
    }

    if (_vflag) {
      for (int j=0; j<6; j++) {
        if (_vf_atom)
          vatom[ii][j]+=*ap*0.5;
        virial_acc[j]+=*ap;
        ap+=_inum;
      }
    }
  }

  for (int j=0; j<6; j++)
    virial[j]+=virial_acc[j]*0.5;

  ecoul+=coul_sum*0.5;
  return vdwl_sum*0.5;
}
// Add the answers held in host_ans to the caller's arrays.
//
// Layout read here: 4 values per atom for _inum atoms, of which the first 3
// are added to f. When _rot is set a second run of the same shape follows
// and is added to tor.
//
// f   - per-atom 3-vectors to increment
// tor - per-atom 3-vectors to increment; only accessed when _rot is set
//
// Output rows are indexed by i, or by _ilist[i] when an index list is set.
template <class numtyp, class acctyp>
void PairGPUAnsT::get_answers(double **f, double **tor) {
  const bool identity_map=(_ilist==NULL);
  acctyp *ap=host_ans.begin();

  for (int i=0; i<_inum; i++) {
    const int ii=identity_map ? i : _ilist[i];
    f[ii][0]+=ap[0];
    f[ii][1]+=ap[1];
    f[ii][2]+=ap[2];
    ap+=4;   // the fourth value of each entry is skipped
  }

  if (_rot) {
    // The second run starts right after the _inum entries read above
    for (int i=0; i<_inum; i++) {
      const int ii=identity_map ? i : _ilist[i];
      tor[ii][0]+=ap[0];
      tor[ii][1]+=ap[1];
      tor[ii][2]+=ap[2];
      ap+=4;
    }
  }
}
// Explicit instantiation of the member definitions above for the configured
// precisions. PRECISION and ACC_PRECISION are not defined in this file;
// presumably they are supplied by the build or an included header - confirm.
template class PairGPUAns<PRECISION,ACC_PRECISION>;

Event Timeline