Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F91447114
intel_buffers.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, Nov 11, 05:17
Size
16 KB
Mime Type
text/x-c++
Expires
Wed, Nov 13, 05:17 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
22264457
Attached To
rLAMMPS lammps
intel_buffers.cpp
View Options
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
This software is distributed under the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include "intel_buffers.h"
#include "force.h"
#include "memory.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Construct an empty buffer manager: the pointers set here start out null
   and the sizes/counters set here start out zero, so no buffer is
   considered allocated until one of the grow routines runs
------------------------------------------------------------------------- */
template <class flt_t, class acc_t>
IntelBuffers<flt_t, acc_t>::IntelBuffers(class LAMMPS *lmp_in) :
  lmp(lmp_in), _x(0), _q(0), _quat(0), _f(0), _off_threads(0),
  _buf_size(0), _buf_local_size(0)
{
  // host-side bookkeeping
  _host_nmax = 0;
  _ccachex = 0;
  _off_map_maxlocal = 0;
  _ntypes = 0;
  _list_alloc_atoms = 0;

#ifdef _LMP_INTEL_OFFLOAD
  // coprocessor (offload) bookkeeping
  _off_ccache = 0;
  _off_threads = 0;
  _off_list_alloc = false;
  _off_map_maxhead = 0;
  _off_map_nmax = 0;
  _off_map_ilist = 0;
  _off_f = 0;
  _separate_buffers = 0;
#endif
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Destructor: release everything this object may still hold
------------------------------------------------------------------------- */
template <class flt_t, class acc_t>
IntelBuffers<flt_t, acc_t>::~IntelBuffers()
{
  // per-atom x/q/quat/f buffers
  free_buffers();

  // neighbor-related buffers (routine is defined outside this file)
  free_all_nbor_buffers();

  // per-thread cache arrays
  free_ccache();

  // passing 0 destroys the cutoff table without creating a new one
  set_ntypes(0);
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Release the per-atom buffers created by _grow() and reset the recorded
   buffer sizes to zero; does nothing unless _buf_size is positive
------------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::free_buffers()
{
  if (!(_buf_size > 0)) return;

  atom_t * x = get_x();
  flt_t * q = get_q();
  quat_t * quat = get_quat();

#ifdef _LMP_INTEL_OFFLOAD
  vec3_acc_t * f_start = get_off_f();
  if (f_start != 0) {
    // an offload force buffer exists: release the coprocessor copies
    // before the host memory they are keyed on is destroyed
    acc_t * ev_global = get_ev_global();
    if (ev_global != 0) {
      #pragma offload_transfer target(mic:_cop) \
        nocopy(x:alloc_if(0) free_if(1)) \
        nocopy(f_start:alloc_if(0) free_if(1)) \
        nocopy(ev_global:alloc_if(0) free_if(1))
    }
    if (q != 0) {
      #pragma offload_transfer target (mic:_cop) \
        nocopy(q:alloc_if(0) free_if(1))
    }
    if (quat != 0) {
      #pragma offload_transfer target (mic:_cop) \
        nocopy(quat:alloc_if(0) free_if(1))
    }
    lmp->memory->destroy(f_start);
  }

  // separate host-only copies exist only in this mode
  if (_separate_buffers) {
    lmp->memory->destroy(_host_x);
    if (q != 0) lmp->memory->destroy(_host_q);
    if (quat != 0) lmp->memory->destroy(_host_quat);
  }
#endif

  // host buffers; q and quat are optional and may never have been created
  lmp->memory->destroy(x);
  if (q != 0) lmp->memory->destroy(q);
  if (quat != 0) lmp->memory->destroy(quat);
  lmp->memory->destroy(_f);

  _buf_size = _buf_local_size = 0;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Discard the current per-atom buffers and allocate new ones

   nall        = atom count used to size the x/q/quat buffers
   nlocal      = atom count used to size the force buffer when
                 newton_pair is off
   nthreads    = number of host force-buffer copies (one stride each)
   offload_end = if > 0 (offload builds only), also allocate the offload
                 force buffer and the coprocessor-side copies
------------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::_grow(const int nall, const int nlocal,
                                       const int nthreads,
                                       const int offload_end)
{
  free_buffers();

  // sizes carry 10% headroom plus one element
  _buf_size = static_cast<double>(nall) * 1.1 + 1;
  _buf_local_size = _buf_size;
  if (!lmp->force->newton_pair)
    _buf_local_size = static_cast<double>(nlocal) * 1.1 + 1;
  // the force buffer is twice as long when atoms carry torque
  if (lmp->atom->torque)
    _buf_local_size *= 2;
  const int f_stride = get_stride(_buf_local_size);

  // charge and quaternion buffers only exist for atom styles that have them
  const bool has_q = (lmp->atom->q != NULL);
  const bool has_quat = (lmp->atom->ellipsoid != NULL);

  lmp->memory->create(_x, _buf_size,"intel_x");
  if (has_q)
    lmp->memory->create(_q, _buf_size, "intel_q");
  if (has_quat)
    lmp->memory->create(_quat, _buf_size, "intel_quat");
  lmp->memory->create(_f, f_stride * nthreads, "intel_f");

#ifdef _LMP_INTEL_OFFLOAD
  if (_separate_buffers) {
    lmp->memory->create(_host_x, _buf_size,"intel_host_x");
    if (has_q)
      lmp->memory->create(_host_q, _buf_size, "intel_host_q");
    if (has_quat)
      lmp->memory->create(_host_quat, _buf_size, "intel_host_quat");
  }

  if (offload_end > 0) {
    lmp->memory->create(_off_f, f_stride * _off_threads, "intel_off_f");
    const atom_t * const x = get_x();
    const flt_t * const q = get_q();
    const vec3_acc_t * f_start = get_off_f();
    acc_t * ev_global = get_ev_global();

    // allocate (without copying) the matching coprocessor buffers
    if (has_q) {
      if (x != NULL && q != NULL && f_start != NULL && ev_global != NULL) {
        #pragma offload_transfer target(mic:_cop) \
          nocopy(x,q:length(_buf_size) alloc_if(1) free_if(0)) \
          nocopy(f_start:length(f_stride*_off_threads) alloc_if(1) free_if(0))\
          nocopy(ev_global:length(8) alloc_if(1) free_if(0))
      }
    } else {
      if (x != NULL && f_start != NULL && ev_global != NULL) {
        #pragma offload_transfer target(mic:_cop) \
          nocopy(x:length(_buf_size) alloc_if(1) free_if(0)) \
          nocopy(f_start:length(f_stride*_off_threads) alloc_if(1) free_if(0))\
          nocopy(ev_global:length(8) alloc_if(1) free_if(0))
      }
    }

    if (has_quat) {
      const quat_t * const quat = get_quat();
      if (quat != NULL) {
        #pragma offload_transfer target(mic:_cop) \
          nocopy(quat:length(_buf_size) alloc_if(1) free_if(0))
      }
    }
  }
#endif
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Release the nmax-sized data set up by _grow_nmax(): the coprocessor
   mappings (offload builds) and the host _binpacked array
------------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::free_nmax()
{
#ifdef _LMP_INTEL_OFFLOAD
  if (_off_map_nmax > 0) {
    // use the host pointers recorded when the mappings were created
    const int * tag = _off_map_tag;
    const int * special = _off_map_special;
    const int * nspecial = _off_map_nspecial;
    const int * bins = _off_map_bins;
    const int * binpacked = _binpacked;
    const bool all_mapped = (tag != 0) && (special != 0) &&
                            (nspecial != 0) && (bins != 0);
    if (all_mapped) {
      #pragma offload_transfer target(mic:_cop) \
        nocopy(tag:alloc_if(0) free_if(1)) \
        nocopy(special,nspecial:alloc_if(0) free_if(1)) \
        nocopy(bins,binpacked:alloc_if(0) free_if(1))
    }
    _off_map_nmax = 0;
  }
#endif

  if (_host_nmax > 0) {
    lmp->memory->destroy(_binpacked);
    _host_nmax = 0;
  }
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Re-create the nmax-sized data: the host _binpacked array and, when
   offload_end is non-zero in an offload build, coprocessor allocations
   for bins, binpacked, tag, special and nspecial
------------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::_grow_nmax(const int offload_end)
{
  free_nmax();

  const int size = lmp->atom->nmax;
  _host_nmax = size;
  lmp->memory->create(_binpacked, _host_nmax, "_binpacked");

#ifdef _LMP_INTEL_OFFLOAD
  if (!offload_end) return;

  // default to one-element placeholders; real arrays are used only for
  // molecular systems
  int *special = &_special_holder;
  int *nspecial = &_nspecial_holder;
  int special_length = 1;
  int nspecial_length = 1;
  if (lmp->atom->molecular) {
    special = lmp->atom->special[0];
    nspecial = lmp->atom->nspecial[0];
    special_length = size * lmp->atom->maxspecial;
    nspecial_length = size * 3;
  }

  // a single element is allocated when tags are not needed
  const int tag_length = _need_tag ? size : 1;

  int *tag = lmp->atom->tag;
  int *bins = lmp->neighbor->bins;
  int * binpacked = _binpacked;
  #pragma offload_transfer target(mic:_cop) \
    nocopy(bins,binpacked:length(size) alloc_if(1) free_if(0)) \
    nocopy(tag:length(tag_length) alloc_if(1) free_if(0)) \
    nocopy(special:length(special_length) alloc_if(1) free_if(0)) \
    nocopy(nspecial:length(nspecial_length) alloc_if(1) free_if(0))

  // remember which host pointers were mapped so free_nmax() can release them
  _off_map_nmax = size;
  _off_map_tag = tag;
  _off_map_special = special;
  _off_map_nspecial = nspecial;
  _off_map_bins = bins;
#endif
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Release the _cnumneigh and _atombin arrays created by _grow_local()
   (and their coprocessor mappings); does nothing unless
   _off_map_maxlocal is positive
------------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::free_local()
{
  if (!(_off_map_maxlocal > 0)) return;

  int * cnumneigh = _cnumneigh;
  int * atombin = _atombin;

#ifdef _LMP_INTEL_OFFLOAD
  if (_off_map_ilist != NULL) {
    const int * ilist = _off_map_ilist;
    const int * numneigh = _off_map_numneigh;
    // cleared before the transfer so the mapping is released only once
    _off_map_ilist = NULL;
    if (numneigh != 0 && ilist != 0) {
      #pragma offload_transfer target(mic:_cop) \
        nocopy(ilist,numneigh,cnumneigh,atombin:alloc_if(0) free_if(1))
    }
  }
#endif

  lmp->memory->destroy(cnumneigh);
  lmp->memory->destroy(atombin);
  _off_map_maxlocal = 0;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Re-create _cnumneigh and _atombin with list->get_maxlocal() entries;
   when offload_end > 0 in an offload build, also allocate coprocessor
   storage for them and for the list's ilist and numneigh arrays
------------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::_grow_local(NeighList *list,
                                             const int offload_end)
{
  free_local();

  const int size = list->get_maxlocal();
  lmp->memory->create(_cnumneigh, size, "_cnumneigh");
  lmp->memory->create(_atombin, size, "_atombin");
  _off_map_maxlocal = size;

#ifdef _LMP_INTEL_OFFLOAD
  if (offload_end > 0) {
    int * ilist = list->ilist;
    int * numneigh = list->numneigh;
    int * atombin = _atombin;
    int * cnumneigh = _cnumneigh;
    if (cnumneigh != 0 && atombin != 0) {
      #pragma offload_transfer target(mic:_cop) \
        nocopy(ilist:length(size) alloc_if(1) free_if(0)) \
        nocopy(numneigh:length(size) alloc_if(1) free_if(0)) \
        nocopy(cnumneigh:length(size) alloc_if(1) free_if(0)) \
        nocopy(atombin:length(size) alloc_if(1) free_if(0))
    }
    // recorded so free_local() can release the same host pointers later
    _off_map_numneigh = numneigh;
    _off_map_ilist = ilist;
  }
#endif
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Release the coprocessor mapping of the bin-head array recorded by
   _grow_binhead(); empty in non-offload builds
------------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::free_binhead()
{
#ifdef _LMP_INTEL_OFFLOAD
  if (!(_off_map_maxhead > 0)) return;

  const int * binhead = _off_map_binhead;
  if (binhead != 0) {
    #pragma offload_transfer target(mic:_cop) \
      nocopy(binhead:alloc_if(0) free_if(1))
  }
  _off_map_maxhead = 0;
#endif
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Replace the coprocessor mapping of the neighbor bin-head array with
   one of maxhead+1 entries; empty in non-offload builds
------------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::_grow_binhead()
{
#ifdef _LMP_INTEL_OFFLOAD
  free_binhead();

  const int maxhead = lmp->neighbor->maxhead;
  int * binhead = lmp->neighbor->binhead;
  #pragma offload_transfer target(mic:_cop) \
    nocopy(binhead:length(maxhead+1) alloc_if(1) free_if(0))

  // recorded so free_binhead() can release this mapping later
  _off_map_maxhead = maxhead;
  _off_map_binhead = binhead;
#endif
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Release the neighbor-list storage created by _grow_nbor_list(); does
   nothing unless _list_alloc_atoms is positive
------------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::free_nbor_list()
{
  if (!(_list_alloc_atoms > 0)) return;

#ifdef _LMP_INTEL_OFFLOAD
  if (_off_list_alloc) {
    int * list_alloc = _list_alloc;
    int * stencil = _off_map_stencil;
    // NOTE(review): stencil is only tested here; its coprocessor copy is
    // not released in this function - confirm that is intended
    if (list_alloc != 0 && stencil != 0) {
      #pragma offload_transfer target(mic:_cop) \
        nocopy(list_alloc:alloc_if(0) free_if(1))
    }
    _off_list_alloc = false;
  }
#endif

  lmp->memory->destroy(_list_alloc);
  _list_alloc_atoms = 0;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Re-create the neighbor-list storage

   list        = neighbor list whose stencil is copied to the coprocessor
   nlocal      = atom count; storage is sized for 1.10 * nlocal atoms
   nthreads    = host thread count (the larger of this and _off_threads
                 is used for the per-thread padding)
   offload_end = if > 0 (offload builds only), also allocate on the
                 coprocessor
   pack_width  = extra padding term in the size calculation
------------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::_grow_nbor_list(NeighList *list,
                                                 const int nlocal,
                                                 const int nthreads,
                                                 const int offload_end,
                                                 const int pack_width)
{
  free_nbor_list();

  _list_alloc_atoms = 1.10 * nlocal;
  const int nt = MAX(nthreads, _off_threads);
  const int padded_atoms = _list_alloc_atoms + nt * 2 + pack_width - 1;
  int list_alloc_size = padded_atoms * get_max_nbors();
  lmp->memory->create(_list_alloc, list_alloc_size, "_list_alloc");

#ifdef _LMP_INTEL_OFFLOAD
  if (offload_end > 0) {
    int * stencil = list->stencil;
    int * list_alloc = _list_alloc;
    if (list_alloc != NULL) {
      // stencil contents are copied in; list storage is only allocated
      #pragma offload_transfer target(mic:_cop) \
        in(stencil:length(list->maxstencil) alloc_if(1) free_if(0)) \
        nocopy(list_alloc:length(list_alloc_size) alloc_if(1) free_if(0))
      _off_map_stencil = stencil;
      _off_list_alloc = true;
    }
  }
#endif
}
/* ----------------------------------------------------------------------
   Swap the coprocessor stencil mapping: release the one recorded in
   _off_map_stencil, then copy in list->stencil and record it instead;
   empty in non-offload builds
------------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::_grow_stencil(NeighList *list)
{
#ifdef _LMP_INTEL_OFFLOAD
  // release the previously mapped stencil
  int * stencil = _off_map_stencil;
  #pragma offload_transfer target(mic:_cop) \
    nocopy(stencil:alloc_if(0) free_if(1))

  // allocate and copy the list's current stencil
  stencil = list->stencil;
  #pragma offload_transfer target(mic:_cop) \
    in(stencil:length(list->maxstencil) alloc_if(1) free_if(0))

  _off_map_stencil = stencil;
#endif
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Release the cache arrays created by grow_ccache(); a null _ccachex
   means nothing is allocated
------------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::free_ccache()
{
  if (!_ccachex) return;

  // local copies of the member pointers, as named in the offload pragmas
  flt_t *ccachex = _ccachex;
  flt_t *ccachey = _ccachey;
  flt_t *ccachez = _ccachez;
  flt_t *ccachew = _ccachew;
  int *ccachei = _ccachei;
  int *ccachej = _ccachej;
#ifdef LMP_USE_AVXCD
  acc_t *ccachef = _ccachef;
#endif

#ifdef _LMP_INTEL_OFFLOAD
  if (_off_ccache) {
    #pragma offload_transfer target(mic:_cop) \
      nocopy(ccachex,ccachey,ccachez,ccachew:alloc_if(0) free_if(1)) \
      nocopy(ccachei,ccachej:alloc_if(0) free_if(1))

#ifdef LMP_USE_AVXCD
    #pragma offload_transfer target(mic:_cop) \
      nocopy(ccachef:alloc_if(0) free_if(1))
#endif
  }
  _off_ccache = 0;
#endif

  lmp->memory->destroy(ccachex);
  lmp->memory->destroy(ccachey);
  lmp->memory->destroy(ccachez);
  lmp->memory->destroy(ccachew);
  lmp->memory->destroy(ccachei);
  lmp->memory->destroy(ccachej);
#ifdef LMP_USE_AVXCD
  lmp->memory->destroy(ccachef);
#endif

  // marks the cache as released for the next free_ccache()/grow_ccache()
  _ccachex = 0;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Allocate the per-thread cache arrays if they do not exist yet

   off_flag = non-zero to also allocate the arrays on the coprocessor
              (offload builds only); an existing host-only cache is
              freed and rebuilt in that case
   nthreads = host thread count; the larger of this and _off_threads
              sets the number of per-thread slices
   width    = multiplier applied to get_max_nbors() for the slice length

   Returns without doing anything if a usable cache already exists
------------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::grow_ccache(const int off_flag,
                                             const int nthreads,
                                             const int width)
{
#ifdef _LMP_INTEL_OFFLOAD
  // a cache that exists only on the host cannot serve an offload request
  if (_ccachex && off_flag && _off_ccache == 0)
    free_ccache();
#endif
  if (_ccachex)
    return;

  const int nsize = get_max_nbors() * width;
  int esize = MIN(sizeof(int), sizeof(flt_t));
  IP_PRE_get_stride(_ccache_stride, nsize, esize, 0);
  int nt = MAX(nthreads, _off_threads);
  const int vsize = _ccache_stride * nt;

  lmp->memory->create(_ccachex, vsize , "_ccachex");
  lmp->memory->create(_ccachey, vsize, "_ccachey");
  lmp->memory->create(_ccachez, vsize, "_ccachez");
  lmp->memory->create(_ccachew, vsize, "_ccachew");
  lmp->memory->create(_ccachei, vsize, "_ccachei");
  lmp->memory->create(_ccachej, vsize, "_ccachej");
#ifdef LMP_USE_AVXCD
  IP_PRE_get_stride(_ccache_stride3, nsize * 3, sizeof(acc_t), 0);
  lmp->memory->create(_ccachef, _ccache_stride3 * nt, "_ccachef");
#endif
  // _ccachej is the only array that starts zeroed
  memset(_ccachej, 0, vsize * sizeof(int));

#ifdef _LMP_INTEL_OFFLOAD
  if (off_flag) {
    flt_t *ccachex = _ccachex;
    flt_t *ccachey = _ccachey;
    flt_t *ccachez = _ccachez;
    flt_t *ccachew = _ccachew;
    int *ccachei = _ccachei;
    int *ccachej = _ccachej;

    if (ccachex != NULL && ccachey !=NULL && ccachez != NULL &&
        ccachew != NULL && ccachei != NULL && ccachej !=NULL) {
      // ccachej is copied in so the coprocessor copy is zeroed as well
      #pragma offload_transfer target(mic:_cop) \
        nocopy(ccachex,ccachey:length(vsize) alloc_if(1) free_if(0)) \
        nocopy(ccachez,ccachew:length(vsize) alloc_if(1) free_if(0)) \
        nocopy(ccachei:length(vsize) alloc_if(1) free_if(0)) \
        in(ccachej:length(vsize) alloc_if(1) free_if(0))
    }
#ifdef LMP_USE_AVXCD
    // local copy of the member pointer; it was previously used here
    // without being declared, which broke offload + AVXCD builds
    acc_t *ccachef = _ccachef;
    if (ccachef != NULL) {
      #pragma offload_transfer target(mic:_cop) \
        nocopy(ccachef:length(_ccache_stride3 * nt) alloc_if(1) free_if(0))
    }
#endif
    _off_ccache = 1;
  }
#endif
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Resize the ntypes x ntypes cutoff table (_cutneighsq); does nothing if
   the type count is unchanged, and passing 0 only frees the table.
   In offload builds the coprocessor copy is freed/allocated alongside
   whenever _off_threads > 0
------------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::set_ntypes(const int ntypes)
{
  if (ntypes == _ntypes) return;

  // drop the table sized for the previous type count
  if (_ntypes > 0) {
#ifdef _LMP_INTEL_OFFLOAD
    flt_t * cutneighsqo = _cutneighsq[0];
    if (_off_threads > 0 && cutneighsqo != 0) {
      #pragma offload_transfer target(mic:_cop) \
        nocopy(cutneighsqo:alloc_if(0) free_if(1))
    }
#endif
    lmp->memory->destroy(_cutneighsq);
  }

  // build the table for the new type count
  if (ntypes > 0) {
    lmp->memory->create(_cutneighsq, ntypes, ntypes, "_cutneighsq");
#ifdef _LMP_INTEL_OFFLOAD
    flt_t * cutneighsqo = _cutneighsq[0];
    if (_off_threads > 0 && cutneighsqo != NULL) {
      #pragma offload_transfer target(mic:_cop) \
        nocopy(cutneighsqo:length(ntypes * ntypes) alloc_if(1) free_if(0))
    }
#endif
  }

  _ntypes = ntypes;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Estimate, in bytes, the memory held by the buffers sized in this file

   nthreads = number of host force-buffer copies to account for

   Counts the x/q/quat buffers, the host and offload force buffers, the
   two per-local-atom integer arrays, the neighbor-list storage and the
   cutoff table. The cache arrays from grow_ccache() are not included.
------------------------------------------------------------------------- */
template <class flt_t, class acc_t>
double IntelBuffers<flt_t, acc_t>::memory_usage(const int nthreads)
{
  // bytes per atom; each optional buffer is counted under the same
  // condition _grow() uses to allocate it (quat is tied to ellipsoid,
  // not torque)
  double tmem = sizeof(atom_t);
  if (lmp->atom->q) tmem += sizeof(flt_t);
  if (lmp->atom->ellipsoid) tmem += sizeof(quat_t);
#ifdef _LMP_INTEL_OFFLOAD
  // separate host copies double the per-atom footprint
  if (_separate_buffers) tmem *= 2;
#endif
  tmem *= _buf_size;

  // force buffers: one stride per thread
  const int fstride = get_stride(_buf_local_size);
  tmem += fstride * nthreads * sizeof(vec3_acc_t);
#ifdef _LMP_INTEL_OFFLOAD
  if (_off_f) tmem += fstride*_off_threads * sizeof(vec3_acc_t);
#endif

  // _cnumneigh and _atombin
  tmem += _off_map_maxlocal * sizeof(int) * 2;
  // neighbor-list storage
  tmem += (_list_alloc_atoms + _off_threads) * get_max_nbors() * sizeof(int);
  // cutoff table holds flt_t values, so count it with sizeof(flt_t)
  tmem += _ntypes * _ntypes * sizeof(flt_t);
  return tmem;
}
/* ---------------------------------------------------------------------- */
// Explicit instantiations: the member definitions above live in this .cpp
// file, so the three <flt_t, acc_t> combinations listed here are the ones
// compiled into this translation unit.
template class IntelBuffers<float,float>;
template class IntelBuffers<float,double>;
template class IntelBuffers<double,double>;
Event Timeline
Log In to Comment