diff --git a/src/KOKKOS/nbin_kokkos.cpp b/src/KOKKOS/nbin_kokkos.cpp index c7e815928..95ea105ad 100644 --- a/src/KOKKOS/nbin_kokkos.cpp +++ b/src/KOKKOS/nbin_kokkos.cpp @@ -1,141 +1,154 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #include "nbin_kokkos.h" #include "neighbor.h" #include "atom_kokkos.h" #include "group.h" #include "domain.h" #include "comm.h" #include "update.h" #include "error.h" #include "atom_masks.h" using namespace LAMMPS_NS; enum{NSQ,BIN,MULTI}; // also in Neighbor #define SMALL 1.0e-6 #define CUT2BIN_RATIO 100 /* ---------------------------------------------------------------------- */ template NBinKokkos::NBinKokkos(LAMMPS *lmp) : NBinStandard(lmp) { atoms_per_bin = 16; d_resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize"); #ifndef KOKKOS_USE_CUDA_UVM h_resize = Kokkos::create_mirror_view(d_resize); #else h_resize = d_resize; #endif h_resize() = 1; } /* ---------------------------------------------------------------------- setup neighbor binning geometry bin numbering in each dimension is global: 0 = 0.0 to binsize, 1 = binsize to 2*binsize, etc nbin-1,nbin,etc = bbox-binsize to bbox, bbox to bbox+binsize, etc -1,-2,etc = -binsize to 0.0, -2*binsize to -binsize, etc code will work for any binsize since next(xyz) and stencil extend as far as necessary binsize = 1/2 of cutoff is roughly optimal for orthogonal boxes: a dim must be filled exactly by integer # of bins in periodic, procs on both sides of PBC must see same bin boundary in non-periodic, coord2bin() still assumes this by use of nbin xyz for triclinic boxes: tilted simulation box cannot contain integer # of bins stencil & neigh list built differently to account for this mbinlo = lowest global bin any of my ghost atoms could fall into mbinhi = highest global bin any of my ghost atoms could fall into mbin = number of bins I need in a dimension ------------------------------------------------------------------------- */ template void NBinKokkos::bin_atoms_setup(int nall) { if (mbins > k_bins.d_view.dimension_0()) { k_bins = DAT::tdual_int_2d("Neighbor::d_bins",mbins,atoms_per_bin); bins = k_bins.view(); k_bincount = DAT::tdual_int_1d("Neighbor::d_bincount",mbins); bincount = k_bincount.view(); } + if (nall > k_atom2bin.d_view.dimension_0()) { + k_atom2bin = DAT::tdual_int_1d("Neighbor::d_atom2bin",nall); + atom2bin = k_atom2bin.view(); + } } /* ---------------------------------------------------------------------- bin owned and ghost atoms ------------------------------------------------------------------------- */ template void NBinKokkos::bin_atoms() { last_bin = update->ntimestep; + k_bins.template sync(); + k_bincount.template sync(); + k_atom2bin.template sync(); + h_resize() = 1; while(h_resize() > 0) { h_resize() = 0; deep_copy(d_resize, h_resize); MemsetZeroFunctor f_zero; f_zero.ptr = (void*) k_bincount.view().ptr_on_device(); Kokkos::parallel_for(mbins, f_zero); atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK); x = atomKK->k_x.view(); bboxlo_[0] = bboxlo[0]; bboxlo_[1] = bboxlo[1]; bboxlo_[2] = bboxlo[2]; bboxhi_[0] = bboxhi[0]; bboxhi_[1] = bboxhi[1]; bboxhi_[2] = bboxhi[2]; NPairKokkosBinAtomsFunctor f(*this); Kokkos::parallel_for(atom->nlocal+atom->nghost, f); deep_copy(h_resize, d_resize); if(h_resize()) { atoms_per_bin += 16; k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin); bins = k_bins.view(); c_bins = bins; } } + + k_bins.template modify(); + k_bincount.template modify(); + k_atom2bin.template modify(); } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION void NBinKokkos::binatomsItem(const int &i) const { const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2)); + atom2bin(i) = ibin; const int ac = Kokkos::atomic_fetch_add(&bincount[ibin], (int)1); if(ac < bins.dimension_1()) { bins(ibin, ac) = i; } else { d_resize() = 1; } } namespace LAMMPS_NS { template class NBinKokkos; #ifdef KOKKOS_HAVE_CUDA template class NBinKokkos; #endif } diff --git a/src/KOKKOS/nbin_kokkos.h b/src/KOKKOS/nbin_kokkos.h index de3cf41d1..bf2ccc590 100644 --- a/src/KOKKOS/nbin_kokkos.h +++ b/src/KOKKOS/nbin_kokkos.h @@ -1,153 +1,155 @@ /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #ifdef NBIN_CLASS NBinStyle(kk/host, NBinKokkos, NB_KOKKOS_HOST) NBinStyle(kk/device, NBinKokkos, NB_KOKKOS_DEVICE) #else #ifndef LMP_NBIN_KOKKOS_H #define LMP_NBIN_KOKKOS_H #include "nbin_standard.h" #include "kokkos_type.h" namespace LAMMPS_NS { template class NBinKokkos : public NBinStandard { public: typedef ArrayTypes AT; NBinKokkos(class LAMMPS *); ~NBinKokkos() {} void bin_atoms_setup(int); void bin_atoms(); int atoms_per_bin; DAT::tdual_int_1d k_bincount; DAT::tdual_int_2d k_bins; + DAT::tdual_int_1d k_atom2bin; typename AT::t_int_1d bincount; const typename AT::t_int_1d_const c_bincount; typename AT::t_int_2d bins; typename AT::t_int_2d_const c_bins; + typename AT::t_int_1d atom2bin; typename AT::t_int_scalar d_resize; typename ArrayTypes::t_int_scalar h_resize; typename AT::t_x_array_randomread x; KOKKOS_INLINE_FUNCTION void binatomsItem(const int &i) const; KOKKOS_INLINE_FUNCTION int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const { int ix,iy,iz; if (x >= bboxhi_[0]) ix = static_cast ((x-bboxhi_[0])*bininvx) + nbinx; else if (x >= bboxlo_[0]) { ix = static_cast ((x-bboxlo_[0])*bininvx); ix = MIN(ix,nbinx-1); } else ix = static_cast ((x-bboxlo_[0])*bininvx) - 1; if (y >= bboxhi_[1]) iy = static_cast ((y-bboxhi_[1])*bininvy) + nbiny; else if (y >= bboxlo_[1]) { iy = static_cast ((y-bboxlo_[1])*bininvy); iy = MIN(iy,nbiny-1); } else iy = static_cast ((y-bboxlo_[1])*bininvy) - 1; if (z >= bboxhi_[2]) iz = static_cast ((z-bboxhi_[2])*bininvz) + nbinz; else if (z >= bboxlo_[2]) { iz = static_cast ((z-bboxlo_[2])*bininvz); iz = MIN(iz,nbinz-1); } else iz = static_cast ((z-bboxlo_[2])*bininvz) - 1; return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo); } KOKKOS_INLINE_FUNCTION int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const { int ix,iy,iz; if (x >= bboxhi_[0]) ix = static_cast ((x-bboxhi_[0])*bininvx) + nbinx; else if (x >= bboxlo_[0]) { ix = static_cast ((x-bboxlo_[0])*bininvx); ix = MIN(ix,nbinx-1); } else ix = static_cast ((x-bboxlo_[0])*bininvx) - 1; if (y >= bboxhi_[1]) iy = static_cast ((y-bboxhi_[1])*bininvy) + nbiny; else if (y >= bboxlo_[1]) { iy = static_cast ((y-bboxlo_[1])*bininvy); iy = MIN(iy,nbiny-1); } else iy = static_cast ((y-bboxlo_[1])*bininvy) - 1; if (z >= bboxhi_[2]) iz = static_cast ((z-bboxhi_[2])*bininvz) + nbinz; else if (z >= bboxlo_[2]) { iz = static_cast ((z-bboxlo_[2])*bininvz); iz = MIN(iz,nbinz-1); } else iz = static_cast ((z-bboxlo_[2])*bininvz) - 1; i[0] = ix - mbinxlo; i[1] = iy - mbinylo; i[2] = iz - mbinzlo; return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo); } private: double bboxlo_[3],bboxhi_[3]; }; template struct NPairKokkosBinAtomsFunctor { typedef DeviceType device_type; const NBinKokkos c; NPairKokkosBinAtomsFunctor(const NBinKokkos &_c): c(_c) {}; ~NPairKokkosBinAtomsFunctor() {} KOKKOS_INLINE_FUNCTION void operator() (const int & i) const { c.binatomsItem(i); } }; } #endif #endif /* ERROR/WARNING messages: */ diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index b568bd5c9..fd89f5ef6 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -1,787 +1,790 @@ /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #include "npair_kokkos.h" #include "atom_kokkos.h" #include "atom_masks.h" #include "domain_kokkos.h" #include "neighbor_kokkos.h" #include "nbin_kokkos.h" #include "nstencil.h" #include "force.h" namespace LAMMPS_NS { /* ---------------------------------------------------------------------- */ template NPairKokkos::NPairKokkos(LAMMPS *lmp) : NPair(lmp) { } /* ---------------------------------------------------------------------- copy needed info from Neighbor class to this build class ------------------------------------------------------------------------- */ template void NPairKokkos::copy_neighbor_info() { NPair::copy_neighbor_info(); NeighborKokkos* neighborKK = (NeighborKokkos*) neighbor; // general params newton_pair = force->newton_pair; k_cutneighsq = neighborKK->k_cutneighsq; // exclusion info k_ex1_type = neighborKK->k_ex1_type; k_ex2_type = neighborKK->k_ex2_type; k_ex_type = neighborKK->k_ex_type; k_ex1_group = neighborKK->k_ex1_group; k_ex2_group = neighborKK->k_ex2_group; k_ex1_bit = neighborKK->k_ex1_bit; k_ex2_bit = neighborKK->k_ex2_bit; k_ex_mol_group = neighborKK->k_ex_mol_group; k_ex_mol_bit = neighborKK->k_ex_mol_bit; k_ex_mol_intra = neighborKK->k_ex_mol_intra; } /* ---------------------------------------------------------------------- copy per-atom and per-bin vectors from NBin class to this build class ------------------------------------------------------------------------- */ template void NPairKokkos::copy_bin_info() { NPair::copy_bin_info(); NBinKokkos* nbKK = (NBinKokkos*) nb; atoms_per_bin = nbKK->atoms_per_bin; k_bincount = nbKK->k_bincount; k_bins = nbKK->k_bins; + k_atom2bin = nbKK->k_atom2bin; } /* ---------------------------------------------------------------------- copy needed info from NStencil class to this build class ------------------------------------------------------------------------- */ template void NPairKokkos::copy_stencil_info() { NPair::copy_stencil_info(); nstencil = ns->nstencil; int maxstencil = ns->get_maxstencil(); k_stencil = DAT::tdual_int_1d("neighlist:stencil",maxstencil); for (int k = 0; k < maxstencil; k++) k_stencil.h_view(k) = ns->stencil[k]; k_stencil.modify(); k_stencil.sync(); if (GHOST) { k_stencilxyz = DAT::tdual_int_1d_3("neighlist:stencilxyz",maxstencil); for (int k = 0; k < maxstencil; k++) { k_stencilxyz.h_view(k,0) = ns->stencilxyz[k][0]; k_stencilxyz.h_view(k,1) = ns->stencilxyz[k][1]; k_stencilxyz.h_view(k,2) = ns->stencilxyz[k][2]; } k_stencilxyz.modify(); k_stencilxyz.sync(); } } /* ---------------------------------------------------------------------- */ template void NPairKokkos::build(NeighList *list_) { NeighListKokkos* list = (NeighListKokkos*) list_; const int nlocal = includegroup?atom->nfirst:atom->nlocal; int nall = nlocal; if (GHOST) nall += atom->nghost; list->grow(nall); NeighborKokkosExecute data(*list, k_cutneighsq.view(), k_bincount.view(), k_bins.view(), + k_atom2bin.view(), nstencil, k_stencil.view(), k_stencilxyz.view(), nlocal, atomKK->k_x.view(), atomKK->k_type.view(), atomKK->k_mask.view(), atomKK->k_molecule.view(), atomKK->k_tag.view(), atomKK->k_special.view(), atomKK->k_nspecial.view(), atomKK->molecular, nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo, bininvx,bininvy,bininvz, exclude, nex_type, k_ex1_type.view(), k_ex2_type.view(), k_ex_type.view(), nex_group, k_ex1_group.view(), k_ex2_group.view(), k_ex1_bit.view(), k_ex2_bit.view(), nex_mol, k_ex_mol_group.view(), k_ex_mol_bit.view(), k_ex_mol_intra.view(), bboxhi,bboxlo, domain->xperiodic,domain->yperiodic,domain->zperiodic, domain->xprd_half,domain->yprd_half,domain->zprd_half); k_cutneighsq.sync(); k_ex1_type.sync(); k_ex2_type.sync(); k_ex_type.sync(); k_ex1_group.sync(); k_ex2_group.sync(); k_ex1_bit.sync(); k_ex2_bit.sync(); k_ex_mol_group.sync(); k_ex_mol_bit.sync(); k_ex_mol_intra.sync(); - k_bincount.sync(), - k_bins.sync(), + k_bincount.sync(); + k_bins.sync(); + k_atom2bin.sync(); atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK); data.special_flag[0] = special_flag[0]; data.special_flag[1] = special_flag[1]; data.special_flag[2] = special_flag[2]; data.special_flag[3] = special_flag[3]; data.h_resize()=1; while(data.h_resize()) { data.h_new_maxneighs() = list->maxneighs; data.h_resize() = 0; Kokkos::deep_copy(data.resize, data.h_resize); Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs); #ifdef KOKKOS_HAVE_CUDA #define BINS_PER_BLOCK 2 const int factor = atoms_per_bin<64?2:1; Kokkos::TeamPolicy config((mbins+factor-1)/factor,atoms_per_bin*factor); #else const int factor = 1; #endif if (GHOST) { NPairKokkosBuildFunctorGhost f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); Kokkos::parallel_for(nall, f); } else { if (newton_pair) { NPairKokkosBuildFunctor f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); #ifdef KOKKOS_HAVE_CUDA if (ExecutionSpaceFromDevice::space == Device) Kokkos::parallel_for(config, f); else Kokkos::parallel_for(nall, f); #else Kokkos::parallel_for(nall, f); #endif } else { NPairKokkosBuildFunctor f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); #ifdef KOKKOS_HAVE_CUDA if (ExecutionSpaceFromDevice::space == Device) Kokkos::parallel_for(config, f); else Kokkos::parallel_for(nall, f); #else Kokkos::parallel_for(nall, f); #endif } } deep_copy(data.h_resize, data.resize); if(data.h_resize()) { deep_copy(data.h_new_maxneighs, data.new_maxneighs); list->maxneighs = data.h_new_maxneighs() * 1.2; list->d_neighbors = typename ArrayTypes::t_neighbors_2d("neighbors", list->d_neighbors.dimension_0(), list->maxneighs); data.neigh_list.d_neighbors = list->d_neighbors; data.neigh_list.maxneighs = list->maxneighs; } } if (GHOST) { list->inum = atom->nlocal; list->gnum = nall - atom->nlocal; } else { list->inum = nall; list->gnum = 0; } list->k_ilist.template modify(); } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION int NeighborKokkosExecute::find_special(const int &i, const int &j) const { const int n1 = nspecial(i,0); const int n2 = nspecial(i,1); const int n3 = nspecial(i,2); for (int k = 0; k < n3; k++) { if (special(i,k) == tag(j)) { if (k < n1) { if (special_flag[1] == 0) return -1; else if (special_flag[1] == 1) return 0; else return 1; } else if (k < n2) { if (special_flag[2] == 0) return -1; else if (special_flag[2] == 1) return 0; else return 2; } else { if (special_flag[3] == 0) return -1; else if (special_flag[3] == 1) return 0; else return 3; } } } return 0; }; /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION int NeighborKokkosExecute::exclusion(const int &i,const int &j, const int &itype,const int &jtype) const { int m; if (nex_type && ex_type(itype,jtype)) return 1; if (nex_group) { for (m = 0; m < nex_group; m++) { if (mask(i) & ex1_bit(m) && mask(j) & ex2_bit(m)) return 1; if (mask(i) & ex2_bit(m) && mask(j) & ex1_bit(m)) return 1; } } if (nex_mol) { for (m = 0; m < nex_mol; m++) if (ex_mol_intra[m]) { // intra-chain: exclude i-j pair if on same molecule if (mask[i] & ex_mol_bit[m] && mask[j] & ex_mol_bit[m] && molecule[i] == molecule[j]) return 1; } else // exclude i-j pair if on different molecules if (mask[i] & ex_mol_bit[m] && mask[j] & ex_mol_bit[m] && molecule[i] != molecule[j]) return 1; } return 0; } /* ---------------------------------------------------------------------- */ template template void NeighborKokkosExecute:: build_Item(const int &i) const { /* if necessary, goto next page and add pages */ int n = 0; int which = 0; int moltemplate; if (molecular == 2) moltemplate = 1; else moltemplate = 0; // get subview of neighbors of i const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i); const X_FLOAT xtmp = x(i, 0); const X_FLOAT ytmp = x(i, 1); const X_FLOAT ztmp = x(i, 2); const int itype = type(i); - const int ibin = coord2bin(xtmp, ytmp, ztmp); + const int ibin = c_atom2bin(i); const typename ArrayTypes::t_int_1d_const_um stencil = d_stencil; // loop over all bins in neighborhood (includes ibin) if(HalfNeigh) for(int m = 0; m < c_bincount(ibin); m++) { const int j = c_bins(ibin,m); const int jtype = type(j); //for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists if((j == i) || (HalfNeigh && !Newton && (j < i)) || (HalfNeigh && Newton && ((j < i) || ((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) || (x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp))))) ) continue; if(exclude && exclusion(i,j,itype,jtype)) continue; const X_FLOAT delx = xtmp - x(j, 0); const X_FLOAT dely = ytmp - x(j, 1); const X_FLOAT delz = ztmp - x(j, 2); const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; if(rsq <= cutneighsq(itype,jtype)) { if (molecular) { if (!moltemplate) which = find_special(i,j); /* else if (imol >= 0) */ /* which = find_special(onemols[imol]->special[iatom], */ /* onemols[imol]->nspecial[iatom], */ /* tag[j]-tagprev); */ /* else which = 0; */ if (which == 0){ if(n 0) { if(n::t_int_1d_const_um =Kokkos::subview(bins,jbin,ALL); for(int m = 0; m < c_bincount(jbin); m++) { const int j = c_bins(jbin,m); const int jtype = type(j); if(HalfNeigh && !Newton && (j < i)) continue; if(!HalfNeigh && j==i) continue; if(Tri) { if (x(j,2) < ztmp) continue; if (x(j,2) == ztmp) { if (x(j,1) < ytmp) continue; if (x(j,1) == ytmp) { if (x(j,0) < xtmp) continue; if (x(j,0) == xtmp && j <= i) continue; } } } if(exclude && exclusion(i,j,itype,jtype)) continue; const X_FLOAT delx = xtmp - x(j, 0); const X_FLOAT dely = ytmp - x(j, 1); const X_FLOAT delz = ztmp - x(j, 2); const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; if(rsq <= cutneighsq(itype,jtype)) { if (molecular) { if (!moltemplate) which = NeighborKokkosExecute::find_special(i,j); /* else if (imol >= 0) */ /* which = find_special(onemols[imol]->special[iatom], */ /* onemols[imol]->nspecial[iatom], */ /* tag[j]-tagprev); */ /* else which = 0; */ if (which == 0){ if(n 0) { if(n neigh_list.maxneighs) { resize() = 1; - if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n); + if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop } neigh_list.d_ilist(i) = i; } /* ---------------------------------------------------------------------- */ #ifdef KOKKOS_HAVE_CUDA extern __shared__ X_FLOAT sharedmem[]; /* ---------------------------------------------------------------------- */ template template __device__ inline void NeighborKokkosExecute::build_ItemCuda(typename Kokkos::TeamPolicy::member_type dev) const { /* loop over atoms in i's bin, */ const int atoms_per_bin = c_bins.dimension_1(); const int BINS_PER_TEAM = dev.team_size()/atoms_per_bin<1?1:dev.team_size()/atoms_per_bin; const int TEAMS_PER_BIN = atoms_per_bin/dev.team_size()<1?1:atoms_per_bin/dev.team_size(); const int MY_BIN = dev.team_rank()/atoms_per_bin; const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN; if(ibin >=c_bincount.dimension_0()) return; X_FLOAT* other_x = sharedmem; other_x = other_x + 5*atoms_per_bin*MY_BIN; int* other_id = (int*) &other_x[4 * atoms_per_bin]; int bincount_current = c_bincount[ibin]; for(int kk = 0; kk < TEAMS_PER_BIN; kk++) { const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size(); const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1; /* if necessary, goto next page and add pages */ int n = 0; X_FLOAT xtmp; X_FLOAT ytmp; X_FLOAT ztmp; int itype; const AtomNeighbors neighbors_i = neigh_list.get_neighbors((i>=0&&i= 0) { xtmp = x(i, 0); ytmp = x(i, 1); ztmp = x(i, 2); itype = type(i); other_x[MY_II] = xtmp; other_x[MY_II + atoms_per_bin] = ytmp; other_x[MY_II + 2 * atoms_per_bin] = ztmp; other_x[MY_II + 3 * atoms_per_bin] = itype; } other_id[MY_II] = i; int test = (__syncthreads_count(i >= 0 && i <= nlocal) == 0); if(test) return; if(i >= 0 && i < nlocal) { #pragma unroll 4 for(int m = 0; m < bincount_current; m++) { int j = other_id[m]; const int jtype = other_x[m + 3 * atoms_per_bin]; //for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists if((j == i) || (HalfNeigh && !Newton && (j < i)) || (HalfNeigh && Newton && ((j < i) || ((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) || (x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp))))) ) continue; if(Tri) { if (x(j,2) < ztmp) continue; if (x(j,2) == ztmp) { if (x(j,1) < ytmp) continue; if (x(j,1) == ytmp) { if (x(j,0) < xtmp) continue; if (x(j,0) == xtmp && j <= i) continue; } } } if(exclude && exclusion(i,j,itype,jtype)) continue; const X_FLOAT delx = xtmp - other_x[m]; const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin]; const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin]; const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; if(rsq <= cutneighsq(itype,jtype)) { if (molecular) { int which = 0; if (!moltemplate) which = NeighborKokkosExecute::find_special(i,j); /* else if (imol >= 0) */ /* which = find_special(onemols[imol]->special[iatom], */ /* onemols[imol]->nspecial[iatom], */ /* tag[j]-tagprev); */ /* else which = 0; */ if (which == 0){ if(n 0) { if(n::t_int_1d_const_um stencil = d_stencil; for(int k = 0; k < nstencil; k++) { const int jbin = ibin + stencil[k]; if(ibin == jbin) continue; bincount_current = c_bincount[jbin]; int j = MY_II < bincount_current ? c_bins(jbin, MY_II) : -1; if(j >= 0) { other_x[MY_II] = x(j, 0); other_x[MY_II + atoms_per_bin] = x(j, 1); other_x[MY_II + 2 * atoms_per_bin] = x(j, 2); other_x[MY_II + 3 * atoms_per_bin] = type(j); } other_id[MY_II] = j; __syncthreads(); if(i >= 0 && i < nlocal) { #pragma unroll 8 for(int m = 0; m < bincount_current; m++) { const int j = other_id[m]; const int jtype = other_x[m + 3 * atoms_per_bin]; //if(HalfNeigh && (j < i)) continue; if(HalfNeigh && !Newton && (j < i)) continue; if(!HalfNeigh && j==i) continue; if(Tri) { if (x(j,2) < ztmp) continue; if (x(j,2) == ztmp) { if (x(j,1) < ytmp) continue; if (x(j,1) == ytmp) { if (x(j,0) < xtmp) continue; if (x(j,0) == xtmp && j <= i) continue; } } } if(exclude && exclusion(i,j,itype,jtype)) continue; const X_FLOAT delx = xtmp - other_x[m]; const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin]; const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin]; const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; if(rsq <= cutneighsq(itype,jtype)) { if (molecular) { int which = 0; if (!moltemplate) which = NeighborKokkosExecute::find_special(i,j); /* else if (imol >= 0) */ /* which = find_special(onemols[imol]->special[iatom], */ /* onemols[imol]->nspecial[iatom], */ /* tag[j]-tagprev); */ /* else which = 0; */ if (which == 0){ if(n 0) { if(n= 0 && i < nlocal) { neigh_list.d_numneigh(i) = n; neigh_list.d_ilist(i) = i; } if(n > neigh_list.maxneighs) { resize() = 1; - if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n); + if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop } } } #endif /* ---------------------------------------------------------------------- */ template template void NeighborKokkosExecute:: build_Item_Ghost(const int &i) const { /* if necessary, goto next page and add pages */ int n = 0; int which = 0; int moltemplate; if (molecular == 2) moltemplate = 1; else moltemplate = 0; // get subview of neighbors of i const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i); const X_FLOAT xtmp = x(i, 0); const X_FLOAT ytmp = x(i, 1); const X_FLOAT ztmp = x(i, 2); const int itype = type(i); const typename ArrayTypes::t_int_1d_const_um stencil = d_stencil; const typename ArrayTypes::t_int_1d_3_const_um stencilxyz = d_stencilxyz; // loop over all atoms in surrounding bins in stencil including self // when i is a ghost atom, must check if stencil bin is out of bounds // skip i = j // no molecular test when i = ghost atom if (i < nlocal) { - const int ibin = coord2bin(xtmp, ytmp, ztmp); + const int ibin = c_atom2bin(i); for (int k = 0; k < nstencil; k++) { const int jbin = ibin + stencil[k]; for(int m = 0; m < c_bincount(jbin); m++) { const int j = c_bins(jbin,m); if (HalfNeigh && j <= i) continue; else if (j == i) continue; const int jtype = type[j]; if(exclude && exclusion(i,j,itype,jtype)) continue; const X_FLOAT delx = xtmp - x(j,0); const X_FLOAT dely = ytmp - x(j,1); const X_FLOAT delz = ztmp - x(j,2); const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; if (rsq <= cutneighsq(itype,jtype)) { if (molecular) { if (!moltemplate) which = find_special(i,j); /* else if (imol >= 0) */ /* which = find_special(onemols[imol]->special[iatom], */ /* onemols[imol]->nspecial[iatom], */ /* tag[j]-tagprev); */ /* else which = 0; */ if (which == 0){ if(n 0) { if(n= mbinx || ybin2 < 0 || ybin2 >= mbiny || zbin2 < 0 || zbin2 >= mbinz) continue; const int jbin = ibin + stencil[k]; for(int m = 0; m < c_bincount(jbin); m++) { const int j = c_bins(jbin,m); if (HalfNeigh && j <= i) continue; else if (j == i) continue; const int jtype = type[j]; if(exclude && exclusion(i,j,itype,jtype)) continue; const X_FLOAT delx = xtmp - x(j,0); const X_FLOAT dely = ytmp - x(j,1); const X_FLOAT delz = ztmp - x(j,2); const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; if (rsq <= cutneighsq(itype,jtype)) { if(n neigh_list.maxneighs) { resize() = 1; - if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n); + if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop } neigh_list.d_ilist(i) = i; } } namespace LAMMPS_NS { template class NPairKokkos; template class NPairKokkos; template class NPairKokkos; template class NPairKokkos; template class NPairKokkos; #ifdef KOKKOS_HAVE_CUDA template class NPairKokkos; template class NPairKokkos; template class NPairKokkos; template class NPairKokkos; template class NPairKokkos; #endif } diff --git a/src/KOKKOS/npair_kokkos.h b/src/KOKKOS/npair_kokkos.h index 517ea546f..6c1c0e958 100644 --- a/src/KOKKOS/npair_kokkos.h +++ b/src/KOKKOS/npair_kokkos.h @@ -1,436 +1,409 @@ /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #ifdef NPAIR_CLASS typedef NPairKokkos NPairKokkosFullBinHost; NPairStyle(full/bin/kk/host, NPairKokkosFullBinHost, NP_FULL | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI) typedef NPairKokkos NPairKokkosFullBinDevice; NPairStyle(full/bin/kk/device, NPairKokkosFullBinDevice, NP_FULL | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI) typedef NPairKokkos NPairKokkosFullBinGhostHost; NPairStyle(full/bin/ghost/kk/host, NPairKokkosFullBinGhostHost, NP_FULL | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI) typedef NPairKokkos NPairKokkosFullBinGhostDevice; NPairStyle(full/bin/ghost/kk/device, NPairKokkosFullBinGhostDevice, NP_FULL | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI) typedef NPairKokkos NPairKokkosHalfBinHost; NPairStyle(half/bin/kk/host, NPairKokkosHalfBinHost, NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO) typedef NPairKokkos NPairKokkosHalfBinDevice; NPairStyle(half/bin/kk/device, NPairKokkosHalfBinDevice, NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO) typedef NPairKokkos NPairKokkosHalfBinHostTri; NPairStyle(half/bin/kk/host, NPairKokkosHalfBinHostTri, NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_TRI) typedef NPairKokkos NPairKokkosHalfBinDeviceTri; NPairStyle(half/bin/kk/device, NPairKokkosHalfBinDeviceTri, NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_TRI) typedef NPairKokkos NPairKokkosHalfBinGhostHost; NPairStyle(half/bin/ghost/kk/host, NPairKokkosHalfBinGhostHost, NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI) typedef NPairKokkos NPairKokkosHalfBinGhostDevice; NPairStyle(half/bin/ghost/kk/device, NPairKokkosHalfBinGhostDevice, NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI) #else #ifndef LMP_NPAIR_KOKKOS_H #define LMP_NPAIR_KOKKOS_H #include "npair.h" #include "neigh_list_kokkos.h" namespace LAMMPS_NS { template class NPairKokkos : public NPair { public: NPairKokkos(class LAMMPS *); ~NPairKokkos() {} void copy_neighbor_info(); void copy_bin_info(); void copy_stencil_info(); void build(class NeighList *); private: int newton_pair; // data from Neighbor class DAT::tdual_xfloat_2d k_cutneighsq; // exclusion data from Neighbor class DAT::tdual_int_1d k_ex1_type,k_ex2_type; DAT::tdual_int_2d k_ex_type; DAT::tdual_int_1d k_ex1_group,k_ex2_group; DAT::tdual_int_1d k_ex1_bit,k_ex2_bit; DAT::tdual_int_1d k_ex_mol_group; DAT::tdual_int_1d k_ex_mol_bit; DAT::tdual_int_1d k_ex_mol_intra; // data from NBin class int atoms_per_bin; DAT::tdual_int_1d k_bincount; DAT::tdual_int_2d k_bins; + DAT::tdual_int_1d k_atom2bin; // data from NStencil class int nstencil; DAT::tdual_int_1d k_stencil; // # of J neighs for each I DAT::tdual_int_1d_3 k_stencilxyz; }; template class NeighborKokkosExecute { typedef ArrayTypes AT; public: NeighListKokkos neigh_list; // data from Neighbor class const typename AT::t_xfloat_2d_randomread cutneighsq; // exclusion data from Neighbor class const int exclude; const int nex_type; const typename AT::t_int_1d_const ex1_type,ex2_type; const typename AT::t_int_2d_const ex_type; const int nex_group; const typename AT::t_int_1d_const ex1_group,ex2_group; const typename AT::t_int_1d_const ex1_bit,ex2_bit; const int nex_mol; const typename AT::t_int_1d_const ex_mol_group; const typename AT::t_int_1d_const ex_mol_bit; const typename AT::t_int_1d_const ex_mol_intra; // data from NBin class const typename AT::t_int_1d bincount; const typename AT::t_int_1d_const c_bincount; typename AT::t_int_2d bins; typename AT::t_int_2d_const c_bins; + const typename AT::t_int_1d atom2bin; + const typename AT::t_int_1d_const c_atom2bin; // data from NStencil class int nstencil; typename AT::t_int_1d d_stencil; // # of J neighs for each I typename AT::t_int_1d_3 d_stencilxyz; // data from Atom class const typename AT::t_x_array_randomread x; const typename AT::t_int_1d_const type,mask; const typename AT::t_tagint_1d_const molecule; const typename AT::t_tagint_1d_const tag; const typename AT::t_tagint_2d_const special; const typename AT::t_int_2d_const nspecial; const int molecular; int moltemplate; int special_flag[4]; const int nbinx,nbiny,nbinz; const int mbinx,mbiny,mbinz; const int mbinxlo,mbinylo,mbinzlo; const X_FLOAT bininvx,bininvy,bininvz; X_FLOAT bboxhi[3],bboxlo[3]; const int nlocal; typename AT::t_int_scalar resize; typename AT::t_int_scalar new_maxneighs; typename ArrayTypes::t_int_scalar h_resize; typename ArrayTypes::t_int_scalar h_new_maxneighs; const int xperiodic, yperiodic, zperiodic; const int xprd_half, yprd_half, zprd_half; NeighborKokkosExecute( const NeighListKokkos &_neigh_list, const typename AT::t_xfloat_2d_randomread &_cutneighsq, const typename AT::t_int_1d &_bincount, const typename AT::t_int_2d &_bins, + const typename AT::t_int_1d &_atom2bin, const int _nstencil, const typename AT::t_int_1d &_d_stencil, const typename AT::t_int_1d_3 &_d_stencilxyz, const int _nlocal, const typename AT::t_x_array_randomread &_x, const typename AT::t_int_1d_const &_type, const typename AT::t_int_1d_const &_mask, const typename AT::t_tagint_1d_const &_molecule, const typename AT::t_tagint_1d_const &_tag, const typename AT::t_tagint_2d_const &_special, const typename AT::t_int_2d_const &_nspecial, const int &_molecular, const int & _nbinx,const int & _nbiny,const int & _nbinz, const int & _mbinx,const int & _mbiny,const int & _mbinz, const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo, const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz, const int & _exclude,const int & _nex_type, const typename AT::t_int_1d_const & _ex1_type, const typename AT::t_int_1d_const & _ex2_type, const typename AT::t_int_2d_const & _ex_type, const int & _nex_group, const typename AT::t_int_1d_const & _ex1_group, const typename AT::t_int_1d_const & _ex2_group, const typename AT::t_int_1d_const & _ex1_bit, const typename AT::t_int_1d_const & _ex2_bit, const int & _nex_mol, const typename AT::t_int_1d_const & _ex_mol_group, const typename AT::t_int_1d_const & _ex_mol_bit, const typename AT::t_int_1d_const & _ex_mol_intra, const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo, const int & _xperiodic, const int & _yperiodic, const int & _zperiodic, const int & _xprd_half, const int & _yprd_half, const int & _zprd_half): neigh_list(_neigh_list), cutneighsq(_cutneighsq), bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins), + atom2bin(_atom2bin),c_atom2bin(_atom2bin), nstencil(_nstencil),d_stencil(_d_stencil),d_stencilxyz(_d_stencilxyz), nlocal(_nlocal), x(_x),type(_type),mask(_mask),molecule(_molecule), tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular), nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz), mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz), mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo), bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz), exclude(_exclude),nex_type(_nex_type), ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type), nex_group(_nex_group), ex1_group(_ex1_group),ex2_group(_ex2_group), ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol), ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit), ex_mol_intra(_ex_mol_intra), xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic), xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half) { if (molecular == 2) moltemplate = 1; else moltemplate = 0; bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2]; bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2]; resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize"); #ifndef KOKKOS_USE_CUDA_UVM h_resize = Kokkos::create_mirror_view(resize); #else h_resize = resize; #endif h_resize() = 1; new_maxneighs = typename AT:: t_int_scalar("NeighborKokkosFunctor::new_maxneighs"); #ifndef KOKKOS_USE_CUDA_UVM h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs); #else h_new_maxneighs = new_maxneighs; #endif h_new_maxneighs() = neigh_list.maxneighs; }; ~NeighborKokkosExecute() {neigh_list.copymode = 1;}; template KOKKOS_FUNCTION void build_Item(const int &i) const; template KOKKOS_FUNCTION void build_Item_Ghost(const int &i) const; #ifdef KOKKOS_HAVE_CUDA template __device__ inline void build_ItemCuda(typename Kokkos::TeamPolicy::member_type dev) const; #endif - KOKKOS_INLINE_FUNCTION - int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const - { - int ix,iy,iz; - - if (x >= bboxhi[0]) - ix = static_cast ((x-bboxhi[0])*bininvx) + nbinx; - else if (x >= bboxlo[0]) { - ix = static_cast ((x-bboxlo[0])*bininvx); - ix = MIN(ix,nbinx-1); - } else - ix = static_cast ((x-bboxlo[0])*bininvx) - 1; - - if (y >= bboxhi[1]) - iy = static_cast ((y-bboxhi[1])*bininvy) + nbiny; - else if (y >= bboxlo[1]) { - iy = static_cast ((y-bboxlo[1])*bininvy); - iy = MIN(iy,nbiny-1); - } else - iy = static_cast ((y-bboxlo[1])*bininvy) - 1; - - if (z >= bboxhi[2]) - iz = static_cast ((z-bboxhi[2])*bininvz) + nbinz; - else if (z >= bboxlo[2]) { - iz = static_cast ((z-bboxlo[2])*bininvz); - iz = MIN(iz,nbinz-1); - } else - iz = static_cast ((z-bboxlo[2])*bininvz) - 1; - - return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo); - } - KOKKOS_INLINE_FUNCTION int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const { int ix,iy,iz; if (x >= bboxhi[0]) ix = static_cast ((x-bboxhi[0])*bininvx) + nbinx; else if (x >= bboxlo[0]) { ix = static_cast ((x-bboxlo[0])*bininvx); ix = MIN(ix,nbinx-1); } else ix = static_cast ((x-bboxlo[0])*bininvx) - 1; if (y >= bboxhi[1]) iy = static_cast ((y-bboxhi[1])*bininvy) + nbiny; else if (y >= bboxlo[1]) { iy = static_cast ((y-bboxlo[1])*bininvy); iy = MIN(iy,nbiny-1); } else iy = static_cast ((y-bboxlo[1])*bininvy) - 1; if (z >= bboxhi[2]) iz = static_cast ((z-bboxhi[2])*bininvz) + nbinz; else if (z >= bboxlo[2]) { iz = static_cast ((z-bboxlo[2])*bininvz); iz = MIN(iz,nbinz-1); } else iz = static_cast ((z-bboxlo[2])*bininvz) - 1; i[0] = ix - mbinxlo; i[1] = iy - mbinylo; i[2] = iz - mbinzlo; return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo); } KOKKOS_INLINE_FUNCTION int exclusion(const int &i,const int &j, const int &itype,const int &jtype) const; KOKKOS_INLINE_FUNCTION int find_special(const int &i, const int &j) const; KOKKOS_INLINE_FUNCTION int minimum_image_check(double dx, double dy, double dz) const { if (xperiodic && fabs(dx) > xprd_half) return 1; if (yperiodic && fabs(dy) > yprd_half) return 1; if (zperiodic && fabs(dz) > zprd_half) return 1; return 0; } }; template struct NPairKokkosBuildFunctor { typedef DeviceType device_type; const NeighborKokkosExecute c; const size_t sharedsize; NPairKokkosBuildFunctor(const NeighborKokkosExecute &_c, const size_t _sharedsize):c(_c), sharedsize(_sharedsize) {}; KOKKOS_INLINE_FUNCTION void operator() (const int & i) const { c.template build_Item(i); } #ifdef KOKKOS_HAVE_CUDA __device__ inline void operator() (typename Kokkos::TeamPolicy::member_type dev) const { c.template build_ItemCuda(dev); } size_t shmem_size(const int team_size) const { (void) team_size; return sharedsize; } #endif }; template struct NPairKokkosBuildFunctor { typedef LMPHostType device_type; const NeighborKokkosExecute c; const size_t sharedsize; NPairKokkosBuildFunctor(const NeighborKokkosExecute &_c, const size_t _sharedsize):c(_c), sharedsize(_sharedsize) {}; KOKKOS_INLINE_FUNCTION void operator() (const int & i) const { c.template build_Item(i); } void operator() (typename Kokkos::TeamPolicy::member_type dev) const {} // Should error out }; template struct NPairKokkosBuildFunctorGhost { typedef DeviceType device_type; const NeighborKokkosExecute c; const size_t sharedsize; NPairKokkosBuildFunctorGhost(const NeighborKokkosExecute &_c, const size_t _sharedsize):c(_c), sharedsize(_sharedsize) {}; KOKKOS_INLINE_FUNCTION void operator() (const int & i) const { c.template build_Item_Ghost(i); } }; } #endif #endif /* ERROR/WARNING messages: */