Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F68353797
npair_kokkos.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Thu, Jun 27, 00:51
Size
25 KB
Mime Type
text/x-c
Expires
Sat, Jun 29, 00:51 (2 d)
Engine
blob
Format
Raw Data
Handle
18569377
Attached To
rLAMMPS lammps
npair_kokkos.cpp
View Options
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "npair_kokkos.h"
#include "atom_kokkos.h"
#include "atom_masks.h"
#include "domain_kokkos.h"
#include "neighbor_kokkos.h"
#include "nbin_kokkos.h"
#include "nstencil.h"
#include "force.h"
namespace
LAMMPS_NS
{
/* ---------------------------------------------------------------------- */
template
<
class
DeviceType
,
int
HALF_NEIGH
,
int
GHOST
,
int
TRI
>
NPairKokkos
<
DeviceType
,
HALF_NEIGH
,
GHOST
,
TRI
>::
NPairKokkos
(
LAMMPS
*
lmp
)
:
NPair
(
lmp
)
{
}
/* ----------------------------------------------------------------------
copy needed info from Neighbor class to this build class
------------------------------------------------------------------------- */
template
<
class
DeviceType
,
int
HALF_NEIGH
,
int
GHOST
,
int
TRI
>
void
NPairKokkos
<
DeviceType
,
HALF_NEIGH
,
GHOST
,
TRI
>::
copy_neighbor_info
()
{
NPair
::
copy_neighbor_info
();
NeighborKokkos
*
neighborKK
=
(
NeighborKokkos
*
)
neighbor
;
// general params
newton_pair
=
force
->
newton_pair
;
k_cutneighsq
=
neighborKK
->
k_cutneighsq
;
// exclusion info
k_ex1_type
=
neighborKK
->
k_ex1_type
;
k_ex2_type
=
neighborKK
->
k_ex2_type
;
k_ex_type
=
neighborKK
->
k_ex_type
;
k_ex1_group
=
neighborKK
->
k_ex1_group
;
k_ex2_group
=
neighborKK
->
k_ex2_group
;
k_ex1_bit
=
neighborKK
->
k_ex1_bit
;
k_ex2_bit
=
neighborKK
->
k_ex2_bit
;
k_ex_mol_group
=
neighborKK
->
k_ex_mol_group
;
k_ex_mol_bit
=
neighborKK
->
k_ex_mol_bit
;
}
/* ----------------------------------------------------------------------
copy per-atom and per-bin vectors from NBin class to this build class
------------------------------------------------------------------------- */
template
<
class
DeviceType
,
int
HALF_NEIGH
,
int
GHOST
,
int
TRI
>
void
NPairKokkos
<
DeviceType
,
HALF_NEIGH
,
GHOST
,
TRI
>::
copy_bin_info
()
{
NPair
::
copy_bin_info
();
NBinKokkos
<
DeviceType
>*
nbKK
=
(
NBinKokkos
<
DeviceType
>*
)
nb
;
atoms_per_bin
=
nbKK
->
atoms_per_bin
;
k_bincount
=
nbKK
->
k_bincount
;
k_bins
=
nbKK
->
k_bins
;
}
/* ----------------------------------------------------------------------
copy needed info from NStencil class to this build class
------------------------------------------------------------------------- */
template
<
class
DeviceType
,
int
HALF_NEIGH
,
int
GHOST
,
int
TRI
>
void
NPairKokkos
<
DeviceType
,
HALF_NEIGH
,
GHOST
,
TRI
>::
copy_stencil_info
()
{
NPair
::
copy_stencil_info
();
nstencil
=
ns
->
nstencil
;
int
maxstencil
=
ns
->
get_maxstencil
();
k_stencil
=
DAT
::
tdual_int_1d
(
"neighlist:stencil"
,
maxstencil
);
for
(
int
k
=
0
;
k
<
maxstencil
;
k
++
)
k_stencil
.
h_view
(
k
)
=
ns
->
stencil
[
k
];
k_stencil
.
modify
<
LMPHostType
>
();
k_stencil
.
sync
<
DeviceType
>
();
if
(
GHOST
)
{
k_stencilxyz
=
DAT
::
tdual_int_1d_3
(
"neighlist:stencilxyz"
,
maxstencil
);
for
(
int
k
=
0
;
k
<
maxstencil
;
k
++
)
{
k_stencilxyz
.
h_view
(
k
,
0
)
=
ns
->
stencilxyz
[
k
][
0
];
k_stencilxyz
.
h_view
(
k
,
1
)
=
ns
->
stencilxyz
[
k
][
1
];
k_stencilxyz
.
h_view
(
k
,
2
)
=
ns
->
stencilxyz
[
k
][
2
];
}
k_stencilxyz
.
modify
<
LMPHostType
>
();
k_stencilxyz
.
sync
<
DeviceType
>
();
}
}
/* ---------------------------------------------------------------------- */
template
<
class
DeviceType
,
int
HALF_NEIGH
,
int
GHOST
,
int
TRI
>
void
NPairKokkos
<
DeviceType
,
HALF_NEIGH
,
GHOST
,
TRI
>::
build
(
NeighList
*
list_
)
{
NeighListKokkos
<
DeviceType
>*
list
=
(
NeighListKokkos
<
DeviceType
>*
)
list_
;
const
int
nlocal
=
includegroup
?
atom
->
nfirst:
atom
->
nlocal
;
int
nall
=
nlocal
;
if
(
GHOST
)
nall
+=
atom
->
nghost
;
list
->
grow
(
nall
);
NeighborKokkosExecute
<
DeviceType
>
data
(
*
list
,
k_cutneighsq
.
view
<
DeviceType
>
(),
k_bincount
.
view
<
DeviceType
>
(),
k_bins
.
view
<
DeviceType
>
(),
nstencil
,
k_stencil
.
view
<
DeviceType
>
(),
k_stencilxyz
.
view
<
DeviceType
>
(),
nlocal
,
atomKK
->
k_x
.
view
<
DeviceType
>
(),
atomKK
->
k_type
.
view
<
DeviceType
>
(),
atomKK
->
k_mask
.
view
<
DeviceType
>
(),
atomKK
->
k_molecule
.
view
<
DeviceType
>
(),
atomKK
->
k_tag
.
view
<
DeviceType
>
(),
atomKK
->
k_special
.
view
<
DeviceType
>
(),
atomKK
->
k_nspecial
.
view
<
DeviceType
>
(),
atomKK
->
molecular
,
nbinx
,
nbiny
,
nbinz
,
mbinx
,
mbiny
,
mbinz
,
mbinxlo
,
mbinylo
,
mbinzlo
,
bininvx
,
bininvy
,
bininvz
,
exclude
,
nex_type
,
k_ex1_type
.
view
<
DeviceType
>
(),
k_ex2_type
.
view
<
DeviceType
>
(),
k_ex_type
.
view
<
DeviceType
>
(),
nex_group
,
k_ex1_group
.
view
<
DeviceType
>
(),
k_ex2_group
.
view
<
DeviceType
>
(),
k_ex1_bit
.
view
<
DeviceType
>
(),
k_ex2_bit
.
view
<
DeviceType
>
(),
nex_mol
,
k_ex_mol_group
.
view
<
DeviceType
>
(),
k_ex_mol_bit
.
view
<
DeviceType
>
(),
bboxhi
,
bboxlo
,
domain
->
xperiodic
,
domain
->
yperiodic
,
domain
->
zperiodic
,
domain
->
xprd_half
,
domain
->
yprd_half
,
domain
->
zprd_half
);
k_cutneighsq
.
sync
<
DeviceType
>
();
k_ex1_type
.
sync
<
DeviceType
>
();
k_ex2_type
.
sync
<
DeviceType
>
();
k_ex_type
.
sync
<
DeviceType
>
();
k_ex1_group
.
sync
<
DeviceType
>
();
k_ex2_group
.
sync
<
DeviceType
>
();
k_ex1_bit
.
sync
<
DeviceType
>
();
k_ex2_bit
.
sync
<
DeviceType
>
();
k_ex_mol_group
.
sync
<
DeviceType
>
();
k_ex_mol_bit
.
sync
<
DeviceType
>
();
k_bincount
.
sync
<
DeviceType
>
(),
k_bins
.
sync
<
DeviceType
>
(),
atomKK
->
sync
(
Device
,
X_MASK
|
TYPE_MASK
|
MASK_MASK
|
MOLECULE_MASK
|
TAG_MASK
|
SPECIAL_MASK
);
data
.
special_flag
[
0
]
=
special_flag
[
0
];
data
.
special_flag
[
1
]
=
special_flag
[
1
];
data
.
special_flag
[
2
]
=
special_flag
[
2
];
data
.
special_flag
[
3
]
=
special_flag
[
3
];
if
(
list
->
d_neighbors
.
dimension_0
()
<
nall
)
{
// Can this EVER be true??? - TIM 20170215
list
->
d_neighbors
=
typename
ArrayTypes
<
DeviceType
>::
t_neighbors_2d
(
"neighbors"
,
nall
*
1.1
,
list
->
maxneighs
);
list
->
d_numneigh
=
typename
ArrayTypes
<
DeviceType
>::
t_int_1d
(
"numneigh"
,
nall
*
1.1
);
data
.
neigh_list
.
d_neighbors
=
list
->
d_neighbors
;
data
.
neigh_list
.
d_numneigh
=
list
->
d_numneigh
;
}
data
.
h_resize
()
=
1
;
while
(
data
.
h_resize
())
{
data
.
h_new_maxneighs
()
=
list
->
maxneighs
;
data
.
h_resize
()
=
0
;
Kokkos
::
deep_copy
(
data
.
resize
,
data
.
h_resize
);
Kokkos
::
deep_copy
(
data
.
new_maxneighs
,
data
.
h_new_maxneighs
);
#ifdef KOKKOS_HAVE_CUDA
#define BINS_PER_BLOCK 2
const
int
factor
=
atoms_per_bin
<
64
?
2
:
1
;
Kokkos
::
TeamPolicy
<
DeviceType
>
config
((
mbins
+
factor
-
1
)
/
factor
,
atoms_per_bin
*
factor
);
#else
const
int
factor
=
1
;
#endif
if
(
GHOST
)
{
NPairKokkosBuildFunctorGhost
<
DeviceType
,
HALF_NEIGH
>
f
(
data
,
atoms_per_bin
*
5
*
sizeof
(
X_FLOAT
)
*
factor
);
Kokkos
::
parallel_for
(
nall
,
f
);
}
else
{
if
(
newton_pair
)
{
NPairKokkosBuildFunctor
<
DeviceType
,
TRI
?
0
:
HALF_NEIGH
,
1
,
TRI
>
f
(
data
,
atoms_per_bin
*
5
*
sizeof
(
X_FLOAT
)
*
factor
);
#ifdef KOKKOS_HAVE_CUDA
Kokkos
::
parallel_for
(
config
,
f
);
#else
Kokkos
::
parallel_for
(
nall
,
f
);
#endif
}
else
{
NPairKokkosBuildFunctor
<
DeviceType
,
HALF_NEIGH
,
0
,
0
>
f
(
data
,
atoms_per_bin
*
5
*
sizeof
(
X_FLOAT
)
*
factor
);
#ifdef KOKKOS_HAVE_CUDA
Kokkos
::
parallel_for
(
config
,
f
);
#else
Kokkos
::
parallel_for
(
nall
,
f
);
#endif
}
}
DeviceType
::
fence
();
deep_copy
(
data
.
h_resize
,
data
.
resize
);
if
(
data
.
h_resize
())
{
deep_copy
(
data
.
h_new_maxneighs
,
data
.
new_maxneighs
);
list
->
maxneighs
=
data
.
h_new_maxneighs
()
*
1.2
;
list
->
d_neighbors
=
typename
ArrayTypes
<
DeviceType
>::
t_neighbors_2d
(
"neighbors"
,
list
->
d_neighbors
.
dimension_0
(),
list
->
maxneighs
);
data
.
neigh_list
.
d_neighbors
=
list
->
d_neighbors
;
data
.
neigh_list
.
maxneighs
=
list
->
maxneighs
;
}
}
if
(
GHOST
)
{
list
->
inum
=
atom
->
nlocal
;
list
->
gnum
=
nall
-
atom
->
nlocal
;
}
else
{
list
->
inum
=
nall
;
list
->
gnum
=
0
;
}
list
->
k_ilist
.
template
modify
<
DeviceType
>
();
}
/* ---------------------------------------------------------------------- */
template
<
class
DeviceType
>
KOKKOS_INLINE_FUNCTION
int
NeighborKokkosExecute
<
DeviceType
>::
find_special
(
const
int
&
i
,
const
int
&
j
)
const
{
const
int
n1
=
nspecial
(
i
,
0
);
const
int
n2
=
nspecial
(
i
,
1
);
const
int
n3
=
nspecial
(
i
,
2
);
for
(
int
k
=
0
;
k
<
n3
;
k
++
)
{
if
(
special
(
i
,
k
)
==
tag
(
j
))
{
if
(
k
<
n1
)
{
if
(
special_flag
[
1
]
==
0
)
return
-
1
;
else
if
(
special_flag
[
1
]
==
1
)
return
0
;
else
return
1
;
}
else
if
(
k
<
n2
)
{
if
(
special_flag
[
2
]
==
0
)
return
-
1
;
else
if
(
special_flag
[
2
]
==
1
)
return
0
;
else
return
2
;
}
else
{
if
(
special_flag
[
3
]
==
0
)
return
-
1
;
else
if
(
special_flag
[
3
]
==
1
)
return
0
;
else
return
3
;
}
}
}
return
0
;
};
/* ---------------------------------------------------------------------- */
template
<
class
DeviceType
>
KOKKOS_INLINE_FUNCTION
int
NeighborKokkosExecute
<
DeviceType
>::
exclusion
(
const
int
&
i
,
const
int
&
j
,
const
int
&
itype
,
const
int
&
jtype
)
const
{
int
m
;
if
(
nex_type
&&
ex_type
(
itype
,
jtype
))
return
1
;
if
(
nex_group
)
{
for
(
m
=
0
;
m
<
nex_group
;
m
++
)
{
if
(
mask
(
i
)
&
ex1_bit
(
m
)
&&
mask
(
j
)
&
ex2_bit
(
m
))
return
1
;
if
(
mask
(
i
)
&
ex2_bit
(
m
)
&&
mask
(
j
)
&
ex1_bit
(
m
))
return
1
;
}
}
if
(
nex_mol
)
{
for
(
m
=
0
;
m
<
nex_mol
;
m
++
)
if
(
mask
(
i
)
&
ex_mol_bit
(
m
)
&&
mask
(
j
)
&
ex_mol_bit
(
m
)
&&
molecule
(
i
)
==
molecule
(
j
))
return
1
;
}
return
0
;
}
/* ---------------------------------------------------------------------- */
template
<
class
DeviceType
>
template
<
int
HalfNeigh
,
int
Newton
,
int
Tri
>
void
NeighborKokkosExecute
<
DeviceType
>::
build_Item
(
const
int
&
i
)
const
{
/* if necessary, goto next page and add pages */
int
n
=
0
;
int
which
=
0
;
int
moltemplate
;
if
(
molecular
==
2
)
moltemplate
=
1
;
else
moltemplate
=
0
;
// get subview of neighbors of i
const
AtomNeighbors
neighbors_i
=
neigh_list
.
get_neighbors
(
i
);
const
X_FLOAT
xtmp
=
x
(
i
,
0
);
const
X_FLOAT
ytmp
=
x
(
i
,
1
);
const
X_FLOAT
ztmp
=
x
(
i
,
2
);
const
int
itype
=
type
(
i
);
const
int
ibin
=
coord2bin
(
xtmp
,
ytmp
,
ztmp
);
const
typename
ArrayTypes
<
DeviceType
>::
t_int_1d_const_um
stencil
=
d_stencil
;
// loop over all bins in neighborhood (includes ibin)
if
(
HalfNeigh
)
for
(
int
m
=
0
;
m
<
c_bincount
(
ibin
);
m
++
)
{
const
int
j
=
c_bins
(
ibin
,
m
);
const
int
jtype
=
type
(
j
);
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists
if
((
j
==
i
)
||
(
HalfNeigh
&&
!
Newton
&&
(
j
<
i
))
||
(
HalfNeigh
&&
Newton
&&
((
j
<
i
)
||
((
j
>=
nlocal
)
&&
((
x
(
j
,
2
)
<
ztmp
)
||
(
x
(
j
,
2
)
==
ztmp
&&
x
(
j
,
1
)
<
ytmp
)
||
(
x
(
j
,
2
)
==
ztmp
&&
x
(
j
,
1
)
==
ytmp
&&
x
(
j
,
0
)
<
xtmp
)))))
)
continue
;
if
(
exclude
&&
exclusion
(
i
,
j
,
itype
,
jtype
))
continue
;
const
X_FLOAT
delx
=
xtmp
-
x
(
j
,
0
);
const
X_FLOAT
dely
=
ytmp
-
x
(
j
,
1
);
const
X_FLOAT
delz
=
ztmp
-
x
(
j
,
2
);
const
X_FLOAT
rsq
=
delx
*
delx
+
dely
*
dely
+
delz
*
delz
;
if
(
rsq
<=
cutneighsq
(
itype
,
jtype
))
{
if
(
molecular
)
{
if
(
!
moltemplate
)
which
=
find_special
(
i
,
j
);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
/* tag[j]-tagprev); */
/* else which = 0; */
if
(
which
==
0
){
if
(
n
<
neigh_list
.
maxneighs
)
neighbors_i
(
n
++
)
=
j
;
else
n
++
;
}
else
if
(
minimum_image_check
(
delx
,
dely
,
delz
)){
if
(
n
<
neigh_list
.
maxneighs
)
neighbors_i
(
n
++
)
=
j
;
else
n
++
;
}
else
if
(
which
>
0
)
{
if
(
n
<
neigh_list
.
maxneighs
)
neighbors_i
(
n
++
)
=
j
^
(
which
<<
SBBITS
);
else
n
++
;
}
}
else
{
if
(
n
<
neigh_list
.
maxneighs
)
neighbors_i
(
n
++
)
=
j
;
else
n
++
;
}
}
}
for
(
int
k
=
0
;
k
<
nstencil
;
k
++
)
{
const
int
jbin
=
ibin
+
stencil
[
k
];
// get subview of jbin
if
(
HalfNeigh
&&
(
ibin
==
jbin
))
continue
;
//const ArrayTypes<DeviceType>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL);
for
(
int
m
=
0
;
m
<
c_bincount
(
jbin
);
m
++
)
{
const
int
j
=
c_bins
(
jbin
,
m
);
const
int
jtype
=
type
(
j
);
if
(
HalfNeigh
&&
!
Newton
&&
(
j
<
i
))
continue
;
if
(
!
HalfNeigh
&&
j
==
i
)
continue
;
if
(
Tri
)
{
if
(
x
(
j
,
2
)
<
ztmp
)
continue
;
if
(
x
(
j
,
2
)
==
ztmp
)
{
if
(
x
(
j
,
1
)
<
ytmp
)
continue
;
if
(
x
(
j
,
1
)
==
ytmp
)
{
if
(
x
(
j
,
0
)
<
xtmp
)
continue
;
if
(
x
(
j
,
0
)
==
xtmp
&&
j
<=
i
)
continue
;
}
}
}
if
(
exclude
&&
exclusion
(
i
,
j
,
itype
,
jtype
))
continue
;
const
X_FLOAT
delx
=
xtmp
-
x
(
j
,
0
);
const
X_FLOAT
dely
=
ytmp
-
x
(
j
,
1
);
const
X_FLOAT
delz
=
ztmp
-
x
(
j
,
2
);
const
X_FLOAT
rsq
=
delx
*
delx
+
dely
*
dely
+
delz
*
delz
;
if
(
rsq
<=
cutneighsq
(
itype
,
jtype
))
{
if
(
molecular
)
{
if
(
!
moltemplate
)
which
=
NeighborKokkosExecute
<
DeviceType
>::
find_special
(
i
,
j
);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
/* tag[j]-tagprev); */
/* else which = 0; */
if
(
which
==
0
){
if
(
n
<
neigh_list
.
maxneighs
)
neighbors_i
(
n
++
)
=
j
;
else
n
++
;
}
else
if
(
minimum_image_check
(
delx
,
dely
,
delz
)){
if
(
n
<
neigh_list
.
maxneighs
)
neighbors_i
(
n
++
)
=
j
;
else
n
++
;
}
else
if
(
which
>
0
)
{
if
(
n
<
neigh_list
.
maxneighs
)
neighbors_i
(
n
++
)
=
j
^
(
which
<<
SBBITS
);
else
n
++
;
}
}
else
{
if
(
n
<
neigh_list
.
maxneighs
)
neighbors_i
(
n
++
)
=
j
;
else
n
++
;
}
}
}
}
neigh_list
.
d_numneigh
(
i
)
=
n
;
if
(
n
>=
neigh_list
.
maxneighs
)
{
resize
()
=
1
;
if
(
n
>=
new_maxneighs
())
new_maxneighs
()
=
n
;
}
neigh_list
.
d_ilist
(
i
)
=
i
;
}
/* ---------------------------------------------------------------------- */
#ifdef KOKKOS_HAVE_CUDA
extern
__shared__
X_FLOAT
sharedmem
[];
/* ---------------------------------------------------------------------- */
template
<
class
DeviceType
>
template
<
int
HalfNeigh
,
int
Newton
,
int
Tri
>
__device__
inline
void
NeighborKokkosExecute
<
DeviceType
>::
build_ItemCuda
(
typename
Kokkos
::
TeamPolicy
<
DeviceType
>::
member_type
dev
)
const
{
/* loop over atoms in i's bin,
*/
const
int
atoms_per_bin
=
c_bins
.
dimension_1
();
const
int
BINS_PER_TEAM
=
dev
.
team_size
()
/
atoms_per_bin
<
1
?
1
:
dev
.
team_size
()
/
atoms_per_bin
;
const
int
TEAMS_PER_BIN
=
atoms_per_bin
/
dev
.
team_size
()
<
1
?
1
:
atoms_per_bin
/
dev
.
team_size
();
const
int
MY_BIN
=
dev
.
team_rank
()
/
atoms_per_bin
;
const
int
ibin
=
dev
.
league_rank
()
*
BINS_PER_TEAM
+
MY_BIN
;
if
(
ibin
>=
c_bincount
.
dimension_0
())
return
;
X_FLOAT
*
other_x
=
sharedmem
;
other_x
=
other_x
+
5
*
atoms_per_bin
*
MY_BIN
;
int
*
other_id
=
(
int
*
)
&
other_x
[
4
*
atoms_per_bin
];
int
bincount_current
=
c_bincount
[
ibin
];
for
(
int
kk
=
0
;
kk
<
TEAMS_PER_BIN
;
kk
++
)
{
const
int
MY_II
=
dev
.
team_rank
()
%
atoms_per_bin
+
kk
*
dev
.
team_size
();
const
int
i
=
MY_II
<
bincount_current
?
c_bins
(
ibin
,
MY_II
)
:
-
1
;
/* if necessary, goto next page and add pages */
int
n
=
0
;
X_FLOAT
xtmp
;
X_FLOAT
ytmp
;
X_FLOAT
ztmp
;
int
itype
;
const
AtomNeighbors
neighbors_i
=
neigh_list
.
get_neighbors
((
i
>=
0
&&
i
<
nlocal
)
?
i:
0
);
if
(
i
>=
0
)
{
xtmp
=
x
(
i
,
0
);
ytmp
=
x
(
i
,
1
);
ztmp
=
x
(
i
,
2
);
itype
=
type
(
i
);
other_x
[
MY_II
]
=
xtmp
;
other_x
[
MY_II
+
atoms_per_bin
]
=
ytmp
;
other_x
[
MY_II
+
2
*
atoms_per_bin
]
=
ztmp
;
other_x
[
MY_II
+
3
*
atoms_per_bin
]
=
itype
;
}
other_id
[
MY_II
]
=
i
;
int
test
=
(
__syncthreads_count
(
i
>=
0
&&
i
<=
nlocal
)
==
0
);
if
(
test
)
return
;
if
(
i
>=
0
&&
i
<
nlocal
)
{
#pragma unroll 4
for
(
int
m
=
0
;
m
<
bincount_current
;
m
++
)
{
int
j
=
other_id
[
m
];
const
int
jtype
=
other_x
[
m
+
3
*
atoms_per_bin
];
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists
if
((
j
==
i
)
||
(
HalfNeigh
&&
!
Newton
&&
(
j
<
i
))
||
(
HalfNeigh
&&
Newton
&&
((
j
<
i
)
||
((
j
>=
nlocal
)
&&
((
x
(
j
,
2
)
<
ztmp
)
||
(
x
(
j
,
2
)
==
ztmp
&&
x
(
j
,
1
)
<
ytmp
)
||
(
x
(
j
,
2
)
==
ztmp
&&
x
(
j
,
1
)
==
ytmp
&&
x
(
j
,
0
)
<
xtmp
)))))
)
continue
;
if
(
Tri
)
{
if
(
x
(
j
,
2
)
<
ztmp
)
continue
;
if
(
x
(
j
,
2
)
==
ztmp
)
{
if
(
x
(
j
,
1
)
<
ytmp
)
continue
;
if
(
x
(
j
,
1
)
==
ytmp
)
{
if
(
x
(
j
,
0
)
<
xtmp
)
continue
;
if
(
x
(
j
,
0
)
==
xtmp
&&
j
<=
i
)
continue
;
}
}
}
if
(
exclude
&&
exclusion
(
i
,
j
,
itype
,
jtype
))
continue
;
const
X_FLOAT
delx
=
xtmp
-
other_x
[
m
];
const
X_FLOAT
dely
=
ytmp
-
other_x
[
m
+
atoms_per_bin
];
const
X_FLOAT
delz
=
ztmp
-
other_x
[
m
+
2
*
atoms_per_bin
];
const
X_FLOAT
rsq
=
delx
*
delx
+
dely
*
dely
+
delz
*
delz
;
if
(
rsq
<=
cutneighsq
(
itype
,
jtype
))
{
if
(
molecular
)
{
int
which
=
0
;
if
(
!
moltemplate
)
which
=
NeighborKokkosExecute
<
DeviceType
>::
find_special
(
i
,
j
);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
/* tag[j]-tagprev); */
/* else which = 0; */
if
(
which
==
0
){
if
(
n
<
neigh_list
.
maxneighs
)
neighbors_i
(
n
++
)
=
j
;
else
n
++
;
}
else
if
(
minimum_image_check
(
delx
,
dely
,
delz
)){
if
(
n
<
neigh_list
.
maxneighs
)
neighbors_i
(
n
++
)
=
j
;
else
n
++
;
}
else
if
(
which
>
0
)
{
if
(
n
<
neigh_list
.
maxneighs
)
neighbors_i
(
n
++
)
=
j
^
(
which
<<
SBBITS
);
else
n
++
;
}
}
else
{
if
(
n
<
neigh_list
.
maxneighs
)
neighbors_i
(
n
++
)
=
j
;
else
n
++
;
}
}
}
}
__syncthreads
();
const
typename
ArrayTypes
<
DeviceType
>::
t_int_1d_const_um
stencil
=
d_stencil
;
for
(
int
k
=
0
;
k
<
nstencil
;
k
++
)
{
const
int
jbin
=
ibin
+
stencil
[
k
];
if
(
ibin
==
jbin
)
continue
;
bincount_current
=
c_bincount
[
jbin
];
int
j
=
MY_II
<
bincount_current
?
c_bins
(
jbin
,
MY_II
)
:
-
1
;
if
(
j
>=
0
)
{
other_x
[
MY_II
]
=
x
(
j
,
0
);
other_x
[
MY_II
+
atoms_per_bin
]
=
x
(
j
,
1
);
other_x
[
MY_II
+
2
*
atoms_per_bin
]
=
x
(
j
,
2
);
other_x
[
MY_II
+
3
*
atoms_per_bin
]
=
type
(
j
);
}
other_id
[
MY_II
]
=
j
;
__syncthreads
();
if
(
i
>=
0
&&
i
<
nlocal
)
{
#pragma unroll 8
for
(
int
m
=
0
;
m
<
bincount_current
;
m
++
)
{
const
int
j
=
other_id
[
m
];
const
int
jtype
=
other_x
[
m
+
3
*
atoms_per_bin
];
//if(HalfNeigh && (j < i)) continue;
if
(
HalfNeigh
&&
!
Newton
&&
(
j
<
i
))
continue
;
if
(
!
HalfNeigh
&&
j
==
i
)
continue
;
if
(
Tri
)
{
if
(
x
(
j
,
2
)
<
ztmp
)
continue
;
if
(
x
(
j
,
2
)
==
ztmp
)
{
if
(
x
(
j
,
1
)
<
ytmp
)
continue
;
if
(
x
(
j
,
1
)
==
ytmp
)
{
if
(
x
(
j
,
0
)
<
xtmp
)
continue
;
if
(
x
(
j
,
0
)
==
xtmp
&&
j
<=
i
)
continue
;
}
}
}
if
(
exclude
&&
exclusion
(
i
,
j
,
itype
,
jtype
))
continue
;
const
X_FLOAT
delx
=
xtmp
-
other_x
[
m
];
const
X_FLOAT
dely
=
ytmp
-
other_x
[
m
+
atoms_per_bin
];
const
X_FLOAT
delz
=
ztmp
-
other_x
[
m
+
2
*
atoms_per_bin
];
const
X_FLOAT
rsq
=
delx
*
delx
+
dely
*
dely
+
delz
*
delz
;
if
(
rsq
<=
cutneighsq
(
itype
,
jtype
))
{
if
(
molecular
)
{
int
which
=
0
;
if
(
!
moltemplate
)
which
=
NeighborKokkosExecute
<
DeviceType
>::
find_special
(
i
,
j
);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
/* tag[j]-tagprev); */
/* else which = 0; */
if
(
which
==
0
){
if
(
n
<
neigh_list
.
maxneighs
)
neighbors_i
(
n
++
)
=
j
;
else
n
++
;
}
else
if
(
minimum_image_check
(
delx
,
dely
,
delz
)){
if
(
n
<
neigh_list
.
maxneighs
)
neighbors_i
(
n
++
)
=
j
;
else
n
++
;
}
else
if
(
which
>
0
)
{
if
(
n
<
neigh_list
.
maxneighs
)
neighbors_i
(
n
++
)
=
j
^
(
which
<<
SBBITS
);
else
n
++
;
}
}
else
{
if
(
n
<
neigh_list
.
maxneighs
)
neighbors_i
(
n
++
)
=
j
;
else
n
++
;
}
}
}
}
__syncthreads
();
}
if
(
i
>=
0
&&
i
<
nlocal
)
{
neigh_list
.
d_numneigh
(
i
)
=
n
;
neigh_list
.
d_ilist
(
i
)
=
i
;
}
if
(
n
>=
neigh_list
.
maxneighs
)
{
resize
()
=
1
;
if
(
n
>=
new_maxneighs
())
new_maxneighs
()
=
n
;
}
}
}
#endif
/* ---------------------------------------------------------------------- */
template
<
class
DeviceType
>
template
<
int
HalfNeigh
>
void
NeighborKokkosExecute
<
DeviceType
>::
build_Item_Ghost
(
const
int
&
i
)
const
{
/* if necessary, goto next page and add pages */
int
n
=
0
;
int
which
=
0
;
int
moltemplate
;
if
(
molecular
==
2
)
moltemplate
=
1
;
else
moltemplate
=
0
;
// get subview of neighbors of i
const
AtomNeighbors
neighbors_i
=
neigh_list
.
get_neighbors
(
i
);
const
X_FLOAT
xtmp
=
x
(
i
,
0
);
const
X_FLOAT
ytmp
=
x
(
i
,
1
);
const
X_FLOAT
ztmp
=
x
(
i
,
2
);
const
int
itype
=
type
(
i
);
const
typename
ArrayTypes
<
DeviceType
>::
t_int_1d_const_um
stencil
=
d_stencil
;
const
typename
ArrayTypes
<
DeviceType
>::
t_int_1d_3_const_um
stencilxyz
=
d_stencilxyz
;
// loop over all atoms in surrounding bins in stencil including self
// when i is a ghost atom, must check if stencil bin is out of bounds
// skip i = j
// no molecular test when i = ghost atom
if
(
i
<
nlocal
)
{
const
int
ibin
=
coord2bin
(
xtmp
,
ytmp
,
ztmp
);
for
(
int
k
=
0
;
k
<
nstencil
;
k
++
)
{
const
int
jbin
=
ibin
+
stencil
[
k
];
for
(
int
m
=
0
;
m
<
c_bincount
(
jbin
);
m
++
)
{
const
int
j
=
c_bins
(
jbin
,
m
);
if
(
HalfNeigh
&&
j
<=
i
)
continue
;
else
if
(
j
==
i
)
continue
;
const
int
jtype
=
type
[
j
];
if
(
exclude
&&
exclusion
(
i
,
j
,
itype
,
jtype
))
continue
;
const
X_FLOAT
delx
=
xtmp
-
x
(
j
,
0
);
const
X_FLOAT
dely
=
ytmp
-
x
(
j
,
1
);
const
X_FLOAT
delz
=
ztmp
-
x
(
j
,
2
);
const
X_FLOAT
rsq
=
delx
*
delx
+
dely
*
dely
+
delz
*
delz
;
if
(
rsq
<=
cutneighsq
(
itype
,
jtype
))
{
if
(
molecular
)
{
if
(
!
moltemplate
)
which
=
find_special
(
i
,
j
);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
/* tag[j]-tagprev); */
/* else which = 0; */
if
(
which
==
0
){
if
(
n
<
neigh_list
.
maxneighs
)
neighbors_i
(
n
++
)
=
j
;
else
n
++
;
}
else
if
(
minimum_image_check
(
delx
,
dely
,
delz
)){
if
(
n
<
neigh_list
.
maxneighs
)
neighbors_i
(
n
++
)
=
j
;
else
n
++
;
}
else
if
(
which
>
0
)
{
if
(
n
<
neigh_list
.
maxneighs
)
neighbors_i
(
n
++
)
=
j
^
(
which
<<
SBBITS
);
else
n
++
;
}
}
else
{
if
(
n
<
neigh_list
.
maxneighs
)
neighbors_i
(
n
++
)
=
j
;
else
n
++
;
}
}
}
}
}
else
{
int
binxyz
[
3
];
const
int
ibin
=
coord2bin
(
xtmp
,
ytmp
,
ztmp
,
binxyz
);
const
int
xbin
=
binxyz
[
0
];
const
int
ybin
=
binxyz
[
1
];
const
int
zbin
=
binxyz
[
2
];
for
(
int
k
=
0
;
k
<
nstencil
;
k
++
)
{
const
int
xbin2
=
xbin
+
stencilxyz
(
k
,
0
);
const
int
ybin2
=
ybin
+
stencilxyz
(
k
,
1
);
const
int
zbin2
=
zbin
+
stencilxyz
(
k
,
2
);
if
(
xbin2
<
0
||
xbin2
>=
mbinx
||
ybin2
<
0
||
ybin2
>=
mbiny
||
zbin2
<
0
||
zbin2
>=
mbinz
)
continue
;
const
int
jbin
=
ibin
+
stencil
[
k
];
for
(
int
m
=
0
;
m
<
c_bincount
(
jbin
);
m
++
)
{
const
int
j
=
c_bins
(
jbin
,
m
);
if
(
HalfNeigh
&&
j
<=
i
)
continue
;
else
if
(
j
==
i
)
continue
;
const
int
jtype
=
type
[
j
];
if
(
exclude
&&
exclusion
(
i
,
j
,
itype
,
jtype
))
continue
;
const
X_FLOAT
delx
=
xtmp
-
x
(
j
,
0
);
const
X_FLOAT
dely
=
ytmp
-
x
(
j
,
1
);
const
X_FLOAT
delz
=
ztmp
-
x
(
j
,
2
);
const
X_FLOAT
rsq
=
delx
*
delx
+
dely
*
dely
+
delz
*
delz
;
if
(
rsq
<=
cutneighsq
(
itype
,
jtype
))
{
if
(
n
<
neigh_list
.
maxneighs
)
neighbors_i
(
n
++
)
=
j
;
else
n
++
;
}
}
}
}
neigh_list
.
d_numneigh
(
i
)
=
n
;
if
(
n
>=
neigh_list
.
maxneighs
)
{
resize
()
=
1
;
if
(
n
>=
new_maxneighs
())
new_maxneighs
()
=
n
;
}
neigh_list
.
d_ilist
(
i
)
=
i
;
}
}
namespace
LAMMPS_NS
{
template
class
NPairKokkos
<
LMPDeviceType
,
0
,
0
,
0
>
;
template
class
NPairKokkos
<
LMPDeviceType
,
0
,
1
,
0
>
;
template
class
NPairKokkos
<
LMPDeviceType
,
1
,
0
,
0
>
;
template
class
NPairKokkos
<
LMPDeviceType
,
1
,
1
,
0
>
;
template
class
NPairKokkos
<
LMPDeviceType
,
1
,
0
,
1
>
;
#ifdef KOKKOS_HAVE_CUDA
template
class
NPairKokkos
<
LMPHostType
,
0
,
0
,
0
>
;
template
class
NPairKokkos
<
LMPHostType
,
0
,
1
,
0
>
;
template
class
NPairKokkos
<
LMPHostType
,
1
,
0
,
0
>
;
template
class
NPairKokkos
<
LMPHostType
,
1
,
1
,
0
>
;
template
class
NPairKokkos
<
LMPHostType
,
1
,
0
,
1
>
;
#endif
}
Event Timeline
Log In to Comment