Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F86076885
comm_kokkos.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Fri, Oct 4, 03:05
Size
33 KB
Mime Type
text/x-c
Expires
Sun, Oct 6, 03:05 (2 d)
Engine
blob
Format
Raw Data
Handle
21341872
Attached To
rLAMMPS lammps
comm_kokkos.cpp
View Options
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "comm_kokkos.h"
#include "kokkos.h"
#include "atom.h"
#include "atom_kokkos.h"
#include "atom_vec.h"
#include "atom_vec_kokkos.h"
#include "domain.h"
#include "atom_masks.h"
#include "error.h"
#include "memory.h"
#include "force.h"
#include "pair.h"
#include "fix.h"
#include "compute.h"
#include "dump.h"
#include "output.h"
#include "modify.h"
using
namespace
LAMMPS_NS
;
#define BUFFACTOR 1.5
#define BUFMIN 10000
#define BUFEXTRA 1000
enum
{
SINGLE
,
MULTI
};
/* ----------------------------------------------------------------------
setup MPI and allocate buffer space
------------------------------------------------------------------------- */
CommKokkos
::
CommKokkos
(
LAMMPS
*
lmp
)
:
CommBrick
(
lmp
)
{
if
(
sendlist
)
for
(
int
i
=
0
;
i
<
maxswap
;
i
++
)
memory
->
destroy
(
sendlist
[
i
]);
memory
->
sfree
(
sendlist
);
sendlist
=
NULL
;
k_sendlist
=
ArrayTypes
<
LMPDeviceType
>::
tdual_int_2d
();
// error check for disallow of OpenMP threads?
// initialize comm buffers & exchange memory
memory
->
destroy
(
buf_send
);
buf_send
=
NULL
;
memory
->
destroy
(
buf_recv
);
buf_recv
=
NULL
;
k_exchange_sendlist
=
ArrayTypes
<
LMPDeviceType
>::
tdual_int_1d
(
"comm:k_exchange_sendlist"
,
100
);
k_exchange_copylist
=
ArrayTypes
<
LMPDeviceType
>::
tdual_int_1d
(
"comm:k_exchange_copylist"
,
100
);
k_count
=
ArrayTypes
<
LMPDeviceType
>::
tdual_int_1d
(
"comm:k_count"
,
1
);
k_sendflag
=
ArrayTypes
<
LMPDeviceType
>::
tdual_int_1d
(
"comm:k_sendflag"
,
100
);
memory
->
destroy
(
maxsendlist
);
maxsendlist
=
NULL
;
memory
->
create
(
maxsendlist
,
maxswap
,
"comm:maxsendlist"
);
for
(
int
i
=
0
;
i
<
maxswap
;
i
++
)
{
maxsendlist
[
i
]
=
BUFMIN
;
}
memory
->
create_kokkos
(
k_sendlist
,
sendlist
,
maxswap
,
BUFMIN
,
"comm:sendlist"
);
max_buf_pair
=
0
;
k_buf_send_pair
=
DAT
::
tdual_xfloat_1d
(
"comm:k_buf_send_pair"
,
1
);
k_buf_recv_pair
=
DAT
::
tdual_xfloat_1d
(
"comm:k_recv_send_pair"
,
1
);
}
/* ---------------------------------------------------------------------- */
CommKokkos
::~
CommKokkos
()
{
memory
->
destroy_kokkos
(
k_sendlist
,
sendlist
);
sendlist
=
NULL
;
memory
->
destroy_kokkos
(
k_buf_send
,
buf_send
);
buf_send
=
NULL
;
memory
->
destroy_kokkos
(
k_buf_recv
,
buf_recv
);
buf_recv
=
NULL
;
}
/* ---------------------------------------------------------------------- */
void
CommKokkos
::
init
()
{
maxsend
=
BUFMIN
;
maxrecv
=
BUFMIN
;
grow_send_kokkos
(
maxsend
+
bufextra
,
0
,
Host
);
grow_recv_kokkos
(
maxrecv
,
Host
);
atomKK
=
(
AtomKokkos
*
)
atom
;
exchange_comm_classic
=
lmp
->
kokkos
->
exchange_comm_classic
;
forward_comm_classic
=
lmp
->
kokkos
->
forward_comm_classic
;
exchange_comm_on_host
=
lmp
->
kokkos
->
exchange_comm_on_host
;
forward_comm_on_host
=
lmp
->
kokkos
->
forward_comm_on_host
;
CommBrick
::
init
();
int
check_forward
=
0
;
int
check_reverse
=
0
;
if
(
force
->
pair
&&
(
force
->
pair
->
execution_space
==
Host
))
check_forward
+=
force
->
pair
->
comm_forward
;
if
(
force
->
pair
&&
(
force
->
pair
->
execution_space
==
Host
))
check_reverse
+=
force
->
pair
->
comm_reverse
;
for
(
int
i
=
0
;
i
<
modify
->
nfix
;
i
++
)
{
check_forward
+=
modify
->
fix
[
i
]
->
comm_forward
;
check_reverse
+=
modify
->
fix
[
i
]
->
comm_reverse
;
}
for
(
int
i
=
0
;
i
<
modify
->
ncompute
;
i
++
)
{
check_forward
+=
modify
->
compute
[
i
]
->
comm_forward
;
check_reverse
+=
modify
->
compute
[
i
]
->
comm_reverse
;
}
for
(
int
i
=
0
;
i
<
output
->
ndump
;
i
++
)
{
check_forward
+=
output
->
dump
[
i
]
->
comm_forward
;
check_reverse
+=
output
->
dump
[
i
]
->
comm_reverse
;
}
if
(
force
->
newton
==
0
)
check_reverse
=
0
;
if
(
force
->
pair
)
check_reverse
+=
force
->
pair
->
comm_reverse_off
;
if
(
check_reverse
||
check_forward
)
forward_comm_classic
=
true
;
}
/* ----------------------------------------------------------------------
forward communication of atom coords every timestep
other per-atom attributes may also be sent via pack/unpack routines
------------------------------------------------------------------------- */
void
CommKokkos
::
forward_comm
(
int
dummy
)
{
if
(
!
forward_comm_classic
)
{
if
(
forward_comm_on_host
)
forward_comm_device
<
LMPHostType
>
(
dummy
);
else
forward_comm_device
<
LMPDeviceType
>
(
dummy
);
return
;
}
k_sendlist
.
sync
<
LMPHostType
>
();
if
(
comm_x_only
)
{
atomKK
->
sync
(
Host
,
X_MASK
);
atomKK
->
modified
(
Host
,
X_MASK
);
}
else
if
(
ghost_velocity
)
{
atomKK
->
sync
(
Host
,
X_MASK
|
V_MASK
);
atomKK
->
modified
(
Host
,
X_MASK
|
V_MASK
);
}
else
{
atomKK
->
sync
(
Host
,
ALL_MASK
);
atomKK
->
modified
(
Host
,
ALL_MASK
);
}
CommBrick
::
forward_comm
(
dummy
);
}
/* ---------------------------------------------------------------------- */
template
<
class
DeviceType
>
void
CommKokkos
::
forward_comm_device
(
int
dummy
)
{
int
n
;
MPI_Request
request
;
AtomVecKokkos
*
avec
=
(
AtomVecKokkos
*
)
atom
->
avec
;
double
**
x
=
atom
->
x
;
double
*
buf
;
// exchange data with another proc
// if other proc is self, just copy
// if comm_x_only set, exchange or copy directly to x, don't unpack
k_sendlist
.
sync
<
DeviceType
>
();
for
(
int
iswap
=
0
;
iswap
<
nswap
;
iswap
++
)
{
if
(
sendproc
[
iswap
]
!=
me
)
{
if
(
comm_x_only
)
{
atomKK
->
sync
(
ExecutionSpaceFromDevice
<
DeviceType
>::
space
,
X_MASK
);
if
(
size_forward_recv
[
iswap
])
buf
=
x
[
firstrecv
[
iswap
]];
else
buf
=
NULL
;
if
(
size_forward_recv
[
iswap
])
{
buf
=
atomKK
->
k_x
.
view
<
DeviceType
>
().
ptr_on_device
()
+
firstrecv
[
iswap
]
*
atomKK
->
k_x
.
view
<
DeviceType
>
().
dimension_1
();
MPI_Irecv
(
buf
,
size_forward_recv
[
iswap
],
MPI_DOUBLE
,
recvproc
[
iswap
],
0
,
world
,
&
request
);
}
n
=
avec
->
pack_comm_kokkos
(
sendnum
[
iswap
],
k_sendlist
,
iswap
,
k_buf_send
,
pbc_flag
[
iswap
],
pbc
[
iswap
]);
if
(
n
)
{
MPI_Send
(
k_buf_send
.
view
<
DeviceType
>
().
ptr_on_device
(),
n
,
MPI_DOUBLE
,
sendproc
[
iswap
],
0
,
world
);
}
if
(
size_forward_recv
[
iswap
])
MPI_Wait
(
&
request
,
MPI_STATUS_IGNORE
);
atomKK
->
modified
(
ExecutionSpaceFromDevice
<
DeviceType
>::
space
,
X_MASK
);
}
else
if
(
ghost_velocity
)
{
error
->
all
(
FLERR
,
"Ghost velocity forward comm not yet "
"implemented with Kokkos"
);
if
(
size_forward_recv
[
iswap
])
MPI_Irecv
(
k_buf_recv
.
view
<
LMPHostType
>
().
ptr_on_device
(),
size_forward_recv
[
iswap
],
MPI_DOUBLE
,
recvproc
[
iswap
],
0
,
world
,
&
request
);
n
=
avec
->
pack_comm_vel
(
sendnum
[
iswap
],
sendlist
[
iswap
],
buf_send
,
pbc_flag
[
iswap
],
pbc
[
iswap
]);
if
(
n
)
MPI_Send
(
buf_send
,
n
,
MPI_DOUBLE
,
sendproc
[
iswap
],
0
,
world
);
if
(
size_forward_recv
[
iswap
])
MPI_Wait
(
&
request
,
MPI_STATUS_IGNORE
);
avec
->
unpack_comm_vel
(
recvnum
[
iswap
],
firstrecv
[
iswap
],
buf_recv
);
}
else
{
if
(
size_forward_recv
[
iswap
])
MPI_Irecv
(
k_buf_recv
.
view
<
DeviceType
>
().
ptr_on_device
(),
size_forward_recv
[
iswap
],
MPI_DOUBLE
,
recvproc
[
iswap
],
0
,
world
,
&
request
);
n
=
avec
->
pack_comm_kokkos
(
sendnum
[
iswap
],
k_sendlist
,
iswap
,
k_buf_send
,
pbc_flag
[
iswap
],
pbc
[
iswap
]);
if
(
n
)
MPI_Send
(
k_buf_send
.
view
<
DeviceType
>
().
ptr_on_device
(),
n
,
MPI_DOUBLE
,
sendproc
[
iswap
],
0
,
world
);
if
(
size_forward_recv
[
iswap
])
MPI_Wait
(
&
request
,
MPI_STATUS_IGNORE
);
avec
->
unpack_comm_kokkos
(
recvnum
[
iswap
],
firstrecv
[
iswap
],
k_buf_recv
);
}
}
else
{
if
(
!
ghost_velocity
)
{
if
(
sendnum
[
iswap
])
n
=
avec
->
pack_comm_self
(
sendnum
[
iswap
],
k_sendlist
,
iswap
,
firstrecv
[
iswap
],
pbc_flag
[
iswap
],
pbc
[
iswap
]);
}
else
if
(
ghost_velocity
)
{
error
->
all
(
FLERR
,
"Ghost velocity forward comm not yet "
"implemented with Kokkos"
);
n
=
avec
->
pack_comm_vel
(
sendnum
[
iswap
],
sendlist
[
iswap
],
buf_send
,
pbc_flag
[
iswap
],
pbc
[
iswap
]);
avec
->
unpack_comm_vel
(
recvnum
[
iswap
],
firstrecv
[
iswap
],
buf_send
);
}
}
}
}
void
CommKokkos
::
reverse_comm
()
{
k_sendlist
.
sync
<
LMPHostType
>
();
if
(
comm_f_only
)
atomKK
->
sync
(
Host
,
F_MASK
);
else
atomKK
->
sync
(
Host
,
ALL_MASK
);
CommBrick
::
reverse_comm
();
if
(
comm_f_only
)
atomKK
->
modified
(
Host
,
F_MASK
);
else
atomKK
->
modified
(
Host
,
ALL_MASK
);
atomKK
->
sync
(
Device
,
ALL_MASK
);
}
void
CommKokkos
::
forward_comm_fix
(
Fix
*
fix
,
int
size
)
{
k_sendlist
.
sync
<
LMPHostType
>
();
CommBrick
::
forward_comm_fix
(
fix
,
size
);
}
void
CommKokkos
::
reverse_comm_fix
(
Fix
*
fix
,
int
size
)
{
k_sendlist
.
sync
<
LMPHostType
>
();
CommBrick
::
reverse_comm_fix
(
fix
,
size
);
}
void
CommKokkos
::
forward_comm_compute
(
Compute
*
compute
)
{
k_sendlist
.
sync
<
LMPHostType
>
();
CommBrick
::
forward_comm_compute
(
compute
);
}
void
CommKokkos
::
reverse_comm_compute
(
Compute
*
compute
)
{
k_sendlist
.
sync
<
LMPHostType
>
();
CommBrick
::
reverse_comm_compute
(
compute
);
}
void
CommKokkos
::
forward_comm_pair
(
Pair
*
pair
)
{
if
(
pair
->
execution_space
==
Host
)
{
k_sendlist
.
sync
<
LMPHostType
>
();
CommBrick
::
forward_comm_pair
(
pair
);
}
else
if
(
pair
->
execution_space
==
Device
)
{
k_sendlist
.
sync
<
LMPDeviceType
>
();
forward_comm_pair_device
<
LMPDeviceType
>
(
pair
);
}
}
template
<
class
DeviceType
>
void
CommKokkos
::
forward_comm_pair_device
(
Pair
*
pair
)
{
int
iswap
,
n
;
MPI_Request
request
;
int
nsize
=
pair
->
comm_forward
;
for
(
iswap
=
0
;
iswap
<
nswap
;
iswap
++
)
{
int
n
=
MAX
(
max_buf_pair
,
nsize
*
sendnum
[
iswap
]);
n
=
MAX
(
n
,
nsize
*
recvnum
[
iswap
]);
if
(
n
>
max_buf_pair
)
grow_buf_pair
(
n
);
}
for
(
iswap
=
0
;
iswap
<
nswap
;
iswap
++
)
{
// pack buffer
n
=
pair
->
pack_forward_comm_kokkos
(
sendnum
[
iswap
],
k_sendlist
,
iswap
,
k_buf_send_pair
,
pbc_flag
[
iswap
],
pbc
[
iswap
]);
// exchange with another proc
// if self, set recv buffer to send buffer
if
(
sendproc
[
iswap
]
!=
me
)
{
if
(
recvnum
[
iswap
])
MPI_Irecv
(
k_buf_recv_pair
.
view
<
DeviceType
>
().
ptr_on_device
(),
nsize
*
recvnum
[
iswap
],
MPI_DOUBLE
,
recvproc
[
iswap
],
0
,
world
,
&
request
);
if
(
sendnum
[
iswap
])
MPI_Send
(
k_buf_send_pair
.
view
<
DeviceType
>
().
ptr_on_device
(),
n
,
MPI_DOUBLE
,
sendproc
[
iswap
],
0
,
world
);
if
(
recvnum
[
iswap
])
MPI_Wait
(
&
request
,
MPI_STATUS_IGNORE
);
}
else
k_buf_recv_pair
=
k_buf_send_pair
;
// unpack buffer
pair
->
unpack_forward_comm_kokkos
(
recvnum
[
iswap
],
firstrecv
[
iswap
],
k_buf_recv_pair
);
}
}
void
CommKokkos
::
grow_buf_pair
(
int
n
)
{
max_buf_pair
=
n
*
BUFFACTOR
;
k_buf_send_pair
.
resize
(
max_buf_pair
);
k_buf_recv_pair
.
resize
(
max_buf_pair
);
}
void
CommKokkos
::
reverse_comm_pair
(
Pair
*
pair
)
{
k_sendlist
.
sync
<
LMPHostType
>
();
CommBrick
::
reverse_comm_pair
(
pair
);
}
void
CommKokkos
::
forward_comm_dump
(
Dump
*
dump
)
{
k_sendlist
.
sync
<
LMPHostType
>
();
CommBrick
::
forward_comm_dump
(
dump
);
}
void
CommKokkos
::
reverse_comm_dump
(
Dump
*
dump
)
{
k_sendlist
.
sync
<
LMPHostType
>
();
CommBrick
::
reverse_comm_dump
(
dump
);
}
/* ----------------------------------------------------------------------
exchange: move atoms to correct processors
atoms exchanged with all 6 stencil neighbors
send out atoms that have left my box, receive ones entering my box
atoms will be lost if not inside some proc's box
can happen if atom moves outside of non-periodic bounary
or if atom moves more than one proc away
this routine called before every reneighboring
for triclinic, atoms must be in lamda coords (0-1) before exchange is called
------------------------------------------------------------------------- */
void
CommKokkos
::
exchange
()
{
if
(
atom
->
nextra_grow
+
atom
->
nextra_border
)
{
if
(
!
exchange_comm_classic
)
{
static
int
print
=
1
;
if
(
print
&&
comm
->
me
==
0
)
{
error
->
warning
(
FLERR
,
"Fixes cannot send data in Kokkos communication, "
"switching to classic communication"
);
}
print
=
0
;
exchange_comm_classic
=
true
;
}
}
if
(
!
exchange_comm_classic
)
{
if
(
exchange_comm_on_host
)
exchange_device
<
LMPHostType
>
();
else
exchange_device
<
LMPDeviceType
>
();
return
;
}
atomKK
->
sync
(
Host
,
ALL_MASK
);
atomKK
->
modified
(
Host
,
ALL_MASK
);
CommBrick
::
exchange
();
}
/* ---------------------------------------------------------------------- */
template
<
class
DeviceType
>
struct
BuildExchangeListFunctor
{
typedef
DeviceType
device_type
;
typedef
ArrayTypes
<
DeviceType
>
AT
;
X_FLOAT
_lo
,
_hi
;
typename
AT
::
t_x_array
_x
;
int
_nlocal
,
_dim
;
typename
AT
::
t_int_1d
_nsend
;
typename
AT
::
t_int_1d
_sendlist
;
typename
AT
::
t_int_1d
_sendflag
;
BuildExchangeListFunctor
(
const
typename
AT
::
tdual_x_array
x
,
const
typename
AT
::
tdual_int_1d
sendlist
,
typename
AT
::
tdual_int_1d
nsend
,
typename
AT
::
tdual_int_1d
sendflag
,
int
nlocal
,
int
dim
,
X_FLOAT
lo
,
X_FLOAT
hi
)
:
_x
(
x
.
template
view
<
DeviceType
>
()),
_sendlist
(
sendlist
.
template
view
<
DeviceType
>
()),
_nsend
(
nsend
.
template
view
<
DeviceType
>
()),
_sendflag
(
sendflag
.
template
view
<
DeviceType
>
()),
_nlocal
(
nlocal
),
_dim
(
dim
),
_lo
(
lo
),
_hi
(
hi
){
}
KOKKOS_INLINE_FUNCTION
void
operator
()
(
int
i
)
const
{
if
(
_x
(
i
,
_dim
)
<
_lo
||
_x
(
i
,
_dim
)
>=
_hi
)
{
const
int
mysend
=
Kokkos
::
atomic_fetch_add
(
&
_nsend
(
0
),
1
);
if
(
mysend
<
_sendlist
.
dimension_0
())
{
_sendlist
(
mysend
)
=
i
;
_sendflag
(
i
)
=
1
;
}
}
else
_sendflag
(
i
)
=
0
;
}
};
/* ---------------------------------------------------------------------- */
template
<
class
DeviceType
>
void
CommKokkos
::
exchange_device
()
{
int
i
,
nsend
,
nrecv
,
nrecv1
,
nrecv2
,
nlocal
;
double
lo
,
hi
;
double
**
x
;
double
*
sublo
,
*
subhi
;
MPI_Request
request
;
AtomVecKokkos
*
avec
=
(
AtomVecKokkos
*
)
atom
->
avec
;
// clear global->local map for owned and ghost atoms
// b/c atoms migrate to new procs in exchange() and
// new ghosts are created in borders()
// map_set() is done at end of borders()
// clear ghost count and any ghost bonus data internal to AtomVec
if
(
map_style
)
atom
->
map_clear
();
atom
->
nghost
=
0
;
atom
->
avec
->
clear_bonus
();
// subbox bounds for orthogonal or triclinic
if
(
triclinic
==
0
)
{
sublo
=
domain
->
sublo
;
subhi
=
domain
->
subhi
;
}
else
{
sublo
=
domain
->
sublo_lamda
;
subhi
=
domain
->
subhi_lamda
;
}
atomKK
->
sync
(
ExecutionSpaceFromDevice
<
DeviceType
>::
space
,
ALL_MASK
);
// loop over dimensions
for
(
int
dim
=
0
;
dim
<
3
;
dim
++
)
{
// fill buffer with atoms leaving my box, using < and >=
// when atom is deleted, fill it in with last atom
x
=
atom
->
x
;
lo
=
sublo
[
dim
];
hi
=
subhi
[
dim
];
nlocal
=
atom
->
nlocal
;
i
=
nsend
=
0
;
if
(
true
)
{
if
(
k_sendflag
.
h_view
.
dimension_0
()
<
nlocal
)
k_sendflag
.
resize
(
nlocal
);
k_sendflag
.
sync
<
DeviceType
>
();
k_count
.
h_view
(
0
)
=
k_exchange_sendlist
.
h_view
.
dimension_0
();
while
(
k_count
.
h_view
(
0
)
>=
k_exchange_sendlist
.
h_view
.
dimension_0
())
{
k_count
.
h_view
(
0
)
=
0
;
k_count
.
modify
<
LMPHostType
>
();
k_count
.
sync
<
DeviceType
>
();
BuildExchangeListFunctor
<
DeviceType
>
f
(
atomKK
->
k_x
,
k_exchange_sendlist
,
k_count
,
k_sendflag
,
nlocal
,
dim
,
lo
,
hi
);
Kokkos
::
parallel_for
(
nlocal
,
f
);
k_exchange_sendlist
.
modify
<
DeviceType
>
();
k_sendflag
.
modify
<
DeviceType
>
();
k_count
.
modify
<
DeviceType
>
();
k_count
.
sync
<
LMPHostType
>
();
if
(
k_count
.
h_view
(
0
)
>=
k_exchange_sendlist
.
h_view
.
dimension_0
())
{
k_exchange_sendlist
.
resize
(
k_count
.
h_view
(
0
)
*
1.1
);
k_exchange_copylist
.
resize
(
k_count
.
h_view
(
0
)
*
1.1
);
k_count
.
h_view
(
0
)
=
k_exchange_sendlist
.
h_view
.
dimension_0
();
}
}
k_exchange_copylist
.
sync
<
LMPHostType
>
();
k_exchange_sendlist
.
sync
<
LMPHostType
>
();
k_sendflag
.
sync
<
LMPHostType
>
();
int
sendpos
=
nlocal
-
1
;
nlocal
-=
k_count
.
h_view
(
0
);
for
(
int
i
=
0
;
i
<
k_count
.
h_view
(
0
);
i
++
)
{
if
(
k_exchange_sendlist
.
h_view
(
i
)
<
nlocal
)
{
while
(
k_sendflag
.
h_view
(
sendpos
))
sendpos
--
;
k_exchange_copylist
.
h_view
(
i
)
=
sendpos
;
sendpos
--
;
}
else
k_exchange_copylist
.
h_view
(
i
)
=
-
1
;
}
k_exchange_copylist
.
modify
<
LMPHostType
>
();
k_exchange_copylist
.
sync
<
DeviceType
>
();
nsend
=
k_count
.
h_view
(
0
);
if
(
nsend
>
maxsend
)
grow_send_kokkos
(
nsend
,
1
);
nsend
=
avec
->
pack_exchange_kokkos
(
k_count
.
h_view
(
0
),
k_buf_send
,
k_exchange_sendlist
,
k_exchange_copylist
,
ExecutionSpaceFromDevice
<
DeviceType
>::
space
,
dim
,
lo
,
hi
);
}
else
{
while
(
i
<
nlocal
)
{
if
(
x
[
i
][
dim
]
<
lo
||
x
[
i
][
dim
]
>=
hi
)
{
if
(
nsend
>
maxsend
)
grow_send_kokkos
(
nsend
,
1
);
nsend
+=
avec
->
pack_exchange
(
i
,
&
buf_send
[
nsend
]);
avec
->
copy
(
nlocal
-
1
,
i
,
1
);
nlocal
--
;
}
else
i
++
;
}
}
atom
->
nlocal
=
nlocal
;
// send/recv atoms in both directions
// if 1 proc in dimension, no send/recv, set recv buf to send buf
// if 2 procs in dimension, single send/recv
// if more than 2 procs in dimension, send/recv to both neighbors
if
(
procgrid
[
dim
]
==
1
)
{
nrecv
=
nsend
;
if
(
nrecv
)
{
atom
->
nlocal
=
avec
->
unpack_exchange_kokkos
(
k_buf_send
,
nrecv
,
atom
->
nlocal
,
dim
,
lo
,
hi
,
ExecutionSpaceFromDevice
<
DeviceType
>::
space
);
}
}
else
{
MPI_Sendrecv
(
&
nsend
,
1
,
MPI_INT
,
procneigh
[
dim
][
0
],
0
,
&
nrecv1
,
1
,
MPI_INT
,
procneigh
[
dim
][
1
],
0
,
world
,
MPI_STATUS_IGNORE
);
nrecv
=
nrecv1
;
if
(
procgrid
[
dim
]
>
2
)
{
MPI_Sendrecv
(
&
nsend
,
1
,
MPI_INT
,
procneigh
[
dim
][
1
],
0
,
&
nrecv2
,
1
,
MPI_INT
,
procneigh
[
dim
][
0
],
0
,
world
,
MPI_STATUS_IGNORE
);
nrecv
+=
nrecv2
;
}
if
(
nrecv
>
maxrecv
)
grow_recv_kokkos
(
nrecv
);
MPI_Irecv
(
k_buf_recv
.
view
<
DeviceType
>
().
ptr_on_device
(),
nrecv1
,
MPI_DOUBLE
,
procneigh
[
dim
][
1
],
0
,
world
,
&
request
);
MPI_Send
(
k_buf_send
.
view
<
DeviceType
>
().
ptr_on_device
(),
nsend
,
MPI_DOUBLE
,
procneigh
[
dim
][
0
],
0
,
world
);
MPI_Wait
(
&
request
,
MPI_STATUS_IGNORE
);
if
(
procgrid
[
dim
]
>
2
)
{
MPI_Irecv
(
k_buf_recv
.
view
<
DeviceType
>
().
ptr_on_device
()
+
nrecv1
,
nrecv2
,
MPI_DOUBLE
,
procneigh
[
dim
][
0
],
0
,
world
,
&
request
);
MPI_Send
(
k_buf_send
.
view
<
DeviceType
>
().
ptr_on_device
(),
nsend
,
MPI_DOUBLE
,
procneigh
[
dim
][
1
],
0
,
world
);
MPI_Wait
(
&
request
,
MPI_STATUS_IGNORE
);
}
if
(
nrecv
)
{
atom
->
nlocal
=
avec
->
unpack_exchange_kokkos
(
k_buf_recv
,
nrecv
,
atom
->
nlocal
,
dim
,
lo
,
hi
,
ExecutionSpaceFromDevice
<
DeviceType
>::
space
);
}
}
// check incoming atoms to see if they are in my box
// if so, add to my list
}
atomKK
->
modified
(
ExecutionSpaceFromDevice
<
DeviceType
>::
space
,
ALL_MASK
);
if
(
atom
->
firstgroupname
)
{
/* this is not yet implemented with Kokkos */
atomKK
->
sync
(
Host
,
ALL_MASK
);
atom
->
first_reorder
();
atomKK
->
modified
(
Host
,
ALL_MASK
);
}
}
/* ----------------------------------------------------------------------
borders: list nearby atoms to send to neighboring procs at every timestep
one list is created for every swap that will be made
as list is made, actually do swaps
this does equivalent of a communicate, so don't need to explicitly
call communicate routine on reneighboring timestep
this routine is called before every reneighboring
for triclinic, atoms must be in lamda coords (0-1) before borders is called
------------------------------------------------------------------------- */
void
CommKokkos
::
borders
()
{
if
(
!
exchange_comm_classic
)
{
if
(
exchange_comm_on_host
)
borders_device
<
LMPHostType
>
();
else
borders_device
<
LMPDeviceType
>
();
return
;
}
atomKK
->
sync
(
Host
,
ALL_MASK
);
atomKK
->
modified
(
Host
,
ALL_MASK
);
k_sendlist
.
sync
<
LMPHostType
>
();
k_sendlist
.
modify
<
LMPHostType
>
();
CommBrick
::
borders
();
k_sendlist
.
modify
<
LMPHostType
>
();
atomKK
->
modified
(
Host
,
ALL_MASK
);
}
/* ---------------------------------------------------------------------- */
template
<
class
DeviceType
>
struct
BuildBorderListFunctor
{
typedef
DeviceType
device_type
;
typedef
ArrayTypes
<
DeviceType
>
AT
;
X_FLOAT
lo
,
hi
;
typename
AT
::
t_x_array
x
;
int
iswap
,
maxsendlist
;
int
nfirst
,
nlast
,
dim
;
typename
AT
::
t_int_2d
sendlist
;
typename
AT
::
t_int_1d
nsend
;
BuildBorderListFunctor
(
typename
AT
::
tdual_x_array
_x
,
typename
AT
::
tdual_int_2d
_sendlist
,
typename
AT
::
tdual_int_1d
_nsend
,
int
_nfirst
,
int
_nlast
,
int
_dim
,
X_FLOAT
_lo
,
X_FLOAT
_hi
,
int
_iswap
,
int
_maxsendlist
)
:
x
(
_x
.
template
view
<
DeviceType
>
()),
sendlist
(
_sendlist
.
template
view
<
DeviceType
>
()),
nsend
(
_nsend
.
template
view
<
DeviceType
>
()),
nfirst
(
_nfirst
),
nlast
(
_nlast
),
dim
(
_dim
),
lo
(
_lo
),
hi
(
_hi
),
iswap
(
_iswap
),
maxsendlist
(
_maxsendlist
){}
KOKKOS_INLINE_FUNCTION
void
operator
()
(
typename
Kokkos
::
TeamPolicy
<
DeviceType
>::
member_type
dev
)
const
{
const
int
chunk
=
((
nlast
-
nfirst
+
dev
.
league_size
()
-
1
)
/
dev
.
league_size
());
const
int
teamstart
=
chunk
*
dev
.
league_rank
()
+
nfirst
;
const
int
teamend
=
(
teamstart
+
chunk
)
<
nlast
?
(
teamstart
+
chunk
)
:
nlast
;
int
mysend
=
0
;
for
(
int
i
=
teamstart
+
dev
.
team_rank
();
i
<
teamend
;
i
+=
dev
.
team_size
())
{
if
(
x
(
i
,
dim
)
>=
lo
&&
x
(
i
,
dim
)
<=
hi
)
mysend
++
;
}
const
int
my_store_pos
=
dev
.
team_scan
(
mysend
,
&
nsend
(
0
));
if
(
my_store_pos
+
mysend
<
maxsendlist
)
{
mysend
=
my_store_pos
;
for
(
int
i
=
teamstart
+
dev
.
team_rank
();
i
<
teamend
;
i
+=
dev
.
team_size
()){
if
(
x
(
i
,
dim
)
>=
lo
&&
x
(
i
,
dim
)
<=
hi
)
{
sendlist
(
iswap
,
mysend
++
)
=
i
;
}
}
}
}
size_t
shmem_size
(
const
int
team_size
)
const
{
(
void
)
team_size
;
return
1000u
;}
};
/* ---------------------------------------------------------------------- */
template
<
class
DeviceType
>
void
CommKokkos
::
borders_device
()
{
int
i
,
n
,
itype
,
iswap
,
dim
,
ineed
,
twoneed
,
smax
,
rmax
;
int
nsend
,
nrecv
,
sendflag
,
nfirst
,
nlast
,
ngroup
;
double
lo
,
hi
;
int
*
type
;
double
**
x
;
double
*
buf
,
*
mlo
,
*
mhi
;
MPI_Request
request
;
AtomVecKokkos
*
avec
=
(
AtomVecKokkos
*
)
atom
->
avec
;
ExecutionSpace
exec_space
=
ExecutionSpaceFromDevice
<
DeviceType
>::
space
;
k_sendlist
.
modify
<
DeviceType
>
();
atomKK
->
sync
(
exec_space
,
ALL_MASK
);
// do swaps over all 3 dimensions
iswap
=
0
;
smax
=
rmax
=
0
;
for
(
dim
=
0
;
dim
<
3
;
dim
++
)
{
nlast
=
0
;
twoneed
=
2
*
maxneed
[
dim
];
for
(
ineed
=
0
;
ineed
<
twoneed
;
ineed
++
)
{
// find atoms within slab boundaries lo/hi using <= and >=
// check atoms between nfirst and nlast
// for first swaps in a dim, check owned and ghost
// for later swaps in a dim, only check newly arrived ghosts
// store sent atom indices in list for use in future timesteps
x
=
atom
->
x
;
if
(
style
==
SINGLE
)
{
lo
=
slablo
[
iswap
];
hi
=
slabhi
[
iswap
];
}
else
{
type
=
atom
->
type
;
mlo
=
multilo
[
iswap
];
mhi
=
multihi
[
iswap
];
}
if
(
ineed
%
2
==
0
)
{
nfirst
=
nlast
;
nlast
=
atom
->
nlocal
+
atom
->
nghost
;
}
nsend
=
0
;
// sendflag = 0 if I do not send on this swap
// sendneed test indicates receiver no longer requires data
// e.g. due to non-PBC or non-uniform sub-domains
if
(
ineed
/
2
>=
sendneed
[
dim
][
ineed
%
2
])
sendflag
=
0
;
else
sendflag
=
1
;
// find send atoms according to SINGLE vs MULTI
// all atoms eligible versus atoms in bordergroup
// only need to limit loop to bordergroup for first sends (ineed < 2)
// on these sends, break loop in two: owned (in group) and ghost
if
(
sendflag
)
{
if
(
!
bordergroup
||
ineed
>=
2
)
{
if
(
style
==
SINGLE
)
{
typename
ArrayTypes
<
DeviceType
>::
tdual_int_1d
total_send
(
"TS"
,
1
);
total_send
.
h_view
(
0
)
=
0
;
if
(
exec_space
==
Device
)
{
total_send
.
template
modify
<
DeviceType
>
();
total_send
.
template
sync
<
LMPDeviceType
>
();
}
BuildBorderListFunctor
<
DeviceType
>
f
(
atomKK
->
k_x
,
k_sendlist
,
total_send
,
nfirst
,
nlast
,
dim
,
lo
,
hi
,
iswap
,
maxsendlist
[
iswap
]);
Kokkos
::
TeamPolicy
<
DeviceType
>
config
((
nlast
-
nfirst
+
127
)
/
128
,
128
);
Kokkos
::
parallel_for
(
config
,
f
);
total_send
.
template
modify
<
DeviceType
>
();
total_send
.
template
sync
<
LMPHostType
>
();
if
(
total_send
.
h_view
(
0
)
>=
maxsendlist
[
iswap
])
{
grow_list
(
iswap
,
total_send
.
h_view
(
0
));
k_sendlist
.
modify
<
DeviceType
>
();
total_send
.
h_view
(
0
)
=
0
;
if
(
exec_space
==
Device
)
{
total_send
.
template
modify
<
LMPHostType
>
();
total_send
.
template
sync
<
LMPDeviceType
>
();
}
BuildBorderListFunctor
<
DeviceType
>
f
(
atomKK
->
k_x
,
k_sendlist
,
total_send
,
nfirst
,
nlast
,
dim
,
lo
,
hi
,
iswap
,
maxsendlist
[
iswap
]);
Kokkos
::
TeamPolicy
<
DeviceType
>
config
((
nlast
-
nfirst
+
127
)
/
128
,
128
);
Kokkos
::
parallel_for
(
config
,
f
);
total_send
.
template
modify
<
DeviceType
>
();
total_send
.
template
sync
<
LMPHostType
>
();
}
nsend
=
total_send
.
h_view
(
0
);
}
else
{
error
->
all
(
FLERR
,
"Required border comm not yet "
"implemented with Kokkos"
);
for
(
i
=
nfirst
;
i
<
nlast
;
i
++
)
{
itype
=
type
[
i
];
if
(
x
[
i
][
dim
]
>=
mlo
[
itype
]
&&
x
[
i
][
dim
]
<=
mhi
[
itype
])
{
if
(
nsend
==
maxsendlist
[
iswap
])
grow_list
(
iswap
,
nsend
);
sendlist
[
iswap
][
nsend
++
]
=
i
;
}
}
}
}
else
{
error
->
all
(
FLERR
,
"Required border comm not yet "
"implemented with Kokkos"
);
if
(
style
==
SINGLE
)
{
ngroup
=
atom
->
nfirst
;
for
(
i
=
0
;
i
<
ngroup
;
i
++
)
if
(
x
[
i
][
dim
]
>=
lo
&&
x
[
i
][
dim
]
<=
hi
)
{
if
(
nsend
==
maxsendlist
[
iswap
])
grow_list
(
iswap
,
nsend
);
sendlist
[
iswap
][
nsend
++
]
=
i
;
}
for
(
i
=
atom
->
nlocal
;
i
<
nlast
;
i
++
)
if
(
x
[
i
][
dim
]
>=
lo
&&
x
[
i
][
dim
]
<=
hi
)
{
if
(
nsend
==
maxsendlist
[
iswap
])
grow_list
(
iswap
,
nsend
);
sendlist
[
iswap
][
nsend
++
]
=
i
;
}
}
else
{
ngroup
=
atom
->
nfirst
;
for
(
i
=
0
;
i
<
ngroup
;
i
++
)
{
itype
=
type
[
i
];
if
(
x
[
i
][
dim
]
>=
mlo
[
itype
]
&&
x
[
i
][
dim
]
<=
mhi
[
itype
])
{
if
(
nsend
==
maxsendlist
[
iswap
])
grow_list
(
iswap
,
nsend
);
sendlist
[
iswap
][
nsend
++
]
=
i
;
}
}
for
(
i
=
atom
->
nlocal
;
i
<
nlast
;
i
++
)
{
itype
=
type
[
i
];
if
(
x
[
i
][
dim
]
>=
mlo
[
itype
]
&&
x
[
i
][
dim
]
<=
mhi
[
itype
])
{
if
(
nsend
==
maxsendlist
[
iswap
])
grow_list
(
iswap
,
nsend
);
sendlist
[
iswap
][
nsend
++
]
=
i
;
}
}
}
}
}
// pack up list of border atoms
if
(
nsend
*
size_border
>
maxsend
)
grow_send_kokkos
(
nsend
*
size_border
,
0
);
if
(
ghost_velocity
)
{
error
->
all
(
FLERR
,
"Required border comm not yet "
"implemented with Kokkos"
);
n
=
avec
->
pack_border_vel
(
nsend
,
sendlist
[
iswap
],
buf_send
,
pbc_flag
[
iswap
],
pbc
[
iswap
]);
}
else
n
=
avec
->
pack_border_kokkos
(
nsend
,
k_sendlist
,
k_buf_send
,
iswap
,
pbc_flag
[
iswap
],
pbc
[
iswap
],
exec_space
);
// swap atoms with other proc
// no MPI calls except SendRecv if nsend/nrecv = 0
// put incoming ghosts at end of my atom arrays
// if swapping with self, simply copy, no messages
if
(
sendproc
[
iswap
]
!=
me
)
{
MPI_Sendrecv
(
&
nsend
,
1
,
MPI_INT
,
sendproc
[
iswap
],
0
,
&
nrecv
,
1
,
MPI_INT
,
recvproc
[
iswap
],
0
,
world
,
MPI_STATUS_IGNORE
);
if
(
nrecv
*
size_border
>
maxrecv
)
grow_recv_kokkos
(
nrecv
*
size_border
);
if
(
nrecv
)
MPI_Irecv
(
k_buf_recv
.
view
<
DeviceType
>
().
ptr_on_device
(),
nrecv
*
size_border
,
MPI_DOUBLE
,
recvproc
[
iswap
],
0
,
world
,
&
request
);
if
(
n
)
MPI_Send
(
k_buf_send
.
view
<
DeviceType
>
().
ptr_on_device
(),
n
,
MPI_DOUBLE
,
sendproc
[
iswap
],
0
,
world
);
if
(
nrecv
)
MPI_Wait
(
&
request
,
MPI_STATUS_IGNORE
);
buf
=
buf_recv
;
}
else
{
nrecv
=
nsend
;
buf
=
buf_send
;
}
// unpack buffer
if
(
ghost_velocity
)
{
error
->
all
(
FLERR
,
"Required border comm not yet "
"implemented with Kokkos"
);
avec
->
unpack_border_vel
(
nrecv
,
atom
->
nlocal
+
atom
->
nghost
,
buf
);
}
else
if
(
sendproc
[
iswap
]
!=
me
)
avec
->
unpack_border_kokkos
(
nrecv
,
atom
->
nlocal
+
atom
->
nghost
,
k_buf_recv
,
exec_space
);
else
avec
->
unpack_border_kokkos
(
nrecv
,
atom
->
nlocal
+
atom
->
nghost
,
k_buf_send
,
exec_space
);
// set all pointers & counters
smax
=
MAX
(
smax
,
nsend
);
rmax
=
MAX
(
rmax
,
nrecv
);
sendnum
[
iswap
]
=
nsend
;
recvnum
[
iswap
]
=
nrecv
;
size_forward_recv
[
iswap
]
=
nrecv
*
size_forward
;
size_reverse_send
[
iswap
]
=
nrecv
*
size_reverse
;
size_reverse_recv
[
iswap
]
=
nsend
*
size_reverse
;
firstrecv
[
iswap
]
=
atom
->
nlocal
+
atom
->
nghost
;
atom
->
nghost
+=
nrecv
;
iswap
++
;
}
}
// insure send/recv buffers are long enough for all forward & reverse comm
int
max
=
MAX
(
maxforward
*
smax
,
maxreverse
*
rmax
);
if
(
max
>
maxsend
)
grow_send_kokkos
(
max
,
0
);
max
=
MAX
(
maxforward
*
rmax
,
maxreverse
*
smax
);
if
(
max
>
maxrecv
)
grow_recv_kokkos
(
max
);
// reset global->local map
if
(
exec_space
==
Host
)
k_sendlist
.
sync
<
LMPDeviceType
>
();
atomKK
->
modified
(
exec_space
,
ALL_MASK
);
atomKK
->
sync
(
Host
,
TAG_MASK
);
if
(
map_style
)
atom
->
map_set
();
}
/* ----------------------------------------------------------------------
realloc the size of the send buffer as needed with BUFFACTOR and bufextra
if flag = 1, realloc
if flag = 0, don't need to realloc with copy, just free/malloc
------------------------------------------------------------------------- */
void
CommKokkos
::
grow_send
(
int
n
,
int
flag
)
{
grow_send_kokkos
(
n
,
flag
,
Host
);
}
/* ----------------------------------------------------------------------
free/malloc the size of the recv buffer as needed with BUFFACTOR
------------------------------------------------------------------------- */
void
CommKokkos
::
grow_recv
(
int
n
)
{
grow_recv_kokkos
(
n
,
Host
);
}
/* ----------------------------------------------------------------------
realloc the size of the send buffer as needed with BUFFACTOR & BUFEXTRA
if flag = 1, realloc
if flag = 0, don't need to realloc with copy, just free/malloc
------------------------------------------------------------------------- */
void
CommKokkos
::
grow_send_kokkos
(
int
n
,
int
flag
,
ExecutionSpace
space
)
{
maxsend
=
static_cast
<
int
>
(
BUFFACTOR
*
n
);
int
maxsend_border
=
(
maxsend
+
BUFEXTRA
+
5
)
/
atom
->
avec
->
size_border
+
2
;
if
(
flag
)
{
if
(
space
==
Device
)
k_buf_send
.
modify
<
LMPDeviceType
>
();
else
k_buf_send
.
modify
<
LMPHostType
>
();
k_buf_send
.
resize
(
maxsend_border
,
atom
->
avec
->
size_border
);
buf_send
=
k_buf_send
.
view
<
LMPHostType
>
().
ptr_on_device
();
}
else
{
k_buf_send
=
ArrayTypes
<
LMPDeviceType
>::
tdual_xfloat_2d
(
"comm:k_buf_send"
,
maxsend_border
,
atom
->
avec
->
size_border
);
buf_send
=
k_buf_send
.
view
<
LMPHostType
>
().
ptr_on_device
();
}
}
/* ----------------------------------------------------------------------
free/malloc the size of the recv buffer as needed with BUFFACTOR
------------------------------------------------------------------------- */
void
CommKokkos
::
grow_recv_kokkos
(
int
n
,
ExecutionSpace
space
)
{
maxrecv
=
static_cast
<
int
>
(
BUFFACTOR
*
n
);
int
maxrecv_border
=
(
maxrecv
+
BUFEXTRA
+
5
)
/
atom
->
avec
->
size_border
+
2
;
k_buf_recv
=
ArrayTypes
<
LMPDeviceType
>::
tdual_xfloat_2d
(
"comm:k_buf_recv"
,
maxrecv_border
,
atom
->
avec
->
size_border
);
buf_recv
=
k_buf_recv
.
view
<
LMPHostType
>
().
ptr_on_device
();
}
/* ----------------------------------------------------------------------
realloc the size of the iswap sendlist as needed with BUFFACTOR
------------------------------------------------------------------------- */
void
CommKokkos
::
grow_list
(
int
iswap
,
int
n
)
{
int
size
=
static_cast
<
int
>
(
BUFFACTOR
*
n
);
memory
->
grow_kokkos
(
k_sendlist
,
sendlist
,
maxswap
,
size
,
"comm:sendlist"
);
for
(
int
i
=
0
;
i
<
maxswap
;
i
++
)
{
maxsendlist
[
i
]
=
size
;
sendlist
[
i
]
=&
k_sendlist
.
view
<
LMPHostType
>
()(
i
,
0
);
}
}
/* ----------------------------------------------------------------------
realloc the buffers needed for swaps
------------------------------------------------------------------------- */
void
CommKokkos
::
grow_swap
(
int
n
)
{
free_swap
();
allocate_swap
(
n
);
if
(
style
==
MULTI
)
{
free_multi
();
allocate_multi
(
n
);
}
maxswap
=
n
;
int
size
=
MAX
(
k_sendlist
.
d_view
.
dimension_1
(),
BUFMIN
);
memory
->
grow_kokkos
(
k_sendlist
,
sendlist
,
maxswap
,
size
,
"comm:sendlist"
);
memory
->
grow
(
maxsendlist
,
n
,
"comm:maxsendlist"
);
for
(
int
i
=
0
;
i
<
maxswap
;
i
++
)
maxsendlist
[
i
]
=
size
;
}
Event Timeline
Log In to Comment