Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F92256389
pair_coul_wolf_kokkos.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, Nov 18, 20:12
Size
13 KB
Mime Type
text/x-c
Expires
Wed, Nov 20, 20:12 (1 d, 21 h)
Engine
blob
Format
Raw Data
Handle
22403399
Attached To
rLAMMPS lammps
pair_coul_wolf_kokkos.cpp
View Options
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Stan Moore (SNL)
------------------------------------------------------------------------- */
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "pair_coul_wolf_kokkos.h"
#include "kokkos.h"
#include "atom_kokkos.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list_kokkos.h"
#include "neigh_request.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "math_const.h"
#include "memory.h"
#include "error.h"
#include "atom_masks.h"
// Names from the LAMMPS core namespace and the math-constant namespace
// (MY_PIS below comes from the latter) are used unqualified in this file.
using namespace LAMMPS_NS;
using namespace MathConst;

// NOTE(review): presumably CUDA launch-bound tuning values; nothing in the
// visible part of this file reads them - confirm before removing.
#define KOKKOS_CUDA_MAX_THREADS 256
#define KOKKOS_CUDA_MIN_BLOCKS 8
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Constructor: forwards to PairCoulWolf, then records the Kokkos-side
   bookkeeping (atom container, execution space and the data masks that
   compute() later passes to atomKK->sync() / atomKK->modified()).
------------------------------------------------------------------------- */
template<class DeviceType>
PairCoulWolfKokkos<DeviceType>::PairCoulWolfKokkos(LAMMPS *lmp) : PairCoulWolf(lmp)
{
  respa_enable = 0;

  // Kokkos view of the atom data and the space this instantiation runs in
  atomKK = (AtomKokkos *) atom;
  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;

  // fields compute() synchronizes before the kernel ...
  datamask_read = X_MASK | F_MASK | TYPE_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK;

  // ... and fields it flags as modified when energy/virial are tallied
  datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Destructor: releases the per-atom energy/virial arrays, except while
   copymode is set. compute() raises copymode for the duration of the
   kernel launches, where *this is handed to Kokkos (presumably so that
   functor copies never free the arrays owned by the original object).
------------------------------------------------------------------------- */
template<class DeviceType>
PairCoulWolfKokkos<DeviceType>::~PairCoulWolfKokkos()
{
  if (copymode) return;

  memory->destroy_kokkos(k_eatom,eatom);
  memory->destroy_kokkos(k_vatom,vatom);
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Host-side driver for one force evaluation.

   eflag_in / vflag_in are stored in eflag / vflag and forwarded to
   ev_setup(). The routine (re)allocates the per-atom tally arrays when
   requested, synchronizes atom data to the execution space, caches the
   shift constants and the device views used by the kernel, launches
   TagPairCoulWolfKernelA for the active neighbor style / newton setting,
   and finally folds the reduced energy and virial into eng_coul / virial.
------------------------------------------------------------------------- */
template<class DeviceType>
void PairCoulWolfKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
{
  eflag = eflag_in;
  vflag = vflag_in;

  if (neighflag == FULL) no_virial_fdotr_compute = 1;

  if (eflag || vflag) ev_setup(eflag,vflag);
  else evflag = vflag_fdotr = 0;

  // reallocate per-atom arrays if necessary

  if (eflag_atom) {
    memory->destroy_kokkos(k_eatom,eatom);
    memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
    d_eatom = k_eatom.d_view;
  }
  if (vflag_atom) {
    memory->destroy_kokkos(k_vatom,vatom);
    memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom");
    d_vatom = k_vatom.d_view;
  }

  // bring the inputs up to date in the execution space and flag the outputs

  atomKK->sync(execution_space,datamask_read);
  if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
  else atomKK->modified(execution_space,F_MASK);

  // shifted coulombic energy

  e_shift = erfc(alf*cut_coul)/cut_coul;
  f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
    cut_coul;

  // cache everything the kernel reads through the functor copy

  x = atomKK->k_x.view<DeviceType>();
  f = atomKK->k_f.view<DeviceType>();
  q = atomKK->k_q.view<DeviceType>();
  nlocal = atom->nlocal;
  nall = atom->nlocal + atom->nghost;
  newton_pair = force->newton_pair;

  NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
  d_numneigh = k_list->d_numneigh;
  d_neighbors = k_list->d_neighbors;
  d_ilist = k_list->d_ilist;

  for (int n = 0; n < 4; n++)
    special_coul[n] = force->special_coul[n];

  qqrd2e = force->qqrd2e;

  int inum = list->inum;

  // clean_copy() on the neighbor list before *this is copied into the kernels
  // (original note: sets allocations NULL which are destructed by the PairStyle)

  k_list->clean_copy();
  copymode = 1;

  // loop over neighbors of my atoms

  EV_FLOAT ev;

  // compute kernel A
  // evflag selects a reduction (tallying into ev) over a plain parallel_for;
  // the template arguments are <neighbor style, newton_pair, evflag>

  if (neighflag == HALF) {
    if (evflag) {
      if (newton_pair) {
        Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<HALF,1,1> >(0,inum),*this,ev);
      } else {
        Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<HALF,0,1> >(0,inum),*this,ev);
      }
    } else {
      if (newton_pair) {
        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<HALF,1,0> >(0,inum),*this);
      } else {
        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<HALF,0,0> >(0,inum),*this);
      }
    }
  } else if (neighflag == HALFTHREAD) {
    if (evflag) {
      if (newton_pair) {
        Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<HALFTHREAD,1,1> >(0,inum),*this,ev);
      } else {
        Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<HALFTHREAD,0,1> >(0,inum),*this,ev);
      }
    } else {
      if (newton_pair) {
        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<HALFTHREAD,1,0> >(0,inum),*this);
      } else {
        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<HALFTHREAD,0,0> >(0,inum),*this);
      }
    }
  } else if (neighflag == FULL) {
    if (evflag) {
      if (newton_pair) {
        Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<FULL,1,1> >(0,inum),*this,ev);
      } else {
        Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<FULL,0,1> >(0,inum),*this,ev);
      }
    } else {
      if (newton_pair) {
        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<FULL,1,0> >(0,inum),*this);
      } else {
        Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairCoulWolfKernelA<FULL,0,0> >(0,inum),*this);
      }
    }
  }

  // fold the reduced totals into the global accumulators

  if (eflag_global) eng_coul += ev.ecoul;
  if (vflag_global) {
    for (int n = 0; n < 6; n++)
      virial[n] += ev.v[n];
  }

  if (vflag_fdotr) pair_virial_fdotr_compute(this);

  // per-atom tallies were written through the DeviceType view; mirror them to the host

  if (eflag_atom) {
    k_eatom.template modify<DeviceType>();
    k_eatom.template sync<LMPHostType>();
  }

  if (vflag_atom) {
    k_vatom.template modify<DeviceType>();
    k_vatom.template sync<LMPHostType>();
  }

  copymode = 0;
}
/* ----------------------------------------------------------------------
init specific to this pair style
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Runs the parent init_style(), then adjusts the neighbor request the
   parent just created (the last entry in neighbor->requests) so that it
   targets the right Kokkos memory space and the half/full list style
   selected by lmp->kokkos->neighflag. Any other neighbor style is
   rejected through error->all().
------------------------------------------------------------------------- */
template<class DeviceType>
void PairCoulWolfKokkos<DeviceType>::init_style()
{
  PairCoulWolf::init_style();

  // irequest = neigh request made by parent class

  neighflag = lmp->kokkos->neighflag;
  const int irequest = neighbor->nrequest - 1;

  // host list only when DeviceType is the host type and not also the device type
  const bool is_host_type = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value;
  const bool is_device_type = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;

  neighbor->requests[irequest]->kokkos_host = is_host_type && !is_device_type;
  neighbor->requests[irequest]->kokkos_device = is_device_type;

  const bool want_full = (neighflag == FULL);
  const bool want_half = (neighflag == HALF || neighflag == HALFTHREAD);

  if (!want_full && !want_half) {
    error->all(FLERR,"Cannot use chosen neighbor list style with coul/wolf/kk");
  } else {
    // want_full takes precedence, exactly one of full/half ends up set
    neighbor->requests[irequest]->full = want_full ? 1 : 0;
    neighbor->requests[irequest]->half = want_full ? 0 : 1;
    neighbor->requests[irequest]->full_cluster = 0;
  }
}
/* ---------------------------------------------------------------------- */
////Specialisation for Neighborlist types Half, HalfThread, Full
/* ----------------------------------------------------------------------
   Kernel body for one entry ii of d_ilist, with energy/virial reduction.

   For atom i = d_ilist[ii] it adds the self-energy term (when eflag is
   set), loops over the d_numneigh[i] neighbors, and for every pair inside
   cut_coulsq accumulates the shifted coulombic force. With a half list
   the reaction force is also applied to j when NEWTON_PAIR is set or j is
   a local atom. When EVFLAG is set, pair energy goes into ev.ecoul and
   ev_tally() handles virial / per-atom energy.
------------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void PairCoulWolfKokkos<DeviceType>::operator()(TagPairCoulWolfKernelA<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii, EV_FLOAT& ev) const {

  // The f array is atomic for Half/Thread neighbor style
  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_eatom = k_eatom.view<DeviceType>();

  // compile-time: does this list style visit each pair only once?
  const bool half_list = (NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD);

  const int i = d_ilist[ii];
  const X_FLOAT xtmp = x(i,0);
  const X_FLOAT ytmp = x(i,1);
  const X_FLOAT ztmp = x(i,2);
  const F_FLOAT qtmp = q[i];

  // self-energy of atom i

  if (eflag) {
    const F_FLOAT qisq = qtmp*qtmp;
    const F_FLOAT e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e;
    if (eflag_global)
      ev.ecoul += e_self;
    if (eflag_atom)
      v_eatom[i] += e_self;
  }

  //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i);
  const int jnum = d_numneigh[i];

  // force on i is collected locally and written once after the loop
  F_FLOAT fxtmp = 0.0;
  F_FLOAT fytmp = 0.0;
  F_FLOAT fztmp = 0.0;

  for (int jj = 0; jj < jnum; jj++) {
    //int j = d_neighbors_i(jj);
    int j = d_neighbors(i,jj);

    // the special-bond bits live in the raw neighbor entry; read them before masking
    const F_FLOAT factor_coul = special_coul[sbmask(j)];
    j &= NEIGHMASK;

    const X_FLOAT delx = xtmp - x(j,0);
    const X_FLOAT dely = ytmp - x(j,1);
    const X_FLOAT delz = ztmp - x(j,2);
    const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;

    // written as a negated '<' so the skip condition matches the original test exactly
    if (!(rsq < cut_coulsq)) continue;

    const F_FLOAT r = sqrt(rsq);
    const F_FLOAT prefactor = qqrd2e*qtmp*q[j]/r;
    const F_FLOAT erfcc = erfc(alf*r);
    const F_FLOAT erfcd = exp(-alf*alf*r*r);
    const F_FLOAT v_sh = (erfcc - e_shift*r) * prefactor;
    const F_FLOAT dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;

    F_FLOAT forcecoul = dvdrr*rsq*prefactor;
    if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
    const F_FLOAT fpair = forcecoul / rsq;

    fxtmp += delx*fpair;
    fytmp += dely*fpair;
    fztmp += delz*fpair;

    // with a half list, j receives the reaction force / full pair energy here
    const bool pair_owned = half_list && (NEWTON_PAIR || j < nlocal);

    if (pair_owned) {
      a_f(j,0) -= delx*fpair;
      a_f(j,1) -= dely*fpair;
      a_f(j,2) -= delz*fpair;
    }

    if (EVFLAG) {
      F_FLOAT ecoul = v_sh;
      if (eflag) {
        if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
        ev.ecoul += (pair_owned ? 1.0 : 0.5)*ecoul;
      }

      if (vflag_either || eflag_atom)
        this->template ev_tally<NEIGHFLAG,NEWTON_PAIR>(ev,i,j,ecoul,fpair,delx,dely,delz);
    }
  }

  a_f(i,0) += fxtmp;
  a_f(i,1) += fytmp;
  a_f(i,2) += fztmp;
}
/* ----------------------------------------------------------------------
   Kernel entry used by parallel_for (no reduction argument): forwards to
   the reducing overload with a local EV_FLOAT whose contents are dropped.
------------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void PairCoulWolfKokkos<DeviceType>::operator()(TagPairCoulWolfKernelA<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int &ii) const {
  EV_FLOAT discarded;
  this->template operator()<NEIGHFLAG,NEWTON_PAIR,EVFLAG>(TagPairCoulWolfKernelA<NEIGHFLAG,NEWTON_PAIR,EVFLAG>(), ii, discarded);
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Tally one pair interaction into the per-atom energy and the global /
   per-atom virial.

   ev      reduction accumulator; only ev.v[] is updated here
   i, j    the two atoms of the pair
   epair   pair energy, split half/half between i and j
   fpair   force magnitude divided by r (multiplies the del* components)
   delx, dely, delz   separation vector components

   With a FULL list only atom i receives its half share; otherwise i and
   j each receive a half share, subject to NEWTON_PAIR or the atom being
   local (index < nlocal).
------------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG, int NEWTON_PAIR>
KOKKOS_INLINE_FUNCTION
void PairCoulWolfKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &j,
      const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
                const F_FLOAT &dely, const F_FLOAT &delz) const
{
  // The eatom and vatom arrays are atomic for Half/Thread neighbor style
  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_eatom = k_eatom.view<DeviceType>();
  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > v_vatom = k_vatom.view<DeviceType>();

  // which of the two atoms receive a share of this pair
  const bool tally_i = (NEIGHFLAG == FULL) || NEWTON_PAIR || i < nlocal;
  const bool tally_j = (NEIGHFLAG != FULL) && (NEWTON_PAIR || j < nlocal);

  if (eflag && eflag_atom) {
    const E_FLOAT epairhalf = 0.5 * epair;
    if (tally_i) v_eatom[i] += epairhalf;
    if (tally_j) v_eatom[j] += epairhalf;
  }

  if (vflag_either) {
    // six virial components: xx, yy, zz, xy, xz, yz
    E_FLOAT v[6];
    v[0] = delx*delx*fpair;
    v[1] = dely*dely*fpair;
    v[2] = delz*delz*fpair;
    v[3] = delx*dely*fpair;
    v[4] = delx*delz*fpair;
    v[5] = dely*delz*fpair;

    if (vflag_global) {
      if (tally_i) {
        for (int n = 0; n < 6; n++) ev.v[n] += 0.5*v[n];
      }
      if (tally_j) {
        for (int n = 0; n < 6; n++) ev.v[n] += 0.5*v[n];
      }
    }

    if (vflag_atom) {
      if (tally_i) {
        for (int n = 0; n < 6; n++) v_vatom(i,n) += 0.5*v[n];
      }
      if (tally_j) {
        for (int n = 0; n < 6; n++) v_vatom(j,n) += 0.5*v[n];
      }
    }
  }
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Extract the two bits located SBBITS above bit 0 of a raw neighbor
   entry; the kernel uses the result (0..3) as an index into special_coul.
------------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
int PairCoulWolfKokkos<DeviceType>::sbmask(const int& j) const {
  const int shifted = j >> SBBITS;
  return shifted & 3;
}
// Explicit instantiations of the pair style. The LMPHostType one is only
// emitted when KOKKOS_HAVE_CUDA is defined.
// NOTE(review): presumably LMPHostType and LMPDeviceType coincide without
// CUDA, which would make a second instantiation a duplicate - confirm.
namespace LAMMPS_NS {
template class PairCoulWolfKokkos<LMPDeviceType>;
#ifdef KOKKOS_HAVE_CUDA
template class PairCoulWolfKokkos<LMPHostType>;
#endif
}
Event Timeline
Log In to Comment