Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F107049181
pair_omp_gpu.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Thu, Apr 3, 23:45
Size
21 KB
Mime Type
text/x-c
Expires
Sat, Apr 5, 23:45 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
25335617
Attached To
rLAMMPS lammps
pair_omp_gpu.cpp
View Options
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
Modified by Mike for use with GPU library
------------------------------------------------------------------------- */
#if defined(_OPENMP)
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "pair_omp_gpu.h"
#include "memory.h"
using
namespace
LAMMPS_NS
;
/* ----------------------------------------------------------------------
   construct with no per-thread storage allocated
   all buffer pointers start out NULL and all counts/capacities start at
   zero, so memory_usage() does not read indeterminate values when it is
   called before init_style()
------------------------------------------------------------------------- */

PairOMPGPU::PairOMPGPU(LAMMPS *lmp) : Pointers(lmp)
{
  // per-thread accumulators: the global ones are allocated in init_style(),
  // the per-atom ones in ev_setup_thr(); f_thr is not allocated in this file
  eng_vdwl_thr = NULL;
  eng_coul_thr = NULL;
  virial_thr = NULL;
  eatom_thr = NULL;
  vatom_thr = NULL;
  f_thr = NULL;

  // bookkeeping that memory_usage() and ev_setup_thr() read; previously
  // these stayed uninitialized until free_mem()/init_style() had run
  _nthreads = 0;
  _nmax = 0;
  maxeatom_thr = 0;
  maxvatom_thr = 0;
}
/* ----------------------------------------------------------------------
   destructor: hands all per-thread buffers back through free_mem()
------------------------------------------------------------------------- */

PairOMPGPU::~PairOMPGPU()
{
  this->free_mem();
}
/* ----------------------------------------------------------------------
free any allocated memory
------------------------------------------------------------------------- */
void
PairOMPGPU
::
free_mem
()
{
memory
->
sfree
(
eng_vdwl_thr
);
memory
->
sfree
(
eng_coul_thr
);
memory
->
destroy
(
virial_thr
);
memory
->
destroy
(
eatom_thr
);
memory
->
destroy
(
vatom_thr
);
memory
->
destroy
(
f_thr
);
eng_vdwl_thr
=
NULL
;
eng_coul_thr
=
NULL
;
virial_thr
=
NULL
;
eatom_thr
=
NULL
;
vatom_thr
=
NULL
;
f_thr
=
NULL
;
_nmax
=
0
;
}
/* ----------------------------------------------------------------------
   init specific to this pair style
   drops any previous per-thread storage, records the size of the OpenMP
   thread team in _nthreads and allocates the per-thread global energy
   and virial accumulators; per-atom capacities are reset to zero
------------------------------------------------------------------------- */

void PairOMPGPU::init_style()
{
  free_mem();

  // enter a parallel region only to find out how many threads it uses;
  // thread 0 is the single writer of _nthreads
#pragma omp parallel
  {
    const int my_thread = omp_get_thread_num();
#pragma omp barrier
    if (my_thread == 0) {
      _nthreads = omp_get_num_threads();
    }
  }

  // for hybrid OpenMP/MPI we need multiple copies
  // of some accumulators to avoid race conditions
  eng_vdwl_thr = static_cast<double *>(
      memory->smalloc(_nthreads * sizeof(double), "pair:eng_vdwl_thr"));
  eng_coul_thr = static_cast<double *>(
      memory->smalloc(_nthreads * sizeof(double), "pair:eng_coul_thr"));
  memory->create(virial_thr, _nthreads, 6, "pair:virial_thr");

  // per-atom arrays are allocated on demand in ev_setup_thr()
  maxvatom_thr = 0;
  maxeatom_thr = 0;
}
/* ----------------------------------------------------------------------
   setup for energy, virial computation. additional code for multi-threading
   see integrate::ev_set() for values of eflag (0-3) and vflag (0-6)
   stores the six either/global/atom flags handed in by the caller, grows
   the per-thread per-atom arrays when atom->nmax exceeds their capacity
   and zeroes every per-thread accumulator whose flag is set
   eflag and vflag themselves are not used in this function
------------------------------------------------------------------------- */

void PairOMPGPU::ev_setup_thr(int eflag, int vflag, int _eflag_either,
                              int _eflag_global, int _eflag_atom,
                              int _vflag_either, int _vflag_global,
                              int _vflag_atom)
{
  // remember the flags; the tally and reduce routines test them
  eflag_either = _eflag_either;
  eflag_global = _eflag_global;
  eflag_atom = _eflag_atom;
  vflag_either = _vflag_either;
  vflag_global = _vflag_global;
  vflag_atom = _vflag_atom;

  // reallocate per-atom arrays if necessary

  const int nmax_now = atom->nmax;

  if (eflag_atom && nmax_now > maxeatom_thr) {
    maxeatom_thr = nmax_now;
    memory->destroy(eatom_thr);
    memory->create(eatom_thr, _nthreads, maxeatom_thr, "pair:eatom_thr");
  }

  if (vflag_atom && nmax_now > maxvatom_thr) {
    maxvatom_thr = nmax_now;
    memory->destroy(vatom_thr);
    memory->create(vatom_thr, _nthreads, maxvatom_thr, 6, "pair:vatom_thr");
  }

  // zero per thread accumulators
  // use force->newton instead of newton_pair
  // b/c some bonds/dihedrals call pair::ev_tally with pairwise info

  int nzero = atom->nlocal;
  if (force->newton) nzero += atom->nghost;

  if (eflag_global) {
    for (int thr = 0; thr < _nthreads; ++thr) {
      eng_coul_thr[thr] = 0.0;
      eng_vdwl_thr[thr] = 0.0;
    }
  }

  if (vflag_global) {
    for (int thr = 0; thr < _nthreads; ++thr) {
      double *vir = virial_thr[thr];
      for (int c = 0; c < 6; ++c) vir[c] = 0.0;
    }
  }

  if (eflag_atom) {
    for (int thr = 0; thr < _nthreads; ++thr) {
      double *e_thr = eatom_thr[thr];
      for (int a = 0; a < nzero; ++a) e_thr[a] = 0.0;
    }
  }

  if (vflag_atom) {
    for (int thr = 0; thr < _nthreads; ++thr) {
      for (int a = 0; a < nzero; ++a) {
        double *v_atom = vatom_thr[thr][a];
        for (int c = 0; c < 6; ++c) v_atom[c] = 0.0;
      }
    }
  }
}
/* ----------------------------------------------------------------------
   tally eng_vdwl and virial into per thread global and per-atom accumulators
   everything is added to the accumulators indexed by tid
   with newton_pair off, atom i (or j) only receives its half share when
   its index is < nlocal
   need i < nlocal test since called by bond_quartic and dihedral_charmm
------------------------------------------------------------------------- */

void PairOMPGPU::ev_tally_thr(int i, int j, int nlocal, int newton_pair,
                              double evdwl, double ecoul, double fpair,
                              double delx, double dely, double delz, int tid)
{
  // per-atom tallies include an atom when newton_pair is on
  // or when the atom's index is below nlocal
  const bool tally_i = newton_pair || (i < nlocal);
  const bool tally_j = newton_pair || (j < nlocal);

  if (eflag_either) {
    if (eflag_global) {
      if (newton_pair) {
        eng_vdwl_thr[tid] += evdwl;
        eng_coul_thr[tid] += ecoul;
      } else {
        // one half of the pair energy for each end that is below nlocal
        const double half_vdwl = 0.5 * evdwl;
        const double half_coul = 0.5 * ecoul;
        const int ends[2] = {i, j};
        for (int s = 0; s < 2; ++s) {
          if (ends[s] >= nlocal) continue;
          eng_vdwl_thr[tid] += half_vdwl;
          eng_coul_thr[tid] += half_coul;
        }
      }
    }

    if (eflag_atom) {
      const double half_pair = 0.5 * (evdwl + ecoul);
      double *e_mine = eatom_thr[tid];
      if (tally_i) e_mine[i] += half_pair;
      if (tally_j) e_mine[j] += half_pair;
    }
  }

  if (vflag_either) {
    // six virial components in the order xx, yy, zz, xy, xz, yz
    const double v[6] = {delx * delx * fpair, dely * dely * fpair,
                         delz * delz * fpair, delx * dely * fpair,
                         delx * delz * fpair, dely * delz * fpair};

    if (vflag_global) {
      double *vir = virial_thr[tid];
      if (newton_pair) {
        for (int c = 0; c < 6; ++c) vir[c] += v[c];
      } else {
        if (i < nlocal) {
          for (int c = 0; c < 6; ++c) vir[c] += 0.5 * v[c];
        }
        if (j < nlocal) {
          for (int c = 0; c < 6; ++c) vir[c] += 0.5 * v[c];
        }
      }
    }

    if (vflag_atom) {
      if (tally_i) {
        double *v_i = vatom_thr[tid][i];
        for (int c = 0; c < 6; ++c) v_i[c] += 0.5 * v[c];
      }
      if (tally_j) {
        double *v_j = vatom_thr[tid][j];
        for (int c = 0; c < 6; ++c) v_j[c] += 0.5 * v[c];
      }
    }
  }
}
/* ----------------------------------------------------------------------
   tally eng_vdwl and virial into per thread global and per-atom accumulators
   only atom i is touched and it always receives one half of the pair
   energy and virial; there is no j, nlocal or newton_pair test here
   NOTE(review): presumably meant for full neighbor lists, where each
   pair is visited once from each of its atoms - confirm with the callers
------------------------------------------------------------------------- */

void PairOMPGPU::ev_tally_full_thr(int i, double evdwl, double ecoul,
                                   double fpair, double delx, double dely,
                                   double delz, int tid)
{
  if (eflag_either) {
    if (eflag_global) {
      const double half_vdwl = 0.5 * evdwl;
      const double half_coul = 0.5 * ecoul;
      eng_vdwl_thr[tid] += half_vdwl;
      eng_coul_thr[tid] += half_coul;
    }
    if (eflag_atom) {
      const double half_pair = 0.5 * (evdwl + ecoul);
      eatom_thr[tid][i] += half_pair;
    }
  }

  if (!vflag_either) return;

  // six virial components in the order xx, yy, zz, xy, xz, yz
  const double v[6] = {delx * delx * fpair, dely * dely * fpair,
                       delz * delz * fpair, delx * dely * fpair,
                       delx * delz * fpair, dely * delz * fpair};

  if (vflag_global) {
    double *vir = virial_thr[tid];
    for (int c = 0; c < 6; ++c) vir[c] += 0.5 * v[c];
  }

  if (vflag_atom) {
    double *v_i = vatom_thr[tid][i];
    for (int c = 0; c < 6; ++c) v_i[c] += 0.5 * v[c];
  }
}
/* ----------------------------------------------------------------------
   tally eng_vdwl and virial into per thread global and per-atom accumulators
   for virial, have delx,dely,delz and fx,fy,fz
   same nlocal/newton_pair rules as ev_tally_thr(), but the virial is
   built from the force vector instead of a scalar fpair
------------------------------------------------------------------------- */

void PairOMPGPU::ev_tally_xyz_thr(int i, int j, int nlocal, int newton_pair,
                                  double evdwl, double ecoul,
                                  double fx, double fy, double fz,
                                  double delx, double dely, double delz,
                                  int tid)
{
  // per-atom tallies include an atom when newton_pair is on
  // or when the atom's index is below nlocal
  const bool tally_i = newton_pair || (i < nlocal);
  const bool tally_j = newton_pair || (j < nlocal);

  if (eflag_either) {
    if (eflag_global) {
      if (newton_pair) {
        eng_vdwl_thr[tid] += evdwl;
        eng_coul_thr[tid] += ecoul;
      } else {
        // one half of the pair energy for each end that is below nlocal
        const double half_vdwl = 0.5 * evdwl;
        const double half_coul = 0.5 * ecoul;
        const int ends[2] = {i, j};
        for (int s = 0; s < 2; ++s) {
          if (ends[s] >= nlocal) continue;
          eng_vdwl_thr[tid] += half_vdwl;
          eng_coul_thr[tid] += half_coul;
        }
      }
    }

    if (eflag_atom) {
      const double half_pair = 0.5 * (evdwl + ecoul);
      double *e_mine = eatom_thr[tid];
      if (tally_i) e_mine[i] += half_pair;
      if (tally_j) e_mine[j] += half_pair;
    }
  }

  if (vflag_either) {
    // six virial components in the order xx, yy, zz, xy, xz, yz
    const double v[6] = {delx * fx, dely * fy, delz * fz,
                         delx * fy, delx * fz, dely * fz};

    if (vflag_global) {
      double *vir = virial_thr[tid];
      if (newton_pair) {
        for (int c = 0; c < 6; ++c) vir[c] += v[c];
      } else {
        if (i < nlocal) {
          for (int c = 0; c < 6; ++c) vir[c] += 0.5 * v[c];
        }
        if (j < nlocal) {
          for (int c = 0; c < 6; ++c) vir[c] += 0.5 * v[c];
        }
      }
    }

    if (vflag_atom) {
      if (tally_i) {
        double *v_i = vatom_thr[tid][i];
        for (int c = 0; c < 6; ++c) v_i[c] += 0.5 * v[c];
      }
      if (tally_j) {
        double *v_j = vatom_thr[tid][j];
        for (int c = 0; c < 6; ++c) v_j[c] += 0.5 * v[c];
      }
    }
  }
}
/* ----------------------------------------------------------------------
   tally eng_vdwl and virial into per thread global and per-atom accumulators
   called by SW potential, newton_pair is always on
   virial = riFi + rjFj + rkFk = (rj-ri) Fj + (rk-ri) Fk = drji*fj + drki*fk
   the energy is added in full to the global accumulators of thread tid;
   atoms i, j and k each receive THIRD times the energy and virial
   only the per-atom virial is tallied here, not the global one
------------------------------------------------------------------------- */

void PairOMPGPU::ev_tally3_thr(int i, int j, int k, double evdwl, double ecoul,
                               double *fj, double *fk,
                               double *drji, double *drki,
                               int tid, double THIRD)
{
  if (eflag_either) {
    if (eflag_global) {
      eng_vdwl_thr[tid] += evdwl;
      eng_coul_thr[tid] += ecoul;
    }
    if (eflag_atom) {
      const double share = THIRD * (evdwl + ecoul);
      double *e_mine = eatom_thr[tid];
      e_mine[i] += share;
      e_mine[j] += share;
      e_mine[k] += share;
    }
  }

  if (!vflag_atom) return;

  // component c pairs displacement index dcomp[c] with force index fcomp[c]
  // (xx, yy, zz, xy, xz, yz)
  const int dcomp[6] = {0, 1, 2, 0, 0, 1};
  const int fcomp[6] = {0, 1, 2, 1, 2, 2};

  double v[6];
  for (int c = 0; c < 6; ++c) {
    v[c] = THIRD * (drji[dcomp[c]] * fj[fcomp[c]] + drki[dcomp[c]] * fk[fcomp[c]]);
  }

  const int members[3] = {i, j, k};
  for (int a = 0; a < 3; ++a) {
    double *v_atom = vatom_thr[tid][members[a]];
    for (int c = 0; c < 6; ++c) v_atom[c] += v[c];
  }
}
/* ----------------------------------------------------------------------
   tally eng_vdwl and virial into per thread global and per-atom accumulators
   called by AIREBO potential, newton_pair is always on
   the energy is added in full to eng_vdwl_thr[tid]; atoms i, j, k and m
   each receive one quarter of the energy and of the virial
   only the per-atom virial is tallied here, not the global one
------------------------------------------------------------------------- */

void PairOMPGPU::ev_tally4_thr(int i, int j, int k, int m, double evdwl,
                               double *fi, double *fj, double *fk,
                               double *drim, double *drjm, double *drkm,
                               int tid)
{
  const int members[4] = {i, j, k, m};

  if (eflag_either) {
    if (eflag_global) eng_vdwl_thr[tid] += evdwl;
    if (eflag_atom) {
      const double share = 0.25 * evdwl;
      double *e_mine = eatom_thr[tid];
      for (int a = 0; a < 4; ++a) e_mine[members[a]] += share;
    }
  }

  if (!vflag_atom) return;

  // component c pairs displacement index dcomp[c] with force index fcomp[c]
  // (xx, yy, zz, xy, xz, yz)
  const int dcomp[6] = {0, 1, 2, 0, 0, 1};
  const int fcomp[6] = {0, 1, 2, 1, 2, 2};

  double v[6];
  for (int c = 0; c < 6; ++c) {
    const int d = dcomp[c];
    const int f = fcomp[c];
    v[c] = 0.25 * (drim[d] * fi[f] + drjm[d] * fj[f] + drkm[d] * fk[f]);
  }

  for (int a = 0; a < 4; ++a) {
    double *v_atom = vatom_thr[tid][members[a]];
    for (int c = 0; c < 6; ++c) v_atom[c] += v[c];
  }
}
/* ----------------------------------------------------------------------
   tally ecoul and virial into each of n atoms in list
   called by TIP4P potential, newton_pair is always on
   changes v values by dividing by n
   the global accumulators of thread tid receive ecoul and v in full;
   each listed atom receives ecoul/n and v/n
   the caller's v is only modified when vflag_either and vflag_atom are set
------------------------------------------------------------------------- */

void PairOMPGPU::ev_tally_list_thr(int n, int *list, double ecoul, double *v,
                                   int tid)
{
  if (eflag_either) {
    if (eflag_global) eng_coul_thr[tid] += ecoul;
    if (eflag_atom) {
      const double share = ecoul / n;
      double *e_mine = eatom_thr[tid];
      for (int idx = 0; idx < n; ++idx) e_mine[list[idx]] += share;
    }
  }

  if (!vflag_either) return;

  if (vflag_global) {
    double *vir = virial_thr[tid];
    for (int c = 0; c < 6; ++c) vir[c] += v[c];
  }

  if (vflag_atom) {
    // scale the caller's array in place, then hand the same share to each atom
    for (int c = 0; c < 6; ++c) v[c] /= n;

    for (int idx = 0; idx < n; ++idx) {
      double *v_atom = vatom_thr[tid][list[idx]];
      for (int c = 0; c < 6; ++c) v_atom[c] += v[c];
    }
  }
}
/* ----------------------------------------------------------------------
   tally virial into per-atom accumulators
   called by AIREBO potential, newton_pair is always on
   fpair is magnitude of force on atom I
   atoms i and j each receive 0.5 * drij (x) drij * fpair in the per-atom
   virial of thread tid; no vflag test is made in this function
------------------------------------------------------------------------- */

void PairOMPGPU::v_tally2_thr(int i, int j, double fpair, double *drij, int tid)
{
  // component c multiplies drij[first[c]] by drij[second[c]]
  // (xx, yy, zz, xy, xz, yz)
  const int first[6] = {0, 1, 2, 0, 0, 1};
  const int second[6] = {0, 1, 2, 1, 2, 2};

  double v[6];
  for (int c = 0; c < 6; ++c) {
    v[c] = 0.5 * drij[first[c]] * drij[second[c]] * fpair;
  }

  const int members[2] = {i, j};
  for (int a = 0; a < 2; ++a) {
    double *v_atom = vatom_thr[tid][members[a]];
    for (int c = 0; c < 6; ++c) v_atom[c] += v[c];
  }
}
/* ----------------------------------------------------------------------
   tally virial into per-atom accumulators
   called by AIREBO and Tersoff potential, newton_pair is always on
   atoms i, j and k each receive THIRD * (drik*fi + drjk*fj) in the
   per-atom virial of thread tid; no vflag test is made in this function
------------------------------------------------------------------------- */

void PairOMPGPU::v_tally3_thr(int i, int j, int k, double *fi, double *fj,
                              double *drik, double *drjk,
                              int tid, double THIRD)
{
  // component c pairs displacement index dcomp[c] with force index fcomp[c]
  // (xx, yy, zz, xy, xz, yz)
  const int dcomp[6] = {0, 1, 2, 0, 0, 1};
  const int fcomp[6] = {0, 1, 2, 1, 2, 2};

  double v[6];
  for (int c = 0; c < 6; ++c) {
    v[c] = THIRD * (drik[dcomp[c]] * fi[fcomp[c]] + drjk[dcomp[c]] * fj[fcomp[c]]);
  }

  const int members[3] = {i, j, k};
  for (int a = 0; a < 3; ++a) {
    double *v_atom = vatom_thr[tid][members[a]];
    for (int c = 0; c < 6; ++c) v_atom[c] += v[c];
  }
}
/* ----------------------------------------------------------------------
   tally virial into per-atom accumulators
   called by AIREBO potential, newton_pair is always on
   atoms i, j, k and m each receive 0.25 * (drim*fi + drjm*fj + drkm*fk)
   in the per-atom virial of thread tid; no vflag test is made here
------------------------------------------------------------------------- */

void PairOMPGPU::v_tally4_thr(int i, int j, int k, int m,
                              double *fi, double *fj, double *fk,
                              double *drim, double *drjm, double *drkm,
                              int tid)
{
  // component c pairs displacement index dcomp[c] with force index fcomp[c]
  // (xx, yy, zz, xy, xz, yz)
  const int dcomp[6] = {0, 1, 2, 0, 0, 1};
  const int fcomp[6] = {0, 1, 2, 1, 2, 2};

  double v[6];
  for (int c = 0; c < 6; ++c) {
    const int d = dcomp[c];
    const int f = fcomp[c];
    v[c] = 0.25 * (drim[d] * fi[f] + drjm[d] * fj[f] + drkm[d] * fk[f]);
  }

  const int members[4] = {i, j, k, m};
  for (int a = 0; a < 4; ++a) {
    double *v_atom = vatom_thr[tid][members[a]];
    for (int c = 0; c < 6; ++c) v_atom[c] += v[c];
  }
}
/* ----------------------------------------------------------------------
   reduce the per thread accumulated E/V data into the canonical accumulators.
   adds the contents of every thread's accumulators to the matching
   members of p (eng_vdwl, eng_coul, virial, eatom, vatom)
   an accumulator is only added when the flag that makes ev_setup_thr()
   zero it is set; otherwise it may hold uninitialized or stale data
------------------------------------------------------------------------- */

void PairOMPGPU::ev_reduce_thr(Pair &p)
{
  // same atom count that ev_setup_thr() zeroes
  const int ntotal = (force->newton) ?
    (atom->nlocal + atom->nghost) : atom->nlocal;

  for (int n = 0; n < _nthreads; ++n) {

    // eng_*_thr are zeroed and tallied only under eflag_global; adding
    // them unconditionally folded indeterminate values into p
    if (eflag_global) {
      p.eng_vdwl += eng_vdwl_thr[n];
      p.eng_coul += eng_coul_thr[n];
    }

    if (vflag_either) {
      // virial_thr is zeroed and tallied only under vflag_global
      if (vflag_global) {
        for (int c = 0; c < 6; ++c) p.virial[c] += virial_thr[n][c];
      }

      if (vflag_atom) {
        for (int i = 0; i < ntotal; ++i) {
          for (int c = 0; c < 6; ++c) p.vatom[i][c] += vatom_thr[n][i][c];
        }
      }
    }

    if (eflag_atom) {
      for (int i = 0; i < ntotal; ++i) p.eatom[i] += eatom_thr[n][i];
    }
  }
}
/* ----------------------------------------------------------------------
   estimate of the memory held by the per-thread accumulators, in bytes
   all products are formed in double precision: the former int*int
   products (_nthreads * maxeatom_thr, ...) could overflow for large
   thread/atom counts before they were widened
------------------------------------------------------------------------- */

double PairOMPGPU::memory_usage()
{
  const double nthr = static_cast<double>(_nthreads);
  const double dsize = static_cast<double>(sizeof(double));

  double bytes = 0.0;

  // 2 energy accumulators per thread plus 7 words for virial_thr
  // NOTE(review): 7 presumably means 6 components + 1 row pointer - confirm
  bytes += nthr * (2 + 7) * dsize;

  // per-atom energy and virial arrays at their current capacity
  bytes += nthr * maxeatom_thr * dsize;
  bytes += nthr * maxvatom_thr * 6 * dsize;

  // f_thr is only counted when it has been allocated
  if (f_thr != NULL) bytes += nthr * _nmax * dsize;

  return bytes;
}
#endif
Event Timeline
Log In to Comment