Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F70489629
pair_gayberne_intel.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Jul 7, 03:23
Size
35 KB
Mime Type
text/x-c++
Expires
Tue, Jul 9, 03:23 (2 d)
Engine
blob
Format
Raw Data
Handle
18442838
Attached To
rLAMMPS lammps
pair_gayberne_intel.cpp
View Options
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
This software is distributed under the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include "pair_gayberne_intel.h"
#include "math_extra_intel.h"
#ifdef _LMP_INTEL_OFFLOAD
#pragma offload_attribute(push,target(mic))
#endif
#include <cmath>
#ifdef _LMP_INTEL_OFFLOAD
#pragma offload_attribute(pop)
#endif
#include "atom.h"
#include "comm.h"
#include "atom_vec_ellipsoid.h"
#include "force.h"
#include "memory.h"
#include "modify.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "suffix.h"
using
namespace
LAMMPS_NS
;
#define FC_PACKED1_T typename ForceConst<flt_t>::fc_packed1
#define FC_PACKED2_T typename ForceConst<flt_t>::fc_packed2
#define FC_PACKED3_T typename ForceConst<flt_t>::fc_packed3
/* ---------------------------------------------------------------------- */
PairGayBerneIntel
::
PairGayBerneIntel
(
LAMMPS
*
lmp
)
:
PairGayBerne
(
lmp
)
{
suffix_flag
|=
Suffix
::
INTEL
;
respa_enable
=
0
;
}
/* ---------------------------------------------------------------------- */
void
PairGayBerneIntel
::
compute
(
int
eflag
,
int
vflag
)
{
if
(
fix
->
precision
()
==
FixIntel
::
PREC_MODE_MIXED
)
compute
<
float
,
double
>
(
eflag
,
vflag
,
fix
->
get_mixed_buffers
(),
force_const_single
);
else
if
(
fix
->
precision
()
==
FixIntel
::
PREC_MODE_DOUBLE
)
compute
<
double
,
double
>
(
eflag
,
vflag
,
fix
->
get_double_buffers
(),
force_const_double
);
else
compute
<
float
,
float
>
(
eflag
,
vflag
,
fix
->
get_single_buffers
(),
force_const_single
);
fix
->
balance_stamp
();
vflag_fdotr
=
0
;
}
template
<
class
flt_t
,
class
acc_t
>
void
PairGayBerneIntel
::
compute
(
int
eflag
,
int
vflag
,
IntelBuffers
<
flt_t
,
acc_t
>
*
buffers
,
const
ForceConst
<
flt_t
>
&
fc
)
{
if
(
eflag
||
vflag
)
{
ev_setup
(
eflag
,
vflag
);
}
else
evflag
=
vflag_fdotr
=
0
;
const
int
inum
=
list
->
inum
;
const
int
nall
=
atom
->
nlocal
+
atom
->
nghost
;
const
int
nthreads
=
comm
->
nthreads
;
const
int
host_start
=
fix
->
host_start_pair
();
const
int
offload_end
=
fix
->
offload_end_pair
();
const
int
ago
=
neighbor
->
ago
;
if
(
fix
->
separate_buffers
()
==
0
)
{
fix
->
start_watch
(
TIME_PACK
);
const
AtomVecEllipsoid
::
Bonus
*
const
bonus
=
avec
->
bonus
;
const
int
*
const
ellipsoid
=
atom
->
ellipsoid
;
QUAT_T
*
_noalias
const
quat
=
buffers
->
get_quat
();
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag,buffers,fc)
#endif
{
int
ifrom
,
ito
,
tid
;
IP_PRE_omp_range_id_align
(
ifrom
,
ito
,
tid
,
nall
,
nthreads
,
sizeof
(
ATOM_T
));
if
(
ago
!=
0
)
buffers
->
thr_pack
(
ifrom
,
ito
,
ago
);
for
(
int
i
=
ifrom
;
i
<
ito
;
i
++
)
{
int
qi
=
ellipsoid
[
i
];
if
(
qi
>
-
1
)
{
quat
[
i
].
w
=
bonus
[
qi
].
quat
[
0
];
quat
[
i
].
i
=
bonus
[
qi
].
quat
[
1
];
quat
[
i
].
j
=
bonus
[
qi
].
quat
[
2
];
quat
[
i
].
k
=
bonus
[
qi
].
quat
[
3
];
}
}
}
quat
[
nall
].
w
=
(
flt_t
)
1.0
;
quat
[
nall
].
i
=
(
flt_t
)
0.0
;
quat
[
nall
].
j
=
(
flt_t
)
0.0
;
quat
[
nall
].
k
=
(
flt_t
)
0.0
;
fix
->
stop_watch
(
TIME_PACK
);
}
if
(
evflag
||
vflag_fdotr
)
{
int
ovflag
=
0
;
if
(
vflag_fdotr
)
ovflag
=
2
;
else
if
(
vflag
)
ovflag
=
1
;
if
(
eflag
)
{
if
(
force
->
newton_pair
)
{
eval
<
1
,
1
,
1
>
(
1
,
ovflag
,
buffers
,
fc
,
0
,
offload_end
);
eval
<
1
,
1
,
1
>
(
0
,
ovflag
,
buffers
,
fc
,
host_start
,
inum
);
}
else
{
eval
<
1
,
1
,
0
>
(
1
,
ovflag
,
buffers
,
fc
,
0
,
offload_end
);
eval
<
1
,
1
,
0
>
(
0
,
ovflag
,
buffers
,
fc
,
host_start
,
inum
);
}
}
else
{
if
(
force
->
newton_pair
)
{
eval
<
1
,
0
,
1
>
(
1
,
ovflag
,
buffers
,
fc
,
0
,
offload_end
);
eval
<
1
,
0
,
1
>
(
0
,
ovflag
,
buffers
,
fc
,
host_start
,
inum
);
}
else
{
eval
<
1
,
0
,
0
>
(
1
,
ovflag
,
buffers
,
fc
,
0
,
offload_end
);
eval
<
1
,
0
,
0
>
(
0
,
ovflag
,
buffers
,
fc
,
host_start
,
inum
);
}
}
}
else
{
if
(
force
->
newton_pair
)
{
eval
<
0
,
0
,
1
>
(
1
,
0
,
buffers
,
fc
,
0
,
offload_end
);
eval
<
0
,
0
,
1
>
(
0
,
0
,
buffers
,
fc
,
host_start
,
inum
);
}
else
{
eval
<
0
,
0
,
0
>
(
1
,
0
,
buffers
,
fc
,
0
,
offload_end
);
eval
<
0
,
0
,
0
>
(
0
,
0
,
buffers
,
fc
,
host_start
,
inum
);
}
}
}
template
<
int
EVFLAG
,
int
EFLAG
,
int
NEWTON_PAIR
,
class
flt_t
,
class
acc_t
>
void
PairGayBerneIntel
::
eval
(
const
int
offload
,
const
int
vflag
,
IntelBuffers
<
flt_t
,
acc_t
>
*
buffers
,
const
ForceConst
<
flt_t
>
&
fc
,
const
int
astart
,
const
int
aend
)
{
const
int
inum
=
aend
-
astart
;
if
(
inum
==
0
)
return
;
int
nlocal
,
nall
,
minlocal
;
fix
->
get_buffern
(
offload
,
nlocal
,
nall
,
minlocal
);
const
int
ago
=
neighbor
->
ago
;
ATOM_T
*
_noalias
const
x
=
buffers
->
get_x
(
offload
);
QUAT_T
*
_noalias
const
quat
=
buffers
->
get_quat
(
offload
);
const
AtomVecEllipsoid
::
Bonus
*
bonus
=
avec
->
bonus
;
const
int
*
ellipsoid
=
atom
->
ellipsoid
;
#ifdef _LMP_INTEL_OFFLOAD
if
(
fix
->
separate_buffers
())
{
fix
->
start_watch
(
TIME_PACK
);
if
(
offload
)
{
#pragma omp parallel default(none) \
shared(buffers,nlocal,nall,bonus,ellipsoid)
{
int
ifrom
,
ito
,
tid
;
int
nthreads
=
comm
->
nthreads
;
IP_PRE_omp_range_id_align
(
ifrom
,
ito
,
tid
,
nlocal
,
nthreads
,
sizeof
(
ATOM_T
));
if
(
ago
!=
0
)
buffers
->
thr_pack_cop
(
ifrom
,
ito
,
0
);
for
(
int
i
=
ifrom
;
i
<
ito
;
i
++
)
{
int
qi
=
ellipsoid
[
i
];
if
(
qi
>
-
1
)
{
quat
[
i
].
w
=
bonus
[
qi
].
quat
[
0
];
quat
[
i
].
i
=
bonus
[
qi
].
quat
[
1
];
quat
[
i
].
j
=
bonus
[
qi
].
quat
[
2
];
quat
[
i
].
k
=
bonus
[
qi
].
quat
[
3
];
}
}
int
nghost
=
nall
-
nlocal
;
if
(
nghost
)
{
IP_PRE_omp_range_align
(
ifrom
,
ito
,
tid
,
nall
-
nlocal
,
nthreads
,
sizeof
(
ATOM_T
));
int
offset
=
0
;
ifrom
+=
nlocal
;
ito
+=
nlocal
;
if
(
ago
!=
0
)
{
offset
=
fix
->
offload_min_ghost
()
-
nlocal
;
buffers
->
thr_pack_cop
(
ifrom
,
ito
,
offset
,
ago
==
1
);
}
for
(
int
i
=
ifrom
;
i
<
ito
;
i
++
)
{
int
qi
=
ellipsoid
[
i
+
offset
];
if
(
qi
>
-
1
)
{
quat
[
i
].
w
=
bonus
[
qi
].
quat
[
0
];
quat
[
i
].
i
=
bonus
[
qi
].
quat
[
1
];
quat
[
i
].
j
=
bonus
[
qi
].
quat
[
2
];
quat
[
i
].
k
=
bonus
[
qi
].
quat
[
3
];
}
}
}
}
}
else
{
if
(
ago
!=
0
)
buffers
->
thr_pack_host
(
fix
->
host_min_local
(),
nlocal
,
0
);
for
(
int
i
=
fix
->
host_min_local
();
i
<
nlocal
;
i
++
)
{
int
qi
=
ellipsoid
[
i
];
if
(
qi
>
-
1
)
{
quat
[
i
].
w
=
bonus
[
qi
].
quat
[
0
];
quat
[
i
].
i
=
bonus
[
qi
].
quat
[
1
];
quat
[
i
].
j
=
bonus
[
qi
].
quat
[
2
];
quat
[
i
].
k
=
bonus
[
qi
].
quat
[
3
];
}
}
int
offset
=
fix
->
host_min_ghost
()
-
nlocal
;
if
(
ago
!=
0
)
buffers
->
thr_pack_host
(
nlocal
,
nall
,
offset
);
for
(
int
i
=
nlocal
;
i
<
nall
;
i
++
)
{
int
qi
=
ellipsoid
[
i
+
offset
];
if
(
qi
>
-
1
)
{
quat
[
i
].
w
=
bonus
[
qi
].
quat
[
0
];
quat
[
i
].
i
=
bonus
[
qi
].
quat
[
1
];
quat
[
i
].
j
=
bonus
[
qi
].
quat
[
2
];
quat
[
i
].
k
=
bonus
[
qi
].
quat
[
3
];
}
}
}
fix
->
stop_watch
(
TIME_PACK
);
}
#endif
// const int * _noalias const ilist = list->ilist;
const
int
*
_noalias
const
numneigh
=
list
->
numneigh
;
const
int
*
_noalias
const
cnumneigh
=
buffers
->
cnumneigh
(
list
);
const
int
*
_noalias
const
firstneigh
=
buffers
->
firstneigh
(
list
);
const
flt_t
*
_noalias
const
special_lj
=
fc
.
special_lj
;
const
FC_PACKED1_T
*
_noalias
const
ijc
=
fc
.
ijc
[
0
];
const
FC_PACKED2_T
*
_noalias
const
lj34
=
fc
.
lj34
[
0
];
const
FC_PACKED3_T
*
_noalias
const
ic
=
fc
.
ic
;
const
flt_t
mu
=
fc
.
mu
;
const
flt_t
gamma
=
fc
.
gamma
;
const
flt_t
upsilon
=
fc
.
upsilon
;
flt_t
*
const
rsq_formi
=
fc
.
rsq_form
[
0
];
flt_t
*
const
delx_formi
=
fc
.
delx_form
[
0
];
flt_t
*
const
dely_formi
=
fc
.
dely_form
[
0
];
flt_t
*
const
delz_formi
=
fc
.
delz_form
[
0
];
int
*
const
jtype_formi
=
fc
.
jtype_form
[
0
];
int
*
const
jlist_formi
=
fc
.
jlist_form
[
0
];
const
int
ntypes
=
atom
->
ntypes
+
1
;
const
int
eatom
=
this
->
eflag_atom
;
// Determine how much data to transfer
int
x_size
,
q_size
,
f_stride
,
ev_size
,
separate_flag
;
IP_PRE_get_transfern
(
ago
,
NEWTON_PAIR
,
EVFLAG
,
EFLAG
,
vflag
,
buffers
,
offload
,
fix
,
separate_flag
,
x_size
,
q_size
,
ev_size
,
f_stride
);
int
tc
;
FORCE_T
*
_noalias
f_start
;
acc_t
*
_noalias
ev_global
;
IP_PRE_get_buffers
(
offload
,
buffers
,
fix
,
tc
,
f_start
,
ev_global
);
const
int
max_nbors
=
_max_nbors
;
const
int
nthreads
=
tc
;
int
pad
=
1
;
if
(
offload
)
{
if
(
INTEL_MIC_NBOR_PAD
>
1
)
pad
=
INTEL_MIC_NBOR_PAD
*
sizeof
(
float
)
/
sizeof
(
flt_t
);
}
else
{
if
(
INTEL_NBOR_PAD
>
1
)
pad
=
INTEL_NBOR_PAD
*
sizeof
(
float
)
/
sizeof
(
flt_t
);
}
const
int
pad_width
=
pad
;
#ifdef _LMP_INTEL_OFFLOAD
int
*
overflow
=
fix
->
get_off_overflow_flag
();
double
*
timer_compute
=
fix
->
off_watch_pair
();
if
(
offload
)
fix
->
start_watch
(
TIME_OFFLOAD_LATENCY
);
#pragma offload target(mic:_cop) if(offload) \
in(special_lj:length(0) alloc_if(0) free_if(0)) \
in(ijc,lj34,ic:length(0) alloc_if(0) free_if(0)) \
in(rsq_formi, delx_formi, dely_formi: length(0) alloc_if(0) free_if(0)) \
in(delz_formi, jtype_formi, jlist_formi: length(0) alloc_if(0) free_if(0))\
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
in(numneigh:length(0) alloc_if(0) free_if(0)) \
in(x:length(x_size) alloc_if(0) free_if(0)) \
in(quat:length(nall+1) alloc_if(0) free_if(0)) \
in(overflow:length(0) alloc_if(0) free_if(0)) \
in(nthreads,inum,nall,ntypes,vflag,eatom,minlocal,separate_flag) \
in(astart,nlocal,f_stride,max_nbors,mu,gamma,upsilon,offload,pad_width) \
out(f_start:length(f_stride) alloc_if(0) free_if(0)) \
out(ev_global:length(ev_size) alloc_if(0) free_if(0)) \
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
signal(f_start)
#endif
{
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
*
timer_compute
=
MIC_Wtime
();
#endif
#ifdef _LMP_INTEL_OFFLOAD
if
(
separate_flag
)
{
if
(
separate_flag
<
3
)
{
int
all_local
=
nlocal
;
int
ghost_min
=
overflow
[
LMP_GHOST_MIN
];
nlocal
=
overflow
[
LMP_LOCAL_MAX
]
+
1
;
int
nghost
=
overflow
[
LMP_GHOST_MAX
]
+
1
-
ghost_min
;
if
(
nghost
<
0
)
nghost
=
0
;
nall
=
nlocal
+
nghost
;
separate_flag
--
;
int
flength
;
if
(
NEWTON_PAIR
)
flength
=
nall
;
else
flength
=
nlocal
;
IP_PRE_get_stride
(
f_stride
,
flength
,
sizeof
(
FORCE_T
),
separate_flag
);
if
(
nghost
)
{
if
(
nlocal
<
all_local
||
ghost_min
>
all_local
)
{
memmove
(
x
+
nlocal
,
x
+
ghost_min
,
(
nall
-
nlocal
)
*
sizeof
(
ATOM_T
));
memmove
(
quat
+
nlocal
,
quat
+
ghost_min
,
(
nall
-
nlocal
)
*
sizeof
(
QUAT_T
));
}
}
}
x
[
nall
].
x
=
(
flt_t
)
INTEL_BIGP
;
x
[
nall
].
y
=
(
flt_t
)
INTEL_BIGP
;
x
[
nall
].
z
=
(
flt_t
)
INTEL_BIGP
;
quat
[
nall
].
w
=
(
flt_t
)
1.0
;
quat
[
nall
].
i
=
(
flt_t
)
0.0
;
quat
[
nall
].
j
=
(
flt_t
)
0.0
;
quat
[
nall
].
k
=
(
flt_t
)
0.0
;
}
#endif
acc_t
oevdwl
,
ov0
,
ov1
,
ov2
,
ov3
,
ov4
,
ov5
;
if
(
EVFLAG
)
{
oevdwl
=
(
acc_t
)
0.0
;
if
(
vflag
)
ov0
=
ov1
=
ov2
=
ov3
=
ov4
=
ov5
=
(
acc_t
)
0.0
;
}
// loop over neighbors of my atoms
#if defined(_OPENMP)
#pragma omp parallel default(none) \
shared(f_start,f_stride,nlocal,nall,minlocal) \
reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5)
#endif
{
int
iifrom
,
iito
,
tid
;
IP_PRE_omp_range_id
(
iifrom
,
iito
,
tid
,
inum
,
nthreads
);
iifrom
+=
astart
;
iito
+=
astart
;
FORCE_T
*
_noalias
const
f
=
f_start
-
minlocal
*
2
+
(
tid
*
f_stride
);
memset
(
f
+
minlocal
*
2
,
0
,
f_stride
*
sizeof
(
FORCE_T
));
flt_t
*
_noalias
const
rsq_form
=
rsq_formi
+
tid
*
max_nbors
;
flt_t
*
_noalias
const
delx_form
=
delx_formi
+
tid
*
max_nbors
;
flt_t
*
_noalias
const
dely_form
=
dely_formi
+
tid
*
max_nbors
;
flt_t
*
_noalias
const
delz_form
=
delz_formi
+
tid
*
max_nbors
;
int
*
_noalias
const
jtype_form
=
jtype_formi
+
tid
*
max_nbors
;
int
*
_noalias
const
jlist_form
=
jlist_formi
+
tid
*
max_nbors
;
int
ierror
=
0
;
for
(
int
i
=
iifrom
;
i
<
iito
;
++
i
)
{
// const int i = ilist[ii];
const
int
itype
=
x
[
i
].
w
;
const
int
ptr_off
=
itype
*
ntypes
;
const
FC_PACKED1_T
*
_noalias
const
ijci
=
ijc
+
ptr_off
;
const
FC_PACKED2_T
*
_noalias
const
lj34i
=
lj34
+
ptr_off
;
const
int
*
_noalias
const
jlist
=
firstneigh
+
cnumneigh
[
i
];
const
int
jnum
=
numneigh
[
i
];
const
flt_t
xtmp
=
x
[
i
].
x
;
const
flt_t
ytmp
=
x
[
i
].
y
;
const
flt_t
ztmp
=
x
[
i
].
z
;
flt_t
a1_0
,
a1_1
,
a1_2
,
a1_3
,
a1_4
,
a1_5
,
a1_6
,
a1_7
,
a1_8
;
flt_t
b1_0
,
b1_1
,
b1_2
,
b1_3
,
b1_4
,
b1_5
,
b1_6
,
b1_7
,
b1_8
;
flt_t
g1_0
,
g1_1
,
g1_2
,
g1_3
,
g1_4
,
g1_5
,
g1_6
,
g1_7
,
g1_8
;
if
(
ijci
[
itype
].
form
==
ELLIPSE_ELLIPSE
)
{
flt_t
temp_0
,
temp_1
,
temp_2
,
temp_3
,
temp_4
,
temp_5
,
temp_6
,
temp_7
,
temp_8
;
ME_quat_to_mat_trans
(
quat
[
i
],
a1
);
ME_diag_times3
(
ic
[
itype
].
well
,
a1
,
temp
);
ME_transpose_times3
(
a1
,
temp
,
b1
);
ME_diag_times3
(
ic
[
itype
].
shape2
,
a1
,
temp
);
ME_transpose_times3
(
a1
,
temp
,
g1
);
}
acc_t
fxtmp
,
fytmp
,
fztmp
,
fwtmp
,
t1tmp
,
t2tmp
,
t3tmp
;
acc_t
sevdwl
,
sv0
,
sv1
,
sv2
,
sv3
,
sv4
,
sv5
;
fxtmp
=
fytmp
=
fztmp
=
t1tmp
=
t2tmp
=
t3tmp
=
(
acc_t
)
0.0
;
if
(
EVFLAG
)
{
if
(
EFLAG
)
fwtmp
=
sevdwl
=
(
acc_t
)
0.0
;
if
(
vflag
==
1
)
sv0
=
sv1
=
sv2
=
sv3
=
sv4
=
sv5
=
(
acc_t
)
0.0
;
}
bool
multiple_forms
=
false
;
int
packed_j
=
0
;
for
(
int
jj
=
0
;
jj
<
jnum
;
jj
++
)
{
int
jm
=
jlist
[
jj
];
int
j
=
jm
&
NEIGHMASK
;
const
int
jtype
=
x
[
j
].
w
;
if
(
ijci
[
jtype
].
form
==
ELLIPSE_ELLIPSE
)
{
flt_t
delx
=
x
[
j
].
x
-
xtmp
;
flt_t
dely
=
x
[
j
].
y
-
ytmp
;
flt_t
delz
=
x
[
j
].
z
-
ztmp
;
flt_t
rsq
=
delx
*
delx
+
dely
*
dely
+
delz
*
delz
;
if
(
rsq
<
ijci
[
jtype
].
cutsq
)
{
rsq_form
[
packed_j
]
=
rsq
;
delx_form
[
packed_j
]
=
delx
;
dely_form
[
packed_j
]
=
dely
;
delz_form
[
packed_j
]
=
delz
;
jtype_form
[
packed_j
]
=
jtype
;
jlist_form
[
packed_j
]
=
jm
;
packed_j
++
;
}
}
else
multiple_forms
=
true
;
}
const
int
edge
=
(
packed_j
%
pad_width
);
if
(
edge
)
{
const
int
packed_end
=
packed_j
+
(
pad_width
-
edge
);
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min=1, max=15, avg=8
#endif
for
(
;
packed_j
<
packed_end
;
packed_j
++
)
jlist_form
[
packed_j
]
=
nall
;
}
// -------------------------------------------------------------
#ifdef INTEL_V512
__assume
(
packed_j
%
INTEL_VECTOR_WIDTH
==
0
);
__assume
(
packed_j
%
8
==
0
);
__assume
(
packed_j
%
INTEL_MIC_VECTOR_WIDTH
==
0
);
#endif
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd reduction(+:fxtmp,fytmp,fztmp,fwtmp,t1tmp,t2tmp,t3tmp, \
sevdwl,sv0,sv1,sv2,sv3,sv4,sv5)
#endif
for
(
int
jj
=
0
;
jj
<
packed_j
;
jj
++
)
{
flt_t
a2_0
,
a2_1
,
a2_2
,
a2_3
,
a2_4
,
a2_5
,
a2_6
,
a2_7
,
a2_8
;
flt_t
b2_0
,
b2_1
,
b2_2
,
b2_3
,
b2_4
,
b2_5
,
b2_6
,
b2_7
,
b2_8
;
flt_t
g2_0
,
g2_1
,
g2_2
,
g2_3
,
g2_4
,
g2_5
,
g2_6
,
g2_7
,
g2_8
;
flt_t
temp_0
,
temp_1
,
temp_2
,
temp_3
,
temp_4
,
temp_5
,
temp_6
,
temp_7
,
temp_8
;
flt_t
fforce_0
,
fforce_1
,
fforce_2
,
ttor_0
,
ttor_1
,
ttor_2
;
flt_t
rtor_0
,
rtor_1
,
rtor_2
;
const
int
sbindex
=
jlist_form
[
jj
]
>>
SBBITS
&
3
;
const
int
j
=
jlist_form
[
jj
]
&
NEIGHMASK
;
flt_t
factor_lj
=
special_lj
[
sbindex
];
const
int
jtype
=
jtype_form
[
jj
];
const
flt_t
sigma
=
ijci
[
jtype
].
sigma
;
const
flt_t
epsilon
=
ijci
[
jtype
].
epsilon
;
const
flt_t
shape2_0
=
ic
[
jtype
].
shape2
[
0
];
const
flt_t
shape2_1
=
ic
[
jtype
].
shape2
[
1
];
const
flt_t
shape2_2
=
ic
[
jtype
].
shape2
[
2
];
flt_t
one_eng
,
evdwl
;
ME_quat_to_mat_trans
(
quat
[
j
],
a2
);
ME_diag_times3
(
ic
[
jtype
].
well
,
a2
,
temp
);
ME_transpose_times3
(
a2
,
temp
,
b2
);
ME_diag_times3a
(
shape2
,
a2
,
temp
);
ME_transpose_times3
(
a2
,
temp
,
g2
);
flt_t
tempv_0
,
tempv_1
,
tempv_2
,
tempv2_0
,
tempv2_1
,
tempv2_2
;
flt_t
temp1
,
temp2
,
temp3
;
flt_t
r12hat_0
,
r12hat_1
,
r12hat_2
;
ME_normalize3
(
delx_form
[
jj
],
dely_form
[
jj
],
delz_form
[
jj
],
r12hat
);
flt_t
r
=
sqrt
(
rsq_form
[
jj
]);
// compute distance of closest approach
flt_t
g12_0
,
g12_1
,
g12_2
,
g12_3
,
g12_4
,
g12_5
,
g12_6
,
g12_7
,
g12_8
;
ME_plus3
(
g1
,
g2
,
g12
);
flt_t
kappa_0
,
kappa_1
,
kappa_2
;
ME_mldivide3
(
g12
,
delx_form
[
jj
],
dely_form
[
jj
],
delz_form
[
jj
],
kappa
,
ierror
);
// tempv = G12^-1*r12hat
flt_t
inv_r
=
(
flt_t
)
1.0
/
r
;
tempv_0
=
kappa_0
*
inv_r
;
tempv_1
=
kappa_1
*
inv_r
;
tempv_2
=
kappa_2
*
inv_r
;
flt_t
sigma12
=
ME_dot3
(
r12hat
,
tempv
);
sigma12
=
std
::
pow
((
flt_t
)
0.5
*
sigma12
,(
flt_t
)
-
0.5
);
flt_t
h12
=
r
-
sigma12
;
// energy
// compute u_r
flt_t
varrho
=
sigma
/
(
h12
+
gamma
*
sigma
);
flt_t
varrho6
=
std
::
pow
(
varrho
,
(
flt_t
)
6.0
);
flt_t
varrho12
=
varrho6
*
varrho6
;
flt_t
u_r
=
(
flt_t
)
4.0
*
epsilon
*
(
varrho12
-
varrho6
);
// compute eta_12
flt_t
eta
=
(
flt_t
)
2.0
*
ijci
[
jtype
].
lshape
;
flt_t
det_g12
=
ME_det3
(
g12
);
eta
=
std
::
pow
(
eta
/
det_g12
,
upsilon
);
// compute chi_12
flt_t
b12_0
,
b12_1
,
b12_2
,
b12_3
,
b12_4
,
b12_5
,
b12_6
,
b12_7
,
b12_8
;
flt_t
iota_0
,
iota_1
,
iota_2
;
ME_plus3
(
b1
,
b2
,
b12
);
ME_mldivide3
(
b12
,
delx_form
[
jj
],
dely_form
[
jj
],
delz_form
[
jj
],
iota
,
ierror
);
// tempv = G12^-1*r12hat
tempv_0
=
iota_0
*
inv_r
;
tempv_1
=
iota_1
*
inv_r
;
tempv_2
=
iota_2
*
inv_r
;
flt_t
chi
=
ME_dot3
(
r12hat
,
tempv
);
chi
=
std
::
pow
(
chi
*
(
flt_t
)
2.0
,
mu
);
// force
// compute dUr/dr
temp1
=
((
flt_t
)
2.0
*
varrho12
*
varrho
-
varrho6
*
varrho
)
/
sigma
;
temp1
=
temp1
*
(
flt_t
)
24.0
*
epsilon
;
flt_t
u_slj
=
temp1
*
std
::
pow
(
sigma12
,
(
flt_t
)
3.0
)
*
(
flt_t
)
0.5
;
flt_t
dUr_0
,
dUr_1
,
dUr_2
;
temp2
=
ME_dot3
(
kappa
,
r12hat
);
flt_t
uslj_rsq
=
u_slj
/
rsq_form
[
jj
];
dUr_0
=
temp1
*
r12hat_0
+
uslj_rsq
*
(
kappa_0
-
temp2
*
r12hat_0
);
dUr_1
=
temp1
*
r12hat_1
+
uslj_rsq
*
(
kappa_1
-
temp2
*
r12hat_1
);
dUr_2
=
temp1
*
r12hat_2
+
uslj_rsq
*
(
kappa_2
-
temp2
*
r12hat_2
);
// compute dChi_12/dr
flt_t
dchi_0
,
dchi_1
,
dchi_2
;
temp1
=
ME_dot3
(
iota
,
r12hat
);
temp2
=
(
flt_t
)
-
4.0
/
rsq_form
[
jj
]
*
mu
*
std
::
pow
(
chi
,
(
mu
-
(
flt_t
)
1.0
)
/
mu
);
dchi_0
=
temp2
*
(
iota_0
-
temp1
*
r12hat_0
);
dchi_1
=
temp2
*
(
iota_1
-
temp1
*
r12hat_1
);
dchi_2
=
temp2
*
(
iota_2
-
temp1
*
r12hat_2
);
temp1
=
-
eta
*
u_r
;
temp2
=
eta
*
chi
;
fforce_0
=
temp1
*
dchi_0
-
temp2
*
dUr_0
;
fforce_1
=
temp1
*
dchi_1
-
temp2
*
dUr_1
;
fforce_2
=
temp1
*
dchi_2
-
temp2
*
dUr_2
;
// torque for particle 1 and 2
// compute dUr
tempv_0
=
-
uslj_rsq
*
kappa_0
;
tempv_1
=
-
uslj_rsq
*
kappa_1
;
tempv_2
=
-
uslj_rsq
*
kappa_2
;
ME_vecmat
(
kappa
,
g1
,
tempv2
);
ME_cross3
(
tempv
,
tempv2
,
dUr
);
flt_t
dUr2_0
,
dUr2_1
,
dUr2_2
;
if
(
NEWTON_PAIR
||
j
<
nlocal
)
{
ME_vecmat
(
kappa
,
g2
,
tempv2
);
ME_cross3
(
tempv
,
tempv2
,
dUr2
);
}
// compute d_chi
ME_vecmat
(
iota
,
b1
,
tempv
);
ME_cross3
(
tempv
,
iota
,
dchi
);
temp1
=
(
flt_t
)
-
4.0
/
rsq_form
[
jj
];
dchi_0
*=
temp1
;
dchi_1
*=
temp1
;
dchi_2
*=
temp1
;
flt_t
dchi2_0
,
dchi2_1
,
dchi2_2
;
if
(
NEWTON_PAIR
||
j
<
nlocal
)
{
ME_vecmat
(
iota
,
b2
,
tempv
);
ME_cross3
(
tempv
,
iota
,
dchi2
);
dchi2_0
*=
temp1
;
dchi2_1
*=
temp1
;
dchi2_2
*=
temp1
;
}
// compute d_eta
flt_t
deta_0
,
deta_1
,
deta_2
;
deta_0
=
deta_1
=
deta_2
=
(
flt_t
)
0.0
;
ME_compute_eta_torque
(
g12
,
a1
,
shape2
,
temp
);
temp1
=
-
eta
*
upsilon
;
tempv_0
=
temp1
*
temp_0
;
tempv_1
=
temp1
*
temp_1
;
tempv_2
=
temp1
*
temp_2
;
ME_mv0_cross3
(
a1
,
tempv
,
tempv2
);
deta_0
+=
tempv2_0
;
deta_1
+=
tempv2_1
;
deta_2
+=
tempv2_2
;
tempv_0
=
temp1
*
temp_3
;
tempv_1
=
temp1
*
temp_4
;
tempv_2
=
temp1
*
temp_5
;
ME_mv1_cross3
(
a1
,
tempv
,
tempv2
);
deta_0
+=
tempv2_0
;
deta_1
+=
tempv2_1
;
deta_2
+=
tempv2_2
;
tempv_0
=
temp1
*
temp_6
;
tempv_1
=
temp1
*
temp_7
;
tempv_2
=
temp1
*
temp_8
;
ME_mv2_cross3
(
a1
,
tempv
,
tempv2
);
deta_0
+=
tempv2_0
;
deta_1
+=
tempv2_1
;
deta_2
+=
tempv2_2
;
// compute d_eta for particle 2
flt_t
deta2_0
,
deta2_1
,
deta2_2
;
if
(
NEWTON_PAIR
||
j
<
nlocal
)
{
deta2_0
=
deta2_1
=
deta2_2
=
(
flt_t
)
0.0
;
ME_compute_eta_torque
(
g12
,
a2
,
shape2
,
temp
);
tempv_0
=
temp1
*
temp_0
;
tempv_1
=
temp1
*
temp_1
;
tempv_2
=
temp1
*
temp_2
;
ME_mv0_cross3
(
a2
,
tempv
,
tempv2
);
deta2_0
+=
tempv2_0
;
deta2_1
+=
tempv2_1
;
deta2_2
+=
tempv2_2
;
tempv_0
=
temp1
*
temp_3
;
tempv_1
=
temp1
*
temp_4
;
tempv_2
=
temp1
*
temp_5
;
ME_mv1_cross3
(
a2
,
tempv
,
tempv2
);
deta2_0
+=
tempv2_0
;
deta2_1
+=
tempv2_1
;
deta2_2
+=
tempv2_2
;
tempv_0
=
temp1
*
temp_6
;
tempv_1
=
temp1
*
temp_7
;
tempv_2
=
temp1
*
temp_8
;
ME_mv2_cross3
(
a2
,
tempv
,
tempv2
);
deta2_0
+=
tempv2_0
;
deta2_1
+=
tempv2_1
;
deta2_2
+=
tempv2_2
;
}
// torque
temp1
=
u_r
*
eta
;
temp2
=
u_r
*
chi
;
temp3
=
chi
*
eta
;
ttor_0
=
(
temp1
*
dchi_0
+
temp2
*
deta_0
+
temp3
*
dUr_0
)
*
(
flt_t
)
-
1.0
;
ttor_1
=
(
temp1
*
dchi_1
+
temp2
*
deta_1
+
temp3
*
dUr_1
)
*
(
flt_t
)
-
1.0
;
ttor_2
=
(
temp1
*
dchi_2
+
temp2
*
deta_2
+
temp3
*
dUr_2
)
*
(
flt_t
)
-
1.0
;
if
(
NEWTON_PAIR
||
j
<
nlocal
)
{
rtor_0
=
(
temp1
*
dchi2_0
+
temp2
*
deta2_0
+
temp3
*
dUr2_0
)
*
(
flt_t
)
-
1.0
;
rtor_1
=
(
temp1
*
dchi2_1
+
temp2
*
deta2_1
+
temp3
*
dUr2_1
)
*
(
flt_t
)
-
1.0
;
rtor_2
=
(
temp1
*
dchi2_2
+
temp2
*
deta2_2
+
temp3
*
dUr2_2
)
*
(
flt_t
)
-
1.0
;
}
one_eng
=
temp1
*
chi
;
#ifndef INTEL_VMASK
if
(
jlist_form
[
jj
]
==
nall
)
{
one_eng
=
(
flt_t
)
0.0
;
fforce_0
=
0.0
;
fforce_1
=
0.0
;
fforce_2
=
0.0
;
ttor_0
=
0.0
;
ttor_1
=
0.0
;
ttor_2
=
0.0
;
rtor_0
=
0.0
;
rtor_1
=
0.0
;
rtor_2
=
0.0
;
}
#endif
fforce_0
*=
factor_lj
;
fforce_1
*=
factor_lj
;
fforce_2
*=
factor_lj
;
ttor_0
*=
factor_lj
;
ttor_1
*=
factor_lj
;
ttor_2
*=
factor_lj
;
#ifdef INTEL_VMASK
if
(
jlist_form
[
jj
]
<
nall
)
{
#endif
fxtmp
+=
fforce_0
;
fytmp
+=
fforce_1
;
fztmp
+=
fforce_2
;
t1tmp
+=
ttor_0
;
t2tmp
+=
ttor_1
;
t3tmp
+=
ttor_2
;
if
(
NEWTON_PAIR
||
j
<
nlocal
)
{
rtor_0
*=
factor_lj
;
rtor_1
*=
factor_lj
;
rtor_2
*=
factor_lj
;
int
jp
=
j
*
2
;
f
[
jp
].
x
-=
fforce_0
;
f
[
jp
].
y
-=
fforce_1
;
f
[
jp
].
z
-=
fforce_2
;
jp
++
;
f
[
jp
].
x
+=
rtor_0
;
f
[
jp
].
y
+=
rtor_1
;
f
[
jp
].
z
+=
rtor_2
;
}
if
(
EVFLAG
)
{
flt_t
ev_pre
=
(
flt_t
)
0.0
;
if
(
NEWTON_PAIR
||
i
<
nlocal
)
ev_pre
+=
(
flt_t
)
0.5
;
if
(
NEWTON_PAIR
||
j
<
nlocal
)
ev_pre
+=
(
flt_t
)
0.5
;
if
(
EFLAG
)
{
evdwl
=
factor_lj
*
one_eng
;
sevdwl
+=
ev_pre
*
evdwl
;
if
(
eatom
)
{
if
(
NEWTON_PAIR
||
i
<
nlocal
)
fwtmp
+=
(
flt_t
)
0.5
*
evdwl
;
if
(
NEWTON_PAIR
||
j
<
nlocal
)
f
[
j
*
2
].
w
+=
(
flt_t
)
0.5
*
evdwl
;
}
}
if
(
vflag
==
1
)
{
ev_pre
*=
(
flt_t
)
-
1.0
;
sv0
+=
ev_pre
*
delx_form
[
jj
]
*
fforce_0
;
sv1
+=
ev_pre
*
dely_form
[
jj
]
*
fforce_1
;
sv2
+=
ev_pre
*
delz_form
[
jj
]
*
fforce_2
;
sv3
+=
ev_pre
*
delx_form
[
jj
]
*
fforce_1
;
sv4
+=
ev_pre
*
delx_form
[
jj
]
*
fforce_2
;
sv5
+=
ev_pre
*
dely_form
[
jj
]
*
fforce_2
;
}
}
// EVFLAG
#ifdef INTEL_VMASK
}
#endif
}
// for jj
// -------------------------------------------------------------
if
(
multiple_forms
)
ierror
=
2
;
int
ip
=
i
*
2
;
f
[
ip
].
x
+=
fxtmp
;
f
[
ip
].
y
+=
fytmp
;
f
[
ip
].
z
+=
fztmp
;
ip
++
;
f
[
ip
].
x
+=
t1tmp
;
f
[
ip
].
y
+=
t2tmp
;
f
[
ip
].
z
+=
t3tmp
;
if
(
EVFLAG
)
{
if
(
EFLAG
)
{
if
(
eatom
)
f
[
i
*
2
].
w
+=
fwtmp
;
oevdwl
+=
sevdwl
;
}
if
(
vflag
==
1
)
{
ov0
+=
sv0
;
ov1
+=
sv1
;
ov2
+=
sv2
;
ov3
+=
sv3
;
ov4
+=
sv4
;
ov5
+=
sv5
;
}
}
}
// for i
int
o_range
;
if
(
NEWTON_PAIR
)
o_range
=
nall
;
else
o_range
=
nlocal
;
if
(
offload
==
0
)
o_range
-=
minlocal
;
IP_PRE_omp_range_align
(
iifrom
,
iito
,
tid
,
o_range
,
nthreads
,
sizeof
(
FORCE_T
));
const
int
two_iito
=
iito
*
2
;
acc_t
*
facc
=
&
(
f_start
[
0
].
x
);
const
int
sto
=
two_iito
*
4
;
const
int
fst4
=
f_stride
*
4
;
#if defined(_OPENMP)
#pragma omp barrier
#endif
int
t_off
=
f_stride
;
if
(
EFLAG
&&
eatom
)
{
for
(
int
t
=
1
;
t
<
nthreads
;
t
++
)
{
#if defined(LMP_SIMD_COMPILER)
#pragma vector nontemporal
#pragma novector
#endif
for
(
int
n
=
iifrom
*
2
;
n
<
two_iito
;
n
++
)
{
f_start
[
n
].
x
+=
f_start
[
n
+
t_off
].
x
;
f_start
[
n
].
y
+=
f_start
[
n
+
t_off
].
y
;
f_start
[
n
].
z
+=
f_start
[
n
+
t_off
].
z
;
f_start
[
n
].
w
+=
f_start
[
n
+
t_off
].
w
;
}
t_off
+=
f_stride
;
}
}
else
{
for
(
int
t
=
1
;
t
<
nthreads
;
t
++
)
{
#if defined(LMP_SIMD_COMPILER)
#pragma vector nontemporal
#pragma novector
#endif
for
(
int
n
=
iifrom
*
2
;
n
<
two_iito
;
n
++
)
{
f_start
[
n
].
x
+=
f_start
[
n
+
t_off
].
x
;
f_start
[
n
].
y
+=
f_start
[
n
+
t_off
].
y
;
f_start
[
n
].
z
+=
f_start
[
n
+
t_off
].
z
;
}
t_off
+=
f_stride
;
}
}
if
(
EVFLAG
)
{
if
(
vflag
==
2
)
{
const
ATOM_T
*
_noalias
const
xo
=
x
+
minlocal
;
#if defined(LMP_SIMD_COMPILER)
#pragma vector nontemporal
#pragma novector
#endif
for
(
int
n
=
iifrom
;
n
<
iito
;
n
++
)
{
const
int
nt2
=
n
*
2
;
ov0
+=
f_start
[
nt2
].
x
*
xo
[
n
].
x
;
ov1
+=
f_start
[
nt2
].
y
*
xo
[
n
].
y
;
ov2
+=
f_start
[
nt2
].
z
*
xo
[
n
].
z
;
ov3
+=
f_start
[
nt2
].
y
*
xo
[
n
].
x
;
ov4
+=
f_start
[
nt2
].
z
*
xo
[
n
].
x
;
ov5
+=
f_start
[
nt2
].
z
*
xo
[
n
].
y
;
}
}
}
if
(
ierror
)
f_start
[
1
].
w
=
ierror
;
}
// omp
if
(
EVFLAG
)
{
if
(
EFLAG
)
{
ev_global
[
0
]
=
oevdwl
;
ev_global
[
1
]
=
(
acc_t
)
0.0
;
}
if
(
vflag
)
{
ev_global
[
2
]
=
ov0
;
ev_global
[
3
]
=
ov1
;
ev_global
[
4
]
=
ov2
;
ev_global
[
5
]
=
ov3
;
ev_global
[
6
]
=
ov4
;
ev_global
[
7
]
=
ov5
;
}
}
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
*
timer_compute
=
MIC_Wtime
()
-
*
timer_compute
;
#endif
}
// offload
if
(
offload
)
fix
->
stop_watch
(
TIME_OFFLOAD_LATENCY
);
else
fix
->
stop_watch
(
TIME_HOST_PAIR
);
if
(
EVFLAG
)
fix
->
add_result_array
(
f_start
,
ev_global
,
offload
,
eatom
,
0
,
2
);
else
fix
->
add_result_array
(
f_start
,
0
,
offload
,
0
,
0
,
2
);
}
/* ---------------------------------------------------------------------- */
void
PairGayBerneIntel
::
init_style
()
{
PairGayBerne
::
init_style
();
neighbor
->
requests
[
neighbor
->
nrequest
-
1
]
->
intel
=
1
;
int
ifix
=
modify
->
find_fix
(
"package_intel"
);
if
(
ifix
<
0
)
error
->
all
(
FLERR
,
"The 'package intel' command is required for /intel styles"
);
fix
=
static_cast
<
FixIntel
*>
(
modify
->
fix
[
ifix
]);
fix
->
pair_init_check
();
#ifdef _LMP_INTEL_OFFLOAD
if
(
force
->
newton_pair
)
fix
->
set_offload_noghost
(
1
);
_cop
=
fix
->
coprocessor_number
();
#endif
if
(
fix
->
precision
()
==
FixIntel
::
PREC_MODE_MIXED
)
pack_force_const
(
force_const_single
,
fix
->
get_mixed_buffers
());
else
if
(
fix
->
precision
()
==
FixIntel
::
PREC_MODE_DOUBLE
)
pack_force_const
(
force_const_double
,
fix
->
get_double_buffers
());
else
pack_force_const
(
force_const_single
,
fix
->
get_single_buffers
());
}
/* ---------------------------------------------------------------------- */
template
<
class
flt_t
,
class
acc_t
>
void
PairGayBerneIntel
::
pack_force_const
(
ForceConst
<
flt_t
>
&
fc
,
IntelBuffers
<
flt_t
,
acc_t
>
*
buffers
)
{
int
tp1
=
atom
->
ntypes
+
1
;
_max_nbors
=
buffers
->
get_max_nbors
();
int
mthreads
=
comm
->
nthreads
;
if
(
mthreads
<
buffers
->
get_off_threads
())
mthreads
=
buffers
->
get_off_threads
();
fc
.
set_ntypes
(
tp1
,
_max_nbors
,
mthreads
,
memory
,
_cop
);
buffers
->
set_ntypes
(
tp1
);
flt_t
**
cutneighsq
=
buffers
->
get_cutneighsq
();
// Repeat cutsq calculation because done after call to init_style
double
cut
,
cutneigh
;
for
(
int
i
=
1
;
i
<=
atom
->
ntypes
;
i
++
)
{
for
(
int
j
=
i
;
j
<=
atom
->
ntypes
;
j
++
)
{
if
(
setflag
[
i
][
j
]
!=
0
||
(
setflag
[
i
][
i
]
!=
0
&&
setflag
[
j
][
j
]
!=
0
))
{
cut
=
init_one
(
i
,
j
);
cutneigh
=
cut
+
neighbor
->
skin
;
cutsq
[
i
][
j
]
=
cutsq
[
j
][
i
]
=
cut
*
cut
;
cutneighsq
[
i
][
j
]
=
cutneighsq
[
j
][
i
]
=
cutneigh
*
cutneigh
;
}
}
}
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
fc
.
special_lj
[
i
]
=
force
->
special_lj
[
i
];
fc
.
special_lj
[
0
]
=
1.0
;
}
fc
.
gamma
=
gamma
;
fc
.
upsilon
=
upsilon
;
fc
.
mu
=
mu
;
for
(
int
i
=
0
;
i
<
tp1
;
i
++
)
{
for
(
int
j
=
0
;
j
<
tp1
;
j
++
)
{
fc
.
ijc
[
i
][
j
].
lj1
=
lj1
[
i
][
j
];
fc
.
ijc
[
i
][
j
].
lj2
=
lj2
[
i
][
j
];
fc
.
ijc
[
i
][
j
].
cutsq
=
cutsq
[
i
][
j
];
fc
.
ijc
[
i
][
j
].
offset
=
offset
[
i
][
j
];
fc
.
ijc
[
i
][
j
].
sigma
=
sigma
[
i
][
j
];
fc
.
ijc
[
i
][
j
].
epsilon
=
epsilon
[
i
][
j
];
fc
.
ijc
[
i
][
j
].
form
=
form
[
i
][
j
];
fc
.
ijc
[
i
][
j
].
lshape
=
lshape
[
i
]
*
lshape
[
j
];
fc
.
lj34
[
i
][
j
].
lj3
=
lj3
[
i
][
j
];
fc
.
lj34
[
i
][
j
].
lj4
=
lj4
[
i
][
j
];
}
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
fc
.
ic
[
i
].
shape2
[
j
]
=
shape2
[
i
][
j
];
fc
.
ic
[
i
].
well
[
j
]
=
well
[
i
][
j
];
}
}
#ifdef _LMP_INTEL_OFFLOAD
if
(
_cop
<
0
)
return
;
flt_t
*
special_lj
=
fc
.
special_lj
;
FC_PACKED1_T
*
oijc
=
fc
.
ijc
[
0
];
FC_PACKED2_T
*
olj34
=
fc
.
lj34
[
0
];
FC_PACKED3_T
*
oic
=
fc
.
ic
;
flt_t
*
ocutneighsq
=
cutneighsq
[
0
];
int
tp1sq
=
tp1
*
tp1
;
if
(
oijc
!=
NULL
&&
oic
!=
NULL
)
{
#pragma offload_transfer target(mic:_cop) \
in(special_lj: length(4) alloc_if(0) free_if(0)) \
in(oijc,olj34: length(tp1sq) alloc_if(0) free_if(0)) \
in(oic: length(tp1) alloc_if(0) free_if(0)) \
in(ocutneighsq: length(tp1sq))
}
#endif
}
/* ---------------------------------------------------------------------- */
template
<
class
flt_t
>
void
PairGayBerneIntel
::
ForceConst
<
flt_t
>::
set_ntypes
(
const
int
ntypes
,
const
int
one_length
,
const
int
nthreads
,
Memory
*
memory
,
const
int
cop
)
{
if
(
ntypes
!=
_ntypes
)
{
if
(
_ntypes
>
0
)
{
fc_packed3
*
oic
=
ic
;
#ifdef _LMP_INTEL_OFFLOAD
flt_t
*
ospecial_lj
=
special_lj
;
fc_packed1
*
oijc
=
ijc
[
0
];
fc_packed2
*
olj34
=
lj34
[
0
];
flt_t
*
orsq_form
=
rsq_form
[
0
];
flt_t
*
odelx_form
=
delx_form
[
0
];
flt_t
*
odely_form
=
dely_form
[
0
];
flt_t
*
odelz_form
=
delz_form
[
0
];
int
*
ojtype_form
=
jtype_form
[
0
];
int
*
ojlist_form
=
jlist_form
[
0
];
if
(
ospecial_lj
!=
NULL
&&
oijc
!=
NULL
&&
olj34
!=
NULL
&&
orsq_form
!=
NULL
&&
odelx_form
!=
NULL
&&
odely_form
!=
NULL
&&
odelz_form
!=
NULL
&&
ojtype_form
!=
NULL
&&
ojlist_form
!=
NULL
&&
_cop
>=
0
)
{
#pragma offload_transfer target(mic:_cop) \
nocopy(ospecial_lj, oijc, olj34, oic: alloc_if(0) free_if(1)) \
nocopy(orsq_form, odelx_form, odely_form: alloc_if(0) free_if(1)) \
nocopy(odelz_form, ojtype_form, ojlist_form: alloc_if(0) free_if(1))
}
#endif
_memory
->
destroy
(
oic
);
_memory
->
destroy
(
ijc
);
_memory
->
destroy
(
lj34
);
_memory
->
destroy
(
rsq_form
);
_memory
->
destroy
(
delx_form
);
_memory
->
destroy
(
dely_form
);
_memory
->
destroy
(
delz_form
);
_memory
->
destroy
(
jtype_form
);
_memory
->
destroy
(
jlist_form
);
}
if
(
ntypes
>
0
)
{
_cop
=
cop
;
memory
->
create
(
ijc
,
ntypes
,
ntypes
,
"fc.ijc"
);
memory
->
create
(
lj34
,
ntypes
,
ntypes
,
"fc.lj34"
);
memory
->
create
(
ic
,
ntypes
,
"fc.ic"
);
memory
->
create
(
rsq_form
,
nthreads
,
one_length
,
"rsq_form"
);
memory
->
create
(
delx_form
,
nthreads
,
one_length
,
"delx_form"
);
memory
->
create
(
dely_form
,
nthreads
,
one_length
,
"dely_form"
);
memory
->
create
(
delz_form
,
nthreads
,
one_length
,
"delz_form"
);
memory
->
create
(
jtype_form
,
nthreads
,
one_length
,
"jtype_form"
);
memory
->
create
(
jlist_form
,
nthreads
,
one_length
,
"jlist_form"
);
for
(
int
zn
=
0
;
zn
<
nthreads
;
zn
++
)
for
(
int
zo
=
0
;
zo
<
one_length
;
zo
++
)
{
rsq_form
[
zn
][
zo
]
=
10.0
;
delx_form
[
zn
][
zo
]
=
10.0
;
dely_form
[
zn
][
zo
]
=
10.0
;
delz_form
[
zn
][
zo
]
=
10.0
;
jtype_form
[
zn
][
zo
]
=
1
;
jlist_form
[
zn
][
zo
]
=
0
;
}
#ifdef _LMP_INTEL_OFFLOAD
flt_t
*
ospecial_lj
=
special_lj
;
fc_packed1
*
oijc
=
ijc
[
0
];
fc_packed2
*
olj34
=
lj34
[
0
];
fc_packed3
*
oic
=
ic
;
flt_t
*
orsq_form
=
rsq_form
[
0
];
flt_t
*
odelx_form
=
delx_form
[
0
];
flt_t
*
odely_form
=
dely_form
[
0
];
flt_t
*
odelz_form
=
delz_form
[
0
];
int
*
ojtype_form
=
jtype_form
[
0
];
int
*
ojlist_form
=
jlist_form
[
0
];
int
off_onel
=
one_length
*
nthreads
;
int
tp1sq
=
ntypes
*
ntypes
;
if
(
ospecial_lj
!=
NULL
&&
oijc
!=
NULL
&&
olj34
!=
NULL
&&
oic
!=
NULL
&&
orsq_form
!=
NULL
&&
odelx_form
!=
NULL
&&
odely_form
!=
NULL
&&
odelz_form
!=
NULL
&&
ojtype_form
!=
NULL
&&
ojlist_form
!=
NULL
&&
cop
>=
0
)
{
#pragma offload_transfer target(mic:cop) \
nocopy(ospecial_lj: length(4) alloc_if(1) free_if(0)) \
nocopy(oijc,olj34: length(tp1sq) alloc_if(1) free_if(0)) \
nocopy(oic: length(ntypes) alloc_if(1) free_if(0)) \
in(orsq_form: length(off_onel) alloc_if(1) free_if(0)) \
in(odelx_form: length(off_onel) alloc_if(1) free_if(0)) \
in(odely_form: length(off_onel) alloc_if(1) free_if(0)) \
in(odelz_form: length(off_onel) alloc_if(1) free_if(0)) \
in(ojtype_form: length(off_onel) alloc_if(1) free_if(0)) \
in(ojlist_form: length(off_onel) alloc_if(1) free_if(0))
}
#endif
}
}
_ntypes
=
ntypes
;
_memory
=
memory
;
}
Event Timeline
Log In to Comment