Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F88713975
npair_intel.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Oct 20, 07:48
Size
31 KB
Mime Type
text/x-c
Expires
Tue, Oct 22, 07:48 (2 d)
Engine
blob
Format
Raw Data
Handle
21810860
Attached To
rLAMMPS lammps
npair_intel.cpp
View Options
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include "npair_intel.h"
#include "nstencil.h"
using
namespace
LAMMPS_NS
;
/* ---------------------------------------------------------------------- */
NPairIntel
::
NPairIntel
(
LAMMPS
*
lmp
)
:
NPair
(
lmp
)
{
int
ifix
=
modify
->
find_fix
(
"package_intel"
);
if
(
ifix
<
0
)
error
->
all
(
FLERR
,
"The 'package intel' command is required for /intel styles"
);
_fix
=
static_cast
<
FixIntel
*>
(
modify
->
fix
[
ifix
]);
#ifdef _LMP_INTEL_OFFLOAD
_cop
=
_fix
->
coprocessor_number
();
_off_map_stencil
=
0
;
#endif
}
/* ---------------------------------------------------------------------- */
NPairIntel
::~
NPairIntel
()
{
#ifdef _LMP_INTEL_OFFLOAD
if
(
_off_map_stencil
)
{
const
int
*
stencil
=
this
->
stencil
;
#pragma offload_transfer target(mic:_cop) \
nocopy(stencil:alloc_if(0) free_if(1))
}
#endif
}
/* ---------------------------------------------------------------------- */
template
<
class
flt_t
,
class
acc_t
,
int
offload_noghost
,
int
need_ic
,
int
FULL
,
int
TRI
,
int
THREE
>
void
NPairIntel
::
bin_newton
(
const
int
offload
,
NeighList
*
list
,
IntelBuffers
<
flt_t
,
acc_t
>
*
buffers
,
const
int
astart
,
const
int
aend
,
const
int
offload_end
)
{
if
(
aend
-
astart
==
0
)
return
;
const
int
nall
=
atom
->
nlocal
+
atom
->
nghost
;
int
pad
=
1
;
int
nall_t
=
nall
;
#ifdef _LMP_INTEL_OFFLOAD
if
(
offload_noghost
&&
offload
)
nall_t
=
atom
->
nlocal
;
if
(
THREE
==
0
&&
offload
)
{
if
(
INTEL_MIC_NBOR_PAD
>
1
)
pad
=
INTEL_MIC_NBOR_PAD
*
sizeof
(
float
)
/
sizeof
(
flt_t
);
}
else
#endif
if
(
THREE
==
0
&&
INTEL_NBOR_PAD
>
1
)
pad
=
INTEL_NBOR_PAD
*
sizeof
(
float
)
/
sizeof
(
flt_t
);
const
int
pad_width
=
pad
;
const
int
pack_width
=
_fix
->
nbor_pack_width
();
const
ATOM_T
*
_noalias
const
x
=
buffers
->
get_x
();
int
*
_noalias
const
firstneigh
=
buffers
->
firstneigh
(
list
);
const
int
e_nall
=
nall_t
;
const
int
molecular
=
atom
->
molecular
;
int
*
ns
=
NULL
;
tagint
*
s
=
NULL
;
int
tag_size
=
0
,
special_size
;
if
(
buffers
->
need_tag
())
tag_size
=
e_nall
;
if
(
molecular
)
{
s
=
atom
->
special
[
0
];
ns
=
atom
->
nspecial
[
0
];
special_size
=
aend
;
}
else
{
s
=
&
buffers
->
_special_holder
;
ns
=
&
buffers
->
_nspecial_holder
;
special_size
=
0
;
}
const
tagint
*
_noalias
const
special
=
s
;
const
int
*
_noalias
const
nspecial
=
ns
;
const
int
maxspecial
=
atom
->
maxspecial
;
const
tagint
*
_noalias
const
tag
=
atom
->
tag
;
int
*
_noalias
const
ilist
=
list
->
ilist
;
int
*
_noalias
numneigh
=
list
->
numneigh
;
int
*
_noalias
const
cnumneigh
=
buffers
->
cnumneigh
(
list
);
const
int
nstencil
=
this
->
nstencil
;
const
int
*
_noalias
const
stencil
=
this
->
stencil
;
const
flt_t
*
_noalias
const
cutneighsq
=
buffers
->
get_cutneighsq
()[
0
];
const
int
ntypes
=
atom
->
ntypes
+
1
;
const
int
nlocal
=
atom
->
nlocal
;
#ifndef _LMP_INTEL_OFFLOAD
int
*
const
mask
=
atom
->
mask
;
tagint
*
const
molecule
=
atom
->
molecule
;
#endif
int
tnum
;
int
*
overflow
;
double
*
timer_compute
;
#ifdef _LMP_INTEL_OFFLOAD
if
(
offload
)
{
timer_compute
=
_fix
->
off_watch_neighbor
();
tnum
=
buffers
->
get_off_threads
();
overflow
=
_fix
->
get_off_overflow_flag
();
_fix
->
stop_watch
(
TIME_HOST_NEIGHBOR
);
_fix
->
start_watch
(
TIME_OFFLOAD_LATENCY
);
}
else
#endif
{
tnum
=
comm
->
nthreads
;
overflow
=
_fix
->
get_overflow_flag
();
}
const
int
nthreads
=
tnum
;
const
int
maxnbors
=
buffers
->
get_max_nbors
();
int
*
_noalias
const
atombin
=
buffers
->
get_atombin
();
const
int
*
_noalias
const
binpacked
=
buffers
->
get_binpacked
();
const
int
xperiodic
=
domain
->
xperiodic
;
const
int
yperiodic
=
domain
->
yperiodic
;
const
int
zperiodic
=
domain
->
zperiodic
;
const
flt_t
xprd_half
=
domain
->
xprd_half
;
const
flt_t
yprd_half
=
domain
->
yprd_half
;
const
flt_t
zprd_half
=
domain
->
zprd_half
;
flt_t
*
_noalias
const
ncachex
=
buffers
->
get_ncachex
();
flt_t
*
_noalias
const
ncachey
=
buffers
->
get_ncachey
();
flt_t
*
_noalias
const
ncachez
=
buffers
->
get_ncachez
();
int
*
_noalias
const
ncachej
=
buffers
->
get_ncachej
();
int
*
_noalias
const
ncachejtype
=
buffers
->
get_ncachejtype
();
int
*
_noalias
const
ncachetag
=
buffers
->
get_ncachetag
();
const
int
ncache_stride
=
buffers
->
ncache_stride
();
#ifdef _LMP_INTEL_OFFLOAD
const
int
*
_noalias
const
binhead
=
this
->
binhead
;
const
int
*
_noalias
const
bins
=
this
->
bins
;
const
int
cop
=
_fix
->
coprocessor_number
();
const
int
separate_buffers
=
_fix
->
separate_buffers
();
#pragma offload target(mic:cop) if(offload) \
in(x:length(e_nall+1) alloc_if(0) free_if(0)) \
in(tag:length(tag_size) alloc_if(0) free_if(0)) \
in(special:length(special_size*maxspecial) alloc_if(0) free_if(0)) \
in(nspecial:length(special_size*3) alloc_if(0) free_if(0)) \
in(bins,binpacked:length(nall) alloc_if(0) free_if(0)) \
in(binhead:length(mbins+1) alloc_if(0) free_if(0)) \
in(cutneighsq:length(0) alloc_if(0) free_if(0)) \
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
out(numneigh:length(0) alloc_if(0) free_if(0)) \
in(ilist:length(0) alloc_if(0) free_if(0)) \
in(atombin:length(aend) alloc_if(0) free_if(0)) \
in(stencil:length(nstencil) alloc_if(0) free_if(0)) \
in(ncachex,ncachey,ncachez,ncachej:length(0) alloc_if(0) free_if(0)) \
in(ncachejtype,ncachetag:length(0) alloc_if(0) free_if(0)) \
in(ncache_stride,maxnbors,nthreads,maxspecial,nstencil,e_nall,offload) \
in(pad_width,offload_end,separate_buffers,astart,aend,nlocal,molecular) \
in(ntypes,xperiodic,yperiodic,zperiodic,xprd_half,yprd_half,zprd_half) \
in(pack_width) \
out(overflow:length(5) alloc_if(0) free_if(0)) \
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
signal(tag)
#endif
{
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
*
timer_compute
=
MIC_Wtime
();
#endif
#ifdef _LMP_INTEL_OFFLOAD
overflow
[
LMP_LOCAL_MIN
]
=
astart
;
overflow
[
LMP_LOCAL_MAX
]
=
aend
-
1
;
overflow
[
LMP_GHOST_MIN
]
=
e_nall
;
overflow
[
LMP_GHOST_MAX
]
=
-
1
;
#endif
int
nstencilp
=
0
;
int
binstart
[
INTEL_MAX_STENCIL
],
binend
[
INTEL_MAX_STENCIL
];
for
(
int
k
=
0
;
k
<
nstencil
;
k
++
)
{
binstart
[
nstencilp
]
=
stencil
[
k
];
int
end
=
stencil
[
k
]
+
1
;
for
(
int
kk
=
k
+
1
;
kk
<
nstencil
;
kk
++
)
{
if
(
stencil
[
kk
-
1
]
+
1
==
stencil
[
kk
])
{
end
++
;
k
++
;
}
else
break
;
}
binend
[
nstencilp
]
=
end
;
nstencilp
++
;
}
#if defined(_OPENMP)
#pragma omp parallel default(none) \
shared(numneigh, overflow, nstencilp, binstart, binend)
#endif
{
#ifdef _LMP_INTEL_OFFLOAD
int
lmin
=
e_nall
,
lmax
=
-
1
,
gmin
=
e_nall
,
gmax
=
-
1
;
#endif
const
int
num
=
aend
-
astart
;
int
tid
,
ifrom
,
ito
;
if
(
THREE
)
{
IP_PRE_omp_range_id_vec
(
ifrom
,
ito
,
tid
,
num
,
nthreads
,
pack_width
);
}
else
{
IP_PRE_omp_range_id
(
ifrom
,
ito
,
tid
,
num
,
nthreads
);
}
ifrom
+=
astart
;
ito
+=
astart
;
int
e_ito
=
ito
;
if
(
THREE
&&
ito
==
num
)
{
int
imod
=
ito
&
(
pack_width
-
1
);
if
(
imod
)
e_ito
+=
pack_width
-
imod
;
}
const
int
list_size
=
(
e_ito
+
tid
*
2
+
2
)
*
maxnbors
;
int
which
;
int
pack_offset
=
maxnbors
;
if
(
THREE
)
pack_offset
*=
pack_width
;
int
ct
=
(
ifrom
+
tid
*
2
)
*
maxnbors
;
int
*
neighptr
=
firstneigh
+
ct
;
const
int
obound
=
pack_offset
+
maxnbors
*
2
;
const
int
toffs
=
tid
*
ncache_stride
;
flt_t
*
_noalias
const
tx
=
ncachex
+
toffs
;
flt_t
*
_noalias
const
ty
=
ncachey
+
toffs
;
flt_t
*
_noalias
const
tz
=
ncachez
+
toffs
;
int
*
_noalias
const
tj
=
ncachej
+
toffs
;
int
*
_noalias
const
tjtype
=
ncachejtype
+
toffs
;
int
*
_noalias
const
ttag
=
ncachetag
+
toffs
;
flt_t
*
_noalias
itx
;
flt_t
*
_noalias
ity
;
flt_t
*
_noalias
itz
;
int
*
_noalias
itj
;
int
*
_noalias
itjtype
;
// loop over all atoms in other bins in stencil, store every pair
int
istart
,
icount
,
ncount
,
oldbin
=
-
9999999
,
lane
,
max_chunk
;
if
(
THREE
)
{
lane
=
0
;
max_chunk
=
0
;
}
for
(
int
i
=
ifrom
;
i
<
ito
;
i
++
)
{
const
flt_t
xtmp
=
x
[
i
].
x
;
const
flt_t
ytmp
=
x
[
i
].
y
;
const
flt_t
ztmp
=
x
[
i
].
z
;
const
int
itype
=
x
[
i
].
w
;
tagint
itag
;
if
(
THREE
)
itag
=
tag
[
i
];
const
int
ioffset
=
ntypes
*
itype
;
const
int
ibin
=
atombin
[
i
];
if
(
ibin
!=
oldbin
)
{
oldbin
=
ibin
;
ncount
=
0
;
for
(
int
k
=
0
;
k
<
nstencilp
;
k
++
)
{
const
int
bstart
=
binhead
[
ibin
+
binstart
[
k
]];
const
int
bend
=
binhead
[
ibin
+
binend
[
k
]];
#if defined(LMP_SIMD_COMPILER)
#pragma simd
#endif
for
(
int
jj
=
bstart
;
jj
<
bend
;
jj
++
)
tj
[
ncount
++
]
=
binpacked
[
jj
];
}
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd
#endif
for
(
int
u
=
0
;
u
<
ncount
;
u
++
)
{
const
int
j
=
tj
[
u
];
tx
[
u
]
=
x
[
j
].
x
;
ty
[
u
]
=
x
[
j
].
y
;
tz
[
u
]
=
x
[
j
].
z
;
tjtype
[
u
]
=
x
[
j
].
w
;
if
(
THREE
)
ttag
[
u
]
=
tag
[
j
];
}
if
(
FULL
==
0
||
TRI
==
1
)
{
icount
=
0
;
istart
=
ncount
;
const
int
alignb
=
INTEL_DATA_ALIGN
/
sizeof
(
int
);
int
nedge
=
istart
&
(
alignb
-
1
);
if
(
nedge
)
istart
+
(
alignb
-
nedge
);
itx
=
tx
+
istart
;
ity
=
ty
+
istart
;
itz
=
tz
+
istart
;
itj
=
tj
+
istart
;
itjtype
=
tjtype
+
istart
;
const
int
bstart
=
binhead
[
ibin
];
const
int
bend
=
binhead
[
ibin
+
1
];
#if defined(LMP_SIMD_COMPILER)
#pragma simd
#endif
for
(
int
jj
=
bstart
;
jj
<
bend
;
jj
++
)
{
const
int
j
=
binpacked
[
jj
];
itj
[
icount
]
=
j
;
itx
[
icount
]
=
x
[
j
].
x
;
ity
[
icount
]
=
x
[
j
].
y
;
itz
[
icount
]
=
x
[
j
].
z
;
itjtype
[
icount
]
=
x
[
j
].
w
;
icount
++
;
}
if
(
icount
+
istart
>
obound
)
*
overflow
=
1
;
}
else
if
(
ncount
>
obound
)
*
overflow
=
1
;
}
// ---------------------- Loop over i bin
int
n
=
0
;
if
(
FULL
==
0
||
TRI
==
1
)
{
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma ivdep
#endif
for
(
int
u
=
0
;
u
<
icount
;
u
++
)
{
int
addme
=
1
;
int
j
=
itj
[
u
];
// Cutoff Check
const
flt_t
delx
=
xtmp
-
itx
[
u
];
const
flt_t
dely
=
ytmp
-
ity
[
u
];
const
flt_t
delz
=
ztmp
-
itz
[
u
];
const
int
jtype
=
itjtype
[
u
];
const
flt_t
rsq
=
delx
*
delx
+
dely
*
dely
+
delz
*
delz
;
if
(
rsq
>
cutneighsq
[
ioffset
+
jtype
])
addme
=
0
;
// i bin (half) check and offload ghost check
if
(
j
<
nlocal
)
{
const
int
ijmod
=
(
i
+
j
)
&
1
;
if
(
i
>
j
)
{
if
(
ijmod
==
0
)
addme
=
0
;
}
else
if
(
i
<
j
)
{
if
(
ijmod
==
1
)
addme
=
0
;
}
else
addme
=
0
;
#ifdef _LMP_INTEL_OFFLOAD
if
(
offload_noghost
&&
i
<
offload_end
)
addme
=
0
;
#endif
}
else
{
#ifdef _LMP_INTEL_OFFLOAD
if
(
offload_noghost
&&
offload
)
addme
=
0
;
#endif
if
(
itz
[
u
]
<
ztmp
)
addme
=
0
;
if
(
itz
[
u
]
==
ztmp
)
{
if
(
ity
[
u
]
<
ytmp
)
addme
=
0
;
if
(
ity
[
u
]
==
ytmp
&&
itx
[
u
]
<
xtmp
)
addme
=
0
;
}
}
if
(
need_ic
)
{
int
no_special
;
ominimum_image_check
(
no_special
,
delx
,
dely
,
delz
);
if
(
no_special
)
j
=
-
j
-
1
;
}
if
(
addme
)
neighptr
[
n
++
]
=
j
;
}
}
// if FULL==0
// ---------------------- Loop over other bins
int
n2
,
*
neighptr2
;
if
(
THREE
)
{
n
=
pack_offset
;
n2
=
pack_offset
+
maxnbors
;
neighptr2
=
neighptr
;
}
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma ivdep
#endif
for
(
int
u
=
0
;
u
<
ncount
;
u
++
)
{
int
addme
=
1
;
int
j
=
tj
[
u
];
if
(
FULL
)
if
(
i
==
j
)
addme
=
0
;
// Cutoff Check
const
flt_t
delx
=
xtmp
-
tx
[
u
];
const
flt_t
dely
=
ytmp
-
ty
[
u
];
const
flt_t
delz
=
ztmp
-
tz
[
u
];
const
int
jtype
=
tjtype
[
u
];
const
flt_t
rsq
=
delx
*
delx
+
dely
*
dely
+
delz
*
delz
;
if
(
rsq
>
cutneighsq
[
ioffset
+
jtype
])
addme
=
0
;
// Triclinic
if
(
TRI
)
{
if
(
tz
[
u
]
<
ztmp
)
addme
=
0
;
if
(
tz
[
u
]
==
ztmp
)
{
if
(
ty
[
u
]
<
ytmp
)
addme
=
0
;
if
(
ty
[
u
]
==
ytmp
)
{
if
(
tx
[
u
]
<
xtmp
)
addme
=
0
;
if
(
tx
[
u
]
==
xtmp
&&
j
<=
i
)
addme
=
0
;
}
}
}
// offload ghost check
#ifdef _LMP_INTEL_OFFLOAD
if
(
offload_noghost
)
{
if
(
j
<
nlocal
)
{
if
(
i
<
offload_end
)
addme
=
0
;
}
else
if
(
offload
)
addme
=
0
;
}
#endif
if
(
need_ic
)
{
int
no_special
;
ominimum_image_check
(
no_special
,
delx
,
dely
,
delz
);
if
(
no_special
)
j
=
-
j
-
1
;
}
if
(
THREE
)
{
const
int
jtag
=
ttag
[
u
];
int
flist
=
0
;
if
(
itag
>
jtag
)
{
if
(((
itag
+
jtag
)
&
1
)
==
0
)
flist
=
1
;
}
else
if
(
itag
<
jtag
)
{
if
(((
itag
+
jtag
)
&
1
)
==
1
)
flist
=
1
;
}
else
{
if
(
tz
[
u
]
<
ztmp
)
flist
=
1
;
else
if
(
tz
[
u
]
==
ztmp
&&
ty
[
u
]
<
ytmp
)
flist
=
1
;
else
if
(
tz
[
u
]
==
ztmp
&&
ty
[
u
]
==
ytmp
&&
tx
[
u
]
<
xtmp
)
flist
=
1
;
}
if
(
addme
)
{
if
(
flist
)
neighptr2
[
n2
++
]
=
j
;
else
neighptr
[
n
++
]
=
j
;
}
}
else
{
if
(
addme
)
neighptr
[
n
++
]
=
j
;
}
}
// for u
#ifndef _LMP_INTEL_OFFLOAD
if
(
exclude
)
{
int
alln
=
n
;
if
(
THREE
)
n
=
pack_offset
;
else
n
=
0
;
for
(
int
u
=
pack_offset
;
u
<
alln
;
u
++
)
{
const
int
j
=
neighptr
[
u
];
int
pj
=
j
;
if
(
need_ic
)
if
(
pj
<
0
)
pj
=
-
j
-
1
;
const
int
jtype
=
x
[
pj
].
w
;
if
(
exclusion
(
i
,
pj
,
itype
,
jtype
,
mask
,
molecule
))
continue
;
neighptr
[
n
++
]
=
j
;
}
if
(
THREE
)
{
alln
=
n2
;
n2
=
pack_offset
+
maxnbors
;
for
(
int
u
=
pack_offset
+
maxnbors
;
u
<
alln
;
u
++
)
{
const
int
j
=
neighptr
[
u
];
int
pj
=
j
;
if
(
need_ic
)
if
(
pj
<
0
)
pj
=
-
j
-
1
;
const
int
jtype
=
x
[
pj
].
w
;
if
(
exclusion
(
i
,
pj
,
itype
,
jtype
,
mask
,
molecule
))
continue
;
neighptr
[
n2
++
]
=
j
;
}
}
}
#endif
int
ns
;
if
(
THREE
)
{
int
alln
=
n
;
ns
=
n
-
pack_offset
;
atombin
[
i
]
=
ns
;
n
=
lane
;
for
(
int
u
=
pack_offset
;
u
<
alln
;
u
++
)
{
neighptr
[
n
]
=
neighptr
[
u
];
n
+=
pack_width
;
}
ns
+=
n2
-
pack_offset
-
maxnbors
;
for
(
int
u
=
pack_offset
+
maxnbors
;
u
<
n2
;
u
++
)
{
neighptr
[
n
]
=
neighptr
[
u
];
n
+=
pack_width
;
}
if
(
ns
>
maxnbors
)
*
overflow
=
1
;
}
else
if
(
n
>
maxnbors
)
*
overflow
=
1
;
ilist
[
i
]
=
i
;
cnumneigh
[
i
]
=
ct
;
if
(
THREE
)
{
cnumneigh
[
i
]
+=
lane
;
numneigh
[
i
]
=
ns
;
}
else
{
int
edge
=
n
&
(
pad_width
-
1
);
if
(
edge
)
{
const
int
pad_end
=
n
+
(
pad_width
-
edge
);
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \
avg=INTEL_COMPILE_WIDTH/2
#endif
for
(
;
n
<
pad_end
;
n
++
)
neighptr
[
n
]
=
e_nall
;
}
numneigh
[
i
]
=
n
;
}
if
(
THREE
)
{
if
(
ns
>
max_chunk
)
max_chunk
=
ns
;
lane
++
;
if
(
lane
==
pack_width
)
{
ct
+=
max_chunk
*
pack_width
;
const
int
alignb
=
(
INTEL_DATA_ALIGN
/
sizeof
(
int
));
const
int
edge
=
ct
&
(
alignb
-
1
);
if
(
edge
)
ct
+=
alignb
-
edge
;
neighptr
=
firstneigh
+
ct
;
max_chunk
=
0
;
pack_offset
=
maxnbors
*
pack_width
;
lane
=
0
;
if
(
ct
+
obound
>
list_size
)
{
if
(
i
<
ito
-
1
)
{
*
overflow
=
1
;
ct
=
(
ifrom
+
tid
*
2
)
*
maxnbors
;
}
}
}
}
else
{
ct
+=
n
;
const
int
alignb
=
(
INTEL_DATA_ALIGN
/
sizeof
(
int
));
const
int
edge
=
ct
&
(
alignb
-
1
);
if
(
edge
)
ct
+=
alignb
-
edge
;
neighptr
=
firstneigh
+
ct
;
if
(
ct
+
obound
>
list_size
)
{
if
(
i
<
ito
-
1
)
{
*
overflow
=
1
;
ct
=
(
ifrom
+
tid
*
2
)
*
maxnbors
;
}
}
}
}
if
(
*
overflow
==
1
)
for
(
int
i
=
ifrom
;
i
<
ito
;
i
++
)
numneigh
[
i
]
=
0
;
#ifdef _LMP_INTEL_OFFLOAD
int
vlmin
=
lmin
,
vlmax
=
lmax
,
vgmin
=
gmin
,
vgmax
=
gmax
;
int
ghost_offset
=
0
,
nall_offset
=
e_nall
;
if
(
separate_buffers
)
{
for
(
int
i
=
ifrom
;
i
<
ito
;
++
i
)
{
int
*
_noalias
jlist
=
firstneigh
+
cnumneigh
[
i
];
const
int
jnum
=
numneigh
[
i
];
#if __INTEL_COMPILER+0 > 1499
#pragma vector aligned
#pragma simd reduction(max:vlmax,vgmax) reduction(min:vlmin, vgmin)
#endif
for
(
int
jj
=
0
;
jj
<
jnum
;
jj
++
)
{
int
j
=
jlist
[
jj
];
if
(
need_ic
&&
j
<
0
)
j
=
-
j
-
1
;
if
(
j
<
nlocal
)
{
if
(
j
<
vlmin
)
vlmin
=
j
;
if
(
j
>
vlmax
)
vlmax
=
j
;
}
else
{
if
(
j
<
vgmin
)
vgmin
=
j
;
if
(
j
>
vgmax
)
vgmax
=
j
;
}
}
}
lmin
=
MIN
(
lmin
,
vlmin
);
gmin
=
MIN
(
gmin
,
vgmin
);
lmax
=
MAX
(
lmax
,
vlmax
);
gmax
=
MAX
(
gmax
,
vgmax
);
#if defined(_OPENMP)
#pragma omp critical
#endif
{
if
(
lmin
<
overflow
[
LMP_LOCAL_MIN
])
overflow
[
LMP_LOCAL_MIN
]
=
lmin
;
if
(
lmax
>
overflow
[
LMP_LOCAL_MAX
])
overflow
[
LMP_LOCAL_MAX
]
=
lmax
;
if
(
gmin
<
overflow
[
LMP_GHOST_MIN
])
overflow
[
LMP_GHOST_MIN
]
=
gmin
;
if
(
gmax
>
overflow
[
LMP_GHOST_MAX
])
overflow
[
LMP_GHOST_MAX
]
=
gmax
;
}
#pragma omp barrier
int
nghost
=
overflow
[
LMP_GHOST_MAX
]
+
1
-
overflow
[
LMP_GHOST_MIN
];
if
(
nghost
<
0
)
nghost
=
0
;
if
(
offload
)
{
ghost_offset
=
overflow
[
LMP_GHOST_MIN
]
-
overflow
[
LMP_LOCAL_MAX
]
-
1
;
nall_offset
=
overflow
[
LMP_LOCAL_MAX
]
+
1
+
nghost
;
}
else
{
ghost_offset
=
overflow
[
LMP_GHOST_MIN
]
-
nlocal
;
nall_offset
=
nlocal
+
nghost
;
}
}
// if separate_buffers
#endif
if
(
molecular
)
{
for
(
int
i
=
ifrom
;
i
<
ito
;
++
i
)
{
int
*
_noalias
jlist
=
firstneigh
+
cnumneigh
[
i
];
const
int
jnum
=
numneigh
[
i
];
if
(
THREE
)
{
const
int
trip
=
jnum
*
pack_width
;
for
(
int
jj
=
0
;
jj
<
trip
;
jj
+=
pack_width
)
{
const
int
j
=
jlist
[
jj
];
if
(
need_ic
&&
j
<
0
)
{
which
=
0
;
jlist
[
jj
]
=
-
j
-
1
;
}
else
ofind_special
(
which
,
special
,
nspecial
,
i
,
tag
[
j
]);
#ifdef _LMP_INTEL_OFFLOAD
if
(
j
>=
nlocal
)
{
if
(
j
==
e_nall
)
jlist
[
jj
]
=
nall_offset
;
else
if
(
which
)
jlist
[
jj
]
=
(
j
-
ghost_offset
)
^
(
which
<<
SBBITS
);
else
jlist
[
jj
]
-=
ghost_offset
;
}
else
#endif
if
(
which
)
jlist
[
jj
]
=
j
^
(
which
<<
SBBITS
);
}
}
else
{
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd
#endif
for
(
int
jj
=
0
;
jj
<
jnum
;
jj
++
)
{
const
int
j
=
jlist
[
jj
];
if
(
need_ic
&&
j
<
0
)
{
which
=
0
;
jlist
[
jj
]
=
-
j
-
1
;
}
else
ofind_special
(
which
,
special
,
nspecial
,
i
,
tag
[
j
]);
#ifdef _LMP_INTEL_OFFLOAD
if
(
j
>=
nlocal
)
{
if
(
j
==
e_nall
)
jlist
[
jj
]
=
nall_offset
;
else
if
(
which
)
jlist
[
jj
]
=
(
j
-
ghost_offset
)
^
(
which
<<
SBBITS
);
else
jlist
[
jj
]
-=
ghost_offset
;
}
else
#endif
if
(
which
)
jlist
[
jj
]
=
j
^
(
which
<<
SBBITS
);
}
}
}
// for i
}
// if molecular
#ifdef _LMP_INTEL_OFFLOAD
else
if
(
separate_buffers
)
{
for
(
int
i
=
ifrom
;
i
<
ito
;
++
i
)
{
int
*
_noalias
jlist
=
firstneigh
+
cnumneigh
[
i
];
const
int
jnum
=
numneigh
[
i
];
int
jj
=
0
;
#pragma vector aligned
#pragma simd
for
(
jj
=
0
;
jj
<
jnum
;
jj
++
)
{
if
(
jlist
[
jj
]
>=
nlocal
)
{
if
(
jlist
[
jj
]
==
e_nall
)
jlist
[
jj
]
=
nall_offset
;
else
jlist
[
jj
]
-=
ghost_offset
;
}
}
}
}
#endif
}
// end omp
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
*
timer_compute
=
MIC_Wtime
()
-
*
timer_compute
;
#endif
}
// end offload
#ifdef _LMP_INTEL_OFFLOAD
if
(
offload
)
{
_fix
->
stop_watch
(
TIME_OFFLOAD_LATENCY
);
_fix
->
start_watch
(
TIME_HOST_NEIGHBOR
);
for
(
int
n
=
0
;
n
<
aend
;
n
++
)
{
ilist
[
n
]
=
n
;
numneigh
[
n
]
=
0
;
}
}
else
{
for
(
int
i
=
astart
;
i
<
aend
;
i
++
)
list
->
firstneigh
[
i
]
=
firstneigh
+
cnumneigh
[
i
];
if
(
separate_buffers
)
{
_fix
->
start_watch
(
TIME_PACK
);
_fix
->
set_neighbor_host_sizes
();
buffers
->
pack_sep_from_single
(
_fix
->
host_min_local
(),
_fix
->
host_used_local
(),
_fix
->
host_min_ghost
(),
_fix
->
host_used_ghost
());
_fix
->
stop_watch
(
TIME_PACK
);
}
}
#else
#pragma vector aligned
#pragma simd
for
(
int
i
=
astart
;
i
<
aend
;
i
++
)
list
->
firstneigh
[
i
]
=
firstneigh
+
cnumneigh
[
i
];
#endif
}
/* ---------------------------------------------------------------------- */
#ifdef _LMP_INTEL_OFFLOAD
void
NPairIntel
::
grow_stencil
()
{
if
(
_off_map_stencil
!=
stencil
)
{
if
(
_off_map_stencil
)
{
const
int
*
stencil
=
_off_map_stencil
;
#pragma offload_transfer target(mic:_cop) \
nocopy(stencil:alloc_if(0) free_if(1))
}
_off_map_stencil
=
stencil
;
const
int
*
stencil
=
_off_map_stencil
;
const
int
maxstencil
=
ns
->
get_maxstencil
();
#pragma offload_transfer target(mic:_cop) \
in(stencil:length(maxstencil) alloc_if(1) free_if(0))
}
}
#endif
/* ---------------------------------------------------------------------- */
// ---- Half, no IC
template
void
NPairIntel
::
bin_newton
<
float
,
float
,
0
,
0
,
0
,
0
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
float
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
float
,
double
,
0
,
0
,
0
,
0
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
double
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
double
,
double
,
0
,
0
,
0
,
0
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
double
,
double
>
*
,
const
int
,
const
int
,
const
int
);
// ---- Half, IC
template
void
NPairIntel
::
bin_newton
<
float
,
float
,
0
,
1
,
0
,
0
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
float
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
float
,
double
,
0
,
1
,
0
,
0
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
double
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
double
,
double
,
0
,
1
,
0
,
0
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
double
,
double
>
*
,
const
int
,
const
int
,
const
int
);
// ---- Tri, no IC
template
void
NPairIntel
::
bin_newton
<
float
,
float
,
0
,
0
,
0
,
1
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
float
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
float
,
double
,
0
,
0
,
0
,
1
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
double
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
double
,
double
,
0
,
0
,
0
,
1
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
double
,
double
>
*
,
const
int
,
const
int
,
const
int
);
// ---- Tri, IC
template
void
NPairIntel
::
bin_newton
<
float
,
float
,
0
,
1
,
0
,
1
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
float
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
float
,
double
,
0
,
1
,
0
,
1
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
double
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
double
,
double
,
0
,
1
,
0
,
1
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
double
,
double
>
*
,
const
int
,
const
int
,
const
int
);
// ---- Full, no IC
template
void
NPairIntel
::
bin_newton
<
float
,
float
,
0
,
0
,
1
,
0
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
float
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
float
,
double
,
0
,
0
,
1
,
0
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
double
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
double
,
double
,
0
,
0
,
1
,
0
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
double
,
double
>
*
,
const
int
,
const
int
,
const
int
);
// ---- Full, IC
template
void
NPairIntel
::
bin_newton
<
float
,
float
,
0
,
1
,
1
,
0
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
float
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
float
,
double
,
0
,
1
,
1
,
0
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
double
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
double
,
double
,
0
,
1
,
1
,
0
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
double
,
double
>
*
,
const
int
,
const
int
,
const
int
);
// ---- 3-body, no IC
template
void
NPairIntel
::
bin_newton
<
float
,
float
,
0
,
0
,
1
,
0
,
1
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
float
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
float
,
double
,
0
,
0
,
1
,
0
,
1
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
double
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
double
,
double
,
0
,
0
,
1
,
0
,
1
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
double
,
double
>
*
,
const
int
,
const
int
,
const
int
);
// ---- 3-body, IC
template
void
NPairIntel
::
bin_newton
<
float
,
float
,
0
,
1
,
1
,
0
,
1
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
float
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
float
,
double
,
0
,
1
,
1
,
0
,
1
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
double
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
double
,
double
,
0
,
1
,
1
,
0
,
1
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
double
,
double
>
*
,
const
int
,
const
int
,
const
int
);
#ifdef _LMP_INTEL_OFFLOAD
// ---- Half, no IC, no ghost
template
void
NPairIntel
::
bin_newton
<
float
,
float
,
1
,
0
,
0
,
0
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
float
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
float
,
double
,
1
,
0
,
0
,
0
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
double
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
double
,
double
,
1
,
0
,
0
,
0
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
double
,
double
>
*
,
const
int
,
const
int
,
const
int
);
// ---- Half, IC, no ghost
template
void
NPairIntel
::
bin_newton
<
float
,
float
,
1
,
1
,
0
,
0
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
float
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
float
,
double
,
1
,
1
,
0
,
0
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
double
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
double
,
double
,
1
,
1
,
0
,
0
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
double
,
double
>
*
,
const
int
,
const
int
,
const
int
);
// ---- Tri, no IC, no ghost
template
void
NPairIntel
::
bin_newton
<
float
,
float
,
1
,
0
,
0
,
1
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
float
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
float
,
double
,
1
,
0
,
0
,
1
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
double
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
double
,
double
,
1
,
0
,
0
,
1
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
double
,
double
>
*
,
const
int
,
const
int
,
const
int
);
// ---- Tri, IC, no ghost
template
void
NPairIntel
::
bin_newton
<
float
,
float
,
1
,
1
,
0
,
1
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
float
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
float
,
double
,
1
,
1
,
0
,
1
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
double
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
double
,
double
,
1
,
1
,
0
,
1
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
double
,
double
>
*
,
const
int
,
const
int
,
const
int
);
// ---- Full, no IC, no ghost
template
void
NPairIntel
::
bin_newton
<
float
,
float
,
1
,
0
,
1
,
0
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
float
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
float
,
double
,
1
,
0
,
1
,
0
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
double
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
double
,
double
,
1
,
0
,
1
,
0
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
double
,
double
>
*
,
const
int
,
const
int
,
const
int
);
// ---- Full, IC, no ghost
template
void
NPairIntel
::
bin_newton
<
float
,
float
,
1
,
1
,
1
,
0
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
float
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
float
,
double
,
1
,
1
,
1
,
0
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
double
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
double
,
double
,
1
,
1
,
1
,
0
,
0
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
double
,
double
>
*
,
const
int
,
const
int
,
const
int
);
// ---- 3-body, no IC, no ghost
template
void
NPairIntel
::
bin_newton
<
float
,
float
,
1
,
0
,
1
,
0
,
1
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
float
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
float
,
double
,
1
,
0
,
1
,
0
,
1
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
double
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
double
,
double
,
1
,
0
,
1
,
0
,
1
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
double
,
double
>
*
,
const
int
,
const
int
,
const
int
);
// ---- 3-body, IC, no ghost
template
void
NPairIntel
::
bin_newton
<
float
,
float
,
1
,
1
,
1
,
0
,
1
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
float
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
float
,
double
,
1
,
1
,
1
,
0
,
1
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
float
,
double
>
*
,
const
int
,
const
int
,
const
int
);
template
void
NPairIntel
::
bin_newton
<
double
,
double
,
1
,
1
,
1
,
0
,
1
>
(
const
int
,
NeighList
*
,
IntelBuffers
<
double
,
double
>
*
,
const
int
,
const
int
,
const
int
);
#endif
Event Timeline
Log In to Comment