/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Rodrigo Canales (RWTH Aachen University)
W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include <mpi.h>
#include <stdlib.h>
#include <math.h>
#include "pppm_intel.h"
#include "atom.h"
#include "error.h"
#include "gridcomm.h"
#include "math_const.h"
#include "math_special.h"
#include "memory.h"
#include "suffix.h"
using namespace LAMMPS_NS;
using namespace MathConst;
using namespace MathSpecial;
#define MAXORDER 7
#define OFFSET 16384
#define LARGE 10000.0
#define SMALL 0.00001
#define EPS_HOC 1.0e-7
enum{REVERSE_RHO};
enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM};
#ifdef FFT_SINGLE
#define ZEROF 0.0f
#define ONEF 1.0f
#else
#define ZEROF 0.0
#define ONEF 1.0
#endif
/* ---------------------------------------------------------------------- */
PPPMIntel::PPPMIntel(LAMMPS *lmp, int narg, char **arg) : PPPM(lmp, narg, arg)
{
  suffix_flag |= Suffix::INTEL;
}

PPPMIntel::~PPPMIntel()
{
}

/* ----------------------------------------------------------------------
called once before run
------------------------------------------------------------------------- */
void PPPMIntel::init()
{
  PPPM::init();

  int ifix = modify->find_fix("package_intel");
  if (ifix < 0)
    error->all(FLERR,
               "The 'package intel' command is required for /intel styles");
  fix = static_cast<FixIntel *>(modify->fix[ifix]);

#ifdef _LMP_INTEL_OFFLOAD
  _use_base = 0;
  if (fix->offload_balance() != 0.0) {
    _use_base = 1;
    return;
  }
#endif

  fix->kspace_init_check();

  if (order > INTEL_P3M_MAXORDER)
    error->all(FLERR, "PPPM order greater than supported by USER-INTEL\n");

  /*
  if (fix->precision() == FixIntel::PREC_MODE_MIXED)
    pack_force_const(force_const_single, fix->get_mixed_buffers());
  else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
    pack_force_const(force_const_double, fix->get_double_buffers());
  else
    pack_force_const(force_const_single, fix->get_single_buffers());
  */
}
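
/* ----------------------------------------------------------------------
   Example usage (a minimal sketch; exact package arguments depend on the
   build and hardware): pppm/intel is selected like any other /intel
   style in a LAMMPS input script

     package intel 0 mode mixed
     kspace_style pppm/intel 1.0e-4
     suffix intel

   or equivalently with "-sf intel -pk intel 0" on the command line
------------------------------------------------------------------------- */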
/* ----------------------------------------------------------------------
compute the PPPMIntel long-range force, energy, virial
------------------------------------------------------------------------- */
void PPPMIntel::compute(int eflag, int vflag)
{
#ifdef _LMP_INTEL_OFFLOAD
  if (_use_base) {
    PPPM::compute(eflag, vflag);
    return;
  }
#endif
  compute_first(eflag, vflag);
  compute_second(eflag, vflag);
}
/* ---------------------------------------------------------------------- */
void PPPMIntel::compute_first(int eflag, int vflag)
{
  int i, j;

  // set energy/virial flags
  // invoke allocate_peratom() if needed for first time

  if (eflag || vflag) ev_setup(eflag, vflag);
  else evflag = evflag_atom = eflag_global = vflag_global =
         eflag_atom = vflag_atom = 0;

  if (evflag_atom && !peratom_allocate_flag) {
    allocate_peratom();
    cg_peratom->ghost_notify();
    cg_peratom->setup();
  }

  // if atom count has changed, update qsum and qsqsum

  if (atom->natoms != natoms_original) {
    qsum_qsq();
    natoms_original = atom->natoms;
  }

  // return if there are no charges

  if (qsqsum == 0.0) return;

  // convert atoms from box to lamda coords

  if (triclinic == 0) boxlo = domain->boxlo;
  else {
    boxlo = domain->boxlo_lamda;
    domain->x2lamda(atom->nlocal);
  }

  // extend size of per-atom arrays if necessary

  if (atom->nmax > nmax) {
    memory->destroy(part2grid);
    nmax = atom->nmax;
    memory->create(part2grid, nmax, 3, "pppm:part2grid");
  }

  // find grid points for all my particles
  // map my particle charge onto my local 3d density grid
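  // precision dispatch from the intel package: the template arguments are
  // <flt_t, acc_t> = <position/charge type, accumulation type>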

  if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
    particle_map<float,double>(fix->get_mixed_buffers());
    make_rho<float,double>(fix->get_mixed_buffers());
  } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
    particle_map<double,double>(fix->get_double_buffers());
    make_rho<double,double>(fix->get_double_buffers());
  } else {
    particle_map<float,float>(fix->get_single_buffers());
    make_rho<float,float>(fix->get_single_buffers());
  }

  // all procs communicate density values from their ghost cells
  //   to fully sum contribution in their 3d bricks
  // remap from 3d decomposition to FFT decomposition

  cg->reverse_comm(this, REVERSE_RHO);
  brick2fft();

  // compute potential gradient on my FFT grid and
  //   portion of e_long on this proc's FFT grid
  // return gradients (electric fields) in 3d brick decomposition
  // also performs per-atom calculations via poisson_peratom()

  poisson();

  // all procs communicate E-field values
  // to fill ghost cells surrounding their 3d bricks

  if (differentiation_flag == 1) cg->forward_comm(this, FORWARD_AD);
  else cg->forward_comm(this, FORWARD_IK);

  // extra per-atom energy/virial communication

  if (evflag_atom) {
    if (differentiation_flag == 1 && vflag_atom)
      cg_peratom->forward_comm(this, FORWARD_AD_PERATOM);
    else if (differentiation_flag == 0)
      cg_peratom->forward_comm(this, FORWARD_IK_PERATOM);
  }
}
/* ---------------------------------------------------------------------- */
void PPPMIntel::compute_second(int eflag, int vflag)
{
  int i, j;

  // calculate the force on my particles

  if (differentiation_flag == 1) {
    if (fix->precision() == FixIntel::PREC_MODE_MIXED)
      fieldforce_ad<float,double>(fix->get_mixed_buffers());
    else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
      fieldforce_ad<double,double>(fix->get_double_buffers());
    else
      fieldforce_ad<float,float>(fix->get_single_buffers());
  } else {
    if (fix->precision() == FixIntel::PREC_MODE_MIXED)
      fieldforce_ik<float,double>(fix->get_mixed_buffers());
    else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
      fieldforce_ik<double,double>(fix->get_double_buffers());
    else
      fieldforce_ik<float,float>(fix->get_single_buffers());
  }

  // extra per-atom energy/virial communication

  if (evflag_atom) fieldforce_peratom();

  // sum global energy across procs and add in volume-dependent term

  const double qscale = qqrd2e * scale;

  if (eflag_global) {
    double energy_all;
    MPI_Allreduce(&energy, &energy_all, 1, MPI_DOUBLE, MPI_SUM, world);
    energy = energy_all;

    energy *= 0.5 * volume;
    energy -= g_ewald * qsqsum / MY_PIS +
      MY_PI2 * qsum * qsum / (g_ewald * g_ewald * volume);
    energy *= qscale;
  }

  // sum global virial across procs

  if (vflag_global) {
    double virial_all[6];
    MPI_Allreduce(virial, virial_all, 6, MPI_DOUBLE, MPI_SUM, world);
    for (i = 0; i < 6; i++)
      virial[i] = 0.5 * qscale * volume * virial_all[i];
  }

  // per-atom energy/virial
  // energy includes self-energy correction
  // ntotal accounts for TIP4P tallying eatom/vatom for ghost atoms

  if (evflag_atom) {
    double *q = atom->q;
    int nlocal = atom->nlocal;
    int ntotal = nlocal;
    if (tip4pflag) ntotal += atom->nghost;

    if (eflag_atom) {
      for (i = 0; i < nlocal; i++) {
        eatom[i] *= 0.5;
        eatom[i] -= g_ewald * q[i] * q[i] / MY_PIS + MY_PI2 * q[i] * qsum /
          (g_ewald * g_ewald * volume);
        eatom[i] *= qscale;
      }
      for (i = nlocal; i < ntotal; i++) eatom[i] *= 0.5 * qscale;
    }

    if (vflag_atom) {
      for (i = 0; i < ntotal; i++)
        for (j = 0; j < 6; j++) vatom[i][j] *= 0.5 * qscale;
    }
  }

  // 2d slab correction

  if (slabflag == 1) slabcorr();

  // convert atoms back from lamda to box coords

  if (triclinic) domain->lamda2x(atom->nlocal);
}
/* ----------------------------------------------------------------------
find center grid pt for each of my particles
check that full stencil for the particle will fit in my 3d brick
store central grid pt indices in part2grid array
------------------------------------------------------------------------- */
template<class flt_t, class acc_t>
void PPPMIntel::particle_map(IntelBuffers<flt_t,acc_t> *buffers)
{
  int nx, ny, nz;

  ATOM_T * _noalias const x = buffers->get_x(0);
  int nlocal = atom->nlocal;

  int flag = 0;

  if (!ISFINITE(boxlo[0]) || !ISFINITE(boxlo[1]) || !ISFINITE(boxlo[2]))
    error->one(FLERR, "Non-numeric box dimensions - simulation unstable");

  const flt_t lo0 = boxlo[0];
  const flt_t lo1 = boxlo[1];
  const flt_t lo2 = boxlo[2];
  const flt_t xi = delxinv;
  const flt_t yi = delyinv;
  const flt_t zi = delzinv;
  const flt_t fshift = shift;

#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd
#endif
  for (int i = 0; i < nlocal; i++) {

    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
    // current particle coord can be outside global and local box
    // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
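    // (the cast truncates toward zero: with OFFSET = 16384 folded into
    // fshift, static_cast<int>(-0.75 + 16384) - 16384 = -1, so the
    // biased cast behaves like floor() for coordinates of sane magnitude)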

    nx = static_cast<int>((x[i].x - lo0) * xi + fshift) - OFFSET;
    ny = static_cast<int>((x[i].y - lo1) * yi + fshift) - OFFSET;
    nz = static_cast<int>((x[i].z - lo2) * zi + fshift) - OFFSET;

    part2grid[i][0] = nx;
    part2grid[i][1] = ny;
    part2grid[i][2] = nz;

    // check that entire stencil around nx,ny,nz will fit in my 3d brick

    if (nx + nlower < nxlo_out || nx + nupper > nxhi_out ||
        ny + nlower < nylo_out || ny + nupper > nyhi_out ||
        nz + nlower < nzlo_out || nz + nupper > nzhi_out)
      flag = 1;
  }

  if (flag) error->one(FLERR, "Out of range atoms - cannot compute PPPM");
}
/* ----------------------------------------------------------------------
create discretized "density" on section of global grid due to my particles
density(x,y,z) = charge "density" at grid points of my 3d brick
(nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
in global grid
------------------------------------------------------------------------- */
template<class flt_t, class acc_t>
void PPPMIntel::make_rho(IntelBuffers<flt_t,acc_t> *buffers)
{
  // clear 3d density array

  memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]), 0,
         ngrid * sizeof(FFT_SCALAR));

  // loop over my charges, add their contribution to nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt

  ATOM_T * _noalias const x = buffers->get_x(0);
  flt_t * _noalias const q = buffers->get_q(0);
  int nlocal = atom->nlocal;

  const flt_t lo0 = boxlo[0];
  const flt_t lo1 = boxlo[1];
  const flt_t lo2 = boxlo[2];
  const flt_t xi = delxinv;
  const flt_t yi = delyinv;
  const flt_t zi = delzinv;
  const flt_t fshift = shift;
  const flt_t fshiftone = shiftone;
  const flt_t fdelvolinv = delvolinv;

  for (int i = 0; i < nlocal; i++) {
    int nx = part2grid[i][0];
    int ny = part2grid[i][1];
    int nz = part2grid[i][2];
    FFT_SCALAR dx = nx + fshiftone - (x[i].x - lo0) * xi;
    FFT_SCALAR dy = ny + fshiftone - (x[i].y - lo1) * yi;
    FFT_SCALAR dz = nz + fshiftone - (x[i].z - lo2) * zi;

    flt_t rho[3][INTEL_P3M_MAXORDER];
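
    // the stencil weights in each dimension are degree (order-1)
    // polynomials in the fractional offset d; the loop below fills
    // rho[dim][k] = sum_l rho_coeff[l][k] * d^l via Horner's rule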
    for (int k = nlower; k <= nupper; k++) {
      FFT_SCALAR r1, r2, r3;
      r1 = r2 = r3 = ZEROF;
      for (int l = order - 1; l >= 0; l--) {
        r1 = rho_coeff[l][k] + r1 * dx;
        r2 = rho_coeff[l][k] + r2 * dy;
        r3 = rho_coeff[l][k] + r3 * dz;
      }
      rho[0][k - nlower] = r1;
      rho[1][k - nlower] = r2;
      rho[2][k - nlower] = r3;
    }

    FFT_SCALAR z0 = fdelvolinv * q[i];
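
    // spread q[i]/cell_volume over the order^3 stencil as a tensor
    // product of the three 1d weight vectors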
    for (int n = nlower; n <= nupper; n++) {
      int mz = n + nz;
      FFT_SCALAR y0 = z0 * rho[2][n - nlower];
      for (int m = nlower; m <= nupper; m++) {
        int my = m + ny;
        FFT_SCALAR x0 = y0 * rho[1][m - nlower];
        for (int l = nlower; l <= nupper; l++) {
          int mx = l + nx;
          density_brick[mz][my][mx] += x0 * rho[0][l - nlower];
        }
      }
    }
  }
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles for ik
------------------------------------------------------------------------- */
template<class flt_t, class acc_t>
void PPPMIntel::fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers)
{
  // loop over my charges, interpolate electric field from nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt
  // ek = 3 components of E-field on particle

  ATOM_T * _noalias const x = buffers->get_x(0);
  flt_t * _noalias const q = buffers->get_q(0);
  FORCE_T * _noalias const f = buffers->get_f();
  int nlocal = atom->nlocal;

  const flt_t lo0 = boxlo[0];
  const flt_t lo1 = boxlo[1];
  const flt_t lo2 = boxlo[2];
  const flt_t xi = delxinv;
  const flt_t yi = delyinv;
  const flt_t zi = delzinv;
  const flt_t fshiftone = shiftone;
  const flt_t fqqrd2es = qqrd2e * scale;

#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned nontemporal
#pragma simd
#endif
  for (int i = 0; i < nlocal; i++) {
    int nx = part2grid[i][0];
    int ny = part2grid[i][1];
    int nz = part2grid[i][2];
    FFT_SCALAR dx = nx + fshiftone - (x[i].x - lo0) * xi;
    FFT_SCALAR dy = ny + fshiftone - (x[i].y - lo1) * yi;
    FFT_SCALAR dz = nz + fshiftone - (x[i].z - lo2) * zi;

    flt_t rho[3][INTEL_P3M_MAXORDER];

    for (int k = nlower; k <= nupper; k++) {
      FFT_SCALAR r1 = rho_coeff[order - 1][k];
      FFT_SCALAR r2 = rho_coeff[order - 1][k];
      FFT_SCALAR r3 = rho_coeff[order - 1][k];
      for (int l = order - 2; l >= 0; l--) {
        r1 = rho_coeff[l][k] + r1 * dx;
        r2 = rho_coeff[l][k] + r2 * dy;
        r3 = rho_coeff[l][k] + r3 * dz;
      }
      rho[0][k - nlower] = r1;
      rho[1][k - nlower] = r2;
      rho[2][k - nlower] = r3;
    }

    FFT_SCALAR ekx, eky, ekz;
    ekx = eky = ekz = ZEROF;
    for (int n = nlower; n <= nupper; n++) {
      int mz = n + nz;
      FFT_SCALAR z0 = rho[2][n - nlower];
      for (int m = nlower; m <= nupper; m++) {
        int my = m + ny;
        FFT_SCALAR y0 = z0 * rho[1][m - nlower];
        for (int l = nlower; l <= nupper; l++) {
          int mx = l + nx;
          FFT_SCALAR x0 = y0 * rho[0][l - nlower];
          ekx -= x0 * vdx_brick[mz][my][mx];
          eky -= x0 * vdy_brick[mz][my][mx];
          ekz -= x0 * vdz_brick[mz][my][mx];
        }
      }
    }
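
    // ek[xyz] is the E-field at the particle, interpolated with the same
    // weights used to spread the charge; the force below is q*E, and
    // slabflag == 2 (kspace_modify slab nozforce) suppresses the
    // z-component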

    // convert E-field to force

    const flt_t qfactor = fqqrd2es * q[i];
    f[i].x += qfactor * ekx;
    f[i].y += qfactor * eky;
    if (slabflag != 2) f[i].z += qfactor * ekz;
  }
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles for ad
------------------------------------------------------------------------- */
template<class flt_t, class acc_t>
void PPPMIntel::fieldforce_ad(IntelBuffers<flt_t,acc_t> *buffers)
{
  // loop over my charges, interpolate electric field from nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt
  // ek = 3 components of E-field on particle

  ATOM_T * _noalias const x = buffers->get_x(0);
  const flt_t * _noalias const q = buffers->get_q(0);
  FORCE_T * _noalias const f = buffers->get_f();
  int nlocal = atom->nlocal;

  const flt_t ftwo_pi = MY_PI * 2.0;
  const flt_t ffour_pi = MY_PI * 4.0;

  const flt_t lo0 = boxlo[0];
  const flt_t lo1 = boxlo[1];
  const flt_t lo2 = boxlo[2];
  const flt_t xi = delxinv;
  const flt_t yi = delyinv;
  const flt_t zi = delzinv;
  const flt_t fshiftone = shiftone;
  const flt_t fqqrd2es = qqrd2e * scale;

  const double *prd = domain->prd;
  const double xprd = prd[0];
  const double yprd = prd[1];
  const double zprd = prd[2];

  const flt_t hx_inv = nx_pppm / xprd;
  const flt_t hy_inv = ny_pppm / yprd;
  const flt_t hz_inv = nz_pppm / zprd;

  const flt_t fsf_coeff0 = sf_coeff[0];
  const flt_t fsf_coeff1 = sf_coeff[1];
  const flt_t fsf_coeff2 = sf_coeff[2];
  const flt_t fsf_coeff3 = sf_coeff[3];
  const flt_t fsf_coeff4 = sf_coeff[4];
  const flt_t fsf_coeff5 = sf_coeff[5];

#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned nontemporal
#pragma simd
#endif
  for (int i = 0; i < nlocal; i++) {
    int nx = part2grid[i][0];
    int ny = part2grid[i][1];
    int nz = part2grid[i][2];
    FFT_SCALAR dx = nx + fshiftone - (x[i].x - lo0) * xi;
    FFT_SCALAR dy = ny + fshiftone - (x[i].y - lo1) * yi;
    FFT_SCALAR dz = nz + fshiftone - (x[i].z - lo2) * zi;

    flt_t rho[3][INTEL_P3M_MAXORDER];
    flt_t drho[3][INTEL_P3M_MAXORDER];
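
    // rho holds the interpolation weights, drho their derivatives with
    // respect to the fractional offset; both are filled below via
    // Horner's rule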
    for (int k = nlower; k <= nupper; k++) {
      FFT_SCALAR r1, r2, r3, dr1, dr2, dr3;
      dr1 = dr2 = dr3 = ZEROF;

      r1 = rho_coeff[order - 1][k];
      r2 = rho_coeff[order - 1][k];
      r3 = rho_coeff[order - 1][k];
      for (int l = order - 2; l >= 0; l--) {
        r1 = rho_coeff[l][k] + r1 * dx;
        r2 = rho_coeff[l][k] + r2 * dy;
        r3 = rho_coeff[l][k] + r3 * dz;
        dr1 = drho_coeff[l][k] + dr1 * dx;
        dr2 = drho_coeff[l][k] + dr2 * dy;
        dr3 = drho_coeff[l][k] + dr3 * dz;
      }
      rho[0][k - nlower] = r1;
      rho[1][k - nlower] = r2;
      rho[2][k - nlower] = r3;
      drho[0][k - nlower] = dr1;
      drho[1][k - nlower] = dr2;
      drho[2][k - nlower] = dr3;
    }

    FFT_SCALAR ekx, eky, ekz;
    ekx = eky = ekz = ZEROF;
    for (int n = nlower; n <= nupper; n++) {
      int mz = n + nz;
      for (int m = nlower; m <= nupper; m++) {
        int my = m + ny;
        FFT_SCALAR ekx_p = rho[1][m - nlower] * rho[2][n - nlower];
        FFT_SCALAR eky_p = drho[1][m - nlower] * rho[2][n - nlower];
        FFT_SCALAR ekz_p = rho[1][m - nlower] * drho[2][n - nlower];
        for (int l = nlower; l <= nupper; l++) {
          int mx = l + nx;
          ekx += drho[0][l - nlower] * ekx_p * u_brick[mz][my][mx];
          eky += rho[0][l - nlower] * eky_p * u_brick[mz][my][mx];
          ekz += rho[0][l - nlower] * ekz_p * u_brick[mz][my][mx];
        }
      }
    }
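
    // the derivative is with respect to the fractional grid coordinate,
    // so scale by the inverse grid spacing to get a real-space gradient;
    // the sin() terms below are the analytic self-force corrections
    // required by the ad scheme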

    ekx *= hx_inv;
    eky *= hy_inv;
    ekz *= hz_inv;

    // convert E-field to force

    const flt_t qfactor = fqqrd2es * q[i];
    const flt_t twoqsq = (flt_t)2.0 * q[i] * q[i];

    const flt_t s1 = x[i].x * hx_inv;
    const flt_t s2 = x[i].y * hy_inv;
    const flt_t s3 = x[i].z * hz_inv;
    flt_t sf = fsf_coeff0 * sin(ftwo_pi * s1);
    sf += fsf_coeff1 * sin(ffour_pi * s1);
    sf *= twoqsq;
    f[i].x += qfactor * ekx - fqqrd2es * sf;

    sf = fsf_coeff2 * sin(ftwo_pi * s2);
    sf += fsf_coeff3 * sin(ffour_pi * s2);
    sf *= twoqsq;
    f[i].y += qfactor * eky - fqqrd2es * sf;

    sf = fsf_coeff4 * sin(ftwo_pi * s3);
    sf += fsf_coeff5 * sin(ffour_pi * s3);
    sf *= twoqsq;
    if (slabflag != 2) f[i].z += qfactor * ekz - fqqrd2es * sf;
  }
}
/* ----------------------------------------------------------------------
Pack data into intel package buffers if using LRT mode
------------------------------------------------------------------------- */
void PPPMIntel::pack_buffers()
{
  fix->start_watch(TIME_PACK);
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {
    int ifrom, ito, tid;
    IP_PRE_omp_range_id_align(ifrom, ito, tid,
                              atom->nlocal + atom->nghost,
                              comm->nthreads,
                              sizeof(IntelBuffers<float,double>::atom_t));
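
    // each thread packs an aligned, disjoint slice of the local + ghost
    // atoms into the package buffers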
    if (fix->precision() == FixIntel::PREC_MODE_MIXED)
      fix->get_mixed_buffers()->thr_pack(ifrom, ito, 1);
    else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
      fix->get_double_buffers()->thr_pack(ifrom, ito, 1);
    else
      fix->get_single_buffers()->thr_pack(ifrom, ito, 1);
  }
  fix->stop_watch(TIME_PACK);
}
/* ----------------------------------------------------------------------
   Returns 1 if the base PPPM implementation is used instead of the Intel
   optimizations, due to offload
------------------------------------------------------------------------- */
#ifdef _LMP_INTEL_OFFLOAD
int PPPMIntel::use_base()
{
  return _use_base;
}
#endif