pppm_disp_tip4p_omp.cpp
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#include "pppm_disp_tip4p_omp.h"
#include "atom.h"
#include "comm.h"
#include "domain.h"
#include "error.h"
#include "fix_omp.h"
#include "force.h"
#include "memory.h"
#include "math_const.h"
#include "math_special.h"
#include <string.h>
#include <math.h>
#include "suffix.h"
using namespace LAMMPS_NS;
using namespace MathConst;
using namespace MathSpecial;
#ifdef FFT_SINGLE
#define ZEROF 0.0f
#else
#define ZEROF 0.0
#endif
#define OFFSET 16384
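
// OFFSET shifts the argument of the float-to-int conversions in the
// particle_map*() methods below so that truncation rounds consistently
// toward -infinity for particles slightly outside the box (see the
// "add/subtract OFFSET" comments at the points of use).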
/* ---------------------------------------------------------------------- */
PPPMDispTIP4POMP::PPPMDispTIP4POMP(LAMMPS *lmp, int narg, char **arg) :
  PPPMDispTIP4P(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE)
{
  triclinic_support = 0;
  tip4pflag = 1;
  suffix_flag |= Suffix::OMP;
}
/* ---------------------------------------------------------------------- */
PPPMDispTIP4POMP::~PPPMDispTIP4POMP()
{
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {
#if defined(_OPENMP)
    const int tid = omp_get_thread_num();
#else
    const int tid = 0;
#endif
    if (function[0]) {
      ThrData *thr = fix->get_thr(tid);
      thr->init_pppm(-order,memory);
    }
    if (function[1] + function[2]) {
      ThrData *thr = fix->get_thr(tid);
      thr->init_pppm_disp(-order_6,memory);
    }
  }
}
/* ----------------------------------------------------------------------
allocate memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::allocate()
{
  PPPMDispTIP4P::allocate();

#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {
#if defined(_OPENMP)
    const int tid = omp_get_thread_num();
#else
    const int tid = 0;
#endif
    if (function[0]) {
      ThrData *thr = fix->get_thr(tid);
      thr->init_pppm(order,memory);
    }
    if (function[1] + function[2]) {
      ThrData *thr = fix->get_thr(tid);
      thr->init_pppm_disp(order_6,memory);
    }
  }
}
/* ----------------------------------------------------------------------
   Compute the modified (Hockney-Eastwood) Coulomb Green's function
------------------------------------------------------------------------- */
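// In outline, the loop below evaluates, for each k-vector (qx,qy,qz),
//   greensfn = (4*pi/k^2) * exp(-k^2/(4*g_ewald^2)) * |W(k)|^2 / gf_denom(...)
// where sx*sy*sz supplies the Gaussian screening factor and wx*wy*wz the
// squared Fourier transform, (sin(arg)/arg)^order per direction, of the
// charge-assignment stencil.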
void PPPMDispTIP4POMP::compute_gf()
{
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {
    double *prd;

    if (triclinic == 0) prd = domain->prd;
    else prd = domain->prd_lamda;

    double xprd = prd[0];
    double yprd = prd[1];
    double zprd = prd[2];
    double zprd_slab = zprd*slab_volfactor;

    double unitkx = (2.0*MY_PI/xprd);
    double unitky = (2.0*MY_PI/yprd);
    double unitkz = (2.0*MY_PI/zprd_slab);

    int tid,nn,nnfrom,nnto,k,l,m;
    int kper,lper,mper;
    double snx,sny,snz,snx2,sny2,snz2;
    double sqk;
    double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
    double numerator,denominator;

    const int nnx = nxhi_fft-nxlo_fft+1;
    const int nny = nyhi_fft-nylo_fft+1;

    loop_setup_thr(nnfrom, nnto, tid, nfft, comm->nthreads);

    for (m = nzlo_fft; m <= nzhi_fft; m++) {
      mper = m - nz_pppm*(2*m/nz_pppm);
      qz = unitkz*mper;
      snz = sin(0.5*qz*zprd_slab/nz_pppm);
      snz2 = snz*snz;
      sz = exp(-0.25*pow(qz/g_ewald,2.0));
      wz = 1.0;
      argz = 0.5*qz*zprd_slab/nz_pppm;
      if (argz != 0.0) wz = pow(sin(argz)/argz,order);
      wz *= wz;

      for (l = nylo_fft; l <= nyhi_fft; l++) {
        lper = l - ny_pppm*(2*l/ny_pppm);
        qy = unitky*lper;
        sny = sin(0.5*qy*yprd/ny_pppm);
        sny2 = sny*sny;
        sy = exp(-0.25*pow(qy/g_ewald,2.0));
        wy = 1.0;
        argy = 0.5*qy*yprd/ny_pppm;
        if (argy != 0.0) wy = pow(sin(argy)/argy,order);
        wy *= wy;

        for (k = nxlo_fft; k <= nxhi_fft; k++) {

          /* only compute the part designated to this thread */
          nn = k-nxlo_fft + nnx*(l-nylo_fft + nny*(m-nzlo_fft));
          if ((nn < nnfrom) || (nn >= nnto)) continue;

          kper = k - nx_pppm*(2*k/nx_pppm);
          qx = unitkx*kper;
          snx = sin(0.5*qx*xprd/nx_pppm);
          snx2 = snx*snx;
          sx = exp(-0.25*pow(qx/g_ewald,2.0));
          wx = 1.0;
          argx = 0.5*qx*xprd/nx_pppm;
          if (argx != 0.0) wx = pow(sin(argx)/argx,order);
          wx *= wx;

          sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0);

          if (sqk != 0.0) {
            numerator = 4.0*MY_PI/sqk;
            denominator = gf_denom(snx2,sny2,snz2,gf_b,order);
            greensfn[nn] = numerator*sx*sy*sz*wx*wy*wz/denominator;
          } else greensfn[nn] = 0.0;
        }
      }
    }
  }
}
/* ----------------------------------------------------------------------
   Compute the modified (Hockney-Eastwood) dispersion Green's function
------------------------------------------------------------------------- */
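// Analogous to compute_gf(), but for the 1/r^6 kernel: with a = 1/(2*g_ewald_6)
// the per-k factor assembled below is
//   term = (1 - 2*k^2*a^2) * exp(-k^2*a^2) + 2*k^3*a^3*sqrt(pi)*erfc(k*a)
// scaled by numerator = -pi^(3/2)*g_ewald_6^3/3 and the same squared
// stencil weights wx*wy*wz.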
void PPPMDispTIP4POMP::compute_gf_6()
{
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {
    double *prd;
    int k,l,m,nn;

    // volume-dependent factors
    // adjust z dimension for 2d slab PPPM
    // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0

    if (triclinic == 0) prd = domain->prd;
    else prd = domain->prd_lamda;

    double xprd = prd[0];
    double yprd = prd[1];
    double zprd = prd[2];
    double zprd_slab = zprd*slab_volfactor;

    double unitkx = (2.0*MY_PI/xprd);
    double unitky = (2.0*MY_PI/yprd);
    double unitkz = (2.0*MY_PI/zprd_slab);

    int kper,lper,mper;
    double sqk;
    double snx,sny,snz,snx2,sny2,snz2;
    double argx,argy,argz,wx,wy,wz,sx,sy,sz;
    double qx,qy,qz;
    double rtsqk,term;
    double numerator,denominator;
    double inv2ew = 2*g_ewald_6;
    inv2ew = 1/inv2ew;
    double rtpi = sqrt(MY_PI);
    int nnfrom,nnto,tid;

    numerator = -MY_PI*rtpi*g_ewald_6*g_ewald_6*g_ewald_6/(3.0);

    const int nnx = nxhi_fft_6-nxlo_fft_6+1;
    const int nny = nyhi_fft_6-nylo_fft_6+1;

    loop_setup_thr(nnfrom, nnto, tid, nfft_6, comm->nthreads);

    for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) {
      mper = m - nz_pppm_6*(2*m/nz_pppm_6);
      qz = unitkz*mper;
      snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm_6);
      snz2 = snz*snz;
      sz = exp(-qz*qz*inv2ew*inv2ew);
      wz = 1.0;
      argz = 0.5*qz*zprd_slab/nz_pppm_6;
      if (argz != 0.0) wz = pow(sin(argz)/argz,order_6);
      wz *= wz;

      for (l = nylo_fft_6; l <= nyhi_fft_6; l++) {
        lper = l - ny_pppm_6*(2*l/ny_pppm_6);
        qy = unitky*lper;
        sny = sin(0.5*unitky*lper*yprd/ny_pppm_6);
        sny2 = sny*sny;
        sy = exp(-qy*qy*inv2ew*inv2ew);
        wy = 1.0;
        argy = 0.5*qy*yprd/ny_pppm_6;
        if (argy != 0.0) wy = pow(sin(argy)/argy,order_6);
        wy *= wy;

        for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) {

          /* only compute the part designated to this thread */
          nn = k-nxlo_fft_6 + nnx*(l-nylo_fft_6 + nny*(m-nzlo_fft_6));
          if ((nn < nnfrom) || (nn >= nnto)) continue;

          kper = k - nx_pppm_6*(2*k/nx_pppm_6);
          qx = unitkx*kper;
          snx = sin(0.5*unitkx*kper*xprd/nx_pppm_6);
          snx2 = snx*snx;
          sx = exp(-qx*qx*inv2ew*inv2ew);
          wx = 1.0;
          argx = 0.5*qx*xprd/nx_pppm_6;
          if (argx != 0.0) wx = pow(sin(argx)/argx,order_6);
          wx *= wx;

          sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0);

          denominator = gf_denom(snx2,sny2,snz2,gf_b_6,order_6);
          rtsqk = sqrt(sqk);
          term = (1-2*sqk*inv2ew*inv2ew)*sx*sy*sz +
                 2*sqk*rtsqk*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtsqk*inv2ew);
          greensfn_6[nn] = numerator*term*wx*wy*wz/denominator;
        }
      }
    }
  }
}
/* ----------------------------------------------------------------------
   run the regular top-level compute method from the parent class,
   which will have individual methods replaced by our threaded
   versions, and then call the obligatory force reduction.
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::compute(int eflag, int vflag)
{
  PPPMDispTIP4P::compute(eflag,vflag);

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
#if defined(_OPENMP)
    const int tid = omp_get_thread_num();
#else
    const int tid = 0;
#endif
    ThrData *thr = fix->get_thr(tid);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
/* ----------------------------------------------------------------------
find center grid pt for each of my particles
check that full stencil for the particle will fit in my 3d brick
store central grid pt indices in part2grid array
------------------------------------------------------------------------- */
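// The mapping is independent per atom, so the loop below is parallelized with
// a plain "omp parallel for"; out-of-range atoms are only counted per thread
// (via the flag reduction) and reported collectively after the MPI_Allreduce.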
void PPPMDispTIP4POMP::particle_map_c(double dxinv, double dyinv,
                                      double dzinv, double sft,
                                      int **part2grid, int nup,
                                      int nlw, int nxlo_o,
                                      int nylo_o, int nzlo_o,
                                      int nxhi_o, int nyhi_o,
                                      int nzhi_o)
{
  // no local atoms => nothing to do
  if (atom->nlocal == 0) return;

  const int * _noalias const type = atom->type;
  const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
  int3_t * _noalias const p2g = (int3_t *) part2grid[0];
  const double boxlox = boxlo[0];
  const double boxloy = boxlo[1];
  const double boxloz = boxlo[2];
  const int nlocal = atom->nlocal;

  const double delxinv = dxinv;
  const double delyinv = dyinv;
  const double delzinv = dzinv;
  const double shift = sft;
  const int nupper = nup;
  const int nlower = nlw;
  const int nxlo_out = nxlo_o;
  const int nylo_out = nylo_o;
  const int nzlo_out = nzlo_o;
  const int nxhi_out = nxhi_o;
  const int nyhi_out = nyhi_o;
  const int nzhi_out = nzhi_o;

  if (!ISFINITE(boxlo[0]) || !ISFINITE(boxlo[1]) || !ISFINITE(boxlo[2]))
    error->one(FLERR,"Non-numeric box dimensions - simulation unstable");

  int i, flag = 0;
#if defined(_OPENMP)
#pragma omp parallel for private(i) default(none) reduction(+:flag) schedule(static)
#endif
  for (i = 0; i < nlocal; i++) {
    dbl3_t xM;
    int iH1,iH2;

    if (type[i] == typeO) {
      find_M_thr(i,iH1,iH2,xM);
    } else {
      xM = x[i];
    }

    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
    // current particle coord can be outside global and local box
    // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1

    const int nx = static_cast<int> ((xM.x-boxlox)*delxinv+shift) - OFFSET;
    const int ny = static_cast<int> ((xM.y-boxloy)*delyinv+shift) - OFFSET;
    const int nz = static_cast<int> ((xM.z-boxloz)*delzinv+shift) - OFFSET;

    p2g[i].a = nx;
    p2g[i].b = ny;
    p2g[i].t = nz;

    // check that entire stencil around nx,ny,nz will fit in my 3d brick

    if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
        ny+nlower < nylo_out || ny+nupper > nyhi_out ||
        nz+nlower < nzlo_out || nz+nupper > nzhi_out)
      flag++;
  }

  int flag_all;
  MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
  if (flag_all) error->all(FLERR,"Out of range atoms - cannot compute PPPM");
}
/* ----------------------------------------------------------------------
find center grid pt for each of my particles
check that full stencil for the particle will fit in my 3d brick
store central grid pt indices in part2grid array
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::particle_map(double dxinv, double dyinv,
                                    double dzinv, double sft,
                                    int **part2grid, int nup,
                                    int nlw, int nxlo_o,
                                    int nylo_o, int nzlo_o,
                                    int nxhi_o, int nyhi_o,
                                    int nzhi_o)
{
  // no local atoms => nothing to do
  if (atom->nlocal == 0) return;

  const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
  int3_t * _noalias const p2g = (int3_t *) part2grid[0];
  const double boxlox = boxlo[0];
  const double boxloy = boxlo[1];
  const double boxloz = boxlo[2];
  const int nlocal = atom->nlocal;

  const double delxinv = dxinv;
  const double delyinv = dyinv;
  const double delzinv = dzinv;
  const double shift = sft;
  const int nupper = nup;
  const int nlower = nlw;
  const int nxlo_out = nxlo_o;
  const int nylo_out = nylo_o;
  const int nzlo_out = nzlo_o;
  const int nxhi_out = nxhi_o;
  const int nyhi_out = nyhi_o;
  const int nzhi_out = nzhi_o;

  int i, flag = 0;
#if defined(_OPENMP)
#pragma omp parallel for private(i) default(none) reduction(+:flag) schedule(static)
#endif
  for (i = 0; i < nlocal; i++) {

    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
    // current particle coord can be outside global and local box
    // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1

    const int nx = static_cast<int> ((x[i].x-boxlox)*delxinv+shift) - OFFSET;
    const int ny = static_cast<int> ((x[i].y-boxloy)*delyinv+shift) - OFFSET;
    const int nz = static_cast<int> ((x[i].z-boxloz)*delzinv+shift) - OFFSET;

    p2g[i].a = nx;
    p2g[i].b = ny;
    p2g[i].t = nz;

    // check that entire stencil around nx,ny,nz will fit in my 3d brick

    if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
        ny+nlower < nylo_out || ny+nupper > nyhi_out ||
        nz+nlower < nzlo_out || nz+nupper > nzhi_out)
      flag++;
  }

  int flag_all;
  MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world);
  if (flag_all) error->all(FLERR,"Out of range atoms - cannot compute PPPM");
}
/* ----------------------------------------------------------------------
create discretized "density" on section of global grid due to my particles
density(x,y,z) = charge "density" at grid points of my 3d brick
(nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
in global grid
------------------------------------------------------------------------- */
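// Threading strategy: instead of one density copy per thread, the
// (ghost-extended) brick is split by flat index into disjoint ranges
// [jfrom,jto); every thread loops over all local atoms but only adds to grid
// points inside its own range, so no reduction over per-thread grids is
// needed afterwards.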
void PPPMDispTIP4POMP::make_rho_c()
{
  // clear 3d density array
  FFT_SCALAR * _noalias const d = &(density_brick[nzlo_out][nylo_out][nxlo_out]);
  memset(d,0,ngrid*sizeof(FFT_SCALAR));

  // no local atoms => nothing else to do
  const int nlocal = atom->nlocal;
  if (nlocal == 0) return;

  const int ix = nxhi_out - nxlo_out + 1;
  const int iy = nyhi_out - nylo_out + 1;

#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {
    const double * _noalias const q = atom->q;
    const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
    const int3_t * _noalias const p2g = (int3_t *) part2grid[0];
    const int * _noalias const type = atom->type;
    dbl3_t xM;

    const double boxlox = boxlo[0];
    const double boxloy = boxlo[1];
    const double boxloz = boxlo[2];

    // determine range of grid points handled by this thread
    int i,jfrom,jto,tid,iH1,iH2;
    loop_setup_thr(jfrom,jto,tid,ngrid,comm->nthreads);

    // get per thread data
    ThrData *thr = fix->get_thr(tid);
    FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());

    // loop over my charges, add their contribution to nearby grid points
    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
    // (dx,dy,dz) = distance to "lower left" grid pt
    // loop over all local atoms for all threads
    for (i = 0; i < nlocal; i++) {

      const int nx = p2g[i].a;
      const int ny = p2g[i].b;
      const int nz = p2g[i].t;

      // pre-screen whether this atom will ever come within
      // reach of the data segment this thread is updating.
      if ( ((nz+nlower-nzlo_out)*ix*iy >= jto)
           || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue;

      if (type[i] == typeO) {
        find_M_thr(i,iH1,iH2,xM);
      } else {
        xM = x[i];
      }

      const FFT_SCALAR dx = nx+shiftone - (xM.x-boxlox)*delxinv;
      const FFT_SCALAR dy = ny+shiftone - (xM.y-boxloy)*delyinv;
      const FFT_SCALAR dz = nz+shiftone - (xM.z-boxloz)*delzinv;

      compute_rho1d_thr(r1d,dx,dy,dz,order,rho_coeff);

      const FFT_SCALAR z0 = delvolinv * q[i];

      for (int n = nlower; n <= nupper; ++n) {
        const int jn = (nz+n-nzlo_out)*ix*iy;
        const FFT_SCALAR y0 = z0*r1d[2][n];

        for (int m = nlower; m <= nupper; ++m) {
          const int jm = jn+(ny+m-nylo_out)*ix;
          const FFT_SCALAR x0 = y0*r1d[1][m];

          for (int l = nlower; l <= nupper; ++l) {
            const int jl = jm+nx+l-nxlo_out;

            // make sure each thread only updates
            // "his" elements of the density grid
            if (jl >= jto) break;
            if (jl < jfrom) continue;

            d[jl] += x0*r1d[0][l];
          }
        }
      }
    }
  }
}
/* ----------------------------------------------------------------------
same as above for dispersion interaction with geometric mixing rule
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::make_rho_g()
{
  // clear 3d density array
  FFT_SCALAR * _noalias const d =
    &(density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]);
  memset(d,0,ngrid_6*sizeof(FFT_SCALAR));

  // no local atoms => nothing else to do
  const int nlocal = atom->nlocal;
  if (nlocal == 0) return;

  const int ix = nxhi_out_6 - nxlo_out_6 + 1;
  const int iy = nyhi_out_6 - nylo_out_6 + 1;

#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {
    const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
    const int3_t * _noalias const p2g = (int3_t *) part2grid_6[0];

    const double boxlox = boxlo[0];
    const double boxloy = boxlo[1];
    const double boxloz = boxlo[2];

    // determine range of grid points handled by this thread
    int i,jfrom,jto,tid;
    loop_setup_thr(jfrom,jto,tid,ngrid_6,comm->nthreads);

    // get per thread data
    ThrData *thr = fix->get_thr(tid);
    FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());

    // loop over my charges, add their contribution to nearby grid points
    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
    // (dx,dy,dz) = distance to "lower left" grid pt
    // loop over all local atoms for all threads
    for (i = 0; i < nlocal; i++) {

      const int nx = p2g[i].a;
      const int ny = p2g[i].b;
      const int nz = p2g[i].t;

      // pre-screen whether this atom will ever come within
      // reach of the data segment this thread is updating.
      if ( ((nz+nlower_6-nzlo_out_6)*ix*iy >= jto)
           || ((nz+nupper_6-nzlo_out_6+1)*ix*iy < jfrom) ) continue;

      const FFT_SCALAR dx = nx+shiftone_6 - (x[i].x-boxlox)*delxinv_6;
      const FFT_SCALAR dy = ny+shiftone_6 - (x[i].y-boxloy)*delyinv_6;
      const FFT_SCALAR dz = nz+shiftone_6 - (x[i].z-boxloz)*delzinv_6;

      compute_rho1d_thr(r1d,dx,dy,dz,order_6,rho_coeff_6);

      const int type = atom->type[i];
      const double lj = B[type];
      const FFT_SCALAR z0 = delvolinv_6 * lj;

      for (int n = nlower_6; n <= nupper_6; ++n) {
        const int jn = (nz+n-nzlo_out_6)*ix*iy;
        const FFT_SCALAR y0 = z0*r1d[2][n];

        for (int m = nlower_6; m <= nupper_6; ++m) {
          const int jm = jn+(ny+m-nylo_out_6)*ix;
          const FFT_SCALAR x0 = y0*r1d[1][m];

          for (int l = nlower_6; l <= nupper_6; ++l) {
            const int jl = jm+nx+l-nxlo_out_6;

            // make sure each thread only updates
            // "his" elements of the density grid
            if (jl >= jto) break;
            if (jl < jfrom) continue;

            d[jl] += x0*r1d[0][l];
          }
        }
      }
    }
  }
}
/* ----------------------------------------------------------------------
same as above for dispersion interaction with arithmetic mixing rule
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::make_rho_a()
{
  // clear 3d density array
  FFT_SCALAR * _noalias const d0 =
    &(density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]);
  FFT_SCALAR * _noalias const d1 =
    &(density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]);
  FFT_SCALAR * _noalias const d2 =
    &(density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]);
  FFT_SCALAR * _noalias const d3 =
    &(density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]);
  FFT_SCALAR * _noalias const d4 =
    &(density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]);
  FFT_SCALAR * _noalias const d5 =
    &(density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]);
  FFT_SCALAR * _noalias const d6 =
    &(density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]);
  memset(d0,0,ngrid_6*sizeof(FFT_SCALAR));
  memset(d1,0,ngrid_6*sizeof(FFT_SCALAR));
  memset(d2,0,ngrid_6*sizeof(FFT_SCALAR));
  memset(d3,0,ngrid_6*sizeof(FFT_SCALAR));
  memset(d4,0,ngrid_6*sizeof(FFT_SCALAR));
  memset(d5,0,ngrid_6*sizeof(FFT_SCALAR));
  memset(d6,0,ngrid_6*sizeof(FFT_SCALAR));

  // no local atoms => nothing else to do
  const int nlocal = atom->nlocal;
  if (nlocal == 0) return;

  const int ix = nxhi_out_6 - nxlo_out_6 + 1;
  const int iy = nyhi_out_6 - nylo_out_6 + 1;

#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {
    const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
    const int3_t * _noalias const p2g = (int3_t *) part2grid_6[0];

    const double boxlox = boxlo[0];
    const double boxloy = boxlo[1];
    const double boxloz = boxlo[2];

    // determine range of grid points handled by this thread
    int i,jfrom,jto,tid;
    loop_setup_thr(jfrom,jto,tid,ngrid_6,comm->nthreads);

    // get per thread data
    ThrData *thr = fix->get_thr(tid);
    FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());

    // loop over my charges, add their contribution to nearby grid points
    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
    // (dx,dy,dz) = distance to "lower left" grid pt
    // loop over all local atoms for all threads
    for (i = 0; i < nlocal; i++) {

      const int nx = p2g[i].a;
      const int ny = p2g[i].b;
      const int nz = p2g[i].t;

      // pre-screen whether this atom will ever come within
      // reach of the data segment this thread is updating.
      if ( ((nz+nlower_6-nzlo_out_6)*ix*iy >= jto)
           || ((nz+nupper_6-nzlo_out_6+1)*ix*iy < jfrom) ) continue;

      const FFT_SCALAR dx = nx+shiftone_6 - (x[i].x-boxlox)*delxinv_6;
      const FFT_SCALAR dy = ny+shiftone_6 - (x[i].y-boxloy)*delyinv_6;
      const FFT_SCALAR dz = nz+shiftone_6 - (x[i].z-boxloz)*delzinv_6;

      compute_rho1d_thr(r1d,dx,dy,dz,order_6,rho_coeff_6);

      const int type = atom->type[i];
      const double lj0 = B[7*type];
      const double lj1 = B[7*type+1];
      const double lj2 = B[7*type+2];
      const double lj3 = B[7*type+3];
      const double lj4 = B[7*type+4];
      const double lj5 = B[7*type+5];
      const double lj6 = B[7*type+6];

      const FFT_SCALAR z0 = delvolinv_6;

      for (int n = nlower_6; n <= nupper_6; ++n) {
        const int jn = (nz+n-nzlo_out_6)*ix*iy;
        const FFT_SCALAR y0 = z0*r1d[2][n];

        for (int m = nlower_6; m <= nupper_6; ++m) {
          const int jm = jn+(ny+m-nylo_out_6)*ix;
          const FFT_SCALAR x0 = y0*r1d[1][m];

          for (int l = nlower_6; l <= nupper_6; ++l) {
            const int jl = jm+nx+l-nxlo_out_6;

            // make sure each thread only updates
            // "his" elements of the density grid
            if (jl >= jto) break;
            if (jl < jfrom) continue;

            const double w = x0*r1d[0][l];
            d0[jl] += w*lj0;
            d1[jl] += w*lj1;
            d2[jl] += w*lj2;
            d3[jl] += w*lj3;
            d4[jl] += w*lj4;
            d5[jl] += w*lj5;
            d6[jl] += w*lj6;
          }
        }
      }
    }
  }
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles for ik
------------------------------------------------------------------------- */
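// For TIP4P, the charge of an O atom sits on the massless M site, so the
// force interpolated at xM is redistributed below: a fraction (1-alpha) to
// the O atom and alpha/2 to each of its two H atoms, consistent with the
// M-site definition in find_M_thr().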
void PPPMDispTIP4POMP::fieldforce_c_ik()
{
  const int nlocal = atom->nlocal;

  // no local atoms => nothing to do
  if (nlocal == 0) return;

  // loop over my charges, interpolate electric field from nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt
  // ek = 3 components of E-field on particle

  const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
  const double * _noalias const q = atom->q;
  const int3_t * _noalias const p2g = (int3_t *) part2grid[0];
  const int * _noalias const type = atom->type;
  const double qqrd2e = force->qqrd2e;
  const double boxlox = boxlo[0];
  const double boxloy = boxlo[1];
  const double boxloz = boxlo[2];

#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {
    dbl3_t xM;
    FFT_SCALAR x0,y0,z0,ekx,eky,ekz;
    int i,ifrom,ito,tid,iH1,iH2,l,m,n,mx,my,mz;

    loop_setup_thr(ifrom,ito,tid,nlocal,comm->nthreads);

    // get per thread data
    ThrData *thr = fix->get_thr(tid);
    dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
    FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());

    for (i = ifrom; i < ito; ++i) {
      if (type[i] == typeO) {
        find_M_thr(i,iH1,iH2,xM);
      } else xM = x[i];

      const int nx = p2g[i].a;
      const int ny = p2g[i].b;
      const int nz = p2g[i].t;
      const FFT_SCALAR dx = nx+shiftone - (xM.x-boxlox)*delxinv;
      const FFT_SCALAR dy = ny+shiftone - (xM.y-boxloy)*delyinv;
      const FFT_SCALAR dz = nz+shiftone - (xM.z-boxloz)*delzinv;

      compute_rho1d_thr(r1d,dx,dy,dz,order,rho_coeff);

      ekx = eky = ekz = ZEROF;
      for (n = nlower; n <= nupper; n++) {
        mz = n+nz;
        z0 = r1d[2][n];
        for (m = nlower; m <= nupper; m++) {
          my = m+ny;
          y0 = z0*r1d[1][m];
          for (l = nlower; l <= nupper; l++) {
            mx = l+nx;
            x0 = y0*r1d[0][l];
            ekx -= x0*vdx_brick[mz][my][mx];
            eky -= x0*vdy_brick[mz][my][mx];
            ekz -= x0*vdz_brick[mz][my][mx];
          }
        }
      }

      // convert E-field to force
      const double qfactor = qqrd2e*scale*q[i];
      if (type[i] != typeO) {
        f[i].x += qfactor*ekx;
        f[i].y += qfactor*eky;
        if (slabflag != 2) f[i].z += qfactor*ekz;

      } else {
        const double fx = qfactor * ekx;
        const double fy = qfactor * eky;
        const double fz = qfactor * ekz;

        f[i].x += fx*(1 - alpha);
        f[i].y += fy*(1 - alpha);
        if (slabflag != 2) f[i].z += fz*(1 - alpha);

        f[iH1].x += 0.5*alpha*fx;
        f[iH1].y += 0.5*alpha*fy;
        if (slabflag != 2) f[iH1].z += 0.5*alpha*fz;

        f[iH2].x += 0.5*alpha*fx;
        f[iH2].y += 0.5*alpha*fy;
        if (slabflag != 2) f[iH2].z += 0.5*alpha*fz;
      }
    }
  } // end of parallel region
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles for ad
------------------------------------------------------------------------- */
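// The ad ("analytic differentiation") scheme interpolates the potential from
// u_brick and obtains the field by differentiating the assignment weights
// (d1d vs. r1d); the sf_coeff terms below subtract the resulting self-force
// of each charge on itself.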
void PPPMDispTIP4POMP::fieldforce_c_ad()
{
  const int nlocal = atom->nlocal;

  // no local atoms => nothing to do
  if (nlocal == 0) return;

  const double *prd = (triclinic == 0) ? domain->prd : domain->prd_lamda;
  const double hx_inv = nx_pppm/prd[0];
  const double hy_inv = ny_pppm/prd[1];
  const double hz_inv = nz_pppm/prd[2];

  // loop over my charges, interpolate electric field from nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt
  // ek = 3 components of E-field on particle

  const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
  const double * _noalias const q = atom->q;
  const int3_t * _noalias const p2g = (int3_t *) part2grid[0];
  const int * _noalias const type = atom->type;
  const double qqrd2e = force->qqrd2e;
  const double boxlox = boxlo[0];
  const double boxloy = boxlo[1];
  const double boxloz = boxlo[2];

#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {
    double s1,s2,s3,sf;
    dbl3_t xM;
    FFT_SCALAR ekx,eky,ekz;
    int i,ifrom,ito,tid,iH1,iH2,l,m,n,mx,my,mz;

    loop_setup_thr(ifrom,ito,tid,nlocal,comm->nthreads);

    // get per thread data
    ThrData *thr = fix->get_thr(tid);
    dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
    FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
    FFT_SCALAR * const * const d1d = static_cast<FFT_SCALAR **>(thr->get_drho1d());

    for (i = ifrom; i < ito; ++i) {
      if (type[i] == typeO) {
        find_M_thr(i,iH1,iH2,xM);
      } else xM = x[i];

      const int nx = p2g[i].a;
      const int ny = p2g[i].b;
      const int nz = p2g[i].t;
      const FFT_SCALAR dx = nx+shiftone - (xM.x-boxlox)*delxinv;
      const FFT_SCALAR dy = ny+shiftone - (xM.y-boxloy)*delyinv;
      const FFT_SCALAR dz = nz+shiftone - (xM.z-boxloz)*delzinv;

      compute_rho1d_thr(r1d,dx,dy,dz,order,rho_coeff);
      compute_drho1d_thr(d1d,dx,dy,dz,order,drho_coeff);

      ekx = eky = ekz = ZEROF;
      for (n = nlower; n <= nupper; n++) {
        mz = n+nz;
        for (m = nlower; m <= nupper; m++) {
          my = m+ny;
          for (l = nlower; l <= nupper; l++) {
            mx = l+nx;
            ekx += d1d[0][l]*r1d[1][m]*r1d[2][n]*u_brick[mz][my][mx];
            eky += r1d[0][l]*d1d[1][m]*r1d[2][n]*u_brick[mz][my][mx];
            ekz += r1d[0][l]*r1d[1][m]*d1d[2][n]*u_brick[mz][my][mx];
          }
        }
      }
      ekx *= hx_inv;
      eky *= hy_inv;
      ekz *= hz_inv;

      // convert E-field to force and subtract self forces
      const double qi = q[i];
      const double qfactor = qqrd2e*scale*qi;

      s1 = x[i].x*hx_inv;
      sf = sf_coeff[0]*sin(MY_2PI*s1);
      sf += sf_coeff[1]*sin(MY_4PI*s1);
      sf *= 2.0*qi;
      const double fx = qfactor*(ekx - sf);

      s2 = x[i].y*hy_inv;
      sf = sf_coeff[2]*sin(MY_2PI*s2);
      sf += sf_coeff[3]*sin(MY_4PI*s2);
      sf *= 2.0*qi;
      const double fy = qfactor*(eky - sf);

      s3 = x[i].z*hz_inv;
      sf = sf_coeff[4]*sin(MY_2PI*s3);
      sf += sf_coeff[5]*sin(MY_4PI*s3);
      sf *= 2.0*qi;
      const double fz = qfactor*(ekz - sf);

      if (type[i] != typeO) {
        f[i].x += fx;
        f[i].y += fy;
        if (slabflag != 2) f[i].z += fz;

      } else {
        f[i].x += fx*(1 - alpha);
        f[i].y += fy*(1 - alpha);
        if (slabflag != 2) f[i].z += fz*(1 - alpha);

        f[iH1].x += 0.5*alpha*fx;
        f[iH1].y += 0.5*alpha*fy;
        if (slabflag != 2) f[iH1].z += 0.5*alpha*fz;

        f[iH2].x += 0.5*alpha*fx;
        f[iH2].y += 0.5*alpha*fy;
        if (slabflag != 2) f[iH2].z += 0.5*alpha*fz;
      }
    }
  } // end of parallel region
}
/* ----------------------------------------------------------------------
interpolate from grid to get dispersion field & force on my particles
for ik scheme and geometric mixing rule
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::fieldforce_g_ik()
{
  const int nlocal = atom->nlocal;

  // no local atoms => nothing to do
  if (nlocal == 0) return;

  // loop over my charges, interpolate electric field from nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt
  // ek = 3 components of E-field on particle

  const double * const * const x = atom->x;

#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {
#if defined(_OPENMP)
    // each thread works on a fixed chunk of atoms.
    const int tid = omp_get_thread_num();
    const int inum = nlocal;
    const int idelta = 1 + inum/comm->nthreads;
    const int ifrom = tid*idelta;
    const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
    const int ifrom = 0;
    const int ito = nlocal;
    const int tid = 0;
#endif
    ThrData *thr = fix->get_thr(tid);
    double * const * const f = thr->get_f();
    FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());

    int l,m,n,nx,ny,nz,mx,my,mz;
    FFT_SCALAR dx,dy,dz,x0,y0,z0;
    FFT_SCALAR ekx,eky,ekz;
    int type;
    double lj;

    // this if protects against having more threads than local atoms
    if (ifrom < nlocal) {
      for (int i = ifrom; i < ito; i++) {

        nx = part2grid_6[i][0];
        ny = part2grid_6[i][1];
        nz = part2grid_6[i][2];
        dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
        dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
        dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;

        compute_rho1d_thr(r1d,dx,dy,dz,order_6,rho_coeff_6);

        ekx = eky = ekz = ZEROF;
        for (n = nlower_6; n <= nupper_6; n++) {
          mz = n+nz;
          z0 = r1d[2][n];
          for (m = nlower_6; m <= nupper_6; m++) {
            my = m+ny;
            y0 = z0*r1d[1][m];
            for (l = nlower_6; l <= nupper_6; l++) {
              mx = l+nx;
              x0 = y0*r1d[0][l];
              ekx -= x0*vdx_brick_g[mz][my][mx];
              eky -= x0*vdy_brick_g[mz][my][mx];
              ekz -= x0*vdz_brick_g[mz][my][mx];
            }
          }
        }

        // convert E-field to force
        type = atom->type[i];
        lj = B[type];
        f[i][0] += lj*ekx;
        f[i][1] += lj*eky;
        f[i][2] += lj*ekz;
      }
    }
  }
}
/* ----------------------------------------------------------------------
interpolate from grid to get dispersion field & force on my particles
for ad scheme and geometric mixing rule
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::fieldforce_g_ad()
{
  const int nlocal = atom->nlocal;

  // no local atoms => nothing to do
  if (nlocal == 0) return;

  // loop over my charges, interpolate electric field from nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt
  // ek = 3 components of E-field on particle

  const double * const * const x = atom->x;

  double *prd;
  if (triclinic == 0) prd = domain->prd;
  else prd = domain->prd_lamda;

  double xprd = prd[0];
  double yprd = prd[1];
  double zprd = prd[2];
  double zprd_slab = zprd*slab_volfactor;

  const double hx_inv = nx_pppm_6/xprd;
  const double hy_inv = ny_pppm_6/yprd;
  const double hz_inv = nz_pppm_6/zprd_slab;

#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {
#if defined(_OPENMP)
    // each thread works on a fixed chunk of atoms.
    const int tid = omp_get_thread_num();
    const int inum = nlocal;
    const int idelta = 1 + inum/comm->nthreads;
    const int ifrom = tid*idelta;
    const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
    const int ifrom = 0;
    const int ito = nlocal;
    const int tid = 0;
#endif
    ThrData *thr = fix->get_thr(tid);
    double * const * const f = thr->get_f();
    FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
    FFT_SCALAR * const * const dr1d = static_cast<FFT_SCALAR **>(thr->get_drho1d_6());

    int l,m,n,nx,ny,nz,mx,my,mz;
    FFT_SCALAR dx,dy,dz;
    FFT_SCALAR ekx,eky,ekz;
    int type;
    double lj;
    double sf = 0.0;
    double s1,s2,s3;

    // this if protects against having more threads than local atoms
    if (ifrom < nlocal) {
      for (int i = ifrom; i < ito; i++) {

        nx = part2grid_6[i][0];
        ny = part2grid_6[i][1];
        nz = part2grid_6[i][2];
        dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
        dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
        dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;

        compute_rho1d_thr(r1d,dx,dy,dz,order_6,rho_coeff_6);
        compute_drho1d_thr(dr1d,dx,dy,dz,order_6,drho_coeff_6);

        ekx = eky = ekz = ZEROF;
        for (n = nlower_6; n <= nupper_6; n++) {
          mz = n+nz;
          for (m = nlower_6; m <= nupper_6; m++) {
            my = m+ny;
            for (l = nlower_6; l <= nupper_6; l++) {
              mx = l+nx;
              ekx += dr1d[0][l]*r1d[1][m]*r1d[2][n]*u_brick_g[mz][my][mx];
              eky += r1d[0][l]*dr1d[1][m]*r1d[2][n]*u_brick_g[mz][my][mx];
              ekz += r1d[0][l]*r1d[1][m]*dr1d[2][n]*u_brick_g[mz][my][mx];
            }
          }
        }
        ekx *= hx_inv;
        eky *= hy_inv;
        ekz *= hz_inv;

        // convert E-field to force
        type = atom->type[i];
        lj = B[type];

        s1 = x[i][0]*hx_inv;
        s2 = x[i][1]*hy_inv;
        s3 = x[i][2]*hz_inv;

        sf = sf_coeff_6[0]*sin(2*MY_PI*s1);
        sf += sf_coeff_6[1]*sin(4*MY_PI*s1);
        sf *= 2*lj*lj;
        f[i][0] += ekx*lj - sf;

        sf = sf_coeff_6[2]*sin(2*MY_PI*s2);
        sf += sf_coeff_6[3]*sin(4*MY_PI*s2);
        sf *= 2*lj*lj;
        f[i][1] += eky*lj - sf;

        sf = sf_coeff_6[4]*sin(2*MY_PI*s3);
        sf += sf_coeff_6[5]*sin(4*MY_PI*s3);
        sf *= 2*lj*lj;
        if (slabflag != 2) f[i][2] += ekz*lj - sf;
      }
    }
  }
}
/* ----------------------------------------------------------------------
interpolate from grid to get per-atom energy/virial for dispersion
interaction and geometric mixing rule
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::fieldforce_g_peratom()
{
  const int nlocal = atom->nlocal;

  // no local atoms => nothing to do
  if (nlocal == 0) return;

  // loop over my charges, interpolate from nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt

  const double * const * const x = atom->x;

#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {
#if defined(_OPENMP)
    // each thread works on a fixed chunk of atoms.
    const int tid = omp_get_thread_num();
    const int inum = nlocal;
    const int idelta = 1 + inum/comm->nthreads;
    const int ifrom = tid*idelta;
    const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
    const int ifrom = 0;
    const int ito = nlocal;
    const int tid = 0;
#endif
    ThrData *thr = fix->get_thr(tid);
    FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());

    int l,m,n,nx,ny,nz,mx,my,mz;
    FFT_SCALAR dx,dy,dz,x0,y0,z0;
    FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
    int type;
    double lj;

    // this if protects against having more threads than local atoms
    if (ifrom < nlocal) {
      for (int i = ifrom; i < ito; i++) {

        nx = part2grid_6[i][0];
        ny = part2grid_6[i][1];
        nz = part2grid_6[i][2];
        dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
        dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
        dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;

        compute_rho1d_thr(r1d,dx,dy,dz,order_6,rho_coeff_6);

        u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
        for (n = nlower_6; n <= nupper_6; n++) {
          mz = n+nz;
          z0 = r1d[2][n];
          for (m = nlower_6; m <= nupper_6; m++) {
            my = m+ny;
            y0 = z0*r1d[1][m];
            for (l = nlower_6; l <= nupper_6; l++) {
              mx = l+nx;
              x0 = y0*r1d[0][l];
              if (eflag_atom) u += x0*u_brick_g[mz][my][mx];
              if (vflag_atom) {
                v0 += x0*v0_brick_g[mz][my][mx];
                v1 += x0*v1_brick_g[mz][my][mx];
                v2 += x0*v2_brick_g[mz][my][mx];
                v3 += x0*v3_brick_g[mz][my][mx];
                v4 += x0*v4_brick_g[mz][my][mx];
                v5 += x0*v5_brick_g[mz][my][mx];
              }
            }
          }
        }

        type = atom->type[i];
        lj = B[type]*0.5;

        if (eflag_atom) eatom[i] += u*lj;
        if (vflag_atom) {
          vatom[i][0] += v0*lj;
          vatom[i][1] += v1*lj;
          vatom[i][2] += v2*lj;
          vatom[i][3] += v3*lj;
          vatom[i][4] += v4*lj;
          vatom[i][5] += v5*lj;
        }
      }
    }
  }
}
/* ----------------------------------------------------------------------
interpolate from grid to get dispersion field & force on my particles
for ik scheme and arithmetic mixing rule
------------------------------------------------------------------------- */
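// With the arithmetic mixing rule the dispersion coefficient of an atom pair
// does not reduce to a single per-type weight, so the method splits the
// dispersion density over the seven grids a0..a6 (see make_rho_a() above)
// with per-type weights B[7*type] .. B[7*type+6]; the total field is then the
// matching weighted sum of the seven brick contributions.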
void PPPMDispTIP4POMP::fieldforce_a_ik()
{
  const int nlocal = atom->nlocal;

  // no local atoms => nothing to do
  if (nlocal == 0) return;

  // loop over my charges, interpolate electric field from nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt
  // ek = 3 components of E-field on particle

  const double * const * const x = atom->x;

#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {
#if defined(_OPENMP)
    // each thread works on a fixed chunk of atoms.
    const int tid = omp_get_thread_num();
    const int inum = nlocal;
    const int idelta = 1 + inum/comm->nthreads;
    const int ifrom = tid*idelta;
    const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
    const int ifrom = 0;
    const int ito = nlocal;
    const int tid = 0;
#endif
    ThrData *thr = fix->get_thr(tid);
    double * const * const f = thr->get_f();
    FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());

    int l,m,n,nx,ny,nz,mx,my,mz;
    FFT_SCALAR dx,dy,dz,x0,y0,z0;
    FFT_SCALAR ekx0,eky0,ekz0,ekx1,eky1,ekz1,ekx2,eky2,ekz2;
    FFT_SCALAR ekx3,eky3,ekz3,ekx4,eky4,ekz4,ekx5,eky5,ekz5;
    FFT_SCALAR ekx6,eky6,ekz6;
    int type;
    double lj0,lj1,lj2,lj3,lj4,lj5,lj6;

    // this if protects against having more threads than local atoms
    if (ifrom < nlocal) {
      for (int i = ifrom; i < ito; i++) {

        nx = part2grid_6[i][0];
        ny = part2grid_6[i][1];
        nz = part2grid_6[i][2];
        dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
        dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
        dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;

        compute_rho1d_thr(r1d,dx,dy,dz,order_6,rho_coeff_6);

        ekx0 = eky0 = ekz0 = ZEROF;
        ekx1 = eky1 = ekz1 = ZEROF;
        ekx2 = eky2 = ekz2 = ZEROF;
        ekx3 = eky3 = ekz3 = ZEROF;
        ekx4 = eky4 = ekz4 = ZEROF;
        ekx5 = eky5 = ekz5 = ZEROF;
        ekx6 = eky6 = ekz6 = ZEROF;
        for (n = nlower_6; n <= nupper_6; n++) {
          mz = n+nz;
          z0 = r1d[2][n];
          for (m = nlower_6; m <= nupper_6; m++) {
            my = m+ny;
            y0 = z0*r1d[1][m];
            for (l = nlower_6; l <= nupper_6; l++) {
              mx = l+nx;
              x0 = y0*r1d[0][l];
              ekx0 -= x0*vdx_brick_a0[mz][my][mx];
              eky0 -= x0*vdy_brick_a0[mz][my][mx];
              ekz0 -= x0*vdz_brick_a0[mz][my][mx];
              ekx1 -= x0*vdx_brick_a1[mz][my][mx];
              eky1 -= x0*vdy_brick_a1[mz][my][mx];
              ekz1 -= x0*vdz_brick_a1[mz][my][mx];
              ekx2 -= x0*vdx_brick_a2[mz][my][mx];
              eky2 -= x0*vdy_brick_a2[mz][my][mx];
              ekz2 -= x0*vdz_brick_a2[mz][my][mx];
              ekx3 -= x0*vdx_brick_a3[mz][my][mx];
              eky3 -= x0*vdy_brick_a3[mz][my][mx];
              ekz3 -= x0*vdz_brick_a3[mz][my][mx];
              ekx4 -= x0*vdx_brick_a4[mz][my][mx];
              eky4 -= x0*vdy_brick_a4[mz][my][mx];
              ekz4 -= x0*vdz_brick_a4[mz][my][mx];
              ekx5 -= x0*vdx_brick_a5[mz][my][mx];
              eky5 -= x0*vdy_brick_a5[mz][my][mx];
              ekz5 -= x0*vdz_brick_a5[mz][my][mx];
              ekx6 -= x0*vdx_brick_a6[mz][my][mx];
              eky6 -= x0*vdy_brick_a6[mz][my][mx];
              ekz6 -= x0*vdz_brick_a6[mz][my][mx];
            }
          }
        }

        // convert D-field to force
        type = atom->type[i];
        lj0 = B[7*type+6];
        lj1 = B[7*type+5];
        lj2 = B[7*type+4];
        lj3 = B[7*type+3];
        lj4 = B[7*type+2];
        lj5 = B[7*type+1];
        lj6 = B[7*type];
        f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 +
                   lj4*ekx4 + lj5*ekx5 + lj6*ekx6;
        f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 +
                   lj4*eky4 + lj5*eky5 + lj6*eky6;
        f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 +
                   lj4*ekz4 + lj5*ekz5 + lj6*ekz6;
      }
    }
  }
}
/* ----------------------------------------------------------------------
interpolate from grid to get dispersion field & force on my particles
for ad scheme and arithmetic mixing rule
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::fieldforce_a_ad()
{
  const int nlocal = atom->nlocal;

  // no local atoms => nothing to do
  if (nlocal == 0) return;

  // loop over my charges, interpolate electric field from nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt
  // ek = 3 components of E-field on particle

  const double * const * const x = atom->x;

  double *prd;
  if (triclinic == 0) prd = domain->prd;
  else prd = domain->prd_lamda;

  double xprd = prd[0];
  double yprd = prd[1];
  double zprd = prd[2];
  double zprd_slab = zprd*slab_volfactor;

  const double hx_inv = nx_pppm_6/xprd;
  const double hy_inv = ny_pppm_6/yprd;
  const double hz_inv = nz_pppm_6/zprd_slab;

#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {
#if defined(_OPENMP)
    // each thread works on a fixed chunk of atoms.
    const int tid = omp_get_thread_num();
    const int inum = nlocal;
    const int idelta = 1 + inum/comm->nthreads;
    const int ifrom = tid*idelta;
    const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
    const int ifrom = 0;
    const int ito = nlocal;
    const int tid = 0;
#endif
    ThrData *thr = fix->get_thr(tid);
    double * const * const f = thr->get_f();
    FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());
    FFT_SCALAR * const * const dr1d = static_cast<FFT_SCALAR **>(thr->get_drho1d_6());

    int l,m,n,nx,ny,nz,mx,my,mz;
    FFT_SCALAR dx,dy,dz,x0,y0,z0;
    FFT_SCALAR ekx0,eky0,ekz0,ekx1,eky1,ekz1,ekx2,eky2,ekz2;
    FFT_SCALAR ekx3,eky3,ekz3,ekx4,eky4,ekz4,ekx5,eky5,ekz5;
    FFT_SCALAR ekx6,eky6,ekz6;
    int type;
    double lj0,lj1,lj2,lj3,lj4,lj5,lj6;
    double sf = 0.0;
    double s1,s2,s3;

    // this if protects against having more threads than local atoms
    if (ifrom < nlocal) {
      for (int i = ifrom; i < ito; i++) {

        nx = part2grid_6[i][0];
        ny = part2grid_6[i][1];
        nz = part2grid_6[i][2];
        dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
        dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
        dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;

        compute_rho1d_thr(r1d,dx,dy,dz,order_6,rho_coeff_6);
        compute_drho1d_thr(dr1d,dx,dy,dz,order_6,drho_coeff_6);

        ekx0 = eky0 = ekz0 = ZEROF;
        ekx1 = eky1 = ekz1 = ZEROF;
        ekx2 = eky2 = ekz2 = ZEROF;
        ekx3 = eky3 = ekz3 = ZEROF;
        ekx4 = eky4 = ekz4 = ZEROF;
        ekx5 = eky5 = ekz5 = ZEROF;
        ekx6 = eky6 = ekz6 = ZEROF;
        for (n = nlower_6; n <= nupper_6; n++) {
          mz = n+nz;
          for (m = nlower_6; m <= nupper_6; m++) {
            my = m+ny;
            for (l = nlower_6; l <= nupper_6; l++) {
              mx = l+nx;
              x0 = dr1d[0][l]*r1d[1][m]*r1d[2][n];
              y0 = r1d[0][l]*dr1d[1][m]*r1d[2][n];
              z0 = r1d[0][l]*r1d[1][m]*dr1d[2][n];

              ekx0 += x0*u_brick_a0[mz][my][mx];
              eky0 += y0*u_brick_a0[mz][my][mx];
              ekz0 += z0*u_brick_a0[mz][my][mx];
              ekx1 += x0*u_brick_a1[mz][my][mx];
              eky1 += y0*u_brick_a1[mz][my][mx];
              ekz1 += z0*u_brick_a1[mz][my][mx];
              ekx2 += x0*u_brick_a2[mz][my][mx];
              eky2 += y0*u_brick_a2[mz][my][mx];
              ekz2 += z0*u_brick_a2[mz][my][mx];
              ekx3 += x0*u_brick_a3[mz][my][mx];
              eky3 += y0*u_brick_a3[mz][my][mx];
              ekz3 += z0*u_brick_a3[mz][my][mx];
              ekx4 += x0*u_brick_a4[mz][my][mx];
              eky4 += y0*u_brick_a4[mz][my][mx];
              ekz4 += z0*u_brick_a4[mz][my][mx];
              ekx5 += x0*u_brick_a5[mz][my][mx];
              eky5 += y0*u_brick_a5[mz][my][mx];
              ekz5 += z0*u_brick_a5[mz][my][mx];
              ekx6 += x0*u_brick_a6[mz][my][mx];
              eky6 += y0*u_brick_a6[mz][my][mx];
              ekz6 += z0*u_brick_a6[mz][my][mx];
            }
          }
        }

        ekx0 *= hx_inv;
        eky0 *= hy_inv;
        ekz0 *= hz_inv;
        ekx1 *= hx_inv;
        eky1 *= hy_inv;
        ekz1 *= hz_inv;
        ekx2 *= hx_inv;
        eky2 *= hy_inv;
        ekz2 *= hz_inv;
        ekx3 *= hx_inv;
        eky3 *= hy_inv;
        ekz3 *= hz_inv;
        ekx4 *= hx_inv;
        eky4 *= hy_inv;
        ekz4 *= hz_inv;
        ekx5 *= hx_inv;
        eky5 *= hy_inv;
        ekz5 *= hz_inv;
        ekx6 *= hx_inv;
        eky6 *= hy_inv;
        ekz6 *= hz_inv;

        // convert D-field to force
        type = atom->type[i];
        lj0 = B[7*type+6];
        lj1 = B[7*type+5];
        lj2 = B[7*type+4];
        lj3 = B[7*type+3];
        lj4 = B[7*type+2];
        lj5 = B[7*type+1];
        lj6 = B[7*type];

        s1 = x[i][0]*hx_inv;
        s2 = x[i][1]*hy_inv;
        s3 = x[i][2]*hz_inv;

        sf = sf_coeff_6[0]*sin(2*MY_PI*s1);
        sf += sf_coeff_6[1]*sin(4*MY_PI*s1);
        sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
        f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 +
                   lj4*ekx4 + lj5*ekx5 + lj6*ekx6 - sf;

        sf = sf_coeff_6[2]*sin(2*MY_PI*s2);
        sf += sf_coeff_6[3]*sin(4*MY_PI*s2);
        sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
        f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 +
                   lj4*eky4 + lj5*eky5 + lj6*eky6 - sf;

        sf = sf_coeff_6[4]*sin(2*MY_PI*s3);
        sf += sf_coeff_6[5]*sin(4*MY_PI*s3);
        sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3;
        if (slabflag != 2)
          f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 +
                     lj4*ekz4 + lj5*ekz5 + lj6*ekz6 - sf;
      }
    }
  }
}
/* ----------------------------------------------------------------------
interpolate from grid to get per-atom energy/virial for dispersion
interaction and arithmetic mixing rule
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::fieldforce_a_peratom()
{
  const int nlocal = atom->nlocal;

  // no local atoms => nothing to do
  if (nlocal == 0) return;

  // loop over my charges, interpolate from nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt

  const double * const * const x = atom->x;

#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {
#if defined(_OPENMP)
    // each thread works on a fixed chunk of atoms.
    const int tid = omp_get_thread_num();
    const int inum = nlocal;
    const int idelta = 1 + inum/comm->nthreads;
    const int ifrom = tid*idelta;
    const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
    const int ifrom = 0;
    const int ito = nlocal;
    const int tid = 0;
#endif
    ThrData *thr = fix->get_thr(tid);
    FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6());

    int i,l,m,n,nx,ny,nz,mx,my,mz;
    FFT_SCALAR dx,dy,dz,x0,y0,z0;
    FFT_SCALAR u0,v00,v10,v20,v30,v40,v50;
    FFT_SCALAR u1,v01,v11,v21,v31,v41,v51;
    FFT_SCALAR u2,v02,v12,v22,v32,v42,v52;
    FFT_SCALAR u3,v03,v13,v23,v33,v43,v53;
    FFT_SCALAR u4,v04,v14,v24,v34,v44,v54;
    FFT_SCALAR u5,v05,v15,v25,v35,v45,v55;
    FFT_SCALAR u6,v06,v16,v26,v36,v46,v56;
    int type;
    double lj0,lj1,lj2,lj3,lj4,lj5,lj6;

    // this if protects against having more threads than local atoms
    if (ifrom < nlocal) {
      for (i = ifrom; i < ito; i++) {

        nx = part2grid_6[i][0];
        ny = part2grid_6[i][1];
        nz = part2grid_6[i][2];
        dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6;
        dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6;
        dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6;

        compute_rho1d_thr(r1d,dx,dy,dz,order_6,rho_coeff_6);

        u0 = v00 = v10 = v20 = v30 = v40 = v50 = ZEROF;
        u1 = v01 = v11 = v21 = v31 = v41 = v51 = ZEROF;
        u2 = v02 = v12 = v22 = v32 = v42 = v52 = ZEROF;
        u3 = v03 = v13 = v23 = v33 = v43 = v53 = ZEROF;
        u4 = v04 = v14 = v24 = v34 = v44 = v54 = ZEROF;
        u5 = v05 = v15 = v25 = v35 = v45 = v55 = ZEROF;
        u6 = v06 = v16 = v26 = v36 = v46 = v56 = ZEROF;
        for (n = nlower_6; n <= nupper_6; n++) {
          mz = n+nz;
          z0 = r1d[2][n];
          for (m = nlower_6; m <= nupper_6; m++) {
            my = m+ny;
            y0 = z0*r1d[1][m];
            for (l = nlower_6; l <= nupper_6; l++) {
              mx = l+nx;
              x0 = y0*r1d[0][l];
              if (eflag_atom) {
                u0 += x0*u_brick_a0[mz][my][mx];
                u1 += x0*u_brick_a1[mz][my][mx];
                u2 += x0*u_brick_a2[mz][my][mx];
                u3 += x0*u_brick_a3[mz][my][mx];
                u4 += x0*u_brick_a4[mz][my][mx];
                u5 += x0*u_brick_a5[mz][my][mx];
                u6 += x0*u_brick_a6[mz][my][mx];
              }
              if (vflag_atom) {
                v00 += x0*v0_brick_a0[mz][my][mx];
                v10 += x0*v1_brick_a0[mz][my][mx];
                v20 += x0*v2_brick_a0[mz][my][mx];
                v30 += x0*v3_brick_a0[mz][my][mx];
                v40 += x0*v4_brick_a0[mz][my][mx];
                v50 += x0*v5_brick_a0[mz][my][mx];
                v01 += x0*v0_brick_a1[mz][my][mx];
                v11 += x0*v1_brick_a1[mz][my][mx];
                v21 += x0*v2_brick_a1[mz][my][mx];
                v31 += x0*v3_brick_a1[mz][my][mx];
                v41 += x0*v4_brick_a1[mz][my][mx];
                v51 += x0*v5_brick_a1[mz][my][mx];
                v02 += x0*v0_brick_a2[mz][my][mx];
                v12 += x0*v1_brick_a2[mz][my][mx];
                v22 += x0*v2_brick_a2[mz][my][mx];
                v32 += x0*v3_brick_a2[mz][my][mx];
                v42 += x0*v4_brick_a2[mz][my][mx];
                v52 += x0*v5_brick_a2[mz][my][mx];
                v03 += x0*v0_brick_a3[mz][my][mx];
                v13 += x0*v1_brick_a3[mz][my][mx];
                v23 += x0*v2_brick_a3[mz][my][mx];
                v33 += x0*v3_brick_a3[mz][my][mx];
                v43 += x0*v4_brick_a3[mz][my][mx];
                v53 += x0*v5_brick_a3[mz][my][mx];
                v04 += x0*v0_brick_a4[mz][my][mx];
                v14 += x0*v1_brick_a4[mz][my][mx];
                v24 += x0*v2_brick_a4[mz][my][mx];
                v34 += x0*v3_brick_a4[mz][my][mx];
                v44 += x0*v4_brick_a4[mz][my][mx];
                v54 += x0*v5_brick_a4[mz][my][mx];
                v05 += x0*v0_brick_a5[mz][my][mx];
                v15 += x0*v1_brick_a5[mz][my][mx];
                v25 += x0*v2_brick_a5[mz][my][mx];
                v35 += x0*v3_brick_a5[mz][my][mx];
                v45 += x0*v4_brick_a5[mz][my][mx];
                v55 += x0*v5_brick_a5[mz][my][mx];
                v06 += x0*v0_brick_a6[mz][my][mx];
                v16 += x0*v1_brick_a6[mz][my][mx];
                v26 += x0*v2_brick_a6[mz][my][mx];
                v36 += x0*v3_brick_a6[mz][my][mx];
                v46 += x0*v4_brick_a6[mz][my][mx];
                v56 += x0*v5_brick_a6[mz][my][mx];
              }
            }
          }
        }

        // convert D-field to force
        type = atom->type[i];
        lj0 = B[7*type+6]*0.5;
        lj1 = B[7*type+5]*0.5;
        lj2 = B[7*type+4]*0.5;
        lj3 = B[7*type+3]*0.5;
        lj4 = B[7*type+2]*0.5;
        lj5 = B[7*type+1]*0.5;
        lj6 = B[7*type]*0.5;

        if (eflag_atom)
          eatom[i] += u0*lj0 + u1*lj1 + u2*lj2 + u3*lj3 +
                      u4*lj4 + u5*lj5 + u6*lj6;
        if (vflag_atom) {
          vatom[i][0] += v00*lj0 + v01*lj1 + v02*lj2 + v03*lj3 +
                         v04*lj4 + v05*lj5 + v06*lj6;
          vatom[i][1] += v10*lj0 + v11*lj1 + v12*lj2 + v13*lj3 +
                         v14*lj4 + v15*lj5 + v16*lj6;
          vatom[i][2] += v20*lj0 + v21*lj1 + v22*lj2 + v23*lj3 +
                         v24*lj4 + v25*lj5 + v26*lj6;
          vatom[i][3] += v30*lj0 + v31*lj1 + v32*lj2 + v33*lj3 +
                         v34*lj4 + v35*lj5 + v36*lj6;
          vatom[i][4] += v40*lj0 + v41*lj1 + v42*lj2 + v43*lj3 +
                         v44*lj4 + v45*lj5 + v46*lj6;
          vatom[i][5] += v50*lj0 + v51*lj1 + v52*lj2 + v53*lj3 +
                         v54*lj4 + v55*lj5 + v56*lj6;
        }
      }
    }
  }
}
/* ----------------------------------------------------------------------
charge assignment into rho1d
dx,dy,dz = distance of particle from "lower left" grid point
------------------------------------------------------------------------- */
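// Each column of rho_c holds the polynomial coefficients of one assignment
// weight; the inner loop evaluates all three polynomials (in dx, dy, dz)
// simultaneously via Horner's rule, highest-order coefficient first.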
void PPPMDispTIP4POMP::compute_rho1d_thr(FFT_SCALAR * const * const r1d,
                                         const FFT_SCALAR &dx,
                                         const FFT_SCALAR &dy,
                                         const FFT_SCALAR &dz,
                                         const int ord,
                                         FFT_SCALAR * const * const rho_c)
{
  int k,l;
  FFT_SCALAR r1,r2,r3;

  for (k = (1-ord)/2; k <= ord/2; k++) {
    r1 = r2 = r3 = ZEROF;

    for (l = ord-1; l >= 0; l--) {
      r1 = rho_c[l][k] + r1*dx;
      r2 = rho_c[l][k] + r2*dy;
      r3 = rho_c[l][k] + r3*dz;
    }
    r1d[0][k] = r1;
    r1d[1][k] = r2;
    r1d[2][k] = r3;
  }
}
/* ----------------------------------------------------------------------
charge assignment into drho1d
dx,dy,dz = distance of particle from "lower left" grid point
------------------------------------------------------------------------- */
void PPPMDispTIP4POMP::compute_drho1d_thr(FFT_SCALAR * const * const dr1d,
                                          const FFT_SCALAR &dx,
                                          const FFT_SCALAR &dy,
                                          const FFT_SCALAR &dz,
                                          const int ord,
                                          FFT_SCALAR * const * const drho_c)
{
  int k,l;
  FFT_SCALAR r1,r2,r3;

  for (k = (1-ord)/2; k <= ord/2; k++) {
    r1 = r2 = r3 = ZEROF;

    for (l = ord-2; l >= 0; l--) {
      r1 = drho_c[l][k] + r1*dx;
      r2 = drho_c[l][k] + r2*dy;
      r3 = drho_c[l][k] + r3*dz;
    }
    dr1d[0][k] = r1;
    dr1d[1][k] = r2;
    dr1d[2][k] = r3;
  }
}
/* ----------------------------------------------------------------------
find 2 H atoms bonded to O atom i
compute position xM of fictitious charge site for O atom
also return local indices iH1,iH2 of H atoms
------------------------------------------------------------------------- */
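// NOTE: this relies on the TIP4P convention that the two H atoms of a water
// molecule carry the atom tags immediately following their O atom
// (tag+1 and tag+2), which is what the atom->map() lookups below assume.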
void PPPMDispTIP4POMP::find_M_thr(int i, int &iH1, int &iH2, dbl3_t &xM)
{
  iH1 = atom->map(atom->tag[i] + 1);
  iH2 = atom->map(atom->tag[i] + 2);

  if (iH1 == -1 || iH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing");
  if (atom->type[iH1] != typeH || atom->type[iH2] != typeH)
    error->one(FLERR,"TIP4P hydrogen has incorrect atom type");

  // set iH1,iH2 to index of closest image to O

  iH1 = domain->closest_image(i,iH1);
  iH2 = domain->closest_image(i,iH2);

  const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];

  double delx1 = x[iH1].x - x[i].x;
  double dely1 = x[iH1].y - x[i].y;
  double delz1 = x[iH1].z - x[i].z;

  double delx2 = x[iH2].x - x[i].x;
  double dely2 = x[iH2].y - x[i].y;
  double delz2 = x[iH2].z - x[i].z;

  xM.x = x[i].x + alpha * 0.5 * (delx1 + delx2);
  xM.y = x[i].y + alpha * 0.5 * (dely1 + dely2);
  xM.z = x[i].z + alpha * 0.5 * (delz1 + delz2);
}