/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#include "pppm_omp.h"
#include "atom.h"
#include "comm.h"
#include "domain.h"
#include "error.h"
#include "fix_omp.h"
#include "force.h"
#include "memory.h"
#include "math_const.h"
#include "math_special.h"
#include <string.h>
#include <math.h>
#include "suffix.h"
#if defined(_OPENMP)
#include <omp.h>
#endif
using namespace LAMMPS_NS;
using namespace MathConst;
using namespace MathSpecial;
#ifdef FFT_SINGLE
#define ZEROF 0.0f
#else
#define ZEROF 0.0
#endif
#define EPS_HOC 1.0e-7
/* ---------------------------------------------------------------------- */
PPPMOMP::PPPMOMP(LAMMPS *lmp, int narg, char **arg) :
PPPM(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE)
{
triclinic_support = 0;
suffix_flag |= Suffix::OMP;
}
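// Typical activation from a LAMMPS input script (a usage sketch; the
// thread count and accuracy value are placeholders):
//   package omp 4
//   kspace_style pppm/omp 1.0e-4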
/* ----------------------------------------------------------------------
allocate memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */
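// In addition to the shared PPPM grids set up by PPPM::allocate(), each
// thread obtains private interpolation scratch space (rho1d and, for the
// ad scheme, drho1d) through its ThrData object, so the interpolation
// routines below need no locking.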
void PPPMOMP::allocate()
{
PPPM::allocate();
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
const int tid = omp_get_thread_num();
#else
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
thr->init_pppm(order,memory);
}
}
/* ----------------------------------------------------------------------
free memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */
void PPPMOMP::deallocate()
{
PPPM::deallocate();
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
const int tid = omp_get_thread_num();
#else
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
thr->init_pppm(-order,memory);
}
}
/* ----------------------------------------------------------------------
pre-compute modified (Hockney-Eastwood) Coulomb Green's function
------------------------------------------------------------------------- */
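// In outline, the loops below evaluate the optimal influence function
//
//   G(k) = (4*pi/k^2) * sum_b [ (k . k_b)/|k_b|^2
//                               * exp(-|k_b|^2/(4*g_ewald^2)) * W^2(k_b) ]
//          / gf_denom(...)
//
// where k_b ranges over the aliasing images of k selected by the
// (nx,ny,nz) loops, W is the Fourier transform of the charge-assignment
// stencil (computed per dimension by powsinxx), and gf_denom() supplies
// the denominator sum in closed form.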
void PPPMOMP::compute_gf_ik()
{
const double * const prd = (triclinic==0) ? domain->prd : domain->prd_lamda;
const double xprd = prd[0];
const double yprd = prd[1];
const double zprd = prd[2];
const double zprd_slab = zprd*slab_volfactor;
const double unitkx = (MY_2PI/xprd);
const double unitky = (MY_2PI/yprd);
const double unitkz = (MY_2PI/zprd_slab);
const int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
pow(-log(EPS_HOC),0.25));
const int nby = static_cast<int> ((g_ewald*yprd/(MY_PI*ny_pppm)) *
pow(-log(EPS_HOC),0.25));
const int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) *
pow(-log(EPS_HOC),0.25));
const int numk = nxhi_fft - nxlo_fft + 1;
const int numl = nyhi_fft - nylo_fft + 1;
const int twoorder = 2*order;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
double snx,sny,snz;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double sum1,dot1,dot2;
double numerator,denominator;
double sqk;
int k,l,m,nx,ny,nz,kper,lper,mper,n,nfrom,nto,tid;
loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads);
for (n = nfrom; n < nto; ++n) {
m = n / (numl*numk);
l = (n - m*numl*numk) / numk;
k = n - m*numl*numk - l*numk;
m += nzlo_fft;
l += nylo_fft;
k += nxlo_fft;
mper = m - nz_pppm*(2*m/nz_pppm);
snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm));
lper = l - ny_pppm*(2*l/ny_pppm);
sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));
kper = k - nx_pppm*(2*k/nx_pppm);
snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));
sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
if (sqk != 0.0) {
numerator = 12.5663706/sqk; // 12.5663706 = 4*pi
denominator = gf_denom(snx,sny,snz);
sum1 = 0.0;
for (nx = -nbx; nx <= nbx; nx++) {
qx = unitkx*(kper+nx_pppm*nx);
sx = exp(-0.25*square(qx/g_ewald));
argx = 0.5*qx*xprd/nx_pppm;
wx = powsinxx(argx,twoorder);
for (ny = -nby; ny <= nby; ny++) {
qy = unitky*(lper+ny_pppm*ny);
sy = exp(-0.25*square(qy/g_ewald));
argy = 0.5*qy*yprd/ny_pppm;
wy = powsinxx(argy,twoorder);
for (nz = -nbz; nz <= nbz; nz++) {
qz = unitkz*(mper+nz_pppm*nz);
sz = exp(-0.25*square(qz/g_ewald));
argz = 0.5*qz*zprd_slab/nz_pppm;
wz = powsinxx(argz,twoorder);
dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
dot2 = qx*qx+qy*qy+qz*qz;
sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
}
}
}
greensfn[n] = numerator*sum1/denominator;
} else greensfn[n] = 0.0;
}
} // end of parallel region
}
/* ----------------------------------------------------------------------
compute optimized Green's function for energy calculation
------------------------------------------------------------------------- */
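// The ad ("analytic differentiation") variant keeps only the central
// image of each k-vector: greensfn[n] = (4*pi/k^2) * S(k) * W^2(k) /
// gf_denom, with S(k) = exp(-k^2/(4*g_ewald^2)). Alongside, sf0..sf5
// accumulate the dot products of greensfn with the precomputed
// sf_precoeff arrays; after the MPI_Allreduce at the end of this
// function they become the self-force coefficients sf_coeff used in
// fieldforce_ad().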
void PPPMOMP::compute_gf_ad()
{
const double * const prd = (triclinic==0) ? domain->prd : domain->prd_lamda;
const double xprd = prd[0];
const double yprd = prd[1];
const double zprd = prd[2];
const double zprd_slab = zprd*slab_volfactor;
const double unitkx = (MY_2PI/xprd);
const double unitky = (MY_2PI/yprd);
const double unitkz = (MY_2PI/zprd_slab);
const int numk = nxhi_fft - nxlo_fft + 1;
const int numl = nyhi_fft - nylo_fft + 1;
const int twoorder = 2*order;
double sf0=0.0,sf1=0.0,sf2=0.0,sf3=0.0,sf4=0.0,sf5=0.0;
#if defined(_OPENMP)
#pragma omp parallel default(none) reduction(+:sf0,sf1,sf2,sf3,sf4,sf5)
#endif
{
double snx,sny,snz,sqk;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double numerator,denominator;
int k,l,m,kper,lper,mper,n,nfrom,nto,tid;
loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads);
for (n = nfrom; n < nto; ++n) {
m = n / (numl*numk);
l = (n - m*numl*numk) / numk;
k = n - m*numl*numk - l*numk;
m += nzlo_fft;
l += nylo_fft;
k += nxlo_fft;
mper = m - nz_pppm*(2*m/nz_pppm);
qz = unitkz*mper;
snz = square(sin(0.5*qz*zprd_slab/nz_pppm));
sz = exp(-0.25*square(qz/g_ewald));
argz = 0.5*qz*zprd_slab/nz_pppm;
wz = powsinxx(argz,twoorder);
lper = l - ny_pppm*(2*l/ny_pppm);
qy = unitky*lper;
sny = square(sin(0.5*qy*yprd/ny_pppm));
sy = exp(-0.25*square(qy/g_ewald));
argy = 0.5*qy*yprd/ny_pppm;
wy = powsinxx(argy,twoorder);
kper = k - nx_pppm*(2*k/nx_pppm);
qx = unitkx*kper;
snx = square(sin(0.5*qx*xprd/nx_pppm));
sx = exp(-0.25*square(qx/g_ewald));
argx = 0.5*qx*xprd/nx_pppm;
wx = powsinxx(argx,twoorder);
sqk = qx*qx + qy*qy + qz*qz;
if (sqk != 0.0) {
numerator = MY_4PI/sqk;
denominator = gf_denom(snx,sny,snz);
greensfn[n] = numerator*sx*sy*sz*wx*wy*wz/denominator;
sf0 += sf_precoeff1[n]*greensfn[n];
sf1 += sf_precoeff2[n]*greensfn[n];
sf2 += sf_precoeff3[n]*greensfn[n];
sf3 += sf_precoeff4[n]*greensfn[n];
sf4 += sf_precoeff5[n]*greensfn[n];
sf5 += sf_precoeff6[n]*greensfn[n];
} else {
greensfn[n] = 0.0;
sf0 += sf_precoeff1[n]*greensfn[n];
sf1 += sf_precoeff2[n]*greensfn[n];
sf2 += sf_precoeff3[n]*greensfn[n];
sf3 += sf_precoeff4[n]*greensfn[n];
sf4 += sf_precoeff5[n]*greensfn[n];
sf5 += sf_precoeff6[n]*greensfn[n];
}
}
} // end of parallel region
// compute the coefficients for the self-force correction
double prex, prey, prez, tmp[6];
prex = prey = prez = MY_PI/volume;
prex *= nx_pppm/xprd;
prey *= ny_pppm/yprd;
prez *= nz_pppm/zprd_slab;
tmp[0] = sf0 * prex;
tmp[1] = sf1 * prex*2;
tmp[2] = sf2 * prey;
tmp[3] = sf3 * prey*2;
tmp[4] = sf4 * prez;
tmp[5] = sf5 * prez*2;
// communicate values with other procs
MPI_Allreduce(tmp,sf_coeff,6,MPI_DOUBLE,MPI_SUM,world);
}
/* ----------------------------------------------------------------------
run the regular top-level compute method from plain PPPM, which
dispatches to the threaded versions of the methods overridden in
this class, then perform the obligatory per-thread force reduction
------------------------------------------------------------------------- */
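// PPPM::compute() is reused verbatim: the virtual methods it calls
// (make_rho(), fieldforce_ik()/fieldforce_ad(), ...) resolve to the
// threaded versions in this class, which accumulate forces into
// per-thread arrays; reduce_thr() then folds those arrays back into
// the global per-atom force array.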
void PPPMOMP::compute(int eflag, int vflag)
{
PPPM::compute(eflag,vflag);
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
{
#if defined(_OPENMP)
const int tid = omp_get_thread_num();
#else
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
reduce_thr(this, eflag, vflag, thr);
} // end of omp parallel region
}
/* ----------------------------------------------------------------------
create discretized "density" on section of global grid due to my particles
density(x,y,z) = charge "density" at grid points of my 3d brick
(nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
in global grid
------------------------------------------------------------------------- */
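// Thread-safety scheme: the flat grid index range [0,ngrid) is split
// into one contiguous slice [jfrom,jto) per thread. Every thread walks
// the full atom list, but only adds contributions to grid points inside
// its own slice, so no two threads write the same element and no
// atomics are required.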
void PPPMOMP::make_rho()
{
// clear 3d density array
FFT_SCALAR * _noalias const d = &(density_brick[nzlo_out][nylo_out][nxlo_out]);
memset(d,0,ngrid*sizeof(FFT_SCALAR));
// no local atoms => nothing else to do
const int nlocal = atom->nlocal;
if (nlocal == 0) return;
const int ix = nxhi_out - nxlo_out + 1;
const int iy = nyhi_out - nylo_out + 1;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
const double * _noalias const q = atom->q;
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
const int3_t * _noalias const p2g = (int3_t *) part2grid[0];
const double boxlox = boxlo[0];
const double boxloy = boxlo[1];
const double boxloz = boxlo[2];
// determine range of grid points handled by this thread
int i,jfrom,jto,tid;
loop_setup_thr(jfrom,jto,tid,ngrid,comm->nthreads);
// get per thread data
ThrData *thr = fix->get_thr(tid);
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
// loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// loop over all local atoms for all threads
for (i = 0; i < nlocal; i++) {
const int nx = p2g[i].a;
const int ny = p2g[i].b;
const int nz = p2g[i].t;
// pre-screen whether this atom will ever come within
// reach of the data segment this thread is updating.
if ( ((nz+nlower-nzlo_out)*ix*iy >= jto)
|| ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue;
const FFT_SCALAR dx = nx+shiftone - (x[i].x-boxlox)*delxinv;
const FFT_SCALAR dy = ny+shiftone - (x[i].y-boxloy)*delyinv;
const FFT_SCALAR dz = nz+shiftone - (x[i].z-boxloz)*delzinv;
compute_rho1d_thr(r1d,dx,dy,dz);
const FFT_SCALAR z0 = delvolinv * q[i];
for (int n = nlower; n <= nupper; ++n) {
const int jn = (nz+n-nzlo_out)*ix*iy;
const FFT_SCALAR y0 = z0*r1d[2][n];
for (int m = nlower; m <= nupper; ++m) {
const int jm = jn+(ny+m-nylo_out)*ix;
const FFT_SCALAR x0 = y0*r1d[1][m];
for (int l = nlower; l <= nupper; ++l) {
const int jl = jm+nx+l-nxlo_out;
// make sure each thread only updates
// its elements of the density grid
if (jl >= jto) break;
if (jl < jfrom) continue;
d[jl] += x0*r1d[0][l];
}
}
}
}
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles for ik
------------------------------------------------------------------------- */
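// Unlike make_rho(), the atoms rather than the grid are split across
// threads: each thread interpolates the pre-differentiated field bricks
// vdx/vdy/vdz at its own subset of atoms, reusing the charge-assignment
// weights, and writes forces into its private per-thread force array.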
void PPPMOMP::fieldforce_ik()
{
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
const int nthreads = comm->nthreads;
const int nlocal = atom->nlocal;
// no local atoms => nothing to do
if (nlocal == 0) return;
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
const double * _noalias const q = atom->q;
const int3_t * _noalias const p2g = (int3_t *) part2grid[0];
const double qqrd2e = force->qqrd2e;
const double boxlox = boxlo[0];
const double boxloy = boxlo[1];
const double boxloz = boxlo[2];
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
FFT_SCALAR x0,y0,z0,ekx,eky,ekz;
int i,ifrom,ito,tid,l,m,n,mx,my,mz;
loop_setup_thr(ifrom,ito,tid,nlocal,nthreads);
// get per thread data
ThrData *thr = fix->get_thr(tid);
dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
for (i = ifrom; i < ito; ++i) {
const int nx = p2g[i].a;
const int ny = p2g[i].b;
const int nz = p2g[i].t;
const FFT_SCALAR dx = nx+shiftone - (x[i].x-boxlox)*delxinv;
const FFT_SCALAR dy = ny+shiftone - (x[i].y-boxloy)*delyinv;
const FFT_SCALAR dz = nz+shiftone - (x[i].z-boxloz)*delzinv;
compute_rho1d_thr(r1d,dx,dy,dz);
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = r1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*r1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*r1d[0][l];
ekx -= x0*vdx_brick[mz][my][mx];
eky -= x0*vdy_brick[mz][my][mx];
ekz -= x0*vdz_brick[mz][my][mx];
}
}
}
// convert E-field to force
const double qfactor = qqrd2e * scale * q[i];
f[i].x += qfactor*ekx;
f[i].y += qfactor*eky;
if (slabflag != 2) f[i].z += qfactor*ekz;
}
} // end of parallel region
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles for ad
------------------------------------------------------------------------- */
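// In the ad scheme only the potential u_brick exists on the grid; the
// field component along each direction is obtained by interpolating
// u_brick with the derivative weights d1d in that direction and the
// regular weights r1d in the other two, scaled by the grid spacing via
// hx/hy/hz_inv below. The sin() terms built from sf_coeff then remove
// the spurious self force an atom exerts on itself through the grid.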
void PPPMOMP::fieldforce_ad()
{
const int nthreads = comm->nthreads;
const int nlocal = atom->nlocal;
// no local atoms => nothing to do
if (nlocal == 0) return;
const double *prd = (triclinic == 0) ? domain->prd : domain->prd_lamda;
const double hx_inv = nx_pppm/prd[0];
const double hy_inv = ny_pppm/prd[1];
const double hz_inv = nz_pppm/prd[2];
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
const double * _noalias const q = atom->q;
const int3_t * _noalias const p2g = (int3_t *) part2grid[0];
const double qqrd2e = force->qqrd2e;
const double boxlox = boxlo[0];
const double boxloy = boxlo[1];
const double boxloz = boxlo[2];
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
double s1,s2,s3,sf;
FFT_SCALAR ekx,eky,ekz;
int i,ifrom,ito,tid,l,m,n,mx,my,mz;
loop_setup_thr(ifrom,ito,tid,nlocal,nthreads);
// get per thread data
ThrData *thr = fix->get_thr(tid);
dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
FFT_SCALAR * const * const d1d = static_cast<FFT_SCALAR **>(thr->get_drho1d());
for (i = ifrom; i < ito; ++i) {
const int nx = p2g[i].a;
const int ny = p2g[i].b;
const int nz = p2g[i].t;
const FFT_SCALAR dx = nx+shiftone - (x[i].x-boxlox)*delxinv;
const FFT_SCALAR dy = ny+shiftone - (x[i].y-boxloy)*delyinv;
const FFT_SCALAR dz = nz+shiftone - (x[i].z-boxloz)*delzinv;
compute_rho1d_thr(r1d,dx,dy,dz);
compute_drho1d_thr(d1d,dx,dy,dz);
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
for (m = nlower; m <= nupper; m++) {
my = m+ny;
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
ekx += d1d[0][l]*r1d[1][m]*r1d[2][n]*u_brick[mz][my][mx];
eky += r1d[0][l]*d1d[1][m]*r1d[2][n]*u_brick[mz][my][mx];
ekz += r1d[0][l]*r1d[1][m]*d1d[2][n]*u_brick[mz][my][mx];
}
}
}
ekx *= hx_inv;
eky *= hy_inv;
ekz *= hz_inv;
// convert E-field to force and subtract self forces
const double qi = q[i];
const double qfactor = qqrd2e * scale * qi;
s1 = x[i].x*hx_inv;
sf = sf_coeff[0]*sin(MY_2PI*s1);
sf += sf_coeff[1]*sin(MY_4PI*s1);
sf *= 2.0*qi;
f[i].x += qfactor*(ekx - sf);
s2 = x[i].y*hy_inv;
sf = sf_coeff[2]*sin(MY_2PI*s2);
sf += sf_coeff[3]*sin(MY_4PI*s2);
sf *= 2.0*qi;
f[i].y += qfactor*(eky - sf);
s3 = x[i].z*hz_inv;
sf = sf_coeff[4]*sin(MY_2PI*s3);
sf += sf_coeff[5]*sin(MY_4PI*s3);
sf *= 2.0*qi;
if (slabflag != 2) f[i].z += qfactor*(ekz - sf);
}
} // end of parallel region
}
/* ----------------------------------------------------------------------
interpolate from grid to get per-atom energy/virial
------------------------------------------------------------------------- */
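// Same per-atom thread split as the fieldforce routines: the energy
// brick u_brick and the six virial bricks v0_brick..v5_brick are
// interpolated at each atom with the charge-assignment weights, then
// scaled by the atom's charge into eatom[] and vatom[].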
void PPPMOMP::fieldforce_peratom()
{
const int nthreads = comm->nthreads;
const int nlocal = atom->nlocal;
// no local atoms => nothing to do
if (nlocal == 0) return;
// loop over my charges, interpolate from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
const double * _noalias const q = atom->q;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
int i,ifrom,ito,tid,l,m,n,nx,ny,nz,mx,my,mz;
loop_setup_thr(ifrom,ito,tid,nlocal,nthreads);
// get per thread data
ThrData *thr = fix->get_thr(tid);
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
for (i = ifrom; i < ito; ++i) {
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i].x-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i].y-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i].z-boxlo[2])*delzinv;
compute_rho1d_thr(r1d,dx,dy,dz);
u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = r1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*r1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*r1d[0][l];
if (eflag_atom) u += x0*u_brick[mz][my][mx];
if (vflag_atom) {
v0 += x0*v0_brick[mz][my][mx];
v1 += x0*v1_brick[mz][my][mx];
v2 += x0*v2_brick[mz][my][mx];
v3 += x0*v3_brick[mz][my][mx];
v4 += x0*v4_brick[mz][my][mx];
v5 += x0*v5_brick[mz][my][mx];
}
}
}
}
const double qi = q[i];
if (eflag_atom) eatom[i] += qi*u;
if (vflag_atom) {
vatom[i][0] += qi*v0;
vatom[i][1] += qi*v1;
vatom[i][2] += qi*v2;
vatom[i][3] += qi*v3;
vatom[i][4] += qi*v4;
vatom[i][5] += qi*v5;
}
}
} // end of parallel region
}
/* ----------------------------------------------------------------------
charge assignment into rho1d
dx,dy,dz = distance of particle from "lower left" grid point
------------------------------------------------------------------------- */
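// The weights are polynomials of degree order-1 in the fractional
// offsets, evaluated with Horner's rule. For example, with order = 3
// the entries for k = -1,0,1 come out as
//   r1d[0][k] = rho_coeff[0][k] + rho_coeff[1][k]*dx + rho_coeff[2][k]*dx^2
// and analogously for dy and dz.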
void PPPMOMP::compute_rho1d_thr(FFT_SCALAR * const * const r1d, const FFT_SCALAR &dx,
const FFT_SCALAR &dy, const FFT_SCALAR &dz)
{
int k,l;
FFT_SCALAR r1,r2,r3;
for (k = (1-order)/2; k <= order/2; k++) {
r1 = r2 = r3 = ZEROF;
for (l = order-1; l >= 0; l--) {
r1 = rho_coeff[l][k] + r1*dx;
r2 = rho_coeff[l][k] + r2*dy;
r3 = rho_coeff[l][k] + r3*dz;
}
r1d[0][k] = r1;
r1d[1][k] = r2;
r1d[2][k] = r3;
}
}
/* ----------------------------------------------------------------------
charge assignment into drho1d
dx,dy,dz = distance of particle from "lower left" grid point
------------------------------------------------------------------------- */
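// Same Horner evaluation as compute_rho1d_thr(), but with the inner
// loop starting at order-2: the derivative weights are polynomials of
// one degree less, as expected for the derivative of the assignment
// polynomial.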
void PPPMOMP::compute_drho1d_thr(FFT_SCALAR * const * const d1d, const FFT_SCALAR &dx,
const FFT_SCALAR &dy, const FFT_SCALAR &dz)
{
int k,l;
FFT_SCALAR r1,r2,r3;
for (k = (1-order)/2; k <= order/2; k++) {
r1 = r2 = r3 = ZEROF;
for (l = order-2; l >= 0; l--) {
r1 = drho_coeff[l][k] + r1*dx;
r2 = drho_coeff[l][k] + r2*dy;
r3 = drho_coeff[l][k] + r3*dz;
}
d1d[0][k] = r1;
d1d[1][k] = r2;
d1d[2][k] = r3;
}
}