pppm_omp.cpp

/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#include "pppm_omp.h"
#include "atom.h"
#include "comm.h"
#include "domain.h"
#include "error.h"
#include "fix_omp.h"
#include "force.h"
#include "memory.h"
#include "math_const.h"
#include "math_special.h"
#include <string.h>
#include <math.h>
#if defined(_OPENMP)
#include <omp.h>  // for omp_get_thread_num(); harmless if already provided via the ThrOMP headers
#endif
#include "suffix.h"
using namespace LAMMPS_NS;
using namespace MathConst;
using namespace MathSpecial;
#ifdef FFT_SINGLE
#define ZEROF 0.0f
#else
#define ZEROF 0.0
#endif
#define EPS_HOC 1.0e-7
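// EPS_HOC sets the tolerance used to truncate the aliasing (periodic image)
// sums in compute_gf_ik(): nbx/nby/nbz below are chosen so that the estimated
// contribution of the neglected image terms falls below this threshold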
/* ---------------------------------------------------------------------- */
PPPMOMP::PPPMOMP(LAMMPS *lmp, int narg, char **arg) :
PPPM(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE)
{
triclinic_support = 0;
suffix_flag |= Suffix::OMP;
}
/* ----------------------------------------------------------------------
allocate memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */
void PPPMOMP::allocate()
{
PPPM::allocate();
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
const int tid = omp_get_thread_num();
#else
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
thr->init_pppm(order,memory);
}
}
/* ----------------------------------------------------------------------
free memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */
void PPPMOMP::deallocate()
{
PPPM::deallocate();
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
#if defined(_OPENMP)
const int tid = omp_get_thread_num();
#else
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
thr->init_pppm(-order,memory);
}
}
/* ----------------------------------------------------------------------
pre-compute modified (Hockney-Eastwood) Coulomb Green's function
------------------------------------------------------------------------- */
void PPPMOMP::compute_gf_ik()
{
const double * const prd = (triclinic==0) ? domain->prd : domain->prd_lamda;
const double xprd = prd[0];
const double yprd = prd[1];
const double zprd = prd[2];
const double zprd_slab = zprd*slab_volfactor;
const double unitkx = (MY_2PI/xprd);
const double unitky = (MY_2PI/yprd);
const double unitkz = (MY_2PI/zprd_slab);
const int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
pow(-log(EPS_HOC),0.25));
const int nby = static_cast<int> ((g_ewald*yprd/(MY_PI*ny_pppm)) *
pow(-log(EPS_HOC),0.25));
const int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) *
pow(-log(EPS_HOC),0.25));
const int numk = nxhi_fft - nxlo_fft + 1;
const int numl = nyhi_fft - nylo_fft + 1;
const int twoorder = 2*order;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
double snx,sny,snz;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double sum1,dot1,dot2;
double numerator,denominator;
double sqk;
int k,l,m,nx,ny,nz,kper,lper,mper,n,nfrom,nto,tid;
loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads);
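// each thread works on FFT grid points n in [nfrom,nto); the flat index is
// n = (m*numl + l)*numk + k with k varying fastest, so unpack (k,l,m)
// and shift them into this proc's FFT sub-domain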
for (n = nfrom; n < nto; ++n) {
m = n / (numl*numk);
l = (n - m*numl*numk) / numk;
k = n - m*numl*numk - l*numk;
m += nzlo_fft;
l += nylo_fft;
k += nxlo_fft;
mper = m - nz_pppm*(2*m/nz_pppm);
snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm));
lper = l - ny_pppm*(2*l/ny_pppm);
sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));
kper = k - nx_pppm*(2*k/nx_pppm);
snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));
sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
if (sqk != 0.0) {
numerator = 12.5663706/sqk; // 12.5663706 = 4*pi (same constant as MY_4PI used in compute_gf_ad)
denominator = gf_denom(snx,sny,snz);
sum1 = 0.0;
for (nx = -nbx; nx <= nbx; nx++) {
qx = unitkx*(kper+nx_pppm*nx);
sx = exp(-0.25*square(qx/g_ewald));
argx = 0.5*qx*xprd/nx_pppm;
wx = powsinxx(argx,twoorder);
for (ny = -nby; ny <= nby; ny++) {
qy = unitky*(lper+ny_pppm*ny);
sy = exp(-0.25*square(qy/g_ewald));
argy = 0.5*qy*yprd/ny_pppm;
wy = powsinxx(argy,twoorder);
for (nz = -nbz; nz <= nbz; nz++) {
qz = unitkz*(mper+nz_pppm*nz);
sz = exp(-0.25*square(qz/g_ewald));
argz = 0.5*qz*zprd_slab/nz_pppm;
wz = powsinxx(argz,twoorder);
dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
dot2 = qx*qx+qy*qy+qz*qz;
sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
}
}
}
greensfn[n] = numerator*sum1/denominator;
} else greensfn[n] = 0.0;
}
} // end of parallel region
}
/* ----------------------------------------------------------------------
compute optimized Green's function for energy calculation
------------------------------------------------------------------------- */
void PPPMOMP::compute_gf_ad()
{
const double * const prd = (triclinic==0) ? domain->prd : domain->prd_lamda;
const double xprd = prd[0];
const double yprd = prd[1];
const double zprd = prd[2];
const double zprd_slab = zprd*slab_volfactor;
const double unitkx = (MY_2PI/xprd);
const double unitky = (MY_2PI/yprd);
const double unitkz = (MY_2PI/zprd_slab);
const int numk = nxhi_fft - nxlo_fft + 1;
const int numl = nyhi_fft - nylo_fft + 1;
const int twoorder = 2*order;
double sf0=0.0,sf1=0.0,sf2=0.0,sf3=0.0,sf4=0.0,sf5=0.0;
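// sf0..sf5 are per-thread partial sums for the self-force coefficients;
// they are combined across threads by the OpenMP reduction clause below
// and across MPI ranks by the MPI_Allreduce at the end of this method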
#if defined(_OPENMP)
#pragma omp parallel default(none) reduction(+:sf0,sf1,sf2,sf3,sf4,sf5)
#endif
{
double snx,sny,snz,sqk;
double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
double numerator,denominator;
int k,l,m,kper,lper,mper,n,nfrom,nto,tid;
loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads);
for (n = nfrom; n < nto; ++n) {
m = n / (numl*numk);
l = (n - m*numl*numk) / numk;
k = n - m*numl*numk - l*numk;
m += nzlo_fft;
l += nylo_fft;
k += nxlo_fft;
mper = m - nz_pppm*(2*m/nz_pppm);
qz = unitkz*mper;
snz = square(sin(0.5*qz*zprd_slab/nz_pppm));
sz = exp(-0.25*square(qz/g_ewald));
argz = 0.5*qz*zprd_slab/nz_pppm;
wz = powsinxx(argz,twoorder);
lper = l - ny_pppm*(2*l/ny_pppm);
qy = unitky*lper;
sny = square(sin(0.5*qy*yprd/ny_pppm));
sy = exp(-0.25*square(qy/g_ewald));
argy = 0.5*qy*yprd/ny_pppm;
wy = powsinxx(argy,twoorder);
kper = k - nx_pppm*(2*k/nx_pppm);
qx = unitkx*kper;
snx = square(sin(0.5*qx*xprd/nx_pppm));
sx = exp(-0.25*square(qx/g_ewald));
argx = 0.5*qx*xprd/nx_pppm;
wx = powsinxx(argx,twoorder);
sqk = qx*qx + qy*qy + qz*qz;
if (sqk != 0.0) {
numerator = MY_4PI/sqk;
denominator = gf_denom(snx,sny,snz);
greensfn[n] = numerator*sx*sy*sz*wx*wy*wz/denominator;
sf0 += sf_precoeff1[n]*greensfn[n];
sf1 += sf_precoeff2[n]*greensfn[n];
sf2 += sf_precoeff3[n]*greensfn[n];
sf3 += sf_precoeff4[n]*greensfn[n];
sf4 += sf_precoeff5[n]*greensfn[n];
sf5 += sf_precoeff6[n]*greensfn[n];
} else {
greensfn[n] = 0.0;
sf0 += sf_precoeff1[n]*greensfn[n];
sf1 += sf_precoeff2[n]*greensfn[n];
sf2 += sf_precoeff3[n]*greensfn[n];
sf3 += sf_precoeff4[n]*greensfn[n];
sf4 += sf_precoeff5[n]*greensfn[n];
sf5 += sf_precoeff6[n]*greensfn[n];
}
}
} // end of parallel region
// compute the coefficients for the self-force correction
double prex, prey, prez, tmp[6];
prex = prey = prez = MY_PI/volume;
prex *= nx_pppm/xprd;
prey *= ny_pppm/yprd;
prez *= nz_pppm/zprd_slab;
tmp[0] = sf0 * prex;
tmp[1] = sf1 * prex*2;
tmp[2] = sf2 * prey;
tmp[3] = sf3 * prey*2;
tmp[4] = sf4 * prez;
tmp[5] = sf5 * prez*2;
// communicate values with other procs
MPI_Allreduce(tmp,sf_coeff,6,MPI_DOUBLE,MPI_SUM,world);
}
/* ----------------------------------------------------------------------
run the regular top-level compute method from plain PPPM,
which will have individual methods replaced by our threaded
versions, and then call the obligatory force reduction
------------------------------------------------------------------------- */
void PPPMOMP::compute(int eflag, int vflag)
{
PPPM::compute(eflag,vflag);
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
{
#if defined(_OPENMP)
const int tid = omp_get_thread_num();
#else
const int tid = 0;
#endif
ThrData *thr = fix->get_thr(tid);
reduce_thr(this, eflag, vflag, thr);
} // end of omp parallel region
}
/* ----------------------------------------------------------------------
create discretized "density" on section of global grid due to my particles
density(x,y,z) = charge "density" at grid points of my 3d brick
(nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts)
in global grid
------------------------------------------------------------------------- */
void PPPMOMP::make_rho()
{
// clear 3d density array
FFT_SCALAR * _noalias const d = &(density_brick[nzlo_out][nylo_out][nxlo_out]);
memset(d,0,ngrid*sizeof(FFT_SCALAR));
// no local atoms => nothing else to do
const int nlocal = atom->nlocal;
if (nlocal == 0) return;
const int ix = nxhi_out - nxlo_out + 1;
const int iy = nyhi_out - nylo_out + 1;
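// ix,iy = extent of my density brick (including ghosts) in x and y,
// used to linearize (nx,ny,nz) grid indices into the flat array d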
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
const double * _noalias const q = atom->q;
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
const int3_t * _noalias const p2g = (int3_t *) part2grid[0];
const double boxlox = boxlo[0];
const double boxloy = boxlo[1];
const double boxloz = boxlo[2];
// determine range of grid points handled by this thread
int i,jfrom,jto,tid;
loop_setup_thr(jfrom,jto,tid,ngrid,comm->nthreads);
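// the density grid, not the atom list, is partitioned among threads:
// each thread only writes entries d[jfrom..jto), so no atomics or
// per-thread copies of the density array are required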
// get per thread data
ThrData *thr = fix->get_thr(tid);
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
// loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// loop over all local atoms for all threads
for (i = 0; i < nlocal; i++) {
const int nx = p2g[i].a;
const int ny = p2g[i].b;
const int nz = p2g[i].t;
// pre-screen whether this atom will ever come within
// reach of the data segment this thread is updating.
if ( ((nz+nlower-nzlo_out)*ix*iy >= jto)
|| ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue;
const FFT_SCALAR dx = nx+shiftone - (x[i].x-boxlox)*delxinv;
const FFT_SCALAR dy = ny+shiftone - (x[i].y-boxloy)*delyinv;
const FFT_SCALAR dz = nz+shiftone - (x[i].z-boxloz)*delzinv;
compute_rho1d_thr(r1d,dx,dy,dz);
const FFT_SCALAR z0 = delvolinv * q[i];
for (int n = nlower; n <= nupper; ++n) {
const int jn = (nz+n-nzlo_out)*ix*iy;
const FFT_SCALAR y0 = z0*r1d[2][n];
for (int m = nlower; m <= nupper; ++m) {
const int jm = jn+(ny+m-nylo_out)*ix;
const FFT_SCALAR x0 = y0*r1d[1][m];
for (int l = nlower; l <= nupper; ++l) {
const int jl = jm+nx+l-nxlo_out;
// make sure each thread only updates
// its own elements of the density grid
if (jl >= jto) break;
if (jl < jfrom) continue;
d[jl] += x0*r1d[0][l];
}
}
}
}
}
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles for ik
------------------------------------------------------------------------- */
void PPPMOMP::fieldforce_ik()
{
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
const int nthreads = comm->nthreads;
const int nlocal = atom->nlocal;
// no local atoms => nothing to do
if (nlocal == 0) return;
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
const double * _noalias const q = atom->q;
const int3_t * _noalias const p2g = (int3_t *) part2grid[0];
const double qqrd2e = force->qqrd2e;
const double boxlox = boxlo[0];
const double boxloy = boxlo[1];
const double boxloz = boxlo[2];
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
FFT_SCALAR x0,y0,z0,ekx,eky,ekz;
int i,ifrom,ito,tid,l,m,n,mx,my,mz;
loop_setup_thr(ifrom,ito,tid,nlocal,nthreads);
// get per thread data
ThrData *thr = fix->get_thr(tid);
dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
for (i = ifrom; i < ito; ++i) {
const int nx = p2g[i].a;
const int ny = p2g[i].b;
const int nz = p2g[i].t;
const FFT_SCALAR dx = nx+shiftone - (x[i].x-boxlox)*delxinv;
const FFT_SCALAR dy = ny+shiftone - (x[i].y-boxloy)*delyinv;
const FFT_SCALAR dz = nz+shiftone - (x[i].z-boxloz)*delzinv;
compute_rho1d_thr(r1d,dx,dy,dz);
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = r1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*r1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*r1d[0][l];
ekx -= x0*vdx_brick[mz][my][mx];
eky -= x0*vdy_brick[mz][my][mx];
ekz -= x0*vdz_brick[mz][my][mx];
}
}
}
// convert E-field to force
const double qfactor = qqrd2e * scale * q[i];
f[i].x += qfactor*ekx;
f[i].y += qfactor*eky;
if (slabflag != 2) f[i].z += qfactor*ekz;
}
} // end of parallel region
}
/* ----------------------------------------------------------------------
interpolate from grid to get electric field & force on my particles for ad
------------------------------------------------------------------------- */
void PPPMOMP::fieldforce_ad()
{
const int nthreads = comm->nthreads;
const int nlocal = atom->nlocal;
// no local atoms => nothing to do
if (nlocal == 0) return;
const double *prd = (triclinic == 0) ? domain->prd : domain->prd_lamda;
const double hx_inv = nx_pppm/prd[0];
const double hy_inv = ny_pppm/prd[1];
const double hz_inv = nz_pppm/prd[2];
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
// ek = 3 components of E-field on particle
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
const double * _noalias const q = atom->q;
const int3_t * _noalias const p2g = (int3_t *) part2grid[0];
const double qqrd2e = force->qqrd2e;
const double boxlox = boxlo[0];
const double boxloy = boxlo[1];
const double boxloz = boxlo[2];
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
double s1,s2,s3,sf;
FFT_SCALAR ekx,eky,ekz;
int i,ifrom,ito,tid,l,m,n,mx,my,mz;
loop_setup_thr(ifrom,ito,tid,nlocal,nthreads);
// get per thread data
ThrData *thr = fix->get_thr(tid);
dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
FFT_SCALAR * const * const d1d = static_cast<FFT_SCALAR **>(thr->get_drho1d());
for (i = ifrom; i < ito; ++i) {
const int nx = p2g[i].a;
const int ny = p2g[i].b;
const int nz = p2g[i].t;
const FFT_SCALAR dx = nx+shiftone - (x[i].x-boxlox)*delxinv;
const FFT_SCALAR dy = ny+shiftone - (x[i].y-boxloy)*delyinv;
const FFT_SCALAR dz = nz+shiftone - (x[i].z-boxloz)*delzinv;
compute_rho1d_thr(r1d,dx,dy,dz);
compute_drho1d_thr(d1d,dx,dy,dz);
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
for (m = nlower; m <= nupper; m++) {
my = m+ny;
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
ekx += d1d[0][l]*r1d[1][m]*r1d[2][n]*u_brick[mz][my][mx];
eky += r1d[0][l]*d1d[1][m]*r1d[2][n]*u_brick[mz][my][mx];
ekz += r1d[0][l]*r1d[1][m]*d1d[2][n]*u_brick[mz][my][mx];
}
}
}
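// d1d holds derivatives of the weights with respect to the grid-scaled
// coordinates, so the accumulated field components are converted to
// real-space units by multiplying with the inverse grid spacings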
ekx *= hx_inv;
eky *= hy_inv;
ekz *= hz_inv;
// convert E-field to force and subtract self forces
const double qi = q[i];
const double qfactor = qqrd2e * scale * qi;
s1 = x[i].x*hx_inv;
sf = sf_coeff[0]*sin(MY_2PI*s1);
sf += sf_coeff[1]*sin(MY_4PI*s1);
sf *= 2.0*qi;
f[i].x += qfactor*(ekx - sf);
s2 = x[i].y*hy_inv;
sf = sf_coeff[2]*sin(MY_2PI*s2);
sf += sf_coeff[3]*sin(MY_4PI*s2);
sf *= 2.0*qi;
f[i].y += qfactor*(eky - sf);
s3 = x[i].z*hz_inv;
sf = sf_coeff[4]*sin(MY_2PI*s3);
sf += sf_coeff[5]*sin(MY_4PI*s3);
sf *= 2.0*qi;
if (slabflag != 2) f[i].z += qfactor*(ekz - sf);
}
} // end of parallel region
}
/* ----------------------------------------------------------------------
interpolate from grid to get per-atom energy/virial
------------------------------------------------------------------------- */
void PPPMOMP::fieldforce_peratom()
{
const int nthreads = comm->nthreads;
const int nlocal = atom->nlocal;
// no local atoms => nothing to do
if (nlocal == 0) return;
// loop over my charges, interpolate from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
// (dx,dy,dz) = distance to "lower left" grid pt
// (mx,my,mz) = global coords of moving stencil pt
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
const double * _noalias const q = atom->q;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
int i,ifrom,ito,tid,l,m,n,nx,ny,nz,mx,my,mz;
loop_setup_thr(ifrom,ito,tid,nlocal,nthreads);
// get per thread data
ThrData *thr = fix->get_thr(tid);
FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());
for (i = ifrom; i < ito; ++i) {
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i].x-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i].y-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i].z-boxlo[2])*delzinv;
compute_rho1d_thr(r1d,dx,dy,dz);
u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = r1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*r1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*r1d[0][l];
if (eflag_atom) u += x0*u_brick[mz][my][mx];
if (vflag_atom) {
v0 += x0*v0_brick[mz][my][mx];
v1 += x0*v1_brick[mz][my][mx];
v2 += x0*v2_brick[mz][my][mx];
v3 += x0*v3_brick[mz][my][mx];
v4 += x0*v4_brick[mz][my][mx];
v5 += x0*v5_brick[mz][my][mx];
}
}
}
}
const double qi = q[i];
if (eflag_atom) eatom[i] += qi*u;
if (vflag_atom) {
vatom[i][0] += qi*v0;
vatom[i][1] += qi*v1;
vatom[i][2] += qi*v2;
vatom[i][3] += qi*v3;
vatom[i][4] += qi*v4;
vatom[i][5] += qi*v5;
}
}
} // end of parallel region
}
/* ----------------------------------------------------------------------
charge assignment into rho1d
dx,dy,dz = distance of particle from "lower left" grid point
------------------------------------------------------------------------- */
void PPPMOMP::compute_rho1d_thr(FFT_SCALAR * const * const r1d, const FFT_SCALAR &dx,
const FFT_SCALAR &dy, const FFT_SCALAR &dz)
{
int k,l;
FFT_SCALAR r1,r2,r3;
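// for each of the 'order' stencil points k, evaluate the charge
// assignment polynomial at dx,dy,dz using Horner's rule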
for (k = (1-order)/2; k <= order/2; k++) {
r1 = r2 = r3 = ZEROF;
for (l = order-1; l >= 0; l--) {
r1 = rho_coeff[l][k] + r1*dx;
r2 = rho_coeff[l][k] + r2*dy;
r3 = rho_coeff[l][k] + r3*dz;
}
r1d[0][k] = r1;
r1d[1][k] = r2;
r1d[2][k] = r3;
}
}
/* ----------------------------------------------------------------------
charge assignment into drho1d
dx,dy,dz = distance of particle from "lower left" grid point
------------------------------------------------------------------------- */
void PPPMOMP::compute_drho1d_thr(FFT_SCALAR * const * const d1d, const FFT_SCALAR &dx,
const FFT_SCALAR &dy, const FFT_SCALAR &dz)
{
int k,l;
FFT_SCALAR r1,r2,r3;
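// same Horner evaluation as compute_rho1d_thr(), but for the derivative
// of the assignment polynomial, which is one degree lower (hence the
// loop starts at order-2)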
for (k = (1-order)/2; k <= order/2; k++) {
r1 = r2 = r3 = ZEROF;
for (l = order-2; l >= 0; l--) {
r1 = drho_coeff[l][k] + r1*dx;
r2 = drho_coeff[l][k] + r2*dy;
r3 = drho_coeff[l][k] + r3*dz;
}
d1d[0][k] = r1;
d1d[1][k] = r2;
d1d[2][k] = r3;
}
}
