Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F70658734
ewald_omp.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Jul 7, 15:52
Size
12 KB
Mime Type
text/x-c
Expires
Tue, Jul 9, 15:52 (2 d)
Engine
blob
Format
Raw Data
Handle
18808890
Attached To
rLAMMPS lammps
ewald_omp.cpp
View Options
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL)
------------------------------------------------------------------------- */
#include "mpi.h"
#include "ewald_omp.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "memory.h"
#include <math.h>
#include "math_const.h"
#include "suffix.h"
using namespace LAMMPS_NS;
using namespace MathConst;
#define SMALL 0.00001
/* ---------------------------------------------------------------------- */
EwaldOMP::EwaldOMP(LAMMPS *lmp, int narg, char **arg)
: Ewald(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE)
{
suffix_flag |= Suffix::OMP;
}
/* ---------------------------------------------------------------------- */
void EwaldOMP::allocate()
{
Ewald::allocate();
// always re-allocate for simplicity.
delete[] sfacrl;
delete[] sfacim;
sfacrl = new double[kmax3d*comm->nthreads];
sfacim = new double[kmax3d*comm->nthreads];
}
/* ----------------------------------------------------------------------
compute the Ewald long-range force, energy, virial
------------------------------------------------------------------------- */
void EwaldOMP::compute(int eflag, int vflag)
{
// set energy/virial flags
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = evflag_atom = eflag_global = vflag_global =
eflag_atom = vflag_atom = 0;
// extend size of per-atom arrays if necessary
if (atom->nlocal > nmax) {
memory->destroy(ek);
memory->destroy3d_offset(cs,-kmax_created);
memory->destroy3d_offset(sn,-kmax_created);
nmax = atom->nmax;
memory->create(ek,nmax,3,"ewald:ek");
memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs");
memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn");
kmax_created = kmax;
}
// partial structure factors on each processor
// total structure factor by summing over procs
eik_dot_r();
MPI_Allreduce(sfacrl,sfacrl_all,kcount,MPI_DOUBLE,MPI_SUM,world);
MPI_Allreduce(sfacim,sfacim_all,kcount,MPI_DOUBLE,MPI_SUM,world);
// K-space portion of electric field
// double loop over K-vectors and local atoms
double * const * const f = atom->f;
const double * const q = atom->q;
const int nthreads = comm->nthreads;
const int nlocal = atom->nlocal;
const double qscale = force->qqrd2e * scale;
double eng_tmp = 0.0;
double v0,v1,v2,v3,v4,v5;
v0=v1=v2=v3=v4=v5=0.0;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag) reduction(+:eng_tmp,v0,v1,v2,v3,v4,v5)
#endif
{
int i,j,k,ifrom,ito,tid;
int kx,ky,kz;
double cypz,sypz,exprl,expim,partial;
loop_setup_thr(ifrom, ito, tid, nlocal, nthreads);
ThrData *thr = fix->get_thr(tid);
ev_setup_thr(eflag, vflag, 0, NULL, NULL, thr);
for (i = ifrom; i < ito; i++) {
ek[i][0] = 0.0;
ek[i][1] = 0.0;
ek[i][2] = 0.0;
}
for (k = 0; k < kcount; k++) {
kx = kxvecs[k];
ky = kyvecs[k];
kz = kzvecs[k];
for (i = ifrom; i < ito; i++) {
cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i];
sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i];
exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz;
expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz;
partial = expim*sfacrl_all[k] - exprl*sfacim_all[k];
ek[i][0] += partial*eg[k][0];
ek[i][1] += partial*eg[k][1];
ek[i][2] += partial*eg[k][2];
if (evflag_atom) {
const double partial_peratom = exprl*sfacrl_all[k] + expim*sfacim_all[k];
if (eflag_atom) eatom[i] += q[i]*ug[k]*partial_peratom;
if (vflag_atom)
for (j = 0; j < 6; j++)
vatom[i][j] += ug[k]*vg[k][j]*partial_peratom;
}
}
}
// convert E-field to force
for (i = ifrom; i < ito; i++) {
const double fac = qscale*q[i];
f[i][0] += fac*ek[i][0];
f[i][1] += fac*ek[i][1];
f[i][2] += fac*ek[i][2];
}
// global energy
if (eflag_global) {
#if defined(_OPENMP)
#pragma omp for private(k)
#endif
for (k = 0; k < kcount; k++)
eng_tmp += ug[k] * (sfacrl_all[k]*sfacrl_all[k] +
sfacim_all[k]*sfacim_all[k]);
}
// global virial
if (vflag_global) {
#if defined(_OPENMP)
#pragma omp for private(k)
#endif
for (k = 0; k < kcount; k++) {
double uk = ug[k] * (sfacrl_all[k]*sfacrl_all[k] + sfacim_all[k]*sfacim_all[k]);
v0 += uk*vg[k][0];
v1 += uk*vg[k][1];
v2 += uk*vg[k][2];
v3 += uk*vg[k][3];
v4 += uk*vg[k][4];
v5 += uk*vg[k][5];
}
}
// per-atom energy/virial
// energy includes self-energy correction
if (evflag_atom) {
if (eflag_atom) {
for (i = ifrom; i < ito; i++) {
eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
(g_ewald*g_ewald*volume);
eatom[i] *= qscale;
}
}
if (vflag_atom)
for (i = ifrom; i < ito; i++)
for (j = 0; j < 6; j++) vatom[i][j] *= q[i]*qscale;
}
reduce_thr(this, eflag,vflag,thr);
} // end of omp parallel region
if (eflag_global) {
eng_tmp -= g_ewald*qsqsum/MY_PIS +
MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
energy = eng_tmp * qscale;
}
if (vflag_global) {
virial[0] = v0 * qscale;
virial[1] = v1 * qscale;
virial[2] = v2 * qscale;
virial[3] = v3 * qscale;
virial[4] = v4 * qscale;
virial[5] = v5 * qscale;
}
if (slabflag) slabcorr();
}
/* ---------------------------------------------------------------------- */
void EwaldOMP::eik_dot_r()
{
const double * const * const x = atom->x;
const double * const q = atom->q;
const int nlocal = atom->nlocal;
const int nthreads = comm->nthreads;
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
{
int i,ifrom,ito,k,l,m,n,ic,tid;
double cstr1,sstr1,cstr2,sstr2,cstr3,sstr3,cstr4,sstr4;
double sqk,clpm,slpm;
loop_setup_thr(ifrom, ito, tid, nlocal, nthreads);
double * const sfacrl_thr = sfacrl + tid*kmax3d;
double * const sfacim_thr = sfacim + tid*kmax3d;
n = 0;
// (k,0,0), (0,l,0), (0,0,m)
for (ic = 0; ic < 3; ic++) {
sqk = unitk[ic]*unitk[ic];
if (sqk <= gsqmx) {
cstr1 = 0.0;
sstr1 = 0.0;
for (i = ifrom; i < ito; i++) {
cs[0][ic][i] = 1.0;
sn[0][ic][i] = 0.0;
cs[1][ic][i] = cos(unitk[ic]*x[i][ic]);
sn[1][ic][i] = sin(unitk[ic]*x[i][ic]);
cs[-1][ic][i] = cs[1][ic][i];
sn[-1][ic][i] = -sn[1][ic][i];
cstr1 += q[i]*cs[1][ic][i];
sstr1 += q[i]*sn[1][ic][i];
}
sfacrl_thr[n] = cstr1;
sfacim_thr[n++] = sstr1;
}
}
for (m = 2; m <= kmax; m++) {
for (ic = 0; ic < 3; ic++) {
sqk = m*unitk[ic] * m*unitk[ic];
if (sqk <= gsqmx) {
cstr1 = 0.0;
sstr1 = 0.0;
for (i = ifrom; i < ito; i++) {
cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] -
sn[m-1][ic][i]*sn[1][ic][i];
sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] +
cs[m-1][ic][i]*sn[1][ic][i];
cs[-m][ic][i] = cs[m][ic][i];
sn[-m][ic][i] = -sn[m][ic][i];
cstr1 += q[i]*cs[m][ic][i];
sstr1 += q[i]*sn[m][ic][i];
}
sfacrl_thr[n] = cstr1;
sfacim_thr[n++] = sstr1;
}
}
}
// 1 = (k,l,0), 2 = (k,-l,0)
for (k = 1; k <= kxmax; k++) {
for (l = 1; l <= kymax; l++) {
sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]);
if (sqk <= gsqmx) {
cstr1 = 0.0;
sstr1 = 0.0;
cstr2 = 0.0;
sstr2 = 0.0;
for (i = ifrom; i < ito; i++) {
cstr1 += q[i]*(cs[k][0][i]*cs[l][1][i] - sn[k][0][i]*sn[l][1][i]);
sstr1 += q[i]*(sn[k][0][i]*cs[l][1][i] + cs[k][0][i]*sn[l][1][i]);
cstr2 += q[i]*(cs[k][0][i]*cs[l][1][i] + sn[k][0][i]*sn[l][1][i]);
sstr2 += q[i]*(sn[k][0][i]*cs[l][1][i] - cs[k][0][i]*sn[l][1][i]);
}
sfacrl_thr[n] = cstr1;
sfacim_thr[n++] = sstr1;
sfacrl_thr[n] = cstr2;
sfacim_thr[n++] = sstr2;
}
}
}
// 1 = (0,l,m), 2 = (0,l,-m)
for (l = 1; l <= kymax; l++) {
for (m = 1; m <= kzmax; m++) {
sqk = (l*unitk[1] * l*unitk[1]) + (m*unitk[2] * m*unitk[2]);
if (sqk <= gsqmx) {
cstr1 = 0.0;
sstr1 = 0.0;
cstr2 = 0.0;
sstr2 = 0.0;
for (i = ifrom; i < ito; i++) {
cstr1 += q[i]*(cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]);
sstr1 += q[i]*(sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]);
cstr2 += q[i]*(cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]);
sstr2 += q[i]*(sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]);
}
sfacrl_thr[n] = cstr1;
sfacim_thr[n++] = sstr1;
sfacrl_thr[n] = cstr2;
sfacim_thr[n++] = sstr2;
}
}
}
// 1 = (k,0,m), 2 = (k,0,-m)
for (k = 1; k <= kxmax; k++) {
for (m = 1; m <= kzmax; m++) {
sqk = (k*unitk[0] * k*unitk[0]) + (m*unitk[2] * m*unitk[2]);
if (sqk <= gsqmx) {
cstr1 = 0.0;
sstr1 = 0.0;
cstr2 = 0.0;
sstr2 = 0.0;
for (i = ifrom; i < ito; i++) {
cstr1 += q[i]*(cs[k][0][i]*cs[m][2][i] - sn[k][0][i]*sn[m][2][i]);
sstr1 += q[i]*(sn[k][0][i]*cs[m][2][i] + cs[k][0][i]*sn[m][2][i]);
cstr2 += q[i]*(cs[k][0][i]*cs[m][2][i] + sn[k][0][i]*sn[m][2][i]);
sstr2 += q[i]*(sn[k][0][i]*cs[m][2][i] - cs[k][0][i]*sn[m][2][i]);
}
sfacrl_thr[n] = cstr1;
sfacim_thr[n++] = sstr1;
sfacrl_thr[n] = cstr2;
sfacim_thr[n++] = sstr2;
}
}
}
// 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m)
for (k = 1; k <= kxmax; k++) {
for (l = 1; l <= kymax; l++) {
for (m = 1; m <= kzmax; m++) {
sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]) +
(m*unitk[2] * m*unitk[2]);
if (sqk <= gsqmx) {
cstr1 = 0.0;
sstr1 = 0.0;
cstr2 = 0.0;
sstr2 = 0.0;
cstr3 = 0.0;
sstr3 = 0.0;
cstr4 = 0.0;
sstr4 = 0.0;
for (i = ifrom; i < ito; i++) {
clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i];
slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i];
cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i];
slpm = -sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i];
cstr2 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
sstr2 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i];
slpm = sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i];
cstr3 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
sstr3 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i];
slpm = -sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i];
cstr4 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm);
sstr4 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm);
}
sfacrl_thr[n] = cstr1;
sfacim_thr[n++] = sstr1;
sfacrl_thr[n] = cstr2;
sfacim_thr[n++] = sstr2;
sfacrl_thr[n] = cstr3;
sfacim_thr[n++] = sstr3;
sfacrl_thr[n] = cstr4;
sfacim_thr[n++] = sstr4;
}
}
}
}
sync_threads();
data_reduce_thr(sfacrl,kmax3d,nthreads,1,tid);
data_reduce_thr(sfacim,kmax3d,nthreads,1,tid);
} // end of parallel region
}
Event Timeline
Log In to Comment