diff --git a/src/KSPACE/ewald.cpp b/src/KSPACE/ewald.cpp index 4ec4eed9e..af4803f57 100644 --- a/src/KSPACE/ewald.cpp +++ b/src/KSPACE/ewald.cpp @@ -1,967 +1,964 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing authors: Roy Pollock (LLNL), Paul Crozier (SNL) per-atom energy/virial added by German Samolyuk (ORNL), Stan Moore (BYU) ------------------------------------------------------------------------- */ #include "mpi.h" #include "stdlib.h" #include "stdio.h" #include "string.h" #include "math.h" #include "ewald.h" #include "atom.h" #include "comm.h" #include "force.h" #include "pair.h" #include "domain.h" #include "math_const.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; using namespace MathConst; #define SMALL 0.00001 /* ---------------------------------------------------------------------- */ Ewald::Ewald(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) { if (narg != 1) error->all(FLERR,"Illegal kspace_style ewald command"); accuracy_relative = atof(arg[0]); kmax = 0; kxvecs = kyvecs = kzvecs = NULL; ug = NULL; eg = vg = NULL; sfacrl = sfacim = sfacrl_all = sfacim_all = NULL; nmax = 0; ek = NULL; cs = sn = NULL; kcount = 0; } /* ---------------------------------------------------------------------- free all memory ------------------------------------------------------------------------- */ Ewald::~Ewald() { deallocate(); memory->destroy(ek); memory->destroy3d_offset(cs,-kmax_created); memory->destroy3d_offset(sn,-kmax_created); } /* ---------------------------------------------------------------------- */ void Ewald::init() { if (comm->me == 0) { if (screen) fprintf(screen,"Ewald initialization ...\n"); if (logfile) fprintf(logfile,"Ewald initialization ...\n"); } // error check if (domain->triclinic) error->all(FLERR,"Cannot use Ewald with triclinic box"); if (domain->dimension == 2) error->all(FLERR,"Cannot use Ewald with 2d simulation"); if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); if (slabflag == 0 && domain->nonperiodic > 0) error->all(FLERR,"Cannot use nonperiodic boundaries with Ewald"); if (slabflag == 1) { if (domain->xperiodic != 1 || domain->yperiodic != 1 || domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1) error->all(FLERR,"Incorrect boundaries with slab Ewald"); } // extract short-range Coulombic cutoff from pair style scale = 1.0; if (force->pair == NULL) error->all(FLERR,"KSpace style is incompatible with Pair style"); int itmp; double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); if (p_cutoff == NULL) error->all(FLERR,"KSpace style is incompatible with Pair style"); double cutoff = *p_cutoff; qsum = qsqsum = 0.0; for (int i = 0; i < atom->nlocal; i++) { qsum += atom->q[i]; qsqsum += atom->q[i]*atom->q[i]; } double tmp; MPI_Allreduce(&qsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); qsum = tmp; MPI_Allreduce(&qsqsum,&tmp,1,MPI_DOUBLE,MPI_SUM,world); qsqsum = tmp; if (qsqsum == 0.0) error->all(FLERR,"Cannot use kspace solver on system with no charge"); if (fabs(qsum) > SMALL && comm->me == 0) { char str[128]; sprintf(str,"System is not charge neutral, net charge = %g",qsum); error->warning(FLERR,str); } // set accuracy (force units) from accuracy_relative or accuracy_absolute if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute; else accuracy = accuracy_relative * two_charge_force; // setup K-space resolution q2 = qsqsum * force->qqrd2e / force->dielectric; bigint natoms = atom->natoms; // use xprd,yprd,zprd even if triclinic so grid size is the same // adjust z dimension for 2d slab Ewald // 3d Ewald just uses zprd since slab_volfactor = 1.0 double xprd = domain->xprd; double yprd = domain->yprd; double zprd = domain->zprd; double zprd_slab = zprd*slab_volfactor; // make initial g_ewald estimate // based on desired accuracy and real space cutoff // fluid-occupied volume used to estimate real-space error // zprd used rather than zprd_slab if (!gewaldflag) g_ewald = sqrt(-log(accuracy*sqrt(natoms*cutoff*xprd*yprd*zprd) / (2.0*q2))) / cutoff; // setup Ewald coefficients so can print stats setup(); - // final RMS accuracy + // final RMS accuracy double lprx = rms(kxmax,xprd,natoms,q2); double lpry = rms(kymax,yprd,natoms,q2); double lprz = rms(kzmax,zprd_slab,natoms,q2); double lpr = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); double spr = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) / sqrt(natoms*cutoff*xprd*yprd*zprd_slab); - + // stats if (comm->me == 0) { if (screen) { fprintf(screen," G vector (1/distance) = %g\n",g_ewald); fprintf(screen," estimated absolute RMS force accuracy = %g\n", MAX(lpr,spr)); fprintf(screen," estimated relative force accuracy = %g\n", MAX(lpr,spr)/two_charge_force); fprintf(screen," KSpace vectors: actual max1d max3d = %d %d %d\n", kcount,kmax,kmax3d); } if (logfile) { fprintf(logfile," G vector (1/distnace) = %g\n",g_ewald); fprintf(logfile," estimated absolute RMS force accuracy = %g\n", MAX(lpr,spr)); fprintf(logfile," estimated relative force accuracy = %g\n", MAX(lpr,spr)/two_charge_force); fprintf(logfile," KSpace vectors: actual max1d max3d = %d %d %d\n", kcount,kmax,kmax3d); } } } /* ---------------------------------------------------------------------- adjust Ewald coeffs, called initially and whenever volume has changed ------------------------------------------------------------------------- */ void Ewald::setup() { // volume-dependent factors double xprd = domain->xprd; double yprd = domain->yprd; double zprd = domain->zprd; // adjustment of z dimension for 2d slab Ewald // 3d Ewald just uses zprd since slab_volfactor = 1.0 double zprd_slab = zprd*slab_volfactor; volume = xprd * yprd * zprd_slab; unitk[0] = 2.0*MY_PI/xprd; unitk[1] = 2.0*MY_PI/yprd; unitk[2] = 2.0*MY_PI/zprd_slab; // determine kmax // function of current box size, accuracy, G_ewald (short-range cutoff) bigint natoms = atom->natoms; double err; kxmax = 1; kymax = 1; kzmax = 1; err = rms(kxmax,xprd,natoms,q2); while (err > accuracy) { kxmax++; err = rms(kxmax,xprd,natoms,q2); } err = rms(kymax,yprd,natoms,q2); while (err > accuracy) { kymax++; err = rms(kymax,yprd,natoms,q2); } err = rms(kzmax,zprd_slab,natoms,q2); while (err > accuracy) { kzmax++; err = rms(kzmax,zprd_slab,natoms,q2); } int kmax_old = kmax; kmax = MAX(kxmax,kymax); kmax = MAX(kmax,kzmax); kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax; double gsqxmx = unitk[0]*unitk[0]*kxmax*kxmax; double gsqymx = unitk[1]*unitk[1]*kymax*kymax; double gsqzmx = unitk[2]*unitk[2]*kzmax*kzmax; gsqmx = MAX(gsqxmx,gsqymx); gsqmx = MAX(gsqmx,gsqzmx); // if size has grown, reallocate k-dependent and nlocal-dependent arrays if (kmax > kmax_old) { deallocate(); allocate(); memory->destroy(ek); memory->destroy3d_offset(cs,-kmax_created); memory->destroy3d_offset(sn,-kmax_created); nmax = atom->nmax; memory->create(ek,nmax,3,"ewald:ek"); memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs"); memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn"); kmax_created = kmax; } // pre-compute Ewald coefficients coeffs(); } /* ---------------------------------------------------------------------- compute RMS accuracy for a dimension ------------------------------------------------------------------------- */ double Ewald::rms(int km, double prd, bigint natoms, double q2) { double value = 2.0*q2*g_ewald/prd * sqrt(1.0/(MY_PI*km*natoms)) * exp(-MY_PI*MY_PI*km*km/(g_ewald*g_ewald*prd*prd)); return value; } /* ---------------------------------------------------------------------- compute the Ewald long-range force, energy, virial ------------------------------------------------------------------------- */ void Ewald::compute(int eflag, int vflag) { int i,j,k; // set energy/virial flags if (eflag || vflag) ev_setup(eflag,vflag); else evflag = evflag_atom = eflag_global = vflag_global = eflag_atom = vflag_atom = 0; // extend size of per-atom arrays if necessary if (atom->nlocal > nmax) { memory->destroy(ek); memory->destroy3d_offset(cs,-kmax_created); memory->destroy3d_offset(sn,-kmax_created); nmax = atom->nmax; memory->create(ek,nmax,3,"ewald:ek"); memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs"); memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn"); kmax_created = kmax; } // partial structure factors on each processor // total structure factor by summing over procs eik_dot_r(); MPI_Allreduce(sfacrl,sfacrl_all,kcount,MPI_DOUBLE,MPI_SUM,world); MPI_Allreduce(sfacim,sfacim_all,kcount,MPI_DOUBLE,MPI_SUM,world); // K-space portion of electric field // double loop over K-vectors and local atoms // perform per-atom calculations if needed double **f = atom->f; double *q = atom->q; int nlocal = atom->nlocal; int kx,ky,kz; double cypz,sypz,exprl,expim,partial,partial_peratom; for (i = 0; i < nlocal; i++) { ek[i][0] = 0.0; ek[i][1] = 0.0; ek[i][2] = 0.0; } for (k = 0; k < kcount; k++) { kx = kxvecs[k]; ky = kyvecs[k]; kz = kzvecs[k]; for (i = 0; i < nlocal; i++) { cypz = cs[ky][1][i]*cs[kz][2][i] - sn[ky][1][i]*sn[kz][2][i]; sypz = sn[ky][1][i]*cs[kz][2][i] + cs[ky][1][i]*sn[kz][2][i]; exprl = cs[kx][0][i]*cypz - sn[kx][0][i]*sypz; expim = sn[kx][0][i]*cypz + cs[kx][0][i]*sypz; partial = expim*sfacrl_all[k] - exprl*sfacim_all[k]; ek[i][0] += partial*eg[k][0]; ek[i][1] += partial*eg[k][1]; ek[i][2] += partial*eg[k][2]; if (evflag_atom) { partial_peratom = exprl*sfacrl_all[k] + expim*sfacim_all[k]; if (eflag_atom) eatom[i] += q[i]*ug[k]*partial_peratom; if (vflag_atom) for (j = 0; j < 6; j++) vatom[i][j] += ug[k]*vg[k][j]*partial_peratom; } } } // convert E-field to force const double qscale = force->qqrd2e * scale; for (i = 0; i < nlocal; i++) { f[i][0] += qscale * q[i]*ek[i][0]; f[i][1] += qscale * q[i]*ek[i][1]; f[i][2] += qscale * q[i]*ek[i][2]; } // global energy if (eflag_global) { for (k = 0; k < kcount; k++) energy += ug[k] * (sfacrl_all[k]*sfacrl_all[k] + sfacim_all[k]*sfacim_all[k]); energy -= g_ewald*qsqsum/MY_PIS + MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); energy *= qscale; } // global virial if (vflag_global) { double uk; for (k = 0; k < kcount; k++) { uk = ug[k] * (sfacrl_all[k]*sfacrl_all[k] + sfacim_all[k]*sfacim_all[k]); for (j = 0; j < 6; j++) virial[j] += uk*vg[k][j]; } for (j = 0; j < 6; j++) virial[j] *= qscale; } // per-atom energy/virial // energy includes self-energy correction if (evflag_atom) { if (eflag_atom) { for (i = 0; i < nlocal; i++) { eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum / (g_ewald*g_ewald*volume); eatom[i] *= qscale; } } if (vflag_atom) for (i = 0; i < nlocal; i++) for (j = 0; j < 6; j++) vatom[i][j] *= q[i]*qscale; } // 2d slab correction if (slabflag) slabcorr(); } /* ---------------------------------------------------------------------- */ void Ewald::eik_dot_r() { int i,k,l,m,n,ic; double cstr1,sstr1,cstr2,sstr2,cstr3,sstr3,cstr4,sstr4; double sqk,clpm,slpm; double **x = atom->x; double *q = atom->q; int nlocal = atom->nlocal; n = 0; // (k,0,0), (0,l,0), (0,0,m) for (ic = 0; ic < 3; ic++) { sqk = unitk[ic]*unitk[ic]; if (sqk <= gsqmx) { cstr1 = 0.0; sstr1 = 0.0; for (i = 0; i < nlocal; i++) { cs[0][ic][i] = 1.0; sn[0][ic][i] = 0.0; cs[1][ic][i] = cos(unitk[ic]*x[i][ic]); sn[1][ic][i] = sin(unitk[ic]*x[i][ic]); cs[-1][ic][i] = cs[1][ic][i]; sn[-1][ic][i] = -sn[1][ic][i]; cstr1 += q[i]*cs[1][ic][i]; sstr1 += q[i]*sn[1][ic][i]; } sfacrl[n] = cstr1; sfacim[n++] = sstr1; } } for (m = 2; m <= kmax; m++) { for (ic = 0; ic < 3; ic++) { sqk = m*unitk[ic] * m*unitk[ic]; if (sqk <= gsqmx) { cstr1 = 0.0; sstr1 = 0.0; for (i = 0; i < nlocal; i++) { cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] - sn[m-1][ic][i]*sn[1][ic][i]; sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] + cs[m-1][ic][i]*sn[1][ic][i]; cs[-m][ic][i] = cs[m][ic][i]; sn[-m][ic][i] = -sn[m][ic][i]; cstr1 += q[i]*cs[m][ic][i]; sstr1 += q[i]*sn[m][ic][i]; } sfacrl[n] = cstr1; sfacim[n++] = sstr1; } } } // 1 = (k,l,0), 2 = (k,-l,0) for (k = 1; k <= kxmax; k++) { for (l = 1; l <= kymax; l++) { sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]); if (sqk <= gsqmx) { cstr1 = 0.0; sstr1 = 0.0; cstr2 = 0.0; sstr2 = 0.0; for (i = 0; i < nlocal; i++) { cstr1 += q[i]*(cs[k][0][i]*cs[l][1][i] - sn[k][0][i]*sn[l][1][i]); sstr1 += q[i]*(sn[k][0][i]*cs[l][1][i] + cs[k][0][i]*sn[l][1][i]); cstr2 += q[i]*(cs[k][0][i]*cs[l][1][i] + sn[k][0][i]*sn[l][1][i]); sstr2 += q[i]*(sn[k][0][i]*cs[l][1][i] - cs[k][0][i]*sn[l][1][i]); } sfacrl[n] = cstr1; sfacim[n++] = sstr1; sfacrl[n] = cstr2; sfacim[n++] = sstr2; } } } // 1 = (0,l,m), 2 = (0,l,-m) for (l = 1; l <= kymax; l++) { for (m = 1; m <= kzmax; m++) { sqk = (l*unitk[1] * l*unitk[1]) + (m*unitk[2] * m*unitk[2]); if (sqk <= gsqmx) { cstr1 = 0.0; sstr1 = 0.0; cstr2 = 0.0; sstr2 = 0.0; for (i = 0; i < nlocal; i++) { cstr1 += q[i]*(cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]); sstr1 += q[i]*(sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]); cstr2 += q[i]*(cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]); sstr2 += q[i]*(sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]); } sfacrl[n] = cstr1; sfacim[n++] = sstr1; sfacrl[n] = cstr2; sfacim[n++] = sstr2; } } } // 1 = (k,0,m), 2 = (k,0,-m) for (k = 1; k <= kxmax; k++) { for (m = 1; m <= kzmax; m++) { sqk = (k*unitk[0] * k*unitk[0]) + (m*unitk[2] * m*unitk[2]); if (sqk <= gsqmx) { cstr1 = 0.0; sstr1 = 0.0; cstr2 = 0.0; sstr2 = 0.0; for (i = 0; i < nlocal; i++) { cstr1 += q[i]*(cs[k][0][i]*cs[m][2][i] - sn[k][0][i]*sn[m][2][i]); sstr1 += q[i]*(sn[k][0][i]*cs[m][2][i] + cs[k][0][i]*sn[m][2][i]); cstr2 += q[i]*(cs[k][0][i]*cs[m][2][i] + sn[k][0][i]*sn[m][2][i]); sstr2 += q[i]*(sn[k][0][i]*cs[m][2][i] - cs[k][0][i]*sn[m][2][i]); } sfacrl[n] = cstr1; sfacim[n++] = sstr1; sfacrl[n] = cstr2; sfacim[n++] = sstr2; } } } // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m) for (k = 1; k <= kxmax; k++) { for (l = 1; l <= kymax; l++) { for (m = 1; m <= kzmax; m++) { sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]) + (m*unitk[2] * m*unitk[2]); if (sqk <= gsqmx) { cstr1 = 0.0; sstr1 = 0.0; cstr2 = 0.0; sstr2 = 0.0; cstr3 = 0.0; sstr3 = 0.0; cstr4 = 0.0; sstr4 = 0.0; for (i = 0; i < nlocal; i++) { clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; cstr1 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); sstr1 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]; slpm = -sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]; cstr2 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); sstr2 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]; slpm = sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]; cstr3 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); sstr3 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]; slpm = -sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]; cstr4 += q[i]*(cs[k][0][i]*clpm - sn[k][0][i]*slpm); sstr4 += q[i]*(sn[k][0][i]*clpm + cs[k][0][i]*slpm); } sfacrl[n] = cstr1; sfacim[n++] = sstr1; sfacrl[n] = cstr2; sfacim[n++] = sstr2; sfacrl[n] = cstr3; sfacim[n++] = sstr3; sfacrl[n] = cstr4; sfacim[n++] = sstr4; } } } } } /* ---------------------------------------------------------------------- pre-compute coefficients for each Ewald K-vector ------------------------------------------------------------------------- */ void Ewald::coeffs() { int k,l,m; double sqk,vterm; - double unitkx = unitk[0]; - double unitky = unitk[1]; - double unitkz = unitk[2]; double g_ewald_sq_inv = 1.0 / (g_ewald*g_ewald); double preu = 4.0*MY_PI/volume; kcount = 0; // (k,0,0), (0,l,0), (0,0,m) for (m = 1; m <= kmax; m++) { - sqk = (m*unitkx) * (m*unitkx); + sqk = (m*unitk[0]) * (m*unitk[0]); if (sqk <= gsqmx) { kxvecs[kcount] = m; kyvecs[kcount] = 0; kzvecs[kcount] = 0; ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitkx*m*ug[kcount]; + eg[kcount][0] = 2.0*unitk[0]*m*ug[kcount]; eg[kcount][1] = 0.0; eg[kcount][2] = 0.0; vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0 + vterm*(unitkx*m)*(unitkx*m); + vg[kcount][0] = 1.0 + vterm*(unitk[0]*m)*(unitk[0]*m); vg[kcount][1] = 1.0; vg[kcount][2] = 1.0; vg[kcount][3] = 0.0; vg[kcount][4] = 0.0; vg[kcount][5] = 0.0; kcount++; } - sqk = (m*unitky) * (m*unitky); + sqk = (m*unitk[1]) * (m*unitk[1]); if (sqk <= gsqmx) { kxvecs[kcount] = 0; kyvecs[kcount] = m; kzvecs[kcount] = 0; ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; eg[kcount][0] = 0.0; - eg[kcount][1] = 2.0*unitky*m*ug[kcount]; + eg[kcount][1] = 2.0*unitk[1]*m*ug[kcount]; eg[kcount][2] = 0.0; vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); vg[kcount][0] = 1.0; - vg[kcount][1] = 1.0 + vterm*(unitky*m)*(unitky*m); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*m)*(unitk[1]*m); vg[kcount][2] = 1.0; vg[kcount][3] = 0.0; vg[kcount][4] = 0.0; vg[kcount][5] = 0.0; kcount++; } - sqk = (m*unitkz) * (m*unitkz); + sqk = (m*unitk[2]) * (m*unitk[2]); if (sqk <= gsqmx) { kxvecs[kcount] = 0; kyvecs[kcount] = 0; kzvecs[kcount] = m; ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; eg[kcount][0] = 0.0; eg[kcount][1] = 0.0; - eg[kcount][2] = 2.0*unitkz*m*ug[kcount]; + eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); vg[kcount][0] = 1.0; vg[kcount][1] = 1.0; - vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); vg[kcount][3] = 0.0; vg[kcount][4] = 0.0; vg[kcount][5] = 0.0; kcount++; } } // 1 = (k,l,0), 2 = (k,-l,0) for (k = 1; k <= kxmax; k++) { for (l = 1; l <= kymax; l++) { - sqk = (unitkx*k) * (unitkx*k) + (unitky*l) * (unitky*l); + sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[1]*l) * (unitk[1]*l); if (sqk <= gsqmx) { kxvecs[kcount] = k; kyvecs[kcount] = l; kzvecs[kcount] = 0; ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitkx*k*ug[kcount]; - eg[kcount][1] = 2.0*unitky*l*ug[kcount]; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; eg[kcount][2] = 0.0; vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0 + vterm*(unitkx*k)*(unitkx*k); - vg[kcount][1] = 1.0 + vterm*(unitky*l)*(unitky*l); + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); vg[kcount][2] = 1.0; - vg[kcount][3] = vterm*unitkx*k*unitky*l; + vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l; vg[kcount][4] = 0.0; vg[kcount][5] = 0.0; kcount++; kxvecs[kcount] = k; kyvecs[kcount] = -l; kzvecs[kcount] = 0; ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitkx*k*ug[kcount]; - eg[kcount][1] = -2.0*unitky*l*ug[kcount]; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount]; eg[kcount][2] = 0.0; - vg[kcount][0] = 1.0 + vterm*(unitkx*k)*(unitkx*k); - vg[kcount][1] = 1.0 + vterm*(unitky*l)*(unitky*l); + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); vg[kcount][2] = 1.0; - vg[kcount][3] = -vterm*unitkx*k*unitky*l; + vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l; vg[kcount][4] = 0.0; vg[kcount][5] = 0.0; kcount++;; } } } // 1 = (0,l,m), 2 = (0,l,-m) for (l = 1; l <= kymax; l++) { for (m = 1; m <= kzmax; m++) { - sqk = (unitky*l) * (unitky*l) + (unitkz*m) * (unitkz*m); + sqk = (unitk[1]*l) * (unitk[1]*l) + (unitk[2]*m) * (unitk[2]*m); if (sqk <= gsqmx) { kxvecs[kcount] = 0; kyvecs[kcount] = l; kzvecs[kcount] = m; ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; eg[kcount][0] = 0.0; - eg[kcount][1] = 2.0*unitky*l*ug[kcount]; - eg[kcount][2] = 2.0*unitkz*m*ug[kcount]; + eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); vg[kcount][0] = 1.0; - vg[kcount][1] = 1.0 + vterm*(unitky*l)*(unitky*l); - vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); vg[kcount][3] = 0.0; vg[kcount][4] = 0.0; - vg[kcount][5] = vterm*unitky*l*unitkz*m; + vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m; kcount++; kxvecs[kcount] = 0; kyvecs[kcount] = l; kzvecs[kcount] = -m; ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; eg[kcount][0] = 0.0; - eg[kcount][1] = 2.0*unitky*l*ug[kcount]; - eg[kcount][2] = -2.0*unitkz*m*ug[kcount]; + eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; vg[kcount][0] = 1.0; - vg[kcount][1] = 1.0 + vterm*(unitky*l)*(unitky*l); - vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); vg[kcount][3] = 0.0; vg[kcount][4] = 0.0; - vg[kcount][5] = -vterm*unitky*l*unitkz*m; + vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m; kcount++; } } } // 1 = (k,0,m), 2 = (k,0,-m) for (k = 1; k <= kxmax; k++) { for (m = 1; m <= kzmax; m++) { - sqk = (unitkx*k) * (unitkx*k) + (unitkz*m) * (unitkz*m); + sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[2]*m) * (unitk[2]*m); if (sqk <= gsqmx) { kxvecs[kcount] = k; kyvecs[kcount] = 0; kzvecs[kcount] = m; ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitkx*k*ug[kcount]; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; eg[kcount][1] = 0.0; - eg[kcount][2] = 2.0*unitkz*m*ug[kcount]; + eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0 + vterm*(unitkx*k)*(unitkx*k); + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); vg[kcount][1] = 1.0; - vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); vg[kcount][3] = 0.0; - vg[kcount][4] = vterm*unitkx*k*unitkz*m; + vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m; vg[kcount][5] = 0.0; kcount++; kxvecs[kcount] = k; kyvecs[kcount] = 0; kzvecs[kcount] = -m; ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitkx*k*ug[kcount]; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; eg[kcount][1] = 0.0; - eg[kcount][2] = -2.0*unitkz*m*ug[kcount]; - vg[kcount][0] = 1.0 + vterm*(unitkx*k)*(unitkx*k); + eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); vg[kcount][1] = 1.0; - vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); vg[kcount][3] = 0.0; - vg[kcount][4] = -vterm*unitkx*k*unitkz*m; + vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m; vg[kcount][5] = 0.0; kcount++; } } } // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m) for (k = 1; k <= kxmax; k++) { for (l = 1; l <= kymax; l++) { for (m = 1; m <= kzmax; m++) { - sqk = (unitkx*k) * (unitkx*k) + (unitky*l) * (unitky*l) + - (unitkz*m) * (unitkz*m); + sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[1]*l) * (unitk[1]*l) + + (unitk[2]*m) * (unitk[2]*m); if (sqk <= gsqmx) { kxvecs[kcount] = k; kyvecs[kcount] = l; kzvecs[kcount] = m; ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitkx*k*ug[kcount]; - eg[kcount][1] = 2.0*unitky*l*ug[kcount]; - eg[kcount][2] = 2.0*unitkz*m*ug[kcount]; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; vterm = -2.0*(1.0/sqk + 0.25*g_ewald_sq_inv); - vg[kcount][0] = 1.0 + vterm*(unitkx*k)*(unitkx*k); - vg[kcount][1] = 1.0 + vterm*(unitky*l)*(unitky*l); - vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m); - vg[kcount][3] = vterm*unitkx*k*unitky*l; - vg[kcount][4] = vterm*unitkx*k*unitkz*m; - vg[kcount][5] = vterm*unitky*l*unitkz*m; + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l; + vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m; + vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m; kcount++; kxvecs[kcount] = k; kyvecs[kcount] = -l; kzvecs[kcount] = m; ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitkx*k*ug[kcount]; - eg[kcount][1] = -2.0*unitky*l*ug[kcount]; - eg[kcount][2] = 2.0*unitkz*m*ug[kcount]; - vg[kcount][0] = 1.0 + vterm*(unitkx*k)*(unitkx*k); - vg[kcount][1] = 1.0 + vterm*(unitky*l)*(unitky*l); - vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m); - vg[kcount][3] = -vterm*unitkx*k*unitky*l; - vg[kcount][4] = vterm*unitkx*k*unitkz*m; - vg[kcount][5] = -vterm*unitky*l*unitkz*m; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = 2.0*unitk[2]*m*ug[kcount]; + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l; + vg[kcount][4] = vterm*unitk[0]*k*unitk[2]*m; + vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m; kcount++; kxvecs[kcount] = k; kyvecs[kcount] = l; kzvecs[kcount] = -m; ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitkx*k*ug[kcount]; - eg[kcount][1] = 2.0*unitky*l*ug[kcount]; - eg[kcount][2] = -2.0*unitkz*m*ug[kcount]; - vg[kcount][0] = 1.0 + vterm*(unitkx*k)*(unitkx*k); - vg[kcount][1] = 1.0 + vterm*(unitky*l)*(unitky*l); - vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m); - vg[kcount][3] = vterm*unitkx*k*unitky*l; - vg[kcount][4] = -vterm*unitkx*k*unitkz*m; - vg[kcount][5] = -vterm*unitky*l*unitkz*m; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = 2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = vterm*unitk[0]*k*unitk[1]*l; + vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m; + vg[kcount][5] = -vterm*unitk[1]*l*unitk[2]*m; kcount++; kxvecs[kcount] = k; kyvecs[kcount] = -l; kzvecs[kcount] = -m; ug[kcount] = preu*exp(-0.25*sqk*g_ewald_sq_inv)/sqk; - eg[kcount][0] = 2.0*unitkx*k*ug[kcount]; - eg[kcount][1] = -2.0*unitky*l*ug[kcount]; - eg[kcount][2] = -2.0*unitkz*m*ug[kcount]; - vg[kcount][0] = 1.0 + vterm*(unitkx*k)*(unitkx*k); - vg[kcount][1] = 1.0 + vterm*(unitky*l)*(unitky*l); - vg[kcount][2] = 1.0 + vterm*(unitkz*m)*(unitkz*m); - vg[kcount][3] = -vterm*unitkx*k*unitky*l; - vg[kcount][4] = -vterm*unitkx*k*unitkz*m; - vg[kcount][5] = vterm*unitky*l*unitkz*m; + eg[kcount][0] = 2.0*unitk[0]*k*ug[kcount]; + eg[kcount][1] = -2.0*unitk[1]*l*ug[kcount]; + eg[kcount][2] = -2.0*unitk[2]*m*ug[kcount]; + vg[kcount][0] = 1.0 + vterm*(unitk[0]*k)*(unitk[0]*k); + vg[kcount][1] = 1.0 + vterm*(unitk[1]*l)*(unitk[1]*l); + vg[kcount][2] = 1.0 + vterm*(unitk[2]*m)*(unitk[2]*m); + vg[kcount][3] = -vterm*unitk[0]*k*unitk[1]*l; + vg[kcount][4] = -vterm*unitk[0]*k*unitk[2]*m; + vg[kcount][5] = vterm*unitk[1]*l*unitk[2]*m; kcount++;; } } } } } /* ---------------------------------------------------------------------- allocate memory that depends on # of K-vectors ------------------------------------------------------------------------- */ void Ewald::allocate() { kxvecs = new int[kmax3d]; kyvecs = new int[kmax3d]; kzvecs = new int[kmax3d]; ug = new double[kmax3d]; memory->create(eg,kmax3d,3,"ewald:eg"); memory->create(vg,kmax3d,6,"ewald:vg"); sfacrl = new double[kmax3d]; sfacim = new double[kmax3d]; sfacrl_all = new double[kmax3d]; sfacim_all = new double[kmax3d]; } /* ---------------------------------------------------------------------- deallocate memory that depends on # of K-vectors ------------------------------------------------------------------------- */ void Ewald::deallocate() { delete [] kxvecs; delete [] kyvecs; delete [] kzvecs; delete [] ug; memory->destroy(eg); memory->destroy(vg); delete [] sfacrl; delete [] sfacim; delete [] sfacrl_all; delete [] sfacim_all; } /* ---------------------------------------------------------------------- Slab-geometry correction term to dampen inter-slab interactions between periodically repeating slabs. Yields good approximation to 2-D Ewald if adequate empty space is left between repeating slabs (J. Chem. Phys. 111, 3155). Slabs defined here to be parallel to the xy plane. ------------------------------------------------------------------------- */ void Ewald::slabcorr() { // compute local contribution to global dipole moment double *q = atom->q; double **x = atom->x; int nlocal = atom->nlocal; double dipole = 0.0; for (int i = 0; i < nlocal; i++) dipole += q[i]*x[i][2]; // sum local contributions to get global dipole moment double dipole_all; MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); // compute corrections const double e_slabcorr = 2.0*MY_PI*dipole_all*dipole_all/volume; const double qscale = force->qqrd2e * scale; if (eflag_global) energy += qscale * e_slabcorr; // per-atom energy if (eflag_atom) { double efact = 2.0*MY_PI*dipole_all/volume; for (int i = 0; i < nlocal; i++) eatom[i] += qscale * q[i]*x[i][2]*efact; } // add on force corrections double ffact = -4.0*MY_PI*dipole_all/volume; double **f = atom->f; for (int i = 0; i < nlocal; i++) f[i][2] += qscale * q[i]*ffact; } /* ---------------------------------------------------------------------- memory usage of local arrays ------------------------------------------------------------------------- */ double Ewald::memory_usage() { double bytes = 3 * kmax3d * sizeof(int); bytes += (1 + 3 + 6) * kmax3d * sizeof(double); bytes += 4 * kmax3d * sizeof(double); bytes += nmax*3 * sizeof(double); bytes += 2 * (2*kmax+1)*3*nmax * sizeof(double); return bytes; } diff --git a/src/REPLICA/verlet_split.cpp b/src/REPLICA/verlet_split.cpp index d1f6eaef0..4272cc247 100644 --- a/src/REPLICA/verlet_split.cpp +++ b/src/REPLICA/verlet_split.cpp @@ -1,550 +1,551 @@ /* ------------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing authors: Yuxing Peng and Chris Knight (U Chicago) ------------------------------------------------------------------------- */ #include "string.h" #include "verlet_split.h" #include "universe.h" #include "neighbor.h" #include "domain.h" #include "comm.h" #include "atom.h" #include "atom_vec.h" #include "force.h" #include "pair.h" #include "bond.h" #include "angle.h" #include "dihedral.h" #include "improper.h" #include "kspace.h" #include "output.h" #include "update.h" #include "modify.h" #include "timer.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ VerletSplit::VerletSplit(LAMMPS *lmp, int narg, char **arg) : Verlet(lmp, narg, arg) { // error checks on partitions if (universe->nworlds != 2) error->universe_all(FLERR,"Verlet/split requires 2 partitions"); if (universe->procs_per_world[0] % universe->procs_per_world[1]) error->universe_all(FLERR,"Verlet/split requires Rspace partition " "size be multiple of Kspace partition size"); // master = 1 for Rspace procs, 0 for Kspace procs if (universe->iworld == 0) master = 1; else master = 0; ratio = universe->procs_per_world[0] / universe->procs_per_world[1]; // Kspace root proc broadcasts info about Kspace proc layout to Rspace procs int kspace_procgrid[3]; if (universe->me == universe->root_proc[1]) { kspace_procgrid[0] = comm->procgrid[0]; kspace_procgrid[1] = comm->procgrid[1]; kspace_procgrid[2] = comm->procgrid[2]; } MPI_Bcast(kspace_procgrid,3,MPI_INT,universe->root_proc[1],universe->uworld); int ***kspace_grid2proc; memory->create(kspace_grid2proc,kspace_procgrid[0], kspace_procgrid[1],kspace_procgrid[2], "verlet/split:kspace_grid2proc"); if (universe->me == universe->root_proc[1]) { for (int i = 0; i < comm->procgrid[0]; i++) for (int j = 0; j < comm->procgrid[1]; j++) for (int k = 0; k < comm->procgrid[2]; k++) kspace_grid2proc[i][j][k] = comm->grid2proc[i][j][k]; } MPI_Bcast(&kspace_grid2proc[0][0][0], kspace_procgrid[0]*kspace_procgrid[1]*kspace_procgrid[2],MPI_INT, universe->root_proc[1],universe->uworld); // Rspace partition must be multiple of Kspace partition in each dim // so atoms of one Kspace proc coincide with atoms of several Rspace procs if (master) { int flag = 0; if (comm->procgrid[0] % kspace_procgrid[0]) flag = 1; if (comm->procgrid[1] % kspace_procgrid[1]) flag = 1; if (comm->procgrid[2] % kspace_procgrid[2]) flag = 1; if (flag) error->one(FLERR, "Verlet/split requires Rspace partition layout be " "multiple of Kspace partition layout in each dim"); } // block = 1 Kspace proc with set of Rspace procs it overlays // me_block = 0 for Kspace proc // me_block = 1 to ratio for Rspace procs // block = MPI communicator for that set of procs int iblock,key; if (!master) { iblock = comm->me; key = 0; } else { int kpx = comm->myloc[0] / (comm->procgrid[0]/kspace_procgrid[0]); int kpy = comm->myloc[1] / (comm->procgrid[1]/kspace_procgrid[1]); int kpz = comm->myloc[2] / (comm->procgrid[2]/kspace_procgrid[2]); iblock = kspace_grid2proc[kpx][kpy][kpz]; key = 1; } MPI_Comm_split(universe->uworld,iblock,key,&block); MPI_Comm_rank(block,&me_block); // output block groupings to universe screen/logfile // bmap is ordered by block and then by proc within block int *bmap = new int[universe->nprocs]; for (int i = 0; i < universe->nprocs; i++) bmap[i] = -1; bmap[iblock*(ratio+1)+me_block] = universe->me; int *bmapall = new int[universe->nprocs]; MPI_Allreduce(bmap,bmapall,universe->nprocs,MPI_INT,MPI_MAX,universe->uworld); if (universe->me == 0) { if (universe->uscreen) { fprintf(universe->uscreen, "Per-block Rspace/Kspace proc IDs (original proc IDs):\n"); int m = 0; for (int i = 0; i < universe->nprocs/(ratio+1); i++) { fprintf(universe->uscreen," block %d:",i); int kspace_proc = bmapall[m]; for (int j = 1; j <= ratio; j++) fprintf(universe->uscreen," %d",bmapall[m+j]); fprintf(universe->uscreen," %d",kspace_proc); kspace_proc = bmapall[m]; for (int j = 1; j <= ratio; j++) { if (j == 1) fprintf(universe->uscreen," ("); else fprintf(universe->uscreen," "); fprintf(universe->uscreen,"%d", universe->uni2orig[bmapall[m+j]]); } fprintf(universe->uscreen," %d)\n",universe->uni2orig[kspace_proc]); m += ratio + 1; } } if (universe->ulogfile) { fprintf(universe->ulogfile, "Per-block Rspace/Kspace proc IDs (original proc IDs):\n"); int m = 0; for (int i = 0; i < universe->nprocs/(ratio+1); i++) { fprintf(universe->ulogfile," block %d:",i); int kspace_proc = bmapall[m]; for (int j = 1; j <= ratio; j++) fprintf(universe->ulogfile," %d",bmapall[m+j]); fprintf(universe->ulogfile," %d",kspace_proc); kspace_proc = bmapall[m]; for (int j = 1; j <= ratio; j++) { if (j == 1) fprintf(universe->ulogfile," ("); else fprintf(universe->ulogfile," "); fprintf(universe->ulogfile,"%d", universe->uni2orig[bmapall[m+j]]); } fprintf(universe->ulogfile," %d)\n",universe->uni2orig[kspace_proc]); m += ratio + 1; } } } memory->destroy(kspace_grid2proc); delete [] bmap; delete [] bmapall; // size/disp = vectors for MPI gather/scatter within block qsize = new int[ratio+1]; qdisp = new int[ratio+1]; xsize = new int[ratio+1]; xdisp = new int[ratio+1]; // f_kspace = Rspace copy of Kspace forces // allocate dummy version for Kspace partition maxatom = 0; f_kspace = NULL; if (!master) memory->create(f_kspace,1,1,"verlet/split:f_kspace"); } /* ---------------------------------------------------------------------- */ VerletSplit::~VerletSplit() { delete [] qsize; delete [] qdisp; delete [] xsize; delete [] xdisp; memory->destroy(f_kspace); MPI_Comm_free(&block); } /* ---------------------------------------------------------------------- initialization before run ------------------------------------------------------------------------- */ void VerletSplit::init() { if (!force->kspace && comm->me == 0) error->warning(FLERR,"No Kspace calculation with verlet/split"); if (force->kspace_match("tip4p",0)) tip4p_flag = 1; else tip4p_flag = 0; Verlet::init(); } /* ---------------------------------------------------------------------- setup before run servant partition only sets up KSpace calculation ------------------------------------------------------------------------- */ void VerletSplit::setup() { if (comm->me == 0 && screen) fprintf(screen,"Setting up run ...\n"); if (!master) force->kspace->setup(); else Verlet::setup(); } /* ---------------------------------------------------------------------- setup without output flag = 0 = just force calculation flag = 1 = reneighbor and force calculation servant partition only sets up KSpace calculation ------------------------------------------------------------------------- */ void VerletSplit::setup_minimal(int flag) { if (!master) force->kspace->setup(); else Verlet::setup_minimal(flag); } /* ---------------------------------------------------------------------- run for N steps master partition does everything but Kspace servant partition does just Kspace communicate back and forth every step: atom coords from master -> servant kspace forces from servant -> master also box bounds from master -> servant if necessary ------------------------------------------------------------------------- */ void VerletSplit::run(int n) { int nflag,ntimestep,sortflag; // sync both partitions before start timer MPI_Barrier(universe->uworld); timer->init(); timer->barrier_start(TIME_LOOP); // setup initial Rspace <-> Kspace comm params rk_setup(); // flags for timestepping iterations int n_post_integrate = modify->n_post_integrate; int n_pre_exchange = modify->n_pre_exchange; int n_pre_neighbor = modify->n_pre_neighbor; int n_pre_force = modify->n_pre_force; int n_post_force = modify->n_post_force; int n_end_of_step = modify->n_end_of_step; if (atom->sortfreq > 0) sortflag = 1; else sortflag = 0; for (int i = 0; i < n; i++) { ntimestep = ++update->ntimestep; ev_set(ntimestep); // initial time integration if (master) { modify->initial_integrate(vflag); if (n_post_integrate) modify->post_integrate(); } // regular communication vs neighbor list rebuild if (master) nflag = neighbor->decide(); MPI_Bcast(&nflag,1,MPI_INT,1,block); if (master) { if (nflag == 0) { timer->stamp(); comm->forward_comm(); timer->stamp(TIME_COMM); } else { if (n_pre_exchange) modify->pre_exchange(); if (triclinic) domain->x2lamda(atom->nlocal); domain->pbc(); if (domain->box_change) { domain->reset_box(); comm->setup(); if (neighbor->style) neighbor->setup_bins(); } timer->stamp(); comm->exchange(); if (sortflag && ntimestep >= atom->nextsort) atom->sort(); comm->borders(); if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost); timer->stamp(TIME_COMM); if (n_pre_neighbor) modify->pre_neighbor(); neighbor->build(); timer->stamp(TIME_NEIGHBOR); } } // if reneighboring occurred, re-setup Rspace <-> Kspace comm params // comm Rspace atom coords to Kspace procs if (nflag) rk_setup(); r2k_comm(); // force computations force_clear(); if (master) { if (n_pre_force) modify->pre_force(vflag); timer->stamp(); if (force->pair) { force->pair->compute(eflag,vflag); timer->stamp(TIME_PAIR); } if (atom->molecular) { if (force->bond) force->bond->compute(eflag,vflag); if (force->angle) force->angle->compute(eflag,vflag); if (force->dihedral) force->dihedral->compute(eflag,vflag); if (force->improper) force->improper->compute(eflag,vflag); timer->stamp(TIME_BOND); } if (force->newton) { comm->reverse_comm(); timer->stamp(TIME_COMM); } } else { if (force->kspace) { timer->stamp(); force->kspace->compute(eflag,vflag); timer->stamp(TIME_KSPACE); } // TIP4P PPPM puts forces on ghost atoms, so must reverse_comm() if (tip4p_flag && force->newton) { comm->reverse_comm(); timer->stamp(TIME_COMM); } } // comm and sum Kspace forces back to Rspace procs k2r_comm(); // force modifications, final time integration, diagnostics // all output if (master) { if (n_post_force) modify->post_force(vflag); modify->final_integrate(); if (n_end_of_step) modify->end_of_step(); if (ntimestep == output->next) { timer->stamp(); output->write(ntimestep); timer->stamp(TIME_OUTPUT); } } } } /* ---------------------------------------------------------------------- setup params for Rspace <-> Kspace communication called initially and after every reneighbor also communcicate atom charges from Rspace to KSpace since static ------------------------------------------------------------------------- */ void VerletSplit::rk_setup() { // grow f_kspace array on master procs if necessary if (master) { if (atom->nlocal > maxatom) { memory->destroy(f_kspace); maxatom = atom->nmax; memory->create(f_kspace,maxatom,3,"verlet/split:f_kspace"); } } // qsize = # of atoms owned by each master proc in block int n = 0; if (master) n = atom->nlocal; MPI_Gather(&n,1,MPI_INT,qsize,1,MPI_INT,0,block); // setup qdisp, xsize, xdisp based on qsize // only needed by Kspace proc // set Kspace nlocal to sum of Rspace nlocals // insure Kspace atom arrays are large enough if (!master) { qsize[0] = qdisp[0] = xsize[0] = xdisp[0] = 0; for (int i = 1; i <= ratio; i++) { qdisp[i] = qdisp[i-1]+qsize[i-1]; xsize[i] = 3*qsize[i]; xdisp[i] = xdisp[i-1]+xsize[i-1]; } atom->nlocal = qdisp[ratio] + qsize[ratio]; while (atom->nmax <= atom->nlocal) atom->avec->grow(0); atom->nghost = 0; } // one-time gather of Rspace atom charges to Kspace proc MPI_Gatherv(atom->q,n,MPI_DOUBLE,atom->q,qsize,qdisp,MPI_DOUBLE,0,block); // for TIP4P also need to send atom type and tag // KSpace procs need to acquire ghost atoms and map all their atoms // map_clear() call is in lieu of comm->exchange() which performs map_clear // borders() call acquires ghost atoms and maps them - // NOTE: don't atom coords need to be communicated here before borders() ?? + // NOTE: do atom coords need to be communicated here before borders() call? // could do this by calling r2k_comm() here and not again from run() + // except that forward_comm() in r2k_comm() is wrong if (tip4p_flag) { //r2k_comm(); MPI_Gatherv(atom->type,n,MPI_INT,atom->type,qsize,qdisp,MPI_INT,0,block); MPI_Gatherv(atom->tag,n,MPI_INT,atom->tag,qsize,qdisp,MPI_INT,0,block); if (!master) { if (triclinic) domain->x2lamda(atom->nlocal); if (domain->box_change) comm->setup(); timer->stamp(); atom->map_clear(); comm->borders(); if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost); timer->stamp(TIME_COMM); } } } /* ---------------------------------------------------------------------- communicate Rspace atom coords to Kspace also eflag,vflag and box bounds if needed ------------------------------------------------------------------------- */ void VerletSplit::r2k_comm() { MPI_Status status; int n = 0; if (master) n = atom->nlocal; MPI_Gatherv(atom->x[0],n*3,MPI_DOUBLE,atom->x[0],xsize,xdisp, MPI_DOUBLE,0,block); // send eflag,vflag from Rspace to Kspace if (me_block == 1) { int flags[2]; flags[0] = eflag; flags[1] = vflag; MPI_Send(flags,2,MPI_INT,0,0,block); } else if (!master) { int flags[2]; MPI_Recv(flags,2,MPI_DOUBLE,1,0,block,&status); eflag = flags[0]; vflag = flags[1]; } // send box bounds from Rspace to Kspace if simulation box is dynamic if (domain->box_change) { if (me_block == 1) { MPI_Send(domain->boxlo,3,MPI_DOUBLE,0,0,block); MPI_Send(domain->boxhi,3,MPI_DOUBLE,0,0,block); } else if (!master) { MPI_Recv(domain->boxlo,3,MPI_DOUBLE,1,0,block,&status); MPI_Recv(domain->boxhi,3,MPI_DOUBLE,1,0,block,&status); domain->set_global_box(); domain->set_local_box(); force->kspace->setup(); } } // for TIP4P, Kspace partition needs to update its ghost atoms if (tip4p_flag && !master) { timer->stamp(); comm->forward_comm(); timer->stamp(TIME_COMM); } } /* ---------------------------------------------------------------------- communicate and sum Kspace atom forces back to Rspace ------------------------------------------------------------------------- */ void VerletSplit::k2r_comm() { if (eflag) MPI_Bcast(&force->kspace->energy,1,MPI_DOUBLE,0,block); if (vflag) MPI_Bcast(force->kspace->virial,6,MPI_DOUBLE,0,block); int n = 0; if (master) n = atom->nlocal; MPI_Scatterv(atom->f[0],xsize,xdisp,MPI_DOUBLE, f_kspace[0],n*3,MPI_DOUBLE,0,block); if (master) { double **f = atom->f; int nlocal = atom->nlocal; for (int i = 0; i < nlocal; i++) { f[i][0] += f_kspace[i][0]; f[i][1] += f_kspace[i][1]; f[i][2] += f_kspace[i][2]; } } } /* ---------------------------------------------------------------------- memory usage of Kspace force array on master procs ------------------------------------------------------------------------- */ bigint VerletSplit::memory_usage() { bigint bytes = maxatom*3 * sizeof(double); return bytes; }