diff --git a/src/DIPOLE/pair_lj_long_dipole_long.cpp b/src/DIPOLE/pair_lj_long_dipole_long.cpp index 383e3814c..b476cfcee 100644 --- a/src/DIPOLE/pair_lj_long_dipole_long.cpp +++ b/src/DIPOLE/pair_lj_long_dipole_long.cpp @@ -1,683 +1,682 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Pieter J. in 't Veld and Stan Moore (Sandia) ------------------------------------------------------------------------- */ #include "math.h" #include "stdio.h" #include "stdlib.h" #include "string.h" #include "math_const.h" #include "math_vector.h" #include "pair_lj_long_dipole_long.h" #include "atom.h" #include "comm.h" #include "neighbor.h" #include "neigh_list.h" #include "neigh_request.h" #include "force.h" #include "kspace.h" #include "update.h" #include "integrate.h" #include "respa.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; using namespace MathConst; #define EWALD_F 1.12837917 #define EWALD_P 0.3275911 #define A1 0.254829592 #define A2 -0.284496736 #define A3 1.421413741 #define A4 -1.453152027 #define A5 1.061405429 // ---------------------------------------------------------------------- PairLJLongDipoleLong::PairLJLongDipoleLong(LAMMPS *lmp) : Pair(lmp) { dispersionflag = ewaldflag = dipoleflag = 1; respa_enable = 0; single_enable = 0; } // ---------------------------------------------------------------------- // 
global settings // ---------------------------------------------------------------------- void PairLJLongDipoleLong::options(char **arg, int order) { const char *option[] = {"long", "cut", "off", NULL}; int i; if (!*arg) error->all(FLERR,"Illegal pair_style lj/long/dipole/long command"); for (i=0; option[i]&&strcmp(arg[0], option[i]); ++i); switch (i) { default: error->all(FLERR,"Illegal pair_style lj/long/dipole/long command"); case 0: ewald_order |= 1<<order; break; // set kspace r^-order case 2: ewald_off |= 1<<order; // turn r^-order off case 1: break; } } void PairLJLongDipoleLong::settings(int narg, char **arg) { if (narg != 3 && narg != 4) error->all(FLERR,"Illegal pair_style command"); ewald_off = 0; ewald_order = 0; options(arg, 6); options(++arg, 3); options(arg, 1); if (!comm->me && ewald_order&(1<<6)) error->warning(FLERR,"Geometric mixing assumed for 1/r^6 coefficients"); if (!comm->me && ewald_order==((1<<3)|(1<<6))) error->warning(FLERR, "Using largest cut-off for lj/long/dipole/long long long"); if (!*(++arg)) error->all(FLERR,"Cut-offs missing in pair_style lj/long/dipole/long"); if (!((ewald_order^ewald_off)&(1<<3))) error->all(FLERR, "Coulombic cut not supported in pair_style lj/long/dipole/long"); cut_lj_global = force->numeric(FLERR,*(arg++)); if (narg == 4 && (ewald_order==74)) error->all(FLERR,"Only one cut-off allowed when requesting all long"); if (narg == 4) cut_coul = force->numeric(FLERR,*(arg++)); else cut_coul = cut_lj_global; if (allocated) { // reset explicit cuts int i,j; for (i = 1; i <= atom->ntypes; i++) for (j = i+1; j <= atom->ntypes; j++) if (setflag[i][j]) cut_lj[i][j] = cut_lj_global; } } // ---------------------------------------------------------------------- // free all arrays // ---------------------------------------------------------------------- PairLJLongDipoleLong::~PairLJLongDipoleLong() { if (allocated) { memory->destroy(setflag); memory->destroy(cutsq); memory->destroy(cut_lj_read); memory->destroy(cut_lj); 
memory->destroy(cut_ljsq); memory->destroy(epsilon_read); memory->destroy(epsilon); memory->destroy(sigma_read); memory->destroy(sigma); memory->destroy(lj1); memory->destroy(lj2); memory->destroy(lj3); memory->destroy(lj4); memory->destroy(offset); } //if (ftable) free_tables(); } /* ---------------------------------------------------------------------- allocate all arrays ------------------------------------------------------------------------- */ void PairLJLongDipoleLong::allocate() { allocated = 1; int n = atom->ntypes; memory->create(setflag,n+1,n+1,"pair:setflag"); for (int i = 1; i <= n; i++) for (int j = i; j <= n; j++) setflag[i][j] = 0; memory->create(cutsq,n+1,n+1,"pair:cutsq"); memory->create(cut_lj_read,n+1,n+1,"pair:cut_lj_read"); memory->create(cut_lj,n+1,n+1,"pair:cut_lj"); memory->create(cut_ljsq,n+1,n+1,"pair:cut_ljsq"); memory->create(epsilon_read,n+1,n+1,"pair:epsilon_read"); memory->create(epsilon,n+1,n+1,"pair:epsilon"); memory->create(sigma_read,n+1,n+1,"pair:sigma_read"); memory->create(sigma,n+1,n+1,"pair:sigma"); memory->create(lj1,n+1,n+1,"pair:lj1"); memory->create(lj2,n+1,n+1,"pair:lj2"); memory->create(lj3,n+1,n+1,"pair:lj3"); memory->create(lj4,n+1,n+1,"pair:lj4"); memory->create(offset,n+1,n+1,"pair:offset"); } /* ---------------------------------------------------------------------- extract protected data from object ------------------------------------------------------------------------- */ void *PairLJLongDipoleLong::extract(const char *id, int &dim) { const char *ids[] = { "B", "sigma", "epsilon", "ewald_order", "ewald_cut", "ewald_mix", "cut_coul", "cut_vdwl", NULL}; void *ptrs[] = { lj4, sigma, epsilon, &ewald_order, &cut_coul, &mix_flag, &cut_coul, &cut_lj_global, NULL}; int i; for (i=0; ids[i]&&strcmp(ids[i], id); ++i); if (i <= 2) dim = 2; else dim = 0; return ptrs[i]; } /* ---------------------------------------------------------------------- set coeffs for one or more type pairs 
------------------------------------------------------------------------- */ void PairLJLongDipoleLong::coeff(int narg, char **arg) { if (narg < 4 || narg > 5) error->all(FLERR,"Incorrect args for pair coefficients"); if (!allocated) allocate(); int ilo,ihi,jlo,jhi; force->bounds(arg[0],atom->ntypes,ilo,ihi); force->bounds(arg[1],atom->ntypes,jlo,jhi); double epsilon_one = force->numeric(FLERR,arg[2]); double sigma_one = force->numeric(FLERR,arg[3]); double cut_lj_one = cut_lj_global; if (narg == 5) cut_lj_one = force->numeric(FLERR,arg[4]); int count = 0; for (int i = ilo; i <= ihi; i++) { for (int j = MAX(jlo,i); j <= jhi; j++) { epsilon_read[i][j] = epsilon_one; sigma_read[i][j] = sigma_one; cut_lj_read[i][j] = cut_lj_one; setflag[i][j] = 1; count++; } } if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); } /* ---------------------------------------------------------------------- init specific to this pair style ------------------------------------------------------------------------- */ void PairLJLongDipoleLong::init_style() { const char *style3[] = {"ewald/disp", NULL}; const char *style6[] = {"ewald/disp", NULL}; int i; if (strcmp(update->unit_style,"electron") == 0) error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles"); // require an atom style with charge defined if (!atom->q_flag && (ewald_order&(1<<1))) error->all(FLERR, "Invoking coulombic in pair style lj/long/dipole/long requires atom attribute q"); if (!atom->mu && (ewald_order&(1<<3))) error->all(FLERR,"Pair lj/long/dipole/long requires atom attributes mu, torque"); if (!atom->torque && (ewald_order&(1<<3))) error->all(FLERR,"Pair lj/long/dipole/long requires atom attributes mu, torque"); neighbor->request(this); cut_coulsq = cut_coul * cut_coul; // ensure use of KSpace long-range solver, set g_ewald if (ewald_order&(1<<3)) { // r^-1 kspace if (force->kspace == NULL) error->all(FLERR,"Pair style is incompatible with KSpace style"); for (i=0; 
style3[i]&&strcmp(force->kspace_style, style3[i]); ++i); if (!style3[i]) error->all(FLERR,"Pair style is incompatible with KSpace style"); } if (ewald_order&(1<<6)) { // r^-6 kspace if (force->kspace == NULL) error->all(FLERR,"Pair style is incompatible with KSpace style"); for (i=0; style6[i]&&strcmp(force->kspace_style, style6[i]); ++i); if (!style6[i]) error->all(FLERR,"Pair style is incompatible with KSpace style"); } if (force->kspace) g_ewald = force->kspace->g_ewald; } /* ---------------------------------------------------------------------- neighbor callback to inform pair style of neighbor list to use regular or rRESPA ------------------------------------------------------------------------- */ void PairLJLongDipoleLong::init_list(int id, NeighList *ptr) { if (id == 0) list = ptr; else if (id == 1) listinner = ptr; else if (id == 2) listmiddle = ptr; else if (id == 3) listouter = ptr; if (id) error->all(FLERR,"Pair style lj/long/dipole/long does not currently support respa"); } /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ double PairLJLongDipoleLong::init_one(int i, int j) { if ((ewald_order&(1<<6))||(setflag[i][j] == 0)) { epsilon[i][j] = mix_energy(epsilon_read[i][i],epsilon_read[j][j], sigma_read[i][i],sigma_read[j][j]); sigma[i][j] = mix_distance(sigma_read[i][i],sigma_read[j][j]); if (ewald_order&(1<<6)) cut_lj[i][j] = cut_lj_global; else cut_lj[i][j] = mix_distance(cut_lj_read[i][i],cut_lj_read[j][j]); } else { sigma[i][j] = sigma_read[i][j]; epsilon[i][j] = epsilon_read[i][j]; cut_lj[i][j] = cut_lj_read[i][j]; } double cut = MAX(cut_lj[i][j], cut_coul); cutsq[i][j] = cut*cut; cut_ljsq[i][j] = cut_lj[i][j] * cut_lj[i][j]; lj1[i][j] = 48.0 * epsilon[i][j] * pow(sigma[i][j],12.0); lj2[i][j] = 24.0 * epsilon[i][j] * pow(sigma[i][j],6.0); lj3[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],12.0); 
lj4[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],6.0); // check interior rRESPA cutoff //if (cut_respa && MIN(cut_lj[i][j],cut_coul) < cut_respa[3]) //error->all(FLERR,"Pair cutoff < Respa interior cutoff"); if (offset_flag) { double ratio = sigma[i][j] / cut_lj[i][j]; offset[i][j] = 4.0 * epsilon[i][j] * (pow(ratio,12.0) - pow(ratio,6.0)); } else offset[i][j] = 0.0; cutsq[j][i] = cutsq[i][j]; cut_ljsq[j][i] = cut_ljsq[i][j]; lj1[j][i] = lj1[i][j]; lj2[j][i] = lj2[i][j]; lj3[j][i] = lj3[i][j]; lj4[j][i] = lj4[i][j]; offset[j][i] = offset[i][j]; return cut; } /* ---------------------------------------------------------------------- proc 0 writes to restart file ------------------------------------------------------------------------- */ void PairLJLongDipoleLong::write_restart(FILE *fp) { write_restart_settings(fp); int i,j; for (i = 1; i <= atom->ntypes; i++) for (j = i; j <= atom->ntypes; j++) { fwrite(&setflag[i][j],sizeof(int),1,fp); if (setflag[i][j]) { fwrite(&epsilon_read[i][j],sizeof(double),1,fp); fwrite(&sigma_read[i][j],sizeof(double),1,fp); fwrite(&cut_lj_read[i][j],sizeof(double),1,fp); } } } /* ---------------------------------------------------------------------- proc 0 reads from restart file, bcasts ------------------------------------------------------------------------- */ void PairLJLongDipoleLong::read_restart(FILE *fp) { read_restart_settings(fp); allocate(); int i,j; int me = comm->me; for (i = 1; i <= atom->ntypes; i++) for (j = i; j <= atom->ntypes; j++) { if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp); MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world); if (setflag[i][j]) { if (me == 0) { fread(&epsilon_read[i][j],sizeof(double),1,fp); fread(&sigma_read[i][j],sizeof(double),1,fp); fread(&cut_lj_read[i][j],sizeof(double),1,fp); } MPI_Bcast(&epsilon_read[i][j],1,MPI_DOUBLE,0,world); MPI_Bcast(&sigma_read[i][j],1,MPI_DOUBLE,0,world); MPI_Bcast(&cut_lj_read[i][j],1,MPI_DOUBLE,0,world); } } } /* 
---------------------------------------------------------------------- proc 0 writes to restart file ------------------------------------------------------------------------- */ void PairLJLongDipoleLong::write_restart_settings(FILE *fp) { fwrite(&cut_lj_global,sizeof(double),1,fp); fwrite(&cut_coul,sizeof(double),1,fp); fwrite(&offset_flag,sizeof(int),1,fp); fwrite(&mix_flag,sizeof(int),1,fp); fwrite(&ewald_order,sizeof(int),1,fp); } /* ---------------------------------------------------------------------- proc 0 reads from restart file, bcasts ------------------------------------------------------------------------- */ void PairLJLongDipoleLong::read_restart_settings(FILE *fp) { if (comm->me == 0) { fread(&cut_lj_global,sizeof(double),1,fp); fread(&cut_coul,sizeof(double),1,fp); fread(&offset_flag,sizeof(int),1,fp); fread(&mix_flag,sizeof(int),1,fp); fread(&ewald_order,sizeof(int),1,fp); } MPI_Bcast(&cut_lj_global,1,MPI_DOUBLE,0,world); MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world); MPI_Bcast(&offset_flag,1,MPI_INT,0,world); MPI_Bcast(&mix_flag,1,MPI_INT,0,world); MPI_Bcast(&ewald_order,1,MPI_INT,0,world); } /* ---------------------------------------------------------------------- compute pair interactions ------------------------------------------------------------------------- */ void PairLJLongDipoleLong::compute(int eflag, int vflag) { double evdwl,ecoul,fpair; evdwl = ecoul = 0.0; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; double **x = atom->x, *x0 = x[0]; double **mu = atom->mu, *mu0 = mu[0], *imu, *jmu; double **tq = atom->torque, *tq0 = tq[0], *tqi; double **f = atom->f, *f0 = f[0], *fi = f0, fx, fy, fz; double *q = atom->q, qi = 0, qj; int *type = atom->type; int nlocal = atom->nlocal; double *special_coul = force->special_coul; double *special_lj = force->special_lj; int newton_pair = force->newton_pair; double qqrd2e = force->qqrd2e; int i, j; - int order1 = ewald_order&(1<<1), order3 = ewald_order&(1<<3), - order6 = 
ewald_order&(1<<6); + int order3 = ewald_order&(1<<3), order6 = ewald_order&(1<<6); int *ineigh, *ineighn, *jneigh, *jneighn, typei, typej, ni; double *cutsqi, *cut_ljsqi, *lj1i, *lj2i, *lj3i, *lj4i, *offseti; double rsq, r2inv, force_coul, force_lj; double g2 = g_ewald*g_ewald, g6 = g2*g2*g2, g8 = g6*g2; double B0, B1, B2, B3, G0, G1, G2, mudi, mudj, muij; vector force_d = VECTOR_NULL, ti = VECTOR_NULL, tj = VECTOR_NULL; vector mui, muj, xi, d; double C1 = 2.0 * g_ewald / MY_PIS; double C2 = 2.0 * g2 * C1; double C3 = 2.0 * g2 * C2; ineighn = (ineigh = list->ilist)+list->inum; for (; ineigh<ineighn; ++ineigh) { // loop over all neighs i = *ineigh; fi = f0+3*i; tqi = tq0+3*i; qi = q[i]; // initialize constants offseti = offset[typei = type[i]]; lj1i = lj1[typei]; lj2i = lj2[typei]; lj3i = lj3[typei]; lj4i = lj4[typei]; cutsqi = cutsq[typei]; cut_ljsqi = cut_ljsq[typei]; memcpy(xi, x0+(i+(i<<1)), sizeof(vector)); memcpy(mui, imu = mu0+(i<<2), sizeof(vector)); jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i]; for (; jneigh<jneighn; ++jneigh) { // loop over neighbors j = *jneigh; ni = sbmask(j); // special index j &= NEIGHMASK; { register double *xj = x0+(j+(j<<1)); d[0] = xi[0] - xj[0]; // pair vector d[1] = xi[1] - xj[1]; d[2] = xi[2] - xj[2]; } if ((rsq = vec_dot(d, d)) >= cutsqi[typej = type[j]]) continue; r2inv = 1.0/rsq; if (order3 && (rsq < cut_coulsq)) { // dipole memcpy(muj, jmu = mu0+(j<<2), sizeof(vector)); { // series real space register double r = sqrt(rsq); register double x = g_ewald*r; register double f = exp(-x*x)*qqrd2e; B0 = 1.0/(1.0+EWALD_P*x); // eqn 2.8 B0 *= ((((A5*B0+A4)*B0+A3)*B0+A2)*B0+A1)*f/r; B1 = (B0 + C1 * f) * r2inv; B2 = (3.0*B1 + C2 * f) * r2inv; B3 = (5.0*B2 + C3 * f) * r2inv; mudi = mui[0]*d[0]+mui[1]*d[1]+mui[2]*d[2]; mudj = muj[0]*d[0]+muj[1]*d[1]+muj[2]*d[2]; muij = mui[0]*muj[0]+mui[1]*muj[1]+mui[2]*muj[2]; G0 = qi*(qj = q[j]); // eqn 2.10 G1 = qi*mudj-qj*mudi+muij; G2 = -mudi*mudj; force_coul = G0*B1+G1*B2+G2*B3; mudi 
*= B2; mudj *= B2; // torque contribs ti[0] = mudj*d[0]+(qj*d[0]-muj[0])*B1; ti[1] = mudj*d[1]+(qj*d[1]-muj[1])*B1; ti[2] = mudj*d[2]+(qj*d[2]-muj[2])*B1; if (newton_pair || j < nlocal) { tj[0] = mudi*d[0]-(qi*d[0]+mui[0])*B1; tj[1] = mudi*d[1]-(qi*d[1]+mui[1])*B1; tj[2] = mudi*d[2]-(qi*d[2]+mui[2])*B1; } if (eflag) ecoul = G0*B0+G1*B1+G2*B2; if (ni > 0) { // adj part, eqn 2.13 force_coul -= (f = qqrd2e*(1.0-special_coul[ni])/r)*( (3.0*G1+15.0*G2*r2inv)*r2inv+G0)*r2inv; if (eflag) ecoul -= f*((G1+3.0*G2*r2inv)*r2inv+G0); B1 -= f*r2inv; } B0 = mudj+qj*B1; B3 = -qi*B1+mudi; // position independent if (ni > 0) B0 -= f*3.0*mudj*r2inv*r2inv/B2; if (ni > 0) B3 -= f*3.0*mudi*r2inv*r2inv/B2; force_d[0] = B0*mui[0]+B3*muj[0]; // force contribs force_d[1] = B0*mui[1]+B3*muj[1]; force_d[2] = B0*mui[2]+B3*muj[2]; if (ni > 0) { ti[0] -= f*(3.0*mudj*r2inv*r2inv*d[0]/B2+(qj*r2inv*d[0]-muj[0]*r2inv)); ti[1] -= f*(3.0*mudj*r2inv*r2inv*d[1]/B2+(qj*r2inv*d[1]-muj[1]*r2inv)); ti[2] -= f*(3.0*mudj*r2inv*r2inv*d[2]/B2+(qj*r2inv*d[2]-muj[2]*r2inv)); if (newton_pair || j < nlocal) { tj[0] -= f*(3.0*mudi*r2inv*r2inv*d[0]/B2-(qi*r2inv*d[0]+mui[0]*r2inv)); tj[1] -= f*(3.0*mudi*r2inv*r2inv*d[1]/B2-(qi*r2inv*d[1]+mui[1]*r2inv)); tj[2] -= f*(3.0*mudi*r2inv*r2inv*d[2]/B2-(qi*r2inv*d[2]+mui[2]*r2inv)); } } } // table real space } else { force_coul = ecoul = 0.0; memset(force_d, 0, 3*sizeof(double)); } if (rsq < cut_ljsqi[typej]) { // lj if (order6) { // long-range lj register double rn = r2inv*r2inv*r2inv; register double x2 = g2*rsq, a2 = 1.0/x2; x2 = a2*exp(-x2)*lj4i[typej]; if (ni < 0) { force_lj = (rn*=rn)*lj1i[typej]-g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq; if (eflag) evdwl = rn*lj3i[typej]-g6*((a2+1.0)*a2+0.5)*x2; } else { // special case register double f = special_lj[ni], t = rn*(1.0-f); force_lj = f*(rn *= rn)*lj1i[typej]- g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*lj2i[typej]; if (eflag) evdwl = f*rn*lj3i[typej]-g6*((a2+1.0)*a2+0.5)*x2+t*lj4i[typej]; } } else { // cut lj register 
double rn = r2inv*r2inv*r2inv; if (ni < 0) { force_lj = rn*(rn*lj1i[typej]-lj2i[typej]); if (eflag) evdwl = rn*(rn*lj3i[typej]-lj4i[typej])-offseti[typej]; } else { // special case register double f = special_lj[ni]; force_lj = f*rn*(rn*lj1i[typej]-lj2i[typej]); if (eflag) evdwl = f*( rn*(rn*lj3i[typej]-lj4i[typej])-offseti[typej]); } } force_lj *= r2inv; } else force_lj = evdwl = 0.0; fpair = force_coul+force_lj; // force if (newton_pair || j < nlocal) { register double *fj = f0+(j+(j<<1)); fi[0] += fx = d[0]*fpair+force_d[0]; fj[0] -= fx; fi[1] += fy = d[1]*fpair+force_d[1]; fj[1] -= fy; fi[2] += fz = d[2]*fpair+force_d[2]; fj[2] -= fz; tqi[0] += mui[1]*ti[2]-mui[2]*ti[1]; // torque tqi[1] += mui[2]*ti[0]-mui[0]*ti[2]; tqi[2] += mui[0]*ti[1]-mui[1]*ti[0]; register double *tqj = tq0+(j+(j<<1)); tqj[0] += muj[1]*tj[2]-muj[2]*tj[1]; tqj[1] += muj[2]*tj[0]-muj[0]*tj[2]; tqj[2] += muj[0]*tj[1]-muj[1]*tj[0]; } else { fi[0] += fx = d[0]*fpair+force_d[0]; // force fi[1] += fy = d[1]*fpair+force_d[1]; fi[2] += fz = d[2]*fpair+force_d[2]; tqi[0] += mui[1]*ti[2]-mui[2]*ti[1]; // torque tqi[1] += mui[2]*ti[0]-mui[0]*ti[2]; tqi[2] += mui[0]*ti[1]-mui[1]*ti[0]; } if (evflag) ev_tally_xyz(i,j,nlocal,newton_pair, evdwl,ecoul,fx,fy,fz,d[0],d[1],d[2]); } } if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ /* double PairLJLongDipoleLong::single(int i, int j, int itype, int jtype, double rsq, double factor_coul, double factor_lj, double &fforce) { double r6inv, force_coul, force_lj; double g2 = g_ewald*g_ewald, g6 = g2*g2*g2, g8 = g6*g2, *q = atom->q; double eng = 0.0; double r2inv = 1.0/rsq; if ((ewald_order&(1<<3)) && (rsq < cut_coulsq)) { // coulombic double *mui = atom->mu[i], *muj = atom->mu[j]; double *xi = atom->x[i], *xj = atom->x[j]; double qi = q[i], qj = q[j]; double G0, G1, G2, B0, B1, B2, B3, mudi, mudj, muij; vector d = {xi[0]-xj[0], xi[1]-xj[1], xi[2]-xj[2]}; { // series real space register double r 
= sqrt(rsq); register double x = g_ewald*r; register double f = exp(-x*x)*qqrd2e; B0 = 1.0/(1.0+EWALD_P*x); // eqn 2.8 B0 *= ((((A5*B0+A4)*B0+A3)*B0+A2)*B0+A1)*f/r; B1 = (B0 + C1 * f) * r2inv; B2 = (3.0*B1 + C2 * f) * r2inv; B3 = (5.0*B2 + C3 * f) * r2inv; mudi = mui[0]*d[0]+mui[1]*d[1]+mui[2]*d[2]; mudj = muj[0]*d[0]+muj[1]*d[1]+muj[2]*d[2]; muij = mui[0]*muj[0]+mui[1]*muj[1]+mui[2]*muj[2]; G0 = qi*(qj = q[j]); // eqn 2.10 G1 = qi*mudj-qj*mudi+muij; G2 = -mudi*mudj; force_coul = G0*B1+G1*B2+G2*B3; eng += G0*B0+G1*B1+G2*B2; if (factor_coul < 1.0) { // adj part, eqn 2.13 force_coul -= (f = force->qqrd2e*(1.0-factor_coul)/r)*( (3.0*G1+6.0*muij+15.0*G2*r2inv)*r2inv+G0); eng -= f*((G1+3.0*G2*r2inv)*r2inv+G0); B1 -= f*r2inv; } B0 = mudj*B2-qj*B1; B3 = qi*B1+mudi*B2; // position independent //force_d[0] = B0*mui[0]+B3*muj[0]; // force contributions //force_d[1] = B0*mui[1]+B3*muj[1]; //force_d[2] = B0*mui[2]+B3*muj[2]; } // table real space } else force_coul = 0.0; if (rsq < cut_ljsq[itype][jtype]) { // lennard-jones r6inv = r2inv*r2inv*r2inv; if (ewald_order&0x40) { // long-range register double x2 = g2*rsq, a2 = 1.0/x2, t = r6inv*(1.0-factor_lj); x2 = a2*exp(-x2)*lj4[itype][jtype]; force_lj = factor_lj*(r6inv *= r6inv)*lj1[itype][jtype]- g8*(((6.0*a2+6.0)*a2+3.0)*a2+a2)*x2*rsq+t*lj2[itype][jtype]; eng += factor_lj*r6inv*lj3[itype][jtype]- g6*((a2+1.0)*a2+0.5)*x2+t*lj4[itype][jtype]; } else { // cut force_lj = factor_lj*r6inv*(lj1[itype][jtype]*r6inv-lj2[itype][jtype]); eng += factor_lj*(r6inv*(r6inv*lj3[itype][jtype]- lj4[itype][jtype])-offset[itype][jtype]); } } else force_lj = 0.0; fforce = (force_coul+force_lj)*r2inv; return eng; } */ diff --git a/src/KSPACE/fix_tune_kspace.cpp b/src/KSPACE/fix_tune_kspace.cpp index 44b2bfc55..05f4b6aa5 100644 --- a/src/KSPACE/fix_tune_kspace.cpp +++ b/src/KSPACE/fix_tune_kspace.cpp @@ -1,543 +1,542 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel 
Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Paul Crozier (SNL) ------------------------------------------------------------------------- */ #include "string.h" #include "stdlib.h" #include "fix_tune_kspace.h" #include "update.h" #include "domain.h" #include "atom.h" #include "comm.h" #include "force.h" #include "kspace.h" #include "pair.h" #include "error.h" #include "memory.h" #include "timer.h" #include "neighbor.h" #include "modify.h" #include "compute.h" #include <iostream> #include <cmath> #include <limits> #define SWAP(a,b) {temp=(a);(a)=(b);(b)=temp;} #define SIGN(a,b) ((b) >= 0.0 ? 
fabs(a) : -fabs(a)) #define GOLD 1.618034 using namespace std; using namespace LAMMPS_NS; using namespace FixConst; /* ---------------------------------------------------------------------- */ FixTuneKspace::FixTuneKspace(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg) { if (narg < 3) error->all(FLERR,"Illegal fix tune/kspace command"); global_freq = 1; firststep = 0; niter = 0; niter_adjust_rcut = 0; keep_bracketing = true; first_brent_pass = true; converged = false; need_fd2_brent = false; ewald_time = pppm_time = msm_time = 0.0; // parse arguments nevery = force->inumeric(FLERR,arg[3]); // set up reneighboring force_reneighbor = 1; next_reneighbor = update->ntimestep + 1; } /* ---------------------------------------------------------------------- */ int FixTuneKspace::setmask() { int mask = 0; mask |= PRE_EXCHANGE; mask |= PRE_NEIGHBOR; return mask; } /* ---------------------------------------------------------------------- */ void FixTuneKspace::init() { if (!force->kspace) error->all(FLERR,"Cannot use fix tune/kspace without a kspace style"); if (!force->pair) error->all(FLERR,"Cannot use fix tune/kspace without a pair style"); double old_acc = force->kspace->accuracy/force->kspace->two_charge_force; char old_acc_str[12]; sprintf(old_acc_str,"%g",old_acc); strcpy(new_acc_str,old_acc_str); int itmp; double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); pair_cut_coul = *p_cutoff; } /* ---------------------------------------------------------------------- perform dynamic kspace parameter optimization ------------------------------------------------------------------------- */ void FixTuneKspace::pre_exchange() { if (!nevery) return; if (!force->kspace) return; if (!force->pair) return; if (next_reneighbor != update->ntimestep) return; next_reneighbor = update->ntimestep + nevery; double time = get_timing_info(); if (strcmp(force->kspace_style,"ewald") == 0) ewald_time = time; if (strcmp(force->kspace_style,"pppm") == 0) pppm_time = time; 
if (strcmp(force->kspace_style,"msm") == 0) msm_time = time; niter++; if (niter == 1) { // test Ewald store_old_kspace_settings(); strcpy(new_kspace_style,"ewald"); sprintf(new_pair_style,"%s/long",base_pair_style); update_pair_style(new_pair_style,pair_cut_coul); update_kspace_style(new_kspace_style,new_acc_str); } else if (niter == 2) { // test PPPM store_old_kspace_settings(); strcpy(new_kspace_style,"pppm"); sprintf(new_pair_style,"%s/long",base_pair_style); update_pair_style(new_pair_style,pair_cut_coul); update_kspace_style(new_kspace_style,new_acc_str); } else if (niter == 3) { // test MSM store_old_kspace_settings(); strcpy(new_kspace_style,"msm"); sprintf(new_pair_style,"%s/msm",base_pair_style); update_pair_style(new_pair_style,pair_cut_coul); update_kspace_style(new_kspace_style,new_acc_str); } else if (niter == 4) { store_old_kspace_settings(); cout << "ewald_time = " << ewald_time << endl; cout << "pppm_time = " << pppm_time << endl; cout << "msm_time = " << msm_time << endl; // switch to fastest one strcpy(new_kspace_style,"ewald"); sprintf(new_pair_style,"%s/long",base_pair_style); if (pppm_time < ewald_time && pppm_time < msm_time) strcpy(new_kspace_style,"pppm"); else if (msm_time < pppm_time && msm_time < ewald_time) { strcpy(new_kspace_style,"msm"); sprintf(new_pair_style,"%s/msm",base_pair_style); } update_pair_style(new_pair_style,pair_cut_coul); update_kspace_style(new_kspace_style,new_acc_str); } else { adjust_rcut(time); } last_spcpu = timer->elapsed(Timer::TOTAL); } /* ---------------------------------------------------------------------- figure out CPU time per timestep since last time checked ------------------------------------------------------------------------- */ double FixTuneKspace::get_timing_info() { double dvalue; double new_cpu; int new_step = update->ntimestep; if (firststep == 0) { new_cpu = 0.0; dvalue = 0.0; firststep = 1; } else { new_cpu = timer->elapsed(Timer::TOTAL); double cpu_diff = new_cpu - last_spcpu; int step_diff 
= new_step - last_step; if (step_diff > 0.0) dvalue = cpu_diff/step_diff; else dvalue = 0.0; } last_step = new_step; last_spcpu = new_cpu; return dvalue; } /* ---------------------------------------------------------------------- store old kspace settings: style, accuracy, order, etc ------------------------------------------------------------------------- */ void FixTuneKspace::store_old_kspace_settings() { int n = strlen(force->kspace_style) + 1; char *old_kspace_style = new char[n]; strcpy(old_kspace_style,force->kspace_style); strcpy(new_kspace_style,old_kspace_style); double old_acc = force->kspace->accuracy_relative; char old_acc_str[12]; sprintf(old_acc_str,"%g",old_acc); strcpy(new_pair_style,force->pair_style); strcpy(base_pair_style,force->pair_style); char *trunc; if ((trunc = strstr(base_pair_style, "/long")) != NULL) *trunc = '\0'; if ((trunc = strstr(base_pair_style, "/msm" )) != NULL) *trunc = '\0'; old_differentiation_flag = force->kspace->differentiation_flag; old_slabflag = force->kspace->slabflag; old_slab_volfactor = force->kspace->slab_volfactor; } /* ---------------------------------------------------------------------- update the pair style if necessary, preserving the settings ------------------------------------------------------------------------- */ void FixTuneKspace::update_pair_style(char *new_pair_style, double pair_cut_coul) { int itmp; double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); *p_cutoff = pair_cut_coul; // check to see if we need to change pair styles if (strcmp(new_pair_style,force->pair_style) == 0) return; // create a temporary file to store current pair settings FILE *p_pair_settings_file; p_pair_settings_file = tmpfile(); force->pair->write_restart(p_pair_settings_file); rewind(p_pair_settings_file); cout << "Creating new pair style: " << new_pair_style << endl; // delete old pair style and create new one force->create_pair(new_pair_style,lmp->suffix); // restore current pair settings from temporary 
file force->pair->read_restart(p_pair_settings_file); double *pcutoff = (double *) force->pair->extract("cut_coul",itmp); double current_cutoff = *pcutoff; cout << "Coulomb cutoff for real space: " << current_cutoff << endl; // close temporary file fclose(p_pair_settings_file); } /* ---------------------------------------------------------------------- update the kspace style if necessary ------------------------------------------------------------------------- */ void FixTuneKspace::update_kspace_style(char *new_kspace_style, char *new_acc_str) { // create kspace style char string int narg = 2; char **arg; arg = NULL; int maxarg = 100; arg = (char **) memory->srealloc(arg,maxarg*sizeof(char *),"tune/kspace:arg"); int n = 12; arg[0] = new char[n]; strcpy(arg[0],new_kspace_style); arg[1] = new char[n]; strcpy(arg[1],new_acc_str); // delete old kspace style and create new one force->create_kspace(narg,arg,lmp->suffix); force->kspace->differentiation_flag = old_differentiation_flag; force->kspace->slabflag = old_slabflag; force->kspace->slab_volfactor = old_slab_volfactor; // initialize new kspace style, pair style, molecular styles force->init(); // set up grid force->kspace->setup_grid(); // Re-init neighbor list. Probably only needed when redefining the pair style. Should happen after pair->init() to get pair style neighbor list request registered neighbor->init(); // Re-init computes to update pointers to virials, etc. 
for (int i = 0; i < modify->ncompute; i++) modify->compute[i]->init(); memory->sfree(arg); } /* ---------------------------------------------------------------------- find the optimal real space coulomb cutoff ------------------------------------------------------------------------- */ void FixTuneKspace::adjust_rcut(double time) { if (strcmp(force->kspace_style,"msm") == 0) return; if (converged) return; double temp; const double TINY = 1.0e-20; // get the current cutoff int itmp; double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); double current_cutoff = *p_cutoff; cout << "Old Coulomb cutoff for real space: " << current_cutoff << endl; // use Brent's method from Numerical Recipes to find optimal real space cutoff // first time through, get ax_brent and fa_brent, and adjust cutoff if (keep_bracketing) { if (niter_adjust_rcut == 0) { pair_cut_coul /= 2; } else if (niter_adjust_rcut == 1) { ax_brent = current_cutoff; fa_brent = time; pair_cut_coul *= 2; // second time through, get bx_brent and fb_brent, and adjust cutoff } else if (niter_adjust_rcut == 2) { bx_brent = current_cutoff; fb_brent = time; if (fb_brent > fa_brent) { SWAP(ax_brent,bx_brent); SWAP(fb_brent,fa_brent); pair_cut_coul /= 4; } else { pair_cut_coul *= 2; } // third time through, get cx_brent and fc_brent, and adjust cutoff if needed } else if (niter_adjust_rcut == 3) { cx_brent = current_cutoff; fc_brent = time; if (fc_brent > fb_brent) keep_bracketing = false; else { double r = (bx_brent - ax_brent)*(fb_brent - fc_brent); double q = (bx_brent - cx_brent)*(fb_brent - fa_brent); dx_brent = bx_brent - ((bx_brent - cx_brent)*q - (bx_brent - ax_brent)*r)/ (2.0*SIGN(MAX(fabs(q - r),TINY),q - r)); pair_cut_coul = dx_brent; } // after third time through, bracket the minimum, and adjust cutoff } else if (niter_adjust_rcut > 3) { dx_brent = current_cutoff; if (need_fd2_brent) fd2_brent = time; else fd_brent = time; mnbrak(); pair_cut_coul = dx_brent; } } if (!keep_bracketing) { dx_brent 
= current_cutoff; fd_brent = time; if (first_brent_pass) brent0(); else brent2(); brent1(); pair_cut_coul = dx_brent; } niter_adjust_rcut++; if (pair_cut_coul <= 0.0) pair_cut_coul = fabs(MIN(ax_brent,MIN(bx_brent,(MIN(cx_brent,dx_brent))))/2.0) + TINY; if (pair_cut_coul != pair_cut_coul) error->all(FLERR,"Bad real space Coulomb cutoff in fix tune/kspace"); // change the cutoff to pair_cut_coul *p_cutoff = pair_cut_coul; // report the new cutoff double *new_cutoff = (double *) force->pair->extract("cut_coul",itmp); current_cutoff = *new_cutoff; cout << "Adjusted Coulomb cutoff for real space: " << current_cutoff << endl; store_old_kspace_settings(); update_pair_style(new_pair_style,pair_cut_coul); update_kspace_style(new_kspace_style,new_acc_str); } /* ---------------------------------------------------------------------- bracket a minimum using parabolic extrapolation ------------------------------------------------------------------------- */ void FixTuneKspace::mnbrak() { const double GLIMIT = 100.0, TINY = 1.0e-20; - double temp,r,q; + double r,q; r = (bx_brent - ax_brent)*(fb_brent - fc_brent); q = (bx_brent - cx_brent)*(fb_brent - fa_brent); dx_brent = bx_brent - ((bx_brent - cx_brent)*q - (bx_brent - ax_brent)*r)/ (2.0*SIGN(MAX(fabs(q - r),TINY),q - r)); dxlim = bx_brent + GLIMIT*(cx_brent - bx_brent); if ((bx_brent - dx_brent)*(dx_brent - cx_brent) > 0.0) { if (fd_brent < fc_brent) { ax_brent = bx_brent; bx_brent = dx_brent; fa_brent = fb_brent; fb_brent = fd_brent; keep_bracketing = false; return; } else if (fd_brent > fb_brent) { cx_brent = dx_brent; fc_brent = fd_brent; keep_bracketing = false; return; } dx_brent = cx_brent + GOLD*(cx_brent - bx_brent); if (need_fd2_brent) { fd_brent = fd2_brent; need_fd2_brent = false; } else { need_fd2_brent = true; return; } } else if ((cx_brent - dx_brent)*(dx_brent - dxlim) > 0.0) { if (fd_brent < fc_brent) { if (need_fd2_brent) { need_fd2_brent = false; } else { need_fd2_brent = true; dx_brent += GOLD*(dx_brent - 
cx_brent); return; } shft3(bx_brent,cx_brent,dx_brent,dx_brent + GOLD*(dx_brent - cx_brent)); shft3(fb_brent,fc_brent,fd_brent,fd2_brent); } } else if ((dx_brent - dxlim)*(dxlim - cx_brent) >= 0.0) { dx_brent = dxlim; if (need_fd2_brent) { fd_brent = fd2_brent; need_fd2_brent = false; } else { need_fd2_brent = true; return; } } else { dx_brent = cx_brent + GOLD*(cx_brent - bx_brent); if (need_fd2_brent) { fd_brent = fd2_brent; need_fd2_brent = false; } else { need_fd2_brent = true; return; } } shft3(ax_brent,bx_brent,cx_brent,dx_brent); shft3(fa_brent,fb_brent,fc_brent,fd_brent); } /* ---------------------------------------------------------------------- Brent's method from Numerical Recipes ------------------------------------------------------------------------- */ void FixTuneKspace::brent0() { a_brent=(ax_brent < cx_brent ? ax_brent : cx_brent); b_brent=(ax_brent > cx_brent ? ax_brent : cx_brent); x_brent=w_brent=v_brent=bx_brent; fw_brent=fv_brent=fx_brent=fb_brent; } /* ---------------------------------------------------------------------- Brent's method from Numerical Recipes ------------------------------------------------------------------------- */ void FixTuneKspace::brent1() { - const int ITMAX=100; const double CGOLD=0.3819660; const double ZEPS=numeric_limits<double>::epsilon()*1.0e-3; double d=0.0,etemp; double p,q,r,tol1,tol2,xm; double e=0.0; double tol=0.001; xm=0.5*(a_brent+b_brent); tol2=2.0*(tol1=tol*fabs(x_brent)+ZEPS); if (fabs(x_brent-xm) <= (tol2-0.5*(b_brent-a_brent))) { converged = true; dx_brent = x_brent; return; } if (fabs(e) > tol1) { r=(x_brent-w_brent)*(fx_brent-fv_brent); q=(x_brent-v_brent)*(fx_brent-fw_brent); p=(x_brent-v_brent)*q-(x_brent-w_brent)*r; q=2.0*(q-r); if (q > 0.0) p = -p; q=fabs(q); etemp=e; e=d; if (fabs(p) >= fabs(0.5*q*etemp) || p <= q*(a_brent-x_brent) || p >= q*(b_brent-x_brent)) d=CGOLD*(e=(x_brent >= xm ? 
a_brent-x_brent : b_brent-x_brent)); else { d=p/q; dx_brent=x_brent+d; if (dx_brent-a_brent < tol2 || b_brent-dx_brent < tol2) d=SIGN(tol1,xm-x_brent); } } else { d=CGOLD*(e=(x_brent >= xm ? a_brent-x_brent : b_brent-x_brent)); } dx_brent=(fabs(d) >= tol1 ? x_brent+d : x_brent+SIGN(tol1,d)); first_brent_pass = false; return; } /* ---------------------------------------------------------------------- Brent's method from Numerical Recipes ------------------------------------------------------------------------- */ void FixTuneKspace::brent2() { if (fd_brent <= fx_brent) { if (dx_brent >= x_brent) a_brent=x_brent; else b_brent=x_brent; shft3(v_brent,w_brent,x_brent,dx_brent); shft3(fv_brent,fw_brent,fx_brent,fd_brent); } else { if (dx_brent < x_brent) a_brent=dx_brent; else b_brent=dx_brent; if (fd_brent <= fw_brent || w_brent == x_brent) { v_brent=w_brent; w_brent=dx_brent; fv_brent=fw_brent; fw_brent=fd_brent; } else if (fd_brent <= fv_brent || v_brent == x_brent || v_brent == w_brent) { v_brent=dx_brent; fv_brent=fd_brent; } } } diff --git a/src/RIGID/fix_rigid_nh_small.cpp b/src/RIGID/fix_rigid_nh_small.cpp index 29223847c..342b4dc50 100644 --- a/src/RIGID/fix_rigid_nh_small.cpp +++ b/src/RIGID/fix_rigid_nh_small.cpp @@ -1,1534 +1,1532 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Trung Dac Nguyen (ORNL) references: Kamberaj et al., J. Chem. Phys. 122, 224114 (2005) Miller et al., J Chem Phys. 116, 8649-8659 (2002) ------------------------------------------------------------------------- */ #include "math.h" #include "stdio.h" #include "string.h" #include "fix_rigid_nh_small.h" #include "math_extra.h" #include "atom.h" #include "compute.h" #include "domain.h" #include "update.h" #include "modify.h" #include "fix_deform.h" #include "group.h" #include "comm.h" #include "force.h" #include "kspace.h" #include "output.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; using namespace FixConst; enum{NONE,XYZ,XY,YZ,XZ}; // same as in FixRigid enum{ISO,ANISO,TRICLINIC}; // same as in FixRigid #define EPSILON 1.0e-7 enum{FULL_BODY,INITIAL,FINAL,FORCE_TORQUE,VCM_ANGMOM,XCM_MASS,ITENSOR,DOF}; /* ---------------------------------------------------------------------- */ FixRigidNHSmall::FixRigidNHSmall(LAMMPS *lmp, int narg, char **arg) : FixRigidSmall(lmp, narg, arg) { // error checks if ((p_flag[0] == 1 && p_period[0] <= 0.0) || (p_flag[1] == 1 && p_period[1] <= 0.0) || (p_flag[2] == 1 && p_period[2] <= 0.0)) error->all(FLERR,"Fix rigid/small npt/nph period must be > 0.0"); if (domain->dimension == 2 && p_flag[2]) error->all(FLERR,"Invalid fix rigid/small npt/nph command for a 2d simulation"); if (domain->dimension == 2 && (pcouple == YZ || pcouple == XZ)) error->all(FLERR,"Invalid fix rigid/small npt/nph command for a 2d simulation"); if (pcouple == XYZ && (p_flag[0] == 0 || p_flag[1] == 0)) error->all(FLERR,"Invalid fix rigid/small npt/nph command pressure settings"); if (pcouple == XYZ && domain->dimension == 3 && p_flag[2] == 0) error->all(FLERR,"Invalid fix rigid/small npt/nph command pressure settings"); if (pcouple == XY && (p_flag[0] == 0 || p_flag[1] == 0)) 
error->all(FLERR,"Invalid fix rigid/small npt/nph command pressure settings"); if (pcouple == YZ && (p_flag[1] == 0 || p_flag[2] == 0)) error->all(FLERR,"Invalid fix rigid/small npt/nph command pressure settings"); if (pcouple == XZ && (p_flag[0] == 0 || p_flag[2] == 0)) error->all(FLERR,"Invalid fix rigid/small npt/nph command pressure settings"); // require periodicity in tensile dimension if (p_flag[0] && domain->xperiodic == 0) error->all(FLERR, "Cannot use fix rigid/small npt/nph on a non-periodic dimension"); if (p_flag[1] && domain->yperiodic == 0) error->all(FLERR, "Cannot use fix rigid/small npt/nph on a non-periodic dimension"); if (p_flag[2] && domain->zperiodic == 0) error->all(FLERR, "Cannot use fix rigid/small npt/nph on a non-periodic dimension"); if (pcouple == XYZ && domain->dimension == 3 && (p_start[0] != p_start[1] || p_start[0] != p_start[2] || p_stop[0] != p_stop[1] || p_stop[0] != p_stop[2] || p_period[0] != p_period[1] || p_period[0] != p_period[2])) error->all(FLERR,"Invalid fix rigid/small npt/nph pressure settings"); if (pcouple == XYZ && domain->dimension == 2 && (p_start[0] != p_start[1] || p_stop[0] != p_stop[1] || p_period[0] != p_period[1])) error->all(FLERR,"Invalid fix rigid/small npt/nph pressure settings"); if (pcouple == XY && (p_start[0] != p_start[1] || p_stop[0] != p_stop[1] || p_period[0] != p_period[1])) error->all(FLERR,"Invalid fix rigid/small npt/nph pressure settings"); if (pcouple == YZ && (p_start[1] != p_start[2] || p_stop[1] != p_stop[2] || p_period[1] != p_period[2])) error->all(FLERR,"Invalid fix rigid/small npt/nph pressure settings"); if (pcouple == XZ && (p_start[0] != p_start[2] || p_stop[0] != p_stop[2] || p_period[0] != p_period[2])) error->all(FLERR,"Invalid fix rigid/small npt/nph pressure settings"); if ((tstat_flag && t_period <= 0.0) || (p_flag[0] && p_period[0] <= 0.0) || (p_flag[1] && p_period[1] <= 0.0) || (p_flag[2] && p_period[2] <= 0.0)) error->all(FLERR,"Fix rigid/small nvt/npt/nph damping 
parameters must be > 0.0"); // memory allocation and initialization if (tstat_flag || pstat_flag) { allocate_chain(); allocate_order(); } if (tstat_flag) { eta_t[0] = eta_r[0] = 0.0; eta_dot_t[0] = eta_dot_r[0] = 0.0; f_eta_t[0] = f_eta_r[0] = 0.0; for (int i = 1; i < t_chain; i++) { eta_t[i] = eta_r[i] = 0.0; eta_dot_t[i] = eta_dot_r[i] = 0.0; } } if (pstat_flag) { epsilon_dot[0] = epsilon_dot[1] = epsilon_dot[2] = 0.0; eta_b[0] = eta_dot_b[0] = f_eta_b[0] = 0.0; for (int i = 1; i < p_chain; i++) eta_b[i] = eta_dot_b[i] = 0.0; } // rigid body pointers nrigidfix = 0; rfix = NULL; vol0 = 0.0; t0 = 1.0; tcomputeflag = 0; pcomputeflag = 0; } /* ---------------------------------------------------------------------- */ FixRigidNHSmall::~FixRigidNHSmall() { if (tstat_flag || pstat_flag) { deallocate_chain(); deallocate_order(); } if (rfix) delete [] rfix; if (tcomputeflag) { modify->delete_compute(id_temp); delete [] id_temp; } // delete pressure if fix created it if (pstat_flag) { if (pcomputeflag) modify->delete_compute(id_press); delete [] id_press; } } /* ---------------------------------------------------------------------- */ int FixRigidNHSmall::setmask() { int mask = 0; mask = FixRigidSmall::setmask(); if (tstat_flag || pstat_flag) mask |= THERMO_ENERGY; return mask; } /* ---------------------------------------------------------------------- */ void FixRigidNHSmall::init() { FixRigidSmall::init(); // recheck that dilate group has not been deleted if (allremap == 0) { int idilate = group->find(id_dilate); if (idilate == -1) error->all(FLERR,"Fix rigid npt/nph dilate group ID does not exist"); dilate_group_bit = group->bitmask[idilate]; } // initialize thermostats // set timesteps, constants // store Yoshida-Suzuki integrator parameters dtv = update->dt; dtf = 0.5 * update->dt * force->ftm2v; dtq = 0.5 * update->dt; boltz = force->boltz; nktv2p = force->nktv2p; mvv2e = force->mvv2e; dimension = domain->dimension; if (force->kspace) kspace_flag = 1; else kspace_flag 
= 0; // see Table 1 in Kamberaj et al if (tstat_flag || pstat_flag) { if (t_order == 3) { w[0] = 1.0 / (2.0 - pow(2.0, 1.0/3.0)); w[1] = 1.0 - 2.0*w[0]; w[2] = w[0]; } else if (t_order == 5) { w[0] = 1.0 / (4.0 - pow(4.0, 1.0/3.0)); w[1] = w[0]; w[2] = 1.0 - 4.0 * w[0]; w[3] = w[0]; w[4] = w[0]; } } int icompute; if (tcomputeflag) { icompute = modify->find_compute(id_temp); if (icompute < 0) error->all(FLERR,"Temp ID for fix rigid npt/nph does not exist"); temperature = modify->compute[icompute]; } if (pstat_flag) { if (domain->triclinic) error->all(FLERR,"fix rigid npt/nph does not yet allow triclinic box"); // ensure no conflict with fix deform for (int i = 0; i < modify->nfix; i++) if (strcmp(modify->fix[i]->style,"deform") == 0) { int *dimflag = ((FixDeform *) modify->fix[i])->dimflag; if ((p_flag[0] && dimflag[0]) || (p_flag[1] && dimflag[1]) || (p_flag[2] && dimflag[2])) error->all(FLERR,"Cannot use fix rigid npt/nph and fix deform on " "same component of stress tensor"); } // set frequency p_freq_max = 0.0; p_freq_max = MAX(p_freq[0],p_freq[1]); p_freq_max = MAX(p_freq_max,p_freq[2]); // tally the number of dimensions that are barostatted // set initial volume and reference cell, if not already done pdim = p_flag[0] + p_flag[1] + p_flag[2]; if (vol0 == 0.0) { if (dimension == 2) vol0 = domain->xprd * domain->yprd; else vol0 = domain->xprd * domain->yprd * domain->zprd; } // set pressure compute ptr icompute = modify->find_compute(id_press); if (icompute < 0) error->all(FLERR,"Press ID for fix rigid npt/nph does not exist"); pressure = modify->compute[icompute]; // detect if any rigid fixes exist so rigid bodies move on remap // rfix[] = indices to each fix rigid // this will include self if (rfix) delete [] rfix; nrigidfix = 0; rfix = NULL; for (int i = 0; i < modify->nfix; i++) if (modify->fix[i]->rigid_flag) nrigidfix++; if (nrigidfix) { rfix = new int[nrigidfix]; nrigidfix = 0; for (int i = 0; i < modify->nfix; i++) if (modify->fix[i]->rigid_flag) 
rfix[nrigidfix++] = i; } } } /* ---------------------------------------------------------------------- */ void FixRigidNHSmall::setup(int vflag) { FixRigidSmall::setup(vflag); // total translational and rotational degrees of freedom int k,ibody; - double *inertia; nf_t = nf_r = dimension * nlocal_body; for (ibody = 0; ibody < nlocal_body; ibody++) { - inertia = body[ibody].inertia; for (k = 0; k < domain->dimension; k++) if (fabs(body[ibody].inertia[k]) < EPSILON) nf_r--; } double nf[2], nfall[2]; nf[0] = nf_t; nf[1] = nf_r; MPI_Allreduce(nf,nfall,2,MPI_DOUBLE,MPI_SUM,world); nf_t = nfall[0]; nf_r = nfall[1]; g_f = nf_t + nf_r; onednft = 1.0 + (double)(dimension) / (double)g_f; onednfr = (double) (dimension) / (double)g_f; double mbody[3]; akin_t = akin_r = 0.0; for (int ibody = 0; ibody < nlocal_body; ibody++) { Body *b = &body[ibody]; MathExtra::transpose_matvec(b->ex_space,b->ey_space,b->ez_space, b->angmom,mbody); MathExtra::quatvec(b->quat,mbody,b->conjqm); b->conjqm[0] *= 2.0; b->conjqm[1] *= 2.0; b->conjqm[2] *= 2.0; b->conjqm[3] *= 2.0; if (tstat_flag || pstat_flag) { akin_t += b->mass*(b->vcm[0]*b->vcm[0] + b->vcm[1]*b->vcm[1] + b->vcm[2]*b->vcm[2]); akin_r += b->angmom[0]*b->omega[0] + b->angmom[1]*b->omega[1] + b->angmom[2]*b->omega[2]; } } // accumulate translational and rotational kinetic energies if (tstat_flag || pstat_flag) { double ke[2],keall[2]; ke[0] = akin_t; ke[1] = akin_r; MPI_Allreduce(ke,keall,2,MPI_DOUBLE,MPI_SUM,world); akin_t = keall[0]; akin_r = keall[1]; } // compute target temperature if (tstat_flag) compute_temp_target(); else if (pstat_flag) { t0 = temperature->compute_scalar(); if (t0 == 0.0) { if (strcmp(update->unit_style,"lj") == 0) t0 = 1.0; else t0 = 300.0; } t_target = t0; } // compute target pressure // compute current pressure // trigger virial computation on next timestep if (pstat_flag) { compute_press_target(); temperature->compute_scalar(); if (pstyle == ISO) pressure->compute_scalar(); else pressure->compute_vector(); 
couple(); pressure->addstep(update->ntimestep+1); } // initialize thermostat/barostat settings double kt, t_mass, tb_mass; kt = boltz * t_target; if (tstat_flag) { t_mass = kt / (t_freq*t_freq); q_t[0] = nf_t * t_mass; q_r[0] = nf_r * t_mass; for (int i = 1; i < t_chain; i++) q_t[i] = q_r[i] = t_mass; for (int i = 1; i < t_chain; i++) { f_eta_t[i] = (q_t[i-1] * eta_dot_t[i-1] * eta_dot_t[i-1] - kt)/q_t[i]; f_eta_r[i] = (q_r[i-1] * eta_dot_r[i-1] * eta_dot_r[i-1] - kt)/q_r[i]; } } // initial forces on barostat thermostat variables if (pstat_flag) { for (int i = 0; i < 3; i++) if (p_flag[i]) { epsilon_mass[i] = (g_f + dimension) * kt / (p_freq[i]*p_freq[i]); epsilon[i] = log(vol0)/dimension; } tb_mass = kt / (p_freq_max * p_freq_max); q_b[0] = dimension * dimension * tb_mass; for (int i = 1; i < p_chain; i++) { q_b[i] = tb_mass; f_eta_b[i] = (q_b[i] * eta_dot_b[i-1] * eta_dot_b[i-1] - kt)/q_b[i]; } } // update order/timestep dependent coefficients if (tstat_flag || pstat_flag) { for (int i = 0; i < t_order; i++) { wdti1[i] = w[i] * dtv / t_iter; wdti2[i] = wdti1[i] / 2.0; wdti4[i] = wdti1[i] / 4.0; } } } /* ---------------------------------------------------------------------- perform preforce velocity Verlet integration see Kamberaj paper for step references ------------------------------------------------------------------------- */ void FixRigidNHSmall::initial_integrate(int vflag) { double tmp,scale_r,scale_t[3],scale_v[3]; double dtfm,mbody[3],tbody[3],fquat[4]; double dtf2 = dtf * 2.0; // compute target temperature // update thermostat chains coupled to particles if (tstat_flag) { compute_temp_target(); nhc_temp_integrate(); } // compute target pressure // update epsilon dot // update thermostat coupled to barostat if (pstat_flag) { nhc_press_integrate(); if (pstyle == ISO) { temperature->compute_scalar(); pressure->compute_scalar(); } else { temperature->compute_vector(); pressure->compute_vector(); } couple(); pressure->addstep(update->ntimestep+1); 
compute_press_target(); nh_epsilon_dot(); } // compute scale variables scale_t[0] = scale_t[1] = scale_t[2] = 1.0; scale_v[0] = scale_v[1] = scale_v[2] = 1.0; scale_r = 1.0; if (tstat_flag) { tmp = exp(-dtq * eta_dot_t[0]); scale_t[0] = scale_t[1] = scale_t[2] = tmp; tmp = exp(-dtq * eta_dot_r[0]); scale_r = tmp; } if (pstat_flag) { scale_t[0] *= exp(-dtq * (epsilon_dot[0] + mtk_term2)); scale_t[1] *= exp(-dtq * (epsilon_dot[1] + mtk_term2)); scale_t[2] *= exp(-dtq * (epsilon_dot[2] + mtk_term2)); scale_r *= exp(-dtq * (pdim * mtk_term2)); tmp = dtq * epsilon_dot[0]; scale_v[0] = dtv * exp(tmp) * maclaurin_series(tmp); tmp = dtq * epsilon_dot[1]; scale_v[1] = dtv * exp(tmp) * maclaurin_series(tmp); tmp = dtq * epsilon_dot[2]; scale_v[2] = dtv * exp(tmp) * maclaurin_series(tmp); } // update xcm, vcm, quat, conjqm and angmom for (int ibody = 0; ibody < nlocal_body; ibody++) { Body *b = &body[ibody]; // step 1.1 - update vcm by 1/2 step dtfm = dtf / b->mass; b->vcm[0] += dtfm * b->fcm[0]; b->vcm[1] += dtfm * b->fcm[1]; b->vcm[2] += dtfm * b->fcm[2]; if (tstat_flag || pstat_flag) { b->vcm[0] *= scale_t[0]; b->vcm[1] *= scale_t[1]; b->vcm[2] *= scale_t[2]; } // step 1.2 - update xcm by full step if (!pstat_flag) { b->xcm[0] += dtv * b->vcm[0]; b->xcm[1] += dtv * b->vcm[1]; b->xcm[2] += dtv * b->vcm[2]; } else { b->xcm[0] += scale_v[0] * b->vcm[0]; b->xcm[1] += scale_v[1] * b->vcm[1]; b->xcm[2] += scale_v[2] * b->vcm[2]; } // step 1.3 - apply torque (body coords) to quaternion momentum MathExtra::transpose_matvec(b->ex_space,b->ey_space,b->ez_space, b->torque,tbody); MathExtra::quatvec(b->quat,tbody,fquat); b->conjqm[0] += dtf2 * fquat[0]; b->conjqm[1] += dtf2 * fquat[1]; b->conjqm[2] += dtf2 * fquat[2]; b->conjqm[3] += dtf2 * fquat[3]; if (tstat_flag || pstat_flag) { b->conjqm[0] *= scale_r; b->conjqm[1] *= scale_r; b->conjqm[2] *= scale_r; b->conjqm[3] *= scale_r; } // step 1.4 to 1.13 - use no_squish rotate to update p and q 
no_squish_rotate(3,b->conjqm,b->quat,b->inertia,dtq); no_squish_rotate(2,b->conjqm,b->quat,b->inertia,dtq); no_squish_rotate(1,b->conjqm,b->quat,b->inertia,dtv); no_squish_rotate(2,b->conjqm,b->quat,b->inertia,dtq); no_squish_rotate(3,b->conjqm,b->quat,b->inertia,dtq); // update exyz_space // transform p back to angmom // update angular velocity MathExtra::q_to_exyz(b->quat,b->ex_space,b->ey_space, b->ez_space); MathExtra::invquatvec(b->quat,b->conjqm,mbody); MathExtra::matvec(b->ex_space,b->ey_space,b->ez_space, mbody,b->angmom); b->angmom[0] *= 0.5; b->angmom[1] *= 0.5; b->angmom[2] *= 0.5; MathExtra::angmom_to_omega(b->angmom,b->ex_space,b->ey_space, b->ez_space,b->inertia,b->omega); } // virial setup before call to set_xv if (vflag) v_setup(vflag); else evflag = 0; // forward communicate updated info of all bodies commflag = INITIAL; comm->forward_comm_variable_fix(this); // accumulate translational and rotational kinetic energies if (tstat_flag || pstat_flag) { akin_t = akin_r = 0.0; for (int ibody = 0; ibody < nlocal_body; ibody++) { Body *b = &body[ibody]; akin_t += b->mass*(b->vcm[0]*b->vcm[0] + b->vcm[1]*b->vcm[1] + b->vcm[2]*b->vcm[2]); akin_r += b->angmom[0]*b->omega[0] + b->angmom[1]*b->omega[1] + b->angmom[2]*b->omega[2]; } double ke[2],keall[2]; ke[0] = akin_t; ke[1] = akin_r; MPI_Allreduce(ke,keall,2,MPI_DOUBLE,MPI_SUM,world); akin_t = keall[0]; akin_r = keall[1]; } // remap simulation box by 1/2 step if (pstat_flag) remap(); // set coords/orient and velocity/rotation of atoms in rigid bodies // from quarternion and omega set_xv(); // remap simulation box by full step // redo KSpace coeffs since volume has changed if (pstat_flag) { remap(); if (kspace_flag) force->kspace->setup(); } } /* ---------------------------------------------------------------------- */ void FixRigidNHSmall::final_integrate() { int i,ibody; double tmp,scale_t[3],scale_r; - double dtfm,xy,xz,yz; + double dtfm; double mbody[3],tbody[3],fquat[4]; double dtf2 = dtf * 2.0; // 
compute scale variables scale_t[0] = scale_t[1] = scale_t[2] = 1.0; scale_r = 1.0; if (tstat_flag) { tmp = exp(-1.0 * dtq * eta_dot_t[0]); scale_t[0] = scale_t[1] = scale_t[2] = tmp; scale_r = exp(-1.0 * dtq * eta_dot_r[0]); } if (pstat_flag) { scale_t[0] *= exp(-dtq * (epsilon_dot[0] + mtk_term2)); scale_t[1] *= exp(-dtq * (epsilon_dot[1] + mtk_term2)); scale_t[2] *= exp(-dtq * (epsilon_dot[2] + mtk_term2)); scale_r *= exp(-dtq * (pdim * mtk_term2)); } // sum over atoms to get force and torque on rigid body imageint *image = atom->image; double **x = atom->x; double **f = atom->f; int nlocal = atom->nlocal; double dx,dy,dz; double unwrap[3]; double *xcm,*fcm,*tcm; for (ibody = 0; ibody < nlocal_body+nghost_body; ibody++) { fcm = body[ibody].fcm; fcm[0] = fcm[1] = fcm[2] = 0.0; tcm = body[ibody].torque; tcm[0] = tcm[1] = tcm[2] = 0.0; } for (i = 0; i < nlocal; i++) { if (atom2body[i] < 0) continue; Body *b = &body[atom2body[i]]; fcm = b->fcm; fcm[0] += f[i][0]; fcm[1] += f[i][1]; fcm[2] += f[i][2]; domain->unmap(x[i],image[i],unwrap); xcm = b->xcm; dx = unwrap[0] - xcm[0]; dy = unwrap[1] - xcm[1]; dz = unwrap[2] - xcm[2]; tcm = b->torque; tcm[0] += dy*f[i][2] - dz*f[i][1]; tcm[1] += dz*f[i][0] - dx*f[i][2]; tcm[2] += dx*f[i][1] - dy*f[i][0]; } // extended particles add their torque to torque of body if (extended) { double **torque = atom->torque; for (i = 0; i < nlocal; i++) { if (atom2body[i] < 0) continue; if (eflags[i] & TORQUE) { tcm = body[atom2body[i]].torque; tcm[0] += torque[i][0]; tcm[1] += torque[i][1]; tcm[2] += torque[i][2]; } } } // reverse communicate fcm, torque of all bodies commflag = FORCE_TORQUE; comm->reverse_comm_variable_fix(this); // include Langevin thermostat forces and torques if (langflag) { for (int ibody = 0; ibody < nlocal_body; ibody++) { fcm = body[ibody].fcm; fcm[0] += langextra[ibody][0]; fcm[1] += langextra[ibody][1]; fcm[2] += langextra[ibody][2]; tcm = body[ibody].torque; tcm[0] += langextra[ibody][3]; tcm[1] += 
langextra[ibody][4]; tcm[2] += langextra[ibody][5]; } } // update vcm and angmom // include Langevin thermostat forces // fflag,tflag = 0 for some dimensions in 2d for (ibody = 0; ibody < nbody; ibody++) { Body *b = &body[ibody]; // update vcm by 1/2 step dtfm = dtf / b->mass; if (tstat_flag || pstat_flag) { b->vcm[0] *= scale_t[0]; b->vcm[1] *= scale_t[1]; b->vcm[2] *= scale_t[2]; } b->vcm[0] += dtfm * b->fcm[0]; b->vcm[1] += dtfm * b->fcm[1]; b->vcm[2] += dtfm * b->fcm[2]; // update conjqm, then transform to angmom, set velocity again // virial is already setup from initial_integrate MathExtra::transpose_matvec(b->ex_space,b->ey_space, b->ez_space,b->torque,tbody); MathExtra::quatvec(b->quat,tbody,fquat); if (tstat_flag || pstat_flag) { b->conjqm[0] = scale_r * b->conjqm[0] + dtf2 * fquat[0]; b->conjqm[1] = scale_r * b->conjqm[1] + dtf2 * fquat[1]; b->conjqm[2] = scale_r * b->conjqm[2] + dtf2 * fquat[2]; b->conjqm[3] = scale_r * b->conjqm[3] + dtf2 * fquat[3]; } else { b->conjqm[0] += dtf2 * fquat[0]; b->conjqm[1] += dtf2 * fquat[1]; b->conjqm[2] += dtf2 * fquat[2]; b->conjqm[3] += dtf2 * fquat[3]; } MathExtra::invquatvec(b->quat,b->conjqm,mbody); MathExtra::matvec(b->ex_space,b->ey_space,b->ez_space,mbody,b->angmom); b->angmom[0] *= 0.5; b->angmom[1] *= 0.5; b->angmom[2] *= 0.5; MathExtra::angmom_to_omega(b->angmom,b->ex_space,b->ey_space, b->ez_space,b->inertia,b->omega); } // forward communicate updated info of all bodies commflag = FINAL; comm->forward_comm_variable_fix(this); // accumulate translational and rotational kinetic energies if (pstat_flag) { akin_t = akin_r = 0.0; for (int ibody = 0; ibody < nlocal_body; ibody++) { Body *b = &body[ibody]; akin_t += b->mass*(b->vcm[0]*b->vcm[0] + b->vcm[1]*b->vcm[1] + b->vcm[2]*b->vcm[2]); akin_r += b->angmom[0]*b->omega[0] + b->angmom[1]*b->omega[1] + b->angmom[2]*b->omega[2]; } double ke[2],keall[2]; ke[0] = akin_t; ke[1] = akin_r; MPI_Allreduce(ke,keall,2,MPI_DOUBLE,MPI_SUM,world); akin_t = keall[0]; akin_r = 
keall[1]; } // set velocity/rotation of atoms in rigid bodies // virial is already setup from initial_integrate set_v(); // compute temperature and pressure tensor // couple to compute current pressure components // trigger virial computation on next timestep if (tcomputeflag) t_current = temperature->compute_scalar(); if (pstat_flag) { if (pstyle == ISO) pressure->compute_scalar(); else pressure->compute_vector(); couple(); pressure->addstep(update->ntimestep+1); } if (pstat_flag) nh_epsilon_dot(); // update eta_dot_t and eta_dot_r // update eta_dot_b if (tstat_flag) nhc_temp_integrate(); if (pstat_flag) nhc_press_integrate(); } /* ---------------------------------------------------------------------- */ void FixRigidNHSmall::nhc_temp_integrate() { int i,j,k; double kt,gfkt_t,gfkt_r,tmp,ms,s,s2; kt = boltz * t_target; gfkt_t = nf_t * kt; gfkt_r = nf_r * kt; // update thermostat masses double t_mass = boltz * t_target / (t_freq * t_freq); q_t[0] = nf_t * t_mass; q_r[0] = nf_r * t_mass; for (i = 1; i < t_chain; i++) q_t[i] = q_r[i] = t_mass; // update force of thermostats coupled to particles f_eta_t[0] = (akin_t * mvv2e - gfkt_t) / q_t[0]; f_eta_r[0] = (akin_r * mvv2e - gfkt_r) / q_r[0]; // multiple timestep iteration for (i = 0; i < t_iter; i++) { for (j = 0; j < t_order; j++) { // update thermostat velocities half step eta_dot_t[t_chain-1] += wdti2[j] * f_eta_t[t_chain-1]; eta_dot_r[t_chain-1] += wdti2[j] * f_eta_r[t_chain-1]; for (k = 1; k < t_chain; k++) { tmp = wdti4[j] * eta_dot_t[t_chain-k]; ms = maclaurin_series(tmp); s = exp(-1.0 * tmp); s2 = s * s; eta_dot_t[t_chain-k-1] = eta_dot_t[t_chain-k-1] * s2 + wdti2[j] * f_eta_t[t_chain-k-1] * s * ms; tmp = wdti4[j] * eta_dot_r[t_chain-k]; ms = maclaurin_series(tmp); s = exp(-1.0 * tmp); s2 = s * s; eta_dot_r[t_chain-k-1] = eta_dot_r[t_chain-k-1] * s2 + wdti2[j] * f_eta_r[t_chain-k-1] * s * ms; } // update thermostat positions a full step for (k = 0; k < t_chain; k++) { eta_t[k] += wdti1[j] * eta_dot_t[k]; 
eta_r[k] += wdti1[j] * eta_dot_r[k]; } // update thermostat forces for (k = 1; k < t_chain; k++) { f_eta_t[k] = q_t[k-1] * eta_dot_t[k-1] * eta_dot_t[k-1] - kt; f_eta_t[k] /= q_t[k]; f_eta_r[k] = q_r[k-1] * eta_dot_r[k-1] * eta_dot_r[k-1] - kt; f_eta_r[k] /= q_r[k]; } // update thermostat velocities a full step for (k = 0; k < t_chain-1; k++) { tmp = wdti4[j] * eta_dot_t[k+1]; ms = maclaurin_series(tmp); s = exp(-1.0 * tmp); s2 = s * s; eta_dot_t[k] = eta_dot_t[k] * s2 + wdti2[j] * f_eta_t[k] * s * ms; tmp = q_t[k] * eta_dot_t[k] * eta_dot_t[k] - kt; f_eta_t[k+1] = tmp / q_t[k+1]; tmp = wdti4[j] * eta_dot_r[k+1]; ms = maclaurin_series(tmp); s = exp(-1.0 * tmp); s2 = s * s; eta_dot_r[k] = eta_dot_r[k] * s2 + wdti2[j] * f_eta_r[k] * s * ms; tmp = q_r[k] * eta_dot_r[k] * eta_dot_r[k] - kt; f_eta_r[k+1] = tmp / q_r[k+1]; } eta_dot_t[t_chain-1] += wdti2[j] * f_eta_t[t_chain-1]; eta_dot_r[t_chain-1] += wdti2[j] * f_eta_r[t_chain-1]; } } } /* ---------------------------------------------------------------------- */ void FixRigidNHSmall::nhc_press_integrate() { int i,k; double tmp,s,s2,ms,kecurrent; double kt = boltz * t_target; double lkt_press = kt; // update thermostat masses double tb_mass = kt / (p_freq_max * p_freq_max); q_b[0] = tb_mass; for (int i = 1; i < p_chain; i++) { q_b[i] = tb_mass; f_eta_b[i] = q_b[i-1] * eta_dot_b[i-1] * eta_dot_b[i-1] - kt; f_eta_b[i] /= q_b[i]; } // update forces acting on thermostat kecurrent = 0.0; for (i = 0; i < 3; i++) if (p_flag[i]) { epsilon_mass[i] = (g_f + dimension) * kt / (p_freq[i] * p_freq[i]); kecurrent += epsilon_mass[i] * epsilon_dot[i] * epsilon_dot[i]; } f_eta_b[0] = (kecurrent - lkt_press) / q_b[0]; // update thermostat velocities a half step eta_dot_b[p_chain-1] += 0.5 * dtq * f_eta_b[p_chain-1]; for (k = 0; k < p_chain-1; k++) { tmp = 0.5 * dtq * eta_dot_b[p_chain-k-1]; ms = maclaurin_series(tmp); s = exp(-0.5 * tmp); s2 = s * s; eta_dot_b[p_chain-k-2] = eta_dot_b[p_chain-k-2] * s2 + dtq * f_eta_b[p_chain-k-2] * s * 
ms; } // update thermostat positions for (k = 0; k < p_chain; k++) eta_b[k] += dtv * eta_dot_b[k]; // update epsilon dot s = exp(-1.0 * dtq * eta_dot_b[0]); for (i = 0; i < 3; i++) if (p_flag[i]) epsilon_dot[i] *= s; kecurrent = 0.0; for (i = 0; i < 3; i++) if (p_flag[i]) kecurrent += epsilon_mass[i] * epsilon_dot[i] * epsilon_dot[i]; f_eta_b[0] = (kecurrent - lkt_press) / q_b[0]; // update thermostat velocites a full step for (k = 0; k < p_chain-1; k++) { tmp = 0.5 * dtq * eta_dot_b[k+1]; ms = maclaurin_series(tmp); s = exp(-0.5 * tmp); s2 = s * s; eta_dot_b[k] = eta_dot_b[k] * s2 + dtq * f_eta_b[k] * s * ms; tmp = q_b[k] * eta_dot_b[k] * eta_dot_b[k] - kt; f_eta_b[k+1] = tmp / q_b[k+1]; } eta_dot_b[p_chain-1] += 0.5 * dtq * f_eta_b[p_chain-1]; } /* ---------------------------------------------------------------------- compute kinetic energy in the extended Hamiltonian conserved quantity = sum of returned energy and potential energy -----------------------------------------------------------------------*/ double FixRigidNHSmall::compute_scalar() { - int i,k,ibody; + int i,k; double kt = boltz * t_target; double energy,ke_t,ke_q,tmp,Pkq[4]; - double *vcm,*inertia,*quat; + double *vcm,*quat; // compute the kinetic parts of H_NVE in Kameraj et al (JCP 2005, pp 224114) // translational and rotational kinetic energies ke_t = 0.0; ke_q = 0.0; for (int i = 0; i < nlocal_body; i++) { vcm = body[i].vcm; quat = body[i].quat; ke_t += body[i].mass * (vcm[0]*vcm[0] + vcm[1]*vcm[1] + vcm[2]*vcm[2]); for (k = 1; k < 4; k++) { if (k == 1) { Pkq[0] = -quat[1]; Pkq[1] = quat[0]; Pkq[2] = quat[3]; Pkq[3] = -quat[2]; } else if (k == 2) { Pkq[0] = -quat[2]; Pkq[1] = -quat[3]; Pkq[2] = quat[0]; Pkq[3] = quat[1]; } else if (k == 3) { Pkq[0] = -quat[3]; Pkq[1] = quat[2]; Pkq[2] = -quat[1]; Pkq[3] = quat[0]; } tmp = body[i].conjqm[0]*Pkq[0] + body[i].conjqm[1]*Pkq[1] + body[i].conjqm[2]*Pkq[2] + body[i].conjqm[3]*Pkq[3]; tmp *= tmp; if (fabs(body[i].inertia[k-1]) < 1e-6) tmp = 0.0; else 
tmp /= (8.0 * body[i].inertia[k-1]); ke_q += tmp; } } double ke[2],keall[2]; ke[0] = ke_t; ke[1] = ke_q; MPI_Allreduce(ke,keall,2,MPI_DOUBLE,MPI_SUM,world); ke_t = keall[0]; ke_q = keall[1]; energy = (ke_t + ke_q) * mvv2e; if (tstat_flag) { // thermostat chain energy: from equation 12 in Kameraj et al (JCP 2005) energy += kt * (nf_t * eta_t[0] + nf_r * eta_r[0]); for (i = 1; i < t_chain; i++) energy += kt * (eta_t[i] + eta_r[i]); for (i = 0; i < t_chain; i++) { energy += 0.5 * q_t[i] * (eta_dot_t[i] * eta_dot_t[i]); energy += 0.5 * q_r[i] * (eta_dot_r[i] * eta_dot_r[i]); } } if (pstat_flag) { // using equation 22 in Kameraj et al for H_NPT for (i = 0; i < 3; i++) energy += 0.5 * epsilon_mass[i] * epsilon_dot[i] * epsilon_dot[i]; double vol; if (dimension == 2) vol = domain->xprd * domain->yprd; else vol = domain->xprd * domain->yprd * domain->zprd; double p0 = (p_target[0] + p_target[1] + p_target[2]) / 3.0; energy += p0 * vol / nktv2p; for (i = 0; i < p_chain; i++) { energy += kt * eta_b[i]; energy += 0.5 * q_b[i] * (eta_dot_b[i] * eta_dot_b[i]); } } return energy; } /* ---------------------------------------------------------------------- */ void FixRigidNHSmall::couple() { double *tensor = pressure->vector; if (pstyle == ISO) { p_current[0] = p_current[1] = p_current[2] = pressure->scalar; } else if (pcouple == XYZ) { double ave = 1.0/3.0 * (tensor[0] + tensor[1] + tensor[2]); p_current[0] = p_current[1] = p_current[2] = ave; } else if (pcouple == XY) { double ave = 0.5 * (tensor[0] + tensor[1]); p_current[0] = p_current[1] = ave; p_current[2] = tensor[2]; } else if (pcouple == YZ) { double ave = 0.5 * (tensor[1] + tensor[2]); p_current[1] = p_current[2] = ave; p_current[0] = tensor[0]; } else if (pcouple == XZ) { double ave = 0.5 * (tensor[0] + tensor[2]); p_current[0] = p_current[2] = ave; p_current[1] = tensor[1]; } else { p_current[0] = tensor[0]; p_current[1] = tensor[1]; p_current[2] = tensor[2]; } } /* 
---------------------------------------------------------------------- */ void FixRigidNHSmall::remap() { int i; double oldlo,oldhi,ctr,expfac; double **x = atom->x; int *mask = atom->mask; int nlocal = atom->nlocal; // epsilon is not used, except for book-keeping for (i = 0; i < 3; i++) epsilon[i] += dtq * epsilon_dot[i]; // convert pertinent atoms and rigid bodies to lamda coords if (allremap) domain->x2lamda(nlocal); else { for (i = 0; i < nlocal; i++) if (mask[i] & dilate_group_bit) domain->x2lamda(x[i],x[i]); } if (nrigidfix) for (i = 0; i < nrigidfix; i++) modify->fix[rfix[i]]->deform(0); // reset global and local box to new size/shape for (i = 0; i < 3; i++) { if (p_flag[i]) { oldlo = domain->boxlo[i]; oldhi = domain->boxhi[i]; ctr = 0.5 * (oldlo + oldhi); expfac = exp(dtq * epsilon_dot[i]); domain->boxlo[i] = (oldlo-ctr)*expfac + ctr; domain->boxhi[i] = (oldhi-ctr)*expfac + ctr; } } domain->set_global_box(); domain->set_local_box(); // convert pertinent atoms and rigid bodies back to box coords if (allremap) domain->lamda2x(nlocal); else { for (i = 0; i < nlocal; i++) if (mask[i] & dilate_group_bit) domain->lamda2x(x[i],x[i]); } if (nrigidfix) for (i = 0; i< nrigidfix; i++) modify->fix[rfix[i]]->deform(1); } /* ---------------------------------------------------------------------- compute target temperature and kinetic energy -----------------------------------------------------------------------*/ void FixRigidNHSmall::compute_temp_target() { double delta = update->ntimestep - update->beginstep; if (delta != 0.0) delta /= update->endstep - update->beginstep; t_target = t_start + delta * (t_stop-t_start); } /* ---------------------------------------------------------------------- compute hydrostatic target pressure -----------------------------------------------------------------------*/ void FixRigidNHSmall::compute_press_target() { double delta = update->ntimestep - update->beginstep; if (delta != 0.0) delta /= update->endstep - update->beginstep; p_hydro 
= 0.0; for (int i = 0; i < 3; i++) if (p_flag[i]) { p_target[i] = p_start[i] + delta * (p_stop[i]-p_start[i]); p_hydro += p_target[i]; } p_hydro /= pdim; } /* ---------------------------------------------------------------------- apply evolution operators to quat, quat momentum see Miller paper cited in fix rigid/nvt and fix rigid/npt ------------------------------------------------------------------------- */ void FixRigidNHSmall::no_squish_rotate(int k, double *p, double *q, double *inertia, double dt) { double phi,c_phi,s_phi,kp[4],kq[4]; // apply permuation operator on p and q, get kp and kq if (k == 1) { kq[0] = -q[1]; kp[0] = -p[1]; kq[1] = q[0]; kp[1] = p[0]; kq[2] = q[3]; kp[2] = p[3]; kq[3] = -q[2]; kp[3] = -p[2]; } else if (k == 2) { kq[0] = -q[2]; kp[0] = -p[2]; kq[1] = -q[3]; kp[1] = -p[3]; kq[2] = q[0]; kp[2] = p[0]; kq[3] = q[1]; kp[3] = p[1]; } else if (k == 3) { kq[0] = -q[3]; kp[0] = -p[3]; kq[1] = q[2]; kp[1] = p[2]; kq[2] = -q[1]; kp[2] = -p[1]; kq[3] = q[0]; kp[3] = p[0]; } // obtain phi, cosines and sines phi = p[0]*kq[0] + p[1]*kq[1] + p[2]*kq[2] + p[3]*kq[3]; if (fabs(inertia[k-1]) < 1e-6) phi *= 0.0; else phi /= 4.0 * inertia[k-1]; c_phi = cos(dt * phi); s_phi = sin(dt * phi); // advance p and q p[0] = c_phi*p[0] + s_phi*kp[0]; p[1] = c_phi*p[1] + s_phi*kp[1]; p[2] = c_phi*p[2] + s_phi*kp[2]; p[3] = c_phi*p[3] + s_phi*kp[3]; q[0] = c_phi*q[0] + s_phi*kq[0]; q[1] = c_phi*q[1] + s_phi*kq[1]; q[2] = c_phi*q[2] + s_phi*kq[2]; q[3] = c_phi*q[3] + s_phi*kq[3]; } /* ---------------------------------------------------------------------- update epsilon_dot -----------------------------------------------------------------------*/ void FixRigidNHSmall::nh_epsilon_dot() { int i; double volume,scale,f_epsilon; if (dimension == 2) volume = domain->xprd*domain->yprd; else volume = domain->xprd*domain->yprd*domain->zprd; // MTK terms mtk_term1 = (akin_t + akin_r) * mvv2e / g_f; scale = exp(-1.0 * dtq * eta_dot_b[0]); for (i = 0; i < 3; i++) if (p_flag[i]) { 
f_epsilon = (p_current[i]-p_hydro)*volume / nktv2p + mtk_term1; f_epsilon /= epsilon_mass[i]; epsilon_dot[i] += dtq * f_epsilon; epsilon_dot[i] *= scale; } mtk_term2 = 0.0; for (i = 0; i < 3; i++) if (p_flag[i]) mtk_term2 += epsilon_dot[i]; mtk_term2 /= g_f; } /* ---------------------------------------------------------------------- pack entire state of Fix into one write ------------------------------------------------------------------------- */ void FixRigidNHSmall::write_restart(FILE *fp) { if (tstat_flag == 0 && pstat_flag == 0) return; int nsize = 2; // tstat_flag and pstat_flag if (tstat_flag) { nsize += 1; // t_chain nsize += 4*t_chain; // eta_t, eta_r, eta_dot_t, eta_dot_r } if (pstat_flag) { nsize += 7; // p_chain, epsilon(3) and epsilon_dot(3) nsize += 2*p_chain; } double *list; memory->create(list,nsize,"rigid_nh:list"); int n = 0; list[n++] = tstat_flag; if (tstat_flag) { list[n++] = t_chain; for (int i = 0; i < t_chain; i++) { list[n++] = eta_t[i]; list[n++] = eta_r[i]; list[n++] = eta_dot_t[i]; list[n++] = eta_dot_r[i]; } } list[n++] = pstat_flag; if (pstat_flag) { list[n++] = epsilon[0]; list[n++] = epsilon[1]; list[n++] = epsilon[2]; list[n++] = epsilon_dot[0]; list[n++] = epsilon_dot[1]; list[n++] = epsilon_dot[2]; list[n++] = p_chain; for (int i = 0; i < p_chain; i++) { list[n++] = eta_b[i]; list[n++] = eta_dot_b[i]; } } if (comm->me == 0) { int size = (nsize)*sizeof(double); fwrite(&size,sizeof(int),1,fp); fwrite(list,sizeof(double),nsize,fp); } memory->destroy(list); } /* ---------------------------------------------------------------------- use state info from restart file to restart the Fix ------------------------------------------------------------------------- */ void FixRigidNHSmall::restart(char *buf) { int n = 0; double *list = (double *) buf; int flag = static_cast<int> (list[n++]); if (flag) { int m = static_cast<int> (list[n++]); if (tstat_flag && m == t_chain) { for (int i = 0; i < t_chain; i++) { eta_t[i] = list[n++]; eta_r[i] = 
list[n++]; eta_dot_t[i] = list[n++]; eta_dot_r[i] = list[n++]; } } else n += 4*m; } flag = static_cast<int> (list[n++]); if (flag) { epsilon[0] = list[n++]; epsilon[1] = list[n++]; epsilon[2] = list[n++]; epsilon_dot[0] = list[n++]; epsilon_dot[1] = list[n++]; epsilon_dot[2] = list[n++]; int m = static_cast<int> (list[n++]); if (pstat_flag && m == p_chain) { for (int i = 0; i < p_chain; i++) { eta_b[i] = list[n++]; eta_dot_b[i] = list[n++]; } } else n += 2*m; } } /* ---------------------------------------------------------------------- */ int FixRigidNHSmall::modify_param(int narg, char **arg) { if (strcmp(arg[0],"temp") == 0) { if (narg < 2) error->all(FLERR,"Illegal fix_modify command"); if (!pstat_flag) error->all(FLERR,"Illegal fix_modify command"); if (tcomputeflag) { modify->delete_compute(id_temp); tcomputeflag = 0; } delete [] id_temp; int n = strlen(arg[1]) + 1; id_temp = new char[n]; strcpy(id_temp,arg[1]); int icompute = modify->find_compute(arg[1]); if (icompute < 0) error->all(FLERR,"Could not find fix_modify temperature ID"); temperature = modify->compute[icompute]; if (temperature->tempflag == 0) error->all(FLERR, "Fix_modify temperature ID does not compute temperature"); if (temperature->igroup != 0 && comm->me == 0) error->warning(FLERR,"Temperature for fix modify is not for group all"); // reset id_temp of pressure to new temperature ID if (pstat_flag) { icompute = modify->find_compute(id_press); if (icompute < 0) error->all(FLERR,"Pressure ID for fix modify does not exist"); modify->compute[icompute]->reset_extra_compute_fix(id_temp); } return 2; } else if (strcmp(arg[0],"press") == 0) { if (narg < 2) error->all(FLERR,"Illegal fix_modify command"); if (!pstat_flag) error->all(FLERR,"Illegal fix_modify command"); if (pcomputeflag) { modify->delete_compute(id_press); pcomputeflag = 0; } delete [] id_press; int n = strlen(arg[1]) + 1; id_press = new char[n]; strcpy(id_press,arg[1]); int icompute = modify->find_compute(arg[1]); if (icompute < 0) 
error->all(FLERR,"Could not find fix_modify pressure ID"); pressure = modify->compute[icompute]; if (pressure->pressflag == 0) error->all(FLERR,"Fix_modify pressure ID does not compute pressure"); return 2; } return 0; } /* ---------------------------------------------------------------------- */ void FixRigidNHSmall::allocate_chain() { if (tstat_flag) { q_t = new double[t_chain]; q_r = new double[t_chain]; eta_t = new double[t_chain]; eta_r = new double[t_chain]; eta_dot_t = new double[t_chain]; eta_dot_r = new double[t_chain]; f_eta_t = new double[t_chain]; f_eta_r = new double[t_chain]; } if (pstat_flag) { q_b = new double[p_chain]; eta_b = new double[p_chain]; eta_dot_b = new double[p_chain]; f_eta_b = new double[p_chain]; } } /* ---------------------------------------------------------------------- */ void FixRigidNHSmall::reset_target(double t_new) { t_start = t_stop = t_new; } /* ---------------------------------------------------------------------- */ void FixRigidNHSmall::allocate_order() { w = new double[t_order]; wdti1 = new double[t_order]; wdti2 = new double[t_order]; wdti4 = new double[t_order]; } /* ---------------------------------------------------------------------- */ void FixRigidNHSmall::deallocate_chain() { if (tstat_flag) { delete [] q_t; delete [] q_r; delete [] eta_t; delete [] eta_r; delete [] eta_dot_t; delete [] eta_dot_r; delete [] f_eta_t; delete [] f_eta_r; } if (pstat_flag) { delete [] q_b; delete [] eta_b; delete [] eta_dot_b; delete [] f_eta_b; } } /* ---------------------------------------------------------------------- */ void FixRigidNHSmall::deallocate_order() { delete [] w; delete [] wdti1; delete [] wdti2; delete [] wdti4; } diff --git a/src/USER-LB/fix_lb_fluid.cpp b/src/USER-LB/fix_lb_fluid.cpp index b75452f26..0161fd658 100644 --- a/src/USER-LB/fix_lb_fluid.cpp +++ b/src/USER-LB/fix_lb_fluid.cpp @@ -1,3368 +1,3367 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale 
Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing authors: Frances Mackay, Santtu Ollila, Colin Denniston (UWO) ------------------------------------------------------------------------- */ #include "fix_lb_fluid.h" #include "math.h" #include "mpi.h" #include "stdlib.h" #include "stdio.h" #include "string.h" #include "comm.h" #include "memory.h" #include "error.h" #include "domain.h" #include "atom.h" #include <iostream> #include <iomanip> #include "group.h" #include "random_mars.h" #include "update.h" #include "force.h" #include "modify.h" using namespace LAMMPS_NS; using namespace FixConst; static const double kappa_lb=0.0; -static const double sqrt2=1.41421356237310; FixLbFluid::FixLbFluid(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg) { //===================================================================================================== // Sample inputfile call: // fix # group lb/fluid nevery typeLB viscosity densityinit_real // // where: nevery: call this fix every nevery timesteps. // (nevery generally set to 1). // typeLB: there are two different integrators // in the code labelled "1" and "2". // viscosity: the viscosity of the fluid. // densityinit_real: the density of the fluid. // // optional arguments: // "setArea" type node_area: set the surface area per node associated with a // given atom type. By default the surface area // is set at 1.0*dx_lb^2. 
// "setGamma" gamma: specify a user-defined value for the force // coupling constant, instead of using the default // value. // "scaleGamma" type scale_factor: scale the user provided force coupling constant // by the factor, scale_factor, for the given atom // type. // "dx" dx_lb: the lattice-Boltzmann grid spacing. // "dm" dm_lb: the lattice-Boltzmann mass unit. // "a0" a_0_real: the square of the sound speed in the fluid. // "noise" Temperature seed: include noise in the system. // Temperature is the temperature for the fluid. // seed is the seed for the random number generator. // "calcforce" N group: print the force acting on a given group every // N timesteps. // "trilinear": use the trilinear interpolation stencil. // "read_restart" restart_file: restart a fluid run from restart_file. // "write_restart" N: write a fluid restart file every N timesteps. // "zwall_velocity" velocity_bottom velocity_top: assign velocities to the z-walls // in the system. // "bodyforce" bodyforcex bodyforcey bodyforcez: add a constant body force to the // fluid. // "printfluid" N: print the fluid density and velocity at each // grid point every N timesteps. // "D3Q19": use the 19 velocity D3Q19 model. By default, // the 15 velocity D3Q15 model is used. 
//===================================================================================================== if(narg <7) error->all(FLERR,"Illegal fix lb/fluid command"); MPI_Comm_rank(world,&me); MPI_Comm_size(world,&nprocs); nevery = atoi(arg[3]); typeLB = atoi(arg[4]); viscosity = atof(arg[5]); densityinit_real = atof(arg[6]); // Default values for optional arguments: force_diagnostic=0; noisestress = 0; trilinear_stencil = 0; readrestart = 0; printrestart = 0; bodyforcex = bodyforcey = bodyforcez = 0.0; vwtp = vwbt = 0.0; printfluid = 0; T = 300.0; dm_lb = 1.0; fixviscouslb = 0; setdx = 1; seta0 = 1; setGamma = 0; setArea = 0; numvel = 15; Gamma = NULL; NodeArea = NULL; int iarg = 7; while (iarg < narg){ if(strcmp(arg[iarg],"setArea")==0){ if(setGamma == 1) error->all(FLERR,"Illegal fix lb/fluid command: cannot use a combination of default and user-specified gamma values"); setArea = 1; int itype = atoi(arg[iarg+1]); double areafactor = atof(arg[iarg+2]); if(itype <= 0 || itype > atom->ntypes || areafactor < 0.0) error->all(FLERR,"Illegal fix lb/fluid command: setArea"); if(NodeArea == NULL){ NodeArea = new double[atom->ntypes+1]; for(int i=0; i<=atom->ntypes; i++) NodeArea[i] = -1.0; } NodeArea[itype] = areafactor; iarg += 3; } else if(strcmp(arg[iarg],"setGamma")==0){ if(setArea == 1) error->all(FLERR,"Illegal fix lb/fluid command: cannot use a combination of default and user-specified gamma values"); setGamma = 1; double Gammaone; Gammaone = atof(arg[iarg+1]); if(Gamma == NULL) Gamma = new double[atom->ntypes+1]; for(int i=0; i<=atom->ntypes; i++) Gamma[i] = Gammaone; iarg += 2; } else if(strcmp(arg[iarg],"scaleGamma")==0){ if(setGamma == 0) error->all(FLERR,"Illegal fix lb/fluid command: must set a value for Gamma before scaling it"); int itype = atoi(arg[iarg+1]); double scalefactor = atof(arg[iarg+2]); if(itype <= 0 || itype > atom->ntypes || scalefactor < 0.0) error->all(FLERR,"Illegal fix lb/fluid command: scaleGamma"); Gamma[itype] *= scalefactor; iarg += 
3; } else if(strcmp(arg[iarg],"dx")==0){ dx_lb = atof(arg[iarg+1]); iarg += 2; setdx = 0; } else if(strcmp(arg[iarg],"dm")==0){ dm_lb = atof(arg[iarg+1]); iarg += 2; } else if(strcmp(arg[iarg],"a0")==0){ a_0_real = atof(arg[iarg+1]); iarg += 2; seta0 = 0; } else if(strcmp(arg[iarg],"noise")== 0){ noisestress = 1; T = atof(arg[iarg+1]); seed = atoi(arg[iarg+2]); iarg += 3; } else if(strcmp(arg[iarg],"calcforce")==0){ force_diagnostic = atoi(arg[iarg+1]); if(force_diagnostic % nevery != 0){ char str[200]; sprintf(str,"Requesting calcforce output every %i timesteps. Will only print output for those timesteps that are a multiple of nevery.",force_diagnostic); error->warning(FLERR,str); } igroupforce=group->find(arg[iarg+2]); iarg += 3; } else if(strcmp(arg[iarg],"trilinear")==0){ trilinear_stencil = 1; iarg += 1; } else if(strcmp(arg[iarg],"read_restart")==0){ readrestart = 1; int nlength = strlen(arg[iarg+1]) + 16; char *filename = new char[nlength]; strcpy(filename,arg[iarg+1]); MPI_File_open(world,filename,MPI_MODE_RDONLY,MPI_INFO_NULL,&pFileRead); delete [] filename; iarg += 2; } else if(strcmp(arg[iarg],"write_restart")==0){ printrestart = atoi(arg[iarg+1]); if(printrestart % nevery != 0){ char str[200]; sprintf(str,"Requesting restart files every %i timesteps. 
Will only print restart files for those timesteps that are a multiple of nevery.",printrestart); error->warning(FLERR,str); } iarg += 2; } else if(strcmp(arg[iarg],"zwall_velocity")==0){ if(domain->periodicity[2]!=0) error->all(FLERR,"fix lb/fluid error: setting \ a z wall velocity without implementing fixed BCs in z"); vwbt = atof(arg[iarg+1]); vwtp = atof(arg[iarg+2]); iarg += 3; } else if(strcmp(arg[iarg],"bodyforce")==0){ bodyforcex = atof(arg[iarg+1]); bodyforcey = atof(arg[iarg+2]); bodyforcez = atof(arg[iarg+3]); iarg += 4; } else if(strcmp(arg[iarg],"printfluid")==0){ printfluid = atoi(arg[iarg+1]); iarg += 2; } else if(strcmp(arg[iarg],"D3Q19")==0){ numvel = 19; iarg += 1; } else error->all(FLERR,"Illegal fix lb/fluid command"); } //-------------------------------------------------------------------------- //Choose between D3Q15 and D3Q19 functions: //-------------------------------------------------------------------------- if(numvel == 15){ initializeLB = &FixLbFluid::initializeLB15; equilibriumdist = &FixLbFluid::equilibriumdist15; update_full = &FixLbFluid::update_full15; }else{ initializeLB = &FixLbFluid::initializeLB19; equilibriumdist = &FixLbFluid::equilibriumdist19; update_full = &FixLbFluid::update_full19; } //-------------------------------------------------------------------------- // perform initial allocation of atom-based array register // with Atom class //-------------------------------------------------------------------------- hydroF = NULL; grow_arrays(atom->nmax); atom->add_callback(0); for(int i=0; i<atom->nmax; i++) for(int j=0; j<3; j++) hydroF[i][j] = 0.0; Ng_lb = NULL; w_lb = NULL; mg_lb = NULL; e = NULL; feq = NULL; feqold = NULL; feqn = NULL; feqoldn = NULL; f_lb = NULL; fnew = NULL; density_lb = NULL; u_lb = NULL; altogether = NULL; buf = NULL; Ff = NULL; Fftempx = NULL; Fftempy = NULL; Fftempz = NULL; //-------------------------------------------------------------------------- // Set the lattice Boltzmann dt. 
//-------------------------------------------------------------------------- dt_lb=nevery*(update->dt); //-------------------------------------------------------------------------- // Set the lattice Boltzmann dx if it wasn't specified in the // input. //-------------------------------------------------------------------------- if(setdx == 1){ double dx_lb1 = sqrt(3.0*viscosity*dt_lb/densityinit_real); double mindomain = std::min(std::min(domain->xprd/comm->procgrid[0],domain->yprd/comm->procgrid[1]),domain->zprd/comm->procgrid[2]); dx_lb = mindomain/floor(mindomain/dx_lb1); if(comm->me==0){ char str[128]; sprintf(str,"Setting the lattice-Boltzmann dx to %10.6f",dx_lb); error->message(FLERR,str); } } //-------------------------------------------------------------------------- // If the area per node has not been set by the user, set to the // default value of dx_lb*dx_lb. //-------------------------------------------------------------------------- if(setGamma == 0){ if(setArea == 0){ if(comm->me==0){ error->message(FLERR,"Assuming an area per node of dx*dx for all of the MD particles. This should only be used if these all correspond to point particles; otherwise, change using the setArea keyword"); } NodeArea = new double[atom->ntypes+1]; for(int i=0; i<=atom->ntypes; i++) NodeArea[i] = -1.0; } for(int i=0; i<=atom->ntypes; i++) if(NodeArea[i] < 0.0) NodeArea[i] = dx_lb*dx_lb; } //-------------------------------------------------------------------------- // Set a0 if it wasn't specified in the input //-------------------------------------------------------------------------- if(seta0 == 1) a_0_real = 0.33333333*dx_lb*dx_lb/dt_lb/dt_lb; //-------------------------------------------------------------------------- // Check to make sure that the total number of grid points in each direction // divides evenly among the processors in that direction. // Shrink-wrapped boundary conditions (which are not permitted by this fix) // might cause a problem, so check for this. 
A full check of the boundary // conditions is performed in the init routine, rather than here, as it is // possible to change the BCs between runs. //-------------------------------------------------------------------------- double aa; double eps=1.0e-8; aa = (domain->xprd/comm->procgrid[0])/dx_lb; if(fabs(aa - floor(aa+0.5)) > eps){ if(domain->boundary[0][0] != 0){ error->all(FLERR,"the x-direction must be periodic"); } char errormessage[200]; sprintf(errormessage,"With dx= %f, and the simulation domain divided by %i processors in the x direction, the simulation domain in the x direction must be a multiple of %f",dx_lb,comm->procgrid[0],comm->procgrid[0]*dx_lb); error->all(FLERR,errormessage); } aa = (domain->yprd/comm->procgrid[1])/dx_lb; if(fabs(aa - floor(aa+0.5)) > eps){ if(domain->boundary[1][0] != 0){ error->all(FLERR,"the y-direction must be periodic"); } char errormessage[200]; sprintf(errormessage,"With dx= %f, and the simulation domain divided by %i processors in the y direction, the simulation domain in the y direction must be a multiple of %f",dx_lb,comm->procgrid[1],comm->procgrid[1]*dx_lb); error->all(FLERR,errormessage); } aa = (domain->zprd/comm->procgrid[2])/dx_lb; if(fabs(aa - floor(aa+0.5)) > eps){ if(domain->boundary[2][0] == 2 || domain->boundary[2][0] == 3){ error->all(FLERR,"the z-direction can not have shrink-wrap boundary conditions"); } char errormessage[200]; sprintf(errormessage,"With dx= %f, and the simulation domain divided by %i processors in the z direction, the simulation domain in the z direction must be a multiple of %f",dx_lb,comm->procgrid[2],comm->procgrid[2]*dx_lb); error->all(FLERR,errormessage); } //-------------------------------------------------------------------------- // Set the total number of grid points in each direction. 
//-------------------------------------------------------------------------- Nbx = (int)(domain->xprd/dx_lb + 0.5); Nby = (int)(domain->yprd/dx_lb + 0.5); Nbz = (int)(domain->zprd/dx_lb + 0.5); //-------------------------------------------------------------------------- // Set the number of grid points in each dimension for the local subgrids. //-------------------------------------------------------------------------- subNbx= Nbx/comm->procgrid[0] + 2; subNby= Nby/comm->procgrid[1] + 2; subNbz= Nbz/comm->procgrid[2] + 2; //-------------------------------------------------------------------------- // In order to calculate the fluid forces correctly, need to have atleast // 5 grid points in each direction per processor. //-------------------------------------------------------------------------- if(subNbx<7 || subNby < 7 || subNbz<7) error->all(FLERR,"Need at least 5 grid points in each direction per processor"); // If there are walls in the z-direction add an extra grid point. if(domain->periodicity[2]==0){ Nbz += 1; if(comm->myloc[2]==comm->procgrid[2]-1) subNbz += 1; } if(comm->me==0){ char str[128]; if(setdx == 1){ sprintf(str,"Using a lattice-Boltzmann grid of %i by %i by %i total grid points. To change, use the dx keyword",Nbx,Nby,Nbz); }else{ sprintf(str,"Using a lattice-Boltzmann grid of %i by %i by %i total grid points.",Nbx,Nby,Nbz); } error->message(FLERR,str); } //-------------------------------------------------------------------------- // Store the largest value of subNbz, which is needed for allocating the // buf array (since a processor with comm->myloc[2] == comm->procgrid[2]-1 // may have an additional subNbz point as compared with the rest). 
//-------------------------------------------------------------------------- int subNbzmax; MPI_Allreduce(&subNbz,&subNbzmax,1,MPI_INT,MPI_MAX,world); //-------------------------------------------------------------------------- // Create the MPI datatypes used to pass portions of arrays: // datatypes to pass the f and feq arrays. //-------------------------------------------------------------------------- MPI_Aint lb, sizeofdouble; MPI_Type_get_extent(MPI_DOUBLE,&lb,&sizeofdouble); MPI_Type_vector(subNbz-2,numvel,numvel,MPI_DOUBLE,&oneslice); MPI_Type_commit(&oneslice); MPI_Type_create_hvector(subNby-2,1,numvel*subNbz*sizeofdouble,oneslice,&passxf); MPI_Type_commit(&passxf); MPI_Type_create_hvector(subNbx,1,numvel*subNbz*subNby*sizeofdouble,oneslice,&passyf); MPI_Type_commit(&passyf); MPI_Type_free(&oneslice); MPI_Type_vector(subNby,numvel,numvel*subNbz,MPI_DOUBLE,&oneslice); MPI_Type_commit(&oneslice); MPI_Type_create_hvector(subNbx,1,numvel*subNbz*subNby*sizeofdouble,oneslice,&passzf); MPI_Type_commit(&passzf); // datatypes to pass the u array, and the Ff array. MPI_Type_free(&oneslice); MPI_Type_vector(subNbz+3,3,3,MPI_DOUBLE,&oneslice); MPI_Type_commit(&oneslice); MPI_Type_create_hvector(subNby+3,1,3*(subNbz+3)*sizeofdouble,oneslice,&passxu); MPI_Type_commit(&passxu); MPI_Type_create_hvector(subNbx+3,1,3*(subNbz+3)*(subNby+3)*sizeofdouble,oneslice,&passyu); MPI_Type_commit(&passyu); MPI_Type_free(&oneslice); MPI_Type_vector(subNby+3,3,3*(subNbz+3),MPI_DOUBLE,&oneslice); MPI_Type_commit(&oneslice); MPI_Type_create_hvector(subNbx+3,1,3*(subNbz+3)*(subNby+3)*sizeofdouble,oneslice,&passzu); MPI_Type_commit(&passzu); // datatypes to pass the density array. 
MPI_Type_free(&oneslice); MPI_Type_vector(subNbz+3,1,1,MPI_DOUBLE,&oneslice); MPI_Type_commit(&oneslice); MPI_Type_create_hvector(subNby+3,1,1*(subNbz+3)*sizeofdouble,oneslice,&passxrho); MPI_Type_commit(&passxrho); MPI_Type_create_hvector(subNbx+3,1,1*(subNbz+3)*(subNby+3)*sizeofdouble,oneslice,&passyrho); MPI_Type_commit(&passyrho); MPI_Type_free(&oneslice); MPI_Type_vector(subNby+3,1,1*(subNbz+3),MPI_DOUBLE,&oneslice); MPI_Type_commit(&oneslice); MPI_Type_create_hvector(subNbx+3,1,1*(subNbz+3)*(subNby+3)*sizeofdouble,oneslice,&passzrho); MPI_Type_commit(&passzrho); // datatypes to receive a portion of the Ff array. MPI_Type_free(&oneslice); MPI_Type_vector(subNbz+3,3,3,MPI_DOUBLE,&oneslice); MPI_Type_commit(&oneslice); MPI_Type_create_hvector(subNby+3,1,3*(subNbz+3)*sizeofdouble,oneslice,&passxtemp); MPI_Type_commit(&passxtemp); MPI_Type_create_hvector(subNbx+3,1,3*(subNbz+3)*5*sizeofdouble,oneslice,&passytemp); MPI_Type_commit(&passytemp); MPI_Type_free(&oneslice); MPI_Type_vector(subNby+3,3,3*5,MPI_DOUBLE,&oneslice); MPI_Type_commit(&oneslice); MPI_Type_create_hvector(subNbx+3,1,3*5*(subNby+3)*sizeofdouble,oneslice,&passztemp); MPI_Type_commit(&passztemp); MPI_Type_free(&oneslice); //-------------------------------------------------------------------------- // Allocate the necessary arrays. 
//-------------------------------------------------------------------------- memory->create(Ng_lb,numvel,"FixLbFluid:Ng_lb"); memory->create(w_lb,numvel,"FixLbFluid:w_lb"); memory->create(mg_lb,numvel,numvel,"FixLbFluid:mg_lb"); memory->create(e,numvel,3,"FixLbFluid:e"); memory->create(feq,subNbx,subNby,subNbz,numvel,"FixLbFluid:feq"); if(typeLB == 2){ memory->create(feqold,subNbx,subNby,subNbz,numvel,"FixLbFluid:feqold"); memory->create(feqn,subNbx,subNby,subNbz,numvel,"FixLbFluid:feqn"); memory->create(feqoldn,subNbx,subNby,subNbz,numvel,"FixLbFluid:feqoldn"); } memory->create(f_lb,subNbx,subNby,subNbz,numvel,"FixLbFluid:f_lb"); memory->create(fnew,subNbx,subNby,subNbz,numvel,"FixLbFluid:fnew"); memory->create(density_lb,subNbx+3,subNby+3,subNbz+3,"FixLbFluid:density_lb"); memory->create(u_lb,subNbx+3,subNby+3,subNbz+3,3,"FixLbFluid:u_lb"); if(printfluid > 0){ memory->create(buf,subNbx,subNby,subNbzmax,4,"FixLbFluid:buf"); if(me==0) memory->create(altogether,Nbx,Nby,Nbz,4,"FixLbFluid:altogether"); } memory->create(Ff,subNbx+3,subNby+3,subNbz+3,3,"FixLbFluid:Ff"); memory->create(Fftempx,5,subNby+3,subNbz+3,3,"FixLbFluid:Fftempx"); memory->create(Fftempy,subNbx+3,5,subNbz+3,3,"FixLbFluid:Fftempy"); memory->create(Fftempz,subNbx+3,subNby+3,5,3,"FixLbFluid:Fftempz"); if(noisestress==1){ random = new RanMars(lmp,seed + comm->me); } //-------------------------------------------------------------------------- // Rescale the variables to Lattice Boltzmann dimensionless units. //-------------------------------------------------------------------------- rescale(); //-------------------------------------------------------------------------- // Initialize the arrays. 
//-------------------------------------------------------------------------- (*this.*initializeLB)(); initialize_feq(); } FixLbFluid::~FixLbFluid() { atom->delete_callback(id,0); memory->destroy(hydroF); memory->destroy(Ng_lb); memory->destroy(w_lb); memory->destroy(mg_lb); memory->destroy(e); memory->destroy(feq); if(typeLB == 2){ memory->destroy(feqold); memory->destroy(feqn); memory->destroy(feqoldn); } memory->destroy(f_lb); memory->destroy(fnew); memory->destroy(density_lb); memory->destroy(u_lb); if(printfluid>0){ if(me==0) memory->destroy(altogether); memory->destroy(buf); } memory->destroy(Ff); memory->destroy(Fftempx); memory->destroy(Fftempy); memory->destroy(Fftempz); if(noisestress==1){ delete random; } if(setGamma == 1){ delete [] Gamma; }else{ delete [] NodeArea; } } int FixLbFluid::setmask() { int mask =0; mask |= INITIAL_INTEGRATE; mask |= POST_FORCE; mask |= END_OF_STEP; return mask; } void FixLbFluid::init(void) { int i,j; //-------------------------------------------------------------------------- // Check to see if the MD timestep has changed between runs. //-------------------------------------------------------------------------- double dt_lb_now; dt_lb_now=nevery*(update->dt); if(fabs(dt_lb_now - dt_lb) > 1.0e-12){ error->warning(FLERR,"Timestep has changed between runs with the same lb/fluid. Unphysical results may occur"); } //-------------------------------------------------------------------------- // Make sure the size of the simulation domain has not changed // between runs. //-------------------------------------------------------------------------- int Nbx_now,Nby_now,Nbz_now; Nbx_now = (int)(domain->xprd/dx_lb + 0.5); Nby_now = (int)(domain->yprd/dx_lb + 0.5); Nbz_now = (int)(domain->zprd/dx_lb + 0.5); // If there are walls in the z-direction add an extra grid point. 
if(domain->periodicity[2]==0){ Nbz_now += 1; } if(Nbx_now != Nbx || Nby_now != Nby || Nbz_now != Nbz){ error->all(FLERR,"the simulation domain can not change shape between runs with the same lb/fluid"); } //-------------------------------------------------------------------------- // Check to make sure that the chosen LAMMPS boundary types are compatible // with this fix. // shrink-wrap is not compatible in any dimension. // fixed only works in the z-direction. //-------------------------------------------------------------------------- if(domain->boundary[0][0] != 0){ error->all(FLERR,"the x-direction must be periodic"); } if(domain->boundary[1][0] != 0){ error->all(FLERR,"the y-direction must be periodic"); } if(domain->boundary[2][0] == 2 || domain->boundary[2][0] == 3){ error->all(FLERR,"the z-direction can not have shrink-wrap boundary conditions"); } //-------------------------------------------------------------------------- // Check if the lb/viscous fix is also called: //-------------------------------------------------------------------------- groupbit_viscouslb = groupbit_pc = groupbit_rigid_pc_sphere = 0; for (i = 0; i < modify->nfix; i++){ if (strcmp(modify->fix[i]->style,"lb/viscous") == 0){ fixviscouslb = 1; groupbit_viscouslb = group->bitmask[modify->fix[i]->igroup]; } if(strcmp(modify->fix[i]->style,"lb/pc")==0){ groupbit_pc = group->bitmask[modify->fix[i]->igroup]; } if(strcmp(modify->fix[i]->style,"lb/rigid/pc/sphere")==0){ groupbit_rigid_pc_sphere = group->bitmask[modify->fix[i]->igroup]; } } // Warn if the fluid force is not applied to any of the particles. if(!(groupbit_viscouslb || groupbit_pc || groupbit_rigid_pc_sphere) && comm->me==0){ error->message(FLERR,"Not adding the fluid force to any of the MD particles. 
To add this force use one of the lb/viscous, lb/pc, or lb/rigid/pc/sphere fixes");
  }

  // If fix lb/viscous is called for a particular atom, make sure
  // lb/pc or lb/rigid/pc/sphere are not:
  if(fixviscouslb == 1){
    int *mask = atom->mask;
    int nlocal = atom->nlocal;
    for(j=0; j<nlocal; j++){
      if((mask[j] & groupbit) && (mask[j] & groupbit_viscouslb) && (mask[j] & groupbit_pc))
        error->one(FLERR,"should not use the lb/viscous command when integrating with the lb/pc fix");
      if((mask[j] & groupbit) && (mask[j] & groupbit_viscouslb) && (mask[j] & groupbit_rigid_pc_sphere))
        error->one(FLERR,"should not use the lb/viscous command when integrating with the lb/rigid/pc/sphere fix");
    }
  }

}

//==========================================================================
// setup: computes the force on the fluid once, but only when the member
// `step` is already positive (the restart case described below).
// The vflag argument is not used.
//==========================================================================
void FixLbFluid::setup(int vflag)
{
  //--------------------------------------------------------------------------
  // Need to calculate the force on the fluid for a restart run.
  //--------------------------------------------------------------------------
  if(step > 0)
    calc_fluidforce();
}

//==========================================================================
// initial_integrate: advances the fluid by one lattice-Boltzmann update.
// On timesteps that are a multiple of nevery it
//   1. prints the column headers for the diagnostic output (first call only),
//   2. evaluates the equilibrium distribution through the equilibriumdist
//      member-function pointer,
//   3. runs the update routine selected by update_full and swaps f_lb/fnew,
//   4. recomputes the moments with parametercalc_full(),
//   5. for typeLB == 2, swaps the current and stored equilibrium arrays.
// Independently of nevery, streamout() is called every printfluid steps.
// The vflag argument is not used.
//==========================================================================
void FixLbFluid::initial_integrate(int vflag)
{
  // only call every nevery timesteps (by default nevery only affects how
  // often end_of_step is called).
  if(update->ntimestep % nevery == 0){
    //--------------------------------------------------------------------------
    // Print a header labelling any output printed to the screen.
    //--------------------------------------------------------------------------
    // printheader is a function-local static: it is cleared after the first
    // pass, so the headers are emitted once per process, not once per fix
    // instance or per run.
    static int printheader = 1;

    if(printheader == 1){
      if(force_diagnostic > 0 && me == 0){
        printf("-------------------------------------------------------------------------------\n");
        printf(" F_x F_y F_z T_x T_y T_z\n");
        printf("-------------------------------------------------------------------------------\n");
      }

      if(printfluid > 0 && me == 0){
        printf("---------------------------------------------------------------------\n");
        printf(" density u_x u_y u_z \n");
        printf("---------------------------------------------------------------------\n");
      }
      printheader = 0;
    }

    //--------------------------------------------------------------------------
    // Determine the equilibrium distribution on the local subgrid.
    //--------------------------------------------------------------------------
    // Called through a pointer-to-member; the index range passed is
    // [1,subNb-1) in each direction.
    (*this.*equilibriumdist)(1,subNbx-1,1,subNby-1,1,subNbz-1);

    //--------------------------------------------------------------------------
    // Using the equilibrium distribution, calculate the new
    // distribution function.
    //--------------------------------------------------------------------------
    (*this.*update_full)();

    // NOTE(review): presumably update_full() writes its result into fnew, so
    // this swap makes the new distribution the current one -- confirm.
    std::swap(f_lb,fnew);

    //--------------------------------------------------------------------------
    // Calculate moments of the distribution function.
    //--------------------------------------------------------------------------
    parametercalc_full();

    //--------------------------------------------------------------------------
    // Store the equilibrium distribution function, it is needed in
    // the next time step by the update routine.
    //--------------------------------------------------------------------------
    if(typeLB == 2){
      std::swap(feqold,feq);
      std::swap(feqoldn,feqn);
    }
  }

  //--------------------------------------------------------------------------
  // Perform diagnostics, and print output for the graphics program
  //--------------------------------------------------------------------------
  // Skipped on timestep 0 and whenever printfluid is not positive.
  if(printfluid > 0 && update->ntimestep > 0 && (update->ntimestep % printfluid == 0))
    streamout();

}

//==========================================================================
// post_force: on timesteps that are a multiple of nevery, recomputes the
// force on the fluid when an lb/viscous fix was detected (fixviscouslb == 1).
// The vflag argument is not used.
//==========================================================================
void FixLbFluid::post_force(int vflag)
{
  // only call every nevery timesteps (by default nevery only affects how
  // often end_of_step is called).
  if(update->ntimestep % nevery == 0){
    if(fixviscouslb==1)
      calc_fluidforce();
  }

}

//==========================================================================
// end_of_step: recomputes the force on the fluid when no lb/viscous fix is
// present (fixviscouslb == 0), and writes a fluid restart file every
// printrestart timesteps when printrestart is positive.
//==========================================================================
void FixLbFluid::end_of_step()
{
  // end_of_step is only called every nevery timesteps
  if(fixviscouslb==0)
    calc_fluidforce();

  if(printrestart>0){
    if((update->ntimestep)%printrestart == 0){
      write_restartfile();
    }
  }

}

//==========================================================================
//   allocate atom-based array
//==========================================================================
// Resizes hydroF to nmax rows of 3 doubles through the memory helper.
void FixLbFluid::grow_arrays(int nmax)
{
  memory->grow(hydroF,nmax,3,"FixLbFluid:hydroF");
}

//==========================================================================
//   copy values within local atom-based array
//==========================================================================
// Copies the three hydroF components of atom i onto atom j.
// The delflag argument is not used.
void FixLbFluid::copy_arrays(int i, int j, int delflag)
{
  hydroF[j][0] = hydroF[i][0];
  hydroF[j][1] = hydroF[i][1];
  hydroF[j][2] = hydroF[i][2];
}

//==========================================================================
//   pack values in local atom-based array for exchange with another proc
//==========================================================================
// Writes the three hydroF components of atom i to buf[0..2] and returns the
// number of values packed (3).
int FixLbFluid::pack_exchange(int i, double *buf)
{
  buf[0] = hydroF[i][0];
  buf[1] = hydroF[i][1];
  buf[2] = hydroF[i][2];

  return 3;
}

//==========================================================================
//   unpack values in local atom-based
//   array from exchange with another proc
//==========================================================================
// Reads three values from buf[0..2] into hydroF[nlocal] and returns the
// number of values consumed (3).
int FixLbFluid::unpack_exchange(int nlocal, double *buf)
{
  hydroF[nlocal][0] = buf[0];
  hydroF[nlocal][1] = buf[1];
  hydroF[nlocal][2] = buf[2];

  return 3;
}

//==========================================================================
//   calculate the force from the local atoms acting on the fluid.
//==========================================================================
// Steps performed:
//   1. zero Ff, the three Fftemp* receive buffers and hydroF,
//   2. on force-diagnostic timesteps, compute the centre of mass of the
//      igroupforce group (collective MPI_Allreduce),
//   3. for every local atom in this fix's group, run the selected stencil
//      routine, which accumulates into Ff and sets hydroF[i],
//   4. exchange the boundary planes of Ff with the neighbouring processors in
//      x, then y, then z, and add the received planes to Ff,
//   5. on force-diagnostic timesteps, reduce and print the total force and
//      torque on rank 0.
// The MPI calls are collective/paired, so every rank must call this function
// on the same timesteps.
void FixLbFluid::calc_fluidforce(void)
{
  int *mask = atom->mask;
  int nlocal = atom->nlocal;
  double **x = atom->x;
  int i,j,k,m;
  // Only the first 10 entries of requests/statuses are used below.
  MPI_Request requests[20];
  MPI_Status statuses[20];
  double forceloc[3],force[3];
  double torqueloc[3],torque[3];

  //--------------------------------------------------------------------------
  // Zero out arrays
  //--------------------------------------------------------------------------
  // The fills treat each 4-d array as one contiguous run of doubles starting
  // at element [0][0][0][0].
  std::fill(&Ff[0][0][0][0],&Ff[0][0][0][0] + (subNbx+3)*(subNby+3)*(subNbz+3)*3,0.0);
  std::fill(&Fftempx[0][0][0][0],&Fftempx[0][0][0][0] + 5*(subNby+3)*(subNbz+3)*3,0.0);
  std::fill(&Fftempy[0][0][0][0],&Fftempy[0][0][0][0] + (subNbx+3)*5*(subNbz+3)*3,0.0);
  std::fill(&Fftempz[0][0][0][0],&Fftempz[0][0][0][0] + (subNbx+3)*(subNby+3)*5*3,0.0);

  forceloc[0] = forceloc[1] = forceloc[2] = 0.0;
  torqueloc[0] = torqueloc[1] = torqueloc[2] = 0.0;

  for(i=0; i<atom->nmax; i++)
    for(j=0; j<3; j++)
      hydroF[i][j] = 0.0;


  double unwrap[3];
  double dx,dy,dz;
  double massone;
  imageint *image = atom->image;
  double *rmass = atom->rmass;
  double *mass = atom->mass;
  int *type = atom->type;
  double sum[4],xcm[4];

  if(force_diagnostic > 0 && update->ntimestep > 0 && (update->ntimestep % force_diagnostic == 0)){
    //Calculate the center of mass of the particle group
    //(needed to calculate the torque).
    // sum[0..2] hold the mass-weighted unwrapped position, sum[3] the mass.
    sum[0] = sum[1] = sum[2] = sum[3] = 0.0;
    for(i=0; i<nlocal; i++){
      if(mask[i] & group->bitmask[igroupforce]){

        domain->unmap(x[i],image[i],unwrap);

        // per-atom mass when rmass is set, otherwise the per-type mass
        if(rmass) massone = rmass[i];
        else massone = mass[type[i]];

        sum[0] += unwrap[0]*massone;
        sum[1] += unwrap[1]*massone;
        sum[2] += unwrap[2]*massone;
        sum[3] += massone;
      }
    }
    MPI_Allreduce(&sum[0],&xcm[0],4,MPI_DOUBLE,MPI_SUM,world);
    // NOTE(review): xcm[3] is the total group mass; an empty igroupforce
    // group would make these divisions 0/0.
    xcm[0] = xcm[0]/xcm[3];
    xcm[1] = xcm[1]/xcm[3];
    xcm[2] = xcm[2]/xcm[3];
  }

  //--------------------------------------------------------------------------
  //Calculate the contribution to the force on the fluid.
  //--------------------------------------------------------------------------
  for(i=0; i<nlocal; i++){
    if(mask[i] & groupbit){
      if(trilinear_stencil==1) {
        trilinear_interpolation(i);
      }else{
        peskin_interpolation(i);
      }

      if(force_diagnostic > 0 && update->ntimestep > 0 && (update->ntimestep % force_diagnostic == 0)){
        if(mask[i] & group->bitmask[igroupforce]){

          domain->unmap(x[i],image[i],unwrap);
          dx = unwrap[0] - xcm[0];
          dy = unwrap[1] - xcm[1];
          dz = unwrap[2] - xcm[2];

          forceloc[0] += hydroF[i][0];
          forceloc[1] += hydroF[i][1];
          forceloc[2] += hydroF[i][2];
          // torque about the centre of mass: (dx,dy,dz) x hydroF[i]
          torqueloc[0] += dy*hydroF[i][2] - dz*hydroF[i][1];
          torqueloc[1] += dz*hydroF[i][0] - dx*hydroF[i][2];
          torqueloc[2] += dx*hydroF[i][1] - dy*hydroF[i][0];
        }
      }
    }
  }

  //--------------------------------------------------------------------------
  //Communicate the force contributions which lie outside the local processor
  //sub domain.
  //--------------------------------------------------------------------------
  // x direction.  Message tags pair each send with its receive:
  //   tag 10: plane 0        -> procneigh[0][0]; received into Fftempx[0], added to plane subNbx-2
  //   tag 20: plane subNbx+2 -> procneigh[0][0]; received into Fftempx[1], added to plane subNbx-3
  //   tag 30: plane subNbx-1 -> procneigh[0][1]; received into Fftempx[2], added to plane 1
  //   tag 40: plane subNbx   -> procneigh[0][1]; received into Fftempx[3], added to plane 2
  //   tag 50: plane subNbx+1 -> procneigh[0][1]; received into Fftempx[4], added to plane 3
  for(i=0; i<10; i++)
    requests[i]=MPI_REQUEST_NULL;
  MPI_Isend(&Ff[0][0][0][0],1,passxu,comm->procneigh[0][0],10,world,&requests[0]);
  MPI_Isend(&Ff[subNbx+2][0][0][0],1,passxu,comm->procneigh[0][0],20,world,&requests[1]);
  MPI_Isend(&Ff[subNbx-1][0][0][0],1,passxu,comm->procneigh[0][1],30,world,&requests[2]);
  MPI_Isend(&Ff[subNbx][0][0][0],1,passxu,comm->procneigh[0][1],40,world,&requests[3]);
  MPI_Isend(&Ff[subNbx+1][0][0][0],1,passxu,comm->procneigh[0][1],50,world,&requests[4]);
  MPI_Irecv(&Fftempx[0][0][0][0],1,passxtemp,comm->procneigh[0][1],10,world,&requests[5]);
  MPI_Irecv(&Fftempx[1][0][0][0],1,passxtemp,comm->procneigh[0][1],20,world,&requests[6]);
  MPI_Irecv(&Fftempx[2][0][0][0],1,passxtemp,comm->procneigh[0][0],30,world,&requests[7]);
  MPI_Irecv(&Fftempx[3][0][0][0],1,passxtemp,comm->procneigh[0][0],40,world,&requests[8]);
  MPI_Irecv(&Fftempx[4][0][0][0],1,passxtemp,comm->procneigh[0][0],50,world,&requests[9]);
  MPI_Waitall(10,requests,statuses);

  for(j=0; j<subNby+3; j++){
    for(k=0; k<subNbz+3; k++){
      for(m=0; m<3; m++){
        Ff[subNbx-2][j][k][m] += Fftempx[0][j][k][m];
        Ff[subNbx-3][j][k][m] += Fftempx[1][j][k][m];
        Ff[1][j][k][m] += Fftempx[2][j][k][m];
        Ff[2][j][k][m] += Fftempx[3][j][k][m];
        Ff[3][j][k][m] += Fftempx[4][j][k][m];
      }
    }
  }

  // y direction: same plane/tag scheme as for x, using procneigh[1][*].
  // It runs after the x exchange has completed, so the planes sent here
  // already contain the x contributions.
  for(i=0; i<10; i++)
    requests[i]=MPI_REQUEST_NULL;
  MPI_Isend(&Ff[0][0][0][0],1,passyu,comm->procneigh[1][0],10,world,&requests[0]);
  MPI_Isend(&Ff[0][subNby+2][0][0],1,passyu,comm->procneigh[1][0],20,world,&requests[1]);
  MPI_Isend(&Ff[0][subNby-1][0][0],1,passyu,comm->procneigh[1][1],30,world,&requests[2]);
  MPI_Isend(&Ff[0][subNby][0][0],1,passyu,comm->procneigh[1][1],40,world,&requests[3]);
  MPI_Isend(&Ff[0][subNby+1][0][0],1,passyu,comm->procneigh[1][1],50,world,&requests[4]);
  MPI_Irecv(&Fftempy[0][0][0][0],1,passytemp,comm->procneigh[1][1],10,world,&requests[5]);
  MPI_Irecv(&Fftempy[0][1][0][0],1,passytemp,comm->procneigh[1][1],20,world,&requests[6]);
  MPI_Irecv(&Fftempy[0][2][0][0],1,passytemp,comm->procneigh[1][0],30,world,&requests[7]);
  MPI_Irecv(&Fftempy[0][3][0][0],1,passytemp,comm->procneigh[1][0],40,world,&requests[8]);
  MPI_Irecv(&Fftempy[0][4][0][0],1,passytemp,comm->procneigh[1][0],50,world,&requests[9]);
  MPI_Waitall(10,requests,statuses);

  for(i=0; i<subNbx+3; i++){
    for(k=0; k<subNbz+3; k++){
      for(m=0; m<3; m++){
        Ff[i][subNby-2][k][m] += Fftempy[i][0][k][m];
        Ff[i][subNby-3][k][m] += Fftempy[i][1][k][m];
        Ff[i][1][k][m] += Fftempy[i][2][k][m];
        Ff[i][2][k][m] += Fftempy[i][3][k][m];
        Ff[i][3][k][m] += Fftempy[i][4][k][m];
      }
    }
  }

  // z direction: same plane/tag scheme again, using procneigh[2][*].
  for(i=0; i<10; i++)
    requests[i]=MPI_REQUEST_NULL;
  MPI_Isend(&Ff[0][0][0][0],1,passzu,comm->procneigh[2][0],10,world,&requests[0]);
  MPI_Isend(&Ff[0][0][subNbz+2][0],1,passzu,comm->procneigh[2][0],20,world,&requests[1]);
  MPI_Isend(&Ff[0][0][subNbz-1][0],1,passzu,comm->procneigh[2][1],30,world,&requests[2]);
  MPI_Isend(&Ff[0][0][subNbz][0],1,passzu,comm->procneigh[2][1],40,world,&requests[3]);
  MPI_Isend(&Ff[0][0][subNbz+1][0],1,passzu,comm->procneigh[2][1],50,world,&requests[4]);
  MPI_Irecv(&Fftempz[0][0][0][0],1,passztemp,comm->procneigh[2][1],10,world,&requests[5]);
  MPI_Irecv(&Fftempz[0][0][1][0],1,passztemp,comm->procneigh[2][1],20,world,&requests[6]);
  MPI_Irecv(&Fftempz[0][0][2][0],1,passztemp,comm->procneigh[2][0],30,world,&requests[7]);
  MPI_Irecv(&Fftempz[0][0][3][0],1,passztemp,comm->procneigh[2][0],40,world,&requests[8]);
  MPI_Irecv(&Fftempz[0][0][4][0],1,passztemp,comm->procneigh[2][0],50,world,&requests[9]);
  MPI_Waitall(10,requests,statuses);

  for(i=0; i<subNbx+3; i++){
    for(j=0; j<subNby+3; j++){
      for(m=0; m<3; m++){
        Ff[i][j][subNbz-2][m] += Fftempz[i][j][0][m];
        Ff[i][j][subNbz-3][m] += Fftempz[i][j][1][m];
        Ff[i][j][1][m] += Fftempz[i][j][2][m];
        Ff[i][j][2][m] += Fftempz[i][j][3][m];
        Ff[i][j][3][m] += Fftempz[i][j][4][m];
      }
    }
  }

  // Diagnostic output: sum the per-rank force and torque over all ranks and
  // print one line from rank 0.
  if(force_diagnostic > 0 && update->ntimestep > 0 && (update->ntimestep % force_diagnostic == 0)){
    force[0] = force[1] = force[2] = 0.0;
    torque[0] = torque[1] = torque[2] =0.0;

    MPI_Allreduce(&forceloc[0],&force[0],3,MPI_DOUBLE,MPI_SUM,world);
    MPI_Allreduce(&torqueloc[0],&torque[0],3,MPI_DOUBLE,MPI_SUM,world);

    if(me==0){
      printf("%E %E %E %E %E %E\n",force[0],force[1],force[2], torque[0],torque[1],torque[2]);
    }
  }

}

//==========================================================================
// uses the Peskin stencil to perform the velocity, density and
// force interpolations.
//==========================================================================
// For local atom i this routine
//   - computes 4x4x4 = 64 stencil weights (offsets -1..2 in each direction
//     around the grid index obtained with ceil()),
//   - interpolates the fluid velocity (and, when setGamma == 0, the density)
//     at the atom position,
//   - determines the coupling constant gammavalue, either from the
//     interpolated density and the atom mass or from Gamma[type],
//   - adds the weighted force to the 64 nodes of Ff and stores the opposite
//     force on the atom in hydroF[i].
// Calls error->one() when a stencil index falls outside [-1, subNb+1] in any
// direction, and error->warning() when the stencil reaches the z walls of a
// non-periodic domain.
void FixLbFluid::peskin_interpolation(int i)
{
  double **x = atom->x;
  double **v = atom->v;
  int *type = atom->type;
  double *rmass = atom->rmass;
  double *mass = atom->mass;
  double massone;
  int ix,iy,iz;
  int ixp,iyp,izp;
  double dx1,dy1,dz1;
  int isten,ii,jj,kk;
  double r,rsq,weightx,weighty,weightz;
  double FfP[64];
  int k;
  double unode[3];
  double mnode;
  double gammavalue;

  //--------------------------------------------------------------------------
  //Calculate nearest leftmost grid point.
  //Since array indices from 1 to subNb-2 correspond to the
  // local subprocessor domain (not indices from 0), use the
  // ceiling value.
  //--------------------------------------------------------------------------
  ix = (int)ceil((x[i][0]-domain->sublo[0])/dx_lb);
  iy = (int)ceil((x[i][1]-domain->sublo[1])/dx_lb);
  iz = (int)ceil((x[i][2]-domain->sublo[2])/dx_lb);

  //--------------------------------------------------------------------------
  //Calculate distances to the nearest points.
  //--------------------------------------------------------------------------
  dx1 = x[i][0] - (domain->sublo[0] + (ix-1)*dx_lb);
  dy1 = x[i][1] - (domain->sublo[1] + (iy-1)*dx_lb);
  dz1 = x[i][2] - (domain->sublo[2] + (iz-1)*dx_lb);

  // Need to convert these to lattice units:
  dx1 = dx1/dx_lb;
  dy1 = dy1/dx_lb;
  dz1 = dz1/dx_lb;

  unode[0]=0.0; unode[1]=0.0; unode[2]=0.0;
  mnode = 0.0;
  // isten is the running index into FfP; it advances in (ii,jj,kk) loop order.
  isten=0;

  //--------------------------------------------------------------------------
  // Calculate the interpolation weights, and interpolated values of
  // the fluid velocity, and density.
  //--------------------------------------------------------------------------
  // The one-dimensional weight is piecewise in the distance r from the atom:
  // zero for r >= 2, one formula for 1 < r < 2 and another for r <= 1.
  for(ii=-1; ii<3; ii++){
    rsq=(-dx1+ii)*(-dx1+ii);

    if(rsq>=4)
      weightx=0.0;
    else{
      r=sqrt(rsq);
      if(rsq>1){
        weightx=(5.0-2.0*r-sqrt(-7.0+12.0*r-4.0*rsq))/8.;
      } else{
        weightx=(3.0-2.0*r+sqrt(1.0+4.0*r-4.0*rsq))/8.;
      }
    }
    for(jj=-1; jj<3; jj++){
      rsq=(-dy1+jj)*(-dy1+jj);
      if(rsq>=4)
        weighty=0.0;
      else{
        r=sqrt(rsq);
        if(rsq>1){
          weighty=(5.0-2.0*r-sqrt(-7.0+12.0*r-4.0*rsq))/8.;
        } else{
          weighty=(3.0-2.0*r+sqrt(1.0+4.0*r-4.0*rsq))/8.;
        }
      }
      for(kk=-1; kk<3; kk++){
        rsq=(-dz1+kk)*(-dz1+kk);
        if(rsq>=4)
          weightz=0.0;
        else{
          r=sqrt(rsq);
          if(rsq>1){
            weightz=(5.0-2.0*r-sqrt(-7.0+12.0*r-4.0*rsq))/8.;
          } else{
            weightz=(3.0-2.0*r+sqrt(1.0+4.0*r-4.0*rsq))/8.;
          }
        }
        ixp = ix+ii;
        iyp = iy+jj;
        izp = iz+kk;

        //The atom is allowed to be within one lattice grid point outside the
        //local processor sub-domain.
        if(ixp < -1 || ixp > (subNbx+1) || iyp < -1 || iyp > (subNby+1) || izp < -1 || izp > (subNbz+1))
          error->one(FLERR,"Atom outside local processor simulation domain. Either unstable fluid pararmeters, or \
require more frequent neighborlist rebuilds");

        // These warnings sit inside the stencil loop, so they can be issued
        // for several stencil points of the same atom.
        if(domain->periodicity[2] == 0 && comm->myloc[2] == 0 && izp < 1)
          error->warning(FLERR,"Atom too close to lower z wall. Unphysical results may occur");
        if(domain->periodicity[2] == 0 && comm->myloc[2] == (comm->procgrid[2]-1) && (izp > (subNbz-2) ))
          error->warning(FLERR,"Atom too close to upper z wall. Unphysical results may occur");

        // Index -1 is remapped to array index subNb+2.
        if(ixp==-1) ixp=subNbx+2;
        if(iyp==-1) iyp=subNby+2;
        if(izp==-1) izp=subNbz+2;

        FfP[isten] = weightx*weighty*weightz;
        // interpolated velocity based on delta function.
        for(k=0; k<3; k++){
          unode[k] += u_lb[ixp][iyp][izp][k]*FfP[isten];
        }
        if(setGamma==0)
          mnode += density_lb[ixp][iyp][izp]*FfP[isten];

        isten++;
      }
    }
  }
  if(setGamma==0){
    // Coupling constant from the interpolated fluid density (scaled by
    // NodeArea for this atom type) and the atom mass divided by dm_lb.
    mnode *= NodeArea[type[i]];

    if(rmass) massone = rmass[i];
    else massone = mass[type[i]];
    massone = massone/dm_lb;

    gammavalue = 2.0*(mnode*massone)*dtoverdtcollision/(mnode+massone);
  }
  else{
    gammavalue = Gamma[type[i]];
  }

  // Second pass over the same 64 nodes, in the same order, so FfP[isten]
  // lines up with the weights computed above.
  isten=0;
  for(ii=-1; ii<3; ii++)
    for(jj=-1; jj<3; jj++)
      for(kk=-1; kk<3; kk++){
        ixp = ix+ii;
        iyp = iy+jj;
        izp = iz+kk;

        if(ixp==-1) ixp=subNbx+2;
        if(iyp==-1) iyp=subNby+2;
        if(izp==-1) izp=subNbz+2;
        // Compute the force on the fluid.  Need to convert the velocity from
        // LAMMPS units to LB units.
        for(k=0; k<3; k++){
          Ff[ixp][iyp][izp][k] += gammavalue*((v[i][k]*dt_lb/dx_lb)-unode[k])*FfP[isten];
        }

        isten++;
      }
  // Force on the atom: opposite sign to the force applied to the fluid.
  // NOTE(review): the factor dm_lb*dx_lb/dt_lb/dt_lb presumably converts the
  // force from lattice units back to LAMMPS units -- confirm.
  for(k=0; k<3; k++)
    hydroF[i][k] = -1.0*gammavalue*((v[i][k]*dt_lb/dx_lb)-unode[k])*dm_lb*dx_lb/dt_lb/dt_lb;
}

//==========================================================================
// uses the trilinear stencil to perform the velocity, density and
// force interpolations.
//==========================================================================
// Same contract as peskin_interpolation(), but with the 8 nodes
// (ix or ix+1, iy or iy+1, iz or iz+1) and trilinear weights.  FfP[n] is
// ordered with z varying fastest, then y, then x.
void FixLbFluid::trilinear_interpolation(int i)
{
  double **x = atom->x;
  double **v = atom->v;
  int *type = atom->type;
  double *rmass = atom->rmass;
  double *mass = atom->mass;
  double massone;
  int ix,iy,iz;
  int ixp,iyp,izp;
  double dx1,dy1,dz1;
  double FfP[8];
  int k;
  double unode[3];
  double mnode;
  double gammavalue;

  //--------------------------------------------------------------------------
  // Calculate nearest leftmost grid point.
  // Since array indices from 1 to subNb-2 correspond to the
  // local subprocessor domain (not indices from 0), use the
  // ceiling value.
  //--------------------------------------------------------------------------
  ix = (int)ceil((x[i][0]-domain->sublo[0])/dx_lb);
  iy = (int)ceil((x[i][1]-domain->sublo[1])/dx_lb);
  iz = (int)ceil((x[i][2]-domain->sublo[2])/dx_lb);

  //--------------------------------------------------------------------------
  //Calculate distances to the nearest points.
  //--------------------------------------------------------------------------
  dx1 = x[i][0] - (domain->sublo[0] + (ix-1)*dx_lb);
  dy1 = x[i][1] - (domain->sublo[1] + (iy-1)*dx_lb);
  dz1 = x[i][2] - (domain->sublo[2] + (iz-1)*dx_lb);

  //--------------------------------------------------------------------------
  // Need to convert these to lattice units:
  //--------------------------------------------------------------------------
  dx1 = dx1/dx_lb;
  dy1 = dy1/dx_lb;
  dz1 = dz1/dx_lb;

  //--------------------------------------------------------------------------
  // Calculate the interpolation weights
  //--------------------------------------------------------------------------
  FfP[0] = (1.-dx1)*(1.-dy1)*(1.-dz1);
  FfP[1] = (1.-dx1)*(1.-dy1)*dz1;
  FfP[2] = (1.-dx1)*dy1*(1.-dz1);
  FfP[3] = (1.-dx1)*dy1*dz1;
  FfP[4] = dx1*(1.-dy1)*(1.-dz1);
  FfP[5] = dx1*(1.-dy1)*dz1;
  FfP[6] = dx1*dy1*(1.-dz1);
  FfP[7] = dx1*dy1*dz1;

  ixp = (ix+1);
  iyp = (iy+1);
  izp = (iz+1);

  //The atom is allowed to be within one lattice grid point outside the
  //local processor sub-domain.
  if(ix < 0 || ixp > (subNbx+1) || iy < 0 || iyp > (subNby+1) || iz < 0 || izp > (subNbz+1))
    error->one(FLERR,"Atom outside local processor simulation domain. Either unstable fluid pararmeters, or \
require more frequent neighborlist rebuilds");

  if(domain->periodicity[2] == 0 && comm->myloc[2] == 0 && (iz < 1 || izp < 1))
    error->warning(FLERR,"Atom too close to lower z wall. Unphysical results may occur");
  if(domain->periodicity[2] == 0 && comm->myloc[2] == (comm->procgrid[2]-1) && (izp > (subNbz-2) || iz > (subNbz-2)))
    error->warning(FLERR,"Atom too close to upper z wall. Unphysical results may occur");


  for (k=0; k<3; k++) { // tri-linearly interpolated velocity at node
    unode[k] = u_lb[ix][iy][iz][k]*FfP[0]
      + u_lb[ix][iy][izp][k]*FfP[1]
      + u_lb[ix][iyp][iz][k]*FfP[2]
      + u_lb[ix][iyp][izp][k]*FfP[3]
      + u_lb[ixp][iy][iz][k]*FfP[4]
      + u_lb[ixp][iy][izp][k]*FfP[5]
      + u_lb[ixp][iyp][iz][k]*FfP[6]
      + u_lb[ixp][iyp][izp][k]*FfP[7];
  }

  if(setGamma==0){
    // Coupling constant from the interpolated fluid density (scaled by
    // NodeArea for this atom type) and the atom mass divided by dm_lb.
    mnode = density_lb[ix][iy][iz]*FfP[0]
      + density_lb[ix][iy][izp]*FfP[1]
      + density_lb[ix][iyp][iz]*FfP[2]
      + density_lb[ix][iyp][izp]*FfP[3]
      + density_lb[ixp][iy][iz]*FfP[4]
      + density_lb[ixp][iy][izp]*FfP[5]
      + density_lb[ixp][iyp][iz]*FfP[6]
      + density_lb[ixp][iyp][izp]*FfP[7];

    mnode *= NodeArea[type[i]];

    if(rmass) massone = rmass[i];
    else massone = mass[type[i]];
    massone = massone/dm_lb;

    gammavalue = 2.0*(mnode*massone)*dtoverdtcollision/(mnode+massone);
  }else{
    gammavalue = Gamma[type[i]];
  }


  // Distribute the force on the fluid over the 8 nodes with the same weights.
  for(k=0; k<3; k++){
    Ff[ix][iy][iz][k] += gammavalue*((v[i][k]*dt_lb/dx_lb)-unode[k])*FfP[0];
    Ff[ix][iy][izp][k] += gammavalue*((v[i][k]*dt_lb/dx_lb)-unode[k])*FfP[1];
    Ff[ix][iyp][iz][k] += gammavalue*((v[i][k]*dt_lb/dx_lb)-unode[k])*FfP[2];
    Ff[ix][iyp][izp][k] += gammavalue*((v[i][k]*dt_lb/dx_lb)-unode[k])*FfP[3];
    Ff[ixp][iy][iz][k] += gammavalue*((v[i][k]*dt_lb/dx_lb)-unode[k])*FfP[4];
    Ff[ixp][iy][izp][k] += gammavalue*((v[i][k]*dt_lb/dx_lb)-unode[k])*FfP[5];
    Ff[ixp][iyp][iz][k] += gammavalue*((v[i][k]*dt_lb/dx_lb)-unode[k])*FfP[6];
    Ff[ixp][iyp][izp][k] += gammavalue*((v[i][k]*dt_lb/dx_lb)-unode[k])*FfP[7];
  }

  // Force on the atom: opposite sign to the force applied to the fluid.
  // NOTE(review): the factor dm_lb*dx_lb/dt_lb/dt_lb presumably converts the
  // force from lattice units back to LAMMPS units -- confirm.
  for(k=0; k<3; k++)
    hydroF[i][k] = -1.0*gammavalue*((v[i][k]*dt_lb/dx_lb)-unode[k])*dm_lb*dx_lb/dt_lb/dt_lb;

}

//==========================================================================
// read in a fluid restart file.  This is only used to restart the
// fluid portion of a LAMMPS simulation.
//========================================================================== void FixLbFluid::read_restartfile(void) { MPI_Status status; MPI_Datatype realtype; MPI_Datatype filetype; int realsizes[4] = {subNbx,subNby,subNbz,numvel}; int realstarts[4] = {1,1,1,0}; int gsizes[4] = {Nbx,Nby,Nbz,numvel}; int lsizes[4] = {subNbx-2,subNby-2,subNbz-2,numvel}; int starts[4] = {comm->myloc[0]*(subNbx-2),comm->myloc[1]*(subNby-2),comm->myloc[2]*(subNbz-2),0}; if(domain->periodicity[2]==0 && comm->myloc[2]==comm->procgrid[2]-1){ starts[2] = comm->myloc[2]*(subNbz-3); } MPI_Type_create_subarray(4,realsizes,lsizes,realstarts,MPI_ORDER_C,MPI_DOUBLE,&realtype); MPI_Type_commit(&realtype); MPI_Type_create_subarray(4,gsizes,lsizes,starts,MPI_ORDER_C,MPI_DOUBLE,&filetype); MPI_Type_commit(&filetype); MPI_File_set_view(pFileRead,0,MPI_DOUBLE,filetype,(char *) "native", MPI_INFO_NULL); MPI_File_seek(pFileRead,0,MPI_SEEK_SET); MPI_File_read_all(pFileRead,&f_lb[0][0][0][0],1,realtype,&status); if(typeLB == 2){ MPI_File_read_all(pFileRead,&feqold[0][0][0][0],1,realtype,&status); MPI_File_read_all(pFileRead,&feqoldn[0][0][0][0],1,realtype,&status); } MPI_Type_free(&realtype); MPI_Type_free(&filetype); MPI_File_close(&pFileRead); } //========================================================================== // write a fluid restart file. 
//========================================================================== void FixLbFluid::write_restartfile(void) { MPI_File fh; MPI_Status status; MPI_Datatype realtype; MPI_Datatype filetype; char *hfile; hfile = new char[32]; sprintf(hfile,"FluidRestart_" BIGINT_FORMAT ".dat",update->ntimestep); MPI_File_open(world,hfile,MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL,&fh); int realsizes[4] = {subNbx,subNby,subNbz,numvel}; int realstarts[4] = {1,1,1,0}; int gsizes[4] = {Nbx,Nby,Nbz,numvel}; int lsizes[4] = {subNbx-2,subNby-2,subNbz-2,numvel}; int starts[4] = {comm->myloc[0]*(subNbx-2),comm->myloc[1]*(subNby-2),comm->myloc[2]*(subNbz-2),0}; if(domain->periodicity[2]==0 && comm->myloc[2]==comm->procgrid[2]-1){ starts[2] = comm->myloc[2]*(subNbz-3); } MPI_Type_create_subarray(4,realsizes,lsizes,realstarts,MPI_ORDER_C,MPI_DOUBLE,&realtype); MPI_Type_commit(&realtype); MPI_Type_create_subarray(4,gsizes,lsizes,starts,MPI_ORDER_C,MPI_DOUBLE,&filetype); MPI_Type_commit(&filetype); MPI_File_set_view(fh,0,MPI_DOUBLE,filetype,(char *) "native",MPI_INFO_NULL); MPI_File_write_all(fh,&f_lb[0][0][0][0],1,realtype,&status); if(typeLB == 2){ MPI_File_write_all(fh,&feqold[0][0][0][0],1,realtype,&status); MPI_File_write_all(fh,&feqoldn[0][0][0][0],1,realtype,&status); } MPI_Type_free(&realtype); MPI_Type_free(&filetype); MPI_File_close(&fh); delete [] hfile; } //========================================================================== // rescale the simulation parameters so that dx_lb=dt_lb=dm_lb=1. // This assumes that all the simulation parameters have been given in // terms of distance, time and mass units. 
//========================================================================== void FixLbFluid::rescale(void) { vwtp = vwtp*dt_lb/dx_lb; vwbt = vwbt*dt_lb/dx_lb; bodyforcex = bodyforcex*dt_lb*dt_lb/dx_lb; bodyforcey = bodyforcey*dt_lb*dt_lb/dx_lb; bodyforcez = bodyforcez*dt_lb*dt_lb/dx_lb; tau=(3.0*viscosity/densityinit_real)*dt_lb*dt_lb/dx_lb/dx_lb; tau /= dt_lb; if(typeLB==1) tau = tau + 0.5; if(setGamma == 0){ for(int i=0; i<= atom->ntypes; i++){ NodeArea[i] = NodeArea[i]/dx_lb/dx_lb; } }else{ for(int i=0; i<= atom->ntypes; i++){ Gamma[i] = Gamma[i]*dt_lb/dm_lb; } } densityinit = densityinit_real*dx_lb*dx_lb*dx_lb/dm_lb; a_0 = a_0_real*dt_lb*dt_lb/(dx_lb*dx_lb); // Warn if using the D3Q19 model with noise, and a0 is too small. if(numvel==19 && noisestress==1 && a_0 < 0.2){ error->warning(FLERR,"Fix lb/fluid WARNING: Chosen value for a0 may be too small. \ Check temperature reproduction.\n"); } if(noisestress==1){ if(a_0>0.5555555){ error->all(FLERR,"Fix lb/fluid ERROR: the Lattice Boltzmann dx and dt need \ to be chosen such that the scaled a_0 < 5/9\n"); } } // Courant Condition: if(a_0 >= 1.0){ error->all(FLERR,"Fix lb/fluid ERROR: the lattice Boltzmann dx and dt do not \ satisfy the Courant condition.\n"); } kB = (force->boltz/force->mvv2e)*dt_lb*dt_lb/dx_lb/dx_lb/dm_lb; if(typeLB==1){ expminusdtovertau = 0.0; Dcoeff = 0.0; namp = 2.0*kB*T*(tau-0.5)/3.0; noisefactor = 1.0; if(a_0 <= 0.333333333333333){ K_0 = 5.17*(0.333333333333333 - a_0); }else{ K_0 = 2.57*(a_0 - 0.333333333333333); } dtoverdtcollision = dt_lb*6.0*viscosity/densityinit_real/dx_lb/dx_lb; }else if(typeLB==2){ expminusdtovertau=exp(-1.0/tau); Dcoeff=(1.0-(1.0-expminusdtovertau)*tau); namp = 2.0*kB*T/3.; noisefactor=sqrt((1.0-expminusdtovertau*expminusdtovertau)/ (2.0))/(1.0-expminusdtovertau); K_0 = 4.5*(1.0/3.0-a_0); dtoverdtcollision = dt_lb*3.0*viscosity/densityinit_real/dx_lb/dx_lb; } } //========================================================================== // Set the lattice-Boltzmann 
velocity vectors and weights for the D3Q15 // model. Initialize the fluid velocity and density. //========================================================================== void FixLbFluid::initializeLB15(void) { int i,j,k,m; //velocity vectors. e[0][0]= 0; e[0][1]= 0; e[0][2]= 0; e[1][0]= 1; e[1][1]= 0; e[1][2]= 0; e[2][0]= 0; e[2][1]= 1; e[2][2]= 0; e[3][0]= -1; e[3][1]= 0; e[3][2]= 0; e[4][0]= 0; e[4][1]= -1; e[4][2]= 0; e[5][0]= 0; e[5][1]= 0; e[5][2]= 1; e[6][0]= 0; e[6][1]= 0; e[6][2]= -1; e[7][0]= 1; e[7][1]= 1; e[7][2]= 1; e[8][0]= -1; e[8][1]= 1; e[8][2]= 1; e[9][0]= -1; e[9][1]= -1; e[9][2]= 1; e[10][0]= 1; e[10][1]= -1; e[10][2]= 1; e[11][0]= 1; e[11][1]= 1; e[11][2]= -1; e[12][0]= -1; e[12][1]= 1; e[12][2]= -1; e[13][0]= -1; e[13][1]= -1; e[13][2]= -1; e[14][0]= 1; e[14][1]= -1; e[14][2]= -1; //weights. w_lb[0]=2./9.; w_lb[1]=1./9.; w_lb[2]=1./9.; w_lb[3]=1./9.; w_lb[4]=1./9.; w_lb[5]=1./9.; w_lb[6]=1./9.; w_lb[7]=1./72.; w_lb[8]=1./72.; w_lb[9]=1./72.; w_lb[10]=1./72.; w_lb[11]=1./72.; w_lb[12]=1./72.; w_lb[13]=1./72.; w_lb[14]=1./72.; Ng_lb[0]=1.; Ng_lb[1]=3.; Ng_lb[2]=3.; Ng_lb[3]=3.; Ng_lb[4]=9./2.; Ng_lb[5]=9./2.; Ng_lb[6]=9./2.; Ng_lb[7]=9.; Ng_lb[8]=9.; Ng_lb[9]=9.; Ng_lb[10]=27./2.; Ng_lb[11]=27./2.; Ng_lb[12]=27./2.; Ng_lb[13]=9.; Ng_lb[14]=1.; mg_lb[0][0]=1.;mg_lb[0][1]=1.;mg_lb[0][2]=1.;mg_lb[0][3]=1.;mg_lb[0][4]=1.; mg_lb[0][5]=1.;mg_lb[0][6]=1.;mg_lb[0][7]=1.;mg_lb[0][8]=1.;mg_lb[0][9]=1.; mg_lb[0][10]=1.;mg_lb[0][11]=1.;mg_lb[0][12]=1.;mg_lb[0][13]=1.;mg_lb[0][14]=1.; mg_lb[1][0]=0;mg_lb[1][1]=1.;mg_lb[1][2]=0;mg_lb[1][3]=-1.;mg_lb[1][4]=0; mg_lb[1][5]=0;mg_lb[1][6]=0;mg_lb[1][7]=1.;mg_lb[1][8]=-1.;mg_lb[1][9]=-1.; mg_lb[1][10]=1.;mg_lb[1][11]=1.;mg_lb[1][12]=-1.;mg_lb[1][13]=-1.;mg_lb[1][14]=1.; mg_lb[2][0]=0;mg_lb[2][1]=0;mg_lb[2][2]=1.;mg_lb[2][3]=0;mg_lb[2][4]=-1.; mg_lb[2][5]=0;mg_lb[2][6]=0;mg_lb[2][7]=1.;mg_lb[2][8]=1.;mg_lb[2][9]=-1.; mg_lb[2][10]=-1.;mg_lb[2][11]=1.;mg_lb[2][12]=1.;mg_lb[2][13]=-1.;mg_lb[2][14]=-1.; 
mg_lb[3][0]=0;mg_lb[3][1]=0;mg_lb[3][2]=0;mg_lb[3][3]=0;mg_lb[3][4]=0; mg_lb[3][5]=1.;mg_lb[3][6]=-1.;mg_lb[3][7]=1.;mg_lb[3][8]=1.;mg_lb[3][9]=1.; mg_lb[3][10]=1.;mg_lb[3][11]=-1.;mg_lb[3][12]=-1.;mg_lb[3][13]=-1.;mg_lb[3][14]=-1.; mg_lb[4][0]=-1./3.;mg_lb[4][1]=2./3.;mg_lb[4][2]=-1./3.;mg_lb[4][3]=2./3.;mg_lb[4][4]=-1./3.; mg_lb[4][5]=-1./3.;mg_lb[4][6]=-1./3.;mg_lb[4][7]=2./3.;mg_lb[4][8]=2./3.;mg_lb[4][9]=2./3.; mg_lb[4][10]=2./3.;mg_lb[4][11]=2./3.;mg_lb[4][12]=2./3.;mg_lb[4][13]=2./3.;mg_lb[4][14]=2./3.; mg_lb[5][0]=-1./3.;mg_lb[5][1]=-1./3.;mg_lb[5][2]=2./3.;mg_lb[5][3]=-1./3.;mg_lb[5][4]=2./3.; mg_lb[5][5]=-1./3.;mg_lb[5][6]=-1./3.;mg_lb[5][7]=2./3.;mg_lb[5][8]=2./3.;mg_lb[5][9]=2./3.; mg_lb[5][10]=2./3.;mg_lb[5][11]=2./3.;mg_lb[5][12]=2./3.;mg_lb[5][13]=2./3.;mg_lb[5][14]=2./3.; mg_lb[6][0]=-1./3.;mg_lb[6][1]=-1./3.;mg_lb[6][2]=-1./3.;mg_lb[6][3]=-1./3.;mg_lb[6][4]=-1./3.; mg_lb[6][5]=2./3.;mg_lb[6][6]=2./3.;mg_lb[6][7]=2./3.;mg_lb[6][8]=2./3.;mg_lb[6][9]=2./3.; mg_lb[6][10]=2./3.;mg_lb[6][11]=2./3.;mg_lb[6][12]=2./3.;mg_lb[6][13]=2./3.;mg_lb[6][14]=2./3.; mg_lb[7][0]=0;mg_lb[7][1]=0;mg_lb[7][2]=0;mg_lb[7][3]=0;mg_lb[7][4]=0; mg_lb[7][5]=0;mg_lb[7][6]=0;mg_lb[7][7]=1;mg_lb[7][8]=-1;mg_lb[7][9]=1; mg_lb[7][10]=-1;mg_lb[7][11]=1;mg_lb[7][12]=-1;mg_lb[7][13]=1;mg_lb[7][14]=-1; mg_lb[8][0]=0;mg_lb[8][1]=0;mg_lb[8][2]=0;mg_lb[8][3]=0;mg_lb[8][4]=0; mg_lb[8][5]=0;mg_lb[8][6]=0;mg_lb[8][7]=1;mg_lb[8][8]=1;mg_lb[8][9]=-1; mg_lb[8][10]=-1;mg_lb[8][11]=-1;mg_lb[8][12]=-1;mg_lb[8][13]=1;mg_lb[8][14]=1; mg_lb[9][0]=0;mg_lb[9][1]=0;mg_lb[9][2]=0;mg_lb[9][3]=0;mg_lb[9][4]=0; mg_lb[9][5]=0;mg_lb[9][6]=0;mg_lb[9][7]=1;mg_lb[9][8]=-1;mg_lb[9][9]=-1; mg_lb[9][10]=1;mg_lb[9][11]=-1;mg_lb[9][12]=1;mg_lb[9][13]=1;mg_lb[9][14]=-1; mg_lb[10][0]=0;mg_lb[10][1]=0;mg_lb[10][2]=-1./3.;mg_lb[10][3]=0;mg_lb[10][4]=1./3.; mg_lb[10][5]=0;mg_lb[10][6]=0;mg_lb[10][7]=2./3.;mg_lb[10][8]=2./3.;mg_lb[10][9]=-2./3.; 
mg_lb[10][10]=-2./3.;mg_lb[10][11]=2./3.;mg_lb[10][12]=2./3.;mg_lb[10][13]=-2./3.;mg_lb[10][14]=-2./3.;

  mg_lb[11][0]=0;mg_lb[11][1]=0;mg_lb[11][2]=0;mg_lb[11][3]=0;mg_lb[11][4]=0;
  mg_lb[11][5]=-1./3.;mg_lb[11][6]=1./3.;mg_lb[11][7]=2./3.;mg_lb[11][8]=2./3.;mg_lb[11][9]=2./3.;
  mg_lb[11][10]=2./3.;mg_lb[11][11]=-2./3.;mg_lb[11][12]=-2./3.;mg_lb[11][13]=-2./3.;mg_lb[11][14]=-2./3.;

  mg_lb[12][0]=0;mg_lb[12][1]=-1./3.;mg_lb[12][2]=0;mg_lb[12][3]=1./3.;mg_lb[12][4]=0;
  mg_lb[12][5]=0;mg_lb[12][6]=0;mg_lb[12][7]=2./3.;mg_lb[12][8]=-2./3.;mg_lb[12][9]=-2./3.;
  mg_lb[12][10]=2./3.;mg_lb[12][11]=2./3.;mg_lb[12][12]=-2./3.;mg_lb[12][13]=-2./3.;mg_lb[12][14]=2./3.;

  mg_lb[13][0]=0;mg_lb[13][1]=0;mg_lb[13][2]=0;mg_lb[13][3]=0;mg_lb[13][4]=0;
  mg_lb[13][5]=0;mg_lb[13][6]=0;mg_lb[13][7]=1;mg_lb[13][8]=-1;mg_lb[13][9]=1;
  mg_lb[13][10]=-1;mg_lb[13][11]=-1;mg_lb[13][12]=1;mg_lb[13][13]=-1;mg_lb[13][14]=1;

  mg_lb[14][0]=sqrt(2.);mg_lb[14][1]=-1./sqrt(2.);mg_lb[14][2]=-1./sqrt(2.);
  mg_lb[14][3]=-1./sqrt(2.);mg_lb[14][4]=-1./sqrt(2.);
  mg_lb[14][5]=-1./sqrt(2.);mg_lb[14][6]=-1./sqrt(2.);mg_lb[14][7]=sqrt(2.);
  mg_lb[14][8]=sqrt(2.);mg_lb[14][9]=sqrt(2.);
  mg_lb[14][10]=sqrt(2.);mg_lb[14][11]=sqrt(2.);mg_lb[14][12]=sqrt(2.);
  mg_lb[14][13]=sqrt(2.);mg_lb[14][14]=sqrt(2.);

  for(i=0; i<subNbx+3; i++)
    for(j=0; j<subNby+3; j++)
      for(k=0; k<subNbz+3; k++){
	u_lb[i][j][k][0]=0.0;
	u_lb[i][j][k][1]=0.0;
	u_lb[i][j][k][2]=0.0;
	density_lb[i][j][k] = densityinit;
  }
  for(i=0; i<subNbx; i++)
    for(j=0; j<subNby; j++)
      for(k=0; k<subNbz; k++)
	for(m=0; m<15; m++)
	  f_lb[i][j][k][m] = density_lb[i][j][k]/15.0;

}

//==========================================================================
// Set the lattice-Boltzmann velocity vectors and weights for the D3Q19
// model.  Initialize the fluid velocity and density.
//
// Fills, in this order:
//   e      : the 19 integer lattice velocity vectors (3 components each),
//   w_lb   : the 19 lattice weights,
//   Ng_lb  : 19 per-row factors, and
//   mg_lb  : a 19x19 coefficient table.
// equilibriumdist19 combines these as
//   feq[l] = sum_ii w_lb[l]*mg_lb[ii][l]*etacov[ii]*Ng_lb[ii].
// Afterwards u_lb is zeroed and density_lb set to densityinit on the
// (subNbx+3) x (subNby+3) x (subNbz+3) grid, and every one of the 19
// populations f_lb on the subNbx x subNby x subNbz grid is set to
// density_lb/19.
//==========================================================================
void FixLbFluid::initializeLB19(void)
{
  int i,j,k,m;

  //velocity vectors.
  // e[0] is the rest vector, e[1..6] have one non-zero component and
  // e[7..18] have two non-zero components.
  e[0][0]= 0; e[0][1]= 0; e[0][2]= 0;
  e[1][0]= 1; e[1][1]= 0; e[1][2]= 0;
  e[2][0]= 0; e[2][1]= 1; e[2][2]= 0;
  e[3][0]= -1; e[3][1]= 0; e[3][2]= 0;
  e[4][0]= 0; e[4][1]= -1; e[4][2]= 0;
  e[5][0]= 0; e[5][1]= 0; e[5][2]= 1;
  e[6][0]= 0; e[6][1]= 0; e[6][2]= -1;
  e[7][0] = 1; e[7][1] = 1; e[7][2] = 0;
  e[8][0] = 1; e[8][1] = -1; e[8][2] = 0;
  e[9][0] = -1; e[9][1] = 1; e[9][2] = 0;
  e[10][0] = -1; e[10][1] = -1; e[10][2] = 0;
  e[11][0] = 1; e[11][1] = 0; e[11][2] = 1;
  e[12][0] = 1; e[12][1] = 0; e[12][2] = -1;
  e[13][0] = -1; e[13][1] = 0; e[13][2] = 1;
  e[14][0] = -1; e[14][1] = 0; e[14][2] = -1;
  e[15][0] = 0; e[15][1] = 1; e[15][2] = 1;
  e[16][0] = 0; e[16][1] = 1; e[16][2] = -1;
  e[17][0] = 0; e[17][1] = -1; e[17][2] = 1;
  e[18][0] = 0; e[18][1] = -1; e[18][2] = -1;

  //weights.
  // 1/3 for the rest vector, 1/18 for e[1..6], 1/36 for e[7..18].
  w_lb[0]=1./3.;
  w_lb[1]=1./18.;
  w_lb[2]=1./18.;
  w_lb[3]=1./18.;
  w_lb[4]=1./18.;
  w_lb[5]=1./18.;
  w_lb[6]=1./18.;
  w_lb[7]=1./36.;
  w_lb[8]=1./36.;
  w_lb[9]=1./36.;
  w_lb[10]=1./36.;
  w_lb[11]=1./36.;
  w_lb[12]=1./36.;
  w_lb[13]=1./36.;
  w_lb[14]=1./36.;
  w_lb[15]=1./36.;
  w_lb[16]=1./36.;
  w_lb[17]=1./36.;
  w_lb[18]=1./36.;

  // Per-row factors multiplying mg_lb[ii][*]*etacov[ii] in equilibriumdist19.
  // NOTE(review): presumably the normalization of each mg_lb row -- confirm.
  Ng_lb[0]=1.;
  Ng_lb[1]=3.;
  Ng_lb[2]=3.;
  Ng_lb[3]=3.;
  Ng_lb[4]=9./2.;
  Ng_lb[5]=9./2.;
  Ng_lb[6]=9./2.;
  Ng_lb[7]=9.;
  Ng_lb[8]=9.;
  Ng_lb[9]=9.;
  Ng_lb[10]=27./2.;
  Ng_lb[11]=27./2.;
  Ng_lb[12]=27./2.;
  Ng_lb[13]=18.;
  Ng_lb[14]=18.;
  Ng_lb[15]=18.;
  Ng_lb[16]=162./7.;
  Ng_lb[17]=126./5.;
  Ng_lb[18]=30.;

  // 19x19 coefficient table: first index = row (paired with etacov[row] and
  // Ng_lb[row]), second index = lattice velocity.
  mg_lb[0][0] = 1.; mg_lb[0][1] = 1.; mg_lb[0][2] = 1.; mg_lb[0][3] = 1.; mg_lb[0][4] = 1.;
  mg_lb[0][5] = 1.; mg_lb[0][6] = 1.; mg_lb[0][7] = 1.; mg_lb[0][8] = 1.; mg_lb[0][9] = 1.;
  mg_lb[0][10]= 1.; mg_lb[0][11]= 1.; mg_lb[0][12]= 1.; mg_lb[0][13]= 1.; mg_lb[0][14]= 1.;
  mg_lb[0][15]= 1.; mg_lb[0][16]= 1.; mg_lb[0][17]= 1.; mg_lb[0][18]= 1.;

  mg_lb[1][0] = 0.; mg_lb[1][1] = 1.; mg_lb[1][2] = 0.; mg_lb[1][3] =-1.; mg_lb[1][4] = 0.;
  mg_lb[1][5] = 0.; mg_lb[1][6] = 0.; mg_lb[1][7] = 1.; mg_lb[1][8] = 1.; mg_lb[1][9] =-1.;
  mg_lb[1][10]=-1.; mg_lb[1][11]= 1.; mg_lb[1][12]= 1.; mg_lb[1][13]=-1.; mg_lb[1][14]=-1.;
  mg_lb[1][15]= 0.; mg_lb[1][16]= 0.; mg_lb[1][17]= 0.; mg_lb[1][18]= 0.;

  mg_lb[2][0] = 0.; mg_lb[2][1] = 0.; mg_lb[2][2] = 1.; mg_lb[2][3] = 0.; mg_lb[2][4] =-1.;
  mg_lb[2][5] = 0.; mg_lb[2][6] = 0.; mg_lb[2][7] = 1.; mg_lb[2][8] =-1.; mg_lb[2][9] = 1.;
  mg_lb[2][10]=-1.; mg_lb[2][11]= 0.; mg_lb[2][12]= 0.; mg_lb[2][13]= 0.; mg_lb[2][14]= 0.;
  mg_lb[2][15]= 1.; mg_lb[2][16]= 1.; mg_lb[2][17]=-1.; mg_lb[2][18]=-1.;

  mg_lb[3][0] = 0.; mg_lb[3][1] = 0.; mg_lb[3][2] = 0.; mg_lb[3][3] = 0.; mg_lb[3][4] = 0.;
  mg_lb[3][5] = 1.; mg_lb[3][6] =-1.; mg_lb[3][7] = 0.; mg_lb[3][8] = 0.; mg_lb[3][9] = 0.;
  mg_lb[3][10]= 0.; mg_lb[3][11]= 1.; mg_lb[3][12]=-1.; mg_lb[3][13]= 1.; mg_lb[3][14]=-1.;
  mg_lb[3][15]= 1.; mg_lb[3][16]=-1.; mg_lb[3][17]= 1.; mg_lb[3][18]=-1.;

  mg_lb[4][0] =-1./3.; mg_lb[4][1] = 2./3.; mg_lb[4][2] =-1./3.; mg_lb[4][3] = 2./3.; mg_lb[4][4] =-1./3.;
  mg_lb[4][5] =-1./3.; mg_lb[4][6] =-1./3.; mg_lb[4][7] = 2./3.; mg_lb[4][8] = 2./3.; mg_lb[4][9] = 2./3.;
  mg_lb[4][10]= 2./3.; mg_lb[4][11]= 2./3.; mg_lb[4][12]= 2./3.; mg_lb[4][13]= 2./3.; mg_lb[4][14]= 2./3.;
  mg_lb[4][15]=-1./3.; mg_lb[4][16]=-1./3.; mg_lb[4][17]=-1./3.; mg_lb[4][18]=-1./3.;

  mg_lb[5][0] =-1./3.; mg_lb[5][1] =-1./3.; mg_lb[5][2] = 2./3.; mg_lb[5][3] =-1./3.; mg_lb[5][4] = 2./3.;
  mg_lb[5][5] =-1./3.; mg_lb[5][6] =-1./3.; mg_lb[5][7] = 2./3.; mg_lb[5][8] = 2./3.; mg_lb[5][9] = 2./3.;
  mg_lb[5][10]= 2./3.; mg_lb[5][11]=-1./3.; mg_lb[5][12]=-1./3.; mg_lb[5][13]=-1./3.; mg_lb[5][14]=-1./3.;
  mg_lb[5][15]= 2./3.; mg_lb[5][16]= 2./3.; mg_lb[5][17]= 2./3.; mg_lb[5][18]= 2./3.;

  mg_lb[6][0] =-1./3.; mg_lb[6][1] =-1./3.; mg_lb[6][2] =-1./3.; mg_lb[6][3] =-1./3.; mg_lb[6][4] =-1./3.;
  mg_lb[6][5] = 2./3.; mg_lb[6][6] = 2./3.; mg_lb[6][7] =-1./3.; mg_lb[6][8] =-1./3.; mg_lb[6][9] =-1./3.;
  mg_lb[6][10]=-1./3.; mg_lb[6][11]= 2./3.; mg_lb[6][12]= 2./3.; mg_lb[6][13]= 2./3.; mg_lb[6][14]= 2./3.;
  mg_lb[6][15]= 2./3.; mg_lb[6][16]= 2./3.; mg_lb[6][17]= 2./3.; mg_lb[6][18]= 2./3.;

  mg_lb[7][0] = 0.; mg_lb[7][1] = 0.; mg_lb[7][2] = 0.; mg_lb[7][3] = 0.; mg_lb[7][4] = 0.;
  mg_lb[7][5] = 0.; mg_lb[7][6] = 0.; mg_lb[7][7] = 1.; mg_lb[7][8] =-1.; mg_lb[7][9] =-1.;
  mg_lb[7][10]= 1.; mg_lb[7][11]= 0.; mg_lb[7][12]= 0.; mg_lb[7][13]= 0.; mg_lb[7][14]= 0.;
  mg_lb[7][15]= 0.; mg_lb[7][16]= 0.; mg_lb[7][17]= 0.; mg_lb[7][18]= 0.;

  mg_lb[8][0] = 0.; mg_lb[8][1] = 0.; mg_lb[8][2] = 0.; mg_lb[8][3] = 0.; mg_lb[8][4] = 0.;
  mg_lb[8][5] = 0.; mg_lb[8][6] = 0.; mg_lb[8][7] = 0.; mg_lb[8][8] = 0.; mg_lb[8][9] = 0.;
  mg_lb[8][10]= 0.; mg_lb[8][11]= 1.; mg_lb[8][12]=-1.; mg_lb[8][13]=-1.; mg_lb[8][14]= 1.;
  mg_lb[8][15]= 0.; mg_lb[8][16]= 0.; mg_lb[8][17]= 0.; mg_lb[8][18]= 0.;

  mg_lb[9][0] = 0.; mg_lb[9][1] = 0.; mg_lb[9][2] = 0.; mg_lb[9][3] = 0.; mg_lb[9][4] = 0.;
  mg_lb[9][5] = 0.; mg_lb[9][6] = 0.; mg_lb[9][7] = 0.; mg_lb[9][8] = 0.; mg_lb[9][9] = 0.;
  mg_lb[9][10]= 0.; mg_lb[9][11]= 0.; mg_lb[9][12]= 0.; mg_lb[9][13]= 0.; mg_lb[9][14]= 0.;
  mg_lb[9][15]= 1.; mg_lb[9][16]=-1.; mg_lb[9][17]=-1.; mg_lb[9][18]= 1.;

  mg_lb[10][0] = 0.; mg_lb[10][1] =-1./3.; mg_lb[10][2] = 0.; mg_lb[10][3] = 1./3.; mg_lb[10][4] = 0.;
  mg_lb[10][5] = 0.; mg_lb[10][6] = 0.; mg_lb[10][7] = 2./3.; mg_lb[10][8] = 2./3.; mg_lb[10][9] =-2./3.;
  mg_lb[10][10]=-2./3.; mg_lb[10][11]=-1./3.; mg_lb[10][12]=-1./3.; mg_lb[10][13]= 1./3.; mg_lb[10][14]= 1./3.;
  mg_lb[10][15]= 0.; mg_lb[10][16]= 0.; mg_lb[10][17]= 0.; mg_lb[10][18]= 0.;

  mg_lb[11][0] = 0.; mg_lb[11][1] = 0.; mg_lb[11][2] =-1./3.; mg_lb[11][3] = 0.; mg_lb[11][4] = 1./3.;
  mg_lb[11][5] = 0.; mg_lb[11][6] = 0.; mg_lb[11][7] = 2./3.; mg_lb[11][8] =-2./3.; mg_lb[11][9] = 2./3.;
  mg_lb[11][10]=-2./3.; mg_lb[11][11]= 0.; mg_lb[11][12]= 0.; mg_lb[11][13]= 0.; mg_lb[11][14]= 0.;
  mg_lb[11][15]=-1./3.; mg_lb[11][16]=-1./3.; mg_lb[11][17]= 1./3.; mg_lb[11][18]= 1./3.;

  mg_lb[12][0] = 0.; mg_lb[12][1] = 0.; mg_lb[12][2] = 0.; mg_lb[12][3] = 0.; mg_lb[12][4] = 0.;
  mg_lb[12][5] =-1./3.; mg_lb[12][6] = 1./3.; mg_lb[12][7] = 0.; mg_lb[12][8] = 0.; mg_lb[12][9] = 0.;
  mg_lb[12][10]= 0.; mg_lb[12][11]= 2./3.; mg_lb[12][12]=-2./3.; mg_lb[12][13]= 2./3.; mg_lb[12][14]=-2./3.;
  mg_lb[12][15]=-1./3.; mg_lb[12][16]= 1./3.; mg_lb[12][17]=-1./3.; mg_lb[12][18]= 1./3.;

  mg_lb[13][0] = 0.; mg_lb[13][1] =-0.5; mg_lb[13][2] = 0.; mg_lb[13][3] = 0.5; mg_lb[13][4] = 0.;
  mg_lb[13][5] = 0.; mg_lb[13][6] = 0.; mg_lb[13][7] = 0.; mg_lb[13][8] = 0.; mg_lb[13][9] = 0.;
  mg_lb[13][10]= 0.; mg_lb[13][11]= 0.5; mg_lb[13][12]= 0.5; mg_lb[13][13]=-0.5; mg_lb[13][14]=-0.5;
  mg_lb[13][15]= 0.; mg_lb[13][16]= 0.; mg_lb[13][17]= 0.; mg_lb[13][18]= 0.;

  mg_lb[14][0] = 0.; mg_lb[14][1] = 0.; mg_lb[14][2] = 0.; mg_lb[14][3] = 0.; mg_lb[14][4] = 0.;
  mg_lb[14][5] =-0.5; mg_lb[14][6] = 0.5; mg_lb[14][7] = 0.; mg_lb[14][8] = 0.; mg_lb[14][9] = 0.;
  mg_lb[14][10]= 0.; mg_lb[14][11]= 0.; mg_lb[14][12]= 0.; mg_lb[14][13]= 0.; mg_lb[14][14]= 0.;
  mg_lb[14][15]= 0.5; mg_lb[14][16]=-0.5; mg_lb[14][17]= 0.5; mg_lb[14][18]=-0.5;

  mg_lb[15][0] = 0.; mg_lb[15][1] = 0.; mg_lb[15][2] =-0.5; mg_lb[15][3] = 0.; mg_lb[15][4] = 0.5;
  mg_lb[15][5] = 0.; mg_lb[15][6] = 0.; mg_lb[15][7] = 0.; mg_lb[15][8] = 0.; mg_lb[15][9] = 0.;
  mg_lb[15][10]= 0.; mg_lb[15][11]= 0.; mg_lb[15][12]= 0.; mg_lb[15][13]= 0.; mg_lb[15][14]= 0.;
  mg_lb[15][15]= 0.5; mg_lb[15][16]= 0.5; mg_lb[15][17]=-0.5; mg_lb[15][18]=-0.5;

  mg_lb[16][0] = 1./18.; mg_lb[16][1] =-5./18.; mg_lb[16][2] =-5./18.; mg_lb[16][3] =-5./18.; mg_lb[16][4] =-5./18.;
  mg_lb[16][5] = 2./9.; mg_lb[16][6] = 2./9.; mg_lb[16][7] = 7./18.; mg_lb[16][8] = 7./18.; mg_lb[16][9] = 7./18.;
  mg_lb[16][10]= 7./18.; mg_lb[16][11]=-1./9.; mg_lb[16][12]=-1./9.; mg_lb[16][13]=-1./9.; mg_lb[16][14]=-1./9.;
  mg_lb[16][15]=-1./9.; mg_lb[16][16]=-1./9.; mg_lb[16][17]=-1./9.; mg_lb[16][18]=-1./9.;

  mg_lb[17][0] = 1./14.; mg_lb[17][1] =-5./14.; mg_lb[17][2] = 1./7.; mg_lb[17][3] =-5./14.; mg_lb[17][4] = 1./7.;
  mg_lb[17][5] =-3./14.; mg_lb[17][6] =-3./14.; mg_lb[17][7] = 0.; mg_lb[17][8] = 0.; mg_lb[17][9] = 0.;
  mg_lb[17][10]= 0.; mg_lb[17][11]= 5./14.; mg_lb[17][12]= 5./14.; mg_lb[17][13]= 5./14.; mg_lb[17][14]= 5./14.;
  mg_lb[17][15]=-1./7.; mg_lb[17][16]=-1./7.; mg_lb[17][17]=-1./7.; mg_lb[17][18]=-1./7.;

  mg_lb[18][0] = 1./10.; mg_lb[18][1] = 0.; mg_lb[18][2] =-3./10.; mg_lb[18][3] = 0.; mg_lb[18][4] =-3./10.;
  mg_lb[18][5] =-3./10.; mg_lb[18][6] =-3./10.; mg_lb[18][7] = 0.; mg_lb[18][8] = 0.; mg_lb[18][9] = 0.;
  mg_lb[18][10]= 0.; mg_lb[18][11]= 0.; mg_lb[18][12]= 0.; mg_lb[18][13]= 0.; mg_lb[18][14]= 0.;
  mg_lb[18][15]= 3./10.; mg_lb[18][16]= 3./10.; mg_lb[18][17]= 3./10.; mg_lb[18][18]= 3./10.;

  // Fluid starts at rest with uniform density on the (subNb+3)^3-sized grid.
  for(i=0; i<subNbx+3; i++)
    for(j=0; j<subNby+3; j++)
      for(k=0; k<subNbz+3; k++){
	u_lb[i][j][k][0]=0.0;
	u_lb[i][j][k][1]=0.0;
	u_lb[i][j][k][2]=0.0;
	density_lb[i][j][k] = densityinit;
  }
  // Every one of the 19 populations gets the same share, density/19.
  for(i=0; i<subNbx; i++)
    for(j=0; j<subNby; j++)
      for(k=0; k<subNbz; k++)
	for(m=0; m<19; m++)
	  f_lb[i][j][k][m] = density_lb[i][j][k]/19.0;

}

//==========================================================================
// Initialize the equilibrium distribution functions
// (this just uses the initial fluid parameters, and assumes no forces).
//
// Zeroes Ff and the Fftempx/y/z buffers, then:
//  - readrestart == 0: sets step=0, calls parametercalc_full(), evaluates
//    the equilibrium distribution on the index range [1,subNb-1) in each
//    direction, exchanges the outer feq planes (and feqn when typeLB == 2)
//    with the neighbouring processes in x, then y, then z, and finally
//    copies feq/feqn into feqold/feqoldn when typeLB == 2.
//  - otherwise: sets step=1, calls read_restartfile(), exchanges the outer
//    feqold/feqoldn planes when typeLB == 2, then calls parametercalc_full().
//==========================================================================
void FixLbFluid::initialize_feq(void)
{
  int i,j,k,p;
  MPI_Request requests[8];
  MPI_Status statuses[8];
  int numrequests;

  // If using the standard LB integrator, do not need to send feqn.
  if(typeLB == 1){
    numrequests = 4;
  }else{
    numrequests = 8;
  }

  // Clear the force array and its three temporary buffers.
  std::fill(&Ff[0][0][0][0],&Ff[0][0][0][0] + (subNbx+3)*(subNby+3)*(subNbz+3)*3,0.0);
  std::fill(&Fftempx[0][0][0][0],&Fftempx[0][0][0][0] + 5*(subNby+3)*(subNbz+3)*3,0.0);
  std::fill(&Fftempy[0][0][0][0],&Fftempy[0][0][0][0] + (subNbx+3)*5*(subNbz+3)*3,0.0);
  std::fill(&Fftempz[0][0][0][0],&Fftempz[0][0][0][0] + (subNbx+3)*(subNby+3)*5*3,0.0);

  if(readrestart == 0){
    step=0;

    parametercalc_full();
    // equilibriumdist is a pointer to member function (D3Q15 or D3Q19 variant).
    (*this.*equilibriumdist)(1,subNbx-1,1,subNby-1,1,subNbz-1);

    // x-direction exchange.  Sends to one neighbour use the tag that the
    // matching receive on the other side expects (15<->15, 25<->25 for feq;
    // 10<->10, 20<->20 for feqn).
    for(i=0; i<numrequests; i++)
      requests[i]=MPI_REQUEST_NULL;
    MPI_Isend(&feq[1][1][1][0],1,passxf,comm->procneigh[0][0],15,world,&requests[0]);
    MPI_Irecv(&feq[0][1][1][0],1,passxf,comm->procneigh[0][0],25,world,&requests[1]);
    MPI_Isend(&feq[subNbx-2][1][1][0],1,passxf,comm->procneigh[0][1],25,world,&requests[2]);
    MPI_Irecv(&feq[subNbx-1][1][1][0],1,passxf,comm->procneigh[0][1],15,world,&requests[3]);
    if(typeLB == 2){
      MPI_Isend(&feqn[1][1][1][0],1,passxf,comm->procneigh[0][0],10,world,&requests[4]);
      MPI_Irecv(&feqn[0][1][1][0],1,passxf,comm->procneigh[0][0],20,world,&requests[5]);
      MPI_Isend(&feqn[subNbx-2][1][1][0],1,passxf,comm->procneigh[0][1],20,world,&requests[6]);
      MPI_Irecv(&feqn[subNbx-1][1][1][0],1,passxf,comm->procneigh[0][1],10,world,&requests[7]);
    }
    MPI_Waitall(numrequests,requests,statuses);

    // y-direction exchange (starts at x index 0, after the x exchange completed).
    for(i=0; i<numrequests; i++)
      requests[i]=MPI_REQUEST_NULL;
    MPI_Isend(&feq[0][1][1][0],1,passyf,comm->procneigh[1][0],15,world,&requests[0]);
    MPI_Irecv(&feq[0][0][1][0],1,passyf,comm->procneigh[1][0],25,world,&requests[1]);
    MPI_Isend(&feq[0][subNby-2][1][0],1,passyf,comm->procneigh[1][1],25,world,&requests[2]);
    MPI_Irecv(&feq[0][subNby-1][1][0],1,passyf,comm->procneigh[1][1],15,world,&requests[3]);
    if(typeLB == 2){
      MPI_Isend(&feqn[0][1][1][0],1,passyf,comm->procneigh[1][0],10,world,&requests[4]);
      MPI_Irecv(&feqn[0][0][1][0],1,passyf,comm->procneigh[1][0],20,world,&requests[5]);
      MPI_Isend(&feqn[0][subNby-2][1][0],1,passyf,comm->procneigh[1][1],20,world,&requests[6]);
      MPI_Irecv(&feqn[0][subNby-1][1][0],1,passyf,comm->procneigh[1][1],10,world,&requests[7]);
    }
    MPI_Waitall(numrequests,requests,statuses);

    // z-direction exchange (starts at x and y index 0).
    for(i=0; i<numrequests; i++)
      requests[i]=MPI_REQUEST_NULL;
    MPI_Isend(&feq[0][0][1][0],1,passzf,comm->procneigh[2][0],15,world,&requests[0]);
    MPI_Irecv(&feq[0][0][0][0],1,passzf,comm->procneigh[2][0],25,world,&requests[1]);
    MPI_Isend(&feq[0][0][subNbz-2][0],1,passzf,comm->procneigh[2][1],25,world,&requests[2]);
    MPI_Irecv(&feq[0][0][subNbz-1][0],1,passzf,comm->procneigh[2][1],15,world,&requests[3]);
    if(typeLB == 2){
      MPI_Isend(&feqn[0][0][1][0],1,passzf,comm->procneigh[2][0],10,world,&requests[4]);
      MPI_Irecv(&feqn[0][0][0][0],1,passzf,comm->procneigh[2][0],20,world,&requests[5]);
      MPI_Isend(&feqn[0][0][subNbz-2][0],1,passzf,comm->procneigh[2][1],20,world,&requests[6]);
      MPI_Irecv(&feqn[0][0][subNbz-1][0],1,passzf,comm->procneigh[2][1],10,world,&requests[7]);
    }
    MPI_Waitall(numrequests,requests,statuses);

    //Save feqold.
    if(typeLB == 2){
      for(i=0; i<subNbx; i++)
	for(j=0; j<subNby; j++)
	  for(k=0; k<subNbz; k++)
	    for(p=0; p<numvel; p++){
	      feqold[i][j][k][p] = feq[i][j][k][p];
	      feqoldn[i][j][k][p] = feqn[i][j][k][p];
	    }
    }
  }else{
    step = 1;

    read_restartfile();

    // Only the typeLB == 2 integrator uses feqold/feqoldn, so only then
    // are their outer planes exchanged (always all 8 requests).
    if(typeLB == 2){
      for(i=0; i<8; i++)
	requests[i]=MPI_REQUEST_NULL;
      MPI_Isend(&feqold[1][1][1][0],1,passxf,comm->procneigh[0][0],15,world,&requests[0]);
      MPI_Irecv(&feqold[0][1][1][0],1,passxf,comm->procneigh[0][0],25,world,&requests[1]);
      MPI_Isend(&feqold[subNbx-2][1][1][0],1,passxf,comm->procneigh[0][1],25,world,&requests[2]);
      MPI_Irecv(&feqold[subNbx-1][1][1][0],1,passxf,comm->procneigh[0][1],15,world,&requests[3]);
      MPI_Isend(&feqoldn[1][1][1][0],1,passxf,comm->procneigh[0][0],10,world,&requests[4]);
      MPI_Irecv(&feqoldn[0][1][1][0],1,passxf,comm->procneigh[0][0],20,world,&requests[5]);
      MPI_Isend(&feqoldn[subNbx-2][1][1][0],1,passxf,comm->procneigh[0][1],20,world,&requests[6]);
      MPI_Irecv(&feqoldn[subNbx-1][1][1][0],1,passxf,comm->procneigh[0][1],10,world,&requests[7]);
      MPI_Waitall(8,requests,statuses);

      for(i=0; i<8; i++)
	requests[i]=MPI_REQUEST_NULL;
      MPI_Isend(&feqold[0][1][1][0],1,passyf,comm->procneigh[1][0],15,world,&requests[0]);
      MPI_Irecv(&feqold[0][0][1][0],1,passyf,comm->procneigh[1][0],25,world,&requests[1]);
      MPI_Isend(&feqold[0][subNby-2][1][0],1,passyf,comm->procneigh[1][1],25,world,&requests[2]);
      MPI_Irecv(&feqold[0][subNby-1][1][0],1,passyf,comm->procneigh[1][1],15,world,&requests[3]);
      MPI_Isend(&feqoldn[0][1][1][0],1,passyf,comm->procneigh[1][0],10,world,&requests[4]);
      MPI_Irecv(&feqoldn[0][0][1][0],1,passyf,comm->procneigh[1][0],20,world,&requests[5]);
      MPI_Isend(&feqoldn[0][subNby-2][1][0],1,passyf,comm->procneigh[1][1],20,world,&requests[6]);
      MPI_Irecv(&feqoldn[0][subNby-1][1][0],1,passyf,comm->procneigh[1][1],10,world,&requests[7]);
      MPI_Waitall(8,requests,statuses);

      for(i=0; i<8; i++)
	requests[i]=MPI_REQUEST_NULL;
      MPI_Isend(&feqold[0][0][1][0],1,passzf,comm->procneigh[2][0],15,world,&requests[0]);
      MPI_Irecv(&feqold[0][0][0][0],1,passzf,comm->procneigh[2][0],25,world,&requests[1]);
      MPI_Isend(&feqold[0][0][subNbz-2][0],1,passzf,comm->procneigh[2][1],25,world,&requests[2]);
      MPI_Irecv(&feqold[0][0][subNbz-1][0],1,passzf,comm->procneigh[2][1],15,world,&requests[3]);
      MPI_Isend(&feqoldn[0][0][1][0],1,passzf,comm->procneigh[2][0],10,world,&requests[4]);
      MPI_Irecv(&feqoldn[0][0][0][0],1,passzf,comm->procneigh[2][0],20,world,&requests[5]);
      MPI_Isend(&feqoldn[0][0][subNbz-2][0],1,passzf,comm->procneigh[2][1],20,world,&requests[6]);
      MPI_Irecv(&feqoldn[0][0][subNbz-1][0],1,passzf,comm->procneigh[2][1],10,world,&requests[7]);
      MPI_Waitall(8,requests,statuses);
    }
    parametercalc_full();
  }
}

//==========================================================================
// Compute the lattice Boltzmann equilibrium distribution functions for
// the D3Q15 model.
//==========================================================================
// Parameters: half-open index ranges [xstart,xend), [ystart,yend),
// [zstart,zend) of the lattice sites to process.  Each site reads
// density_lb at its +/-1 neighbours (and up to +/-3 in z next to a fixed
// wall), so those entries must already be valid.
// Writes feq (and a copy in feqn when typeLB == 2); when noisestress == 1
// a random contribution scaled by noisefactor is added to feq only.
void FixLbFluid::equilibriumdist15(int xstart, int xend, int ystart, int yend, int zstart, int zend) {

  double rho;
  int i, j, k, l, iup, idwn, jup, jdwn, kup, kdwn;
  double Fx_w, Fy_w, Fz_w;

  // NOTE(review): total_density is accumulated below but never read in this function.
  double total_density(0.0);
  double drhox, drhoy, drhoz, drhoxx, drhoyy, drhozz;
  double Pxx, Pyy, Pzz, Pxy, Pxz, Pyz;
  double grs, p0;
  double dPdrho;

  double S[2][3],std;
  int jj;

  double etacov[15],ghostnoise;


  for (i=xstart; i<xend; i++) {
    iup=i+1;
    idwn=i-1;
    for (j=ystart; j<yend; j++) {
      jup=j+1;
      jdwn=j-1;
      for (k=zstart; k<zend; k++) {
	kup=k+1;
	kdwn=k-1;

	rho=density_lb[i][j][k];
	total_density += rho;

	// Derivatives.
	// Central differences: first derivative over 2 lattice spacings,
	// second derivative from the 3-point stencil.
	drhox = (density_lb[iup][j][k] - density_lb[idwn][j][k])/2.0;
	drhoxx = (density_lb[iup][j][k] - 2.0*density_lb[i][j][k] +
		  density_lb[idwn][j][k]);

	drhoy = (density_lb[i][jup][k] - density_lb[i][jdwn][k])/2.0;
	drhoyy = (density_lb[i][jup][k] - 2.0*density_lb[i][j][k] +
		  density_lb[i][jdwn][k]);

	drhoz = (density_lb[i][j][kup] - density_lb[i][j][kdwn])/2.0;
	drhozz = (density_lb[i][j][kup] - 2.0*density_lb[i][j][k] +
		  density_lb[i][j][kdwn]);

	// Need one-sided derivatives for the boundary of the domain, if fixed boundary
	// conditions are used.
	if(domain->periodicity[2]==0){
	  if(comm->myloc[2]==0 && k==1){
	    drhoz = (-3.0*density_lb[i][j][k] + 4.0*density_lb[i][j][k+1] -
		     density_lb[i][j][k+2])/2.0;
	    drhozz = (-density_lb[i][j][k+3] + 4.0*density_lb[i][j][k+2] -
		      5.0*density_lb[i][j][k+1] + 2.0*rho);
	  }
	  if(comm->myloc[2]==comm->procgrid[2]-1 && k==subNbz-2){
	    drhoz = -(-3.0*density_lb[i][j][k] + 4.0*density_lb[i][j][k-1] -
		      density_lb[i][j][k-2])/2.0;
	    drhozz = (-density_lb[i][j][k-3] + 4.0*density_lb[i][j][k-2] -
		      5.0*density_lb[i][j][k-1] + 2.0*rho);
	  }
	}

	grs = drhox*drhox + drhoy*drhoy + drhoz*drhoz;

	p0 = rho*a_0-kappa_lb*rho*(drhoxx + drhoyy + drhozz);
	// kappa_lb is the square gradient coeff in the pressure tensor

	dPdrho = a_0; //assuming here that kappa_lb = 0.


	// Pressure tensor.  The two integrator types differ only in the
	// prefactor of the velocity-gradient term: (tau-0.5) for typeLB 1,
	// tau for typeLB 2.  For any other typeLB the P?? values stay unset.
	if(typeLB==1){
	  Pxx = p0 + kappa_lb*(drhox*drhox - 0.5*grs)+(tau-0.5)*(1.0/3.0-dPdrho)*
	    (3.0*u_lb[i][j][k][0]*drhox+u_lb[i][j][k][1]*drhoy+u_lb[i][j][k][2]*drhoz);
	  Pyy = p0 + kappa_lb*(drhoy*drhoy - 0.5*grs)+(tau-0.5)*(1.0/3.0-dPdrho)*
	    (u_lb[i][j][k][0]*drhox+3.0*u_lb[i][j][k][1]*drhoy+u_lb[i][j][k][2]*drhoz);
	  Pzz = p0 + kappa_lb*(drhoz*drhoz - 0.5*grs)+(tau-0.5)*(1.0/3.0-dPdrho)*
	    (u_lb[i][j][k][0]*drhox+u_lb[i][j][k][1]*drhoy+3.0*u_lb[i][j][k][2]*drhoz);
	  Pxy = kappa_lb*drhox*drhoy+(tau-0.5)*(1.0/3.0-dPdrho)*
	    (u_lb[i][j][k][0]*drhoy+u_lb[i][j][k][1]*drhox);
	  Pxz = kappa_lb*drhox*drhoz+(tau-0.5)*(1.0/3.0-dPdrho)*
	    (u_lb[i][j][k][0]*drhoz+u_lb[i][j][k][2]*drhox);
	  Pyz = kappa_lb*drhoy*drhoz+(tau-0.5)*(1.0/3.0-dPdrho)*
	    (u_lb[i][j][k][1]*drhoz+u_lb[i][j][k][2]*drhoy);
	}else if(typeLB==2){
	  Pxx = p0 + kappa_lb*(drhox*drhox - 0.5*grs)+tau*(1.0/3.0-dPdrho)*
	    (3.0*u_lb[i][j][k][0]*drhox+u_lb[i][j][k][1]*drhoy+u_lb[i][j][k][2]*drhoz);
	  Pyy = p0 + kappa_lb*(drhoy*drhoy - 0.5*grs)+tau*(1.0/3.0-dPdrho)*
	    (u_lb[i][j][k][0]*drhox+3.0*u_lb[i][j][k][1]*drhoy+u_lb[i][j][k][2]*drhoz);
	  Pzz = p0 + kappa_lb*(drhoz*drhoz - 0.5*grs)+tau*(1.0/3.0-dPdrho)*
	    (u_lb[i][j][k][0]*drhox+u_lb[i][j][k][1]*drhoy+3.0*u_lb[i][j][k][2]*drhoz);
	  Pxy = kappa_lb*drhox*drhoy+tau*(1.0/3.0-dPdrho)*
	    (u_lb[i][j][k][0]*drhoy+u_lb[i][j][k][1]*drhox);
	  Pxz = kappa_lb*drhox*drhoz+tau*(1.0/3.0-dPdrho)*
	    (u_lb[i][j][k][0]*drhoz+u_lb[i][j][k][2]*drhox);
	  Pyz = kappa_lb*drhoy*drhoz+tau*(1.0/3.0-dPdrho)*
	    (u_lb[i][j][k][1]*drhoz+u_lb[i][j][k][2]*drhoy);
	}

	Fx_w = Ff[i][j][k][0];
	Fy_w = Ff[i][j][k][1];
	Fz_w = Ff[i][j][k][2];

	// etacov[ii] is paired with row ii of mg_lb below.  Note the D3Q15
	// ordering of the off-diagonal entries: 7 = xy, 8 = yz, 9 = xz.
	etacov[0] = rho;
	etacov[1] = rho*u_lb[i][j][k][0] + Fx_w*tau + rho*bodyforcex*tau;
	etacov[2] = rho*u_lb[i][j][k][1] + Fy_w*tau + rho*bodyforcey*tau;
	etacov[3] = rho*u_lb[i][j][k][2] + Fz_w*tau + rho*bodyforcez*tau;

	etacov[4] = Pxx + rho*u_lb[i][j][k][0]*u_lb[i][j][k][0] -rho/3. +
	  tau*(2.0*u_lb[i][j][k][0]*(Fx_w+rho*bodyforcex));
	etacov[5] = Pyy + rho*u_lb[i][j][k][1]*u_lb[i][j][k][1] -rho/3. +
	  tau*(2.0*u_lb[i][j][k][1]*(Fy_w+rho*bodyforcey));
	etacov[6] = Pzz + rho*u_lb[i][j][k][2]*u_lb[i][j][k][2] -rho/3. +
	  tau*(2.0*u_lb[i][j][k][2]*(Fz_w+rho*bodyforcez));
	etacov[7] = Pxy + rho*u_lb[i][j][k][0]*u_lb[i][j][k][1] +
	  tau*(u_lb[i][j][k][0]*(Fy_w+rho*bodyforcey) + (Fx_w+rho*bodyforcex)*u_lb[i][j][k][1]);
	etacov[8] = Pyz + rho*u_lb[i][j][k][1]*u_lb[i][j][k][2] +
	  tau*(u_lb[i][j][k][1]*(Fz_w+rho*bodyforcez) + (Fy_w+rho*bodyforcey)*u_lb[i][j][k][2]);
	etacov[9] = Pxz + rho*u_lb[i][j][k][0]*u_lb[i][j][k][2] +
	  tau*(u_lb[i][j][k][0]*(Fz_w+rho*bodyforcez) + (Fx_w+rho*bodyforcex)*u_lb[i][j][k][2]);
	etacov[10] = 0.0;
	etacov[11] = 0.0;
	etacov[12] = 0.0;
	etacov[13] = rho*u_lb[i][j][k][0]*u_lb[i][j][k][1]*u_lb[i][j][k][2];
	const double TrP = Pxx+Pyy+Pzz;
	etacov[14] = K_0*(rho-TrP);

	// feq[l] = sum over rows ii of w_lb[l]*mg_lb[ii][l]*etacov[ii]*Ng_lb[ii].
	for (l=0; l<15; l++) {

	  feq[i][j][k][l] = 0.0;
	  for (int ii=0; ii<15; ii++)
	    feq[i][j][k][l] += w_lb[l]*mg_lb[ii][l]*etacov[ii]*Ng_lb[ii];

	  if(typeLB == 2){
	    feqn[i][j][k][l] = feq[i][j][k][l];
	  }
	}

	// Optional noise: etacov[4..14] are overwritten with random values
	// (etacov[0..3] are not used again) and added to feq only.  The order
	// of the random->gaussian() calls determines the generated sequence.
	if(noisestress==1){
	  std = sqrt(namp*rho);

	  for(jj=0; jj<3; jj++)
	    S[0][jj] = std*random->gaussian();
	  for(jj=0; jj<3; jj++)
	    S[1][jj] = std*random->gaussian();

	  etacov[4] = (S[0][0]*sqrt(3.0-3.0*a_0));
	  etacov[5] = ((1.0-3.0*a_0)*S[0][0]/sqrt(3.0-3.0*a_0)+
		       sqrt((8.0-12.0*a_0)/(3.0-3.0*a_0))*S[0][1]);
	  etacov[6] = ((1.0-3.0*a_0)*S[0][0]/sqrt(3.0-3.0*a_0)+
		       (2.0-6.0*a_0)*S[0][1]/sqrt((8.0-12.0*a_0)*(3.0-3.0*a_0))+
		       sqrt((5.0-9.0*a_0)/(2.0-3.0*a_0))*S[0][2]);
	  etacov[7] = S[1][0];
	  etacov[8] = S[1][1];
	  etacov[9] = S[1][2];

	  for (l=10; l<15; l++) {
	    etacov[l] = sqrt(9.0*namp*rho/Ng_lb[l])*random->gaussian();
	  }
	  etacov[14] += -K_0*(etacov[4]+etacov[5]+etacov[6]);  //correction from noise to TrP

	  for (l=0; l<15; l++) {
	    ghostnoise = w_lb[l]*
	      (mg_lb[4][l]*etacov[4]*Ng_lb[4] + mg_lb[5][l]*etacov[5]*Ng_lb[5] +
	       mg_lb[6][l]*etacov[6]*Ng_lb[6] + mg_lb[7][l]*etacov[7]*Ng_lb[7] +
	       mg_lb[8][l]*etacov[8]*Ng_lb[8] + mg_lb[9][l]*etacov[9]*Ng_lb[9] +
	       mg_lb[10][l]*etacov[10]*Ng_lb[10] + mg_lb[11][l]*etacov[11]*Ng_lb[11] +
	       mg_lb[12][l]*etacov[12]*Ng_lb[12] + mg_lb[13][l]*etacov[13]*Ng_lb[13] +
	       mg_lb[14][l]*etacov[14]*Ng_lb[14]);
	    feq[i][j][k][l] += ghostnoise*noisefactor;
	  }
	}
      }
    }
  }
}

//==========================================================================
// Compute the lattice Boltzmann equilibrium distribution functions for
// the D3Q19 model.
//
// Parameters: half-open index ranges [xstart,xend), [ystart,yend),
// [zstart,zend) of the lattice sites to process.  Same structure as the
// D3Q15 routine, but with 19 entries, etacov[8] = xz and etacov[9] = yz,
// and etacov[10..18] equal to zero unless noise is enabled.
//==========================================================================
void FixLbFluid::equilibriumdist19(int xstart, int xend, int ystart, int yend, int zstart, int zend) {

  double rho;
  int i, j, k, l, iup, idwn, jup, jdwn, kup, kdwn;
  double Fx_w, Fy_w, Fz_w;

  // NOTE(review): total_density is accumulated below but never read in this function.
  double total_density(0.0);
  double drhox, drhoy, drhoz, drhoxx, drhoyy, drhozz;
  double Pxx, Pyy, Pzz, Pxy, Pxz, Pyz;
  double grs, p0;
  double dPdrho;

  double S[2][3],std;
  int jj;

  double etacov[19],ghostnoise;

  for (i=xstart; i<xend; i++) {
    iup=i+1;
    idwn=i-1;
    for (j=ystart; j<yend; j++) {
      jup=j+1;
      jdwn=j-1;
      for (k=zstart; k<zend; k++) {
	kup=k+1;
	kdwn=k-1;

	rho=density_lb[i][j][k];
	total_density += rho;

	// Derivatives.
	drhox = (density_lb[iup][j][k] - density_lb[idwn][j][k])/2.0;
	drhoxx = (density_lb[iup][j][k] - 2.0*density_lb[i][j][k] +
		  density_lb[idwn][j][k]);

	drhoy = (density_lb[i][jup][k] - density_lb[i][jdwn][k])/2.0;
	drhoyy = (density_lb[i][jup][k] - 2.0*density_lb[i][j][k] +
		  density_lb[i][jdwn][k]);

	drhoz = (density_lb[i][j][kup] - density_lb[i][j][kdwn])/2.0;
	drhozz = (density_lb[i][j][kup] - 2.0*density_lb[i][j][k] +
		  density_lb[i][j][kdwn]);

	// Need one-sided derivatives for the boundary of the domain, if fixed boundary
	// conditions are used.
	if(domain->periodicity[2]==0){
	  if(comm->myloc[2]==0 && k==1){
	    drhoz = (-3.0*density_lb[i][j][k] + 4.0*density_lb[i][j][k+1] -
		     density_lb[i][j][k+2])/2.0;
	    drhozz = (-density_lb[i][j][k+3] + 4.0*density_lb[i][j][k+2] -
		      5.0*density_lb[i][j][k+1] + 2.0*rho);
	  }
	  if(comm->myloc[2]==comm->procgrid[2]-1 && k==subNbz-2){
	    drhoz = -(-3.0*density_lb[i][j][k] + 4.0*density_lb[i][j][k-1] -
		      density_lb[i][j][k-2])/2.0;
	    drhozz = (-density_lb[i][j][k-3] + 4.0*density_lb[i][j][k-2] -
		      5.0*density_lb[i][j][k-1] + 2.0*rho);
	  }
	}

	grs = drhox*drhox + drhoy*drhoy + drhoz*drhoz;

	p0 = rho*a_0-kappa_lb*rho*(drhoxx + drhoyy + drhozz);
	// kappa_lb is the square gradient coeff in the pressure tensor

	dPdrho = a_0; //assuming here that kappa_lb = 0.


	// Pressure tensor; prefactor (tau-0.5) for typeLB 1, tau for typeLB 2.
	if(typeLB==1){
	  Pxx = p0 + kappa_lb*(drhox*drhox - 0.5*grs)+(tau-0.5)*(1.0/3.0-dPdrho)*
	    (3.0*u_lb[i][j][k][0]*drhox+u_lb[i][j][k][1]*drhoy+u_lb[i][j][k][2]*drhoz);
	  Pyy = p0 + kappa_lb*(drhoy*drhoy - 0.5*grs)+(tau-0.5)*(1.0/3.0-dPdrho)*
	    (u_lb[i][j][k][0]*drhox+3.0*u_lb[i][j][k][1]*drhoy+u_lb[i][j][k][2]*drhoz);
	  Pzz = p0 + kappa_lb*(drhoz*drhoz - 0.5*grs)+(tau-0.5)*(1.0/3.0-dPdrho)*
	    (u_lb[i][j][k][0]*drhox+u_lb[i][j][k][1]*drhoy+3.0*u_lb[i][j][k][2]*drhoz);
	  Pxy = kappa_lb*drhox*drhoy+(tau-0.5)*(1.0/3.0-dPdrho)*
	    (u_lb[i][j][k][0]*drhoy+u_lb[i][j][k][1]*drhox);
	  Pxz = kappa_lb*drhox*drhoz+(tau-0.5)*(1.0/3.0-dPdrho)*
	    (u_lb[i][j][k][0]*drhoz+u_lb[i][j][k][2]*drhox);
	  Pyz = kappa_lb*drhoy*drhoz+(tau-0.5)*(1.0/3.0-dPdrho)*
	    (u_lb[i][j][k][1]*drhoz+u_lb[i][j][k][2]*drhoy);
	}else if(typeLB==2){
	  Pxx = p0 + kappa_lb*(drhox*drhox - 0.5*grs)+tau*(1.0/3.0-dPdrho)*
	    (3.0*u_lb[i][j][k][0]*drhox+u_lb[i][j][k][1]*drhoy+u_lb[i][j][k][2]*drhoz);
	  Pyy = p0 + kappa_lb*(drhoy*drhoy - 0.5*grs)+tau*(1.0/3.0-dPdrho)*
	    (u_lb[i][j][k][0]*drhox+3.0*u_lb[i][j][k][1]*drhoy+u_lb[i][j][k][2]*drhoz);
	  Pzz = p0 + kappa_lb*(drhoz*drhoz - 0.5*grs)+tau*(1.0/3.0-dPdrho)*
	    (u_lb[i][j][k][0]*drhox+u_lb[i][j][k][1]*drhoy+3.0*u_lb[i][j][k][2]*drhoz);
	  Pxy = kappa_lb*drhox*drhoy+tau*(1.0/3.0-dPdrho)*
	    (u_lb[i][j][k][0]*drhoy+u_lb[i][j][k][1]*drhox);
	  Pxz = kappa_lb*drhox*drhoz+tau*(1.0/3.0-dPdrho)*
	    (u_lb[i][j][k][0]*drhoz+u_lb[i][j][k][2]*drhox);
	  Pyz = kappa_lb*drhoy*drhoz+tau*(1.0/3.0-dPdrho)*
	    (u_lb[i][j][k][1]*drhoz+u_lb[i][j][k][2]*drhoy);
	}

	Fx_w = Ff[i][j][k][0];
	Fy_w = Ff[i][j][k][1];
	Fz_w = Ff[i][j][k][2];

	// etacov[ii] is paired with row ii of mg_lb below.  D3Q19 ordering of
	// the off-diagonal entries: 7 = xy, 8 = xz, 9 = yz.
	etacov[0] = rho;
	etacov[1] = rho*u_lb[i][j][k][0] + Fx_w*tau + rho*bodyforcex*tau;
	etacov[2] = rho*u_lb[i][j][k][1] + Fy_w*tau + rho*bodyforcey*tau;
	etacov[3] = rho*u_lb[i][j][k][2] + Fz_w*tau + rho*bodyforcez*tau;

	etacov[4] = Pxx + rho*u_lb[i][j][k][0]*u_lb[i][j][k][0] -rho/3. +
	  tau*(2.0*u_lb[i][j][k][0]*(Fx_w+rho*bodyforcex));
	etacov[5] = Pyy + rho*u_lb[i][j][k][1]*u_lb[i][j][k][1] -rho/3. +
	  tau*(2.0*u_lb[i][j][k][1]*(Fy_w+rho*bodyforcey));
	etacov[6] = Pzz + rho*u_lb[i][j][k][2]*u_lb[i][j][k][2] -rho/3. +
	  tau*(2.0*u_lb[i][j][k][2]*(Fz_w+rho*bodyforcez));
	etacov[7] = Pxy + rho*u_lb[i][j][k][0]*u_lb[i][j][k][1] +
	  tau*(u_lb[i][j][k][0]*(Fy_w+rho*bodyforcey) + (Fx_w+rho*bodyforcex)*u_lb[i][j][k][1]);
	etacov[8] = Pxz + rho*u_lb[i][j][k][0]*u_lb[i][j][k][2] +
	  tau*(u_lb[i][j][k][0]*(Fz_w+rho*bodyforcez) + (Fx_w+rho*bodyforcex)*u_lb[i][j][k][2]);
	etacov[9] = Pyz + rho*u_lb[i][j][k][1]*u_lb[i][j][k][2] +
	  tau*(u_lb[i][j][k][1]*(Fz_w+rho*bodyforcez) + (Fy_w+rho*bodyforcey)*u_lb[i][j][k][2]);
	etacov[10] = 0.0;
	etacov[11] = 0.0;
	etacov[12] = 0.0;
	etacov[13] = 0.0;
	etacov[14] = 0.0;
	etacov[15] = 0.0;
	etacov[16] = 0.0;
	etacov[17] = 0.0;
	etacov[18] = 0.0;

	// feq[l] = sum over rows ii of w_lb[l]*mg_lb[ii][l]*etacov[ii]*Ng_lb[ii].
	for (l=0; l<19; l++) {

	  feq[i][j][k][l] = 0.0;
	  for (int ii=0; ii<19; ii++)
	    feq[i][j][k][l] += w_lb[l]*mg_lb[ii][l]*etacov[ii]*Ng_lb[ii];

	  if(typeLB == 2){
	    feqn[i][j][k][l] = feq[i][j][k][l];
	  }
	}

	// Optional noise: etacov[4..18] are overwritten with random values and
	// the resulting contribution is added to feq only.
	if(noisestress==1){
	  std = sqrt(namp*rho);

	  for(jj=0; jj<3; jj++)
	    S[0][jj] = std*random->gaussian();
	  for(jj=0; jj<3; jj++)
	    S[1][jj] = std*random->gaussian();

	  etacov[4] = (S[0][0]*sqrt(3.0-3.0*a_0));
	  etacov[5] = ((1.0-3.0*a_0)*S[0][0]/sqrt(3.0-3.0*a_0)+
		       sqrt((8.0-12.0*a_0)/(3.0-3.0*a_0))*S[0][1]);
	  etacov[6] = ((1.0-3.0*a_0)*S[0][0]/sqrt(3.0-3.0*a_0)+
		       (2.0-6.0*a_0)*S[0][1]/sqrt((8.0-12.0*a_0)*(3.0-3.0*a_0))+
		       sqrt((5.0-9.0*a_0)/(2.0-3.0*a_0))*S[0][2]);
	  etacov[7] = S[1][0];
	  etacov[8] = S[1][1];
	  etacov[9] = S[1][2];

	  for (l=10; l<19; l++) {
	    etacov[l] = sqrt(9.0*namp*rho/Ng_lb[l])*random->gaussian();
	  }

	  for (l=0; l<19; l++) {
	    ghostnoise = w_lb[l]*
	      (mg_lb[4][l]*etacov[4]*Ng_lb[4] + mg_lb[5][l]*etacov[5]*Ng_lb[5] +
	       mg_lb[6][l]*etacov[6]*Ng_lb[6] + mg_lb[7][l]*etacov[7]*Ng_lb[7] +
	       mg_lb[8][l]*etacov[8]*Ng_lb[8] + mg_lb[9][l]*etacov[9]*Ng_lb[9] +
	       mg_lb[10][l]*etacov[10]*Ng_lb[10] + mg_lb[11][l]*etacov[11]*Ng_lb[11] +
	       mg_lb[12][l]*etacov[12]*Ng_lb[12] + mg_lb[13][l]*etacov[13]*Ng_lb[13] +
	       mg_lb[14][l]*etacov[14]*Ng_lb[14] + mg_lb[15][l]*etacov[15]*Ng_lb[15] +
	       mg_lb[16][l]*etacov[16]*Ng_lb[16] + mg_lb[17][l]*etacov[17]*Ng_lb[17] +
	       mg_lb[18][l]*etacov[18]*Ng_lb[18]);
	    feq[i][j][k][l] += ghostnoise*noisefactor;
	  }
	}

      }
    }
  }
}

//==========================================================================
// Calculate the fluid density and velocity over the entire simulation
// domain.
//
// Overlaps communication with computation: each f_lb plane exchange is
// posted, parametercalc_part() runs on sites whose f_lb is already valid,
// and only then is the exchange waited on.  Afterwards additional planes
// of u_lb (and of density_lb when setGamma == 0) are exchanged.
//==========================================================================
void FixLbFluid::parametercalc_full(void)
{
  MPI_Request requests[4];
  MPI_Status statuses[4];
  MPI_Request requests2[12];
  MPI_Status statuses2[12];
  int numrequests;
  int i;

  //--------------------------------------------------------------------------
  // send the boundaries of f_lb, as they will be needed later by the update
  // routine, and use these to calculate the density and velocity on the
  // boundary.
  //--------------------------------------------------------------------------
  for(i=0; i<4; i++)
    requests[i]=MPI_REQUEST_NULL;
  MPI_Isend(&f_lb[1][1][1][0],1,passxf,comm->procneigh[0][0],10,world,&requests[0]);
  MPI_Irecv(&f_lb[0][1][1][0],1,passxf,comm->procneigh[0][0],20,world,&requests[1]);
  MPI_Isend(&f_lb[subNbx-2][1][1][0],1,passxf,comm->procneigh[0][1],20,world,&requests[2]);
  MPI_Irecv(&f_lb[subNbx-1][1][1][0],1,passxf,comm->procneigh[0][1],10,world,&requests[3]);
  // Interior sites do not depend on the exchange in flight.
  parametercalc_part(1,subNbx-1,1,subNby-1,1,subNbz-1);
  MPI_Waitall(4,requests,statuses);

  for(i=0; i<4; i++)
    requests[i]=MPI_REQUEST_NULL;
  MPI_Isend(&f_lb[0][1][1][0],1,passyf,comm->procneigh[1][0],10,world,&requests[0]);
  MPI_Irecv(&f_lb[0][0][1][0],1,passyf,comm->procneigh[1][0],20,world,&requests[1]);
  MPI_Isend(&f_lb[0][subNby-2][1][0],1,passyf,comm->procneigh[1][1],20,world,&requests[2]);
  MPI_Irecv(&f_lb[0][subNby-1][1][0],1,passyf,comm->procneigh[1][1],10,world,&requests[3]);
  // The two x planes just received (i=0 and i=subNbx-1).
  parametercalc_part(0,1,1,subNby-1,1,subNbz-1);
  parametercalc_part(subNbx-1,subNbx,1,subNby-1,1,subNbz-1);
  MPI_Waitall(4,requests,statuses);

  for(i=0; i<4; i++)
    requests[i]=MPI_REQUEST_NULL;
  MPI_Isend(&f_lb[0][0][1][0],1,passzf,comm->procneigh[2][0],10,world,&requests[0]);
  MPI_Irecv(&f_lb[0][0][0][0],1,passzf,comm->procneigh[2][0],20,world,&requests[1]);
  MPI_Isend(&f_lb[0][0][subNbz-2][0],1,passzf,comm->procneigh[2][1],20,world,&requests[2]);
  MPI_Irecv(&f_lb[0][0][subNbz-1][0],1,passzf,comm->procneigh[2][1],10,world,&requests[3]);
  // The two y planes just received (j=0 and j=subNby-1).
  parametercalc_part(0,subNbx,0,1,1,subNbz-1);
  parametercalc_part(0,subNbx,subNby-1,subNby,1,subNbz-1);
  MPI_Waitall(4,requests,statuses);

  // Finally the two z planes (k=0 and k=subNbz-1).
  parametercalc_part(0,subNbx,0,subNby,0,1);
  parametercalc_part(0,subNbx,0,subNby,subNbz-1,subNbz);

  //--------------------------------------------------------------------------
  // Send the remaining portions of the u array (and density array if Gamma
  // is set the default way).
  //--------------------------------------------------------------------------
  if(setGamma == 0) numrequests = 12;
  else numrequests = 6;

  // x direction: planes 2 and 3 go to neighbour [0][0] and arrive there as
  // planes subNbx and subNbx+1; plane subNbx-3 goes to neighbour [0][1] and
  // arrives there as plane subNbx+2.
  for(i=0; i<numrequests; i++)
    requests2[i]=MPI_REQUEST_NULL;
  MPI_Isend(&u_lb[2][0][0][0],1,passxu,comm->procneigh[0][0],10,world,&requests2[0]);
  MPI_Isend(&u_lb[3][0][0][0],1,passxu,comm->procneigh[0][0],20,world,&requests2[1]);
  MPI_Isend(&u_lb[subNbx-3][0][0][0],1,passxu,comm->procneigh[0][1],30,world,&requests2[2]);
  MPI_Irecv(&u_lb[subNbx][0][0][0],1,passxu,comm->procneigh[0][1],10,world,&requests2[3]);
  MPI_Irecv(&u_lb[subNbx+1][0][0][0],1,passxu,comm->procneigh[0][1],20,world,&requests2[4]);
  MPI_Irecv(&u_lb[subNbx+2][0][0][0],1,passxu,comm->procneigh[0][0],30,world,&requests2[5]);
  if(setGamma==0){
    MPI_Isend(&density_lb[2][0][0],1,passxrho,comm->procneigh[0][0],40,world,&requests2[6]);
    MPI_Isend(&density_lb[3][0][0],1,passxrho,comm->procneigh[0][0],50,world,&requests2[7]);
    MPI_Isend(&density_lb[subNbx-3][0][0],1,passxrho,comm->procneigh[0][1],60,world,&requests2[8]);
    MPI_Irecv(&density_lb[subNbx][0][0],1,passxrho,comm->procneigh[0][1],40,world,&requests2[9]);
    MPI_Irecv(&density_lb[subNbx+1][0][0],1,passxrho,comm->procneigh[0][1],50,world,&requests2[10]);
    MPI_Irecv(&density_lb[subNbx+2][0][0],1,passxrho,comm->procneigh[0][0],60,world,&requests2[11]);
  }
  MPI_Waitall(numrequests,requests2,statuses2);

  // y direction: same pattern as x.
  for(i=0; i<numrequests; i++)
    requests2[i]=MPI_REQUEST_NULL;
  MPI_Isend(&u_lb[0][2][0][0],1,passyu,comm->procneigh[1][0],10,world,&requests2[0]);
  MPI_Isend(&u_lb[0][3][0][0],1,passyu,comm->procneigh[1][0],20,world,&requests2[1]);
  MPI_Isend(&u_lb[0][subNby-3][0][0],1,passyu,comm->procneigh[1][1],30,world,&requests2[2]);
  MPI_Irecv(&u_lb[0][subNby][0][0],1,passyu,comm->procneigh[1][1],10,world,&requests2[3]);
  MPI_Irecv(&u_lb[0][subNby+1][0][0],1,passyu,comm->procneigh[1][1],20,world,&requests2[4]);
  MPI_Irecv(&u_lb[0][subNby+2][0][0],1,passyu,comm->procneigh[1][0],30,world,&requests2[5]);
  if(setGamma==0){
    MPI_Isend(&density_lb[0][2][0],1,passyrho,comm->procneigh[1][0],40,world,&requests2[6]);
    MPI_Isend(&density_lb[0][3][0],1,passyrho,comm->procneigh[1][0],50,world,&requests2[7]);
    MPI_Isend(&density_lb[0][subNby-3][0],1,passyrho,comm->procneigh[1][1],60,world,&requests2[8]);
    MPI_Irecv(&density_lb[0][subNby][0],1,passyrho,comm->procneigh[1][1],40,world,&requests2[9]);
    MPI_Irecv(&density_lb[0][subNby+1][0],1,passyrho,comm->procneigh[1][1],50,world,&requests2[10]);
    MPI_Irecv(&density_lb[0][subNby+2][0],1,passyrho,comm->procneigh[1][0],60,world,&requests2[11]);
  }
  MPI_Waitall(numrequests,requests2,statuses2);

  // z direction: with fixed (non-periodic) z boundaries the processes at the
  // bottom/top of the process grid skip the exchange across the wall, so the
  // number of posted requests is counted in requestcount.
  for(i=0; i<12; i++)
    requests2[i]=MPI_REQUEST_NULL;
  int requestcount=0;
  if(domain->periodicity[2]!=0 || comm->myloc[2] != 0){
    MPI_Isend(&u_lb[0][0][2][0],1,passzu,comm->procneigh[2][0],10,world,&requests2[requestcount]);
    MPI_Isend(&u_lb[0][0][3][0],1,passzu,comm->procneigh[2][0],20,world,&requests2[requestcount+1]);
    MPI_Irecv(&u_lb[0][0][subNbz+2][0],1,passzu,comm->procneigh[2][0],30,world,&requests2[requestcount+2]);
    requestcount=requestcount+3;
    if(setGamma==0){
      MPI_Isend(&density_lb[0][0][2],1,passzrho,comm->procneigh[2][0],40,world,&requests2[requestcount]);
      MPI_Isend(&density_lb[0][0][3],1,passzrho,comm->procneigh[2][0],50,world,&requests2[requestcount+1]);
      MPI_Irecv(&density_lb[0][0][subNbz+2],1,passzrho,comm->procneigh[2][0],60,world,&requests2[requestcount+2]);
      requestcount=requestcount+3;
    }
  }
  if(domain->periodicity[2]!=0 || comm->myloc[2] != (comm->procgrid[2]-1)){
    MPI_Isend(&u_lb[0][0][subNbz-3][0],1,passzu,comm->procneigh[2][1],30,world,&requests2[requestcount]);
    MPI_Irecv(&u_lb[0][0][subNbz][0],1,passzu,comm->procneigh[2][1],10,world,&requests2[requestcount+1]);
    MPI_Irecv(&u_lb[0][0][subNbz+1][0],1,passzu,comm->procneigh[2][1],20,world,&requests2[requestcount+2]);
    requestcount=requestcount+3;
    if(setGamma==0){
      MPI_Isend(&density_lb[0][0][subNbz-3],1,passzrho,comm->procneigh[2][1],60,world,&requests2[requestcount]);
MPI_Irecv(&density_lb[0][0][subNbz],1,passzrho,comm->procneigh[2][1],40,world,&requests2[requestcount+1]); MPI_Irecv(&density_lb[0][0][subNbz+1],1,passzrho,comm->procneigh[2][1],50,world,&requests2[requestcount+2]); requestcount=requestcount+3; } } MPI_Waitall(requestcount,requests2,statuses2); } //========================================================================== // Calculate the fluid density and velocity over a simulation volume // specified by xstart,xend; ystart,yend; zstart,zend. //========================================================================== void FixLbFluid::parametercalc_part(int xstart, int xend, int ystart, int yend, int zstart, int zend) { int i,j,k,m; for(i=xstart; i<xend; i++){ for(j=ystart; j<yend; j++){ for(k=zstart; k<zend; k++){ density_lb[i][j][k]=0.0; u_lb[i][j][k][0]=0.0; u_lb[i][j][k][1]=0.0; u_lb[i][j][k][2]=0.0; for (m=0; m<numvel; m++) { density_lb[i][j][k] += f_lb[i][j][k][m]; u_lb[i][j][k][0] += f_lb[i][j][k][m]*e[m][0]; u_lb[i][j][k][1] += f_lb[i][j][k][m]*e[m][1]; u_lb[i][j][k][2] += f_lb[i][j][k][m]*e[m][2]; } //For the on-lattice wall scheme, need to set this velocity to zero. if(domain->periodicity[2]==0){ if(comm->myloc[2]==0){ if(k==1){ u_lb[i][j][k][2]=0.0; } } if(comm->myloc[2]==comm->procgrid[2]-1){ if(k==subNbz-2){ u_lb[i][j][k][2]=0.0; } } } u_lb[i][j][k][0]=u_lb[i][j][k][0]/density_lb[i][j][k]; u_lb[i][j][k][1]=u_lb[i][j][k][1]/density_lb[i][j][k]; u_lb[i][j][k][2]=u_lb[i][j][k][2]/density_lb[i][j][k]; } } } } //========================================================================== // Update the distribution function over a simulation volume specified // by xstart,xend; ystart,yend; zstart,zend. 
//========================================================================== void FixLbFluid::update_periodic(int xstart, int xend, int ystart, int yend, int zstart, int zend) { int i,j,k,m; int imod,jmod,kmod,imodm,jmodm,kmodm; for(i=xstart; i<xend; i++) for(j=ystart; j<yend; j++) for(k=zstart; k<zend; k++){ if(typeLB==1){ for(m=0; m<numvel; m++){ imod = i-e[m][0]; jmod = j-e[m][1]; kmod = k-e[m][2]; fnew[i][j][k][m] = f_lb[imod][jmod][kmod][m] + (feq[imod][jmod][kmod][m]-f_lb[imod][jmod][kmod][m])/tau; } }else if(typeLB==2){ for(m=0; m<numvel; m++){ imod = i-e[m][0]; jmod = j-e[m][1]; kmod = k-e[m][2]; fnew[i][j][k][m] = feq[imod][jmod][kmod][m] + (f_lb[imod][jmod][kmod][m] - feq[imod][jmod][kmod][m])*expminusdtovertau; } fnew[i][j][k][0]+=Dcoeff*(feq[i][j][k][0]-feqold[i][j][k][0]); for(m=1; m<numvel; m++){ imod = i-e[m][0]; jmod = j-e[m][1]; kmod = k-e[m][2]; imodm = i+e[m][0]; jmodm = j+e[m][1]; kmodm = k+e[m][2]; fnew[i][j][k][m] += Dcoeff*(feq[i][j][k][m] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))* (feqn[imodm][jmodm][kmodm][m] - feqoldn[i][j][k][m] - feqn[i][j][k][m] + feqoldn[imod][jmod][kmod][m]); } } } } //========================================================================== // Print the fluid properties to the screen. //========================================================================== void FixLbFluid::streamout(void) { int i,j,k; int istart,jstart,kstart; int iend,jend,kend; int w,iproc; int size,sizeloc; MPI_Request request_send,request_recv; MPI_Status status; //-------------------------------------------------------------------------- // **Uncomment in order to test conservation of mass and momentum. 
//--------------------------------------------------------------------------
  // massloc=0.0;
  // momentumloc[0]=momentumloc[1]=momentumloc[2]=0.0;
  // for(i=1; i<subNbx-1; i++){
  // for(j=1; j<subNby-1; j++){
  // for(k=1; k<subNbz-1; k++){
  // massloc += density_lb[i][j][k];
  // momentumloc[0] += density_lb[i][j][k]*u_lb[i][j][k][0];
  // momentumloc[1] += density_lb[i][j][k]*u_lb[i][j][k][1];
  // momentumloc[2] += density_lb[i][j][k]*u_lb[i][j][k][2];
  // }
  // }
  // }
  // MPI_Allreduce(&massloc,&mass,1,MPI_DOUBLE,MPI_SUM,world);
  // MPI_Allreduce(&momentumloc[0],&momentum[0],3,MPI_DOUBLE,MPI_SUM,world);
  // if(comm->me==0){
  // printf("%16.12f %16.12f %16.12f %16.12f\n",mass*dm_lb,momentum[0]*dm_lb*dx_lb/dt_lb,momentum[1]*dm_lb*dx_lb/dt_lb,momentum[2]*dm_lb*dx_lb/dt_lb);
  // }

  // Every rank transfers the same number of doubles: the largest local
  // grid size (4 values per site) found on any rank.
  sizeloc=(subNbx*subNby*subNbz*4);
  MPI_Allreduce(&sizeloc,&size,1,MPI_INT,MPI_MAX,world);

  if(me==0){
    // Rank 0 gathers one rank at a time into altogether, then prints.
    for(iproc=0; iproc < comm->nprocs; iproc++){
      if(iproc){
        MPI_Irecv(&buf[0][0][0][0],size,MPI_DOUBLE,iproc,0,world,&request_recv);
        MPI_Wait(&request_recv,&status);

        // The sender stores its index range in the first entries of buf.
        istart=static_cast<int> (buf[0][0][0][0]);
        jstart=static_cast<int> (buf[0][0][0][1]);
        kstart=static_cast<int> (buf[0][0][0][2]);
        iend=static_cast<int> (buf[0][0][1][0]);
        jend=static_cast<int> (buf[0][0][1][1]);
        kend=static_cast<int> (buf[0][0][1][2]);

        // The +1 offsets skip the first plane of buf in each direction.
        for(i=istart; i<iend; i++){
          for(j=jstart; j<jend; j++){
            for(k=kstart; k<kend; k++){
              for(w=0; w<4; w++){
                altogether[i][j][k][w]=buf[i-istart+1][j-jstart+1][k-kstart+1][w];
              }
            }
          }
        }
      }else{
        // iproc==0: copy this rank's own sites 1..subNb*-2 directly.
        for(i=1; i<subNbx-1; i++){
          for(j=1; j<subNby-1; j++){
            for(k=1; k<subNbz-1; k++){
              altogether[i-1][j-1][k-1][0]=density_lb[i][j][k];
              altogether[i-1][j-1][k-1][1]=u_lb[i][j][k][0];
              altogether[i-1][j-1][k-1][2]=u_lb[i][j][k][1];
              altogether[i-1][j-1][k-1][3]=u_lb[i][j][k][2];
            }
          }
        }
      }
    }
    //i = Nbx/2;
    //j = Nby/2;
    // One output line per site: density scaled by dm_lb/dx_lb^3, then the
    // three velocity components scaled by dx_lb/dt_lb.
    for(i=0; i<Nbx; i++)
      for(j=0; j<Nby; j++)
        for(k=0; k<Nbz; k++){
          printf("%16.12f %16.12f %16.12f %16.12f\n",altogether[i][j][k][0]*dm_lb/dx_lb/dx_lb/dx_lb,altogether[i][j][k][1]*dx_lb/dt_lb,altogether[i][j][k][2]*dx_lb/dt_lb,altogether[i][j][k][3]*dx_lb/dt_lb);
        }
  } else {
    // All other ranks: compute the index range of the local sub-grid in the
    // gathered array, pack density and velocity, and send to rank 0.
    istart=comm->myloc[0]*(subNbx-2);
    jstart=comm->myloc[1]*(subNby-2);
    if(domain->periodicity[2]==0){
      // NOTE(review): the last z processor uses subNbz-3 here, presumably
      // because it holds one extra plane with fixed z boundaries -- confirm
      // against the grid setup code.
      if(comm->myloc[2]==comm->procgrid[2]-1){
        kstart=comm->myloc[2]*(subNbz-3);
      }else{
        kstart=comm->myloc[2]*(subNbz-2);
      }
    }else{
      kstart=comm->myloc[2]*(subNbz-2);
    }
    iend=istart+subNbx-2;
    jend=jstart+subNby-2;
    kend=kstart+subNbz-2;
    for(i=0; i<subNbx; i++){
      for(j=0; j<subNby; j++){
        for(k=0; k<subNbz; k++){
          buf[i][j][k][0]=density_lb[i][j][k];
          buf[i][j][k][1]=u_lb[i][j][k][0];
          buf[i][j][k][2]=u_lb[i][j][k][1];
          buf[i][j][k][3]=u_lb[i][j][k][2];
        }
      }
    }
    // Overwrite the entries at [0][0][0] and [0][0][1] with the index range;
    // the receiver never reads field data from index 0 of any direction.
    buf[0][0][0][0]=istart;
    buf[0][0][0][1]=jstart;
    buf[0][0][0][2]=kstart;
    buf[0][0][1][0]=iend;
    buf[0][0][1][1]=jend;
    buf[0][0][1][2]=kend;

    MPI_Isend(&buf[0][0][0][0],size,MPI_DOUBLE,0,0,world,&request_send);
    MPI_Wait(&request_send,&status);
  }

}

//==========================================================================
// Update the distribution functions over the entire simulation domain for
// the D3Q15 model.
//==========================================================================
void FixLbFluid::update_full15(void)
{
  MPI_Request req_send15,req_recv15;
  MPI_Request req_send25,req_recv25;
  MPI_Request requests[8];
  MPI_Status statuses[8];
  int numrequests;
  double tmp1;
  MPI_Status status;
  double rb;
  int i,j,k,m;
  int imod,jmod,kmod;
  int imodm,jmodm,kmodm;

  //--------------------------------------------------------------------------
  // If using the standard LB integrator, do not need to send info about feqn.
  //--------------------------------------------------------------------------
  if(typeLB == 1){
    numrequests = 4;
  }else{
    numrequests = 8;
  }

  //--------------------------------------------------------------------------
  // Fixed z boundary conditions.
//--------------------------------------------------------------------------
  if(domain->periodicity[2]==0){

    // x direction: exchange feq (and feqn when typeLB==2) with both x
    // neighbours while the sites from 2 to subNb*-3 are updated.
    for(i=0; i<numrequests; i++)
      requests[i]=MPI_REQUEST_NULL;
    MPI_Isend(&feq[1][1][1][0],1,passxf,comm->procneigh[0][0],15,world,&requests[0]);
    MPI_Irecv(&feq[0][1][1][0],1,passxf,comm->procneigh[0][0],25,world,&requests[1]);
    MPI_Isend(&feq[subNbx-2][1][1][0],1,passxf,comm->procneigh[0][1],25,world,&requests[2]);
    MPI_Irecv(&feq[subNbx-1][1][1][0],1,passxf,comm->procneigh[0][1],15,world,&requests[3]);
    if(typeLB == 2){
      MPI_Isend(&feqn[1][1][1][0],1,passxf,comm->procneigh[0][0],10,world,&requests[4]);
      MPI_Irecv(&feqn[0][1][1][0],1,passxf,comm->procneigh[0][0],20,world,&requests[5]);
      MPI_Isend(&feqn[subNbx-2][1][1][0],1,passxf,comm->procneigh[0][1],20,world,&requests[6]);
      MPI_Irecv(&feqn[subNbx-1][1][1][0],1,passxf,comm->procneigh[0][1],10,world,&requests[7]);
    }
    update_periodic(2,subNbx-2,2,subNby-2,2,subNbz-2);
    MPI_Waitall(numrequests,requests,statuses);

    // y direction: exchange, and meanwhile update the two x planes
    // (1 and subNbx-2) that needed the x data just received.
    for(i=0; i<numrequests; i++)
      requests[i]=MPI_REQUEST_NULL;
    MPI_Isend(&feq[0][1][1][0],1,passyf,comm->procneigh[1][0],15,world,&requests[0]);
    MPI_Irecv(&feq[0][0][1][0],1,passyf,comm->procneigh[1][0],25,world,&requests[1]);
    MPI_Isend(&feq[0][subNby-2][1][0],1,passyf,comm->procneigh[1][1],25,world,&requests[2]);
    MPI_Irecv(&feq[0][subNby-1][1][0],1,passyf,comm->procneigh[1][1],15,world,&requests[3]);
    if(typeLB == 2){
      MPI_Isend(&feqn[0][1][1][0],1,passyf,comm->procneigh[1][0],10,world,&requests[4]);
      MPI_Irecv(&feqn[0][0][1][0],1,passyf,comm->procneigh[1][0],20,world,&requests[5]);
      MPI_Isend(&feqn[0][subNby-2][1][0],1,passyf,comm->procneigh[1][1],20,world,&requests[6]);
      MPI_Irecv(&feqn[0][subNby-1][1][0],1,passyf,comm->procneigh[1][1],10,world,&requests[7]);
    }
    update_periodic(1,2,2,subNby-2,2,subNbz-2);
    update_periodic(subNbx-2,subNbx-1,2,subNby-2,2,subNbz-2);
    MPI_Waitall(numrequests,requests,statuses);

    // z direction: exchange, and meanwhile update the two y planes
    // (1 and subNby-2).
    for(i=0; i<numrequests; i++)
      requests[i]=MPI_REQUEST_NULL;
    MPI_Isend(&feq[0][0][1][0],1,passzf,comm->procneigh[2][0],15,world,&requests[0]);
    MPI_Irecv(&feq[0][0][0][0],1,passzf,comm->procneigh[2][0],25,world,&requests[1]);
    MPI_Isend(&feq[0][0][subNbz-2][0],1,passzf,comm->procneigh[2][1],25,world,&requests[2]);
    MPI_Irecv(&feq[0][0][subNbz-1][0],1,passzf,comm->procneigh[2][1],15,world,&requests[3]);
    if(typeLB == 2){
      MPI_Isend(&feqn[0][0][1][0],1,passzf,comm->procneigh[2][0],10,world,&requests[4]);
      MPI_Irecv(&feqn[0][0][0][0],1,passzf,comm->procneigh[2][0],20,world,&requests[5]);
      MPI_Isend(&feqn[0][0][subNbz-2][0],1,passzf,comm->procneigh[2][1],20,world,&requests[6]);
      MPI_Irecv(&feqn[0][0][subNbz-1][0],1,passzf,comm->procneigh[2][1],10,world,&requests[7]);
    }
    update_periodic(1,subNbx-1,1,2,2,subNbz-2);
    update_periodic(1,subNbx-1,subNby-2,subNby-1,2,subNbz-2);
    MPI_Waitall(numrequests,requests,statuses);

    // Remaining z planes 1 and subNbz-2.
    if(typeLB==1){
      update_periodic(1,subNbx-1,1,subNby-1,1,2);
      update_periodic(1,subNbx-1,1,subNby-1,subNbz-2,subNbz-1);
    }else if(typeLB==2){
      if(comm->myloc[2]==0){
        // Lowest z processor, plane k=1. Same update as update_periodic for
        // typeLB==2, except that directions 5,7,8,9,10 and 6,11,12,13,14 use
        // terms taken from their paired direction (5<->6, 7<->11, 8<->12,
        // 9<->13, 10<->14).
        // NOTE(review): presumably this is the wall treatment for the
        // directions that cross the z boundary -- confirm with the e[] table.
        for(i=1; i<subNbx-1; i++){
          for(j=1;j<subNby-1;j++){
            k=1;
            for(m=0; m<15; m++){
              imod = i-e[m][0];
              jmod = j-e[m][1];
              kmod = k-e[m][2];
              fnew[i][j][k][m] = feq[imod][jmod][kmod][m] + (f_lb[imod][jmod][kmod][m]-feq[imod][jmod][kmod][m])*expminusdtovertau;
            }
            for(m=0; m<15; m++){
              imod = i-e[m][0];
              jmod = j-e[m][1];
              kmod = k-e[m][2];
              imodm = i+e[m][0];
              jmodm = j+e[m][1];
              kmodm = k+e[m][2];
              if(m==5)
                fnew[i][j][k][m] += Dcoeff*(feq[imod][jmod][kmod][6] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqoldn[imod][jmod][kmod][m] - feqoldn[imod][jmod][kmod][6] - feqn[imod][jmod][kmod][6]);
              else if(m==7)
                fnew[i][j][k][m] += Dcoeff*(feq[imod][jmod][kmod][11] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqoldn[imod][jmod][kmod][m] - feqoldn[imod][jmod][kmod][11] - feqn[imod][jmod][kmod][11]);
              else if(m==8)
                fnew[i][j][k][m] += Dcoeff*(feq[imod][jmod][kmod][12] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqoldn[imod][jmod][kmod][m] - feqoldn[imod][jmod][kmod][12] - feqn[imod][jmod][kmod][12]);
              else if(m==9)
                fnew[i][j][k][m] += Dcoeff*(feq[imod][jmod][kmod][13] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqoldn[imod][jmod][kmod][m] - feqoldn[imod][jmod][kmod][13] - feqn[imod][jmod][kmod][13]);
              else if(m==10)
                fnew[i][j][k][m] += Dcoeff*(feq[imod][jmod][kmod][14] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqoldn[imod][jmod][kmod][m] - feqoldn[imod][jmod][kmod][14] - feqn[imod][jmod][kmod][14]);
              else if(m==6)
                fnew[i][j][k][m] += Dcoeff*(feq[i][j][k][m]-feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqn[i][j][k][5] - feqoldn[i][j][k][m] - feqn[i][j][k][m] + feqoldn[imod][jmod][kmod][m]);
              else if(m==11)
                fnew[i][j][k][m] += Dcoeff*(feq[i][j][k][m]-feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqn[i][j][k][7] - feqoldn[i][j][k][m] - feqn[i][j][k][m] + feqoldn[imod][jmod][kmod][m]);
              else if(m==12)
                fnew[i][j][k][m] += Dcoeff*(feq[i][j][k][m]-feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqn[i][j][k][8] - feqoldn[i][j][k][m] - feqn[i][j][k][m] + feqoldn[imod][jmod][kmod][m]);
              else if(m==13)
                fnew[i][j][k][m] += Dcoeff*(feq[i][j][k][m]-feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqn[i][j][k][9] - feqoldn[i][j][k][m] - feqn[i][j][k][m] + feqoldn[imod][jmod][kmod][m]);
              else if(m==14)
                fnew[i][j][k][m] += Dcoeff*(feq[i][j][k][m]-feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqn[i][j][k][10] - feqoldn[i][j][k][m] - feqn[i][j][k][m] + feqoldn[imod][jmod][kmod][m]);
              else
                fnew[i][j][k][m] += Dcoeff*(feq[i][j][k][m] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqn[imodm][jmodm][kmodm][m] - feqoldn[i][j][k][m] - feqn[i][j][k][m] + feqoldn[imod][jmod][kmod][m]);
            }
          }
        }
      }else{
        update_periodic(1,subNbx-1,1,subNby-1,1,2);
      }
      if(comm->myloc[2]==comm->procgrid[2]-1){
        // Highest z processor, plane k=subNbz-2: same scheme with the roles
        // of the paired directions swapped.
        for(i=1;i<subNbx-1;i++){
          for(j=1;j<subNby-1;j++){
            k=subNbz-2;
            for(m=0; m<15; m++){
              imod = i-e[m][0];
              jmod = j-e[m][1];
              kmod = k-e[m][2];
              fnew[i][j][k][m] = feq[imod][jmod][kmod][m] + (f_lb[imod][jmod][kmod][m]-feq[imod][jmod][kmod][m])*expminusdtovertau;
            }
            for(m=0; m<15; m++){
              imod = i-e[m][0];
              jmod = j-e[m][1];
              kmod = k-e[m][2];
              imodm = i+e[m][0];
              jmodm = j+e[m][1];
              kmodm = k+e[m][2];
              if(m==6)
                fnew[i][j][k][m] += Dcoeff*(feq[imod][jmod][kmod][5] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqoldn[imod][jmod][kmod][m] - feqoldn[imod][jmod][kmod][5] - feqn[imod][jmod][kmod][5]);
              else if(m==11)
                fnew[i][j][k][m] += Dcoeff*(feq[imod][jmod][kmod][7] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqoldn[imod][jmod][kmod][m] - feqoldn[imod][jmod][kmod][7] - feqn[imod][jmod][kmod][7]);
              else if(m==12)
                fnew[i][j][k][m] += Dcoeff*(feq[imod][jmod][kmod][8] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqoldn[imod][jmod][kmod][m] - feqoldn[imod][jmod][kmod][8] - feqn[imod][jmod][kmod][8]);
              else if(m==13)
                fnew[i][j][k][m] += Dcoeff*(feq[imod][jmod][kmod][9] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqoldn[imod][jmod][kmod][m] - feqoldn[imod][jmod][kmod][9] - feqn[imod][jmod][kmod][9]);
              else if(m==14)
                fnew[i][j][k][m] += Dcoeff*(feq[imod][jmod][kmod][10] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqoldn[imod][jmod][kmod][m] - feqoldn[imod][jmod][kmod][10] - feqn[imod][jmod][kmod][10]);
              else if(m==5)
                fnew[i][j][k][m] += Dcoeff*(feq[i][j][k][m] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqn[i][j][k][6] - feqoldn[i][j][k][m] - feqn[i][j][k][m] + feqoldn[imod][jmod][kmod][m]);
              else if(m==7)
                fnew[i][j][k][m] += Dcoeff*(feq[i][j][k][m] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqn[i][j][k][11] - feqoldn[i][j][k][m] - feqn[i][j][k][m] + feqoldn[imod][jmod][kmod][m]);
              else if(m==8)
                fnew[i][j][k][m] += Dcoeff*(feq[i][j][k][m] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqn[i][j][k][12] - feqoldn[i][j][k][m] - feqn[i][j][k][m] + feqoldn[imod][jmod][kmod][m]);
              else if(m==9)
                fnew[i][j][k][m] += Dcoeff*(feq[i][j][k][m] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqn[i][j][k][13] - feqoldn[i][j][k][m] - feqn[i][j][k][m] + feqoldn[imod][jmod][kmod][m]);
              else if(m==10)
                fnew[i][j][k][m] += Dcoeff*(feq[i][j][k][m] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqn[i][j][k][14] - feqoldn[i][j][k][m] - feqn[i][j][k][m] + feqoldn[imod][jmod][kmod][m]);
              else
                fnew[i][j][k][m] += Dcoeff*(feq[i][j][k][m] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqn[imodm][jmodm][kmodm][m] - feqoldn[i][j][k][m] - feqn[i][j][k][m] + feqoldn[imod][jmod][kmod][m]);
            }
          }
        }
      }
      else{
        update_periodic(1,subNbx-1,1,subNby-1,subNbz-2,subNbz-1);
      }
    }

    // Only the lowest and highest z processors exchange fnew planes here;
    // the requests stay MPI_REQUEST_NULL on every other processor.
    req_send15=MPI_REQUEST_NULL;
    req_recv25=MPI_REQUEST_NULL;
    req_send25=MPI_REQUEST_NULL;
    req_recv15=MPI_REQUEST_NULL;

    if(comm->myloc[2]==0){
      MPI_Isend(&fnew[0][0][1][0],1,passzf,comm->procneigh[2][0],15,world,&req_send15);
      MPI_Irecv(&fnew[0][0][0][0],1,passzf,comm->procneigh[2][0],25,world,&req_recv25);
    }

    if(comm->myloc[2]==comm->procgrid[2]-1){
      MPI_Isend(&fnew[0][0][subNbz-2][0],1,passzf,comm->procneigh[2][1],25,world,&req_send25);
      MPI_Irecv(&fnew[0][0][subNbz-1][0],1,passzf,comm->procneigh[2][1],15,world,&req_recv15);
    }

    if(comm->myloc[2]==0){
      MPI_Wait(&req_send15,&status);
      MPI_Wait(&req_recv25,&status);

      // Rebuild directions 5,7,8,9,10 on plane k=1 from the values received
      // in plane k-1 and the populations already known at this site.
      for(i=1;i<subNbx-1;i++){
        for(j=1;j<subNby-1;j++){
          k=1;

          if(typeLB == 1){
            fnew[i][j][k][5]=fnew[i][j][k-1][6];
            tmp1=fnew[i][j][k-1][11]+fnew[i][j][k-1][12]+fnew[i][j][k-1][13]+fnew[i][j][k-1][14];
          }
          else{
            fnew[i][j][k][5]=fnew[i][j][k-1][6] + (0.5-Dcoeff*(tau+0.5))*feqn[i][j][k+1][5];
            tmp1=fnew[i][j][k-1][11]+fnew[i][j][k-1][12]+fnew[i][j][k-1][13]+fnew[i][j][k-1][14] + (0.5-Dcoeff*(tau+0.5))*(feqn[i-1][j-1][k+1][7] + feqn[i+1][j-1][k+1][8] + feqn[i+1][j+1][k+1][9] + feqn[i-1][j+1][k+1][10]);
          }

          fnew[i][j][k][7]=-0.25*(fnew[i][j][k][1]+fnew[i][j][k][2]-fnew[i][j][k][3]- fnew[i][j][k][4]+2.0*fnew[i][j][k][11]-2.0*fnew[i][j][k][13]-tmp1);
          fnew[i][j][k][8]=0.25*(fnew[i][j][k][1]-fnew[i][j][k][2]-fnew[i][j][k][3]+ fnew[i][j][k][4]+2.0*fnew[i][j][k][14]-2.0*fnew[i][j][k][12]+tmp1);
          fnew[i][j][k][9]=0.25*(fnew[i][j][k][1]+fnew[i][j][k][2]-fnew[i][j][k][3]- fnew[i][j][k][4]+2.0*fnew[i][j][k][11]-2.0*fnew[i][j][k][13]+tmp1);
          fnew[i][j][k][10]=-0.25*(fnew[i][j][k][1]-fnew[i][j][k][2]-fnew[i][j][k][3]+ fnew[i][j][k][4]+2.0*fnew[i][j][k][14]-2.0*fnew[i][j][k][12]-tmp1);

          // rb: sum of directions 0-6 and 11-14 plus tmp1 (which stands in
          // for directions 7-10); it scales the vwbt correction below.
          rb=fnew[i][j][k][0]+fnew[i][j][k][1]+fnew[i][j][k][2]+fnew[i][j][k][3]+fnew[i][j][k][4]+ fnew[i][j][k][5]+fnew[i][j][k][6]+tmp1+fnew[i][j][k][11]+fnew[i][j][k][12]+ fnew[i][j][k][13]+fnew[i][j][k][14];

          fnew[i][j][k][7] += 0.25*rb*vwbt;
          fnew[i][j][k][8] += 0.25*rb*vwbt;
          fnew[i][j][k][9] += -0.25*rb*vwbt;
          fnew[i][j][k][10] += -0.25*rb*vwbt;
        }
      }
    }

    if(comm->myloc[2]==comm->procgrid[2]-1){
      MPI_Wait(&req_send25,&status);
      MPI_Wait(&req_recv15,&status);

      // Mirror image on plane k=subNbz-2: rebuild directions 6,11,12,13,14
      // from plane k+1, with vwtp in place of vwbt.
      for(i=1;i<subNbx-1;i++){
        for(j=1;j<subNby-1;j++){
          k=subNbz-2;

          if(typeLB == 1){
            fnew[i][j][k][6]=fnew[i][j][k+1][5];
            tmp1=fnew[i][j][k+1][7]+fnew[i][j][k+1][8]+fnew[i][j][k+1][9]+fnew[i][j][k+1][10];
          }
          else{
            fnew[i][j][k][6]=fnew[i][j][k+1][5] + (0.5-Dcoeff*(tau+0.5))*feqn[i][j][k-1][6];
            tmp1=fnew[i][j][k+1][7]+fnew[i][j][k+1][8]+fnew[i][j][k+1][9]+fnew[i][j][k+1][10] + (0.5-Dcoeff*(tau+0.5))*(feqn[i-1][j-1][k-1][11] + feqn[i+1][j-1][k-1][12] + feqn[i+1][j+1][k-1][13] + feqn[i-1][j+1][k-1][14]);
          }

          fnew[i][j][k][11]=-0.25*(fnew[i][j][k][1]+fnew[i][j][k][2]-fnew[i][j][k][3]- fnew[i][j][k][4]+2.0*fnew[i][j][k][7]-2.0*fnew[i][j][k][9]-tmp1);
          fnew[i][j][k][12]=0.25*(fnew[i][j][k][1]-fnew[i][j][k][2]-fnew[i][j][k][3]+ fnew[i][j][k][4]-2.0*fnew[i][j][k][8]+2.0*fnew[i][j][k][10]+tmp1);
          fnew[i][j][k][13]=0.25*(fnew[i][j][k][1]+fnew[i][j][k][2]-fnew[i][j][k][3]- fnew[i][j][k][4]+2.0*fnew[i][j][k][7]-2.0*fnew[i][j][k][9]+tmp1);
          fnew[i][j][k][14]=-0.25*(fnew[i][j][k][1]-fnew[i][j][k][2]-fnew[i][j][k][3]+ fnew[i][j][k][4]-2.0*fnew[i][j][k][8]+2.0*fnew[i][j][k][10]-tmp1);

          rb=fnew[i][j][k][0]+fnew[i][j][k][1]+fnew[i][j][k][2]+fnew[i][j][k][3]+fnew[i][j][k][4]+ fnew[i][j][k][5]+fnew[i][j][k][6]+fnew[i][j][k][7]+fnew[i][j][k][8]+fnew[i][j][k][9]+ fnew[i][j][k][10]+tmp1;

          fnew[i][j][k][11] += 0.25*rb*vwtp;
          fnew[i][j][k][12] += 0.25*rb*vwtp;
          fnew[i][j][k][13] += -0.25*rb*vwtp;
          fnew[i][j][k][14] += -0.25*rb*vwtp;
        }
      }
    }

  //--------------------------------------------------------------------------
  // Periodic z boundary conditions.
  //--------------------------------------------------------------------------
  }else {

    // Same three-stage exchange as the fixed-z branch; every plane is then
    // updated with update_periodic.
    for(i=0; i<numrequests; i++)
      requests[i]=MPI_REQUEST_NULL;
    MPI_Isend(&feq[1][1][1][0],1,passxf,comm->procneigh[0][0],15,world,&requests[0]);
    MPI_Irecv(&feq[0][1][1][0],1,passxf,comm->procneigh[0][0],25,world,&requests[1]);
    MPI_Isend(&feq[subNbx-2][1][1][0],1,passxf,comm->procneigh[0][1],25,world,&requests[2]);
    MPI_Irecv(&feq[subNbx-1][1][1][0],1,passxf,comm->procneigh[0][1],15,world,&requests[3]);
    if(typeLB == 2){
      MPI_Isend(&feqn[1][1][1][0],1,passxf,comm->procneigh[0][0],10,world,&requests[4]);
      MPI_Irecv(&feqn[0][1][1][0],1,passxf,comm->procneigh[0][0],20,world,&requests[5]);
      MPI_Isend(&feqn[subNbx-2][1][1][0],1,passxf,comm->procneigh[0][1],20,world,&requests[6]);
      MPI_Irecv(&feqn[subNbx-1][1][1][0],1,passxf,comm->procneigh[0][1],10,world,&requests[7]);
    }
    update_periodic(2,subNbx-2,2,subNby-2,2,subNbz-2);
    MPI_Waitall(numrequests,requests,statuses);

    for(i=0; i<numrequests; i++)
      requests[i]=MPI_REQUEST_NULL;
    MPI_Isend(&feq[0][1][1][0],1,passyf,comm->procneigh[1][0],15,world,&requests[0]);
    MPI_Irecv(&feq[0][0][1][0],1,passyf,comm->procneigh[1][0],25,world,&requests[1]);
    MPI_Isend(&feq[0][subNby-2][1][0],1,passyf,comm->procneigh[1][1],25,world,&requests[2]);
    MPI_Irecv(&feq[0][subNby-1][1][0],1,passyf,comm->procneigh[1][1],15,world,&requests[3]);
    if(typeLB == 2){
      MPI_Isend(&feqn[0][1][1][0],1,passyf,comm->procneigh[1][0],10,world,&requests[4]);
      MPI_Irecv(&feqn[0][0][1][0],1,passyf,comm->procneigh[1][0],20,world,&requests[5]);
      MPI_Isend(&feqn[0][subNby-2][1][0],1,passyf,comm->procneigh[1][1],20,world,&requests[6]);
      MPI_Irecv(&feqn[0][subNby-1][1][0],1,passyf,comm->procneigh[1][1],10,world,&requests[7]);
    }
    update_periodic(1,2,2,subNby-2,2,subNbz-2);
    update_periodic(subNbx-2,subNbx-1,2,subNby-2,2,subNbz-2);
    MPI_Waitall(numrequests,requests,statuses);

    for(i=0; i<numrequests; i++)
      requests[i]=MPI_REQUEST_NULL;
    MPI_Isend(&feq[0][0][1][0],1,passzf,comm->procneigh[2][0],15,world,&requests[0]);
    MPI_Irecv(&feq[0][0][0][0],1,passzf,comm->procneigh[2][0],25,world,&requests[1]);
    MPI_Isend(&feq[0][0][subNbz-2][0],1,passzf,comm->procneigh[2][1],25,world,&requests[2]);
    MPI_Irecv(&feq[0][0][subNbz-1][0],1,passzf,comm->procneigh[2][1],15,world,&requests[3]);
    if(typeLB == 2){
      MPI_Isend(&feqn[0][0][1][0],1,passzf,comm->procneigh[2][0],10,world,&requests[4]);
      MPI_Irecv(&feqn[0][0][0][0],1,passzf,comm->procneigh[2][0],20,world,&requests[5]);
      MPI_Isend(&feqn[0][0][subNbz-2][0],1,passzf,comm->procneigh[2][1],20,world,&requests[6]);
      MPI_Irecv(&feqn[0][0][subNbz-1][0],1,passzf,comm->procneigh[2][1],10,world,&requests[7]);
    }
    update_periodic(1,subNbx-1,1,2,2,subNbz-2);
    update_periodic(1,subNbx-1,subNby-2,subNby-1,2,subNbz-2);
    MPI_Waitall(numrequests,requests,statuses);

    update_periodic(1,subNbx-1,1,subNby-1,1,2);
    update_periodic(1,subNbx-1,1,subNby-1,subNbz-2,subNbz-1);
  }

}

//==========================================================================
// Update the distribution functions over the entire simulation domain for
// the D3Q19 model.
//==========================================================================
void FixLbFluid::update_full19(void)
{
  MPI_Request req_send15,req_recv15;
  MPI_Request req_send25,req_recv25;
  MPI_Request requests[8];
  MPI_Status statuses[8];
  int numrequests;
  double tmp1,tmp2,tmp3;
  MPI_Status status;
  double rb;
  int i,j,k,m;
  int imod,jmod,kmod;
  int imodm,jmodm,kmodm;

  //--------------------------------------------------------------------------
  // If using the standard LB integrator, do not need to send info about feqn.
//--------------------------------------------------------------------------
  if(typeLB == 1){
    numrequests = 4;
  }else{
    numrequests = 8;
  }

  //--------------------------------------------------------------------------
  // Fixed z boundary conditions.
  //--------------------------------------------------------------------------
  if(domain->periodicity[2]==0){

    // x direction: exchange feq (and feqn when typeLB==2) with both x
    // neighbours while the sites from 2 to subNb*-3 are updated.
    for(i=0; i<numrequests; i++)
      requests[i]=MPI_REQUEST_NULL;
    MPI_Isend(&feq[1][1][1][0],1,passxf,comm->procneigh[0][0],15,world,&requests[0]);
    MPI_Irecv(&feq[0][1][1][0],1,passxf,comm->procneigh[0][0],25,world,&requests[1]);
    MPI_Isend(&feq[subNbx-2][1][1][0],1,passxf,comm->procneigh[0][1],25,world,&requests[2]);
    MPI_Irecv(&feq[subNbx-1][1][1][0],1,passxf,comm->procneigh[0][1],15,world,&requests[3]);
    if(typeLB == 2){
      MPI_Isend(&feqn[1][1][1][0],1,passxf,comm->procneigh[0][0],10,world,&requests[4]);
      MPI_Irecv(&feqn[0][1][1][0],1,passxf,comm->procneigh[0][0],20,world,&requests[5]);
      MPI_Isend(&feqn[subNbx-2][1][1][0],1,passxf,comm->procneigh[0][1],20,world,&requests[6]);
      MPI_Irecv(&feqn[subNbx-1][1][1][0],1,passxf,comm->procneigh[0][1],10,world,&requests[7]);
    }
    update_periodic(2,subNbx-2,2,subNby-2,2,subNbz-2);
    MPI_Waitall(numrequests,requests,statuses);

    // y direction: exchange, and meanwhile update the two x planes
    // (1 and subNbx-2) that needed the x data just received.
    for(i=0; i<numrequests; i++)
      requests[i]=MPI_REQUEST_NULL;
    MPI_Isend(&feq[0][1][1][0],1,passyf,comm->procneigh[1][0],15,world,&requests[0]);
    MPI_Irecv(&feq[0][0][1][0],1,passyf,comm->procneigh[1][0],25,world,&requests[1]);
    MPI_Isend(&feq[0][subNby-2][1][0],1,passyf,comm->procneigh[1][1],25,world,&requests[2]);
    MPI_Irecv(&feq[0][subNby-1][1][0],1,passyf,comm->procneigh[1][1],15,world,&requests[3]);
    if(typeLB == 2){
      MPI_Isend(&feqn[0][1][1][0],1,passyf,comm->procneigh[1][0],10,world,&requests[4]);
      MPI_Irecv(&feqn[0][0][1][0],1,passyf,comm->procneigh[1][0],20,world,&requests[5]);
      MPI_Isend(&feqn[0][subNby-2][1][0],1,passyf,comm->procneigh[1][1],20,world,&requests[6]);
      MPI_Irecv(&feqn[0][subNby-1][1][0],1,passyf,comm->procneigh[1][1],10,world,&requests[7]);
    }
    update_periodic(1,2,2,subNby-2,2,subNbz-2);
    update_periodic(subNbx-2,subNbx-1,2,subNby-2,2,subNbz-2);
    MPI_Waitall(numrequests,requests,statuses);

    // z direction: exchange, and meanwhile update the two y planes
    // (1 and subNby-2).
    for(i=0; i<numrequests; i++)
      requests[i]=MPI_REQUEST_NULL;
    MPI_Isend(&feq[0][0][1][0],1,passzf,comm->procneigh[2][0],15,world,&requests[0]);
    MPI_Irecv(&feq[0][0][0][0],1,passzf,comm->procneigh[2][0],25,world,&requests[1]);
    MPI_Isend(&feq[0][0][subNbz-2][0],1,passzf,comm->procneigh[2][1],25,world,&requests[2]);
    MPI_Irecv(&feq[0][0][subNbz-1][0],1,passzf,comm->procneigh[2][1],15,world,&requests[3]);
    if(typeLB == 2){
      MPI_Isend(&feqn[0][0][1][0],1,passzf,comm->procneigh[2][0],10,world,&requests[4]);
      MPI_Irecv(&feqn[0][0][0][0],1,passzf,comm->procneigh[2][0],20,world,&requests[5]);
      MPI_Isend(&feqn[0][0][subNbz-2][0],1,passzf,comm->procneigh[2][1],20,world,&requests[6]);
      MPI_Irecv(&feqn[0][0][subNbz-1][0],1,passzf,comm->procneigh[2][1],10,world,&requests[7]);
    }
    update_periodic(1,subNbx-1,1,2,2,subNbz-2);
    update_periodic(1,subNbx-1,subNby-2,subNby-1,2,subNbz-2);
    MPI_Waitall(numrequests,requests,statuses);

    // Remaining z planes 1 and subNbz-2.
    if(typeLB==1){
      update_periodic(1,subNbx-1,1,subNby-1,1,2);
      update_periodic(1,subNbx-1,1,subNby-1,subNbz-2,subNbz-1);
    }else if(typeLB==2){
      if(comm->myloc[2]==0){
        // Lowest z processor, plane k=1. Same update as update_periodic for
        // typeLB==2, except that directions 5,11,13,15,17 and 6,12,14,16,18
        // use terms taken from their paired direction (5<->6, 11<->12,
        // 13<->14, 15<->16, 17<->18).
        // NOTE(review): presumably this is the wall treatment for the
        // directions that cross the z boundary -- confirm with the e[] table.
        for(i=1; i<subNbx-1; i++){
          for(j=1;j<subNby-1;j++){
            k=1;
            for(m=0; m<19; m++){
              imod = i-e[m][0];
              jmod = j-e[m][1];
              kmod = k-e[m][2];
              fnew[i][j][k][m] = feq[imod][jmod][kmod][m] + (f_lb[imod][jmod][kmod][m]-feq[imod][jmod][kmod][m])*expminusdtovertau;
            }
            for(m=0; m<19; m++){
              imod = i-e[m][0];
              jmod = j-e[m][1];
              kmod = k-e[m][2];
              imodm = i+e[m][0];
              jmodm = j+e[m][1];
              kmodm = k+e[m][2];
              if(m==5)
                fnew[i][j][k][m] += Dcoeff*(feq[imod][jmod][kmod][6] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqoldn[imod][jmod][kmod][m] - feqoldn[imod][jmod][kmod][6] - feqn[imod][jmod][kmod][6]);
              else if(m==11)
                fnew[i][j][k][m] += Dcoeff*(feq[imod][jmod][kmod][12] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqoldn[imod][jmod][kmod][m] - feqoldn[imod][jmod][kmod][12] - feqn[imod][jmod][kmod][12]);
              else if(m==13)
                fnew[i][j][k][m] += Dcoeff*(feq[imod][jmod][kmod][14] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqoldn[imod][jmod][kmod][m] - feqoldn[imod][jmod][kmod][14] - feqn[imod][jmod][kmod][14]);
              else if(m==15)
                fnew[i][j][k][m] += Dcoeff*(feq[imod][jmod][kmod][16] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqoldn[imod][jmod][kmod][m] - feqoldn[imod][jmod][kmod][16] - feqn[imod][jmod][kmod][16]);
              else if(m==17)
                fnew[i][j][k][m] += Dcoeff*(feq[imod][jmod][kmod][18] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqoldn[imod][jmod][kmod][m] - feqoldn[imod][jmod][kmod][18] - feqn[imod][jmod][kmod][18]);
              else if(m==6)
                fnew[i][j][k][m] += Dcoeff*(feq[i][j][k][m] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqn[i][j][k][5] - feqoldn[i][j][k][m] - feqn[i][j][k][m] + feqoldn[imod][jmod][kmod][m]);
              else if(m==12)
                fnew[i][j][k][m] += Dcoeff*(feq[i][j][k][m] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqn[i][j][k][11] - feqoldn[i][j][k][m] - feqn[i][j][k][m] + feqoldn[imod][jmod][kmod][m]);
              else if(m==14)
                fnew[i][j][k][m] += Dcoeff*(feq[i][j][k][m] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqn[i][j][k][13] - feqoldn[i][j][k][m] - feqn[i][j][k][m] + feqoldn[imod][jmod][kmod][m]);
              else if(m==16)
                fnew[i][j][k][m] += Dcoeff*(feq[i][j][k][m] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqn[i][j][k][15] - feqoldn[i][j][k][m] - feqn[i][j][k][m] + feqoldn[imod][jmod][kmod][m]);
              else if(m==18)
                fnew[i][j][k][m] += Dcoeff*(feq[i][j][k][m] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqn[i][j][k][17] - feqoldn[i][j][k][m] - feqn[i][j][k][m] + feqoldn[imod][jmod][kmod][m]);
              else
                fnew[i][j][k][m] += Dcoeff*(feq[i][j][k][m] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqn[imodm][jmodm][kmodm][m] - feqoldn[i][j][k][m] - feqn[i][j][k][m] + feqoldn[imod][jmod][kmod][m]);
            }
          }
        }
      }else{
        update_periodic(1,subNbx-1,1,subNby-1,1,2);
      }
      if(comm->myloc[2]==comm->procgrid[2]-1){
        // Highest z processor, plane k=subNbz-2: same scheme with the roles
        // of the paired directions swapped.
        for(i=1;i<subNbx-1;i++){
          for(j=1;j<subNby-1;j++){
            k=subNbz-2;
            for(m=0; m<19; m++){
              imod = i-e[m][0];
              jmod = j-e[m][1];
              kmod = k-e[m][2];
              fnew[i][j][k][m] = feq[imod][jmod][kmod][m] + (f_lb[imod][jmod][kmod][m]-feq[imod][jmod][kmod][m])*expminusdtovertau;
            }
            for(m=0; m<19; m++){
              imod = i-e[m][0];
              jmod = j-e[m][1];
              kmod = k-e[m][2];
              imodm = i+e[m][0];
              jmodm = j+e[m][1];
              kmodm = k+e[m][2];
              if(m==6)
                fnew[i][j][k][m] += Dcoeff*(feq[imod][jmod][kmod][5] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqoldn[imod][jmod][kmod][m] - feqoldn[imod][jmod][kmod][5] - feqn[imod][jmod][kmod][5]);
              else if(m==12)
                fnew[i][j][k][m] += Dcoeff*(feq[imod][jmod][kmod][11] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqoldn[imod][jmod][kmod][m] - feqoldn[imod][jmod][kmod][11] - feqn[imod][jmod][kmod][11]);
              else if(m==14)
                fnew[i][j][k][m] += Dcoeff*(feq[imod][jmod][kmod][13] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqoldn[imod][jmod][kmod][m] - feqoldn[imod][jmod][kmod][13] - feqn[imod][jmod][kmod][13]);
              else if(m==16)
                fnew[i][j][k][m] += Dcoeff*(feq[imod][jmod][kmod][15] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqoldn[imod][jmod][kmod][m] - feqoldn[imod][jmod][kmod][15] - feqn[imod][jmod][kmod][15]);
              else if(m==18)
                fnew[i][j][k][m] += Dcoeff*(feq[imod][jmod][kmod][17] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqoldn[imod][jmod][kmod][m] - feqoldn[imod][jmod][kmod][17] - feqn[imod][jmod][kmod][17]);
              else if(m==5)
                fnew[i][j][k][m] += Dcoeff*(feq[i][j][k][m] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqn[i][j][k][6] - feqoldn[i][j][k][m] - feqn[i][j][k][m] + feqoldn[imod][jmod][kmod][m]);
              else if(m==11)
                fnew[i][j][k][m] += Dcoeff*(feq[i][j][k][m] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqn[i][j][k][12] - feqoldn[i][j][k][m] - feqn[i][j][k][m] + feqoldn[imod][jmod][kmod][m]);
              else if(m==13)
                fnew[i][j][k][m] += Dcoeff*(feq[i][j][k][m] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqn[i][j][k][14] - feqoldn[i][j][k][m] - feqn[i][j][k][m] + feqoldn[imod][jmod][kmod][m]);
              else if(m==15)
                fnew[i][j][k][m] += Dcoeff*(feq[i][j][k][m] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqn[i][j][k][16] - feqoldn[i][j][k][m] - feqn[i][j][k][m] + feqoldn[imod][jmod][kmod][m]);
              else if(m==17)
                fnew[i][j][k][m] += Dcoeff*(feq[i][j][k][m] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqn[i][j][k][18] - feqoldn[i][j][k][m] - feqn[i][j][k][m] + feqoldn[imod][jmod][kmod][m]);
              else
                fnew[i][j][k][m] += Dcoeff*(feq[i][j][k][m] - feqold[imod][jmod][kmod][m]) + (0.5-Dcoeff*(tau+0.5))*(feqn[imodm][jmodm][kmodm][m] - feqoldn[i][j][k][m] - feqn[i][j][k][m] + feqoldn[imod][jmod][kmod][m]);
            }
          }
        }
      }
      else{
        update_periodic(1,subNbx-1,1,subNby-1,subNbz-2,subNbz-1);
      }
    }

    // Only the lowest and highest z processors exchange fnew planes here;
    // the requests stay MPI_REQUEST_NULL on every other processor.
    req_send15=MPI_REQUEST_NULL;
    req_recv25=MPI_REQUEST_NULL;
    req_send25=MPI_REQUEST_NULL;
    req_recv15=MPI_REQUEST_NULL;

    if(comm->myloc[2]==0){
      MPI_Isend(&fnew[0][0][1][0],1,passzf,comm->procneigh[2][0],15,world,&req_send15);
      MPI_Irecv(&fnew[0][0][0][0],1,passzf,comm->procneigh[2][0],25,world,&req_recv25);
    }

    if(comm->myloc[2]==comm->procgrid[2]-1){
      MPI_Isend(&fnew[0][0][subNbz-2][0],1,passzf,comm->procneigh[2][1],25,world,&req_send25);
      MPI_Irecv(&fnew[0][0][subNbz-1][0],1,passzf,comm->procneigh[2][1],15,world,&req_recv15);
    }

    if(comm->myloc[2]==0){
      MPI_Wait(&req_send15,&status);
      MPI_Wait(&req_recv25,&status);

      // Rebuild directions 5,11,13,15,17 on plane k=1 from the values
      // received in plane k-1 and the populations already known here.
      for(i=1;i<subNbx-1;i++){
        for(j=1;j<subNby-1;j++){
          k=1;

          if(typeLB == 1){
            fnew[i][j][k][5]=fnew[i][j][k-1][6];
            tmp1=fnew[i][j][k-1][12]+fnew[i][j][k-1][14]+fnew[i][j][k-1][16]+fnew[i][j][k-1][18];
          }
          else{
            fnew[i][j][k][5]=fnew[i][j][k-1][6] + (0.5-Dcoeff*(tau+0.5))*feqn[i][j][k+1][5];
            tmp1=fnew[i][j][k-1][12]+fnew[i][j][k-1][14]+fnew[i][j][k-1][16]+fnew[i][j][k-1][18] + (0.5-Dcoeff*(tau+0.5))*(feqn[i-1][j][k+1][11] + feqn[i+1][j][k+1][13] + feqn[i][j-1][k+1][15] + feqn[i][j+1][k+1][17]);
          }

          // tmp1 is the total assigned to the four rebuilt diagonal
          // directions; tmp2 and tmp3 set how it is split between 11/13 and
          // between 15/17. tmp3 also contains the rb*vwbt term.
          tmp2=fnew[i][j][k][3]+fnew[i][j][k][9]+fnew[i][j][k][10]+fnew[i][j][k][14]- fnew[i][j][k][1]-fnew[i][j][k][7]-fnew[i][j][k][8]-fnew[i][j][k][12];

          rb=fnew[i][j][k][0]+fnew[i][j][k][1]+fnew[i][j][k][2]+fnew[i][j][k][3]+fnew[i][j][k][4]+ fnew[i][j][k][5]+fnew[i][j][k][6]+fnew[i][j][k][7]+fnew[i][j][k][8]+fnew[i][j][k][9]+ fnew[i][j][k][10]+fnew[i][j][k][12]+fnew[i][j][k][14]+fnew[i][j][k][16]+fnew[i][j][k][18]+tmp1;

          tmp3=rb*vwbt-fnew[i][j][k][2]+fnew[i][j][k][4]-fnew[i][j][k][7]+fnew[i][j][k][8]-fnew[i][j][k][9]+ fnew[i][j][k][10]-fnew[i][j][k][16]+fnew[i][j][k][18];

          fnew[i][j][k][11] = 0.25*(tmp1+2.0*tmp2);
          fnew[i][j][k][13] = 0.25*(tmp1-2.0*tmp2);
          fnew[i][j][k][15] = 0.25*(tmp1+2.0*tmp3);
          fnew[i][j][k][17] = 0.25*(tmp1-2.0*tmp3);
        }
      }
    }

    if(comm->myloc[2]==comm->procgrid[2]-1){
      MPI_Wait(&req_send25,&status);
      MPI_Wait(&req_recv15,&status);

      // Mirror image on plane k=subNbz-2: rebuild directions 6,12,14,16,18
      // from plane k+1, with vwtp in place of vwbt.
      for(i=1;i<subNbx-1;i++){
        for(j=1;j<subNby-1;j++){
          k=subNbz-2;

          if(typeLB == 1){
            fnew[i][j][k][6]=fnew[i][j][k+1][5];
            tmp1=fnew[i][j][k+1][11]+fnew[i][j][k+1][13]+fnew[i][j][k+1][15]+fnew[i][j][k+1][17];
          }
          else{
            fnew[i][j][k][6]=fnew[i][j][k+1][5] + (0.5-Dcoeff*(tau+0.5))*feqn[i][j][k-1][6];
            tmp1=fnew[i][j][k+1][11]+fnew[i][j][k+1][13]+fnew[i][j][k+1][15]+fnew[i][j][k+1][17] + (0.5-Dcoeff*(tau+0.5))*(feqn[i-1][j][k-1][12] + feqn[i+1][j][k-1][14] + feqn[i][j-1][k-1][16] + feqn[i][j+1][k-1][18]);
          }

          tmp2=fnew[i][j][k][3]+fnew[i][j][k][9]+fnew[i][j][k][10]+fnew[i][j][k][13]-fnew[i][j][k][1]- fnew[i][j][k][7]-fnew[i][j][k][8]-fnew[i][j][k][11];

          rb=fnew[i][j][k][0]+fnew[i][j][k][1]+fnew[i][j][k][2]+fnew[i][j][k][3]+fnew[i][j][k][4]+ fnew[i][j][k][5]+fnew[i][j][k][6]+fnew[i][j][k][7]+fnew[i][j][k][8]+fnew[i][j][k][9]+ fnew[i][j][k][10]+fnew[i][j][k][11]+fnew[i][j][k][13]+fnew[i][j][k][15]+fnew[i][j][k][17]+tmp1;

          tmp3=rb*vwtp-fnew[i][j][k][2]+fnew[i][j][k][4]-fnew[i][j][k][7]+fnew[i][j][k][8]-fnew[i][j][k][9]+ fnew[i][j][k][10]-fnew[i][j][k][15]+fnew[i][j][k][17];

          fnew[i][j][k][12] = 0.25*(tmp1+2.0*tmp2);
          fnew[i][j][k][14] = 0.25*(tmp1-2.0*tmp2);
          fnew[i][j][k][16] = 0.25*(tmp1+2.0*tmp3);
          fnew[i][j][k][18] = 0.25*(tmp1-2.0*tmp3);
        }
      }
    }

  //--------------------------------------------------------------------------
  // Periodic z boundary conditions.
  //--------------------------------------------------------------------------
  }else {

    // Same three-stage exchange as the fixed-z branch; every plane is then
    // updated with update_periodic.
    for(i=0; i<numrequests; i++)
      requests[i]=MPI_REQUEST_NULL;
    MPI_Isend(&feq[1][1][1][0],1,passxf,comm->procneigh[0][0],15,world,&requests[0]);
    MPI_Irecv(&feq[0][1][1][0],1,passxf,comm->procneigh[0][0],25,world,&requests[1]);
    MPI_Isend(&feq[subNbx-2][1][1][0],1,passxf,comm->procneigh[0][1],25,world,&requests[2]);
    MPI_Irecv(&feq[subNbx-1][1][1][0],1,passxf,comm->procneigh[0][1],15,world,&requests[3]);
    if(typeLB == 2){
      MPI_Isend(&feqn[1][1][1][0],1,passxf,comm->procneigh[0][0],10,world,&requests[4]);
      MPI_Irecv(&feqn[0][1][1][0],1,passxf,comm->procneigh[0][0],20,world,&requests[5]);
      MPI_Isend(&feqn[subNbx-2][1][1][0],1,passxf,comm->procneigh[0][1],20,world,&requests[6]);
      MPI_Irecv(&feqn[subNbx-1][1][1][0],1,passxf,comm->procneigh[0][1],10,world,&requests[7]);
    }
    update_periodic(2,subNbx-2,2,subNby-2,2,subNbz-2);
    MPI_Waitall(numrequests,requests,statuses);

    for(i=0; i<numrequests; i++)
      requests[i]=MPI_REQUEST_NULL;
    MPI_Isend(&feq[0][1][1][0],1,passyf,comm->procneigh[1][0],15,world,&requests[0]);
    MPI_Irecv(&feq[0][0][1][0],1,passyf,comm->procneigh[1][0],25,world,&requests[1]);
    MPI_Isend(&feq[0][subNby-2][1][0],1,passyf,comm->procneigh[1][1],25,world,&requests[2]);
    MPI_Irecv(&feq[0][subNby-1][1][0],1,passyf,comm->procneigh[1][1],15,world,&requests[3]);
    if(typeLB == 2){
      MPI_Isend(&feqn[0][1][1][0],1,passyf,comm->procneigh[1][0],10,world,&requests[4]);
      MPI_Irecv(&feqn[0][0][1][0],1,passyf,comm->procneigh[1][0],20,world,&requests[5]);
      MPI_Isend(&feqn[0][subNby-2][1][0],1,passyf,comm->procneigh[1][1],20,world,&requests[6]);
      MPI_Irecv(&feqn[0][subNby-1][1][0],1,passyf,comm->procneigh[1][1],10,world,&requests[7]);
    }
    update_periodic(1,2,2,subNby-2,2,subNbz-2);
    update_periodic(subNbx-2,subNbx-1,2,subNby-2,2,subNbz-2);
    MPI_Waitall(numrequests,requests,statuses);

    for(i=0; i<numrequests; i++)
      requests[i]=MPI_REQUEST_NULL;
    MPI_Isend(&feq[0][0][1][0],1,passzf,comm->procneigh[2][0],15,world,&requests[0]);
    MPI_Irecv(&feq[0][0][0][0],1,passzf,comm->procneigh[2][0],25,world,&requests[1]);
    MPI_Isend(&feq[0][0][subNbz-2][0],1,passzf,comm->procneigh[2][1],25,world,&requests[2]);
    MPI_Irecv(&feq[0][0][subNbz-1][0],1,passzf,comm->procneigh[2][1],15,world,&requests[3]);
    if(typeLB == 2){
      MPI_Isend(&feqn[0][0][1][0],1,passzf,comm->procneigh[2][0],10,world,&requests[4]);
      MPI_Irecv(&feqn[0][0][0][0],1,passzf,comm->procneigh[2][0],20,world,&requests[5]);
      MPI_Isend(&feqn[0][0][subNbz-2][0],1,passzf,comm->procneigh[2][1],20,world,&requests[6]);
      MPI_Irecv(&feqn[0][0][subNbz-1][0],1,passzf,comm->procneigh[2][1],10,world,&requests[7]);
    }
    update_periodic(1,subNbx-1,1,2,2,subNbz-2);
    update_periodic(1,subNbx-1,subNby-2,subNby-1,2,subNbz-2);
    MPI_Waitall(numrequests,requests,statuses);

    update_periodic(1,subNbx-1,1,subNby-1,1,2);
    update_periodic(1,subNbx-1,1,subNby-1,subNbz-2,subNbz-1);
  }

}
diff --git a/src/USER-MISC/pair_cdeam.cpp b/src/USER-MISC/pair_cdeam.cpp index cbb8e23f7..dbfbac823 100644 --- a/src/USER-MISC/pair_cdeam.cpp +++ b/src/USER-MISC/pair_cdeam.cpp @@ -1,644 +1,643 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Alexander Stukowski Technical University of Darmstadt, Germany Department of Materials Science ------------------------------------------------------------------------- */ #include "math.h" #include "stdio.h" #include "stdlib.h" #include "string.h" #include "pair_cdeam.h" #include "atom.h" #include "force.h" #include "comm.h" #include "neighbor.h" #include "neigh_list.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; // This is for debugging purposes. The ASSERT() macro is used in the code to check // if everything runs as expected. Change this to #if 0 if you don't need the checking. #if 0 #define ASSERT(cond) ((!(cond)) ? my_failure(error,__FILE__,__LINE__) : my_noop()) inline void my_noop() {} inline void my_failure(Error* error, const char* file, int line) { char str[1024]; sprintf(str,"Assertion failure: File %s, line %i", file, line); error->one(FLERR,str); } #else #define ASSERT(cond) #endif #define MAXLINE 1024 // This sets the maximum line length in EAM input files. PairCDEAM::PairCDEAM(LAMMPS *lmp, int _cdeamVersion) : PairEAM(lmp), PairEAMAlloy(lmp), cdeamVersion(_cdeamVersion) { single_enable = 0; restartinfo = 0; rhoB = NULL; D_values = NULL; hcoeff = NULL; // Set communication buffer sizes needed by this pair style. 
if(cdeamVersion == 1) { comm_forward = 4; comm_reverse = 3; } else if(cdeamVersion == 2) { comm_forward = 3; comm_reverse = 2; } else { error->all(FLERR,"Invalid CD-EAM potential version."); } } PairCDEAM::~PairCDEAM() { memory->destroy(rhoB); memory->destroy(D_values); if(hcoeff) delete[] hcoeff; } void PairCDEAM::compute(int eflag, int vflag) { int i,j,ii,jj,inum,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; double rsq,rhoip,rhojp,recip,phi; int *ilist,*jlist,*numneigh,**firstneigh; evdwl = 0.0; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = eflag_global = eflag_atom = 0; // Grow per-atom arrays if necessary if(atom->nmax > nmax) { memory->destroy(rho); memory->destroy(fp); memory->destroy(rhoB); memory->destroy(D_values); nmax = atom->nmax; memory->create(rho,nmax,"pair:rho"); memory->create(rhoB,nmax,"pair:rhoB"); memory->create(fp,nmax,"pair:fp"); memory->create(D_values,nmax,"pair:D_values"); } double **x = atom->x; double **f = atom->f; int *type = atom->type; int nlocal = atom->nlocal; int newton_pair = force->newton_pair; inum = list->inum; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // Zero out per-atom arrays. int m = nlocal + atom->nghost; for(i = 0; i < m; i++) { rho[i] = 0.0; rhoB[i] = 0.0; D_values[i] = 0.0; } // Stage I // Compute rho and rhoB at each local atom site. // Additionally calculate the D_i values here if we are using the one-site formulation. // For the two-site formulation we have to calculate the D values in an extra loop (Stage II). 
for(ii = 0; ii < inum; ii++) { i = ilist[ii]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; for(jj = 0; jj < jnum; jj++) { j = jlist[jj]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; if(rsq < cutforcesq) { jtype = type[j]; double r = sqrt(rsq); const EAMTableIndex index = radiusToTableIndex(r); double localrho = RhoOfR(index, jtype, itype); rho[i] += localrho; if(jtype == speciesB) rhoB[i] += localrho; if(newton_pair || j < nlocal) { localrho = RhoOfR(index, itype, jtype); rho[j] += localrho; if(itype == speciesB) rhoB[j] += localrho; } if(cdeamVersion == 1 && itype != jtype) { // Note: if the i-j interaction is not concentration dependent (because either // i or j are not species A or B) then its contribution to D_i and D_j should // be ignored. // This if-clause is only required for a ternary. if((itype == speciesA && jtype == speciesB) || (jtype == speciesA && itype == speciesB)) { double Phi_AB = PhiOfR(index, itype, jtype, 1.0 / r); D_values[i] += Phi_AB; if(newton_pair || j < nlocal) D_values[j] += Phi_AB; } } } } } // Communicate and sum densities. if(newton_pair) { communicationStage = 1; comm->reverse_comm_pair(this); } // fp = derivative of embedding energy at each atom // phi = embedding energy at each atom for(ii = 0; ii < inum; ii++) { i = ilist[ii]; EAMTableIndex index = rhoToTableIndex(rho[i]); fp[i] = FPrimeOfRho(index, type[i]); if(eflag) { phi = FofRho(index, type[i]); if (eflag_global) eng_vdwl += phi; if (eflag_atom) eatom[i] += phi; } } // Communicate derivative of embedding function and densities // and D_values (this for one-site formulation only). communicationStage = 2; comm->forward_comm_pair(this); // The electron densities may not drop to zero because then the concentration would no longer be defined. 
// But the concentration is not needed anyway if there is no interaction with another atom, which is the case // if the electron density is exactly zero. That's why the following lines have been commented out. // //for(i = 0; i < nlocal + atom->nghost; i++) { // if(rho[i] == 0 && (type[i] == speciesA || type[i] == speciesB)) // error->one(FLERR,"CD-EAM potential routine: Detected atom with zero electron density."); //} // Stage II // This is only required for the original two-site formulation of the CD-EAM potential. if(cdeamVersion == 2) { // Compute intermediate value D_i for each atom. for(ii = 0; ii < inum; ii++) { i = ilist[ii]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; // This code line is required for ternary alloys. if(itype != speciesA && itype != speciesB) continue; double x_i = rhoB[i] / rho[i]; // Concentration at atom i. for(jj = 0; jj < jnum; jj++) { j = jlist[jj]; j &= NEIGHMASK; jtype = type[j]; if(itype == jtype) continue; // This code line is required for ternary alloys. if(jtype != speciesA && jtype != speciesB) continue; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; if(rsq < cutforcesq) { double r = sqrt(rsq); const EAMTableIndex index = radiusToTableIndex(r); // The concentration independent part of the cross pair potential. double Phi_AB = PhiOfR(index, itype, jtype, 1.0 / r); // Average concentration of two sites double x_ij = 0.5 * (x_i + rhoB[j]/rho[j]); // Calculate derivative of h(x_ij) polynomial function. double h_prime = evalHprime(x_ij); D_values[i] += h_prime * Phi_AB / (2.0 * rho[i] * rho[i]); if(newton_pair || j < nlocal) D_values[j] += h_prime * Phi_AB / (2.0 * rho[j] * rho[j]); } } } // Communicate and sum D values. if(newton_pair) { communicationStage = 3; comm->reverse_comm_pair(this); } communicationStage = 4; comm->forward_comm_pair(this); } // Stage III // Compute force acting on each atom. 
for(ii = 0; ii < inum; ii++) { i = ilist[ii]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; // Concentration at site i double x_i = -1.0; // The value -1 indicates: no concentration dependence for all interactions of atom i. // It will be replaced by the concentration at site i if atom i is either A or B. double D_i, h_prime_i; // This if-clause is only required for ternary alloys. if((itype == speciesA || itype == speciesB) && rho[i] != 0.0) { // Compute local concentration at site i. x_i = rhoB[i]/rho[i]; ASSERT(x_i >= 0 && x_i<=1.0); if(cdeamVersion == 1) { // Calculate derivative of h(x_i) polynomial function. h_prime_i = evalHprime(x_i); D_i = D_values[i] * h_prime_i / (2.0 * rho[i] * rho[i]); - } - else if(cdeamVersion == 2) { + } else if(cdeamVersion == 2) { D_i = D_values[i]; + } else { + ASSERT(false); } - else ASSERT(false); } for(jj = 0; jj < jnum; jj++) { j = jlist[jj]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; if(rsq < cutforcesq) { jtype = type[j]; double r = sqrt(rsq); const EAMTableIndex index = radiusToTableIndex(r); // rhoip = derivative of (density at atom j due to atom i) // rhojp = derivative of (density at atom i due to atom j) // psip needs both fp[i] and fp[j] terms since r_ij appears in two // terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji) // hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip rhoip = RhoPrimeOfR(index, itype, jtype); rhojp = RhoPrimeOfR(index, jtype, itype); fpair = fp[i]*rhojp + fp[j]*rhoip; recip = 1.0/r; double x_j = -1; // The value -1 indicates: no concentration dependence for this i-j pair // because atom j is not of species A nor B. // This code line is required for ternary alloy. if(jtype == speciesA || jtype == speciesB) { ASSERT(rho[i] != 0.0); ASSERT(rho[j] != 0.0); // Compute local concentration at site j. 
x_j = rhoB[j]/rho[j]; ASSERT(x_j >= 0 && x_j<=1.0); double D_j=0.0; if(cdeamVersion == 1) { // Calculate derivative of h(x_j) polynomial function. double h_prime_j = evalHprime(x_j); D_j = D_values[j] * h_prime_j / (2.0 * rho[j] * rho[j]); - } - else if(cdeamVersion == 2) { + } else if(cdeamVersion == 2) { D_j = D_values[j]; + } else { + ASSERT(false); } - else ASSERT(false); - double t2 = -rhoB[j]; if(itype == speciesB) t2 += rho[j]; fpair += D_j * rhoip * t2; } // This if-clause is only required for a ternary alloy. // Actually we don't need it at all because D_i should be zero anyway if // atom i has no concentration dependent interactions (because it is not species A or B). if(x_i != -1.0) { double t1 = -rhoB[i]; if(jtype == speciesB) t1 += rho[i]; fpair += D_i * rhojp * t1; } double phip; double phi = PhiOfR(index, itype, jtype, recip, phip); if(itype == jtype || x_i == -1.0 || x_j == -1.0) { // Case of no concentration dependence. fpair += phip; - } - else { + } else { // We have a concentration dependence for the i-j interaction. double h=0.0; if(cdeamVersion == 1) { // Calculate h(x_i) polynomial function. double h_i = evalH(x_i); // Calculate h(x_j) polynomial function. double h_j = evalH(x_j); h = 0.5 * (h_i + h_j); - } - else if(cdeamVersion == 2) { + } else if(cdeamVersion == 2) { // Average concentration. double x_ij = 0.5 * (x_i + x_j); // Calculate h(x_ij) polynomial function. h = evalH(x_ij); + } else { + ASSERT(false); } - else ASSERT(false); fpair += h * phip; phi *= h; } // Divide by r_ij and negate to get forces from gradient. 
fpair /= -r; f[i][0] += delx*fpair; f[i][1] += dely*fpair; f[i][2] += delz*fpair; if(newton_pair || j < nlocal) { f[j][0] -= delx*fpair; f[j][1] -= dely*fpair; f[j][2] -= delz*fpair; } if(eflag) evdwl = phi; if(evflag) ev_tally(i,j,nlocal,newton_pair,evdwl,0.0,fpair,delx,dely,delz); } } } if(vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ void PairCDEAM::coeff(int narg, char **arg) { PairEAMAlloy::coeff(narg, arg); // Make sure the EAM file is a CD-EAM binary alloy. if(setfl->nelements < 2) error->all(FLERR,"The EAM file must contain at least 2 elements to be used with the eam/cd pair style."); // Read in the coefficients of the h polynomial from the end of the EAM file. read_h_coeff(arg[2]); // Determine which atom type is the A species and which is the B species in the alloy. // By default take the first element (index 0) in the EAM file as the A species // and the second element (index 1) in the EAM file as the B species. 
speciesA = -1; speciesB = -1; for(int i = 1; i <= atom->ntypes; i++) { if(map[i] == 0) { if(speciesA >= 0) error->all(FLERR,"The first element from the EAM file may only be mapped to a single atom type."); speciesA = i; } if(map[i] == 1) { if(speciesB >= 0) error->all(FLERR,"The second element from the EAM file may only be mapped to a single atom type."); speciesB = i; } } if(speciesA < 0) error->all(FLERR,"The first element from the EAM file must be mapped to exactly one atom type."); if(speciesB < 0) error->all(FLERR,"The second element from the EAM file must be mapped to exactly one atom type."); } /* ---------------------------------------------------------------------- Reads in the h(x) polynomial coefficients ------------------------------------------------------------------------- */ void PairCDEAM::read_h_coeff(char *filename) { if(comm->me == 0) { // Open potential file FILE *fp; char line[MAXLINE]; char nextline[MAXLINE]; fp = force->open_potential(filename); if (fp == NULL) { char str[128]; sprintf(str,"Cannot open EAM potential file %s", filename); error->one(FLERR,str); } // h coefficients are stored at the end of the file. // Skip to last line of file. while(fgets(nextline, MAXLINE, fp) != NULL) { strcpy(line, nextline); } char* ptr = strtok(line, " \t\n\r\f"); int degree = atoi(ptr); nhcoeff = degree+1; hcoeff = new double[nhcoeff]; int i = 0; while((ptr = strtok(NULL," \t\n\r\f")) != NULL && i < nhcoeff) { hcoeff[i++] = atof(ptr); } if(i != nhcoeff || nhcoeff < 1) error->one(FLERR,"Failed to read h(x) function coefficients from EAM file."); // Close the potential file. 
fclose(fp); } MPI_Bcast(&nhcoeff, 1, MPI_INT, 0, world); if(comm->me != 0) hcoeff = new double[nhcoeff]; MPI_Bcast(hcoeff, nhcoeff, MPI_DOUBLE, 0, world); } /* ---------------------------------------------------------------------- */ int PairCDEAM::pack_comm(int n, int *list, double *buf, int pbc_flag, int *pbc) { int i,j,m; m = 0; if(communicationStage == 2) { if(cdeamVersion == 1) { for (i = 0; i < n; i++) { j = list[i]; buf[m++] = fp[j]; buf[m++] = rho[j]; buf[m++] = rhoB[j]; buf[m++] = D_values[j]; } return 4; } else if(cdeamVersion == 2) { for (i = 0; i < n; i++) { j = list[i]; buf[m++] = fp[j]; buf[m++] = rho[j]; buf[m++] = rhoB[j]; } return 3; } else { ASSERT(false); return 0; } } else if(communicationStage == 4) { for (i = 0; i < n; i++) { j = list[i]; buf[m++] = D_values[j]; } return 1; } else return 0; } /* ---------------------------------------------------------------------- */ void PairCDEAM::unpack_comm(int n, int first, double *buf) { int i,m,last; m = 0; last = first + n; if(communicationStage == 2) { if(cdeamVersion == 1) { for(i = first; i < last; i++) { fp[i] = buf[m++]; rho[i] = buf[m++]; rhoB[i] = buf[m++]; D_values[i] = buf[m++]; } } else if(cdeamVersion == 2) { for(i = first; i < last; i++) { fp[i] = buf[m++]; rho[i] = buf[m++]; rhoB[i] = buf[m++]; } + } else { + ASSERT(false); } - else ASSERT(false); } else if(communicationStage == 4) { for(i = first; i < last; i++) { D_values[i] = buf[m++]; } } } /* ---------------------------------------------------------------------- */ int PairCDEAM::pack_reverse_comm(int n, int first, double *buf) { int i,m,last; m = 0; last = first + n; if(communicationStage == 1) { if(cdeamVersion == 1) { for(i = first; i < last; i++) { buf[m++] = rho[i]; buf[m++] = rhoB[i]; buf[m++] = D_values[i]; } return 3; } else if(cdeamVersion == 2) { for(i = first; i < last; i++) { buf[m++] = rho[i]; buf[m++] = rhoB[i]; } return 2; } else { ASSERT(false); return 0; } } else if(communicationStage == 3) { for(i = first; i < last; 
i++) { buf[m++] = D_values[i]; } return 1; } else return 0; } /* ---------------------------------------------------------------------- */ void PairCDEAM::unpack_reverse_comm(int n, int *list, double *buf) { int i,j,m; m = 0; if(communicationStage == 1) { if(cdeamVersion == 1) { for(i = 0; i < n; i++) { j = list[i]; rho[j] += buf[m++]; rhoB[j] += buf[m++]; D_values[j] += buf[m++]; } - } - else if(cdeamVersion == 2) { + } else if(cdeamVersion == 2) { for(i = 0; i < n; i++) { j = list[i]; rho[j] += buf[m++]; rhoB[j] += buf[m++]; } + } else { + ASSERT(false); } - else ASSERT(false); } else if(communicationStage == 3) { for(i = 0; i < n; i++) { j = list[i]; D_values[j] += buf[m++]; } } } /* ---------------------------------------------------------------------- memory usage of local atom-based arrays ------------------------------------------------------------------------- */ double PairCDEAM::memory_usage() { double bytes = 2 * nmax * sizeof(double); return PairEAMAlloy::memory_usage() + bytes; } diff --git a/src/USER-MISC/pair_edip.cpp b/src/USER-MISC/pair_edip.cpp index bb4d67015..e9a83eded 100644 --- a/src/USER-MISC/pair_edip.cpp +++ b/src/USER-MISC/pair_edip.cpp @@ -1,1062 +1,1056 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Luca Ferraro (CASPUR) email: luca.ferraro@caspur.it Environment Dependent Interatomic Potential References: 1) J. F. Justo, M. Z. Bazant, E. Kaxiras, V. V. Bulatov, S. Yip Phys. Rev. B 58, 2539 (1998) ------------------------------------------------------------------------- */ #include "math.h" #include "float.h" #include "stdio.h" #include "stdlib.h" #include "string.h" #include "pair_edip.h" #include "atom.h" #include "neighbor.h" #include "neigh_list.h" #include "neigh_request.h" #include "force.h" #include "comm.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; #define MAXLINE 1024 #define DELTA 4 #define GRIDDENSITY 8000 #define GRIDSTART 0.1 // max number of interaction per atom for f(Z) environment potential #define leadDimInteractionList 64 /* ---------------------------------------------------------------------- */ PairEDIP::PairEDIP(LAMMPS *lmp) : Pair(lmp) { single_enable = 0; restartinfo = 0; one_coeff = 1; manybody_flag = 1; nelements = 0; elements = NULL; nparams = maxparam = 0; params = NULL; elem2param = NULL; } /* ---------------------------------------------------------------------- check if allocated, since class can be destructed when incomplete ------------------------------------------------------------------------- */ PairEDIP::~PairEDIP() { if (elements) for (int i = 0; i < nelements; i++) delete [] elements[i]; delete [] elements; memory->destroy(params); memory->destroy(elem2param); if (allocated) { memory->destroy(setflag); memory->destroy(cutsq); delete [] map; deallocateGrids(); deallocatePreLoops(); } } /* ---------------------------------------------------------------------- */ void PairEDIP::compute(int eflag, int vflag) { int i,j,k,ii,inum,jnum; - int itype,jtype,ktype,ijparam,ikparam,ijkparam; + int itype,jtype,ktype,ijparam,ikparam; double 
xtmp,ytmp,ztmp,evdwl; int *ilist,*jlist,*numneigh,**firstneigh; register int preForceCoord_counter; double invR_ij; double invR_ik; double directorCos_ij_x; double directorCos_ij_y; double directorCos_ij_z; double directorCos_ik_x; double directorCos_ik_y; double directorCos_ik_z; double cosTeta; int interpolIDX; double interpolTMP; double interpolDeltaX; double interpolY1; double interpolY2; double invRMinusCutoffA; double sigmaInvRMinusCutoffA; double gammInvRMinusCutoffA; double cosTetaDiff; double cosTetaDiffCosTetaDiff; double cutoffFunction_ij; double exp2B_ij; double exp2BDerived_ij; double pow2B_ij; double pow2BDerived_ij; double exp3B_ij; double exp3BDerived_ij; double exp3B_ik; double exp3BDerived_ik; double qFunction; - double qFunctionDerived; double tauFunction; double tauFunctionDerived; double expMinusBetaZeta_iZeta_i; double qFunctionCosTetaDiffCosTetaDiff; double expMinusQFunctionCosTetaDiffCosTetaDiff; double zeta_i; double zeta_iDerived; double zeta_iDerivedInvR_ij; double forceModCoord_factor; double forceModCoord; double forceModCoord_ij; double forceMod2B; double forceMod3B_factor1_ij; double forceMod3B_factor2_ij; double forceMod3B_factor2; double forceMod3B_factor1_ik; double forceMod3B_factor2_ik; double potentia3B_factor; double potential2B_factor; evdwl = 0.0; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; double **x = atom->x; double **f = atom->f; int *type = atom->type; int nlocal = atom->nlocal; int newton_pair = force->newton_pair; inum = list->inum; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over full neighbor list of my atoms for (ii = 0; ii < inum; ii++) { zeta_i = 0.0; int numForceCoordPairs = 0; i = ilist[ii]; itype = map[type[i]]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; jlist = firstneigh[i]; jnum = numneigh[i]; // pre-loop to compute environment coordination f(Z) for (int neighbor_j = 0; neighbor_j < jnum; neighbor_j++) { j = jlist[neighbor_j]; j &= 
NEIGHMASK; double dr_ij[3], r_ij; dr_ij[0] = xtmp - x[j][0]; dr_ij[1] = ytmp - x[j][1]; dr_ij[2] = ztmp - x[j][2]; r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2]; jtype = map[type[j]]; ijparam = elem2param[itype][jtype][jtype]; if (r_ij > params[ijparam].cutsq) continue; r_ij = sqrt(r_ij); invR_ij = 1.0 / r_ij; preInvR_ij[neighbor_j] = invR_ij; invRMinusCutoffA = 1.0 / (r_ij - cutoffA); sigmaInvRMinusCutoffA = sigma * invRMinusCutoffA; gammInvRMinusCutoffA = gamm * invRMinusCutoffA; interpolDeltaX = r_ij - GRIDSTART; interpolTMP = (interpolDeltaX * GRIDDENSITY); interpolIDX = (int) interpolTMP; interpolY1 = exp3B[interpolIDX]; interpolY2 = exp3B[interpolIDX+1]; exp3B_ij = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP-interpolIDX); exp3BDerived_ij = - exp3B_ij * gammInvRMinusCutoffA * invRMinusCutoffA; preExp3B_ij[neighbor_j] = exp3B_ij; preExp3BDerived_ij[neighbor_j] = exp3BDerived_ij; interpolY1 = exp2B[interpolIDX]; interpolY2 = exp2B[interpolIDX+1]; exp2B_ij = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP-interpolIDX); exp2BDerived_ij = - exp2B_ij * sigmaInvRMinusCutoffA * invRMinusCutoffA; preExp2B_ij[neighbor_j] = exp2B_ij; preExp2BDerived_ij[neighbor_j] = exp2BDerived_ij; interpolY1 = pow2B[interpolIDX]; interpolY2 = pow2B[interpolIDX+1]; pow2B_ij = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP-interpolIDX); prePow2B_ij[neighbor_j] = pow2B_ij; // zeta and its derivative if (r_ij < cutoffC) zeta_i += 1.0; else { interpolY1 = cutoffFunction[interpolIDX]; interpolY2 = cutoffFunction[interpolIDX+1]; cutoffFunction_ij = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP-interpolIDX); zeta_i += cutoffFunction_ij; interpolY1 = cutoffFunctionDerived[interpolIDX]; interpolY2 = cutoffFunctionDerived[interpolIDX+1]; zeta_iDerived = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP-interpolIDX); zeta_iDerivedInvR_ij = zeta_iDerived * invR_ij; preForceCoord_counter=numForceCoordPairs*5; 
preForceCoord[preForceCoord_counter+0]=zeta_iDerivedInvR_ij; preForceCoord[preForceCoord_counter+1]=dr_ij[0]; preForceCoord[preForceCoord_counter+2]=dr_ij[1]; preForceCoord[preForceCoord_counter+3]=dr_ij[2]; preForceCoord[preForceCoord_counter+4]=j; numForceCoordPairs++; } } // quantities depending on zeta_i interpolDeltaX = zeta_i; interpolTMP = (interpolDeltaX * GRIDDENSITY); interpolIDX = (int) interpolTMP; interpolY1 = expMinusBetaZeta_iZeta_iGrid[interpolIDX]; interpolY2 = expMinusBetaZeta_iZeta_iGrid[interpolIDX+1]; expMinusBetaZeta_iZeta_i = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP-interpolIDX); interpolY1 = qFunctionGrid[interpolIDX]; interpolY2 = qFunctionGrid[interpolIDX+1]; qFunction = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP-interpolIDX); interpolY1 = tauFunctionGrid[interpolIDX]; interpolY2 = tauFunctionGrid[interpolIDX+1]; tauFunction = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP-interpolIDX); interpolY1 = tauFunctionDerivedGrid[interpolIDX]; interpolY2 = tauFunctionDerivedGrid[interpolIDX+1]; tauFunctionDerived = interpolY1 + (interpolY2 - interpolY1) * (interpolTMP-interpolIDX); - qFunctionDerived = -mu * qFunction; - forceModCoord_factor = 2.0 * beta * zeta_i * expMinusBetaZeta_iZeta_i; forceModCoord = 0.0; // two-body interactions, skip half of them for (int neighbor_j = 0; neighbor_j < jnum; neighbor_j++) { double dr_ij[3], r_ij, f_ij[3]; j = jlist[neighbor_j]; j &= NEIGHMASK; dr_ij[0] = x[j][0] - xtmp; dr_ij[1] = x[j][1] - ytmp; dr_ij[2] = x[j][2] - ztmp; r_ij = dr_ij[0]*dr_ij[0] + dr_ij[1]*dr_ij[1] + dr_ij[2]*dr_ij[2]; jtype = map[type[j]]; ijparam = elem2param[itype][jtype][jtype]; if (r_ij > params[ijparam].cutsq) continue; r_ij = sqrt(r_ij); invR_ij = preInvR_ij[neighbor_j]; pow2B_ij = prePow2B_ij[neighbor_j]; potential2B_factor = pow2B_ij - expMinusBetaZeta_iZeta_i; exp2B_ij = preExp2B_ij[neighbor_j]; pow2BDerived_ij = - rho * invR_ij * pow2B_ij; forceModCoord += (forceModCoord_factor*exp2B_ij); 
exp2BDerived_ij = preExp2BDerived_ij[neighbor_j]; forceMod2B = exp2BDerived_ij * potential2B_factor + exp2B_ij * pow2BDerived_ij; directorCos_ij_x = invR_ij * dr_ij[0]; directorCos_ij_y = invR_ij * dr_ij[1]; directorCos_ij_z = invR_ij * dr_ij[2]; exp3B_ij = preExp3B_ij[neighbor_j]; exp3BDerived_ij = preExp3BDerived_ij[neighbor_j]; f_ij[0] = forceMod2B * directorCos_ij_x; f_ij[1] = forceMod2B * directorCos_ij_y; f_ij[2] = forceMod2B * directorCos_ij_z; f[i][0] += f_ij[0]; f[i][1] += f_ij[1]; f[i][2] += f_ij[2]; f[j][0] -= f_ij[0]; f[j][1] -= f_ij[1]; f[j][2] -= f_ij[2]; // potential energy evdwl = (exp2B_ij * potential2B_factor); if (evflag) ev_tally(i, j, nlocal, newton_pair, evdwl, 0.0, -forceMod2B*invR_ij, dr_ij[0], dr_ij[1], dr_ij[2]); // three-body Forces for (int neighbor_k = neighbor_j + 1; neighbor_k < jnum; neighbor_k++) { double dr_ik[3], r_ik, f_ik[3]; k = jlist[neighbor_k]; k &= NEIGHMASK; ktype = map[type[k]]; ikparam = elem2param[itype][ktype][ktype]; - ijkparam = elem2param[itype][jtype][ktype]; dr_ik[0] = x[k][0] - xtmp; dr_ik[1] = x[k][1] - ytmp; dr_ik[2] = x[k][2] - ztmp; r_ik = dr_ik[0]*dr_ik[0] + dr_ik[1]*dr_ik[1] + dr_ik[2]*dr_ik[2]; if (r_ik > params[ikparam].cutsq) continue; r_ik = sqrt(r_ik); invR_ik = preInvR_ij[neighbor_k]; directorCos_ik_x = invR_ik * dr_ik[0]; directorCos_ik_y = invR_ik * dr_ik[1]; directorCos_ik_z = invR_ik * dr_ik[2]; cosTeta = directorCos_ij_x * directorCos_ik_x + directorCos_ij_y * directorCos_ik_y + directorCos_ij_z * directorCos_ik_z; cosTetaDiff = cosTeta + tauFunction; cosTetaDiffCosTetaDiff = cosTetaDiff * cosTetaDiff; qFunctionCosTetaDiffCosTetaDiff = cosTetaDiffCosTetaDiff * qFunction; expMinusQFunctionCosTetaDiffCosTetaDiff = exp(-qFunctionCosTetaDiffCosTetaDiff); potentia3B_factor = lambda * ((1.0 - expMinusQFunctionCosTetaDiffCosTetaDiff) + eta * qFunctionCosTetaDiffCosTetaDiff); exp3B_ik = preExp3B_ij[neighbor_k]; exp3BDerived_ik = preExp3BDerived_ij[neighbor_k]; forceMod3B_factor1_ij = - exp3BDerived_ij * 
exp3B_ik * potentia3B_factor; forceMod3B_factor2 = 2.0 * lambda * exp3B_ij * exp3B_ik * qFunction * cosTetaDiff * (eta + expMinusQFunctionCosTetaDiffCosTetaDiff); forceMod3B_factor2_ij = forceMod3B_factor2 * invR_ij; f_ij[0] = forceMod3B_factor1_ij * directorCos_ij_x + forceMod3B_factor2_ij * (cosTeta * directorCos_ij_x - directorCos_ik_x); f_ij[1] = forceMod3B_factor1_ij * directorCos_ij_y + forceMod3B_factor2_ij * (cosTeta * directorCos_ij_y - directorCos_ik_y); f_ij[2] = forceMod3B_factor1_ij * directorCos_ij_z + forceMod3B_factor2_ij * (cosTeta * directorCos_ij_z - directorCos_ik_z); forceMod3B_factor1_ik = - exp3BDerived_ik * exp3B_ij * potentia3B_factor; forceMod3B_factor2_ik = forceMod3B_factor2 * invR_ik; f_ik[0] = forceMod3B_factor1_ik * directorCos_ik_x + forceMod3B_factor2_ik * (cosTeta * directorCos_ik_x - directorCos_ij_x); f_ik[1] = forceMod3B_factor1_ik * directorCos_ik_y + forceMod3B_factor2_ik * (cosTeta * directorCos_ik_y - directorCos_ij_y); f_ik[2] = forceMod3B_factor1_ik * directorCos_ik_z + forceMod3B_factor2_ik * (cosTeta * directorCos_ik_z - directorCos_ij_z); forceModCoord += (forceMod3B_factor2 * (tauFunctionDerived - 0.5 * mu * cosTetaDiff)); f[j][0] += f_ij[0]; f[j][1] += f_ij[1]; f[j][2] += f_ij[2]; f[k][0] += f_ik[0]; f[k][1] += f_ik[1]; f[k][2] += f_ik[2]; f[i][0] -= f_ij[0] + f_ik[0]; f[i][1] -= f_ij[1] + f_ik[1]; f[i][2] -= f_ij[2] + f_ik[2]; // potential energy evdwl = (exp3B_ij * exp3B_ik * potentia3B_factor); if (evflag) ev_tally3(i,j,k,evdwl,0.0,f_ij,f_ik,dr_ij,dr_ik); } } // forces due to environment coordination f(Z) for (int idx = 0; idx < numForceCoordPairs; idx++) { double dr_ij[3],f_ij[3]; preForceCoord_counter = idx * 5; zeta_iDerivedInvR_ij=preForceCoord[preForceCoord_counter+0]; dr_ij[0]=preForceCoord[preForceCoord_counter+1]; dr_ij[1]=preForceCoord[preForceCoord_counter+2]; dr_ij[2]=preForceCoord[preForceCoord_counter+3]; j = static_cast<int> (preForceCoord[preForceCoord_counter+4]); forceModCoord_ij = forceModCoord * 
zeta_iDerivedInvR_ij; f_ij[0] = forceModCoord_ij * dr_ij[0]; f_ij[1] = forceModCoord_ij * dr_ij[1]; f_ij[2] = forceModCoord_ij * dr_ij[2]; f[i][0] -= f_ij[0]; f[i][1] -= f_ij[1]; f[i][2] -= f_ij[2]; f[j][0] += f_ij[0]; f[j][1] += f_ij[1]; f[j][2] += f_ij[2]; // potential energy evdwl = 0.0; if (evflag) ev_tally(i, j, nlocal, newton_pair, evdwl, 0.0, -forceModCoord_ij, dr_ij[0], dr_ij[1], dr_ij[2]); } } if (vflag_fdotr) virial_fdotr_compute(); } /* ---------------------------------------------------------------------- */ void PairEDIP::allocateGrids(void) { int numGridPointsOneCutoffFunction; int numGridPointsNotOneCutoffFunction; int numGridPointsCutoffFunction; int numGridPointsR; int numGridPointsRTotal; int numGridPointsQFunctionGrid; int numGridPointsExpMinusBetaZeta_iZeta_i; int numGridPointsTauFunctionGrid; double maxArgumentTauFunctionGrid; double maxArgumentQFunctionGrid; double maxArgumentExpMinusBetaZeta_iZeta_i; double const leftLimitToZero = -DBL_MIN * 1000.0; // tauFunctionGrid maxArgumentTauFunctionGrid = leadDimInteractionList; numGridPointsTauFunctionGrid = (int) ((maxArgumentTauFunctionGrid) * GRIDDENSITY) + 2; memory->create(tauFunctionGrid,numGridPointsTauFunctionGrid, "edip:tauFunctionGrid"); memory->create(tauFunctionDerivedGrid,numGridPointsTauFunctionGrid, "edip:tauFunctionDerivedGrid"); // expMinusBetaZeta_iZeta_iGrid maxArgumentExpMinusBetaZeta_iZeta_i = leadDimInteractionList; numGridPointsExpMinusBetaZeta_iZeta_i = (int) ((maxArgumentExpMinusBetaZeta_iZeta_i) * GRIDDENSITY) + 2; memory->create(expMinusBetaZeta_iZeta_iGrid, numGridPointsExpMinusBetaZeta_iZeta_i, "edip:expMinusBetaZeta_iZeta_iGrid"); // qFunctionGrid maxArgumentQFunctionGrid = leadDimInteractionList; numGridPointsQFunctionGrid = (int) ((maxArgumentQFunctionGrid) * GRIDDENSITY) + 2; memory->create(qFunctionGrid,numGridPointsQFunctionGrid,"edip:qFunctionGrid"); // cutoffFunction numGridPointsOneCutoffFunction = (int) ((cutoffC - GRIDSTART) * GRIDDENSITY); 
numGridPointsNotOneCutoffFunction = (int) ((cutoffA-cutoffC) * GRIDDENSITY); numGridPointsCutoffFunction = numGridPointsOneCutoffFunction + numGridPointsNotOneCutoffFunction+2; memory->create(cutoffFunction,numGridPointsCutoffFunction, "edip:cutoffFunction"); memory->create(cutoffFunctionDerived,numGridPointsCutoffFunction, "edip:cutoffFunctionDerived"); // pow2B numGridPointsR = (int) ((cutoffA + leftLimitToZero - GRIDSTART) * GRIDDENSITY); numGridPointsRTotal = numGridPointsR + 2; memory->create(pow2B,numGridPointsRTotal,"edip:pow2B"); memory->create(exp2B,numGridPointsRTotal,"edip:exp2B"); memory->create(exp3B,numGridPointsRTotal,"edip:exp3B"); } /* ---------------------------------------------------------------------- pre-calculated structures ------------------------------------------------------------------------- */ void PairEDIP::allocatePreLoops(void) { int nthreads = comm->nthreads; memory->create(preInvR_ij,nthreads*leadDimInteractionList,"edip:preInvR_ij"); memory->create(preExp3B_ij,nthreads*leadDimInteractionList,"edip:preExp3B_ij"); memory->create(preExp3BDerived_ij,nthreads*leadDimInteractionList, "edip:preExp3BDerived_ij"); memory->create(preExp2B_ij,nthreads*leadDimInteractionList,"edip:preExp2B_ij"); memory->create(preExp2BDerived_ij,nthreads*leadDimInteractionList, "edip:preExp2BDerived_ij"); memory->create(prePow2B_ij,nthreads*leadDimInteractionList,"edip:prePow2B_ij"); memory->create(preForceCoord,5*nthreads*leadDimInteractionList,"edip:preForceCoord"); } /* ---------------------------------------------------------------------- deallocate grids ------------------------------------------------------------------------- */ void PairEDIP::deallocateGrids(void) { memory->destroy(cutoffFunction); memory->destroy(cutoffFunctionDerived); memory->destroy(pow2B); memory->destroy(exp2B); memory->destroy(exp3B); memory->destroy(qFunctionGrid); memory->destroy(expMinusBetaZeta_iZeta_iGrid); memory->destroy(tauFunctionGrid); 
memory->destroy(tauFunctionDerivedGrid); } /* ---------------------------------------------------------------------- deallocate preLoops ------------------------------------------------------------------------- */ void PairEDIP::deallocatePreLoops(void) { memory->destroy(preInvR_ij); memory->destroy(preExp3B_ij); memory->destroy(preExp3BDerived_ij); memory->destroy(preExp2B_ij); memory->destroy(preExp2BDerived_ij); memory->destroy(prePow2B_ij); memory->destroy(preForceCoord); } /* ---------------------------------------------------------------------- */ void PairEDIP::allocate() { allocated = 1; int n = atom->ntypes; memory->create(setflag,n+1,n+1,"pair:setflag"); memory->create(cutsq,n+1,n+1,"pair:cutsq"); map = new int[n+1]; } /* ---------------------------------------------------------------------- global settings ------------------------------------------------------------------------- */ void PairEDIP::settings(int narg, char **arg) { if (narg != 0) error->all(FLERR,"Illegal pair_style command"); } /* ---------------------------------------------------------------------- */ void PairEDIP::initGrids(void) { int l; int numGridPointsOneCutoffFunction; int numGridPointsNotOneCutoffFunction; int numGridPointsCutoffFunction; int numGridPointsR; - int numGridPointsRTotal; int numGridPointsQFunctionGrid; int numGridPointsExpMinusBetaZeta_iZeta_i; int numGridPointsTauFunctionGrid; double maxArgumentTauFunctionGrid; double maxArgumentQFunctionGrid; double maxArgumentExpMinusBetaZeta_iZeta_i; double r; double temp; double temp3; double temp4; double deltaArgumentR; double deltaArgumentCutoffFunction; double deltaArgumentQFunctionGrid; double deltaArgumentTauFunctionGrid; double deltaArgumentExpMinusBetaZeta_iZeta_i; double const leftLimitToZero = -DBL_MIN * 1000.0; // tauFunctionGrid maxArgumentTauFunctionGrid = leadDimInteractionList; numGridPointsTauFunctionGrid = (int) ((maxArgumentTauFunctionGrid) * GRIDDENSITY) + 2; r = 0.0; deltaArgumentTauFunctionGrid = 1.0 / 
GRIDDENSITY; for (l = 0; l < numGridPointsTauFunctionGrid; l++) { tauFunctionGrid[l] = u1 + u2 * u3 * exp(-u4 * r) - u2 * exp(-2.0 * u4 * r); tauFunctionDerivedGrid[l] = - u2 * u3 * u4 * exp(-u4 * r) + 2.0 * u2 * u4 * exp(-2.0 * u4 * r); r += deltaArgumentTauFunctionGrid; } // expMinusBetaZeta_iZeta_iGrid maxArgumentExpMinusBetaZeta_iZeta_i = leadDimInteractionList; numGridPointsExpMinusBetaZeta_iZeta_i = (int) ((maxArgumentExpMinusBetaZeta_iZeta_i) * GRIDDENSITY) + 2; r = 0.0; deltaArgumentExpMinusBetaZeta_iZeta_i = 1.0 / GRIDDENSITY; for (l = 0; l < numGridPointsExpMinusBetaZeta_iZeta_i; l++) { expMinusBetaZeta_iZeta_iGrid[l] = exp(-beta * r * r); r += deltaArgumentExpMinusBetaZeta_iZeta_i; } // qFunctionGrid maxArgumentQFunctionGrid = leadDimInteractionList; numGridPointsQFunctionGrid = (int) ((maxArgumentQFunctionGrid) * GRIDDENSITY) + 2; r = 0.0; deltaArgumentQFunctionGrid = 1.0 / GRIDDENSITY; for (l = 0; l < numGridPointsQFunctionGrid; l++) { qFunctionGrid[l] = Q0 * exp(-mu * r); r += deltaArgumentQFunctionGrid; } // cutoffFunction numGridPointsOneCutoffFunction = (int) ((cutoffC - GRIDSTART) * GRIDDENSITY); numGridPointsNotOneCutoffFunction = (int) ((cutoffA-cutoffC) * GRIDDENSITY); numGridPointsCutoffFunction = numGridPointsOneCutoffFunction+numGridPointsNotOneCutoffFunction+2; r = GRIDSTART; deltaArgumentCutoffFunction = 1.0 / GRIDDENSITY; for (l = 0; l < numGridPointsOneCutoffFunction; l++) { cutoffFunction[l] = 1.0; cutoffFunctionDerived[l] = 0.0; r += deltaArgumentCutoffFunction; } for (l = numGridPointsOneCutoffFunction; l < numGridPointsCutoffFunction; l++) { temp = (cutoffA - cutoffC)/(r - cutoffC); temp3 = temp * temp * temp; temp4 = temp3 * temp; cutoffFunction[l] = exp(alpha/(1.0-temp3)); cutoffFunctionDerived[l] = (-3*alpha/(cutoffA-cutoffC)) * (temp4/((1-temp3)*(1-temp3)))*exp(alpha/(1.0-temp3)); r += deltaArgumentCutoffFunction; } // pow2B numGridPointsR = (int) ((cutoffA + leftLimitToZero - GRIDSTART) * GRIDDENSITY); - numGridPointsRTotal = 
numGridPointsR + 2; r = GRIDSTART; deltaArgumentR = 1.0 / GRIDDENSITY; for (l = 0; l < numGridPointsR; l++) { pow2B[l] = pow((B/r),rho); exp2B[l] = A * exp(sigma/(r-cutoffA)); exp3B[l] = exp(gamm/(r-cutoffA)); r += deltaArgumentR; } pow2B[numGridPointsR] = pow((B/r),rho); exp2B[numGridPointsR]=0; exp3B[numGridPointsR]=0; r += deltaArgumentR; pow2B[numGridPointsR+1] = pow((B/r),rho); exp2B[numGridPointsR+1]=0; exp3B[numGridPointsR+1]=0; } /* ---------------------------------------------------------------------- set coeffs for one or more type pairs ------------------------------------------------------------------------- */ void PairEDIP::coeff(int narg, char **arg) { int i,j,n; if (!allocated) allocate(); if (narg != 3 + atom->ntypes) error->all(FLERR,"Incorrect args for pair coefficients"); // insure I,J args are * * if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0) error->all(FLERR,"Incorrect args for pair coefficients"); // read args that map atom types to elements in potential file // map[i] = which element the Ith atom type is, -1 if NULL // nelements = # of unique elements // elements = list of element names if (elements) { for (i = 0; i < nelements; i++) delete [] elements[i]; delete [] elements; } elements = new char*[atom->ntypes]; for (i = 0; i < atom->ntypes; i++) elements[i] = NULL; nelements = 0; for (i = 3; i < narg; i++) { if (strcmp(arg[i],"NULL") == 0) { map[i-2] = -1; continue; } for (j = 0; j < nelements; j++) if (strcmp(arg[i],elements[j]) == 0) break; map[i-2] = j; if (j == nelements) { n = strlen(arg[i]) + 1; elements[j] = new char[n]; strcpy(elements[j],arg[i]); nelements++; } } // read potential file and initialize potential parameters read_file(arg[2]); setup(); // clear setflag since coeff() called once with I,J = * * n = atom->ntypes; for (int i = 1; i <= n; i++) for (int j = i; j <= n; j++) setflag[i][j] = 0; // set setflag i,j for type pairs where both are mapped to elements int count = 0; for (int i = 1; i <= n; i++) for (int j = 
i; j <= n; j++) if (map[i] >= 0 && map[j] >= 0) { setflag[i][j] = 1; count++; } if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); // allocate tables and internal structures allocatePreLoops(); allocateGrids(); initGrids(); } /* ---------------------------------------------------------------------- init specific to this pair style ------------------------------------------------------------------------- */ void PairEDIP::init_style() { if (force->newton_pair == 0) error->all(FLERR,"Pair style EDIP requires newton pair on"); // need a full neighbor list int irequest = neighbor->request(this); neighbor->requests[irequest]->half = 0; neighbor->requests[irequest]->full = 1; } /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ double PairEDIP::init_one(int i, int j) { if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set"); return cutmax; } /* ---------------------------------------------------------------------- */ void PairEDIP::read_file(char *file) { int params_per_line = 20; char **words = new char*[params_per_line+1]; memory->sfree(params); params = NULL; nparams = maxparam = 0; // open file on proc 0 FILE *fp; if (comm->me == 0) { fp = force->open_potential(file); if (fp == NULL) { char str[128]; sprintf(str,"Cannot open EDIP potential file %s",file); error->one(FLERR,str); } } // read each set of params from potential file // one set of params can span multiple lines // store params if all 3 element tags are in element list int n,nwords,ielement,jelement,kelement; char line[MAXLINE],*ptr; int eof = 0; while (1) { if (comm->me == 0) { ptr = fgets(line,MAXLINE,fp); if (ptr == NULL) { eof = 1; fclose(fp); } else n = strlen(line) + 1; } MPI_Bcast(&eof,1,MPI_INT,0,world); if (eof) break; MPI_Bcast(&n,1,MPI_INT,0,world); MPI_Bcast(line,n,MPI_CHAR,0,world); // strip comment, skip line 
if blank if ((ptr = strchr(line,'#'))) *ptr = '\0'; nwords = atom->count_words(line); if (nwords == 0) continue; // concatenate additional lines until have params_per_line words while (nwords < params_per_line) { n = strlen(line); if (comm->me == 0) { ptr = fgets(&line[n],MAXLINE-n,fp); if (ptr == NULL) { eof = 1; fclose(fp); } else n = strlen(line) + 1; } MPI_Bcast(&eof,1,MPI_INT,0,world); if (eof) break; MPI_Bcast(&n,1,MPI_INT,0,world); MPI_Bcast(line,n,MPI_CHAR,0,world); if ((ptr = strchr(line,'#'))) *ptr = '\0'; nwords = atom->count_words(line); } if (nwords != params_per_line) error->all(FLERR,"Incorrect format in EDIP potential file"); // words = ptrs to all words in line nwords = 0; words[nwords++] = strtok(line," \t\n\r\f"); while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue; // ielement,jelement,kelement = 1st args // if all 3 args are in element list, then parse this line // else skip to next entry in file for (ielement = 0; ielement < nelements; ielement++) if (strcmp(words[0],elements[ielement]) == 0) break; if (ielement == nelements) continue; for (jelement = 0; jelement < nelements; jelement++) if (strcmp(words[1],elements[jelement]) == 0) break; if (jelement == nelements) continue; for (kelement = 0; kelement < nelements; kelement++) if (strcmp(words[2],elements[kelement]) == 0) break; if (kelement == nelements) continue; // load up parameter settings and error check their values if (nparams == maxparam) { maxparam += DELTA; params = (Param *) memory->srealloc(params,maxparam*sizeof(Param), "pair:params"); } params[nparams].ielement = ielement; params[nparams].jelement = jelement; params[nparams].kelement = kelement; params[nparams].A = atof(words[3]); params[nparams].B = atof(words[4]); params[nparams].cutoffA = atof(words[5]); params[nparams].cutoffC = atof(words[6]); params[nparams].alpha = atof(words[7]); params[nparams].beta = atof(words[8]); params[nparams].eta = atof(words[9]); params[nparams].gamm = atof(words[10]); 
params[nparams].lambda = atof(words[11]); params[nparams].mu = atof(words[12]); params[nparams].rho = atof(words[13]); params[nparams].sigma = atof(words[14]); params[nparams].Q0 = atof(words[15]); params[nparams].u1 = atof(words[16]); params[nparams].u2 = atof(words[17]); params[nparams].u3 = atof(words[18]); params[nparams].u4 = atof(words[19]); if (params[nparams].A < 0.0 || params[nparams].B < 0.0 || params[nparams].cutoffA < 0.0 || params[nparams].cutoffC < 0.0 || params[nparams].alpha < 0.0 || params[nparams].beta < 0.0 || params[nparams].eta < 0.0 || params[nparams].gamm < 0.0 || params[nparams].lambda < 0.0 || params[nparams].mu < 0.0 || params[nparams].rho < 0.0 || params[nparams].sigma < 0.0) error->all(FLERR,"Illegal EDIP parameter"); nparams++; } delete [] words; } /* ---------------------------------------------------------------------- */ void PairEDIP::setup() { int i,j,k,m,n; double rtmp; // set elem2param for all triplet combinations // must be a single exact match to lines read from file // do not allow for ACB in place of ABC memory->destroy(elem2param); memory->create(elem2param,nelements,nelements,nelements,"pair:elem2param"); for (i = 0; i < nelements; i++) for (j = 0; j < nelements; j++) for (k = 0; k < nelements; k++) { n = -1; for (m = 0; m < nparams; m++) { if (i == params[m].ielement && j == params[m].jelement && k == params[m].kelement) { if (n >= 0) error->all(FLERR,"Potential file has duplicate entry"); n = m; } } if (n < 0) error->all(FLERR,"Potential file is missing an entry"); elem2param[i][j][k] = n; } // set cutoff square for (m = 0; m < nparams; m++) { params[m].cutsq = params[m].cutoffA*params[m].cutoffA; } // set cutmax to max of all params cutmax = 0.0; for (m = 0; m < nparams; m++) { rtmp = sqrt(params[m].cutsq); if (rtmp > cutmax) cutmax = rtmp; } // this should be removed for multi species parametrizations A = params[0].A; B = params[0].B; rho = params[0].rho; cutoffA = params[0].cutoffA; cutoffC = params[0].cutoffC; sigma 
= params[0].sigma; lambda = params[0].lambda; gamm = params[0].gamm; eta = params[0].eta; Q0 = params[0].Q0; mu = params[0].mu; beta = params[0].beta; alpha = params[0].alpha; u1 = params[0].u1; u2 = params[0].u2; u3 = params[0].u3; u4 = params[0].u4; } diff --git a/src/USER-OMP/pair_cdeam_omp.cpp b/src/USER-OMP/pair_cdeam_omp.cpp index 56c9b9d33..8f0cd8072 100644 --- a/src/USER-OMP/pair_cdeam_omp.cpp +++ b/src/USER-OMP/pair_cdeam_omp.cpp @@ -1,537 +1,541 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" #include "string.h" #include "pair_cdeam_omp.h" #include "atom.h" #include "comm.h" #include "error.h" #include "force.h" #include "memory.h" #include "neighbor.h" #include "neigh_list.h" #include "suffix.h" using namespace LAMMPS_NS; // This is for debugging purposes. The ASSERT() macro is used in the code to check // if everything runs as expected. Change this to #if 0 if you don't need the checking. #if 0 #define ASSERT(cond) ((!(cond)) ? 
my_failure(error,__FILE__,__LINE__) : my_noop()) inline void my_noop() {} inline void my_failure(Error* error, const char* file, int line) { char str[1024]; sprintf(str,"Assertion failure: File %s, line %i", file, line); error->one(FLERR,str); } #else #define ASSERT(cond) #endif /* ---------------------------------------------------------------------- */ PairCDEAMOMP::PairCDEAMOMP(LAMMPS *lmp, int _cdeamVersion) : PairEAM(lmp), PairCDEAM(lmp,_cdeamVersion), ThrOMP(lmp, THR_PAIR) { suffix_flag |= Suffix::OMP; respa_enable = 0; } /* ---------------------------------------------------------------------- */ void PairCDEAMOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; // grow energy and fp arrays if necessary // need to be atom->nmax in length if (atom->nmax > nmax) { memory->destroy(rho); memory->destroy(rhoB); memory->destroy(D_values); memory->destroy(fp); nmax = atom->nmax; memory->create(rho,nthreads*nmax,"pair:rho"); memory->create(rhoB,nthreads*nmax,"pair:mu"); memory->create(D_values,nthreads*nmax,"pair:D_values"); memory->create(fp,nmax,"pair:fp"); } #if defined(_OPENMP) #pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (force->newton_pair) thr->init_cdeam(nall, rho, rhoB, D_values); else thr->init_cdeam(atom->nlocal, rho, rhoB, D_values); switch (cdeamVersion) { case 1: if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1,1>(ifrom, ito, thr); else eval<1,1,0,1>(ifrom, ito, thr); } else { if (force->newton_pair) eval<1,0,1,1>(ifrom, ito, thr); else eval<1,0,0,1>(ifrom, ito, thr); } } else { if (force->newton_pair) eval<0,0,1,1>(ifrom, ito, thr); else 
eval<0,0,0,1>(ifrom, ito, thr); } break; case 2: if (evflag) { if (eflag) { if (force->newton_pair) eval<1,1,1,2>(ifrom, ito, thr); else eval<1,1,0,2>(ifrom, ito, thr); } else { if (force->newton_pair) eval<1,0,1,2>(ifrom, ito, thr); else eval<1,0,0,2>(ifrom, ito, thr); } } else { if (force->newton_pair) eval<0,0,1,2>(ifrom, ito, thr); else eval<0,0,0,2>(ifrom, ito, thr); } break; default: #if defined(_OPENMP) #pragma omp master #endif error->all(FLERR,"unsupported eam/cd pair style variant"); } thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } template <int EVFLAG, int EFLAG, int NEWTON_PAIR, int CDEAMVERSION> void PairCDEAMOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; double rsq,rhoip,rhojp,recip,phi; int *ilist,*jlist,*numneigh,**firstneigh; evdwl = 0.0; const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0]; double * const rho_t = thr->get_rho(); double * const rhoB_t = thr->get_rhoB(); double * const D_values_t = thr->get_D_values(); const int tid = thr->get_tid(); const int nthreads = comm->nthreads; const int * _noalias const type = atom->type; const int nlocal = atom->nlocal; const int nall = nlocal + atom->nghost; double fxtmp,fytmp,fztmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // Stage I // Compute rho and rhoB at each local atom site. // Additionally calculate the D_i values here if we are using the one-site formulation. // For the two-site formulation we have to calculate the D values in an extra loop (Stage II). 
for (ii = iifrom; ii < iito; ii++) { i = ilist[ii]; xtmp = x[i].x; ytmp = x[i].y; ztmp = x[i].z; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; j &= NEIGHMASK; delx = xtmp - x[j].x; dely = ytmp - x[j].y; delz = ztmp - x[j].z; rsq = delx*delx + dely*dely + delz*delz; if(rsq < cutforcesq) { jtype = type[j]; double r = sqrt(rsq); const EAMTableIndex index = radiusToTableIndex(r); double localrho = RhoOfR(index, jtype, itype); rho_t[i] += localrho; if(jtype == speciesB) rhoB_t[i] += localrho; if(NEWTON_PAIR || j < nlocal) { localrho = RhoOfR(index, itype, jtype); rho_t[j] += localrho; if(itype == speciesB) rhoB_t[j] += localrho; } if(CDEAMVERSION == 1 && itype != jtype) { // Note: if the i-j interaction is not concentration dependent (because either // i or j are not species A or B) then its contribution to D_i and D_j should // be ignored. // This if-clause is only required for a ternary. if((itype == speciesA && jtype == speciesB) || (jtype == speciesA && itype == speciesB)) { double Phi_AB = PhiOfR(index, itype, jtype, 1.0 / r); D_values_t[i] += Phi_AB; if(NEWTON_PAIR || j < nlocal) D_values_t[j] += Phi_AB; } } } } } // wait until all threads are done with computation sync_threads(); // communicate and sum densities if (NEWTON_PAIR) { // reduce per thread density thr->timer(Timer::PAIR); data_reduce_thr(rho, nall, nthreads, 1, tid); data_reduce_thr(rhoB, nall, nthreads, 1, tid); if (CDEAMVERSION==1) data_reduce_thr(D_values, nall, nthreads, 1, tid); // wait until reduction is complete sync_threads(); #if defined(_OPENMP) #pragma omp master #endif { communicationStage = 1; comm->reverse_comm_pair(this); } // wait until master thread is done with communication sync_threads(); } else { // reduce per thread density thr->timer(Timer::PAIR); data_reduce_thr(rho, nlocal, nthreads, 1, tid); data_reduce_thr(rhoB, nlocal, nthreads, 1, tid); if (CDEAMVERSION==1) data_reduce_thr(D_values, nlocal, nthreads, 1, tid); // 
wait until reduction is complete sync_threads(); } // fp = derivative of embedding energy at each atom // phi = embedding energy at each atom for (ii = iifrom; ii < iito; ii++) { i = ilist[ii]; EAMTableIndex index = rhoToTableIndex(rho[i]); fp[i] = FPrimeOfRho(index, type[i]); if(EFLAG) { phi = FofRho(index, type[i]); e_tally_thr(this, i, i, nlocal, NEWTON_PAIR, phi, 0.0, thr); } } // wait until all theads are done with computation sync_threads(); // Communicate derivative of embedding function and densities // and D_values (this for one-site formulation only). #if defined(_OPENMP) #pragma omp master #endif { communicationStage = 2; comm->forward_comm_pair(this); } // wait until master thread is done with communication sync_threads(); // The electron densities may not drop to zero because then the concentration would no longer be defined. // But the concentration is not needed anyway if there is no interaction with another atom, which is the case // if the electron density is exactly zero. That's why the following lines have been commented out. // //for(i = 0; i < nlocal + atom->nghost; i++) { // if(rho[i] == 0 && (type[i] == speciesA || type[i] == speciesB)) // error->one(FLERR,"CD-EAM potential routine: Detected atom with zero electron density."); //} // Stage II // This is only required for the original two-site formulation of the CD-EAM potential. if(CDEAMVERSION == 2) { // Compute intermediate value D_i for each atom. for (ii = iifrom; ii < iito; ii++) { i = ilist[ii]; xtmp = x[i].x; ytmp = x[i].y; ztmp = x[i].z; itype = type[i]; jlist = firstneigh[i]; jnum = numneigh[i]; // This code line is required for ternary alloys. if(itype != speciesA && itype != speciesB) continue; double x_i = rhoB[i] / rho[i]; // Concentration at atom i. for(jj = 0; jj < jnum; jj++) { j = jlist[jj]; j &= NEIGHMASK; jtype = type[j]; if(itype == jtype) continue; // This code line is required for ternary alloys. 
if(jtype != speciesA && jtype != speciesB) continue; delx = xtmp - x[j].x; dely = ytmp - x[j].y; delz = ztmp - x[j].z; rsq = delx*delx + dely*dely + delz*delz; if(rsq < cutforcesq) { double r = sqrt(rsq); const EAMTableIndex index = radiusToTableIndex(r); // The concentration independent part of the cross pair potential. double Phi_AB = PhiOfR(index, itype, jtype, 1.0 / r); // Average concentration of two sites double x_ij = 0.5 * (x_i + rhoB[j]/rho[j]); // Calculate derivative of h(x_ij) polynomial function. double h_prime = evalHprime(x_ij); D_values_t[i] += h_prime * Phi_AB / (2.0 * rho[i] * rho[i]); if(NEWTON_PAIR || j < nlocal) D_values_t[j] += h_prime * Phi_AB / (2.0 * rho[j] * rho[j]); } } } if (NEWTON_PAIR) { thr->timer(Timer::PAIR); data_reduce_thr(D_values, nall, nthreads, 1, tid); // wait until reduction is complete sync_threads(); #if defined(_OPENMP) #pragma omp master #endif { communicationStage = 3; comm->reverse_comm_pair(this); } // wait until master thread is done with communication sync_threads(); } else { thr->timer(Timer::PAIR); data_reduce_thr(D_values, nlocal, nthreads, 1, tid); // wait until reduction is complete sync_threads(); } #if defined(_OPENMP) #pragma omp master #endif { communicationStage = 4; comm->forward_comm_pair(this); } // wait until master thread is done with communication sync_threads(); } // Stage III // Compute force acting on each atom. for (ii = iifrom; ii < iito; ii++) { i = ilist[ii]; xtmp = x[i].x; ytmp = x[i].y; ztmp = x[i].z; itype = type[i]; fxtmp = fytmp = fztmp = 0.0; jlist = firstneigh[i]; jnum = numneigh[i]; // Concentration at site i double x_i = -1.0; // The value -1 indicates: no concentration dependence for all interactions of atom i. // It will be replaced by the concentration at site i if atom i is either A or B. double D_i, h_prime_i; // This if-clause is only required for ternary alloys. if((itype == speciesA || itype == speciesB) && rho[i] != 0.0) { // Compute local concentration at site i. 
x_i = rhoB[i]/rho[i]; ASSERT(x_i >= 0 && x_i<=1.0); if(CDEAMVERSION == 1) { // Calculate derivative of h(x_i) polynomial function. h_prime_i = evalHprime(x_i); D_i = D_values[i] * h_prime_i / (2.0 * rho[i] * rho[i]); } else if(CDEAMVERSION == 2) { D_i = D_values[i]; - } else ASSERT(false); + } else { + ASSERT(false); + } } for(jj = 0; jj < jnum; jj++) { j = jlist[jj]; j &= NEIGHMASK; delx = xtmp - x[j].x; dely = ytmp - x[j].y; delz = ztmp - x[j].z; rsq = delx*delx + dely*dely + delz*delz; if(rsq < cutforcesq) { jtype = type[j]; double r = sqrt(rsq); const EAMTableIndex index = radiusToTableIndex(r); // rhoip = derivative of (density at atom j due to atom i) // rhojp = derivative of (density at atom i due to atom j) // psip needs both fp[i] and fp[j] terms since r_ij appears in two // terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji) // hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip rhoip = RhoPrimeOfR(index, itype, jtype); rhojp = RhoPrimeOfR(index, jtype, itype); fpair = fp[i]*rhojp + fp[j]*rhoip; recip = 1.0/r; double x_j = -1; // The value -1 indicates: no concentration dependence for this i-j pair // because atom j is not of species A nor B. // This code line is required for ternary alloy. if(jtype == speciesA || jtype == speciesB) { ASSERT(rho[i] != 0.0); ASSERT(rho[j] != 0.0); // Compute local concentration at site j. x_j = rhoB[j]/rho[j]; ASSERT(x_j >= 0 && x_j<=1.0); double D_j; if(CDEAMVERSION == 1) { // Calculate derivative of h(x_j) polynomial function. double h_prime_j = evalHprime(x_j); D_j = D_values[j] * h_prime_j / (2.0 * rho[j] * rho[j]); } else if(CDEAMVERSION == 2) { D_j = D_values[j]; - } else ASSERT(false); - + } else { + ASSERT(false); + } double t2 = -rhoB[j]; if(itype == speciesB) t2 += rho[j]; fpair += D_j * rhoip * t2; } // This if-clause is only required for a ternary alloy. 
// Actually we don't need it at all because D_i should be zero anyway if // atom i has no concentration dependent interactions (because it is not species A or B). if(x_i != -1.0) { double t1 = -rhoB[i]; if(jtype == speciesB) t1 += rho[i]; fpair += D_i * rhojp * t1; } double phip; double phi = PhiOfR(index, itype, jtype, recip, phip); if(itype == jtype || x_i == -1.0 || x_j == -1.0) { // Case of no concentration dependence. fpair += phip; } else { // We have a concentration dependence for the i-j interaction. double h; if(CDEAMVERSION == 1) { // Calculate h(x_i) polynomial function. double h_i = evalH(x_i); // Calculate h(x_j) polynomial function. double h_j = evalH(x_j); h = 0.5 * (h_i + h_j); } else if(CDEAMVERSION == 2) { // Average concentration. double x_ij = 0.5 * (x_i + x_j); // Calculate h(x_ij) polynomial function. h = evalH(x_ij); - } else ASSERT(false); - + } else { + ASSERT(false); + } fpair += h * phip; phi *= h; } // Divide by r_ij and negate to get forces from gradient. fpair /= -r; fxtmp += delx*fpair; fytmp += dely*fpair; fztmp += delz*fpair; if(NEWTON_PAIR || j < nlocal) { f[j].x -= delx*fpair; f[j].y -= dely*fpair; f[j].z -= delz*fpair; } if(EFLAG) evdwl = phi; if(EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,evdwl,0.0, fpair,delx,dely,delz,thr); } } f[i].x += fxtmp; f[i].y += fytmp; f[i].z += fztmp; } } /* ---------------------------------------------------------------------- */ double PairCDEAMOMP::memory_usage() { double bytes = memory_usage_thr(); bytes += PairCDEAM::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pair_gran_hooke_omp.cpp b/src/USER-OMP/pair_gran_hooke_omp.cpp index d3988bc5b..c4e792708 100644 --- a/src/USER-OMP/pair_gran_hooke_omp.cpp +++ b/src/USER-OMP/pair_gran_hooke_omp.cpp @@ -1,270 +1,270 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, 
sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" #include "pair_gran_hooke_omp.h" #include "atom.h" #include "comm.h" #include "fix.h" #include "force.h" #include "memory.h" #include "neighbor.h" #include "neigh_list.h" #include "suffix.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairGranHookeOMP::PairGranHookeOMP(LAMMPS *lmp) : PairGranHooke(lmp), ThrOMP(lmp, THR_PAIR) { suffix_flag |= Suffix::OMP; respa_enable = 0; } /* ---------------------------------------------------------------------- */ void PairGranHookeOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); } else evflag = vflag_fdotr = 0; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; // update rigid body info for owned & ghost atoms if using FixRigid masses // body[i] = which body atom I is in, -1 if none // mass_body = mass of each rigid body if (fix_rigid && neighbor->ago == 0) { int tmp; int *body = (int *) fix_rigid->extract("body",tmp); double *mass_body = (double *) fix_rigid->extract("masstotal",tmp); if (atom->nmax > nmax) { memory->destroy(mass_rigid); nmax = atom->nmax; memory->create(mass_rigid,nmax,"pair:mass_rigid"); } int nlocal = atom->nlocal; for (int i = 0; i < nlocal; i++) if (body[i] >= 0) mass_rigid[i] = mass_body[body[i]]; else mass_rigid[i] = 0.0; comm->forward_comm_pair(this); } #if defined(_OPENMP) #pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = 
fix->get_thr(tid); thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) if (force->newton_pair) eval<1,1>(ifrom, ito, thr); else eval<1,0>(ifrom, ito, thr); else if (force->newton_pair) eval<0,1>(ifrom, ito, thr); else eval<0,0>(ifrom, ito, thr); thr->timer(Timer::PAIR); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } template <int EVFLAG, int NEWTON_PAIR> void PairGranHookeOMP::eval(int iifrom, int iito, ThrData * const thr) { - int i,j,ii,jj,jnum,itype,jtype; + int i,j,ii,jj,jnum; double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz; double radi,radj,radsum,rsq,r,rinv,rsqinv; double vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3; double wr1,wr2,wr3; double vtr1,vtr2,vtr3,vrel; double mi,mj,meff,damp,ccel,tor1,tor2,tor3; double fn,fs,ft,fs1,fs2,fs3; int *ilist,*jlist,*numneigh,**firstneigh; const double * const * const x = atom->x; const double * const * const v = atom->v; const double * const * const omega = atom->omega; const double * const radius = atom->radius; const double * const rmass = atom->rmass; const double * const mass = atom->mass; double * const * const f = thr->get_f(); double * const * const torque = thr->get_torque(); const int * const type = atom->type; const int * const mask = atom->mask; const int nlocal = atom->nlocal; double fxtmp,fytmp,fztmp; double t1tmp,t2tmp,t3tmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; radi = radius[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; radj = radius[j]; radsum = radi + radj; if (rsq < radsum*radsum) { r = sqrt(rsq); rinv = 1.0/r; rsqinv = 1.0/rsq; // relative translational velocity 
vr1 = v[i][0] - v[j][0]; vr2 = v[i][1] - v[j][1]; vr3 = v[i][2] - v[j][2]; // normal component vnnr = vr1*delx + vr2*dely + vr3*delz; vn1 = delx*vnnr * rsqinv; vn2 = dely*vnnr * rsqinv; vn3 = delz*vnnr * rsqinv; // tangential component vt1 = vr1 - vn1; vt2 = vr2 - vn2; vt3 = vr3 - vn3; // relative rotational velocity wr1 = (radi*omega[i][0] + radj*omega[j][0]) * rinv; wr2 = (radi*omega[i][1] + radj*omega[j][1]) * rinv; wr3 = (radi*omega[i][2] + radj*omega[j][2]) * rinv; // meff = effective mass of pair of particles // if I or J part of rigid body, use body mass // if I or J is frozen, meff is other particle if (rmass) { mi = rmass[i]; mj = rmass[j]; } else { mi = mass[type[i]]; mj = mass[type[j]]; } if (fix_rigid) { if (mass_rigid[i] > 0.0) mi = mass_rigid[i]; if (mass_rigid[j] > 0.0) mj = mass_rigid[j]; } meff = mi*mj / (mi+mj); if (mask[i] & freeze_group_bit) meff = mj; if (mask[j] & freeze_group_bit) meff = mi; // normal forces = Hookian contact + normal velocity damping damp = meff*gamman*vnnr*rsqinv; ccel = kn*(radsum-r)*rinv - damp; // relative velocities vtr1 = vt1 - (delz*wr2-dely*wr3); vtr2 = vt2 - (delx*wr3-delz*wr1); vtr3 = vt3 - (dely*wr1-delx*wr2); vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3; vrel = sqrt(vrel); // force normalization fn = xmu * fabs(ccel*r); fs = meff*gammat*vrel; if (vrel != 0.0) ft = MIN(fn,fs) / vrel; else ft = 0.0; // tangential force due to tangential velocity damping fs1 = -ft*vtr1; fs2 = -ft*vtr2; fs3 = -ft*vtr3; // forces & torques fx = delx*ccel + fs1; fy = dely*ccel + fs2; fz = delz*ccel + fs3; fxtmp += fx; fytmp += fy; fztmp += fz; tor1 = rinv * (dely*fs3 - delz*fs2); tor2 = rinv * (delz*fs1 - delx*fs3); tor3 = rinv * (delx*fs2 - dely*fs1); t1tmp -= radi*tor1; t2tmp -= radi*tor2; t3tmp -= radi*tor3; if (NEWTON_PAIR || j < nlocal) { f[j][0] -= fx; f[j][1] -= fy; f[j][2] -= fz; torque[j][0] -= radj*tor1; torque[j][1] -= radj*tor2; torque[j][2] -= radj*tor3; } if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR, 
0.0,0.0,fx,fy,fz,delx,dely,delz,thr); } } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; torque[i][0] += t1tmp; torque[i][1] += t2tmp; torque[i][2] += t3tmp; } } /* ---------------------------------------------------------------------- */ double PairGranHookeOMP::memory_usage() { double bytes = memory_usage_thr(); bytes += PairGranHooke::memory_usage(); return bytes; } diff --git a/src/USER-OMP/pppm_tip4p_cg_omp.cpp b/src/USER-OMP/pppm_tip4p_cg_omp.cpp index 05229a380..4854aa84f 100644 --- a/src/USER-OMP/pppm_tip4p_cg_omp.cpp +++ b/src/USER-OMP/pppm_tip4p_cg_omp.cpp @@ -1,809 +1,808 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */

#include "pppm_tip4p_cg_omp.h"
#include "atom.h"
#include "comm.h"
#include "domain.h"
#include "error.h"
#include "fix_omp.h"
#include "force.h"
#include "memory.h"
#include "math_const.h"
#include "math_special.h"

#include <string.h>
#include <math.h>

#include "suffix.h"
using namespace LAMMPS_NS;
using namespace MathConst;
using namespace MathSpecial;

// zero literal matching the precision selected for FFT_SCALAR

#ifdef FFT_SINGLE
#define ZEROF 0.0f
#else
#define ZEROF 0.0
#endif

// EPS_HOC enters the image-sum cutoffs nbx/nby/nbz in compute_gf_ik()
// OFFSET is added before / subtracted after int truncation in particle_map()

#define EPS_HOC 1.0e-7
#define OFFSET 16384

/* ----------------------------------------------------------------------
   constructor: disable triclinic support and flag this style as an
   OMP-suffixed variant
------------------------------------------------------------------------- */

PPPMTIP4PCGOMP::PPPMTIP4PCGOMP(LAMMPS *lmp, int narg, char **arg) :
  PPPMTIP4PCG(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE)
{
  triclinic_support = 0;
  suffix_flag |= Suffix::OMP;
}

/* ----------------------------------------------------------------------
   allocate memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */

void PPPMTIP4PCGOMP::allocate()
{
  PPPMTIP4PCG::allocate();

  // every thread sets up its own ThrData PPPM storage

#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {
#if defined(_OPENMP)
    const int tid = omp_get_thread_num();
#else
    const int tid = 0;
#endif
    ThrData *thr = fix->get_thr(tid);
    thr->init_pppm(order,memory);
  }
}

/* ----------------------------------------------------------------------
   free memory that depends on # of K-vectors and order
------------------------------------------------------------------------- */

void PPPMTIP4PCGOMP::deallocate()
{
  PPPMTIP4PCG::deallocate();

  // NOTE(review): the negated order presumably tells init_pppm() to
  // release the per-thread storage - confirm against ThrData

#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {
#if defined(_OPENMP)
    const int tid = omp_get_thread_num();
#else
    const int tid = 0;
#endif
    ThrData *thr = fix->get_thr(tid);
    thr->init_pppm(-order,memory);
  }
}

/* ----------------------------------------------------------------------
   pre-compute modified (Hockney-Eastwood) Coulomb Green's function
   fills greensfn[0..nfft-1]; each thread handles the index range
   handed out by loop_setup_thr()
------------------------------------------------------------------------- */

void PPPMTIP4PCGOMP::compute_gf_ik()
{
  const double * const prd = (triclinic==0) ? domain->prd : domain->prd_lamda;

  const double xprd = prd[0];
  const double yprd = prd[1];
  const double zprd = prd[2];
  const double zprd_slab = zprd*slab_volfactor;
  const double unitkx = (MY_2PI/xprd);
  const double unitky = (MY_2PI/yprd);
  const double unitkz = (MY_2PI/zprd_slab);

  // half-widths of the image sums in each dimension

  const int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
                                    pow(-log(EPS_HOC),0.25));
  const int nby = static_cast<int> ((g_ewald*yprd/(MY_PI*ny_pppm)) *
                                    pow(-log(EPS_HOC),0.25));
  const int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) *
                                    pow(-log(EPS_HOC),0.25));
  const int numk = nxhi_fft - nxlo_fft + 1;
  const int numl = nyhi_fft - nylo_fft + 1;

  const int twoorder = 2*order;

#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {
    double snx,sny,snz;
    double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
    double sum1,dot1,dot2;
    double numerator,denominator;
    double sqk;

    int k,l,m,nx,ny,nz,kper,lper,mper,n,nfrom,nto,tid;

    loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);

    for (n = nfrom; n < nto; ++n) {

      // decompose the flat index n into (k,l,m) with k running fastest,
      // then shift to the global FFT grid indices

      m = n / (numl*numk);
      l = (n - m*numl*numk) / numk;
      k = n - m*numl*numk - l*numk;
      m += nzlo_fft;
      l += nylo_fft;
      k += nxlo_fft;

      mper = m - nz_pppm*(2*m/nz_pppm);
      snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm));

      lper = l - ny_pppm*(2*l/ny_pppm);
      sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));

      kper = k - nx_pppm*(2*k/nx_pppm);
      snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));

      sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);

      if (sqk != 0.0) {
        // NOTE(review): 12.5663706 is presumably a truncated 4*pi
        // (compute_gf_ad() uses MY_4PI) - confirm before unifying
        numerator = 12.5663706/sqk;
        denominator = gf_denom(snx,sny,snz);
        sum1 = 0.0;

        for (nx = -nbx; nx <= nbx; nx++) {
          qx = unitkx*(kper+nx_pppm*nx);
          sx = exp(-0.25*square(qx/g_ewald));
          argx = 0.5*qx*xprd/nx_pppm;
          wx = powsinxx(argx,twoorder);

          for (ny = -nby; ny <= nby; ny++) {
            qy = unitky*(lper+ny_pppm*ny);
            sy = exp(-0.25*square(qy/g_ewald));
            argy = 0.5*qy*yprd/ny_pppm;
            wy = powsinxx(argy,twoorder);

            for (nz = -nbz; nz <= nbz; nz++) {
              qz = unitkz*(mper+nz_pppm*nz);
              sz = exp(-0.25*square(qz/g_ewald));
              argz = 0.5*qz*zprd_slab/nz_pppm;
              wz = powsinxx(argz,twoorder);

              dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
              dot2 = qx*qx+qy*qy+qz*qz;
              sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
            }
          }
        }
        greensfn[n] = numerator*sum1/denominator;
      } else greensfn[n] = 0.0;   // k = 0 term
    }
    thr->timer(Timer::KSPACE);
  } // end of parallel region
}

/* ----------------------------------------------------------------------
   compute optimized Green's function for energy calculation
   fills greensfn[0..nfft-1] and the six self-force coefficients
   sf_coeff[0..5], which are summed over all procs
------------------------------------------------------------------------- */

void PPPMTIP4PCGOMP::compute_gf_ad()
{
  const double * const prd = (triclinic==0) ? domain->prd : domain->prd_lamda;

  const double xprd = prd[0];
  const double yprd = prd[1];
  const double zprd = prd[2];
  const double zprd_slab = zprd*slab_volfactor;
  const double unitkx = (MY_2PI/xprd);
  const double unitky = (MY_2PI/yprd);
  const double unitkz = (MY_2PI/zprd_slab);

  const int numk = nxhi_fft - nxlo_fft + 1;
  const int numl = nyhi_fft - nylo_fft + 1;

  const int twoorder = 2*order;

  // accumulators for the self-force sums, combined across threads
  // by the OpenMP reduction clause below

  double sf0=0.0,sf1=0.0,sf2=0.0,sf3=0.0,sf4=0.0,sf5=0.0;

#if defined(_OPENMP)
#pragma omp parallel default(none) reduction(+:sf0,sf1,sf2,sf3,sf4,sf5)
#endif
  {
    double snx,sny,snz,sqk;
    double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
    double numerator,denominator;
    int k,l,m,kper,lper,mper,n,nfrom,nto,tid;

    loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);

    for (n = nfrom; n < nto; ++n) {

      // decompose the flat index n into (k,l,m) with k running fastest,
      // then shift to the global FFT grid indices

      m = n / (numl*numk);
      l = (n - m*numl*numk) / numk;
      k = n - m*numl*numk - l*numk;
      m += nzlo_fft;
      l += nylo_fft;
      k += nxlo_fft;

      mper = m - nz_pppm*(2*m/nz_pppm);
      qz = unitkz*mper;
      snz = square(sin(0.5*qz*zprd_slab/nz_pppm));
      sz = exp(-0.25*square(qz/g_ewald));
      argz = 0.5*qz*zprd_slab/nz_pppm;
      wz = powsinxx(argz,twoorder);

      lper = l - ny_pppm*(2*l/ny_pppm);
      qy = unitky*lper;
      sny = square(sin(0.5*qy*yprd/ny_pppm));
      sy = exp(-0.25*square(qy/g_ewald));
      argy = 0.5*qy*yprd/ny_pppm;
      wy = powsinxx(argy,twoorder);

      kper = k - nx_pppm*(2*k/nx_pppm);
      qx = unitkx*kper;
      snx = square(sin(0.5*qx*xprd/nx_pppm));
      sx = exp(-0.25*square(qx/g_ewald));
      argx = 0.5*qx*xprd/nx_pppm;
      wx = powsinxx(argx,twoorder);

      sqk = qx*qx + qy*qy + qz*qz;

      if (sqk != 0.0) {
        numerator = MY_4PI/sqk;
        denominator = gf_denom(snx,sny,snz);
        greensfn[n] = numerator*sx*sy*sz*wx*wy*wz/denominator;
        sf0 += sf_precoeff1[n]*greensfn[n];
        sf1 += sf_precoeff2[n]*greensfn[n];
        sf2 += sf_precoeff3[n]*greensfn[n];
        sf3 += sf_precoeff4[n]*greensfn[n];
        sf4 += sf_precoeff5[n]*greensfn[n];
        sf5 += sf_precoeff6[n]*greensfn[n];
      } else {
        // k = 0 term: greensfn[n] is zero, so the products added below
        // are the precoefficients times zero
        greensfn[n] = 0.0;
        sf0 += sf_precoeff1[n]*greensfn[n];
        sf1 += sf_precoeff2[n]*greensfn[n];
        sf2 += sf_precoeff3[n]*greensfn[n];
        sf3 += sf_precoeff4[n]*greensfn[n];
        sf4 += sf_precoeff5[n]*greensfn[n];
        sf5 += sf_precoeff6[n]*greensfn[n];
      }
    }
    thr->timer(Timer::KSPACE);
  } // end of parallel region

  // compute the coefficients for the self-force correction

  double prex, prey, prez, tmp[6];
  prex = prey = prez = MY_PI/volume;
  prex *= nx_pppm/xprd;
  prey *= ny_pppm/yprd;
  prez *= nz_pppm/zprd_slab;
  tmp[0] = sf0 * prex;
  tmp[1] = sf1 * prex*2;
  tmp[2] = sf2 * prey;
  tmp[3] = sf3 * prey*2;
  tmp[4] = sf4 * prez;
  tmp[5] = sf5 * prez*2;

  // communicate values with other procs

  MPI_Allreduce(tmp,sf_coeff,6,MPI_DOUBLE,MPI_SUM,world);
}

/* ----------------------------------------------------------------------
   run the regular toplevel compute method from plain PPPM
   which will have individual methods replaced by our threaded
   versions and then call the obligatory force reduction.
------------------------------------------------------------------------- */ void PPPMTIP4PCGOMP::compute(int eflag, int vflag) { PPPMTIP4PCG::compute(eflag,vflag); #if defined(_OPENMP) #pragma omp parallel default(none) shared(eflag,vflag) #endif { #if defined(_OPENMP) const int tid = omp_get_thread_num(); #else const int tid = 0; #endif ThrData *thr = fix->get_thr(tid); thr->timer(Timer::START); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } /* ---------------------------------------------------------------------- find center grid pt for each of my particles check that full stencil for the particle will fit in my 3d brick store central grid pt indices in part2grid array ------------------------------------------------------------------------- */ void PPPMTIP4PCGOMP::particle_map() { // no local atoms => nothing to do if (num_charged == 0) return; const int * _noalias const type = atom->type; const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; int3_t * _noalias const p2g = (int3_t *) part2grid[0]; const double boxlox = boxlo[0]; const double boxloy = boxlo[1]; const double boxloz = boxlo[2]; - const int nlocal = atom->nlocal; int j, flag = 0; #if defined(_OPENMP) #pragma omp parallel for private(j) default(none) reduction(+:flag) schedule(static) #endif for (int j = 0; j < num_charged; j++) { const int i = is_charged[j]; dbl3_t xM; int iH1,iH2; if (type[i] == typeO) { find_M_thr(i,iH1,iH2,xM); } else { xM = x[i]; } // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // current particle coord can be outside global and local box // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 const int nx = static_cast<int> ((xM.x-boxlox)*delxinv+shift) - OFFSET; const int ny = static_cast<int> ((xM.y-boxloy)*delyinv+shift) - OFFSET; const int nz = static_cast<int> ((xM.z-boxloz)*delzinv+shift) - OFFSET; p2g[i].a = nx; p2g[i].b = ny; p2g[i].t = nz; // check that entire stencil around nx,ny,nz will fit in my 3d brick if 
(nx+nlower < nxlo_out || nx+nupper > nxhi_out || ny+nlower < nylo_out || ny+nupper > nyhi_out || nz+nlower < nzlo_out || nz+nupper > nzhi_out) flag++; } int flag_all; MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); if (flag_all) error->all(FLERR,"Out of range atoms - cannot compute PPPM"); } /* ---------------------------------------------------------------------- create discretized "density" on section of global grid due to my particles density(x,y,z) = charge "density" at grid points of my 3d brick (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) in global grid ------------------------------------------------------------------------- */ void PPPMTIP4PCGOMP::make_rho() { // clear 3d density array FFT_SCALAR * _noalias const d = &(density_brick[nzlo_out][nylo_out][nxlo_out]); memset(d,0,ngrid*sizeof(FFT_SCALAR)); // no charged atoms => nothing else to do if (num_charged == 0) return; const int ix = nxhi_out - nxlo_out + 1; const int iy = nyhi_out - nylo_out + 1; #if defined(_OPENMP) #pragma omp parallel default(none) #endif { const double * _noalias const q = atom->q; const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; const int3_t * _noalias const p2g = (int3_t *) part2grid[0]; const int * _noalias const type = atom->type; dbl3_t xM; const double boxlox = boxlo[0]; const double boxloy = boxlo[1]; const double boxloz = boxlo[2]; // determine range of grid points handled by this thread int i,j,jfrom,jto,tid,iH1,iH2; loop_setup_thr(jfrom,jto,tid,ngrid,comm->nthreads); // get per thread data ThrData *thr = fix->get_thr(tid); thr->timer(Timer::START); FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d()); // loop over my charges, add their contribution to nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt // loop over all charged atoms for all threads for (j = 0; j < num_charged; j++) { i = is_charged[j]; const int nx = 
p2g[i].a; const int ny = p2g[i].b; const int nz = p2g[i].t; // pre-screen whether this atom will ever come within // reach of the data segement this thread is updating. if ( ((nz+nlower-nzlo_out)*ix*iy >= jto) || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue; if (type[i] == typeO) { find_M_thr(i,iH1,iH2,xM); } else { xM = x[i]; } const FFT_SCALAR dx = nx+shiftone - (xM.x-boxlox)*delxinv; const FFT_SCALAR dy = ny+shiftone - (xM.y-boxloy)*delyinv; const FFT_SCALAR dz = nz+shiftone - (xM.z-boxloz)*delzinv; compute_rho1d_thr(r1d,dx,dy,dz); const FFT_SCALAR z0 = delvolinv * q[i]; for (int n = nlower; n <= nupper; ++n) { const int jn = (nz+n-nzlo_out)*ix*iy; const FFT_SCALAR y0 = z0*r1d[2][n]; for (int m = nlower; m <= nupper; ++m) { const int jm = jn+(ny+m-nylo_out)*ix; const FFT_SCALAR x0 = y0*r1d[1][m]; for (int l = nlower; l <= nupper; ++l) { const int jl = jm+nx+l-nxlo_out; // make sure each thread only updates // "his" elements of the density grid if (jl >= jto) break; if (jl < jfrom) continue; d[jl] += x0*r1d[0][l]; } } } } thr->timer(Timer::KSPACE); } } /* ---------------------------------------------------------------------- interpolate from grid to get electric field & force on my particles for ik ------------------------------------------------------------------------- */ void PPPMTIP4PCGOMP::fieldforce_ik() { const int nthreads = comm->nthreads; // no local atoms => nothing to do if (num_charged == 0) return; // loop over my charges, interpolate electric field from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt // (mx,my,mz) = global coords of moving stencil pt // ek = 3 components of E-field on particle const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; const double * _noalias const q = atom->q; const int3_t * _noalias const p2g = (int3_t *) part2grid[0]; const int * _noalias const type = atom->type; const double qqrd2e = force->qqrd2e; const double boxlox = 
boxlo[0]; const double boxloy = boxlo[1]; const double boxloz = boxlo[2]; #if defined(_OPENMP) #pragma omp parallel default(none) #endif { dbl3_t xM; FFT_SCALAR x0,y0,z0,ekx,eky,ekz; int i,j,ifrom,ito,tid,iH1,iH2,l,m,n,mx,my,mz; loop_setup_thr(ifrom,ito,tid,num_charged,nthreads); // get per thread data ThrData *thr = fix->get_thr(tid); thr->timer(Timer::START); dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0]; FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d()); for (j = ifrom; j < ito; ++j) { i = is_charged[j]; if (type[i] == typeO) { find_M_thr(i,iH1,iH2,xM); } else xM = x[i]; const int nx = p2g[i].a; const int ny = p2g[i].b; const int nz = p2g[i].t; const FFT_SCALAR dx = nx+shiftone - (xM.x-boxlox)*delxinv; const FFT_SCALAR dy = ny+shiftone - (xM.y-boxloy)*delyinv; const FFT_SCALAR dz = nz+shiftone - (xM.z-boxloz)*delzinv; compute_rho1d_thr(r1d,dx,dy,dz); ekx = eky = ekz = ZEROF; for (n = nlower; n <= nupper; n++) { mz = n+nz; z0 = r1d[2][n]; for (m = nlower; m <= nupper; m++) { my = m+ny; y0 = z0*r1d[1][m]; for (l = nlower; l <= nupper; l++) { mx = l+nx; x0 = y0*r1d[0][l]; ekx -= x0*vdx_brick[mz][my][mx]; eky -= x0*vdy_brick[mz][my][mx]; ekz -= x0*vdz_brick[mz][my][mx]; } } } // convert E-field to force const double qfactor = qqrd2e * scale * q[i]; if (type[i] != typeO) { f[i].x += qfactor*ekx; f[i].y += qfactor*eky; if (slabflag != 2) f[i].z += qfactor*ekz; } else { const double fx = qfactor * ekx; const double fy = qfactor * eky; const double fz = qfactor * ekz; f[i].x += fx*(1 - alpha); f[i].y += fy*(1 - alpha); if (slabflag != 2) f[i].z += fz*(1 - alpha); f[iH1].x += 0.5*alpha*fx; f[iH1].y += 0.5*alpha*fy; if (slabflag != 2) f[iH1].z += 0.5*alpha*fz; f[iH2].x += 0.5*alpha*fx; f[iH2].y += 0.5*alpha*fy; if (slabflag != 2) f[iH2].z += 0.5*alpha*fz; } } thr->timer(Timer::KSPACE); } // end of parallel region } /* ---------------------------------------------------------------------- interpolate from grid to get electric field & 
force on my particles for ad ------------------------------------------------------------------------- */ void PPPMTIP4PCGOMP::fieldforce_ad() { const int nthreads = comm->nthreads; // no local atoms => nothing to do if (num_charged == 0) return; const double *prd = (triclinic == 0) ? domain->prd : domain->prd_lamda; const double hx_inv = nx_pppm/prd[0]; const double hy_inv = ny_pppm/prd[1]; const double hz_inv = nz_pppm/prd[2]; // loop over my charges, interpolate electric field from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt // (mx,my,mz) = global coords of moving stencil pt // ek = 3 components of E-field on particle const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; const double * _noalias const q = atom->q; const int3_t * _noalias const p2g = (int3_t *) part2grid[0]; const int * _noalias const type = atom->type; const double qqrd2e = force->qqrd2e; const double boxlox = boxlo[0]; const double boxloy = boxlo[1]; const double boxloz = boxlo[2]; #if defined(_OPENMP) #pragma omp parallel default(none) #endif { double s1,s2,s3,sf; dbl3_t xM; FFT_SCALAR ekx,eky,ekz; int i,j,ifrom,ito,tid,iH1,iH2,l,m,n,mx,my,mz; loop_setup_thr(ifrom,ito,tid,num_charged,nthreads); // get per thread data ThrData *thr = fix->get_thr(tid); thr->timer(Timer::START); dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0]; FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d()); FFT_SCALAR * const * const d1d = static_cast<FFT_SCALAR **>(thr->get_drho1d()); for (j = ifrom; j < ito; ++j) { i = is_charged[j]; if (type[i] == typeO) { find_M_thr(i,iH1,iH2,xM); } else xM = x[i]; const int nx = p2g[i].a; const int ny = p2g[i].b; const int nz = p2g[i].t; const FFT_SCALAR dx = nx+shiftone - (xM.x-boxlox)*delxinv; const FFT_SCALAR dy = ny+shiftone - (xM.y-boxloy)*delyinv; const FFT_SCALAR dz = nz+shiftone - (xM.z-boxloz)*delzinv; compute_rho1d_thr(r1d,dx,dy,dz); 
compute_drho1d_thr(d1d,dx,dy,dz); ekx = eky = ekz = ZEROF; for (n = nlower; n <= nupper; n++) { mz = n+nz; for (m = nlower; m <= nupper; m++) { my = m+ny; for (l = nlower; l <= nupper; l++) { mx = l+nx; ekx += d1d[0][l]*r1d[1][m]*r1d[2][n]*u_brick[mz][my][mx]; eky += r1d[0][l]*d1d[1][m]*r1d[2][n]*u_brick[mz][my][mx]; ekz += r1d[0][l]*r1d[1][m]*d1d[2][n]*u_brick[mz][my][mx]; } } } ekx *= hx_inv; eky *= hy_inv; ekz *= hz_inv; // convert E-field to force and substract self forces const double qi = q[i]; const double qfactor = qqrd2e * scale * qi; s1 = x[i].x*hx_inv; sf = sf_coeff[0]*sin(MY_2PI*s1); sf += sf_coeff[1]*sin(MY_4PI*s1); sf *= 2.0*qi; const double fx = qfactor*(ekx - sf); s2 = x[i].y*hy_inv; sf = sf_coeff[2]*sin(MY_2PI*s2); sf += sf_coeff[3]*sin(MY_4PI*s2); sf *= 2.0*qi; const double fy = qfactor*(eky - sf); s3 = x[i].z*hz_inv; sf = sf_coeff[4]*sin(MY_2PI*s3); sf += sf_coeff[5]*sin(MY_4PI*s3); sf *= 2.0*qi; const double fz = qfactor*(ekz - sf); if (type[i] != typeO) { f[i].x += fx; f[i].y += fy; if (slabflag != 2) f[i].z += fz; } else { f[i].x += fx*(1 - alpha); f[i].y += fy*(1 - alpha); if (slabflag != 2) f[i].z += fz*(1 - alpha); f[iH1].x += 0.5*alpha*fx; f[iH1].y += 0.5*alpha*fy; if (slabflag != 2) f[iH1].z += 0.5*alpha*fz; f[iH2].x += 0.5*alpha*fx; f[iH2].y += 0.5*alpha*fy; if (slabflag != 2) f[iH2].z += 0.5*alpha*fz; } } thr->timer(Timer::KSPACE); } // end of parallel region } /* ---------------------------------------------------------------------- find 2 H atoms bonded to O atom i compute position xM of fictitious charge site for O atom also return local indices iH1,iH2 of H atoms ------------------------------------------------------------------------- */ void PPPMTIP4PCGOMP::find_M_thr(int i, int &iH1, int &iH2, dbl3_t &xM) { iH1 = atom->map(atom->tag[i] + 1); iH2 = atom->map(atom->tag[i] + 2); if (iH1 == -1 || iH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing"); if (atom->type[iH1] != typeH || atom->type[iH2] != typeH) error->one(FLERR,"TIP4P 
hydrogen has incorrect atom type"); const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; double delx1 = x[iH1].x - x[i].x; double dely1 = x[iH1].y - x[i].y; double delz1 = x[iH1].z - x[i].z; domain->minimum_image(delx1,dely1,delz1); double delx2 = x[iH2].x - x[i].x; double dely2 = x[iH2].y - x[i].y; double delz2 = x[iH2].z - x[i].z; domain->minimum_image(delx2,dely2,delz2); xM.x = x[i].x + alpha * 0.5 * (delx1 + delx2); xM.y = x[i].y + alpha * 0.5 * (dely1 + dely2); xM.z = x[i].z + alpha * 0.5 * (delz1 + delz2); } /* ---------------------------------------------------------------------- charge assignment into rho1d dx,dy,dz = distance of particle from "lower left" grid point ------------------------------------------------------------------------- */ void PPPMTIP4PCGOMP::compute_rho1d_thr(FFT_SCALAR * const * const r1d, const FFT_SCALAR &dx, const FFT_SCALAR &dy, const FFT_SCALAR &dz) { int k,l; FFT_SCALAR r1,r2,r3; for (k = (1-order)/2; k <= order/2; k++) { r1 = r2 = r3 = ZEROF; for (l = order-1; l >= 0; l--) { r1 = rho_coeff[l][k] + r1*dx; r2 = rho_coeff[l][k] + r2*dy; r3 = rho_coeff[l][k] + r3*dz; } r1d[0][k] = r1; r1d[1][k] = r2; r1d[2][k] = r3; } } /* ---------------------------------------------------------------------- charge assignment into drho1d dx,dy,dz = distance of particle from "lower left" grid point ------------------------------------------------------------------------- */ void PPPMTIP4PCGOMP::compute_drho1d_thr(FFT_SCALAR * const * const d1d, const FFT_SCALAR &dx, const FFT_SCALAR &dy, const FFT_SCALAR &dz) { int k,l; FFT_SCALAR r1,r2,r3; for (k = (1-order)/2; k <= order/2; k++) { r1 = r2 = r3 = ZEROF; for (l = order-2; l >= 0; l--) { r1 = drho_coeff[l][k] + r1*dx; r2 = drho_coeff[l][k] + r2*dy; r3 = drho_coeff[l][k] + r3*dz; } d1d[0][k] = r1; d1d[1][k] = r2; d1d[2][k] = r3; } }