diff --git a/src/Depend.sh b/src/Depend.sh index 3786172d6..632aa4a3e 100644 --- a/src/Depend.sh +++ b/src/Depend.sh @@ -1,110 +1,113 @@ # Depend.sh = Install/unInstall files due to package dependencies # this script is invoked after any package is installed/uninstalled # all parent/child package dependencies should be listed below # parent package = has files that files in another package derive from # child package = has files that derive from files in another package # update child packages that depend on the parent, # but only if the child package is already installed # this is necessary to insure the child package installs # only child files whose parent package files are now installed # decisions on (un)installing individual child files are made by # the Install.sh script in the child package # depend function: arg = child-package # checks if child-package is installed, if not just return # otherwise invoke update of child package via its Install.sh depend () { cd $1 installed=0 for file in *.cpp *.h; do if (test -e ../$file) then installed=1 fi done cd .. if (test $installed = 0) then return fi echo " updating package $1" if (test -e $1/Install.sh) then cd $1; /bin/sh Install.sh 2; cd .. else cd $1; /bin/sh ../Install.sh 2; cd .. 
fi } # add one if statement per parent package # add one depend() call per child package that depends on that parent if (test $1 = "ASPHERE") then depend GPU depend USER-OMP + depend USER-INTEL fi if (test $1 = "CLASS2") then depend GPU depend USER-CUDA depend USER-OMP fi if (test $1 = "COLLOID") then depend GPU depend USER-OMP fi if (test $1 = "DIPOLE") then depend USER-MISC depend USER-OMP fi if (test $1 = "GRANULAR") then depend USER-CUDA depend USER-OMP fi if (test $1 = "KSPACE") then depend GPU depend OPT depend USER-CUDA depend USER-OMP + depend USER-INTEL depend USER-PHONON fi if (test $1 = "MANYBODY") then depend GPU depend OPT depend USER-CUDA depend USER-MISC depend USER-OMP fi if (test $1 = "MOLECULE") then depend GPU depend USER-CUDA depend USER-MISC depend USER-OMP + depend USER-INTEL fi if (test $1 = "PERI") then depend USER-OMP fi if (test $1 = "RIGID") then depend USER-OMP fi if (test $1 = "USER-CG-CMM") then depend GPU depend USER-CUDA depend USER-OMP fi if (test $1 = "USER-MISC") then depend GPU depend USER-OMP fi diff --git a/src/GRANULAR/pair_gran_hooke_history.cpp b/src/GRANULAR/pair_gran_hooke_history.cpp index 77d2f0d70..7f98cfa4e 100644 --- a/src/GRANULAR/pair_gran_hooke_history.cpp +++ b/src/GRANULAR/pair_gran_hooke_history.cpp @@ -1,808 +1,806 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing authors: Leo Silbert (SNL), Gary Grest (SNL) ------------------------------------------------------------------------- */ #include "math.h" #include "stdio.h" #include "stdlib.h" #include "string.h" #include "pair_gran_hooke_history.h" #include "atom.h" #include "atom_vec.h" #include "domain.h" #include "force.h" #include "update.h" #include "modify.h" #include "fix.h" #include "fix_shear_history.h" #include "comm.h" #include "neighbor.h" #include "neigh_list.h" #include "neigh_request.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairGranHookeHistory::PairGranHookeHistory(LAMMPS *lmp) : Pair(lmp) { single_enable = 1; no_virial_fdotr_compute = 1; history = 1; fix_history = NULL; - suffix = NULL; single_extra = 4; svector = new double[4]; computeflag = 0; neighprev = 0; nmax = 0; mass_rigid = NULL; // set comm size needed by this Pair if used with fix rigid comm_forward = 1; } /* ---------------------------------------------------------------------- */ PairGranHookeHistory::~PairGranHookeHistory() { delete [] svector; if (fix_history) modify->delete_fix("SHEAR_HISTORY"); - if (suffix) delete[] suffix; if (allocated) { memory->destroy(setflag); memory->destroy(cutsq); delete [] onerad_dynamic; delete [] onerad_frozen; delete [] maxrad_dynamic; delete [] maxrad_frozen; } memory->destroy(mass_rigid); } /* ---------------------------------------------------------------------- */ void PairGranHookeHistory::compute(int eflag, int vflag) { int i,j,ii,jj,inum,jnum; double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz; double radi,radj,radsum,rsq,r,rinv,rsqinv; double vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3; double wr1,wr2,wr3; double vtr1,vtr2,vtr3,vrel; double mi,mj,meff,damp,ccel,tor1,tor2,tor3; double 
fn,fs,fs1,fs2,fs3; double shrmag,rsht; int *ilist,*jlist,*numneigh,**firstneigh; int *touch,**firsttouch; double *shear,*allshear,**firstshear; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = 0; computeflag = 1; int shearupdate = 1; if (update->setupflag) shearupdate = 0; // update rigid body info for owned & ghost atoms if using FixRigid masses // body[i] = which body atom I is in, -1 if none // mass_body = mass of each rigid body if (fix_rigid && neighbor->ago == 0) { int tmp; int *body = (int *) fix_rigid->extract("body",tmp); double *mass_body = (double *) fix_rigid->extract("masstotal",tmp); if (atom->nmax > nmax) { memory->destroy(mass_rigid); nmax = atom->nmax; memory->create(mass_rigid,nmax,"pair:mass_rigid"); } int nlocal = atom->nlocal; for (i = 0; i < nlocal; i++) if (body[i] >= 0) mass_rigid[i] = mass_body[body[i]]; else mass_rigid[i] = 0.0; comm->forward_comm_pair(this); } double **x = atom->x; double **v = atom->v; double **f = atom->f; double **omega = atom->omega; double **torque = atom->torque; double *radius = atom->radius; double *rmass = atom->rmass; double *mass = atom->mass; int *type = atom->type; int *mask = atom->mask; int nlocal = atom->nlocal; inum = list->inum; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; firsttouch = listgranhistory->firstneigh; firstshear = listgranhistory->firstdouble; // loop over neighbors of my atoms for (ii = 0; ii < inum; ii++) { i = ilist[ii]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; radi = radius[i]; touch = firsttouch[i]; allshear = firstshear[i]; jlist = firstneigh[i]; jnum = numneigh[i]; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; radj = radius[j]; radsum = radi + radj; if (rsq >= radsum*radsum) { // unset non-touching neighbors touch[jj] = 0; shear = &allshear[3*jj]; shear[0] = 0.0; shear[1] = 0.0; shear[2] = 0.0; } 
else { r = sqrt(rsq); rinv = 1.0/r; rsqinv = 1.0/rsq; // relative translational velocity vr1 = v[i][0] - v[j][0]; vr2 = v[i][1] - v[j][1]; vr3 = v[i][2] - v[j][2]; // normal component vnnr = vr1*delx + vr2*dely + vr3*delz; vn1 = delx*vnnr * rsqinv; vn2 = dely*vnnr * rsqinv; vn3 = delz*vnnr * rsqinv; // tangential component vt1 = vr1 - vn1; vt2 = vr2 - vn2; vt3 = vr3 - vn3; // relative rotational velocity wr1 = (radi*omega[i][0] + radj*omega[j][0]) * rinv; wr2 = (radi*omega[i][1] + radj*omega[j][1]) * rinv; wr3 = (radi*omega[i][2] + radj*omega[j][2]) * rinv; // meff = effective mass of pair of particles // if I or J part of rigid body, use body mass // if I or J is frozen, meff is other particle if (rmass) { mi = rmass[i]; mj = rmass[j]; } else { mi = mass[type[i]]; mj = mass[type[j]]; } if (fix_rigid) { if (mass_rigid[i] > 0.0) mi = mass_rigid[i]; if (mass_rigid[j] > 0.0) mj = mass_rigid[j]; } meff = mi*mj / (mi+mj); if (mask[i] & freeze_group_bit) meff = mj; if (mask[j] & freeze_group_bit) meff = mi; // normal forces = Hookian contact + normal velocity damping damp = meff*gamman*vnnr*rsqinv; ccel = kn*(radsum-r)*rinv - damp; // relative velocities vtr1 = vt1 - (delz*wr2-dely*wr3); vtr2 = vt2 - (delx*wr3-delz*wr1); vtr3 = vt3 - (dely*wr1-delx*wr2); vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3; vrel = sqrt(vrel); // shear history effects touch[jj] = 1; shear = &allshear[3*jj]; if (shearupdate) { shear[0] += vtr1*dt; shear[1] += vtr2*dt; shear[2] += vtr3*dt; } shrmag = sqrt(shear[0]*shear[0] + shear[1]*shear[1] + shear[2]*shear[2]); // rotate shear displacements rsht = shear[0]*delx + shear[1]*dely + shear[2]*delz; rsht *= rsqinv; if (shearupdate) { shear[0] -= rsht*delx; shear[1] -= rsht*dely; shear[2] -= rsht*delz; } // tangential forces = shear + tangential velocity damping fs1 = - (kt*shear[0] + meff*gammat*vtr1); fs2 = - (kt*shear[1] + meff*gammat*vtr2); fs3 = - (kt*shear[2] + meff*gammat*vtr3); // rescale frictional displacements and forces if needed fs = 
sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3); fn = xmu * fabs(ccel*r); if (fs > fn) { if (shrmag != 0.0) { shear[0] = (fn/fs) * (shear[0] + meff*gammat*vtr1/kt) - meff*gammat*vtr1/kt; shear[1] = (fn/fs) * (shear[1] + meff*gammat*vtr2/kt) - meff*gammat*vtr2/kt; shear[2] = (fn/fs) * (shear[2] + meff*gammat*vtr3/kt) - meff*gammat*vtr3/kt; fs1 *= fn/fs; fs2 *= fn/fs; fs3 *= fn/fs; } else fs1 = fs2 = fs3 = 0.0; } // forces & torques fx = delx*ccel + fs1; fy = dely*ccel + fs2; fz = delz*ccel + fs3; f[i][0] += fx; f[i][1] += fy; f[i][2] += fz; tor1 = rinv * (dely*fs3 - delz*fs2); tor2 = rinv * (delz*fs1 - delx*fs3); tor3 = rinv * (delx*fs2 - dely*fs1); torque[i][0] -= radi*tor1; torque[i][1] -= radi*tor2; torque[i][2] -= radi*tor3; if (j < nlocal) { f[j][0] -= fx; f[j][1] -= fy; f[j][2] -= fz; torque[j][0] -= radj*tor1; torque[j][1] -= radj*tor2; torque[j][2] -= radj*tor3; } if (evflag) ev_tally_xyz(i,j,nlocal,0, 0.0,0.0,fx,fy,fz,delx,dely,delz); } } } } /* ---------------------------------------------------------------------- allocate all arrays ------------------------------------------------------------------------- */ void PairGranHookeHistory::allocate() { allocated = 1; int n = atom->ntypes; memory->create(setflag,n+1,n+1,"pair:setflag"); for (int i = 1; i <= n; i++) for (int j = i; j <= n; j++) setflag[i][j] = 0; memory->create(cutsq,n+1,n+1,"pair:cutsq"); onerad_dynamic = new double[n+1]; onerad_frozen = new double[n+1]; maxrad_dynamic = new double[n+1]; maxrad_frozen = new double[n+1]; } /* ---------------------------------------------------------------------- global settings ------------------------------------------------------------------------- */ void PairGranHookeHistory::settings(int narg, char **arg) { if (narg != 6) error->all(FLERR,"Illegal pair_style command"); kn = force->numeric(FLERR,arg[0]); if (strcmp(arg[1],"NULL") == 0) kt = kn * 2.0/7.0; else kt = force->numeric(FLERR,arg[1]); gamman = force->numeric(FLERR,arg[2]); if (strcmp(arg[3],"NULL") == 0) gammat = 
0.5 * gamman; else gammat = force->numeric(FLERR,arg[3]); xmu = force->numeric(FLERR,arg[4]); dampflag = force->inumeric(FLERR,arg[5]); if (dampflag == 0) gammat = 0.0; if (kn < 0.0 || kt < 0.0 || gamman < 0.0 || gammat < 0.0 || xmu < 0.0 || xmu > 10000.0 || dampflag < 0 || dampflag > 1) error->all(FLERR,"Illegal pair_style command"); } /* ---------------------------------------------------------------------- set coeffs for one or more type pairs ------------------------------------------------------------------------- */ void PairGranHookeHistory::coeff(int narg, char **arg) { if (narg > 2) error->all(FLERR,"Incorrect args for pair coefficients"); if (!allocated) allocate(); int ilo,ihi,jlo,jhi; force->bounds(arg[0],atom->ntypes,ilo,ihi); force->bounds(arg[1],atom->ntypes,jlo,jhi); int count = 0; for (int i = ilo; i <= ihi; i++) { for (int j = MAX(jlo,i); j <= jhi; j++) { setflag[i][j] = 1; count++; } } if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); } /* ---------------------------------------------------------------------- init specific to this pair style ------------------------------------------------------------------------- */ void PairGranHookeHistory::init_style() { int i; // error and warning checks if (!atom->sphere_flag) error->all(FLERR,"Pair granular requires atom style sphere"); if (comm->ghost_velocity == 0) error->all(FLERR,"Pair granular requires ghost atoms store velocity"); // need a granular neigh list and optionally a granular history neigh list int irequest = neighbor->request(this); neighbor->requests[irequest]->half = 0; neighbor->requests[irequest]->gran = 1; if (history) { irequest = neighbor->request(this); neighbor->requests[irequest]->id = 1; neighbor->requests[irequest]->half = 0; neighbor->requests[irequest]->granhistory = 1; neighbor->requests[irequest]->dnum = 3; } dt = update->dt; // if shear history is stored: // check if newton flag is valid // if first init, create Fix needed for storing shear history 
if (history && force->newton_pair == 1) error->all(FLERR, "Pair granular with shear history requires newton pair off"); if (history && fix_history == NULL) { char **fixarg = new char*[3]; fixarg[0] = (char *) "SHEAR_HISTORY"; fixarg[1] = (char *) "all"; fixarg[2] = (char *) "SHEAR_HISTORY"; - modify->add_fix(3,fixarg,suffix); + modify->add_fix(3,fixarg,1); delete [] fixarg; fix_history = (FixShearHistory *) modify->fix[modify->nfix-1]; fix_history->pair = this; } // check for FixFreeze and set freeze_group_bit for (i = 0; i < modify->nfix; i++) if (strcmp(modify->fix[i]->style,"freeze") == 0) break; if (i < modify->nfix) freeze_group_bit = modify->fix[i]->groupbit; else freeze_group_bit = 0; // check for FixRigid so can extract rigid body masses fix_rigid = NULL; for (i = 0; i < modify->nfix; i++) if (modify->fix[i]->rigid_flag) break; if (i < modify->nfix) fix_rigid = modify->fix[i]; // check for FixPour and FixDeposit so can extract particle radii int ipour; for (ipour = 0; ipour < modify->nfix; ipour++) if (strcmp(modify->fix[ipour]->style,"pour") == 0) break; if (ipour == modify->nfix) ipour = -1; int idep; for (idep = 0; idep < modify->nfix; idep++) if (strcmp(modify->fix[idep]->style,"deposit") == 0) break; if (idep == modify->nfix) idep = -1; // set maxrad_dynamic and maxrad_frozen for each type // include future FixPour and FixDeposit particles as dynamic int itype; for (i = 1; i <= atom->ntypes; i++) { onerad_dynamic[i] = onerad_frozen[i] = 0.0; if (ipour >= 0) { itype = i; onerad_dynamic[i] = *((double *) modify->fix[ipour]->extract("radius",itype)); } if (idep >= 0) { itype = i; onerad_dynamic[i] = *((double *) modify->fix[idep]->extract("radius",itype)); } } double *radius = atom->radius; int *mask = atom->mask; int *type = atom->type; int nlocal = atom->nlocal; for (i = 0; i < nlocal; i++) if (mask[i] & freeze_group_bit) onerad_frozen[type[i]] = MAX(onerad_frozen[type[i]],radius[i]); else onerad_dynamic[type[i]] = 
MAX(onerad_dynamic[type[i]],radius[i]); MPI_Allreduce(&onerad_dynamic[1],&maxrad_dynamic[1],atom->ntypes, MPI_DOUBLE,MPI_MAX,world); MPI_Allreduce(&onerad_frozen[1],&maxrad_frozen[1],atom->ntypes, MPI_DOUBLE,MPI_MAX,world); } /* ---------------------------------------------------------------------- neighbor callback to inform pair style of neighbor list to use optional granular history list ------------------------------------------------------------------------- */ void PairGranHookeHistory::init_list(int id, NeighList *ptr) { if (id == 0) list = ptr; else if (id == 1) listgranhistory = ptr; } /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ double PairGranHookeHistory::init_one(int i, int j) { if (!allocated) allocate(); // cutoff = sum of max I,J radii for // dynamic/dynamic & dynamic/frozen interactions, but not frozen/frozen double cutoff = maxrad_dynamic[i]+maxrad_dynamic[j]; cutoff = MAX(cutoff,maxrad_frozen[i]+maxrad_dynamic[j]); cutoff = MAX(cutoff,maxrad_dynamic[i]+maxrad_frozen[j]); return cutoff; } /* ---------------------------------------------------------------------- proc 0 writes to restart file ------------------------------------------------------------------------- */ void PairGranHookeHistory::write_restart(FILE *fp) { write_restart_settings(fp); int i,j; for (i = 1; i <= atom->ntypes; i++) for (j = i; j <= atom->ntypes; j++) fwrite(&setflag[i][j],sizeof(int),1,fp); } /* ---------------------------------------------------------------------- proc 0 reads from restart file, bcasts ------------------------------------------------------------------------- */ void PairGranHookeHistory::read_restart(FILE *fp) { read_restart_settings(fp); allocate(); int i,j; int me = comm->me; for (i = 1; i <= atom->ntypes; i++) for (j = i; j <= atom->ntypes; j++) { if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp); 
MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world); } } /* ---------------------------------------------------------------------- proc 0 writes to restart file ------------------------------------------------------------------------- */ void PairGranHookeHistory::write_restart_settings(FILE *fp) { fwrite(&kn,sizeof(double),1,fp); fwrite(&kt,sizeof(double),1,fp); fwrite(&gamman,sizeof(double),1,fp); fwrite(&gammat,sizeof(double),1,fp); fwrite(&xmu,sizeof(double),1,fp); fwrite(&dampflag,sizeof(int),1,fp); } /* ---------------------------------------------------------------------- proc 0 reads from restart file, bcasts ------------------------------------------------------------------------- */ void PairGranHookeHistory::read_restart_settings(FILE *fp) { if (comm->me == 0) { fread(&kn,sizeof(double),1,fp); fread(&kt,sizeof(double),1,fp); fread(&gamman,sizeof(double),1,fp); fread(&gammat,sizeof(double),1,fp); fread(&xmu,sizeof(double),1,fp); fread(&dampflag,sizeof(int),1,fp); } MPI_Bcast(&kn,1,MPI_DOUBLE,0,world); MPI_Bcast(&kt,1,MPI_DOUBLE,0,world); MPI_Bcast(&gamman,1,MPI_DOUBLE,0,world); MPI_Bcast(&gammat,1,MPI_DOUBLE,0,world); MPI_Bcast(&xmu,1,MPI_DOUBLE,0,world); MPI_Bcast(&dampflag,1,MPI_INT,0,world); } /* ---------------------------------------------------------------------- */ void PairGranHookeHistory::reset_dt() { dt = update->dt; } /* ---------------------------------------------------------------------- */ double PairGranHookeHistory::single(int i, int j, int itype, int jtype, double rsq, double factor_coul, double factor_lj, double &fforce) { double radi,radj,radsum; double r,rinv,rsqinv,delx,dely,delz; double vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3,wr1,wr2,wr3; double mi,mj,meff,damp,ccel; double vtr1,vtr2,vtr3,vrel,shrmag,rsht; double fs1,fs2,fs3,fs,fn; double *radius = atom->radius; radi = radius[i]; radj = radius[j]; radsum = radi + radj; if (rsq >= radsum*radsum) { fforce = 0.0; svector[0] = svector[1] = svector[2] = svector[3] = 0.0; return 0.0; } r = 
sqrt(rsq); rinv = 1.0/r; rsqinv = 1.0/rsq; // relative translational velocity double **v = atom->v; vr1 = v[i][0] - v[j][0]; vr2 = v[i][1] - v[j][1]; vr3 = v[i][2] - v[j][2]; // normal component double **x = atom->x; delx = x[i][0] - x[j][0]; dely = x[i][1] - x[j][1]; delz = x[i][2] - x[j][2]; vnnr = vr1*delx + vr2*dely + vr3*delz; vn1 = delx*vnnr * rsqinv; vn2 = dely*vnnr * rsqinv; vn3 = delz*vnnr * rsqinv; // tangential component vt1 = vr1 - vn1; vt2 = vr2 - vn2; vt3 = vr3 - vn3; // relative rotational velocity double **omega = atom->omega; wr1 = (radi*omega[i][0] + radj*omega[j][0]) * rinv; wr2 = (radi*omega[i][1] + radj*omega[j][1]) * rinv; wr3 = (radi*omega[i][2] + radj*omega[j][2]) * rinv; // meff = effective mass of pair of particles // if I or J part of rigid body, use body mass // if I or J is frozen, meff is other particle double *rmass = atom->rmass; double *mass = atom->mass; int *type = atom->type; int *mask = atom->mask; if (rmass) { mi = rmass[i]; mj = rmass[j]; } else { mi = mass[type[i]]; mj = mass[type[j]]; } if (fix_rigid) { // NOTE: insure mass_rigid is current for owned+ghost atoms? 
if (mass_rigid[i] > 0.0) mi = mass_rigid[i]; if (mass_rigid[j] > 0.0) mj = mass_rigid[j]; } meff = mi*mj / (mi+mj); if (mask[i] & freeze_group_bit) meff = mj; if (mask[j] & freeze_group_bit) meff = mi; // normal forces = Hookian contact + normal velocity damping damp = meff*gamman*vnnr*rsqinv; ccel = kn*(radsum-r)*rinv - damp; // relative velocities vtr1 = vt1 - (delz*wr2-dely*wr3); vtr2 = vt2 - (delx*wr3-delz*wr1); vtr3 = vt3 - (dely*wr1-delx*wr2); vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3; vrel = sqrt(vrel); // shear history effects // neighprev = index of found neigh on previous call // search entire jnum list of neighbors of I for neighbor J // start from neighprev, since will typically be next neighbor // reset neighprev to 0 as necessary int jnum = list->numneigh[i]; int *touch = list->listgranhistory->firstneigh[i]; double *allshear = list->listgranhistory->firstdouble[i]; for (int jj = 0; jj < jnum; jj++) { neighprev++; if (neighprev >= jnum) neighprev = 0; if (touch[neighprev] == j) break; } double *shear = &allshear[3*neighprev]; shrmag = sqrt(shear[0]*shear[0] + shear[1]*shear[1] + shear[2]*shear[2]); // rotate shear displacements rsht = shear[0]*delx + shear[1]*dely + shear[2]*delz; rsht *= rsqinv; // tangential forces = shear + tangential velocity damping fs1 = - (kt*shear[0] + meff*gammat*vtr1); fs2 = - (kt*shear[1] + meff*gammat*vtr2); fs3 = - (kt*shear[2] + meff*gammat*vtr3); // rescale frictional displacements and forces if needed fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3); fn = xmu * fabs(ccel*r); if (fs > fn) { if (shrmag != 0.0) { fs1 *= fn/fs; fs2 *= fn/fs; fs3 *= fn/fs; fs *= fn/fs; } else fs1 = fs2 = fs3 = fs = 0.0; } // set all forces and return no energy fforce = ccel; svector[0] = fs1; svector[1] = fs2; svector[2] = fs3; svector[3] = fs; return 0.0; } /* ---------------------------------------------------------------------- */ int PairGranHookeHistory::pack_forward_comm(int n, int *list, double *buf, int pbc_flag, int *pbc) { int i,j,m; m = 0; 
for (i = 0; i < n; i++) { j = list[i]; buf[m++] = mass_rigid[j]; } return m; } /* ---------------------------------------------------------------------- */ void PairGranHookeHistory::unpack_forward_comm(int n, int first, double *buf) { int i,m,last; m = 0; last = first + n; for (i = first; i < last; i++) mass_rigid[i] = buf[m++]; } /* ---------------------------------------------------------------------- */ void *PairGranHookeHistory::extract(const char *str, int &dim) { dim = 0; if (strcmp(str,"computeflag") == 0) return (void *) &computeflag; return NULL; } /* ---------------------------------------------------------------------- memory usage of local atom-based arrays ------------------------------------------------------------------------- */ double PairGranHookeHistory::memory_usage() { double bytes = nmax * sizeof(double); return bytes; } diff --git a/src/GRANULAR/pair_gran_hooke_history.h b/src/GRANULAR/pair_gran_hooke_history.h index 4e2e51a4c..25762ca65 100644 --- a/src/GRANULAR/pair_gran_hooke_history.h +++ b/src/GRANULAR/pair_gran_hooke_history.h @@ -1,103 +1,102 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ #ifdef PAIR_CLASS PairStyle(gran/hooke/history,PairGranHookeHistory) #else #ifndef LMP_PAIR_GRAN_HOOKE_HISTORY_H #define LMP_PAIR_GRAN_HOOKE_HISTORY_H #include "pair.h" namespace LAMMPS_NS { class PairGranHookeHistory : public Pair { public: int computeflag; PairGranHookeHistory(class LAMMPS *); virtual ~PairGranHookeHistory(); virtual void compute(int, int); virtual void settings(int, char **); void coeff(int, char **); void init_style(); void init_list(int, class NeighList *); double init_one(int, int); void write_restart(FILE *); void read_restart(FILE *); void write_restart_settings(FILE *); void read_restart_settings(FILE *); void reset_dt(); virtual double single(int, int, int, int, double, double, double, double &); int pack_forward_comm(int, int *, double *, int, int *); void unpack_forward_comm(int, int, double *); void *extract(const char *, int &); double memory_usage(); protected: double kn,kt,gamman,gammat,xmu; int dampflag; double dt; int freeze_group_bit; int history; - char *suffix; int neighprev; double *onerad_dynamic,*onerad_frozen; double *maxrad_dynamic,*maxrad_frozen; class FixShearHistory *fix_history; // storage of rigid body masses for use in granular interactions class Fix *fix_rigid; // ptr to rigid body fix, NULL if none double *mass_rigid; // rigid mass for owned+ghost atoms int nmax; // allocated size of mass_rigid void allocate(); }; } #endif #endif /* ERROR/WARNING messages: E: Illegal ... command Self-explanatory. Check the input script syntax and compare to the documentation for the command. You can use -echo screen as a command-line option when running LAMMPS to see the offending line. E: Incorrect args for pair coefficients Self-explanatory. Check the input script or data file. E: Pair granular requires atom style sphere Self-explanatory. E: Pair granular requires ghost atoms store velocity Use the comm_modify vel yes command to enable this. 
E: Pair granular with shear history requires newton pair off This is a current restriction of the implementation of pair granular styles with history. */ diff --git a/src/KSPACE/fix_tune_kspace.cpp b/src/KSPACE/fix_tune_kspace.cpp index 9abfc9d1b..e3d5a5b5c 100644 --- a/src/KSPACE/fix_tune_kspace.cpp +++ b/src/KSPACE/fix_tune_kspace.cpp @@ -1,542 +1,543 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Paul Crozier (SNL) ------------------------------------------------------------------------- */ #include "string.h" #include "stdlib.h" #include "fix_tune_kspace.h" #include "update.h" #include "domain.h" #include "atom.h" #include "comm.h" #include "force.h" #include "kspace.h" #include "pair.h" #include "error.h" #include "memory.h" #include "timer.h" #include "neighbor.h" #include "modify.h" #include "compute.h" #include #include #include #define SWAP(a,b) {temp=(a);(a)=(b);(b)=temp;} #define SIGN(a,b) ((b) >= 0.0 ? 
fabs(a) : -fabs(a)) #define GOLD 1.618034 using namespace std; using namespace LAMMPS_NS; using namespace FixConst; /* ---------------------------------------------------------------------- */ FixTuneKspace::FixTuneKspace(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg) { if (narg < 3) error->all(FLERR,"Illegal fix tune/kspace command"); global_freq = 1; firststep = 0; niter = 0; niter_adjust_rcut = 0; keep_bracketing = true; first_brent_pass = true; converged = false; need_fd2_brent = false; ewald_time = pppm_time = msm_time = 0.0; // parse arguments nevery = force->inumeric(FLERR,arg[3]); // set up reneighboring force_reneighbor = 1; next_reneighbor = update->ntimestep + 1; } /* ---------------------------------------------------------------------- */ int FixTuneKspace::setmask() { int mask = 0; mask |= PRE_EXCHANGE; mask |= PRE_NEIGHBOR; return mask; } /* ---------------------------------------------------------------------- */ void FixTuneKspace::init() { if (!force->kspace) error->all(FLERR,"Cannot use fix tune/kspace without a kspace style"); if (!force->pair) error->all(FLERR,"Cannot use fix tune/kspace without a pair style"); double old_acc = force->kspace->accuracy/force->kspace->two_charge_force; char old_acc_str[12]; sprintf(old_acc_str,"%g",old_acc); strcpy(new_acc_str,old_acc_str); int itmp; double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); pair_cut_coul = *p_cutoff; } /* ---------------------------------------------------------------------- perform dynamic kspace parameter optimization ------------------------------------------------------------------------- */ void FixTuneKspace::pre_exchange() { if (!nevery) return; if (!force->kspace) return; if (!force->pair) return; if (next_reneighbor != update->ntimestep) return; next_reneighbor = update->ntimestep + nevery; double time = get_timing_info(); if (strcmp(force->kspace_style,"ewald") == 0) ewald_time = time; if (strcmp(force->kspace_style,"pppm") == 0) pppm_time = time; 
if (strcmp(force->kspace_style,"msm") == 0) msm_time = time; niter++; if (niter == 1) { // test Ewald store_old_kspace_settings(); strcpy(new_kspace_style,"ewald"); sprintf(new_pair_style,"%s/long",base_pair_style); update_pair_style(new_pair_style,pair_cut_coul); update_kspace_style(new_kspace_style,new_acc_str); } else if (niter == 2) { // test PPPM store_old_kspace_settings(); strcpy(new_kspace_style,"pppm"); sprintf(new_pair_style,"%s/long",base_pair_style); update_pair_style(new_pair_style,pair_cut_coul); update_kspace_style(new_kspace_style,new_acc_str); } else if (niter == 3) { // test MSM store_old_kspace_settings(); strcpy(new_kspace_style,"msm"); sprintf(new_pair_style,"%s/msm",base_pair_style); update_pair_style(new_pair_style,pair_cut_coul); update_kspace_style(new_kspace_style,new_acc_str); } else if (niter == 4) { store_old_kspace_settings(); cout << "ewald_time = " << ewald_time << endl; cout << "pppm_time = " << pppm_time << endl; cout << "msm_time = " << msm_time << endl; // switch to fastest one strcpy(new_kspace_style,"ewald"); sprintf(new_pair_style,"%s/long",base_pair_style); if (pppm_time < ewald_time && pppm_time < msm_time) strcpy(new_kspace_style,"pppm"); else if (msm_time < pppm_time && msm_time < ewald_time) { strcpy(new_kspace_style,"msm"); sprintf(new_pair_style,"%s/msm",base_pair_style); } update_pair_style(new_pair_style,pair_cut_coul); update_kspace_style(new_kspace_style,new_acc_str); } else { adjust_rcut(time); } last_spcpu = timer->elapsed(TIME_LOOP); } /* ---------------------------------------------------------------------- figure out CPU time per timestep since last time checked ------------------------------------------------------------------------- */ double FixTuneKspace::get_timing_info() { double dvalue; double new_cpu; int new_step = update->ntimestep; if (firststep == 0) { new_cpu = 0.0; dvalue = 0.0; firststep = 1; } else { new_cpu = timer->elapsed(TIME_LOOP); double cpu_diff = new_cpu - last_spcpu; int step_diff = 
new_step - last_step; if (step_diff > 0.0) dvalue = cpu_diff/step_diff; else dvalue = 0.0; } last_step = new_step; last_spcpu = new_cpu; return dvalue; } /* ---------------------------------------------------------------------- store old kspace settings: style, accuracy, order, etc ------------------------------------------------------------------------- */ void FixTuneKspace::store_old_kspace_settings() { int n = strlen(force->kspace_style) + 1; char *old_kspace_style = new char[n]; strcpy(old_kspace_style,force->kspace_style); strcpy(new_kspace_style,old_kspace_style); double old_acc = force->kspace->accuracy_relative; char old_acc_str[12]; sprintf(old_acc_str,"%g",old_acc); strcpy(new_pair_style,force->pair_style); strcpy(base_pair_style,force->pair_style); char *trunc; if ((trunc = strstr(base_pair_style, "/long")) != NULL) *trunc = '\0'; if ((trunc = strstr(base_pair_style, "/msm" )) != NULL) *trunc = '\0'; old_differentiation_flag = force->kspace->differentiation_flag; old_slabflag = force->kspace->slabflag; old_slab_volfactor = force->kspace->slab_volfactor; } /* ---------------------------------------------------------------------- update the pair style if necessary, preserving the settings ------------------------------------------------------------------------- */ -void FixTuneKspace::update_pair_style(char *new_pair_style, double pair_cut_coul) +void FixTuneKspace::update_pair_style(char *new_pair_style, + double pair_cut_coul) { int itmp; double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); *p_cutoff = pair_cut_coul; // check to see if we need to change pair styles if (strcmp(new_pair_style,force->pair_style) == 0) return; // create a temporary file to store current pair settings FILE *p_pair_settings_file; p_pair_settings_file = tmpfile(); force->pair->write_restart(p_pair_settings_file); rewind(p_pair_settings_file); cout << "Creating new pair style: " << new_pair_style << endl; // delete old pair style and create new one - 
force->create_pair(new_pair_style,lmp->suffix); + force->create_pair(new_pair_style,1); // restore current pair settings from temporary file force->pair->read_restart(p_pair_settings_file); double *pcutoff = (double *) force->pair->extract("cut_coul",itmp); double current_cutoff = *pcutoff; cout << "Coulomb cutoff for real space: " << current_cutoff << endl; // close temporary file fclose(p_pair_settings_file); } /* ---------------------------------------------------------------------- update the kspace style if necessary ------------------------------------------------------------------------- */ -void FixTuneKspace::update_kspace_style(char *new_kspace_style, char *new_acc_str) +void FixTuneKspace::update_kspace_style(char *new_kspace_style, + char *new_acc_str) { // create kspace style char string int narg = 2; char **arg; arg = NULL; int maxarg = 100; arg = (char **) memory->srealloc(arg,maxarg*sizeof(char *),"tune/kspace:arg"); int n = 12; arg[0] = new char[n]; strcpy(arg[0],new_kspace_style); arg[1] = new char[n]; strcpy(arg[1],new_acc_str); // delete old kspace style and create new one - force->create_kspace(narg,arg,lmp->suffix); - + force->create_kspace(narg,arg,1); force->kspace->differentiation_flag = old_differentiation_flag; force->kspace->slabflag = old_slabflag; force->kspace->slab_volfactor = old_slab_volfactor; // initialize new kspace style, pair style, molecular styles force->init(); // set up grid force->kspace->setup_grid(); // Re-init neighbor list. Probably only needed when redefining the pair style. Should happen after pair->init() to get pair style neighbor list request registered neighbor->init(); // Re-init computes to update pointers to virials, etc. 
for (int i = 0; i < modify->ncompute; i++) modify->compute[i]->init(); memory->sfree(arg); } /* ---------------------------------------------------------------------- find the optimal real space coulomb cutoff ------------------------------------------------------------------------- */ void FixTuneKspace::adjust_rcut(double time) { if (strcmp(force->kspace_style,"msm") == 0) return; if (converged) return; double temp; const double TINY = 1.0e-20; // get the current cutoff int itmp; double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp); double current_cutoff = *p_cutoff; cout << "Old Coulomb cutoff for real space: " << current_cutoff << endl; // use Brent's method from Numerical Recipes to find optimal real space cutoff // first time through, get ax_brent and fa_brent, and adjust cutoff if (keep_bracketing) { if (niter_adjust_rcut == 0) { pair_cut_coul /= 2; } else if (niter_adjust_rcut == 1) { ax_brent = current_cutoff; fa_brent = time; pair_cut_coul *= 2; // second time through, get bx_brent and fb_brent, and adjust cutoff } else if (niter_adjust_rcut == 2) { bx_brent = current_cutoff; fb_brent = time; if (fb_brent > fa_brent) { SWAP(ax_brent,bx_brent); SWAP(fb_brent,fa_brent); pair_cut_coul /= 4; } else { pair_cut_coul *= 2; } // third time through, get cx_brent and fc_brent, and adjust cutoff if needed } else if (niter_adjust_rcut == 3) { cx_brent = current_cutoff; fc_brent = time; if (fc_brent > fb_brent) keep_bracketing = false; else { double r = (bx_brent - ax_brent)*(fb_brent - fc_brent); double q = (bx_brent - cx_brent)*(fb_brent - fa_brent); dx_brent = bx_brent - ((bx_brent - cx_brent)*q - (bx_brent - ax_brent)*r)/ (2.0*SIGN(MAX(fabs(q - r),TINY),q - r)); pair_cut_coul = dx_brent; } // after third time through, bracket the minimum, and adjust cutoff } else if (niter_adjust_rcut > 3) { dx_brent = current_cutoff; if (need_fd2_brent) fd2_brent = time; else fd_brent = time; mnbrak(); pair_cut_coul = dx_brent; } } if (!keep_bracketing) { dx_brent 
= current_cutoff; fd_brent = time; if (first_brent_pass) brent0(); else brent2(); brent1(); pair_cut_coul = dx_brent; } niter_adjust_rcut++; if (pair_cut_coul <= 0.0) pair_cut_coul = fabs(MIN(ax_brent,MIN(bx_brent,(MIN(cx_brent,dx_brent))))/2.0) + TINY; if (pair_cut_coul != pair_cut_coul) error->all(FLERR,"Bad real space Coulomb cutoff in fix tune/kspace"); // change the cutoff to pair_cut_coul *p_cutoff = pair_cut_coul; // report the new cutoff double *new_cutoff = (double *) force->pair->extract("cut_coul",itmp); current_cutoff = *new_cutoff; cout << "Adjusted Coulomb cutoff for real space: " << current_cutoff << endl; store_old_kspace_settings(); update_pair_style(new_pair_style,pair_cut_coul); update_kspace_style(new_kspace_style,new_acc_str); } /* ---------------------------------------------------------------------- bracket a minimum using parabolic extrapolation ------------------------------------------------------------------------- */ void FixTuneKspace::mnbrak() { const double GLIMIT = 100.0, TINY = 1.0e-20; double r,q; r = (bx_brent - ax_brent)*(fb_brent - fc_brent); q = (bx_brent - cx_brent)*(fb_brent - fa_brent); dx_brent = bx_brent - ((bx_brent - cx_brent)*q - (bx_brent - ax_brent)*r)/ (2.0*SIGN(MAX(fabs(q - r),TINY),q - r)); dxlim = bx_brent + GLIMIT*(cx_brent - bx_brent); if ((bx_brent - dx_brent)*(dx_brent - cx_brent) > 0.0) { if (fd_brent < fc_brent) { ax_brent = bx_brent; bx_brent = dx_brent; fa_brent = fb_brent; fb_brent = fd_brent; keep_bracketing = false; return; } else if (fd_brent > fb_brent) { cx_brent = dx_brent; fc_brent = fd_brent; keep_bracketing = false; return; } dx_brent = cx_brent + GOLD*(cx_brent - bx_brent); if (need_fd2_brent) { fd_brent = fd2_brent; need_fd2_brent = false; } else { need_fd2_brent = true; return; } } else if ((cx_brent - dx_brent)*(dx_brent - dxlim) > 0.0) { if (fd_brent < fc_brent) { if (need_fd2_brent) { need_fd2_brent = false; } else { need_fd2_brent = true; dx_brent += GOLD*(dx_brent - cx_brent); return; } 
shft3(bx_brent,cx_brent,dx_brent,dx_brent + GOLD*(dx_brent - cx_brent)); shft3(fb_brent,fc_brent,fd_brent,fd2_brent); } } else if ((dx_brent - dxlim)*(dxlim - cx_brent) >= 0.0) { dx_brent = dxlim; if (need_fd2_brent) { fd_brent = fd2_brent; need_fd2_brent = false; } else { need_fd2_brent = true; return; } } else { dx_brent = cx_brent + GOLD*(cx_brent - bx_brent); if (need_fd2_brent) { fd_brent = fd2_brent; need_fd2_brent = false; } else { need_fd2_brent = true; return; } } shft3(ax_brent,bx_brent,cx_brent,dx_brent); shft3(fa_brent,fb_brent,fc_brent,fd_brent); } /* ---------------------------------------------------------------------- Brent's method from Numerical Recipes ------------------------------------------------------------------------- */ void FixTuneKspace::brent0() { a_brent=(ax_brent < cx_brent ? ax_brent : cx_brent); b_brent=(ax_brent > cx_brent ? ax_brent : cx_brent); x_brent=w_brent=v_brent=bx_brent; fw_brent=fv_brent=fx_brent=fb_brent; } /* ---------------------------------------------------------------------- Brent's method from Numerical Recipes ------------------------------------------------------------------------- */ void FixTuneKspace::brent1() { const double CGOLD=0.3819660; const double ZEPS=numeric_limits<double>::epsilon()*1.0e-3; double d=0.0,etemp; double p,q,r,tol1,tol2,xm; double e=0.0; double tol=0.001; xm=0.5*(a_brent+b_brent); tol2=2.0*(tol1=tol*fabs(x_brent)+ZEPS); if (fabs(x_brent-xm) <= (tol2-0.5*(b_brent-a_brent))) { converged = true; dx_brent = x_brent; return; } if (fabs(e) > tol1) { r=(x_brent-w_brent)*(fx_brent-fv_brent); q=(x_brent-v_brent)*(fx_brent-fw_brent); p=(x_brent-v_brent)*q-(x_brent-w_brent)*r; q=2.0*(q-r); if (q > 0.0) p = -p; q=fabs(q); etemp=e; e=d; if (fabs(p) >= fabs(0.5*q*etemp) || p <= q*(a_brent-x_brent) || p >= q*(b_brent-x_brent)) d=CGOLD*(e=(x_brent >= xm ? 
a_brent-x_brent : b_brent-x_brent)); else { d=p/q; dx_brent=x_brent+d; if (dx_brent-a_brent < tol2 || b_brent-dx_brent < tol2) d=SIGN(tol1,xm-x_brent); } } else { d=CGOLD*(e=(x_brent >= xm ? a_brent-x_brent : b_brent-x_brent)); } dx_brent=(fabs(d) >= tol1 ? x_brent+d : x_brent+SIGN(tol1,d)); first_brent_pass = false; return; } /* ---------------------------------------------------------------------- Brent's method from Numerical Recipes ------------------------------------------------------------------------- */ void FixTuneKspace::brent2() { if (fd_brent <= fx_brent) { if (dx_brent >= x_brent) a_brent=x_brent; else b_brent=x_brent; shft3(v_brent,w_brent,x_brent,dx_brent); shft3(fv_brent,fw_brent,fx_brent,fd_brent); } else { if (dx_brent < x_brent) a_brent=dx_brent; else b_brent=dx_brent; if (fd_brent <= fw_brent || w_brent == x_brent) { v_brent=w_brent; w_brent=dx_brent; fv_brent=fw_brent; fw_brent=fd_brent; } else if (fd_brent <= fv_brent || v_brent == x_brent || v_brent == w_brent) { v_brent=dx_brent; fv_brent=fd_brent; } } } diff --git a/src/MAKE/Makefile.linux b/src/MAKE/Makefile.beacon similarity index 84% copy from src/MAKE/Makefile.linux copy to src/MAKE/Makefile.beacon index c4264dc22..98e816a43 100755 --- a/src/MAKE/Makefile.linux +++ b/src/MAKE/Makefile.beacon @@ -1,108 +1,109 @@ # linux = RedHat Linux box, Intel icc, MPICH2, FFTW SHELL = /bin/sh # --------------------------------------------------------------------- # compiler/linker settings # specify flags and libraries needed for your compiler -CC = icc -CCFLAGS = -O +CC = mpiicpc -openmp -DLMP_INTEL_OFFLOAD -DLAMMPS_MEMALIGN=64 +MIC_OPT = -offload-option,mic,compiler,"-fp-model fast=2 -mGLOB_default_function_attrs=\"gather_scatter_loop_unroll=4\"" +CCFLAGS = -O3 -xAVX -fno-alias -ansi-alias -restrict -override-limits $(MIC_OPT) SHFLAGS = -fPIC DEPFLAGS = -M -LINK = icc -LINKFLAGS = -O -LIB = -lstdc++ +LINK = mpiicpc -openmp +LINKFLAGS = -O3 -xAVX +LIB = SIZE = size ARCHIVE = ar ARFLAGS = -rc 
SHLIBFLAGS = -shared # --------------------------------------------------------------------- # LAMMPS-specific settings # specify settings for LAMMPS features you will use # if you change any -D setting, do full re-compile after "make clean" # LAMMPS ifdef settings, OPTIONAL # see possible settings in doc/Section_start.html#2_2 (step 4) LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG # MPI library, REQUIRED # see discussion in doc/Section_start.html#2_2 (step 5) # can point to dummy MPI library in src/STUBS as in Makefile.serial # INC = path for mpi.h, MPI compiler settings # PATH = path for MPI library # LIB = name of MPI library MPI_INC = -DMPICH_SKIP_MPICXX MPI_PATH = -MPI_LIB = -lmpich -lmpl -lpthread +MPI_LIB = # FFT library, OPTIONAL # see discussion in doc/Section_start.html#2_2 (step 6) # can be left blank to use provided KISS FFT library # INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings # PATH = path for FFT library # LIB = name of FFT library -FFT_INC = -DFFT_FFTW +FFT_INC = -DFFT_MKL -DFFT_SINGLE -I$(MKLROOT) FFT_PATH = -FFT_LIB = -lfftw +FFT_LIB = -L$(MKLROOT) -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core # JPEG and/or PNG library, OPTIONAL # see discussion in doc/Section_start.html#2_2 (step 7) # only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC # INC = path(s) for jpeglib.h and/or png.h # PATH = path(s) for JPEG library and/or PNG library # LIB = name(s) of JPEG library and/or PNG library JPG_INC = JPG_PATH = JPG_LIB = -ljpeg # --------------------------------------------------------------------- # build rules and dependencies # no need to edit this section include Makefile.package.settings include Makefile.package EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC) EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH) EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB) # Path to src files vpath %.cpp .. vpath %.h .. 
# Link target $(EXE): $(OBJ) $(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE) $(SIZE) $(EXE) # Library targets lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) shlib: $(OBJ) $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \ $(OBJ) $(EXTRA_LIB) $(LIB) # Compilation rules %.o:%.cpp $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< %.d:%.cpp $(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@ # Individual dependencies DEPENDS = $(OBJ:.o=.d) sinclude $(DEPENDS) diff --git a/src/MAKE/Makefile.linux b/src/MAKE/Makefile.g++_openmpi similarity index 90% copy from src/MAKE/Makefile.linux copy to src/MAKE/Makefile.g++_openmpi index c4264dc22..c8912f171 100755 --- a/src/MAKE/Makefile.linux +++ b/src/MAKE/Makefile.g++_openmpi @@ -1,108 +1,108 @@ -# linux = RedHat Linux box, Intel icc, MPICH2, FFTW +# g++ = RedHat Linux box, g++4, OpenMPI, FFTW SHELL = /bin/sh # --------------------------------------------------------------------- # compiler/linker settings # specify flags and libraries needed for your compiler -CC = icc -CCFLAGS = -O +CC = g++ +CCFLAGS = -g -O # -Wunused SHFLAGS = -fPIC DEPFLAGS = -M -LINK = icc -LINKFLAGS = -O -LIB = -lstdc++ +LINK = g++ +LINKFLAGS = -g -O +LIB = SIZE = size ARCHIVE = ar ARFLAGS = -rc SHLIBFLAGS = -shared # --------------------------------------------------------------------- # LAMMPS-specific settings # specify settings for LAMMPS features you will use # if you change any -D setting, do full re-compile after "make clean" # LAMMPS ifdef settings, OPTIONAL # see possible settings in doc/Section_start.html#2_2 (step 4) LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG # MPI library, REQUIRED # see discussion in doc/Section_start.html#2_2 (step 5) # can point to dummy MPI library in src/STUBS as in Makefile.serial # INC = path for mpi.h, MPI compiler settings # PATH = path for MPI library # LIB = name of MPI library -MPI_INC = -DMPICH_SKIP_MPICXX -MPI_PATH = -MPI_LIB = -lmpich -lmpl -lpthread +MPI_INC = -DMPICH_SKIP_MPICXX 
-I/usr/local/openmpi/include +MPI_PATH = -L/usr/local/openmpi/lib +MPI_LIB = -lmpi -lmpi_cxx # FFT library, OPTIONAL # see discussion in doc/Section_start.html#2_2 (step 6) # can be left blank to use provided KISS FFT library # INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings # PATH = path for FFT library # LIB = name of FFT library -FFT_INC = -DFFT_FFTW +FFT_INC = -DFFT_FFTW FFT_PATH = FFT_LIB = -lfftw # JPEG and/or PNG library, OPTIONAL # see discussion in doc/Section_start.html#2_2 (step 7) # only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC # INC = path(s) for jpeglib.h and/or png.h # PATH = path(s) for JPEG library and/or PNG library # LIB = name(s) of JPEG library and/or PNG library JPG_INC = JPG_PATH = JPG_LIB = -ljpeg # --------------------------------------------------------------------- # build rules and dependencies # no need to edit this section include Makefile.package.settings include Makefile.package EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC) EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH) EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB) # Path to src files vpath %.cpp .. vpath %.h .. 
# Link target $(EXE): $(OBJ) $(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE) $(SIZE) $(EXE) # Library targets lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) shlib: $(OBJ) $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \ $(OBJ) $(EXTRA_LIB) $(LIB) # Compilation rules %.o:%.cpp $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< %.d:%.cpp $(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@ # Individual dependencies DEPENDS = $(OBJ:.o=.d) sinclude $(DEPENDS) diff --git a/src/MAKE/Makefile.linux b/src/MAKE/Makefile.intel similarity index 86% copy from src/MAKE/Makefile.linux copy to src/MAKE/Makefile.intel index c4264dc22..2b209e27b 100755 --- a/src/MAKE/Makefile.linux +++ b/src/MAKE/Makefile.intel @@ -1,108 +1,108 @@ -# linux = RedHat Linux box, Intel icc, MPICH2, FFTW +# Intel compiler, Intel MPI, MKL FFT, no offload to coprocessor SHELL = /bin/sh # --------------------------------------------------------------------- # compiler/linker settings # specify flags and libraries needed for your compiler -CC = icc -CCFLAGS = -O +CC = mpiicpc -openmp -DLAMMPS_MEMALIGN=64 -no-offload +CCFLAGS = -O3 -xHost -fno-alias -ansi-alias -restrict -override-limits SHFLAGS = -fPIC DEPFLAGS = -M -LINK = icc -LINKFLAGS = -O -LIB = -lstdc++ +LINK = mpiicpc -openmp +LINKFLAGS = -O3 -xHost +LIB = SIZE = size ARCHIVE = ar ARFLAGS = -rc SHLIBFLAGS = -shared # --------------------------------------------------------------------- # LAMMPS-specific settings # specify settings for LAMMPS features you will use # if you change any -D setting, do full re-compile after "make clean" # LAMMPS ifdef settings, OPTIONAL # see possible settings in doc/Section_start.html#2_2 (step 4) LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG # MPI library, REQUIRED # see discussion in doc/Section_start.html#2_2 (step 5) # can point to dummy MPI library in src/STUBS as in Makefile.serial # INC = path for mpi.h, MPI compiler settings # PATH = path for MPI library # LIB = name of MPI library 
MPI_INC = -DMPICH_SKIP_MPICXX MPI_PATH = -MPI_LIB = -lmpich -lmpl -lpthread +MPI_LIB = # FFT library, OPTIONAL # see discussion in doc/Section_start.html#2_2 (step 6) # can be left blank to use provided KISS FFT library # INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings # PATH = path for FFT library # LIB = name of FFT library -FFT_INC = -DFFT_FFTW +FFT_INC = -DFFT_MKL -DFFT_SINGLE FFT_PATH = -FFT_LIB = -lfftw +FFT_LIB = -L$(MKLROOT)/lib/intel64/ -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core # JPEG and/or PNG library, OPTIONAL # see discussion in doc/Section_start.html#2_2 (step 7) # only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC # INC = path(s) for jpeglib.h and/or png.h # PATH = path(s) for JPEG library and/or PNG library # LIB = name(s) of JPEG library and/or PNG library JPG_INC = JPG_PATH = JPG_LIB = -ljpeg # --------------------------------------------------------------------- # build rules and dependencies # no need to edit this section include Makefile.package.settings include Makefile.package EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC) EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH) EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB) # Path to src files vpath %.cpp .. vpath %.h .. 
# Link target $(EXE): $(OBJ) $(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE) $(SIZE) $(EXE) # Library targets lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) shlib: $(OBJ) $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \ $(OBJ) $(EXTRA_LIB) $(LIB) # Compilation rules %.o:%.cpp $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< %.d:%.cpp $(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@ # Individual dependencies DEPENDS = $(OBJ:.o=.d) sinclude $(DEPENDS) diff --git a/src/MAKE/Makefile.linux b/src/MAKE/Makefile.intel_offload similarity index 82% copy from src/MAKE/Makefile.linux copy to src/MAKE/Makefile.intel_offload index c4264dc22..eb4415fc8 100755 --- a/src/MAKE/Makefile.linux +++ b/src/MAKE/Makefile.intel_offload @@ -1,108 +1,109 @@ -# linux = RedHat Linux box, Intel icc, MPICH2, FFTW +# Intel compiler, Intel MPI, MKL FFT, offload to coprocessor SHELL = /bin/sh # --------------------------------------------------------------------- # compiler/linker settings # specify flags and libraries needed for your compiler -CC = icc -CCFLAGS = -O +CC = mpiicpc -openmp -DLMP_INTEL_OFFLOAD -DLAMMPS_MEMALIGN=64 +MIC_OPT = -offload-option,mic,compiler,"-fp-model fast=2 -mGLOB_default_function_attrs=\"gather_scatter_loop_unroll=4\"" +CCFLAGS = -g -O3 -xHost -fno-alias -ansi-alias -restrict -override-limits $(MIC_OPT) SHFLAGS = -fPIC DEPFLAGS = -M -LINK = icc -LINKFLAGS = -O -LIB = -lstdc++ +LINK = mpiicpc -openmp -offload +LINKFLAGS = -O3 -xHost +LIB = SIZE = size ARCHIVE = ar ARFLAGS = -rc SHLIBFLAGS = -shared # --------------------------------------------------------------------- # LAMMPS-specific settings # specify settings for LAMMPS features you will use # if you change any -D setting, do full re-compile after "make clean" # LAMMPS ifdef settings, OPTIONAL # see possible settings in doc/Section_start.html#2_2 (step 4) LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG # MPI library, REQUIRED # see discussion in doc/Section_start.html#2_2 (step 
5) # can point to dummy MPI library in src/STUBS as in Makefile.serial # INC = path for mpi.h, MPI compiler settings # PATH = path for MPI library # LIB = name of MPI library MPI_INC = -DMPICH_SKIP_MPICXX MPI_PATH = -MPI_LIB = -lmpich -lmpl -lpthread +MPI_LIB = # FFT library, OPTIONAL # see discussion in doc/Section_start.html#2_2 (step 6) # can be left blank to use provided KISS FFT library # INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings # PATH = path for FFT library # LIB = name of FFT library -FFT_INC = -DFFT_FFTW +FFT_INC = -DFFT_MKL -DFFT_SINGLE FFT_PATH = -FFT_LIB = -lfftw +FFT_LIB = -L$(MKLROOT)/lib/intel64/ -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core # JPEG and/or PNG library, OPTIONAL # see discussion in doc/Section_start.html#2_2 (step 7) # only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC # INC = path(s) for jpeglib.h and/or png.h # PATH = path(s) for JPEG library and/or PNG library # LIB = name(s) of JPEG library and/or PNG library JPG_INC = JPG_PATH = JPG_LIB = -ljpeg # --------------------------------------------------------------------- # build rules and dependencies # no need to edit this section include Makefile.package.settings include Makefile.package EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC) EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH) EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB) # Path to src files vpath %.cpp .. vpath %.h .. 
# Link target $(EXE): $(OBJ) $(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE) $(SIZE) $(EXE) # Library targets lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) shlib: $(OBJ) $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \ $(OBJ) $(EXTRA_LIB) $(LIB) # Compilation rules %.o:%.cpp $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< %.d:%.cpp $(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@ # Individual dependencies DEPENDS = $(OBJ:.o=.d) sinclude $(DEPENDS) diff --git a/src/MAKE/Makefile.linux b/src/MAKE/Makefile.linux index c4264dc22..d835bed04 100755 --- a/src/MAKE/Makefile.linux +++ b/src/MAKE/Makefile.linux @@ -1,108 +1,108 @@ # linux = RedHat Linux box, Intel icc, MPICH2, FFTW SHELL = /bin/sh # --------------------------------------------------------------------- # compiler/linker settings # specify flags and libraries needed for your compiler CC = icc -CCFLAGS = -O +CCFLAGS = -O -DLAMMPS_MEMALIGN=64 -openmp -restrict SHFLAGS = -fPIC DEPFLAGS = -M LINK = icc -LINKFLAGS = -O +LINKFLAGS = -O -openmp LIB = -lstdc++ SIZE = size ARCHIVE = ar ARFLAGS = -rc SHLIBFLAGS = -shared # --------------------------------------------------------------------- # LAMMPS-specific settings # specify settings for LAMMPS features you will use # if you change any -D setting, do full re-compile after "make clean" # LAMMPS ifdef settings, OPTIONAL # see possible settings in doc/Section_start.html#2_2 (step 4) LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG # MPI library, REQUIRED # see discussion in doc/Section_start.html#2_2 (step 5) # can point to dummy MPI library in src/STUBS as in Makefile.serial # INC = path for mpi.h, MPI compiler settings # PATH = path for MPI library # LIB = name of MPI library MPI_INC = -DMPICH_SKIP_MPICXX MPI_PATH = MPI_LIB = -lmpich -lmpl -lpthread # FFT library, OPTIONAL # see discussion in doc/Section_start.html#2_2 (step 6) # can be left blank to use provided KISS FFT library # INC = -DFFT setting, e.g. 
-DFFT_FFTW, FFT compiler settings # PATH = path for FFT library # LIB = name of FFT library FFT_INC = -DFFT_FFTW FFT_PATH = FFT_LIB = -lfftw # JPEG and/or PNG library, OPTIONAL # see discussion in doc/Section_start.html#2_2 (step 7) # only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC # INC = path(s) for jpeglib.h and/or png.h # PATH = path(s) for JPEG library and/or PNG library # LIB = name(s) of JPEG library and/or PNG library JPG_INC = JPG_PATH = JPG_LIB = -ljpeg # --------------------------------------------------------------------- # build rules and dependencies # no need to edit this section include Makefile.package.settings include Makefile.package EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC) EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH) EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB) # Path to src files vpath %.cpp .. vpath %.h .. # Link target $(EXE): $(OBJ) $(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE) $(SIZE) $(EXE) # Library targets lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) shlib: $(OBJ) $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \ $(OBJ) $(EXTRA_LIB) $(LIB) # Compilation rules %.o:%.cpp $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< %.d:%.cpp $(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@ # Individual dependencies DEPENDS = $(OBJ:.o=.d) sinclude $(DEPENDS) diff --git a/src/MAKE/Makefile.linux b/src/MAKE/Makefile.stampede similarity index 82% copy from src/MAKE/Makefile.linux copy to src/MAKE/Makefile.stampede index c4264dc22..8c9591d11 100755 --- a/src/MAKE/Makefile.linux +++ b/src/MAKE/Makefile.stampede @@ -1,108 +1,109 @@ -# linux = RedHat Linux box, Intel icc, MPICH2, FFTW +# Stampede, Intel Compiler, MKL FFT, Offload to Xeon Phi SHELL = /bin/sh # --------------------------------------------------------------------- # compiler/linker settings # specify flags and libraries needed for your 
compiler -CC = icc -CCFLAGS = -O +CC = mpicc -openmp -DLMP_INTEL_OFFLOAD -DLAMMPS_MEMALIGN=64 +MIC_OPT = -offload-option,mic,compiler,"-fp-model fast=2 -mGLOB_default_function_attrs=\"gather_scatter_loop_unroll=4\"" +CCFLAGS = -O3 -xAVX -fno-alias -ansi-alias -restrict -override-limits $(MIC_OPT) SHFLAGS = -fPIC DEPFLAGS = -M -LINK = icc -LINKFLAGS = -O -LIB = -lstdc++ +LINK = mpicc -openmp +LINKFLAGS = -O3 -xAVX +LIB = SIZE = size ARCHIVE = ar ARFLAGS = -rc SHLIBFLAGS = -shared # --------------------------------------------------------------------- # LAMMPS-specific settings # specify settings for LAMMPS features you will use # if you change any -D setting, do full re-compile after "make clean" # LAMMPS ifdef settings, OPTIONAL # see possible settings in doc/Section_start.html#2_2 (step 4) LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG # MPI library, REQUIRED # see discussion in doc/Section_start.html#2_2 (step 5) # can point to dummy MPI library in src/STUBS as in Makefile.serial # INC = path for mpi.h, MPI compiler settings # PATH = path for MPI library # LIB = name of MPI library MPI_INC = -DMPICH_SKIP_MPICXX MPI_PATH = -MPI_LIB = -lmpich -lmpl -lpthread +MPI_LIB = # FFT library, OPTIONAL # see discussion in doc/Section_start.html#2_2 (step 6) # can be left blank to use provided KISS FFT library # INC = -DFFT setting, e.g. 
-DFFT_FFTW, FFT compiler settings # PATH = path for FFT library # LIB = name of FFT library -FFT_INC = -DFFT_FFTW +FFT_INC = -DFFT_MKL -DFFT_SINGLE -I$(TACC_MKL_INC) FFT_PATH = -FFT_LIB = -lfftw +FFT_LIB = -L$(TACC_MKL_LIB) -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core # JPEG and/or PNG library, OPTIONAL # see discussion in doc/Section_start.html#2_2 (step 7) # only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC # INC = path(s) for jpeglib.h and/or png.h # PATH = path(s) for JPEG library and/or PNG library # LIB = name(s) of JPEG library and/or PNG library JPG_INC = JPG_PATH = JPG_LIB = -ljpeg # --------------------------------------------------------------------- # build rules and dependencies # no need to edit this section include Makefile.package.settings include Makefile.package EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC) EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH) EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB) # Path to src files vpath %.cpp .. vpath %.h .. 
# Link target $(EXE): $(OBJ) $(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE) $(SIZE) $(EXE) # Library targets lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) shlib: $(OBJ) $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \ $(OBJ) $(EXTRA_LIB) $(LIB) # Compilation rules %.o:%.cpp $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< %.d:%.cpp $(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@ # Individual dependencies DEPENDS = $(OBJ:.o=.d) sinclude $(DEPENDS) diff --git a/src/Makefile b/src/Makefile index f8e70a94d..2c4bb15fa 100755 --- a/src/Makefile +++ b/src/Makefile @@ -1,240 +1,240 @@ # LAMMPS multiple-machine Makefile SHELL = /bin/bash #.IGNORE: # Definitions ROOT = lmp EXE = $(ROOT)_$@ SRC = $(wildcard *.cpp) INC = $(wildcard *.h) OBJ = $(SRC:.cpp=.o) # Package variables PACKAGE = asphere body class2 colloid dipole fld gpu granular kim \ kokkos kspace manybody mc meam misc molecule mpiio opt peri poems \ reax replica rigid shock srd voronoi xtc PACKUSER = user-atc user-awpmd user-cg-cmm user-colvars \ - user-cuda user-eff user-fep user-lb user-misc user-molfile \ - user-omp user-phonon user-qmmm user-reaxc user-sph + user-cuda user-eff user-fep user-intel user-lb user-misc \ + user-molfile user-omp user-phonon user-qmmm user-reaxc user-sph PACKLIB = gpu kim meam poems reax voronoi \ user-atc user-awpmd user-colvars user-qmmm user-cuda user-molfile PACKALL = $(PACKAGE) $(PACKUSER) PACKAGEUC = $(shell echo $(PACKAGE) | tr a-z A-Z) PACKUSERUC = $(shell echo $(PACKUSER) | tr a-z A-Z) YESDIR = $(shell echo $(@:yes-%=%) | tr a-z A-Z) NODIR = $(shell echo $(@:no-%=%) | tr a-z A-Z) # List of all targets help: @echo '' @echo 'make clean-all delete all object files' @echo 'make clean-machine delete object files for one machine' @echo 'make purge purge obsolete copies of package sources' @echo 'make tar create lmp_src.tar.gz of src dir and packages' @echo 'make makelib create Makefile.lib for static library build' @echo 'make makeshlib 
create Makefile.shlib for shared library build' @echo 'make makelist create Makefile.list used by old makes' @echo 'make -f Makefile.lib machine build LAMMPS as static library for machine' @echo 'make -f Makefile.shlib machine build LAMMPS as shared library for machine' @echo 'make -f Makefile.list machine build LAMMPS from explicit list of files' @echo 'make stubs build dummy MPI library in STUBS' @echo 'make install-python install LAMMPS wrapper in Python' @echo '' @echo 'make package list available packages' @echo 'make package-status (ps) status of all packages' @echo 'make yes-package install a single package in src dir' @echo 'make no-package remove a single package from src dir' @echo 'make yes-all install all packages in src dir' @echo 'make no-all remove all packages from src dir' @echo 'make yes-standard install all standard packages' @echo 'make no-standard remove all standard packages' @echo 'make yes-user install all user packages' @echo 'make no-user remove all user packages' @echo 'make no-lib remove all packages with external libs' @echo '' @echo 'make package-update (pu) replace src files with updated package files' @echo 'make package-overwrite replace package files with src files' @echo 'make package-diff (pd) diff src files against package files' @echo '' @echo 'make machine build LAMMPS where machine is one of:' @echo '' @files="`ls MAKE/Makefile.*`"; \ for file in $$files; do head -1 $$file; done @echo '' # Build the code .DEFAULT: @test -f MAKE/Makefile.$@ @if [ ! -d Obj_$@ ]; then mkdir Obj_$@; fi @$(SHELL) Make.sh style @cp MAKE/Makefile.$@ Obj_$@/Makefile @if [ ! -e Makefile.package ]; \ then cp Makefile.package.empty Makefile.package; fi @if [ ! 
-e Makefile.package.settings ]; \ then cp Makefile.package.settings.empty Makefile.package.settings; fi @cp Makefile.package Makefile.package.settings Obj_$@ @cd Obj_$@; \ $(MAKE) $(MFLAGS) "OBJ = $(OBJ)" "INC = $(INC)" "SHFLAGS =" \ "EXE = ../$(EXE)" ../$(EXE) # Remove machine-specific object files clean: @echo 'make clean-all delete all object files' @echo 'make clean-machine delete object files for one machine' clean-all: rm -rf Obj_* clean-%: rm -rf Obj_$(@:clean-%=%) purge: Purge.list @echo 'Purging obsolete and auto-generated source files' @for f in `grep -v '#' Purge.list` ; \ do test -f $$f && rm $$f && echo $$f || : ; \ done # Create a tarball of src dir and packages tar: @cd STUBS; $(MAKE) clean @cd ..; tar cvzf src/$(ROOT)_src.tar.gz \ src/Make* src/Package.sh src/MAKE src/*.cpp src/*.h src/STUBS \ $(patsubst %,src/%,$(PACKAGEUC)) $(patsubst %,src/%,$(PACKUSERUC)) \ --exclude=*/.svn @cd STUBS; $(MAKE) @echo "Created $(ROOT)_src.tar.gz" # Make MPI STUBS library stubs: @cd STUBS; $(MAKE) clean; $(MAKE) # Create Makefile.lib, Makefile.shlib, and Makefile.list makelib: @$(SHELL) Make.sh style @$(SHELL) Make.sh Makefile.lib makeshlib: @$(SHELL) Make.sh style @$(SHELL) Make.sh Makefile.shlib makelist: @$(SHELL) Make.sh style @$(SHELL) Make.sh Makefile.list # install LAMMPS shared lib and Python wrapper for Python usage install-python: @python ../python/install.py # Package management package: @echo 'Standard packages:' $(PACKAGE) @echo '' @echo 'User-contributed packages:' $(PACKUSER) @echo '' @echo 'make package list available packages' @echo 'make package-status (ps) status of all packages' @echo 'make yes-package install a single package in src dir' @echo 'make no-package remove a single package from src dir' @echo 'make yes-all install all packages in src dir' @echo 'make no-all remove all packages from src dir' @echo 'make yes-standard install all standard packages' @echo 'make no-standard remove all standard packages' @echo 'make yes-user install all 
# yes-<package>: install one package into the src dir.
# Uses the package's own Install.sh when it has one, otherwise the generic
# ../Install.sh, then lets Depend.sh update dependent packages.

yes-%:
	@test -e Makefile.package || \
	  cp Makefile.package.empty Makefile.package
	@test -e Makefile.package.settings || \
	  cp Makefile.package.settings.empty Makefile.package.settings
	@if [ ! -e $(YESDIR) ]; then \
	  echo "Package $(@:yes-%=%) does not exist"; \
	else \
	  if [ -e $(YESDIR)/Install.sh ]; then \
	    script=Install.sh; \
	  else \
	    script=../Install.sh; \
	  fi; \
	  echo "Installing package $(@:yes-%=%)"; \
	  cd $(YESDIR); $(SHELL) $$script 1; cd ..; \
	  $(SHELL) Depend.sh $(YESDIR) 1; \
	fi;
# action: install, update, or remove one package file in the src directory
# arg1 = file, arg2 = (optional) file it depends on
# reads $mode: 0 = uninstall, 1 = install, 2 = update

action () {
  # uninstall: always drop the copy in src
  if (test $mode = 0) then
    rm -f ../$1
    return
  fi

  if (cmp -s $1 ../$1) then
    # src copy is already identical; keep it unless its parent file is gone
    if (test -n "$2" && test ! -e ../$2) then
      rm -f ../$1
    fi
  elif (test -z "$2" || test -e ../$2) then
    # src copy is missing or differs, and the parent (if any) is installed
    cp $1 ..
    if (test $mode = 2) then
      echo " updating src/$1"
    fi
  fi
}
+# do not install child files if parent does not exist + +for file in *_intel.cpp; do + test $file = thr_intel.cpp && continue + dep=`echo $file | sed 's/neigh_full_intel/neigh_full/g' | \ + sed 's/_offload_intel//g' | sed 's/_intel//g'` + action $file $dep +done + +for file in *_intel.h; do + test $file = thr_intel.h && continue + dep=`echo $file | sed 's/_offload_intel//g' | sed 's/_intel//g'` + action $file $dep +done + +action intel_preprocess.h +action intel_buffers.h +action intel_buffers.cpp +action math_extra_intel.h + +# step 2: handle cases and tasks not handled in step 1. + +if (test $mode = 1) then + + if (test -e ../Makefile.package) then + sed -i -e 's/[^ \t]*INTEL[^ \t]* //' ../Makefile.package + sed -i -e 's|^PKG_INC =[ \t]*|&-DLMP_USER_INTEL |' ../Makefile.package + fi + + # force rebuild of files with LMP_USER_INTEL switch + + touch ../accelerator_intel.h + +elif (test $mode = 0) then + + if (test -e ../Makefile.package) then + sed -i -e 's/[^ \t]*INTEL[^ \t]* //' ../Makefile.package + fi + + # force rebuild of files with LMP_USER_INTEL switch + + touch ../accelerator_intel.h + +fi + +# step 3: map omp styles that are not in the intel package to intel suffix + +#if (test $mode = 0) then +# +# rm -f ../*ompinto_intel* +# +#else +# +# echo " The 'intel' suffix will use the USER-OMP package for all" +# echo " angle, bond, dihedral, kspace, and improper styles:" +# stylelist="pair fix angle bond dihedral improper" +# for header in $stylelist; do +# HEADER=`echo $header | sed 's/\(.*\)/\U\1/'` +# outfile=../$header"_ompinto_intel.h" +# echo " Creating $header style map: $outfile" +# echo -n "// -- Header to map USER-OMP " > $outfile +# echo "styles to the intel suffix" >> $outfile +# echo >> $outfile +# echo "#ifdef "$HEADER"_CLASS" >> $outfile +# grep -h 'Style(' ../$header*_omp.h | grep -v 'charmm/coul/long' | \ +# grep -v 'lj/cut' | grep -v 'gayberne' | \ +# sed 's/\/omp/\/intel/g' >> $outfile +# echo "#endif" >> $outfile +# done +# +# 
header="kspace" +# HEADER="KSPACE" +# outfile=../$header"_ompinto_intel.h" +# echo " Creating $header style map: $outfile" +# echo -n "// -- Header to map USER-OMP " > $outfile +# echo "styles to the intel suffix" >> $outfile +# echo >> $outfile +# echo "#ifdef "$HEADER"_CLASS" >> $outfile +# grep -h 'KSpaceStyle(' ../*_omp.h | sed 's/\/omp/\/intel/g' >> $outfile +# echo "#endif" >> $outfile +# +#fi diff --git a/src/USER-INTEL/README b/src/USER-INTEL/README new file mode 100644 index 000000000..0b38928b2 --- /dev/null +++ b/src/USER-INTEL/README @@ -0,0 +1,35 @@ + + -------------------------------- + LAMMPS Intel Package + -------------------------------- + + W. Michael Brown (Intel) + michael.w.brown at intel.com + +----------------------------------------------------------------------------- + +This package is based on the USER-OMP package and provides LAMMPS styles that: + + 1. include support for single and mixed precision in addition to double. + 2. include modifications to support vectorization for key routines + 3. include modifications to support offload to Xeon Phi coprocessors + +----------------------------------------------------------------------------- + +When using the suffix command with "intel", intel styles will be used if they +exist; if they do not, and an omp version exists, that style will be used. +This is accomplished through the files *ompinto_intel.h that are created +in the src directory when the intel package is installed. For example, + + kspace_style pppm/intel 1e-4 + +is equivalent to: + + kspace_style pppm/omp 1e-4 + +because no pppm style has been implemented for the Intel package. + +----------------------------------------------------------------------------- + +In order to use offload to Xeon Phi, the flag -DLMP_INTEL_OFFLOAD should be +set in the Makefile. Offload requires the use of Intel compilers. 
diff --git a/src/USER-INTEL/fix_intel.cpp b/src/USER-INTEL/fix_intel.cpp new file mode 100644 index 000000000..8fd3003b4 --- /dev/null +++ b/src/USER-INTEL/fix_intel.cpp @@ -0,0 +1,530 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: W. Michael Brown (Intel) +------------------------------------------------------------------------- */ + +#include "comm.h" +#include "error.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_request.h" +#include "pair.h" +#include "pair_hybrid.h" +#include "pair_hybrid_overlay.h" +#include "timer.h" +#include "universe.h" +#include "update.h" +#include "fix_intel.h" + +#include +#include +#include + +#include "suffix.h" + +using namespace LAMMPS_NS; +using namespace FixConst; + +#ifdef __INTEL_OFFLOAD +#ifndef _LMP_INTEL_OFFLOAD +#warning "Not building Intel package with Xeon Phi offload support." 
+#endif +#endif + +enum{NSQ,BIN,MULTI}; + +/* ---------------------------------------------------------------------- */ + +FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg) +{ + if (narg < 4) + error->all(FLERR, "Illegal package intel command"); + if (strcmp(arg[1],"all") != 0) + error->all(FLERR, "fix Intel has to operate on group 'all'"); + + _precision_mode = PREC_MODE_MIXED; + _offload_balance = 1.0; + _overflow_flag[LMP_OVERFLOW] = 0; + _off_overflow_flag[LMP_OVERFLOW] = 0; + + _offload_affinity_balanced = 0; + _offload_threads = 1; + _offload_tpc = 4; + + #ifdef _LMP_INTEL_OFFLOAD + _offload_affinity_set = 0; + _off_force_array_s = 0; + _off_force_array_m = 0; + _off_force_array_d = 0; + _off_ev_array_s = 0; + _off_ev_array_d = 0; + _balance_fixed = 0.0; + + _cop = 0; + + int max_offload_threads, offload_cores; + #pragma offload target(mic:_cop) mandatory \ + out(max_offload_threads,offload_cores) + { + offload_cores = omp_get_num_procs(); + omp_set_num_threads(offload_cores); + max_offload_threads = omp_get_max_threads(); + } + _max_offload_threads = max_offload_threads; + _offload_cores = offload_cores; + _offload_threads = offload_cores; + #endif + int ncops = 1; + _allow_separate_buffers = 1; + _offload_ghost = -1; + + int iarg = 4; + while (iarg < narg) { + if (strcmp(arg[iarg], "mixed") == 0) + _precision_mode = PREC_MODE_MIXED; + else if (strcmp(arg[iarg], "double") == 0) + _precision_mode = PREC_MODE_DOUBLE; + else if (strcmp(arg[iarg], "single") == 0) + _precision_mode = PREC_MODE_SINGLE; + else if (strcmp(arg[iarg], "offload_affinity_balanced") == 0) + _offload_affinity_balanced = 1; + else if (strcmp(arg[iarg], "balance") == 0) { + if (iarg == narg - 1) + error->all(FLERR, "Illegal package intel mode requested"); + ++iarg; + _offload_balance = force->numeric(FLERR,arg[iarg]); + } else if (strcmp(arg[iarg], "offload_threads") == 0) { + if (iarg == narg - 1) + error->all(FLERR, "Illegal package intel mode requested"); + 
++iarg; + _offload_threads = atoi(arg[iarg]); + } else if (strcmp(arg[iarg], "offload_tpc") == 0) { + if (iarg == narg - 1) + error->all(FLERR, "Illegal package intel mode requested"); + ++iarg; + _offload_tpc = atoi(arg[iarg]); + } else if (strcmp(arg[iarg], "offload_cards") == 0) { + if (iarg == narg - 1) + error->all(FLERR, "Illegal package intel mode requested"); + ++iarg; + ncops = atoi(arg[iarg]); + } else if (strcmp(arg[iarg], "buffers") == 0) { + if (iarg == narg - 1) + error->all(FLERR, "Illegal package intel mode requested"); + ++iarg; + _allow_separate_buffers = atoi(arg[iarg]); + } else if (strcmp(arg[iarg], "offload_ghost") == 0) { + if (iarg == narg - 1) + error->all(FLERR, "Illegal package intel mode requested"); + ++iarg; + _offload_ghost = atoi(arg[iarg]); + } else + error->all(FLERR, "Illegal package intel mode requested"); + ++iarg; + } + + if (_offload_balance > 1.0 || _offload_threads <= 0 || + _offload_tpc <= 0 || _offload_tpc > 4) + error->all(FLERR, "Illegal package intel mode requested"); + + #ifdef _LMP_INTEL_OFFLOAD + _ncops = ncops; + if (_offload_balance < 0.0) { + _balance_neighbor = 0.9; + _balance_pair = 0.9; + } else { + _balance_neighbor = _offload_balance; + _balance_pair = _offload_balance; + } + + _tscreen = screen; + zero_timers(); + _setup_time_cleared = false; + _timers_allocated = false; + #else + _offload_balance = 0.0; + #endif + + if (_precision_mode == PREC_MODE_SINGLE) + _single_buffers = new IntelBuffers(lmp); + else if (_precision_mode == PREC_MODE_MIXED) + _mixed_buffers = new IntelBuffers(lmp); + else + _double_buffers = new IntelBuffers(lmp); +} + +/* ---------------------------------------------------------------------- */ + +FixIntel::~FixIntel() +{ + #ifdef _LMP_INTEL_OFFLOAD + output_timing_data(); + if (_timers_allocated) { + double *time1 = off_watch_pair(); + double *time2 = off_watch_neighbor(); + int *overflow = get_off_overflow_flag(); + if (time1 != NULL && time2 != NULL && overflow != NULL) { + #pragma 
offload_transfer target(mic:_cop) \ + nocopy(time1,time2,overflow:alloc_if(0) free_if(1)) + } + } + #endif + + if (_precision_mode == PREC_MODE_SINGLE) + delete _single_buffers; + else if (_precision_mode == PREC_MODE_MIXED) + delete _mixed_buffers; + else + delete _double_buffers; +} + +/* ---------------------------------------------------------------------- */ + +int FixIntel::setmask() +{ + int mask = 0; + return mask; +} + +/* ---------------------------------------------------------------------- */ + +void FixIntel::init() +{ + #ifdef _LMP_INTEL_OFFLOAD + if (_offload_balance != 0.0) atom->sortfreq = 1; + + if (force->newton_pair == 0) + _offload_noghost = 0; + else if (_offload_ghost == 0) + _offload_noghost = 1; + + set_offload_affinity(); + + output_timing_data(); + if (!_timers_allocated) { + double *time1 = off_watch_pair(); + double *time2 = off_watch_neighbor(); + int *overflow = get_off_overflow_flag(); + if (time1 != NULL && time2 != NULL && overflow != NULL) { + #pragma offload_transfer target(mic:_cop) \ + nocopy(time1,time2:length(1) alloc_if(1) free_if(0)) \ + in(overflow:length(5) alloc_if(1) free_if(0)) + } + _timers_allocated = true; + } + + char kmode[80]; + if (_precision_mode == PREC_MODE_SINGLE) + strcpy(kmode, "single"); + else if (_precision_mode == PREC_MODE_MIXED) + strcpy(kmode, "mixed"); + else + strcpy(kmode, "double"); + + // print summary of settings + if (comm->me == 0) { + if (screen) { + #ifdef _LMP_INTEL_OFFLOAD + if (_offload_balance != 0.0) { + fprintf(screen,"using offload with %d threads per core, ",_offload_tpc); + fprintf(screen,"%d threads per task\n",_offload_threads); + } + #endif + } + } + if (update->whichflag == 2 && _offload_balance != 0.0) { + if (_offload_balance == 1.0 && _offload_noghost == 0) + _sync_at_pair = 1; + else + _sync_at_pair = 2; + } else { + _sync_at_pair = 0; + if (strstr(update->integrate_style,"intel") == 0) + error->all(FLERR, + "Specified run_style does not support the Intel package."); + } + 
#endif + + if (neighbor->style != BIN) + error->all(FLERR, + "Currently, neighbor style BIN must be used with Intel package."); + if (neighbor->exclude_setting() != 0) + error->all(FLERR, + "Currently, cannot use neigh_modify exclude with Intel package."); + int nstyles = 0; + if (force->pair_match("hybrid", 1) != NULL) { + PairHybrid *hybrid = (PairHybrid *) force->pair; + for (int i = 0; i < hybrid->nstyles; i++) + if (strstr(hybrid->keywords[i], "/intel") == NULL) + nstyles++; + } else if (force->pair_match("hybrid/overlay", 1) != NULL) { + PairHybridOverlay *hybrid = (PairHybridOverlay *) force->pair; + for (int i = 0; i < hybrid->nstyles; i++) + if (strstr(hybrid->keywords[i], "/intel") == NULL) + nstyles++; + else + force->pair->no_virial_fdotr_compute = 1; + } + if (nstyles > 1) + error->all(FLERR, + "Currently, cannot use more than one intel style with hybrid."); + + neighbor->fix_intel = (void *)this; + _nthreads = comm->nthreads; + + check_neighbor_intel(); + if (_precision_mode == PREC_MODE_SINGLE) + _single_buffers->zero_ev(); + else if (_precision_mode == PREC_MODE_MIXED) + _mixed_buffers->zero_ev(); + else + _double_buffers->zero_ev(); +} + +/* ---------------------------------------------------------------------- */ + +void FixIntel::check_neighbor_intel() +{ + #ifdef _LMP_INTEL_OFFLOAD + _full_host_list = 0; + #endif + const int nrequest = neighbor->nrequest; + + for (int i = 0; i < nrequest; ++i) { + #ifdef _LMP_INTEL_OFFLOAD + if (_offload_balance != 0.0 && neighbor->requests[i]->intel == 0) { + _full_host_list = 1; + _offload_noghost = 0; + } + #endif + if (neighbor->requests[i]->skip) + error->all(FLERR, "Cannot yet use hybrid styles with Intel package."); + } +} + +/* ---------------------------------------------------------------------- */ + +void FixIntel::sync_coprocessor() +{ + #ifdef _LMP_INTEL_OFFLOAD + if (_offload_balance != 0.0) { + if (_off_force_array_m != 0) { + add_off_results(_off_force_array_m, _off_ev_array_d); + 
_off_force_array_m = 0; + } else if (_off_force_array_d != 0) { + add_off_results(_off_force_array_d, _off_ev_array_d); + _off_force_array_d = 0; + } else if (_off_force_array_s != 0) { + add_off_results(_off_force_array_s, _off_ev_array_s); + _off_force_array_s = 0; + } + } + #endif +} + +/* ---------------------------------------------------------------------- */ + +double FixIntel::memory_usage() +{ + double bytes; + if (_precision_mode == PREC_MODE_SINGLE) + bytes = _single_buffers->memory_usage(_nthreads); + else if (_precision_mode == PREC_MODE_MIXED) + bytes = _mixed_buffers->memory_usage(_nthreads); + else + bytes = _double_buffers->memory_usage(_nthreads); + + return bytes; +} + +/* ---------------------------------------------------------------------- */ + +#ifdef _LMP_INTEL_OFFLOAD + +void FixIntel::output_timing_data() { + if (_im_real_space_task == 0 || _offload_affinity_set == 0) return; + + double timer_total = 0.0; + int size, rank; + double timers[NUM_ITIMERS]; + MPI_Comm_size(_real_space_comm, &size); + MPI_Comm_rank(_real_space_comm, &rank); + MPI_Allreduce(&_timers, &timers, NUM_ITIMERS, MPI_DOUBLE, MPI_SUM, + _real_space_comm); + for (int i=0; i < NUM_ITIMERS; i++) { + timers[i] /= size; + timer_total += timers[i]; + } + #ifdef TIME_BALANCE + double timers_min[NUM_ITIMERS], timers_max[NUM_ITIMERS]; + MPI_Allreduce(&_timers, &timers_max, NUM_ITIMERS, MPI_DOUBLE, MPI_MAX, + _real_space_comm); + MPI_Allreduce(&_timers, &timers_min, NUM_ITIMERS, MPI_DOUBLE, MPI_MIN, + _real_space_comm); + #endif + + if (timer_total > 0.0) { + double balance_out[2], balance_in[2]; + balance_out[0] = _balance_pair; + balance_out[1] = _balance_neighbor; + MPI_Reduce(balance_out, balance_in, 2, MPI_DOUBLE, MPI_SUM, + 0, _real_space_comm); + balance_in[0] /= size; + balance_in[1] /= size; + + if (rank == 0 && _tscreen) { + fprintf(_tscreen, "\n------------------------------------------------\n"); + fprintf(_tscreen, " Offload Timing Data\n"); + fprintf(_tscreen, 
"------------------------------------------------\n"); + fprintf(_tscreen, " Data Pack/Cast Seconds %f\n", + timers[TIME_PACK]); + if (_offload_balance != 0.0) { + fprintf(_tscreen, " Host Neighbor Seconds %f\n", + timers[TIME_HOST_NEIGHBOR]); + fprintf(_tscreen, " Host Pair Seconds %f\n", + timers[TIME_HOST_PAIR]); + fprintf(_tscreen, " Offload Neighbor Seconds %f\n", + timers[TIME_OFFLOAD_NEIGHBOR]); + fprintf(_tscreen, " Offload Pair Seconds %f\n", + timers[TIME_OFFLOAD_PAIR]); + fprintf(_tscreen, " Offload Wait Seconds %f\n", + timers[TIME_OFFLOAD_WAIT]); + fprintf(_tscreen, " Offload Latency Seconds %f\n", + timers[TIME_OFFLOAD_LATENCY]); + fprintf(_tscreen, " Offload Neighbor Balance %f\n", + balance_in[1]); + fprintf(_tscreen, " Offload Pair Balance %f\n", + balance_in[0]); + fprintf(_tscreen, " Offload Ghost Atoms "); + if (_offload_noghost) fprintf(_tscreen,"No\n"); + else fprintf(_tscreen,"Yes\n"); + #ifdef TIME_BALANCE + fprintf(_tscreen, " Offload Imbalance Seconds %f\n", + timers[TIME_IMBALANCE]); + fprintf(_tscreen, " Offload Min/Max Seconds "); + for (int i = 0; i < NUM_ITIMERS; i++) + fprintf(_tscreen, "[%f, %f] ",timers_min[i],timers_max[i]); + fprintf(_tscreen, "\n"); + #endif + } + fprintf(_tscreen, "------------------------------------------------\n"); + } + zero_timers(); + _setup_time_cleared = false; + } +} + +/* ---------------------------------------------------------------------- */ + +int FixIntel::get_ppn(int &node_rank) { + int nprocs; + int rank; + MPI_Comm_size(_real_space_comm, &nprocs); + MPI_Comm_rank(_real_space_comm, &rank); + + int name_length; + char node_name[MPI_MAX_PROCESSOR_NAME]; + MPI_Get_processor_name(node_name,&name_length); + node_name[name_length] = '\0'; + char *node_names = new char[MPI_MAX_PROCESSOR_NAME*nprocs]; + MPI_Allgather(node_name, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, node_names, + MPI_MAX_PROCESSOR_NAME, MPI_CHAR, _real_space_comm); + int ppn = 0; + node_rank = 0; + for (int i = 0; i < nprocs; i++) { + if 
(strcmp(node_name, node_names + i * MPI_MAX_PROCESSOR_NAME) == 0) { + ppn++; + if (i < rank) + node_rank++; + } + } + + return ppn; +} + +/* ---------------------------------------------------------------------- */ + +void FixIntel::set_offload_affinity() +{ + _separate_buffers = 0; + if (_allow_separate_buffers) + if (_offload_balance != 0.0 && _offload_balance < 1.0) + _separate_buffers = 1; + + _im_real_space_task = 1; + if (strncmp(update->integrate_style,"verlet/split",12) == 0) { + _real_space_comm = world; + if (universe->iworld != 0) { + _im_real_space_task = 0; + return; + } + } else + _real_space_comm = universe->uworld; + + if (_offload_balance == 0.0) _cop = -1; + if (_offload_balance == 0.0 || _offload_affinity_set == 1) + return; + + _offload_affinity_set = 1; + int node_rank; + int ppn = get_ppn(node_rank); + + if (ppn % _ncops != 0) + error->all(FLERR, "MPI tasks per node must be multiple of offload_cards"); + ppn = ppn / _ncops; + _cop = node_rank / ppn; + node_rank = node_rank % ppn; + + int max_threads_per_task = _offload_cores / 4 * _offload_tpc / ppn; + if (_offload_threads > max_threads_per_task) + _offload_threads = max_threads_per_task; + if (_offload_threads > _max_offload_threads) + _offload_threads = _max_offload_threads; + + int offload_threads = _offload_threads; + int offload_tpc = _offload_tpc; + int offload_affinity_balanced = _offload_affinity_balanced; + #pragma offload target(mic:_cop) mandatory \ + in(node_rank,offload_threads,offload_tpc,offload_affinity_balanced) + { + omp_set_num_threads(offload_threads); + #pragma omp parallel + { + int tnum = omp_get_thread_num(); + kmp_affinity_mask_t mask; + kmp_create_affinity_mask(&mask); + int proc; + if (offload_affinity_balanced) { + proc = offload_threads * node_rank + tnum; + proc = proc * 4 - (proc / 60) * 240 + proc / 60 + 1; + } else { + proc = offload_threads * node_rank + tnum; + proc += (proc / 4) * (4 - offload_tpc) + 1; + } + kmp_set_affinity_mask_proc(proc, &mask); + if 
(kmp_set_affinity(&mask) != 0) + printf("Could not set affinity on rank %d thread %d to %d\n", + node_rank, tnum, proc); + } + } + if (_precision_mode == PREC_MODE_SINGLE) + _single_buffers->set_off_params(offload_threads, _cop, _separate_buffers); + else if (_precision_mode == PREC_MODE_MIXED) + _mixed_buffers->set_off_params(offload_threads, _cop, _separate_buffers); + else + _double_buffers->set_off_params(offload_threads, _cop, _separate_buffers); +} + +#endif diff --git a/src/USER-INTEL/fix_intel.h b/src/USER-INTEL/fix_intel.h new file mode 100644 index 000000000..82ebc734a --- /dev/null +++ b/src/USER-INTEL/fix_intel.h @@ -0,0 +1,593 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(Intel,FixIntel)
+
+#else
+
+#ifndef LMP_FIX_INTEL_H
+#define LMP_FIX_INTEL_H
+
+#include "fix.h"
+#include "intel_buffers.h"
+#include "force.h"
+#include "pair.h"
+#include "error.h"
+#include "update.h"
+
+namespace LAMMPS_NS {
+
+class IntelData;
+template <class flt_t, class acc_t> class IntelBuffers;
+
+// Fix that holds one IntelBuffers object per precision mode (single, mixed,
+// double) and adds the force/energy/virial results handed over by callers of
+// add_result_array() to the regular LAMMPS arrays.  When _LMP_INTEL_OFFLOAD
+// is defined it also carries the atom ranges, timers and balance fractions
+// used for coprocessor offload.
+class FixIntel : public Fix {
+ public:
+  FixIntel(class LAMMPS *, int, char **);
+  virtual ~FixIntel();
+  virtual int setmask();
+  virtual void init();
+
+  // Get all forces, calculation results from coprocessor
+  void sync_coprocessor();
+
+  double memory_usage();
+
+  // Three doubles; used to view atom->f / atom->torque rows as x,y,z.
+  typedef struct { double x,y,z; } lmp_ft;
+
+  enum {PREC_MODE_SINGLE, PREC_MODE_MIXED, PREC_MODE_DOUBLE};
+
+  // Returns one of the PREC_MODE_* values (presumably; it returns
+  // _precision_mode unchanged).
+  inline int precision() { return _precision_mode; }
+  inline IntelBuffers<float,float> * get_single_buffers()
+    { return _single_buffers; }
+  inline IntelBuffers<float,double> * get_mixed_buffers()
+    { return _mixed_buffers; }
+  inline IntelBuffers<double,double> * get_double_buffers()
+    { return _double_buffers; }
+
+ protected:
+  IntelBuffers<float,float> *_single_buffers;
+  IntelBuffers<float,double> *_mixed_buffers;
+  IntelBuffers<double,double> *_double_buffers;
+
+  int _precision_mode, _nthreads;
+
+ public:
+  inline int* get_overflow_flag() { return _overflow_flag; }
+  inline int* get_off_overflow_flag() { return _off_overflow_flag; }
+
+  // Hand a finished force/energy buffer to the fix.  The three overloads
+  // differ only in buffer precision (double, mixed, single); they must have
+  // distinct parameter types or the declarations collide.
+  inline void add_result_array(IntelBuffers<double,double>::vec3_acc_t *f_in,
+                               double *ev_in, const int offload,
+                               const int eatom = 0, const int vatom = 0);
+  inline void add_result_array(IntelBuffers<float,double>::vec3_acc_t *f_in,
+                               double *ev_in, const int offload,
+                               const int eatom = 0, const int vatom = 0);
+  inline void add_result_array(IntelBuffers<float,float>::vec3_acc_t *f_in,
+                               float *ev_in, const int offload,
+                               const int eatom = 0, const int vatom = 0);
+  // Reports the atom counts (nlocal, nall) and first local index (minlocal)
+  // that apply to the host (offload == 0) or offload (offload != 0) buffer.
+  inline void get_buffern(const int offload, int &nlocal, int &nall,
+                          int &minlocal);
+
+  #ifdef _LMP_INTEL_OFFLOAD
+  inline int coprocessor_number() { return _cop; }
+  inline int full_host_list() { return _full_host_list; }
+  void set_offload_affinity();
+  inline double offload_balance() { return _offload_balance; }
+  // Number of local atoms given to the coprocessor: fraction * nlocal,
+  // truncated to int by the return type.
+  inline int offload_end_neighbor() { return _balance_neighbor * atom->nlocal; }
+  inline int offload_end_pair();
+  inline int host_start_neighbor()
+    { if (_offload_noghost) return 0; else return offload_end_neighbor(); }
+  inline int host_start_pair()
+    { if (_offload_noghost) return 0; else return offload_end_pair(); }
+  inline int offload_nlocal() { return _offload_nlocal; }
+  inline int offload_nall() { return _offload_nall; }
+  inline int offload_min_ghost() { return _offload_min_ghost; }
+  inline int host_min_local() { return _host_min_local; }
+  inline int host_min_ghost() { return _host_min_ghost; }
+  inline int host_used_local() { return _host_used_local; }
+  inline int host_used_ghost() { return _host_used_ghost; }
+  inline int host_nall() { return _host_nall; }
+  inline int separate_buffers() { return _separate_buffers; }
+  inline int offload_noghost() { return _offload_noghost; }
+  // Only takes effect while _offload_ghost is negative.
+  inline void set_offload_noghost(const int v)
+    { if (_offload_ghost < 0) _offload_noghost = v; }
+  inline void set_neighbor_host_sizes();
+
+  inline void zero_timers()
+    { memset(_timers, 0, sizeof(double) * NUM_ITIMERS); }
+  inline void start_watch(const int which) { _stopwatch[which] = MPI_Wtime(); }
+  inline double stop_watch(const int which);
+  inline double * off_watch_pair() { return _stopwatch_offload_pair; }
+  inline double * off_watch_neighbor() { return _stopwatch_offload_neighbor; }
+  inline void balance_stamp();
+  inline void acc_timers();
+  #else
+  // Host-only build: same entry points, reduced to constants / no-ops.
+  inline int offload_end_neighbor() { return 0; }
+  inline int offload_end_pair() { return 0; }
+  inline int host_start_neighbor() { return 0; }
+  inline int host_start_pair() { return 0; }
+  inline void zero_timers() {}
+  inline void start_watch(const int which) {}
+  inline double stop_watch(const int which) { return 0.0; }
+  double * off_watch_pair() { return NULL; }
+  double * off_watch_neighbor() { return NULL; }
+  inline void balance_stamp() {}
+  inline void acc_timers() {}
+  inline int separate_buffers() { return 0; }
+  #endif
+
+ protected:
+  // Indexed with LMP_OVERFLOW, LMP_LOCAL_MIN, LMP_LOCAL_MAX, LMP_GHOST_MIN,
+  // LMP_GHOST_MAX.
+  int _overflow_flag[5];
+  __declspec(align(64)) int _off_overflow_flag[5];
+  int _allow_separate_buffers, _offload_ghost;
+  #ifdef _LMP_INTEL_OFFLOAD
+  double _balance_pair_time, _balance_other_time;
+  int _offload_nlocal, _offload_nall, _offload_min_ghost, _offload_nghost;
+  int _host_min_local, _host_min_ghost, _host_nall;
+  int _host_used_local, _host_used_ghost;
+  int _separate_buffers, _offload_noghost, _sync_at_pair;
+  bool _setup_time_cleared, _timers_allocated;
+  void output_timing_data();
+  FILE *_tscreen;
+
+  // Buffers recorded by add_result_array() when called with offload != 0.
+  IntelBuffers<float,float>::vec3_acc_t *_off_force_array_s;
+  IntelBuffers<float,double>::vec3_acc_t *_off_force_array_m;
+  IntelBuffers<double,double>::vec3_acc_t *_off_force_array_d;
+  float *_off_ev_array_s;
+  double *_off_ev_array_d;
+  int _off_results_eatom, _off_results_vatom;
+  int _full_host_list, _cop, _ncops;
+
+  int get_ppn(int &);
+  #endif
+  void check_neighbor_intel();
+
+  double _offload_balance, _balance_neighbor, _balance_pair, _balance_fixed;
+  double _timers[NUM_ITIMERS];
+  double _stopwatch[NUM_ITIMERS];
+  __declspec(align(64)) double _stopwatch_offload_neighbor[1];
+  __declspec(align(64)) double _stopwatch_offload_pair[1];
+
+  // Adds a whole result buffer; chooses the atom range(s) and forwards to
+  // add_oresults().
+  template <class ft, class acc_t>
+  inline void add_results(const ft * restrict const f_in,
+                          const acc_t * restrict const ev_global,
+                          const int eatom, const int vatom,
+                          const int offload);
+
+  // Adds nall entries of f_in to the LAMMPS arrays starting at out_offset.
+  template <class ft, class acc_t>
+  inline void add_oresults(const ft * restrict const f_in,
+                           const acc_t * restrict const ev_global,
+                           const int eatom, const int vatom,
+                           const int out_offset, const int nall);
+
+  int _offload_affinity_balanced, _offload_threads, _offload_tpc;
+  #ifdef _LMP_INTEL_OFFLOAD
+  int _max_offload_threads, _offload_cores, _offload_affinity_set;
+  int _im_real_space_task;
+  MPI_Comm _real_space_comm;
+  // Waits for the offload results, updates the balance and adds the results.
+  template <class ft, class acc_t>
+  inline void add_off_results(const ft * restrict const f_in,
+                              const acc_t * restrict const ev_global);
+  #endif
+};
+
+/* 
---------------------------------------------------------------------- */
+
+// Report the atom range that applies to the host or offload buffer.
+//   offload  - non-zero to query the offload buffer
+//   nlocal   - out: number of local atoms
+//   nall     - out: number of atoms including ghosts in that buffer
+//   minlocal - out: index of the first local atom used
+void FixIntel::get_buffern(const int offload, int &nlocal, int &nall,
+                           int &minlocal) {
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (_separate_buffers) {
+    if (offload) {
+      if (neighbor->ago != 0) {
+        nlocal = _offload_nlocal;
+        nall = _offload_nall;
+      } else {
+        nlocal = atom->nlocal;
+        nall = nlocal + atom->nghost;
+      }
+      minlocal = 0;
+    } else {
+      nlocal = atom->nlocal;
+      nall = _host_nall;
+      minlocal = _host_min_local;
+    }
+    return;
+  }
+  // The trailing "else" pairs with the nall assignment after #endif.
+  if (_offload_noghost && offload)
+    nall = atom->nlocal;
+  else
+  #endif
+    nall = atom->nlocal + atom->nghost;
+  nlocal = atom->nlocal;
+  minlocal = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Double precision results.  For offload != 0 the buffers are only recorded
+// (and the coprocessor synced when _sync_at_pair == 1); otherwise they are
+// added to the LAMMPS arrays at once.
+void FixIntel::add_result_array(IntelBuffers<double,double>::vec3_acc_t *f_in,
+                                double *ev_in, const int offload,
+                                const int eatom, const int vatom) {
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (offload) {
+    _off_results_eatom = eatom;
+    _off_results_vatom = vatom;
+    _off_force_array_d = f_in;
+    _off_ev_array_d = ev_in;
+    if (_sync_at_pair == 1) sync_coprocessor();
+    return;
+  }
+  #endif
+  add_results(f_in, ev_in, eatom, vatom, 0);
+  if (_overflow_flag[LMP_OVERFLOW])
+    error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (_sync_at_pair) sync_coprocessor();
+  #endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Mixed precision results; same flow as the double precision overload.
+void FixIntel::add_result_array(IntelBuffers<float,double>::vec3_acc_t *f_in,
+                                double *ev_in, const int offload,
+                                const int eatom, const int vatom) {
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (offload) {
+    _off_results_eatom = eatom;
+    _off_results_vatom = vatom;
+    _off_force_array_m = f_in;
+    _off_ev_array_d = ev_in;
+    if (_sync_at_pair == 1) sync_coprocessor();
+    return;
+  }
+  #endif
+  add_results(f_in, ev_in, eatom, vatom, 0);
+  if (_overflow_flag[LMP_OVERFLOW])
+    error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (_sync_at_pair) sync_coprocessor();
+  #endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Single precision results; same flow as the double precision overload.
+void FixIntel::add_result_array(IntelBuffers<float,float>::vec3_acc_t *f_in,
+                                float *ev_in, const int offload,
+                                const int eatom, const int vatom) {
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (offload) {
+    _off_results_eatom = eatom;
+    _off_results_vatom = vatom;
+    _off_force_array_s = f_in;
+    _off_ev_array_s = ev_in;
+    if (_sync_at_pair == 1) sync_coprocessor();
+    return;
+  }
+  #endif
+  add_results(f_in, ev_in, eatom, vatom, 0);
+  if (_overflow_flag[LMP_OVERFLOW])
+    error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (_sync_at_pair) sync_coprocessor();
+  #endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Add one result buffer to the LAMMPS arrays, timed under TIME_PACK.
+// With separate buffers the local and (for newton_pair) ghost sections are
+// added by two add_oresults() calls; otherwise a single call covers
+// nlocal or nlocal + nghost atoms.
+template <class ft, class acc_t>
+void FixIntel::add_results(const ft * restrict const f_in,
+                           const acc_t * restrict const ev_global,
+                           const int eatom, const int vatom,
+                           const int offload) {
+  start_watch(TIME_PACK);
+  int f_length;
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (_separate_buffers) {
+    if (offload) {
+      add_oresults(f_in, ev_global, eatom, vatom, 0, _offload_nlocal);
+      if (force->newton_pair) {
+        // Global energy/virial was already added by the call above.
+        const acc_t * restrict const enull = 0;
+        int offset = _offload_nlocal;
+        // With torque each atom occupies two entries of f_in.
+        if (atom->torque) offset *= 2;
+        add_oresults(f_in + offset, enull, eatom, vatom,
+                     _offload_min_ghost, _offload_nghost);
+      }
+    } else {
+      add_oresults(f_in, ev_global, eatom, vatom,
+                   _host_min_local, _host_used_local);
+      if (force->newton_pair) {
+        const acc_t * restrict const enull = 0;
+        int offset = _host_used_local;
+        if (atom->torque) offset *= 2;
+        add_oresults(f_in + offset, enull, eatom,
+                     vatom, _host_min_ghost, _host_used_ghost);
+      }
+    }
+    stop_watch(TIME_PACK);
+    return;
+  }
+  if (force->newton_pair && (_offload_noghost == 0 || offload == 0))
+    f_length = atom->nlocal + atom->nghost;
+  else
+    f_length = atom->nlocal;
+  #else
+  if (force->newton_pair)
+    f_length = atom->nlocal + atom->nghost;
+  else
+    f_length = atom->nlocal;
+  #endif
+
+  add_oresults(f_in, ev_global, eatom, vatom, 0, f_length);
+  stop_watch(TIME_PACK);
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Add nall entries of f_in to atom->f (and atom->torque, pair->eatom)
+// starting at atom index out_offset, then add the 8 global energy/virial
+// values when ev_global is non-NULL.
+//   f_in       - entry i (or 2*i and 2*i+1 with torque) holds force (and
+//                torque); .w of the force entry holds the per-atom energy
+//   eatom      - non-zero to accumulate per-atom energy
+//   vatom      - unused here
+template <class ft, class acc_t>
+void FixIntel::add_oresults(const ft * restrict const f_in,
+                            const acc_t * restrict const ev_global,
+                            const int eatom, const int vatom,
+                            const int out_offset, const int nall) {
+  lmp_ft * restrict const f = (lmp_ft *) lmp->atom->f[0] + out_offset;
+  if (atom->torque) {
+    // f_in[1].w carries an error code when torque is active.
+    // NOTE(review): error->all is called on a per-rank condition here;
+    // confirm that every rank reaches it or switch to error->one.
+    if (f_in[1].w) {
+      if (f_in[1].w == 1)
+        error->all(FLERR,"Bad matrix inversion in mldivide3");
+      else
+        error->all(FLERR,
+                   "Sphere particles not yet supported for gayberne/intel");
+    }
+  }
+
+  // default(none) was dropped: no sharing clauses were listed, so every
+  // variable used below relies on the default (shared) attribute.
+  #if defined(_OPENMP)
+  #pragma omp parallel
+  #endif
+  {
+    // omp_get_thread_num() is only declared in OpenMP builds.
+    #if defined(_OPENMP)
+    const int tid = omp_get_thread_num();
+    #else
+    const int tid = 0;
+    #endif
+    int ifrom, ito;
+    IP_PRE_omp_range_align(ifrom, ito, tid, nall, _nthreads, sizeof(acc_t));
+    if (atom->torque) {
+      int ii = ifrom * 2;
+      lmp_ft * restrict const tor = (lmp_ft *) lmp->atom->torque[0] +
+        out_offset;
+      if (eatom) {
+        // Per-atom energy must use the same offset as f and tor.
+        double * const pair_eatom = force->pair->eatom + out_offset;
+        for (int i = ifrom; i < ito; i++) {
+          f[i].x += f_in[ii].x;
+          f[i].y += f_in[ii].y;
+          f[i].z += f_in[ii].z;
+          pair_eatom[i] += f_in[ii].w;
+          tor[i].x += f_in[ii+1].x;
+          tor[i].y += f_in[ii+1].y;
+          tor[i].z += f_in[ii+1].z;
+          ii += 2;
+        }
+      } else {
+        for (int i = ifrom; i < ito; i++) {
+          f[i].x += f_in[ii].x;
+          f[i].y += f_in[ii].y;
+          f[i].z += f_in[ii].z;
+          tor[i].x += f_in[ii+1].x;
+          tor[i].y += f_in[ii+1].y;
+          tor[i].z += f_in[ii+1].z;
+          ii += 2;
+        }
+      }
+    } else {
+      if (eatom) {
+        double * const pair_eatom = force->pair->eatom + out_offset;
+        for (int i = ifrom; i < ito; i++) {
+          f[i].x += f_in[i].x;
+          f[i].y += f_in[i].y;
+          f[i].z += f_in[i].z;
+          pair_eatom[i] += f_in[i].w;
+        }
+      } else {
+        for (int i = ifrom; i < ito; i++) {
+          f[i].x += f_in[i].x;
+          f[i].y += f_in[i].y;
+          f[i].z += f_in[i].z;
+        }
+      }
+    }
+  }
+
+  // ev_global layout: [0] vdwl energy, [1] coulomb energy, [2..7] virial.
+  if (ev_global != NULL) {
+    force->pair->eng_vdwl += ev_global[0];
+    force->pair->eng_coul += ev_global[1];
+    force->pair->virial[0] += ev_global[2];
+    force->pair->virial[1] += ev_global[3];
+    force->pair->virial[2] += ev_global[4];
+    force->pair->virial[3] += ev_global[5];
+    force->pair->virial[4] += ev_global[6];
+    force->pair->virial[5] += ev_global[7];
+  }
+}
+
+#ifdef _LMP_INTEL_OFFLOAD
+
+/* ---------------------------------------------------------------------- */
+
+// Number of local atoms offloaded for the pair computation; uses the
+// neighbor fraction on steps where the list was just rebuilt (ago == 0).
+int FixIntel::offload_end_pair() {
+  if (neighbor->ago == 0) return _balance_neighbor * atom->nlocal;
+  else return _balance_pair * atom->nlocal;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Stop timer "which", add the elapsed time to _timers and return it.
+double FixIntel::stop_watch(const int which) {
+  double elapsed = MPI_Wtime() - _stopwatch[which];
+  _timers[which] += elapsed;
+  return elapsed;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Record time stamps for the load balancer; only done when
+// _offload_balance is negative.
+void FixIntel::balance_stamp() {
+  if (_offload_balance < 0.0) {
+    double ct = MPI_Wtime();
+    _balance_other_time = ct;
+    _balance_pair_time = ct - _stopwatch[TIME_HOST_PAIR];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Add the offload stopwatch values to _timers.  The first time this runs on
+// a rebuild step all timers are zeroed once (_setup_time_cleared).
+void FixIntel::acc_timers() {
+  if (neighbor->ago == 0) {
+    _timers[TIME_OFFLOAD_NEIGHBOR] += *_stopwatch_offload_neighbor;
+    if (_setup_time_cleared == false) {
+      zero_timers();
+      _setup_time_cleared = true;
+    }
+  }
+  _timers[TIME_OFFLOAD_PAIR] += *_stopwatch_offload_pair;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Derive the host atom ranges from the min/max indices in _overflow_flag.
+void FixIntel::set_neighbor_host_sizes() {
+  _host_min_local = _overflow_flag[LMP_LOCAL_MIN];
+  _host_min_ghost = _overflow_flag[LMP_GHOST_MIN];
+  _host_used_local = atom->nlocal - _host_min_local;
+  _host_used_ghost = _overflow_flag[LMP_GHOST_MAX] + 1 - _host_min_ghost;
+  if (_host_used_ghost < 0) _host_used_ghost = 0;
+  _host_nall = atom->nlocal + _host_used_ghost;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Wait for the offload results in f_in, refresh the offload atom ranges on
+// rebuild steps, update the balance fraction when _offload_balance is
+// negative, and add the results to the LAMMPS arrays.
+template <class ft, class acc_t>
+void FixIntel::add_off_results(const ft * restrict const f_in,
+                               const acc_t * restrict const ev_global) {
+  if (_offload_balance < 0.0)
+    _balance_other_time = MPI_Wtime() - _balance_other_time;
+
+  start_watch(TIME_OFFLOAD_WAIT);
+  #ifdef _LMP_INTEL_OFFLOAD
+  #pragma offload_wait target(mic:_cop) wait(f_in)
+  #endif
+  double wait_time = stop_watch(TIME_OFFLOAD_WAIT);
+
+  if (neighbor->ago == 0) {
+    if (_off_overflow_flag[LMP_OVERFLOW])
+      error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
+    _offload_nlocal = _off_overflow_flag[LMP_LOCAL_MAX] + 1;
+    _offload_min_ghost = _off_overflow_flag[LMP_GHOST_MIN];
+    _offload_nghost = _off_overflow_flag[LMP_GHOST_MAX] + 1 -
+      _offload_min_ghost;
+    if (_offload_nghost < 0) _offload_nghost = 0;
+    _offload_nall = _offload_nlocal + _offload_nghost;
+  }
+
+  // Load balance?
+  if (_offload_balance < 0.0) {
+    if (neighbor->ago == 0)
+      _balance_pair = _balance_neighbor;
+    double mic_time;
+    mic_time = *_stopwatch_offload_pair;
+    if (_balance_pair_time + _balance_other_time < mic_time) {
+      // Named fixed_time because "ft" is the template type parameter.
+      double fixed_time = _balance_pair_time + _balance_other_time +
+        wait_time - mic_time;
+      _balance_fixed = (1.0 - INTEL_LB_MEAN_WEIGHT) * _balance_fixed +
+        INTEL_LB_MEAN_WEIGHT * fixed_time;
+    }
+
+    // Time per unit of work on host (ctps) and coprocessor (otps).
+    double ctps = _balance_pair_time / (1.0-_balance_pair);
+    double otps = mic_time / _balance_pair;
+    double new_balance = (ctps + _balance_other_time - _balance_fixed) /
+      (otps + ctps);
+    if (new_balance < 0.01) new_balance = 0.01;
+    else if (new_balance > 0.99) new_balance = 0.99;
+    _balance_neighbor = (1.0 - INTEL_LB_MEAN_WEIGHT) *_balance_neighbor +
+      INTEL_LB_MEAN_WEIGHT * new_balance;
+  }
+
+  #ifdef TIME_BALANCE
+  start_watch(TIME_IMBALANCE);
+  MPI_Barrier(_real_space_comm);
+  stop_watch(TIME_IMBALANCE);
+  #endif
+  acc_timers();
+  if (atom->torque)
+    if (f_in[1].w < 0.0)
+      error->all(FLERR, "Bad matrix inversion in mldivide3");
+  add_results(f_in, ev_global, _off_results_eatom, _off_results_vatom, 1);
+}
+
+#endif
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: The 'package intel' command is required for /intel styles
+
+Self-explanatory.
+
+E: Neighbor list overflow, boost neigh_modify one
+
+Increase the value for neigh_modify one to allow for larger allocations for
+neighbor list builds. The value required can be different for the Intel
+package in order to support offload to a coprocessor.
+
+E: Bad matrix inversion in mldivide3
+
+This error should not occur unless the matrix is badly formed.
+
+E: Illegal package intel command
+
+The format for the package intel command is incorrect. Please see the
+documentation.
+
+E: fix intel has to operate on group 'all'
+
+Self-explanatory.
+
+E: Illegal package intel mode requested
+
+The mode requested in the package intel command is not valid. Please see
+the documentation.
+
+E: Specified run_style does not support the Intel package.
+
+When using offload to a coprocessor, the Intel package requires a run style
+with the intel suffix.
+
+E: Currently, neighbor style BIN must be used with Intel package.
+
+This is the only neighbor style that has been implemented for the Intel
+package.
+
+E: Currently, cannot use neigh_modify exclude with Intel package.
+
+This is a current restriction of the Intel package.
+
+E: Currently, cannot use more than one intel style with hybrid.
+
+Currently, hybrid pair styles can only use the intel suffix for one of the
+pair styles.
+
+E: Cannot yet use hybrid styles with Intel package.
+
+The hybrid pair style configuration is not yet supported by the Intel
+package. Support is limited to hybrid/overlay or a hybrid style that does
+not require a skip list.
+
+E: MPI tasks per node must be multiple of offload_cards
+
+For offload to multiple coprocessors on a single node, the Intel package
+requires that each coprocessor is used by the same number of MPI tasks.
+
+*/
diff --git a/src/USER-INTEL/intel_buffers.cpp b/src/USER-INTEL/intel_buffers.cpp
new file mode 100644
index 000000000..a541f0f35
--- /dev/null
+++ b/src/USER-INTEL/intel_buffers.cpp
@@ -0,0 +1,432 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: W. Michael Brown (Intel)
+------------------------------------------------------------------------- */
+
+#include "intel_buffers.h"
+#include "force.h"
+#include "memory.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+// Start with no buffers allocated.  The initializer list follows the member
+// declaration order of the class.
+template <class flt_t, class acc_t>
+IntelBuffers<flt_t, acc_t>::IntelBuffers(class LAMMPS *lmp_in) :
+    lmp(lmp_in), _x(0), _q(0), _quat(0), _f(0), _off_threads(0),
+    _buf_size(0), _buf_local_size(0) {
+  _list_alloc_atoms = 0;
+  _ntypes = 0;
+  _off_map_maxlocal = 0;
+  #ifdef _LMP_INTEL_OFFLOAD
+  _separate_buffers = 0;
+  _off_f = 0;
+  _off_map_ilist = 0;
+  _off_map_stencil = 0;
+  _off_map_nmax = 0;
+  _off_map_maxhead = 0;
+  _off_list_alloc = false;
+  _off_threads = 0;
+  #endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class flt_t, class acc_t>
+IntelBuffers<flt_t, acc_t>::~IntelBuffers()
+{
+  free_buffers();
+  free_all_nbor_buffers();
+  set_ntypes(0);
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Release the atom/force buffers on the coprocessor (offload builds) and on
+// the host.  Does nothing when no buffer is allocated (_buf_size == 0).
+template <class flt_t, class acc_t>
+void IntelBuffers<flt_t, acc_t>::free_buffers()
+{
+  if (_buf_size > 0) {
+    // The offload clauses below need plain local pointers.
+    atom_t * x = get_x();
+    flt_t * q = get_q();
+    quat_t * quat = get_quat();
+
+    #ifdef _LMP_INTEL_OFFLOAD
+    vec3_acc_t * f_start = get_off_f();
+    if (f_start != 0) {
+      acc_t * ev_global = get_ev_global();
+      if (ev_global != 0) {
+        #pragma offload_transfer target(mic:_cop) \
+          nocopy(x:alloc_if(0) free_if(1)) \
+          nocopy(f_start:alloc_if(0) free_if(1)) \
+          nocopy(ev_global:alloc_if(0) free_if(1))
+      }
+
+      if (q != 0) {
+        #pragma offload_transfer target (mic:_cop) \
+          nocopy(q:alloc_if(0) free_if(1))
+      }
+      if (quat != 0) {
+        #pragma offload_transfer target (mic:_cop) \
+          nocopy(quat:alloc_if(0) free_if(1))
+      }
+      lmp->memory->destroy(f_start);
+      // f_start was a copy; clear the member so it is not freed twice.
+      _off_f = 0;
+    }
+
+    if (_separate_buffers) {
+      lmp->memory->destroy(_host_x);
+      if (q != 0) lmp->memory->destroy(_host_q);
+      if (quat != 0) lmp->memory->destroy(_host_quat);
+    }
+    #endif
+
+    lmp->memory->destroy(x);
+    if (q != 0) lmp->memory->destroy(q);
+    if (quat != 0) lmp->memory->destroy(quat);
+    lmp->memory->destroy(_f);
+    // x, q and quat were copies; clear the members so that a later call
+    // does not free stale pointers if _grow() does not recreate them.
+    _x = 0;
+    _q = 0;
+    _quat = 0;
+    _buf_size = _buf_local_size = 0;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Reallocate all atom/force buffers with 10% headroom over nall / nlocal.
+//   nthreads    - number of host threads; _f holds one stride per thread
+//   offload_end - > 0 when atoms are offloaded; then the coprocessor copies
+//                 are allocated as well
+template <class flt_t, class acc_t>
+void IntelBuffers<flt_t, acc_t>::_grow(const int nall, const int nlocal,
+                                       const int nthreads,
+                                       const int offload_end)
+{
+  free_buffers();
+  _buf_size = static_cast<double>(nall) * 1.1 + 1;
+  if (lmp->force->newton_pair)
+    _buf_local_size = _buf_size;
+  else
+    _buf_local_size = static_cast<double>(nlocal) * 1.1 + 1;
+  if (lmp->atom->torque)
+    _buf_local_size *= 2;
+  const int f_stride = get_stride(_buf_local_size);
+  lmp->memory->create(_x, _buf_size,"intel_x");
+  if (lmp->atom->q != NULL)
+    lmp->memory->create(_q, _buf_size, "intel_q");
+  if (lmp->atom->ellipsoid != NULL)
+    lmp->memory->create(_quat, _buf_size, "intel_quat");
+  lmp->memory->create(_f, f_stride * nthreads, "intel_f");
+
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (_separate_buffers) {
+    lmp->memory->create(_host_x, _buf_size,"intel_host_x");
+    if (lmp->atom->q != NULL)
+      lmp->memory->create(_host_q, _buf_size, "intel_host_q");
+    if (lmp->atom->ellipsoid != NULL)
+      lmp->memory->create(_host_quat, _buf_size, "intel_host_quat");
+  }
+
+  if (offload_end > 0) {
+    lmp->memory->create(_off_f, f_stride * _off_threads, "intel_off_f");
+    const atom_t * const x = get_x();
+    const flt_t * const q = get_q();
+    const vec3_acc_t * f_start = get_off_f();
+    acc_t * ev_global = get_ev_global();
+    if (lmp->atom->q != NULL) {
+      if (x != NULL && q != NULL && f_start != NULL && ev_global != NULL) {
+        #pragma offload_transfer target(mic:_cop) \
+          nocopy(x,q:length(_buf_size) alloc_if(1) free_if(0)) \
+          nocopy(f_start:length(f_stride*_off_threads) alloc_if(1) free_if(0))\
+          nocopy(ev_global:length(8) alloc_if(1) free_if(0))
+      }
+    } else {
+      if (x != NULL && f_start != NULL && ev_global != NULL) {
+        #pragma offload_transfer target(mic:_cop) \
+          nocopy(x:length(_buf_size) alloc_if(1) free_if(0)) \
+          nocopy(f_start:length(f_stride*_off_threads) alloc_if(1) free_if(0))\
+          nocopy(ev_global:length(8) alloc_if(1) free_if(0))
+      }
+    }
+    if (lmp->atom->ellipsoid != NULL) {
+      const quat_t * const quat = get_quat();
+      if (quat != NULL) {
+        #pragma offload_transfer target(mic:_cop) \
+          nocopy(quat:length(_buf_size) alloc_if(1) free_if(0))
+      }
+    }
+  }
+  #endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Free the coprocessor copies of tag, special, nspecial and bins.
+template <class flt_t, class acc_t>
+void IntelBuffers<flt_t, acc_t>::free_nmax()
+{
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (_off_map_nmax > 0) {
+    const int * tag = _off_map_tag;
+    const int * special = _off_map_special;
+    const int * nspecial = _off_map_nspecial;
+    const int * bins = _off_map_bins;
+    if (tag != 0 && special != 0 && nspecial !=0 && bins != 0) {
+      #pragma offload_transfer target(mic:_cop) \
+        nocopy(tag:alloc_if(0) free_if(1)) \
+        nocopy(special,nspecial:alloc_if(0) free_if(1)) \
+        nocopy(bins:alloc_if(0) free_if(1))
+    }
+    _off_map_nmax = 0;
+  }
+  #endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Allocate coprocessor space for tag, special, nspecial and bins sized by
+// atom->nmax.  Non-molecular systems map 1-element placeholders instead.
+template <class flt_t, class acc_t>
+void IntelBuffers<flt_t, acc_t>::_grow_nmax()
+{
+  #ifdef _LMP_INTEL_OFFLOAD
+  free_nmax();
+  int *special, *nspecial;
+  int tag_length, special_length, nspecial_length;
+  int size = lmp->atom->nmax;
+  if (lmp->atom->molecular) {
+    special = lmp->atom->special[0];
+    nspecial = lmp->atom->nspecial[0];
+    special_length = size * lmp->atom->maxspecial;
+    nspecial_length = size * 3;
+    tag_length = size;
+  } else {
+    special = &_special_holder;
+    nspecial = &_nspecial_holder;
+    special_length = 1;
+    nspecial_length = 1;
+    tag_length = 1;
+  }
+  int *tag = lmp->atom->tag;
+  int *bins = lmp->neighbor->bins;
+  #pragma offload_transfer target(mic:_cop) \
+    nocopy(bins:length(size) alloc_if(1) free_if(0)) \
+    nocopy(tag:length(tag_length) alloc_if(1) free_if(0)) \
+    nocopy(special:length(special_length) alloc_if(1) free_if(0)) \
+    nocopy(nspecial:length(nspecial_length) alloc_if(1) free_if(0))
+  // Remember the host pointers that were mapped so they can be freed later.
+  _off_map_tag = tag;
+  _off_map_special = special;
+  _off_map_nspecial = nspecial;
+  _off_map_nmax = size;
+  _off_map_bins = bins;
+  #endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Free _cnumneigh and, in offload builds, the coprocessor copies of ilist,
+// numneigh and cnumneigh.
+template <class flt_t, class acc_t>
+void IntelBuffers<flt_t, acc_t>::free_local()
+{
+  if (_off_map_maxlocal > 0) {
+    int * cnumneigh = _cnumneigh;
+    #ifdef _LMP_INTEL_OFFLOAD
+    if (_off_map_ilist != NULL) {
+      const int * ilist = _off_map_ilist;
+      const int * numneigh = _off_map_numneigh;
+      _off_map_ilist = NULL;
+      if (numneigh != 0 && ilist != 0) {
+        #pragma offload_transfer target(mic:_cop) \
+          nocopy(ilist,numneigh,cnumneigh:alloc_if(0) free_if(1))
+      }
+    }
+    #endif
+    lmp->memory->destroy(cnumneigh);
+    _off_map_maxlocal = 0;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Reallocate _cnumneigh to list->get_maxlocal() entries and, when
+// offloading, map ilist/numneigh/cnumneigh on the coprocessor.
+template <class flt_t, class acc_t>
+void IntelBuffers<flt_t, acc_t>::_grow_local(NeighList *list,
+                                             const int offload_end)
+{
+  free_local();
+  int size = list->get_maxlocal();
+  lmp->memory->create(_cnumneigh, size, "_cnumneigh");
+  _off_map_maxlocal = size;
+
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (offload_end > 0) {
+    int * numneigh = list->numneigh;
+    int * ilist = list->ilist;
+    int * cnumneigh = _cnumneigh;
+    if (cnumneigh != 0) {
+      #pragma offload_transfer target(mic:_cop) \
+        nocopy(ilist:length(size) alloc_if(1) free_if(0)) \
+        nocopy(numneigh:length(size) alloc_if(1) free_if(0)) \
+        nocopy(cnumneigh:length(size) alloc_if(1) free_if(0))
+    }
+    _off_map_ilist = ilist;
+    _off_map_numneigh = numneigh;
+  }
+  #endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Free the coprocessor copy of binhead.
+template <class flt_t, class acc_t>
+void IntelBuffers<flt_t, acc_t>::free_binhead()
+{
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (_off_map_maxhead > 0) {
+    const int * binhead = _off_map_binhead;
+    if (binhead !=0) {
+      #pragma offload_transfer target(mic:_cop) \
+        nocopy(binhead:alloc_if(0) free_if(1))
+    }
+    _off_map_maxhead = 0;
+  }
+  #endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Map neighbor->binhead (maxhead entries) on the coprocessor.
+template <class flt_t, class acc_t>
+void IntelBuffers<flt_t, acc_t>::_grow_binhead()
+{
+  #ifdef _LMP_INTEL_OFFLOAD
+  free_binhead();
+  int * binhead = lmp->neighbor->binhead;
+  const int maxhead = lmp->neighbor->maxhead;
+  #pragma offload_transfer target(mic:_cop) \
+    nocopy(binhead:length(maxhead) alloc_if(1) free_if(0))
+  _off_map_binhead = binhead;
+  _off_map_maxhead = maxhead;
+  #endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Free the neighbor list storage.  The coprocessor copy is released first,
+// while _list_alloc still holds the host address it was mapped from;
+// destroying the host array first would leave nothing to identify it by.
+template <class flt_t, class acc_t>
+void IntelBuffers<flt_t, acc_t>::free_nbor_list()
+{
+  if (_list_alloc_atoms > 0) {
+    #ifdef _LMP_INTEL_OFFLOAD
+    if (_off_list_alloc) {
+      int * list_alloc = _list_alloc;
+      int * special_flag = lmp->neighbor->special_flag_alloc();
+      int * stencil = _off_map_stencil;
+      if (list_alloc != 0 && special_flag != 0 && stencil != 0) {
+        #pragma offload_transfer target(mic:_cop) \
+          nocopy(special_flag,stencil:alloc_if(0) free_if(1)) \
+          nocopy(list_alloc:alloc_if(0) free_if(1))
+      }
+      _off_list_alloc = false;
+    }
+    #endif
+
+    lmp->memory->destroy(_list_alloc);
+    _list_alloc_atoms = 0;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Reallocate the neighbor list storage for 1.1 * nlocal atoms (plus one
+// slot per offload thread), get_max_nbors() entries each.
+template <class flt_t, class acc_t>
+void IntelBuffers<flt_t, acc_t>::_grow_nbor_list(NeighList *list,
+                                                 const int nlocal,
+                                                 const int offload_end)
+{
+  free_nbor_list();
+  _list_alloc_atoms = 1.10 * nlocal;
+  int list_alloc_size = (_list_alloc_atoms + _off_threads) * get_max_nbors();
+  lmp->memory->create(_list_alloc, list_alloc_size, "_list_alloc");
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (offload_end > 0) {
+    int * list_alloc =_list_alloc;
+    int * special_flag = lmp->neighbor->special_flag;
+    int * stencil = list->stencil;
+
+    if (special_flag != NULL && list_alloc != NULL) {
+      #pragma offload_transfer target(mic:_cop) \
+        in(special_flag:length(4) alloc_if(1) free_if(0)) \
+        in(stencil:length(list->maxstencil) alloc_if(1) free_if(0)) \
+        nocopy(list_alloc:length(list_alloc_size) alloc_if(1) free_if(0))
+      _off_map_stencil = stencil;
+      _off_list_alloc = true;
+    }
+  }
+  #endif
+}
+
+// Replace the coprocessor copy of the stencil with list->stencil.
+template <class flt_t, class acc_t>
+void IntelBuffers<flt_t, acc_t>::_grow_stencil(NeighList *list)
+{
+  #ifdef _LMP_INTEL_OFFLOAD
+  int * stencil = _off_map_stencil;
+  // Nothing to free if no stencil has been mapped yet.
+  if (stencil != 0) {
+    #pragma offload_transfer target(mic:_cop) \
+      nocopy(stencil:alloc_if(0) free_if(1))
+  }
+  stencil = list->stencil;
+  #pragma offload_transfer target(mic:_cop) \
+    in(stencil:length(list->maxstencil) alloc_if(1) free_if(0))
+  _off_map_stencil = stencil;
+  #endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Resize the ntypes x ntypes _cutneighsq table; ntypes == 0 only frees it.
+template <class flt_t, class acc_t>
+void IntelBuffers<flt_t, acc_t>::set_ntypes(const int ntypes)
+{
+  if (ntypes != _ntypes) {
+    if (_ntypes > 0) {
+      #ifdef _LMP_INTEL_OFFLOAD
+      flt_t * cutneighsqo = _cutneighsq[0];
+      if (cutneighsqo != 0) {
+        #pragma offload_transfer target(mic:_cop) \
+          nocopy(cutneighsqo:alloc_if(0) free_if(1))
+      }
+      #endif
+      lmp->memory->destroy(_cutneighsq);
+    }
+    if (ntypes > 0) {
+      lmp->memory->create(_cutneighsq, ntypes, ntypes, "_cutneighsq");
+      #ifdef _LMP_INTEL_OFFLOAD
+      flt_t * cutneighsqo = _cutneighsq[0];
+      if (cutneighsqo != NULL) {
+        #pragma offload_transfer target(mic:_cop) \
+          nocopy(cutneighsqo:length(ntypes * ntypes) alloc_if(1) free_if(0))
+      }
+      #endif
+    }
+    _ntypes = ntypes;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Estimate, in bytes, of the host memory held by this object.
+//   nthreads - number of host threads (one force stride each)
+template <class flt_t, class acc_t>
+double IntelBuffers<flt_t, acc_t>::memory_usage(const int nthreads)
+{
+  // Per-atom bytes of the position/charge/quaternion buffers.
+  double tmem = sizeof(atom_t);
+  if (lmp->atom->q) tmem += sizeof(flt_t);
+  // _quat is allocated in _grow() when atom->ellipsoid is set.
+  if (lmp->atom->ellipsoid) tmem += sizeof(quat_t);
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (_separate_buffers) tmem *= 2;
+  #endif
+  tmem *= _buf_size;
+
+  const int fstride = get_stride(_buf_local_size);
+  tmem += fstride * nthreads * sizeof(vec3_acc_t);
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (_off_f) tmem += fstride*_off_threads * sizeof(vec3_acc_t);
+  #endif
+
+  tmem += _off_map_maxlocal * sizeof(int);
+  tmem += (_list_alloc_atoms + _off_threads) * get_max_nbors() * sizeof(int);
+  // _cutneighsq stores flt_t values.
+  tmem += _ntypes * _ntypes * sizeof(flt_t);
+  return tmem;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Explicit instantiations: single, mixed and double precision.
+template class IntelBuffers<float,float>;
+template class IntelBuffers<float,double>;
+template class IntelBuffers<double,double>;
diff --git a/src/USER-INTEL/intel_buffers.h b/src/USER-INTEL/intel_buffers.h
new file mode 100644
index 000000000..bc1ca9e3b
--- /dev/null
+++ b/src/USER-INTEL/intel_buffers.h
@@ -0,0 +1,284 @@
+/* -*- c++ -*- -------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: W. 
Michael Brown (Intel)
+------------------------------------------------------------------------- */
+
+#ifndef LMP_INTEL_BUFFERS_H
+#define LMP_INTEL_BUFFERS_H
+
+#if defined(_OPENMP)
+#include <omp.h>
+#endif
+#include "atom.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "intel_preprocess.h"
+#include <string.h>
+
+namespace LAMMPS_NS {
+
+// Shorthand for the nested buffer types inside code templated on
+// flt_t / acc_t.
+// NOTE(review): the template arguments below were reconstructed; confirm
+// them against the /intel styles that use these macros.
+#define ATOM_T typename IntelBuffers<flt_t,flt_t>::atom_t
+#define QUAT_T typename IntelBuffers<flt_t,flt_t>::quat_t
+#define FORCE_T typename IntelBuffers<flt_t,acc_t>::vec3_acc_t
+
+// Owns the packed atom, force and neighbor buffers.
+//   flt_t - type used for positions, charges, quaternions and cutoffs
+//   acc_t - type used for force and energy/virial accumulation
+// May not need a separate force array for mixed/double
+template <class flt_t, class acc_t>
+class IntelBuffers {
+ public:
+  // Position plus atom type in w.
+  typedef struct { flt_t x,y,z; int w; } atom_t;
+  typedef struct { flt_t w,i,j,k; } quat_t;
+  typedef struct { flt_t x,y,z,w; } vec3_t;
+  typedef struct { flt_t x,y,z,w; } vec4_t;
+  typedef struct { acc_t x,y,z,w; } vec3_acc_t;
+
+  IntelBuffers(class LAMMPS *lmp_in);
+  ~IntelBuffers();
+
+  // Per-thread length of the force buffer for nall atoms, as computed by
+  // IP_PRE_get_stride.
+  inline int get_stride(int nall) {
+    int stride;
+    IP_PRE_get_stride(stride, nall, sizeof(vec3_acc_t),
+                      lmp->atom->torque);
+    return stride;
+  }
+
+  void free_buffers();
+
+  // Reallocate the atom/force buffers only when the current ones are too
+  // small.
+  inline void grow(const int nall, const int nlocal, const int nthreads,
+                   const int offload_end) {
+    if (nall >= _buf_size || nlocal >= _buf_local_size)
+      _grow(nall, nlocal, nthreads, offload_end);
+  }
+
+  inline void free_all_nbor_buffers() {
+    free_nbor_list();
+    free_nmax();
+    free_binhead();
+    free_local();
+  }
+
+  // Grow every neighbor-related buffer that is too small for this list.
+  inline void grow_nbor(NeighList *list, const int nlocal,
+                        const int offload_end) {
+    grow_local(list, offload_end);
+    if (offload_end) {
+      grow_nmax();
+      grow_binhead();
+    }
+    grow_nbor_list(list, nlocal, offload_end);
+  }
+
+  void free_nmax();
+
+  inline void grow_nmax() {
+    #ifdef _LMP_INTEL_OFFLOAD
+    if (lmp->atom->nmax > _off_map_nmax)
+      _grow_nmax();
+    #endif
+  }
+
+  void free_local();
+
+  inline void grow_local(NeighList *list, const int offload_end) {
+    if (list->get_maxlocal() > _off_map_maxlocal)
+      _grow_local(list, offload_end);
+  }
+
+  void free_binhead();
+
+  inline void grow_binhead() {
+    #ifdef _LMP_INTEL_OFFLOAD
+    if (lmp->neighbor->maxhead > _off_map_maxhead)
+      _grow_binhead();
+    #endif
+  }
+
+  // Neighbor slots per atom: neighbor->oneatom divided by
+  // INTEL_ONEATOM_FACTOR, rounded down to a whole number of
+  // INTEL_DATA_ALIGN-byte blocks of ints.
+  inline int get_max_nbors() {
+    int mn = lmp->neighbor->oneatom * sizeof(int) /
+      (INTEL_ONEATOM_FACTOR * INTEL_DATA_ALIGN);
+    return mn * INTEL_DATA_ALIGN / sizeof(int);
+  }
+
+  void free_nbor_list();
+
+  // Reallocate the list storage when nlocal outgrew it; otherwise, when
+  // offloading, re-map the stencil if the list now uses a different one.
+  inline void grow_nbor_list(NeighList *list, const int nlocal,
+                             const int offload_end) {
+    if (nlocal > _list_alloc_atoms)
+      _grow_nbor_list(list, nlocal, offload_end);
+    #ifdef _LMP_INTEL_OFFLOAD
+    else if (offload_end > 0 && _off_map_stencil != list->stencil)
+      _grow_stencil(list);
+    #endif
+  }
+
+  void set_ntypes(const int ntypes);
+
+  // The list argument is not used; both return the buffers owned here.
+  inline int * firstneigh(const NeighList *list) { return _list_alloc; }
+  inline int * cnumneigh(const NeighList *list) { return _cnumneigh; }
+
+  // get_x / get_q / get_quat return the host-side copy only when separate
+  // buffers are enabled and offload == 0; otherwise the primary buffer.
+  inline atom_t * get_x(const int offload = 1) {
+    #ifdef _LMP_INTEL_OFFLOAD
+    if (_separate_buffers && offload == 0) return _host_x;
+    #endif
+    return _x;
+  }
+  inline flt_t * get_q(const int offload = 1) {
+    #ifdef _LMP_INTEL_OFFLOAD
+    if (_separate_buffers && offload == 0) return _host_q;
+    #endif
+    return _q;
+  }
+  inline quat_t * get_quat(const int offload = 1) {
+    #ifdef _LMP_INTEL_OFFLOAD
+    if (_separate_buffers && offload == 0) return _host_quat;
+    #endif
+    return _quat;
+  }
+  inline vec3_acc_t * get_f() { return _f; }
+  inline acc_t * get_ev_global() { return _ev_global; }
+  inline acc_t * get_ev_global_host() { return _ev_global_host; }
+  inline void zero_ev()
+    { for (int i = 0; i < 8; i++) _ev_global[i] = _ev_global_host[i] = 0.0; }
+  inline flt_t ** get_cutneighsq() { return _cutneighsq; }
+  inline int get_off_threads() { return _off_threads; }
+  #ifdef _LMP_INTEL_OFFLOAD
+  inline void set_off_params(const int n, const int cop,
+                             const int separate_buffers)
+    { _off_threads = n; _cop = cop; _separate_buffers = separate_buffers; }
+  inline vec3_acc_t * get_off_f() { return _off_f; }
+  #endif
+
+  // Copy positions of atoms [ifrom, ito) into _x.  On rebuild steps
+  // (ago == 0) the atom type and, if present, the charge are copied too.
+  inline void thr_pack(const int ifrom, const int ito, const int ago) {
+    if (ago == 0) {
+      for (int i = ifrom; i < ito; i++) {
+        _x[i].x = lmp->atom->x[i][0];
+        _x[i].y = lmp->atom->x[i][1];
+        _x[i].z = lmp->atom->x[i][2];
+        _x[i].w = lmp->atom->type[i];
+      }
+      if (lmp->atom->q != NULL)
+        for (int i = ifrom; i < ito; i++)
+          _q[i] = lmp->atom->q[i];
+    } else {
+      for (int i = ifrom; i < ito; i++) {
+        _x[i].x = lmp->atom->x[i][0];
+        _x[i].y = lmp->atom->x[i][1];
+        _x[i].z = lmp->atom->x[i][2];
+      }
+    }
+  }
+
+  #ifdef _LMP_INTEL_OFFLOAD
+  // Copy positions (and types when dotype is true) into _x; buffer entry i
+  // is read from atom index offset + i.
+  inline void thr_pack_cop(const int ifrom, const int ito,
+                           const int offset, const bool dotype = false) {
+    double ** x = lmp->atom->x + offset;
+    if (dotype == false) {
+      #pragma vector nontemporal
+      for (int i = ifrom; i < ito; i++) {
+        _x[i].x = x[i][0];
+        _x[i].y = x[i][1];
+        _x[i].z = x[i][2];
+      }
+    } else {
+      int *type = lmp->atom->type + offset;
+      #pragma vector nontemporal
+      for (int i = ifrom; i < ito; i++) {
+        _x[i].x = x[i][0];
+        _x[i].y = x[i][1];
+        _x[i].z = x[i][2];
+        _x[i].w = type[i];
+      }
+    }
+  }
+
+  // Same as thr_pack_cop without types, writing into _host_x.
+  inline void thr_pack_host(const int ifrom, const int ito,
+                            const int offset) {
+    double ** x = lmp->atom->x + offset;
+    for (int i = ifrom; i < ito; i++) {
+      _host_x[i].x = x[i][0];
+      _host_x[i].y = x[i][1];
+      _host_x[i].z = x[i][2];
+    }
+  }
+
+  // Build the host buffers from the primary ones: the used local atoms
+  // keep their index, the used ghost atoms are packed right behind them,
+  // and one extra entry at INTEL_BIGP with type 1 is written after the last
+  // atom.
+  inline void pack_sep_from_single(const int host_min_local,
+                                   const int used_local,
+                                   const int host_min_ghost,
+                                   const int used_ghost) {
+    memcpy(_host_x + host_min_local, _x + host_min_local,
+           used_local * sizeof(atom_t));
+    memcpy(_host_x + host_min_local + used_local, _x + host_min_ghost,
+           used_ghost * sizeof(atom_t));
+    int nall = used_local + used_ghost + host_min_local;
+    _host_x[nall].x = INTEL_BIGP;
+    _host_x[nall].y = INTEL_BIGP;
+    _host_x[nall].z = INTEL_BIGP;
+    _host_x[nall].w = 1;
+    if (lmp->atom->q != NULL) {
+      memcpy(_host_q + host_min_local, _q + host_min_local,
+             used_local * sizeof(flt_t));
+      memcpy(_host_q + host_min_local + used_local, _q + host_min_ghost,
+             used_ghost * sizeof(flt_t));
+    }
+  }
+  #endif
+
+  double memory_usage(const int nthreads);
+
+  // 1-element stand-ins mapped instead of special/nspecial for
+  // non-molecular systems.
+  int _special_holder, _nspecial_holder;
+
+ protected:
+  LAMMPS *lmp;
+  atom_t *_x;
+  flt_t *_q;
+  quat_t *_quat;
+  vec3_acc_t * _f;
+  int _off_threads, _off_map_maxlocal;
+
+  int _list_alloc_atoms;
+  int * _list_alloc;
+  int * _cnumneigh;
+
+  flt_t **_cutneighsq;
+  int _ntypes;
+
+  #ifdef _LMP_INTEL_OFFLOAD
+  int _separate_buffers;
+  atom_t *_host_x;
+  flt_t *_host_q;
+  quat_t *_host_quat;
+  vec3_acc_t *_off_f;
+  int _off_map_nmax, _off_map_maxhead, _cop;
+  // Host addresses that are currently mapped on the coprocessor.
+  int *_off_map_ilist;
+  int *_off_map_stencil, *_off_map_special, *_off_map_nspecial, *_off_map_tag;
+  int *_off_map_binhead, *_off_map_bins, *_off_map_numneigh;
+  bool _off_list_alloc;
+  #endif
+
+  int _buf_size, _buf_local_size;
+  // Layout used by callers: [0] vdwl, [1] coul, [2..7] virial.
+  __declspec(align(64)) acc_t _ev_global[8];
+  __declspec(align(64)) acc_t _ev_global_host[8];
+
+  void _grow(const int nall, const int nlocal, const int nthreads,
+             const int offload_end);
+  void _grow_nmax();
+  void _grow_local(NeighList *list, const int offload_end);
+  void _grow_binhead();
+  void _grow_nbor_list(NeighList *list, const int nlocal,
+                       const int offload_end);
+  void _grow_stencil(NeighList *list);
+};
+
+}
+
+#endif
diff --git a/src/USER-INTEL/intel_preprocess.h b/src/USER-INTEL/intel_preprocess.h
new file mode 100644
index 000000000..49e3413e0
--- /dev/null
+++ b/src/USER-INTEL/intel_preprocess.h
@@ -0,0 +1,391 @@
+/* -*- c++ -*- -------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: W. Michael Brown (Intel) +------------------------------------------------------------------------- */ + +#ifdef __INTEL_OFFLOAD +#ifdef LMP_INTEL_OFFLOAD +#define _LMP_INTEL_OFFLOAD +#endif +#endif + +#ifndef LMP_INTEL_PREPROCESS_H +#define LMP_INTEL_PREPROCESS_H + +#ifndef LAMMPS_MEMALIGN +#error Please set -DLAMMPS_MEMALIGN=64 in CCFLAGS for your LAMMPS makefile. +#endif + +namespace LAMMPS_NS { + +enum {LMP_OVERFLOW, LMP_LOCAL_MIN, LMP_LOCAL_MAX, LMP_GHOST_MIN, + LMP_GHOST_MAX}; +enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR, + TIME_OFFLOAD_PAIR, TIME_OFFLOAD_WAIT, TIME_OFFLOAD_LATENCY, + TIME_IMBALANCE}; +#define NUM_ITIMERS ( TIME_IMBALANCE + 1 ) + +#define INTEL_DATA_ALIGN 64 +#define INTEL_ONEATOM_FACTOR 2 +#define INTEL_MIC_VECTOR_WIDTH 16 +#define INTEL_MIC_NBOR_PAD INTEL_MIC_VECTOR_WIDTH +#define INTEL_VECTOR_WIDTH 8 +#define INTEL_NBOR_PAD INTEL_VECTOR_WIDTH +#define INTEL_LB_MEAN_WEIGHT 0.1 +#define INTEL_BIGP 1e15 + +#define IP_PRE_get_stride(stride, n, datasize, torque) \ + { \ + int blength = n; \ + if (torque) blength *= 2; \ + const int bytes = blength * datasize; \ + stride = INTEL_DATA_ALIGN - (bytes % INTEL_DATA_ALIGN); \ + stride = blength + stride / datasize; \ + } + +#if defined(_OPENMP) + +#define IP_PRE_omp_range(ifrom, ito, tid, inum, nthreads) \ + { \ + const int idelta = 1 + inum/nthreads; \ + ifrom = tid * idelta; \ + ito = ((ifrom + idelta) > inum) ? 
inum : ifrom + idelta; \ + } + +#define IP_PRE_omp_range_id(ifrom, ito, tid, inum, nthreads) \ + { \ + tid = omp_get_thread_num(); \ + IP_PRE_omp_range(ifrom, ito, tid, inum, nthreads); \ + } + +#define IP_PRE_omp_range_align(ifrom, ito, tid, inum, nthreads, \ + datasize) \ +{ \ + int chunk_size = INTEL_DATA_ALIGN / datasize; \ + int idelta = static_cast(static_cast(inum) \ + /chunk_size/nthreads) + 1; \ + idelta *= chunk_size; \ + ifrom = tid*idelta; \ + ito = ifrom + idelta; \ + if (ito > inum) ito = inum; \ +} + +#define IP_PRE_omp_range_id_align(ifrom, ito, tid, inum, \ + nthreads, datasize) \ + { \ + tid = omp_get_thread_num(); \ + IP_PRE_omp_range_align(ifrom, ito, tid, inum, nthreads, \ + datasize); \ + } + +#else + +#define IP_PRE_omp_range(ifrom, ito, tid, inum, nthreads) \ + { \ + ifrom = 0; \ + ito = inum; \ + } + +#define IP_PRE_omp_range_id(ifrom, ito, tid, inum, nthreads) \ + { \ + tid = 0; \ + ifrom = 0; \ + ito = inum; \ + } + +#define IP_PRE_omp_range_align(ifrom, ito, tid, inum, nthreads, \ + datasize) \ +{ \ + ifrom = 0; \ + ito = inum; \ +} + +#define IP_PRE_omp_range_id_align(ifrom, ito, tid, inum, \ + nthreads, datasize) \ +{ \ + tid = 0; \ + ifrom = 0; \ + ito = inum; \ +} + +#endif + +#ifdef _LMP_INTEL_OFFLOAD +#include + +__declspec( target (mic)) +inline double MIC_Wtime() { + double time; + struct timeval tv; + + gettimeofday(&tv, NULL); + time = 1.0 * tv.tv_sec + 1.0e-6 * tv.tv_usec; + return time; +} + +#define IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, \ + nlocal, nall) \ +{ \ + if (fix->separate_buffers() && ago != 0) { \ + fix->start_watch(TIME_PACK); \ + if (offload) { \ + _Pragma("omp parallel default(none) shared(buffers,nlocal,nall)") \ + { \ + int ifrom, ito, tid; \ + int nthreads = comm->nthreads; \ + IP_PRE_omp_range_id_align(ifrom, ito, tid, nlocal, \ + nthreads, sizeof(flt_t)); \ + buffers->thr_pack_cop(ifrom, ito, 0); \ + int nghost = nall - nlocal; \ + if (nghost) { \ + IP_PRE_omp_range_align(ifrom, ito, tid, 
nall - nlocal, \ + nthreads, sizeof(flt_t)); \ + buffers->thr_pack_cop(ifrom + nlocal, ito + nlocal, \ + fix->offload_min_ghost() - nlocal, \ + ago == 1); \ + } \ + } \ + } else { \ + buffers->thr_pack_host(fix->host_min_local(), nlocal, 0); \ + buffers->thr_pack_host(nlocal, nall, \ + fix->host_min_ghost()-nlocal); \ + } \ + fix->stop_watch(TIME_PACK); \ + } \ +} + +#define IP_PRE_get_transfern(ago, newton, evflag, eflag, vflag, \ + buffers, offload, fix, separate_flag, \ + x_size, q_size, ev_size, f_stride) \ +{ \ + separate_flag = 0; \ + if (ago == 0) { \ + x_size = 0; \ + q_size = nall; \ + if (offload) { \ + if (fix->separate_buffers()) { \ + if (lmp->atom->torque) \ + separate_flag = 2; \ + else \ + separate_flag = 1; \ + } else \ + separate_flag = 3; \ + } \ + } else { \ + x_size = nall; \ + q_size = 0; \ + } \ + ev_size = 0; \ + if (evflag) { \ + if (eflag) ev_size = 2; \ + if (vflag) ev_size = 8; \ + } \ + int f_length; \ + if (newton) \ + f_length = nall; \ + else \ + f_length = nlocal; \ + f_length -= minlocal; \ + f_stride = buffers->get_stride(f_length); \ +} + +#define IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, \ + ev_global) \ +{ \ + if (offload) { \ + tc = buffers->get_off_threads(); \ + f_start = buffers->get_off_f(); \ + ev_global = buffers->get_ev_global(); \ + } else { \ + tc = comm->nthreads; \ + f_start = buffers->get_f(); \ + fix->start_watch(TIME_HOST_PAIR); \ + ev_global = buffers->get_ev_global_host(); \ + } \ +} + +#define IP_PRE_repack_for_offload(newton, separate_flag, nlocal, nall, \ + f_stride, x, q) \ +{ \ + if (separate_flag) { \ + if (separate_flag < 3) { \ + int all_local = nlocal; \ + int ghost_min = overflow[LMP_GHOST_MIN]; \ + nlocal = overflow[LMP_LOCAL_MAX] + 1; \ + int nghost = overflow[LMP_GHOST_MAX] + 1 - ghost_min; \ + if (nghost < 0) nghost = 0; \ + nall = nlocal + nghost; \ + separate_flag--; \ + int flength; \ + if (NEWTON_PAIR) flength = nall; \ + else flength = nlocal; \ + IP_PRE_get_stride(f_stride, 
flength, sizeof(FORCE_T), \ + separate_flag); \ + if (nghost) { \ + if (nlocal < all_local || ghost_min > all_local) { \ + memmove(x + nlocal, x + ghost_min, \ + (nall - nlocal) * sizeof(ATOM_T)); \ + if (q != 0) \ + memmove((void *)(q + nlocal), (void *)(q + ghost_min), \ + (nall - nlocal) * sizeof(flt_t)); \ + } \ + } \ + } \ + x[nall].x = INTEL_BIGP; \ + x[nall].y = INTEL_BIGP; \ + x[nall].z = INTEL_BIGP; \ + } \ +} + + +#else + +#define MIC_Wtime MPI_Wtime +#define IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, \ + nlocal, nall) + +#define IP_PRE_get_transfern(ago, newton, evflag, eflag, vflag, \ + buffers, offload, fix, separate_flag, \ + x_size, q_size, ev_size, f_stride) \ +{ \ + separate_flag = 0; \ + int f_length; \ + if (newton) \ + f_length = nall; \ + else \ + f_length = nlocal; \ + f_stride = buffers->get_stride(f_length); \ +} + +#define IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, \ + ev_global) \ +{ \ + tc = comm->nthreads; \ + f_start = buffers->get_f(); \ + fix->start_watch(TIME_HOST_PAIR); \ + ev_global = buffers->get_ev_global_host(); \ +} + +#define IP_PRE_repack_for_offload(newton, separate_flag, nlocal, nall, \ + f_stride, x, q) + + +#endif + +#define IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair, delx, dely, delz) \ +{ \ + if (vflag == 1) { \ + sv0 += ev_pre * delx * delx * fpair; \ + sv1 += ev_pre * dely * dely * fpair; \ + sv2 += ev_pre * delz * delz * fpair; \ + sv3 += ev_pre * delx * dely * fpair; \ + sv4 += ev_pre * delx * delz * fpair; \ + sv5 += ev_pre * dely * delz * fpair; \ + } \ +} + +#define IP_PRE_ev_tally_atom(evflag, eflag, vflag, f, fwtmp) \ +{ \ + if (evflag) { \ + if (eflag) { \ + f[i].w += fwtmp; \ + oevdwl += sevdwl; \ + } \ + if (vflag == 1) { \ + ov0 += sv0; \ + ov1 += sv1; \ + ov2 += sv2; \ + ov3 += sv3; \ + ov4 += sv4; \ + ov5 += sv5; \ + } \ + } \ +} + +#define IP_PRE_ev_tally_atomq(evflag, eflag, vflag, f, fwtmp) \ +{ \ + if (evflag) { \ + if (eflag) { \ + f[i].w += fwtmp; \ + oevdwl += sevdwl; \ + 
oecoul += secoul; \ + } \ + if (vflag == 1) { \ + ov0 += sv0; \ + ov1 += sv1; \ + ov2 += sv2; \ + ov3 += sv3; \ + ov4 += sv4; \ + ov5 += sv5; \ + } \ + } \ +} + +#define IP_PRE_fdotr_acc_force(newton, evflag, eflag, vflag, eatom, \ + nall, nlocal, minlocal, nthreads, \ + f_start, f_stride, x) \ +{ \ + int o_range; \ + if (newton) \ + o_range = nall; \ + else \ + o_range = nlocal; \ + if (offload == 0) o_range -= minlocal; \ + IP_PRE_omp_range_align(iifrom, iito, tid, o_range, nthreads, \ + sizeof(acc_t)); \ + \ + int t_off = f_stride; \ + if (eflag && eatom) { \ + for (int t = 1; t < nthreads; t++) { \ + _Pragma("vector nontemporal") \ + for (int n = iifrom; n < iito; n++) { \ + f_start[n].x += f_start[n + t_off].x; \ + f_start[n].y += f_start[n + t_off].y; \ + f_start[n].z += f_start[n + t_off].z; \ + f_start[n].w += f_start[n + t_off].w; \ + } \ + t_off += f_stride; \ + } \ + } else { \ + for (int t = 1; t < nthreads; t++) { \ + _Pragma("vector nontemporal") \ + for (int n = iifrom; n < iito; n++) { \ + f_start[n].x += f_start[n + t_off].x; \ + f_start[n].y += f_start[n + t_off].y; \ + f_start[n].z += f_start[n + t_off].z; \ + } \ + t_off += f_stride; \ + } \ + } \ + \ + if (evflag) { \ + if (vflag == 2) { \ + const ATOM_T * restrict const xo = x + minlocal; \ + _Pragma("vector nontemporal") \ + for (int n = iifrom; n < iito; n++) { \ + ov0 += f_start[n].x * xo[n].x; \ + ov1 += f_start[n].y * xo[n].y; \ + ov2 += f_start[n].z * xo[n].z; \ + ov3 += f_start[n].y * xo[n].x; \ + ov4 += f_start[n].z * xo[n].x; \ + ov5 += f_start[n].z * xo[n].y; \ + } \ + } \ + } \ +} + +} + +#endif diff --git a/src/USER-INTEL/math_extra_intel.h b/src/USER-INTEL/math_extra_intel.h new file mode 100644 index 000000000..62163b3f6 --- /dev/null +++ b/src/USER-INTEL/math_extra_intel.h @@ -0,0 +1,354 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National 
Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: W. Michael Brown (Intel) +------------------------------------------------------------------------- */ + +#ifndef LMP_MATH_EXTRA_INTEL_H +#define LMP_MATH_EXTRA_INTEL_H + +#define ME_quat_to_mat_trans(quat, mat) \ +{ \ + flt_t quat_w = quat.w; \ + flt_t quat_i = quat.i; \ + flt_t quat_j = quat.j; \ + flt_t quat_k = quat.k; \ + flt_t w2 = quat_w * quat_w; \ + flt_t i2 = quat_i * quat_i; \ + flt_t j2 = quat_j * quat_j; \ + flt_t k2 = quat_k * quat_k; \ + flt_t twoij = (flt_t)2.0 * quat_i * quat_j; \ + flt_t twoik = (flt_t)2.0 * quat_i * quat_k; \ + flt_t twojk = (flt_t)2.0 * quat_j * quat_k; \ + flt_t twoiw = (flt_t)2.0 * quat_i * quat_w; \ + flt_t twojw = (flt_t)2.0 * quat_j * quat_w; \ + flt_t twokw = (flt_t)2.0 * quat_k * quat_w; \ + \ + mat##_0 = w2 + i2 - j2 - k2; \ + mat##_3 = twoij - twokw; \ + mat##_6 = twojw + twoik; \ + \ + mat##_1 = twoij + twokw; \ + mat##_4 = w2 - i2 + j2 - k2; \ + mat##_7 = twojk - twoiw; \ + \ + mat##_2 = twoik - twojw; \ + mat##_5 = twojk + twoiw; \ + mat##_8 = w2 - i2 - j2 + k2; \ +} + +/* ---------------------------------------------------------------------- + diagonal matrix times a full matrix +------------------------------------------------------------------------- */ + +#define ME_diag_times3(d, m, ans) \ + { \ + ans##_0 = d[0] * m##_0; \ + ans##_1 = d[0] * m##_1; \ + ans##_2 = d[0] * m##_2; \ + ans##_3 = d[1] * m##_3; \ + ans##_4 = d[1] * m##_4; \ + ans##_5 = d[1] * m##_5; \ + ans##_6 = d[2] * m##_6; \ + 
ans##_7 = d[2] * m##_7; \ + ans##_8 = d[2] * m##_8; \ +} + +#define ME_diag_times3a(d, m, ans) \ + { \ + ans##_0 = d##_0 * m##_0; \ + ans##_1 = d##_0 * m##_1; \ + ans##_2 = d##_0 * m##_2; \ + ans##_3 = d##_1 * m##_3; \ + ans##_4 = d##_1 * m##_4; \ + ans##_5 = d##_1 * m##_5; \ + ans##_6 = d##_2 * m##_6; \ + ans##_7 = d##_2 * m##_7; \ + ans##_8 = d##_2 * m##_8; \ +} + +/* ---------------------------------------------------------------------- + multiply the transpose of mat1 times mat2 +------------------------------------------------------------------------- */ + +#define ME_transpose_times3(m1, m2, ans) \ +{ \ + ans##_0 = m1##_0*m2##_0 + m1##_3*m2##_3 + m1##_6*m2##_6; \ + ans##_1 = m1##_0*m2##_1 + m1##_3*m2##_4 + m1##_6*m2##_7; \ + ans##_2 = m1##_0*m2##_2 + m1##_3*m2##_5 + m1##_6*m2##_8; \ + ans##_3 = m1##_1*m2##_0 + m1##_4*m2##_3 + m1##_7*m2##_6; \ + ans##_4 = m1##_1*m2##_1 + m1##_4*m2##_4 + m1##_7*m2##_7; \ + ans##_5 = m1##_1*m2##_2 + m1##_4*m2##_5 + m1##_7*m2##_8; \ + ans##_6 = m1##_2*m2##_0 + m1##_5*m2##_3 + m1##_8*m2##_6; \ + ans##_7 = m1##_2*m2##_1 + m1##_5*m2##_4 + m1##_8*m2##_7; \ + ans##_8 = m1##_2*m2##_2 + m1##_5*m2##_5 + m1##_8*m2##_8; \ +} + +/* ---------------------------------------------------------------------- + normalize a vector, return in ans +------------------------------------------------------------------------- */ + +#define ME_normalize3(v0, v1, v2, ans) \ +{ \ + flt_t scale = (flt_t)1.0 / sqrt(v0*v0+v1*v1+v2*v2); \ + ans##_0 = v0 * scale; \ + ans##_1 = v1 * scale; \ + ans##_2 = v2 * scale; \ +} + +/* ---------------------------------------------------------------------- + add two matrices +------------------------------------------------------------------------- */ + +#define ME_plus3(m1, m2, ans) \ +{ \ + ans##_0 = m1##_0 + m2##_0; \ + ans##_1 = m1##_1 + m2##_1; \ + ans##_2 = m1##_2 + m2##_2; \ + ans##_3 = m1##_3 + m2##_3; \ + ans##_4 = m1##_4 + m2##_4; \ + ans##_5 = m1##_5 + m2##_5; \ + ans##_6 = m1##_6 + m2##_6; \ + ans##_7 = m1##_7 + 
m2##_7; \ + ans##_8 = m1##_8 + m2##_8; \ +} + +/* ---------------------------------------------------------------------- + dot product of 2 vectors +------------------------------------------------------------------------- */ + +#define ME_dot3(v1, v2) \ + (v1##_0*v2##_0 + v1##_1 * v2##_1 + v1##_2 * v2##_2) + +/* ---------------------------------------------------------------------- + determinant of a matrix +------------------------------------------------------------------------- */ + +#define ME_det3(m) \ + ( m##_0 * m##_4 * m##_8 - m##_0 * m##_5 * m##_7 - \ + m##_3 * m##_1 * m##_8 + m##_3 * m##_2 * m##_7 + \ + m##_6 * m##_1 * m##_5 - m##_6 * m##_2 * m##_4 ) + +/* ---------------------------------------------------------------------- + row vector times matrix +------------------------------------------------------------------------- */ + +#define ME_vecmat(v, m, ans) \ +{ \ + ans##_0 = v##_0 * m##_0 + v##_1 * m##_3 + v##_2 * m##_6; \ + ans##_1 = v##_0 * m##_1 + v##_1 * m##_4 + v##_2 * m##_7; \ + ans##_2 = v##_0 * m##_2 + v##_1 * m##_5 + v##_2 * m##_8; \ +} + +/* ---------------------------------------------------------------------- + cross product of 2 vectors +------------------------------------------------------------------------- */ + +#define ME_cross3(v1, v2, ans) \ +{ \ + ans##_0 = v1##_1 * v2##_2 - v1##_2 * v2##_1; \ + ans##_1 = v1##_2 * v2##_0 - v1##_0 * v2##_2; \ + ans##_2 = v1##_0 * v2##_1 - v1##_1 * v2##_0; \ +} + +/* ---------------------------------------------------------------------- + cross product of 2 vectors +------------------------------------------------------------------------- */ + +#define ME_mv0_cross3(m1, v2, ans) \ +{ \ + ans##_0 = m1##_1 * v2##_2 - m1##_2 * v2##_1; \ + ans##_1 = m1##_2 * v2##_0 - m1##_0 * v2##_2; \ + ans##_2 = m1##_0 * v2##_1 - m1##_1 * v2##_0; \ +} + +#define ME_mv1_cross3(m1, v2, ans) \ +{ \ + ans##_0 = m1##_4 * v2##_2 - m1##_5 * v2##_1; \ + ans##_1 = m1##_5 * v2##_0 - m1##_3 * v2##_2; \ + ans##_2 = m1##_3 * 
v2##_1 - m1##_4 * v2##_0; \ +} + +#define ME_mv2_cross3(m1, v2, ans) \ +{ \ + ans##_0 = m1##_7 * v2##_2 - m1##_8 * v2##_1; \ + ans##_1 = m1##_8 * v2##_0 - m1##_6 * v2##_2; \ + ans##_2 = m1##_6 * v2##_1 - m1##_7 * v2##_0; \ +} + + +#define ME_compute_eta_torque(m1, m2, s1, ans) \ +{ \ + flt_t den = m1##_3*m1##_2*m1##_7-m1##_0*m1##_5*m1##_7- \ + m1##_2*m1##_6*m1##_4+m1##_1*m1##_6*m1##_5- \ + m1##_3*m1##_1*m1##_8+m1##_0*m1##_4*m1##_8; \ + den = (flt_t)1.0 / den; \ + \ + ans##_0 = s1##_0*(m1##_5*m1##_1*m2##_2+(flt_t)2.0*m1##_4*m1##_8*m2##_0- \ + m1##_4*m2##_2*m1##_2-(flt_t)2.0*m1##_5*m2##_0*m1##_7+ \ + m2##_1*m1##_2*m1##_7-m2##_1*m1##_1*m1##_8- \ + m1##_3*m1##_8*m2##_1+m1##_6*m1##_5*m2##_1+ \ + m1##_3*m2##_2*m1##_7-m2##_2*m1##_6*m1##_4)*den; \ + \ + ans##_1 = s1##_0*(m1##_2*m2##_0*m1##_7-m1##_8*m2##_0*m1##_1+ \ + (flt_t)2.0*m1##_0*m1##_8*m2##_1-m1##_0*m2##_2*m1##_5- \ + (flt_t)2.0*m1##_6*m1##_2*m2##_1+m2##_2*m1##_3*m1##_2- \ + m1##_8*m1##_3*m2##_0+m1##_6*m2##_0*m1##_5+ \ + m1##_6*m2##_2*m1##_1-m2##_2*m1##_0*m1##_7)*den; \ + \ + ans##_2 = s1##_0*(m1##_1*m1##_5*m2##_0-m1##_2*m2##_0*m1##_4- \ + m1##_0*m1##_5*m2##_1+m1##_3*m1##_2*m2##_1- \ + m2##_1*m1##_0*m1##_7-m1##_6*m1##_4*m2##_0+ \ + (flt_t)2.0*m1##_4*m1##_0*m2##_2- \ + (flt_t)2.0*m1##_3*m2##_2*m1##_1+ \ + m1##_3*m1##_7*m2##_0+m1##_6*m2##_1*m1##_1)*den; \ + \ + ans##_3 = s1##_1*(-m1##_4*m2##_5*m1##_2+(flt_t)2.0*m1##_4*m1##_8*m2##_3+ \ + m1##_5*m1##_1*m2##_5-(flt_t)2.0*m1##_5*m2##_3*m1##_7+ \ + m2##_4*m1##_2*m1##_7-m2##_4*m1##_1*m1##_8- \ + m1##_3*m1##_8*m2##_4+m1##_6*m1##_5*m2##_4- \ + m2##_5*m1##_6*m1##_4+m1##_3*m2##_5*m1##_7)*den; \ + \ + ans##_4 = s1##_1*(m1##_2*m2##_3*m1##_7-m1##_1*m1##_8*m2##_3+ \ + (flt_t)2.0*m1##_8*m1##_0*m2##_4-m2##_5*m1##_0*m1##_5- \ + (flt_t)2.0*m1##_6*m2##_4*m1##_2-m1##_3*m1##_8*m2##_3+ \ + m1##_6*m1##_5*m2##_3+m1##_3*m2##_5*m1##_2- \ + m1##_0*m2##_5*m1##_7+m2##_5*m1##_1*m1##_6)*den; \ + \ + ans##_5 = s1##_1*(m1##_1*m1##_5*m2##_3-m1##_2*m2##_3*m1##_4- \ + 
m1##_0*m1##_5*m2##_4+m1##_3*m1##_2*m2##_4+ \ + (flt_t)2.0*m1##_4*m1##_0*m2##_5-m1##_0*m2##_4*m1##_7+ \ + m1##_1*m1##_6*m2##_4-m2##_3*m1##_6*m1##_4- \ + (flt_t)2.0*m1##_3*m1##_1*m2##_5+m1##_3*m2##_3*m1##_7)* \ + den; \ + \ + ans##_6 = s1##_2*(-m1##_4*m1##_2*m2##_8+m1##_1*m1##_5*m2##_8+ \ + (flt_t)2.0*m1##_4*m2##_6*m1##_8-m1##_1*m2##_7*m1##_8+ \ + m1##_2*m1##_7*m2##_7-(flt_t)2.0*m2##_6*m1##_7*m1##_5- \ + m1##_3*m2##_7*m1##_8+m1##_5*m1##_6*m2##_7- \ + m1##_4*m1##_6*m2##_8+m1##_7*m1##_3*m2##_8)*den; \ + \ + ans##_7 = s1##_2*-(m1##_1*m1##_8*m2##_6-m1##_2*m2##_6*m1##_7- \ + (flt_t)2.0*m2##_7*m1##_0*m1##_8+m1##_5*m2##_8*m1##_0+ \ + (flt_t)2.0*m2##_7*m1##_2*m1##_6+m1##_3*m2##_6*m1##_8- \ + m1##_3*m1##_2*m2##_8-m1##_5*m1##_6*m2##_6+ \ + m1##_0*m2##_8*m1##_7-m2##_8*m1##_1*m1##_6)*den; \ + \ + ans##_8 = s1##_2*(m1##_1*m1##_5*m2##_6-m1##_2*m2##_6*m1##_4- \ + m1##_0*m1##_5*m2##_7+m1##_3*m1##_2*m2##_7- \ + m1##_4*m1##_6*m2##_6-m1##_7*m2##_7*m1##_0+ \ + (flt_t)2.0*m1##_4*m2##_8*m1##_0+m1##_7*m1##_3*m2##_6+ \ + m1##_6*m1##_1*m2##_7-(flt_t)2.0*m2##_8*m1##_3*m1##_1)* \ + den; \ +} + +#define ME_vcopy4(dst,src) \ + dst##_0 = src##_0; \ + dst##_1 = src##_1; \ + dst##_2 = src##_2; \ + dst##_3 = src##_3; + +#define ME_mldivide3(m1, v_0, v_1, v_2, ans, error) \ +{ \ + flt_t aug_0, aug_1, aug_2, aug_3, aug_4, aug_5; \ + flt_t aug_6, aug_7, aug_8, aug_9, aug_10, aug_11, t; \ + \ + aug_3 = v_0; \ + aug_0 = m1##_0; \ + aug_1 = m1##_1; \ + aug_2 = m1##_2; \ + aug_7 = v_1; \ + aug_4 = m1##_3; \ + aug_5 = m1##_4; \ + aug_6 = m1##_5; \ + aug_11 = v_2; \ + aug_8 = m1##_6; \ + aug_9 = m1##_7; \ + aug_10 = m1##_8; \ + \ + if (fabs(aug_4) > fabs(aug_0)) { \ + flt_t swapt; \ + swapt = aug_0; aug_0 = aug_4; aug_4 = swapt; \ + swapt = aug_1; aug_1 = aug_5; aug_5 = swapt; \ + swapt = aug_2; aug_2 = aug_6; aug_6 = swapt; \ + swapt = aug_3; aug_3 = aug_7; aug_7 = swapt; \ + } \ + if (fabs(aug_8) > fabs(aug_0)) { \ + flt_t swapt; \ + swapt = aug_0; aug_0 = aug_8; aug_8 = swapt; \ + swapt = aug_1; aug_1 = 
aug_9; aug_9 = swapt; \ + swapt = aug_2; aug_2 = aug_10; aug_10 = swapt; \ + swapt = aug_3; aug_3 = aug_11; aug_11 = swapt; \ + } \ + \ + if (aug_0 != (flt_t)0.0) { \ + } else if (aug_4 != (flt_t)0.0) { \ + flt_t swapt; \ + swapt = aug_0; aug_0 = aug_4; aug_4 = swapt; \ + swapt = aug_1; aug_1 = aug_5; aug_5 = swapt; \ + swapt = aug_2; aug_2 = aug_6; aug_6 = swapt; \ + swapt = aug_3; aug_3 = aug_7; aug_7 = swapt; \ + } else if (aug_8 != (flt_t)0.0) { \ + flt_t swapt; \ + swapt = aug_0; aug_0 = aug_8; aug_8 = swapt; \ + swapt = aug_1; aug_1 = aug_9; aug_9 = swapt; \ + swapt = aug_2; aug_2 = aug_10; aug_10 = swapt; \ + swapt = aug_3; aug_3 = aug_11; aug_11 = swapt; \ + } else \ + error = 1; \ + \ + t = aug_4 / aug_0; \ + aug_5 -= t * aug_1; \ + aug_6 -= t * aug_2; \ + aug_7 -= t * aug_3; \ + t = aug_8 / aug_0; \ + aug_9 -= t * aug_1; \ + aug_10 -= t * aug_2; \ + aug_11 -= t * aug_3; \ + \ + if (fabs(aug_9) > fabs(aug_5)) { \ + flt_t swapt; \ + swapt = aug_4; aug_4 = aug_8; aug_8 = swapt; \ + swapt = aug_5; aug_5 = aug_9; aug_9 = swapt; \ + swapt = aug_6; aug_6 = aug_10; aug_10 = swapt; \ + swapt = aug_7; aug_7 = aug_11; aug_11 = swapt; \ + } \ + \ + if (aug_5 != (flt_t)0.0) { \ + } else if (aug_9 != (flt_t)0.0) { \ + flt_t swapt; \ + swapt = aug_4; aug_4 = aug_8; aug_8 = swapt; \ + swapt = aug_5; aug_5 = aug_9; aug_9 = swapt; \ + swapt = aug_6; aug_6 = aug_10; aug_10 = swapt; \ + swapt = aug_7; aug_7 = aug_11; aug_11 = swapt; \ + } \ + \ + t = aug_9 / aug_5; \ + aug_10 -= t * aug_6; \ + aug_11 -= t * aug_7; \ + \ + if (aug_10 == (flt_t)0.0) \ + error = 1; \ + \ + ans##_2 = aug_11/aug_10; \ + t = (flt_t)0.0; \ + t += aug_6 * ans##_2; \ + ans##_1 = (aug_7-t) / aug_5; \ + t = (flt_t)0.0; \ + t += aug_1 * ans##_1; \ + t += aug_2 * ans##_2; \ + ans##_0 = (aug_3 - t) / aug_0; \ +} + +#endif diff --git a/src/USER-INTEL/neigh_half_bin_intel.cpp b/src/USER-INTEL/neigh_half_bin_intel.cpp new file mode 100644 index 000000000..a5f12a56f --- /dev/null +++ 
b/src/USER-INTEL/neigh_half_bin_intel.cpp @@ -0,0 +1,1453 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: W. Michael Brown (Intel) +------------------------------------------------------------------------- */ + +#include "neighbor.h" +#include "neigh_list.h" +#include "atom.h" +#include "comm.h" +#include "group.h" +#include "fix_intel.h" + +#if defined(_OPENMP) +#include +#endif + +using namespace LAMMPS_NS; + +#ifdef _LMP_INTEL_OFFLOAD +#pragma offload_attribute(push,target(mic)) +#endif + +template +inline int mcoord2bin(const flt_t x0, const flt_t x1, const flt_t x2, + const flt_t bboxlo0, const flt_t bboxlo1, + const flt_t bboxlo2, const flt_t bboxhi0, + const flt_t bboxhi1, const flt_t bboxhi2, + const flt_t bininvx, const flt_t bininvy, + const flt_t bininvz, const int nbinx, const int nbiny, + const int nbinz, const int mbinx, const int mbiny, + const int mbinz, const int mbinxlo, const int mbinylo, + const int mbinzlo) +{ + int ix, iy, iz; + + if (x0 >= bboxhi0) + ix = static_cast ((x0 - bboxhi0) * bininvx) + nbinx; + else if (x0 >= bboxlo0) { + ix = static_cast ((x0 - bboxlo0) * bininvx); + ix = MIN(ix, nbinx-1); + } else + ix = static_cast ((x0 - bboxlo0) * bininvx) - 1; + + if (x1 >= bboxhi1) + iy = static_cast ((x1 - bboxhi1) * bininvy) + nbiny; + else if (x1 >= bboxlo1) { + iy = 
static_cast ((x1 - bboxlo1) * bininvy); + iy = MIN(iy, nbiny-1); + } else + iy = static_cast ((x1 - bboxlo1) * bininvy) - 1; + + if (x2 >= bboxhi2) + iz = static_cast ((x2 - bboxhi2) * bininvz) + nbinz; + else if (x2 >= bboxlo2) { + iz = static_cast ((x2 - bboxlo2) * bininvz); + iz = MIN(iz, nbinz - 1); + } else + iz = static_cast ((x2 - bboxlo2) * bininvz) - 1; + + return (iz - mbinzlo) * mbiny * mbinx + (iy - mbinylo) * mbinx + + (ix - mbinxlo); +} + +#define ofind_special(which, special, nspecial, i, tag, special_flag) \ +{ \ + which = 0; \ + const int n1 = nspecial[i * 3]; \ + const int n2 = nspecial[i * 3 + 1]; \ + const int n3 = nspecial[i * 3 + 2]; \ + const int *sptr = special + i * maxspecial; \ + for (int s = 0; s < n3; s++) { \ + if (sptr[s] == tag) { \ + if (s < n1) { \ + if (special_flag[1] == 0) which = -1; \ + else if (special_flag[1] == 1) which = 0; \ + else which = 1; \ + } else if (s < n2) { \ + if (special_flag[2] == 0) which = -1; \ + else if (special_flag[2] == 1) which = 0; \ + else which = 2; \ + } else { \ + if (special_flag[3] == 0) which = -1; \ + else if (special_flag[3] == 1) which = 0; \ + else which = 3; \ + } \ + } \ + } \ +} + +#ifdef _LMP_INTEL_OFFLOAD +#pragma offload_attribute(pop) +#endif + +template +void Neighbor::bin_atoms(void * xin) { + const ATOM_T * restrict const x = (const ATOM_T * restrict const)xin; + int nlocal = atom->nlocal; + const int nall = nlocal + atom->nghost; + + const flt_t bininvx = this->bininvx; + const flt_t bininvy = this->bininvy; + const flt_t bininvz = this->bininvz; + const flt_t bboxlo0 = this->bboxlo[0]; + const flt_t bboxlo1 = this->bboxlo[1]; + const flt_t bboxlo2 = this->bboxlo[2]; + const flt_t bboxhi0 = this->bboxhi[0]; + const flt_t bboxhi1 = this->bboxhi[1]; + const flt_t bboxhi2 = this->bboxhi[2]; + + int i, ibin; + + for (i = 0; i < mbins; i++) binhead[i] = -1; + + int *mask = atom->mask; + + if (includegroup) { + int bitmask = group->bitmask[includegroup]; + for (i = nall-1; i >= 
nlocal; i--) { + if (mask[i] & bitmask) { + ibin = mcoord2bin(x[i].x, x[i].y, x[i].z, bboxlo0, bboxlo1, bboxlo2, + bboxhi0, bboxhi1, bboxhi2, bininvx, bininvy, bininvz, nbinx, nbiny, + nbinz, mbinx, mbiny, mbinz, mbinxlo, mbinylo, mbinzlo); + bins[i] = binhead[ibin]; + binhead[ibin] = i; + } + } + for (i = atom->nfirst-1; i >= 0; i--) { + ibin = mcoord2bin(x[i].x, x[i].y, x[i].z, bboxlo0, bboxlo1, bboxlo2, + bboxhi0, bboxhi1, bboxhi2, bininvx, bininvy, bininvz, nbinx, nbiny, + nbinz, mbinx, mbiny, mbinz, mbinxlo, mbinylo, mbinzlo); + bins[i] = binhead[ibin]; + binhead[ibin] = i; + } + } else { + for (i = nall-1; i >= 0; i--) { + ibin = mcoord2bin(x[i].x, x[i].y, x[i].z, bboxlo0, bboxlo1, bboxlo2, + bboxhi0, bboxhi1, bboxhi2, bininvx, bininvy, bininvz, nbinx, nbiny, + nbinz, mbinx, mbiny, mbinz, mbinxlo, mbinylo, mbinzlo); + bins[i] = binhead[ibin]; + binhead[ibin] = i; + } + } +} + +/* ---------------------------------------------------------------------- + binned neighbor list construction with partial Newton's 3rd law + each owned atom i checks own bin and other bins in stencil + pair stored once if i,j are both owned and i < j + pair stored by me if j is ghost (also stored by proc owning j) +------------------------------------------------------------------------- */ + +void Neighbor::half_bin_no_newton_intel(NeighList *list) +{ + const int nlocal = (includegroup) ? 
atom->nfirst : atom->nlocal; + list->inum = nlocal; + + // Get fix for intel stuff + FixIntel *fix = static_cast(fix_intel); + + const int off_end = fix->offload_end_neighbor(); + int host_start = off_end;; + #ifdef _LMP_INTEL_OFFLOAD + if (fix->full_host_list()) host_start = 0; + if (exclude) + error->all(FLERR, "Exclusion lists not yet supported for Intel offload"); + #endif + + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + hbnni(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + hbnni(0, list, fix->get_mixed_buffers(), + host_start, nlocal,fix); + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + hbnni(1, list, fix->get_double_buffers(), + 0, off_end, fix); + hbnni(0, list, fix->get_double_buffers(), + host_start, nlocal, fix); + } else { + hbnni(1, list, fix->get_single_buffers(), + 0, off_end, fix); + hbnni(0, list, fix->get_single_buffers(), + host_start, nlocal, fix); + } +} + +template +void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in, + const int astart, const int aend, void *fix_in) { + IntelBuffers *buffers = (IntelBuffers *)buffers_in; + FixIntel *fix = (FixIntel *)fix_in; + const int nall = atom->nlocal + atom->nghost; + int pad = 1; + + if (offload) { + fix->start_watch(TIME_PACK); + buffers->grow(nall, atom->nlocal, comm->nthreads, aend); + buffers->grow_nbor(list, atom->nlocal, aend); + + ATOM_T biga; + biga.x = INTEL_BIGP; + biga.y = INTEL_BIGP; + biga.z = INTEL_BIGP; + biga.w = 1; + buffers->get_x()[nall] = biga; + + const int nthreads = comm->nthreads; + #if defined(_OPENMP) + #pragma omp parallel default(none) shared(buffers) + #endif + { + int ifrom, ito, tid; + IP_PRE_omp_range_id_align(ifrom, ito, tid, nall, nthreads, + sizeof(ATOM_T)); + buffers->thr_pack(ifrom, ito, 0); + } + fix->stop_watch(TIME_PACK); + + fix->start_watch(TIME_HOST_NEIGHBOR); + bin_atoms(buffers->get_x()); + if (INTEL_MIC_NBOR_PAD > 1) + pad = INTEL_MIC_NBOR_PAD * sizeof(float) / sizeof(flt_t); + } else { + 
fix->start_watch(TIME_HOST_NEIGHBOR); + if (INTEL_NBOR_PAD > 1) + pad = INTEL_NBOR_PAD * sizeof(float) / sizeof(flt_t); + } + const int pad_width = pad; + + if (aend-astart == 0) { + fix->stop_watch(TIME_HOST_NEIGHBOR); + return; + } + + const ATOM_T * restrict const x = buffers->get_x(); + int * restrict const firstneigh = buffers->firstneigh(list); + + const int molecular = atom->molecular; + int *ns = NULL, *s = NULL; + int tag_size, special_size; + if (molecular) { + s = atom->special[0]; + ns = atom->nspecial[0]; + tag_size = nall; + special_size = aend; + } else { + s = &buffers->_special_holder; + ns = &buffers->_nspecial_holder; + tag_size = 0; + special_size = 0; + } + const int * restrict const special = s; + const int * restrict const nspecial = ns; + const int maxspecial = atom->maxspecial; + const int * restrict const tag = atom->tag; + + int * restrict const ilist = list->ilist; + int * restrict numneigh = list->numneigh; + int * restrict const cnumneigh = buffers->cnumneigh(list); + const int nstencil = list->nstencil; + const int * restrict const stencil = list->stencil; + const flt_t * restrict const cutneighsq = buffers->get_cutneighsq()[0]; + const int ntypes = atom->ntypes + 1; + const int nlocal = atom->nlocal; + + #ifndef _LMP_INTEL_OFFLOAD + int * const mask = atom->mask; + int * const molecule = atom->molecule; + #endif + + int tnum; + int *overflow; + double *timer_compute; + if (offload) { + timer_compute = fix->off_watch_neighbor(); + tnum = buffers->get_off_threads(); + overflow = fix->get_off_overflow_flag(); + fix->stop_watch(TIME_HOST_NEIGHBOR); + fix->start_watch(TIME_OFFLOAD_LATENCY); + } else { + tnum = comm->nthreads; + overflow = fix->get_overflow_flag(); + } + const int nthreads = tnum; + const int maxnbors = buffers->get_max_nbors(); + + const flt_t bboxlo0 = this->bboxlo[0]; + const flt_t bboxlo1 = this->bboxlo[1]; + const flt_t bboxlo2 = this->bboxlo[2]; + const flt_t bboxhi0 = this->bboxhi[0]; + const flt_t bboxhi1 = 
this->bboxhi[1]; + const flt_t bboxhi2 = this->bboxhi[2]; + const flt_t bininvx = this->bininvx; + const flt_t bininvy = this->bininvy; + const flt_t bininvz = this->bininvz; + + // Make sure dummy coordinates to eliminate loop remainder not within cutoff + { + const flt_t dx = (INTEL_BIGP - bboxhi0); + const flt_t dy = (INTEL_BIGP - bboxhi1); + const flt_t dz = (INTEL_BIGP - bboxhi2); + if (dx * dx + dy * dy + dz * dz < static_cast(cutneighmaxsq)) + error->one(FLERR, + "Intel package expects no atoms within cutoff of {1e15,1e15,1e15}."); + } + + #ifdef _LMP_INTEL_OFFLOAD + const int * restrict const binhead = this->binhead; + const int * restrict const special_flag = this->special_flag; + const int nbinx = this->nbinx; + const int nbiny = this->nbiny; + const int nbinz = this->nbinz; + const int mbinxlo = this->mbinxlo; + const int mbinylo = this->mbinylo; + const int mbinzlo = this->mbinzlo; + const int mbinx = this->mbinx; + const int mbiny = this->mbiny; + const int mbinz = this->mbinz; + const int * restrict const bins = this->bins; + const int cop = fix->coprocessor_number(); + const int separate_buffers = fix->separate_buffers(); + #pragma offload target(mic:cop) if(offload) \ + in(x:length(nall+1) alloc_if(0) free_if(0)) \ + in(tag:length(tag_size) alloc_if(0) free_if(0)) \ + in(special:length(special_size*maxspecial) alloc_if(0) free_if(0)) \ + in(nspecial:length(special_size*3) alloc_if(0) free_if(0)) \ + in(bins:length(nall) alloc_if(0) free_if(0)) \ + in(binhead:length(mbins) alloc_if(0) free_if(0)) \ + in(cutneighsq:length(0) alloc_if(0) free_if(0)) \ + in(firstneigh:length(0) alloc_if(0) free_if(0)) \ + in(cnumneigh:length(0) alloc_if(0) free_if(0)) \ + out(numneigh:length(0) alloc_if(0) free_if(0)) \ + in(ilist:length(0) alloc_if(0) free_if(0)) \ + in(stencil:length(nstencil) alloc_if(0) free_if(0)) \ + in(special_flag:length(0) alloc_if(0) free_if(0)) \ + in(maxnbors,nthreads,maxspecial,nstencil,nbinx,nbiny,nbinz) \ + 
in(mbinxlo,mbinylo,mbinzlo,mbinx,mbiny,mbinz,pad_width,offload) \ + in(bininvx,bininvy,bininvz,bboxlo0,bboxlo1,bboxlo2,separate_buffers) \ + in(bboxhi0, bboxhi1, bboxhi2, astart, aend, nlocal, molecular, ntypes) \ + out(overflow:length(5) alloc_if(0) free_if(0)) \ + out(timer_compute:length(1) alloc_if(0) free_if(0)) \ + signal(numneigh) + #endif + { + #ifdef __MIC__ + *timer_compute = MIC_Wtime(); + #endif + + #ifdef _LMP_INTEL_OFFLOAD + overflow[LMP_LOCAL_MIN] = astart; + overflow[LMP_LOCAL_MAX] = aend - 1; + overflow[LMP_GHOST_MIN] = nall; + overflow[LMP_GHOST_MAX] = -1; + #endif + + #if defined(_OPENMP) + #pragma omp parallel default(none) shared(numneigh,overflow) + #endif + { + #ifdef _LMP_INTEL_OFFLOAD + int lmin = nall, lmax = -1, gmin = nall, gmax = -1; + #endif + + const int num = aend - astart; + int tid, ifrom, ito; + IP_PRE_omp_range_id(ifrom, ito, tid, num, nthreads); + ifrom += astart; + ito += astart; + + int which; + + const int list_size = (ito + tid + 1) * maxnbors; + int ct = (ifrom + tid) * maxnbors; + int *neighptr = firstneigh + ct; + for (int i = ifrom; i < ito; i++) { + int j, k, n, n2, itype, jtype, ibin; + double xtmp, ytmp, ztmp, delx, dely, delz, rsq; + + n = 0; + n2 = maxnbors; + + xtmp = x[i].x; + ytmp = x[i].y; + ztmp = x[i].z; + itype = x[i].w; + const int ioffset = ntypes*itype; + + // loop over all atoms in other bins in stencil including self + // only store pair if i < j + // stores own/own pairs only once + // stores own/ghost pairs on both procs + + ibin = mcoord2bin(x[i].x, x[i].y, x[i].z, bboxlo0, bboxlo1, bboxlo2, + bboxhi0, bboxhi1, bboxhi2, bininvx, bininvy, bininvz, + nbinx, nbiny, nbinz, mbinx, mbiny, mbinz, + mbinxlo, mbinylo, mbinzlo); + + for (k = 0; k < nstencil; k++) { + for (j = binhead[ibin + stencil[k]]; j >= 0; j = bins[j]) { + if (j <= i) continue; + + jtype = x[j].w; + #ifndef _LMP_INTEL_OFFLOAD + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + #endif + + delx = xtmp - x[j].x; + dely = 
ytmp - x[j].y; + delz = ztmp - x[j].z; + rsq = delx * delx + dely * dely + delz * delz; + if (rsq <= cutneighsq[ioffset + jtype]) { + if (j < nlocal) { + neighptr[n++] = j; + #ifdef _LMP_INTEL_OFFLOAD + if (j < lmin) lmin = j; + if (j > lmax) lmax = j; + #endif + } else { + neighptr[n2++] = j; + #ifdef _LMP_INTEL_OFFLOAD + if (j < gmin) gmin = j; + if (j > gmax) gmax = j; + #endif + } + } + } + } + ilist[i] = i; + + cnumneigh[i] = ct; + if (n > maxnbors) *overflow = 1; + for (k = maxnbors; k < n2; k++) neighptr[n++] = neighptr[k]; + while( (n % pad_width) != 0 ) neighptr[n++] = nall; + numneigh[i] = n; + while((n % (INTEL_DATA_ALIGN / sizeof(int))) != 0) n++; + ct += n; + neighptr += n; + if (ct + n + maxnbors > list_size) { + *overflow = 1; + ct = (ifrom + tid) * maxnbors; + } + } + + if (*overflow == 1) + for (int i = ifrom; i < ito; i++) + numneigh[i] = 0; + + #ifdef _LMP_INTEL_OFFLOAD + if (separate_buffers) { + #if defined(_OPENMP) + #pragma omp critical + #endif + { + if (lmin < overflow[LMP_LOCAL_MIN]) overflow[LMP_LOCAL_MIN] = lmin; + if (lmax > overflow[LMP_LOCAL_MAX]) overflow[LMP_LOCAL_MAX] = lmax; + if (gmin < overflow[LMP_GHOST_MIN]) overflow[LMP_GHOST_MIN] = gmin; + if (gmax > overflow[LMP_GHOST_MAX]) overflow[LMP_GHOST_MAX] = gmax; + } + #pragma omp barrier + } + + int ghost_offset = 0, nall_offset = nall; + if (separate_buffers) { + int nghost = overflow[LMP_GHOST_MAX] + 1 - overflow[LMP_GHOST_MIN]; + if (nghost < 0) nghost = 0; + if (offload) { + ghost_offset = overflow[LMP_GHOST_MIN] - overflow[LMP_LOCAL_MAX] - 1; + nall_offset = overflow[LMP_LOCAL_MAX] + 1 + nghost; + } else { + ghost_offset = overflow[LMP_GHOST_MIN] - nlocal; + nall_offset = nlocal + nghost; + } + } + #endif + + if (molecular) { + for (int i = ifrom; i < ito; ++i) { + int * restrict jlist = firstneigh + cnumneigh[i]; + const int jnum = numneigh[i]; + for (int jj = 0; jj < jnum; jj++) { + const int j = jlist[jj]; + ofind_special(which, special, nspecial, i, tag[j], special_flag); 
+ #ifdef _LMP_INTEL_OFFLOAD + if (j >= nlocal) { + if (j == nall) + jlist[jj] = nall_offset; + else if (which > 0) + jlist[jj] = (j-ghost_offset) ^ (which << SBBITS); + else jlist[jj]-=ghost_offset; + } else + #endif + if (which > 0) jlist[jj] = j ^ (which << SBBITS); + } + } + } + #ifdef _LMP_INTEL_OFFLOAD + else if (separate_buffers) { + for (int i = ifrom; i < ito; ++i) { + int * restrict jlist = firstneigh + cnumneigh[i]; + const int jnum = numneigh[i]; + int jj = 0; + for (jj = 0; jj < jnum; jj++) + if (jlist[jj] >= nlocal) break; + while (jj < jnum) { + if (jlist[jj] == nall) jlist[jj] = nall_offset; + else jlist[jj] -= ghost_offset; + jj++; + } + } + } + #endif + } // end omp + #ifdef __MIC__ + *timer_compute = MIC_Wtime() - *timer_compute; + #endif + } // end offload + + if (offload) { + fix->stop_watch(TIME_OFFLOAD_LATENCY); + #ifdef _LMP_INTEL_OFFLOAD + for (int n = 0; n < aend; n++) { + ilist[n] = n; + numneigh[n] = 0; + } + #endif + } else { + for (int i = astart; i < aend; i++) + list->firstneigh[i] = firstneigh + cnumneigh[i]; + fix->stop_watch(TIME_HOST_NEIGHBOR); + #ifdef _LMP_INTEL_OFFLOAD + if (separate_buffers) { + fix->start_watch(TIME_PACK); + fix->set_neighbor_host_sizes(); + buffers->pack_sep_from_single(fix->host_min_local(), + fix->host_used_local(), + fix->host_min_ghost(), + fix->host_used_ghost()); + fix->stop_watch(TIME_PACK); + } + #endif + } +} + +/* ---------------------------------------------------------------------- + binned neighbor list construction with full Newton's 3rd law + each owned atom i checks its own bin and other bins in Newton stencil + every pair stored exactly once by some processor +------------------------------------------------------------------------- */ + +void Neighbor::half_bin_newton_intel(NeighList *list) +{ + const int nlocal = (includegroup) ? 
atom->nfirst : atom->nlocal; + list->inum = nlocal; + + // Get fix for intel stuff + FixIntel *fix = static_cast(fix_intel); + + const int off_end = fix->offload_end_neighbor(); + int host_start = fix->host_start_neighbor();; + int offload_noghost = 0; + #ifdef _LMP_INTEL_OFFLOAD + if (fix->full_host_list()) host_start = 0; + offload_noghost = fix->offload_noghost(); + if (exclude) + error->all(FLERR, "Exclusion lists not yet supported for Intel offload"); + #endif + + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + if (offload_noghost) { + hbni(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + hbni(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix, off_end); + } else { + hbni(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + hbni(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix); + } + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + if (offload_noghost) { + hbni(1, list, fix->get_double_buffers(), + 0, off_end, fix); + hbni(0, list, fix->get_double_buffers(), + host_start, nlocal, fix, off_end); + } else { + hbni(1, list, fix->get_double_buffers(), + 0, off_end, fix); + hbni(0, list, fix->get_double_buffers(), + host_start, nlocal, fix); + } + } else { + if (offload_noghost) { + hbni(1, list, fix->get_single_buffers(), 0, off_end, fix); + hbni(0, list, fix->get_single_buffers(), + host_start, nlocal, fix, off_end); + } else { + hbni(1, list, fix->get_single_buffers(), 0, off_end, fix); + hbni(0, list, fix->get_single_buffers(), + host_start, nlocal, fix); + } + } +} + +template +void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in, + const int astart, const int aend, void *fix_in, + const int offload_end) { + IntelBuffers *buffers = (IntelBuffers *)buffers_in; + FixIntel *fix = (FixIntel *)fix_in; + const int nall = atom->nlocal + atom->nghost; + int pad = 1; + + if (offload) { + fix->start_watch(TIME_PACK); + buffers->grow(nall, atom->nlocal, comm->nthreads, aend); + 
buffers->grow_nbor(list, atom->nlocal, aend); + + ATOM_T biga; + biga.x = INTEL_BIGP; + biga.y = INTEL_BIGP; + biga.z = INTEL_BIGP; + biga.w = 1; + buffers->get_x()[nall]=biga; + + const int nthreads = comm->nthreads; + #if defined(_OPENMP) + #pragma omp parallel default(none) shared(buffers) + #endif + { + int ifrom, ito, tid; + IP_PRE_omp_range_id_align(ifrom, ito, tid, nall, nthreads, + sizeof(ATOM_T)); + buffers->thr_pack(ifrom, ito, 0); + } + fix->stop_watch(TIME_PACK); + + fix->start_watch(TIME_HOST_NEIGHBOR); + bin_atoms(buffers->get_x()); + if (INTEL_MIC_NBOR_PAD > 1) + pad = INTEL_MIC_NBOR_PAD * sizeof(float) / sizeof(flt_t); + } else { + fix->start_watch(TIME_HOST_NEIGHBOR); + if (INTEL_NBOR_PAD > 1) + pad = INTEL_NBOR_PAD * sizeof(float) / sizeof(flt_t); + } + const int pad_width = pad; + + if (aend-astart == 0) { + fix->stop_watch(TIME_HOST_NEIGHBOR); + return; + } + + const ATOM_T * restrict const x = buffers->get_x(); + int * restrict const firstneigh = buffers->firstneigh(list); + int nall_t = nall; + if (offload_noghost && offload) nall_t = atom->nlocal; + const int e_nall = nall_t; + + const int molecular = atom->molecular; + int *ns = NULL, *s = NULL; + int tag_size, special_size; + if (molecular) { + s = atom->special[0]; + ns = atom->nspecial[0]; + tag_size = e_nall; + special_size = aend; + } else { + s = &buffers->_special_holder; + ns = &buffers->_nspecial_holder; + tag_size = 0; + special_size = 0; + } + const int * restrict const special = s; + const int * restrict const nspecial = ns; + const int maxspecial = atom->maxspecial; + const int * restrict const tag = atom->tag; + + int * restrict const ilist = list->ilist; + int * restrict numneigh = list->numneigh; + int * restrict const cnumneigh = buffers->cnumneigh(list); + const int nstencil = list->nstencil; + const int * restrict const stencil = list->stencil; + const flt_t * restrict const cutneighsq = buffers->get_cutneighsq()[0]; + const int ntypes = atom->ntypes + 1; + const int 
nlocal = atom->nlocal; + + #ifndef _LMP_INTEL_OFFLOAD + int * const mask = atom->mask; + int * const molecule = atom->molecule; + #endif + + int tnum; + int *overflow; + double *timer_compute; + if (offload) { + timer_compute = fix->off_watch_neighbor(); + tnum = buffers->get_off_threads(); + overflow = fix->get_off_overflow_flag(); + fix->stop_watch(TIME_HOST_NEIGHBOR); + fix->start_watch(TIME_OFFLOAD_LATENCY); + } else { + tnum = comm->nthreads; + overflow = fix->get_overflow_flag(); + } + const int nthreads = tnum; + const int maxnbors = buffers->get_max_nbors(); + + const flt_t bboxlo0 = this->bboxlo[0]; + const flt_t bboxlo1 = this->bboxlo[1]; + const flt_t bboxlo2 = this->bboxlo[2]; + const flt_t bboxhi0 = this->bboxhi[0]; + const flt_t bboxhi1 = this->bboxhi[1]; + const flt_t bboxhi2 = this->bboxhi[2]; + const flt_t bininvx = this->bininvx; + const flt_t bininvy = this->bininvy; + const flt_t bininvz = this->bininvz; + // Make sure dummy coordinates to eliminate loop remainder not within cutoff + { + const flt_t dx = (INTEL_BIGP - bboxhi0); + const flt_t dy = (INTEL_BIGP - bboxhi1); + const flt_t dz = (INTEL_BIGP - bboxhi2); + if (dx * dx + dy * dy + dz * dz < static_cast(cutneighmaxsq)) + error->one(FLERR, + "Intel package expects no atoms within cutoff of {1e15,1e15,1e15}."); + } + + #ifdef _LMP_INTEL_OFFLOAD + const int * restrict const binhead = this->binhead; + const int * restrict const special_flag = this->special_flag; + const int nbinx = this->nbinx; + const int nbiny = this->nbiny; + const int nbinz = this->nbinz; + const int mbinxlo = this->mbinxlo; + const int mbinylo = this->mbinylo; + const int mbinzlo = this->mbinzlo; + const int mbinx = this->mbinx; + const int mbiny = this->mbiny; + const int mbinz = this->mbinz; + const int * restrict const bins = this->bins; + const int cop = fix->coprocessor_number(); + const int separate_buffers = fix->separate_buffers(); + #pragma offload target(mic:cop) if(offload) \ + in(x:length(e_nall+1) alloc_if(0) 
free_if(0)) \ + in(tag:length(tag_size) alloc_if(0) free_if(0)) \ + in(special:length(special_size*maxspecial) alloc_if(0) free_if(0)) \ + in(nspecial:length(special_size*3) alloc_if(0) free_if(0)) \ + in(bins:length(nall) alloc_if(0) free_if(0)) \ + in(binhead:length(mbins) alloc_if(0) free_if(0)) \ + in(cutneighsq:length(0) alloc_if(0) free_if(0)) \ + in(firstneigh:length(0) alloc_if(0) free_if(0)) \ + in(cnumneigh:length(0) alloc_if(0) free_if(0)) \ + out(numneigh:length(0) alloc_if(0) free_if(0)) \ + in(ilist:length(0) alloc_if(0) free_if(0)) \ + in(stencil:length(nstencil) alloc_if(0) free_if(0)) \ + in(special_flag:length(0) alloc_if(0) free_if(0)) \ + in(maxnbors,nthreads,maxspecial,nstencil,nbinx,nbiny,nbinz,e_nall,offload)\ + in(mbinxlo,mbinylo,mbinzlo,mbinx,mbiny,mbinz,pad_width,offload_end) \ + in(bininvx,bininvy,bininvz,bboxlo0,bboxlo1,bboxlo2,separate_buffers) \ + in(bboxhi0, bboxhi1, bboxhi2, astart, aend, nlocal, molecular, ntypes) \ + out(overflow:length(5) alloc_if(0) free_if(0)) \ + out(timer_compute:length(1) alloc_if(0) free_if(0)) \ + signal(numneigh) + #endif + { + #ifdef __MIC__ + *timer_compute = MIC_Wtime(); + #endif + + #ifdef _LMP_INTEL_OFFLOAD + overflow[LMP_LOCAL_MIN] = astart; + overflow[LMP_LOCAL_MAX] = aend - 1; + overflow[LMP_GHOST_MIN] = e_nall; + overflow[LMP_GHOST_MAX] = -1; + #endif + + #if defined(_OPENMP) + #pragma omp parallel default(none) shared(numneigh, overflow) + #endif + { + #ifdef _LMP_INTEL_OFFLOAD + int lmin = e_nall, lmax = -1, gmin = e_nall, gmax = -1; + #endif + + const int num = aend - astart; + int tid, ifrom, ito; + IP_PRE_omp_range_id(ifrom, ito, tid, num, nthreads); + ifrom += astart; + ito += astart; + + int which; + + const int list_size = (ito + tid + 1) * maxnbors; + int ct = (ifrom + tid) * maxnbors; + int *neighptr = firstneigh + ct; + for (int i = ifrom; i < ito; i++) { + int j, k, n, n2, itype, jtype, ibin; + double xtmp, ytmp, ztmp, delx, dely, delz, rsq; + + n = 0; + n2 = maxnbors; + + xtmp = 
x[i].x; + ytmp = x[i].y; + ztmp = x[i].z; + itype = x[i].w; + const int ioffset = ntypes * itype; + + // loop over rest of atoms in i's bin, ghosts are at end of linked list + // if j is owned atom, store it, since j is beyond i in linked list + // if j is ghost, only store if j coords are "above/to the right" of i + + for (j = bins[i]; j >= 0; j = bins[j]) { + if (j >= nlocal) { + if (offload_noghost && offload) continue; + if (x[j].z < ztmp) continue; + if (x[j].z == ztmp) { + if (x[j].y < ytmp) continue; + if (x[j].y == ytmp && x[j].x < xtmp) continue; + } + } else if (offload_noghost && i < offload_end) continue; + + jtype = x[j].w; + #ifndef _LMP_INTEL_OFFLOAD + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + #endif + + delx = xtmp - x[j].x; + dely = ytmp - x[j].y; + delz = ztmp - x[j].z; + rsq = delx * delx + dely * dely + delz * delz; + + if (rsq <= cutneighsq[ioffset + jtype]) { + if (j < nlocal) { + neighptr[n++] = j; + #ifdef _LMP_INTEL_OFFLOAD + if (j < lmin) lmin = j; + if (j > lmax) lmax = j; + #endif + } else { + neighptr[n2++] = j; + #ifdef _LMP_INTEL_OFFLOAD + if (j < gmin) gmin = j; + if (j > gmax) gmax = j; + #endif + } + } + } + // loop over all atoms in other bins in stencil, store every pair + + ibin = mcoord2bin(x[i].x, x[i].y, x[i].z, bboxlo0, bboxlo1, bboxlo2, + bboxhi0, bboxhi1, bboxhi2, bininvx, bininvy, bininvz, + nbinx, nbiny, nbinz, mbinx, mbiny, mbinz, + mbinxlo, mbinylo, mbinzlo); + + for (k = 0; k < nstencil; k++) { + for (j = binhead[ibin + stencil[k]]; j >= 0; j = bins[j]) { + if (offload_noghost) { + if (j < nlocal) { + if (i < offload_end) continue; + } else if (offload) continue; + } + + jtype = x[j].w; + #ifndef _LMP_INTEL_OFFLOAD + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + #endif + + delx = xtmp - x[j].x; + dely = ytmp - x[j].y; + delz = ztmp - x[j].z; + rsq = delx * delx + dely * dely + delz * delz; + if (rsq <= cutneighsq[ioffset + jtype]) { + if (j < nlocal) { + neighptr[n++] 
= j; + #ifdef _LMP_INTEL_OFFLOAD + if (j < lmin) lmin = j; + if (j > lmax) lmax = j; + #endif + } else { + neighptr[n2++] = j; + #ifdef _LMP_INTEL_OFFLOAD + if (j < gmin) gmin = j; + if (j > gmax) gmax = j; + #endif + } + } + } + } + ilist[i] = i; + + cnumneigh[i] = ct; + if (n > maxnbors) *overflow = 1; + for (k = maxnbors; k < n2; k++) neighptr[n++] = neighptr[k]; + while( (n % pad_width) != 0 ) neighptr[n++] = e_nall; + numneigh[i] = n; + while((n % (INTEL_DATA_ALIGN / sizeof(int))) != 0) n++; + ct += n; + neighptr += n; + if (ct + n + maxnbors > list_size) { + *overflow = 1; + ct = (ifrom + tid) * maxnbors; + } + } + + if (*overflow == 1) + for (int i = ifrom; i < ito; i++) + numneigh[i] = 0; + + #ifdef _LMP_INTEL_OFFLOAD + if (separate_buffers) { + #if defined(_OPENMP) + #pragma omp critical + #endif + { + if (lmin < overflow[LMP_LOCAL_MIN]) overflow[LMP_LOCAL_MIN] = lmin; + if (lmax > overflow[LMP_LOCAL_MAX]) overflow[LMP_LOCAL_MAX] = lmax; + if (gmin < overflow[LMP_GHOST_MIN]) overflow[LMP_GHOST_MIN] = gmin; + if (gmax > overflow[LMP_GHOST_MAX]) overflow[LMP_GHOST_MAX] = gmax; + } + #pragma omp barrier + } + + int ghost_offset = 0, nall_offset = e_nall; + if (separate_buffers) { + int nghost = overflow[LMP_GHOST_MAX] + 1 - overflow[LMP_GHOST_MIN]; + if (nghost < 0) nghost = 0; + if (offload) { + ghost_offset = overflow[LMP_GHOST_MIN] - overflow[LMP_LOCAL_MAX] - 1; + nall_offset = overflow[LMP_LOCAL_MAX] + 1 + nghost; + } else { + ghost_offset = overflow[LMP_GHOST_MIN] - nlocal; + nall_offset = nlocal + nghost; + } + } + #endif + + if (molecular) { + for (int i = ifrom; i < ito; ++i) { + int * restrict jlist = firstneigh + cnumneigh[i]; + const int jnum = numneigh[i]; + for (int jj = 0; jj < jnum; jj++) { + const int j = jlist[jj]; + ofind_special(which, special, nspecial, i, tag[j], + special_flag); + #ifdef _LMP_INTEL_OFFLOAD + if (j >= nlocal) { + if (j == e_nall) + jlist[jj] = nall_offset; + else if (which > 0) + jlist[jj] = (j-ghost_offset) ^ (which << 
SBBITS); + else jlist[jj]-=ghost_offset; + } else + #endif + if (which > 0) jlist[jj] = j ^ (which << SBBITS); + } + } + } + #ifdef _LMP_INTEL_OFFLOAD + else if (separate_buffers) { + for (int i = ifrom; i < ito; ++i) { + int * restrict jlist = firstneigh + cnumneigh[i]; + const int jnum = numneigh[i]; + int jj = 0; + for (jj = 0; jj < jnum; jj++) + if (jlist[jj] >= nlocal) break; + while (jj < jnum) { + if (jlist[jj] == e_nall) jlist[jj] = nall_offset; + else jlist[jj] -= ghost_offset; + jj++; + } + } + } + #endif + } // end omp + #ifdef __MIC__ + *timer_compute = MIC_Wtime() - *timer_compute; + #endif + } // end offload + + if (offload) { + fix->stop_watch(TIME_OFFLOAD_LATENCY); + #ifdef _LMP_INTEL_OFFLOAD + for (int n = 0; n < aend; n++) { + ilist[n] = n; + numneigh[n] = 0; + } + #endif + } else { + for (int i = astart; i < aend; i++) + list->firstneigh[i] = firstneigh + cnumneigh[i]; + fix->stop_watch(TIME_HOST_NEIGHBOR); + #ifdef _LMP_INTEL_OFFLOAD + if (separate_buffers) { + fix->start_watch(TIME_PACK); + fix->set_neighbor_host_sizes(); + buffers->pack_sep_from_single(fix->host_min_local(), + fix->host_used_local(), + fix->host_min_ghost(), + fix->host_used_ghost()); + fix->stop_watch(TIME_PACK); + } + #endif + } +} + +/* ---------------------------------------------------------------------- + binned neighbor list construction with Newton's 3rd law for triclinic + each owned atom i checks its own bin and other bins in triclinic stencil + every pair stored exactly once by some processor +------------------------------------------------------------------------- */ + +void Neighbor::half_bin_newton_tri_intel(NeighList *list) +{ + const int nlocal = (includegroup) ? 
atom->nfirst : atom->nlocal; + list->inum = nlocal; + + // Get fix for intel stuff + FixIntel *fix = static_cast(fix_intel); + + const int off_end = fix->offload_end_neighbor(); + int host_start = fix->host_start_neighbor(); + int offload_noghost = 0; + #ifdef _LMP_INTEL_OFFLOAD + if (fix->full_host_list()) host_start = 0; + offload_noghost = fix->offload_noghost(); + if (exclude) + error->all(FLERR, "Exclusion lists not yet supported for Intel offload"); + #endif + + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + if (offload_noghost) { + hbnti(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix, off_end); + } else { + hbnti(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix); + } + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + if (offload_noghost) { + hbnti(1, list, fix->get_double_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_double_buffers(), + host_start, nlocal, fix, off_end); + } else { + hbnti(1, list, fix->get_double_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_double_buffers(), + host_start, nlocal, fix); + } + } else { + if (offload_noghost) { + hbnti(1, list, fix->get_single_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_single_buffers(), + host_start, nlocal, fix, off_end); + } else { + hbnti(1, list, fix->get_single_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_single_buffers(), + host_start, nlocal, fix); + } + } +} + +template +void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in, + const int astart, const int aend, void *fix_in, + const int offload_end) { + IntelBuffers *buffers = (IntelBuffers *)buffers_in; + FixIntel *fix = (FixIntel *)fix_in; + const int nall = atom->nlocal + atom->nghost; + int pad = 1; + + if (offload) { + fix->start_watch(TIME_PACK); + buffers->grow(nall, atom->nlocal, comm->nthreads, aend); + 
buffers->grow_nbor(list, atom->nlocal, aend); + + ATOM_T biga; + biga.x = INTEL_BIGP; + biga.y = INTEL_BIGP; + biga.z = INTEL_BIGP; + biga.w = 1; + buffers->get_x()[nall]=biga; + + const int nthreads = comm->nthreads; + #if defined(_OPENMP) + #pragma omp parallel default(none) shared(buffers) + #endif + { + int ifrom, ito, tid; + IP_PRE_omp_range_id_align(ifrom, ito, tid, nall, nthreads, + sizeof(ATOM_T)); + buffers->thr_pack(ifrom, ito, 0); + } + fix->stop_watch(TIME_PACK); + + fix->start_watch(TIME_HOST_NEIGHBOR); + bin_atoms(buffers->get_x()); + if (INTEL_MIC_NBOR_PAD > 1) + pad = INTEL_MIC_NBOR_PAD * sizeof(float) / sizeof(flt_t); + } else { + fix->start_watch(TIME_HOST_NEIGHBOR); + if (INTEL_NBOR_PAD > 1) + pad = INTEL_NBOR_PAD * sizeof(float) / sizeof(flt_t); + } + const int pad_width = pad; + + if (aend-astart == 0) { + fix->stop_watch(TIME_HOST_NEIGHBOR); + return; + } + + const ATOM_T * restrict const x = buffers->get_x(); + int * restrict const firstneigh = buffers->firstneigh(list); + int nall_t = nall; + if (offload_noghost && offload) nall_t = atom->nlocal; + const int e_nall = nall_t; + + const int molecular = atom->molecular; + int *ns = NULL, *s = NULL; + int tag_size, special_size; + if (molecular) { + s = atom->special[0]; + ns = atom->nspecial[0]; + tag_size = e_nall; + special_size = aend; + } else { + s = &buffers->_special_holder; + ns = &buffers->_nspecial_holder; + tag_size = 0; + special_size = 0; + } + const int * restrict const special = s; + const int * restrict const nspecial = ns; + const int maxspecial = atom->maxspecial; + const int * restrict const tag = atom->tag; + + int * restrict const ilist = list->ilist; + int * restrict numneigh = list->numneigh; + int * restrict const cnumneigh = buffers->cnumneigh(list); + const int nstencil = list->nstencil; + const int * restrict const stencil = list->stencil; + const flt_t * restrict const cutneighsq = buffers->get_cutneighsq()[0]; + const int ntypes = atom->ntypes + 1; + const int 
nlocal = atom->nlocal; + + #ifndef _LMP_INTEL_OFFLOAD + int * const mask = atom->mask; + int * const molecule = atom->molecule; + #endif + + int tnum; + int *overflow; + double *timer_compute; + if (offload) { + timer_compute = fix->off_watch_neighbor(); + tnum = buffers->get_off_threads(); + overflow = fix->get_off_overflow_flag(); + fix->stop_watch(TIME_HOST_NEIGHBOR); + fix->start_watch(TIME_OFFLOAD_LATENCY); + } else { + tnum = comm->nthreads; + overflow = fix->get_overflow_flag(); + } + const int nthreads = tnum; + const int maxnbors = buffers->get_max_nbors(); + + const flt_t bboxlo0 = this->bboxlo[0]; + const flt_t bboxlo1 = this->bboxlo[1]; + const flt_t bboxlo2 = this->bboxlo[2]; + const flt_t bboxhi0 = this->bboxhi[0]; + const flt_t bboxhi1 = this->bboxhi[1]; + const flt_t bboxhi2 = this->bboxhi[2]; + const flt_t bininvx = this->bininvx; + const flt_t bininvy = this->bininvy; + const flt_t bininvz = this->bininvz; + // Make sure dummy coordinates to eliminate loop remainder not within cutoff + { + const flt_t dx = (INTEL_BIGP - bboxhi0); + const flt_t dy = (INTEL_BIGP - bboxhi1); + const flt_t dz = (INTEL_BIGP - bboxhi2); + if (dx * dx + dy * dy + dz * dz < static_cast(cutneighmaxsq)) + error->one(FLERR, + "Intel package expects no atoms within cutoff of {1e15,1e15,1e15}."); + } + + #ifdef _LMP_INTEL_OFFLOAD + const int * restrict const binhead = this->binhead; + const int * restrict const special_flag = this->special_flag; + const int nbinx = this->nbinx; + const int nbiny = this->nbiny; + const int nbinz = this->nbinz; + const int mbinxlo = this->mbinxlo; + const int mbinylo = this->mbinylo; + const int mbinzlo = this->mbinzlo; + const int mbinx = this->mbinx; + const int mbiny = this->mbiny; + const int mbinz = this->mbinz; + const int * restrict const bins = this->bins; + const int cop = fix->coprocessor_number(); + const int separate_buffers = fix->separate_buffers(); + #pragma offload target(mic:cop) if(offload) \ + in(x:length(e_nall+1) alloc_if(0) 
free_if(0)) \ + in(tag:length(tag_size) alloc_if(0) free_if(0)) \ + in(special:length(special_size*maxspecial) alloc_if(0) free_if(0)) \ + in(nspecial:length(special_size*3) alloc_if(0) free_if(0)) \ + in(bins:length(nall) alloc_if(0) free_if(0)) \ + in(binhead:length(mbins) alloc_if(0) free_if(0)) \ + in(cutneighsq:length(0) alloc_if(0) free_if(0)) \ + in(firstneigh:length(0) alloc_if(0) free_if(0)) \ + in(cnumneigh:length(0) alloc_if(0) free_if(0)) \ + out(numneigh:length(0) alloc_if(0) free_if(0)) \ + in(ilist:length(0) alloc_if(0) free_if(0)) \ + in(stencil:length(nstencil) alloc_if(0) free_if(0)) \ + in(special_flag:length(0) alloc_if(0) free_if(0)) \ + in(maxnbors,nthreads,maxspecial,nstencil,nbinx,nbiny,nbinz,offload_end) \ + in(mbinxlo,mbinylo,mbinzlo,mbinx,mbiny,mbinz,pad_width,e_nall,offload) \ + in(bininvx,bininvy,bininvz,bboxlo0,bboxlo1,bboxlo2,separate_buffers) \ + in(bboxhi0, bboxhi1, bboxhi2, astart, aend, nlocal, molecular, ntypes) \ + out(overflow:length(5) alloc_if(0) free_if(0)) \ + out(timer_compute:length(1) alloc_if(0) free_if(0)) \ + signal(numneigh) + #endif + { + #ifdef __MIC__ + *timer_compute = MIC_Wtime(); + #endif + + #ifdef _LMP_INTEL_OFFLOAD + overflow[LMP_LOCAL_MIN] = astart; + overflow[LMP_LOCAL_MAX] = aend - 1; + overflow[LMP_GHOST_MIN] = e_nall; + overflow[LMP_GHOST_MAX] = -1; + #endif + + #if defined(_OPENMP) + #pragma omp parallel default(none) shared(numneigh, overflow) + #endif + { + #ifdef _LMP_INTEL_OFFLOAD + int lmin = e_nall, lmax = -1, gmin = e_nall, gmax = -1; + #endif + + const int num = aend-astart; + int tid, ifrom, ito; + IP_PRE_omp_range_id(ifrom,ito,tid,num,nthreads); + ifrom += astart; + ito += astart; + + int which; + + const int list_size = (ito + tid + 1) * maxnbors; + int ct = (ifrom + tid) * maxnbors; + int *neighptr = firstneigh + ct; + for (int i = ifrom; i < ito; i++) { + int j, k, n, n2, itype, jtype, ibin; + double xtmp, ytmp, ztmp, delx, dely, delz, rsq; + + n = 0; + n2 = maxnbors; + + xtmp = x[i].x; + 
ytmp = x[i].y; + ztmp = x[i].z; + itype = x[i].w; + const int ioffset = ntypes * itype; + + // loop over all atoms in bins in stencil + // pairs for atoms j "below" i are excluded + // below = lower z or (equal z and lower y) or (equal zy and lower x) + // (equal zyx and j <= i) + // latter excludes self-self interaction but allows superposed atoms + + ibin = mcoord2bin(x[i].x, x[i].y, x[i].z, bboxlo0, bboxlo1, bboxlo2, + bboxhi0, bboxhi1, bboxhi2, bininvx, bininvy, bininvz, + nbinx, nbiny, nbinz, mbinx, mbiny, mbinz, + mbinxlo, mbinylo, mbinzlo); + + for (k = 0; k < nstencil; k++) { + for (j = binhead[ibin + stencil[k]]; j >= 0; j = bins[j]) { + if (offload_noghost) { + if (j < nlocal) { + if (i < offload_end) continue; + } else if (offload) continue; + } + + if (x[j].z < ztmp) continue; + if (x[j].z == ztmp) { + if (x[j].y < ytmp) continue; + if (x[j].y == ytmp) { + if (x[j].x < xtmp) continue; + if (x[j].x == xtmp && j <= i) continue; + } + } + + jtype = x[j].w; + #ifndef _LMP_INTEL_OFFLOAD + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + #endif + + delx = xtmp - x[j].x; + dely = ytmp - x[j].y; + delz = ztmp - x[j].z; + rsq = delx * delx + dely * dely + delz * delz; + if (rsq <= cutneighsq[ioffset + jtype]) { + if (j < nlocal) { + neighptr[n++] = j; + #ifdef _LMP_INTEL_OFFLOAD + if (j < lmin) lmin = j; + if (j > lmax) lmax = j; + #endif + } else { + neighptr[n2++] = j; + #ifdef _LMP_INTEL_OFFLOAD + if (j < gmin) gmin = j; + if (j > gmax) gmax = j; + #endif + } + } + } + } + ilist[i] = i; + + cnumneigh[i] = ct; + if (n > maxnbors) *overflow = 1; + for (k = maxnbors; k < n2; k++) neighptr[n++] = neighptr[k]; + while( (n % pad_width) != 0 ) neighptr[n++] = e_nall; + numneigh[i] = n; + while((n % (INTEL_DATA_ALIGN / sizeof(int))) != 0) n++; + ct += n; + neighptr += n; + if (ct + n + maxnbors > list_size) { + *overflow = 1; + ct = (ifrom + tid) * maxnbors; + } + } + + if (*overflow == 1) + for (int i = ifrom; i < ito; i++) + numneigh[i] = 0; + + 
#ifdef _LMP_INTEL_OFFLOAD + if (separate_buffers) { + #if defined(_OPENMP) + #pragma omp critical + #endif + { + if (lmin < overflow[LMP_LOCAL_MIN]) overflow[LMP_LOCAL_MIN] = lmin; + if (lmax > overflow[LMP_LOCAL_MAX]) overflow[LMP_LOCAL_MAX] = lmax; + if (gmin < overflow[LMP_GHOST_MIN]) overflow[LMP_GHOST_MIN] = gmin; + if (gmax > overflow[LMP_GHOST_MAX]) overflow[LMP_GHOST_MAX] = gmax; + } + #pragma omp barrier + } + + int ghost_offset = 0, nall_offset = e_nall; + if (separate_buffers) { + int nghost = overflow[LMP_GHOST_MAX] + 1 - overflow[LMP_GHOST_MIN]; + if (nghost < 0) nghost = 0; + if (offload) { + ghost_offset = overflow[LMP_GHOST_MIN] - overflow[LMP_LOCAL_MAX] - 1; + nall_offset = overflow[LMP_LOCAL_MAX] + 1 + nghost; + } else { + ghost_offset = overflow[LMP_GHOST_MIN] - nlocal; + nall_offset = nlocal + nghost; + } + } + #endif + + if (molecular) { + for (int i = ifrom; i < ito; ++i) { + int * restrict jlist = firstneigh + cnumneigh[i]; + const int jnum = numneigh[i]; + for (int jj = 0; jj < jnum; jj++) { + const int j = jlist[jj]; + ofind_special(which, special, nspecial, i, tag[j], special_flag); + #ifdef _LMP_INTEL_OFFLOAD + if (j >= nlocal) { + if (j == e_nall) + jlist[jj] = nall_offset; + else if (which > 0) + jlist[jj] = (j-ghost_offset) ^ (which << SBBITS); + else jlist[jj]-=ghost_offset; + } else + #endif + if (which > 0) jlist[jj] = j ^ (which << SBBITS); + } + } + } + #ifdef _LMP_INTEL_OFFLOAD + else if (separate_buffers) { + for (int i = ifrom; i < ito; ++i) { + int * restrict jlist = firstneigh + cnumneigh[i]; + const int jnum = numneigh[i]; + int jj = 0; + for (jj = 0; jj < jnum; jj++) + if (jlist[jj] >= nlocal) break; + while (jj < jnum) { + if (jlist[jj] == e_nall) jlist[jj] = nall_offset; + else jlist[jj] -= ghost_offset; + jj++; + } + } + } + #endif + } // end omp + #ifdef __MIC__ + *timer_compute = MIC_Wtime() - *timer_compute; + #endif + } // end offload + + if (offload) { + fix->stop_watch(TIME_OFFLOAD_LATENCY); + #ifdef 
_LMP_INTEL_OFFLOAD + for (int n = 0; n < aend; n++) { + ilist[n] = n; + numneigh[n] = 0; + } + #endif + } else { + for (int i = astart; i < aend; i++) + list->firstneigh[i] = firstneigh + cnumneigh[i]; + fix->stop_watch(TIME_HOST_NEIGHBOR); + #ifdef _LMP_INTEL_OFFLOAD + if (separate_buffers) { + fix->start_watch(TIME_PACK); + fix->set_neighbor_host_sizes(); + buffers->pack_sep_from_single(fix->host_min_local(), + fix->host_used_local(), + fix->host_min_ghost(), + fix->host_used_ghost()); + fix->stop_watch(TIME_PACK); + } + #endif + } +} diff --git a/src/USER-INTEL/pair_gayberne_intel.cpp b/src/USER-INTEL/pair_gayberne_intel.cpp new file mode 100644 index 000000000..46e608c92 --- /dev/null +++ b/src/USER-INTEL/pair_gayberne_intel.cpp @@ -0,0 +1,1075 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: W. 
Michael Brown (Intel) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_gayberne_intel.h" +#include "math_extra_intel.h" +#include "atom.h" +#include "comm.h" +#include "atom_vec_ellipsoid.h" +#include "force.h" +#include "memory.h" +#include "modify.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define FC_PACKED1_T typename ForceConst::fc_packed1 +#define FC_PACKED2_T typename ForceConst::fc_packed2 +#define FC_PACKED3_T typename ForceConst::fc_packed3 + +/* ---------------------------------------------------------------------- */ + +PairGayBerneIntel::PairGayBerneIntel(LAMMPS *lmp) : + PairGayBerne(lmp) +{ + suffix_flag |= Suffix::INTEL; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairGayBerneIntel::compute(int eflag, int vflag) +{ + if (fix->precision()==FixIntel::PREC_MODE_MIXED) + compute(eflag, vflag, fix->get_mixed_buffers(), + force_const_single); + else if (fix->precision()==FixIntel::PREC_MODE_DOUBLE) + compute(eflag, vflag, fix->get_double_buffers(), + force_const_double); + else + compute(eflag, vflag, fix->get_single_buffers(), + force_const_single); + + fix->balance_stamp(); + vflag_fdotr = 0; +} + +template +void PairGayBerneIntel::compute(int eflag, int vflag, + IntelBuffers *buffers, + const ForceConst &fc) +{ + if (eflag || vflag) { + ev_setup(eflag, vflag); + } else evflag = vflag_fdotr = 0; + + const int inum = list->inum; + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int host_start = fix->host_start_pair(); + const int offload_end = fix->offload_end_pair(); + const int ago = neighbor->ago; + + if (fix->separate_buffers() == 0) { + fix->start_watch(TIME_PACK); + const AtomVecEllipsoid::Bonus * const bonus = avec->bonus; + const int * const ellipsoid = atom->ellipsoid; + QUAT_T * 
restrict const quat = buffers->get_quat(); + #if defined(_OPENMP) + #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc) + #endif + { + int ifrom, ito, tid; + IP_PRE_omp_range_id_align(ifrom, ito, tid, nall, nthreads, + sizeof(ATOM_T)); + if (ago != 0) buffers->thr_pack(ifrom,ito,ago); + + for (int i = ifrom; i < ito; i++) { + int qi = ellipsoid[i]; + if (qi > -1) { + quat[i].w = bonus[qi].quat[0]; + quat[i].i = bonus[qi].quat[1]; + quat[i].j = bonus[qi].quat[2]; + quat[i].k = bonus[qi].quat[3]; + } + } + } + quat[nall].w = (flt_t)1.0; + quat[nall].i = (flt_t)0.0; + quat[nall].j = (flt_t)0.0; + quat[nall].k = (flt_t)0.0; + fix->stop_watch(TIME_PACK); + } + + if (evflag || vflag_fdotr) { + int ovflag = 0; + if (vflag_fdotr) ovflag = 2; + else if (vflag) ovflag = 1; + if (eflag) { + if (force->newton_pair) { + eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum); + } else { + eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum); + } + } else { + if (force->newton_pair) { + eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum); + } else { + eval<1,0,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,0,0>(0, ovflag, buffers, fc, host_start, inum); + } + } + } else { + if (force->newton_pair) { + eval<0,0,1>(1, 0, buffers, fc, 0, offload_end); + eval<0,0,1>(0, 0, buffers, fc, host_start, inum); + } else { + eval<0,0,0>(1, 0, buffers, fc, 0, offload_end); + eval<0,0,0>(0, 0, buffers, fc, host_start, inum); + } + } +} + +template +void PairGayBerneIntel::eval(const int offload, const int vflag, + IntelBuffers *buffers, + const ForceConst &fc, + const int astart, const int aend) +{ + const int inum = aend - astart; + if (inum == 0) return; + int nlocal, nall, minlocal; + fix->get_buffern(offload, nlocal, nall, minlocal); + + const int ago = neighbor->ago; + ATOM_T * restrict const x = 
buffers->get_x(offload); + QUAT_T * restrict const quat = buffers->get_quat(offload); + const AtomVecEllipsoid::Bonus *bonus = avec->bonus; + const int *ellipsoid = atom->ellipsoid; + + #ifdef _LMP_INTEL_OFFLOAD + if (fix->separate_buffers()) { + fix->start_watch(TIME_PACK); + if (offload) { + #pragma omp parallel default(none) \ + shared(buffers,nlocal,nall,bonus,ellipsoid) + { + int ifrom, ito, tid; + int nthreads = comm->nthreads; + IP_PRE_omp_range_id_align(ifrom, ito, tid, nlocal, + nthreads, sizeof(ATOM_T)); + if (ago != 0) buffers->thr_pack_cop(ifrom, ito, 0); + for (int i = ifrom; i < ito; i++) { + int qi = ellipsoid[i]; + if (qi > -1) { + quat[i].w = bonus[qi].quat[0]; + quat[i].i = bonus[qi].quat[1]; + quat[i].j = bonus[qi].quat[2]; + quat[i].k = bonus[qi].quat[3]; + } + } + int nghost = nall - nlocal; + if (nghost) { + IP_PRE_omp_range_align(ifrom, ito, tid, nall - nlocal, + nthreads, sizeof(ATOM_T)); + int offset = 0; + ifrom += nlocal; + ito += nlocal; + if (ago != 0) { + offset = fix->offload_min_ghost() - nlocal; + buffers->thr_pack_cop(ifrom, ito, offset, ago == 1); + } + for (int i = ifrom; i < ito; i++) { + int qi = ellipsoid[i + offset]; + if (qi > -1) { + quat[i].w = bonus[qi].quat[0]; + quat[i].i = bonus[qi].quat[1]; + quat[i].j = bonus[qi].quat[2]; + quat[i].k = bonus[qi].quat[3]; + } + } + } + } + } else { + if (ago != 0) buffers->thr_pack_host(fix->host_min_local(), nlocal, 0); + for (int i = fix->host_min_local(); i < nlocal; i++) { + int qi = ellipsoid[i]; + if (qi > -1) { + quat[i].w = bonus[qi].quat[0]; + quat[i].i = bonus[qi].quat[1]; + quat[i].j = bonus[qi].quat[2]; + quat[i].k = bonus[qi].quat[3]; + } + } + int offset = fix->host_min_ghost() - nlocal; + if (ago != 0) buffers->thr_pack_host(nlocal, nall, offset); + for (int i = nlocal; i < nall; i++) { + int qi = ellipsoid[i + offset]; + if (qi > -1) { + quat[i].w = bonus[qi].quat[0]; + quat[i].i = bonus[qi].quat[1]; + quat[i].j = bonus[qi].quat[2]; + quat[i].k = bonus[qi].quat[3]; + } 
+ } + } + fix->stop_watch(TIME_PACK); + } + #endif + + // const int * restrict const ilist = list->ilist; + const int * restrict const numneigh = list->numneigh; + const int * restrict const cnumneigh = buffers->cnumneigh(list); + const int * restrict const firstneigh = buffers->firstneigh(list); + const flt_t * restrict const special_lj = fc.special_lj; + + const FC_PACKED1_T * restrict const ijc = fc.ijc[0]; + const FC_PACKED2_T * restrict const lj34 = fc.lj34[0]; + const FC_PACKED3_T * restrict const ic = fc.ic; + const flt_t mu = fc.mu; + const flt_t gamma = fc.gamma; + const flt_t upsilon = fc.upsilon; + + flt_t * const rsq_formi = fc.rsq_form[0]; + flt_t * const delx_formi = fc.delx_form[0]; + flt_t * const dely_formi = fc.dely_form[0]; + flt_t * const delz_formi = fc.delz_form[0]; + int * const jtype_formi = fc.jtype_form[0]; + int * const jlist_formi = fc.jlist_form[0]; + + const int ntypes = atom->ntypes + 1; + const int eatom = this->eflag_atom; + + // Determine how much data to transfer + int x_size, q_size, f_stride, ev_size, separate_flag; + IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag, + buffers, offload, fix, separate_flag, + x_size, q_size, ev_size, f_stride); + + int tc; + FORCE_T * restrict f_start; + acc_t * restrict ev_global; + IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global); + const int max_nbors = _max_nbors; + const int nthreads = tc; + + int pad = 1; + if (offload) { + if (INTEL_MIC_NBOR_PAD > 1) + pad = INTEL_MIC_NBOR_PAD * sizeof(float) / sizeof(flt_t); + } else { + if (INTEL_NBOR_PAD > 1) + pad = INTEL_NBOR_PAD * sizeof(float) / sizeof(flt_t); + } + const int pad_width = pad; + + #ifdef _LMP_INTEL_OFFLOAD + int *overflow = fix->get_off_overflow_flag(); + double *timer_compute = fix->off_watch_pair(); + + if (offload) fix->start_watch(TIME_OFFLOAD_LATENCY); + #pragma offload target(mic:_cop) if(offload) \ + in(special_lj:length(0) alloc_if(0) free_if(0)) \ + in(ijc,lj34,ic:length(0) alloc_if(0) free_if(0)) 
\ + in(rsq_formi, delx_formi, dely_formi: length(0) alloc_if(0) free_if(0)) \ + in(delz_formi, jtype_formi, jlist_formi: length(0) alloc_if(0) free_if(0))\ + in(firstneigh:length(0) alloc_if(0) free_if(0)) \ + in(cnumneigh:length(0) alloc_if(0) free_if(0)) \ + in(numneigh:length(0) alloc_if(0) free_if(0)) \ + in(x:length(x_size) alloc_if(0) free_if(0)) \ + in(quat:length(nall+1) alloc_if(0) free_if(0)) \ + in(overflow:length(0) alloc_if(0) free_if(0)) \ + in(nthreads,inum,nall,ntypes,vflag,eatom,minlocal,separate_flag) \ + in(astart,nlocal,f_stride,max_nbors,mu,gamma,upsilon,offload,pad_width) \ + out(f_start:length(f_stride) alloc_if(0) free_if(0)) \ + out(ev_global:length(ev_size) alloc_if(0) free_if(0)) \ + out(timer_compute:length(1) alloc_if(0) free_if(0)) \ + signal(f_start) + #endif + { + #ifdef __MIC__ + *timer_compute=MIC_Wtime(); + #endif + + #ifdef _LMP_INTEL_OFFLOAD + if (separate_flag) { + if (separate_flag < 3) { + int all_local = nlocal; + int ghost_min = overflow[LMP_GHOST_MIN]; + nlocal = overflow[LMP_LOCAL_MAX] + 1; + int nghost = overflow[LMP_GHOST_MAX] + 1 - ghost_min; + if (nghost < 0) nghost = 0; + nall = nlocal + nghost; + separate_flag--; + int flength; + if (NEWTON_PAIR) flength = nall; + else flength = nlocal; + IP_PRE_get_stride(f_stride, flength, sizeof(FORCE_T), + separate_flag); + if (nghost) { + if (nlocal < all_local || ghost_min > all_local) { + memmove(x + nlocal, x + ghost_min, + (nall - nlocal) * sizeof(ATOM_T)); + memmove(quat + nlocal, quat + ghost_min, + (nall - nlocal) * sizeof(QUAT_T)); + } + } + } + x[nall].x = (flt_t)INTEL_BIGP; + x[nall].y = (flt_t)INTEL_BIGP; + x[nall].z = (flt_t)INTEL_BIGP; + quat[nall].w = (flt_t)1.0; + quat[nall].i = (flt_t)0.0; + quat[nall].j = (flt_t)0.0; + quat[nall].k = (flt_t)0.0; + } + #endif + + acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5; + if (EVFLAG) { + oevdwl = (acc_t)0; + if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; + } + + // loop over neighbors of my atoms + #if 
defined(_OPENMP) + #pragma omp parallel default(none) \ + shared(f_start,f_stride,nlocal,nall,minlocal) \ + reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5) + #endif + { + int iifrom, iito, tid; + IP_PRE_omp_range_id(iifrom, iito, tid, inum, nthreads); + iifrom += astart; + iito += astart; + + FORCE_T * restrict const f = f_start - minlocal * 2 + (tid * f_stride); + memset(f + minlocal * 2, 0, f_stride * sizeof(FORCE_T)); + + flt_t * restrict const rsq_form = rsq_formi + tid * max_nbors; + flt_t * restrict const delx_form = delx_formi + tid * max_nbors; + flt_t * restrict const dely_form = dely_formi + tid * max_nbors; + flt_t * restrict const delz_form = delz_formi + tid * max_nbors; + int * restrict const jtype_form = jtype_formi + tid * max_nbors; + int * restrict const jlist_form = jlist_formi + tid * max_nbors; + + int ierror = 0; + for (int i = iifrom; i < iito; ++i) { + // const int i = ilist[ii]; + const int itype = x[i].w; + const int ptr_off = itype * ntypes; + const FC_PACKED1_T * restrict const ijci = ijc + ptr_off; + const FC_PACKED2_T * restrict const lj34i = lj34 + ptr_off; + + const int * restrict const jlist = firstneigh + cnumneigh[i]; + const int jnum = numneigh[i]; + + const flt_t xtmp = x[i].x; + const flt_t ytmp = x[i].y; + const flt_t ztmp = x[i].z; + + flt_t a1_0, a1_1, a1_2, a1_3, a1_4, a1_5, a1_6, a1_7, a1_8; + flt_t b1_0, b1_1, b1_2, b1_3, b1_4, b1_5, b1_6, b1_7, b1_8; + flt_t g1_0, g1_1, g1_2, g1_3, g1_4, g1_5, g1_6, g1_7, g1_8; + + if (ijci[itype].form == ELLIPSE_ELLIPSE) { + flt_t temp_0,temp_1,temp_2,temp_3,temp_4,temp_5,temp_6,temp_7,temp_8; + ME_quat_to_mat_trans(quat[i],a1); + ME_diag_times3(ic[itype].well,a1,temp); + ME_transpose_times3(a1,temp,b1); + ME_diag_times3(ic[itype].shape2,a1,temp); + ME_transpose_times3(a1,temp,g1); + } + + acc_t fxtmp, fytmp, fztmp, fwtmp, t1tmp, t2tmp, t3tmp; + acc_t sevdwl, sv0, sv1, sv2, sv3, sv4, sv5; + fxtmp = fytmp = fztmp = t1tmp = t2tmp = t3tmp = (acc_t)0.0; + + if (EVFLAG) { + if (EFLAG) fwtmp = 
sevdwl = (acc_t)0; + if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0; + } + + bool multiple_forms = false; + int packed_j = 0; + for (int jj = 0; jj < jnum; jj++) { + int jm = jlist[jj]; + int j = jm & NEIGHMASK; + const int jtype = x[j].w; + + if (ijci[jtype].form == ELLIPSE_ELLIPSE) { + flt_t delx = x[j].x-xtmp; + flt_t dely = x[j].y-ytmp; + flt_t delz = x[j].z-ztmp; + flt_t rsq = delx * delx + dely * dely + delz * delz; + + if (rsq < ijci[jtype].cutsq) { + rsq_form[packed_j] = rsq; + delx_form[packed_j] = delx; + dely_form[packed_j] = dely; + delz_form[packed_j] = delz; + jtype_form[packed_j] = jtype; + jlist_form[packed_j] = jm; + packed_j++; + } + } else + multiple_forms = true; + } + while( (packed_j % pad_width) != 0 ) + jlist_form[packed_j++] = nall; + + // ------------------------------------------------------------- + + #ifdef __MIC__ + __assume(packed_j % INTEL_VECTOR_WIDTH == 0); + __assume(packed_j % 8 == 0); + __assume(packed_j % INTEL_MIC_VECTOR_WIDTH == 0); + #endif + #pragma vector aligned + #pragma simd reduction(+:fxtmp,fytmp,fztmp,fwtmp,t1tmp,t2tmp,t3tmp, \ + sevdwl,sv0,sv1,sv2,sv3,sv4,sv5) + for (int jj = 0; jj < packed_j; jj++) { + flt_t a2_0, a2_1, a2_2, a2_3, a2_4, a2_5, a2_6, a2_7, a2_8; + flt_t b2_0, b2_1, b2_2, b2_3, b2_4, b2_5, b2_6, b2_7, b2_8; + flt_t g2_0, g2_1, g2_2, g2_3, g2_4, g2_5, g2_6, g2_7, g2_8; + flt_t temp_0,temp_1,temp_2,temp_3,temp_4,temp_5,temp_6,temp_7,temp_8; + flt_t fforce_0, fforce_1, fforce_2, ttor_0, ttor_1, ttor_2; + flt_t rtor_0, rtor_1, rtor_2; + + const int sbindex = jlist_form[jj] >> SBBITS & 3; + const int j = jlist_form[jj] & NEIGHMASK; + flt_t factor_lj = special_lj[sbindex]; + const int jtype = jtype_form[jj]; + const flt_t sigma = ijci[jtype].sigma; + const flt_t epsilon = ijci[jtype].epsilon; + const flt_t shape2_0 = ic[jtype].shape2[0]; + const flt_t shape2_1 = ic[jtype].shape2[1]; + const flt_t shape2_2 = ic[jtype].shape2[2]; + flt_t one_eng, evdwl; + + ME_quat_to_mat_trans(quat[j], a2); + 
ME_diag_times3(ic[jtype].well, a2, temp); + ME_transpose_times3(a2, temp, b2); + ME_diag_times3a(shape2, a2, temp); + ME_transpose_times3(a2, temp, g2); + + flt_t tempv_0, tempv_1, tempv_2, tempv2_0, tempv2_1, tempv2_2; + flt_t temp1, temp2, temp3; + + flt_t r12hat_0, r12hat_1, r12hat_2; + ME_normalize3(delx_form[jj], dely_form[jj], delz_form[jj], r12hat); + flt_t r = sqrt(rsq_form[jj]); + + // compute distance of closest approach + + flt_t g12_0, g12_1, g12_2, g12_3, g12_4, g12_5, g12_6, g12_7, g12_8; + ME_plus3(g1, g2, g12); + flt_t kappa_0, kappa_1, kappa_2; + ME_mldivide3(g12, delx_form[jj], dely_form[jj], delz_form[jj], + kappa, ierror); + + // tempv = G12^-1*r12hat + + flt_t inv_r = (flt_t)1.0 / r; + tempv_0 = kappa_0 * inv_r; + tempv_1 = kappa_1 * inv_r; + tempv_2 = kappa_2 * inv_r; + flt_t sigma12 = ME_dot3(r12hat, tempv); + sigma12 = pow((flt_t)0.5 * sigma12,(flt_t) - 0.5); + flt_t h12 = r - sigma12; + + // energy + // compute u_r + + flt_t varrho = sigma / (h12 + gamma * sigma); + flt_t varrho6 = pow(varrho, (flt_t)6.0); + flt_t varrho12 = varrho6 * varrho6; + flt_t u_r = (flt_t)4.0 * epsilon * (varrho12 - varrho6); + + // compute eta_12 + + flt_t eta = (flt_t)2.0 * ijci[jtype].lshape; + flt_t det_g12 = ME_det3(g12); + eta = pow(eta / det_g12, upsilon); + + // compute chi_12 + + flt_t b12_0, b12_1, b12_2, b12_3, b12_4, b12_5, b12_6, b12_7, b12_8; + flt_t iota_0, iota_1, iota_2; + ME_plus3(b1, b2, b12); + ME_mldivide3(b12, delx_form[jj], dely_form[jj], delz_form[jj], + iota, ierror); + + // tempv = G12^-1*r12hat + + tempv_0 = iota_0 * inv_r; + tempv_1 = iota_1 * inv_r; + tempv_2 = iota_2 * inv_r; + flt_t chi = ME_dot3(r12hat, tempv); + chi = pow(chi * (flt_t)2.0, mu); + + // force + // compute dUr/dr + + temp1 = ((flt_t)2.0 * varrho12 * varrho - varrho6 * varrho) / + sigma; + temp1 = temp1 * (flt_t)24.0 * epsilon; + flt_t u_slj = temp1 * pow(sigma12, (flt_t)3.0) * (flt_t)0.5; + flt_t dUr_0, dUr_1, dUr_2; + temp2 = ME_dot3(kappa, r12hat); + flt_t uslj_rsq = 
u_slj / rsq_form[jj]; + dUr_0 = temp1 * r12hat_0 + uslj_rsq * (kappa_0 - temp2 * r12hat_0); + dUr_1 = temp1 * r12hat_1 + uslj_rsq * (kappa_1 - temp2 * r12hat_1); + dUr_2 = temp1 * r12hat_2 + uslj_rsq * (kappa_2 - temp2 * r12hat_2); + + // compute dChi_12/dr + + flt_t dchi_0, dchi_1, dchi_2; + temp1 = ME_dot3(iota, r12hat); + temp2 = (flt_t)-4.0 / rsq_form[jj] * mu * + pow(chi, (mu - (flt_t)1.0) / mu); + dchi_0 = temp2 * (iota_0 - temp1 * r12hat_0); + dchi_1 = temp2 * (iota_1 - temp1 * r12hat_1); + dchi_2 = temp2 * (iota_2 - temp1 * r12hat_2); + + temp1 = -eta * u_r; + temp2 = eta * chi; + fforce_0 = temp1 * dchi_0 - temp2 * dUr_0; + fforce_1 = temp1 * dchi_1 - temp2 * dUr_1; + fforce_2 = temp1 * dchi_2 - temp2 * dUr_2; + + // torque for particle 1 and 2 + // compute dUr + + tempv_0 = -uslj_rsq * kappa_0; + tempv_1 = -uslj_rsq * kappa_1; + tempv_2 = -uslj_rsq * kappa_2; + ME_vecmat(kappa, g1, tempv2); + ME_cross3(tempv, tempv2, dUr); + flt_t dUr2_0, dUr2_1, dUr2_2; + + if (NEWTON_PAIR || j < nlocal) { + ME_vecmat(kappa, g2, tempv2); + ME_cross3(tempv, tempv2, dUr2); + } + + // compute d_chi + + ME_vecmat(iota, b1, tempv); + ME_cross3(tempv, iota, dchi); + temp1 = (flt_t)-4.0 / rsq_form[jj]; + dchi_0 *= temp1; + dchi_1 *= temp1; + dchi_2 *= temp1; + flt_t dchi2_0, dchi2_1, dchi2_2; + + if (NEWTON_PAIR || j < nlocal) { + ME_vecmat(iota, b2, tempv); + ME_cross3(tempv, iota, dchi2); + dchi2_0 *= temp1; + dchi2_1 *= temp1; + dchi2_2 *= temp1; + } + + // compute d_eta + + flt_t deta_0, deta_1, deta_2; + deta_0 = deta_1 = deta_2 = (flt_t)0.0; + ME_compute_eta_torque(g12, a1, shape2, temp); + temp1 = -eta * upsilon; + + tempv_0 = temp1 * temp_0; + tempv_1 = temp1 * temp_1; + tempv_2 = temp1 * temp_2; + ME_mv0_cross3(a1, tempv, tempv2); + deta_0 += tempv2_0; + deta_1 += tempv2_1; + deta_2 += tempv2_2; + + tempv_0 = temp1 * temp_3; + tempv_1 = temp1 * temp_4; + tempv_2 = temp1 * temp_5; + ME_mv1_cross3(a1, tempv, tempv2); + deta_0 += tempv2_0; + deta_1 += tempv2_1; + deta_2 
+= tempv2_2; + + tempv_0 = temp1 * temp_6; + tempv_1 = temp1 * temp_7; + tempv_2 = temp1 * temp_8; + ME_mv2_cross3(a1, tempv, tempv2); + deta_0 += tempv2_0; + deta_1 += tempv2_1; + deta_2 += tempv2_2; + + // compute d_eta for particle 2 + + flt_t deta2_0, deta2_1, deta2_2; + if (NEWTON_PAIR || j < nlocal) { + deta2_0 = deta2_1 = deta2_2 = (flt_t)0.0; + ME_compute_eta_torque(g12, a2, shape2, temp); + + tempv_0 = temp1 * temp_0; + tempv_1 = temp1 * temp_1; + tempv_2 = temp1 * temp_2; + ME_mv0_cross3(a2, tempv, tempv2); + deta2_0 += tempv2_0; + deta2_1 += tempv2_1; + deta2_2 += tempv2_2; + + tempv_0 = temp1 * temp_3; + tempv_1 = temp1 * temp_4; + tempv_2 = temp1 * temp_5; + ME_mv1_cross3(a2, tempv, tempv2); + deta2_0 += tempv2_0; + deta2_1 += tempv2_1; + deta2_2 += tempv2_2; + + tempv_0 = temp1 * temp_6; + tempv_1 = temp1 * temp_7; + tempv_2 = temp1 * temp_8; + ME_mv2_cross3(a2, tempv, tempv2); + deta2_0 += tempv2_0; + deta2_1 += tempv2_1; + deta2_2 += tempv2_2; + } + + // torque + + temp1 = u_r * eta; + temp2 = u_r * chi; + temp3 = chi * eta; + + ttor_0 = (temp1 * dchi_0 + temp2 * deta_0 + temp3 * dUr_0) * + (flt_t)-1.0; + ttor_1 = (temp1 * dchi_1 + temp2 * deta_1 + temp3 * dUr_1) * + (flt_t)-1.0; + ttor_2 = (temp1 * dchi_2 + temp2 * deta_2 + temp3 * dUr_2) * + (flt_t)-1.0; + + if (NEWTON_PAIR || j < nlocal) { + rtor_0 = (temp1 * dchi2_0 + temp2 * deta2_0 + temp3 * dUr2_0) * + (flt_t)-1.0; + rtor_1 = (temp1 * dchi2_1 + temp2 * deta2_1 + temp3 * dUr2_1) * + (flt_t)-1.0; + rtor_2 = (temp1 * dchi2_2 + temp2 * deta2_2 + temp3 * dUr2_2) * + (flt_t)-1.0; + } + + one_eng = temp1 * chi; + #ifndef __MIC__ + if (jlist_form[jj] == nall) { + one_eng = (flt_t)0.0; + fforce_0 = 0.0; + fforce_1 = 0.0; + fforce_2 = 0.0; + ttor_0 = 0.0; + ttor_1 = 0.0; + ttor_2 = 0.0; + rtor_0 = 0.0; + rtor_1 = 0.0; + rtor_2 = 0.0; + } + #endif + + fforce_0 *= factor_lj; + fforce_1 *= factor_lj; + fforce_2 *= factor_lj; + ttor_0 *= factor_lj; + ttor_1 *= factor_lj; + ttor_2 *= factor_lj; + + #ifdef 
__MIC__ + if (jlist_form[jj] < nall) { + #endif + fxtmp += fforce_0; + fytmp += fforce_1; + fztmp += fforce_2; + t1tmp += ttor_0; + t2tmp += ttor_1; + t3tmp += ttor_2; + + if (NEWTON_PAIR || j < nlocal) { + rtor_0 *= factor_lj; + rtor_1 *= factor_lj; + rtor_2 *= factor_lj; + int jp = j * 2; + f[jp].x -= fforce_0; + f[jp].y -= fforce_1; + f[jp].z -= fforce_2; + jp++; + f[jp].x += rtor_0; + f[jp].y += rtor_1; + f[jp].z += rtor_2; + } + + if (EVFLAG) { + flt_t ev_pre = (flt_t)0; + if (NEWTON_PAIR || i < nlocal) + ev_pre += (flt_t)0.5; + if (NEWTON_PAIR || j < nlocal) + ev_pre += (flt_t)0.5; + + if (EFLAG) { + evdwl = factor_lj * one_eng; + sevdwl += ev_pre * evdwl; + if (eatom) { + if (NEWTON_PAIR || i < nlocal) + fwtmp += (flt_t)0.5 * evdwl; + if (NEWTON_PAIR || j < nlocal) + f[j*2].w += (flt_t)0.5 * evdwl; + } + } + + if (vflag == 1) { + ev_pre *= (flt_t)-1.0; + sv0 += ev_pre * delx_form[jj] * fforce_0; + sv1 += ev_pre * dely_form[jj] * fforce_1; + sv2 += ev_pre * delz_form[jj] * fforce_2; + sv3 += ev_pre * delx_form[jj] * fforce_1; + sv4 += ev_pre * delx_form[jj] * fforce_2; + sv5 += ev_pre * dely_form[jj] * fforce_2; + } + } // EVFLAG + #ifdef __MIC__ + } + #endif + } // for jj + + // ------------------------------------------------------------- + + if (multiple_forms) + ierror = 2; + + int ip = i * 2; + f[ip].x += fxtmp; + f[ip].y += fytmp; + f[ip].z += fztmp; + ip++; + f[ip].x += t1tmp; + f[ip].y += t2tmp; + f[ip].z += t3tmp; + + if (EVFLAG) { + if (EFLAG) { + if (eatom) f[i * 2].w += fwtmp; + oevdwl += sevdwl; + } + if (vflag == 1) { + ov0 += sv0; + ov1 += sv1; + ov2 += sv2; + ov3 += sv3; + ov4 += sv4; + ov5 += sv5; + } + } + } // for i + int o_range; + if (NEWTON_PAIR) + o_range = nall; + else + o_range = nlocal; + if (offload == 0) o_range -= minlocal; + IP_PRE_omp_range_align(iifrom, iito, tid, o_range, nthreads, + sizeof(FORCE_T)); + const int two_iito = iito * 2; + + #if defined(_OPENMP) + #pragma omp barrier + #endif + + acc_t *facc = &(f_start[0].x); + 
const int sto = two_iito * 4; + const int fst4 = f_stride * 4; + #if defined(_OPENMP) + #pragma omp barrier + #endif + int t_off = f_stride; + if (EFLAG && eatom) { + for (int t = 1; t < nthreads; t++) { + #pragma vector nontemporal + for (int n = iifrom * 2; n < two_iito; n++) { + f_start[n].x += f_start[n + t_off].x; + f_start[n].y += f_start[n + t_off].y; + f_start[n].z += f_start[n + t_off].z; + f_start[n].w += f_start[n + t_off].w; + } + t_off += f_stride; + } + } else { + for (int t = 1; t < nthreads; t++) { + #pragma vector nontemporal + for (int n = iifrom * 2; n < two_iito; n++) { + f_start[n].x += f_start[n + t_off].x; + f_start[n].y += f_start[n + t_off].y; + f_start[n].z += f_start[n + t_off].z; + } + t_off += f_stride; + } + } + + if (EVFLAG) { + if (vflag==2) { + const ATOM_T * restrict const xo = x + minlocal; + #pragma vector nontemporal + for (int n = iifrom; n < iito; n++) { + const int nt2 = n * 2; + ov0 += f_start[nt2].x * xo[n].x; + ov1 += f_start[nt2].y * xo[n].y; + ov2 += f_start[nt2].z * xo[n].z; + ov3 += f_start[nt2].y * xo[n].x; + ov4 += f_start[nt2].z * xo[n].x; + ov5 += f_start[nt2].z * xo[n].y; + } + } + } + + if (ierror) + f_start[1].w = ierror; + } // omp + + if (EVFLAG) { + if (EFLAG) { + ev_global[0] = oevdwl; + ev_global[1] = (acc_t)0.0; + } + if (vflag) { + ev_global[2] = ov0; + ev_global[3] = ov1; + ev_global[4] = ov2; + ev_global[5] = ov3; + ev_global[6] = ov4; + ev_global[7] = ov5; + } + } + + #ifdef __MIC__ + *timer_compute = MIC_Wtime() - *timer_compute; + #endif + } // offload + + if (offload) + fix->stop_watch(TIME_OFFLOAD_LATENCY); + else + fix->stop_watch(TIME_HOST_PAIR); + + if (EVFLAG) + fix->add_result_array(f_start, ev_global, offload,eatom); + else + fix->add_result_array(f_start, 0, offload); +} + +/* ---------------------------------------------------------------------- */ + +void PairGayBerneIntel::init_style() +{ + PairGayBerne::init_style(); + neighbor->requests[neighbor->nrequest-1]->intel = 1; + + int ifix = 
modify->find_fix("package_intel"); + if (ifix < 0) + error->all(FLERR, + "The 'package intel' command is required for /intel styles"); + fix = static_cast(modify->fix[ifix]); + + #ifdef _LMP_INTEL_OFFLOAD + fix->set_offload_affinity(); + if (force->newton_pair) fix->set_offload_noghost(1); + _cop = fix->coprocessor_number(); + #endif + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + fix->get_mixed_buffers()->free_all_nbor_buffers(); + pack_force_const(force_const_single, fix->get_mixed_buffers()); + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + fix->get_double_buffers()->free_all_nbor_buffers(); + pack_force_const(force_const_double, fix->get_double_buffers()); + } else { + fix->get_single_buffers()->free_all_nbor_buffers(); + pack_force_const(force_const_single, fix->get_single_buffers()); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void PairGayBerneIntel::pack_force_const(ForceConst &fc, + IntelBuffers *buffers) +{ + int tp1 = atom->ntypes + 1; + _max_nbors = buffers->get_max_nbors(); + int mthreads = comm->nthreads; + if (mthreads < buffers->get_off_threads()) + mthreads = buffers->get_off_threads(); + fc.set_ntypes(tp1, _max_nbors, mthreads, memory, _cop); + buffers->set_ntypes(tp1); + flt_t **cutneighsq = buffers->get_cutneighsq(); + + // Repeat cutsq calculation because done after call to init_style + double cut, cutneigh; + for (int i = 1; i <= atom->ntypes; i++) { + for (int j = i; j <= atom->ntypes; j++) { + if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { + cut = init_one(i,j); + cutneigh = cut + neighbor->skin; + cutsq[i][j] = cutsq[j][i] = cut*cut; + cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh; + } + } + } + + for (int i = 0; i < 4; i++) { + fc.special_lj[i] = force->special_lj[i]; + fc.special_lj[0] = 1.0; + } + fc.gamma = gamma; + fc.upsilon = upsilon; + fc.mu = mu; + + for (int i = 0; i < tp1; i++) { + for (int j = 0; j < tp1; j++) { + 
fc.ijc[i][j].lj1 = lj1[i][j]; + fc.ijc[i][j].lj2 = lj2[i][j]; + fc.ijc[i][j].cutsq = cutsq[i][j]; + fc.ijc[i][j].offset = offset[i][j]; + fc.ijc[i][j].sigma = sigma[i][j]; + fc.ijc[i][j].epsilon = epsilon[i][j]; + fc.ijc[i][j].form = form[i][j]; + fc.ijc[i][j].lshape = lshape[i] * lshape[j]; + fc.lj34[i][j].lj3 = lj3[i][j]; + fc.lj34[i][j].lj4 = lj4[i][j]; + } + for (int j = 0; j < 4; j++) { + fc.ic[i].shape2[j] = shape2[i][j]; + fc.ic[i].well[j] = well[i][j]; + } + } + + #ifdef _LMP_INTEL_OFFLOAD + if (_cop < 0) return; + flt_t * special_lj = fc.special_lj; + FC_PACKED1_T *oijc = fc.ijc[0]; + FC_PACKED2_T *olj34 = fc.lj34[0]; + FC_PACKED3_T *oic = fc.ic; + flt_t * ocutneighsq = cutneighsq[0]; + int tp1sq = tp1 * tp1; + if (oijc != NULL && oic != NULL) { + #pragma offload_transfer target(mic:_cop) \ + in(special_lj: length(4) alloc_if(0) free_if(0)) \ + in(oijc,olj34: length(tp1sq) alloc_if(0) free_if(0)) \ + in(oic: length(tp1) alloc_if(0) free_if(0)) \ + in(ocutneighsq: length(tp1sq)) + } + #endif +} + +/* ---------------------------------------------------------------------- */ + +template +void PairGayBerneIntel::ForceConst::set_ntypes(const int ntypes, + const int one_length, + const int nthreads, + Memory *memory, + const int cop) { + if (ntypes != _ntypes) { + if (_ntypes > 0) { + fc_packed3 *oic = ic; + + #ifdef _LMP_INTEL_OFFLOAD + flt_t * ospecial_lj = special_lj; + fc_packed1 *oijc = ijc[0]; + fc_packed2 *olj34 = lj34[0]; + flt_t * orsq_form = rsq_form[0]; + flt_t * odelx_form = delx_form[0]; + flt_t * odely_form = dely_form[0]; + flt_t * odelz_form = delz_form[0]; + int * ojtype_form = jtype_form[0]; + int * ojlist_form = jlist_form[0]; + + if (ospecial_lj != NULL && oijc != NULL && olj34 != NULL && + orsq_form != NULL && odelx_form != NULL && odely_form != NULL && + odelz_form != NULL && ojtype_form != NULL && ojlist_form != NULL && + _cop >= 0) { + #pragma offload_transfer target(mic:_cop) \ + nocopy(ospecial_lj, oijc, olj34, oic: alloc_if(0) 
free_if(1)) \ + nocopy(orsq_form, odelx_form, odely_form: alloc_if(0) free_if(1)) \ + nocopy(odelz_form, ojtype_form, ojlist_form: alloc_if(0) free_if(1)) + } + #endif + + _memory->destroy(oic); + _memory->destroy(ijc); + _memory->destroy(lj34); + _memory->destroy(rsq_form); + _memory->destroy(delx_form); + _memory->destroy(dely_form); + _memory->destroy(delz_form); + _memory->destroy(jtype_form); + _memory->destroy(jlist_form); + } + + if (ntypes > 0) { + _cop = cop; + memory->create(ijc, ntypes, ntypes, "fc.ijc"); + memory->create(lj34, ntypes, ntypes, "fc.lj34"); + memory->create(ic, ntypes, "fc.ic"); + memory->create(rsq_form, nthreads, one_length, "rsq_form"); + memory->create(delx_form, nthreads, one_length, "delx_form"); + memory->create(dely_form, nthreads, one_length, "dely_form"); + memory->create(delz_form, nthreads, one_length, "delz_form"); + memory->create(jtype_form, nthreads, one_length, "jtype_form"); + memory->create(jlist_form, nthreads, one_length, "jlist_form"); + + for (int zn = 0; zn < nthreads; zn++) + for (int zo = 0; zo < one_length; zo++) { + rsq_form[zn][zo] = 10.0; + delx_form[zn][zo] = 10.0; + dely_form[zn][zo] = 10.0; + delz_form[zn][zo] = 10.0; + jtype_form[zn][zo] = 1; + jlist_form[zn][zo] = 0; + } + + #ifdef _LMP_INTEL_OFFLOAD + flt_t * ospecial_lj = special_lj; + fc_packed1 *oijc = ijc[0]; + fc_packed2 *olj34 = lj34[0]; + fc_packed3 *oic = ic; + flt_t * orsq_form = rsq_form[0]; + flt_t * odelx_form = delx_form[0]; + flt_t * odely_form = dely_form[0]; + flt_t * odelz_form = delz_form[0]; + int * ojtype_form = jtype_form[0]; + int * ojlist_form = jlist_form[0]; + int off_onel = one_length * nthreads; + + int tp1sq = ntypes*ntypes; + if (ospecial_lj != NULL && oijc != NULL && olj34 != NULL && + oic != NULL && orsq_form != NULL && odelx_form != NULL && + odely_form != NULL && odelz_form != NULL && ojtype_form !=NULL && + ojlist_form !=NULL && cop >= 0) { + #pragma offload_transfer target(mic:cop) \ + nocopy(ospecial_lj: length(4) 
alloc_if(1) free_if(0)) \ + nocopy(oijc,olj34: length(tp1sq) alloc_if(1) free_if(0)) \ + nocopy(oic: length(ntypes) alloc_if(1) free_if(0)) \ + in(orsq_form: length(off_onel) alloc_if(1) free_if(0)) \ + in(odelx_form: length(off_onel) alloc_if(1) free_if(0)) \ + in(odely_form: length(off_onel) alloc_if(1) free_if(0)) \ + in(odelz_form: length(off_onel) alloc_if(1) free_if(0)) \ + in(ojtype_form: length(off_onel) alloc_if(1) free_if(0)) \ + in(ojlist_form: length(off_onel) alloc_if(1) free_if(0)) + } + #endif + } + } + _ntypes = ntypes; + _memory = memory; +} diff --git a/src/USER-INTEL/pair_gayberne_intel.h b/src/USER-INTEL/pair_gayberne_intel.h new file mode 100644 index 000000000..eb055e151 --- /dev/null +++ b/src/USER-INTEL/pair_gayberne_intel.h @@ -0,0 +1,99 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: W. 
Michael Brown (Intel) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(gayberne/intel,PairGayBerneIntel) + +#else + +#ifndef LMP_PAIR_GAYBERNE_INTEL_H +#define LMP_PAIR_GAYBERNE_INTEL_H + +#include "pair_gayberne.h" +#include "fix_intel.h" + +namespace LAMMPS_NS { + +class PairGayBerneIntel : public PairGayBerne { + + public: + PairGayBerneIntel(class LAMMPS *); + + virtual void compute(int, int); + void init_style(); + + private: + template class ForceConst; + + template + void compute(int eflag, int vflag, IntelBuffers *buffers, + const ForceConst &fc); + template + void eval(const int offload, const int vflag, + IntelBuffers * buffers, + const ForceConst &fc, const int astart, const int aend); + + template + void pack_force_const(ForceConst &fc, + IntelBuffers *buffers); + + template + class ForceConst { + public: + typedef struct { + flt_t cutsq, lj1, lj2, offset, sigma, epsilon, lshape; + int form; + } fc_packed1; + typedef struct { flt_t lj3, lj4; } fc_packed2; + typedef struct { flt_t shape2[4], well[4]; } fc_packed3; + + __declspec(align(64)) flt_t special_lj[4], gamma, upsilon, mu; + fc_packed1 **ijc; + fc_packed2 **lj34; + fc_packed3 *ic; + + flt_t **rsq_form, **delx_form, **dely_form, **delz_form; + int **jtype_form, **jlist_form; + + ForceConst() : _ntypes(0) {} + ~ForceConst() { set_ntypes(0, 0, 0, NULL, _cop); } + + void set_ntypes(const int ntypes, const int one_length, + const int nthreads, Memory *memory, const int cop); + + private: + int _ntypes, _cop; + Memory *_memory; + }; + + ForceConst force_const_single; + ForceConst force_const_double; + int _max_nbors; + + double gayberne_lj(const int i, const int j, double a1[3][3], + double b1[3][3], double g1[3][3], double *r12, + const double rsq, double *fforce, double *ttor); + + FixIntel *fix; + int _cop; +}; + +} + +#endif +#endif diff --git a/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp 
b/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp new file mode 100644 index 000000000..576d5b21c --- /dev/null +++ b/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp @@ -0,0 +1,675 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: W. Michael Brown (Intel) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_charmm_coul_long_intel.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "group.h" +#include "kspace.h" +#include "memory.h" +#include "modify.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "memory.h" +#include "suffix.h" +using namespace LAMMPS_NS; + +#define LJ_T typename IntelBuffers::vec4_t +#define TABLE_T typename ForceConst::table_t + +/* ---------------------------------------------------------------------- */ + +PairLJCharmmCoulLongIntel::PairLJCharmmCoulLongIntel(LAMMPS *lmp) : + PairLJCharmmCoulLong(lmp) +{ + suffix_flag |= Suffix::INTEL; + respa_enable = 0; + cut_respa = NULL; +} + +/* ---------------------------------------------------------------------- */ + +PairLJCharmmCoulLongIntel::~PairLJCharmmCoulLongIntel() +{ +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCharmmCoulLongIntel::compute(int eflag, int vflag) +{ + if (fix->precision()==FixIntel::PREC_MODE_MIXED) + compute(eflag, vflag, fix->get_mixed_buffers(), + force_const_single); + else if 
(fix->precision()==FixIntel::PREC_MODE_DOUBLE) + compute(eflag, vflag, fix->get_double_buffers(), + force_const_double); + else + compute(eflag, vflag, fix->get_single_buffers(), + force_const_single); + + fix->balance_stamp(); + vflag_fdotr = 0; +} + +template +void PairLJCharmmCoulLongIntel::compute(int eflag, int vflag, + IntelBuffers *buffers, + const ForceConst &fc) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int inum = list->inum; + const int nthreads = comm->nthreads; + const int host_start = fix->host_start_pair(); + const int offload_end = fix->offload_end_pair(); + const int ago = neighbor->ago; + + if (ago != 0 && fix->separate_buffers() == 0) { + fix->start_watch(TIME_PACK); + #if defined(_OPENMP) + #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc) + #endif + { + int ifrom, ito, tid; + IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal+atom->nghost, + nthreads, sizeof(ATOM_T)); + buffers->thr_pack(ifrom,ito,ago); + } + fix->stop_watch(TIME_PACK); + } + + // -------------------- Regular version + if (evflag || vflag_fdotr) { + int ovflag = 0; + if (vflag_fdotr) ovflag = 2; + else if (vflag) ovflag = 1; + if (eflag) { + if (force->newton_pair) { + eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum); + } else { + eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum); + } + } else { + if (force->newton_pair) { + eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum); + } else { + eval<1,0,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,0,0>(0, ovflag, buffers, fc, host_start, inum); + } + } + } else { + if (force->newton_pair) { + eval<0,0,1>(1, 0, buffers, fc, 0, offload_end); + eval<0,0,1>(0, 0, buffers, fc, host_start, inum); + } else { + eval<0,0,0>(1, 0, buffers, fc, 0, offload_end); + eval<0,0,0>(0, 0, buffers, 
fc, host_start, inum); + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag, + IntelBuffers *buffers, + const ForceConst &fc, + const int astart, const int aend) +{ + const int inum = aend - astart; + if (inum == 0) return; + int nlocal, nall, minlocal; + fix->get_buffern(offload, nlocal, nall, minlocal); + + const int ago = neighbor->ago; + IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall); + + ATOM_T * restrict const x = buffers->get_x(offload); + flt_t * restrict const q = buffers->get_q(offload); + + const int * restrict const numneigh = list->numneigh; + const int * restrict const cnumneigh = buffers->cnumneigh(list); + const int * restrict const firstneigh = buffers->firstneigh(list); + + const flt_t * restrict const special_coul = fc.special_coul; + const flt_t * restrict const special_lj = fc.special_lj; + const flt_t qqrd2e = force->qqrd2e; + const flt_t inv_denom_lj = (flt_t)1.0/denom_lj; + + const flt_t * restrict const cutsq = fc.cutsq[0]; + const LJ_T * restrict const lj = fc.lj[0]; + const TABLE_T * restrict const table = fc.table; + const flt_t * restrict const etable = fc.etable; + const flt_t * restrict const detable = fc.detable; + const flt_t * restrict const ctable = fc.ctable; + const flt_t * restrict const dctable = fc.dctable; + const flt_t cut_ljsq = fc.cut_ljsq; + const flt_t cut_lj_innersq = fc.cut_lj_innersq; + const flt_t cut_coulsq = fc.cut_coulsq; + const flt_t g_ewald = fc.g_ewald; + const flt_t tabinnersq = fc.tabinnersq; + + const int ntypes = atom->ntypes + 1; + const int eatom = this->eflag_atom; + + // Determine how much data to transfer + int x_size, q_size, f_stride, ev_size, separate_flag; + IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag, + buffers, offload, fix, separate_flag, + x_size, q_size, ev_size, f_stride); + + int tc; + FORCE_T * restrict f_start; + acc_t * 
restrict ev_global; + IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global); + + const int nthreads = tc; + #ifdef _LMP_INTEL_OFFLOAD + int *overflow = fix->get_off_overflow_flag(); + double *timer_compute = fix->off_watch_pair(); + // Redeclare as local variables for offload + const int ncoultablebits = this->ncoultablebits; + const int ncoulmask = this->ncoulmask; + const int ncoulshiftbits = this->ncoulshiftbits; + #ifdef INTEL_ALLOW_TABLE + #define ITABLE_IN in(table,etable,detable:length(0) alloc_if(0) free_if(0)) \ + in(ctable,dctable:length(0) alloc_if(0) free_if(0)) \ + in(ncoultablebits,tabinnersq,ncoulmask,ncoulshiftbits) + #else + #define ITABLE_IN + #endif + + if (offload) fix->start_watch(TIME_OFFLOAD_LATENCY); + #pragma offload target(mic:_cop) if(offload) \ + in(special_lj,special_coul:length(0) alloc_if(0) free_if(0)) \ + in(cutsq,lj:length(0) alloc_if(0) free_if(0)) \ + in(firstneigh:length(0) alloc_if(0) free_if(0)) \ + in(cnumneigh:length(0) alloc_if(0) free_if(0)) \ + in(numneigh:length(0) alloc_if(0) free_if(0)) \ + in(x:length(x_size) alloc_if(0) free_if(0)) \ + in(q:length(q_size) alloc_if(0) free_if(0)) \ + in(overflow:length(0) alloc_if(0) free_if(0)) \ + in(nthreads,qqrd2e,g_ewald,inum,nall,ntypes,cut_coulsq,vflag,eatom) \ + in(f_stride,separate_flag,offload) \ + in(astart,cut_ljsq,cut_lj_innersq,nlocal,inv_denom_lj,minlocal) \ + out(f_start:length(f_stride) alloc_if(0) free_if(0)) \ + out(ev_global:length(ev_size) alloc_if(0) free_if(0)) \ + out(timer_compute:length(1) alloc_if(0) free_if(0)) \ + ITABLE_IN signal(f_start) + #endif + { + #ifdef __MIC__ + *timer_compute = MIC_Wtime(); + #endif + + IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall, + f_stride, x, q); + + acc_t oevdwl, oecoul, ov0, ov1, ov2, ov3, ov4, ov5; + if (EVFLAG) { + oevdwl = oecoul = (acc_t)0; + if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; + } + + // loop over neighbors of my atoms + #if defined(_OPENMP) + #pragma omp parallel 
default(none) \ + shared(f_start,f_stride,nlocal,nall,minlocal) \ + reduction(+:oevdwl,oecoul,ov0,ov1,ov2,ov3,ov4,ov5) + #endif + { + int iifrom, iito, tid; + IP_PRE_omp_range_id(iifrom, iito, tid, inum, nthreads); + iifrom += astart; + iito += astart; + + FORCE_T * restrict const f = f_start - minlocal + (tid * f_stride); + memset(f + minlocal, 0, f_stride * sizeof(FORCE_T)); + flt_t cutboth = cut_coulsq; + + for (int i = iifrom; i < iito; ++i) { + // const int i = ilist[ii]; + const int itype = x[i].w; + + const int ptr_off = itype * ntypes; + const flt_t * restrict const cutsqi = cutsq + ptr_off; + const LJ_T * restrict const lji = lj + ptr_off; + + const int * restrict const jlist = firstneigh + cnumneigh[i]; + const int jnum = numneigh[i]; + + acc_t fxtmp,fytmp,fztmp,fwtmp; + acc_t sevdwl, secoul, sv0, sv1, sv2, sv3, sv4, sv5; + + const flt_t xtmp = x[i].x; + const flt_t ytmp = x[i].y; + const flt_t ztmp = x[i].z; + const flt_t qtmp = q[i]; + fxtmp = fytmp = fztmp = (acc_t)0; + if (EVFLAG) { + if (EFLAG) fwtmp = sevdwl = secoul = (acc_t)0; + if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0; + } + + #pragma vector aligned + #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \ + sv0, sv1, sv2, sv3, sv4, sv5) + for (int jj = 0; jj < jnum; jj++) { + flt_t forcecoul, forcelj, evdwl, ecoul; + forcecoul = forcelj = evdwl = ecoul = (flt_t)0.0; + + const int sbindex = jlist[jj] >> SBBITS & 3; + const int j = jlist[jj] & NEIGHMASK; + + const flt_t delx = xtmp - x[j].x; + const flt_t dely = ytmp - x[j].y; + const flt_t delz = ztmp - x[j].z; + const int jtype = x[j].w; + const flt_t rsq = delx * delx + dely * dely + delz * delz; + const flt_t r2inv = (flt_t)1.0 / rsq; + + #ifdef __MIC__ + if (rsq < cut_coulsq) { + #endif + #ifdef INTEL_ALLOW_TABLE + if (!ncoultablebits || rsq <= tabinnersq) { + #endif + const flt_t A1 = 0.254829592; + const flt_t A2 = -0.284496736; + const flt_t A3 = 1.421413741; + const flt_t A4 = -1.453152027; + const flt_t 
A5 = 1.061405429; + const flt_t EWALD_F = 1.12837917; + const flt_t INV_EWALD_P = 1.0 / 0.3275911; + + const flt_t r = sqrt(rsq); + const flt_t grij = g_ewald * r; + const flt_t expm2 = exp(-grij * grij); + const flt_t t = INV_EWALD_P / (INV_EWALD_P + grij); + const flt_t erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + const flt_t prefactor = qqrd2e * qtmp * q[j] / r; + forcecoul = prefactor * (erfc + EWALD_F * grij * expm2); + if (EFLAG) ecoul = prefactor * erfc; + if (sbindex) { + const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex])* + prefactor; + forcecoul -= adjust; + if (EFLAG) ecoul -= adjust; + } + #ifdef INTEL_ALLOW_TABLE + } else { + float rsq_lookup = rsq; + const int itable = (__intel_castf32_u32(rsq_lookup) & + ncoulmask) >> ncoulshiftbits; + const flt_t fraction = (rsq_lookup - table[itable].r) * + table[itable].dr; + + const flt_t tablet = table[itable].f + + fraction * table[itable].df; + forcecoul = qtmp * q[j] * tablet; + if (EFLAG) ecoul = qtmp * q[j] * (etable[itable] + + fraction * detable[itable]); + if (sbindex) { + const flt_t table2 = ctable[itable] + + fraction * dctable[itable]; + const flt_t prefactor = qtmp * q[j] * table2; + const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex]) * + prefactor; + forcecoul -= adjust; + if (EFLAG) ecoul -= adjust; + } + } + #endif + #ifdef __MIC__ + } + #endif + + #ifdef __MIC__ + if (rsq < cut_ljsq) { + #endif + flt_t r6inv = r2inv * r2inv * r2inv; + forcelj = r6inv * (lji[jtype].x * r6inv - lji[jtype].y); + if (EFLAG) evdwl = r6inv*(lji[jtype].z * r6inv - lji[jtype].w); + + #ifdef __MIC__ + if (rsq > cut_lj_innersq) { + #endif + const flt_t drsq = cut_ljsq - rsq; + const flt_t cut2 = (rsq - cut_lj_innersq) * drsq; + const flt_t switch1 = drsq * (drsq * drsq + (flt_t)3.0 * cut2) * + inv_denom_lj; + const flt_t switch2 = (flt_t)12.0 * rsq * cut2 * inv_denom_lj; + if (EFLAG) { + #ifndef __MIC__ + if (rsq > cut_lj_innersq) { + #endif + forcelj = forcelj * switch1 + evdwl * switch2; + evdwl *= 
switch1; + #ifndef __MIC__ + } + #endif + } else { + const flt_t philj = r6inv * (lji[jtype].z*r6inv - + lji[jtype].w); + #ifndef __MIC__ + if (rsq > cut_lj_innersq) + #endif + forcelj = forcelj * switch1 + philj * switch2; + } + #ifdef __MIC__ + } + #endif + + if (sbindex) { + const flt_t factor_lj = special_lj[sbindex]; + forcelj *= factor_lj; + if (EFLAG) evdwl *= factor_lj; + } + #ifdef __MIC__ + } + #else + if (rsq > cut_coulsq) { forcecoul = (flt_t)0.0; ecoul = (flt_t)0.0; } + if (rsq > cut_ljsq) { forcelj = (flt_t)0.0; evdwl = (flt_t)0.0; } + #endif + + #ifdef __MIC__ + if (rsq < cut_coulsq) { + #endif + const flt_t fpair = (forcecoul + forcelj) * r2inv; + fxtmp += delx * fpair; + fytmp += dely * fpair; + fztmp += delz * fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j].x -= delx * fpair; + f[j].y -= dely * fpair; + f[j].z -= delz * fpair; + } + + if (EVFLAG) { + flt_t ev_pre = (flt_t)0; + if (NEWTON_PAIR || i < nlocal) + ev_pre += (flt_t)0.5; + if (NEWTON_PAIR || j < nlocal) + ev_pre += (flt_t)0.5; + + if (EFLAG) { + sevdwl += ev_pre * evdwl; + secoul += ev_pre * ecoul; + if (eatom) { + if (NEWTON_PAIR || i < nlocal) + fwtmp += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul; + if (NEWTON_PAIR || j < nlocal) + f[j].w += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul; + } + } + + IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair, + delx, dely, delz); + } + #ifdef __MIC__ + } + #endif + } // for jj + f[i].x += fxtmp; + f[i].y += fytmp; + f[i].z += fztmp; + + IP_PRE_ev_tally_atomq(EVFLAG, EFLAG, vflag, f, fwtmp); + } // for ii + + #if defined(_OPENMP) + #pragma omp barrier + #endif + IP_PRE_fdotr_acc_force(NEWTON_PAIR, EVFLAG, EFLAG, vflag, eatom, nall, + nlocal, minlocal, nthreads, f_start, f_stride, + x); + } // end of omp parallel region + if (EVFLAG) { + if (EFLAG) { + ev_global[0] = oevdwl; + ev_global[1] = oecoul; + } + if (vflag) { + ev_global[2] = ov0; + ev_global[3] = ov1; + ev_global[4] = ov2; + ev_global[5] = ov3; + ev_global[6] = ov4; + ev_global[7] = ov5; + } + } + 
#ifdef __MIC__ + *timer_compute = MIC_Wtime() - *timer_compute; + #endif + } // end of offload region + + if (offload) + fix->stop_watch(TIME_OFFLOAD_LATENCY); + else + fix->stop_watch(TIME_HOST_PAIR); + + if (EVFLAG) + fix->add_result_array(f_start, ev_global, offload, eatom); + else + fix->add_result_array(f_start, 0, offload); +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCharmmCoulLongIntel::init_style() +{ + PairLJCharmmCoulLong::init_style(); + neighbor->requests[neighbor->nrequest-1]->intel = 1; + + int ifix = modify->find_fix("package_intel"); + if (ifix < 0) + error->all(FLERR, + "The 'package intel' command is required for /intel styles"); + fix = static_cast(modify->fix[ifix]); + + #ifdef _LMP_INTEL_OFFLOAD + fix->set_offload_affinity(); + _cop = fix->coprocessor_number(); + #endif + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + fix->get_mixed_buffers()->free_all_nbor_buffers(); + pack_force_const(force_const_single, fix->get_mixed_buffers()); + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + fix->get_double_buffers()->free_all_nbor_buffers(); + pack_force_const(force_const_double, fix->get_double_buffers()); + } else { + fix->get_single_buffers()->free_all_nbor_buffers(); + pack_force_const(force_const_single, fix->get_single_buffers()); + } +} + +template +void PairLJCharmmCoulLongIntel::pack_force_const(ForceConst &fc, + IntelBuffers *buffers) +{ + int tp1 = atom->ntypes + 1; + int ntable = 1; + if (ncoultablebits) + for (int i = 0; i < ncoultablebits; i++) ntable *= 2; + + fc.set_ntypes(tp1, ntable, memory, _cop); + buffers->set_ntypes(tp1); + flt_t **cutneighsq = buffers->get_cutneighsq(); + + // Repeat cutsq calculation because done after call to init_style + double cut, cutneigh; + if (cut_lj > cut_coul) + error->all(FLERR, + "Intel varient of lj/charmm/coul/long expects lj cutoff<=coulombic"); + for (int i = 1; i <= atom->ntypes; i++) { + for (int j = i; j <= atom->ntypes; 
j++) { + if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { + cut = init_one(i, j); + cutneigh = cut + neighbor->skin; + cutsq[i][j] = cutsq[j][i] = cut*cut; + cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh; + } + } + } + + cut_lj_innersq = cut_lj_inner * cut_lj_inner; + cut_ljsq = cut_lj * cut_lj; + cut_coulsq = cut_coul * cut_coul; + cut_bothsq = MAX(cut_ljsq, cut_coulsq); + + fc.g_ewald = force->kspace->g_ewald; + fc.tabinnersq = tabinnersq; + fc.cut_coulsq = cut_coulsq; + fc.cut_ljsq = cut_ljsq; + fc.cut_lj_innersq = cut_lj_innersq; + + for (int i = 0; i < 4; i++) { + fc.special_lj[i] = force->special_lj[i]; + fc.special_coul[i] = force->special_coul[i]; + fc.special_coul[0] = 1.0; + fc.special_lj[0] = 1.0; + } + + for (int i = 0; i < tp1; i++) { + for (int j = 0; j < tp1; j++) { + fc.lj[i][j].x = lj1[i][j]; + fc.lj[i][j].y = lj2[i][j]; + fc.lj[i][j].z = lj3[i][j]; + fc.lj[i][j].w = lj4[i][j]; + fc.cutsq[i][j] = cutsq[i][j]; + } + } + + if (ncoultablebits) { + for (int i = 0; i < ntable; i++) { + fc.table[i].r = rtable[i]; + fc.table[i].dr = drtable[i]; + fc.table[i].f = ftable[i]; + fc.table[i].df = dftable[i]; + fc.etable[i] = etable[i]; + fc.detable[i] = detable[i]; + fc.ctable[i] = ctable[i]; + fc.dctable[i] = dctable[i]; + } + } + + #ifdef _LMP_INTEL_OFFLOAD + if (_cop < 0) return; + flt_t * special_lj = fc.special_lj; + flt_t * special_coul = fc.special_coul; + flt_t * cutsq = fc.cutsq[0]; + LJ_T * lj = fc.lj[0]; + TABLE_T * table = fc.table; + flt_t * etable = fc.etable; + flt_t * detable = fc.detable; + flt_t * ctable = fc.ctable; + flt_t * dctable = fc.dctable; + flt_t * ocutneighsq = cutneighsq[0]; + int tp1sq = tp1 * tp1; + #pragma offload_transfer target(mic:_cop) \ + in(special_lj, special_coul: length(4) alloc_if(0) free_if(0)) \ + in(cutsq,lj: length(tp1sq) alloc_if(0) free_if(0)) \ + in(table: length(ntable) alloc_if(0) free_if(0)) \ + in(etable,detable,ctable,dctable: length(ntable) alloc_if(0) free_if(0)) \ + 
in(ocutneighsq: length(tp1sq) alloc_if(0) free_if(0)) + #endif +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJCharmmCoulLongIntel::ForceConst::set_ntypes(const int ntypes, + const int ntable, + Memory *memory, + const int cop) { + if ( (ntypes != _ntypes || ntable != _ntable) ) { + if (_ntypes > 0) { + #ifdef _LMP_INTEL_OFFLOAD + flt_t * ospecial_lj = special_lj; + flt_t * ospecial_coul = special_coul; + flt_t * ocutsq = cutsq[0]; + typename IntelBuffers::vec4_t * olj = lj[0]; + table_t * otable = table; + flt_t * oetable = etable; + flt_t * odetable = detable; + flt_t * octable = ctable; + flt_t * odctable = dctable; + if (ospecial_lj != NULL && ocutsq != NULL && olj != NULL && + otable != NULL && oetable != NULL && odetable != NULL && + octable != NULL && odctable != NULL && ospecial_coul != NULL && + cop >= 0) { + #pragma offload_transfer target(mic:cop) \ + nocopy(ospecial_lj, ospecial_coul: alloc_if(0) free_if(1)) \ + nocopy(ocutsq, olj: alloc_if(0) free_if(1)) \ + nocopy(otable: alloc_if(0) free_if(1)) \ + nocopy(oetable, odetable, octable, odctable: alloc_if(0) free_if(1)) + } + #endif + + _memory->destroy(cutsq); + _memory->destroy(lj); + _memory->destroy(table); + _memory->destroy(etable); + _memory->destroy(detable); + _memory->destroy(ctable); + _memory->destroy(dctable); + } + if (ntypes > 0) { + _cop = cop; + memory->create(cutsq,ntypes,ntypes,"fc.cutsq"); + memory->create(lj,ntypes,ntypes,"fc.lj"); + memory->create(table,ntable,"pair:fc.table"); + memory->create(etable,ntable,"pair:fc.etable"); + memory->create(detable,ntable,"pair:fc.detable"); + memory->create(ctable,ntable,"pair:fc.ctable"); + memory->create(dctable,ntable,"pair:fc.dctable"); + + #ifdef _LMP_INTEL_OFFLOAD + flt_t * ospecial_lj = special_lj; + flt_t * ospecial_coul = special_coul; + flt_t * ocutsq = cutsq[0]; + typename IntelBuffers::vec4_t * olj = lj[0]; + table_t * otable = table; + flt_t * oetable = etable; + flt_t * 
odetable = detable; + flt_t * octable = ctable; + flt_t * odctable = dctable; + int tp1sq = ntypes*ntypes; + if (ospecial_lj != NULL && ocutsq != NULL && olj != NULL && + otable !=NULL && oetable != NULL && odetable != NULL && + octable != NULL && odctable != NULL && ospecial_coul != NULL && + cop >= 0) { + #pragma offload_transfer target(mic:cop) \ + nocopy(ospecial_lj: length(4) alloc_if(1) free_if(0)) \ + nocopy(ospecial_coul: length(4) alloc_if(1) free_if(0)) \ + nocopy(ocutsq,olj: length(tp1sq) alloc_if(1) free_if(0)) \ + nocopy(otable: length(ntable) alloc_if(1) free_if(0)) \ + nocopy(oetable,odetable: length(ntable) alloc_if(1) free_if(0)) \ + nocopy(octable,odctable: length(ntable) alloc_if(1) free_if(0)) + } + #endif + } + } + _ntypes=ntypes; + _ntable=ntable; + _memory=memory; +} diff --git a/src/USER-INTEL/pair_lj_charmm_coul_long_intel.h b/src/USER-INTEL/pair_lj_charmm_coul_long_intel.h new file mode 100644 index 000000000..ad66c786b --- /dev/null +++ b/src/USER-INTEL/pair_lj_charmm_coul_long_intel.h @@ -0,0 +1,104 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: W. 
Michael Brown (Intel) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/charmm/coul/long/intel,PairLJCharmmCoulLongIntel) + +#else + +#ifndef LMP_PAIR_LJ_CHARMM_COUL_LONG_INTEL_H +#define LMP_PAIR_LJ_CHARMM_COUL_LONG_INTEL_H + +#include "pair_lj_charmm_coul_long.h" +#include "fix_intel.h" + +namespace LAMMPS_NS { + +class PairLJCharmmCoulLongIntel : public PairLJCharmmCoulLong { + + public: + PairLJCharmmCoulLongIntel(class LAMMPS *); + virtual ~PairLJCharmmCoulLongIntel(); + + virtual void compute(int, int); + void init_style(); + + typedef struct { float x,y,z; int w; } sng4_t; + + private: + FixIntel *fix; + int _cop; + + template class ForceConst; + template + void compute(int eflag, int vflag, IntelBuffers *buffers, + const ForceConst &fc); + template + void eval(const int offload, const int vflag, + IntelBuffers * buffers, + const ForceConst &fc, const int astart, const int aend); + + template + void pack_force_const(ForceConst &fc, + IntelBuffers *buffers); + + // ---------------------------------------------------------------------- + template + class ForceConst { + public: + typedef struct { flt_t r, dr, f, df; } table_t; + __declspec(align(64)) flt_t special_coul[4]; + __declspec(align(64)) flt_t special_lj[4]; + flt_t **cutsq, g_ewald, tabinnersq; + flt_t cut_coulsq, cut_ljsq; + flt_t cut_lj_innersq; + table_t *table; + flt_t *etable, *detable, *ctable, *dctable; + typename IntelBuffers::vec4_t **lj; + + ForceConst() : _ntypes(0), _ntable(0) {} + ~ForceConst() { set_ntypes(0,0,NULL,_cop); } + + void set_ntypes(const int ntypes, const int ntable, Memory *memory, + const int cop); + + private: + int _ntypes, _ntable, _cop; + Memory *_memory; + }; + ForceConst force_const_single; + ForceConst force_const_double; +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: The 'package intel' command is required for /intel styles + +Self-explanatory. 
+ +E: Intel varient of lj/charmm/coul/long expects lj cutoff<=coulombic + +The intel accelerated version of the CHARMM style requires that the +Lennard-Jones cutoff is not greater than the coulombic cutoff. + +*/ diff --git a/src/USER-INTEL/pair_lj_cut_coul_long_intel.cpp b/src/USER-INTEL/pair_lj_cut_coul_long_intel.cpp new file mode 100644 index 000000000..4163a1f7d --- /dev/null +++ b/src/USER-INTEL/pair_lj_cut_coul_long_intel.cpp @@ -0,0 +1,634 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: W. 
Michael Brown (Intel) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_cut_coul_long_intel.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "group.h" +#include "kspace.h" +#include "memory.h" +#include "modify.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "memory.h" +#include "suffix.h" +using namespace LAMMPS_NS; + +#define C_FORCE_T typename ForceConst::c_force_t +#define C_ENERGY_T typename ForceConst::c_energy_t +#define TABLE_T typename ForceConst::table_t + +/* ---------------------------------------------------------------------- */ + +PairLJCutCoulLongIntel::PairLJCutCoulLongIntel(LAMMPS *lmp) : + PairLJCutCoulLong(lmp) +{ + suffix_flag |= Suffix::INTEL; + respa_enable = 0; + cut_respa = NULL; +} + +/* ---------------------------------------------------------------------- */ + +PairLJCutCoulLongIntel::~PairLJCutCoulLongIntel() +{ +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCutCoulLongIntel::compute(int eflag, int vflag) +{ + if (fix->precision()==FixIntel::PREC_MODE_MIXED) + compute(eflag, vflag, fix->get_mixed_buffers(), + force_const_single); + else if (fix->precision()==FixIntel::PREC_MODE_DOUBLE) + compute(eflag, vflag, fix->get_double_buffers(), + force_const_double); + else + compute(eflag, vflag, fix->get_single_buffers(), + force_const_single); + + fix->balance_stamp(); + vflag_fdotr = 0; +} + +template +void PairLJCutCoulLongIntel::compute(int eflag, int vflag, + IntelBuffers *buffers, + const ForceConst &fc) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int inum = list->inum; + const int nthreads = comm->nthreads; + const int host_start = fix->host_start_pair(); + const int offload_end = fix->offload_end_pair(); + const int ago = neighbor->ago; + + if (ago != 0 && fix->separate_buffers() == 0) { + 
fix->start_watch(TIME_PACK); + #if defined(_OPENMP) + #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc) + #endif + { + int ifrom, ito, tid; + IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost, + nthreads, sizeof(ATOM_T)); + buffers->thr_pack(ifrom,ito,ago); + } + fix->stop_watch(TIME_PACK); + } + + if (evflag || vflag_fdotr) { + int ovflag = 0; + if (vflag_fdotr) ovflag = 2; + else if (vflag) ovflag = 1; + if (eflag) { + if (force->newton_pair) { + eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum); + } else { + eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum); + } + } else { + if (force->newton_pair) { + eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum); + } else { + eval<1,0,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,0,0>(0, ovflag, buffers, fc, host_start, inum); + } + } + } else { + if (force->newton_pair) { + eval<0,0,1>(1, 0, buffers, fc, 0, offload_end); + eval<0,0,1>(0, 0, buffers, fc, host_start, inum); + } else { + eval<0,0,0>(1, 0, buffers, fc, 0, offload_end); + eval<0,0,0>(0, 0, buffers, fc, host_start, inum); + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJCutCoulLongIntel::eval(const int offload, const int vflag, + IntelBuffers *buffers, + const ForceConst &fc, + const int astart, const int aend) +{ + const int inum = aend - astart; + if (inum == 0) return; + int nlocal, nall, minlocal; + fix->get_buffern(offload, nlocal, nall, minlocal); + + const int ago = neighbor->ago; + IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall); + + ATOM_T * restrict const x = buffers->get_x(offload); + flt_t * restrict const q = buffers->get_q(offload); + + const int * restrict const numneigh = list->numneigh; + const int * restrict const cnumneigh = 
buffers->cnumneigh(list); + const int * restrict const firstneigh = buffers->firstneigh(list); + + const flt_t * restrict const special_coul = fc.special_coul; + const flt_t * restrict const special_lj = fc.special_lj; + const flt_t qqrd2e = force->qqrd2e; + + const C_FORCE_T * restrict const c_force = fc.c_force[0]; + const C_ENERGY_T * restrict const c_energy = fc.c_energy[0]; + const TABLE_T * restrict const table = fc.table; + const flt_t * restrict const etable = fc.etable; + const flt_t * restrict const detable = fc.detable; + const flt_t * restrict const ctable = fc.ctable; + const flt_t * restrict const dctable = fc.dctable; + const flt_t g_ewald = fc.g_ewald; + const flt_t tabinnersq = fc.tabinnersq; + + const int ntypes = atom->ntypes + 1; + const int eatom = this->eflag_atom; + + // Determine how much data to transfer + int x_size, q_size, f_stride, ev_size, separate_flag; + IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag, + buffers, offload, fix, separate_flag, + x_size, q_size, ev_size, f_stride); + + int tc; + FORCE_T * restrict f_start; + acc_t * restrict ev_global; + IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global); + + const int nthreads = tc; + #ifdef _LMP_INTEL_OFFLOAD + int *overflow = fix->get_off_overflow_flag(); + double *timer_compute = fix->off_watch_pair(); + // Redeclare as local variables for offload + const int ncoultablebits = this->ncoultablebits; + const int ncoulmask = this->ncoulmask; + const int ncoulshiftbits = this->ncoulshiftbits; + #ifdef INTEL_ALLOW_TABLE + #define ITABLE_IN in(table,etable,detable:length(0) alloc_if(0) free_if(0)) \ + in(ctable,dctable:length(0) alloc_if(0) free_if(0)) \ + in(ncoultablebits,tabinnersq,ncoulmask,ncoulshiftbits) + #else + #define ITABLE_IN + #endif + + if (offload) fix->start_watch(TIME_OFFLOAD_LATENCY); + #pragma offload target(mic:_cop) if(offload) \ + in(special_lj,special_coul:length(0) alloc_if(0) free_if(0)) \ + in(c_force, c_energy:length(0) alloc_if(0) 
free_if(0)) \ + in(firstneigh:length(0) alloc_if(0) free_if(0)) \ + in(cnumneigh:length(0) alloc_if(0) free_if(0)) \ + in(numneigh:length(0) alloc_if(0) free_if(0)) \ + in(x:length(x_size) alloc_if(0) free_if(0)) \ + in(q:length(q_size) alloc_if(0) free_if(0)) \ + in(overflow:length(0) alloc_if(0) free_if(0)) \ + in(astart,nthreads,qqrd2e,g_ewald,inum,nall,ntypes,vflag,eatom) \ + in(f_stride,nlocal,minlocal,separate_flag,offload) \ + out(f_start:length(f_stride) alloc_if(0) free_if(0)) \ + out(ev_global:length(ev_size) alloc_if(0) free_if(0)) \ + out(timer_compute:length(1) alloc_if(0) free_if(0)) \ + ITABLE_IN signal(f_start) + #endif + { + #ifdef __MIC__ + *timer_compute = MIC_Wtime(); + #endif + + IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall, + f_stride, x, q); + + acc_t oevdwl, oecoul, ov0, ov1, ov2, ov3, ov4, ov5; + if (EVFLAG) { + oevdwl = oecoul = (acc_t)0; + if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; + } + + // loop over neighbors of my atoms + #if defined(_OPENMP) + #pragma omp parallel default(none) \ + shared(f_start,f_stride,nlocal,nall,minlocal) \ + reduction(+:oevdwl,oecoul,ov0,ov1,ov2,ov3,ov4,ov5) + #endif + { + int iifrom, iito, tid; + IP_PRE_omp_range_id(iifrom, iito, tid, inum, nthreads); + iifrom += astart; + iito += astart; + + FORCE_T * restrict const f = f_start - minlocal + (tid * f_stride); + memset(f + minlocal, 0, f_stride * sizeof(FORCE_T)); + + for (int i = iifrom; i < iito; ++i) { + const int itype = x[i].w; + + const int ptr_off = itype * ntypes; + const C_FORCE_T * restrict const c_forcei = c_force + ptr_off; + const C_ENERGY_T * restrict const c_energyi = c_energy + ptr_off; + + const int * restrict const jlist = firstneigh + cnumneigh[i]; + const int jnum = numneigh[i]; + + acc_t fxtmp,fytmp,fztmp,fwtmp; + acc_t sevdwl, secoul, sv0, sv1, sv2, sv3, sv4, sv5; + + const flt_t xtmp = x[i].x; + const flt_t ytmp = x[i].y; + const flt_t ztmp = x[i].z; + const flt_t qtmp = q[i]; + fxtmp = fytmp = fztmp = 
(acc_t)0; + if (EVFLAG) { + if (EFLAG) fwtmp = sevdwl = secoul = (acc_t)0; + if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0; + } + + #pragma vector aligned + #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \ + sv0, sv1, sv2, sv3, sv4, sv5) + for (int jj = 0; jj < jnum; jj++) { + flt_t forcecoul, forcelj, evdwl, ecoul; + forcecoul = forcelj = evdwl = ecoul = (flt_t)0.0; + + const int sbindex = jlist[jj] >> SBBITS & 3; + const int j = jlist[jj] & NEIGHMASK; + + const flt_t delx = xtmp - x[j].x; + const flt_t dely = ytmp - x[j].y; + const flt_t delz = ztmp - x[j].z; + const int jtype = x[j].w; + const flt_t rsq = delx * delx + dely * dely + delz * delz; + + const flt_t r2inv = (flt_t)1.0 / rsq; + + #ifdef __MIC__ + if (rsq < c_forcei[jtype].cutsq) { + #endif + #ifdef INTEL_ALLOW_TABLE + if (!ncoultablebits || rsq <= tabinnersq) { + #endif + const flt_t A1 = 0.254829592; + const flt_t A2 = -0.284496736; + const flt_t A3 = 1.421413741; + const flt_t A4 = -1.453152027; + const flt_t A5 = 1.061405429; + const flt_t EWALD_F = 1.12837917; + const flt_t INV_EWALD_P = 1.0 / 0.3275911; + + const flt_t r = sqrt(rsq); + const flt_t grij = g_ewald * r; + const flt_t expm2 = exp(-grij * grij); + const flt_t t = INV_EWALD_P / (INV_EWALD_P + grij); + const flt_t erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + const flt_t prefactor = qqrd2e * qtmp * q[j] / r; + forcecoul = prefactor * (erfc + EWALD_F * grij * expm2); + if (EFLAG) ecoul = prefactor * erfc; + if (sbindex) { + const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex])* + prefactor; + forcecoul -= adjust; + if (EFLAG) ecoul -= adjust; + } + #ifdef INTEL_ALLOW_TABLE + } else { + float rsq_lookup = rsq; + const int itable = (__intel_castf32_u32(rsq_lookup) & + ncoulmask) >> ncoulshiftbits; + const flt_t fraction = (rsq_lookup - table[itable].r) * + table[itable].dr; + + const flt_t tablet = table[itable].f + + fraction * table[itable].df; + forcecoul = qtmp * q[j] * tablet; + if (EFLAG) 
ecoul = qtmp * q[j] * (etable[itable] + + fraction * detable[itable]); + if (sbindex) { + const flt_t table2 = ctable[itable] + + fraction * dctable[itable]; + const flt_t prefactor = qtmp * q[j] * table2; + const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex]) * + prefactor; + forcecoul -= adjust; + if (EFLAG) ecoul -= adjust; + } + } + #endif + #ifdef __MIC__ + } + #endif + + #ifdef __MIC__ + if (rsq < c_forcei[jtype].cut_ljsq) { + #endif + flt_t r6inv = r2inv * r2inv * r2inv; + forcelj = r6inv * (c_forcei[jtype].lj1 * r6inv - + c_forcei[jtype].lj2); + if (EFLAG) evdwl = r6inv*(c_energyi[jtype].lj3 * r6inv - + c_energyi[jtype].lj4) - + c_energyi[jtype].offset; + + if (sbindex) { + const flt_t factor_lj = special_lj[sbindex]; + forcelj *= factor_lj; + if (EFLAG) evdwl *= factor_lj; + } + #ifdef __MIC__ + } + #else + if (rsq > c_forcei[jtype].cutsq) + { forcecoul = (flt_t)0.0; ecoul = (flt_t)0.0; } + if (rsq > c_forcei[jtype].cut_ljsq) + { forcelj = (flt_t)0.0; evdwl = (flt_t)0.0; } + #endif + + #ifdef __MIC__ + if (rsq < c_forcei[jtype].cutsq) { + #endif + const flt_t fpair = (forcecoul + forcelj) * r2inv; + fxtmp += delx * fpair; + fytmp += dely * fpair; + fztmp += delz * fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j].x -= delx * fpair; + f[j].y -= dely * fpair; + f[j].z -= delz * fpair; + } + + if (EVFLAG) { + flt_t ev_pre = (flt_t)0; + if (NEWTON_PAIR || i < nlocal) + ev_pre += (flt_t)0.5; + if (NEWTON_PAIR || j < nlocal) + ev_pre += (flt_t)0.5; + + if (EFLAG) { + sevdwl += ev_pre * evdwl; + secoul += ev_pre * ecoul; + if (eatom) { + if (NEWTON_PAIR || i < nlocal) + fwtmp += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul; + if (NEWTON_PAIR || j < nlocal) + f[j].w += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul; + } + } + IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair, delx, dely, delz); + } + #ifdef __MIC__ + } + #endif + } // for jj + + f[i].x += fxtmp; + f[i].y += fytmp; + f[i].z += fztmp; + IP_PRE_ev_tally_atomq(EVFLAG, EFLAG, vflag, f, fwtmp); + } // for ii + #if 
defined(_OPENMP) + #pragma omp barrier + #endif + IP_PRE_fdotr_acc_force(NEWTON_PAIR, EVFLAG, EFLAG, vflag, eatom, nall, + nlocal, minlocal, nthreads, f_start, f_stride, + x); + } // end of omp parallel region + if (EVFLAG) { + if (EFLAG) { + ev_global[0] = oevdwl; + ev_global[1] = oecoul; + } + if (vflag) { + ev_global[2] = ov0; + ev_global[3] = ov1; + ev_global[4] = ov2; + ev_global[5] = ov3; + ev_global[6] = ov4; + ev_global[7] = ov5; + } + } + #ifdef __MIC__ + *timer_compute = MIC_Wtime() - *timer_compute; + #endif + } // end of offload region + + if (offload) + fix->stop_watch(TIME_OFFLOAD_LATENCY); + else + fix->stop_watch(TIME_HOST_PAIR); + + if (EVFLAG) + fix->add_result_array(f_start, ev_global, offload, eatom); + else + fix->add_result_array(f_start, 0, offload); +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCutCoulLongIntel::init_style() +{ + PairLJCutCoulLong::init_style(); + neighbor->requests[neighbor->nrequest-1]->intel = 1; + + int ifix = modify->find_fix("package_intel"); + if (ifix < 0) + error->all(FLERR, + "The 'package intel' command is required for /intel styles"); + fix = static_cast(modify->fix[ifix]); + + #ifdef _LMP_INTEL_OFFLOAD + fix->set_offload_affinity(); + _cop = fix->coprocessor_number(); + #endif + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + fix->get_mixed_buffers()->free_all_nbor_buffers(); + pack_force_const(force_const_single, fix->get_mixed_buffers()); + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + fix->get_double_buffers()->free_all_nbor_buffers(); + pack_force_const(force_const_double, fix->get_double_buffers()); + } else { + fix->get_single_buffers()->free_all_nbor_buffers(); + pack_force_const(force_const_single, fix->get_single_buffers()); + } +} + +template +void PairLJCutCoulLongIntel::pack_force_const(ForceConst &fc, + IntelBuffers *buffers) +{ + int tp1 = atom->ntypes + 1; + int ntable = 1; + if (ncoultablebits) + for (int i = 0; i < 
ncoultablebits; i++) ntable *= 2; + + fc.set_ntypes(tp1, ntable, memory, _cop); + buffers->set_ntypes(tp1); + flt_t **cutneighsq = buffers->get_cutneighsq(); + + // Repeat cutsq calculation because done after call to init_style + double cut, cutneigh; + for (int i = 1; i <= atom->ntypes; i++) { + for (int j = i; j <= atom->ntypes; j++) { + if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { + cut = init_one(i, j); + cutneigh = cut + neighbor->skin; + cutsq[i][j] = cutsq[j][i] = cut*cut; + cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh; + } + } + } + + fc.g_ewald = force->kspace->g_ewald; + fc.tabinnersq = tabinnersq; + + for (int i = 0; i < 4; i++) { + fc.special_lj[i] = force->special_lj[i]; + fc.special_coul[i] = force->special_coul[i]; + fc.special_coul[0] = 1.0; + fc.special_lj[0] = 1.0; + } + + for (int i = 0; i < tp1; i++) { + for (int j = 0; j < tp1; j++) { + fc.c_force[i][j].cutsq = cutsq[i][j]; + fc.c_force[i][j].cut_ljsq = cut_ljsq[i][j]; + fc.c_force[i][j].lj1 = lj1[i][j]; + fc.c_force[i][j].lj2 = lj2[i][j]; + fc.c_energy[i][j].lj3 = lj3[i][j]; + fc.c_energy[i][j].lj4 = lj4[i][j]; + fc.c_energy[i][j].offset = offset[i][j]; + } + } + + if (ncoultablebits) { + for (int i = 0; i < ntable; i++) { + fc.table[i].r = rtable[i]; + fc.table[i].dr = drtable[i]; + fc.table[i].f = ftable[i]; + fc.table[i].df = dftable[i]; + fc.etable[i] = etable[i]; + fc.detable[i] = detable[i]; + fc.ctable[i] = ctable[i]; + fc.dctable[i] = dctable[i]; + } + } + + #ifdef _LMP_INTEL_OFFLOAD + if (_cop < 0) return; + flt_t * special_lj = fc.special_lj; + flt_t * special_coul = fc.special_coul; + C_FORCE_T * c_force = fc.c_force[0]; + C_ENERGY_T * c_energy = fc.c_energy[0]; + TABLE_T * table = fc.table; + flt_t * etable = fc.etable; + flt_t * detable = fc.detable; + flt_t * ctable = fc.ctable; + flt_t * dctable = fc.dctable; + flt_t * ocutneighsq = cutneighsq[0]; + int tp1sq = tp1 * tp1; + #pragma offload_transfer target(mic:_cop) \ + in(special_lj, 
special_coul: length(4) alloc_if(0) free_if(0)) \ + in(c_force, c_energy: length(tp1sq) alloc_if(0) free_if(0)) \ + in(table: length(ntable) alloc_if(0) free_if(0)) \ + in(etable,detable,ctable,dctable: length(ntable) alloc_if(0) free_if(0)) \ + in(ocutneighsq: length(tp1sq) alloc_if(0) free_if(0)) + #endif +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJCutCoulLongIntel::ForceConst::set_ntypes(const int ntypes, + const int ntable, + Memory *memory, + const int cop) { + if ( (ntypes != _ntypes || ntable != _ntable) ) { + if (_ntypes > 0) { + #ifdef _LMP_INTEL_OFFLOAD + flt_t * ospecial_lj = special_lj; + flt_t * ospecial_coul = special_coul; + c_force_t * oc_force = c_force[0]; + c_energy_t * oc_energy = c_energy[0]; + table_t * otable = table; + flt_t * oetable = etable; + flt_t * odetable = detable; + flt_t * octable = ctable; + flt_t * odctable = dctable; + if (ospecial_lj != NULL && oc_force != NULL && + oc_energy != NULL && otable != NULL && oetable != NULL && + odetable != NULL && octable != NULL && odctable != NULL && + ospecial_coul != NULL && _cop >= 0) { + #pragma offload_transfer target(mic:cop) \ + nocopy(ospecial_lj, ospecial_coul: alloc_if(0) free_if(1)) \ + nocopy(oc_force, oc_energy: alloc_if(0) free_if(1)) \ + nocopy(otable: alloc_if(0) free_if(1)) \ + nocopy(oetable, odetable, octable, odctable: alloc_if(0) free_if(1)) + } + #endif + + _memory->destroy(c_force); + _memory->destroy(c_energy); + _memory->destroy(table); + _memory->destroy(etable); + _memory->destroy(detable); + _memory->destroy(ctable); + _memory->destroy(dctable); + } + if (ntypes > 0) { + _cop = cop; + memory->create(c_force,ntypes,ntypes,"fc.c_force"); + memory->create(c_energy,ntypes,ntypes,"fc.c_energy"); + memory->create(table,ntable,"pair:fc.table"); + memory->create(etable,ntable,"pair:fc.etable"); + memory->create(detable,ntable,"pair:fc.detable"); + memory->create(ctable,ntable,"pair:fc.ctable"); + 
memory->create(dctable,ntable,"pair:fc.dctable"); + + #ifdef _LMP_INTEL_OFFLOAD + flt_t * ospecial_lj = special_lj; + flt_t * ospecial_coul = special_coul; + c_force_t * oc_force = c_force[0]; + c_energy_t * oc_energy = c_energy[0]; + table_t * otable = table; + flt_t * oetable = etable; + flt_t * odetable = detable; + flt_t * octable = ctable; + flt_t * odctable = dctable; + int tp1sq = ntypes*ntypes; + if (ospecial_lj != NULL && oc_force != NULL && + oc_energy != NULL && otable !=NULL && oetable != NULL && + odetable != NULL && octable != NULL && odctable != NULL && + ospecial_coul != NULL && cop >= 0) { + #pragma offload_transfer target(mic:cop) \ + nocopy(ospecial_lj: length(4) alloc_if(1) free_if(0)) \ + nocopy(ospecial_coul: length(4) alloc_if(1) free_if(0)) \ + nocopy(oc_force: length(tp1sq) alloc_if(1) free_if(0)) \ + nocopy(oc_energy: length(tp1sq) alloc_if(1) free_if(0)) \ + nocopy(otable: length(ntable) alloc_if(1) free_if(0)) \ + nocopy(oetable,odetable: length(ntable) alloc_if(1) free_if(0)) \ + nocopy(octable,odctable: length(ntable) alloc_if(1) free_if(0)) + } + #endif + } + } + _ntypes=ntypes; + _ntable=ntable; + _memory=memory; +} diff --git a/src/USER-INTEL/pair_lj_cut_coul_long_intel.h b/src/USER-INTEL/pair_lj_cut_coul_long_intel.h new file mode 100644 index 000000000..d7b4282a9 --- /dev/null +++ b/src/USER-INTEL/pair_lj_cut_coul_long_intel.h @@ -0,0 +1,100 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: W. Michael Brown (Intel) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/cut/coul/long/intel,PairLJCutCoulLongIntel) + +#else + +#ifndef LMP_PAIR_LJ_CUT_COUL_LONG_INTEL_H +#define LMP_PAIR_LJ_CUT_COUL_LONG_INTEL_H + +#include "pair_lj_cut_coul_long.h" +#include "fix_intel.h" + +namespace LAMMPS_NS { + +class PairLJCutCoulLongIntel : public PairLJCutCoulLong { + + public: + PairLJCutCoulLongIntel(class LAMMPS *); + virtual ~PairLJCutCoulLongIntel(); + + virtual void compute(int, int); + void init_style(); + + typedef struct { float x,y,z; int w; } sng4_t; + + private: + FixIntel *fix; + int _cop; + + template class ForceConst; + template + void compute(int eflag, int vflag, IntelBuffers *buffers, + const ForceConst &fc); + template + void eval(const int offload, const int vflag, + IntelBuffers * buffers, + const ForceConst &fc, const int astart, const int aend); + + template + void pack_force_const(ForceConst &fc, + IntelBuffers *buffers); + + // ---------------------------------------------------------------------- + template + class ForceConst { + public: + typedef struct { flt_t cutsq, cut_ljsq, lj1, lj2; } c_force_t; + typedef struct { flt_t lj3, lj4, offset, pad; } c_energy_t; + typedef struct { flt_t r, dr, f, df; } table_t; + __declspec(align(64)) flt_t special_coul[4]; + __declspec(align(64)) flt_t special_lj[4]; + flt_t g_ewald, tabinnersq; + c_force_t **c_force; + c_energy_t **c_energy; + table_t *table; + flt_t *etable, *detable, *ctable, *dctable; + + ForceConst() : _ntypes(0), _ntable(0) {} + ~ForceConst() { set_ntypes(0,0,NULL,_cop); } + + void set_ntypes(const int ntypes, const int ntable, Memory *memory, + const int cop); + + private: + int _ntypes, _ntable, _cop; + Memory *_memory; + }; + ForceConst 
force_const_single; + ForceConst force_const_double; +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: The 'package intel' command is required for /intel styles + +Self-explanatory. + +*/ diff --git a/src/USER-INTEL/pair_lj_cut_intel.cpp b/src/USER-INTEL/pair_lj_cut_intel.cpp new file mode 100644 index 000000000..bca3a7349 --- /dev/null +++ b/src/USER-INTEL/pair_lj_cut_intel.cpp @@ -0,0 +1,412 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: W. Michael Brown (Intel) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_cut_intel.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "memory.h" +#include "modify.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +#define FC_PACKED1_T typename ForceConst::fc_packed1 +#define FC_PACKED2_T typename ForceConst::fc_packed2 + +/* ---------------------------------------------------------------------- */ + +PairLJCutIntel::PairLJCutIntel(LAMMPS *lmp) : + PairLJCut(lmp) +{ + suffix_flag |= Suffix::INTEL; + respa_enable = 0; + cut_respa = NULL; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCutIntel::compute(int eflag, int vflag) +{ + if (fix->precision() == FixIntel::PREC_MODE_MIXED) + compute(eflag, vflag, fix->get_mixed_buffers(), + force_const_single); + else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) + 
compute(eflag, vflag, fix->get_double_buffers(), + force_const_double); + else + compute(eflag, vflag, fix->get_single_buffers(), + force_const_single); + + fix->balance_stamp(); + vflag_fdotr = 0; +} + +template +void PairLJCutIntel::compute(int eflag, int vflag, + IntelBuffers *buffers, + const ForceConst &fc) +{ + if (eflag || vflag) { + ev_setup(eflag, vflag); + } else evflag = vflag_fdotr = 0; + + const int inum = list->inum; + const int nthreads = comm->nthreads; + const int host_start = fix->host_start_pair(); + const int offload_end = fix->offload_end_pair(); + const int ago = neighbor->ago; + + if (ago != 0 && fix->separate_buffers() == 0) { + fix->start_watch(TIME_PACK); + if (ago != 0) { + #if defined(_OPENMP) + #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc) + #endif + { + int ifrom, ito, tid; + IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost, + nthreads, sizeof(ATOM_T)); + buffers->thr_pack(ifrom,ito,ago); + } + } + fix->stop_watch(TIME_PACK); + } + + if (evflag || vflag_fdotr) { + int ovflag = 0; + if (vflag_fdotr) ovflag = 2; + else if (vflag) ovflag = 1; + if (eflag) { + if (force->newton_pair) { + eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum); + } else { + eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum); + } + } else { + if (force->newton_pair) { + eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum); + } else { + eval<1,0,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,0,0>(0, ovflag, buffers, fc, host_start, inum); + } + } + } else { + if (force->newton_pair) { + eval<0,0,1>(1, 0, buffers, fc, 0, offload_end); + eval<0,0,1>(0, 0, buffers, fc, host_start, inum); + } else { + eval<0,0,0>(1, 0, buffers, fc, 0, offload_end); + eval<0,0,0>(0, 0, buffers, fc, host_start, inum); + } + } +} + +template +void 
PairLJCutIntel::eval(const int offload, const int vflag, + IntelBuffers *buffers, + const ForceConst &fc, + const int astart, const int aend) +{ + const int inum = aend - astart; + if (inum == 0) return; + int nlocal, nall, minlocal; + fix->get_buffern(offload, nlocal, nall, minlocal); + + const int ago = neighbor->ago; + IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall); + + ATOM_T * restrict const x = buffers->get_x(offload); + + const int * restrict const numneigh = list->numneigh; + const int * restrict const cnumneigh = buffers->cnumneigh(list); + const int * restrict const firstneigh = buffers->firstneigh(list); + const flt_t * restrict const special_lj = fc.special_lj; + const FC_PACKED1_T * restrict const ljc12o = fc.ljc12o[0]; + const FC_PACKED2_T * restrict const lj34 = fc.lj34[0]; + + const int ntypes = atom->ntypes + 1; + const int eatom = this->eflag_atom; + + // Determine how much data to transfer + int x_size, q_size, f_stride, ev_size, separate_flag; + IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag, + buffers, offload, fix, separate_flag, + x_size, q_size, ev_size, f_stride); + + int tc; + FORCE_T * restrict f_start; + acc_t * restrict ev_global; + IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global); + const int nthreads = tc; + int *overflow = fix->get_off_overflow_flag(); + { + #ifdef __MIC__ + *timer_compute = MIC_Wtime(); + #endif + + IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall, + f_stride, x, 0); + + acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5; + if (EVFLAG) { + oevdwl = (acc_t)0; + if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; + } + + // loop over neighbors of my atoms + #if defined(_OPENMP) + #pragma omp parallel default(none) \ + shared(f_start,f_stride,nlocal,nall,minlocal) \ + reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5) + #endif + { + int iifrom, iito, tid; + IP_PRE_omp_range_id(iifrom, iito, tid, inum, nthreads); + iifrom += astart; + iito += astart; + + 
FORCE_T * restrict const f = f_start - minlocal + (tid * f_stride); + memset(f + minlocal, 0, f_stride * sizeof(FORCE_T)); + + for (int i = iifrom; i < iito; ++i) { + const int itype = x[i].w; + + const int ptr_off = itype * ntypes; + const FC_PACKED1_T * restrict const ljc12oi = ljc12o + ptr_off; + const FC_PACKED2_T * restrict const lj34i = lj34 + ptr_off; + + const int * restrict const jlist = firstneigh + cnumneigh[i]; + const int jnum = numneigh[i]; + + acc_t fxtmp, fytmp, fztmp, fwtmp; + acc_t sevdwl, sv0, sv1, sv2, sv3, sv4, sv5; + + const flt_t xtmp = x[i].x; + const flt_t ytmp = x[i].y; + const flt_t ztmp = x[i].z; + fxtmp = fytmp = fztmp = (acc_t)0; + if (EVFLAG) { + if (EFLAG) fwtmp = sevdwl = (acc_t)0; + if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0; + } + + #pragma vector aligned + #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \ + sv0, sv1, sv2, sv3, sv4, sv5) + for (int jj = 0; jj < jnum; jj++) { + flt_t forcelj, evdwl; + forcelj = evdwl = (flt_t)0.0; + + const int sbindex = jlist[jj] >> SBBITS & 3; + const int j = jlist[jj] & NEIGHMASK; + const flt_t delx = xtmp - x[j].x; + const flt_t dely = ytmp - x[j].y; + const flt_t delz = ztmp - x[j].z; + const int jtype = x[j].w; + const flt_t rsq = delx * delx + dely * dely + delz * delz; + + #ifdef __MIC__ + if (rsq < ljc12oi[jtype].cutsq) { + #endif + flt_t factor_lj = special_lj[sbindex]; + flt_t r2inv = 1.0 / rsq; + flt_t r6inv = r2inv * r2inv * r2inv; + #ifndef __MIC__ + if (rsq > ljc12oi[jtype].cutsq) r6inv = (flt_t)0.0; + #endif + forcelj = r6inv * (ljc12oi[jtype].lj1 * r6inv - ljc12oi[jtype].lj2); + flt_t fpair = factor_lj * forcelj * r2inv; + + fxtmp += delx * fpair; + fytmp += dely * fpair; + fztmp += delz * fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j].x -= delx * fpair; + f[j].y -= dely * fpair; + f[j].z -= delz * fpair; + } + + if (EVFLAG) { + flt_t ev_pre = (flt_t)0; + if (NEWTON_PAIR || istop_watch(TIME_OFFLOAD_LATENCY); + else + fix->stop_watch(TIME_HOST_PAIR); + 
+ if (EVFLAG) + fix->add_result_array(f_start, ev_global, offload, eatom); + else + fix->add_result_array(f_start, 0, offload); +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCutIntel::init_style() +{ + PairLJCut::init_style(); + neighbor->requests[neighbor->nrequest-1]->intel = 1; + + int ifix = modify->find_fix("package_intel"); + if (ifix < 0) + error->all(FLERR, + "The 'package intel' command is required for /intel styles"); + fix = static_cast(modify->fix[ifix]); + + #ifdef _LMP_INTEL_OFFLOAD + if (fix->offload_balance() != 0.0) + error->all(FLERR, + "Offload for lj/cut/intel is not yet available. Set balance to 0."); + #endif + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + fix->get_mixed_buffers()->free_all_nbor_buffers(); + pack_force_const(force_const_single, fix->get_mixed_buffers()); + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + fix->get_double_buffers()->free_all_nbor_buffers(); + pack_force_const(force_const_double, fix->get_double_buffers()); + } else { + fix->get_single_buffers()->free_all_nbor_buffers(); + pack_force_const(force_const_single, fix->get_single_buffers()); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJCutIntel::pack_force_const(ForceConst &fc, + IntelBuffers *buffers) +{ + int tp1 = atom->ntypes + 1; + fc.set_ntypes(tp1,memory,_cop); + buffers->set_ntypes(tp1); + flt_t **cutneighsq = buffers->get_cutneighsq(); + + // Repeat cutsq calculation because done after call to init_style + double cut, cutneigh; + for (int i = 1; i <= atom->ntypes; i++) { + for (int j = i; j <= atom->ntypes; j++) { + if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { + cut = init_one(i,j); + cutneigh = cut + neighbor->skin; + cutsq[i][j] = cutsq[j][i] = cut*cut; + cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh; + } + } + } + + for (int i = 0; i < 4; i++) { + fc.special_lj[i] = 
force->special_lj[i]; + fc.special_lj[0] = 1.0; + } + + for (int i = 0; i < tp1; i++) { + for (int j = 0; j < tp1; j++) { + fc.ljc12o[i][j].lj1 = lj1[i][j]; + fc.ljc12o[i][j].lj2 = lj2[i][j]; + fc.lj34[i][j].lj3 = lj3[i][j]; + fc.lj34[i][j].lj4 = lj4[i][j]; + fc.ljc12o[i][j].cutsq = cutsq[i][j]; + fc.ljc12o[i][j].offset = offset[i][j]; + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +void PairLJCutIntel::ForceConst::set_ntypes(const int ntypes, + Memory *memory, + const int cop) { + if (ntypes != _ntypes) { + if (_ntypes > 0) { + fc_packed1 *oljc12o = ljc12o[0]; + fc_packed2 *olj34 = lj34[0]; + + _memory->destroy(oljc12o); + _memory->destroy(olj34); + } + if (ntypes > 0) { + _cop = cop; + memory->create(ljc12o,ntypes,ntypes,"fc.c12o"); + memory->create(lj34,ntypes,ntypes,"fc.lj34"); + } + } + _ntypes = ntypes; + _memory = memory; +} diff --git a/src/USER-INTEL/pair_lj_cut_intel.h b/src/USER-INTEL/pair_lj_cut_intel.h new file mode 100644 index 000000000..a40e39af5 --- /dev/null +++ b/src/USER-INTEL/pair_lj_cut_intel.h @@ -0,0 +1,93 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: W. 
Michael Brown (Intel) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/cut/intel,PairLJCutIntel) + +#else + +#ifndef LMP_PAIR_LJ_CUT_INTEL_H +#define LMP_PAIR_LJ_CUT_INTEL_H + +#include "pair_lj_cut.h" +#include "fix_intel.h" + +namespace LAMMPS_NS { + +class PairLJCutIntel : public PairLJCut { + + public: + PairLJCutIntel(class LAMMPS *); + + virtual void compute(int, int); + void init_style(); + + private: + FixIntel *fix; + int _cop; + + template class ForceConst; + template + void compute(int eflag, int vflag, IntelBuffers *buffers, + const ForceConst &fc); + template + void eval(const int offload, const int vflag, + IntelBuffers * buffers, + const ForceConst &fc, const int astart, const int aend); + + template + void pack_force_const(ForceConst &fc, + IntelBuffers *buffers); + + // ---------------------------------------------------------------------- + + template + class ForceConst { + public: + typedef struct { flt_t cutsq, lj1, lj2, offset; } fc_packed1; + typedef struct { flt_t lj3, lj4; } fc_packed2; + + __declspec(align(64)) flt_t special_lj[4]; + fc_packed1 **ljc12o; + fc_packed2 **lj34; + + ForceConst() : _ntypes(0) {} + ~ForceConst() { set_ntypes(0, NULL, _cop); } + + void set_ntypes(const int ntypes, Memory *memory, const int cop); + + private: + int _ntypes, _cop; + Memory *_memory; + }; + ForceConst force_const_single; + ForceConst force_const_double; +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: The 'package intel' command is required for /intel styles + +Self-explanatory. 
+
+*/
diff --git a/src/USER-INTEL/verlet_intel.cpp b/src/USER-INTEL/verlet_intel.cpp
new file mode 100644
index 000000000..64177e0f0
--- /dev/null
+++ b/src/USER-INTEL/verlet_intel.cpp
@@ -0,0 +1,486 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "string.h"
+#include "verlet_intel.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "comm.h"
+#include "atom.h"
+#include "force.h"
+#include "pair.h"
+#include "bond.h"
+#include "angle.h"
+#include "dihedral.h"
+#include "improper.h"
+#include "kspace.h"
+#include "output.h"
+#include "update.h"
+#include "modify.h"
+#include "compute.h"
+#include "fix.h"
+#include "timer.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+VerletIntel::VerletIntel(LAMMPS *lmp, int narg, char **arg) :
+  Integrate(lmp, narg, arg) {}
+
+/* ----------------------------------------------------------------------
+   initialization before run
+------------------------------------------------------------------------- */
+
+void VerletIntel::init()
+{
+  Integrate::init();
+
+  // warn if no fixes
+
+  if (modify->nfix == 0 && comm->me == 0)
+    error->warning(FLERR,"No fixes defined, atoms won't move");
+
+  // virial_style:
+  // 1 if computed explicitly by pair->compute via sum over pair interactions
+  // 2 if computed implicitly by pair->virial_fdotr_compute via sum over ghosts
+
+  if (force->newton_pair) virial_style = 2;
+  else virial_style = 1;
+
+  // setup lists of computes for global and per-atom PE and pressure
+
+  ev_setup();
+
+  // detect if fix omp is present; if so force_clear() returns immediately
+
+  int ifix = modify->find_fix("package_omp");
+  if (ifix >= 0) external_force_clear = 1;
+
+  if (nvlist_atom)
+    error->all(FLERR,
+               "Cannot currently get per-atom virials with Intel package.");
+  #ifdef _LMP_INTEL_OFFLOAD
+  ifix = modify->find_fix("package_intel");
+  if (ifix >= 0) fix_intel = static_cast<FixIntel *>(modify->fix[ifix]);
+  else fix_intel = 0;   // no fix found: setup() then leaves sync_mode at 0
+  #endif
+
+  // set flags for what arrays to clear in force_clear()
+  // need to clear additional arrays if they exist
+
+  torqueflag = 0;
+  if (atom->torque_flag) torqueflag = 1;
+  erforceflag = 0;
+  if (atom->erforce_flag) erforceflag = 1;
+  e_flag = 0;
+  if (atom->e_flag) e_flag = 1;
+  rho_flag = 0;
+  if (atom->rho_flag) rho_flag = 1;
+
+  // orthogonal vs triclinic simulation box
+
+  triclinic = domain->triclinic;
+}
+
+/* ----------------------------------------------------------------------
+   setup before run
+------------------------------------------------------------------------- */
+
+void VerletIntel::setup()
+{
+  if (comm->me == 0 && screen) fprintf(screen,"Setting up run ...\n");
+
+  update->setupflag = 1;
+
+  // setup domain, communication and neighboring
+  // acquire ghosts
+  // build neighbor lists
+
+  atom->setup();
+  modify->setup_pre_exchange();
+  if (triclinic) domain->x2lamda(atom->nlocal);
+  domain->pbc();
+  domain->reset_box();
+  comm->setup();
+  if (neighbor->style) neighbor->setup_bins();
+  comm->exchange();
+  if (atom->sortfreq > 0) atom->sort();
+  comm->borders();
+  if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
+  domain->image_check();
+  domain->box_too_small_check();
+  modify->setup_pre_neighbor();
+  neighbor->build();
+  neighbor->ncalls = 0;
+
+  // compute all forces
+
+  ev_set(update->ntimestep);
+  force_clear();
+  modify->setup_pre_force(vflag);
+
+  if (pair_compute_flag) force->pair->compute(eflag,vflag);
+  else if (force->pair) force->pair->compute_dummy(eflag,vflag);
+
+  if (atom->molecular) {
+    if (force->bond) force->bond->compute(eflag,vflag);
+    if (force->angle) force->angle->compute(eflag,vflag);
+    if (force->dihedral) force->dihedral->compute(eflag,vflag);
+    if (force->improper) force->improper->compute(eflag,vflag);
+  }
+
+  if (force->kspace) {
+    force->kspace->setup();
+    if (kspace_compute_flag) force->kspace->compute(eflag,vflag);
+    else force->kspace->compute_dummy(eflag,vflag);
+  }
+
+  #ifdef _LMP_INTEL_OFFLOAD
+  sync_mode = 0;   // 0 = no sync, 1 = sync before reverse_comm, 2 = sync after
+  if (fix_intel) {
+    if (fix_intel->offload_balance() != 0.0) {
+      if (fix_intel->offload_noghost())
+        sync_mode = 2;
+      else
+        sync_mode = 1;
+    }
+  }
+
+  if (sync_mode == 1) fix_intel->sync_coprocessor();
+  #endif
+
+  if (force->newton) comm->reverse_comm();
+
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (sync_mode == 2) fix_intel->sync_coprocessor();
+  #endif
+
+  modify->setup(vflag);
+  output->setup();
+  update->setupflag = 0;
+}
+
+/* ----------------------------------------------------------------------
+   setup without output
+   flag = 0 = just force calculation
+   flag = 1 = reneighbor and force calculation
+------------------------------------------------------------------------- */
+
+void VerletIntel::setup_minimal(int flag)
+{
+  update->setupflag = 1;
+
+  // setup domain, communication and neighboring
+  // acquire ghosts
+  // build neighbor lists
+
+  if (flag) {
+    modify->setup_pre_exchange();
+    if (triclinic) domain->x2lamda(atom->nlocal);
+    domain->pbc();
+    domain->reset_box();
+    comm->setup();
+    if (neighbor->style) neighbor->setup_bins();
+    comm->exchange();
+    comm->borders();
+    if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
+    domain->image_check();
+    domain->box_too_small_check();
+    modify->setup_pre_neighbor();
+    neighbor->build();
+    neighbor->ncalls = 0;
+  }
+
+  // compute all forces
+
+  ev_set(update->ntimestep);
+  force_clear();
+  modify->setup_pre_force(vflag);
+
+  if (pair_compute_flag) force->pair->compute(eflag,vflag);
+  else if (force->pair) force->pair->compute_dummy(eflag,vflag);
+
+  if (atom->molecular) {
+    if (force->bond) force->bond->compute(eflag,vflag);
+    if (force->angle) force->angle->compute(eflag,vflag);
+    if (force->dihedral) force->dihedral->compute(eflag,vflag);
+    if (force->improper) force->improper->compute(eflag,vflag);
+  }
+
+  if (force->kspace) {
+    force->kspace->setup();
+    if (kspace_compute_flag) force->kspace->compute(eflag,vflag);
+    else force->kspace->compute_dummy(eflag,vflag);
+  }
+
+  #ifdef _LMP_INTEL_OFFLOAD
+  sync_mode = 0;   // same selection as in setup()
+  if (fix_intel) {
+    if (fix_intel->offload_balance() != 0.0) {
+      if (fix_intel->offload_noghost())
+        sync_mode = 2;
+      else
+        sync_mode = 1;
+    }
+  }
+
+  if (sync_mode == 1) fix_intel->sync_coprocessor();
+  #endif
+
+  if (force->newton) comm->reverse_comm();
+
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (sync_mode == 2) fix_intel->sync_coprocessor();
+  #endif
+
+  modify->setup(vflag);
+  update->setupflag = 0;
+}
+
+/* ----------------------------------------------------------------------
+   run for N steps
+------------------------------------------------------------------------- */
+
+void VerletIntel::run(int n)
+{
+  bigint ntimestep;
+  int nflag,sortflag;
+
+  int n_post_integrate = modify->n_post_integrate;
+  int n_pre_exchange = modify->n_pre_exchange;
+  int n_pre_neighbor = modify->n_pre_neighbor;
+  int n_pre_force = modify->n_pre_force;
+  int n_post_force = modify->n_post_force;
+  int n_end_of_step = modify->n_end_of_step;
+
+  if (atom->sortfreq > 0) sortflag = 1;
+  else sortflag = 0;
+
+  for (int i = 0; i < n; i++) {
+
+    ntimestep = ++update->ntimestep;
+    ev_set(ntimestep);
+
+    // initial time integration
+
+    modify->initial_integrate(vflag);
+    if (n_post_integrate) modify->post_integrate();
+
+    // regular communication vs neighbor list rebuild
+
+    nflag = neighbor->decide();
+
+    if (nflag == 0) {
+      timer->stamp();
+      comm->forward_comm();
+      timer->stamp(TIME_COMM);
+    } else {
+      if (n_pre_exchange) modify->pre_exchange();
+      if (triclinic) domain->x2lamda(atom->nlocal);
+      domain->pbc();
+      if (domain->box_change) {
+        domain->reset_box();
+        comm->setup();
+        if (neighbor->style) neighbor->setup_bins();
+      }
+      timer->stamp();
+      comm->exchange();
+      if (sortflag && ntimestep >= atom->nextsort) atom->sort();
+      comm->borders();
+      if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
+      timer->stamp(TIME_COMM);
+      if (n_pre_neighbor) modify->pre_neighbor();
+      neighbor->build();
+      timer->stamp(TIME_NEIGHBOR);
+    }
+
+    // force computations
+    // important for pair to come before bonded contributions
+    // since some bonded potentials tally pairwise energy/virial
+    // and Pair:ev_tally() needs to be called before any tallying
+
+    force_clear();
+    if (n_pre_force) modify->pre_force(vflag);
+
+    timer->stamp();
+
+    if (pair_compute_flag) {
+      force->pair->compute(eflag,vflag);
+      timer->stamp(TIME_PAIR);
+    }
+
+    if (atom->molecular) {
+      if (force->bond) force->bond->compute(eflag,vflag);
+      if (force->angle) force->angle->compute(eflag,vflag);
+      if (force->dihedral) force->dihedral->compute(eflag,vflag);
+      if (force->improper) force->improper->compute(eflag,vflag);
+      timer->stamp(TIME_BOND);
+    }
+
+    if (kspace_compute_flag) {
+      force->kspace->compute(eflag,vflag);
+      timer->stamp(TIME_KSPACE);
+    }
+
+    #ifdef _LMP_INTEL_OFFLOAD
+    if (sync_mode == 1) {
+      fix_intel->sync_coprocessor();
+      timer->stamp(TIME_PAIR);
+    }
+    #endif
+
+    // reverse communication of forces
+
+    if (force->newton) {
+      comm->reverse_comm();
+      timer->stamp(TIME_COMM);
+    }
+
+    #ifdef _LMP_INTEL_OFFLOAD
+    if (sync_mode == 2) {
+      fix_intel->sync_coprocessor();
+      timer->stamp(TIME_PAIR);
+    }
+    #endif
+
+    // force modifications, final time integration, diagnostics
+
+    if (n_post_force) modify->post_force(vflag);
+    modify->final_integrate();
+    if (n_end_of_step) modify->end_of_step();
+
+    // all output
+
+    if (ntimestep == output->next) {
+      timer->stamp();
+      output->write(ntimestep);
+      timer->stamp(TIME_OUTPUT);
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void VerletIntel::cleanup()
+{
+  modify->post_run();
+  domain->box_too_small_check();
+  update->update_time();
+}
+
+/* ----------------------------------------------------------------------
+   clear force on own & ghost atoms
+   clear other arrays as needed
+------------------------------------------------------------------------- */
+
+void VerletIntel::force_clear()
+{
+  int i;
+
+  if (external_force_clear) return;
+
+  // clear force on all particles
+  // if either newton flag is set, also include ghosts
+  // when using threads always clear all forces.
+
+  if (neighbor->includegroup == 0) {
+    int nall;
+    if (force->newton) nall = atom->nlocal + atom->nghost;
+    else nall = atom->nlocal;
+
+    size_t nbytes = sizeof(double) * nall;
+
+    if (nbytes) {
+      memset(&(atom->f[0][0]),0,3*nbytes);
+      if (torqueflag) memset(&(atom->torque[0][0]),0,3*nbytes);
+      if (erforceflag) memset(&(atom->erforce[0]), 0, nbytes);
+      if (e_flag) memset(&(atom->de[0]), 0, nbytes);
+      if (rho_flag) memset(&(atom->drho[0]), 0, nbytes);
+    }
+
+  // neighbor includegroup flag is set
+  // clear force only on initial nfirst particles
+  // if either newton flag is set, also include ghosts
+
+  } else {
+    int nall = atom->nfirst;
+
+    double **f = atom->f;
+    for (i = 0; i < nall; i++) {
+      f[i][0] = 0.0;
+      f[i][1] = 0.0;
+      f[i][2] = 0.0;
+    }
+
+    if (torqueflag) {
+      double **torque = atom->torque;
+      for (i = 0; i < nall; i++) {
+        torque[i][0] = 0.0;
+        torque[i][1] = 0.0;
+        torque[i][2] = 0.0;
+      }
+    }
+
+    if (erforceflag) {
+      double *erforce = atom->erforce;
+      for (i = 0; i < nall; i++) erforce[i] = 0.0;
+    }
+
+    if (e_flag) {
+      double *de = atom->de;
+      for (i = 0; i < nall; i++) de[i] = 0.0;
+    }
+
+    if (rho_flag) {
+      double *drho = atom->drho;
+      for (i = 0; i < nall; i++) drho[i] = 0.0;
+    }
+
+    if (force->newton) {
+      nall = atom->nlocal + atom->nghost;
+
+      for (i = atom->nlocal; i < nall; i++) {
+        f[i][0] = 0.0;
+        f[i][1] = 0.0;
+        f[i][2] = 0.0;
+      }
+
+      if (torqueflag) {
+        double **torque = atom->torque;
+        for (i = atom->nlocal; i < nall; i++) {
+          torque[i][0] = 0.0;
+          torque[i][1] = 0.0;
+          torque[i][2] = 0.0;
+        }
+      }
+
+      if (erforceflag) {
+        double *erforce = atom->erforce;
+        for (i = atom->nlocal; i < nall; i++) erforce[i] = 0.0;
+      }
+
+      if (e_flag) {
+        double *de = atom->de;
+        for (i = atom->nlocal; i < nall; i++) de[i] = 0.0;   // ghosts only, as for f/torque/erforce
+      }
+
+      if (rho_flag) {
+        double *drho = atom->drho;
+        for (i = atom->nlocal; i < nall; i++) drho[i] = 0.0;   // ghosts only, as for f/torque/erforce
+      }
+    }
+  }
+}
diff --git a/src/USER-INTEL/verlet_intel.h b/src/USER-INTEL/verlet_intel.h
new file mode 100644
index 000000000..de4231431
--- /dev/null
+++ b/src/USER-INTEL/verlet_intel.h
@@ -0,0 +1,68 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef INTEGRATE_CLASS
+
+IntegrateStyle(verlet/intel,VerletIntel)
+
+#else
+
+#ifndef LMP_VERLET_INTEL_H
+#define LMP_VERLET_INTEL_H
+
+#include "integrate.h"
+#ifdef LMP_INTEL_OFFLOAD   // NOTE(review): members below are guarded by _LMP_INTEL_OFFLOAD (leading underscore) - confirm both macros are defined together
+#include "fix_intel.h"
+#endif
+
+namespace LAMMPS_NS {
+
+class VerletIntel : public Integrate {   // integrator registered as run style verlet/intel
+ public:
+  VerletIntel(class LAMMPS *, int, char **);
+  virtual ~VerletIntel() {}
+  virtual void init();               // initialization before run
+  virtual void setup();              // setup before run
+  virtual void setup_minimal(int);   // setup without output; arg 1 = also reneighbor
+  virtual void run(int);             // run for N steps
+  void cleanup();
+
+ protected:
+  int triclinic;                    // 0 if domain is orthog, 1 if triclinic
+  int torqueflag,erforceflag;       // 1 if atom->torque / atom->erforce are zeroed by force_clear()
+  int e_flag,rho_flag;              // 1 if atom->de / atom->drho are zeroed by force_clear()
+
+  virtual void force_clear();
+  #ifdef _LMP_INTEL_OFFLOAD
+  FixIntel *fix_intel;              // fix with ID "package_intel", or 0 if not defined
+  int sync_mode;                    // 0 = no coprocessor sync, 1 = sync before reverse_comm, 2 = sync after
+  #endif
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+W: No fixes defined, atoms won't move
+
+If you are not using a fix like nve, nvt, npt then atom velocities and
+coordinates will not be updated during timestepping.
+
+E: Cannot currently get per-atom virials with Intel package.
+
+The Intel package does not yet support per-atom virial calculation.
+
+*/
diff --git a/src/USER-INTEL/verlet_split_intel.cpp b/src/USER-INTEL/verlet_split_intel.cpp
new file mode 100644
index 000000000..3976607b1
--- /dev/null
+++ b/src/USER-INTEL/verlet_split_intel.cpp
@@ -0,0 +1,589 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Yuxing Peng and Chris Knight (U Chicago)
+------------------------------------------------------------------------- */
+
+#include "string.h"
+#include "verlet_split_intel.h"
+#include "universe.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "comm.h"
+#include "atom.h"
+#include "atom_vec.h"
+#include "force.h"
+#include "pair.h"
+#include "bond.h"
+#include "angle.h"
+#include "dihedral.h"
+#include "improper.h"
+#include "kspace.h"
+#include "output.h"
+#include "update.h"
+#include "fix.h"
+#include "modify.h"
+#include "timer.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+VerletSplitIntel::VerletSplitIntel(LAMMPS *lmp, int narg, char **arg) :
+  VerletIntel(lmp, narg, arg)
+{
+  // error checks on partitions
+
+  if (universe->nworlds != 2)
+    error->universe_all(FLERR,"Verlet/split requires 2 partitions");
+  if (universe->procs_per_world[0] % universe->procs_per_world[1])
+    error->universe_all(FLERR,"Verlet/split requires Rspace partition "
+                        "size be multiple of Kspace partition size");
+
+  // master = 1 for Rspace procs, 0 for Kspace procs
+
+  if (universe->iworld == 0) master = 1;
+  else master = 0;
+
+  ratio = universe->procs_per_world[0] / universe->procs_per_world[1];
+
+  // Kspace root proc broadcasts info about Kspace proc layout to Rspace procs
+
+  int kspace_procgrid[3];
+
+  if (universe->me == universe->root_proc[1]) {
+    kspace_procgrid[0] = comm->procgrid[0];
+    kspace_procgrid[1] = comm->procgrid[1];
+    kspace_procgrid[2] = comm->procgrid[2];
+  }
+  MPI_Bcast(kspace_procgrid,3,MPI_INT,universe->root_proc[1],universe->uworld);
+
+  int ***kspace_grid2proc;   // temporary copy of Kspace grid2proc; destroyed before the constructor returns
+  memory->create(kspace_grid2proc,kspace_procgrid[0],
+                 kspace_procgrid[1],kspace_procgrid[2],
+                 "verlet/split:kspace_grid2proc");
+
+  if (universe->me == universe->root_proc[1]) {
+    for (int i = 0; i < comm->procgrid[0]; i++)
+      for (int j = 0; j < comm->procgrid[1]; j++)
+        for (int k = 0; k < comm->procgrid[2]; k++)
+          kspace_grid2proc[i][j][k] = comm->grid2proc[i][j][k];
+  }
+  MPI_Bcast(&kspace_grid2proc[0][0][0],
+            kspace_procgrid[0]*kspace_procgrid[1]*kspace_procgrid[2],MPI_INT,
+            universe->root_proc[1],universe->uworld);
+
+  // Rspace partition must be multiple of Kspace partition in each dim
+  // so atoms of one Kspace proc coincide with atoms of several Rspace procs
+
+  if (master) {
+    int flag = 0;
+    if (comm->procgrid[0] % kspace_procgrid[0]) flag = 1;
+    if (comm->procgrid[1] % kspace_procgrid[1]) flag = 1;
+    if (comm->procgrid[2] % kspace_procgrid[2]) flag = 1;
+    if (flag)
+      error->one(FLERR,
+                 "Verlet/split requires Rspace partition layout be "
+                 "multiple of Kspace partition layout in each dim");
+  }
+
+  // block = 1 Kspace proc with set of Rspace procs it overlays
+  // me_block = 0 for Kspace proc
+  // me_block = 1 to ratio for Rspace procs
+  // block = MPI communicator for that set of procs
+
+  int iblock,key;
+
+  if (!master) {
+    iblock = comm->me;
+    key = 0;   // lowest key, so the Kspace proc gets rank 0 in its block
+  } else {
+    int kpx = comm->myloc[0] / (comm->procgrid[0]/kspace_procgrid[0]);
+    int kpy = comm->myloc[1] / (comm->procgrid[1]/kspace_procgrid[1]);
+    int kpz = comm->myloc[2] / (comm->procgrid[2]/kspace_procgrid[2]);
+    iblock = kspace_grid2proc[kpx][kpy][kpz];
+    key = 1;
+  }
+
+  MPI_Comm_split(universe->uworld,iblock,key,&block);
+  MPI_Comm_rank(block,&me_block);
+
+  // output block groupings to universe screen/logfile
+  // bmap is ordered by block and then by proc within block
+
+  int *bmap = new int[universe->nprocs];
+  for (int i = 0; i < universe->nprocs; i++) bmap[i] = -1;
+  bmap[iblock*(ratio+1)+me_block] = universe->me;
+
+  int *bmapall = new int[universe->nprocs];
+  MPI_Allreduce(bmap,bmapall,universe->nprocs,MPI_INT,MPI_MAX,universe->uworld);   // MAX merges the one non -1 entry set per proc
+
+  if (universe->me == 0) {
+    if (universe->uscreen) {
+      fprintf(universe->uscreen,
+              "Per-block Rspace/Kspace proc IDs (original proc IDs):\n");
+      int m = 0;
+      for (int i = 0; i < universe->nprocs/(ratio+1); i++) {
+        fprintf(universe->uscreen,"  block %d:",i);
+        int kspace_proc = bmapall[m];
+        for (int j = 1; j <= ratio; j++)
+          fprintf(universe->uscreen," %d",bmapall[m+j]);
+        fprintf(universe->uscreen," %d",kspace_proc);
+        kspace_proc = bmapall[m];   // redundant: same value as assigned above
+        for (int j = 1; j <= ratio; j++) {
+          if (j == 1) fprintf(universe->uscreen," (");
+          else fprintf(universe->uscreen," ");
+          fprintf(universe->uscreen,"%d",
+                  universe->uni2orig[bmapall[m+j]]);
+        }
+        fprintf(universe->uscreen," %d)\n",universe->uni2orig[kspace_proc]);
+        m += ratio + 1;
+      }
+    }
+    if (universe->ulogfile) {
+      fprintf(universe->ulogfile,
+              "Per-block Rspace/Kspace proc IDs (original proc IDs):\n");
+      int m = 0;
+      for (int i = 0; i < universe->nprocs/(ratio+1); i++) {
+        fprintf(universe->ulogfile,"  block %d:",i);
+        int kspace_proc = bmapall[m];
+        for (int j = 1; j <= ratio; j++)
+          fprintf(universe->ulogfile," %d",bmapall[m+j]);
+
+        fprintf(universe->ulogfile," %d",kspace_proc);
+        kspace_proc = bmapall[m];   // redundant: same value as assigned above
+        for (int j = 1; j <= ratio; j++) {
+          if (j == 1) fprintf(universe->ulogfile," (");
+          else fprintf(universe->ulogfile," ");
+          fprintf(universe->ulogfile,"%d",
+                  universe->uni2orig[bmapall[m+j]]);
+        }
+        fprintf(universe->ulogfile," %d)\n",universe->uni2orig[kspace_proc]);
+        m += ratio + 1;
+      }
+    }
+  }
+
+  memory->destroy(kspace_grid2proc);
+  delete [] bmap;
+  delete [] bmapall;
+
+  // size/disp = vectors for MPI gather/scatter within block
+
+  qsize = new int[ratio+1];
+  qdisp = new int[ratio+1];
+  xsize = new int[ratio+1];
+  xdisp = new int[ratio+1];
+
+  // f_kspace = Rspace copy of Kspace forces
+  // allocate dummy version for Kspace partition
+
+  maxatom = 0;
+  f_kspace = NULL;
+  if (!master) memory->create(f_kspace,1,1,"verlet/split:f_kspace");
+}
+
+/* ---------------------------------------------------------------------- */
+
+VerletSplitIntel::~VerletSplitIntel()
+{
+  delete [] qsize;
+  delete [] qdisp;
+  delete [] xsize;
+  delete [] xdisp;
+  memory->destroy(f_kspace);
+  MPI_Comm_free(&block);
+}
+
+/* ----------------------------------------------------------------------
+   initialization before run
+------------------------------------------------------------------------- */
+
+void VerletSplitIntel::init()
+{
+  if (!force->kspace && comm->me == 0)
+    error->warning(FLERR,"No Kspace calculation with verlet/split");
+
+  if (force->kspace_match("tip4p",0)) tip4p_flag = 1;
+  else tip4p_flag = 0;
+
+  // currently TIP4P does not work with verlet/split, so generate error
+  // see Axel email on this, also other TIP4P notes below
+
+  if (tip4p_flag) error->all(FLERR,"Verlet/split does not yet support TIP4P");
+
+  VerletIntel::init();
+}
+
+/* ----------------------------------------------------------------------
+   setup before run
+   servant partition only sets up KSpace calculation
+------------------------------------------------------------------------- */
+
+void VerletSplitIntel::setup()
+{
+  if (comm->me == 0 && screen) fprintf(screen,"Setting up run ...\n");
+
+  if (!master) force->kspace->setup();   // NOTE(review): force->kspace is dereferenced unchecked; init() only warns when it is NULL
+  else {
+    VerletIntel::setup();   // prints "Setting up run ..." again on proc 0 of the Rspace partition
+  }
+}
+
+/* ----------------------------------------------------------------------
+   setup without output
+   flag = 0 = just force calculation
+   flag = 1 = reneighbor and force calculation
+   servant partition only sets up KSpace calculation
+------------------------------------------------------------------------- */
+
+void VerletSplitIntel::setup_minimal(int flag)
+{
+  if (!master) force->kspace->setup();   // NOTE(review): same unchecked force->kspace dereference as in setup()
+  else {
+    VerletIntel::setup_minimal(flag);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   run for N steps
+   master partition does everything but Kspace
+   servant partition does just Kspace
+   communicate back and forth every step:
+     atom coords from master -> servant
+     kspace forces from servant -> master
+     also box
bounds from master -> servant if necessary
+------------------------------------------------------------------------- */
+
+void VerletSplitIntel::run(int n)
+{
+  bigint ntimestep;
+  int nflag,sortflag;
+
+  // sync both partitions before start timer
+
+  MPI_Barrier(universe->uworld);
+  timer->init();
+  timer->barrier_start(TIME_LOOP);
+
+  // setup initial Rspace <-> Kspace comm params
+
+  rk_setup();
+
+  // check if OpenMP support fix defined
+
+  Fix *fix_omp;
+  int ifix = modify->find_fix("package_omp");
+  if (ifix < 0) fix_omp = NULL;
+  else fix_omp = modify->fix[ifix];
+
+  // flags for timestepping iterations
+
+  int n_post_integrate = modify->n_post_integrate;
+  int n_pre_exchange = modify->n_pre_exchange;
+  int n_pre_neighbor = modify->n_pre_neighbor;
+  int n_pre_force = modify->n_pre_force;
+  int n_post_force = modify->n_post_force;
+  int n_end_of_step = modify->n_end_of_step;
+
+  if (atom->sortfreq > 0) sortflag = 1;
+  else sortflag = 0;
+
+  for (int i = 0; i < n; i++) {
+
+    ntimestep = ++update->ntimestep;
+    ev_set(ntimestep);
+
+    // initial time integration
+
+    if (master) {
+      modify->initial_integrate(vflag);
+      if (n_post_integrate) modify->post_integrate();
+    }
+
+    // regular communication vs neighbor list rebuild
+
+    if (master) nflag = neighbor->decide();
+    MPI_Bcast(&nflag,1,MPI_INT,1,block);   // root is block rank 1, an Rspace proc; the Kspace proc (rank 0) receives nflag
+
+    if (master) {
+      if (nflag == 0) {
+        timer->stamp();
+        comm->forward_comm();
+        timer->stamp(TIME_COMM);
+      } else {
+        if (n_pre_exchange) modify->pre_exchange();
+        if (triclinic) domain->x2lamda(atom->nlocal);
+        domain->pbc();
+        if (domain->box_change) {
+          domain->reset_box();
+          comm->setup();
+          if (neighbor->style) neighbor->setup_bins();
+        }
+        timer->stamp();
+        comm->exchange();
+        if (sortflag && ntimestep >= atom->nextsort) atom->sort();
+        comm->borders();
+        if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
+        timer->stamp(TIME_COMM);
+        if (n_pre_neighbor) modify->pre_neighbor();
+        neighbor->build();
+        timer->stamp(TIME_NEIGHBOR);
+      }
+    }
+
+    // if reneighboring occurred, re-setup Rspace <-> Kspace comm params
+    // comm Rspace atom coords to Kspace procs
+
+    if (nflag) rk_setup();
+    r2k_comm();
+
+    // force computations
+
+    force_clear();
+
+    if (master) {
+      if (n_pre_force) modify->pre_force(vflag);
+
+      timer->stamp();
+      if (force->pair) {   // NOTE(review): VerletIntel::run() tests pair_compute_flag here - confirm force->pair is intended
+        force->pair->compute(eflag,vflag);
+        timer->stamp(TIME_PAIR);
+      }
+
+      if (atom->molecular) {
+        if (force->bond) force->bond->compute(eflag,vflag);
+        if (force->angle) force->angle->compute(eflag,vflag);
+        if (force->dihedral) force->dihedral->compute(eflag,vflag);
+        if (force->improper) force->improper->compute(eflag,vflag);
+        timer->stamp(TIME_BOND);
+      }
+
+      #ifdef _LMP_INTEL_OFFLOAD
+      if (sync_mode == 1) {
+        fix_intel->sync_coprocessor();
+        timer->stamp(TIME_PAIR);
+      }
+      #endif
+
+      if (force->newton) {
+        comm->reverse_comm();
+        timer->stamp(TIME_COMM);
+      }
+
+      #ifdef _LMP_INTEL_OFFLOAD
+      if (sync_mode == 2) {
+        fix_intel->sync_coprocessor();
+        timer->stamp(TIME_PAIR);
+      }
+      #endif
+
+    } else {
+
+      // run FixOMP as sole pre_force fix, if defined
+
+      if (fix_omp) fix_omp->pre_force(vflag);
+
+      if (force->kspace) {
+        timer->stamp();
+        force->kspace->compute(eflag,vflag);
+        timer->stamp(TIME_KSPACE);
+      }
+
+      // TIP4P PPPM puts forces on ghost atoms, so must reverse_comm()
+
+      if (tip4p_flag && force->newton) {
+        comm->reverse_comm();
+        timer->stamp(TIME_COMM);
+      }
+    }
+
+    // comm and sum Kspace forces back to Rspace procs
+
+    k2r_comm();
+
+    // force modifications, final time integration, diagnostics
+    // all output
+
+    if (master) {
+      if (n_post_force) modify->post_force(vflag);
+      modify->final_integrate();
+      if (n_end_of_step) modify->end_of_step();
+
+      if (ntimestep == output->next) {
+        timer->stamp();
+        output->write(ntimestep);
+        timer->stamp(TIME_OUTPUT);
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   setup params for Rspace <-> Kspace communication
+   called initially and after every reneighbor
+   also communicate atom
charges from Rspace to KSpace since static
+------------------------------------------------------------------------- */
+
+void VerletSplitIntel::rk_setup()
+{
+  // grow f_kspace array on master procs if necessary
+
+  if (master) {
+    if (atom->nlocal > maxatom) {
+      memory->destroy(f_kspace);
+      maxatom = atom->nmax;
+      memory->create(f_kspace,maxatom,3,"verlet/split:f_kspace");
+    }
+  }
+
+  // qsize = # of atoms owned by each master proc in block
+
+  int n = 0;
+  if (master) n = atom->nlocal;
+  MPI_Gather(&n,1,MPI_INT,qsize,1,MPI_INT,0,block);
+
+  // setup qdisp, xsize, xdisp based on qsize
+  // only needed by Kspace proc
+  // set Kspace nlocal to sum of Rspace nlocals
+  // ensure Kspace atom arrays are large enough
+
+  if (!master) {
+    qsize[0] = qdisp[0] = xsize[0] = xdisp[0] = 0;   // entry 0 is the Kspace proc itself, which contributes no atoms
+    for (int i = 1; i <= ratio; i++) {
+      qdisp[i] = qdisp[i-1]+qsize[i-1];
+      xsize[i] = 3*qsize[i];
+      xdisp[i] = xdisp[i-1]+xsize[i-1];
+    }
+
+    atom->nlocal = qdisp[ratio] + qsize[ratio];
+    while (atom->nmax <= atom->nlocal) atom->avec->grow(0);
+    atom->nghost = 0;
+  }
+
+  // one-time gather of Rspace atom charges to Kspace proc
+
+  MPI_Gatherv(atom->q,n,MPI_DOUBLE,atom->q,qsize,qdisp,MPI_DOUBLE,0,block);
+
+  // for TIP4P also need to send atom type and tag
+  // KSpace procs need to acquire ghost atoms and map all their atoms
+  // map_clear() call is in lieu of comm->exchange() which performs map_clear
+  // borders() call acquires ghost atoms and maps them
+  // NOTE: do atom coords need to be communicated here before borders() call?
+  //   could do this by calling r2k_comm() here and not again from run()
+  //   except that forward_comm() in r2k_comm() is wrong
+
+  if (tip4p_flag) {
+    //r2k_comm();
+    MPI_Gatherv(atom->type,n,MPI_INT,atom->type,qsize,qdisp,MPI_INT,0,block);
+    MPI_Gatherv(atom->tag,n,MPI_LMP_TAGINT,
+                atom->tag,qsize,qdisp,MPI_LMP_TAGINT,0,block);
+    if (!master) {
+      if (triclinic) domain->x2lamda(atom->nlocal);
+      if (domain->box_change) comm->setup();
+      timer->stamp();
+      atom->map_clear();
+      comm->borders();
+      if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
+      timer->stamp(TIME_COMM);
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   communicate Rspace atom coords to Kspace
+   also eflag,vflag and box bounds if needed
+------------------------------------------------------------------------- */
+
+void VerletSplitIntel::r2k_comm()
+{
+  MPI_Status status;
+
+  int n = 0;
+  if (master) n = atom->nlocal;
+  MPI_Gatherv(atom->x[0],n*3,MPI_DOUBLE,atom->x[0],xsize,xdisp,
+              MPI_DOUBLE,0,block);
+
+  // send eflag,vflag from Rspace to Kspace
+
+  if (me_block == 1) {
+    int flags[2];
+    flags[0] = eflag; flags[1] = vflag;
+    MPI_Send(flags,2,MPI_INT,0,0,block);
+  } else if (!master) {
+    int flags[2];
+    MPI_Recv(flags,2,MPI_INT,1,0,block,&status);   // must match the MPI_INT send; MPI_DOUBLE would overrun int flags[2]
+    eflag = flags[0]; vflag = flags[1];
+  }
+
+  // send box bounds from Rspace to Kspace if simulation box is dynamic
+
+  if (domain->box_change) {
+    if (me_block == 1) {
+      MPI_Send(domain->boxlo,3,MPI_DOUBLE,0,0,block);
+      MPI_Send(domain->boxhi,3,MPI_DOUBLE,0,0,block);
+    } else if (!master) {
+      MPI_Recv(domain->boxlo,3,MPI_DOUBLE,1,0,block,&status);
+      MPI_Recv(domain->boxhi,3,MPI_DOUBLE,1,0,block,&status);
+      domain->set_global_box();
+      domain->set_local_box();
+      force->kspace->setup();
+    }
+  }
+
+  // for TIP4P, Kspace partition needs to update its ghost atoms
+
+  if (tip4p_flag && !master) {
+    timer->stamp();
+    comm->forward_comm();
+    timer->stamp(TIME_COMM);
+  }
+}
+
+/*
----------------------------------------------------------------------
+   communicate and sum Kspace atom forces back to Rspace
+------------------------------------------------------------------------- */
+
+void VerletSplitIntel::k2r_comm()
+{
+  if (eflag) MPI_Bcast(&force->kspace->energy,1,MPI_DOUBLE,0,block);   // root 0 = Kspace proc of this block
+  if (vflag) MPI_Bcast(force->kspace->virial,6,MPI_DOUBLE,0,block);
+
+  int n = 0;
+  if (master) n = atom->nlocal;
+  MPI_Scatterv(atom->f[0],xsize,xdisp,MPI_DOUBLE,
+               f_kspace[0],n*3,MPI_DOUBLE,0,block);   // Kspace proc receives n = 0 values
+
+  if (master) {
+    double **f = atom->f;
+    int nlocal = atom->nlocal;
+    for (int i = 0; i < nlocal; i++) {
+      f[i][0] += f_kspace[i][0];
+      f[i][1] += f_kspace[i][1];
+      f[i][2] += f_kspace[i][2];
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   memory usage of Kspace force array on master procs
+------------------------------------------------------------------------- */
+
+bigint VerletSplitIntel::memory_usage()
+{
+  bigint bytes = maxatom*3 * sizeof(double);   // maxatom x 3 doubles; maxatom is set in rk_setup()
+  return bytes;
+}
diff --git a/src/USER-INTEL/verlet_split_intel.h b/src/USER-INTEL/verlet_split_intel.h
new file mode 100644
index 000000000..3f81d41a9
--- /dev/null
+++ b/src/USER-INTEL/verlet_split_intel.h
@@ -0,0 +1,89 @@
+/* -------------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef INTEGRATE_CLASS
+
+IntegrateStyle(verlet/split/intel,VerletSplitIntel)
+
+#else
+
+#ifndef LMP_VERLET_SPLIT_INTEL_H
+#define LMP_VERLET_SPLIT_INTEL_H
+
+#include "verlet_intel.h"
+#ifdef LMP_INTEL_OFFLOAD
+#include "fix_intel.h"
+#endif
+
+namespace LAMMPS_NS {
+
+class VerletSplitIntel : public VerletIntel {   // integrator registered as run style verlet/split/intel
+ public:
+  VerletSplitIntel(class LAMMPS *, int, char **);
+  ~VerletSplitIntel();
+  void init();
+  void setup();
+  void setup_minimal(int);
+  void run(int);
+  bigint memory_usage();             // bytes held by f_kspace
+
+ private:
+  int master;                        // 1 if an Rspace proc, 0 if Kspace
+  int me_block;                      // proc ID within Rspace/Kspace block
+  int ratio;                         // ratio of Rspace procs to Kspace procs
+  int *qsize,*qdisp,*xsize,*xdisp;   // MPI gather/scatter params for block comm
+  MPI_Comm block;                    // communicator within one block
+  int tip4p_flag;                    // 1 if PPPM/tip4p so do extra comm
+
+  double **f_kspace;                 // copy of Kspace forces on Rspace procs
+  int maxatom;                       // # of atoms f_kspace is allocated for on Rspace procs, 0 until rk_setup()
+
+  void rk_setup();                   // setup params for Rspace <-> Kspace comm
+  void r2k_comm();                   // Rspace atom coords -> Kspace
+  void k2r_comm();                   // Kspace forces -> Rspace, summed into atom->f
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Verlet/split requires 2 partitions
+
+See the -partition command-line switch.
+
+E: Verlet/split requires Rspace partition size be multiple of Kspace partition size
+
+This is so there is an equal number of Rspace processors for every
+Kspace processor.
+
+E: Verlet/split requires Rspace partition layout be multiple of Kspace partition layout in each dim
+
+This is controlled by the processors command.
+
+W: No Kspace calculation with verlet/split
+
+The 2nd partition performs a kspace calculation so the kspace_style
+command must be used.
+
+E: Verlet/split does not yet support TIP4P
+
+This is a current limitation.
+
+E: Cannot currently get per-atom virials with Intel package.
+
+The Intel package does not yet support per-atom virial calculation.
+ +*/ diff --git a/src/USER-OMP/pair_gran_hooke_history_omp.cpp b/src/USER-OMP/pair_gran_hooke_history_omp.cpp index 57827adf2..afe287c60 100644 --- a/src/USER-OMP/pair_gran_hooke_history_omp.cpp +++ b/src/USER-OMP/pair_gran_hooke_history_omp.cpp @@ -1,329 +1,326 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "math.h" #include "pair_gran_hooke_history_omp.h" #include "atom.h" #include "comm.h" #include "fix.h" #include "force.h" #include "memory.h" #include "neighbor.h" #include "neigh_list.h" #include "update.h" #include "string.h" #include "suffix.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairGranHookeHistoryOMP::PairGranHookeHistoryOMP(LAMMPS *lmp) : PairGranHookeHistory(lmp), ThrOMP(lmp, THR_PAIR) { suffix_flag |= Suffix::OMP; respa_enable = 0; - // trigger use of OpenMP version of FixShearHistory - suffix = new char[4]; - memcpy(suffix,"omp",4); } /* ---------------------------------------------------------------------- */ void PairGranHookeHistoryOMP::compute(int eflag, int vflag) { if (eflag || vflag) { ev_setup(eflag,vflag); } else evflag = vflag_fdotr = 0; computeflag = 1; const int shearupdate = (update->setupflag) ? 
0 : 1; // update rigid body info for owned & ghost atoms if using FixRigid masses // body[i] = which body atom I is in, -1 if none // mass_body = mass of each rigid body if (fix_rigid && neighbor->ago == 0) { int tmp; int *body = (int *) fix_rigid->extract("body",tmp); double *mass_body = (double *) fix_rigid->extract("masstotal",tmp); if (atom->nmax > nmax) { memory->destroy(mass_rigid); nmax = atom->nmax; memory->create(mass_rigid,nmax,"pair:mass_rigid"); } int nlocal = atom->nlocal; for (int i = 0; i < nlocal; i++) if (body[i] >= 0) mass_rigid[i] = mass_body[body[i]]; else mass_rigid[i] = 0.0; comm->forward_comm_pair(this); } const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int inum = list->inum; #if defined(_OPENMP) #pragma omp parallel default(none) shared(eflag,vflag) #endif { int ifrom, ito, tid; loop_setup_thr(ifrom, ito, tid, inum, nthreads); ThrData *thr = fix->get_thr(tid); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); if (evflag) if (shearupdate) eval<1,1>(ifrom, ito, thr); else eval<1,0>(ifrom, ito, thr); else if (shearupdate) eval<0,1>(ifrom, ito, thr); else eval<0,0>(ifrom, ito, thr); reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region } template void PairGranHookeHistoryOMP::eval(int iifrom, int iito, ThrData * const thr) { int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,fx,fy,fz; double myshear[3]; double radi,radj,radsum,rsq,r,rinv,rsqinv; double vr1,vr2,vr3,vnnr,vn1,vn2,vn3,vt1,vt2,vt3; double wr1,wr2,wr3; double vtr1,vtr2,vtr3,vrel; double mi,mj,meff,damp,ccel,tor1,tor2,tor3; double fn,fs,fs1,fs2,fs3; double shrmag,rsht; int *ilist,*jlist,*numneigh,**firstneigh; int *touch,**firsttouch; double *allshear,**firstshear; const double * const * const x = atom->x; const double * const * const v = atom->v; const double * const * const omega = atom->omega; const double * const radius = atom->radius; const double * const rmass = atom->rmass; const double * const mass = 
atom->mass; double * const * const f = thr->get_f(); double * const * const torque = thr->get_torque(); const int * const type = atom->type; const int * const mask = atom->mask; const int nlocal = atom->nlocal; double fxtmp,fytmp,fztmp; double t1tmp,t2tmp,t3tmp; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; firsttouch = listgranhistory->firstneigh; firstshear = listgranhistory->firstdouble; // loop over neighbors of my atoms for (ii = iifrom; ii < iito; ++ii) { i = ilist[ii]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; radi = radius[i]; touch = firsttouch[i]; allshear = firstshear[i]; jlist = firstneigh[i]; jnum = numneigh[i]; fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; j &= NEIGHMASK; delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; radj = radius[j]; radsum = radi + radj; if (rsq >= radsum*radsum) { // unset non-touching neighbors touch[jj] = 0; myshear[0] = 0.0; myshear[1] = 0.0; myshear[2] = 0.0; } else { r = sqrt(rsq); rinv = 1.0/r; rsqinv = 1.0/rsq; // relative translational velocity vr1 = v[i][0] - v[j][0]; vr2 = v[i][1] - v[j][1]; vr3 = v[i][2] - v[j][2]; // normal component vnnr = vr1*delx + vr2*dely + vr3*delz; vn1 = delx*vnnr * rsqinv; vn2 = dely*vnnr * rsqinv; vn3 = delz*vnnr * rsqinv; // tangential component vt1 = vr1 - vn1; vt2 = vr2 - vn2; vt3 = vr3 - vn3; // relative rotational velocity wr1 = (radi*omega[i][0] + radj*omega[j][0]) * rinv; wr2 = (radi*omega[i][1] + radj*omega[j][1]) * rinv; wr3 = (radi*omega[i][2] + radj*omega[j][2]) * rinv; // meff = effective mass of pair of particles // if I or J part of rigid body, use body mass // if I or J is frozen, meff is other particle if (rmass) { mi = rmass[i]; mj = rmass[j]; } else { mi = mass[type[i]]; mj = mass[type[j]]; } if (fix_rigid) { if (mass_rigid[i] > 0.0) mi = mass_rigid[i]; if (mass_rigid[j] > 0.0) mj = mass_rigid[j]; } meff = mi*mj / (mi+mj); if (mask[i] 
& freeze_group_bit) meff = mj; if (mask[j] & freeze_group_bit) meff = mi; // normal forces = Hookian contact + normal velocity damping damp = meff*gamman*vnnr*rsqinv; ccel = kn*(radsum-r)*rinv - damp; // relative velocities vtr1 = vt1 - (delz*wr2-dely*wr3); vtr2 = vt2 - (delx*wr3-delz*wr1); vtr3 = vt3 - (dely*wr1-delx*wr2); vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3; vrel = sqrt(vrel); // shear history effects touch[jj] = 1; memcpy(myshear,allshear + 3*jj, 3*sizeof(double)); if (SHEARUPDATE) { myshear[0] += vtr1*dt; myshear[1] += vtr2*dt; myshear[2] += vtr3*dt; } shrmag = sqrt(myshear[0]*myshear[0] + myshear[1]*myshear[1] + myshear[2]*myshear[2]); // rotate shear displacements rsht = myshear[0]*delx + myshear[1]*dely + myshear[2]*delz; rsht *= rsqinv; if (SHEARUPDATE) { myshear[0] -= rsht*delx; myshear[1] -= rsht*dely; myshear[2] -= rsht*delz; } // tangential forces = shear + tangential velocity damping fs1 = - (kt*myshear[0] + meff*gammat*vtr1); fs2 = - (kt*myshear[1] + meff*gammat*vtr2); fs3 = - (kt*myshear[2] + meff*gammat*vtr3); // rescale frictional displacements and forces if needed fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3); fn = xmu * fabs(ccel*r); if (fs > fn) { if (shrmag != 0.0) { const double fnfs = fn/fs; const double mgkt = meff*gammat/kt; myshear[0] = fnfs * (myshear[0] + mgkt*vtr1) - mgkt*vtr1; myshear[1] = fnfs * (myshear[1] + mgkt*vtr2) - mgkt*vtr2; myshear[2] = fnfs * (myshear[2] + mgkt*vtr3) - mgkt*vtr3; fs1 *= fnfs; fs2 *= fnfs; fs3 *= fnfs; } else fs1 = fs2 = fs3 = 0.0; } // forces & torques fx = delx*ccel + fs1; fy = dely*ccel + fs2; fz = delz*ccel + fs3; fxtmp += fx; fytmp += fy; fztmp += fz; tor1 = rinv * (dely*fs3 - delz*fs2); tor2 = rinv * (delz*fs1 - delx*fs3); tor3 = rinv * (delx*fs2 - dely*fs1); t1tmp -= radi*tor1; t2tmp -= radi*tor2; t3tmp -= radi*tor3; if (j < nlocal) { f[j][0] -= fx; f[j][1] -= fy; f[j][2] -= fz; torque[j][0] -= radj*tor1; torque[j][1] -= radj*tor2; torque[j][2] -= radj*tor3; } if (EVFLAG) 
ev_tally_xyz_thr(this,i,j,nlocal,/* newton_pair */ 0, 0.0,0.0,fx,fy,fz,delx,dely,delz,thr); } memcpy(allshear + 3*jj, myshear, 3*sizeof(double)); } f[i][0] += fxtmp; f[i][1] += fytmp; f[i][2] += fztmp; torque[i][0] += t1tmp; torque[i][1] += t2tmp; torque[i][2] += t3tmp; } } /* ---------------------------------------------------------------------- */ double PairGranHookeHistoryOMP::memory_usage() { double bytes = memory_usage_thr(); bytes += PairGranHookeHistory::memory_usage(); return bytes; } diff --git a/src/angle_hybrid.cpp b/src/angle_hybrid.cpp index 6f1cceba0..1780c4344 100644 --- a/src/angle_hybrid.cpp +++ b/src/angle_hybrid.cpp @@ -1,373 +1,374 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ #include "math.h" #include "string.h" #include "ctype.h" #include "angle_hybrid.h" #include "atom.h" #include "neighbor.h" #include "domain.h" #include "comm.h" #include "force.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; #define EXTRA 1000 /* ---------------------------------------------------------------------- */ AngleHybrid::AngleHybrid(LAMMPS *lmp) : Angle(lmp) { writedata = 0; nstyles = 0; } /* ---------------------------------------------------------------------- */ AngleHybrid::~AngleHybrid() { if (nstyles) { for (int i = 0; i < nstyles; i++) delete styles[i]; delete [] styles; for (int i = 0; i < nstyles; i++) delete [] keywords[i]; delete [] keywords; } if (allocated) { memory->destroy(setflag); memory->destroy(map); delete [] nanglelist; delete [] maxangle; for (int i = 0; i < nstyles; i++) memory->destroy(anglelist[i]); delete [] anglelist; } } /* ---------------------------------------------------------------------- */ void AngleHybrid::compute(int eflag, int vflag) { int i,j,m,n; // save ptrs to original anglelist int nanglelist_orig = neighbor->nanglelist; int **anglelist_orig = neighbor->anglelist; // if this is re-neighbor step, create sub-style anglelists // nanglelist[] = length of each sub-style list // realloc sub-style anglelist if necessary // load sub-style anglelist with 4 values from original anglelist if (neighbor->ago == 0) { for (m = 0; m < nstyles; m++) nanglelist[m] = 0; for (i = 0; i < nanglelist_orig; i++) { m = map[anglelist_orig[i][3]]; if (m >= 0) nanglelist[m]++; } for (m = 0; m < nstyles; m++) { if (nanglelist[m] > maxangle[m]) { memory->destroy(anglelist[m]); maxangle[m] = nanglelist[m] + EXTRA; memory->create(anglelist[m],maxangle[m],4,"angle_hybrid:anglelist"); } nanglelist[m] = 0; } for (i = 0; i < nanglelist_orig; i++) { m = map[anglelist_orig[i][3]]; if (m < 0) continue; n = nanglelist[m]; anglelist[m][n][0] = 
anglelist_orig[i][0]; anglelist[m][n][1] = anglelist_orig[i][1]; anglelist[m][n][2] = anglelist_orig[i][2]; anglelist[m][n][3] = anglelist_orig[i][3]; nanglelist[m]++; } } // call each sub-style's compute function // set neighbor->anglelist to sub-style anglelist before call // accumulate sub-style global/peratom energy/virial in hybrid if (eflag || vflag) ev_setup(eflag,vflag); else evflag = 0; for (m = 0; m < nstyles; m++) { neighbor->nanglelist = nanglelist[m]; neighbor->anglelist = anglelist[m]; styles[m]->compute(eflag,vflag); if (eflag_global) energy += styles[m]->energy; if (vflag_global) for (n = 0; n < 6; n++) virial[n] += styles[m]->virial[n]; if (eflag_atom) { n = atom->nlocal; if (force->newton_bond) n += atom->nghost; double *eatom_substyle = styles[m]->eatom; for (i = 0; i < n; i++) eatom[i] += eatom_substyle[i]; } if (vflag_atom) { n = atom->nlocal; if (force->newton_bond) n += atom->nghost; double **vatom_substyle = styles[m]->vatom; for (i = 0; i < n; i++) for (j = 0; j < 6; j++) vatom[i][j] += vatom_substyle[i][j]; } } // restore ptrs to original anglelist neighbor->nanglelist = nanglelist_orig; neighbor->anglelist = anglelist_orig; } /* ---------------------------------------------------------------------- */ void AngleHybrid::allocate() { allocated = 1; int n = atom->nangletypes; memory->create(map,n+1,"angle:map"); memory->create(setflag,n+1,"angle:setflag"); for (int i = 1; i <= n; i++) setflag[i] = 0; nanglelist = new int[nstyles]; maxangle = new int[nstyles]; anglelist = new int**[nstyles]; for (int m = 0; m < nstyles; m++) maxangle[m] = 0; for (int m = 0; m < nstyles; m++) anglelist[m] = NULL; } /* ---------------------------------------------------------------------- create one angle style for each arg in list ------------------------------------------------------------------------- */ void AngleHybrid::settings(int narg, char **arg) { int i,m,istyle; if (narg < 1) error->all(FLERR,"Illegal angle_style command"); // delete old lists, since 
cannot just change settings if (nstyles) { for (int i = 0; i < nstyles; i++) delete styles[i]; delete [] styles; for (int i = 0; i < nstyles; i++) delete [] keywords[i]; delete [] keywords; } if (allocated) { memory->destroy(setflag); memory->destroy(map); delete [] nanglelist; delete [] maxangle; for (int i = 0; i < nstyles; i++) memory->destroy(anglelist[i]); delete [] anglelist; } allocated = 0; // count sub-styles by skipping numeric args // one exception is 1st arg of style "table", which is non-numeric word // need a better way to skip these exceptions nstyles = 0; i = 0; while (i < narg) { if (strcmp(arg[i],"table") == 0) i++; i++; while (i < narg && !isalpha(arg[i][0])) i++; nstyles++; } // allocate list of sub-styles styles = new Angle*[nstyles]; keywords = new char*[nstyles]; // allocate each sub-style and call its settings() with subset of args // define subset of args for a sub-style by skipping numeric args // one exception is 1st arg of style "table", which is non-numeric // need a better way to skip these exceptions - int dummy; + int sflag; nstyles = 0; i = 0; while (i < narg) { for (m = 0; m < nstyles; m++) if (strcmp(arg[i],keywords[m]) == 0) error->all(FLERR,"Angle style hybrid cannot use " "same angle style twice"); if (strcmp(arg[i],"hybrid") == 0) error->all(FLERR,"Angle style hybrid cannot have hybrid as an argument"); if (strcmp(arg[i],"none") == 0) error->all(FLERR,"Angle style hybrid cannot have none as an argument"); - styles[nstyles] = force->new_angle(arg[i],lmp->suffix,dummy); - keywords[nstyles] = new char[strlen(arg[i])+1]; - strcpy(keywords[nstyles],arg[i]); + + styles[nstyles] = force->new_angle(arg[i],1,sflag); + force->store_style(keywords[nstyles],arg[i],sflag); + istyle = i; if (strcmp(arg[i],"table") == 0) i++; i++; while (i < narg && !isalpha(arg[i][0])) i++; styles[nstyles]->settings(i-istyle-1,&arg[istyle+1]); nstyles++; } } /* ---------------------------------------------------------------------- set coeffs for one type 
---------------------------------------------------------------------- */ void AngleHybrid::coeff(int narg, char **arg) { if (!allocated) allocate(); int ilo,ihi; force->bounds(arg[0],atom->nangletypes,ilo,ihi); // 2nd arg = angle sub-style name // allow for "none" or "skip" as valid sub-style name int m; for (m = 0; m < nstyles; m++) if (strcmp(arg[1],keywords[m]) == 0) break; int none = 0; int skip = 0; if (m == nstyles) { if (strcmp(arg[1],"none") == 0) none = 1; else if (strcmp(arg[1],"skip") == 0) none = skip = 1; else if (strcmp(arg[1],"ba") == 0) error->all(FLERR,"BondAngle coeff for hybrid angle has invalid format"); else if (strcmp(arg[1],"bb") == 0) error->all(FLERR,"BondBond coeff for hybrid angle has invalid format"); else error->all(FLERR,"Angle coeff for hybrid has invalid style"); } // move 1st arg to 2nd arg // just copy ptrs, since arg[] points into original input line arg[1] = arg[0]; // invoke sub-style coeff() starting with 1st arg if (!none) styles[m]->coeff(narg-1,&arg[1]); // set setflag and which type maps to which sub-style // if sub-style is skip: auxiliary class2 setting in data file so ignore // if sub-style is none: set hybrid setflag, wipe out map for (int i = ilo; i <= ihi; i++) { if (skip) continue; else if (none) { setflag[i] = 1; map[i] = -1; } else { setflag[i] = styles[m]->setflag[i]; map[i] = m; } } } /* ---------------------------------------------------------------------- run angle style specific initialization ------------------------------------------------------------------------- */ void AngleHybrid::init_style() { for (int m = 0; m < nstyles; m++) if (styles[m]) styles[m]->init_style(); } /* ---------------------------------------------------------------------- return an equilbrium angle length ------------------------------------------------------------------------- */ double AngleHybrid::equilibrium_angle(int i) { if (map[i] < 0) error->one(FLERR,"Invoked angle equil angle on angle style none"); return 
styles[map[i]]->equilibrium_angle(i); } /* ---------------------------------------------------------------------- proc 0 writes to restart file ------------------------------------------------------------------------- */ void AngleHybrid::write_restart(FILE *fp) { fwrite(&nstyles,sizeof(int),1,fp); int n; for (int m = 0; m < nstyles; m++) { n = strlen(keywords[m]) + 1; fwrite(&n,sizeof(int),1,fp); fwrite(keywords[m],sizeof(char),n,fp); } } /* ---------------------------------------------------------------------- proc 0 reads from restart file, bcasts ------------------------------------------------------------------------- */ void AngleHybrid::read_restart(FILE *fp) { int me = comm->me; if (me == 0) fread(&nstyles,sizeof(int),1,fp); MPI_Bcast(&nstyles,1,MPI_INT,0,world); styles = new Angle*[nstyles]; keywords = new char*[nstyles]; allocate(); int n,dummy; for (int m = 0; m < nstyles; m++) { if (me == 0) fread(&n,sizeof(int),1,fp); MPI_Bcast(&n,1,MPI_INT,0,world); keywords[m] = new char[n]; if (me == 0) fread(keywords[m],sizeof(char),n,fp); MPI_Bcast(keywords[m],n,MPI_CHAR,0,world); - styles[m] = force->new_angle(keywords[m],lmp->suffix,dummy); + styles[m] = force->new_angle(keywords[m],0,dummy); } } /* ---------------------------------------------------------------------- */ double AngleHybrid::single(int type, int i1, int i2, int i3) { if (map[type] < 0) error->one(FLERR,"Invoked angle single on angle style none"); return styles[map[type]]->single(type,i1,i2,i3); } /* ---------------------------------------------------------------------- memory usage ------------------------------------------------------------------------- */ double AngleHybrid::memory_usage() { double bytes = maxeatom * sizeof(double); bytes += maxvatom*6 * sizeof(double); for (int m = 0; m < nstyles; m++) bytes += maxangle[m]*4 * sizeof(int); for (int m = 0; m < nstyles; m++) if (styles[m]) bytes += styles[m]->memory_usage(); return bytes; } diff --git a/src/atom.cpp b/src/atom.cpp index 
7efbf4740..550b959f2 100644 --- a/src/atom.cpp +++ b/src/atom.cpp @@ -1,2003 +1,2018 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #include "mpi.h" #include "math.h" #include "stdio.h" #include "stdlib.h" #include "string.h" #include "limits.h" #include "atom.h" #include "style_atom.h" #include "atom_vec.h" #include "atom_vec_ellipsoid.h" #include "comm.h" #include "neighbor.h" #include "force.h" #include "modify.h" #include "fix.h" #include "output.h" #include "thermo.h" #include "update.h" #include "domain.h" #include "group.h" #include "molecule.h" #include "accelerator_cuda.h" #include "atom_masks.h" #include "math_const.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; using namespace MathConst; #define DELTA 1 #define DELTA_MEMSTR 1024 #define EPSILON 1.0e-6 #define CUDA_CHUNK 3000 #define MAXBODY 20 // max # of lines in one body, also in ReadData class enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files /* ---------------------------------------------------------------------- */ Atom::Atom(LAMMPS *lmp) : Pointers(lmp) { natoms = 0; nlocal = nghost = nmax = 0; ntypes = 0; nbondtypes = nangletypes = ndihedraltypes = nimpropertypes = 0; nbonds = nangles = ndihedrals = nimpropers = 0; firstgroupname = NULL; sortfreq = 1000; nextsort = 0; userbinsize = 0.0; maxbin = maxnext = 0; binhead = NULL; next = permute = NULL; // initialize atom arrays // customize by adding new array tag = 
NULL; type = mask = NULL; image = NULL; x = v = f = NULL; molecule = NULL; molindex = molatom = NULL; q = NULL; mu = NULL; omega = angmom = torque = NULL; radius = rmass = NULL; ellipsoid = line = tri = body = NULL; vfrac = s0 = NULL; x0 = NULL; spin = NULL; eradius = ervel = erforce = NULL; cs = csforce = vforce = ervelforce = NULL; etag = NULL; rho = drho = e = de = cv = NULL; vest = NULL; bond_per_atom = extra_bond_per_atom = 0; num_bond = NULL; bond_type = NULL; bond_atom = NULL; angle_per_atom = extra_angle_per_atom = 0; num_angle = NULL; angle_type = NULL; angle_atom1 = angle_atom2 = angle_atom3 = NULL; dihedral_per_atom = extra_dihedral_per_atom = 0; num_dihedral = NULL; dihedral_type = NULL; dihedral_atom1 = dihedral_atom2 = dihedral_atom3 = dihedral_atom4 = NULL; improper_per_atom = extra_improper_per_atom = 0; num_improper = NULL; improper_type = NULL; improper_atom1 = improper_atom2 = improper_atom3 = improper_atom4 = NULL; maxspecial = 1; nspecial = NULL; special = NULL; // user-defined molecules nmolecule = 0; molecules = NULL; // custom atom arrays nivector = ndvector = 0; ivector = NULL; dvector = NULL; iname = dname = NULL; // initialize atom style and array existence flags // customize by adding new flag sphere_flag = peri_flag = electron_flag = 0; wavepacket_flag = sph_flag = 0; molecule_flag = 0; q_flag = mu_flag = 0; omega_flag = torque_flag = angmom_flag = 0; radius_flag = rmass_flag = 0; ellipsoid_flag = line_flag = tri_flag = body_flag = 0; vfrac_flag = 0; spin_flag = eradius_flag = ervel_flag = erforce_flag = ervelforce_flag = 0; cs_flag = csforce_flag = vforce_flag = etag_flag = 0; rho_flag = e_flag = cv_flag = vest_flag = 0; // Peridynamic scale factor pdscale = 1.0; // ntype-length arrays mass = NULL; mass_setflag = NULL; // callback lists & extra restart info nextra_grow = nextra_restart = nextra_border = 0; extra_grow = extra_restart = extra_border = NULL; nextra_grow_max = nextra_restart_max = nextra_border_max = 0; nextra_store = 0; 
extra = NULL; // default atom ID and mapping values tag_enable = 1; map_style = map_user = 0; map_tag_max = -1; map_maxarray = map_nhash = -1; max_same = 0; sametag = NULL; map_array = NULL; map_bucket = NULL; map_hash = NULL; atom_style = NULL; avec = NULL; datamask = ALL_MASK; datamask_ext = ALL_MASK; } /* ---------------------------------------------------------------------- */ Atom::~Atom() { delete [] atom_style; delete avec; delete [] firstgroupname; memory->destroy(binhead); memory->destroy(next); memory->destroy(permute); // delete atom arrays // customize by adding new array memory->destroy(tag); memory->destroy(type); memory->destroy(mask); memory->destroy(image); memory->destroy(x); memory->destroy(v); memory->destroy(f); memory->destroy(molecule); memory->destroy(molindex); memory->destroy(molatom); memory->destroy(q); memory->destroy(mu); memory->destroy(omega); memory->destroy(angmom); memory->destroy(torque); memory->destroy(radius); memory->destroy(rmass); memory->destroy(ellipsoid); memory->destroy(line); memory->destroy(tri); memory->destroy(body); memory->destroy(vfrac); memory->destroy(s0); memory->destroy(x0); memory->destroy(spin); memory->destroy(eradius); memory->destroy(ervel); memory->destroy(erforce); memory->destroy(ervelforce); memory->destroy(cs); memory->destroy(csforce); memory->destroy(vforce); memory->destroy(etag); memory->destroy(rho); memory->destroy(drho); memory->destroy(e); memory->destroy(de); memory->destroy(cv); memory->destroy(vest); memory->destroy(nspecial); memory->destroy(special); memory->destroy(num_bond); memory->destroy(bond_type); memory->destroy(bond_atom); memory->destroy(num_angle); memory->destroy(angle_type); memory->destroy(angle_atom1); memory->destroy(angle_atom2); memory->destroy(angle_atom3); memory->destroy(num_dihedral); memory->destroy(dihedral_type); memory->destroy(dihedral_atom1); memory->destroy(dihedral_atom2); memory->destroy(dihedral_atom3); memory->destroy(dihedral_atom4); 
memory->destroy(num_improper);
   memory->destroy(improper_type);
   memory->destroy(improper_atom1);
   memory->destroy(improper_atom2);
   memory->destroy(improper_atom3);
   memory->destroy(improper_atom4);

   // delete custom atom arrays

   for (int i = 0; i < nivector; i++) {
     delete [] iname[i];
     memory->destroy(ivector[i]);
   }
   for (int i = 0; i < ndvector; i++) {
     delete [] dname[i];
     memory->destroy(dvector[i]);
   }

   memory->sfree(iname);
   memory->sfree(dname);
   memory->sfree(ivector);
   memory->sfree(dvector);

   // delete user-defined molecules

   for (int i = 0; i < nmolecule; i++) delete molecules[i];
   memory->sfree(molecules);

   // delete per-type arrays

   delete [] mass;
   delete [] mass_setflag;

   // delete extra arrays

   memory->destroy(extra_grow);
   memory->destroy(extra_restart);
   memory->destroy(extra_border);
   memory->destroy(extra);

   // delete mapping data structures

   map_delete();
 }

 /* ----------------------------------------------------------------------
    copy modify settings from old Atom class to current Atom class
 ------------------------------------------------------------------------- */

 void Atom::settings(Atom *old)
 {
   tag_enable = old->tag_enable;
   map_user = old->map_user;
   map_style = old->map_style;
   sortfreq = old->sortfreq;
   userbinsize = old->userbinsize;
   if (old->firstgroupname) {
     int n = strlen(old->firstgroupname) + 1;
     firstgroupname = new char[n];
     strcpy(firstgroupname,old->firstgroupname);
   }
 }

 /* ----------------------------------------------------------------------
    create an AtomVec style
    called from lammps.cpp, input script, restart file, replicate
+   trysuffix is passed through to new_avec(); on return sflag says which
+   suffix (if any) matched, and atom_style stores the name that was built:
+   style/suffix (sflag 1), style/suffix2 (sflag 2), or plain style (sflag 0)
 ------------------------------------------------------------------------- */

-void Atom::create_avec(const char *style, int narg, char **arg, char *suffix)
+void Atom::create_avec(const char *style, int narg, char **arg, int trysuffix)
 {
   delete [] atom_style;
   if (avec) delete avec;

   // unset atom style and array existence flags
   // may have been set by old avec
   // customize by adding new flag

   sphere_flag = peri_flag = electron_flag = 0;
   wavepacket_flag = sph_flag = 0;
   molecule_flag = 0;
   q_flag = mu_flag = 0;
   omega_flag = torque_flag = angmom_flag = 0;
   radius_flag = rmass_flag = 0;
   ellipsoid_flag = line_flag = tri_flag = body_flag = 0;
   vfrac_flag = 0;
   spin_flag = eradius_flag = ervel_flag = erforce_flag = ervelforce_flag = 0;
   cs_flag = csforce_flag = vforce_flag = etag_flag = 0;
   rho_flag = e_flag = cv_flag = vest_flag = 0;

   // create instance of AtomVec
   // use grow() to initialize atom-based arrays to length 1
   //   so that x[0][0] can always be referenced even if proc has no atoms

   int sflag;
-  avec = new_avec(style,suffix,sflag);
+  avec = new_avec(style,trysuffix,sflag);
   avec->store_args(narg,arg);
   avec->process_args(narg,arg);
   avec->grow(1);

   if (sflag) {
     char estyle[256];
-    sprintf(estyle,"%s/%s",style,suffix);
+    // compare, do not assign: "sflag = 1" made the suffix2 branch unreachable
+    if (sflag == 1) sprintf(estyle,"%s/%s",style,lmp->suffix);
+    else sprintf(estyle,"%s/%s",style,lmp->suffix2);
     int n = strlen(estyle) + 1;
     atom_style = new char[n];
     strcpy(atom_style,estyle);
   } else {
     int n = strlen(style) + 1;
     atom_style = new char[n];
     strcpy(atom_style,style);
   }

   // if molecular system:
   // atom IDs must be defined
   // force atom map to be created
   // map style may be reset by map_init() and its call to map_style_set()

   molecular = avec->molecular;
   if (molecular && tag_enable == 0)
     error->all(FLERR,"Atom IDs must be used for molecular systems");
   if (molecular) map_style = 1;
 }

 /* ----------------------------------------------------------------------
    generate an AtomVec class, first with suffix appended
+   if trysuffix is set and suffixes are enabled, try style/suffix and then
+   style/suffix2 before falling back to the plain style
+   sflag = 1 if the lmp->suffix variant matched, 2 if the lmp->suffix2
+   variant matched, 0 if the plain style was used
 ------------------------------------------------------------------------- */

-AtomVec *Atom::new_avec(const char *style, char *suffix, int &sflag)
+AtomVec *Atom::new_avec(const char *style, int trysuffix, int &sflag)
 {
-  if (suffix && lmp->suffix_enable) {
-    sflag = 1;
-    char estyle[256];
-    sprintf(estyle,"%s/%s",style,suffix);
+  if (trysuffix && lmp->suffix_enable) {
+    if (lmp->suffix) {
+      sflag = 1;
+      char estyle[256];
+      sprintf(estyle,"%s/%s",style,lmp->suffix);

-    if (0) return NULL;
+      if (0) return NULL;

 #define ATOM_CLASS
 #define AtomStyle(key,Class) \
-    else if (strcmp(estyle,#key) == 0) return new Class(lmp);
+      else if (strcmp(estyle,#key) == 0) return new Class(lmp);
 #include "style_atom.h"
 #undef AtomStyle
 #undef ATOM_CLASS
+    }
+
+    if (lmp->suffix2) {
+      sflag = 2;    // must differ from 1 so create_avec() appends suffix2
+      char estyle[256];
+      sprintf(estyle,"%s/%s",style,lmp->suffix2);
+      if (0) return NULL;
+
+#define ATOM_CLASS
+#define AtomStyle(key,Class) \
+      else if (strcmp(estyle,#key) == 0) return new Class(lmp);
+#include "style_atom.h"
+#undef AtomStyle
+#undef ATOM_CLASS
+    }
   }

   sflag = 0;
   if (0) return NULL;    // kept: anchors the "else if" chain the AtomStyle macro expands to

 #define ATOM_CLASS
 #define AtomStyle(key,Class) \
   else if (strcmp(style,#key) == 0) return new Class(lmp);
 #include "style_atom.h"
 #undef ATOM_CLASS

   else error->all(FLERR,"Invalid atom style");
   return NULL;           // kept: the function must still return a value on its final path
 }

 /* ---------------------------------------------------------------------- */

 void Atom::init()
 {
   // delete extra array since it doesn't persist past first run

   if (nextra_store) {
     memory->destroy(extra);
     extra = NULL;
     nextra_store = 0;
   }

   // check arrays that are atom type in length

   check_mass();

   // setup of firstgroup

   if (firstgroupname) {
     firstgroup = group->find(firstgroupname);
     if (firstgroup < 0)
       error->all(FLERR,"Could not find atom_modify first group ID");
   } else firstgroup = -1;

   // init AtomVec

   avec->init();
 }

 /* ---------------------------------------------------------------------- */

 void Atom::setup()
 {
   // setup bins for sorting
   // cannot do this in init() because uses neighbor cutoff

   if (sortfreq > 0) setup_sort_bins();
 }

 /* ----------------------------------------------------------------------
    return ptr to AtomVec class if matches style or to matching hybrid sub-class
    return NULL if no match
 ------------------------------------------------------------------------- */

 AtomVec *Atom::style_match(const char *style)
 {
   if (strcmp(atom_style,style) == 0) return avec;
   else if (strcmp(atom_style,"hybrid") == 0) {
     AtomVecHybrid *avec_hybrid = (AtomVecHybrid *) avec;
for (int i = 0; i < avec_hybrid->nstyles; i++)
       if (strcmp(avec_hybrid->keywords[i],style) == 0)
         return avec_hybrid->styles[i];
   }
   return NULL;
 }

 /* ----------------------------------------------------------------------
    modify parameters of the atom style
    some options can only be invoked before simulation box is defined
    first and sort options cannot be used together
    each pass of the loop consumes exactly one keyword and its values:
      id yes/no, map array/hash, first group-ID/all, sort Nfreq binsize
 ------------------------------------------------------------------------- */

 void Atom::modify_params(int narg, char **arg)
 {
   if (narg == 0) error->all(FLERR,"Illegal atom_modify command");

   int iarg = 0;
   while (iarg < narg) {
     if (strcmp(arg[iarg],"id") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal atom_modify command");
       if (domain->box_exist)
         error->all(FLERR,
                    "Atom_modify id command after simulation box is defined");
       if (strcmp(arg[iarg+1],"yes") == 0) tag_enable = 1;
       // "no" must store 0: create_avec() and tag_check() test tag_enable
       // against 0 / as a boolean to decide whether atom IDs are in use
       else if (strcmp(arg[iarg+1],"no") == 0) tag_enable = 0;
       else error->all(FLERR,"Illegal atom_modify command");
       iarg += 2;
     // chained with else: a bare "if" here re-read arg[iarg] after iarg was
     // advanced, which is out of bounds when "id" is the last keyword
     } else if (strcmp(arg[iarg],"map") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal atom_modify command");
       if (domain->box_exist)
         error->all(FLERR,
                    "Atom_modify map command after simulation box is defined");
       if (strcmp(arg[iarg+1],"array") == 0) map_user = 1;
       else if (strcmp(arg[iarg+1],"hash") == 0) map_user = 2;
       else error->all(FLERR,"Illegal atom_modify command");
       map_style = map_user;
       iarg += 2;
     } else if (strcmp(arg[iarg],"first") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal atom_modify command");
       if (strcmp(arg[iarg+1],"all") == 0) {
         delete [] firstgroupname;
         firstgroupname = NULL;
       } else {
         int n = strlen(arg[iarg+1]) + 1;
         firstgroupname = new char[n];
         strcpy(firstgroupname,arg[iarg+1]);
         sortfreq = 0;
       }
       iarg += 2;
     } else if (strcmp(arg[iarg],"sort") == 0) {
       if (iarg+3 > narg) error->all(FLERR,"Illegal atom_modify command");
       sortfreq = force->inumeric(FLERR,arg[iarg+1]);
       userbinsize = force->numeric(FLERR,arg[iarg+2]);
       if (sortfreq < 0 || userbinsize < 0.0)
         error->all(FLERR,"Illegal atom_modify command");
       if (sortfreq >= 0 && firstgroupname)
         error->all(FLERR,"Atom_modify sort and first options "
                    "cannot be used together");
       iarg += 3;
     } else error->all(FLERR,"Illegal atom_modify command");
   }
 }

 /* ----------------------------------------------------------------------
    check that atom IDs are valid
    error if any atom ID < 0 or atom ID = MAXTAGINT
    if any atom ID > 0, error if any atom ID == 0
    if all atom IDs = 0, tag_enable must be 0
    OK if atom IDs > natoms
    NOTE: not checking that atom IDs are unique
 ------------------------------------------------------------------------- */

 void Atom::tag_check()
 {
   int nlocal = atom->nlocal;
   tagint *tag = atom->tag;

   // local min/max of owned atom IDs, then reduced across all procs

   tagint min = MAXTAGINT;
   tagint max = 0;

   for (int i = 0; i < nlocal; i++) {
     min = MIN(min,tag[i]);
     max = MAX(max,tag[i]);
   }

   tagint minall,maxall;
   MPI_Allreduce(&min,&minall,1,MPI_LMP_TAGINT,MPI_MIN,world);
   MPI_Allreduce(&max,&maxall,1,MPI_LMP_TAGINT,MPI_MAX,world);

   if (minall < 0) error->all(FLERR,"Atom ID is negative");
   if (maxall >= MAXTAGINT) error->all(FLERR,"Atom ID is too big");
   if (maxall > 0 && minall == 0) error->all(FLERR,"Atom ID is zero");
   if (maxall == 0 && tag_enable && natoms)
     error->all(FLERR,"Not all atom IDs are 0");
 }

 /* ----------------------------------------------------------------------
    add unique tags to any atoms with tag = 0
    new tags are grouped by proc and start after max current tag
    called after creating new atoms
    error if new tags will exceed MAXTAGINT
 ------------------------------------------------------------------------- */

 void Atom::tag_extend()
 {
   // maxtag_all = max tag for all atoms

   tagint maxtag = 0;
   for (int i = 0; i < nlocal; i++) maxtag = MAX(maxtag,tag[i]);
   tagint maxtag_all;
   MPI_Allreduce(&maxtag,&maxtag_all,1,MPI_LMP_TAGINT,MPI_MAX,world);

   // DEBUG: useful for generating 64-bit IDs even for small systems
   //        use only when LAMMPS is compiled with BIGBIG

   //maxtag_all += 1000000000000;

   // notag = # of atoms I own with no tag (tag = 0)
   // notag_sum = # of total atoms on procs
<= me with no tag bigint notag = 0; for (int i = 0; i < nlocal; i++) if (tag[i] == 0) notag++; bigint notag_total; MPI_Allreduce(¬ag,¬ag_total,1,MPI_LMP_BIGINT,MPI_SUM,world); if (notag_total >= MAXTAGINT) error->all(FLERR,"New atom IDs exceed maximum allowed ID"); bigint notag_sum; MPI_Scan(¬ag,¬ag_sum,1,MPI_LMP_BIGINT,MPI_SUM,world); // itag = 1st new tag that my untagged atoms should use tagint itag = maxtag_all + notag_sum - notag + 1; for (int i = 0; i < nlocal; i++) if (tag[i] == 0) tag[i] = itag++; } /* ---------------------------------------------------------------------- check that atom IDs span range from 1 to Natoms inclusive return 0 if mintag != 1 or maxtag != Natoms return 1 if OK doesn't actually check if all tag values are used ------------------------------------------------------------------------- */ int Atom::tag_consecutive() { tagint idmin = MAXTAGINT; tagint idmax = 0; for (int i = 0; i < nlocal; i++) { idmin = MIN(idmin,tag[i]); idmax = MAX(idmax,tag[i]); } tagint idminall,idmaxall; MPI_Allreduce(&idmin,&idminall,1,MPI_LMP_TAGINT,MPI_MIN,world); MPI_Allreduce(&idmax,&idmaxall,1,MPI_LMP_TAGINT,MPI_MAX,world); if (idminall != 1 || idmaxall != natoms) return 0; return 1; } /* ---------------------------------------------------------------------- count and return words in a single line make copy of line before using strtok so as not to change line trim anything from '#' onward ------------------------------------------------------------------------- */ int Atom::count_words(const char *line) { int n = strlen(line) + 1; char *copy; memory->create(copy,n,"atom:copy"); strcpy(copy,line); char *ptr; if ((ptr = strchr(copy,'#'))) *ptr = '\0'; if (strtok(copy," \t\n\r\f") == NULL) { memory->destroy(copy); return 0; } n = 1; while (strtok(NULL," \t\n\r\f")) n++; memory->destroy(copy); return n; } /* ---------------------------------------------------------------------- deallocate molecular topology arrays done before realloc with (possibly) new 2nd 
dimension set to correctly initialized per-atom values, e.g. bond_per_atom needs to be called whenever 2nd dimensions are changed and these arrays are already pre-allocated, e.g. due to grow(1) in create_avec() ------------------------------------------------------------------------- */ void Atom::deallocate_topology() { memory->destroy(atom->bond_type); memory->destroy(atom->bond_atom); atom->bond_type = NULL; atom->bond_atom = NULL; memory->destroy(atom->angle_type); memory->destroy(atom->angle_atom1); memory->destroy(atom->angle_atom2); memory->destroy(atom->angle_atom3); atom->angle_type = NULL; atom->angle_atom1 = atom->angle_atom2 = atom->angle_atom3 = NULL; memory->destroy(atom->dihedral_type); memory->destroy(atom->dihedral_atom1); memory->destroy(atom->dihedral_atom2); memory->destroy(atom->dihedral_atom3); memory->destroy(atom->dihedral_atom4); atom->dihedral_type = NULL; atom->dihedral_atom1 = atom->dihedral_atom2 = atom->dihedral_atom3 = atom->dihedral_atom4 = NULL; memory->destroy(atom->improper_type); memory->destroy(atom->improper_atom1); memory->destroy(atom->improper_atom2); memory->destroy(atom->improper_atom3); memory->destroy(atom->improper_atom4); atom->improper_type = NULL; atom->improper_atom1 = atom->improper_atom2 = atom->improper_atom3 = atom->improper_atom4 = NULL; } /* ---------------------------------------------------------------------- unpack n lines from Atom section of data file call style-specific routine to parse line ------------------------------------------------------------------------- */ void Atom::data_atoms(int n, char *buf) { int m,xptr,iptr; imageint imagedata; double xdata[3],lamda[3]; double *coord; char *next; next = strchr(buf,'\n'); *next = '\0'; int nwords = count_words(buf); *next = '\n'; if (nwords != avec->size_data_atom && nwords != avec->size_data_atom + 3) error->all(FLERR,"Incorrect atom format in data file"); char **values = new char*[nwords]; // set bounds for my proc // if periodic and I am lo/hi proc, 
adjust bounds by EPSILON // insures all data atoms will be owned even with round-off int triclinic = domain->triclinic; double epsilon[3]; if (triclinic) epsilon[0] = epsilon[1] = epsilon[2] = EPSILON; else { epsilon[0] = domain->prd[0] * EPSILON; epsilon[1] = domain->prd[1] * EPSILON; epsilon[2] = domain->prd[2] * EPSILON; } double sublo[3],subhi[3]; if (triclinic == 0) { sublo[0] = domain->sublo[0]; subhi[0] = domain->subhi[0]; sublo[1] = domain->sublo[1]; subhi[1] = domain->subhi[1]; sublo[2] = domain->sublo[2]; subhi[2] = domain->subhi[2]; } else { sublo[0] = domain->sublo_lamda[0]; subhi[0] = domain->subhi_lamda[0]; sublo[1] = domain->sublo_lamda[1]; subhi[1] = domain->subhi_lamda[1]; sublo[2] = domain->sublo_lamda[2]; subhi[2] = domain->subhi_lamda[2]; } if (comm->layout != LAYOUT_TILED) { if (domain->xperiodic) { if (comm->myloc[0] == 0) sublo[0] -= epsilon[0]; if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] += epsilon[0]; } if (domain->yperiodic) { if (comm->myloc[1] == 0) sublo[1] -= epsilon[1]; if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] += epsilon[1]; } if (domain->zperiodic) { if (comm->myloc[2] == 0) sublo[2] -= epsilon[2]; if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] += epsilon[2]; } } else { if (domain->xperiodic) { if (comm->mysplit[0][0] == 0.0) sublo[0] -= epsilon[0]; if (comm->mysplit[0][1] == 1.0) subhi[0] += epsilon[0]; } if (domain->yperiodic) { if (comm->mysplit[1][0] == 0.0) sublo[1] -= epsilon[1]; if (comm->mysplit[1][1] == 1.0) subhi[1] += epsilon[1]; } if (domain->zperiodic) { if (comm->mysplit[2][0] == 0.0) sublo[2] -= epsilon[2]; if (comm->mysplit[2][1] == 1.0) subhi[2] += epsilon[2]; } } // xptr = which word in line starts xyz coords // iptr = which word in line starts ix,iy,iz image flags xptr = avec->xcol_data - 1; int imageflag = 0; if (nwords > avec->size_data_atom) imageflag = 1; if (imageflag) iptr = nwords - 3; // loop over lines of atom data // tokenize the line into values // extract xyz coords and image 
flags // remap atom into simulation box // if atom is in my sub-domain, unpack its values for (int i = 0; i < n; i++) { next = strchr(buf,'\n'); values[0] = strtok(buf," \t\n\r\f"); if (values[0] == NULL) error->all(FLERR,"Incorrect atom format in data file"); for (m = 1; m < nwords; m++) { values[m] = strtok(NULL," \t\n\r\f"); if (values[m] == NULL) error->all(FLERR,"Incorrect atom format in data file"); } if (imageflag) imagedata = ((imageint) (atoi(values[iptr]) + IMGMAX) & IMGMASK) | (((imageint) (atoi(values[iptr+1]) + IMGMAX) & IMGMASK) << IMGBITS) | (((imageint) (atoi(values[iptr+2]) + IMGMAX) & IMGMASK) << IMG2BITS); else imagedata = ((imageint) IMGMAX << IMG2BITS) | ((imageint) IMGMAX << IMGBITS) | IMGMAX; xdata[0] = atof(values[xptr]); xdata[1] = atof(values[xptr+1]); xdata[2] = atof(values[xptr+2]); domain->remap(xdata,imagedata); if (triclinic) { domain->x2lamda(xdata,lamda); coord = lamda; } else coord = xdata; if (coord[0] >= sublo[0] && coord[0] < subhi[0] && coord[1] >= sublo[1] && coord[1] < subhi[1] && coord[2] >= sublo[2] && coord[2] < subhi[2]) avec->data_atom(xdata,imagedata,values); buf = next + 1; } delete [] values; } /* ---------------------------------------------------------------------- unpack n lines from Velocity section of data file check that atom IDs are > 0 and <= map_tag_max call style-specific routine to parse line ------------------------------------------------------------------------- */ void Atom::data_vels(int n, char *buf) { int j,m; tagint tagdata; char *next; next = strchr(buf,'\n'); *next = '\0'; int nwords = count_words(buf); *next = '\n'; if (nwords != avec->size_data_vel) error->all(FLERR,"Incorrect velocity format in data file"); char **values = new char*[nwords]; // loop over lines of atom velocities // tokenize the line into values // if I own atom tag, unpack its values for (int i = 0; i < n; i++) { next = strchr(buf,'\n'); values[0] = strtok(buf," \t\n\r\f"); for (j = 1; j < nwords; j++) values[j] = strtok(NULL," 
\t\n\r\f"); tagdata = ATOTAGINT(values[0]); if (tagdata <= 0 || tagdata > map_tag_max) error->one(FLERR,"Invalid atom ID in Velocities section of data file"); if ((m = map(tagdata)) >= 0) avec->data_vel(m,&values[1]); buf = next + 1; } delete [] values; } /* ---------------------------------------------------------------------- process N bonds read into buf from data files if count is non-NULL, just count bonds per atom else store them with atoms check that atom IDs are > 0 and <= map_tag_max ------------------------------------------------------------------------- */ void Atom::data_bonds(int n, char *buf, int *count) { int m,tmp,itype; tagint atom1,atom2; char *next; int newton_bond = force->newton_bond; for (int i = 0; i < n; i++) { next = strchr(buf,'\n'); *next = '\0'; sscanf(buf,"%d %d " TAGINT_FORMAT " " TAGINT_FORMAT, &tmp,&itype,&atom1,&atom2); if (atom1 <= 0 || atom1 > map_tag_max || atom2 <= 0 || atom2 > map_tag_max) error->one(FLERR,"Invalid atom ID in Bonds section of data file"); if (itype <= 0 || itype > nbondtypes) error->one(FLERR,"Invalid bond type in Bonds section of data file"); if ((m = map(atom1)) >= 0) { if (count) count[m]++; else { bond_type[m][num_bond[m]] = itype; bond_atom[m][num_bond[m]] = atom2; num_bond[m]++; } } if (newton_bond == 0) { if ((m = map(atom2)) >= 0) { if (count) count[m]++; else { bond_type[m][num_bond[m]] = itype; bond_atom[m][num_bond[m]] = atom1; num_bond[m]++; } } } buf = next + 1; } } /* ---------------------------------------------------------------------- process N angles read into buf from data files if count is non-NULL, just count angles per atom else store them with atoms check that atom IDs are > 0 and <= map_tag_max ------------------------------------------------------------------------- */ void Atom::data_angles(int n, char *buf, int *count) { int m,tmp,itype; tagint atom1,atom2,atom3; char *next; int newton_bond = force->newton_bond; for (int i = 0; i < n; i++) { next = strchr(buf,'\n'); *next = '\0'; 
sscanf(buf,"%d %d " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT, &tmp,&itype,&atom1,&atom2,&atom3); if (atom1 <= 0 || atom1 > map_tag_max || atom2 <= 0 || atom2 > map_tag_max || atom3 <= 0 || atom3 > map_tag_max) error->one(FLERR,"Invalid atom ID in Angles section of data file"); if (itype <= 0 || itype > nangletypes) error->one(FLERR,"Invalid angle type in Angles section of data file"); if ((m = map(atom2)) >= 0) { if (count) count[m]++; else { angle_type[m][num_angle[m]] = itype; angle_atom1[m][num_angle[m]] = atom1; angle_atom2[m][num_angle[m]] = atom2; angle_atom3[m][num_angle[m]] = atom3; num_angle[m]++; } } if (newton_bond == 0) { if ((m = map(atom1)) >= 0) { if (count) count[m]++; else { angle_type[m][num_angle[m]] = itype; angle_atom1[m][num_angle[m]] = atom1; angle_atom2[m][num_angle[m]] = atom2; angle_atom3[m][num_angle[m]] = atom3; num_angle[m]++; } } if ((m = map(atom3)) >= 0) { if (count) count[m]++; else { angle_type[m][num_angle[m]] = itype; angle_atom1[m][num_angle[m]] = atom1; angle_atom2[m][num_angle[m]] = atom2; angle_atom3[m][num_angle[m]] = atom3; num_angle[m]++; } } } buf = next + 1; } } /* ---------------------------------------------------------------------- process N dihedrals read into buf from data files if count is non-NULL, just count diihedrals per atom else store them with atoms check that atom IDs are > 0 and <= map_tag_max ------------------------------------------------------------------------- */ void Atom::data_dihedrals(int n, char *buf, int *count) { int m,tmp,itype; tagint atom1,atom2,atom3,atom4; char *next; int newton_bond = force->newton_bond; for (int i = 0; i < n; i++) { next = strchr(buf,'\n'); *next = '\0'; sscanf(buf,"%d %d " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT, &tmp,&itype,&atom1,&atom2,&atom3,&atom4); if (atom1 <= 0 || atom1 > map_tag_max || atom2 <= 0 || atom2 > map_tag_max || atom3 <= 0 || atom3 > map_tag_max || atom4 <= 0 || atom4 > map_tag_max) error->one(FLERR,"Invalid atom 
ID in Dihedrals section of data file"); if (itype <= 0 || itype > ndihedraltypes) error->one(FLERR, "Invalid dihedral type in Dihedrals section of data file"); if ((m = map(atom2)) >= 0) { if (count) count[m]++; else { dihedral_type[m][num_dihedral[m]] = itype; dihedral_atom1[m][num_dihedral[m]] = atom1; dihedral_atom2[m][num_dihedral[m]] = atom2; dihedral_atom3[m][num_dihedral[m]] = atom3; dihedral_atom4[m][num_dihedral[m]] = atom4; num_dihedral[m]++; } } if (newton_bond == 0) { if ((m = map(atom1)) >= 0) { if (count) count[m]++; else { dihedral_type[m][num_dihedral[m]] = itype; dihedral_atom1[m][num_dihedral[m]] = atom1; dihedral_atom2[m][num_dihedral[m]] = atom2; dihedral_atom3[m][num_dihedral[m]] = atom3; dihedral_atom4[m][num_dihedral[m]] = atom4; num_dihedral[m]++; } } if ((m = map(atom3)) >= 0) { if (count) count[m]++; else { dihedral_type[m][num_dihedral[m]] = itype; dihedral_atom1[m][num_dihedral[m]] = atom1; dihedral_atom2[m][num_dihedral[m]] = atom2; dihedral_atom3[m][num_dihedral[m]] = atom3; dihedral_atom4[m][num_dihedral[m]] = atom4; num_dihedral[m]++; } } if ((m = map(atom4)) >= 0) { if (count) count[m]++; else { dihedral_type[m][num_dihedral[m]] = itype; dihedral_atom1[m][num_dihedral[m]] = atom1; dihedral_atom2[m][num_dihedral[m]] = atom2; dihedral_atom3[m][num_dihedral[m]] = atom3; dihedral_atom4[m][num_dihedral[m]] = atom4; num_dihedral[m]++; } } } buf = next + 1; } } /* ---------------------------------------------------------------------- process N impropers read into buf from data files if count is non-NULL, just count impropers per atom else store them with atoms check that atom IDs are > 0 and <= map_tag_max ------------------------------------------------------------------------- */ void Atom::data_impropers(int n, char *buf, int *count) { int m,tmp,itype; tagint atom1,atom2,atom3,atom4; char *next; int newton_bond = force->newton_bond; for (int i = 0; i < n; i++) { next = strchr(buf,'\n'); *next = '\0'; sscanf(buf,"%d %d " TAGINT_FORMAT " 
" TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT, &tmp,&itype,&atom1,&atom2,&atom3,&atom4); if (atom1 <= 0 || atom1 > map_tag_max || atom2 <= 0 || atom2 > map_tag_max || atom3 <= 0 || atom3 > map_tag_max || atom4 <= 0 || atom4 > map_tag_max) error->one(FLERR,"Invalid atom ID in Impropers section of data file"); if (itype <= 0 || itype > nimpropertypes) error->one(FLERR, "Invalid improper type in Impropers section of data file"); if ((m = map(atom2)) >= 0) { if (count) count[m]++; else { improper_type[m][num_improper[m]] = itype; improper_atom1[m][num_improper[m]] = atom1; improper_atom2[m][num_improper[m]] = atom2; improper_atom3[m][num_improper[m]] = atom3; improper_atom4[m][num_improper[m]] = atom4; num_improper[m]++; } } if (newton_bond == 0) { if ((m = map(atom1)) >= 0) { if (count) count[m]++; else { improper_type[m][num_improper[m]] = itype; improper_atom1[m][num_improper[m]] = atom1; improper_atom2[m][num_improper[m]] = atom2; improper_atom3[m][num_improper[m]] = atom3; improper_atom4[m][num_improper[m]] = atom4; num_improper[m]++; } } if ((m = map(atom3)) >= 0) { if (count) count[m]++; else { improper_type[m][num_improper[m]] = itype; improper_atom1[m][num_improper[m]] = atom1; improper_atom2[m][num_improper[m]] = atom2; improper_atom3[m][num_improper[m]] = atom3; improper_atom4[m][num_improper[m]] = atom4; num_improper[m]++; } } if ((m = map(atom4)) >= 0) { if (count) count[m]++; else { improper_type[m][num_improper[m]] = itype; improper_atom1[m][num_improper[m]] = atom1; improper_atom2[m][num_improper[m]] = atom2; improper_atom3[m][num_improper[m]] = atom3; improper_atom4[m][num_improper[m]] = atom4; num_improper[m]++; } } } buf = next + 1; } } /* ---------------------------------------------------------------------- unpack n lines from atom-style specific section of data file check that atom IDs are > 0 and <= map_tag_max call style-specific routine to parse line ------------------------------------------------------------------------- */ void 
Atom::data_bonus(int n, char *buf, AtomVec *avec_bonus) { int j,m,tagdata; char *next; next = strchr(buf,'\n'); *next = '\0'; int nwords = count_words(buf); *next = '\n'; if (nwords != avec_bonus->size_data_bonus) error->all(FLERR,"Incorrect bonus data format in data file"); char **values = new char*[nwords]; // loop over lines of bonus atom data // tokenize the line into values // if I own atom tag, unpack its values for (int i = 0; i < n; i++) { next = strchr(buf,'\n'); values[0] = strtok(buf," \t\n\r\f"); for (j = 1; j < nwords; j++) values[j] = strtok(NULL," \t\n\r\f"); tagdata = ATOTAGINT(values[0]); if (tagdata <= 0 || tagdata > map_tag_max) error->one(FLERR,"Invalid atom ID in Bonus section of data file"); // ok to call child's data_atom_bonus() method thru parent avec_bonus, // since data_bonus() was called with child ptr, and method is virtual if ((m = map(tagdata)) >= 0) avec_bonus->data_atom_bonus(m,&values[1]); buf = next + 1; } delete [] values; } /* ---------------------------------------------------------------------- unpack n lines from atom-style specific section of data file check that atom IDs are > 0 and <= map_tag_max call style-specific routine to parse line ------------------------------------------------------------------------- */ void Atom::data_bodies(int n, char *buf, AtomVecBody *avec_body) { int j,m,tagdata,ninteger,ndouble; char **ivalues = new char*[10*MAXBODY]; char **dvalues = new char*[10*MAXBODY]; // loop over lines of body data // tokenize the lines into ivalues and dvalues // if I own atom tag, unpack its values for (int i = 0; i < n; i++) { if (i == 0) tagdata = ATOTAGINT(strtok(buf," \t\n\r\f")); else tagdata = ATOTAGINT(strtok(NULL," \t\n\r\f")); ninteger = atoi(strtok(NULL," \t\n\r\f")); ndouble = atoi(strtok(NULL," \t\n\r\f")); for (j = 0; j < ninteger; j++) ivalues[j] = strtok(NULL," \t\n\r\f"); for (j = 0; j < ndouble; j++) dvalues[j] = strtok(NULL," \t\n\r\f"); if (tagdata <= 0 || tagdata > map_tag_max) 
error->one(FLERR,"Invalid atom ID in Bodies section of data file"); if ((m = map(tagdata)) >= 0) avec_body->data_body(m,ninteger,ndouble,ivalues,dvalues); } delete [] ivalues; delete [] dvalues; } /* ---------------------------------------------------------------------- allocate arrays of length ntypes only done after ntypes is set ------------------------------------------------------------------------- */ void Atom::allocate_type_arrays() { if (avec->mass_type) { mass = new double[ntypes+1]; mass_setflag = new int[ntypes+1]; for (int itype = 1; itype <= ntypes; itype++) mass_setflag[itype] = 0; } } /* ---------------------------------------------------------------------- set a mass and flag it as set called from reading of data file ------------------------------------------------------------------------- */ void Atom::set_mass(const char *str) { if (mass == NULL) error->all(FLERR,"Cannot set mass for this atom style"); int itype; double mass_one; int n = sscanf(str,"%d %lg",&itype,&mass_one); if (n != 2) error->all(FLERR,"Invalid mass line in data file"); if (itype < 1 || itype > ntypes) error->all(FLERR,"Invalid type for mass set"); mass[itype] = mass_one; mass_setflag[itype] = 1; if (mass[itype] <= 0.0) error->all(FLERR,"Invalid mass value"); } /* ---------------------------------------------------------------------- set a mass and flag it as set called from EAM pair routine ------------------------------------------------------------------------- */ void Atom::set_mass(int itype, double value) { if (mass == NULL) error->all(FLERR,"Cannot set mass for this atom style"); if (itype < 1 || itype > ntypes) error->all(FLERR,"Invalid type for mass set"); mass[itype] = value; mass_setflag[itype] = 1; if (mass[itype] <= 0.0) error->all(FLERR,"Invalid mass value"); } /* ---------------------------------------------------------------------- set one or more masses and flag them as set called from reading of input script 
------------------------------------------------------------------------- */ void Atom::set_mass(int narg, char **arg) { if (mass == NULL) error->all(FLERR,"Cannot set mass for this atom style"); int lo,hi; force->bounds(arg[0],ntypes,lo,hi); if (lo < 1 || hi > ntypes) error->all(FLERR,"Invalid type for mass set"); for (int itype = lo; itype <= hi; itype++) { mass[itype] = atof(arg[1]); mass_setflag[itype] = 1; if (mass[itype] <= 0.0) error->all(FLERR,"Invalid mass value"); } } /* ---------------------------------------------------------------------- set all masses as read in from restart file ------------------------------------------------------------------------- */ void Atom::set_mass(double *values) { for (int itype = 1; itype <= ntypes; itype++) { mass[itype] = values[itype]; mass_setflag[itype] = 1; } } /* ---------------------------------------------------------------------- check that all masses have been set ------------------------------------------------------------------------- */ void Atom::check_mass() { if (mass == NULL) return; for (int itype = 1; itype <= ntypes; itype++) if (mass_setflag[itype] == 0) error->all(FLERR,"All masses are not set"); } /* ---------------------------------------------------------------------- check that radii of all particles of itype are the same return 1 if true, else return 0 also return the radius value for that type ------------------------------------------------------------------------- */ int Atom::radius_consistency(int itype, double &rad) { double value = -1.0; int flag = 0; for (int i = 0; i < nlocal; i++) { if (type[i] != itype) continue; if (value < 0.0) value = radius[i]; else if (value != radius[i]) flag = 1; } int flagall; MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); if (flagall) return 0; MPI_Allreduce(&value,&rad,1,MPI_DOUBLE,MPI_MAX,world); return 1; } /* ---------------------------------------------------------------------- check that shape of all particles of itype are the same return 1 if 
true, else return 0 also return the 3 shape params for itype ------------------------------------------------------------------------- */ int Atom::shape_consistency(int itype, double &shapex, double &shapey, double &shapez) { double zero[3] = {0.0, 0.0, 0.0}; double one[3] = {-1.0, -1.0, -1.0}; double *shape; AtomVecEllipsoid *avec_ellipsoid = (AtomVecEllipsoid *) style_match("ellipsoid"); AtomVecEllipsoid::Bonus *bonus = avec_ellipsoid->bonus; int flag = 0; for (int i = 0; i < nlocal; i++) { if (type[i] != itype) continue; if (ellipsoid[i] < 0) shape = zero; else shape = bonus[ellipsoid[i]].shape; if (one[0] < 0.0) { one[0] = shape[0]; one[1] = shape[1]; one[2] = shape[2]; } else if (one[0] != shape[0] || one[1] != shape[1] || one[2] != shape[2]) flag = 1; } int flagall; MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); if (flagall) return 0; double oneall[3]; MPI_Allreduce(one,oneall,3,MPI_DOUBLE,MPI_MAX,world); shapex = oneall[0]; shapey = oneall[1]; shapez = oneall[2]; return 1; } /* ---------------------------------------------------------------------- add a new molecule template = set of molecules ------------------------------------------------------------------------- */ void Atom::add_molecule(int narg, char **arg) { if (narg < 2) error->all(FLERR,"Illegal molecule command"); if (find_molecule(arg[0]) >= 0) error->all(FLERR,"Reuse of molecule template ID"); int nprevious = nmolecule; nmolecule += narg-1; molecules = (Molecule **) memory->srealloc(molecules,nmolecule*sizeof(Molecule *),"atom::molecules"); for (int i = 1; i < narg; i++) { molecules[nprevious] = new Molecule(lmp,arg[0],arg[i]); if (i == 1) molecules[nprevious]->nset = narg-1; else molecules[nprevious]->nset = 0; nprevious++; } } /* ---------------------------------------------------------------------- find first molecule in set with template ID return -1 if does not exist ------------------------------------------------------------------------- */ int Atom::find_molecule(char *id) { int 
imol; for (imol = 0; imol < nmolecule; imol++) if (strcmp(id,molecules[imol]->id) == 0) return imol; return -1; } /* ---------------------------------------------------------------------- add info to current atom ilocal from molecule template onemol and its iatom offset = atom ID preceeding IDs of atoms in this molecule called by fixes and commands that add molecules ------------------------------------------------------------------------- */ void Atom::add_molecule_atom(Molecule *onemol, int iatom, int ilocal, tagint offset) { if (onemol->qflag && q_flag) q[ilocal] = onemol->q[iatom]; if (onemol->radiusflag && radius_flag) radius[ilocal] = onemol->radius[iatom]; if (onemol->rmassflag && rmass_flag) rmass[ilocal] = onemol->rmass[iatom]; else if (rmass_flag) rmass[ilocal] = 4.0*MY_PI/3.0 * radius[ilocal]*radius[ilocal]*radius[ilocal]; if (molecular != 1) return; // add bond topology info // for molecular atom styles, but not atom style template if (avec->bonds_allow) { num_bond[ilocal] = onemol->num_bond[iatom]; for (int i = 0; i < num_bond[ilocal]; i++) { bond_type[ilocal][i] = onemol->bond_type[iatom][i]; bond_atom[ilocal][i] = onemol->bond_atom[iatom][i] + offset; } } if (avec->angles_allow) { num_angle[ilocal] = onemol->num_angle[iatom]; for (int i = 0; i < num_angle[ilocal]; i++) { angle_type[ilocal][i] = onemol->angle_type[iatom][i]; angle_atom1[ilocal][i] = onemol->angle_atom1[iatom][i] + offset; angle_atom2[ilocal][i] = onemol->angle_atom2[iatom][i] + offset; angle_atom3[ilocal][i] = onemol->angle_atom3[iatom][i] + offset; } } if (avec->dihedrals_allow) { num_dihedral[ilocal] = onemol->num_dihedral[iatom]; for (int i = 0; i < num_dihedral[ilocal]; i++) { dihedral_type[ilocal][i] = onemol->dihedral_type[iatom][i]; dihedral_atom1[ilocal][i] = onemol->dihedral_atom1[iatom][i] + offset; dihedral_atom2[ilocal][i] = onemol->dihedral_atom2[iatom][i] + offset; dihedral_atom3[ilocal][i] = onemol->dihedral_atom3[iatom][i] + offset; dihedral_atom4[ilocal][i] = 
onemol->dihedral_atom4[iatom][i] + offset; } } if (avec->impropers_allow) { num_improper[ilocal] = onemol->num_improper[iatom]; for (int i = 0; i < num_improper[ilocal]; i++) { improper_type[ilocal][i] = onemol->improper_type[iatom][i]; improper_atom1[ilocal][i] = onemol->improper_atom1[iatom][i] + offset; improper_atom2[ilocal][i] = onemol->improper_atom2[iatom][i] + offset; improper_atom3[ilocal][i] = onemol->improper_atom3[iatom][i] + offset; improper_atom4[ilocal][i] = onemol->improper_atom4[iatom][i] + offset; } } if (onemol->specialflag) { nspecial[ilocal][0] = onemol->nspecial[iatom][0]; nspecial[ilocal][1] = onemol->nspecial[iatom][1]; int n = nspecial[ilocal][2] = onemol->nspecial[iatom][2]; for (int i = 0; i < n; i++) special[ilocal][i] = onemol->special[iatom][i] + offset; } } /* ---------------------------------------------------------------------- reorder owned atoms so those in firstgroup appear first called by comm->exchange() if atom_modify first group is set only owned atoms exist at this point, no ghost atoms ------------------------------------------------------------------------- */ void Atom::first_reorder() { // insure there is one extra atom location at end of arrays for swaps if (nlocal == nmax) avec->grow(0); // loop over owned atoms // nfirst = index of first atom not in firstgroup // when find firstgroup atom out of place, swap it with atom nfirst int bitmask = group->bitmask[firstgroup]; nfirst = 0; while (nfirst < nlocal && mask[nfirst] & bitmask) nfirst++; for (int i = 0; i < nlocal; i++) { if (mask[i] & bitmask && i > nfirst) { avec->copy(i,nlocal,0); avec->copy(nfirst,i,0); avec->copy(nlocal,nfirst,0); while (nfirst < nlocal && mask[nfirst] & bitmask) nfirst++; } } } /* ---------------------------------------------------------------------- perform spatial sort of atoms within my sub-domain always called between comm->exchange() and comm->borders() don't have to worry about clearing/setting atom->map since done in comm 
------------------------------------------------------------------------- */ void Atom::sort() { int i,m,n,ix,iy,iz,ibin,empty; // set next timestep for sorting to take place nextsort = (update->ntimestep/sortfreq)*sortfreq + sortfreq; // download data from GPU if necessary if (lmp->cuda && !lmp->cuda->oncpu) lmp->cuda->downloadAll(); // re-setup sort bins if needed if (domain->box_change) setup_sort_bins(); if (nbins == 1) return; // reallocate per-atom vectors if needed if (nlocal > maxnext) { memory->destroy(next); memory->destroy(permute); maxnext = atom->nmax; memory->create(next,maxnext,"atom:next"); memory->create(permute,maxnext,"atom:permute"); } // insure there is one extra atom location at end of arrays for swaps if (nlocal == nmax) avec->grow(0); // bin atoms in reverse order so linked list will be in forward order for (i = 0; i < nbins; i++) binhead[i] = -1; for (i = nlocal-1; i >= 0; i--) { ix = static_cast ((x[i][0]-bboxlo[0])*bininvx); iy = static_cast ((x[i][1]-bboxlo[1])*bininvy); iz = static_cast ((x[i][2]-bboxlo[2])*bininvz); ix = MAX(ix,0); iy = MAX(iy,0); iz = MAX(iz,0); ix = MIN(ix,nbinx-1); iy = MIN(iy,nbiny-1); iz = MIN(iz,nbinz-1); ibin = iz*nbiny*nbinx + iy*nbinx + ix; next[i] = binhead[ibin]; binhead[ibin] = i; } // permute = desired permutation of atoms // permute[I] = J means Ith new atom will be Jth old atom n = 0; for (m = 0; m < nbins; m++) { i = binhead[m]; while (i >= 0) { permute[n++] = i; i = next[i]; } } // current = current permutation, just reuse next vector // current[I] = J means Ith current atom is Jth old atom int *current = next; for (i = 0; i < nlocal; i++) current[i] = i; // reorder local atom list, when done, current = permute // perform "in place" using copy() to extra atom location at end of list // inner while loop processes one cycle of the permutation // copy before inner-loop moves an atom to end of atom list // copy after inner-loop moves atom at end of list back into list // empty = location in atom list that 
is currently empty for (i = 0; i < nlocal; i++) { if (current[i] == permute[i]) continue; avec->copy(i,nlocal,0); empty = i; while (permute[empty] != i) { avec->copy(permute[empty],empty,0); empty = current[empty] = permute[empty]; } avec->copy(nlocal,empty,0); current[empty] = permute[empty]; } // upload data back to GPU if necessary if (lmp->cuda && !lmp->cuda->oncpu) lmp->cuda->uploadAll(); // sanity check that current = permute //int flag = 0; //for (i = 0; i < nlocal; i++) // if (current[i] != permute[i]) flag = 1; //int flagall; //MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); //if (flagall) error->all(FLERR,"Atom sort did not operate correctly"); } /* ---------------------------------------------------------------------- setup bins for spatial sorting of atoms ------------------------------------------------------------------------- */ void Atom::setup_sort_bins() { // binsize: // user setting if explicitly set // 1/2 of neighbor cutoff for non-CUDA // CUDA_CHUNK atoms/proc for CUDA // check if neighbor cutoff = 0.0 double binsize; if (userbinsize > 0.0) binsize = userbinsize; else if (!lmp->cuda) binsize = 0.5 * neighbor->cutneighmax; else { if (domain->dimension == 3) { double vol = (domain->boxhi[0]-domain->boxlo[0]) * (domain->boxhi[1]-domain->boxlo[1]) * (domain->boxhi[2]-domain->boxlo[2]); binsize = pow(1.0*CUDA_CHUNK/natoms*vol,1.0/3.0); } else { double area = (domain->boxhi[0]-domain->boxlo[0]) * (domain->boxhi[1]-domain->boxlo[1]); binsize = pow(1.0*CUDA_CHUNK/natoms*area,1.0/2.0); } } if (binsize == 0.0) error->all(FLERR,"Atom sorting has bin size = 0.0"); double bininv = 1.0/binsize; // nbin xyz = local bins // bbox lo/hi = bounding box of my sub-domain if (domain->triclinic) domain->bbox(domain->sublo_lamda,domain->subhi_lamda,bboxlo,bboxhi); else { bboxlo[0] = domain->sublo[0]; bboxlo[1] = domain->sublo[1]; bboxlo[2] = domain->sublo[2]; bboxhi[0] = domain->subhi[0]; bboxhi[1] = domain->subhi[1]; bboxhi[2] = domain->subhi[2]; } nbinx = 
static_cast ((bboxhi[0]-bboxlo[0]) * bininv); nbiny = static_cast ((bboxhi[1]-bboxlo[1]) * bininv); nbinz = static_cast ((bboxhi[2]-bboxlo[2]) * bininv); if (domain->dimension == 2) nbinz = 1; if (nbinx == 0) nbinx = 1; if (nbiny == 0) nbiny = 1; if (nbinz == 0) nbinz = 1; bininvx = nbinx / (bboxhi[0]-bboxlo[0]); bininvy = nbiny / (bboxhi[1]-bboxlo[1]); bininvz = nbinz / (bboxhi[2]-bboxlo[2]); if (1.0*nbinx*nbiny*nbinz > INT_MAX) error->one(FLERR,"Too many atom sorting bins"); nbins = nbinx*nbiny*nbinz; // reallocate per-bin memory if needed if (nbins > maxbin) { memory->destroy(binhead); maxbin = nbins; memory->create(binhead,maxbin,"atom:binhead"); } } /* ---------------------------------------------------------------------- register a callback to a fix so it can manage atom-based arrays happens when fix is created flag = 0 for grow, 1 for restart, 2 for border comm ------------------------------------------------------------------------- */ void Atom::add_callback(int flag) { int ifix; // find the fix // if find NULL ptr: // it's this one, since it is being replaced and has just been deleted // at this point in re-creation // if don't find NULL ptr: // i is set to nfix = new one currently being added at end of list for (ifix = 0; ifix < modify->nfix; ifix++) if (modify->fix[ifix] == NULL) break; // add callback to lists, reallocating if necessary if (flag == 0) { if (nextra_grow == nextra_grow_max) { nextra_grow_max += DELTA; memory->grow(extra_grow,nextra_grow_max,"atom:extra_grow"); } extra_grow[nextra_grow] = ifix; nextra_grow++; } else if (flag == 1) { if (nextra_restart == nextra_restart_max) { nextra_restart_max += DELTA; memory->grow(extra_restart,nextra_restart_max,"atom:extra_restart"); } extra_restart[nextra_restart] = ifix; nextra_restart++; } else if (flag == 2) { if (nextra_border == nextra_border_max) { nextra_border_max += DELTA; memory->grow(extra_border,nextra_border_max,"atom:extra_border"); } extra_border[nextra_border] = ifix; 
nextra_border++; } } /* ---------------------------------------------------------------------- unregister a callback to a fix happens when fix is deleted, called by its destructor flag = 0 for grow, 1 for restart ------------------------------------------------------------------------- */ void Atom::delete_callback(const char *id, int flag) { int ifix; for (ifix = 0; ifix < modify->nfix; ifix++) if (strcmp(id,modify->fix[ifix]->id) == 0) break; // compact the list of callbacks if (flag == 0) { int match; for (match = 0; match < nextra_grow; match++) if (extra_grow[match] == ifix) break; for (int i = match; i < nextra_grow-1; i++) extra_grow[i] = extra_grow[i+1]; nextra_grow--; } else if (flag == 1) { int match; for (match = 0; match < nextra_restart; match++) if (extra_restart[match] == ifix) break; for (int i = match; i < nextra_restart-1; i++) extra_restart[i] = extra_restart[i+1]; nextra_restart--; } else if (flag == 2) { int match; for (match = 0; match < nextra_border; match++) if (extra_border[match] == ifix) break; for (int i = match; i < nextra_border-1; i++) extra_border[i] = extra_border[i+1]; nextra_border--; } } /* ---------------------------------------------------------------------- decrement ptrs in callback lists to fixes beyond the deleted ifix happens after fix is deleted ------------------------------------------------------------------------- */ void Atom::update_callback(int ifix) { for (int i = 0; i < nextra_grow; i++) if (extra_grow[i] > ifix) extra_grow[i]--; for (int i = 0; i < nextra_restart; i++) if (extra_restart[i] > ifix) extra_restart[i]--; for (int i = 0; i < nextra_border; i++) if (extra_border[i] > ifix) extra_border[i]--; } /* ---------------------------------------------------------------------- find custom per-atom vector with name return index if found, and flag = 0/1 for int/double return -1 if not found ------------------------------------------------------------------------- */ int Atom::find_custom(char *name, int &flag) { 
for (int i = 0; i < nivector; i++) if (iname[i] && strcmp(iname[i],name) == 0) { flag = 0; return i; } for (int i = 0; i < ndvector; i++) if (dname[i] && strcmp(dname[i],name) == 0) { flag = 1; return i; } return -1; } /* ---------------------------------------------------------------------- add a custom variable with name of type flag = 0/1 for int/double assumes name does not already exist return index in ivector or dvector of its location ------------------------------------------------------------------------- */ int Atom::add_custom(char *name, int flag) { int index; if (flag == 0) { index = nivector; nivector++; iname = (char **) memory->srealloc(iname,nivector*sizeof(char *), "atom:iname"); int n = strlen(name) + 1; iname[index] = new char[n]; strcpy(iname[index],name); ivector = (int **) memory->srealloc(ivector,nivector*sizeof(int *), "atom:ivector"); memory->create(ivector[index],nmax,"atom:ivector"); } else { index = ndvector; ndvector++; dname = (char **) memory->srealloc(dname,ndvector*sizeof(char *), "atom:dname"); int n = strlen(name) + 1; dname[index] = new char[n]; strcpy(dname[index],name); dvector = (double **) memory->srealloc(dvector,ndvector*sizeof(double *), "atom:dvector"); memory->create(dvector[index],nmax,"atom:dvector"); } return index; } /* ---------------------------------------------------------------------- remove a custom variable of type flag = 0/1 for int/double at index free memory for vector and name and set ptrs to NULL ivector/dvector and iname/dname lists never shrink ------------------------------------------------------------------------- */ void Atom::remove_custom(int flag, int index) { if (flag == 0) { memory->destroy(ivector[index]); ivector[index] = NULL; delete [] iname[index]; iname[index] = NULL; } else { memory->destroy(dvector[index]); dvector[index] = NULL; delete [] dname[index]; dname[index] = NULL; } } /* ---------------------------------------------------------------------- return a pointer to a named 
internal variable if don't recognize name, return NULL customize by adding names ------------------------------------------------------------------------- */ void *Atom::extract(char *name) { if (strcmp(name,"mass") == 0) return (void *) mass; if (strcmp(name,"id") == 0) return (void *) tag; if (strcmp(name,"type") == 0) return (void *) type; if (strcmp(name,"mask") == 0) return (void *) mask; if (strcmp(name,"image") == 0) return (void *) image; if (strcmp(name,"x") == 0) return (void *) x; if (strcmp(name,"v") == 0) return (void *) v; if (strcmp(name,"f") == 0) return (void *) f; if (strcmp(name,"molecule") == 0) return (void *) molecule; if (strcmp(name,"q") == 0) return (void *) q; if (strcmp(name,"mu") == 0) return (void *) mu; if (strcmp(name,"omega") == 0) return (void *) omega; if (strcmp(name,"angmom") == 0) return (void *) angmom; if (strcmp(name,"torque") == 0) return (void *) torque; if (strcmp(name,"radius") == 0) return (void *) radius; if (strcmp(name,"rmass") == 0) return (void *) rmass; if (strcmp(name,"ellipsoid") == 0) return (void *) ellipsoid; if (strcmp(name,"line") == 0) return (void *) line; if (strcmp(name,"tri") == 0) return (void *) tri; if (strcmp(name,"vfrac") == 0) return (void *) vfrac; if (strcmp(name,"s0") == 0) return (void *) s0; if (strcmp(name,"x0") == 0) return (void *) x0; if (strcmp(name,"spin") == 0) return (void *) spin; if (strcmp(name,"eradius") == 0) return (void *) eradius; if (strcmp(name,"ervel") == 0) return (void *) ervel; if (strcmp(name,"erforce") == 0) return (void *) erforce; if (strcmp(name,"ervelforce") == 0) return (void *) ervelforce; if (strcmp(name,"cs") == 0) return (void *) cs; if (strcmp(name,"csforce") == 0) return (void *) csforce; if (strcmp(name,"vforce") == 0) return (void *) vforce; if (strcmp(name,"etag") == 0) return (void *) etag; if (strcmp(name,"rho") == 0) return (void *) rho; if (strcmp(name,"drho") == 0) return (void *) drho; if (strcmp(name,"e") == 0) return (void *) e; if 
(strcmp(name,"de") == 0) return (void *) de; if (strcmp(name,"cv") == 0) return (void *) cv; if (strcmp(name,"vest") == 0) return (void *) vest; return NULL; } /* ---------------------------------------------------------------------- return # of bytes of allocated memory call to avec tallies per-atom vectors add in global to local mapping storage ------------------------------------------------------------------------- */ bigint Atom::memory_usage() { memlength = DELTA_MEMSTR; memory->create(memstr,memlength,"atom:memstr"); memstr[0] = '\0'; bigint bytes = avec->memory_usage(); memory->destroy(memstr); bytes += max_same*sizeof(int); if (map_style == 1) bytes += memory->usage(map_array,map_maxarray); else if (map_style == 2) { bytes += map_nbucket*sizeof(int); bytes += map_nhash*sizeof(HashElem); } if (maxnext) { bytes += memory->usage(next,maxnext); bytes += memory->usage(permute,maxnext); } return bytes; } /* ---------------------------------------------------------------------- accumulate per-atom vec names in memstr, padded by spaces return 1 if padded str is not already in memlist, else 0 ------------------------------------------------------------------------- */ int Atom::memcheck(const char *str) { int n = strlen(str) + 3; char *padded = new char[n]; strcpy(padded," "); strcat(padded,str); strcat(padded," "); if (strstr(memstr,padded)) { delete [] padded; return 0; } if (strlen(memstr) + n >= memlength) { memlength += DELTA_MEMSTR; memory->grow(memstr,memlength,"atom:memstr"); } strcat(memstr,padded); delete [] padded; return 1; } diff --git a/src/atom.h b/src/atom.h index c6bebe88a..2f21fee72 100644 --- a/src/atom.h +++ b/src/atom.h @@ -1,468 +1,468 @@ /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. 
   Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the
   U.S. Government retains certain rights in this software. This software is
   distributed under the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

#ifndef LMP_ATOM_H
#define LMP_ATOM_H

#include "pointers.h"

namespace LAMMPS_NS {

// Atom declares the atom counts, per-atom and per-type arrays, fix callback
// lists, global-to-local ID map state and spatial-sort state listed below

class Atom : protected Pointers {
 public:
  char *atom_style;
  class AtomVec *avec;

  // atom counts

  bigint natoms;                // total # of atoms in system, could be 0
                                // natoms may not be current if atoms lost
  int nlocal,nghost;            // # of owned and ghost atoms on this proc
  int nmax;                     // max # of owned+ghost in arrays on this proc
  int tag_enable;               // 0/1 if atom ID tags are defined
  int molecular;                // 0 = atomic, 1 = standard molecular system,
                                // 2 = molecule template system

  bigint nbonds,nangles,ndihedrals,nimpropers;
  int ntypes,nbondtypes,nangletypes,ndihedraltypes,nimpropertypes;
  int bond_per_atom,angle_per_atom,dihedral_per_atom,improper_per_atom;
  int extra_bond_per_atom,extra_angle_per_atom;
  int extra_dihedral_per_atom,extra_improper_per_atom;

  int firstgroup;               // store atoms in this group first, -1 if unset
  int nfirst;                   // # of atoms in first group on this proc
  char *firstgroupname;         // group-ID to store first, NULL if unset

  // per-atom arrays
  // customize by adding new array

  tagint *tag;
  int *type,*mask;
  imageint *image;
  double **x,**v,**f;

  tagint *molecule;
  int *molindex,*molatom;

  double *q,**mu;
  double **omega,**angmom,**torque;
  double *radius,*rmass;
  int *ellipsoid,*line,*tri,*body;

  // PERI package

  double *vfrac,*s0;
  double **x0;

  // USER-EFF and USER-AWPMD packages

  int *spin;
  double *eradius,*ervel,*erforce,*ervelforce;
  double *cs,*csforce,*vforce;
  int *etag;

  // USER-SPH package

  double *rho,*drho,*e,*de,*cv;
  double **vest;

  int **nspecial;               // 0,1,2 = cumulative # of 1-2,1-3,1-4 neighs
  tagint **special;             // IDs of 1-2,1-3,1-4 neighs of each atom
  int maxspecial;               // special[nlocal][maxspecial]

  int *num_bond;
  int **bond_type;
  tagint **bond_atom;

  int *num_angle;
  int **angle_type;
  tagint **angle_atom1,**angle_atom2,**angle_atom3;

  int *num_dihedral;
  int **dihedral_type;
  tagint **dihedral_atom1,**dihedral_atom2,**dihedral_atom3,**dihedral_atom4;

  int *num_improper;
  int **improper_type;
  tagint **improper_atom1,**improper_atom2,**improper_atom3,**improper_atom4;

  // custom arrays used by fix property/atom

  int **ivector;
  double **dvector;
  char **iname,**dname;
  int nivector,ndvector;

  // used by USER-CUDA to flag used per-atom arrays

  unsigned int datamask;
  unsigned int datamask_ext;

  // atom style and per-atom array existence flags
  // customize by adding new flag

  int sphere_flag,ellipsoid_flag,line_flag,tri_flag,body_flag;
  int peri_flag,electron_flag;
  int ecp_flag;
  int wavepacket_flag,sph_flag;

  int molecule_flag,molindex_flag,molatom_flag;
  int q_flag,mu_flag;
  int rmass_flag,radius_flag,omega_flag,torque_flag,angmom_flag;
  int vfrac_flag,spin_flag,eradius_flag,ervel_flag,erforce_flag;
  int cs_flag,csforce_flag,vforce_flag,ervelforce_flag,etag_flag;
  int rho_flag,e_flag,cv_flag,vest_flag;

  // Peridynamics scale factor, used by dump cfg

  double pdscale;

  // molecule templates
  // each template can be a set of consecutive molecules
  // each with same ID (stored in molecules)
  // 1st molecule in template stores nset = # in set

  int nmolecule;
  class Molecule **molecules;

  // extra peratom info in restart file destined for fix & diag

  double **extra;

  // per-type arrays

  double *mass;
  int *mass_setflag;

  // callback ptrs for atom arrays managed by fix classes

  int nextra_grow,nextra_restart,nextra_border;  // # of callbacks of each type
  int *extra_grow,*extra_restart,*extra_border;  // index of fix to callback to
  int nextra_grow_max,nextra_restart_max;        // size of callback lists
  int nextra_border_max;
  int nextra_store;

  int map_style;                  // style of atom map: 0=none, 1=array, 2=hash
  int map_user;                   // user selected style = same 0,1,2
  tagint map_tag_max;             // max atom ID that map() is setup for

  // spatial sorting of atoms

  int sortfreq;             // sort atoms every this many steps, 0 = off
  bigint nextsort;          // next timestep to sort on

  // indices of atoms with same ID

  int *sametag;      // sametag[I] = next atom with same ID, -1 if no more

  // functions

  Atom(class LAMMPS *);
  ~Atom();

  void settings(class Atom *);
-  void create_avec(const char *, int, char **, char *suffix = NULL);
-  class AtomVec *new_avec(const char *, char *, int &);
+  void create_avec(const char *, int, char **, int);
+  class AtomVec *new_avec(const char *, int, int &);
  void init();
  void setup();

  class AtomVec *style_match(const char *);
  void modify_params(int, char **);
  void tag_check();
  void tag_extend();
  int tag_consecutive();

  int parse_data(const char *);
  int count_words(const char *);

  void deallocate_topology();

  void data_atoms(int, char *);
  void data_vels(int, char *);

  void data_bonds(int, char *, int *);
  void data_angles(int, char *, int *);
  void data_dihedrals(int, char *, int *);
  void data_impropers(int, char *, int *);

  void data_bonus(int, char *, class AtomVec *);
  void data_bodies(int, char *, class AtomVecBody *);

  virtual void allocate_type_arrays();
  void set_mass(const char *);
  void set_mass(int, double);
  void set_mass(int, char **);
  void set_mass(double *);
  void check_mass();

  int radius_consistency(int, double &);
  int shape_consistency(int, double &, double &, double &);

  void add_molecule(int, char **);
  int find_molecule(char *);
  void add_molecule_atom(class Molecule *, int, int, tagint);

  void first_reorder();
  virtual void sort();

  void add_callback(int);
  void delete_callback(const char *, int);
  void update_callback(int);

  int find_custom(char *, int &);
  int add_custom(char *, int);
  void remove_custom(int, int);

  void *extract(char *);

  // accessors for the array-style map: raw map_array ptr and map_tag_max+1

  inline int* get_map_array() {return map_array;};
  inline int get_map_size() {return map_tag_max+1;};

  bigint memory_usage();
  int memcheck(const char *);

  // functions for global to local ID mapping
  // map lookup function inlined for efficiency
  // return -1 if no map defined
  // map_style 1 indexes map_array directly, map_style 2 uses map_find_hash()

  inline int map(tagint global) {
    if (map_style == 1) return map_array[global];
    else if (map_style == 2) return map_find_hash(global);
    else return -1;
  };

  void map_init(int check = 1);
  void map_clear();
  void map_set();
  void map_one(tagint, int);
  int map_style_set();
  void map_delete();
  int map_find_hash(tagint);

 protected:

  // global to local ID mapping

  int *map_array;       // direct map via array that holds map_tag_max
  int map_maxarray;     // allocated size of map_array (1 larger than this)

  struct HashElem {     // hashed map
    tagint global;      // key to search on = global ID
    int local;          // value associated with key = local index
    int next;           // next entry in this bucket, -1 if last
  };
  int map_nhash;        // # of entries hash table can hold
  int map_nused;        // # of actual entries in hash table
  int map_free;         // ptr to 1st unused entry in hash table
  int map_nbucket;      // # of hash buckets
  int *map_bucket;      // ptr to 1st entry in each bucket
  HashElem *map_hash;   // hash table

  int max_same;         // allocated size of sametag

  // spatial sorting of atoms

  int nbins;                      // # of sorting bins
  int nbinx,nbiny,nbinz;          // bins in each dimension
  int maxbin;                     // max # of bins
  int maxnext;                    // max size of next,permute
  int *binhead;                   // 1st atom in each bin
  int *next;                      // next atom in bin
  int *permute;                   // permutation vector
  double userbinsize;             // requested sort bin size
  double bininvx,bininvy,bininvz; // inverse actual bin sizes
  double bboxlo[3],bboxhi[3];     // bounding box of my sub-domain

  int memlength;                  // allocated size of memstr
  char *memstr;                   // string of array names already counted

  void setup_sort_bins();
  int next_prime(int);
};

}

#endif

/* ERROR/WARNING messages:

E: Atom IDs must be used for molecular systems

Atom IDs are used to identify and find partner atoms in bonds.

E: Invalid atom style

The choice of atom style is unknown.

E: Could not find atom_modify first group ID

Self-explanatory.

E: Illegal ... command

Self-explanatory.
Check the input script syntax and compare to the
documentation for the command. You can use -echo screen as a
command-line option when running LAMMPS to see the offending line.

E: Atom_modify id command after simulation box is defined

The atom_modify id command cannot be used after a read_data,
read_restart, or create_box command.

E: Atom_modify map command after simulation box is defined

The atom_modify map command cannot be used after a read_data,
read_restart, or create_box command.

E: Atom_modify sort and first options cannot be used together

Self-explanatory.

E: Atom ID is negative

Self-explanatory.

E: Atom ID is too big

The limit on atom IDs is set by the SMALLBIG, BIGBIG, SMALLSMALL
setting in your Makefile. See Section_start 2.2 of the manual for
more details.

E: Atom ID is zero

Either all atom IDs must be zero or none of them.

E: Not all atom IDs are 0

Either all atom IDs must be zero or none of them.

E: New atom IDs exceed maximum allowed ID

See the setting for tagint in the src/lmptype.h file.

E: Incorrect atom format in data file

Number of values per atom line in the data file is not consistent with
the atom style.

E: Incorrect velocity format in data file

Each atom style defines a format for the Velocity section
of the data file. The read-in lines do not match.

E: Invalid atom ID in Velocities section of data file

Atom IDs must be positive integers and within range of defined
atoms.

E: Invalid atom ID in Bonds section of data file

Atom IDs must be positive integers and within range of defined
atoms.

E: Invalid bond type in Bonds section of data file

Bond type must be a positive integer and within range of specified bond
types.

E: Invalid atom ID in Angles section of data file

Atom IDs must be positive integers and within range of defined
atoms.

E: Invalid angle type in Angles section of data file

Angle type must be a positive integer and within range of specified angle
types.

E: Invalid atom ID in Dihedrals section of data file

Atom IDs must be positive integers and within range of defined
atoms.

E: Invalid dihedral type in Dihedrals section of data file

Dihedral type must be a positive integer and within range of specified
dihedral types.

E: Invalid atom ID in Impropers section of data file

Atom IDs must be positive integers and within range of defined
atoms.

E: Invalid improper type in Impropers section of data file

Improper type must be a positive integer and within range of specified
improper types.

E: Incorrect bonus data format in data file

See the read_data doc page for a description of how various kinds of
bonus data must be formatted for certain atom styles.

E: Invalid atom ID in Bonus section of data file

Atom IDs must be positive integers and within range of defined
atoms.

E: Invalid atom ID in Bodies section of data file

Atom IDs must be positive integers and within range of defined
atoms.

E: Cannot set mass for this atom style

This atom style does not support mass settings for each atom type.
Instead they are defined on a per-atom basis in the data file.

E: Invalid mass line in data file

Self-explanatory.

E: Invalid type for mass set

Mass command must set a type from 1-N where N is the number of atom
types.

E: Invalid mass value

Self-explanatory.

E: All masses are not set

For atom styles that define masses for each atom type, all masses must
be set in the data file or by the mass command before running a
simulation. They must also be set before using the velocity
command.

E: Reuse of molecule template ID

The template IDs must be unique.

E: Atom sort did not operate correctly

This is an internal LAMMPS error. Please report it to the
developers.

E: Atom sorting has bin size = 0.0

The neighbor cutoff is being used as the bin size, but it is zero.
Thus you must explicitly list a bin size in the atom_modify sort
command or turn off sorting.

E: Too many atom sorting bins

This is likely due to an immense simulation box that has blown up to a
large size.

*/
diff --git a/src/bond_hybrid.cpp b/src/bond_hybrid.cpp
index 454500861..63357a12e 100644
--- a/src/bond_hybrid.cpp
+++ b/src/bond_hybrid.cpp
@@ -1,359 +1,360 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation. Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software. This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

#include "math.h"
#include "string.h"
#include "ctype.h"
#include "bond_hybrid.h"
#include "atom.h"
#include "neighbor.h"
#include "domain.h"
#include "comm.h"
#include "force.h"
#include "memory.h"
#include "error.h"

using namespace LAMMPS_NS;

// extra entries added whenever a sub-style bondlist is reallocated in compute()

#define EXTRA 1000

/* ----------------------------------------------------------------------
   constructor: starts with no sub-styles
------------------------------------------------------------------------- */

BondHybrid::BondHybrid(LAMMPS *lmp) : Bond(lmp)
{
  writedata = 0;
  nstyles = 0;
}

/* ----------------------------------------------------------------------
   destructor: frees sub-styles and keywords, then per-type and
   per-sub-style arrays if allocate() was called
------------------------------------------------------------------------- */

BondHybrid::~BondHybrid()
{
  if (nstyles) {
    for (int i = 0; i < nstyles; i++) delete styles[i];
    delete [] styles;
    for (int i = 0; i < nstyles; i++) delete [] keywords[i];
    delete [] keywords;
  }

  if (allocated) {
    memory->destroy(setflag);
    memory->destroy(map);
    delete [] nbondlist;
    delete [] maxbond;
    for (int i = 0; i < nstyles; i++)
      memory->destroy(bondlist[i]);
    delete [] bondlist;
  }
}

/* ----------------------------------------------------------------------
   split the neighbor bondlist by sub-style, invoke each sub-style's
   compute(), and sum their energy/virial tallies into this style
------------------------------------------------------------------------- */

void BondHybrid::compute(int eflag, int vflag)
{
  int i,j,m,n;

  // save ptrs to original bondlist

  int nbondlist_orig = neighbor->nbondlist;
  int **bondlist_orig = neighbor->bondlist;

  // if this is re-neighbor step, create sub-style bondlists
  // nbondlist[] = length of each sub-style list
  // realloc sub-style bondlist if necessary
  // load sub-style bondlist with 3 values from original bondlist
  // bonds whose type maps to -1 (sub-style none) are skipped

  if (neighbor->ago == 0) {
    for (m = 0; m < nstyles; m++) nbondlist[m] = 0;
    for (i = 0; i < nbondlist_orig; i++) {
      m = map[bondlist_orig[i][2]];
      if (m >= 0) nbondlist[m]++;
    }
    for (m = 0; m < nstyles; m++) {
      if (nbondlist[m] > maxbond[m]) {
        memory->destroy(bondlist[m]);
        maxbond[m] = nbondlist[m] + EXTRA;
        memory->create(bondlist[m],maxbond[m],3,"bond_hybrid:bondlist");
      }
      nbondlist[m] = 0;
    }
    for (i = 0; i < nbondlist_orig; i++) {
      m = map[bondlist_orig[i][2]];
      if (m < 0) continue;
      n = nbondlist[m];
      bondlist[m][n][0] = bondlist_orig[i][0];
      bondlist[m][n][1] = bondlist_orig[i][1];
      bondlist[m][n][2] = bondlist_orig[i][2];
      nbondlist[m]++;
    }
  }

  // call each sub-style's compute function
  // set neighbor->bondlist to sub-style bondlist before call
  // accumulate sub-style global/peratom energy/virial in hybrid

  if (eflag || vflag) ev_setup(eflag,vflag);
  else evflag = 0;

  for (m = 0; m < nstyles; m++) {
    neighbor->nbondlist = nbondlist[m];
    neighbor->bondlist = bondlist[m];

    styles[m]->compute(eflag,vflag);

    if (eflag_global) energy += styles[m]->energy;
    if (vflag_global)
      for (n = 0; n < 6; n++) virial[n] += styles[m]->virial[n];
    if (eflag_atom) {
      n = atom->nlocal;
      if (force->newton_bond) n += atom->nghost;
      double *eatom_substyle = styles[m]->eatom;
      for (i = 0; i < n; i++) eatom[i] += eatom_substyle[i];
    }
    if (vflag_atom) {
      n = atom->nlocal;
      if (force->newton_bond) n += atom->nghost;
      double **vatom_substyle = styles[m]->vatom;
      for (i = 0; i < n; i++)
        for (j = 0; j < 6; j++)
          vatom[i][j] += vatom_substyle[i][j];
    }
  }

  // restore ptrs to original bondlist

  neighbor->nbondlist = nbondlist_orig;
  neighbor->bondlist = bondlist_orig;
}

/* ----------------------------------------------------------------------
   allocate per-type map/setflag and per-sub-style bondlist bookkeeping
   setflag is zeroed for types 1..n, map is left uninitialized here
------------------------------------------------------------------------- */

void BondHybrid::allocate()
{
  allocated = 1;
  int n = atom->nbondtypes;

  memory->create(map,n+1,"bond:map");
  memory->create(setflag,n+1,"bond:setflag");
  for (int i = 1; i <= n; i++) setflag[i] = 0;

  nbondlist = new int[nstyles];
  maxbond = new int[nstyles];
  bondlist = new int**[nstyles];
  for (int m = 0; m < nstyles; m++) maxbond[m] = 0;
  for (int m = 0; m < nstyles; m++) bondlist[m] = NULL;
}

/* ----------------------------------------------------------------------
   create one bond style for each arg in list
------------------------------------------------------------------------- */

void BondHybrid::settings(int narg, char **arg)
{
  int i,m,istyle;

  if (narg < 1) error->all(FLERR,"Illegal bond_style command");

  // delete old lists, since cannot just change settings

  if (nstyles) {
    for (int i = 0; i < nstyles; i++) delete styles[i];
    delete [] styles;
    for (int i = 0; i < nstyles; i++) delete [] keywords[i];
    delete [] keywords;
  }

  if (allocated) {
    memory->destroy(setflag);
    memory->destroy(map);
    delete [] nbondlist;
    delete [] maxbond;
    for (int i = 0; i < nstyles; i++)
      memory->destroy(bondlist[i]);
    delete [] bondlist;
  }
  allocated = 0;

  // count sub-styles by skipping numeric args
  // one exception is 1st arg of style "table", which is non-numeric word
  // need a better way to skip these exceptions

  nstyles = 0;
  i = 0;
  while (i < narg) {
    if (strcmp(arg[i],"table") == 0) i++;
    i++;
    while (i < narg && !isalpha(arg[i][0])) i++;
    nstyles++;
  }

  // allocate list of sub-styles

  styles = new Bond*[nstyles];
  keywords = new char*[nstyles];

  // allocate each sub-style and call its settings() with subset of args
  // define subset of args for a sub-style by skipping numeric args
  // one exception is 1st arg of style "table", which is non-numeric
  // need a better way to skip these exceptions

-  int dummy;
+  int sflag;
  nstyles = 0;
  i = 0;

  while (i < narg) {
    for (m = 0; m < nstyles; m++)
      if (strcmp(arg[i],keywords[m]) == 0)
        error->all(FLERR,"Bond style hybrid cannot use same bond style twice");
    if (strcmp(arg[i],"hybrid") == 0)
      error->all(FLERR,"Bond style hybrid cannot have hybrid as an argument");
    if (strcmp(arg[i],"none") == 0)
      error->all(FLERR,"Bond style hybrid cannot have none as an argument");
-    styles[nstyles] = force->new_bond(arg[i],lmp->suffix,dummy);
-    keywords[nstyles] = new char[strlen(arg[i])+1];
-    strcpy(keywords[nstyles],arg[i]);
+
+    styles[nstyles] = force->new_bond(arg[i],1,sflag);
+    force->store_style(keywords[nstyles],arg[i],sflag);
+
    istyle = i;
    if (strcmp(arg[i],"table") == 0) i++;
    i++;
    while (i < narg && !isalpha(arg[i][0])) i++;
    styles[nstyles]->settings(i-istyle-1,&arg[istyle+1]);
    nstyles++;
  }
}

/* ----------------------------------------------------------------------
   set coeffs for one type
---------------------------------------------------------------------- */

void BondHybrid::coeff(int narg, char **arg)
{
  if (!allocated) allocate();

  int ilo,ihi;
  force->bounds(arg[0],atom->nbondtypes,ilo,ihi);

  // 2nd arg = bond sub-style name
  // allow for "none" as valid sub-style name

  int m;
  for (m = 0; m < nstyles; m++)
    if (strcmp(arg[1],keywords[m]) == 0) break;

  int none = 0;
  if (m == nstyles) {
    if (strcmp(arg[1],"none") == 0) none = 1;
    else error->all(FLERR,"Bond coeff for hybrid has invalid style");
  }

  // move 1st arg to 2nd arg
  // just copy ptrs, since arg[] points into original input line

  arg[1] = arg[0];

  // invoke sub-style coeff() starting with 1st arg

  if (!none) styles[m]->coeff(narg-1,&arg[1]);

  // set setflag and which type maps to which sub-style
  // if sub-style is none: set hybrid setflag, wipe out map

  for (int i = ilo; i <= ihi; i++) {
    setflag[i] = 1;
    if (none) map[i] = -1;
    else map[i] = m;
  }
}

/* ----------------------------------------------------------------------
   invoke init_style() of every non-NULL sub-style
------------------------------------------------------------------------- */

void BondHybrid::init_style()
{
  for (int m = 0; m < nstyles; m++)
    if (styles[m]) styles[m]->init_style();
}

/* ----------------------------------------------------------------------
   return an equilibrium bond length
------------------------------------------------------------------------- */

double BondHybrid::equilibrium_distance(int i)
{
  if (map[i] < 0)
    error->one(FLERR,"Invoked bond equil distance on bond style none");
  return styles[map[i]]->equilibrium_distance(i);
}

/* ----------------------------------------------------------------------
   proc 0 writes to restart file
   writes nstyles, then length (including null) and text of each keyword
------------------------------------------------------------------------- */

void BondHybrid::write_restart(FILE *fp)
{
  fwrite(&nstyles,sizeof(int),1,fp);

  int n;
  for (int m = 0; m < nstyles; m++) {
    n = strlen(keywords[m]) + 1;
    fwrite(&n,sizeof(int),1,fp);
    fwrite(keywords[m],sizeof(char),n,fp);
  }
}

/* ----------------------------------------------------------------------
   proc 0 reads from restart file, bcasts
------------------------------------------------------------------------- */

void BondHybrid::read_restart(FILE *fp)
{
  // NOTE(review): fread() return values are not checked here

  int me = comm->me;
  if (me == 0) fread(&nstyles,sizeof(int),1,fp);
  MPI_Bcast(&nstyles,1,MPI_INT,0,world);
  styles = new Bond*[nstyles];
  keywords = new char*[nstyles];

  allocate();

  int n,dummy;
  for (int m = 0; m < nstyles; m++) {
    if (me == 0) fread(&n,sizeof(int),1,fp);
    MPI_Bcast(&n,1,MPI_INT,0,world);
    keywords[m] = new char[n];
    if (me == 0) fread(keywords[m],sizeof(char),n,fp);
    MPI_Bcast(keywords[m],n,MPI_CHAR,0,world);
-    styles[m] = force->new_bond(keywords[m],lmp->suffix,dummy);
+    styles[m] = force->new_bond(keywords[m],0,dummy);
  }
}

/* ----------------------------------------------------------------------
   forward single() to the sub-style that bond type maps to
   errors out if the type maps to sub-style none
------------------------------------------------------------------------- */

double BondHybrid::single(int type, double rsq, int i, int j,
                          double &fforce)
{
  if (map[type] < 0) error->one(FLERR,"Invoked bond single on bond style none");
  return styles[map[type]]->single(type,rsq,i,j,fforce);
}

/* ----------------------------------------------------------------------
   memory usage
------------------------------------------------------------------------- */

double BondHybrid::memory_usage()
{
  // NOTE(review): maxbond is only allocated in allocate(); confirm this
  // is never reached with nstyles > 0 before allocate() has run

  double bytes = maxeatom * sizeof(double);
  bytes += maxvatom*6 * sizeof(double);
  for (int m = 0; m < nstyles; m++) bytes += maxbond[m]*3 * sizeof(int);
  for (int m = 0; m < nstyles; m++)
    if (styles[m]) bytes += styles[m]->memory_usage();
  return bytes;
}
diff --git a/src/delete_bonds.cpp b/src/delete_bonds.cpp
index 3b2e9a528..b380508ee 100644
--- a/src/delete_bonds.cpp
+++ b/src/delete_bonds.cpp
@@ -1,592 +1,592 @@
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation. Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software. This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

#include "mpi.h"
#include "stdlib.h"
#include "string.h"
#include "delete_bonds.h"
#include "atom.h"
#include "atom_vec.h"
#include "domain.h"
#include "neighbor.h"
#include "comm.h"
#include "force.h"
#include "group.h"
#include "special.h"
#include "error.h"

using namespace LAMMPS_NS;

enum{MULTI,ATOM,BOND,ANGLE,DIHEDRAL,IMPROPER,STATS};

/* ---------------------------------------------------------------------- */

DeleteBonds::DeleteBonds(LAMMPS *lmp) : Pointers(lmp) {}

/* ---------------------------------------------------------------------- */

void DeleteBonds::command(int narg, char **arg)
{
  if (domain->box_exist == 0)
    error->all(FLERR,"Delete_bonds command before simulation box is defined");
  if (atom->natoms == 0)
    error->all(FLERR,"Delete_bonds command with no atoms existing");
  if (atom->molecular != 1)
    error->all(FLERR,"Cannot use delete_bonds with non-molecular system");
  if (narg < 2) error->all(FLERR,"Illegal delete_bonds command");

  // init entire system since comm->borders is done
  // comm::init needs neighbor::init needs pair::init needs kspace::init, etc

  if (comm->me == 0 && screen) fprintf(screen,"System init
for delete_bonds ...\n"); lmp->init(); if (comm->me == 0 && screen) fprintf(screen,"Deleting bonds ...\n"); // identify group int igroup = group->find(arg[0]); if (igroup == -1) error->all(FLERR,"Cannot find delete_bonds group ID"); int groupbit = group->bitmask[igroup]; // set style and which = type value int style = -1; if (strcmp(arg[1],"multi") == 0) style = MULTI; else if (strcmp(arg[1],"atom") == 0) style = ATOM; else if (strcmp(arg[1],"bond") == 0) style = BOND; else if (strcmp(arg[1],"angle") == 0) style = ANGLE; else if (strcmp(arg[1],"dihedral") == 0) style = DIHEDRAL; else if (strcmp(arg[1],"improper") == 0) style = IMPROPER; else if (strcmp(arg[1],"stats") == 0) style = STATS; else error->all(FLERR,"Illegal delete_bonds command"); // setup list of types (atom,bond,etc) to consider // use force->bounds() to allow setting of range of types // range can be 0 to ntypes inclusive int *tlist = NULL; int iarg = 2; int which; if (style != MULTI && style != STATS) { if (narg < 3) error->all(FLERR,"Illegal delete_bonds command"); int n = -1; if (style == ATOM) n = atom->ntypes; if (style == BOND) n = atom->nbondtypes; if (style == ANGLE) n = atom->nangletypes; if (style == DIHEDRAL) n = atom->ndihedraltypes; if (style == IMPROPER) n = atom->nimpropertypes; tlist = new int[n+1]; for (int i = 0; i <= n; i++) tlist[i] = 0; int nlo,nhi; force->bounds(arg[2],n,nlo,nhi,0); for (int i = nlo; i <= nhi; i++) tlist[i] = 1; iarg++; } // grab optional keywords int any_flag = 0; int undo_flag = 0; int remove_flag = 0; int special_flag = 0; int induce_flag = 0; while (iarg < narg) { if (strcmp(arg[iarg],"any") == 0) any_flag = 1; else if (strcmp(arg[iarg],"undo") == 0) undo_flag = 1; else if (strcmp(arg[iarg],"remove") == 0) remove_flag = 1; else if (strcmp(arg[iarg],"special") == 0) special_flag = 1; else if (strcmp(arg[iarg],"induce") == 0) induce_flag = 1; else error->all(FLERR,"Illegal delete_bonds command"); iarg++; } // border swap to insure type and mask is current for 
off-proc atoms // enforce PBC before in case atoms are outside box if (domain->triclinic) domain->x2lamda(atom->nlocal); domain->pbc(); domain->reset_box(); comm->setup(); comm->exchange(); comm->borders(); if (domain->triclinic) domain->lamda2x(atom->nlocal+atom->nghost); // set topology interactions either off or on // criteria for an interaction to potentially be changed (set flag = 1) // all atoms or any atom in interaction must be in group, based on any_flag // for style = MULTI, all bond/angle/dihedral/improper, no other criteria // for style = ATOM, same as MULTI, plus at least one atom is specified type // for style = BOND/ANGLE/DIHEDRAL/IMPROPER, interaction is specified type // for style = STATS only compute stats, flag is always 0 // if flag = 1 // set interaction type negative if undo_flag = 0 // set interaction type positive if undo_flag = 1 int *mask = atom->mask; int *type = atom->type; int nlocal = atom->nlocal; int i,m,n,consider,flag,itype; int atom1,atom2,atom3,atom4; if (atom->avec->bonds_allow && (style == BOND || style == MULTI || style == ATOM)) { int *num_bond = atom->num_bond; int **bond_type = atom->bond_type; for (i = 0; i < nlocal; i++) { for (m = 0; m < num_bond[i]; m++) { atom1 = atom->map(atom->bond_atom[i][m]); if (atom1 == -1) error->one(FLERR,"Bond atom missing in delete_bonds"); consider = 0; if (!any_flag && mask[i] & groupbit && mask[atom1] & groupbit) consider = 1; if (any_flag && (mask[i] & groupbit || mask[atom1] & groupbit)) consider = 1; if (consider) { flag = 0; if (style == MULTI) flag = 1; else if (style == ATOM) { if (tlist[type[i]] || tlist[type[atom1]]) flag = 1; } else if (style == BOND) { - itype = static_cast (fabs(bond_type[i][m])); + itype = abs(bond_type[i][m]); if (tlist[itype]) flag = 1; } if (flag) { if (undo_flag == 0 && bond_type[i][m] > 0) bond_type[i][m] = -bond_type[i][m]; if (undo_flag == 1 && bond_type[i][m] < 0) bond_type[i][m] = -bond_type[i][m]; } } } } } if (atom->avec->angles_allow && (style == 
ANGLE || style == MULTI || style == ATOM)) { int *num_angle = atom->num_angle; int **angle_type = atom->angle_type; for (i = 0; i < nlocal; i++) { for (m = 0; m < num_angle[i]; m++) { atom1 = atom->map(atom->angle_atom1[i][m]); atom2 = atom->map(atom->angle_atom2[i][m]); atom3 = atom->map(atom->angle_atom3[i][m]); if (atom1 == -1 || atom2 == -1 || atom3 == -1) error->one(FLERR,"Angle atom missing in delete_bonds"); consider = 0; if (!any_flag && mask[atom1] & groupbit && mask[atom2] & groupbit && mask[atom3] & groupbit) consider = 1; if (any_flag && (mask[atom1] & groupbit || mask[atom2] & groupbit || mask[atom3] & groupbit)) consider = 1; if (consider) { flag = 0; if (style == MULTI) flag = 1; else if (style == ATOM) { if (tlist[type[atom1]] || tlist[type[atom2]] || tlist[type[atom3]]) flag = 1; } else if (style == ANGLE) { - itype = static_cast (fabs(angle_type[i][m])); + itype = abs(angle_type[i][m]); if (tlist[itype]) flag = 1; } if (flag) { if (undo_flag == 0 && angle_type[i][m] > 0) angle_type[i][m] = -angle_type[i][m]; if (undo_flag == 1 && angle_type[i][m] < 0) angle_type[i][m] = -angle_type[i][m]; } } } } } if (atom->avec->dihedrals_allow && (style == DIHEDRAL || style == MULTI || style == ATOM)) { int *num_dihedral = atom->num_dihedral; int **dihedral_type = atom->dihedral_type; for (i = 0; i < nlocal; i++) { for (m = 0; m < num_dihedral[i]; m++) { atom1 = atom->map(atom->dihedral_atom1[i][m]); atom2 = atom->map(atom->dihedral_atom2[i][m]); atom3 = atom->map(atom->dihedral_atom3[i][m]); atom4 = atom->map(atom->dihedral_atom4[i][m]); if (atom1 == -1 || atom2 == -1 || atom3 == -1 || atom4 == -1) error->one(FLERR,"Dihedral atom missing in delete_bonds"); consider = 0; if (!any_flag && mask[atom1] & groupbit && mask[atom2] & groupbit && mask[atom3] & groupbit && mask[atom4] & groupbit) consider = 1; if (any_flag && (mask[atom1] & groupbit || mask[atom2] & groupbit || mask[atom3] & groupbit || mask[atom4] & groupbit)) consider = 1; if (consider) { flag = 0; if 
(style == MULTI) flag = 1; else if (style == ATOM) { if (tlist[type[atom1]] || tlist[type[atom2]] || tlist[type[atom3]] || tlist[type[atom4]]) flag = 1; } else if (style == DIHEDRAL) { - itype = static_cast (fabs(dihedral_type[i][m])); + itype = abs(dihedral_type[i][m]); if (tlist[itype]) flag = 1; } if (flag) { if (undo_flag == 0 && dihedral_type[i][m] > 0) dihedral_type[i][m] = -dihedral_type[i][m]; if (undo_flag == 1 && dihedral_type[i][m] < 0) dihedral_type[i][m] = -dihedral_type[i][m]; } } } } } if (atom->avec->impropers_allow && (style == IMPROPER || style == MULTI || style == ATOM)) { int *num_improper = atom->num_improper; int **improper_type = atom->improper_type; for (i = 0; i < nlocal; i++) { for (m = 0; m < num_improper[i]; m++) { atom1 = atom->map(atom->improper_atom1[i][m]); atom2 = atom->map(atom->improper_atom2[i][m]); atom3 = atom->map(atom->improper_atom3[i][m]); atom4 = atom->map(atom->improper_atom4[i][m]); if (atom1 == -1 || atom2 == -1 || atom3 == -1 || atom4 == -1) error->one(FLERR,"Improper atom missing in delete_bonds"); consider = 0; if (!any_flag && mask[atom1] & groupbit && mask[atom2] & groupbit && mask[atom3] & groupbit && mask[atom4] & groupbit) consider = 1; if (any_flag && (mask[atom1] & groupbit || mask[atom2] & groupbit || mask[atom3] & groupbit || mask[atom4] & groupbit)) consider = 1; if (consider) { flag = 0; if (style == MULTI) flag = 1; else if (style == ATOM) { if (tlist[type[atom1]] || tlist[type[atom2]] || tlist[type[atom3]] || tlist[type[atom4]]) flag = 1; } else if (style == IMPROPER) { - itype = static_cast (fabs(improper_type[i][m])); + itype = abs(improper_type[i][m]); if (tlist[itype]) flag = 1; } if (flag) { if (undo_flag == 0 && improper_type[i][m] > 0) improper_type[i][m] = -improper_type[i][m]; if (undo_flag == 1 && improper_type[i][m] < 0) improper_type[i][m] = -improper_type[i][m]; } } } } } delete [] tlist; // induce turn off of angles, dihedral, impropers due to turned off bonds // induce turn off of 
dihedrals due to turned off angles // all atoms or any atom in interaction must be in group, based on any_flag if (induce_flag) { // circulate list of turned off bonds around ring of procs // circulate list of turned off angles around ring of procs } // remove interactions if requested // all atoms or any atom in interaction must be in group, based on any_flag if (remove_flag) { if (atom->avec->bonds_allow) { for (i = 0; i < nlocal; i++) { m = 0; while (m < atom->num_bond[i]) { if (atom->bond_type[i][m] <= 0) { atom1 = atom->map(atom->bond_atom[i][m]); flag = 0; if (!any_flag && mask[i] & groupbit && mask[atom1] & groupbit) flag = 1; if (any_flag && (mask[i] & groupbit || mask[atom1] & groupbit)) flag = 1; if (flag) { n = atom->num_bond[i]; atom->bond_type[i][m] = atom->bond_type[i][n-1]; atom->bond_atom[i][m] = atom->bond_atom[i][n-1]; atom->num_bond[i]--; } else m++; } else m++; } } } if (atom->avec->angles_allow) { for (i = 0; i < nlocal; i++) { m = 0; while (m < atom->num_angle[i]) { if (atom->angle_type[i][m] <= 0) { atom1 = atom->map(atom->angle_atom1[i][m]); atom2 = atom->map(atom->angle_atom2[i][m]); atom3 = atom->map(atom->angle_atom3[i][m]); flag = 0; if (!any_flag && mask[atom1] & groupbit && mask[atom2] & groupbit && mask[atom3] & groupbit) flag = 1; if (any_flag && (mask[atom1] & groupbit || mask[atom2] & groupbit || mask[atom3] & groupbit)) flag = 1; if (flag) { n = atom->num_angle[i]; atom->angle_type[i][m] = atom->angle_type[i][n-1]; atom->angle_atom1[i][m] = atom->angle_atom1[i][n-1]; atom->angle_atom2[i][m] = atom->angle_atom2[i][n-1]; atom->angle_atom3[i][m] = atom->angle_atom3[i][n-1]; atom->num_angle[i]--; } else m++; } else m++; } } } if (atom->avec->dihedrals_allow) { for (i = 0; i < nlocal; i++) { m = 0; while (m < atom->num_dihedral[i]) { if (atom->dihedral_type[i][m] <= 0) { atom1 = atom->map(atom->dihedral_atom1[i][m]); atom2 = atom->map(atom->dihedral_atom2[i][m]); atom3 = atom->map(atom->dihedral_atom3[i][m]); atom4 = 
atom->map(atom->dihedral_atom4[i][m]); flag = 0; if (!any_flag && mask[atom1] & groupbit && mask[atom2] & groupbit && mask[atom3] & groupbit && mask[atom4] & groupbit) flag = 1; if (any_flag && (mask[atom1] & groupbit || mask[atom2] & groupbit || mask[atom3] & groupbit || mask[atom4] & groupbit)) flag = 1; if (flag) { n = atom->num_dihedral[i]; atom->dihedral_type[i][m] = atom->dihedral_type[i][n-1]; atom->dihedral_atom1[i][m] = atom->dihedral_atom1[i][n-1]; atom->dihedral_atom2[i][m] = atom->dihedral_atom2[i][n-1]; atom->dihedral_atom3[i][m] = atom->dihedral_atom3[i][n-1]; atom->dihedral_atom4[i][m] = atom->dihedral_atom4[i][n-1]; atom->num_dihedral[i]--; } else m++; } else m++; } } } if (atom->avec->impropers_allow) { for (i = 0; i < nlocal; i++) { m = 0; while (m < atom->num_improper[i]) { if (atom->improper_type[i][m] <= 0) { atom1 = atom->map(atom->improper_atom1[i][m]); atom2 = atom->map(atom->improper_atom2[i][m]); atom3 = atom->map(atom->improper_atom3[i][m]); atom4 = atom->map(atom->improper_atom4[i][m]); flag = 0; if (!any_flag && mask[atom1] & groupbit && mask[atom2] & groupbit && mask[atom3] & groupbit && mask[atom4] & groupbit) flag = 1; if (any_flag && (mask[atom1] & groupbit || mask[atom2] & groupbit || mask[atom3] & groupbit || mask[atom4] & groupbit)) flag = 1; if (flag) { n = atom->num_improper[i]; atom->improper_type[i][m] = atom->improper_type[i][n-1]; atom->improper_atom1[i][m] = atom->improper_atom1[i][n-1]; atom->improper_atom2[i][m] = atom->improper_atom2[i][n-1]; atom->improper_atom3[i][m] = atom->improper_atom3[i][n-1]; atom->improper_atom4[i][m] = atom->improper_atom4[i][n-1]; atom->num_improper[i]--; } else m++; } else m++; } } } } // if interactions were removed, recompute global counts if (remove_flag) { if (atom->avec->bonds_allow) { bigint nbonds = 0; for (i = 0; i < nlocal; i++) nbonds += atom->num_bond[i]; MPI_Allreduce(&nbonds,&atom->nbonds,1,MPI_LMP_BIGINT, MPI_SUM,world); if (force->newton_bond == 0) atom->nbonds /= 2; } if 
(atom->avec->angles_allow) { bigint nangles = 0; for (i = 0; i < nlocal; i++) nangles += atom->num_angle[i]; MPI_Allreduce(&nangles,&atom->nangles,1,MPI_LMP_BIGINT, MPI_SUM,world); if (force->newton_bond == 0) atom->nangles /= 3; } if (atom->avec->dihedrals_allow) { bigint ndihedrals = 0; for (i = 0; i < nlocal; i++) ndihedrals += atom->num_dihedral[i]; MPI_Allreduce(&ndihedrals,&atom->ndihedrals, 1,MPI_LMP_BIGINT,MPI_SUM,world); if (force->newton_bond == 0) atom->ndihedrals /= 4; } if (atom->avec->impropers_allow) { bigint nimpropers = 0; for (i = 0; i < nlocal; i++) nimpropers += atom->num_improper[i]; MPI_Allreduce(&nimpropers,&atom->nimpropers, 1,MPI_LMP_BIGINT,MPI_SUM,world); if (force->newton_bond == 0) atom->nimpropers /= 4; } } // compute and print stats bigint tmp; bigint bond_on,bond_off; bigint angle_on,angle_off; bigint dihedral_on,dihedral_off; bigint improper_on,improper_off; if (atom->avec->bonds_allow) { bond_on = bond_off = 0; for (i = 0; i < nlocal; i++) for (m = 0; m < atom->num_bond[i]; m++) if (atom->bond_type[i][m] > 0) bond_on++; else bond_off++; MPI_Allreduce(&bond_on,&tmp,1,MPI_LMP_BIGINT,MPI_SUM,world); bond_on = tmp; MPI_Allreduce(&bond_off,&tmp,1,MPI_LMP_BIGINT,MPI_SUM,world); bond_off = tmp; if (force->newton_bond == 0) { bond_on /= 2; bond_off /= 2; } } if (atom->avec->angles_allow) { angle_on = angle_off = 0; for (i = 0; i < nlocal; i++) for (m = 0; m < atom->num_angle[i]; m++) if (atom->angle_type[i][m] > 0) angle_on++; else angle_off++; MPI_Allreduce(&angle_on,&tmp,1,MPI_LMP_BIGINT,MPI_SUM,world); angle_on = tmp; MPI_Allreduce(&angle_off,&tmp,1,MPI_LMP_BIGINT,MPI_SUM,world); angle_off = tmp; if (force->newton_bond == 0) { angle_on /= 3; angle_off /= 3; } } if (atom->avec->dihedrals_allow) { dihedral_on = dihedral_off = 0; for (i = 0; i < nlocal; i++) for (m = 0; m < atom->num_dihedral[i]; m++) if (atom->dihedral_type[i][m] > 0) dihedral_on++; else dihedral_off++; MPI_Allreduce(&dihedral_on,&tmp,1,MPI_LMP_BIGINT,MPI_SUM,world); 
dihedral_on = tmp; MPI_Allreduce(&dihedral_off,&tmp,1,MPI_LMP_BIGINT,MPI_SUM,world); dihedral_off = tmp; if (force->newton_bond == 0) { dihedral_on /= 4; dihedral_off /= 4; } } if (atom->avec->impropers_allow) { improper_on = improper_off = 0; for (i = 0; i < nlocal; i++) for (m = 0; m < atom->num_improper[i]; m++) if (atom->improper_type[i][m] > 0) improper_on++; else improper_off++; MPI_Allreduce(&improper_on,&tmp,1,MPI_LMP_BIGINT,MPI_SUM,world); improper_on = tmp; MPI_Allreduce(&improper_off,&tmp,1,MPI_LMP_BIGINT,MPI_SUM,world); improper_off = tmp; if (force->newton_bond == 0) { improper_on /= 4; improper_off /= 4; } } if (comm->me == 0) { if (atom->avec->bonds_allow) { if (screen) fprintf(screen, " " BIGINT_FORMAT " total bonds, " BIGINT_FORMAT " turned on, " BIGINT_FORMAT " turned off\n", atom->nbonds,bond_on,bond_off); if (logfile) fprintf(logfile, " " BIGINT_FORMAT " total bonds, " BIGINT_FORMAT " turned on, " BIGINT_FORMAT " turned off\n", atom->nbonds,bond_on,bond_off); } if (atom->avec->angles_allow) { if (screen) fprintf(screen, " " BIGINT_FORMAT " total angles, " BIGINT_FORMAT " turned on, " BIGINT_FORMAT " turned off\n", atom->nangles,angle_on,angle_off); if (logfile) fprintf(logfile, " " BIGINT_FORMAT " total angles, " BIGINT_FORMAT " turned on, " BIGINT_FORMAT " turned off\n", atom->nangles,angle_on,angle_off); } if (atom->avec->dihedrals_allow) { if (screen) fprintf(screen, " " BIGINT_FORMAT " total dihedrals, " BIGINT_FORMAT " turned on, " BIGINT_FORMAT " turned off\n", atom->ndihedrals,dihedral_on,dihedral_off); if (logfile) fprintf(logfile, " " BIGINT_FORMAT " total dihedrals, " BIGINT_FORMAT " turned on, " BIGINT_FORMAT " turned off\n", atom->ndihedrals,dihedral_on,dihedral_off); } if (atom->avec->impropers_allow) { if (screen) fprintf(screen, " " BIGINT_FORMAT " total impropers, " BIGINT_FORMAT " turned on, " BIGINT_FORMAT " turned off\n", atom->nimpropers,improper_on,improper_off); if (logfile) fprintf(logfile, " " BIGINT_FORMAT " total 
impropers, " BIGINT_FORMAT " turned on, " BIGINT_FORMAT " turned off\n", atom->nimpropers,improper_on,improper_off); } } // re-compute special list if requested if (special_flag) { Special special(lmp); special.build(); } } diff --git a/src/dihedral_hybrid.cpp b/src/dihedral_hybrid.cpp index 7b0dea64d..6f8ef5a09 100644 --- a/src/dihedral_hybrid.cpp +++ b/src/dihedral_hybrid.cpp @@ -1,350 +1,351 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #include "math.h" #include "string.h" #include "ctype.h" #include "dihedral_hybrid.h" #include "atom.h" #include "neighbor.h" #include "domain.h" #include "comm.h" #include "force.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; #define EXTRA 1000 /* ---------------------------------------------------------------------- */ DihedralHybrid::DihedralHybrid(LAMMPS *lmp) : Dihedral(lmp) { nstyles = 0; } /* ---------------------------------------------------------------------- */ DihedralHybrid::~DihedralHybrid() { if (nstyles) { for (int i = 0; i < nstyles; i++) delete styles[i]; delete [] styles; for (int i = 0; i < nstyles; i++) delete [] keywords[i]; delete [] keywords; } if (allocated) { memory->destroy(setflag); memory->destroy(map); delete [] ndihedrallist; delete [] maxdihedral; for (int i = 0; i < nstyles; i++) memory->destroy(dihedrallist[i]); delete [] dihedrallist; } } /* ---------------------------------------------------------------------- */ void 
DihedralHybrid::compute(int eflag, int vflag) { int i,j,m,n; // save ptrs to original dihedrallist int ndihedrallist_orig = neighbor->ndihedrallist; int **dihedrallist_orig = neighbor->dihedrallist; // if this is re-neighbor step, create sub-style dihedrallists // ndihedrallist[] = length of each sub-style list // realloc sub-style dihedrallist if necessary // load sub-style dihedrallist with 5 values from original dihedrallist if (neighbor->ago == 0) { for (m = 0; m < nstyles; m++) ndihedrallist[m] = 0; for (i = 0; i < ndihedrallist_orig; i++) { m = map[dihedrallist_orig[i][4]]; if (m >= 0) ndihedrallist[m]++; } for (m = 0; m < nstyles; m++) { if (ndihedrallist[m] > maxdihedral[m]) { memory->destroy(dihedrallist[m]); maxdihedral[m] = ndihedrallist[m] + EXTRA; memory->create(dihedrallist[m],maxdihedral[m],5, "dihedral_hybrid:dihedrallist"); } ndihedrallist[m] = 0; } for (i = 0; i < ndihedrallist_orig; i++) { m = map[dihedrallist_orig[i][4]]; if (m < 0) continue; n = ndihedrallist[m]; dihedrallist[m][n][0] = dihedrallist_orig[i][0]; dihedrallist[m][n][1] = dihedrallist_orig[i][1]; dihedrallist[m][n][2] = dihedrallist_orig[i][2]; dihedrallist[m][n][3] = dihedrallist_orig[i][3]; dihedrallist[m][n][4] = dihedrallist_orig[i][4]; ndihedrallist[m]++; } } // call each sub-style's compute function // set neighbor->dihedrallist to sub-style dihedrallist before call // accumulate sub-style global/peratom energy/virial in hybrid if (eflag || vflag) ev_setup(eflag,vflag); else evflag = 0; for (m = 0; m < nstyles; m++) { neighbor->ndihedrallist = ndihedrallist[m]; neighbor->dihedrallist = dihedrallist[m]; styles[m]->compute(eflag,vflag); if (eflag_global) energy += styles[m]->energy; if (vflag_global) for (n = 0; n < 6; n++) virial[n] += styles[m]->virial[n]; if (eflag_atom) { n = atom->nlocal; if (force->newton_bond) n += atom->nghost; double *eatom_substyle = styles[m]->eatom; for (i = 0; i < n; i++) eatom[i] += eatom_substyle[i]; } if (vflag_atom) { n = atom->nlocal; if 
(force->newton_bond) n += atom->nghost; double **vatom_substyle = styles[m]->vatom; for (i = 0; i < n; i++) for (j = 0; j < 6; j++) vatom[i][j] += vatom_substyle[i][j]; } } // restore ptrs to original dihedrallist neighbor->ndihedrallist = ndihedrallist_orig; neighbor->dihedrallist = dihedrallist_orig; } /* ---------------------------------------------------------------------- */ void DihedralHybrid::allocate() { allocated = 1; int n = atom->ndihedraltypes; memory->create(map,n+1,"dihedral:map"); memory->create(setflag,n+1,"dihedral:setflag"); for (int i = 1; i <= n; i++) setflag[i] = 0; ndihedrallist = new int[nstyles]; maxdihedral = new int[nstyles]; dihedrallist = new int**[nstyles]; for (int m = 0; m < nstyles; m++) maxdihedral[m] = 0; for (int m = 0; m < nstyles; m++) dihedrallist[m] = NULL; } /* ---------------------------------------------------------------------- create one dihedral style for each arg in list ------------------------------------------------------------------------- */ void DihedralHybrid::settings(int narg, char **arg) { int i,m,istyle; if (narg < 1) error->all(FLERR,"Illegal dihedral_style command"); // delete old lists, since cannot just change settings if (nstyles) { for (int i = 0; i < nstyles; i++) delete styles[i]; delete [] styles; for (int i = 0; i < nstyles; i++) delete [] keywords[i]; delete [] keywords; } if (allocated) { memory->destroy(setflag); memory->destroy(map); delete [] ndihedrallist; delete [] maxdihedral; for (int i = 0; i < nstyles; i++) memory->destroy(dihedrallist[i]); delete [] dihedrallist; } allocated = 0; // count sub-styles by skipping numeric args // one exception is 1st arg of style "table", which is non-numeric word // need a better way to skip these exceptions nstyles = 0; i = 0; while (i < narg) { if (strcmp(arg[i],"table") == 0) i++; i++; while (i < narg && !isalpha(arg[i][0])) i++; nstyles++; } // allocate list of sub-styles styles = new Dihedral*[nstyles]; keywords = new char*[nstyles]; // allocate each 
sub-style and call its settings() with subset of args // define subset of args for a sub-style by skipping numeric args // one exception is 1st arg of style "table", which is non-numeric // need a better way to skip these exceptions - int dummy; + int sflag; nstyles = 0; i = 0; while (i < narg) { for (m = 0; m < nstyles; m++) if (strcmp(arg[i],keywords[m]) == 0) error->all(FLERR,"Dihedral style hybrid cannot use " "same dihedral style twice"); if (strcmp(arg[i],"hybrid") == 0) error->all(FLERR, "Dihedral style hybrid cannot have hybrid as an argument"); if (strcmp(arg[i],"none") == 0) error->all(FLERR,"Dihedral style hybrid cannot have none as an argument"); - styles[nstyles] = force->new_dihedral(arg[i],lmp->suffix,dummy); - keywords[nstyles] = new char[strlen(arg[i])+1]; - strcpy(keywords[nstyles],arg[i]); + + styles[nstyles] = force->new_dihedral(arg[i],1,sflag); + force->store_style(keywords[nstyles],arg[i],sflag); + istyle = i; if (strcmp(arg[i],"table") == 0) i++; i++; while (i < narg && !isalpha(arg[i][0])) i++; styles[nstyles]->settings(i-istyle-1,&arg[istyle+1]); nstyles++; } } /* ---------------------------------------------------------------------- set coeffs for one type ---------------------------------------------------------------------- */ void DihedralHybrid::coeff(int narg, char **arg) { if (!allocated) allocate(); int ilo,ihi; force->bounds(arg[0],atom->ndihedraltypes,ilo,ihi); // 2nd arg = dihedral sub-style name // allow for "none" or "skip" as valid sub-style name int m; for (m = 0; m < nstyles; m++) if (strcmp(arg[1],keywords[m]) == 0) break; int none = 0; int skip = 0; if (m == nstyles) { if (strcmp(arg[1],"none") == 0) none = 1; else if (strcmp(arg[1],"skip") == 0) none = skip = 1; else error->all(FLERR,"Dihedral coeff for hybrid has invalid style"); } // move 1st arg to 2nd arg // just copy ptrs, since arg[] points into original input line arg[1] = arg[0]; // invoke sub-style coeff() starting with 1st arg if (!none) 
styles[m]->coeff(narg-1,&arg[1]); // set setflag and which type maps to which sub-style // if sub-style is skip: auxiliary class2 setting in data file so ignore // if sub-style is none and not skip: set hybrid setflag, wipe out map for (int i = ilo; i <= ihi; i++) { if (skip) continue; else if (none) { setflag[i] = 1; map[i] = -1; } else { setflag[i] = styles[m]->setflag[i]; map[i] = m; } } } /* ---------------------------------------------------------------------- */ void DihedralHybrid::init_style() { for (int m = 0; m < nstyles; m++) if (styles[m]) styles[m]->init_style(); } /* ---------------------------------------------------------------------- proc 0 writes to restart file ------------------------------------------------------------------------- */ void DihedralHybrid::write_restart(FILE *fp) { fwrite(&nstyles,sizeof(int),1,fp); int n; for (int m = 0; m < nstyles; m++) { n = strlen(keywords[m]) + 1; fwrite(&n,sizeof(int),1,fp); fwrite(keywords[m],sizeof(char),n,fp); } } /* ---------------------------------------------------------------------- proc 0 reads from restart file, bcasts ------------------------------------------------------------------------- */ void DihedralHybrid::read_restart(FILE *fp) { int me = comm->me; if (me == 0) fread(&nstyles,sizeof(int),1,fp); MPI_Bcast(&nstyles,1,MPI_INT,0,world); styles = new Dihedral*[nstyles]; keywords = new char*[nstyles]; allocate(); int n,dummy; for (int m = 0; m < nstyles; m++) { if (me == 0) fread(&n,sizeof(int),1,fp); MPI_Bcast(&n,1,MPI_INT,0,world); keywords[m] = new char[n]; if (me == 0) fread(keywords[m],sizeof(char),n,fp); MPI_Bcast(keywords[m],n,MPI_CHAR,0,world); - styles[m] = force->new_dihedral(keywords[m],lmp->suffix,dummy); + styles[m] = force->new_dihedral(keywords[m],0,dummy); } } /* ---------------------------------------------------------------------- memory usage ------------------------------------------------------------------------- */ double DihedralHybrid::memory_usage() { double bytes = 
maxeatom * sizeof(double); bytes += maxvatom*6 * sizeof(double); for (int m = 0; m < nstyles; m++) bytes += maxdihedral[m]*5 * sizeof(int); for (int m = 0; m < nstyles; m++) if (styles[m]) bytes += styles[m]->memory_usage(); return bytes; } diff --git a/src/force.cpp b/src/force.cpp index c316c04a5..832133548 100644 --- a/src/force.cpp +++ b/src/force.cpp @@ -1,933 +1,975 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #include "stdlib.h" #include "string.h" #include "ctype.h" #include "force.h" #include "style_bond.h" #include "style_angle.h" #include "style_dihedral.h" #include "style_improper.h" #include "style_pair.h" #include "style_kspace.h" #include "atom.h" #include "comm.h" #include "pair.h" #include "pair_hybrid.h" #include "pair_hybrid_overlay.h" #include "bond.h" #include "bond_hybrid.h" #include "angle.h" #include "dihedral.h" #include "improper.h" #include "kspace.h" #include "group.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ Force::Force(LAMMPS *lmp) : Pointers(lmp) { newton = newton_pair = newton_bond = 1; special_lj[0] = special_coul[0] = 1.0; special_lj[1] = special_lj[2] = special_lj[3] = 0.0; special_coul[1] = special_coul[2] = special_coul[3] = 0.0; special_angle = special_dihedral = 0; special_extra = 0; dielectric = 1.0; pair = NULL; bond = NULL; angle = NULL; dihedral = NULL; improper = NULL; 
kspace = NULL; char *str = (char *) "none"; int n = strlen(str) + 1; pair_style = new char[n]; strcpy(pair_style,str); bond_style = new char[n]; strcpy(bond_style,str); angle_style = new char[n]; strcpy(angle_style,str); dihedral_style = new char[n]; strcpy(dihedral_style,str); improper_style = new char[n]; strcpy(improper_style,str); kspace_style = new char[n]; strcpy(kspace_style,str); // fill pair map with pair styles listed in style_pair.h pair_map = new std::map(); #define PAIR_CLASS #define PairStyle(key,Class) \ (*pair_map)[#key] = &pair_creator; #include "style_pair.h" #undef PairStyle #undef PAIR_CLASS } /* ---------------------------------------------------------------------- */ Force::~Force() { delete [] pair_style; delete [] bond_style; delete [] angle_style; delete [] dihedral_style; delete [] improper_style; delete [] kspace_style; if (pair) delete pair; if (bond) delete bond; if (angle) delete angle; if (dihedral) delete dihedral; if (improper) delete improper; if (kspace) delete kspace; delete pair_map; } /* ---------------------------------------------------------------------- */ void Force::init() { qqrd2e = qqr2e/dielectric; if (kspace) kspace->init(); // kspace must come before pair if (pair) pair->init(); // so g_ewald is defined if (bond) bond->init(); if (angle) angle->init(); if (dihedral) dihedral->init(); if (improper) improper->init(); } /* ---------------------------------------------------------------------- create a pair style, called from input script or restart file ------------------------------------------------------------------------- */ -void Force::create_pair(const char *style, const char *suffix) +void Force::create_pair(const char *style, int trysuffix) { delete [] pair_style; if (pair) delete pair; int sflag; - pair = new_pair(style,suffix,sflag); - - if (sflag) { - char estyle[256]; - sprintf(estyle,"%s/%s",style,suffix); - int n = strlen(estyle) + 1; - pair_style = new char[n]; - strcpy(pair_style,estyle); - } else { - 
int n = strlen(style) + 1; - pair_style = new char[n]; - strcpy(pair_style,style); - } + pair = new_pair(style,trysuffix,sflag); + store_style(pair_style,style,sflag); } /* ---------------------------------------------------------------------- generate a pair class - try first with suffix appended + if trysuffix = 1, try first with suffix1/2 appended + return sflag = 0 for no suffix added, 1 or 2 for suffix1/2 added ------------------------------------------------------------------------- */ -Pair *Force::new_pair(const char *style, const char *suffix, int &sflag) +Pair *Force::new_pair(const char *style, int trysuffix, int &sflag) { - if (suffix && lmp->suffix_enable) { - sflag = 1; - char estyle[256]; - sprintf(estyle,"%s/%s",style,suffix); - - if (pair_map->find(estyle) != pair_map->end()) { - PairCreator pair_creator = (*pair_map)[estyle]; - return pair_creator(lmp); + if (trysuffix && lmp->suffix_enable) { + if (lmp->suffix) { + sflag = 1; + char estyle[256]; + sprintf(estyle,"%s/%s",style,lmp->suffix); + if (pair_map->find(estyle) != pair_map->end()) { + PairCreator pair_creator = (*pair_map)[estyle]; + return pair_creator(lmp); + } + } + if (lmp->suffix2) { + sflag = 2; + char estyle[256]; + sprintf(estyle,"%s/%s",style,lmp->suffix2); + if (pair_map->find(estyle) != pair_map->end()) { + PairCreator pair_creator = (*pair_map)[estyle]; + return pair_creator(lmp); + } } } sflag = 0; - if (strcmp(style,"none") == 0) return NULL; if (pair_map->find(style) != pair_map->end()) { PairCreator pair_creator = (*pair_map)[style]; return pair_creator(lmp); } error->all(FLERR,"Invalid pair style"); return NULL; } /* ---------------------------------------------------------------------- one instance per pair style in style_pair.h ------------------------------------------------------------------------- */ template Pair *Force::pair_creator(LAMMPS *lmp) { return new T(lmp); } /* ---------------------------------------------------------------------- return ptr to Pair class 
if matches word or matches hybrid sub-style if exact, then style name must be exact match to word if not exact, style name must contain word return NULL if no match or multiple sub-styles match ------------------------------------------------------------------------- */ Pair *Force::pair_match(const char *word, int exact) { int iwhich,count; if (exact && strcmp(pair_style,word) == 0) return pair; else if (!exact && strstr(pair_style,word)) return pair; else if (strstr(pair_style,"hybrid/overlay")) { PairHybridOverlay *hybrid = (PairHybridOverlay *) pair; count = 0; for (int i = 0; i < hybrid->nstyles; i++) if ((exact && strcmp(hybrid->keywords[i],word) == 0) || (!exact && strstr(hybrid->keywords[i],word))) { iwhich = i; count++; } if (count == 1) return hybrid->styles[iwhich]; } else if (strstr(pair_style,"hybrid")) { PairHybrid *hybrid = (PairHybrid *) pair; count = 0; for (int i = 0; i < hybrid->nstyles; i++) if ((exact && strcmp(hybrid->keywords[i],word) == 0) || (!exact && strstr(hybrid->keywords[i],word))) { iwhich = i; count++; } if (count == 1) return hybrid->styles[iwhich]; } return NULL; } /* ---------------------------------------------------------------------- create a bond style, called from input script or restart file ------------------------------------------------------------------------- */ -void Force::create_bond(const char *style, const char *suffix) +void Force::create_bond(const char *style, int trysuffix) { delete [] bond_style; if (bond) delete bond; int sflag; - bond = new_bond(style,suffix,sflag); - - if (sflag) { - char estyle[256]; - sprintf(estyle,"%s/%s",style,suffix); - int n = strlen(estyle) + 1; - bond_style = new char[n]; - strcpy(bond_style,estyle); - } else { - int n = strlen(style) + 1; - bond_style = new char[n]; - strcpy(bond_style,style); - } + bond = new_bond(style,trysuffix,sflag); + store_style(bond_style,style,sflag); } /* ---------------------------------------------------------------------- generate a bond class, fist 
with suffix appended ------------------------------------------------------------------------- */ -Bond *Force::new_bond(const char *style, const char *suffix, int &sflag) +Bond *Force::new_bond(const char *style, int trysuffix, int &sflag) { - if (suffix && lmp->suffix_enable) { - sflag = 1; - char estyle[256]; - sprintf(estyle,"%s/%s",style,suffix); + if (trysuffix && lmp->suffix_enable) { + if (lmp->suffix) { + sflag = 1; + char estyle[256]; + sprintf(estyle,"%s/%s",style,lmp->suffix); + + if (0) return NULL; + +#define BOND_CLASS +#define BondStyle(key,Class) \ + else if (strcmp(estyle,#key) == 0) return new Class(lmp); +#include "style_bond.h" +#undef BondStyle +#undef BOND_CLASS + } - if (0) return NULL; + if (lmp->suffix2) { + sflag = 2; + char estyle[256]; + sprintf(estyle,"%s/%s",style,lmp->suffix2); + + if (0) return NULL; #define BOND_CLASS #define BondStyle(key,Class) \ - else if (strcmp(estyle,#key) == 0) return new Class(lmp); + else if (strcmp(estyle,#key) == 0) return new Class(lmp); #include "style_bond.h" #undef BondStyle #undef BOND_CLASS + } } sflag = 0; - if (strcmp(style,"none") == 0) return NULL; #define BOND_CLASS #define BondStyle(key,Class) \ else if (strcmp(style,#key) == 0) return new Class(lmp); #include "style_bond.h" #undef BOND_CLASS else error->all(FLERR,"Invalid bond style"); return NULL; } /* ---------------------------------------------------------------------- return ptr to current bond class or hybrid sub-class if matches style ------------------------------------------------------------------------- */ Bond *Force::bond_match(const char *style) { if (strcmp(bond_style,style) == 0) return bond; else if (strcmp(bond_style,"hybrid") == 0) { BondHybrid *hybrid = (BondHybrid *) bond; for (int i = 0; i < hybrid->nstyles; i++) if (strcmp(hybrid->keywords[i],style) == 0) return hybrid->styles[i]; } return NULL; } /* ---------------------------------------------------------------------- create an angle style, called from input script 
or restart file ------------------------------------------------------------------------- */ -void Force::create_angle(const char *style, const char *suffix) +void Force::create_angle(const char *style, int trysuffix) { delete [] angle_style; if (angle) delete angle; int sflag; - angle = new_angle(style,suffix,sflag); - - if (sflag) { - char estyle[256]; - sprintf(estyle,"%s/%s",style,suffix); - int n = strlen(estyle) + 1; - angle_style = new char[n]; - strcpy(angle_style,estyle); - } else { - int n = strlen(style) + 1; - angle_style = new char[n]; - strcpy(angle_style,style); - } + angle = new_angle(style,trysuffix,sflag); + store_style(angle_style,style,sflag); } /* ---------------------------------------------------------------------- generate an angle class ------------------------------------------------------------------------- */ -Angle *Force::new_angle(const char *style, const char *suffix, int &sflag) +Angle *Force::new_angle(const char *style, int trysuffix, int &sflag) { - if (suffix && lmp->suffix_enable) { - sflag = 1; - char estyle[256]; - sprintf(estyle,"%s/%s",style,suffix); - - if (0) return NULL; + if (trysuffix && lmp->suffix_enable) { + if (lmp->suffix) { + sflag = 1; + char estyle[256]; + sprintf(estyle,"%s/%s",style,lmp->suffix); + + if (0) return NULL; #define ANGLE_CLASS #define AngleStyle(key,Class) \ - else if (strcmp(estyle,#key) == 0) return new Class(lmp); + else if (strcmp(estyle,#key) == 0) return new Class(lmp); #include "style_angle.h" #undef AngleStyle #undef ANGLE_CLASS + } + if (lmp->suffix2) { + sflag = 2; + char estyle[256]; + sprintf(estyle,"%s/%s",style,lmp->suffix2); + + if (0) return NULL; + +#define ANGLE_CLASS +#define AngleStyle(key,Class) \ + else if (strcmp(estyle,#key) == 0) return new Class(lmp); +#include "style_angle.h" +#undef AngleStyle +#undef ANGLE_CLASS + } } sflag = 0; - if (strcmp(style,"none") == 0) return NULL; #define ANGLE_CLASS #define AngleStyle(key,Class) \ else if (strcmp(style,#key) == 0) return new
Class(lmp); #include "style_angle.h" #undef ANGLE_CLASS else error->all(FLERR,"Invalid angle style"); return NULL; } /* ---------------------------------------------------------------------- create a dihedral style, called from input script or restart file ------------------------------------------------------------------------- */ -void Force::create_dihedral(const char *style, const char *suffix) +void Force::create_dihedral(const char *style, int trysuffix) { delete [] dihedral_style; if (dihedral) delete dihedral; int sflag; - dihedral = new_dihedral(style,suffix,sflag); - - if (sflag) { - char estyle[256]; - sprintf(estyle,"%s/%s",style,suffix); - int n = strlen(estyle) + 1; - dihedral_style = new char[n]; - strcpy(dihedral_style,estyle); - } else { - int n = strlen(style) + 1; - dihedral_style = new char[n]; - strcpy(dihedral_style,style); - } + dihedral = new_dihedral(style,trysuffix,sflag); + store_style(dihedral_style,style,sflag); } /* ---------------------------------------------------------------------- generate a dihedral class ------------------------------------------------------------------------- */ -Dihedral *Force::new_dihedral(const char *style, const char *suffix, int &sflag) +Dihedral *Force::new_dihedral(const char *style, int trysuffix, int &sflag) { - if (suffix && lmp->suffix_enable) { - sflag = 1; - char estyle[256]; - sprintf(estyle,"%s/%s",style,suffix); + if (trysuffix && lmp->suffix_enable) { + if (lmp->suffix) { + sflag = 1; + char estyle[256]; + sprintf(estyle,"%s/%s",style,lmp->suffix); - if (0) return NULL; + if (0) return NULL; #define DIHEDRAL_CLASS #define DihedralStyle(key,Class) \ - else if (strcmp(estyle,#key) == 0) return new Class(lmp); + else if (strcmp(estyle,#key) == 0) return new Class(lmp); #include "style_dihedral.h" #undef DihedralStyle #undef DIHEDRAL_CLASS + } + if (lmp->suffix2) { + sflag = 2; + char estyle[256]; + sprintf(estyle,"%s/%s",style,lmp->suffix2); + + if (0) return NULL; + +#define DIHEDRAL_CLASS
+#define DihedralStyle(key,Class) \ + else if (strcmp(estyle,#key) == 0) return new Class(lmp); +#include "style_dihedral.h" +#undef DihedralStyle +#undef DIHEDRAL_CLASS + } } sflag = 0; - if (strcmp(style,"none") == 0) return NULL; #define DIHEDRAL_CLASS #define DihedralStyle(key,Class) \ else if (strcmp(style,#key) == 0) return new Class(lmp); #include "style_dihedral.h" #undef DihedralStyle #undef DIHEDRAL_CLASS else error->all(FLERR,"Invalid dihedral style"); return NULL; } /* ---------------------------------------------------------------------- create an improper style, called from input script or restart file ------------------------------------------------------------------------- */ -void Force::create_improper(const char *style, const char *suffix) +void Force::create_improper(const char *style, int trysuffix) { delete [] improper_style; if (improper) delete improper; int sflag; - improper = new_improper(style,suffix,sflag); - - if (sflag) { - char estyle[256]; - sprintf(estyle,"%s/%s",style,suffix); - int n = strlen(estyle) + 1; - improper_style = new char[n]; - strcpy(improper_style,estyle); - } else { - int n = strlen(style) + 1; - improper_style = new char[n]; - strcpy(improper_style,style); - } + improper = new_improper(style,trysuffix,sflag); + store_style(improper_style,style,sflag); } /* ---------------------------------------------------------------------- generate a improper class ------------------------------------------------------------------------- */ -Improper *Force::new_improper(const char *style, const char *suffix, int &sflag) +Improper *Force::new_improper(const char *style, int trysuffix, int &sflag) { - if (suffix && lmp->suffix_enable) { - sflag = 1; - char estyle[256]; - sprintf(estyle,"%s/%s",style,suffix); + if (trysuffix && lmp->suffix_enable) { + if (lmp->suffix) { + sflag = 1; + char estyle[256]; + sprintf(estyle,"%s/%s",style,lmp->suffix); - if (0) return NULL; + if (0) return NULL; #define IMPROPER_CLASS #define 
ImproperStyle(key,Class) \ - else if (strcmp(estyle,#key) == 0) return new Class(lmp); + else if (strcmp(estyle,#key) == 0) return new Class(lmp); #include "style_improper.h" #undef ImproperStyle #undef IMPROPER_CLASS + } + if (lmp->suffix2) { + sflag = 2; + char estyle[256]; + sprintf(estyle,"%s/%s",style,lmp->suffix2); + + if (0) return NULL; + +#define IMPROPER_CLASS +#define ImproperStyle(key,Class) \ + else if (strcmp(estyle,#key) == 0) return new Class(lmp); +#include "style_improper.h" +#undef ImproperStyle +#undef IMPROPER_CLASS + } } sflag = 0; - if (strcmp(style,"none") == 0) return NULL; #define IMPROPER_CLASS #define ImproperStyle(key,Class) \ else if (strcmp(style,#key) == 0) return new Class(lmp); #include "style_improper.h" #undef IMPROPER_CLASS else error->all(FLERR,"Invalid improper style"); return NULL; } /* ---------------------------------------------------------------------- return ptr to current improper class or hybrid sub-class if matches style ------------------------------------------------------------------------- */ Improper *Force::improper_match(const char *style) { if (strcmp(improper_style,style) == 0) return improper; else if (strcmp(improper_style,"hybrid") == 0) { - ImproperHybrid *hybrid = (ImproperHybrid *) bond; + ImproperHybrid *hybrid = (ImproperHybrid *) improper; for (int i = 0; i < hybrid->nstyles; i++) if (strcmp(hybrid->keywords[i],style) == 0) return hybrid->styles[i]; } return NULL; } /* ---------------------------------------------------------------------- new kspace style ------------------------------------------------------------------------- */ -void Force::create_kspace(int narg, char **arg, const char *suffix) +void Force::create_kspace(int narg, char **arg, int trysuffix) { delete [] kspace_style; if (kspace) delete kspace; int sflag; - kspace = new_kspace(narg,arg,suffix,sflag); - - if (sflag) { - char estyle[256]; - sprintf(estyle,"%s/%s",arg[0],suffix); - int n = strlen(estyle) + 1; - kspace_style = new char[n]; - strcpy(kspace_style,estyle); - } else { - int n =
strlen(arg[0]) + 1; - kspace_style = new char[n]; - strcpy(kspace_style,arg[0]); - } + kspace = new_kspace(narg,arg,trysuffix,sflag); + store_style(kspace_style,arg[0],sflag); if (comm->style == 1 && !kspace_match("ewald",0)) error->all(FLERR, "Cannot yet use KSpace solver with grid with comm style tiled"); } /* ---------------------------------------------------------------------- generate a kspace class ------------------------------------------------------------------------- */ -KSpace *Force::new_kspace(int narg, char **arg, const char *suffix, int &sflag) +KSpace *Force::new_kspace(int narg, char **arg, int trysuffix, int &sflag) { - if (suffix && lmp->suffix_enable) { - sflag = 1; - char estyle[256]; - sprintf(estyle,"%s/%s",arg[0],suffix); + if (trysuffix && lmp->suffix_enable) { + if (lmp->suffix) { + sflag = 1; + char estyle[256]; + sprintf(estyle,"%s/%s",arg[0],lmp->suffix); - if (0) return NULL; + if (0) return NULL; #define KSPACE_CLASS #define KSpaceStyle(key,Class) \ - else if (strcmp(estyle,#key) == 0) return new Class(lmp,narg-1,&arg[1]); + else if (strcmp(estyle,#key) == 0) return new Class(lmp,narg-1,&arg[1]); #include "style_kspace.h" #undef KSpaceStyle #undef KSPACE_CLASS + } + if (lmp->suffix2) { + sflag = 2; + char estyle[256]; + sprintf(estyle,"%s/%s",arg[0],lmp->suffix2); + + if (0) return NULL; + +#define KSPACE_CLASS +#define KSpaceStyle(key,Class) \ + else if (strcmp(estyle,#key) == 0) return new Class(lmp,narg-1,&arg[1]); +#include "style_kspace.h" +#undef KSpaceStyle +#undef KSPACE_CLASS + } } sflag = 0; - if (strcmp(arg[0],"none") == 0) return NULL; #define KSPACE_CLASS #define KSpaceStyle(key,Class) \ else if (strcmp(arg[0],#key) == 0) return new Class(lmp,narg-1,&arg[1]); #include "style_kspace.h" #undef KSPACE_CLASS else error->all(FLERR,"Invalid kspace style"); return NULL; } /* ---------------------------------------------------------------------- return ptr to Kspace class if matches word if exact, then style name must be exact
match to word if not exact, style name must contain word return NULL if no match ------------------------------------------------------------------------- */ KSpace *Force::kspace_match(const char *word, int exact) { if (exact && strcmp(kspace_style,word) == 0) return kspace; else if (!exact && strstr(kspace_style,word)) return kspace; return NULL; } +/* ---------------------------------------------------------------------- + store style name in str allocated here + if sflag = 0, no suffix + if sflag = 1/2, append suffix or suffix2 to style +------------------------------------------------------------------------- */ + +void Force::store_style(char *&str, const char *style, int sflag) +{ + if (sflag) { + char estyle[256]; + if (sflag == 1) sprintf(estyle,"%s/%s",style,lmp->suffix); + else sprintf(estyle,"%s/%s",style,lmp->suffix2); + int n = strlen(estyle) + 1; + str = new char[n]; + strcpy(str,estyle); + } else { + int n = strlen(style) + 1; + str = new char[n]; + strcpy(str,style); + } +} + /* ---------------------------------------------------------------------- set special bond values ------------------------------------------------------------------------- */ void Force::set_special(int narg, char **arg) { if (narg == 0) error->all(FLERR,"Illegal special_bonds command"); // defaults, but do not reset special_extra special_lj[1] = special_lj[2] = special_lj[3] = 0.0; special_coul[1] = special_coul[2] = special_coul[3] = 0.0; special_angle = special_dihedral = 0; int iarg = 0; while (iarg < narg) { if (strcmp(arg[iarg],"amber") == 0) { if (iarg+1 > narg) error->all(FLERR,"Illegal special_bonds command"); special_lj[1] = 0.0; special_lj[2] = 0.0; special_lj[3] = 0.5; special_coul[1] = 0.0; special_coul[2] = 0.0; special_coul[3] = 5.0/6.0; iarg += 1; } else if (strcmp(arg[iarg],"charmm") == 0) { if (iarg+1 > narg) error->all(FLERR,"Illegal special_bonds command"); special_lj[1] = 0.0; special_lj[2] = 0.0; special_lj[3] = 0.0; special_coul[1] = 0.0; special_coul[2] 
= 0.0; special_coul[3] = 0.0; iarg += 1; } else if (strcmp(arg[iarg],"dreiding") == 0) { if (iarg+1 > narg) error->all(FLERR,"Illegal special_bonds command"); special_lj[1] = 0.0; special_lj[2] = 0.0; special_lj[3] = 1.0; special_coul[1] = 0.0; special_coul[2] = 0.0; special_coul[3] = 1.0; iarg += 1; } else if (strcmp(arg[iarg],"fene") == 0) { if (iarg+1 > narg) error->all(FLERR,"Illegal special_bonds command"); special_lj[1] = 0.0; special_lj[2] = 1.0; special_lj[3] = 1.0; special_coul[1] = 0.0; special_coul[2] = 1.0; special_coul[3] = 1.0; iarg += 1; } else if (strcmp(arg[iarg],"lj/coul") == 0) { if (iarg+4 > narg) error->all(FLERR,"Illegal special_bonds command"); special_lj[1] = special_coul[1] = atof(arg[iarg+1]); special_lj[2] = special_coul[2] = atof(arg[iarg+2]); special_lj[3] = special_coul[3] = atof(arg[iarg+3]); iarg += 4; } else if (strcmp(arg[iarg],"lj") == 0) { if (iarg+4 > narg) error->all(FLERR,"Illegal special_bonds command"); special_lj[1] = atof(arg[iarg+1]); special_lj[2] = atof(arg[iarg+2]); special_lj[3] = atof(arg[iarg+3]); iarg += 4; } else if (strcmp(arg[iarg],"coul") == 0) { if (iarg+4 > narg) error->all(FLERR,"Illegal special_bonds command"); special_coul[1] = atof(arg[iarg+1]); special_coul[2] = atof(arg[iarg+2]); special_coul[3] = atof(arg[iarg+3]); iarg += 4; } else if (strcmp(arg[iarg],"angle") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal special_bonds command"); if (strcmp(arg[iarg+1],"no") == 0) special_angle = 0; else if (strcmp(arg[iarg+1],"yes") == 0) special_angle = 1; else error->all(FLERR,"Illegal special_bonds command"); iarg += 2; } else if (strcmp(arg[iarg],"dihedral") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal special_bonds command"); if (strcmp(arg[iarg+1],"no") == 0) special_dihedral = 0; else if (strcmp(arg[iarg+1],"yes") == 0) special_dihedral = 1; else error->all(FLERR,"Illegal special_bonds command"); iarg += 2; } else if (strcmp(arg[iarg],"extra") == 0) { if (iarg+2 > narg) 
error->all(FLERR,"Illegal special_bonds command"); special_extra = atoi(arg[iarg+1]); iarg += 2; } else error->all(FLERR,"Illegal special_bonds command"); } for (int i = 1; i <= 3; i++) if (special_lj[i] < 0.0 || special_lj[i] > 1.0 || special_coul[i] < 0.0 || special_coul[i] > 1.0) error->all(FLERR,"Illegal special_bonds command"); if (special_extra < 0) error->all(FLERR,"Illegal special_bonds command"); } /* ---------------------------------------------------------------------- compute bounds implied by numeric str with a possible wildcard asterik 1 = lower bound, nmax = upper bound 5 possibilities: (1) i = i to i, (2) * = nmin to nmax, (3) i* = i to nmax, (4) *j = nmin to j, (5) i*j = i to j return nlo,nhi ------------------------------------------------------------------------- */ void Force::bounds(char *str, int nmax, int &nlo, int &nhi, int nmin) { char *ptr = strchr(str,'*'); if (ptr == NULL) { nlo = nhi = atoi(str); } else if (strlen(str) == 1) { nlo = nmin; nhi = nmax; } else if (ptr == str) { nlo = nmin; nhi = atoi(ptr+1); } else if (strlen(ptr+1) == 0) { nlo = atoi(str); nhi = nmax; } else { nlo = atoi(str); nhi = atoi(ptr+1); } if (nlo < nmin || nhi > nmax) error->all(FLERR,"Numeric index is out of bounds"); } /* ---------------------------------------------------------------------- compute bounds implied by numeric str with a possible wildcard asterik 1 = lower bound, nmax = upper bound 5 possibilities: (1) i = i to i, (2) * = nmin to nmax, (3) i* = i to nmax, (4) *j = nmin to j, (5) i*j = i to j return nlo,nhi ------------------------------------------------------------------------- */ void Force::boundsbig(char *str, bigint nmax, bigint &nlo, bigint &nhi, bigint nmin) { char *ptr = strchr(str,'*'); if (ptr == NULL) { nlo = nhi = ATOBIGINT(str); } else if (strlen(str) == 1) { nlo = nmin; nhi = nmax; } else if (ptr == str) { nlo = nmin; nhi = ATOBIGINT(ptr+1); } else if (strlen(ptr+1) == 0) { nlo = ATOBIGINT(str); nhi = nmax; } else { nlo = 
ATOBIGINT(str); nhi = ATOBIGINT(ptr+1); } if (nlo < nmin || nhi > nmax) error->all(FLERR,"Numeric index is out of bounds"); } /* ---------------------------------------------------------------------- read a floating point value from a string generate an error if not a legitimate floating point value called by various commands to check validity of their arguments ------------------------------------------------------------------------- */ double Force::numeric(const char *file, int line, char *str) { if (!str) error->all(file,line,"Expected floating point parameter " "in input script or data file"); int n = strlen(str); if (n == 0) error->all(file,line,"Expected floating point parameter " "in input script or data file"); for (int i = 0; i < n; i++) { if (isdigit(str[i])) continue; if (str[i] == '-' || str[i] == '+' || str[i] == '.') continue; if (str[i] == 'e' || str[i] == 'E') continue; error->all(file,line,"Expected floating point parameter " "in input script or data file"); } return atof(str); } /* ---------------------------------------------------------------------- read an integer value from a string generate an error if not a legitimate integer value called by various commands to check validity of their arguments ------------------------------------------------------------------------- */ int Force::inumeric(const char *file, int line, char *str) { if (!str) error->all(file,line, "Expected integer parameter in input script or data file"); int n = strlen(str); if (n == 0) error->all(file,line, "Expected integer parameter in input script or data file"); for (int i = 0; i < n; i++) { if (isdigit(str[i]) || str[i] == '-' || str[i] == '+') continue; error->all(file,line, "Expected integer parameter in input script or data file"); } return atoi(str); } /* ---------------------------------------------------------------------- read a big integer value from a string generate an error if not a legitimate integer value called by various commands to check validity of 
their arguments ------------------------------------------------------------------------- */ bigint Force::bnumeric(const char *file, int line, char *str) { if (!str) error->all(file,line, "Expected integer parameter in input script or data file"); int n = strlen(str); if (n == 0) error->all(file,line, "Expected integer parameter in input script or data file"); for (int i = 0; i < n; i++) { if (isdigit(str[i]) || str[i] == '-' || str[i] == '+') continue; error->all(file,line, "Expected integer parameter in input script or data file"); } return ATOBIGINT(str); } /* ---------------------------------------------------------------------- read a tag integer value from a string generate an error if not a legitimate integer value called by various commands to check validity of their arguments ------------------------------------------------------------------------- */ tagint Force::tnumeric(const char *file, int line, char *str) { if (!str) error->all(file,line, "Expected integer parameter in input script or data file"); int n = strlen(str); if (n == 0) error->all(file,line, "Expected integer parameter in input script or data file"); for (int i = 0; i < n; i++) { if (isdigit(str[i]) || str[i] == '-' || str[i] == '+') continue; error->all(file,line, "Expected integer parameter in input script or data file"); } return ATOTAGINT(str); } /* ---------------------------------------------------------------------- open a potential file as specified by name; failing that, search in dir specified by env variable LAMMPS_POTENTIALS ------------------------------------------------------------------------- */ FILE *Force::open_potential(const char *name) { FILE *fp; if (name == NULL) return NULL; // attempt to open file directly // if successful, return ptr fp = fopen(name,"r"); if (fp) return fp; // try the environment variable directory const char *path = getenv("LAMMPS_POTENTIALS"); if (path == NULL) return NULL; const char *pot = potname(name); if (pot == NULL) return NULL; size_t 
len1 = strlen(path); size_t len2 = strlen(pot); char *newpath = new char[len1+len2+2]; strcpy(newpath,path); #if defined(_WIN32) newpath[len1] = '\\'; newpath[len1+1] = 0; #else newpath[len1] = '/'; newpath[len1+1] = 0; #endif strcat(newpath,pot); fp = fopen(newpath,"r"); delete[] newpath; return fp; } /* ---------------------------------------------------------------------- strip off leading part of path, return just the filename ------------------------------------------------------------------------- */ const char *Force::potname(const char *path) { const char *pot; if (path == NULL) return NULL; #if defined(_WIN32) // skip over the disk drive part of windows pathnames if (isalpha(path[0]) && path[1] == ':') path += 2; #endif for (pot = path; *path != '\0'; ++path) { #if defined(_WIN32) if ((*path == '\\') || (*path == '/')) pot = path + 1; #else if (*path == '/') pot = path + 1; #endif } return pot; } /* ---------------------------------------------------------------------- memory usage of force classes ------------------------------------------------------------------------- */ bigint Force::memory_usage() { bigint bytes = 0; if (pair) bytes += static_cast (pair->memory_usage()); if (bond) bytes += static_cast (bond->memory_usage()); if (angle) bytes += static_cast (angle->memory_usage()); if (dihedral) bytes += static_cast (dihedral->memory_usage()); if (improper) bytes += static_cast (improper->memory_usage()); if (kspace) bytes += static_cast (kspace->memory_usage()); return bytes; } diff --git a/src/force.h b/src/force.h index bf364f253..f857c1a11 100644 --- a/src/force.h +++ b/src/force.h @@ -1,161 +1,162 @@ /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. 
Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #ifndef LMP_FORCE_H #define LMP_FORCE_H #include "pointers.h" #include #include namespace LAMMPS_NS { class Force : protected Pointers { public: double boltz; // Boltzmann constant (eng/degree-K) double hplanck; // Planck's constant (energy-time) double mvv2e; // conversion of mv^2 to energy double ftm2v; // conversion of ft/m to velocity double mv2d; // conversion of mass/volume to density double nktv2p; // conversion of NkT/V to pressure double qqr2e; // conversion of q^2/r to energy double qe2f; // conversion of qE to force double vxmu2f; // conversion of vx dynamic-visc to force double xxt2kmu; // conversion of xx/t to kinematic-visc double dielectric; // dielectric constant double qqrd2e; // q^2/r to energy w/ dielectric constant double e_mass; // electron mass double hhmrr2e; // conversion of (hbar)^2/(mr^2) to energy double mvh2r; // conversion of mv/hbar to distance // hbar = h/(2*pi) double angstrom; // 1 angstrom in native units double femtosecond; // 1 femtosecond in native units double qelectron; // 1 electron charge abs() in native units int newton,newton_pair,newton_bond; // Newton's 3rd law settings class Pair *pair; char *pair_style; typedef Pair *(*PairCreator)(LAMMPS *); std::map *pair_map; class Bond *bond; char *bond_style; class Angle *angle; char *angle_style; class Dihedral *dihedral; char *dihedral_style; class Improper *improper; char *improper_style; class KSpace *kspace; char *kspace_style; // index [0] is not used in these arrays double special_lj[4]; // 1-2, 1-3, 1-4 prefactors for LJ double special_coul[4]; // 1-2, 1-3, 1-4 prefactors for Coulombics int special_angle; // 0 if defined angles are ignored // 1 if only weight 1,3 atoms if in an angle int special_dihedral; // 0 if defined 
dihedrals are ignored // 1 if only weight 1,4 atoms if in a dihedral int special_extra; // extra space for added bonds Force(class LAMMPS *); ~Force(); void init(); - void create_pair(const char *, const char *suffix = NULL); - class Pair *new_pair(const char *, const char *, int &); + void create_pair(const char *, int); + class Pair *new_pair(const char *, int, int &); class Pair *pair_match(const char *, int); - void create_bond(const char *, const char *suffix = NULL); - class Bond *new_bond(const char *, const char *, int &); + void create_bond(const char *, int); + class Bond *new_bond(const char *, int, int &); class Bond *bond_match(const char *); - void create_angle(const char *, const char *suffix = NULL); - class Angle *new_angle(const char *, const char *, int &); + void create_angle(const char *, int); + class Angle *new_angle(const char *, int, int &); - void create_dihedral(const char *, const char *suffix = NULL); - class Dihedral *new_dihedral(const char *, const char *, int &); + void create_dihedral(const char *, int); + class Dihedral *new_dihedral(const char *, int, int &); - void create_improper(const char *, const char *suffix = NULL); - class Improper *new_improper(const char *, const char *, int &); + void create_improper(const char *, int); + class Improper *new_improper(const char *, int, int &); class Improper *improper_match(const char *); - void create_kspace(int, char **, const char *suffix = NULL); - class KSpace *new_kspace(int, char **, const char *, int &); + void create_kspace(int, char **, int); + class KSpace *new_kspace(int, char **, int, int &); class KSpace *kspace_match(const char *, int); + void store_style(char *&, const char *, int); void set_special(int, char **); void bounds(char *, int, int &, int &, int nmin=1); void boundsbig(char *, bigint, bigint &, bigint &, bigint nmin=1); double numeric(const char *, int, char *); int inumeric(const char *, int, char *); bigint bnumeric(const char *, int, char *); tagint 
tnumeric(const char *, int, char *); FILE *open_potential(const char *); const char *potname(const char *); bigint memory_usage(); private: template static Pair *pair_creator(LAMMPS *); }; } #endif /* ERROR/WARNING messages: E: Invalid pair style The choice of pair style is unknown. E: Invalid bond style The choice of bond style is unknown. E: Invalid angle style The choice of angle style is unknown. E: Invalid dihedral style The choice of dihedral style is unknown. E: Invalid improper style The choice of improper style is unknown. E: Invalid kspace style The choice of kspace style is unknown. E: Illegal ... command Self-explanatory. Check the input script syntax and compare to the documentation for the command. You can use -echo screen as a command-line option when running LAMMPS to see the offending line. E: Numeric index is out of bounds A command with an argument that specifies an integer or range of integers is using a value that is less than 1 or greater than the maximum allowed limit. */ diff --git a/src/improper_hybrid.cpp b/src/improper_hybrid.cpp index 9212051e3..09e73ac9b 100644 --- a/src/improper_hybrid.cpp +++ b/src/improper_hybrid.cpp @@ -1,338 +1,339 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ #include "math.h" #include "string.h" #include "ctype.h" #include "improper_hybrid.h" #include "atom.h" #include "neighbor.h" #include "domain.h" #include "comm.h" #include "force.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; #define EXTRA 1000 /* ---------------------------------------------------------------------- */ ImproperHybrid::ImproperHybrid(LAMMPS *lmp) : Improper(lmp) { nstyles = 0; } /* ---------------------------------------------------------------------- */ ImproperHybrid::~ImproperHybrid() { if (nstyles) { for (int i = 0; i < nstyles; i++) delete styles[i]; delete [] styles; for (int i = 0; i < nstyles; i++) delete [] keywords[i]; delete [] keywords; } if (allocated) { memory->destroy(setflag); memory->destroy(map); delete [] nimproperlist; delete [] maximproper; for (int i = 0; i < nstyles; i++) memory->destroy(improperlist[i]); delete [] improperlist; } } /* ---------------------------------------------------------------------- */ void ImproperHybrid::compute(int eflag, int vflag) { int i,j,m,n; // save ptrs to original improperlist int nimproperlist_orig = neighbor->nimproperlist; int **improperlist_orig = neighbor->improperlist; // if this is re-neighbor step, create sub-style improperlists // nimproperlist[] = length of each sub-style list // realloc sub-style improperlist if necessary // load sub-style improperlist with 5 values from original improperlist if (neighbor->ago == 0) { for (m = 0; m < nstyles; m++) nimproperlist[m] = 0; for (i = 0; i < nimproperlist_orig; i++) { m = map[improperlist_orig[i][4]]; nimproperlist[m]++; } for (m = 0; m < nstyles; m++) { if (nimproperlist[m] > maximproper[m]) { memory->destroy(improperlist[m]); maximproper[m] = nimproperlist[m] + EXTRA; memory->create(improperlist[m],maximproper[m],5, "improper_hybrid:improperlist"); } nimproperlist[m] = 0; } for (i = 0; i < nimproperlist_orig; i++) { m = 
map[improperlist_orig[i][4]]; if (m < 0) continue; n = nimproperlist[m]; improperlist[m][n][0] = improperlist_orig[i][0]; improperlist[m][n][1] = improperlist_orig[i][1]; improperlist[m][n][2] = improperlist_orig[i][2]; improperlist[m][n][3] = improperlist_orig[i][3]; improperlist[m][n][4] = improperlist_orig[i][4]; nimproperlist[m]++; } } // call each sub-style's compute function // set neighbor->improperlist to sub-style improperlist before call // accumulate sub-style global/peratom energy/virial in hybrid if (eflag || vflag) ev_setup(eflag,vflag); else evflag = 0; for (m = 0; m < nstyles; m++) { neighbor->nimproperlist = nimproperlist[m]; neighbor->improperlist = improperlist[m]; styles[m]->compute(eflag,vflag); if (eflag_global) energy += styles[m]->energy; if (vflag_global) for (n = 0; n < 6; n++) virial[n] += styles[m]->virial[n]; if (eflag_atom) { n = atom->nlocal; if (force->newton_bond) n += atom->nghost; double *eatom_substyle = styles[m]->eatom; for (i = 0; i < n; i++) eatom[i] += eatom_substyle[i]; } if (vflag_atom) { n = atom->nlocal; if (force->newton_bond) n += atom->nghost; double **vatom_substyle = styles[m]->vatom; for (i = 0; i < n; i++) for (j = 0; j < 6; j++) vatom[i][j] += vatom_substyle[i][j]; } } // restore ptrs to original improperlist neighbor->nimproperlist = nimproperlist_orig; neighbor->improperlist = improperlist_orig; } /* ---------------------------------------------------------------------- */ void ImproperHybrid::allocate() { allocated = 1; int n = atom->nimpropertypes; memory->create(map,n+1,"improper:map"); memory->create(setflag,n+1,"improper:setflag"); for (int i = 1; i <= n; i++) setflag[i] = 0; nimproperlist = new int[nstyles]; maximproper = new int[nstyles]; improperlist = new int**[nstyles]; for (int m = 0; m < nstyles; m++) maximproper[m] = 0; for (int m = 0; m < nstyles; m++) improperlist[m] = NULL; } /* ---------------------------------------------------------------------- create one improper style for each arg in list 
------------------------------------------------------------------------- */ void ImproperHybrid::settings(int narg, char **arg) { int i,m,istyle; if (narg < 1) error->all(FLERR,"Illegal improper_style command"); // delete old lists, since cannot just change settings if (nstyles) { for (int i = 0; i < nstyles; i++) delete styles[i]; delete [] styles; for (int i = 0; i < nstyles; i++) delete [] keywords[i]; delete [] keywords; } if (allocated) { memory->destroy(setflag); memory->destroy(map); delete [] nimproperlist; delete [] maximproper; for (int i = 0; i < nstyles; i++) memory->destroy(improperlist[i]); delete [] improperlist; } allocated = 0; // count sub-styles by skipping numeric args // one exception is 1st arg of style "table", which is non-numeric word // need a better way to skip these exceptions nstyles = 0; i = 0; while (i < narg) { if (strcmp(arg[i],"table") == 0) i++; i++; while (i < narg && !isalpha(arg[i][0])) i++; nstyles++; } // allocate list of sub-styles styles = new Improper*[nstyles]; keywords = new char*[nstyles]; // allocate each sub-style and call its settings() with subset of args // define subset of args for a sub-style by skipping numeric args // one exception is 1st arg of style "table", which is non-numeric // need a better way to skip these exceptions - int dummy; + int sflag; nstyles = 0; i = 0; while (i < narg) { for (m = 0; m < nstyles; m++) if (strcmp(arg[i],keywords[m]) == 0) error->all(FLERR,"Improper style hybrid cannot use " "same improper style twice"); if (strcmp(arg[i],"hybrid") == 0) error->all(FLERR, "Improper style hybrid cannot have hybrid as an argument"); if (strcmp(arg[i],"none") == 0) error->all(FLERR,"Improper style hybrid cannot have none as an argument"); - styles[nstyles] = force->new_improper(arg[i],lmp->suffix,dummy); - keywords[nstyles] = new char[strlen(arg[i])+1]; - strcpy(keywords[nstyles],arg[i]); + + styles[nstyles] = force->new_improper(arg[i],1,sflag); + 
force->store_style(keywords[nstyles],arg[i],sflag); + istyle = i; if (strcmp(arg[i],"table") == 0) i++; i++; while (i < narg && !isalpha(arg[i][0])) i++; styles[nstyles]->settings(i-istyle-1,&arg[istyle+1]); nstyles++; } } /* ---------------------------------------------------------------------- set coeffs for one type ---------------------------------------------------------------------- */ void ImproperHybrid::coeff(int narg, char **arg) { if (!allocated) allocate(); int ilo,ihi; force->bounds(arg[0],atom->nimpropertypes,ilo,ihi); // 2nd arg = improper sub-style name // allow for "none" as valid sub-style name int m; for (m = 0; m < nstyles; m++) if (strcmp(arg[1],keywords[m]) == 0) break; int none = 0; if (m == nstyles) { if (strcmp(arg[1],"none") == 0) none = 1; else error->all(FLERR,"Improper coeff for hybrid has invalid style"); } // move 1st arg to 2nd arg // just copy ptrs, since arg[] points into original input line arg[1] = arg[0]; // invoke sub-style coeff() starting with 1st arg if (!none) styles[m]->coeff(narg-1,&arg[1]); // set setflag and which type maps to which sub-style // if sub-style is none: set hybrid setflag, wipe out map for (int i = ilo; i <= ihi; i++) { if (none) { setflag[i] = 1; map[i] = -1; } else { setflag[i] = styles[m]->setflag[i]; map[i] = m; } } } /* ---------------------------------------------------------------------- proc 0 writes to restart file ------------------------------------------------------------------------- */ void ImproperHybrid::write_restart(FILE *fp) { fwrite(&nstyles,sizeof(int),1,fp); int n; for (int m = 0; m < nstyles; m++) { n = strlen(keywords[m]) + 1; fwrite(&n,sizeof(int),1,fp); fwrite(keywords[m],sizeof(char),n,fp); } } /* ---------------------------------------------------------------------- proc 0 reads from restart file, bcasts ------------------------------------------------------------------------- */ void ImproperHybrid::read_restart(FILE *fp) { int me = comm->me; if (me == 0) 
fread(&nstyles,sizeof(int),1,fp); MPI_Bcast(&nstyles,1,MPI_INT,0,world); styles = new Improper*[nstyles]; keywords = new char*[nstyles]; allocate(); int n,dummy; for (int m = 0; m < nstyles; m++) { if (me == 0) fread(&n,sizeof(int),1,fp); MPI_Bcast(&n,1,MPI_INT,0,world); keywords[m] = new char[n]; if (me == 0) fread(keywords[m],sizeof(char),n,fp); MPI_Bcast(keywords[m],n,MPI_CHAR,0,world); - styles[m] = force->new_improper(keywords[m],lmp->suffix,dummy); + styles[m] = force->new_improper(keywords[m],0,dummy); } } /* ---------------------------------------------------------------------- memory usage ------------------------------------------------------------------------- */ double ImproperHybrid::memory_usage() { double bytes = maxeatom * sizeof(double); bytes += maxvatom*6 * sizeof(double); for (int m = 0; m < nstyles; m++) bytes += maximproper[m]*5 * sizeof(int); for (int m = 0; m < nstyles; m++) if (styles[m]) bytes += styles[m]->memory_usage(); return bytes; } diff --git a/src/input.cpp b/src/input.cpp index 785929bb2..ac91ab546 100644 --- a/src/input.cpp +++ b/src/input.cpp @@ -1,1629 +1,1696 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ #include "mpi.h" #include "stdio.h" #include "stdlib.h" #include "string.h" #include "ctype.h" #include "unistd.h" #include "sys/stat.h" #include "input.h" #include "style_command.h" #include "universe.h" #include "atom.h" #include "atom_vec.h" #include "comm.h" #include "comm_brick.h" #include "comm_tiled.h" #include "group.h" #include "domain.h" #include "output.h" #include "thermo.h" #include "force.h" #include "pair.h" #include "min.h" #include "modify.h" #include "compute.h" #include "bond.h" #include "angle.h" #include "dihedral.h" #include "improper.h" #include "kspace.h" #include "update.h" #include "neighbor.h" #include "special.h" #include "variable.h" #include "accelerator_cuda.h" #include "accelerator_kokkos.h" #include "error.h" #include "memory.h" #ifdef _OPENMP #include "omp.h" #endif #ifdef _WIN32 #include #endif using namespace LAMMPS_NS; #define DELTALINE 256 #define DELTA 4 /* ---------------------------------------------------------------------- */ Input::Input(LAMMPS *lmp, int argc, char **argv) : Pointers(lmp) { MPI_Comm_rank(world,&me); maxline = maxcopy = maxwork = 0; line = copy = work = NULL; narg = maxarg = 0; arg = NULL; echo_screen = 0; echo_log = 1; label_active = 0; labelstr = NULL; jump_skip = 0; ifthenelse_flag = 0; if (me == 0) { nfile = maxfile = 1; infiles = (FILE **) memory->smalloc(sizeof(FILE *),"input:infiles"); infiles[0] = infile; } else infiles = NULL; variable = new Variable(lmp); // fill map with commands listed in style_command.h command_map = new std::map(); #define COMMAND_CLASS #define CommandStyle(key,Class) \ (*command_map)[#key] = &command_creator; #include "style_command.h" #undef CommandStyle #undef COMMAND_CLASS // process command-line args // check for args "-var" and "-echo" // caller has already checked that sufficient arguments exist int iarg = 0; while (iarg < argc) { if (strcmp(argv[iarg],"-var") == 0 || strcmp(argv[iarg],"-v") 
== 0) { int jarg = iarg+3; while (jarg < argc && argv[jarg][0] != '-') jarg++; variable->set(argv[iarg+1],jarg-iarg-2,&argv[iarg+2]); iarg = jarg; } else if (strcmp(argv[iarg],"-echo") == 0 || strcmp(argv[iarg],"-e") == 0) { narg = 1; char **tmp = arg; // trick echo() into using argv instead of arg arg = &argv[iarg+1]; echo(); arg = tmp; iarg += 2; } else iarg++; } } /* ---------------------------------------------------------------------- */ Input::~Input() { // don't free command and arg strings // they just point to other allocated memory memory->sfree(line); memory->sfree(copy); memory->sfree(work); if (labelstr) delete [] labelstr; memory->sfree(arg); memory->sfree(infiles); delete variable; delete command_map; } /* ---------------------------------------------------------------------- process all input from infile infile = stdin or file if command-line arg "-in" was used ------------------------------------------------------------------------- */ void Input::file() { int m,n; while (1) { // read a line from input script // n = length of line including str terminator, 0 if end of file // if line ends in continuation char '&', concatenate next line if (me == 0) { m = 0; while (1) { if (maxline-m < 2) reallocate(line,maxline,0); if (fgets(&line[m],maxline-m,infile) == NULL) { if (m) n = strlen(line) + 1; else n = 0; break; } m = strlen(line); if (line[m-1] != '\n') continue; m--; while (m >= 0 && isspace(line[m])) m--; if (m < 0 || line[m] != '&') { line[m+1] = '\0'; n = m+2; break; } } } // bcast the line // if n = 0, end-of-file // error if label_active is set, since label wasn't encountered // if original input file, code is done // else go back to previous input file MPI_Bcast(&n,1,MPI_INT,0,world); if (n == 0) { if (label_active) error->all(FLERR,"Label wasn't found in input script"); if (me == 0) { if (infile != stdin) { fclose(infile); infile = NULL; } nfile--; } MPI_Bcast(&nfile,1,MPI_INT,0,world); if (nfile == 0) break; if (me == 0) infile = 
infiles[nfile-1]; continue; } if (n > maxline) reallocate(line,maxline,n); MPI_Bcast(line,n,MPI_CHAR,0,world); // echo the command unless scanning for label if (me == 0 && label_active == 0) { if (echo_screen && screen) fprintf(screen,"%s\n",line); if (echo_log && logfile) fprintf(logfile,"%s\n",line); } // parse the line // if no command, skip to next line in input script parse(); if (command == NULL) continue; // if scanning for label, skip command unless it's a label command if (label_active && strcmp(command,"label") != 0) continue; // execute the command if (execute_command()) { char *str = new char[maxline+32]; sprintf(str,"Unknown command: %s",line); error->all(FLERR,str); } } } /* ---------------------------------------------------------------------- process all input from filename called from library interface ------------------------------------------------------------------------- */ void Input::file(const char *filename) { // error if another nested file still open, should not be possible // open new filename and set infile, infiles[0], nfile // call to file() will close filename and decrement nfile if (me == 0) { if (nfile > 1) error->one(FLERR,"Invalid use of library file() function"); if (infile && infile != stdin) fclose(infile); infile = fopen(filename,"r"); if (infile == NULL) { char str[128]; sprintf(str,"Cannot open input script %s",filename); error->one(FLERR,str); } infiles[0] = infile; nfile = 1; } file(); } /* ---------------------------------------------------------------------- copy command in single to line, parse and execute it return command name to caller ------------------------------------------------------------------------- */ char *Input::one(const char *single) { int n = strlen(single) + 1; if (n > maxline) reallocate(line,maxline,n); strcpy(line,single); // echo the command unless scanning for label if (me == 0 && label_active == 0) { if (echo_screen && screen) fprintf(screen,"%s\n",line); if (echo_log && logfile) 
fprintf(logfile,"%s\n",line); } // parse the line // if no command, just return NULL parse(); if (command == NULL) return NULL; // if scanning for label, skip command unless it's a label command if (label_active && strcmp(command,"label") != 0) return NULL; // execute the command and return its name if (execute_command()) { char *str = new char[maxline+32]; sprintf(str,"Unknown command: %s",line); error->all(FLERR,str); } return command; } /* ---------------------------------------------------------------------- parse copy of command line by inserting string terminators strip comment = all chars from # on replace all $ via variable substitution command = first word narg = # of args arg[] = individual args treat text between single/double quotes as one arg ------------------------------------------------------------------------- */ void Input::parse() { // duplicate line into copy string to break into words int n = strlen(line) + 1; if (n > maxcopy) reallocate(copy,maxcopy,n); strcpy(copy,line); // strip any # comment by replacing it with 0 // do not strip # inside single/double quotes char quote = '\0'; char *ptr = copy; while (*ptr) { if (*ptr == '#' && !quote) { *ptr = '\0'; break; } if (*ptr == quote) quote = '\0'; else if (*ptr == '"' || *ptr == '\'') quote = *ptr; ptr++; } // perform $ variable substitution (print changes) // except if searching for a label since earlier variable may not be defined if (!label_active) substitute(copy,work,maxcopy,maxwork,1); // command = 1st arg in copy string char *next; command = nextword(copy,&next); if (command == NULL) return; // point arg[] at each subsequent arg in copy string // nextword() inserts string terminators into copy string to delimit args // nextword() treats text between single/double quotes as one arg narg = 0; ptr = next; while (ptr) { if (narg == maxarg) { maxarg += DELTA; arg = (char **) memory->srealloc(arg,maxarg*sizeof(char *),"input:arg"); } arg[narg] = nextword(ptr,&next); if (!arg[narg]) break; 
narg++; ptr = next; } } /* ---------------------------------------------------------------------- find next word in str insert 0 at end of word ignore leading whitespace treat text between single/double quotes as one arg matching quote must be followed by whitespace char if not end of string strip quotes from returned word return ptr to start of word return next = ptr after word or NULL if word ended with 0 return NULL if no word in string ------------------------------------------------------------------------- */ char *Input::nextword(char *str, char **next) { char *start,*stop; start = &str[strspn(str," \t\n\v\f\r")]; if (*start == '\0') return NULL; if (*start == '"' || *start == '\'') { stop = strchr(&start[1],*start); if (!stop) error->all(FLERR,"Unbalanced quotes in input line"); if (stop[1] && !isspace(stop[1])) error->all(FLERR,"Input line quote not followed by whitespace"); start++; } else stop = &start[strcspn(start," \t\n\v\f\r")]; if (*stop == '\0') *next = NULL; else *next = stop+1; *stop = '\0'; return start; } /* ---------------------------------------------------------------------- substitute for $ variables in str using work str2 and return it reallocate str/str2 to hold expanded version if necessary & reset max/max2 print updated string if flag is set and not searching for label label_active will be 0 if called from external class ------------------------------------------------------------------------- */ void Input::substitute(char *&str, char *&str2, int &max, int &max2, int flag) { // use str2 as scratch space to expand str, then copy back to str // reallocate str and str2 as necessary // do not replace $ inside single/double quotes // var = pts at variable name, ended by NULL // if $ is followed by '{', trailing '}' becomes NULL // else $x becomes x followed by NULL // beyond = points to text following variable int i,n,paren_count; char immediate[256]; char *var,*value,*beyond; char quote = '\0'; char *ptr = str; n = strlen(str) + 1; if (n > 
max2) reallocate(str2,max2,n); *str2 = '\0'; char *ptr2 = str2; while (*ptr) { // variable substitution if (*ptr == '$' && !quote) { // value = ptr to expanded variable // variable name between curly braces, e.g. ${a} if (*(ptr+1) == '{') { var = ptr+2; i = 0; while (var[i] != '\0' && var[i] != '}') i++; if (var[i] == '\0') error->one(FLERR,"Invalid variable name"); var[i] = '\0'; beyond = ptr + strlen(var) + 3; value = variable->retrieve(var); // immediate variable between parenthesis, e.g. $(1/2) } else if (*(ptr+1) == '(') { var = ptr+2; paren_count = 0; i = 0; while (var[i] != '\0' && !(var[i] == ')' && paren_count == 0)) { switch (var[i]) { case '(': paren_count++; break; case ')': paren_count--; break; default: ; } i++; } if (var[i] == '\0') error->one(FLERR,"Invalid immediate variable"); var[i] = '\0'; beyond = ptr + strlen(var) + 3; sprintf(immediate,"%.20g",variable->compute_equal(var)); value = immediate; // single character variable name, e.g. $a } else { var = ptr; var[0] = var[1]; var[1] = '\0'; beyond = ptr + 2; value = variable->retrieve(var); } if (value == NULL) error->one(FLERR,"Substitution for illegal variable"); // check if storage in str2 needs to be expanded // re-initialize ptr and ptr2 to the point beyond the variable. 
n = strlen(str2) + strlen(value) + strlen(beyond) + 1; if (n > max2) reallocate(str2,max2,n); strcat(str2,value); ptr2 = str2 + strlen(str2); ptr = beyond; // output substitution progress if requested if (flag && me == 0 && label_active == 0) { if (echo_screen && screen) fprintf(screen,"%s%s\n",str2,beyond); if (echo_log && logfile) fprintf(logfile,"%s%s\n",str2,beyond); } continue; } if (*ptr == quote) quote = '\0'; else if (*ptr == '"' || *ptr == '\'') quote = *ptr; // copy current character into str2 *ptr2++ = *ptr++; *ptr2 = '\0'; } // set length of input str to length of work str2 // copy work string back to input str if (max2 > max) reallocate(str,max,max2); strcpy(str,str2); } /* ---------------------------------------------------------------------- rellocate a string if n > 0: set max >= n in increments of DELTALINE if n = 0: just increment max by DELTALINE ------------------------------------------------------------------------- */ void Input::reallocate(char *&str, int &max, int n) { if (n) { while (n > max) max += DELTALINE; } else max += DELTALINE; str = (char *) memory->srealloc(str,max*sizeof(char),"input:str"); } /* ---------------------------------------------------------------------- process a single parsed command return 0 if successful, -1 if did not recognize command ------------------------------------------------------------------------- */ int Input::execute_command() { int flag = 1; if (!strcmp(command,"clear")) clear(); else if (!strcmp(command,"echo")) echo(); else if (!strcmp(command,"if")) ifthenelse(); else if (!strcmp(command,"include")) include(); else if (!strcmp(command,"jump")) jump(); else if (!strcmp(command,"label")) label(); else if (!strcmp(command,"log")) log(); else if (!strcmp(command,"next")) next_command(); else if (!strcmp(command,"partition")) partition(); else if (!strcmp(command,"print")) print(); else if (!strcmp(command,"quit")) quit(); else if (!strcmp(command,"shell")) shell(); else if 
(!strcmp(command,"variable")) variable_command(); else if (!strcmp(command,"angle_coeff")) angle_coeff(); else if (!strcmp(command,"angle_style")) angle_style(); else if (!strcmp(command,"atom_modify")) atom_modify(); else if (!strcmp(command,"atom_style")) atom_style(); else if (!strcmp(command,"bond_coeff")) bond_coeff(); else if (!strcmp(command,"bond_style")) bond_style(); else if (!strcmp(command,"boundary")) boundary(); else if (!strcmp(command,"box")) box(); else if (!strcmp(command,"comm_modify")) comm_modify(); else if (!strcmp(command,"comm_style")) comm_style(); else if (!strcmp(command,"compute")) compute(); else if (!strcmp(command,"compute_modify")) compute_modify(); else if (!strcmp(command,"dielectric")) dielectric(); else if (!strcmp(command,"dihedral_coeff")) dihedral_coeff(); else if (!strcmp(command,"dihedral_style")) dihedral_style(); else if (!strcmp(command,"dimension")) dimension(); else if (!strcmp(command,"dump")) dump(); else if (!strcmp(command,"dump_modify")) dump_modify(); else if (!strcmp(command,"fix")) fix(); else if (!strcmp(command,"fix_modify")) fix_modify(); else if (!strcmp(command,"group")) group_command(); else if (!strcmp(command,"improper_coeff")) improper_coeff(); else if (!strcmp(command,"improper_style")) improper_style(); else if (!strcmp(command,"kspace_modify")) kspace_modify(); else if (!strcmp(command,"kspace_style")) kspace_style(); else if (!strcmp(command,"lattice")) lattice(); else if (!strcmp(command,"mass")) mass(); else if (!strcmp(command,"min_modify")) min_modify(); else if (!strcmp(command,"min_style")) min_style(); else if (!strcmp(command,"molecule")) molecule(); else if (!strcmp(command,"neigh_modify")) neigh_modify(); else if (!strcmp(command,"neighbor")) neighbor_command(); else if (!strcmp(command,"newton")) newton(); else if (!strcmp(command,"package")) package(); else if (!strcmp(command,"pair_coeff")) pair_coeff(); else if (!strcmp(command,"pair_modify")) pair_modify(); else if 
(!strcmp(command,"pair_style")) pair_style(); else if (!strcmp(command,"pair_write")) pair_write(); else if (!strcmp(command,"processors")) processors(); else if (!strcmp(command,"region")) region(); else if (!strcmp(command,"reset_timestep")) reset_timestep(); else if (!strcmp(command,"restart")) restart(); else if (!strcmp(command,"run_style")) run_style(); else if (!strcmp(command,"special_bonds")) special_bonds(); else if (!strcmp(command,"suffix")) suffix(); else if (!strcmp(command,"thermo")) thermo(); else if (!strcmp(command,"thermo_modify")) thermo_modify(); else if (!strcmp(command,"thermo_style")) thermo_style(); else if (!strcmp(command,"timestep")) timestep(); else if (!strcmp(command,"uncompute")) uncompute(); else if (!strcmp(command,"undump")) undump(); else if (!strcmp(command,"unfix")) unfix(); else if (!strcmp(command,"units")) units(); else flag = 0; // return if command was listed above if (flag) return 0; // invoke commands added via style_command.h if (command_map->find(command) != command_map->end()) { CommandCreator command_creator = (*command_map)[command]; command_creator(lmp,narg,arg); return 0; } // unrecognized command return -1; } /* ---------------------------------------------------------------------- one instance per command in style_command.h ------------------------------------------------------------------------- */ template void Input::command_creator(LAMMPS *lmp, int narg, char **arg) { T cmd(lmp); cmd.command(narg,arg); } /* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */ void Input::clear() { if (narg > 0) error->all(FLERR,"Illegal clear command"); lmp->destroy(); lmp->create(); lmp->post_create(); } /* ---------------------------------------------------------------------- 
*/ void Input::echo() { if (narg != 1) error->all(FLERR,"Illegal echo command"); if (strcmp(arg[0],"none") == 0) { echo_screen = 0; echo_log = 0; } else if (strcmp(arg[0],"screen") == 0) { echo_screen = 1; echo_log = 0; } else if (strcmp(arg[0],"log") == 0) { echo_screen = 0; echo_log = 1; } else if (strcmp(arg[0],"both") == 0) { echo_screen = 1; echo_log = 1; } else error->all(FLERR,"Illegal echo command"); } /* ---------------------------------------------------------------------- */ void Input::ifthenelse() { if (narg < 3) error->all(FLERR,"Illegal if command"); // substitute for variables in Boolean expression for "if" // in case expression was enclosed in quotes // must substitute on copy of arg else will step on subsequent args int n = strlen(arg[0]) + 1; if (n > maxline) reallocate(line,maxline,n); strcpy(line,arg[0]); substitute(line,work,maxline,maxwork,0); // evaluate Boolean expression for "if" double btest = variable->evaluate_boolean(line); // bound "then" commands if (strcmp(arg[1],"then") != 0) error->all(FLERR,"Illegal if command"); int first = 2; int iarg = first; while (iarg < narg && (strcmp(arg[iarg],"elif") != 0 && strcmp(arg[iarg],"else") != 0)) iarg++; int last = iarg-1; // execute "then" commands // make copies of all arg string commands // required because re-parsing a command via one() will wipe out args if (btest != 0.0) { int ncommands = last-first + 1; if (ncommands <= 0) error->all(FLERR,"Illegal if command"); char **commands = new char*[ncommands]; ncommands = 0; for (int i = first; i <= last; i++) { int n = strlen(arg[i]) + 1; if (n == 1) error->all(FLERR,"Illegal if command"); commands[ncommands] = new char[n]; strcpy(commands[ncommands],arg[i]); ncommands++; } ifthenelse_flag = 1; for (int i = 0; i < ncommands; i++) one(commands[i]); ifthenelse_flag = 0; for (int i = 0; i < ncommands; i++) delete [] commands[i]; delete [] commands; return; } // done if no "elif" or "else" if (iarg == narg) return; // check "elif" or "else" until 
find commands to execute // substitute for variables and evaluate Boolean expression for "elif" // must substitute on copy of arg else will step on subsequent args // bound and execute "elif" or "else" commands while (iarg != narg) { if (iarg+2 > narg) error->all(FLERR,"Illegal if command"); if (strcmp(arg[iarg],"elif") == 0) { n = strlen(arg[iarg+1]) + 1; if (n > maxline) reallocate(line,maxline,n); strcpy(line,arg[iarg+1]); substitute(line,work,maxline,maxwork,0); btest = variable->evaluate_boolean(line); first = iarg+2; } else { btest = 1.0; first = iarg+1; } iarg = first; while (iarg < narg && (strcmp(arg[iarg],"elif") != 0 && strcmp(arg[iarg],"else") != 0)) iarg++; last = iarg-1; if (btest == 0.0) continue; int ncommands = last-first + 1; if (ncommands <= 0) error->all(FLERR,"Illegal if command"); char **commands = new char*[ncommands]; ncommands = 0; for (int i = first; i <= last; i++) { int n = strlen(arg[i]) + 1; if (n == 1) error->all(FLERR,"Illegal if command"); commands[ncommands] = new char[n]; strcpy(commands[ncommands],arg[i]); ncommands++; } // execute the list of commands ifthenelse_flag = 1; for (int i = 0; i < ncommands; i++) one(commands[i]); ifthenelse_flag = 0; // clean up for (int i = 0; i < ncommands; i++) delete [] commands[i]; delete [] commands; return; } } /* ---------------------------------------------------------------------- */ void Input::include() { if (narg != 1) error->all(FLERR,"Illegal include command"); // do not allow include inside an if command // NOTE: this check will fail if a 2nd if command was inside the if command // and came before the include if (ifthenelse_flag) error->all(FLERR,"Cannot use include command within an if command"); if (me == 0) { if (nfile == maxfile) { maxfile++; infiles = (FILE **) memory->srealloc(infiles,maxfile*sizeof(FILE *),"input:infiles"); } infile = fopen(arg[0],"r"); if (infile == NULL) { char str[128]; sprintf(str,"Cannot open input script %s",arg[0]); error->one(FLERR,str); } 
infiles[nfile++] = infile; } } /* ---------------------------------------------------------------------- */ void Input::jump() { if (narg < 1 || narg > 2) error->all(FLERR,"Illegal jump command"); if (jump_skip) { jump_skip = 0; return; } if (me == 0) { if (strcmp(arg[0],"SELF") == 0) rewind(infile); else { if (infile && infile != stdin) fclose(infile); infile = fopen(arg[0],"r"); if (infile == NULL) { char str[128]; sprintf(str,"Cannot open input script %s",arg[0]); error->one(FLERR,str); } infiles[nfile-1] = infile; } } if (narg == 2) { label_active = 1; if (labelstr) delete [] labelstr; int n = strlen(arg[1]) + 1; labelstr = new char[n]; strcpy(labelstr,arg[1]); } } /* ---------------------------------------------------------------------- */ void Input::label() { if (narg != 1) error->all(FLERR,"Illegal label command"); if (label_active && strcmp(labelstr,arg[0]) == 0) label_active = 0; } /* ---------------------------------------------------------------------- */ void Input::log() { if (narg > 2) error->all(FLERR,"Illegal log command"); int appendflag = 0; if (narg == 2) { if (strcmp(arg[1],"append") == 0) appendflag = 1; else error->all(FLERR,"Illegal log command"); } if (me == 0) { if (logfile) fclose(logfile); if (strcmp(arg[0],"none") == 0) logfile = NULL; else { if (appendflag) logfile = fopen(arg[0],"a"); else logfile = fopen(arg[0],"w"); if (logfile == NULL) { char str[128]; sprintf(str,"Cannot open logfile %s",arg[0]); error->one(FLERR,str); } } if (universe->nworlds == 1) universe->ulogfile = logfile; } } /* ---------------------------------------------------------------------- */ void Input::next_command() { if (variable->next(narg,arg)) jump_skip = 1; } /* ---------------------------------------------------------------------- */ void Input::partition() { if (narg < 3) error->all(FLERR,"Illegal partition command"); int yesflag; if (strcmp(arg[0],"yes") == 0) yesflag = 1; else if (strcmp(arg[0],"no") == 0) yesflag = 0; else error->all(FLERR,"Illegal 
partition command"); int ilo,ihi; force->bounds(arg[1],universe->nworlds,ilo,ihi); // copy original line to copy, since will use strtok() on it // ptr = start of 4th word strcpy(copy,line); char *ptr = strtok(copy," \t\n\r\f"); ptr = strtok(NULL," \t\n\r\f"); ptr = strtok(NULL," \t\n\r\f"); ptr += strlen(ptr) + 1; ptr += strspn(ptr," \t\n\r\f"); // execute the remaining command line on requested partitions if (yesflag) { if (universe->iworld+1 >= ilo && universe->iworld+1 <= ihi) one(ptr); } else { if (universe->iworld+1 < ilo || universe->iworld+1 > ihi) one(ptr); } } /* ---------------------------------------------------------------------- */ void Input::print() { if (narg < 1) error->all(FLERR,"Illegal print command"); // copy 1st arg back into line (copy is being used) // check maxline since arg[0] could have been exanded by variables // substitute for $ variables (no printing) and print arg int n = strlen(arg[0]) + 1; if (n > maxline) reallocate(line,maxline,n); strcpy(line,arg[0]); substitute(line,work,maxline,maxwork,0); // parse optional args FILE *fp = NULL; int screenflag = 1; int iarg = 1; while (iarg < narg) { if (strcmp(arg[iarg],"file") == 0 || strcmp(arg[iarg],"append") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal print command"); if (me == 0) { if (strcmp(arg[iarg],"file") == 0) fp = fopen(arg[iarg+1],"w"); else fp = fopen(arg[iarg+1],"a"); if (fp == NULL) { char str[128]; sprintf(str,"Cannot open print file %s",arg[iarg+1]); error->one(FLERR,str); } } iarg += 2; } else if (strcmp(arg[iarg],"screen") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal print command"); if (strcmp(arg[iarg+1],"yes") == 0) screenflag = 1; else if (strcmp(arg[iarg+1],"no") == 0) screenflag = 0; else error->all(FLERR,"Illegal print command"); iarg += 2; } else error->all(FLERR,"Illegal print command"); } if (me == 0) { if (screenflag && screen) fprintf(screen,"%s\n",line); if (screenflag && logfile) fprintf(logfile,"%s\n",line); if (fp) { 
fprintf(fp,"%s\n",line); fclose(fp); } } } /* ---------------------------------------------------------------------- */ void Input::quit() { if (narg) error->all(FLERR,"Illegal quit command"); error->done(); } /* ---------------------------------------------------------------------- */ void Input::shell() { if (narg < 1) error->all(FLERR,"Illegal shell command"); if (strcmp(arg[0],"cd") == 0) { if (narg != 2) error->all(FLERR,"Illegal shell cd command"); chdir(arg[1]); } else if (strcmp(arg[0],"mkdir") == 0) { if (narg < 2) error->all(FLERR,"Illegal shell mkdir command"); if (me == 0) for (int i = 1; i < narg; i++) { #if defined(_WIN32) _mkdir(arg[i]); #else mkdir(arg[i], S_IRWXU | S_IRGRP | S_IXGRP); #endif } } else if (strcmp(arg[0],"mv") == 0) { if (narg != 3) error->all(FLERR,"Illegal shell mv command"); if (me == 0) rename(arg[1],arg[2]); } else if (strcmp(arg[0],"rm") == 0) { if (narg < 2) error->all(FLERR,"Illegal shell rm command"); if (me == 0) for (int i = 1; i < narg; i++) unlink(arg[i]); } else if (strcmp(arg[0],"rmdir") == 0) { if (narg < 2) error->all(FLERR,"Illegal shell rmdir command"); if (me == 0) for (int i = 1; i < narg; i++) rmdir(arg[i]); } else if (strcmp(arg[0],"putenv") == 0) { if (narg < 2) error->all(FLERR,"Illegal shell putenv command"); for (int i = 1; i < narg; i++) { char *ptr = strdup(arg[i]); #ifdef _WIN32 if (ptr != NULL) _putenv(ptr); #else if (ptr != NULL) putenv(ptr); #endif } // use work string to concat args back into one string separated by spaces // invoke string in shell via system() } else { int n = 0; for (int i = 0; i < narg; i++) n += strlen(arg[i]) + 1; if (n > maxwork) reallocate(work,maxwork,n); strcpy(work,arg[0]); for (int i = 1; i < narg; i++) { strcat(work," "); strcat(work,arg[i]); } if (me == 0) system(work); } } /* ---------------------------------------------------------------------- */ void Input::variable_command() { variable->set(narg,arg); } /* 
---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- one function for each LAMMPS-specific input script command ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */ void Input::angle_coeff() { if (domain->box_exist == 0) error->all(FLERR,"Angle_coeff command before simulation box is defined"); if (force->angle == NULL) error->all(FLERR,"Angle_coeff command before angle_style is defined"); if (atom->avec->angles_allow == 0) error->all(FLERR,"Angle_coeff command when no angles allowed"); force->angle->coeff(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::angle_style() { if (narg < 1) error->all(FLERR,"Illegal angle_style command"); if (atom->avec->angles_allow == 0) error->all(FLERR,"Angle_style command when no angles allowed"); - force->create_angle(arg[0],lmp->suffix); + force->create_angle(arg[0],1); if (force->angle) force->angle->settings(narg-1,&arg[1]); } /* ---------------------------------------------------------------------- */ void Input::atom_modify() { atom->modify_params(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::atom_style() { if (narg < 1) error->all(FLERR,"Illegal atom_style command"); if (domain->box_exist) error->all(FLERR,"Atom_style command after simulation box is defined"); - atom->create_avec(arg[0],narg-1,&arg[1],lmp->suffix); + atom->create_avec(arg[0],narg-1,&arg[1],1); } /* ---------------------------------------------------------------------- */ void Input::bond_coeff() { if (domain->box_exist == 0) error->all(FLERR,"Bond_coeff command before simulation box is defined"); if 
(force->bond == NULL) error->all(FLERR,"Bond_coeff command before bond_style is defined"); if (atom->avec->bonds_allow == 0) error->all(FLERR,"Bond_coeff command when no bonds allowed"); force->bond->coeff(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::bond_style() { if (narg < 1) error->all(FLERR,"Illegal bond_style command"); if (atom->avec->bonds_allow == 0) error->all(FLERR,"Bond_style command when no bonds allowed"); - force->create_bond(arg[0],lmp->suffix); + force->create_bond(arg[0],1); if (force->bond) force->bond->settings(narg-1,&arg[1]); } /* ---------------------------------------------------------------------- */ void Input::boundary() { if (domain->box_exist) error->all(FLERR,"Boundary command after simulation box is defined"); domain->set_boundary(narg,arg,0); } /* ---------------------------------------------------------------------- */ void Input::box() { if (domain->box_exist) error->all(FLERR,"Box command after simulation box is defined"); domain->set_box(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::comm_modify() { comm->modify_params(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::comm_style() { if (narg < 1) error->all(FLERR,"Illegal comm_style command"); if (strcmp(arg[0],"brick") == 0) { if (comm->style == 0) return; Comm *oldcomm = comm; comm = new CommBrick(lmp,oldcomm); delete oldcomm; } else if (strcmp(arg[0],"tiled") == 0) { if (comm->style == 1) return; Comm *oldcomm = comm; comm = new CommTiled(lmp,oldcomm); delete oldcomm; } else error->all(FLERR,"Illegal comm_style command"); } /* ---------------------------------------------------------------------- */ void Input::compute() { - modify->add_compute(narg,arg,lmp->suffix); + modify->add_compute(narg,arg,1); } /* ---------------------------------------------------------------------- */ void Input::compute_modify() { 
modify->modify_compute(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::dielectric() { if (narg != 1) error->all(FLERR,"Illegal dielectric command"); force->dielectric = force->numeric(FLERR,arg[0]); } /* ---------------------------------------------------------------------- */ void Input::dihedral_coeff() { if (domain->box_exist == 0) error->all(FLERR,"Dihedral_coeff command before simulation box is defined"); if (force->dihedral == NULL) error->all(FLERR,"Dihedral_coeff command before dihedral_style is defined"); if (atom->avec->dihedrals_allow == 0) error->all(FLERR,"Dihedral_coeff command when no dihedrals allowed"); force->dihedral->coeff(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::dihedral_style() { if (narg < 1) error->all(FLERR,"Illegal dihedral_style command"); if (atom->avec->dihedrals_allow == 0) error->all(FLERR,"Dihedral_style command when no dihedrals allowed"); - force->create_dihedral(arg[0],lmp->suffix); + force->create_dihedral(arg[0],1); if (force->dihedral) force->dihedral->settings(narg-1,&arg[1]); } /* ---------------------------------------------------------------------- */ void Input::dimension() { if (narg != 1) error->all(FLERR,"Illegal dimension command"); if (domain->box_exist) error->all(FLERR,"Dimension command after simulation box is defined"); domain->dimension = force->inumeric(FLERR,arg[0]); if (domain->dimension != 2 && domain->dimension != 3) error->all(FLERR,"Illegal dimension command"); // must reset default extra_dof of all computes // since some were created before dimension command is encountered for (int i = 0; i < modify->ncompute; i++) modify->compute[i]->reset_extra_dof(); } /* ---------------------------------------------------------------------- */ void Input::dump() { output->add_dump(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::dump_modify() { 
output->modify_dump(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::fix() { - modify->add_fix(narg,arg,lmp->suffix); + modify->add_fix(narg,arg,1); } /* ---------------------------------------------------------------------- */ void Input::fix_modify() { modify->modify_fix(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::group_command() { group->assign(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::improper_coeff() { if (domain->box_exist == 0) error->all(FLERR,"Improper_coeff command before simulation box is defined"); if (force->improper == NULL) error->all(FLERR,"Improper_coeff command before improper_style is defined"); if (atom->avec->impropers_allow == 0) error->all(FLERR,"Improper_coeff command when no impropers allowed"); force->improper->coeff(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::improper_style() { if (narg < 1) error->all(FLERR,"Illegal improper_style command"); if (atom->avec->impropers_allow == 0) error->all(FLERR,"Improper_style command when no impropers allowed"); - force->create_improper(arg[0],lmp->suffix); + force->create_improper(arg[0],1); if (force->improper) force->improper->settings(narg-1,&arg[1]); } /* ---------------------------------------------------------------------- */ void Input::kspace_modify() { if (force->kspace == NULL) error->all(FLERR,"KSpace style has not yet been set"); force->kspace->modify_params(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::kspace_style() { - force->create_kspace(narg,arg,lmp->suffix); + force->create_kspace(narg,arg,1); } /* ---------------------------------------------------------------------- */ void Input::lattice() { domain->set_lattice(narg,arg); } /* ---------------------------------------------------------------------- */ 
void Input::mass() { if (narg != 2) error->all(FLERR,"Illegal mass command"); if (domain->box_exist == 0) error->all(FLERR,"Mass command before simulation box is defined"); atom->set_mass(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::min_modify() { update->minimize->modify_params(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::min_style() { if (domain->box_exist == 0) error->all(FLERR,"Min_style command before simulation box is defined"); update->create_minimize(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::molecule() { atom->add_molecule(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::neigh_modify() { neighbor->modify_params(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::neighbor_command() { neighbor->set(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::newton() { int newton_pair=1,newton_bond=1; if (narg == 1) { if (strcmp(arg[0],"off") == 0) newton_pair = newton_bond = 0; else if (strcmp(arg[0],"on") == 0) newton_pair = newton_bond = 1; else error->all(FLERR,"Illegal newton command"); } else if (narg == 2) { if (strcmp(arg[0],"off") == 0) newton_pair = 0; else if (strcmp(arg[0],"on") == 0) newton_pair= 1; else error->all(FLERR,"Illegal newton command"); if (strcmp(arg[1],"off") == 0) newton_bond = 0; else if (strcmp(arg[1],"on") == 0) newton_bond = 1; else error->all(FLERR,"Illegal newton command"); } else error->all(FLERR,"Illegal newton command"); force->newton_pair = newton_pair; if (domain->box_exist && (newton_bond != force->newton_bond)) error->all(FLERR,"Newton bond change after simulation box is defined"); force->newton_bond = newton_bond; if (newton_pair || newton_bond) force->newton = 1; else force->newton = 0; } /* 
---------------------------------------------------------------------- */ void Input::package() { if (domain->box_exist) error->all(FLERR,"Package command after simulation box is defined"); if (narg < 1) error->all(FLERR,"Illegal package command"); if (strcmp(arg[0],"cuda") == 0) { if (!lmp->cuda) error->all(FLERR,"Package cuda command without USER-CUDA installed"); lmp->cuda->accelerator(narg-1,&arg[1]); } else if (strcmp(arg[0],"gpu") == 0) { char **fixarg = new char*[2+narg]; fixarg[0] = (char *) "package_gpu"; fixarg[1] = (char *) "all"; fixarg[2] = (char *) "GPU"; for (int i = 1; i < narg; i++) fixarg[i+2] = arg[i]; - modify->add_fix(2+narg,fixarg,NULL); + modify->add_fix(2+narg,fixarg); delete [] fixarg; force->newton_pair = 0; } else if (strcmp(arg[0],"kokkos") == 0) { if (!lmp->kokkos) error->all(FLERR,"Package kokkos command without KOKKOS installed"); lmp->kokkos->accelerator(narg-1,&arg[1]); } else if (strcmp(arg[0],"omp") == 0) { char **fixarg = new char*[2+narg]; fixarg[0] = (char *) "package_omp"; fixarg[1] = (char *) "all"; fixarg[2] = (char *) "OMP"; for (int i = 1; i < narg; i++) fixarg[i+2] = arg[i]; - modify->add_fix(2+narg,fixarg,NULL); + modify->add_fix(2+narg,fixarg); delete [] fixarg; + } else if (strcmp(arg[0],"intel") == 0) { + + // add omp package for non-pair routines + + /* + char **fixarg = new char*[2+narg]; + fixarg[0] = (char *) "package_omp"; + fixarg[1] = (char *) "all"; + fixarg[2] = (char *) "OMP"; + int omp_narg = 3; + if (narg > 1) { + fixarg[3] = arg[1]; + omp_narg++; + if (narg > 2) + for (int i = 2; i < narg; i++) + if (strcmp(arg[i],"mixed") == 0) { + fixarg[4] = arg[i]; + omp_narg++; + } + } + modify->add_fix(omp_narg,fixarg); + + // add intel package for neighbor and pair routines + */ + + char **fixarg = new char*[2+narg]; + fixarg[0] = (char *) "package_intel"; + fixarg[1] = (char *) "all"; + fixarg[2] = (char *) "Intel"; + for (int i = 1; i < narg; i++) fixarg[i+2] = arg[i]; + modify->add_fix(2+narg,fixarg); + delete 
[] fixarg; + + /* + // if running with offload, set run_style to verlet/intel + + #ifdef LMP_INTEL_OFFLOAD + #ifdef __INTEL_OFFLOAD + char *str; + str = (char *) "verlet/intel"; + update->create_integrate(1,&str,0); + #endif + #endif + */ + } else error->all(FLERR,"Illegal package command"); } /* ---------------------------------------------------------------------- */ void Input::pair_coeff() { if (domain->box_exist == 0) error->all(FLERR,"Pair_coeff command before simulation box is defined"); if (force->pair == NULL) error->all(FLERR,"Pair_coeff command before pair_style is defined"); force->pair->coeff(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::pair_modify() { if (force->pair == NULL) error->all(FLERR,"Pair_modify command before pair_style is defined"); force->pair->modify_params(narg,arg); } /* ---------------------------------------------------------------------- if old pair style exists and new style is same, just change settings else create new pair class ------------------------------------------------------------------------- */ void Input::pair_style() { if (narg < 1) error->all(FLERR,"Illegal pair_style command"); - if (force->pair && strcmp(arg[0],force->pair_style) == 0) { - force->pair->settings(narg-1,&arg[1]); - return; + if (force->pair) { + int match = 0; + if (strcmp(arg[0],force->pair_style) == 0) match = 1; + if (!match && lmp->suffix_enable) { + char estyle[256]; + if (lmp->suffix) { + sprintf(estyle,"%s/%s",arg[0],lmp->suffix); + if (strcmp(estyle,force->pair_style) == 0) match = 1; + } + if (lmp->suffix2) { + sprintf(estyle,"%s/%s",arg[0],lmp->suffix2); + if (strcmp(estyle,force->pair_style) == 0) match = 1; + } + } + if (match) { + force->pair->settings(narg-1,&arg[1]); + return; + } } - force->create_pair(arg[0],lmp->suffix); + + force->create_pair(arg[0],1); if (force->pair) force->pair->settings(narg-1,&arg[1]); } /* 
---------------------------------------------------------------------- */ void Input::pair_write() { if (force->pair == NULL) error->all(FLERR,"Pair_write command before pair_style is defined"); force->pair->write_file(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::processors() { if (domain->box_exist) error->all(FLERR,"Processors command after simulation box is defined"); comm->set_processors(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::region() { domain->add_region(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::reset_timestep() { update->reset_timestep(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::restart() { output->create_restart(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::run_style() { if (domain->box_exist == 0) error->all(FLERR,"Run_style command before simulation box is defined"); - update->create_integrate(narg,arg,lmp->suffix); + update->create_integrate(narg,arg,1); } /* ---------------------------------------------------------------------- */ void Input::special_bonds() { // store 1-3,1-4 and dihedral/extra flag values before change // change in 1-2 coeffs will not change the special list double lj2 = force->special_lj[2]; double lj3 = force->special_lj[3]; double coul2 = force->special_coul[2]; double coul3 = force->special_coul[3]; int angle = force->special_angle; int dihedral = force->special_dihedral; int extra = force->special_extra; force->set_special(narg,arg); // if simulation box defined and saved values changed, redo special list if (domain->box_exist && atom->molecular == 1) { if (lj2 != force->special_lj[2] || lj3 != force->special_lj[3] || coul2 != force->special_coul[2] || coul3 != force->special_coul[3] || angle != force->special_angle || dihedral != 
force->special_dihedral || extra != force->special_extra) { Special special(lmp); special.build(); } } } /* ---------------------------------------------------------------------- */ void Input::suffix() { if (narg != 1) error->all(FLERR,"Illegal suffix command"); if (strcmp(arg[0],"off") == 0) lmp->suffix_enable = 0; else if (strcmp(arg[0],"on") == 0) lmp->suffix_enable = 1; else { delete [] lmp->suffix; int n = strlen(arg[0]) + 1; lmp->suffix = new char[n]; strcpy(lmp->suffix,arg[0]); + // set 2nd suffix = "omp" when suffix = "intel" + if (strcmp(lmp->suffix,"intel") == 0) { + delete [] lmp->suffix2; + lmp->suffix2 = new char[4]; + strcpy(lmp->suffix2,"omp"); + } lmp->suffix_enable = 1; } } /* ---------------------------------------------------------------------- */ void Input::thermo() { output->set_thermo(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::thermo_modify() { output->thermo->modify_params(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::thermo_style() { output->create_thermo(narg,arg); } /* ---------------------------------------------------------------------- */ void Input::timestep() { if (narg != 1) error->all(FLERR,"Illegal timestep command"); update->dt = force->numeric(FLERR,arg[0]); } /* ---------------------------------------------------------------------- */ void Input::uncompute() { if (narg != 1) error->all(FLERR,"Illegal uncompute command"); modify->delete_compute(arg[0]); } /* ---------------------------------------------------------------------- */ void Input::undump() { if (narg != 1) error->all(FLERR,"Illegal undump command"); output->delete_dump(arg[0]); } /* ---------------------------------------------------------------------- */ void Input::unfix() { if (narg != 1) error->all(FLERR,"Illegal unfix command"); modify->delete_fix(arg[0]); } /* ---------------------------------------------------------------------- */ void 
Input::units() { if (narg != 1) error->all(FLERR,"Illegal units command"); if (domain->box_exist) error->all(FLERR,"Units command after simulation box is defined"); update->set_units(arg[0]); } diff --git a/src/lammps.cpp b/src/lammps.cpp index d1e84cf9b..69945a805 100644 --- a/src/lammps.cpp +++ b/src/lammps.cpp @@ -1,821 +1,835 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #include "mpi.h" #include "string.h" #include "ctype.h" #include "lammps.h" #include "style_angle.h" #include "style_atom.h" #include "style_bond.h" #include "style_command.h" #include "style_compute.h" #include "style_dihedral.h" #include "style_dump.h" #include "style_fix.h" #include "style_improper.h" #include "style_integrate.h" #include "style_kspace.h" #include "style_minimize.h" #include "style_pair.h" #include "style_region.h" #include "universe.h" #include "input.h" #include "atom.h" #include "update.h" #include "neighbor.h" #include "comm.h" #include "comm_brick.h" #include "domain.h" #include "force.h" #include "modify.h" #include "group.h" #include "output.h" #include "citeme.h" #include "accelerator_cuda.h" #include "accelerator_kokkos.h" #include "accelerator_omp.h" +#include "accelerator_intel.h" #include "timer.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- start up LAMMPS allocate fundamental classes (memory, error, universe, input) parse 
input switches initialize communicators, screen & logfile output input is allocated at end after MPI info is setup ------------------------------------------------------------------------- */ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator) { memory = new Memory(this); error = new Error(this); universe = new Universe(this,communicator); output = NULL; screen = NULL; logfile = NULL; infile = NULL; // parse input switches int inflag = 0; int screenflag = 0; int logflag = 0; int partscreenflag = 0; int partlogflag = 0; int cudaflag = 0; int kokkosflag = 0; int restartflag = 0; int restartremapflag = 0; int citeflag = 1; int helpflag = 0; - suffix = NULL; + suffix = suffix2 = NULL; suffix_enable = 0; char *rfile = NULL; char *dfile = NULL; int wdfirst,wdlast; int kkfirst,kklast; int iarg = 1; while (iarg < narg) { if (strcmp(arg[iarg],"-partition") == 0 || strcmp(arg[iarg],"-p") == 0) { universe->existflag = 1; if (iarg+2 > narg) error->universe_all(FLERR,"Invalid command-line argument"); iarg++; while (iarg < narg && arg[iarg][0] != '-') { universe->add_world(arg[iarg]); iarg++; } } else if (strcmp(arg[iarg],"-in") == 0 || strcmp(arg[iarg],"-i") == 0) { if (iarg+2 > narg) error->universe_all(FLERR,"Invalid command-line argument"); inflag = iarg + 1; iarg += 2; } else if (strcmp(arg[iarg],"-screen") == 0 || strcmp(arg[iarg],"-sc") == 0) { if (iarg+2 > narg) error->universe_all(FLERR,"Invalid command-line argument"); screenflag = iarg + 1; iarg += 2; } else if (strcmp(arg[iarg],"-log") == 0 || strcmp(arg[iarg],"-l") == 0) { if (iarg+2 > narg) error->universe_all(FLERR,"Invalid command-line argument"); logflag = iarg + 1; iarg += 2; } else if (strcmp(arg[iarg],"-var") == 0 || strcmp(arg[iarg],"-v") == 0) { if (iarg+3 > narg) error->universe_all(FLERR,"Invalid command-line argument"); iarg += 3; while (iarg < narg && arg[iarg][0] != '-') iarg++; } else if (strcmp(arg[iarg],"-echo") == 0 || strcmp(arg[iarg],"-e") == 0) { if (iarg+2 > narg) 
error->universe_all(FLERR,"Invalid command-line argument"); iarg += 2; } else if (strcmp(arg[iarg],"-pscreen") == 0 || strcmp(arg[iarg],"-ps") == 0) { if (iarg+2 > narg) error->universe_all(FLERR,"Invalid command-line argument"); partscreenflag = iarg + 1; iarg += 2; } else if (strcmp(arg[iarg],"-plog") == 0 || strcmp(arg[iarg],"-pl") == 0) { if (iarg+2 > narg) error->universe_all(FLERR,"Invalid command-line argument"); partlogflag = iarg + 1; iarg += 2; } else if (strcmp(arg[iarg],"-cuda") == 0 || strcmp(arg[iarg],"-c") == 0) { if (iarg+2 > narg) error->universe_all(FLERR,"Invalid command-line argument"); if (strcmp(arg[iarg+1],"on") == 0) cudaflag = 1; else if (strcmp(arg[iarg+1],"off") == 0) cudaflag = 0; else error->universe_all(FLERR,"Invalid command-line argument"); iarg += 2; } else if (strcmp(arg[iarg],"-kokkos") == 0 || strcmp(arg[iarg],"-k") == 0) { if (iarg+2 > narg) error->universe_all(FLERR,"Invalid command-line argument"); if (strcmp(arg[iarg+1],"on") == 0) kokkosflag = 1; else if (strcmp(arg[iarg+1],"off") == 0) kokkosflag = 0; else error->universe_all(FLERR,"Invalid command-line argument"); iarg += 2; // delimit any extra args for the Kokkos instantiation kkfirst = iarg; while (iarg < narg && arg[iarg][0] != '-') iarg++; kklast = iarg; } else if (strcmp(arg[iarg],"-suffix") == 0 || strcmp(arg[iarg],"-sf") == 0) { if (iarg+2 > narg) error->universe_all(FLERR,"Invalid command-line argument"); delete [] suffix; int n = strlen(arg[iarg+1]) + 1; suffix = new char[n]; strcpy(suffix,arg[iarg+1]); + // set 2nd suffix = "omp" when suffix = "intel" + if (strcmp(suffix,"intel") == 0) { + suffix2 = new char[4]; + strcpy(suffix2,"omp"); + } suffix_enable = 1; iarg += 2; } else if (strcmp(arg[iarg],"-reorder") == 0 || strcmp(arg[iarg],"-ro") == 0) { if (iarg+3 > narg) error->universe_all(FLERR,"Invalid command-line argument"); if (universe->existflag) error->universe_all(FLERR,"Cannot use -reorder after -partition"); universe->reorder(arg[iarg+1],arg[iarg+2]); 
iarg += 3; } else if (strcmp(arg[iarg],"-restart") == 0 || strcmp(arg[iarg],"-r") == 0) { if (iarg+3 > narg) error->universe_all(FLERR,"Invalid command-line argument"); restartflag = 1; rfile = arg[iarg+1]; dfile = arg[iarg+2]; // check for restart remap flag if (strcmp(dfile,"remap") == 0) { if (iarg+4 > narg) error->universe_all(FLERR,"Invalid command-line argument"); restartremapflag = 1; dfile = arg[iarg+3]; iarg++; } iarg += 3; // delimit any extra args for the write_data command wdfirst = iarg; while (iarg < narg && arg[iarg][0] != '-') iarg++; wdlast = iarg; } else if (strcmp(arg[iarg],"-nocite") == 0 || strcmp(arg[iarg],"-nc") == 0) { citeflag = 0; iarg++; } else if (strcmp(arg[iarg],"-help") == 0 || strcmp(arg[iarg],"-h") == 0) { if (iarg+1 > narg) error->universe_all(FLERR,"Invalid command-line argument"); helpflag = 1; citeflag = 0; iarg += 1; } else error->universe_all(FLERR,"Invalid command-line argument"); } // if no partition command-line switch, universe is one world with all procs if (universe->existflag == 0) universe->add_world(NULL); // sum of procs in all worlds must equal total # of procs if (!universe->consistent()) error->universe_all(FLERR,"Processor partitions are inconsistent"); // universe cannot use stdin for input file if (universe->existflag && inflag == 0) error->universe_all(FLERR,"Must use -in switch with multiple partitions"); // if no partition command-line switch, cannot use -pscreen option if (universe->existflag == 0 && partscreenflag) error->universe_all(FLERR,"Can only use -pscreen with multiple partitions"); // if no partition command-line switch, cannot use -plog option if (universe->existflag == 0 && partlogflag) error->universe_all(FLERR,"Can only use -plog with multiple partitions"); // set universe screen and logfile if (universe->me == 0) { if (screenflag == 0) universe->uscreen = stdout; else if (strcmp(arg[screenflag],"none") == 0) universe->uscreen = NULL; else { universe->uscreen = fopen(arg[screenflag],"w"); if 
(universe->uscreen == NULL) error->universe_one(FLERR,"Cannot open universe screen file"); } if (logflag == 0) { if (helpflag == 0) { universe->ulogfile = fopen("log.lammps","w"); if (universe->ulogfile == NULL) error->universe_warn(FLERR,"Cannot open log.lammps for writing"); } } else if (strcmp(arg[logflag],"none") == 0) universe->ulogfile = NULL; else { universe->ulogfile = fopen(arg[logflag],"w"); if (universe->ulogfile == NULL) error->universe_one(FLERR,"Cannot open universe log file"); } } if (universe->me > 0) { if (screenflag == 0) universe->uscreen = stdout; else universe->uscreen = NULL; universe->ulogfile = NULL; } // make universe and single world the same, since no partition switch // world inherits settings from universe // set world screen, logfile, communicator, infile // open input script if from file if (universe->existflag == 0) { screen = universe->uscreen; logfile = universe->ulogfile; world = universe->uworld; if (universe->me == 0) { if (inflag == 0) infile = stdin; else infile = fopen(arg[inflag],"r"); if (infile == NULL) { char str[128]; sprintf(str,"Cannot open input script %s",arg[inflag]); error->one(FLERR,str); } } if (universe->me == 0) { if (screen) fprintf(screen,"LAMMPS (%s)\n",universe->version); if (logfile) fprintf(logfile,"LAMMPS (%s)\n",universe->version); } // universe is one or more worlds, as setup by partition switch // split universe communicator into separate world communicators // set world screen, logfile, communicator, infile // open input script } else { int me; MPI_Comm_split(universe->uworld,universe->iworld,0,&world); MPI_Comm_rank(world,&me); if (me == 0) if (partscreenflag == 0) if (screenflag == 0) { char str[32]; sprintf(str,"screen.%d",universe->iworld); screen = fopen(str,"w"); if (screen == NULL) error->one(FLERR,"Cannot open screen file"); } else if (strcmp(arg[screenflag],"none") == 0) screen = NULL; else { char str[128]; sprintf(str,"%s.%d",arg[screenflag],universe->iworld); screen = fopen(str,"w"); if 
(screen == NULL) error->one(FLERR,"Cannot open screen file"); } else if (strcmp(arg[partscreenflag],"none") == 0) screen = NULL; else { char str[128]; sprintf(str,"%s.%d",arg[partscreenflag],universe->iworld); screen = fopen(str,"w"); if (screen == NULL) error->one(FLERR,"Cannot open screen file"); } else screen = NULL; if (me == 0) if (partlogflag == 0) if (logflag == 0) { char str[32]; sprintf(str,"log.lammps.%d",universe->iworld); logfile = fopen(str,"w"); if (logfile == NULL) error->one(FLERR,"Cannot open logfile"); } else if (strcmp(arg[logflag],"none") == 0) logfile = NULL; else { char str[128]; sprintf(str,"%s.%d",arg[logflag],universe->iworld); logfile = fopen(str,"w"); if (logfile == NULL) error->one(FLERR,"Cannot open logfile"); } else if (strcmp(arg[partlogflag],"none") == 0) logfile = NULL; else { char str[128]; sprintf(str,"%s.%d",arg[partlogflag],universe->iworld); logfile = fopen(str,"w"); if (logfile == NULL) error->one(FLERR,"Cannot open logfile"); } else logfile = NULL; if (me == 0) { infile = fopen(arg[inflag],"r"); if (infile == NULL) { char str[128]; sprintf(str,"Cannot open input script %s",arg[inflag]); error->one(FLERR,str); } } else infile = NULL; // screen and logfile messages for universe and world if (universe->me == 0) { if (universe->uscreen) { fprintf(universe->uscreen,"LAMMPS (%s)\n",universe->version); fprintf(universe->uscreen,"Running on %d partitions of processors\n", universe->nworlds); } if (universe->ulogfile) { fprintf(universe->ulogfile,"LAMMPS (%s)\n",universe->version); fprintf(universe->ulogfile,"Running on %d partitions of processors\n", universe->nworlds); } } if (me == 0) { if (screen) { fprintf(screen,"LAMMPS (%s)\n",universe->version); fprintf(screen,"Processor partition = %d\n",universe->iworld); } if (logfile) { fprintf(logfile,"LAMMPS (%s)\n",universe->version); fprintf(logfile,"Processor partition = %d\n",universe->iworld); } } } // check consistency of datatype settings in lmptype.h if (sizeof(smallint) != 
sizeof(int)) error->all(FLERR,"Smallint setting in lmptype.h is invalid"); if (sizeof(imageint) < sizeof(smallint)) error->all(FLERR,"Imageint setting in lmptype.h is invalid"); if (sizeof(tagint) < sizeof(smallint)) error->all(FLERR,"Tagint setting in lmptype.h is invalid"); if (sizeof(bigint) < sizeof(imageint) || sizeof(bigint) < sizeof(tagint)) error->all(FLERR,"Bigint setting in lmptype.h is invalid"); int mpisize; MPI_Type_size(MPI_LMP_TAGINT,&mpisize); if (mpisize != sizeof(tagint)) error->all(FLERR,"MPI_LMP_TAGINT and tagint in " "lmptype.h are not compatible"); MPI_Type_size(MPI_LMP_BIGINT,&mpisize); if (mpisize != sizeof(bigint)) error->all(FLERR,"MPI_LMP_BIGINT and bigint in " "lmptype.h are not compatible"); #ifdef LAMMPS_SMALLBIG if (sizeof(smallint) != 4 || sizeof(imageint) != 4 || sizeof(tagint) != 4 || sizeof(bigint) != 8) error->all(FLERR,"Small to big integers are not sized correctly"); #endif #ifdef LAMMPS_BIGBIG if (sizeof(smallint) != 4 || sizeof(imageint) != 8 || sizeof(tagint) != 8 || sizeof(bigint) != 8) error->all(FLERR,"Small to big integers are not sized correctly"); #endif #ifdef LAMMPS_SMALLSMALL if (sizeof(smallint) != 4 || sizeof(imageint) != 4 || sizeof(tagint) != 4 || sizeof(bigint) != 4) error->all(FLERR,"Small to big integers are not sized correctly"); #endif // error check on accelerator packages if (cudaflag == 1 && kokkosflag == 1) error->all(FLERR,"Cannot use -cuda on and -kokkos on together"); // create Cuda class if USER-CUDA installed, unless explicitly switched off // instantiation creates dummy Cuda class if USER-CUDA is not installed cuda = NULL; if (cudaflag == 1) { cuda = new Cuda(this); if (!cuda->cuda_exists) error->all(FLERR,"Cannot use -cuda on without USER-CUDA installed"); } int me; MPI_Comm_rank(world,&me); if (cuda && me == 0) error->message(FLERR,"USER-CUDA mode is enabled"); // create Kokkos class if KOKKOS installed, unless explicitly switched off // instantiation creates dummy Kokkos class if KOKKOS is not 
installed // add args between kkfirst and kklast to Kokkos instantiation kokkos = NULL; if (kokkosflag == 1) { kokkos = new KokkosLMP(this,kklast-kkfirst,&arg[kkfirst]); if (!kokkos->kokkos_exists) error->all(FLERR,"Cannot use -kokkos on without KOKKOS installed"); } MPI_Comm_rank(world,&me); if (kokkos && me == 0) error->message(FLERR,"KOKKOS mode is enabled"); // allocate CiteMe class if enabled if (citeflag) citeme = new CiteMe(this); else citeme = NULL; // allocate input class now that MPI is fully setup input = new Input(this,narg,arg); // allocate top-level classes create(); post_create(); // if helpflag set, print help and quit if (helpflag) { if (universe->me == 0 && screen) help(); error->done(); } // if restartflag set, invoke 2 commands and quit // add args between wdfirst and wdlast to write_data command // also add "noinit" to prevent write_data from doing system init if (restartflag) { char cmd[128]; sprintf(cmd,"read_restart %s\n",rfile); if (restartremapflag) strcat(cmd," remap\n"); input->one(cmd); sprintf(cmd,"write_data %s",dfile); for (iarg = wdfirst; iarg < wdlast; iarg++) sprintf(&cmd[strlen(cmd)]," %s",arg[iarg]); strcat(cmd," noinit\n"); input->one(cmd); error->done(); } } /* ---------------------------------------------------------------------- shutdown LAMMPS delete top-level classes close screen and log files in world and universe output files were already closed in destroy() delete fundamental classes ------------------------------------------------------------------------- */ LAMMPS::~LAMMPS() { destroy(); delete citeme; if (universe->nworlds == 1) { if (screen && screen != stdout) fclose(screen); if (logfile) fclose(logfile); logfile = NULL; if (screen != stdout) screen = NULL; } else { if (screen && screen != stdout) fclose(screen); if (logfile) fclose(logfile); if (universe->ulogfile) fclose(universe->ulogfile); logfile = NULL; if (screen != stdout) screen = NULL; } if (infile && infile != stdin) fclose(infile); if (world != 
universe->uworld) MPI_Comm_free(&world); delete cuda; delete kokkos; delete [] suffix; + delete [] suffix2; delete input; delete universe; delete error; delete memory; } /* ---------------------------------------------------------------------- allocate single instance of top-level classes fundamental classes are allocated in constructor some classes have package variants ------------------------------------------------------------------------- */ void LAMMPS::create() { // Comm class must be created before Atom class // so that nthreads is defined when create_avec invokes grow() if (cuda) comm = new CommCuda(this); else if (kokkos) comm = new CommKokkos(this); else comm = new CommBrick(this); if (cuda) neighbor = new NeighborCuda(this); else if (kokkos) neighbor = new NeighborKokkos(this); else neighbor = new Neighbor(this); if (cuda) domain = new DomainCuda(this); else if (kokkos) domain = new DomainKokkos(this); #ifdef LMP_USER_OMP else domain = new DomainOMP(this); #else else domain = new Domain(this); #endif if (kokkos) atom = new AtomKokkos(this); else atom = new Atom(this); - atom->create_avec("atomic",0,NULL,suffix); + atom->create_avec("atomic",0,NULL,1); group = new Group(this); force = new Force(this); // must be after group, to create temperature if (cuda) modify = new ModifyCuda(this); else if (kokkos) modify = new ModifyKokkos(this); else modify = new Modify(this); output = new Output(this); // must be after group, so "all" exists // must be after modify so can create Computes update = new Update(this); // must be after output, force, neighbor timer = new Timer(this); } /* ---------------------------------------------------------------------- invoke package-specific setup commands called from LAMMPS constructor and after clear() command only invoke if suffix is set and enabled + also check if suffix2 is set ------------------------------------------------------------------------- */ void LAMMPS::post_create() { - if (suffix && suffix_enable) { + if 
(!suffix_enable) return; + if (suffix) { if (strcmp(suffix,"gpu") == 0) input->one("package gpu force/neigh 0 0 1"); if (strcmp(suffix,"omp") == 0) input->one("package omp *"); + if (strcmp(suffix,"intel") == 0) + input->one("package intel * mixed balance -1"); + } + if (suffix2) { + if (strcmp(suffix,"omp") == 0) input->one("package omp *"); } } /* ---------------------------------------------------------------------- initialize top-level classes do not initialize Timer class, other classes like Run() do that explicitly ------------------------------------------------------------------------- */ void LAMMPS::init() { if (cuda) cuda->accelerator(0,NULL); if (kokkos) kokkos->accelerator(0,NULL); update->init(); force->init(); // pair must come after update due to minimizer domain->init(); atom->init(); // atom must come after force and domain // atom deletes extra array // used by fix shear_history::unpack_restart() // when force->pair->gran_history creates fix ?? // atom_vec init uses deform_vremap modify->init(); // modify must come after update, force, atom, domain neighbor->init(); // neighbor must come after force, modify comm->init(); // comm must come after force, modify, neighbor, atom output->init(); // output must come after domain, force, modify } /* ---------------------------------------------------------------------- delete single instance of top-level classes fundamental classes are deleted in destructor ------------------------------------------------------------------------- */ void LAMMPS::destroy() { delete update; delete neighbor; delete comm; delete force; delete group; delete output; delete modify; // modify must come after output, force, update // since they delete fixes delete domain; // domain must come after modify // since fix destructors access domain delete atom; // atom must come after modify, neighbor // since fixes delete callbacks in atom delete timer; modify = NULL; // necessary since input->variable->varreader // will be destructed 
later } /* ---------------------------------------------------------------------- help message for command line options and styles present in executable ------------------------------------------------------------------------- */ void LAMMPS::help() { fprintf(screen, "\nCommand line options:\n\n" "-cuda on/off : turn CUDA mode on or off (-c)\n" "-echo none/screen/log/both : echoing of input script (-e)\n" "-in filename : read input from file, not stdin (-i)\n" "-help : print this help message (-h)\n" "-kokkos on/off ... : turn KOKKOS mode on or off (-k)\n" "-log none/filename : where to send log output (-l)\n" "-nocite : disable writing log.cite file (-nc)\n" "-partition size1 size2 ... : assign partition sizes (-p)\n" "-plog basename : basename for partition logs (-pl)\n" "-pscreen basename : basename for partition screens (-ps)\n" "-reorder topology-specs : processor reordering (-r)\n" "-screen none/filename : where to send screen output (-sc)\n" "-suffix cuda/gpu/opt/omp : style suffix to apply (-sf)\n" "-var varname value : set index style variable (-v)\n\n"); fprintf(screen,"Style options compiled with this executable\n\n"); int pos = 80; fprintf(screen,"* Atom styles:\n"); #define ATOM_CLASS #define AtomStyle(key,Class) print_style(#key,pos); #include "style_atom.h" #undef ATOM_CLASS fprintf(screen,"\n\n"); pos = 80; fprintf(screen,"* Integrate styles:\n"); #define INTEGRATE_CLASS #define IntegrateStyle(key,Class) print_style(#key,pos); #include "style_integrate.h" #undef INTEGRATE_CLASS fprintf(screen,"\n\n"); pos = 80; fprintf(screen,"* Minimize styles:\n"); #define MINIMIZE_CLASS #define MinimizeStyle(key,Class) print_style(#key,pos); #include "style_minimize.h" #undef MINIMIZE_CLASS fprintf(screen,"\n\n"); pos = 80; fprintf(screen,"* Pair styles:\n"); #define PAIR_CLASS #define PairStyle(key,Class) print_style(#key,pos); #include "style_pair.h" #undef PAIR_CLASS fprintf(screen,"\n\n"); pos = 80; fprintf(screen,"* Bond styles:\n"); #define BOND_CLASS 
#define BondStyle(key,Class) print_style(#key,pos); #include "style_bond.h" #undef BOND_CLASS fprintf(screen,"\n\n"); pos = 80; fprintf(screen,"* Angle styles:\n"); #define ANGLE_CLASS #define AngleStyle(key,Class) print_style(#key,pos); #include "style_angle.h" #undef ANGLE_CLASS fprintf(screen,"\n\n"); pos = 80; fprintf(screen,"* Dihedral styles:\n"); #define DIHEDRAL_CLASS #define DihedralStyle(key,Class) print_style(#key,pos); #include "style_dihedral.h" #undef DIHEDRAL_CLASS fprintf(screen,"\n\n"); pos = 80; fprintf(screen,"* Improper styles:\n"); #define IMPROPER_CLASS #define ImproperStyle(key,Class) print_style(#key,pos); #include "style_improper.h" #undef IMPROPER_CLASS fprintf(screen,"\n\n"); pos = 80; fprintf(screen,"* KSpace styles:\n"); #define KSPACE_CLASS #define KSpaceStyle(key,Class) print_style(#key,pos); #include "style_kspace.h" #undef KSPACE_CLASS fprintf(screen,"\n\n"); pos = 80; fprintf(screen,"* Fix styles\n"); #define FIX_CLASS #define FixStyle(key,Class) print_style(#key,pos); #include "style_fix.h" #undef FIX_CLASS fprintf(screen,"\n\n"); pos = 80; fprintf(screen,"* Compute styles:\n"); #define COMPUTE_CLASS #define ComputeStyle(key,Class) print_style(#key,pos); #include "style_compute.h" #undef COMPUTE_CLASS fprintf(screen,"\n\n"); pos = 80; fprintf(screen,"* Region styles:\n"); #define REGION_CLASS #define RegionStyle(key,Class) print_style(#key,pos); #include "style_region.h" #undef REGION_CLASS fprintf(screen,"\n\n"); pos = 80; fprintf(screen,"* Dump styles:\n"); #define DUMP_CLASS #define DumpStyle(key,Class) print_style(#key,pos); #include "style_dump.h" #undef DUMP_CLASS fprintf(screen,"\n\n"); pos = 80; fprintf(screen,"* Command styles\n"); #define COMMAND_CLASS #define CommandStyle(key,Class) print_style(#key,pos); #include "style_command.h" #undef COMMAND_CLASS fprintf(screen,"\n"); } /* ---------------------------------------------------------------------- print style names in columns skip any style that starts with upper-case 
letter, since internal ------------------------------------------------------------------------- */ void LAMMPS::print_style(const char *str, int &pos) { if (isupper(str[0])) return; int len = strlen(str); if (pos+len > 80) { fprintf(screen,"\n"); pos = 0; } if (len < 16) { fprintf(screen,"%-16s",str); pos += 16; } else if (len < 32) { fprintf(screen,"%-32s",str); pos += 32; } else if (len < 48) { fprintf(screen,"%-48s",str); pos += 48; } else if (len < 64) { fprintf(screen,"%-64s",str); pos += 64; } else { fprintf(screen,"%-80s",str); pos += 80; } } diff --git a/src/lammps.h b/src/lammps.h index 44c7921bd..8ff0eca06 100644 --- a/src/lammps.h +++ b/src/lammps.h @@ -1,168 +1,171 @@ /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ #ifndef LMP_LAMMPS_H #define LMP_LAMMPS_H #include "stdio.h" namespace LAMMPS_NS { class LAMMPS { public: // ptrs to fundamental LAMMPS classes class Memory *memory; // memory allocation functions class Error *error; // error handling class Universe *universe; // universe of processors class Input *input; // input script processing // ptrs to top-level LAMMPS-specific classes class Atom *atom; // atom-based quantities class Update *update; // integrators/minimizers class Neighbor *neighbor; // neighbor lists class Comm *comm; // inter-processor communication class Domain *domain; // simulation box class Force *force; // inter-particle forces class Modify *modify; // fixes and computes class Group *group; // groups of atoms class Output *output; // thermo/dump/restart class Timer *timer; // CPU timing info MPI_Comm world; // MPI communicator FILE *infile; // infile FILE *screen; // screen output FILE *logfile; // logfile - char *suffix; // suffix to add to input script style names - int suffix_enable; // 1 if suffix enabled, 0 if disabled + char *suffix,*suffix2; // suffixes to add to input script style names + int suffix_enable; // 1 if suffixes are enabled, 0 if disabled int cite_enable; // 1 if generating log.cite, 0 if disabled class Cuda *cuda; // CUDA accelerator class + //class GPU *gpu; // GPU accelerator class + //class Intel *intel; // Intel accelerator class + //class OMP *omp; // OMP accelerator class class KokkosLMP *kokkos; // KOKKOS accelerator class class CiteMe *citeme; // citation info LAMMPS(int, char **, MPI_Comm); ~LAMMPS(); void create(); void post_create(); void init(); void destroy(); private: void help(); void print_style(const char *, int &); LAMMPS() {}; // prohibit using the default constructor LAMMPS(const LAMMPS &) {}; // prohibit using the copy constructor }; } #endif /* ERROR/WARNING messages: E: Invalid command-line argument One or more command-line 
arguments is invalid. Check the syntax of the command you are using to launch LAMMPS. E: Cannot use -reorder after -partition Self-explanatory. See doc page discussion of command-line switches. E: Processor partitions are inconsistent The total number of processors in all partitions must match the number of processors LAMMPS is running on. E: Must use -in switch with multiple partitions A multi-partition simulation cannot read the input script from stdin. The -in command-line option must be used to specify a file. E: Can only use -pscreen with multiple partitions Self-explanatory. See doc page discussion of command-line switches. E: Can only use -plog with multiple partitions Self-explanatory. See doc page discussion of command-line switches. E: Cannot open universe screen file For a multi-partition run, the master screen file cannot be opened. Check that the directory you are running in allows for files to be created. E: Cannot open log.lammps for writing The default LAMMPS log file cannot be opened. Check that the directory you are running in allows for files to be created. E: Cannot open universe log file For a multi-partition run, the master log file cannot be opened. Check that the directory you are running in allows for files to be created. E: Cannot open input script %s Self-explanatory. E: Cannot open screen file The screen file specified as a command-line argument cannot be opened. Check that the directory you are running in allows for files to be created. E: Cannot open logfile The LAMMPS log file named in a command-line argument cannot be opened. Check that the path and name are correct. E: Smallint setting in lmptype.h is invalid It has to be the size of an integer. E: Imageint setting in lmptype.h is invalid Imageint must be as large or larger than smallint. E: Tagint setting in lmptype.h is invalid Tagint must be as large or larger than smallint. E: Bigint setting in lmptype.h is invalid Size of bigint is less than size of tagint. 
E: MPI_LMP_TAGINT and tagint in lmptype.h are not compatible The size of the MPI datatype does not match the size of a tagint. E: MPI_LMP_BIGINT and bigint in lmptype.h are not compatible The size of the MPI datatype does not match the size of a bigint. E: Small to big integers are not sized correctly This error occurs when the sizes of smallint, imageint, tagint, bigint, as defined in src/lmptype.h are not what is expected. Contact the developers if this occurs. E: Cannot use -cuda on without USER-CUDA installed The USER-CUDA package must be installed via "make yes-user-cuda" before LAMMPS is built. */ diff --git a/src/modify.cpp b/src/modify.cpp index b55f368fe..065838f4c 100644 --- a/src/modify.cpp +++ b/src/modify.cpp @@ -1,1241 +1,1277 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */ #include "stdio.h" #include "string.h" #include "modify.h" #include "style_compute.h" #include "style_fix.h" #include "atom.h" #include "comm.h" #include "fix.h" #include "compute.h" #include "group.h" #include "update.h" #include "domain.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; using namespace FixConst; #define DELTA 4 #define BIG 1.0e20 -#define NEXCEPT 4 // change when add to exceptions in add_fix() +#define NEXCEPT 5 // change when add to exceptions in add_fix() /* ---------------------------------------------------------------------- */ Modify::Modify(LAMMPS *lmp) : Pointers(lmp) { nfix = maxfix = 0; n_initial_integrate = n_post_integrate = 0; n_pre_exchange = n_pre_neighbor = 0; n_pre_force = n_post_force = 0; n_final_integrate = n_end_of_step = n_thermo_energy = 0; n_initial_integrate_respa = n_post_integrate_respa = 0; n_pre_force_respa = n_post_force_respa = n_final_integrate_respa = 0; n_min_pre_exchange = n_min_pre_force = n_min_post_force = n_min_energy = 0; fix = NULL; fmask = NULL; list_initial_integrate = list_post_integrate = NULL; list_pre_exchange = list_pre_neighbor = NULL; list_pre_force = list_post_force = NULL; list_final_integrate = list_end_of_step = NULL; list_thermo_energy = NULL; list_initial_integrate_respa = list_post_integrate_respa = NULL; list_pre_force_respa = list_post_force_respa = NULL; list_final_integrate_respa = NULL; list_min_pre_exchange = list_min_pre_neighbor = NULL; list_min_pre_force = list_min_post_force = NULL; list_min_energy = NULL; end_of_step_every = NULL; list_timeflag = NULL; nfix_restart_global = 0; id_restart_global = style_restart_global = state_restart_global = NULL; nfix_restart_peratom = 0; id_restart_peratom = style_restart_peratom = NULL; index_restart_peratom = NULL; ncompute = maxcompute = 0; compute = NULL; // fill map with fixes listed in style_fix.h fix_map = new std::map(); #define FIX_CLASS 
#define FixStyle(key,Class) \ (*fix_map)[#key] = &fix_creator; #include "style_fix.h" #undef FixStyle #undef FIX_CLASS // fill map with computes listed in style_compute.h compute_map = new std::map(); #define COMPUTE_CLASS #define ComputeStyle(key,Class) \ (*compute_map)[#key] = &compute_creator; #include "style_compute.h" #undef ComputeStyle #undef COMPUTE_CLASS } /* ---------------------------------------------------------------------- */ Modify::~Modify() { // delete all fixes // do it via delete_fix() so callbacks in Atom are also updated correctly while (nfix) delete_fix(fix[0]->id); memory->sfree(fix); memory->destroy(fmask); // delete all computes for (int i = 0; i < ncompute; i++) delete compute[i]; memory->sfree(compute); delete [] list_initial_integrate; delete [] list_post_integrate; delete [] list_pre_exchange; delete [] list_pre_neighbor; delete [] list_pre_force; delete [] list_post_force; delete [] list_final_integrate; delete [] list_end_of_step; delete [] list_thermo_energy; delete [] list_initial_integrate_respa; delete [] list_post_integrate_respa; delete [] list_pre_force_respa; delete [] list_post_force_respa; delete [] list_final_integrate_respa; delete [] list_min_pre_exchange; delete [] list_min_pre_neighbor; delete [] list_min_pre_force; delete [] list_min_post_force; delete [] list_min_energy; delete [] end_of_step_every; delete [] list_timeflag; restart_deallocate(); delete compute_map; delete fix_map; } /* ---------------------------------------------------------------------- initialize all fixes and computes ------------------------------------------------------------------------- */ void Modify::init() { int i,j; // delete storage of restart info since it is not valid after 1st run restart_deallocate(); // create lists of fixes to call at each stage of run list_init(INITIAL_INTEGRATE,n_initial_integrate,list_initial_integrate); list_init(POST_INTEGRATE,n_post_integrate,list_post_integrate); 
list_init(PRE_EXCHANGE,n_pre_exchange,list_pre_exchange); list_init(PRE_NEIGHBOR,n_pre_neighbor,list_pre_neighbor); list_init(PRE_FORCE,n_pre_force,list_pre_force); list_init(POST_FORCE,n_post_force,list_post_force); list_init(FINAL_INTEGRATE,n_final_integrate,list_final_integrate); list_init_end_of_step(END_OF_STEP,n_end_of_step,list_end_of_step); list_init_thermo_energy(THERMO_ENERGY,n_thermo_energy,list_thermo_energy); list_init(INITIAL_INTEGRATE_RESPA, n_initial_integrate_respa,list_initial_integrate_respa); list_init(POST_INTEGRATE_RESPA, n_post_integrate_respa,list_post_integrate_respa); list_init(POST_FORCE_RESPA, n_post_force_respa,list_post_force_respa); list_init(PRE_FORCE_RESPA, n_pre_force_respa,list_pre_force_respa); list_init(FINAL_INTEGRATE_RESPA, n_final_integrate_respa,list_final_integrate_respa); list_init(MIN_PRE_EXCHANGE,n_min_pre_exchange,list_min_pre_exchange); list_init(MIN_PRE_NEIGHBOR,n_min_pre_neighbor,list_min_pre_neighbor); list_init(MIN_PRE_FORCE,n_min_pre_force,list_min_pre_force); list_init(MIN_POST_FORCE,n_min_post_force,list_min_post_force); list_init(MIN_ENERGY,n_min_energy,list_min_energy); // init each fix // not sure if now needs to come before compute init // used to b/c temperature computes called fix->dof() in their init, // and fix rigid required its own init before its dof() could be called, // but computes now do their DOF in setup() for (i = 0; i < nfix; i++) fix[i]->init(); // set global flag if any fix has its restart_pbc flag set restart_pbc_any = 0; for (i = 0; i < nfix; i++) if (fix[i]->restart_pbc) restart_pbc_any = 1; // create list of computes that store invocation times list_init_compute(); // init each compute // set invoked_scalar,vector,etc to -1 to force new run to re-compute them // add initial timestep to all computes that store invocation times // since any of them may be invoked by initial thermo // do not clear out invocation times stored within a compute, // b/c some may be holdovers from previous run, 
like for ave fixes for (i = 0; i < ncompute; i++) { compute[i]->init(); compute[i]->invoked_scalar = -1; compute[i]->invoked_vector = -1; compute[i]->invoked_array = -1; compute[i]->invoked_peratom = -1; compute[i]->invoked_local = -1; } addstep_compute_all(update->ntimestep); // error if any fix or compute is using a dynamic group when not allowed for (i = 0; i < nfix; i++) if (!fix[i]->dynamic_group_allow && group->dynamic[fix[i]->igroup]) { char str[128]; sprintf(str,"Fix %s does not allow use of dynamic group",fix[i]->id); error->all(FLERR,str); } for (i = 0; i < ncompute; i++) if (!compute[i]->dynamic_group_allow && group->dynamic[compute[i]->igroup]) { char str[128]; sprintf(str,"Compute %s does not allow use of dynamic group",fix[i]->id); error->all(FLERR,str); } // warn if any particle is time integrated more than once int nlocal = atom->nlocal; int *mask = atom->mask; int *flag = new int[nlocal]; for (i = 0; i < nlocal; i++) flag[i] = 0; int groupbit; for (i = 0; i < nfix; i++) { if (fix[i]->time_integrate == 0) continue; groupbit = fix[i]->groupbit; for (j = 0; j < nlocal; j++) if (mask[j] & groupbit) flag[j]++; } int check = 0; for (i = 0; i < nlocal; i++) if (flag[i] > 1) check = 1; delete [] flag; int checkall; MPI_Allreduce(&check,&checkall,1,MPI_INT,MPI_SUM,world); if (comm->me == 0 && checkall) error->warning(FLERR, "One or more atoms are time integrated more than once"); } /* ---------------------------------------------------------------------- setup for run, calls setup() of all fixes and computes called from Verlet, RESPA, Min ------------------------------------------------------------------------- */ void Modify::setup(int vflag) { // compute setup needs to come before fix setup // b/c NH fixes need DOF of temperature computes for (int i = 0; i < ncompute; i++) compute[i]->setup(); if (update->whichflag == 1) for (int i = 0; i < nfix; i++) fix[i]->setup(vflag); else if (update->whichflag == 2) for (int i = 0; i < nfix; i++) 
fix[i]->min_setup(vflag); } /* ---------------------------------------------------------------------- setup pre_exchange call, only for fixes that define pre_exchange called from Verlet, RESPA, Min, and WriteRestart with whichflag = 0 ------------------------------------------------------------------------- */ void Modify::setup_pre_exchange() { if (update->whichflag <= 1) for (int i = 0; i < n_pre_exchange; i++) fix[list_pre_exchange[i]]->setup_pre_exchange(); else if (update->whichflag == 2) for (int i = 0; i < n_min_pre_exchange; i++) fix[list_min_pre_exchange[i]]->min_setup_pre_exchange(); } /* ---------------------------------------------------------------------- setup pre_neighbor call, only for fixes that define pre_neighbor called from Verlet, RESPA ------------------------------------------------------------------------- */ void Modify::setup_pre_neighbor() { if (update->whichflag == 1) for (int i = 0; i < n_pre_neighbor; i++) fix[list_pre_neighbor[i]]->setup_pre_neighbor(); else if (update->whichflag == 2) for (int i = 0; i < n_min_pre_neighbor; i++) fix[list_min_pre_neighbor[i]]->min_setup_pre_neighbor(); } /* ---------------------------------------------------------------------- setup pre_force call, only for fixes that define pre_force called from Verlet, RESPA, Min ------------------------------------------------------------------------- */ void Modify::setup_pre_force(int vflag) { if (update->whichflag == 1) for (int i = 0; i < n_pre_force; i++) fix[list_pre_force[i]]->setup_pre_force(vflag); else if (update->whichflag == 2) for (int i = 0; i < n_min_pre_force; i++) fix[list_min_pre_force[i]]->min_setup_pre_force(vflag); } /* ---------------------------------------------------------------------- 1st half of integrate call, only for relevant fixes ------------------------------------------------------------------------- */ void Modify::initial_integrate(int vflag) { for (int i = 0; i < n_initial_integrate; i++) 
fix[list_initial_integrate[i]]->initial_integrate(vflag); } /* ---------------------------------------------------------------------- post_integrate call, only for relevant fixes ------------------------------------------------------------------------- */ void Modify::post_integrate() { for (int i = 0; i < n_post_integrate; i++) fix[list_post_integrate[i]]->post_integrate(); } /* ---------------------------------------------------------------------- pre_exchange call, only for relevant fixes ------------------------------------------------------------------------- */ void Modify::pre_exchange() { for (int i = 0; i < n_pre_exchange; i++) fix[list_pre_exchange[i]]->pre_exchange(); } /* ---------------------------------------------------------------------- pre_neighbor call, only for relevant fixes ------------------------------------------------------------------------- */ void Modify::pre_neighbor() { for (int i = 0; i < n_pre_neighbor; i++) fix[list_pre_neighbor[i]]->pre_neighbor(); } /* ---------------------------------------------------------------------- pre_force call, only for relevant fixes ------------------------------------------------------------------------- */ void Modify::pre_force(int vflag) { for (int i = 0; i < n_pre_force; i++) fix[list_pre_force[i]]->pre_force(vflag); } /* ---------------------------------------------------------------------- post_force call, only for relevant fixes ------------------------------------------------------------------------- */ void Modify::post_force(int vflag) { for (int i = 0; i < n_post_force; i++) fix[list_post_force[i]]->post_force(vflag); } /* ---------------------------------------------------------------------- 2nd half of integrate call, only for relevant fixes ------------------------------------------------------------------------- */ void Modify::final_integrate() { for (int i = 0; i < n_final_integrate; i++) fix[list_final_integrate[i]]->final_integrate(); } /* 
---------------------------------------------------------------------- end-of-timestep call, only for relevant fixes only call fix->end_of_step() on timesteps that are multiples of nevery ------------------------------------------------------------------------- */ void Modify::end_of_step() { for (int i = 0; i < n_end_of_step; i++) if (update->ntimestep % end_of_step_every[i] == 0) fix[list_end_of_step[i]]->end_of_step(); } /* ---------------------------------------------------------------------- thermo energy call, only for relevant fixes called by Thermo class compute_scalar() is fix call to return energy ------------------------------------------------------------------------- */ double Modify::thermo_energy() { double energy = 0.0; for (int i = 0; i < n_thermo_energy; i++) energy += fix[list_thermo_energy[i]]->compute_scalar(); return energy; } /* ---------------------------------------------------------------------- post_run call ------------------------------------------------------------------------- */ void Modify::post_run() { for (int i = 0; i < nfix; i++) fix[i]->post_run(); } /* ---------------------------------------------------------------------- setup rRESPA pre_force call, only for relevant fixes ------------------------------------------------------------------------- */ void Modify::setup_pre_force_respa(int vflag, int ilevel) { for (int i = 0; i < n_pre_force_respa; i++) fix[list_pre_force_respa[i]]->setup_pre_force_respa(vflag,ilevel); } /* ---------------------------------------------------------------------- 1st half of rRESPA integrate call, only for relevant fixes ------------------------------------------------------------------------- */ void Modify::initial_integrate_respa(int vflag, int ilevel, int iloop) { for (int i = 0; i < n_initial_integrate_respa; i++) fix[list_initial_integrate_respa[i]]-> initial_integrate_respa(vflag,ilevel,iloop); } /* ---------------------------------------------------------------------- rRESPA post_integrate 
call, only for relevant fixes ------------------------------------------------------------------------- */ void Modify::post_integrate_respa(int ilevel, int iloop) { for (int i = 0; i < n_post_integrate_respa; i++) fix[list_post_integrate_respa[i]]->post_integrate_respa(ilevel,iloop); } /* ---------------------------------------------------------------------- rRESPA pre_force call, only for relevant fixes ------------------------------------------------------------------------- */ void Modify::pre_force_respa(int vflag, int ilevel, int iloop) { for (int i = 0; i < n_pre_force_respa; i++) fix[list_pre_force_respa[i]]->pre_force_respa(vflag,ilevel,iloop); } /* ---------------------------------------------------------------------- rRESPA post_force call, only for relevant fixes ------------------------------------------------------------------------- */ void Modify::post_force_respa(int vflag, int ilevel, int iloop) { for (int i = 0; i < n_post_force_respa; i++) fix[list_post_force_respa[i]]->post_force_respa(vflag,ilevel,iloop); } /* ---------------------------------------------------------------------- 2nd half of rRESPA integrate call, only for relevant fixes ------------------------------------------------------------------------- */ void Modify::final_integrate_respa(int ilevel, int iloop) { for (int i = 0; i < n_final_integrate_respa; i++) fix[list_final_integrate_respa[i]]->final_integrate_respa(ilevel,iloop); } /* ---------------------------------------------------------------------- minimizer pre-exchange call, only for relevant fixes ------------------------------------------------------------------------- */ void Modify::min_pre_exchange() { for (int i = 0; i < n_min_pre_exchange; i++) fix[list_min_pre_exchange[i]]->min_pre_exchange(); } /* ---------------------------------------------------------------------- minimizer pre-neighbor call, only for relevant fixes ------------------------------------------------------------------------- */ void 
Modify::min_pre_neighbor() { for (int i = 0; i < n_min_pre_neighbor; i++) fix[list_min_pre_neighbor[i]]->min_pre_neighbor(); } /* ---------------------------------------------------------------------- minimizer pre-force call, only for relevant fixes ------------------------------------------------------------------------- */ void Modify::min_pre_force(int vflag) { for (int i = 0; i < n_min_pre_force; i++) fix[list_min_pre_force[i]]->min_pre_force(vflag); } /* ---------------------------------------------------------------------- minimizer force adjustment call, only for relevant fixes ------------------------------------------------------------------------- */ void Modify::min_post_force(int vflag) { for (int i = 0; i < n_min_post_force; i++) fix[list_min_post_force[i]]->min_post_force(vflag); } /* ---------------------------------------------------------------------- minimizer energy/force evaluation, only for relevant fixes return energy and forces on extra degrees of freedom ------------------------------------------------------------------------- */ double Modify::min_energy(double *fextra) { int ifix,index; index = 0; double eng = 0.0; for (int i = 0; i < n_min_energy; i++) { ifix = list_min_energy[i]; eng += fix[ifix]->min_energy(&fextra[index]); index += fix[ifix]->min_dof(); } return eng; } /* ---------------------------------------------------------------------- store current state of extra minimizer dof, only for relevant fixes ------------------------------------------------------------------------- */ void Modify::min_store() { for (int i = 0; i < n_min_energy; i++) fix[list_min_energy[i]]->min_store(); } /* ---------------------------------------------------------------------- manage state of extra minimizer dof on a stack, only for relevant fixes ------------------------------------------------------------------------- */ void Modify::min_clearstore() { for (int i = 0; i < n_min_energy; i++) fix[list_min_energy[i]]->min_clearstore(); } void 
Modify::min_pushstore() { for (int i = 0; i < n_min_energy; i++) fix[list_min_energy[i]]->min_pushstore(); } void Modify::min_popstore() { for (int i = 0; i < n_min_energy; i++) fix[list_min_energy[i]]->min_popstore(); } /* ---------------------------------------------------------------------- displace extra minimizer dof along vector hextra, only for relevant fixes ------------------------------------------------------------------------- */ void Modify::min_step(double alpha, double *hextra) { int ifix,index; index = 0; for (int i = 0; i < n_min_energy; i++) { ifix = list_min_energy[i]; fix[ifix]->min_step(alpha,&hextra[index]); index += fix[ifix]->min_dof(); } } /* ---------------------------------------------------------------------- compute max allowed step size along vector hextra, only for relevant fixes ------------------------------------------------------------------------- */ double Modify::max_alpha(double *hextra) { int ifix,index; double alpha = BIG; index = 0; for (int i = 0; i < n_min_energy; i++) { ifix = list_min_energy[i]; double alpha_one = fix[ifix]->max_alpha(&hextra[index]); alpha = MIN(alpha,alpha_one); index += fix[ifix]->min_dof(); } return alpha; } /* ---------------------------------------------------------------------- extract extra minimizer dof, only for relevant fixes ------------------------------------------------------------------------- */ int Modify::min_dof() { int ndof = 0; for (int i = 0; i < n_min_energy; i++) ndof += fix[list_min_energy[i]]->min_dof(); return ndof; } /* ---------------------------------------------------------------------- reset minimizer reference state of fix, only for relevant fixes ------------------------------------------------------------------------- */ int Modify::min_reset_ref() { int itmp,itmpall; itmpall = 0; for (int i = 0; i < n_min_energy; i++) { itmp = fix[list_min_energy[i]]->min_reset_ref(); if (itmp) itmpall = 1; } return itmpall; } /* 
---------------------------------------------------------------------- add a new fix or replace one with same ID ------------------------------------------------------------------------- */ -void Modify::add_fix(int narg, char **arg, char *suffix) +void Modify::add_fix(int narg, char **arg, int trysuffix) { if (narg < 3) error->all(FLERR,"Illegal fix command"); // cannot define fix before box exists unless style is in exception list // don't like this way of checking for exceptions by adding fixes to list, // but can't think of better way // too late if instantiate fix, then check flag set in fix constructor, // since some fixes access domain settings in their constructor - // change NEXCEPT above when add new fix to this list + // MUST change NEXCEPT above when add new fix to this list - const char *exceptions[NEXCEPT] = {"GPU","OMP","property/atom","cmap"}; + const char *exceptions[NEXCEPT] = + {"GPU","OMP","Intel","property/atom","cmap"}; if (domain->box_exist == 0) { int m; for (m = 0; m < NEXCEPT; m++) if (strcmp(arg[2],exceptions[m]) == 0) break; if (m == NEXCEPT) error->all(FLERR,"Fix command before simulation box is defined"); } // check group ID int igroup = group->find(arg[1]); if (igroup == -1) error->all(FLERR,"Could not find fix group ID"); // if fix ID exists: // set newflag = 0 so create new fix in same location in fix list // error if new style does not match old style // since can't replace it (all when-to-invoke ptrs would be invalid) // warn if new group != old group // delete old fix, but do not call update_callback(), // since will replace this fix and thus other fix locs will not change // set ptr to NULL in case new fix scans list of fixes, // e.g. 
scan will occur in add_callback() if called by new fix // if fix ID does not exist: // set newflag = 1 so create new fix // extend fix and fmask lists as necessary int ifix,newflag; for (ifix = 0; ifix < nfix; ifix++) if (strcmp(arg[0],fix[ifix]->id) == 0) break; if (ifix < nfix) { newflag = 0; - if (strcmp(arg[2],fix[ifix]->style) != 0) - error->all(FLERR,"Replacing a fix, but new style != old style"); + + int match = 0; + if (strcmp(arg[2],fix[ifix]->style) == 0) match = 1; + if (!match && trysuffix && lmp->suffix_enable) { + char estyle[256]; + if (lmp->suffix) { + sprintf(estyle,"%s/%s",arg[2],lmp->suffix); + if (strcmp(estyle,fix[ifix]->style) == 0) match = 1; + } + if (lmp->suffix2) { + sprintf(estyle,"%s/%s",arg[2],lmp->suffix2); + if (strcmp(estyle,fix[ifix]->style) == 0) match = 1; + } + } + if (!match) error->all(FLERR,"Replacing a fix, but new style != old style"); + if (fix[ifix]->igroup != igroup && comm->me == 0) error->warning(FLERR,"Replacing a fix, but new group != old group"); delete fix[ifix]; fix[ifix] = NULL; + } else { newflag = 1; if (nfix == maxfix) { maxfix += DELTA; fix = (Fix **) memory->srealloc(fix,maxfix*sizeof(Fix *),"modify:fix"); memory->grow(fmask,maxfix,"modify:fmask"); } } // create the Fix // try first with suffix appended fix[ifix] = NULL; - if (suffix && lmp->suffix_enable) { - char estyle[256]; - sprintf(estyle,"%s/%s",arg[2],suffix); - if (fix_map->find(estyle) != fix_map->end()) { - FixCreator fix_creator = (*fix_map)[estyle]; - fix[ifix] = fix_creator(lmp,narg,arg); + if (trysuffix && lmp->suffix_enable) { + if (lmp->suffix) { + char estyle[256]; + sprintf(estyle,"%s/%s",arg[2],lmp->suffix); + if (fix_map->find(estyle) != fix_map->end()) { + FixCreator fix_creator = (*fix_map)[estyle]; + fix[ifix] = fix_creator(lmp,narg,arg); + } + } + if (fix[ifix] == NULL && lmp->suffix2) { + char estyle[256]; + sprintf(estyle,"%s/%s",arg[2],lmp->suffix2); + if (fix_map->find(estyle) != fix_map->end()) { + FixCreator fix_creator = 
(*fix_map)[estyle]; + fix[ifix] = fix_creator(lmp,narg,arg); + } } } if (fix[ifix] == NULL && fix_map->find(arg[2]) != fix_map->end()) { FixCreator fix_creator = (*fix_map)[arg[2]]; fix[ifix] = fix_creator(lmp,narg,arg); } if (fix[ifix] == NULL) error->all(FLERR,"Invalid fix style"); // check if Fix is in restart_global list // if yes, pass state info to the Fix so it can reset itself for (int i = 0; i < nfix_restart_global; i++) if (strcmp(id_restart_global[i],fix[ifix]->id) == 0 && strcmp(style_restart_global[i],fix[ifix]->style) == 0) { fix[ifix]->restart(state_restart_global[i]); if (comm->me == 0) { char *str = (char *) ("Resetting global state of Fix %s Style %s " "from restart file info\n"); if (screen) fprintf(screen,str,fix[ifix]->id,fix[ifix]->style); if (logfile) fprintf(logfile,str,fix[ifix]->id,fix[ifix]->style); } } // check if Fix is in restart_peratom list // if yes, loop over atoms so they can extract info from atom->extra array for (int i = 0; i < nfix_restart_peratom; i++) if (strcmp(id_restart_peratom[i],fix[ifix]->id) == 0 && strcmp(style_restart_peratom[i],fix[ifix]->style) == 0) { for (int j = 0; j < atom->nlocal; j++) fix[ifix]->unpack_restart(j,index_restart_peratom[i]); fix[ifix]->restart_reset = 1; if (comm->me == 0) { char *str = (char *) ("Resetting per-atom state of Fix %s Style %s " "from restart file info\n"); if (screen) fprintf(screen,str,fix[ifix]->id,fix[ifix]->style); if (logfile) fprintf(logfile,str,fix[ifix]->id,fix[ifix]->style); } } // increment nfix (if new) // set fix mask values // post_construct() allows new fix to create other fixes // nfix increment comes first so that recursive call to add_fix within // post_constructor() will see updated nfix if (newflag) nfix++; fmask[ifix] = fix[ifix]->setmask(); fix[ifix]->post_constructor(); } /* ---------------------------------------------------------------------- one instance per fix in style_fix.h ------------------------------------------------------------------------- */ 
template Fix *Modify::fix_creator(LAMMPS *lmp, int narg, char **arg) { return new T(lmp,narg,arg); } /* ---------------------------------------------------------------------- modify a Fix's parameters ------------------------------------------------------------------------- */ void Modify::modify_fix(int narg, char **arg) { if (narg < 2) error->all(FLERR,"Illegal fix_modify command"); // lookup Fix ID int ifix; for (ifix = 0; ifix < nfix; ifix++) if (strcmp(arg[0],fix[ifix]->id) == 0) break; if (ifix == nfix) error->all(FLERR,"Could not find fix_modify ID"); fix[ifix]->modify_params(narg-1,&arg[1]); } /* ---------------------------------------------------------------------- delete a Fix from list of Fixes Atom class must update indices in its list of callbacks to fixes ------------------------------------------------------------------------- */ void Modify::delete_fix(const char *id) { int ifix = find_fix(id); if (ifix < 0) error->all(FLERR,"Could not find fix ID to delete"); delete fix[ifix]; atom->update_callback(ifix); // move other Fixes and fmask down in list one slot for (int i = ifix+1; i < nfix; i++) fix[i-1] = fix[i]; for (int i = ifix+1; i < nfix; i++) fmask[i-1] = fmask[i]; nfix--; } /* ---------------------------------------------------------------------- find a fix by ID return index of fix or -1 if not found ------------------------------------------------------------------------- */ int Modify::find_fix(const char *id) { int ifix; for (ifix = 0; ifix < nfix; ifix++) if (strcmp(id,fix[ifix]->id) == 0) break; if (ifix == nfix) return -1; return ifix; } /* ---------------------------------------------------------------------- add a new compute ------------------------------------------------------------------------- */ -void Modify::add_compute(int narg, char **arg, char *suffix) +void Modify::add_compute(int narg, char **arg, int trysuffix) { if (narg < 3) error->all(FLERR,"Illegal compute command"); // error check for (int icompute = 0; icompute < 
ncompute; icompute++) if (strcmp(arg[0],compute[icompute]->id) == 0) error->all(FLERR,"Reuse of compute ID"); // extend Compute list if necessary if (ncompute == maxcompute) { maxcompute += DELTA; compute = (Compute **) memory->srealloc(compute,maxcompute*sizeof(Compute *),"modify:compute"); } // create the Compute // try first with suffix appended compute[ncompute] = NULL; - if (suffix && lmp->suffix_enable) { - char estyle[256]; - sprintf(estyle,"%s/%s",arg[2],suffix); - if (compute_map->find(estyle) != compute_map->end()) { - ComputeCreator compute_creator = (*compute_map)[estyle]; - compute[ncompute] = compute_creator(lmp,narg,arg); + if (trysuffix && lmp->suffix_enable) { + if (lmp->suffix) { + char estyle[256]; + sprintf(estyle,"%s/%s",arg[2],lmp->suffix); + if (compute_map->find(estyle) != compute_map->end()) { + ComputeCreator compute_creator = (*compute_map)[estyle]; + compute[ncompute] = compute_creator(lmp,narg,arg); + } + } + if (compute[ncompute] == NULL && lmp->suffix2) { + char estyle[256]; + sprintf(estyle,"%s/%s",arg[2],lmp->suffix2); + if (compute_map->find(estyle) != compute_map->end()) { + ComputeCreator compute_creator = (*compute_map)[estyle]; + compute[ncompute] = compute_creator(lmp,narg,arg); + } } } if (compute[ncompute] == NULL && compute_map->find(arg[2]) != compute_map->end()) { ComputeCreator compute_creator = (*compute_map)[arg[2]]; compute[ncompute] = compute_creator(lmp,narg,arg); } if (compute[ncompute] == NULL) error->all(FLERR,"Invalid compute style"); ncompute++; } /* ---------------------------------------------------------------------- one instance per compute in style_compute.h ------------------------------------------------------------------------- */ template Compute *Modify::compute_creator(LAMMPS *lmp, int narg, char **arg) { return new T(lmp,narg,arg); } /* ---------------------------------------------------------------------- modify a Compute's parameters 
------------------------------------------------------------------------- */ void Modify::modify_compute(int narg, char **arg) { if (narg < 2) error->all(FLERR,"Illegal compute_modify command"); // lookup Compute ID int icompute; for (icompute = 0; icompute < ncompute; icompute++) if (strcmp(arg[0],compute[icompute]->id) == 0) break; if (icompute == ncompute) error->all(FLERR,"Could not find compute_modify ID"); compute[icompute]->modify_params(narg-1,&arg[1]); } /* ---------------------------------------------------------------------- delete a Compute from list of Computes ------------------------------------------------------------------------- */ void Modify::delete_compute(const char *id) { int icompute = find_compute(id); if (icompute < 0) error->all(FLERR,"Could not find compute ID to delete"); delete compute[icompute]; // move other Computes down in list one slot for (int i = icompute+1; i < ncompute; i++) compute[i-1] = compute[i]; ncompute--; } /* ---------------------------------------------------------------------- find a compute by ID return index of compute or -1 if not found ------------------------------------------------------------------------- */ int Modify::find_compute(const char *id) { int icompute; for (icompute = 0; icompute < ncompute; icompute++) if (strcmp(id,compute[icompute]->id) == 0) break; if (icompute == ncompute) return -1; return icompute; } /* ---------------------------------------------------------------------- clear invoked flag of all computes called everywhere that computes are used, before computes are invoked invoked flag used to avoid re-invoking same compute multiple times and to flag computes that store invocation times as having been invoked ------------------------------------------------------------------------- */ void Modify::clearstep_compute() { for (int icompute = 0; icompute < ncompute; icompute++) compute[icompute]->invoked_flag = 0; } /* ---------------------------------------------------------------------- 
loop over computes that store invocation times if its invoked flag set on this timestep, schedule next invocation called everywhere that computes are used, after computes are invoked ------------------------------------------------------------------------- */ void Modify::addstep_compute(bigint newstep) { for (int icompute = 0; icompute < n_timeflag; icompute++) if (compute[list_timeflag[icompute]]->invoked_flag) compute[list_timeflag[icompute]]->addstep(newstep); } /* ---------------------------------------------------------------------- loop over all computes schedule next invocation for those that store invocation times called when not sure what computes will be needed on newstep do not loop only over n_timeflag, since may not be set yet ------------------------------------------------------------------------- */ void Modify::addstep_compute_all(bigint newstep) { for (int icompute = 0; icompute < ncompute; icompute++) if (compute[icompute]->timeflag) compute[icompute]->addstep(newstep); } /* ---------------------------------------------------------------------- write to restart file for all Fixes with restart info (1) fixes that have global state (2) fixes that store per-atom quantities ------------------------------------------------------------------------- */ void Modify::write_restart(FILE *fp) { int me = comm->me; int count = 0; for (int i = 0; i < nfix; i++) if (fix[i]->restart_global) count++; if (me == 0) fwrite(&count,sizeof(int),1,fp); int n; for (int i = 0; i < nfix; i++) if (fix[i]->restart_global) { if (me == 0) { n = strlen(fix[i]->id) + 1; fwrite(&n,sizeof(int),1,fp); fwrite(fix[i]->id,sizeof(char),n,fp); n = strlen(fix[i]->style) + 1; fwrite(&n,sizeof(int),1,fp); fwrite(fix[i]->style,sizeof(char),n,fp); } fix[i]->write_restart(fp); } count = 0; for (int i = 0; i < nfix; i++) if (fix[i]->restart_peratom) count++; if (me == 0) fwrite(&count,sizeof(int),1,fp); for (int i = 0; i < nfix; i++) if (fix[i]->restart_peratom) { int maxsize_restart = 
fix[i]->maxsize_restart(); if (me == 0) { n = strlen(fix[i]->id) + 1; fwrite(&n,sizeof(int),1,fp); fwrite(fix[i]->id,sizeof(char),n,fp); n = strlen(fix[i]->style) + 1; fwrite(&n,sizeof(int),1,fp); fwrite(fix[i]->style,sizeof(char),n,fp); fwrite(&maxsize_restart,sizeof(int),1,fp); } } } /* ---------------------------------------------------------------------- read in restart file data on all previously defined Fixes with restart info (1) fixes that have global state (2) fixes that store per-atom quantities return maxsize of extra info that will be stored with any atom ------------------------------------------------------------------------- */ int Modify::read_restart(FILE *fp) { // nfix_restart_global = # of restart entries with global state info int me = comm->me; if (me == 0) fread(&nfix_restart_global,sizeof(int),1,fp); MPI_Bcast(&nfix_restart_global,1,MPI_INT,0,world); // allocate space for each entry if (nfix_restart_global) { id_restart_global = new char*[nfix_restart_global]; style_restart_global = new char*[nfix_restart_global]; state_restart_global = new char*[nfix_restart_global]; } // read each entry and Bcast to all procs // each entry has id string, style string, chunk of state data int n; for (int i = 0; i < nfix_restart_global; i++) { if (me == 0) fread(&n,sizeof(int),1,fp); MPI_Bcast(&n,1,MPI_INT,0,world); id_restart_global[i] = new char[n]; if (me == 0) fread(id_restart_global[i],sizeof(char),n,fp); MPI_Bcast(id_restart_global[i],n,MPI_CHAR,0,world); if (me == 0) fread(&n,sizeof(int),1,fp); MPI_Bcast(&n,1,MPI_INT,0,world); style_restart_global[i] = new char[n]; if (me == 0) fread(style_restart_global[i],sizeof(char),n,fp); MPI_Bcast(style_restart_global[i],n,MPI_CHAR,0,world); if (me == 0) fread(&n,sizeof(int),1,fp); MPI_Bcast(&n,1,MPI_INT,0,world); state_restart_global[i] = new char[n]; if (me == 0) fread(state_restart_global[i],sizeof(char),n,fp); MPI_Bcast(state_restart_global[i],n,MPI_CHAR,0,world); } // nfix_restart_peratom = # of restart 
entries with peratom info int maxsize = 0; if (me == 0) fread(&nfix_restart_peratom,sizeof(int),1,fp); MPI_Bcast(&nfix_restart_peratom,1,MPI_INT,0,world); // allocate space for each entry if (nfix_restart_peratom) { id_restart_peratom = new char*[nfix_restart_peratom]; style_restart_peratom = new char*[nfix_restart_peratom]; index_restart_peratom = new int[nfix_restart_peratom]; } // read each entry and Bcast to all procs // each entry has id string, style string, maxsize of one atom's data // set index = which set of extra data this fix represents for (int i = 0; i < nfix_restart_peratom; i++) { if (me == 0) fread(&n,sizeof(int),1,fp); MPI_Bcast(&n,1,MPI_INT,0,world); id_restart_peratom[i] = new char[n]; if (me == 0) fread(id_restart_peratom[i],sizeof(char),n,fp); MPI_Bcast(id_restart_peratom[i],n,MPI_CHAR,0,world); if (me == 0) fread(&n,sizeof(int),1,fp); MPI_Bcast(&n,1,MPI_INT,0,world); style_restart_peratom[i] = new char[n]; if (me == 0) fread(style_restart_peratom[i],sizeof(char),n,fp); MPI_Bcast(style_restart_peratom[i],n,MPI_CHAR,0,world); if (me == 0) fread(&n,sizeof(int),1,fp); MPI_Bcast(&n,1,MPI_INT,0,world); maxsize += n; index_restart_peratom[i] = i; } return maxsize; } /* ---------------------------------------------------------------------- delete all lists of restart file Fix info ------------------------------------------------------------------------- */ void Modify::restart_deallocate() { if (nfix_restart_global) { for (int i = 0; i < nfix_restart_global; i++) { delete [] id_restart_global[i]; delete [] style_restart_global[i]; delete [] state_restart_global[i]; } delete [] id_restart_global; delete [] style_restart_global; delete [] state_restart_global; } if (nfix_restart_peratom) { for (int i = 0; i < nfix_restart_peratom; i++) { delete [] id_restart_peratom[i]; delete [] style_restart_peratom[i]; } delete [] id_restart_peratom; delete [] style_restart_peratom; delete [] index_restart_peratom; } nfix_restart_global = nfix_restart_peratom = 0; } 
/* ---------------------------------------------------------------------- create list of fix indices for fixes which match mask ------------------------------------------------------------------------- */ void Modify::list_init(int mask, int &n, int *&list) { delete [] list; n = 0; for (int i = 0; i < nfix; i++) if (fmask[i] & mask) n++; list = new int[n]; n = 0; for (int i = 0; i < nfix; i++) if (fmask[i] & mask) list[n++] = i; } /* ---------------------------------------------------------------------- create list of fix indices for end_of_step fixes also create end_of_step_every[] ------------------------------------------------------------------------- */ void Modify::list_init_end_of_step(int mask, int &n, int *&list) { delete [] list; delete [] end_of_step_every; n = 0; for (int i = 0; i < nfix; i++) if (fmask[i] & mask) n++; list = new int[n]; end_of_step_every = new int[n]; n = 0; for (int i = 0; i < nfix; i++) if (fmask[i] & mask) { list[n] = i; end_of_step_every[n++] = fix[i]->nevery; } } /* ---------------------------------------------------------------------- create list of fix indices for thermo energy fixes only added to list if fix has THERMO_ENERGY mask and its thermo_energy flag was set via fix_modify ------------------------------------------------------------------------- */ void Modify::list_init_thermo_energy(int mask, int &n, int *&list) { delete [] list; n = 0; for (int i = 0; i < nfix; i++) if (fmask[i] & mask && fix[i]->thermo_energy) n++; list = new int[n]; n = 0; for (int i = 0; i < nfix; i++) if (fmask[i] & mask && fix[i]->thermo_energy) list[n++] = i; } /* ---------------------------------------------------------------------- create list of compute indices for computes which store invocation times ------------------------------------------------------------------------- */ void Modify::list_init_compute() { delete [] list_timeflag; n_timeflag = 0; for (int i = 0; i < ncompute; i++) if (compute[i]->timeflag) n_timeflag++; list_timeflag = 
new int[n_timeflag]; n_timeflag = 0; for (int i = 0; i < ncompute; i++) if (compute[i]->timeflag) list_timeflag[n_timeflag++] = i; } /* ---------------------------------------------------------------------- return # of bytes of allocated memory from all fixes ------------------------------------------------------------------------- */ bigint Modify::memory_usage() { bigint bytes = 0; for (int i = 0; i < nfix; i++) bytes += static_cast (fix[i]->memory_usage()); for (int i = 0; i < ncompute; i++) bytes += static_cast (compute[i]->memory_usage()); return bytes; } diff --git a/src/modify.h b/src/modify.h index 422c77d2f..ba7101a93 100644 --- a/src/modify.h +++ b/src/modify.h @@ -1,219 +1,219 @@ /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ #ifndef LMP_MODIFY_H #define LMP_MODIFY_H #include "stdio.h" #include "pointers.h" #include #include namespace LAMMPS_NS { class Modify : protected Pointers { public: int nfix,maxfix; int n_initial_integrate,n_post_integrate,n_pre_exchange,n_pre_neighbor; int n_pre_force,n_post_force; int n_final_integrate,n_end_of_step,n_thermo_energy; int n_initial_integrate_respa,n_post_integrate_respa; int n_pre_force_respa,n_post_force_respa,n_final_integrate_respa; int n_min_pre_exchange,n_min_pre_neighbor; int n_min_pre_force,n_min_post_force,n_min_energy; int restart_pbc_any; // 1 if any fix sets restart_pbc int nfix_restart_global; // stored fix global info from restart file int nfix_restart_peratom; // stored fix peratom info from restart file class Fix **fix; // list of fixes int *fmask; // bit mask for when each fix is applied int ncompute,maxcompute; // list of computes class Compute **compute; Modify(class LAMMPS *); virtual ~Modify(); virtual void init(); virtual void setup(int); virtual void setup_pre_exchange(); virtual void setup_pre_neighbor(); virtual void setup_pre_force(int); virtual void initial_integrate(int); virtual void post_integrate(); virtual void pre_exchange(); virtual void pre_neighbor(); virtual void pre_force(int); virtual void post_force(int); virtual void final_integrate(); virtual void end_of_step(); virtual double thermo_energy(); virtual void post_run(); virtual void setup_pre_force_respa(int, int); virtual void initial_integrate_respa(int, int, int); virtual void post_integrate_respa(int, int); virtual void pre_force_respa(int, int, int); virtual void post_force_respa(int, int, int); virtual void final_integrate_respa(int, int); virtual void min_pre_exchange(); virtual void min_pre_neighbor(); virtual void min_pre_force(int); virtual void min_post_force(int); virtual double min_energy(double *); virtual void min_store(); virtual void min_step(double, double *); 
virtual void min_clearstore(); virtual void min_pushstore(); virtual void min_popstore(); virtual double max_alpha(double *); virtual int min_dof(); virtual int min_reset_ref(); - void add_fix(int, char **, char *suffix = NULL); + void add_fix(int, char **, int trysuffix=0); void modify_fix(int, char **); void delete_fix(const char *); int find_fix(const char *); - void add_compute(int, char **, char *suffix = NULL); + void add_compute(int, char **, int trysuffix=0); void modify_compute(int, char **); void delete_compute(const char *); int find_compute(const char *); void clearstep_compute(); void addstep_compute(bigint); void addstep_compute_all(bigint); void write_restart(FILE *); int read_restart(FILE *); void restart_deallocate(); bigint memory_usage(); protected: // lists of fixes to apply at different stages of timestep int *list_initial_integrate,*list_post_integrate; int *list_pre_exchange,*list_pre_neighbor; int *list_pre_force,*list_post_force; int *list_final_integrate,*list_end_of_step,*list_thermo_energy; int *list_initial_integrate_respa,*list_post_integrate_respa; int *list_pre_force_respa,*list_post_force_respa; int *list_final_integrate_respa; int *list_min_pre_exchange,*list_min_pre_neighbor; int *list_min_pre_force,*list_min_post_force; int *list_min_energy; int *end_of_step_every; int n_timeflag; // list of computes that store time invocation int *list_timeflag; char **id_restart_global; // stored fix global info char **style_restart_global; // from read-in restart file char **state_restart_global; char **id_restart_peratom; // stored fix peratom info char **style_restart_peratom; // from read-in restart file int *index_restart_peratom; int index_permanent; // fix/compute index returned to library call void list_init(int, int &, int *&); void list_init_end_of_step(int, int &, int *&); void list_init_thermo_energy(int, int &, int *&); void list_init_compute(); private: typedef Compute *(*ComputeCreator)(LAMMPS *, int, char **); std::map 
*compute_map; typedef Fix *(*FixCreator)(LAMMPS *, int, char **); std::map *fix_map; template static Compute *compute_creator(LAMMPS *, int, char **); template static Fix *fix_creator(LAMMPS *, int, char **); }; } #endif /* ERROR/WARNING messages: W: One or more atoms are time integrated more than once This is probably an error since you typically do not want to advance the positions or velocities of an atom more than once per timestep. E: Illegal ... command Self-explanatory. Check the input script syntax and compare to the documentation for the command. You can use -echo screen as a command-line option when running LAMMPS to see the offending line. E: Fix command before simulation box is defined The fix command cannot be used before a read_data, read_restart, or create_box command. E: Could not find fix group ID A group ID used in the fix command does not exist. E: Replacing a fix, but new style != old style A fix ID can be used a 2nd time, but only if the style matches the previous fix. In this case it is assumed you wish to reset a fix's parameters. This error may mean you are mistakenly re-using a fix ID when you do not intend to. W: Replacing a fix, but new group != old group The ID and style of a fix match for a fix you are changing with a fix command, but the new group you are specifying does not match the old group. E: Invalid fix style The choice of fix style is unknown. E: Could not find fix_modify ID A fix ID used in the fix_modify command does not exist. E: Could not find fix ID to delete Self-explanatory. E: Reuse of compute ID A compute ID cannot be used twice. E: Invalid compute style Self-explanatory. E: Could not find compute_modify ID Self-explanatory. E: Could not find compute ID to delete Self-explanatory. 
*/ diff --git a/src/neigh_list.cpp b/src/neigh_list.cpp index dc925a6f9..0de4c0cb5 100644 --- a/src/neigh_list.cpp +++ b/src/neigh_list.cpp @@ -1,295 +1,296 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #include "neigh_list.h" #include "atom.h" #include "comm.h" #include "update.h" #include "neighbor.h" #include "neigh_request.h" #include "my_page.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; #define PGDELTA 1 enum{NSQ,BIN,MULTI}; // also in neighbor.cpp /* ---------------------------------------------------------------------- */ NeighList::NeighList(LAMMPS *lmp) : Pointers(lmp) { maxatoms = 0; inum = gnum = 0; ilist = NULL; numneigh = NULL; firstneigh = NULL; firstdouble = NULL; dnum = 0; last_build = -1; iskip = NULL; ijskip = NULL; listgranhistory = NULL; fix_history = NULL; respamiddle = 0; listinner = NULL; listmiddle = NULL; listfull = NULL; listcopy = NULL; listskip = NULL; maxstencil = 0; stencil = NULL; stencilxyz = NULL; maxstencil_multi = 0; nstencil_multi = NULL; stencil_multi = NULL; distsq_multi = NULL; ipage = NULL; dpage = NULL; } /* ---------------------------------------------------------------------- */ NeighList::~NeighList() { if (!listcopy) { memory->destroy(ilist); memory->destroy(numneigh); memory->sfree(firstneigh); memory->sfree(firstdouble); delete [] ipage; if (dnum) delete [] dpage; } delete [] iskip; memory->destroy(ijskip); if (maxstencil) memory->destroy(stencil); 
if (ghostflag) memory->destroy(stencilxyz); if (maxstencil_multi) { for (int i = 1; i <= atom->ntypes; i++) { memory->destroy(stencil_multi[i]); memory->destroy(distsq_multi[i]); } delete [] nstencil_multi; delete [] stencil_multi; delete [] distsq_multi; } } /* ---------------------------------------------------------------------- */ void NeighList::setup_pages(int pgsize_caller, int oneatom_caller, int dnum_caller) { pgsize = pgsize_caller; oneatom = oneatom_caller; dnum = dnum_caller; int nmypage = comm->nthreads; ipage = new MyPage[nmypage]; for (int i = 0; i < nmypage; i++) ipage[i].init(oneatom,pgsize,PGDELTA); if (dnum) { dpage = new MyPage[nmypage]; for (int i = 0; i < nmypage; i++) dpage[i].init(dnum*oneatom,dnum*pgsize,PGDELTA); } else dpage = NULL; } /* ---------------------------------------------------------------------- grow atom arrays to allow for nmax atoms triggered by more atoms on a processor caller knows if this list stores neighs of local atoms or local+ghost ------------------------------------------------------------------------- */ void NeighList::grow(int nmax) { // skip if this list is already long enough to store nmax atoms if (nmax <= maxatoms) return; maxatoms = nmax; memory->destroy(ilist); memory->destroy(numneigh); memory->sfree(firstneigh); memory->sfree(firstdouble); memory->create(ilist,maxatoms,"neighlist:ilist"); memory->create(numneigh,maxatoms,"neighlist:numneigh"); firstneigh = (int **) memory->smalloc(maxatoms*sizeof(int *), "neighlist:firstneigh"); if (dnum) firstdouble = (double **) memory->smalloc(maxatoms*sizeof(double *), "neighlist:firstdouble"); } /* ---------------------------------------------------------------------- insure stencils are large enough for smax bins style = BIN or MULTI ------------------------------------------------------------------------- */ void NeighList::stencil_allocate(int smax, int style) { int i; if (style == BIN) { if (smax > maxstencil) { maxstencil = smax; memory->destroy(stencil); 
memory->create(stencil,maxstencil,"neighlist:stencil"); if (ghostflag) { memory->destroy(stencilxyz); memory->create(stencilxyz,maxstencil,3,"neighlist:stencilxyz"); } } } else { int n = atom->ntypes; if (maxstencil_multi == 0) { nstencil_multi = new int[n+1]; stencil_multi = new int*[n+1]; distsq_multi = new double*[n+1]; for (i = 1; i <= n; i++) { nstencil_multi[i] = 0; stencil_multi[i] = NULL; distsq_multi[i] = NULL; } } if (smax > maxstencil_multi) { maxstencil_multi = smax; for (i = 1; i <= n; i++) { memory->destroy(stencil_multi[i]); memory->destroy(distsq_multi[i]); memory->create(stencil_multi[i],maxstencil_multi, "neighlist:stencil_multi"); memory->create(distsq_multi[i],maxstencil_multi, "neighlist:distsq_multi"); } } } } /* ---------------------------------------------------------------------- copy skip info from request rq into list's iskip,ijskip ------------------------------------------------------------------------- */ void NeighList::copy_skip_info(int *rq_iskip, int **rq_ijskip) { int ntypes = atom->ntypes; iskip = new int[ntypes+1]; memory->create(ijskip,ntypes+1,ntypes+1,"neigh_list:ijskip"); int i,j; for (i = 1; i <= ntypes; i++) iskip[i] = rq_iskip[i]; for (i = 1; i <= ntypes; i++) for (j = 1; j <= ntypes; j++) ijskip[i][j] = rq_ijskip[i][j]; } /* ---------------------------------------------------------------------- print attributes of this list and associated request ------------------------------------------------------------------------- */ void NeighList::print_attributes() { if (comm->me != 0) return; NeighRequest *rq = neighbor->requests[index]; printf("Neighbor list/request %d:\n",index); printf(" %d = build flag\n",buildflag); printf(" %d = grow flag\n",growflag); printf(" %d = stencil flag\n",stencilflag); printf(" %d = ghost flag\n",ghostflag); printf("\n"); printf(" %d = pair\n",rq->pair); printf(" %d = fix\n",rq->fix); printf(" %d = compute\n",rq->compute); printf(" %d = command\n",rq->command); printf("\n"); printf(" %d = 
half\n",rq->half); printf(" %d = full\n",rq->full); printf(" %d = gran\n",rq->gran); printf(" %d = granhistory\n",rq->granhistory); printf(" %d = respainner\n",rq->respainner); printf(" %d = respamiddle\n",rq->respamiddle); printf(" %d = respaouter\n",rq->respaouter); printf(" %d = half_from_full\n",rq->half_from_full); printf("\n"); printf(" %d = occasional\n",rq->occasional); printf(" %d = dnum\n",rq->dnum); printf(" %d = omp\n",rq->omp); + printf(" %d = intel\n",rq->intel); printf(" %d = ghost\n",rq->ghost); printf(" %d = cudable\n",rq->cudable); printf(" %d = omp\n",rq->omp); printf(" %d = copy\n",rq->copy); printf(" %d = skip\n",rq->skip); printf(" %d = otherlist\n",rq->otherlist); printf(" %p = listskip\n",listskip); printf("\n"); } /* ---------------------------------------------------------------------- return # of bytes of allocated memory if growflag = 0, maxatoms & maxpage will also be 0 if stencilflag = 0, maxstencil * maxstencil_multi will also be 0 ------------------------------------------------------------------------- */ bigint NeighList::memory_usage() { bigint bytes = 0; bytes += memory->usage(ilist,maxatoms); bytes += memory->usage(numneigh,maxatoms); bytes += maxatoms * sizeof(int *); int nmypage = comm->nthreads; if (ipage) { for (int i = 0; i < nmypage; i++) bytes += ipage[i].size(); } if (dnum && dpage) { for (int i = 0; i < nmypage; i++) { bytes += maxatoms * sizeof(double *); bytes += dpage[i].size(); } } if (maxstencil) bytes += memory->usage(stencil,maxstencil); if (ghostflag) bytes += memory->usage(stencilxyz,maxstencil,3); if (maxstencil_multi) { bytes += memory->usage(stencil_multi,atom->ntypes,maxstencil_multi); bytes += memory->usage(distsq_multi,atom->ntypes,maxstencil_multi); } return bytes; } diff --git a/src/neigh_request.cpp b/src/neigh_request.cpp index 643d11b3b..95ddc0151 100644 --- a/src/neigh_request.cpp +++ b/src/neigh_request.cpp @@ -1,208 +1,212 @@ /* 
---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #include "neigh_request.h" #include "atom.h" #include "memory.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ NeighRequest::NeighRequest(LAMMPS *lmp) : Pointers(lmp) { // default ID = 0 id = 0; unprocessed = 1; // default is pair request pair = 1; fix = compute = command = 0; // default is half neighbor list half = 1; full = 0; full_cluster = 0; gran = granhistory = 0; respainner = respamiddle = respaouter = 0; half_from_full = 0; // default is every reneighboring // default is use newton_pair setting in force // default is encode special bond flags // default is no auxiliary floating point values // default is no neighbors of ghosts // default is no CUDA neighbor list build // default is no multi-threaded neighbor list build // default is no Kokkos neighbor list build occasional = 0; newton = 0; special = 1; dnum = 0; ghost = 0; cudable = 0; omp = 0; + intel = 0; kokkos_host = kokkos_device = 0; // default is no copy or skip copy = 0; skip = 0; iskip = NULL; ijskip = NULL; otherlist = -1; } /* ---------------------------------------------------------------------- */ NeighRequest::~NeighRequest() { delete [] iskip; memory->destroy(ijskip); } /* ---------------------------------------------------------------------- archive request params that Neighbor may change after call to identical() 
------------------------------------------------------------------------- */ void NeighRequest::archive() { half_original = half; half_from_full_original = half_from_full; copy_original = copy; otherlist_original = otherlist; } /* ---------------------------------------------------------------------- compare this request to other request identical means all params set by requester are the same compare to original values in other if Neighbor may have changed them return 1 if identical, 0 if not ------------------------------------------------------------------------- */ int NeighRequest::identical(NeighRequest *other) { int same = 1; // set same = 0 if old list was never processed if (other->unprocessed) same = 0; if (requestor != other->requestor) same = 0; if (id != other->id) same = 0; if (pair != other->pair) same = 0; if (fix != other->fix) same = 0; if (compute != other->compute) same = 0; if (command != other->command) same = 0; if (half != other->half_original) same = 0; if (full != other->full) same = 0; if (gran != other->gran) same = 0; if (granhistory != other->granhistory) same = 0; if (respainner != other->respainner) same = 0; if (respamiddle != other->respamiddle) same = 0; if (respaouter != other->respaouter) same = 0; if (half_from_full != other->half_from_full_original) same = 0; if (newton != other->newton) same = 0; if (occasional != other->occasional) same = 0; if (special != other->special) same = 0; if (dnum != other->dnum) same = 0; if (ghost != other->ghost) same = 0; if (cudable != other->cudable) same = 0; if (omp != other->omp) same = 0; + if (intel != other->intel) same = 0; if (copy != other->copy_original) same = 0; if (same_skip(other) == 0) same = 0; if (otherlist != other->otherlist_original) same = 0; return same; } /* ---------------------------------------------------------------------- compare kind of this request to other request return 1 if same, 0 if different 
------------------------------------------------------------------------- */ int NeighRequest::same_kind(NeighRequest *other) { int same = 1; if (half != other->half) same = 0; if (full != other->full) same = 0; if (gran != other->gran) same = 0; if (granhistory != other->granhistory) same = 0; if (respainner != other->respainner) same = 0; if (respamiddle != other->respamiddle) same = 0; if (respaouter != other->respaouter) same = 0; if (half_from_full != other->half_from_full) same = 0; if (newton != other->newton) same = 0; if (ghost != other->ghost) same = 0; if (cudable != other->cudable) same = 0; if (omp != other->omp) same = 0; + if (intel != other->intel) same = 0; return same; } /* ---------------------------------------------------------------------- compare skip attributes of this request to other request return 1 if same, 0 if different ------------------------------------------------------------------------- */ int NeighRequest::same_skip(NeighRequest *other) { int i,j; int same = 1; if (skip != other->skip) same = 0; if (skip && other->skip) { int ntypes = atom->ntypes; for (i = 1; i <= ntypes; i++) if (iskip[i] != other->iskip[i]) same = 0; for (i = 1; i <= ntypes; i++) for (j = 1; j <= ntypes; j++) if (ijskip[i][j] != other->ijskip[i][j]) same = 0; } return same; } /* ---------------------------------------------------------------------- set kind and other values of this request to that of other request ------------------------------------------------------------------------- */ void NeighRequest::copy_request(NeighRequest *other) { half = 0; if (other->half) half = 1; if (other->full) full = 1; if (other->gran) gran = 1; if (other->granhistory) granhistory = 1; if (other->respainner) respainner = 1; if (other->respamiddle) respamiddle = 1; if (other->respaouter) respaouter = 1; if (other->half_from_full) half_from_full = 1; newton = other->newton; dnum = other->dnum; ghost = other->ghost; cudable = other->cudable; omp = other->omp; + intel = 
other->intel; } diff --git a/src/neigh_request.h b/src/neigh_request.h index 769d5354b..41fa951fe 100644 --- a/src/neigh_request.h +++ b/src/neigh_request.h @@ -1,123 +1,124 @@ /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #ifndef LMP_NEIGH_REQUEST_H #define LMP_NEIGH_REQUEST_H #include "pointers.h" namespace LAMMPS_NS { class NeighRequest : protected Pointers { public: void *requestor; // class that made request int id; // ID of request // used to track multiple requests from one class int unprocessed; // 1 when first requested // 0 after processed by Neighbor class // which class is requesting the list, one flag is 1, others are 0 int pair; // set by default int fix; int compute; int command; // kind of list requested, one flag is 1, others are 0 // set by requesting class int half; // 1 if half neigh list (set by default) int full; // 1 if full neigh list int full_cluster; // only used by Kokkos pair styles int gran; // 1 if granular list int granhistory; // 1 if granular history list int respainner; // 1 if a rRESPA inner list int respamiddle; // 1 if a rRESPA middle list int respaouter; // 1 if a rRESPA outer list int half_from_full; // 1 if half list computed from previous full list // 0 if needed every reneighboring during run // 1 if occasionally needed by a fix, compute, etc // set by requesting class int occasional; // 0 if use force::newton_pair setting // 1 if override with pair newton on // 2 if override with 
pair newton off int newton; // 0 if user of list wants no encoding of special bond flags and all neighs // 1 if user of list wants special bond flags encoded, set by default int special; // number of auxiliary floating point values to store, 0 if none // set by requesting class int dnum; // 1 if also need neighbors of ghosts int ghost; // 1 if neighbor list build will be done on GPU int cudable; - // 1 if using multi-threaded neighbor list build + // 1 if using multi-threaded neighbor list build for USER-OMP or USER-INTEL int omp; + int intel; // 1 if using Kokkos neighbor build int kokkos_host; int kokkos_device; // set by neighbor and pair_hybrid after all requests are made // these settings do not change kind value int copy; // 1 if this list copied from another list int skip; // 1 if this list skips atom types from another list int *iskip; // iskip[i] if atoms of type I are not in list int **ijskip; // ijskip[i][j] if pairs of type I,J are not in list int otherlist; // index of other list to copy or skip from // original params by requester // stored to compare against in identical() in case Neighbor changes them int half_original; int half_from_full_original; int copy_original; int otherlist_original; // methods NeighRequest(class LAMMPS *); ~NeighRequest(); void archive(); int identical(NeighRequest *); int same_kind(NeighRequest *); int same_skip(NeighRequest *); void copy_request(NeighRequest *); }; } #endif diff --git a/src/neighbor.cpp b/src/neighbor.cpp index 705887ba0..28c051313 100644 --- a/src/neighbor.cpp +++ b/src/neighbor.cpp @@ -1,2045 +1,2057 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. 
This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author (triclinic and multi-neigh) : Pieter in 't Veld (SNL) ------------------------------------------------------------------------- */ #include "lmptype.h" #include "mpi.h" #include "math.h" #include "stdlib.h" #include "string.h" #include "neighbor.h" #include "neigh_list.h" #include "neigh_request.h" #include "atom.h" #include "atom_vec.h" #include "comm.h" #include "force.h" #include "pair.h" #include "domain.h" #include "group.h" #include "modify.h" #include "fix.h" #include "compute.h" #include "update.h" #include "respa.h" #include "output.h" #include "citeme.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; #define RQDELTA 1 #define EXDELTA 1 #define LB_FACTOR 1.5 #define SMALL 1.0e-6 #define BIG 1.0e20 #define CUT2BIN_RATIO 100 enum{NSQ,BIN,MULTI}; // also in neigh_list.cpp static const char cite_neigh_multi[] = "neighbor multi command:\n\n" "@Article{Intveld08,\n" " author = {P.{\\,}J.~in{\\,}'t~Veld and S.{\\,}J.~Plimpton" " and G.{\\,}S.~Grest},\n" " title = {Accurate and Efficient Methods for Modeling Colloidal\n" " Mixtures in an Explicit Solvent using Molecular Dynamics},\n" " journal = {Comp.~Phys.~Comm.},\n" " year = 2008,\n" " volume = 179,\n" " pages = {320--329}\n" "}\n\n"; //#define NEIGH_LIST_DEBUG 1 /* ---------------------------------------------------------------------- */ Neighbor::Neighbor(LAMMPS *lmp) : Pointers(lmp) { MPI_Comm_rank(world,&me); MPI_Comm_size(world,&nprocs); style = BIN; every = 1; delay = 10; dist_check = 1; pgsize = 100000; oneatom = 2000; binsizeflag = 0; build_once = 0; cluster_check = 0; binatomflag = 1; cutneighsq = NULL; cutneighghostsq = NULL; cuttype = NULL; cuttypesq = NULL; fixchecklist = NULL; // coords at 
last neighboring maxhold = 0; xhold = NULL; // binning maxhead = 0; binhead = NULL; maxbin = 0; bins = NULL; // pair exclusion list info includegroup = 0; nex_type = maxex_type = 0; ex1_type = ex2_type = NULL; ex_type = NULL; nex_group = maxex_group = 0; ex1_group = ex2_group = ex1_bit = ex2_bit = NULL; nex_mol = maxex_mol = 0; ex_mol_group = ex_mol_bit = NULL; // pair lists maxatom = 0; nblist = nglist = nslist = 0; nlist = 0; lists = NULL; pair_build = NULL; stencil_create = NULL; blist = glist = slist = NULL; anyghostlist = 0; nrequest = maxrequest = 0; requests = NULL; old_style = BIN; old_triclinic = 0; old_pgsize = pgsize; old_oneatom = oneatom; old_nrequest = 0; old_requests = NULL; // bond lists maxbond = 0; bondlist = NULL; maxangle = 0; anglelist = NULL; maxdihedral = 0; dihedrallist = NULL; maximproper = 0; improperlist = NULL; } /* ---------------------------------------------------------------------- */ Neighbor::~Neighbor() { memory->destroy(cutneighsq); memory->destroy(cutneighghostsq); delete [] cuttype; delete [] cuttypesq; delete [] fixchecklist; memory->destroy(xhold); memory->destroy(binhead); memory->destroy(bins); memory->destroy(ex1_type); memory->destroy(ex2_type); memory->destroy(ex_type); memory->destroy(ex1_group); memory->destroy(ex2_group); delete [] ex1_bit; delete [] ex2_bit; memory->destroy(ex_mol_group); delete [] ex_mol_bit; for (int i = 0; i < nlist; i++) delete lists[i]; delete [] lists; delete [] pair_build; delete [] stencil_create; delete [] blist; delete [] glist; delete [] slist; for (int i = 0; i < nrequest; i++) delete requests[i]; memory->sfree(requests); for (int i = 0; i < old_nrequest; i++) delete old_requests[i]; memory->sfree(old_requests); memory->destroy(bondlist); memory->destroy(anglelist); memory->destroy(dihedrallist); memory->destroy(improperlist); } /* ---------------------------------------------------------------------- */ void Neighbor::init() { int i,j,m,n; ncalls = ndanger = 0; dimension = 
domain->dimension; triclinic = domain->triclinic; newton_pair = force->newton_pair; // error check if (delay > 0 && (delay % every) != 0) error->all(FLERR,"Neighbor delay must be 0 or multiple of every setting"); if (pgsize < 10*oneatom) error->all(FLERR,"Neighbor page size must be >= 10x the one atom setting"); // ------------------------------------------------------------------ // settings // bbox lo/hi = bounding box of entire domain, stored by Domain if (triclinic == 0) { bboxlo = domain->boxlo; bboxhi = domain->boxhi; } else { bboxlo = domain->boxlo_bound; bboxhi = domain->boxhi_bound; } // set neighbor cutoffs (force cutoff + skin) // trigger determines when atoms migrate and neighbor lists are rebuilt // needs to be non-zero for migration distance check // even if pair = NULL and no neighbor lists are used // cutneigh = force cutoff + skin if cutforce > 0, else cutneigh = 0 // cutneighghost = pair cutghost if it requests it, else same as cutneigh triggersq = 0.25*skin*skin; boxcheck = 0; if (domain->box_change && (domain->xperiodic || domain->yperiodic || (dimension == 3 && domain->zperiodic))) boxcheck = 1; n = atom->ntypes; if (cutneighsq == NULL) { if (lmp->kokkos) init_cutneighsq_kokkos(n); else memory->create(cutneighsq,n+1,n+1,"neigh:cutneighsq"); memory->create(cutneighghostsq,n+1,n+1,"neigh:cutneighghostsq"); cuttype = new double[n+1]; cuttypesq = new double[n+1]; } double cutoff,delta,cut; cutneighmin = BIG; cutneighmax = 0.0; for (i = 1; i <= n; i++) { cuttype[i] = cuttypesq[i] = 0.0; for (j = 1; j <= n; j++) { if (force->pair) cutoff = sqrt(force->pair->cutsq[i][j]); else cutoff = 0.0; if (cutoff > 0.0) delta = skin; else delta = 0.0; cut = cutoff + delta; cutneighsq[i][j] = cut*cut; cuttype[i] = MAX(cuttype[i],cut); cuttypesq[i] = MAX(cuttypesq[i],cut*cut); cutneighmin = MIN(cutneighmin,cut); cutneighmax = MAX(cutneighmax,cut); if (force->pair && force->pair->ghostneigh) { cut = force->pair->cutghost[i][j] + skin; cutneighghostsq[i][j] = 
cut*cut; } else cutneighghostsq[i][j] = cut*cut; } } cutneighmaxsq = cutneighmax * cutneighmax; // check other classes that can induce reneighboring in decide() // don't check if build_once is set restart_check = 0; if (output->restart_flag) restart_check = 1; delete [] fixchecklist; fixchecklist = NULL; fixchecklist = new int[modify->nfix]; fix_check = 0; for (i = 0; i < modify->nfix; i++) if (modify->fix[i]->force_reneighbor) fixchecklist[fix_check++] = i; must_check = 0; if (restart_check || fix_check) must_check = 1; if (build_once) must_check = 0; // set special_flag for 1-2, 1-3, 1-4 neighbors // flag[0] is not used, flag[1] = 1-2, flag[2] = 1-3, flag[3] = 1-4 // flag = 0 if both LJ/Coulomb special values are 0.0 // flag = 1 if both LJ/Coulomb special values are 1.0 // flag = 2 otherwise or if KSpace solver is enabled // pairwise portion of KSpace solver uses all 1-2,1-3,1-4 neighbors if (force->special_lj[1] == 0.0 && force->special_coul[1] == 0.0) special_flag[1] = 0; else if (force->special_lj[1] == 1.0 && force->special_coul[1] == 1.0) special_flag[1] = 1; else special_flag[1] = 2; if (force->special_lj[2] == 0.0 && force->special_coul[2] == 0.0) special_flag[2] = 0; else if (force->special_lj[2] == 1.0 && force->special_coul[2] == 1.0) special_flag[2] = 1; else special_flag[2] = 2; if (force->special_lj[3] == 0.0 && force->special_coul[3] == 0.0) special_flag[3] = 0; else if (force->special_lj[3] == 1.0 && force->special_coul[3] == 1.0) special_flag[3] = 1; else special_flag[3] = 2; if (force->kspace || force->pair_match("coul/wolf",0) || force->pair_match("coul/dsf",0)) special_flag[1] = special_flag[2] = special_flag[3] = 2; // maxwt = max multiplicative factor on atom indices stored in neigh list maxwt = 0; if (special_flag[1] == 2) maxwt = 2; if (special_flag[2] == 2) maxwt = 3; if (special_flag[3] == 2) maxwt = 4; // rRESPA cutoffs int respa = 0; if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) { if (((Respa *) 
update->integrate)->level_inner >= 0) respa = 1; if (((Respa *) update->integrate)->level_middle >= 0) respa = 2; } if (respa) { double *cut_respa = ((Respa *) update->integrate)->cutoff; cut_inner_sq = (cut_respa[1] + skin) * (cut_respa[1] + skin); cut_middle_sq = (cut_respa[3] + skin) * (cut_respa[3] + skin); cut_middle_inside_sq = (cut_respa[0] - skin) * (cut_respa[0] - skin); if (cut_respa[0]-skin < 0) cut_middle_inside_sq = 0.0; } // ------------------------------------------------------------------ // xhold, bins, exclusion lists // free xhold and bins if not needed for this run if (dist_check == 0) { memory->destroy(xhold); maxhold = 0; xhold = NULL; } if (style == NSQ) { memory->destroy(bins); memory->destroy(binhead); maxbin = maxhead = 0; binhead = NULL; bins = NULL; } // 1st time allocation of xhold and bins if (dist_check) { if (maxhold == 0) { maxhold = atom->nmax; memory->create(xhold,maxhold,3,"neigh:xhold"); } } if (style != NSQ) { if (maxbin == 0) { maxbin = atom->nmax; memory->create(bins,maxbin,"bins"); } } // exclusion lists for type, group, molecule settings from neigh_modify // warn if exclusions used with KSpace solver n = atom->ntypes; if (nex_type == 0 && nex_group == 0 && nex_mol == 0) exclude = 0; else exclude = 1; if (nex_type) { memory->destroy(ex_type); memory->create(ex_type,n+1,n+1,"neigh:ex_type"); for (i = 1; i <= n; i++) for (j = 1; j <= n; j++) ex_type[i][j] = 0; for (i = 0; i < nex_type; i++) { if (ex1_type[i] <= 0 || ex1_type[i] > n || ex2_type[i] <= 0 || ex2_type[i] > n) error->all(FLERR,"Invalid atom type in neighbor exclusion list"); ex_type[ex1_type[i]][ex2_type[i]] = 1; ex_type[ex2_type[i]][ex1_type[i]] = 1; } } if (nex_group) { delete [] ex1_bit; delete [] ex2_bit; ex1_bit = new int[nex_group]; ex2_bit = new int[nex_group]; for (i = 0; i < nex_group; i++) { ex1_bit[i] = group->bitmask[ex1_group[i]]; ex2_bit[i] = group->bitmask[ex2_group[i]]; } } if (nex_mol) { delete [] ex_mol_bit; ex_mol_bit = new int[nex_mol]; for (i = 
0; i < nex_mol; i++) ex_mol_bit[i] = group->bitmask[ex_mol_group[i]]; } if (exclude && force->kspace && me == 0) error->warning(FLERR,"Neighbor exclusions used with KSpace solver " "may give inconsistent Coulombic energies"); // ------------------------------------------------------------------ // pairwise lists // test if pairwise lists need to be re-created // no need to re-create if: // neigh style, triclinic, pgsize, oneatom have not changed // current requests = old requests // first archive request params for current requests // before Neighbor possibly changes them below for (i = 0; i < nrequest; i++) requests[i]->archive(); int same = 1; if (style != old_style) same = 0; if (triclinic != old_triclinic) same = 0; if (pgsize != old_pgsize) same = 0; if (oneatom != old_oneatom) same = 0; if (nrequest != old_nrequest) same = 0; else for (i = 0; i < nrequest; i++) if (requests[i]->identical(old_requests[i]) == 0) same = 0; #ifdef NEIGH_LIST_DEBUG if (comm->me == 0) printf("SAME flag %d\n",same); #endif // if old and new are not the same, create new pairwise lists if (!same) { // delete old lists and create new ones for (i = 0; i < nlist; i++) delete lists[i]; delete [] lists; delete [] pair_build; delete [] stencil_create; if (lmp->kokkos) nlist = init_lists_kokkos(); else nlist = nrequest; lists = new NeighList*[nrequest]; pair_build = new PairPtr[nrequest]; stencil_create = new StencilPtr[nrequest]; // initialize to NULL since some may be Kokkos lists for (i = 0; i < nrequest; i++) { lists[i] = NULL; pair_build[i] = NULL; stencil_create[i] = NULL; } // create individual lists, one per request // pass list ptr back to requestor (except for Command class) // wait to allocate initial pages until copy lists are detected for (i = 0; i < nrequest; i++) { if (requests[i]->kokkos_host || requests[i]->kokkos_device) continue; lists[i] = new NeighList(lmp); lists[i]->index = i; if (requests[i]->pair) { Pair *pair = (Pair *) requests[i]->requestor; 
pair->init_list(requests[i]->id,lists[i]); } else if (requests[i]->fix) { Fix *fix = (Fix *) requests[i]->requestor; fix->init_list(requests[i]->id,lists[i]); } else if (requests[i]->compute) { Compute *compute = (Compute *) requests[i]->requestor; compute->init_list(requests[i]->id,lists[i]); } } // detect lists that are connected to other lists // if-then-else sequence and processed flag is important // since don't want to re-process skip or copy lists further down int processed; for (i = 0; i < nrequest; i++) { if (!lists[i]) continue; processed = 0; // copy: point this list at request->otherlist, could be a skip list if (requests[i]->copy) { lists[i]->listcopy = lists[requests[i]->otherlist]; processed = 1; // skip: point this list at request->otherlist, // copy skip info from request // skip list still needs to have granhistory or respa info added below } else if (requests[i]->skip) { lists[i]->listskip = lists[requests[i]->otherlist]; lists[i]->copy_skip_info(requests[i]->iskip,requests[i]->ijskip); processed = 1; // half_from_full: point this list at full list that comes right before // will only be case if pair style requested one after other } else if (requests[i]->half_from_full) { lists[i]->listfull = lists[i-1]; processed = 1; } // granhistory: set preceeding list's listgranhistory to this list // also set preceeding list's ptr to FixShearHistory if (requests[i]->granhistory) { lists[i-1]->listgranhistory = lists[i]; for (int ifix = 0; ifix < modify->nfix; ifix++) if (strcmp(modify->fix[ifix]->style,"SHEAR_HISTORY") == 0) lists[i-1]->fix_history = (FixShearHistory *) modify->fix[ifix]; processed = 1; // respaouter: point this list at preceeding 1/2 inner/middle lists } else if (requests[i]->respaouter) { if (requests[i-1]->respainner) { lists[i]->respamiddle = 0; lists[i]->listinner = lists[i-1]; } else { lists[i]->respamiddle = 1; lists[i]->listmiddle = lists[i-1]; lists[i]->listinner = lists[i-2]; } processed = 1; } if (processed) continue; // pair 
and half and newton != 2: // if there is a full non-occasional non-skip list // change this list to half_from_full and point at the full list // parent could be copy list or pair or fix // could remove newton != 2 check if added half_from_full_no_newton_ghost // option in neigh_derive.cpp and below in choose_build() // this would require full list had ghost info // would be useful when reax/c used in hybrid mode, e.g. with airebo if (requests[i]->pair && requests[i]->half && requests[i]->newton != 2) { for (j = 0; j < nrequest; j++) { if (!lists[j]) continue; if (requests[j]->full && requests[j]->occasional == 0 && requests[j]->skip == 0) break; } if (j < nrequest) { requests[i]->half = 0; requests[i]->half_from_full = 1; lists[i]->listfull = lists[j]; } // fix/compute requests: // whether request is occasional or not doesn't matter // if request = half and non-skip pair half/respaouter exists, // become copy of that list if cudable flag matches // if request = full and non-skip pair full exists, // become copy of that list if cudable flag matches // if request = half and non-skip pair full exists, // become half_from_full of that list if cudable flag matches // if no matches, do nothing // fix/compute list will be built independently as needed // ok if parent is itself a copy list } else if (requests[i]->fix || requests[i]->compute) { for (j = 0; j < nrequest; j++) { if (!lists[j]) continue; if (requests[i]->half && requests[j]->pair && requests[j]->skip == 0 && requests[j]->half) break; if (requests[i]->full && requests[j]->pair && requests[j]->skip == 0 && requests[j]->full) break; if (requests[i]->gran && requests[j]->pair && requests[j]->skip == 0 && requests[j]->gran) break; if (requests[i]->half && requests[j]->pair && requests[j]->skip == 0 && requests[j]->respaouter) break; } if (j < nrequest && requests[j]->cudable != requests[i]->cudable) j = nrequest; if (j < nrequest) { requests[i]->copy = 1; requests[i]->otherlist = j; lists[i]->listcopy = lists[j]; } 
else { for (j = 0; j < nrequest; j++) { if (!lists[j]) continue; if (requests[i]->half && requests[j]->pair && requests[j]->skip == 0 && requests[j]->full) break; } if (j < nrequest && requests[j]->cudable != requests[i]->cudable) j = nrequest; if (j < nrequest) { requests[i]->half = 0; requests[i]->half_from_full = 1; lists[i]->listfull = lists[j]; } } } } // allocate initial pages for each list, except if listcopy set for (i = 0; i < nrequest; i++) { if (!lists[i]) continue; if (!lists[i]->listcopy) lists[i]->setup_pages(pgsize,oneatom,requests[i]->dnum); } // set ptrs to pair_build and stencil_create functions for each list // ptrs set to NULL if not set explicitly // also set cudable to 0 if any neigh list request is not cudable for (i = 0; i < nrequest; i++) { choose_build(i,requests[i]); if (style != NSQ) choose_stencil(i,requests[i]); else stencil_create[i] = NULL; if (!requests[i]->cudable) cudable = 0; } // set each list's build/grow/stencil/ghost flags based on neigh request // buildflag = 1 if its pair_build() invoked every reneighbor // growflag = 1 if it stores atom-based arrays and pages // stencilflag = 1 if it stores stencil arrays // ghostflag = 1 if it stores neighbors of ghosts // anyghostlist = 1 if any non-occasional list stores neighbors of ghosts anyghostlist = 0; for (i = 0; i < nrequest; i++) { if (lists[i]) { lists[i]->buildflag = 1; if (pair_build[i] == NULL) lists[i]->buildflag = 0; if (requests[i]->occasional) lists[i]->buildflag = 0; lists[i]->growflag = 1; if (requests[i]->copy) lists[i]->growflag = 0; lists[i]->stencilflag = 1; if (style == NSQ) lists[i]->stencilflag = 0; if (stencil_create[i] == NULL) lists[i]->stencilflag = 0; lists[i]->ghostflag = 0; if (requests[i]->ghost) lists[i]->ghostflag = 1; if (requests[i]->ghost && !requests[i]->occasional) anyghostlist = 1; } else init_list_flags1_kokkos(i); } #ifdef NEIGH_LIST_DEBUG for (i = 0; i < nrequest; i++) lists[i]->print_attributes(); #endif // allocate atom arrays for neighbor 
lists that store them maxatom = atom->nmax; for (i = 0; i < nrequest; i++) { if (lists[i]) { if (lists[i]->growflag) lists[i]->grow(maxatom); } else init_list_grow_kokkos(i); } // setup 3 vectors of pairwise neighbor lists // blist = lists whose pair_build() is invoked every reneighbor // glist = lists who store atom arrays which are used every reneighbor // slist = lists who store stencil arrays which are used every reneighbor // blist and glist vectors are used by neighbor::build() // slist vector is used by neighbor::setup_bins() nblist = nglist = nslist = 0; delete [] blist; delete [] glist; delete [] slist; blist = new int[nrequest]; glist = new int[nrequest]; slist = new int[nrequest]; for (i = 0; i < nrequest; i++) { if (lists[i]) { if (lists[i]->buildflag) blist[nblist++] = i; if (lists[i]->growflag && requests[i]->occasional == 0) glist[nglist++] = i; if (lists[i]->stencilflag && requests[i]->occasional == 0) slist[nslist++] = i; } else init_list_flags2_kokkos(i); } #ifdef NEIGH_LIST_DEBUG print_lists_of_lists(); #endif // reorder build vector if necessary // relevant for lists that copy/skip/half-full from parent // the derived list must appear in blist after the parent list // no occasional lists are in build vector // swap two lists within blist when dependency is mis-ordered // done when entire pass thru blist results in no swaps int done = 0; while (!done) { done = 1; for (i = 0; i < nblist; i++) { if (!lists[blist[i]]) continue; NeighList *ptr = NULL; if (lists[blist[i]]->listfull) ptr = lists[blist[i]]->listfull; if (lists[blist[i]]->listcopy) ptr = lists[blist[i]]->listcopy; if (lists[blist[i]]->listskip) ptr = lists[blist[i]]->listskip; if (ptr == NULL) continue; for (m = 0; m < nrequest; m++) if (ptr == lists[m]) break; for (j = 0; j < nblist; j++) if (m == blist[j]) break; if (j < i) continue; int tmp = blist[i]; blist[i] = blist[j]; blist[j] = tmp; done = 0; break; } } #ifdef NEIGH_LIST_DEBUG print_lists_of_lists(); #endif } // mark all current 
requests as processed // delete old requests // copy current requests and style to old for next run for (i = 0; i < nrequest; i++) requests[i]->unprocessed = 0; for (i = 0; i < old_nrequest; i++) delete old_requests[i]; memory->sfree(old_requests); old_nrequest = nrequest; old_requests = requests; nrequest = maxrequest = 0; requests = NULL; old_style = style; old_triclinic = triclinic; // ------------------------------------------------------------------ // topology lists // 1st time allocation of topology lists if (atom->molecular && atom->nbonds && maxbond == 0) { if (nprocs == 1) maxbond = atom->nbonds; else maxbond = static_cast (LB_FACTOR * atom->nbonds / nprocs); memory->create(bondlist,maxbond,3,"neigh:bondlist"); } if (atom->molecular && atom->nangles && maxangle == 0) { if (nprocs == 1) maxangle = atom->nangles; else maxangle = static_cast (LB_FACTOR * atom->nangles / nprocs); memory->create(anglelist,maxangle,4,"neigh:anglelist"); } if (atom->molecular && atom->ndihedrals && maxdihedral == 0) { if (nprocs == 1) maxdihedral = atom->ndihedrals; else maxdihedral = static_cast (LB_FACTOR * atom->ndihedrals / nprocs); memory->create(dihedrallist,maxdihedral,5,"neigh:dihedrallist"); } if (atom->molecular && atom->nimpropers && maximproper == 0) { if (nprocs == 1) maximproper = atom->nimpropers; else maximproper = static_cast (LB_FACTOR * atom->nimpropers / nprocs); memory->create(improperlist,maximproper,5,"neigh:improperlist"); } // set flags that determine which topology neighboring routines to use // bonds,etc can only be broken for atom->molecular = 1, not 2 // SHAKE sets bonds and angles negative // bond_quartic sets bonds to 0 // delete_bonds sets all interactions negative int bond_off = 0; int angle_off = 0; for (i = 0; i < modify->nfix; i++) if (strcmp(modify->fix[i]->style,"shake") == 0) bond_off = angle_off = 1; if (force->bond && force->bond_match("quartic")) bond_off = 1; if (atom->avec->bonds_allow && atom->molecular == 1) { for (i = 0; i < 
atom->nlocal; i++) { if (bond_off) break; for (m = 0; m < atom->num_bond[i]; m++) if (atom->bond_type[i][m] <= 0) bond_off = 1; } } if (atom->avec->angles_allow && atom->molecular == 1) { for (i = 0; i < atom->nlocal; i++) { if (angle_off) break; for (m = 0; m < atom->num_angle[i]; m++) if (atom->angle_type[i][m] <= 0) angle_off = 1; } } int dihedral_off = 0; if (atom->avec->dihedrals_allow && atom->molecular == 1) { for (i = 0; i < atom->nlocal; i++) { if (dihedral_off) break; for (m = 0; m < atom->num_dihedral[i]; m++) if (atom->dihedral_type[i][m] <= 0) dihedral_off = 1; } } int improper_off = 0; if (atom->avec->impropers_allow && atom->molecular == 1) { for (i = 0; i < atom->nlocal; i++) { if (improper_off) break; for (m = 0; m < atom->num_improper[i]; m++) if (atom->improper_type[i][m] <= 0) improper_off = 1; } } // sync on/off settings across all procs int on_or_off = bond_off; MPI_Allreduce(&on_or_off,&bond_off,1,MPI_INT,MPI_MAX,world); on_or_off = angle_off; MPI_Allreduce(&on_or_off,&angle_off,1,MPI_INT,MPI_MAX,world); on_or_off = dihedral_off; MPI_Allreduce(&on_or_off,&dihedral_off,1,MPI_INT,MPI_MAX,world); on_or_off = improper_off; MPI_Allreduce(&on_or_off,&improper_off,1,MPI_INT,MPI_MAX,world); // set ptrs to topology build functions if (atom->molecular == 2) bond_build = &Neighbor::bond_template; else if (bond_off) bond_build = &Neighbor::bond_partial; else bond_build = &Neighbor::bond_all; if (atom->molecular == 2) angle_build = &Neighbor::angle_template; else if (angle_off) angle_build = &Neighbor::angle_partial; else angle_build = &Neighbor::angle_all; if (atom->molecular == 2) dihedral_build = &Neighbor::dihedral_template; else if (dihedral_off) dihedral_build = &Neighbor::dihedral_partial; else dihedral_build = &Neighbor::dihedral_all; if (atom->molecular == 2) improper_build = &Neighbor::improper_template; else if (improper_off) improper_build = &Neighbor::improper_partial; else improper_build = &Neighbor::improper_all; // set topology neighbor 
list counts to 0 // in case all are turned off but potential is still defined nbondlist = nanglelist = ndihedrallist = nimproperlist = 0; } /* ---------------------------------------------------------------------- */ int Neighbor::request(void *requestor) { if (nrequest == maxrequest) { maxrequest += RQDELTA; requests = (NeighRequest **) memory->srealloc(requests,maxrequest*sizeof(NeighRequest *), "neighbor:requests"); } requests[nrequest] = new NeighRequest(lmp); requests[nrequest]->requestor = requestor; nrequest++; return nrequest-1; } /* ---------------------------------------------------------------------- determine which pair_build function each neigh list needs based on settings of neigh request copy -> copy_from function skip -> granular function if gran with granhistory, respa function if respaouter, skip_from function for everything else half_from_full, half, full, gran, respaouter -> choose by newton and rq->newton and tri settings style NSQ options = newton off, newton on style BIN options = newton off, newton on and not tri, newton on and tri stlye MULTI options = same options as BIN if none of these, ptr = NULL since pair_build is not invoked for this list use "else if" b/c skip,copy can be set in addition to half,full,etc ------------------------------------------------------------------------- */ void Neighbor::choose_build(int index, NeighRequest *rq) { PairPtr pb = NULL; - if (rq->omp == 0) { + if (rq->omp == 0 && rq->intel == 0) { if (rq->copy) pb = &Neighbor::copy_from; else if (rq->skip) { if (rq->gran && lists[index]->listgranhistory) pb = &Neighbor::skip_from_granular; else if (rq->respaouter) pb = &Neighbor::skip_from_respa; else pb = &Neighbor::skip_from; } else if (rq->half_from_full) { if (rq->newton == 0) { if (newton_pair == 0) pb = &Neighbor::half_from_full_no_newton; else if (newton_pair == 1) pb = &Neighbor::half_from_full_newton; } else if (rq->newton == 1) { pb = &Neighbor::half_from_full_newton; } else if (rq->newton == 2) { pb = 
&Neighbor::half_from_full_no_newton; } } else if (rq->half) { if (style == NSQ) { if (rq->newton == 0) { if (newton_pair == 0) { if (rq->ghost == 0) pb = &Neighbor::half_nsq_no_newton; else if (includegroup) error->all(FLERR,"Neighbor include group not allowed " "with ghost neighbors"); else pb = &Neighbor::half_nsq_no_newton_ghost; } else if (newton_pair == 1) pb = &Neighbor::half_nsq_newton; } else if (rq->newton == 1) { pb = &Neighbor::half_nsq_newton; } else if (rq->newton == 2) { if (rq->ghost == 0) pb = &Neighbor::half_nsq_no_newton; else if (includegroup) error->all(FLERR,"Neighbor include group not allowed " "with ghost neighbors"); else pb = &Neighbor::half_nsq_no_newton_ghost; } } else if (style == BIN) { if (rq->newton == 0) { if (newton_pair == 0) { if (rq->ghost == 0) pb = &Neighbor::half_bin_no_newton; else if (includegroup) error->all(FLERR,"Neighbor include group not allowed " "with ghost neighbors"); else pb = &Neighbor::half_bin_no_newton_ghost; } else if (triclinic == 0) { pb = &Neighbor::half_bin_newton; } else if (triclinic == 1) pb = &Neighbor::half_bin_newton_tri; } else if (rq->newton == 1) { if (triclinic == 0) pb = &Neighbor::half_bin_newton; else if (triclinic == 1) pb = &Neighbor::half_bin_newton_tri; } else if (rq->newton == 2) { if (rq->ghost == 0) pb = &Neighbor::half_bin_no_newton; else if (includegroup) error->all(FLERR,"Neighbor include group not allowed " "with ghost neighbors"); else pb = &Neighbor::half_bin_no_newton_ghost; } } else if (style == MULTI) { if (rq->ghost == 1) error->all(FLERR, "Neighbor multi not yet enabled for ghost neighbors"); if (rq->newton == 0) { if (newton_pair == 0) pb = &Neighbor::half_multi_no_newton; else if (triclinic == 0) pb = &Neighbor::half_multi_newton; else if (triclinic == 1) pb = &Neighbor::half_multi_newton_tri; } else if (rq->newton == 1) { if (triclinic == 0) pb = &Neighbor::half_multi_newton; else if (triclinic == 1) pb = &Neighbor::half_multi_newton_tri; } else if (rq->newton == 2) pb = 
&Neighbor::half_multi_no_newton; } } else if (rq->full) { if (style == NSQ) { if (rq->ghost == 0) pb = &Neighbor::full_nsq; else if (includegroup) error->all(FLERR, "Neighbor include group not allowed with ghost neighbors"); else pb = &Neighbor::full_nsq_ghost; } else if (style == BIN) { if (rq->ghost == 0) pb = &Neighbor::full_bin; else if (includegroup) error->all(FLERR, "Neighbor include group not allowed with ghost neighbors"); else pb = &Neighbor::full_bin_ghost; } else if (style == MULTI) { if (rq->ghost == 1) error->all(FLERR, "Neighbor multi not yet enabled for ghost neighbors"); pb = &Neighbor::full_multi; } } else if (rq->gran) { if (style == NSQ) { if (newton_pair == 0) pb = &Neighbor::granular_nsq_no_newton; else if (newton_pair == 1) pb = &Neighbor::granular_nsq_newton; } else if (style == BIN) { if (newton_pair == 0) pb = &Neighbor::granular_bin_no_newton; else if (triclinic == 0) pb = &Neighbor::granular_bin_newton; else if (triclinic == 1) pb = &Neighbor::granular_bin_newton_tri; } else if (style == MULTI) error->all(FLERR,"Neighbor multi not yet enabled for granular"); } else if (rq->respaouter) { if (style == NSQ) { if (newton_pair == 0) pb = &Neighbor::respa_nsq_no_newton; else if (newton_pair == 1) pb = &Neighbor::respa_nsq_newton; } else if (style == BIN) { if (newton_pair == 0) pb = &Neighbor::respa_bin_no_newton; else if (triclinic == 0) pb = &Neighbor::respa_bin_newton; else if (triclinic == 1) pb = &Neighbor::respa_bin_newton_tri; } else if (style == MULTI) error->all(FLERR,"Neighbor multi not yet enabled for rRESPA"); } // OMP versions of build methods } else { if (rq->copy) pb = &Neighbor::copy_from; else if (rq->skip) { if (rq->gran && lists[index]->listgranhistory) pb = &Neighbor::skip_from_granular; else if (rq->respaouter) pb = &Neighbor::skip_from_respa; else pb = &Neighbor::skip_from; } else if (rq->half_from_full) { if (newton_pair == 0) pb = &Neighbor::half_from_full_no_newton_omp; else if (newton_pair == 1) pb = 
&Neighbor::half_from_full_newton_omp; } else if (rq->half) { if (style == NSQ) { if (rq->newton == 0) { if (newton_pair == 0) { if (rq->ghost == 0) pb = &Neighbor::half_nsq_no_newton_omp; else if (includegroup) error->all(FLERR,"Neighbor include group not allowed " "with ghost neighbors"); else pb = &Neighbor::half_nsq_no_newton_ghost_omp; } else if (newton_pair == 1) pb = &Neighbor::half_nsq_newton_omp; } else if (rq->newton == 1) { pb = &Neighbor::half_nsq_newton_omp; } else if (rq->newton == 2) { if (rq->ghost == 0) pb = &Neighbor::half_nsq_no_newton_omp; else if (includegroup) error->all(FLERR,"Neighbor include group not allowed " "with ghost neighbors"); else pb = &Neighbor::half_nsq_no_newton_ghost_omp; } } else if (style == BIN) { if (rq->newton == 0) { if (newton_pair == 0) { - if (rq->ghost == 0) pb = &Neighbor::half_bin_no_newton_omp; - else if (includegroup) + if (rq->ghost == 0) { + if (rq->intel) pb = &Neighbor::half_bin_no_newton_intel; + else pb = &Neighbor::half_bin_no_newton_omp; + } else if (includegroup) error->all(FLERR,"Neighbor include group not allowed " "with ghost neighbors"); else pb = &Neighbor::half_bin_no_newton_ghost_omp; } else if (triclinic == 0) { - pb = &Neighbor::half_bin_newton_omp; - } else if (triclinic == 1) - pb = &Neighbor::half_bin_newton_tri_omp; + if (rq->intel) pb = &Neighbor::half_bin_newton_intel; + else pb = &Neighbor::half_bin_newton_omp; + } else if (triclinic == 1) { + if (rq->intel) pb = &Neighbor::half_bin_newton_tri_intel; + else pb = &Neighbor::half_bin_newton_tri_omp; + } } else if (rq->newton == 1) { - if (triclinic == 0) pb = &Neighbor::half_bin_newton_omp; - else if (triclinic == 1) pb = &Neighbor::half_bin_newton_tri_omp; + if (triclinic == 0) { + if (rq->intel) pb = &Neighbor::half_bin_newton_intel; + else pb = &Neighbor::half_bin_newton_omp; + } else if (triclinic == 1) { + if (rq->intel) pb = &Neighbor::half_bin_newton_tri_intel; + else pb = &Neighbor::half_bin_newton_tri_omp; + } } else if (rq->newton 
== 2) { - if (rq->ghost == 0) pb = &Neighbor::half_bin_no_newton_omp; - else if (includegroup) + if (rq->ghost == 0) { + if (rq->intel) pb = &Neighbor::half_bin_no_newton_intel; + else pb = &Neighbor::half_bin_no_newton_omp; + } else if (includegroup) error->all(FLERR,"Neighbor include group not allowed " "with ghost neighbors"); else pb = &Neighbor::half_bin_no_newton_ghost_omp; } } else if (style == MULTI) { if (rq->ghost == 1) error->all(FLERR, "Neighbor multi not yet enabled for ghost neighbors"); if (rq->newton == 0) { if (newton_pair == 0) pb = &Neighbor::half_multi_no_newton_omp; else if (triclinic == 0) pb = &Neighbor::half_multi_newton_omp; else if (triclinic == 1) pb = &Neighbor::half_multi_newton_tri_omp; } else if (rq->newton == 1) { if (triclinic == 0) pb = &Neighbor::half_multi_newton_omp; else if (triclinic == 1) pb = &Neighbor::half_multi_newton_tri_omp; } else if (rq->newton == 2) pb = &Neighbor::half_multi_no_newton_omp; } } else if (rq->full) { if (style == NSQ) { if (rq->ghost == 0) pb = &Neighbor::full_nsq_omp; else if (includegroup) error->all(FLERR, "Neighbor include group not allowed with ghost neighbors"); else pb = &Neighbor::full_nsq_ghost_omp; } else if (style == BIN) { if (rq->ghost == 0) pb = &Neighbor::full_bin_omp; else if (includegroup) error->all(FLERR, "Neighbor include group not allowed with ghost neighbors"); else pb = &Neighbor::full_bin_ghost_omp; } else if (style == MULTI) { if (rq->ghost == 1) error->all(FLERR, "Neighbor multi not yet enabled for ghost neighbors"); pb = &Neighbor::full_multi_omp; } } else if (rq->gran) { if (style == NSQ) { if (newton_pair == 0) pb = &Neighbor::granular_nsq_no_newton_omp; else if (newton_pair == 1) pb = &Neighbor::granular_nsq_newton_omp; } else if (style == BIN) { if (newton_pair == 0) pb = &Neighbor::granular_bin_no_newton_omp; else if (triclinic == 0) pb = &Neighbor::granular_bin_newton_omp; else if (triclinic == 1) pb = &Neighbor::granular_bin_newton_tri_omp; } else if (style == MULTI) 
error->all(FLERR,"Neighbor multi not yet enabled for granular"); } else if (rq->respaouter) { if (style == NSQ) { if (newton_pair == 0) pb = &Neighbor::respa_nsq_no_newton_omp; else if (newton_pair == 1) pb = &Neighbor::respa_nsq_newton_omp; } else if (style == BIN) { if (newton_pair == 0) pb = &Neighbor::respa_bin_no_newton_omp; else if (triclinic == 0) pb = &Neighbor::respa_bin_newton_omp; else if (triclinic == 1) pb = &Neighbor::respa_bin_newton_tri_omp; } else if (style == MULTI) error->all(FLERR,"Neighbor multi not yet enabled for rRESPA"); } } pair_build[index] = pb; } /* ---------------------------------------------------------------------- determine which stencil_create function each neigh list needs based on settings of neigh request, only called if style != NSQ skip or copy or half_from_full -> no stencil half, gran, respaouter, full -> choose by newton and tri and dimension if none of these, ptr = NULL since this list needs no stencils use "else if" b/c skip,copy can be set in addition to half,full,etc ------------------------------------------------------------------------- */ void Neighbor::choose_stencil(int index, NeighRequest *rq) { StencilPtr sc = NULL; if (rq->skip || rq->copy || rq->half_from_full) sc = NULL; else if (rq->half || rq->gran || rq->respaouter) { if (style == BIN) { if (rq->newton == 0) { if (newton_pair == 0) { if (dimension == 2) { if (rq->ghost) sc = &Neighbor::stencil_half_ghost_bin_2d_no_newton; else sc = &Neighbor::stencil_half_bin_2d_no_newton; } else if (dimension == 3) { if (rq->ghost) sc = &Neighbor::stencil_half_ghost_bin_3d_no_newton; else sc = &Neighbor::stencil_half_bin_3d_no_newton; } } else if (triclinic == 0) { if (dimension == 2) sc = &Neighbor::stencil_half_bin_2d_newton; else if (dimension == 3) sc = &Neighbor::stencil_half_bin_3d_newton; } else if (triclinic == 1) { if (dimension == 2) sc = &Neighbor::stencil_half_bin_2d_newton_tri; else if (dimension == 3) sc = &Neighbor::stencil_half_bin_3d_newton_tri; } } else 
if (rq->newton == 1) { if (triclinic == 0) { if (dimension == 2) sc = &Neighbor::stencil_half_bin_2d_newton; else if (dimension == 3) sc = &Neighbor::stencil_half_bin_3d_newton; } else if (triclinic == 1) { if (dimension == 2) sc = &Neighbor::stencil_half_bin_2d_newton_tri; else if (dimension == 3) sc = &Neighbor::stencil_half_bin_3d_newton_tri; } } else if (rq->newton == 2) { if (dimension == 2) if (rq->ghost) sc = &Neighbor::stencil_half_ghost_bin_2d_no_newton; else sc = &Neighbor::stencil_half_bin_2d_no_newton; else if (dimension == 3) { if (rq->ghost) sc = &Neighbor::stencil_half_ghost_bin_3d_no_newton; else sc = &Neighbor::stencil_half_bin_3d_no_newton; } } } else if (style == MULTI) { if (rq->newton == 0) { if (newton_pair == 0) { if (dimension == 2) sc = &Neighbor::stencil_half_multi_2d_no_newton; else if (dimension == 3) sc = &Neighbor::stencil_half_multi_3d_no_newton; } else if (triclinic == 0) { if (dimension == 2) sc = &Neighbor::stencil_half_multi_2d_newton; else if (dimension == 3) sc = &Neighbor::stencil_half_multi_3d_newton; } else if (triclinic == 1) { if (dimension == 2) sc = &Neighbor::stencil_half_multi_2d_newton_tri; else if (dimension == 3) sc = &Neighbor::stencil_half_multi_3d_newton_tri; } } else if (rq->newton == 1) { if (triclinic == 0) { if (dimension == 2) sc = &Neighbor::stencil_half_multi_2d_newton; else if (dimension == 3) sc = &Neighbor::stencil_half_multi_3d_newton; } else if (triclinic == 1) { if (dimension == 2) sc = &Neighbor::stencil_half_multi_2d_newton_tri; else if (dimension == 3) sc = &Neighbor::stencil_half_multi_3d_newton_tri; } } else if (rq->newton == 2) { if (dimension == 2) sc = &Neighbor::stencil_half_multi_2d_no_newton; else if (dimension == 3) sc = &Neighbor::stencil_half_multi_3d_no_newton; } } } else if (rq->full) { if (style == BIN) { if (dimension == 2) { if (rq->ghost) sc = &Neighbor::stencil_full_ghost_bin_2d; else sc = &Neighbor::stencil_full_bin_2d; } else if (dimension == 3) { if (rq->ghost) sc = 
&Neighbor::stencil_full_ghost_bin_3d; else sc = &Neighbor::stencil_full_bin_3d; } } else if (style == MULTI) { if (dimension == 2) sc = &Neighbor::stencil_full_multi_2d; else if (dimension == 3) sc = &Neighbor::stencil_full_multi_3d; } } stencil_create[index] = sc; } /* ---------------------------------------------------------------------- */ void Neighbor::print_lists_of_lists() { if (comm->me == 0) { printf("Build lists = %d: ",nblist); for (int i = 0; i < nblist; i++) printf("%d ",blist[i]); printf("\n"); printf("Grow lists = %d: ",nglist); for (int i = 0; i < nglist; i++) printf("%d ",glist[i]); printf("\n"); printf("Stencil lists = %d: ",nslist); for (int i = 0; i < nslist; i++) printf("%d ",slist[i]); printf("\n"); } } /* ---------------------------------------------------------------------- */ int Neighbor::decide() { if (must_check) { bigint n = update->ntimestep; if (restart_check && n == output->next_restart) return 1; for (int i = 0; i < fix_check; i++) if (n == modify->fix[fixchecklist[i]]->next_reneighbor) return 1; } ago++; if (ago >= delay && ago % every == 0) { if (build_once) return 0; if (dist_check == 0) return 1; return check_distance(); } else return 0; } /* ---------------------------------------------------------------------- if any atom moved trigger distance (half of neighbor skin) return 1 shrink trigger distance if box size has changed conservative shrink procedure: compute distance each of 8 corners of box has moved since last reneighbor reduce skin distance by sum of 2 largest of the 8 values new trigger = 1/2 of reduced skin distance for orthogonal box, only need 2 lo/hi corners for triclinic, need all 8 corners since deformations can displace all 8 ------------------------------------------------------------------------- */ int Neighbor::check_distance() { double delx,dely,delz,rsq; double delta,deltasq,delta1,delta2; if (boxcheck) { if (triclinic == 0) { delx = bboxlo[0] - boxlo_hold[0]; dely = bboxlo[1] - boxlo_hold[1]; delz = 
bboxlo[2] - boxlo_hold[2]; delta1 = sqrt(delx*delx + dely*dely + delz*delz); delx = bboxhi[0] - boxhi_hold[0]; dely = bboxhi[1] - boxhi_hold[1]; delz = bboxhi[2] - boxhi_hold[2]; delta2 = sqrt(delx*delx + dely*dely + delz*delz); delta = 0.5 * (skin - (delta1+delta2)); deltasq = delta*delta; } else { domain->box_corners(); delta1 = delta2 = 0.0; for (int i = 0; i < 8; i++) { delx = corners[i][0] - corners_hold[i][0]; dely = corners[i][1] - corners_hold[i][1]; delz = corners[i][2] - corners_hold[i][2]; delta = sqrt(delx*delx + dely*dely + delz*delz); if (delta > delta1) delta1 = delta; else if (delta > delta2) delta2 = delta; } delta = 0.5 * (skin - (delta1+delta2)); deltasq = delta*delta; } } else deltasq = triggersq; double **x = atom->x; int nlocal = atom->nlocal; if (includegroup) nlocal = atom->nfirst; int flag = 0; for (int i = 0; i < nlocal; i++) { delx = x[i][0] - xhold[i][0]; dely = x[i][1] - xhold[i][1]; delz = x[i][2] - xhold[i][2]; rsq = delx*delx + dely*dely + delz*delz; if (rsq > deltasq) flag = 1; } int flagall; MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_MAX,world); if (flagall && ago == MAX(every,delay)) ndanger++; return flagall; } /* ---------------------------------------------------------------------- build perpetuals neighbor lists called at setup and every few timesteps during run or minimization topology lists only built if topoflag = 1, USER-CUDA calls with topoflag = 0 ------------------------------------------------------------------------- */ void Neighbor::build(int topoflag) { int i; ago = 0; ncalls++; lastcall = update->ntimestep; // store current atom positions and box size if needed if (dist_check) { double **x = atom->x; int nlocal = atom->nlocal; if (includegroup) nlocal = atom->nfirst; if (nlocal > maxhold) { maxhold = atom->nmax; memory->destroy(xhold); memory->create(xhold,maxhold,3,"neigh:xhold"); } for (i = 0; i < nlocal; i++) { xhold[i][0] = x[i][0]; xhold[i][1] = x[i][1]; xhold[i][2] = x[i][2]; } if (boxcheck) { if (triclinic 
== 0) { boxlo_hold[0] = bboxlo[0]; boxlo_hold[1] = bboxlo[1]; boxlo_hold[2] = bboxlo[2]; boxhi_hold[0] = bboxhi[0]; boxhi_hold[1] = bboxhi[1]; boxhi_hold[2] = bboxhi[2]; } else { domain->box_corners(); corners = domain->corners; for (i = 0; i < 8; i++) { corners_hold[i][0] = corners[i][0]; corners_hold[i][1] = corners[i][1]; corners_hold[i][2] = corners[i][2]; } } } } // if any lists store neighbors of ghosts: // invoke grow() if nlocal+nghost exceeds previous list size // else only invoke grow() if nlocal exceeds previous list size // only for lists with growflag set and which are perpetual (glist) if (anyghostlist && atom->nlocal+atom->nghost > maxatom) { maxatom = atom->nmax; for (i = 0; i < nglist; i++) lists[glist[i]]->grow(maxatom); } else if (atom->nlocal > maxatom) { maxatom = atom->nmax; for (i = 0; i < nglist; i++) lists[glist[i]]->grow(maxatom); } // extend atom bin list if necessary if (style != NSQ && atom->nmax > maxbin) { maxbin = atom->nmax; memory->destroy(bins); memory->create(bins,maxbin,"bins"); } // check that using special bond flags will not overflow neigh lists if (atom->nlocal+atom->nghost > NEIGHMASK) error->one(FLERR,"Too many local+ghost atoms for neighbor list"); // invoke building of pair and molecular topology neighbor lists // only for pairwise lists with buildflag set // blist is for standard neigh lists, otherwise is a Kokkos list for (i = 0; i < nblist; i++) { if (lists[blist[i]]) (this->*pair_build[blist[i]])(lists[blist[i]]); else build_kokkos(i); } if (atom->molecular && topoflag) build_topology(); } /* ---------------------------------------------------------------------- build all topology neighbor lists every few timesteps normally built with pair lists, but USER-CUDA separates them ------------------------------------------------------------------------- */ void Neighbor::build_topology() { if (force->bond) (this->*bond_build)(); if (force->angle) (this->*angle_build)(); if (force->dihedral) (this->*dihedral_build)(); if 
(force->improper) (this->*improper_build)(); } /* ---------------------------------------------------------------------- build a single occasional pairwise neighbor list indexed by I called by other classes ------------------------------------------------------------------------- */ void Neighbor::build_one(int i, int preflag) { // no need to build if already built since last re-neighbor // preflag is set by fix bond/create and fix bond/swap // b/c they invoke build_one() on same step neigh list is re-built, // but before re-build, so need to use ">" instead of ">=" if (preflag) { if (lists[i]->last_build > lastcall) return; } else { if (lists[i]->last_build >= lastcall) return; } lists[i]->last_build = update->ntimestep; // update stencils and grow atom arrays as needed // only for relevant settings of stencilflag and growflag // grow atom array for this list to current size of perpetual lists if (lists[i]->stencilflag) { lists[i]->stencil_allocate(smax,style); (this->*stencil_create[i])(lists[i],sx,sy,sz); } if (lists[i]->growflag) lists[i]->grow(maxatom); // build list I, turning off atom binning // binning results from last re-neighbor should be used instead // if re-bin now, atoms may have moved outside of proc domain & bin extent, // leading to errors or even a crash binatomflag = 0; (this->*pair_build[i])(lists[i]); binatomflag = 1; } /* ---------------------------------------------------------------------- setup neighbor binning parameters bin numbering in each dimension is global: 0 = 0.0 to binsize, 1 = binsize to 2*binsize, etc nbin-1,nbin,etc = bbox-binsize to bbox, bbox to bbox+binsize, etc -1,-2,etc = -binsize to 0.0, -2*binsize to -binsize, etc code will work for any binsize since next(xyz) and stencil extend as far as necessary binsize = 1/2 of cutoff is roughly optimal for orthogonal boxes: a dim must be filled exactly by integer # of bins in periodic, procs on both sides of PBC must see same bin boundary in non-periodic, coord2bin() still assumes 
this by use of nbin xyz for triclinic boxes: tilted simulation box cannot contain integer # of bins stencil & neigh list built differently to account for this mbinlo = lowest global bin any of my ghost atoms could fall into mbinhi = highest global bin any of my ghost atoms could fall into mbin = number of bins I need in a dimension ------------------------------------------------------------------------- */ void Neighbor::setup_bins() { // bbox = size of bbox of entire domain // bsubbox lo/hi = bounding box of my subdomain extended by comm->cutghost // for triclinic: // bbox bounds all 8 corners of tilted box // subdomain is in lamda coords // include dimension-dependent extension via comm->cutghost // domain->bbox() converts lamda extent to box coords and computes bbox double bbox[3],bsubboxlo[3],bsubboxhi[3]; double *cutghost = comm->cutghost; if (triclinic == 0) { bsubboxlo[0] = domain->sublo[0] - cutghost[0]; bsubboxlo[1] = domain->sublo[1] - cutghost[1]; bsubboxlo[2] = domain->sublo[2] - cutghost[2]; bsubboxhi[0] = domain->subhi[0] + cutghost[0]; bsubboxhi[1] = domain->subhi[1] + cutghost[1]; bsubboxhi[2] = domain->subhi[2] + cutghost[2]; } else { double lo[3],hi[3]; lo[0] = domain->sublo_lamda[0] - cutghost[0]; lo[1] = domain->sublo_lamda[1] - cutghost[1]; lo[2] = domain->sublo_lamda[2] - cutghost[2]; hi[0] = domain->subhi_lamda[0] + cutghost[0]; hi[1] = domain->subhi_lamda[1] + cutghost[1]; hi[2] = domain->subhi_lamda[2] + cutghost[2]; domain->bbox(lo,hi,bsubboxlo,bsubboxhi); } bbox[0] = bboxhi[0] - bboxlo[0]; bbox[1] = bboxhi[1] - bboxlo[1]; bbox[2] = bboxhi[2] - bboxlo[2]; // optimal bin size is roughly 1/2 the cutoff // for BIN style, binsize = 1/2 of max neighbor cutoff // for MULTI style, binsize = 1/2 of min neighbor cutoff // special case of all cutoffs = 0.0, binsize = box size double binsize_optimal; if (binsizeflag) binsize_optimal = binsize_user; else if (style == BIN) binsize_optimal = 0.5*cutneighmax; else binsize_optimal = 0.5*cutneighmin; if 
(binsize_optimal == 0.0) binsize_optimal = bbox[0]; double binsizeinv = 1.0/binsize_optimal; // test for too many global bins in any dimension due to huge global domain if (bbox[0]*binsizeinv > MAXSMALLINT || bbox[1]*binsizeinv > MAXSMALLINT || bbox[2]*binsizeinv > MAXSMALLINT) error->all(FLERR,"Domain too large for neighbor bins"); // create actual bins // always have one bin even if cutoff > bbox // for 2d, nbinz = 1 nbinx = static_cast (bbox[0]*binsizeinv); nbiny = static_cast (bbox[1]*binsizeinv); if (dimension == 3) nbinz = static_cast (bbox[2]*binsizeinv); else nbinz = 1; if (nbinx == 0) nbinx = 1; if (nbiny == 0) nbiny = 1; if (nbinz == 0) nbinz = 1; // compute actual bin size for nbins to fit into box exactly // error if actual bin size << cutoff, since will create a zillion bins // this happens when nbin = 1 and box size << cutoff // typically due to non-periodic, flat system in a particular dim // in that extreme case, should use NSQ not BIN neighbor style binsizex = bbox[0]/nbinx; binsizey = bbox[1]/nbiny; binsizez = bbox[2]/nbinz; bininvx = 1.0 / binsizex; bininvy = 1.0 / binsizey; bininvz = 1.0 / binsizez; if (binsize_optimal*bininvx > CUT2BIN_RATIO || binsize_optimal*bininvy > CUT2BIN_RATIO || binsize_optimal*bininvz > CUT2BIN_RATIO) error->all(FLERR,"Cannot use neighbor bins - box size << cutoff"); // mbinlo/hi = lowest and highest global bins my ghost atoms could be in // coord = lowest and highest values of coords for my ghost atoms // static_cast(-1.5) = -1, so subract additional -1 // add in SMALL for round-off safety int mbinxhi,mbinyhi,mbinzhi; double coord; coord = bsubboxlo[0] - SMALL*bbox[0]; mbinxlo = static_cast ((coord-bboxlo[0])*bininvx); if (coord < bboxlo[0]) mbinxlo = mbinxlo - 1; coord = bsubboxhi[0] + SMALL*bbox[0]; mbinxhi = static_cast ((coord-bboxlo[0])*bininvx); coord = bsubboxlo[1] - SMALL*bbox[1]; mbinylo = static_cast ((coord-bboxlo[1])*bininvy); if (coord < bboxlo[1]) mbinylo = mbinylo - 1; coord = bsubboxhi[1] + 
SMALL*bbox[1]; mbinyhi = static_cast ((coord-bboxlo[1])*bininvy); if (dimension == 3) { coord = bsubboxlo[2] - SMALL*bbox[2]; mbinzlo = static_cast ((coord-bboxlo[2])*bininvz); if (coord < bboxlo[2]) mbinzlo = mbinzlo - 1; coord = bsubboxhi[2] + SMALL*bbox[2]; mbinzhi = static_cast ((coord-bboxlo[2])*bininvz); } // extend bins by 1 to insure stencil extent is included // if 2d, only 1 bin in z mbinxlo = mbinxlo - 1; mbinxhi = mbinxhi + 1; mbinx = mbinxhi - mbinxlo + 1; mbinylo = mbinylo - 1; mbinyhi = mbinyhi + 1; mbiny = mbinyhi - mbinylo + 1; if (dimension == 3) { mbinzlo = mbinzlo - 1; mbinzhi = mbinzhi + 1; } else mbinzlo = mbinzhi = 0; mbinz = mbinzhi - mbinzlo + 1; // memory for bin ptrs bigint bbin = ((bigint) mbinx) * ((bigint) mbiny) * ((bigint) mbinz); if (bbin > MAXSMALLINT) error->one(FLERR,"Too many neighbor bins"); mbins = bbin; if (mbins > maxhead) { maxhead = mbins; memory->destroy(binhead); memory->create(binhead,maxhead,"neigh:binhead"); } // create stencil of bins to search over in neighbor list construction // sx,sy,sz = max range of stencil in each dim // smax = max possible size of entire 3d stencil // stencil is empty if cutneighmax = 0.0 sx = static_cast (cutneighmax*bininvx); if (sx*binsizex < cutneighmax) sx++; sy = static_cast (cutneighmax*bininvy); if (sy*binsizey < cutneighmax) sy++; sz = static_cast (cutneighmax*bininvz); if (sz*binsizez < cutneighmax) sz++; if (dimension == 2) sz = 0; smax = (2*sx+1) * (2*sy+1) * (2*sz+1); // create stencils for pairwise neighbor lists // only done for lists with stencilflag and buildflag set for (int i = 0; i < nslist; i++) { if (lists[slist[i]]) { lists[slist[i]]->stencil_allocate(smax,style); (this->*stencil_create[slist[i]])(lists[slist[i]],sx,sy,sz); } else setup_bins_kokkos(i); } } /* ---------------------------------------------------------------------- compute closest distance between central bin (0,0,0) and bin (i,j,k) ------------------------------------------------------------------------- 
*/ double Neighbor::bin_distance(int i, int j, int k) { double delx,dely,delz; if (i > 0) delx = (i-1)*binsizex; else if (i == 0) delx = 0.0; else delx = (i+1)*binsizex; if (j > 0) dely = (j-1)*binsizey; else if (j == 0) dely = 0.0; else dely = (j+1)*binsizey; if (k > 0) delz = (k-1)*binsizez; else if (k == 0) delz = 0.0; else delz = (k+1)*binsizez; return (delx*delx + dely*dely + delz*delz); } /* ---------------------------------------------------------------------- set neighbor style and skin distance ------------------------------------------------------------------------- */ void Neighbor::set(int narg, char **arg) { if (narg != 2) error->all(FLERR,"Illegal neighbor command"); skin = force->numeric(FLERR,arg[0]); if (skin < 0.0) error->all(FLERR,"Illegal neighbor command"); if (strcmp(arg[1],"nsq") == 0) style = NSQ; else if (strcmp(arg[1],"bin") == 0) style = BIN; else if (strcmp(arg[1],"multi") == 0) style = MULTI; else error->all(FLERR,"Illegal neighbor command"); if (style == MULTI && lmp->citeme) lmp->citeme->add(cite_neigh_multi); } /* ---------------------------------------------------------------------- modify parameters of the pair-wise neighbor build ------------------------------------------------------------------------- */ void Neighbor::modify_params(int narg, char **arg) { int iarg = 0; while (iarg < narg) { if (strcmp(arg[iarg],"every") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal neigh_modify command"); every = force->inumeric(FLERR,arg[iarg+1]); if (every <= 0) error->all(FLERR,"Illegal neigh_modify command"); iarg += 2; } else if (strcmp(arg[iarg],"delay") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal neigh_modify command"); delay = force->inumeric(FLERR,arg[iarg+1]); if (delay < 0) error->all(FLERR,"Illegal neigh_modify command"); iarg += 2; } else if (strcmp(arg[iarg],"check") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal neigh_modify command"); if (strcmp(arg[iarg+1],"yes") == 0) dist_check = 1; else if 
(strcmp(arg[iarg+1],"no") == 0) dist_check = 0; else error->all(FLERR,"Illegal neigh_modify command"); iarg += 2; } else if (strcmp(arg[iarg],"once") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal neigh_modify command"); if (strcmp(arg[iarg+1],"yes") == 0) build_once = 1; else if (strcmp(arg[iarg+1],"no") == 0) build_once = 0; else error->all(FLERR,"Illegal neigh_modify command"); iarg += 2; } else if (strcmp(arg[iarg],"page") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal neigh_modify command"); old_pgsize = pgsize; pgsize = force->inumeric(FLERR,arg[iarg+1]); iarg += 2; } else if (strcmp(arg[iarg],"one") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal neigh_modify command"); old_oneatom = oneatom; oneatom = force->inumeric(FLERR,arg[iarg+1]); iarg += 2; } else if (strcmp(arg[iarg],"binsize") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal neigh_modify command"); binsize_user = force->numeric(FLERR,arg[iarg+1]); if (binsize_user <= 0.0) binsizeflag = 0; else binsizeflag = 1; iarg += 2; } else if (strcmp(arg[iarg],"cluster") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal neigh_modify command"); if (strcmp(arg[iarg+1],"yes") == 0) cluster_check = 1; else if (strcmp(arg[iarg+1],"no") == 0) cluster_check = 0; else error->all(FLERR,"Illegal neigh_modify command"); iarg += 2; } else if (strcmp(arg[iarg],"include") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal neigh_modify command"); includegroup = group->find(arg[iarg+1]); if (includegroup < 0) error->all(FLERR,"Invalid group ID in neigh_modify command"); if (includegroup && (atom->firstgroupname == NULL || strcmp(arg[iarg+1],atom->firstgroupname) != 0)) error->all(FLERR, "Neigh_modify include group != atom_modify first group"); iarg += 2; } else if (strcmp(arg[iarg],"exclude") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal neigh_modify command"); if (strcmp(arg[iarg+1],"type") == 0) { if (iarg+4 > narg) error->all(FLERR,"Illegal neigh_modify command"); if (nex_type == 
maxex_type) { maxex_type += EXDELTA; memory->grow(ex1_type,maxex_type,"neigh:ex1_type"); memory->grow(ex2_type,maxex_type,"neigh:ex2_type"); } ex1_type[nex_type] = force->inumeric(FLERR,arg[iarg+2]); ex2_type[nex_type] = force->inumeric(FLERR,arg[iarg+3]); nex_type++; iarg += 4; } else if (strcmp(arg[iarg+1],"group") == 0) { if (iarg+4 > narg) error->all(FLERR,"Illegal neigh_modify command"); if (nex_group == maxex_group) { maxex_group += EXDELTA; memory->grow(ex1_group,maxex_group,"neigh:ex1_group"); memory->grow(ex2_group,maxex_group,"neigh:ex2_group"); } ex1_group[nex_group] = group->find(arg[iarg+2]); ex2_group[nex_group] = group->find(arg[iarg+3]); if (ex1_group[nex_group] == -1 || ex2_group[nex_group] == -1) error->all(FLERR,"Invalid group ID in neigh_modify command"); nex_group++; iarg += 4; } else if (strcmp(arg[iarg+1],"molecule") == 0) { if (iarg+3 > narg) error->all(FLERR,"Illegal neigh_modify command"); if (atom->molecule_flag == 0) error->all(FLERR,"Neigh_modify exclude molecule " "requires atom attribute molecule"); if (nex_mol == maxex_mol) { maxex_mol += EXDELTA; memory->grow(ex_mol_group,maxex_mol,"neigh:ex_mol_group"); } ex_mol_group[nex_mol] = group->find(arg[iarg+2]); if (ex_mol_group[nex_mol] == -1) error->all(FLERR,"Invalid group ID in neigh_modify command"); nex_mol++; iarg += 3; } else if (strcmp(arg[iarg+1],"none") == 0) { nex_type = nex_group = nex_mol = 0; iarg += 2; } else error->all(FLERR,"Illegal neigh_modify command"); } else error->all(FLERR,"Illegal neigh_modify command"); } } /* ---------------------------------------------------------------------- bin owned and ghost atoms ------------------------------------------------------------------------- */ void Neighbor::bin_atoms() { int i,ibin; for (i = 0; i < mbins; i++) binhead[i] = -1; // bin in reverse order so linked list will be in forward order // also puts ghost atoms at end of list, which is necessary double **x = atom->x; int *mask = atom->mask; int nlocal = atom->nlocal; int 
nall = nlocal + atom->nghost; if (includegroup) { int bitmask = group->bitmask[includegroup]; for (i = nall-1; i >= nlocal; i--) { if (mask[i] & bitmask) { ibin = coord2bin(x[i]); bins[i] = binhead[ibin]; binhead[ibin] = i; } } for (i = atom->nfirst-1; i >= 0; i--) { ibin = coord2bin(x[i]); bins[i] = binhead[ibin]; binhead[ibin] = i; } } else { for (i = nall-1; i >= 0; i--) { ibin = coord2bin(x[i]); bins[i] = binhead[ibin]; binhead[ibin] = i; } } } /* ---------------------------------------------------------------------- convert atom coords into local bin # for orthogonal, only ghost atoms will have coord >= bboxhi or coord < bboxlo take special care to insure ghosts are in correct bins even w/ roundoff hi ghost atoms = nbin,nbin+1,etc owned atoms = 0 to nbin-1 lo ghost atoms = -1,-2,etc this is necessary so that both procs on either side of PBC treat a pair of atoms straddling the PBC in a consistent way for triclinic, doesn't matter since stencil & neigh list built differently ------------------------------------------------------------------------- */ int Neighbor::coord2bin(double *x) { int ix,iy,iz; if (x[0] >= bboxhi[0]) ix = static_cast ((x[0]-bboxhi[0])*bininvx) + nbinx; else if (x[0] >= bboxlo[0]) { ix = static_cast ((x[0]-bboxlo[0])*bininvx); ix = MIN(ix,nbinx-1); } else ix = static_cast ((x[0]-bboxlo[0])*bininvx) - 1; if (x[1] >= bboxhi[1]) iy = static_cast ((x[1]-bboxhi[1])*bininvy) + nbiny; else if (x[1] >= bboxlo[1]) { iy = static_cast ((x[1]-bboxlo[1])*bininvy); iy = MIN(iy,nbiny-1); } else iy = static_cast ((x[1]-bboxlo[1])*bininvy) - 1; if (x[2] >= bboxhi[2]) iz = static_cast ((x[2]-bboxhi[2])*bininvz) + nbinz; else if (x[2] >= bboxlo[2]) { iz = static_cast ((x[2]-bboxlo[2])*bininvz); iz = MIN(iz,nbinz-1); } else iz = static_cast ((x[2]-bboxlo[2])*bininvz) - 1; return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo); } /* ---------------------------------------------------------------------- same as coord2bin, but also return ix,iy,iz 
offsets in each dim ------------------------------------------------------------------------- */ int Neighbor::coord2bin(double *x, int &ix, int &iy, int &iz) { if (x[0] >= bboxhi[0]) ix = static_cast ((x[0]-bboxhi[0])*bininvx) + nbinx; else if (x[0] >= bboxlo[0]) { ix = static_cast ((x[0]-bboxlo[0])*bininvx); ix = MIN(ix,nbinx-1); } else ix = static_cast ((x[0]-bboxlo[0])*bininvx) - 1; if (x[1] >= bboxhi[1]) iy = static_cast ((x[1]-bboxhi[1])*bininvy) + nbiny; else if (x[1] >= bboxlo[1]) { iy = static_cast ((x[1]-bboxlo[1])*bininvy); iy = MIN(iy,nbiny-1); } else iy = static_cast ((x[1]-bboxlo[1])*bininvy) - 1; if (x[2] >= bboxhi[2]) iz = static_cast ((x[2]-bboxhi[2])*bininvz) + nbinz; else if (x[2] >= bboxlo[2]) { iz = static_cast ((x[2]-bboxlo[2])*bininvz); iz = MIN(iz,nbinz-1); } else iz = static_cast ((x[2]-bboxlo[2])*bininvz) - 1; ix -= mbinxlo; iy -= mbinylo; iz -= mbinzlo; return iz*mbiny*mbinx + iy*mbinx + ix; } /* ---------------------------------------------------------------------- test if atom pair i,j is excluded from neighbor list due to type, group, molecule settings from neigh_modify command return 1 if should be excluded, 0 if included ------------------------------------------------------------------------- */ int Neighbor::exclusion(int i, int j, int itype, int jtype, int *mask, tagint *molecule) const { int m; if (nex_type && ex_type[itype][jtype]) return 1; if (nex_group) { for (m = 0; m < nex_group; m++) { if (mask[i] & ex1_bit[m] && mask[j] & ex2_bit[m]) return 1; if (mask[i] & ex2_bit[m] && mask[j] & ex1_bit[m]) return 1; } } if (nex_mol) { for (m = 0; m < nex_mol; m++) if (mask[i] & ex_mol_bit[m] && mask[j] & ex_mol_bit[m] && molecule[i] == molecule[j]) return 1; } return 0; } /* ---------------------------------------------------------------------- return # of bytes of allocated memory ------------------------------------------------------------------------- */ bigint Neighbor::memory_usage() { bigint bytes = 0; bytes += 
memory->usage(xhold,maxhold,3); if (style != NSQ) { bytes += memory->usage(bins,maxbin); bytes += memory->usage(binhead,maxhead); } for (int i = 0; i < nrequest; i++) if (lists[i]) bytes += lists[i]->memory_usage(); bytes += memory->usage(bondlist,maxbond,3); bytes += memory->usage(anglelist,maxangle,4); bytes += memory->usage(dihedrallist,maxdihedral,5); bytes += memory->usage(improperlist,maximproper,5); return bytes; } /* ---------------------------------------------------------------------- return the value of exclude - used to check compatibility with GPU ------------------------------------------------------------------------- */ int Neighbor::exclude_setting() { return exclude; } diff --git a/src/neighbor.h b/src/neighbor.h index 3c0c4af88..05a8622d0 100644 --- a/src/neighbor.h +++ b/src/neighbor.h @@ -1,424 +1,425 @@ /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ #ifndef LMP_NEIGHBOR_H #define LMP_NEIGHBOR_H #include "pointers.h" namespace LAMMPS_NS { class Neighbor : protected Pointers { friend class Cuda; public: int style; // 0,1,2 = nsq, bin, multi int every; // build every this many steps int delay; // delay build for this many steps int dist_check; // 0 = always build, 1 = only if 1/2 dist int ago; // how many steps ago neighboring occurred int pgsize; // size of neighbor page int oneatom; // max # of neighbors for one atom int includegroup; // only build pairwise lists for this group int build_once; // 1 if only build lists once per run int cudable; // GPU <-> CPU communication flag for CUDA double skin; // skin distance double cutneighmin; // min neighbor cutoff for all type pairs double cutneighmax; // max neighbor cutoff for all type pairs double *cuttype; // for each type, max neigh cut w/ others bigint ncalls; // # of times build has been called bigint ndanger; // # of dangerous builds bigint lastcall; // timestep of last neighbor::build() call int nrequest; // requests for pairwise neighbor lists class NeighRequest **requests; // from Pair, Fix, Compute, Command classes int maxrequest; int old_style; // previous run info to avoid int old_nrequest; // re-creation of pairwise neighbor lists int old_triclinic; int old_pgsize; int old_oneatom; class NeighRequest **old_requests; int nlist; // pairwise neighbor lists class NeighList **lists; int nbondlist; // list of bonds to compute int **bondlist; int nanglelist; // list of angles to compute int **anglelist; int ndihedrallist; // list of dihedrals to compute int **dihedrallist; int nimproperlist; // list of impropers to compute int **improperlist; Neighbor(class LAMMPS *); virtual ~Neighbor(); virtual void init(); int request(void *); // another class requests a neighbor list void print_lists_of_lists(); // debug print out int decide(); // decide whether to build or not virtual int 
check_distance(); // check max distance moved since last build void setup_bins(); // setup bins based on box and cutoff virtual void build(int topoflag=1); // create all neighbor lists (pair,bond) virtual void build_topology(); // create all topology neighbor lists void build_one(int, int preflag=0); // create a single neighbor list void set(int, char **); // set neighbor style and skin distance void modify_params(int, char**); // modify parameters that control builds bigint memory_usage(); int exclude_setting(); protected: int me,nprocs; int maxatom; // size of atom-based NeighList arrays int maxbond,maxangle,maxdihedral,maximproper; // size of bond lists int maxwt; // max weighting factor applied + 1 int must_check; // 1 if must check other classes to reneigh int restart_check; // 1 if restart enabled, 0 if no int fix_check; // # of fixes that induce reneigh int *fixchecklist; // which fixes to check double **cutneighsq; // neighbor cutneigh sq for each type pair double **cutneighghostsq; // neighbor cutnsq for each ghost type pair double cutneighmaxsq; // cutneighmax squared double *cuttypesq; // cuttype squared double triggersq; // trigger = build when atom moves this dist int cluster_check; // 1 if check bond/angle/etc satisfies minimg double **xhold; // atom coords at last neighbor build int maxhold; // size of xhold array int boxcheck; // 1 if need to store box size double boxlo_hold[3],boxhi_hold[3]; // box size at last neighbor build double corners_hold[8][3]; // box corners at last neighbor build int binatomflag; // bin atoms or not when build neigh list // turned off by build_one() int nbinx,nbiny,nbinz; // # of global bins int *bins; // ptr to next atom in each bin int maxbin; // size of bins array int *binhead; // ptr to 1st atom in each bin int maxhead; // size of binhead array int mbins; // # of local bins and offset int mbinx,mbiny,mbinz; int mbinxlo,mbinylo,mbinzlo; int binsizeflag; // user-chosen bin size double binsize_user; double 
binsizex,binsizey,binsizez; // actual bin sizes and inverse sizes double bininvx,bininvy,bininvz; int sx,sy,sz,smax; // bin stencil extents int dimension; // 2/3 for 2d/3d int triclinic; // 0 if domain is orthog, 1 if triclinic int newton_pair; // 0 if newton off, 1 if on for pairwise double *bboxlo,*bboxhi; // ptrs to full domain bounding box double (*corners)[3]; // ptr to 8 corners of triclinic box double inner[2],middle[2]; // rRESPA cutoffs for extra lists double cut_inner_sq; // outer cutoff for inner neighbor list double cut_middle_sq; // outer cutoff for middle neighbor list double cut_middle_inside_sq; // inner cutoff for middle neighbor list int special_flag[4]; // flags for 1-2, 1-3, 1-4 neighbors int anyghostlist; // 1 if any non-occasional list // stores neighbors of ghosts int exclude; // 0 if no type/group exclusions, 1 if yes int nex_type; // # of entries in type exclusion list int maxex_type; // max # in type list int *ex1_type,*ex2_type; // pairs of types to exclude int **ex_type; // 2d array of excluded type pairs int nex_group; // # of entries in group exclusion list int maxex_group; // max # in group list int *ex1_group,*ex2_group; // pairs of group #'s to exclude int *ex1_bit,*ex2_bit; // pairs of group bits to exclude int nex_mol; // # of entries in molecule exclusion list int maxex_mol; // max # in molecule list int *ex_mol_group; // molecule group #'s to exclude int *ex_mol_bit; // molecule group bits to exclude int nblist,nglist,nslist; // # of pairwise neigh lists of various kinds int *blist; // lists to build every reneighboring int *glist; // lists to grow atom arrays every reneigh int *slist; // lists to grow stencil arrays every reneigh void bin_atoms(); // bin all atoms double bin_distance(int, int, int); // distance between binx int coord2bin(double *); // mapping atom coord to a bin int coord2bin(double *, int &, int &, int&); // ditto int exclusion(int, int, int, int, int *, tagint *) const; // test for pair exclusion virtual void 
choose_build(int, class NeighRequest *); void choose_stencil(int, class NeighRequest *); // dummy functions provided by NeighborKokkos virtual void init_cutneighsq_kokkos(int) {} virtual int init_lists_kokkos() {return 0;} virtual void init_list_flags1_kokkos(int) {} virtual void init_list_flags2_kokkos(int) {} virtual void init_list_grow_kokkos(int) {} virtual void build_kokkos(int) {} virtual void setup_bins_kokkos(int) {} // pairwise build functions typedef void (Neighbor::*PairPtr)(class NeighList *); PairPtr *pair_build; void half_nsq_no_newton(class NeighList *); void half_nsq_no_newton_ghost(class NeighList *); void half_nsq_newton(class NeighList *); void half_bin_no_newton(class NeighList *); void half_bin_no_newton_ghost(class NeighList *); void half_bin_newton(class NeighList *); void half_bin_newton_tri(class NeighList *); void half_multi_no_newton(class NeighList *); void half_multi_newton(class NeighList *); void half_multi_newton_tri(class NeighList *); void full_nsq(class NeighList *); void full_nsq_ghost(class NeighList *); void full_bin(class NeighList *); void full_bin_ghost(class NeighList *); void full_multi(class NeighList *); void half_from_full_no_newton(class NeighList *); void half_from_full_newton(class NeighList *); void skip_from(class NeighList *); void skip_from_granular(class NeighList *); void skip_from_respa(class NeighList *); void copy_from(class NeighList *); void granular_nsq_no_newton(class NeighList *); void granular_nsq_newton(class NeighList *); void granular_bin_no_newton(class NeighList *); void granular_bin_newton(class NeighList *); void granular_bin_newton_tri(class NeighList *); void respa_nsq_no_newton(class NeighList *); void respa_nsq_newton(class NeighList *); void respa_bin_no_newton(class NeighList *); void respa_bin_newton(class NeighList *); void respa_bin_newton_tri(class NeighList *); // include prototypes for multi-threaded neighbor lists // builds or their corresponding dummy versions #define 
LMP_INSIDE_NEIGHBOR_H #include "accelerator_omp.h" +#include "accelerator_intel.h" #undef LMP_INSIDE_NEIGHBOR_H // pairwise stencil creation functions typedef void (Neighbor::*StencilPtr)(class NeighList *, int, int, int); StencilPtr *stencil_create; void stencil_half_bin_2d_no_newton(class NeighList *, int, int, int); void stencil_half_ghost_bin_2d_no_newton(class NeighList *, int, int, int); void stencil_half_bin_3d_no_newton(class NeighList *, int, int, int); void stencil_half_ghost_bin_3d_no_newton(class NeighList *, int, int, int); void stencil_half_bin_2d_newton(class NeighList *, int, int, int); void stencil_half_bin_3d_newton(class NeighList *, int, int, int); void stencil_half_bin_2d_newton_tri(class NeighList *, int, int, int); void stencil_half_bin_3d_newton_tri(class NeighList *, int, int, int); void stencil_half_multi_2d_no_newton(class NeighList *, int, int, int); void stencil_half_multi_3d_no_newton(class NeighList *, int, int, int); void stencil_half_multi_2d_newton(class NeighList *, int, int, int); void stencil_half_multi_3d_newton(class NeighList *, int, int, int); void stencil_half_multi_2d_newton_tri(class NeighList *, int, int, int); void stencil_half_multi_3d_newton_tri(class NeighList *, int, int, int); void stencil_full_bin_2d(class NeighList *, int, int, int); void stencil_full_ghost_bin_2d(class NeighList *, int, int, int); void stencil_full_bin_3d(class NeighList *, int, int, int); void stencil_full_ghost_bin_3d(class NeighList *, int, int, int); void stencil_full_multi_2d(class NeighList *, int, int, int); void stencil_full_multi_3d(class NeighList *, int, int, int); // topology build functions typedef void (Neighbor::*BondPtr)(); // ptrs to topology build functions BondPtr bond_build; // ptr to bond list functions void bond_all(); // bond list with all bonds void bond_template(); // bond list with templated bonds void bond_partial(); // exclude certain bonds void bond_check(); BondPtr angle_build; // ptr to angle list functions void 
angle_all(); // angle list with all angles void angle_template(); // angle list with templated bonds void angle_partial(); // exclude certain angles void angle_check(); BondPtr dihedral_build; // ptr to dihedral list functions void dihedral_all(); // dihedral list with all dihedrals void dihedral_template(); // dihedral list with templated bonds void dihedral_partial(); // exclude certain dihedrals void dihedral_check(int, int **); BondPtr improper_build; // ptr to improper list functions void improper_all(); // improper list with all impropers void improper_template(); // improper list with templated bonds void improper_partial(); // exclude certain impropers // find_special: determine if atom j is in special list of atom i // if it is not, return 0 // if it is and special flag is 0 (both coeffs are 0.0), return -1 // if it is and special flag is 1 (both coeffs are 1.0), return 0 // if it is and special flag is 2 (otherwise), return 1,2,3 // for which level of neighbor it is (and which coeff it maps to) inline int find_special(const tagint *list, const int *nspecial, const tagint tag) const { const int n1 = nspecial[0]; const int n2 = nspecial[1]; const int n3 = nspecial[2]; for (int i = 0; i < n3; i++) { if (list[i] == tag) { if (i < n1) { if (special_flag[1] == 0) return -1; else if (special_flag[1] == 1) return 0; else return 1; } else if (i < n2) { if (special_flag[2] == 0) return -1; else if (special_flag[2] == 1) return 0; else return 2; } else { if (special_flag[3] == 0) return -1; else if (special_flag[3] == 1) return 0; else return 3; } } } return 0; }; }; } #endif /* ERROR/WARNING messages: E: Neighbor delay must be 0 or multiple of every setting The delay and every parameters set via the neigh_modify command are inconsistent. If the delay setting is non-zero, then it must be a multiple of the every setting. E: Neighbor page size must be >= 10x the one atom setting This is required to prevent wasting too much memory. 
E: Invalid atom type in neighbor exclusion list Atom types must range from 1 to Ntypes inclusive. W: Neighbor exclusions used with KSpace solver may give inconsistent Coulombic energies This is because excluding specific pair interactions also excludes them from long-range interactions which may not be the desired effect. The special_bonds command handles this consistently by insuring excluded (or weighted) 1-2, 1-3, 1-4 interactions are treated consistently by both the short-range pair style and the long-range solver. This is not done for exclusions of charged atom pairs via the neigh_modify exclude command. E: Neighbor include group not allowed with ghost neighbors This is a current restriction within LAMMPS. E: Neighbor multi not yet enabled for ghost neighbors This is a current restriction within LAMMPS. E: Neighbor multi not yet enabled for granular Self-explanatory. E: Neighbor multi not yet enabled for rRESPA Self-explanatory. E: Too many local+ghost atoms for neighbor list The number of nlocal + nghost atoms on a processor is limited by the size of a 32-bit integer with 2 bits removed for masking 1-2, 1-3, 1-4 neighbors. W: Building an occasional neighbor list when atoms may have moved too far This can cause LAMMPS to crash when the neighbor list is built. The solution is to check for building the regular neighbor lists more frequently. E: Domain too large for neighbor bins The domain has become extremely large so that neighbor bins cannot be used. Most likely, one or more atoms have been blown out of the simulation box to a great distance. E: Cannot use neighbor bins - box size << cutoff Too many neighbor bins will be created. This typically happens when the simulation box is very small in some dimension, compared to the neighbor cutoff. Use the "nsq" style instead of "bin" style. E: Too many neighbor bins This is likely due to an immense simulation box that has blown up to a large size. E: Illegal ... command Self-explanatory.
Check the input script syntax and compare to the documentation for the command. You can use -echo screen as a command-line option when running LAMMPS to see the offending line. E: Invalid group ID in neigh_modify command A group ID used in the neigh_modify command does not exist. E: Neigh_modify include group != atom_modify first group Self-explanatory. E: Neigh_modify exclude molecule requires atom attribute molecule Self-explanatory. */ diff --git a/src/output.cpp b/src/output.cpp index 0383dfe84..6a9223604 100644 --- a/src/output.cpp +++ b/src/output.cpp @@ -1,807 +1,807 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ #include "stdio.h" #include "stdlib.h" #include "string.h" #include "output.h" #include "style_dump.h" #include "atom.h" #include "neighbor.h" #include "input.h" #include "variable.h" #include "comm.h" #include "update.h" #include "group.h" #include "domain.h" #include "thermo.h" #include "modify.h" #include "compute.h" #include "force.h" #include "dump.h" #include "write_restart.h" #include "accelerator_cuda.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; #define DELTA 1 /* ---------------------------------------------------------------------- initialize all output ------------------------------------------------------------------------- */ Output::Output(LAMMPS *lmp) : Pointers(lmp) { // create default computes for temp,pressure,pe char **newarg = new char*[4]; newarg[0] = (char *) "thermo_temp"; newarg[1] = (char *) "all"; newarg[2] = (char *) "temp"; - modify->add_compute(3,newarg,lmp->suffix); + modify->add_compute(3,newarg,1); newarg[0] = (char *) "thermo_press"; newarg[1] = (char *) "all"; newarg[2] = (char *) "pressure"; newarg[3] = (char *) "thermo_temp"; - modify->add_compute(4,newarg,lmp->suffix); + modify->add_compute(4,newarg,1); newarg[0] = (char *) "thermo_pe"; newarg[1] = (char *) "all"; newarg[2] = (char *) "pe"; - modify->add_compute(3,newarg,lmp->suffix); + modify->add_compute(3,newarg,1); delete [] newarg; // create default Thermo class newarg = new char*[1]; newarg[0] = (char *) "one"; thermo = new Thermo(lmp,1,newarg); delete [] newarg; thermo_every = 0; var_thermo = NULL; ndump = 0; max_dump = 0; every_dump = NULL; next_dump = NULL; last_dump = NULL; var_dump = NULL; ivar_dump = NULL; dump = NULL; restart_flag = restart_flag_single = restart_flag_double = 0; restart_every_single = restart_every_double = 0; last_restart = -1; restart1 = restart2a = restart2b = NULL; var_restart_single = var_restart_double = NULL; restart = NULL; } /* 
---------------------------------------------------------------------- free all memory ------------------------------------------------------------------------- */ Output::~Output() { if (thermo) delete thermo; delete [] var_thermo; memory->destroy(every_dump); memory->destroy(next_dump); memory->destroy(last_dump); for (int i = 0; i < ndump; i++) delete [] var_dump[i]; memory->sfree(var_dump); memory->destroy(ivar_dump); for (int i = 0; i < ndump; i++) delete dump[i]; memory->sfree(dump); delete [] restart1; delete [] restart2a; delete [] restart2b; delete [] var_restart_single; delete [] var_restart_double; delete restart; } /* ---------------------------------------------------------------------- */ void Output::init() { thermo->init(); if (var_thermo) { ivar_thermo = input->variable->find(var_thermo); if (ivar_thermo < 0) error->all(FLERR,"Variable name for thermo every does not exist"); if (!input->variable->equalstyle(ivar_thermo)) error->all(FLERR,"Variable for thermo every is invalid style"); } for (int i = 0; i < ndump; i++) dump[i]->init(); for (int i = 0; i < ndump; i++) if (every_dump[i] == 0) { ivar_dump[i] = input->variable->find(var_dump[i]); if (ivar_dump[i] < 0) error->all(FLERR,"Variable name for dump every does not exist"); if (!input->variable->equalstyle(ivar_dump[i])) error->all(FLERR,"Variable for dump every is invalid style"); } if (restart_flag_single && restart_every_single == 0) { ivar_restart_single = input->variable->find(var_restart_single); if (ivar_restart_single < 0) error->all(FLERR,"Variable name for restart does not exist"); if (!input->variable->equalstyle(ivar_restart_single)) error->all(FLERR,"Variable for restart is invalid style"); } if (restart_flag_double && restart_every_double == 0) { ivar_restart_double = input->variable->find(var_restart_double); if (ivar_restart_double < 0) error->all(FLERR,"Variable name for restart does not exist"); if (!input->variable->equalstyle(ivar_restart_double)) error->all(FLERR,"Variable 
for restart is invalid style"); } } /* ---------------------------------------------------------------------- perform output for setup of run/min do dump first, so memory_usage will include dump allocation do thermo last, so will print after memory_usage memflag = 0/1 for printing out memory usage ------------------------------------------------------------------------- */ void Output::setup(int memflag) { bigint ntimestep = update->ntimestep; // perform dump at start of run only if: // current timestep is multiple of every and last dump not >= this step // this is first run after dump created and firstflag is set // note that variable freq will not write unless triggered by firstflag // set next_dump to multiple of every or variable value // set next_dump_any to smallest next_dump // wrap dumps that invoke computes and variable eval with clear/add // if dump not written now, use addstep_compute_all() since don't know // what computes the dump write would invoke // if no dumps, set next_dump_any to last+1 so will not influence next int writeflag; if (ndump && update->restrict_output == 0) { for (int idump = 0; idump < ndump; idump++) { if (dump[idump]->clearstep || every_dump[idump] == 0) modify->clearstep_compute(); writeflag = 0; if (every_dump[idump] && ntimestep % every_dump[idump] == 0 && last_dump[idump] != ntimestep) writeflag = 1; if (last_dump[idump] < 0 && dump[idump]->first_flag == 1) writeflag = 1; if (writeflag) { dump[idump]->write(); last_dump[idump] = ntimestep; } if (every_dump[idump]) next_dump[idump] = (ntimestep/every_dump[idump])*every_dump[idump] + every_dump[idump]; else { bigint nextdump = static_cast (input->variable->compute_equal(ivar_dump[idump])); if (nextdump <= ntimestep) error->all(FLERR,"Dump every variable returned a bad timestep"); next_dump[idump] = nextdump; } if (dump[idump]->clearstep || every_dump[idump] == 0) { if (writeflag) modify->addstep_compute(next_dump[idump]); else modify->addstep_compute_all(next_dump[idump]); } if 
(idump) next_dump_any = MIN(next_dump_any,next_dump[idump]); else next_dump_any = next_dump[0]; } } else next_dump_any = update->laststep + 1; // do not write restart files at start of run // set next_restart values to multiple of every or variable value // wrap variable eval with clear/add // if no restarts, set next_restart to last+1 so will not influence next if (restart_flag && update->restrict_output == 0) { if (restart_flag_single) { if (restart_every_single) next_restart_single = (ntimestep/restart_every_single)*restart_every_single + restart_every_single; else { bigint nextrestart = static_cast (input->variable->compute_equal(ivar_restart_single)); if (nextrestart <= ntimestep) error->all(FLERR,"Restart variable returned a bad timestep"); next_restart_single = nextrestart; } } else next_restart_single = update->laststep + 1; if (restart_flag_double) { if (restart_every_double) next_restart_double = (ntimestep/restart_every_double)*restart_every_double + restart_every_double; else { bigint nextrestart = static_cast (input->variable->compute_equal(ivar_restart_double)); if (nextrestart <= ntimestep) error->all(FLERR,"Restart variable returned a bad timestep"); next_restart_double = nextrestart; } } else next_restart_double = update->laststep + 1; next_restart = MIN(next_restart_single,next_restart_double); } else next_restart = update->laststep + 1; // print memory usage unless being called between multiple runs if (memflag) memory_usage(); // set next_thermo to multiple of every or variable eval if var defined // insure thermo output on last step of run // thermo may invoke computes so wrap with clear/add modify->clearstep_compute(); thermo->header(); thermo->compute(0); last_thermo = ntimestep; if (var_thermo) { next_thermo = static_cast (input->variable->compute_equal(ivar_thermo)); if (next_thermo <= ntimestep) error->all(FLERR,"Thermo every variable returned a bad timestep"); } else if (thermo_every) { next_thermo = (ntimestep/thermo_every)*thermo_every 
+ thermo_every; next_thermo = MIN(next_thermo,update->laststep); } else next_thermo = update->laststep; modify->addstep_compute(next_thermo); // next = next timestep any output will be done next = MIN(next_dump_any,next_restart); next = MIN(next,next_thermo); } /* ---------------------------------------------------------------------- perform all output for this timestep only perform output if next matches current step and last output doesn't do dump/restart before thermo so thermo CPU time will include them ------------------------------------------------------------------------- */ void Output::write(bigint ntimestep) { // next_dump does not force output on last step of run // wrap dumps that invoke computes or eval of variable with clear/add // download data from GPU if necessary if (next_dump_any == ntimestep) { if (lmp->cuda && !lmp->cuda->oncpu) lmp->cuda->downloadAll(); for (int idump = 0; idump < ndump; idump++) { if (next_dump[idump] == ntimestep) { if (dump[idump]->clearstep || every_dump[idump] == 0) modify->clearstep_compute(); if (last_dump[idump] != ntimestep) { dump[idump]->write(); last_dump[idump] = ntimestep; } if (every_dump[idump]) next_dump[idump] += every_dump[idump]; else { bigint nextdump = static_cast (input->variable->compute_equal(ivar_dump[idump])); if (nextdump <= ntimestep) error->all(FLERR,"Dump every variable returned a bad timestep"); next_dump[idump] = nextdump; } if (dump[idump]->clearstep || every_dump[idump] == 0) modify->addstep_compute(next_dump[idump]); } if (idump) next_dump_any = MIN(next_dump_any,next_dump[idump]); else next_dump_any = next_dump[0]; } } // next_restart does not force output on last step of run // for toggle = 0, replace "*" with current timestep in restart filename // download data from GPU if necessary // eval of variable may invoke computes so wrap with clear/add if (next_restart == ntimestep) { if (lmp->cuda && !lmp->cuda->oncpu) lmp->cuda->downloadAll(); if (next_restart_single == ntimestep) { char 
*file = new char[strlen(restart1) + 16]; char *ptr = strchr(restart1,'*'); *ptr = '\0'; sprintf(file,"%s" BIGINT_FORMAT "%s",restart1,ntimestep,ptr+1); *ptr = '*'; if (last_restart != ntimestep) restart->write(file); delete [] file; if (restart_every_single) next_restart_single += restart_every_single; else { modify->clearstep_compute(); bigint nextrestart = static_cast (input->variable->compute_equal(ivar_restart_single)); if (nextrestart <= ntimestep) error->all(FLERR,"Restart variable returned a bad timestep"); next_restart_single = nextrestart; modify->addstep_compute(next_restart_single); } } if (next_restart_double == ntimestep) { if (last_restart != ntimestep) { if (restart_toggle == 0) { restart->write(restart2a); restart_toggle = 1; } else { restart->write(restart2b); restart_toggle = 0; } } if (restart_every_double) next_restart_double += restart_every_double; else { modify->clearstep_compute(); bigint nextrestart = static_cast (input->variable->compute_equal(ivar_restart_double)); if (nextrestart <= ntimestep) error->all(FLERR,"Restart variable returned a bad timestep"); next_restart_double = nextrestart; modify->addstep_compute(next_restart_double); } } last_restart = ntimestep; next_restart = MIN(next_restart_single,next_restart_double); } // insure next_thermo forces output on last step of run // thermo may invoke computes so wrap with clear/add if (next_thermo == ntimestep) { modify->clearstep_compute(); if (last_thermo != ntimestep) thermo->compute(1); last_thermo = ntimestep; if (var_thermo) { next_thermo = static_cast (input->variable->compute_equal(ivar_thermo)); if (next_thermo <= ntimestep) error->all(FLERR,"Thermo every variable returned a bad timestep"); } else if (thermo_every) next_thermo += thermo_every; else next_thermo = update->laststep; next_thermo = MIN(next_thermo,update->laststep); modify->addstep_compute(next_thermo); } // next = next timestep any output will be done next = MIN(next_dump_any,next_restart); next = 
MIN(next,next_thermo); } /* ---------------------------------------------------------------------- force a snapshot to be written for all dumps called from PRD and TAD ------------------------------------------------------------------------- */ void Output::write_dump(bigint ntimestep) { for (int idump = 0; idump < ndump; idump++) { dump[idump]->write(); last_dump[idump] = ntimestep; } } /* ---------------------------------------------------------------------- force restart file(s) to be written called from PRD and TAD ------------------------------------------------------------------------- */ void Output::write_restart(bigint ntimestep) { if (restart_flag_single) { char *file = new char[strlen(restart1) + 16]; char *ptr = strchr(restart1,'*'); *ptr = '\0'; sprintf(file,"%s" BIGINT_FORMAT "%s",restart1,ntimestep,ptr+1); *ptr = '*'; restart->write(file); delete [] file; } if (restart_flag_double) { if (restart_toggle == 0) { restart->write(restart2a); restart_toggle = 1; } else { restart->write(restart2b); restart_toggle = 0; } } last_restart = ntimestep; } /* ---------------------------------------------------------------------- timestep is being changed, called by update->reset_timestep() reset next timestep values for dumps, restart, thermo output reset to smallest value >= new timestep if next timestep set by variable evaluation, eval for ntimestep-1, so current ntimestep can be returned if needed no guarantee that variable can be evaluated for ntimestep-1 if it depends on computes, but live with that rare case for now ------------------------------------------------------------------------- */ void Output::reset_timestep(bigint ntimestep) { next_dump_any = MAXBIGINT; for (int idump = 0; idump < ndump; idump++) { if (every_dump[idump]) { next_dump[idump] = (ntimestep/every_dump[idump])*every_dump[idump]; if (next_dump[idump] < ntimestep) next_dump[idump] += every_dump[idump]; } else { modify->clearstep_compute(); update->ntimestep--; bigint nextdump = 
static_cast (input->variable->compute_equal(ivar_dump[idump])); if (nextdump < ntimestep) error->all(FLERR,"Dump every variable returned a bad timestep"); update->ntimestep++; next_dump[idump] = nextdump; modify->addstep_compute(next_dump[idump]); } next_dump_any = MIN(next_dump_any,next_dump[idump]); } if (restart_flag_single) { if (restart_every_single) { next_restart_single = (ntimestep/restart_every_single)*restart_every_single; if (next_restart_single < ntimestep) next_restart_single += restart_every_single; } else { modify->clearstep_compute(); update->ntimestep--; bigint nextrestart = static_cast (input->variable->compute_equal(ivar_restart_single)); if (nextrestart < ntimestep) error->all(FLERR,"Restart variable returned a bad timestep"); update->ntimestep++; next_restart_single = nextrestart; modify->addstep_compute(next_restart_single); } } else next_restart_single = update->laststep + 1; if (restart_flag_double) { if (restart_every_double) { next_restart_double = (ntimestep/restart_every_double)*restart_every_double; if (next_restart_double < ntimestep) next_restart_double += restart_every_double; } else { modify->clearstep_compute(); update->ntimestep--; bigint nextrestart = static_cast (input->variable->compute_equal(ivar_restart_double)); if (nextrestart < ntimestep) error->all(FLERR,"Restart variable returned a bad timestep"); update->ntimestep++; next_restart_double = nextrestart; modify->addstep_compute(next_restart_double); } } else next_restart_double = update->laststep + 1; next_restart = MIN(next_restart_single,next_restart_double); if (var_thermo) { modify->clearstep_compute(); update->ntimestep--; next_thermo = static_cast (input->variable->compute_equal(ivar_thermo)); if (next_thermo < ntimestep) error->all(FLERR,"Thermo_modify every variable returned a bad timestep"); update->ntimestep++; next_thermo = MIN(next_thermo,update->laststep); modify->addstep_compute(next_thermo); } else if (thermo_every) { next_thermo = 
(ntimestep/thermo_every)*thermo_every; if (next_thermo < ntimestep) next_thermo += thermo_every; next_thermo = MIN(next_thermo,update->laststep); } else next_thermo = update->laststep; next = MIN(next_dump_any,next_restart); next = MIN(next,next_thermo); } /* ---------------------------------------------------------------------- add a Dump to list of Dumps ------------------------------------------------------------------------- */ void Output::add_dump(int narg, char **arg) { if (narg < 5) error->all(FLERR,"Illegal dump command"); // error checks for (int idump = 0; idump < ndump; idump++) if (strcmp(arg[0],dump[idump]->id) == 0) error->all(FLERR,"Reuse of dump ID"); int igroup = group->find(arg[1]); if (igroup == -1) error->all(FLERR,"Could not find dump group ID"); if (force->inumeric(FLERR,arg[3]) <= 0) error->all(FLERR,"Invalid dump frequency"); // extend Dump list if necessary if (ndump == max_dump) { max_dump += DELTA; dump = (Dump **) memory->srealloc(dump,max_dump*sizeof(Dump *),"output:dump"); memory->grow(every_dump,max_dump,"output:every_dump"); memory->grow(next_dump,max_dump,"output:next_dump"); memory->grow(last_dump,max_dump,"output:last_dump"); var_dump = (char **) memory->srealloc(var_dump,max_dump*sizeof(char *),"output:var_dump"); memory->grow(ivar_dump,max_dump,"output:ivar_dump"); } // create the Dump if (0) return; // dummy line to enable else-if macro expansion #define DUMP_CLASS #define DumpStyle(key,Class) \ else if (strcmp(arg[2],#key) == 0) dump[ndump] = new Class(lmp,narg,arg); #include "style_dump.h" #undef DUMP_CLASS else error->all(FLERR,"Invalid dump style"); every_dump[ndump] = force->inumeric(FLERR,arg[3]); if (every_dump[ndump] <= 0) error->all(FLERR,"Illegal dump command"); last_dump[ndump] = -1; var_dump[ndump] = NULL; ndump++; } /* ---------------------------------------------------------------------- modify parameters of a Dump ------------------------------------------------------------------------- */ void 
Output::modify_dump(int narg, char **arg) { if (narg < 1) error->all(FLERR,"Illegal dump_modify command"); // find which dump it is int idump; for (idump = 0; idump < ndump; idump++) if (strcmp(arg[0],dump[idump]->id) == 0) break; if (idump == ndump) error->all(FLERR,"Cound not find dump_modify ID"); dump[idump]->modify_params(narg-1,&arg[1]); } /* ---------------------------------------------------------------------- delete a Dump from list of Dumps ------------------------------------------------------------------------- */ void Output::delete_dump(char *id) { // find which dump it is and delete it int idump; for (idump = 0; idump < ndump; idump++) if (strcmp(id,dump[idump]->id) == 0) break; if (idump == ndump) error->all(FLERR,"Could not find undump ID"); delete dump[idump]; delete [] var_dump[idump]; // move other dumps down in list one slot for (int i = idump+1; i < ndump; i++) { dump[i-1] = dump[i]; every_dump[i-1] = every_dump[i]; next_dump[i-1] = next_dump[i]; last_dump[i-1] = last_dump[i]; var_dump[i-1] = var_dump[i]; ivar_dump[i-1] = ivar_dump[i]; } ndump--; } /* ---------------------------------------------------------------------- set thermo output frequency from input script ------------------------------------------------------------------------- */ void Output::set_thermo(int narg, char **arg) { if (narg != 1) error->all(FLERR,"Illegal thermo command"); if (strstr(arg[0],"v_") == arg[0]) { delete [] var_thermo; int n = strlen(&arg[0][2]) + 1; var_thermo = new char[n]; strcpy(var_thermo,&arg[0][2]); } else { thermo_every = force->inumeric(FLERR,arg[0]); if (thermo_every < 0) error->all(FLERR,"Illegal thermo command"); } } /* ---------------------------------------------------------------------- new Thermo style ------------------------------------------------------------------------- */ void Output::create_thermo(int narg, char **arg) { if (narg < 1) error->all(FLERR,"Illegal thermo_style command"); // don't allow this so that dipole style can safely 
allocate inertia vector if (domain->box_exist == 0) error->all(FLERR,"Thermo_style command before simulation box is defined"); // warn if previous thermo had been modified via thermo_modify command if (thermo->modified && comm->me == 0) error->warning(FLERR,"New thermo_style command, " "previous thermo_modify settings will be lost"); // set thermo = NULL in case new Thermo throws an error delete thermo; thermo = NULL; thermo = new Thermo(lmp,narg,arg); } /* ---------------------------------------------------------------------- setup restart capability for single or double output files if only one filename and it contains no "*", then append ".*" ------------------------------------------------------------------------- */ void Output::create_restart(int narg, char **arg) { if (narg < 1) error->all(FLERR,"Illegal restart command"); int every = 0; int varflag = 0; if (strstr(arg[0],"v_") == arg[0]) varflag = 1; else every = force->inumeric(FLERR,arg[0]); if (!varflag && every == 0) { if (narg != 1) error->all(FLERR,"Illegal restart command"); restart_flag = restart_flag_single = restart_flag_double = 0; last_restart = -1; delete restart; restart = NULL; delete [] restart1; delete [] restart2a; delete [] restart2b; restart1 = restart2a = restart2b = NULL; delete [] var_restart_single; delete [] var_restart_double; var_restart_single = var_restart_double = NULL; return; } if (narg < 2) error->all(FLERR,"Illegal restart command"); int nfile = 0; if (narg % 2 == 0) nfile = 1; else nfile = 2; if (nfile == 1) { restart_flag = restart_flag_single = 1; if (varflag) { delete [] var_restart_single; int n = strlen(&arg[0][2]) + 1; var_restart_single = new char[n]; strcpy(var_restart_single,&arg[0][2]); restart_every_single = 0; } else restart_every_single = every; int n = strlen(arg[1]) + 3; restart1 = new char[n]; strcpy(restart1,arg[1]); if (strchr(restart1,'*') == NULL) strcat(restart1,".*"); } if (nfile == 2) { restart_flag = restart_flag_double = 1; if (varflag) { delete [] 
var_restart_double; int n = strlen(&arg[0][2]) + 1; var_restart_double = new char[n]; strcpy(var_restart_double,&arg[0][2]); restart_every_double = 0; } else restart_every_double = every; restart_toggle = 0; int n = strlen(arg[1]) + 3; restart2a = new char[n]; strcpy(restart2a,arg[1]); n = strlen(arg[2]) + 1; restart2b = new char[n]; strcpy(restart2b,arg[2]); } // check for multiproc output and an MPI-IO filename // if 2 filenames, must be consistent int multiproc; if (strchr(arg[1],'%')) multiproc = comm->nprocs; else multiproc = 0; if (nfile == 2) { if (multiproc && !strchr(arg[2],'%')) error->all(FLERR,"Both restart files must use % or neither"); if (!multiproc && strchr(arg[2],'%')) error->all(FLERR,"Both restart files must use % or neither"); } int mpiioflag; if (strstr(arg[1],".mpi")) mpiioflag = 1; else mpiioflag = 0; if (nfile == 2) { if (mpiioflag && !strstr(arg[2],".mpi")) error->all(FLERR,"Both restart files must use MPI-IO or neither"); if (!mpiioflag && strstr(arg[2],".mpi")) error->all(FLERR,"Both restart files must use MPI-IO or neither"); } // setup output style and process optional args delete restart; restart = new WriteRestart(lmp); int iarg = nfile+1; restart->multiproc_options(multiproc,mpiioflag,narg-iarg,&arg[iarg]); } /* ---------------------------------------------------------------------- sum and print memory usage result is only memory on proc 0, not averaged across procs ------------------------------------------------------------------------- */ void Output::memory_usage() { bigint bytes = 0; bytes += atom->memory_usage(); bytes += neighbor->memory_usage(); bytes += comm->memory_usage(); bytes += update->memory_usage(); bytes += force->memory_usage(); bytes += modify->memory_usage(); for (int i = 0; i < ndump; i++) bytes += dump[i]->memory_usage(); double mbytes = bytes/1024.0/1024.0; if (comm->me == 0) { if (screen) fprintf(screen,"Memory usage per processor = %g Mbytes\n",mbytes); if (logfile) fprintf(logfile,"Memory usage per 
processor = %g Mbytes\n",mbytes); } } diff --git a/src/pair_hybrid.cpp b/src/pair_hybrid.cpp index e15d4b00d..15de14db1 100644 --- a/src/pair_hybrid.cpp +++ b/src/pair_hybrid.cpp @@ -1,787 +1,787 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #include "math.h" #include "stdlib.h" #include "string.h" #include "ctype.h" #include "pair_hybrid.h" #include "atom.h" #include "force.h" #include "pair.h" #include "neighbor.h" #include "neigh_request.h" #include "update.h" #include "comm.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairHybrid::PairHybrid(LAMMPS *lmp) : Pair(lmp) { nstyles = 0; styles = NULL; keywords = NULL; multiple = NULL; outerflag = 0; } /* ---------------------------------------------------------------------- */ PairHybrid::~PairHybrid() { if (nstyles) { for (int m = 0; m < nstyles; m++) delete styles[m]; for (int m = 0; m < nstyles; m++) delete [] keywords[m]; } delete [] styles; delete [] keywords; delete [] multiple; delete [] svector; if (allocated) { memory->destroy(setflag); memory->destroy(cutsq); memory->destroy(cutghost); memory->destroy(nmap); memory->destroy(map); } } /* ---------------------------------------------------------------------- call each sub-style's compute() or compute_outer() function accumulate sub-style global/peratom energy/virial in hybrid for global vflag = 1: each sub-style 
computes own virial[6] sum sub-style virial[6] to hybrid's virial[6] for global vflag = 2: call sub-style with adjusted vflag to prevent it calling virial_fdotr_compute() hybrid calls virial_fdotr_compute() on final accumulated f ------------------------------------------------------------------------- */ void PairHybrid::compute(int eflag, int vflag) { int i,j,m,n; // if no_virial_fdotr_compute is set and global component of // incoming vflag = 2, then // reset vflag as if global component were 1 // necessary since one or more sub-styles cannot compute virial as F dot r if (no_virial_fdotr_compute && vflag % 4 == 2) vflag = 1 + vflag/4 * 4; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = eflag_global = vflag_global = eflag_atom = vflag_atom = 0; // check if global component of incoming vflag = 2 // if so, reset vflag passed to substyle as if it were 0 // necessary so substyle will not invoke virial_fdotr_compute() int vflag_substyle; if (vflag % 4 == 2) vflag_substyle = vflag/4 * 4; else vflag_substyle = vflag; for (m = 0; m < nstyles; m++) { // invoke compute() unless compute flag is turned off or // outerflag is set and sub-style has a compute_outer() method if (styles[m]->compute_flag == 0) continue; if (outerflag && styles[m]->respa_enable) styles[m]->compute_outer(eflag,vflag_substyle); else styles[m]->compute(eflag,vflag_substyle); if (eflag_global) { eng_vdwl += styles[m]->eng_vdwl; eng_coul += styles[m]->eng_coul; } if (vflag_global) { for (n = 0; n < 6; n++) virial[n] += styles[m]->virial[n]; } if (eflag_atom) { n = atom->nlocal; if (force->newton_pair) n += atom->nghost; double *eatom_substyle = styles[m]->eatom; for (i = 0; i < n; i++) eatom[i] += eatom_substyle[i]; } if (vflag_atom) { n = atom->nlocal; if (force->newton_pair) n += atom->nghost; double **vatom_substyle = styles[m]->vatom; for (i = 0; i < n; i++) for (j = 0; j < 6; j++) vatom[i][j] += vatom_substyle[i][j]; } } if (vflag_fdotr) virial_fdotr_compute(); } /* 
---------------------------------------------------------------------- */ void PairHybrid::compute_inner() { for (int m = 0; m < nstyles; m++) if (styles[m]->respa_enable) styles[m]->compute_inner(); } /* ---------------------------------------------------------------------- */ void PairHybrid::compute_middle() { for (int m = 0; m < nstyles; m++) if (styles[m]->respa_enable) styles[m]->compute_middle(); } /* ---------------------------------------------------------------------- */ void PairHybrid::compute_outer(int eflag, int vflag) { outerflag = 1; compute(eflag,vflag); outerflag = 0; } /* ---------------------------------------------------------------------- allocate all arrays ------------------------------------------------------------------------- */ void PairHybrid::allocate() { allocated = 1; int n = atom->ntypes; memory->create(setflag,n+1,n+1,"pair:setflag"); for (int i = 1; i <= n; i++) for (int j = i; j <= n; j++) setflag[i][j] = 0; memory->create(cutsq,n+1,n+1,"pair:cutsq"); memory->create(cutghost,n+1,n+1,"pair:cutghost"); memory->create(nmap,n+1,n+1,"pair:nmap"); memory->create(map,n+1,n+1,nstyles,"pair:map"); for (int i = 1; i <= n; i++) for (int j = i; j <= n; j++) nmap[i][j] = 0; } /* ---------------------------------------------------------------------- create one pair style for each arg in list ------------------------------------------------------------------------- */ void PairHybrid::settings(int narg, char **arg) { if (narg < 1) error->all(FLERR,"Illegal pair_style command"); // delete old lists, since cannot just change settings if (nstyles) { for (int m = 0; m < nstyles; m++) delete styles[m]; delete [] styles; for (int m = 0; m < nstyles; m++) delete [] keywords[m]; delete [] keywords; } if (allocated) { memory->destroy(setflag); memory->destroy(cutsq); memory->destroy(cutghost); memory->destroy(nmap); memory->destroy(map); } allocated = 0; // allocate list of sub-styles as big as possibly needed if no extra args styles = new Pair*[narg]; 
keywords = new char*[narg]; multiple = new int[narg]; // allocate each sub-style // call settings() with set of args that are not pair style names // use force->pair_map to determine which args these are - int iarg,jarg,dummy; + int iarg,jarg,sflag; iarg = 0; nstyles = 0; while (iarg < narg) { if (strcmp(arg[iarg],"hybrid") == 0) error->all(FLERR,"Pair style hybrid cannot have hybrid as an argument"); if (strcmp(arg[iarg],"none") == 0) error->all(FLERR,"Pair style hybrid cannot have none as an argument"); - styles[nstyles] = force->new_pair(arg[iarg],lmp->suffix,dummy); - int n = strlen(arg[iarg]) + 1; - keywords[nstyles] = new char[n]; - strcpy(keywords[nstyles],arg[iarg]); + + styles[nstyles] = force->new_pair(arg[iarg],1,sflag); + force->store_style(keywords[nstyles],arg[iarg],sflag); + jarg = iarg + 1; while (jarg < narg && !force->pair_map->count(arg[jarg])) jarg++; styles[nstyles]->settings(jarg-iarg-1,&arg[iarg+1]); iarg = jarg; nstyles++; } // multiple[i] = 1 to M if sub-style used multiple times, else 0 for (int i = 0; i < nstyles; i++) { int count = 0; for (int j = 0; j < nstyles; j++) { if (strcmp(keywords[j],keywords[i]) == 0) count++; if (j == i) multiple[i] = count; } if (count == 1) multiple[i] = 0; } // set pair flags from sub-style flags flags(); } /* ---------------------------------------------------------------------- set top-level pair flags from sub-style flags ------------------------------------------------------------------------- */ void PairHybrid::flags() { int m; // set comm_forward, comm_reverse, comm_reverse_off to max of any sub-style for (m = 0; m < nstyles; m++) { if (styles[m]) comm_forward = MAX(comm_forward,styles[m]->comm_forward); if (styles[m]) comm_reverse = MAX(comm_reverse,styles[m]->comm_reverse); if (styles[m]) comm_reverse_off = MAX(comm_reverse_off, styles[m]->comm_reverse_off); } // single_enable = 1 if any sub-style is set // respa_enable = 1 if any sub-style is set // manybody_flag = 1 if any sub-style is set // 
no_virial_fdotr_compute = 1 if any sub-style is set // ghostneigh = 1 if any sub-style is set // ewaldflag, pppmflag, msmflag, dispersionflag, tip4pflag = 1 // if any sub-style is set // compute_flag = 1 if any sub-style is set single_enable = 0; compute_flag = 0; for (m = 0; m < nstyles; m++) { if (styles[m]->single_enable) single_enable = 1; if (styles[m]->respa_enable) respa_enable = 1; if (styles[m]->manybody_flag) manybody_flag = 1; if (styles[m]->no_virial_fdotr_compute) no_virial_fdotr_compute = 1; if (styles[m]->ghostneigh) ghostneigh = 1; if (styles[m]->ewaldflag) ewaldflag = 1; if (styles[m]->pppmflag) pppmflag = 1; if (styles[m]->msmflag) msmflag = 1; if (styles[m]->dispersionflag) dispersionflag = 1; if (styles[m]->tip4pflag) tip4pflag = 1; if (styles[m]->compute_flag) compute_flag = 1; } // single_extra = min of all sub-style single_extra // allocate svector single_extra = styles[0]->single_extra; for (m = 1; m < nstyles; m++) single_extra = MIN(single_extra,styles[m]->single_extra); if (single_extra) { delete [] svector; svector = new double[single_extra]; } } /* ---------------------------------------------------------------------- set coeffs for one or more type pairs ------------------------------------------------------------------------- */ void PairHybrid::coeff(int narg, char **arg) { if (narg < 3) error->all(FLERR,"Incorrect args for pair coefficients"); if (!allocated) allocate(); int ilo,ihi,jlo,jhi; force->bounds(arg[0],atom->ntypes,ilo,ihi); force->bounds(arg[1],atom->ntypes,jlo,jhi); // 3rd arg = pair sub-style name // 4th arg = pair sub-style index if name used multiple times // allow for "none" as valid sub-style name int multflag; int m; for (m = 0; m < nstyles; m++) { multflag = 0; if (strcmp(arg[2],keywords[m]) == 0) { if (multiple[m]) { multflag = 1; if (narg < 4) error->all(FLERR,"Incorrect args for pair coefficients"); if (!isdigit(arg[3][0])) error->all(FLERR,"Incorrect args for pair coefficients"); int index = 
force->inumeric(FLERR,arg[3]); if (index == multiple[m]) break; else continue; } else break; } } int none = 0; if (m == nstyles) { if (strcmp(arg[2],"none") == 0) none = 1; else error->all(FLERR,"Pair coeff for hybrid has invalid style"); } // move 1st/2nd args to 2nd/3rd args // if multflag: move 1st/2nd args to 3rd/4th args // just copy ptrs, since arg[] points into original input line arg[2+multflag] = arg[1]; arg[1+multflag] = arg[0]; // invoke sub-style coeff() starting with 1st remaining arg if (!none) styles[m]->coeff(narg-1-multflag,&arg[1+multflag]); // if sub-style only allows one pair coeff call (with * * and type mapping) // then unset setflag/map assigned to that style before setting it below // in case pair coeff for this sub-style is being called for 2nd time if (!none && styles[m]->one_coeff) for (int i = 1; i <= atom->ntypes; i++) for (int j = i; j <= atom->ntypes; j++) if (nmap[i][j] && map[i][j][0] == m) { setflag[i][j] = 0; nmap[i][j] = 0; } // set setflag and which type pairs map to which sub-style // if sub-style is none: set hybrid setflag, wipe out map // else: set hybrid setflag & map only if substyle setflag is set // previous mappings are wiped out int count = 0; for (int i = ilo; i <= ihi; i++) { for (int j = MAX(jlo,i); j <= jhi; j++) { if (none) { setflag[i][j] = 1; nmap[i][j] = 0; count++; } else if (styles[m]->setflag[i][j]) { setflag[i][j] = 1; nmap[i][j] = 1; map[i][j][0] = m; count++; } } } if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); } /* ---------------------------------------------------------------------- init specific to this pair style ------------------------------------------------------------------------- */ void PairHybrid::init_style() { int i,m,itype,jtype,used,istyle,skip; // error if a sub-style is not used int ntypes = atom->ntypes; for (istyle = 0; istyle < nstyles; istyle++) { used = 0; for (itype = 1; itype <= ntypes; itype++) for (jtype = itype; jtype <= ntypes; jtype++) for (m = 0; 
m < nmap[itype][jtype]; m++) if (map[itype][jtype][m] == istyle) used = 1; if (used == 0) error->all(FLERR,"Pair hybrid sub-style is not used"); } // each sub-style makes its neighbor list request(s) for (istyle = 0; istyle < nstyles; istyle++) styles[istyle]->init_style(); // create skip lists for each pair neigh request // any kind of list can have its skip flag set at this stage for (i = 0; i < neighbor->nrequest; i++) { if (!neighbor->requests[i]->pair) continue; // istyle = associated sub-style for (istyle = 0; istyle < nstyles; istyle++) if (styles[istyle] == neighbor->requests[i]->requestor) break; // allocate iskip and ijskip // initialize so as to skip all pair types // set ijskip = 0 if type pair matches any entry in sub-style map // set ijskip = 0 if mixing will assign type pair to this sub-style // will occur if type pair is currently unassigned // and both I,I and J,J are assigned to single sub-style // and sub-style for both I,I and J,J match istyle // set iskip = 1 only if all ijskip for itype are 1 int *iskip = new int[ntypes+1]; int **ijskip; memory->create(ijskip,ntypes+1,ntypes+1,"pair_hybrid:ijskip"); for (itype = 1; itype <= ntypes; itype++) for (jtype = 1; jtype <= ntypes; jtype++) ijskip[itype][jtype] = 1; for (itype = 1; itype <= ntypes; itype++) for (jtype = itype; jtype <= ntypes; jtype++) { for (m = 0; m < nmap[itype][jtype]; m++) if (map[itype][jtype][m] == istyle) ijskip[itype][jtype] = ijskip[jtype][itype] = 0; if (nmap[itype][jtype] == 0 && nmap[itype][itype] == 1 && map[itype][itype][0] == istyle && nmap[jtype][jtype] == 1 && map[jtype][jtype][0] == istyle) ijskip[itype][jtype] = ijskip[jtype][itype] = 0; } for (itype = 1; itype <= ntypes; itype++) { iskip[itype] = 1; for (jtype = 1; jtype <= ntypes; jtype++) if (ijskip[itype][jtype] == 0) iskip[itype] = 0; } // if any skipping occurs // set request->skip and copy iskip and ijskip into request // else delete iskip and ijskip skip = 0; for (itype = 1; itype <= ntypes; itype++) for 
(jtype = 1; jtype <= ntypes; jtype++) if (ijskip[itype][jtype] == 1) skip = 1; if (skip) { neighbor->requests[i]->skip = 1; neighbor->requests[i]->iskip = iskip; neighbor->requests[i]->ijskip = ijskip; } else { delete [] iskip; memory->destroy(ijskip); } } // combine sub-style neigh list requests and create new ones if needed modify_requests(); } /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ double PairHybrid::init_one(int i, int j) { // if I,J is not set explicitly: // perform mixing only if I,I sub-style = J,J sub-style // also require I,I and J,J are both assigned to single sub-style if (setflag[i][j] == 0) { if (nmap[i][i] != 1 || nmap[j][j] != 1 || map[i][i][0] != map[j][j][0]) error->one(FLERR,"All pair coeffs are not set"); nmap[i][j] = 1; map[i][j][0] = map[i][i][0]; } // call init/mixing for all sub-styles of I,J // set cutsq in sub-style just as Pair::init() does via call to init_one() // set cutghost for I,J and J,I just as sub-style does // sum tail corrections for I,J // return max cutoff of all sub-styles assigned to I,J // if no sub-styles assigned to I,J (pair_coeff none), cutmax = 0.0 returned double cutmax = 0.0; cutghost[i][j] = cutghost[j][i] = 0.0; if (tail_flag) etail_ij = ptail_ij = 0.0; nmap[j][i] = nmap[i][j]; for (int k = 0; k < nmap[i][j]; k++) { map[j][i][k] = map[i][j][k]; double cut = styles[map[i][j][k]]->init_one(i,j); styles[map[i][j][k]]->cutsq[i][j] = styles[map[i][j][k]]->cutsq[j][i] = cut*cut; if (styles[map[i][j][k]]->ghostneigh) cutghost[i][j] = cutghost[j][i] = MAX(cutghost[i][j],styles[map[i][j][k]]->cutghost[i][j]); if (tail_flag) { etail_ij += styles[map[i][j][k]]->etail_ij; ptail_ij += styles[map[i][j][k]]->ptail_ij; } cutmax = MAX(cutmax,cut); } return cutmax; } /* ---------------------------------------------------------------------- combine sub-style neigh list 
requests and create new ones if needed ------------------------------------------------------------------------- */ void PairHybrid::modify_requests() { int i,j; NeighRequest *irq,*jrq; // loop over pair requests only // if list is skip list and not copy, look for non-skip list of same kind // if one exists, point at that one via otherlist // else make new non-skip request of same kind and point at that one // don't bother to set ID for new request, since pair hybrid ignores list // only exception is half_from_full: // ignore it, turn off skip, since it will derive from its skip parent // after possible new request creation, unset skip flag and otherlist // for these derived lists: granhistory, rRESPA inner/middle // this prevents neighbor from treating them as skip lists // copy list check is for pair style = hybrid/overlay // which invokes this routine for (i = 0; i < neighbor->nrequest; i++) { if (!neighbor->requests[i]->pair) continue; irq = neighbor->requests[i]; if (irq->skip == 0 || irq->copy) continue; if (irq->half_from_full) { irq->skip = 0; continue; } for (j = 0; j < neighbor->nrequest; j++) { if (!neighbor->requests[j]->pair) continue; jrq = neighbor->requests[j]; if (irq->same_kind(jrq) && jrq->skip == 0) break; } if (j < neighbor->nrequest) irq->otherlist = j; else { int newrequest = neighbor->request(this); neighbor->requests[newrequest]->copy_request(irq); irq->otherlist = newrequest; } if (irq->granhistory || irq->respainner || irq->respamiddle) { irq->skip = 0; irq->otherlist = -1; } } } /* ---------------------------------------------------------------------- proc 0 writes to restart file ------------------------------------------------------------------------- */ void PairHybrid::write_restart(FILE *fp) { fwrite(&nstyles,sizeof(int),1,fp); // each sub-style writes its settings, but no coeff info int n; for (int m = 0; m < nstyles; m++) { n = strlen(keywords[m]) + 1; fwrite(&n,sizeof(int),1,fp); fwrite(keywords[m],sizeof(char),n,fp); 
styles[m]->write_restart_settings(fp); } } /* ---------------------------------------------------------------------- proc 0 reads from restart file, bcasts ------------------------------------------------------------------------- */ void PairHybrid::read_restart(FILE *fp) { int me = comm->me; if (me == 0) fread(&nstyles,sizeof(int),1,fp); MPI_Bcast(&nstyles,1,MPI_INT,0,world); // allocate list of sub-styles styles = new Pair*[nstyles]; keywords = new char*[nstyles]; multiple = new int[nstyles]; // each sub-style is created via new_pair() // each reads its settings, but no coeff info int n,dummy; for (int m = 0; m < nstyles; m++) { if (me == 0) fread(&n,sizeof(int),1,fp); MPI_Bcast(&n,1,MPI_INT,0,world); keywords[m] = new char[n]; if (me == 0) fread(keywords[m],sizeof(char),n,fp); MPI_Bcast(keywords[m],n,MPI_CHAR,0,world); - styles[m] = force->new_pair(keywords[m],lmp->suffix,dummy); + styles[m] = force->new_pair(keywords[m],0,dummy); styles[m]->read_restart_settings(fp); } // multiple[i] = 1 to M if sub-style used multiple times, else 0 for (int i = 0; i < nstyles; i++) { int count = 0; for (int j = 0; j < nstyles; j++) { if (strcmp(keywords[j],keywords[i]) == 0) count++; if (j == i) multiple[i] = count; } if (count == 1) multiple[i] = 0; } // set pair flags from sub-style flags flags(); } /* ---------------------------------------------------------------------- call sub-style to compute single interaction error if sub-style does not support single() call since overlay could have multiple sub-styles, sum results explicitly ------------------------------------------------------------------------- */ double PairHybrid::single(int i, int j, int itype, int jtype, double rsq, double factor_coul, double factor_lj, double &fforce) { if (nmap[itype][jtype] == 0) error->one(FLERR,"Invoked pair single on pair style none"); double fone; fforce = 0.0; double esum = 0.0; for (int m = 0; m < nmap[itype][jtype]; m++) { if (rsq < styles[map[itype][jtype][m]]->cutsq[itype][jtype]) 
{ if (styles[map[itype][jtype][m]]->single_enable == 0) error->one(FLERR,"Pair hybrid sub-style does not support single call"); esum += styles[map[itype][jtype][m]]-> single(i,j,itype,jtype,rsq,factor_coul,factor_lj,fone); fforce += fone; // copy substyle extra values into hybrid's svector if (single_extra && styles[map[itype][jtype][m]]->single_extra) for (m = 0; m < single_extra; m++) svector[m] = styles[map[itype][jtype][m]]->svector[m]; } } return esum; } /* ---------------------------------------------------------------------- modify parameters of the pair style if 1st keyword is pair, then applies to one sub-style else pass command args to every sub-style of hybrid ------------------------------------------------------------------------- */ void PairHybrid::modify_params(int narg, char **arg) { if (narg == 0) error->all(FLERR,"Illegal pair_modify command"); if (strcmp(arg[0],"pair") == 0) { if (narg < 2) error->all(FLERR,"Illegal pair_modify command"); int m; for (m = 0; m < nstyles; m++) if (strcmp(arg[1],keywords[m]) == 0) break; if (m == nstyles) error->all(FLERR,"Unknown pair_modify hybrid sub-style"); if (multiple[m] == 0) styles[m]->modify_params(narg-2,&arg[2]); else { if (narg < 3) error->all(FLERR,"Illegal pair_modify command"); int multiflag = force->inumeric(FLERR,arg[2]); for (m = 0; m < nstyles; m++) if (strcmp(arg[1],keywords[m]) == 0 && multiflag == multiple[m]) break; if (m == nstyles) error->all(FLERR,"Unknown pair_modify hybrid sub-style"); styles[m]->modify_params(narg-3,&arg[3]); } } else for (int m = 0; m < nstyles; m++) styles[m]->modify_params(narg,arg); } /* ---------------------------------------------------------------------- extract a ptr to a particular quantity stored by pair pass request thru to sub-styles return first non-NULL result except for cut_coul request for cut_coul, insure all non-NULL results are equal since required by Kspace ------------------------------------------------------------------------- */ void 
*PairHybrid::extract(const char *str, int &dim) { void *cutptr = NULL; void *ptr; double cutvalue; for (int m = 0; m < nstyles; m++) { ptr = styles[m]->extract(str,dim); if (ptr && strcmp(str,"cut_coul") == 0) { double *p_newvalue = (double *) ptr; double newvalue = *p_newvalue; if (cutptr && newvalue != cutvalue) error->all(FLERR, "Coulomb cutoffs of pair hybrid sub-styles do not match"); cutptr = ptr; cutvalue = newvalue; } else if (ptr) return ptr; } if (strcmp(str,"cut_coul") == 0) return cutptr; return NULL; } /* ---------------------------------------------------------------------- */ void PairHybrid::reset_dt() { for (int m = 0; m < nstyles; m++) styles[m]->reset_dt(); } /* ---------------------------------------------------------------------- check if itype,jtype maps to sub-style ------------------------------------------------------------------------- */ int PairHybrid::check_ijtype(int itype, int jtype, char *substyle) { for (int m = 0; m < nmap[itype][jtype]; m++) if (strcmp(keywords[map[itype][jtype][m]],substyle) == 0) return 1; return 0; } /* ---------------------------------------------------------------------- memory usage of each sub-style ------------------------------------------------------------------------- */ double PairHybrid::memory_usage() { double bytes = maxeatom * sizeof(double); bytes += maxvatom*6 * sizeof(double); for (int m = 0; m < nstyles; m++) bytes += styles[m]->memory_usage(); return bytes; } diff --git a/src/read_restart.cpp b/src/read_restart.cpp index 749bff7f3..441ecbfcd 100644 --- a/src/read_restart.cpp +++ b/src/read_restart.cpp @@ -1,1149 +1,1149 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. 
Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #include "lmptype.h" #include "mpi.h" #include "string.h" #include "stdlib.h" #include "dirent.h" #include "read_restart.h" #include "atom.h" #include "atom_vec.h" #include "domain.h" #include "comm.h" #include "irregular.h" #include "update.h" #include "modify.h" #include "fix.h" #include "fix_read_restart.h" #include "group.h" #include "force.h" #include "pair.h" #include "bond.h" #include "angle.h" #include "dihedral.h" #include "improper.h" #include "special.h" #include "universe.h" #include "mpiio.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; // same as write_restart.cpp #define MAGIC_STRING "LammpS RestartT" #define ENDIAN 0x0001 #define ENDIANSWAP 0x1000 #define VERSION_NUMERIC 0 enum{VERSION,SMALLINT,TAGINT,BIGINT, UNITS,NTIMESTEP,DIMENSION,NPROCS,PROCGRID, NEWTON_PAIR,NEWTON_BOND, XPERIODIC,YPERIODIC,ZPERIODIC,BOUNDARY, ATOM_STYLE,NATOMS,NTYPES, NBONDS,NBONDTYPES,BOND_PER_ATOM, NANGLES,NANGLETYPES,ANGLE_PER_ATOM, NDIHEDRALS,NDIHEDRALTYPES,DIHEDRAL_PER_ATOM, NIMPROPERS,NIMPROPERTYPES,IMPROPER_PER_ATOM, TRICLINIC,BOXLO,BOXHI,XY,XZ,YZ, SPECIAL_LJ,SPECIAL_COUL, MASS,PAIR,BOND,ANGLE,DIHEDRAL,IMPROPER, MULTIPROC,MPIIO,PROCSPERFILE,PERPROC, IMAGEINT}; #define LB_FACTOR 1.1 /* ---------------------------------------------------------------------- */ ReadRestart::ReadRestart(LAMMPS *lmp) : Pointers(lmp) {} /* ---------------------------------------------------------------------- */ void ReadRestart::command(int narg, char **arg) { if (narg != 1 && narg != 2) error->all(FLERR,"Illegal read_restart command"); if (domain->box_exist) error->all(FLERR,"Cannot read_restart after simulation box is defined"); MPI_Comm_rank(world,&me); MPI_Comm_size(world,&nprocs); // check for remap option int remapflag 
= 0; if (narg == 2) { if (strcmp(arg[1],"remap") == 0) remapflag = 1; else error->all(FLERR,"Illegal read_restart command"); } // if filename contains "*", search dir for latest restart file char *file = new char[strlen(arg[0]) + 16]; if (strchr(arg[0],'*')) { int n; if (me == 0) { file_search(arg[0],file); n = strlen(file) + 1; } MPI_Bcast(&n,1,MPI_INT,0,world); MPI_Bcast(file,n,MPI_CHAR,0,world); } else strcpy(file,arg[0]); // check for multiproc files and an MPI-IO filename if (strchr(arg[0],'%')) multiproc = 1; else multiproc = 0; if (strstr(arg[0],".mpiio")) mpiioflag = 1; else mpiioflag = 0; if (multiproc && mpiioflag) error->all(FLERR, "Read restart MPI-IO input not allowed with % in filename"); if (mpiioflag) { mpiio = new RestartMPIIO(lmp); if (!mpiio->mpiio_exists) error->all(FLERR,"Reading from MPI-IO filename when " "MPIIO package is not installed"); } // open single restart file or base file for multiproc case if (me == 0) { if (screen) fprintf(screen,"Reading restart file ...\n"); char *hfile; if (multiproc) { hfile = new char[strlen(file) + 16]; char *ptr = strchr(file,'%'); *ptr = '\0'; sprintf(hfile,"%s%s%s",file,"base",ptr+1); *ptr = '%'; } else hfile = file; fp = fopen(hfile,"rb"); if (fp == NULL) { char str[128]; sprintf(str,"Cannot open restart file %s",hfile); error->one(FLERR,str); } if (multiproc) delete [] hfile; } // read magic string, endian flag, numeric version magic_string(); endian(); int incompatible = version_numeric(); // read header info which creates simulation box header(incompatible); domain->box_exist = 1; // problem setup using info from header int n; if (nprocs == 1) n = static_cast (atom->natoms); else n = static_cast (LB_FACTOR * atom->natoms / nprocs); atom->allocate_type_arrays(); atom->deallocate_topology(); atom->avec->grow(n); n = atom->nmax; domain->print_box(" "); domain->set_initial_box(0); domain->set_global_box(); comm->set_proc_grid(); domain->set_local_box(); // read groups, ntype-length arrays, force field, 
fix info from file // nextra = max # of extra quantities stored with each atom group->read_restart(fp); type_arrays(); force_fields(); int nextra = modify->read_restart(fp); atom->nextra_store = nextra; memory->create(atom->extra,n,nextra,"atom:extra"); // read file layout info file_layout(); // close header file if in multiproc mode if (multiproc && me == 0) fclose(fp); // read per-proc info AtomVec *avec = atom->avec; int maxbuf = 0; double *buf = NULL; int m,flag; // MPI-IO input from single file if (mpiioflag) { mpiio->openForRead(file); memory->create(buf,assignedChunkSize,"read_restart:buf"); mpiio->read((headerOffset+assignedChunkOffset),assignedChunkSize,buf); mpiio->close(); m = 0; while (m < assignedChunkSize) m += avec->unpack_restart(&buf[m]); } // input of single native file // nprocs_file = # of chunks in file // proc 0 reads a chunk and bcasts it to other procs // each proc unpacks the atoms, saving ones in it's sub-domain // if remapflag set, remap the atom to box before checking sub-domain // check for atom in sub-domain differs for orthogonal vs triclinic box else if (multiproc == 0) { int triclinic = domain->triclinic; imageint *iptr; double *x,lamda[3]; double *coord,*sublo,*subhi; if (triclinic == 0) { sublo = domain->sublo; subhi = domain->subhi; } else { sublo = domain->sublo_lamda; subhi = domain->subhi_lamda; } for (int iproc = 0; iproc < nprocs_file; iproc++) { if (read_int() != PERPROC) error->all(FLERR,"Invalid flag in peratom section of restart file"); n = read_int(); if (n > maxbuf) { maxbuf = n; memory->destroy(buf); memory->create(buf,maxbuf,"read_restart:buf"); } read_double_vec(n,buf); m = 0; while (m < n) { x = &buf[m+1]; if (remapflag) { iptr = (imageint *) &buf[m+7]; domain->remap(x,*iptr); } if (triclinic) { domain->x2lamda(x,lamda); coord = lamda; } else coord = x; if (coord[0] >= sublo[0] && coord[0] < subhi[0] && coord[1] >= sublo[1] && coord[1] < subhi[1] && coord[2] >= sublo[2] && coord[2] < subhi[2]) { m += 
avec->unpack_restart(&buf[m]); } else m += static_cast (buf[m]); } } if (me == 0) fclose(fp); } // input of multiple native files with procs <= files // # of files = multiproc_file // each proc reads a subset of files, striding by nprocs // each proc keeps all atoms in all perproc chunks in its files else if (nprocs <= multiproc_file) { char *procfile = new char[strlen(file) + 16]; char *ptr = strchr(file,'%'); for (int iproc = me; iproc < multiproc_file; iproc += nprocs) { *ptr = '\0'; sprintf(procfile,"%s%d%s",file,iproc,ptr+1); *ptr = '%'; fp = fopen(procfile,"rb"); if (fp == NULL) { char str[128]; sprintf(str,"Cannot open restart file %s",procfile); error->one(FLERR,str); } fread(&flag,sizeof(int),1,fp); if (flag != PROCSPERFILE) error->one(FLERR,"Invalid flag in peratom section of restart file"); int procsperfile; fread(&procsperfile,sizeof(int),1,fp); for (int i = 0; i < procsperfile; i++) { fread(&flag,sizeof(int),1,fp); if (flag != PERPROC) error->one(FLERR,"Invalid flag in peratom section of restart file"); fread(&n,sizeof(int),1,fp); if (n > maxbuf) { maxbuf = n; memory->destroy(buf); memory->create(buf,maxbuf,"read_restart:buf"); } fread(buf,sizeof(double),n,fp); m = 0; while (m < n) m += avec->unpack_restart(&buf[m]); } fclose(fp); } delete [] procfile; } // input of multiple native files with procs > files // # of files = multiproc_file // cluster procs based on # of files // 1st proc in each cluster reads per-proc chunks from file // sends chunks round-robin to other procs in its cluster // each proc keeps all atoms in its perproc chunks in file else { // nclusterprocs = # of procs in my cluster that read from one file // filewriter = 1 if this proc reads file, else 0 // fileproc = ID of proc in my cluster who reads from file // clustercomm = MPI communicator within my cluster of procs int nfile = multiproc_file; int icluster = static_cast ((bigint) me * nfile/nprocs); int fileproc = static_cast ((bigint) icluster * nprocs/nfile); int fcluster = 
static_cast ((bigint) fileproc * nfile/nprocs); if (fcluster < icluster) fileproc++; int fileprocnext = static_cast ((bigint) (icluster+1) * nprocs/nfile); fcluster = static_cast ((bigint) fileprocnext * nfile/nprocs); if (fcluster < icluster+1) fileprocnext++; int nclusterprocs = fileprocnext - fileproc; int filereader = 0; if (me == fileproc) filereader = 1; MPI_Comm clustercomm; MPI_Comm_split(world,icluster,0,&clustercomm); if (filereader) { char *procfile = new char[strlen(file) + 16]; char *ptr = strchr(file,'%'); *ptr = '\0'; sprintf(procfile,"%s%d%s",file,icluster,ptr+1); *ptr = '%'; fp = fopen(procfile,"rb"); if (fp == NULL) { char str[128]; sprintf(str,"Cannot open restart file %s",procfile); error->one(FLERR,str); } delete [] procfile; } int flag,procsperfile; if (filereader) { fread(&flag,sizeof(int),1,fp); if (flag != PROCSPERFILE) error->one(FLERR,"Invalid flag in peratom section of restart file"); fread(&procsperfile,sizeof(int),1,fp); } MPI_Bcast(&procsperfile,1,MPI_INT,0,clustercomm); int tmp,iproc; MPI_Status status; MPI_Request request; for (int i = 0; i < procsperfile; i++) { if (filereader) { fread(&flag,sizeof(int),1,fp); if (flag != PERPROC) error->one(FLERR,"Invalid flag in peratom section of restart file"); fread(&n,sizeof(int),1,fp); if (n > maxbuf) { maxbuf = n; memory->destroy(buf); memory->create(buf,maxbuf,"read_restart:buf"); } fread(buf,sizeof(double),n,fp); if (i % nclusterprocs) { iproc = me + (i % nclusterprocs); MPI_Send(&n,1,MPI_INT,iproc,0,world); MPI_Recv(&tmp,0,MPI_INT,iproc,0,world,&status); MPI_Rsend(buf,n,MPI_DOUBLE,iproc,0,world); } } else if (i % nclusterprocs == me - fileproc) { MPI_Recv(&n,1,MPI_INT,fileproc,0,world,&status); if (n > maxbuf) { maxbuf = n; memory->destroy(buf); memory->create(buf,maxbuf,"read_restart:buf"); } MPI_Irecv(buf,n,MPI_DOUBLE,fileproc,0,world,&request); MPI_Send(&tmp,0,MPI_INT,fileproc,0,world); MPI_Wait(&request,&status); } if (i % nclusterprocs == me - fileproc) { m = 0; while (m < n) m += 
avec->unpack_restart(&buf[m]); } } if (filereader) fclose(fp); MPI_Comm_free(&clustercomm); } // clean-up memory delete [] file; memory->destroy(buf); // for multiproc or MPI-IO files: // perform irregular comm to migrate atoms to correct procs if (multiproc || mpiioflag) { // if remapflag set, remap all atoms I read back to box before migrating if (remapflag) { double **x = atom->x; imageint *image = atom->image; int nlocal = atom->nlocal; for (int i = 0; i < nlocal; i++) domain->remap(x[i],image[i]); } // create a temporary fix to hold and migrate extra atom info // necessary b/c irregular will migrate atoms if (nextra) { char cextra[8],fixextra[8]; sprintf(cextra,"%d",nextra); sprintf(fixextra,"%d",modify->nfix_restart_peratom); char **newarg = new char*[5]; newarg[0] = (char *) "_read_restart"; newarg[1] = (char *) "all"; newarg[2] = (char *) "READ_RESTART"; newarg[3] = cextra; newarg[4] = fixextra; modify->add_fix(5,newarg); delete [] newarg; } // move atoms to new processors via irregular() // turn sorting on in migrate_atoms() to avoid non-reproducible restarts // in case read by different proc than wrote restart file // first do map_init() since irregular->migrate_atoms() will do map_clear() if (atom->map_style) atom->map_init(); if (domain->triclinic) domain->x2lamda(atom->nlocal); Irregular *irregular = new Irregular(lmp); irregular->migrate_atoms(1); delete irregular; if (domain->triclinic) domain->lamda2x(atom->nlocal); // put extra atom info held by fix back into atom->extra // destroy temporary fix if (nextra) { memory->destroy(atom->extra); memory->create(atom->extra,atom->nmax,nextra,"atom:extra"); int ifix = modify->find_fix("_read_restart"); FixReadRestart *fix = (FixReadRestart *) modify->fix[ifix]; int *count = fix->count; double **extra = fix->extra; double **atom_extra = atom->extra; int nlocal = atom->nlocal; for (int i = 0; i < nlocal; i++) for (int j = 0; j < count[i]; j++) atom_extra[i][j] = extra[i][j]; 
modify->delete_fix("_read_restart"); } } // check that all atoms were assigned to procs bigint natoms; bigint nblocal = atom->nlocal; MPI_Allreduce(&nblocal,&natoms,1,MPI_LMP_BIGINT,MPI_SUM,world); if (me == 0) { if (screen) fprintf(screen," " BIGINT_FORMAT " atoms\n",natoms); if (logfile) fprintf(logfile," " BIGINT_FORMAT " atoms\n",natoms); } if (natoms != atom->natoms) error->all(FLERR,"Did not assign all restart atoms correctly"); if (me == 0) { if (atom->nbonds) { if (screen) fprintf(screen," " BIGINT_FORMAT " bonds\n",atom->nbonds); if (logfile) fprintf(logfile," " BIGINT_FORMAT " bonds\n",atom->nbonds); } if (atom->nangles) { if (screen) fprintf(screen," " BIGINT_FORMAT " angles\n", atom->nangles); if (logfile) fprintf(logfile," " BIGINT_FORMAT " angles\n", atom->nangles); } if (atom->ndihedrals) { if (screen) fprintf(screen," " BIGINT_FORMAT " dihedrals\n", atom->ndihedrals); if (logfile) fprintf(logfile," " BIGINT_FORMAT " dihedrals\n", atom->ndihedrals); } if (atom->nimpropers) { if (screen) fprintf(screen," " BIGINT_FORMAT " impropers\n", atom->nimpropers); if (logfile) fprintf(logfile," " BIGINT_FORMAT " impropers\n", atom->nimpropers); } } // check that atom IDs are valid atom->tag_check(); // create global mapping of atoms if (atom->map_style) { atom->map_init(); atom->map_set(); } // create special bond lists for molecular systems if (atom->molecular == 1) { Special special(lmp); special.build(); } } /* ---------------------------------------------------------------------- infile contains a "*" search for all files which match the infile pattern replace "*" with latest timestep value to create outfile name search dir referenced by initial pathname of file if infile also contains "%", use "base" when searching directory only called by proc 0 ------------------------------------------------------------------------- */ void ReadRestart::file_search(char *infile, char *outfile) { char *ptr; // separate infile into dir + filename char *dirname = new 
char[strlen(infile) + 1]; char *filename = new char[strlen(infile) + 1]; if (strchr(infile,'/')) { ptr = strrchr(infile,'/'); *ptr = '\0'; strcpy(dirname,infile); strcpy(filename,ptr+1); *ptr = '/'; } else { strcpy(dirname,"./"); strcpy(filename,infile); } // if filename contains "%" replace "%" with "base" char *pattern = new char[strlen(filename) + 16]; if ((ptr = strchr(filename,'%'))) { *ptr = '\0'; sprintf(pattern,"%s%s%s",filename,"base",ptr+1); *ptr = '%'; } else strcpy(pattern,filename); // scan all files in directory, searching for files that match pattern // maxnum = largest int that matches "*" int n = strlen(pattern) + 16; char *begin = new char[n]; char *middle = new char[n]; char *end = new char[n]; ptr = strchr(pattern,'*'); *ptr = '\0'; strcpy(begin,pattern); strcpy(end,ptr+1); int nbegin = strlen(begin); bigint maxnum = -1; struct dirent *ep; DIR *dp = opendir(dirname); if (dp == NULL) error->one(FLERR,"Cannot open dir to search for restart file"); while ((ep = readdir(dp))) { if (strstr(ep->d_name,begin) != ep->d_name) continue; if ((ptr = strstr(&ep->d_name[nbegin],end)) == NULL) continue; if (strlen(end) == 0) ptr = ep->d_name + strlen(ep->d_name); *ptr = '\0'; if (strlen(&ep->d_name[nbegin]) < n) { strcpy(middle,&ep->d_name[nbegin]); if (ATOBIGINT(middle) > maxnum) maxnum = ATOBIGINT(middle); } } closedir(dp); if (maxnum < 0) error->one(FLERR,"Found no restart file matching pattern"); // create outfile with maxint substituted for "*" // use original infile, not pattern, since need to retain "%" in filename ptr = strchr(infile,'*'); *ptr = '\0'; sprintf(outfile,"%s" BIGINT_FORMAT "%s",infile,maxnum,ptr+1); *ptr = '*'; // clean up delete [] dirname; delete [] filename; delete [] pattern; delete [] begin; delete [] middle; delete [] end; } /* ---------------------------------------------------------------------- read header of restart file ------------------------------------------------------------------------- */ void ReadRestart::header(int 
incompatible) { int xperiodic,yperiodic,zperiodic; // read flags and fields until flag = -1 int flag = read_int(); while (flag >= 0) { // check restart file version, warn if different if (flag == VERSION) { char *version = read_string(); if (me == 0) { if (screen) fprintf(screen," restart file = %s, LAMMPS = %s\n", version,universe->version); } if (incompatible) error->all(FLERR,"Restart file incompatible with current version"); delete [] version; // check lmptype.h sizes, error if different } else if (flag == SMALLINT) { int size = read_int(); if (size != sizeof(smallint)) error->all(FLERR,"Smallint setting in lmptype.h is not compatible"); } else if (flag == IMAGEINT) { int size = read_int(); if (size != sizeof(imageint)) error->all(FLERR,"Imageint setting in lmptype.h is not compatible"); } else if (flag == TAGINT) { int size = read_int(); if (size != sizeof(tagint)) error->all(FLERR,"Tagint setting in lmptype.h is not compatible"); } else if (flag == BIGINT) { int size = read_int(); if (size != sizeof(bigint)) error->all(FLERR,"Bigint setting in lmptype.h is not compatible"); // reset unit_style only if different // so that timestep,neighbor-skin are not changed } else if (flag == UNITS) { char *style = read_string(); if (strcmp(style,update->unit_style) != 0) update->set_units(style); delete [] style; } else if (flag == NTIMESTEP) { update->ntimestep = read_bigint(); // set dimension from restart file } else if (flag == DIMENSION) { int dimension = read_int(); domain->dimension = dimension; if (domain->dimension == 2 && domain->zperiodic == 0) error->all(FLERR, "Cannot run 2d simulation with nonperiodic Z dimension"); // read nprocs from restart file, warn if different } else if (flag == NPROCS) { nprocs_file = read_int(); if (nprocs_file != comm->nprocs && me == 0) error->warning(FLERR,"Restart file used different # of processors"); // don't set procgrid, warn if different } else if (flag == PROCGRID) { int procgrid[3]; read_int(); read_int_vec(3,procgrid); 
if (comm->user_procgrid[0] != 0 && (procgrid[0] != comm->user_procgrid[0] || procgrid[1] != comm->user_procgrid[1] || procgrid[2] != comm->user_procgrid[2]) && me == 0) error->warning(FLERR,"Restart file used different 3d processor grid"); // don't set newton_pair, leave input script value unchanged // set newton_bond from restart file // warn if different and input script settings are not default } else if (flag == NEWTON_PAIR) { int newton_pair_file = read_int(); if (force->newton_pair != 1) { if (newton_pair_file != force->newton_pair && me == 0) error->warning(FLERR, "Restart file used different newton pair setting, " "using input script value"); } } else if (flag == NEWTON_BOND) { int newton_bond_file = read_int(); if (force->newton_bond != 1) { if (newton_bond_file != force->newton_bond && me == 0) error->warning(FLERR, "Restart file used different newton bond setting, " "using restart file value"); } force->newton_bond = newton_bond_file; if (force->newton_pair || force->newton_bond) force->newton = 1; else force->newton = 0; // set boundary settings from restart file // warn if different and input script settings are not default } else if (flag == XPERIODIC) { xperiodic = read_int(); } else if (flag == YPERIODIC) { yperiodic = read_int(); } else if (flag == ZPERIODIC) { zperiodic = read_int(); } else if (flag == BOUNDARY) { int boundary[3][2]; read_int(); read_int_vec(6,&boundary[0][0]); if (domain->boundary[0][0] || domain->boundary[0][1] || domain->boundary[1][0] || domain->boundary[1][1] || domain->boundary[2][0] || domain->boundary[2][1]) { if (boundary[0][0] != domain->boundary[0][0] || boundary[0][1] != domain->boundary[0][1] || boundary[1][0] != domain->boundary[1][0] || boundary[1][1] != domain->boundary[1][1] || boundary[2][0] != domain->boundary[2][0] || boundary[2][1] != domain->boundary[2][1]) { if (me == 0) error->warning(FLERR, "Restart file used different boundary settings, " "using restart file values"); } } domain->boundary[0][0] = 
boundary[0][0]; domain->boundary[0][1] = boundary[0][1]; domain->boundary[1][0] = boundary[1][0]; domain->boundary[1][1] = boundary[1][1]; domain->boundary[2][0] = boundary[2][0]; domain->boundary[2][1] = boundary[2][1]; domain->periodicity[0] = domain->xperiodic = xperiodic; domain->periodicity[1] = domain->yperiodic = yperiodic; domain->periodicity[2] = domain->zperiodic = zperiodic; domain->nonperiodic = 0; if (xperiodic == 0 || yperiodic == 0 || zperiodic == 0) { domain->nonperiodic = 1; if (boundary[0][0] >= 2 || boundary[0][1] >= 2 || boundary[1][0] >= 2 || boundary[1][1] >= 2 || boundary[2][0] >= 2 || boundary[2][1] >= 2) domain->nonperiodic = 2; } // create new AtomVec class using any stored args } else if (flag == ATOM_STYLE) { char *style = read_string(); int nargcopy = read_int(); char **argcopy = new char*[nargcopy]; for (int i = 0; i < nargcopy; i++) argcopy[i] = read_string(); - atom->create_avec(style,nargcopy,argcopy); + atom->create_avec(style,nargcopy,argcopy,0); for (int i = 0; i < nargcopy; i++) delete [] argcopy[i]; delete [] argcopy; delete [] style; } else if (flag == NATOMS) { atom->natoms = read_bigint(); } else if (flag == NTYPES) { atom->ntypes = read_int(); } else if (flag == NBONDS) { atom->nbonds = read_bigint(); } else if (flag == NBONDTYPES) { atom->nbondtypes = read_int(); } else if (flag == BOND_PER_ATOM) { atom->bond_per_atom = read_int(); } else if (flag == NANGLES) { atom->nangles = read_bigint(); } else if (flag == NANGLETYPES) { atom->nangletypes = read_int(); } else if (flag == ANGLE_PER_ATOM) { atom->angle_per_atom = read_int(); } else if (flag == NDIHEDRALS) { atom->ndihedrals = read_bigint(); } else if (flag == NDIHEDRALTYPES) { atom->ndihedraltypes = read_int(); } else if (flag == DIHEDRAL_PER_ATOM) { atom->dihedral_per_atom = read_int(); } else if (flag == NIMPROPERS) { atom->nimpropers = read_bigint(); } else if (flag == NIMPROPERTYPES) { atom->nimpropertypes = read_int(); } else if (flag == IMPROPER_PER_ATOM) { 
atom->improper_per_atom = read_int(); } else if (flag == TRICLINIC) { domain->triclinic = read_int(); } else if (flag == BOXLO) { read_int(); read_double_vec(3,domain->boxlo); } else if (flag == BOXHI) { read_int(); read_double_vec(3,domain->boxhi); } else if (flag == XY) { domain->xy = read_double(); } else if (flag == XZ) { domain->xz = read_double(); } else if (flag == YZ) { domain->yz = read_double(); } else if (flag == SPECIAL_LJ) { read_int(); read_double_vec(3,&force->special_lj[1]); } else if (flag == SPECIAL_COUL) { read_int(); read_double_vec(3,&force->special_coul[1]); } else error->all(FLERR,"Invalid flag in header section of restart file"); flag = read_int(); } } /* ---------------------------------------------------------------------- */ void ReadRestart::type_arrays() { int flag = read_int(); while (flag >= 0) { if (flag == MASS) { read_int(); double *mass = new double[atom->ntypes+1]; read_double_vec(atom->ntypes,&mass[1]); atom->set_mass(mass); delete [] mass; } else error->all(FLERR, "Invalid flag in type arrays section of restart file"); flag = read_int(); } } /* ---------------------------------------------------------------------- */ void ReadRestart::force_fields() { char *style; int flag = read_int(); while (flag >= 0) { if (flag == PAIR) { style = read_string(); - force->create_pair(style); + force->create_pair(style,0); delete [] style; force->pair->read_restart(fp); } else if (flag == BOND) { style = read_string(); - force->create_bond(style); + force->create_bond(style,0); delete [] style; force->bond->read_restart(fp); } else if (flag == ANGLE) { style = read_string(); - force->create_angle(style); + force->create_angle(style,0); delete [] style; force->angle->read_restart(fp); } else if (flag == DIHEDRAL) { style = read_string(); - force->create_dihedral(style); + force->create_dihedral(style,0); delete [] style; force->dihedral->read_restart(fp); } else if (flag == IMPROPER) { style = read_string(); - force->create_improper(style); + 
force->create_improper(style,0); delete [] style; force->improper->read_restart(fp); } else error->all(FLERR, "Invalid flag in force field section of restart file"); flag = read_int(); } } /* ---------------------------------------------------------------------- */ void ReadRestart::file_layout() { int flag = read_int(); while (flag >= 0) { if (flag == MULTIPROC) { multiproc_file = read_int(); if (multiproc == 0 && multiproc_file) error->all(FLERR,"Restart file is not a multi-proc file"); if (multiproc && multiproc_file == 0) error->all(FLERR,"Restart file is a multi-proc file"); } else if (flag == MPIIO) { int mpiioflag_file = read_int(); if (mpiioflag == 0 && mpiioflag_file) error->all(FLERR,"Restart file is a MPI-IO file"); if (mpiioflag && mpiioflag_file == 0) error->all(FLERR,"Restart file is not a MPI-IO file"); if (mpiioflag) { bigint *nproc_chunk_offsets; memory->create(nproc_chunk_offsets,nprocs, "write_restart:nproc_chunk_offsets"); bigint *nproc_chunk_sizes; memory->create(nproc_chunk_sizes,nprocs, "write_restart:nproc_chunk_sizes"); // on rank 0 read in the chunk sizes that were written out // then consolidate them and compute offsets relative to the // end of the header info to fit the current partition size // if the number of ranks that did the writing is different if (me == 0) { int *all_written_send_sizes; memory->create(all_written_send_sizes,nprocs_file, "write_restart:all_written_send_sizes"); int *nproc_chunk_number; memory->create(nproc_chunk_number,nprocs, "write_restart:nproc_chunk_number"); fread(all_written_send_sizes,sizeof(int),nprocs_file,fp); int init_chunk_number = nprocs_file/nprocs; int num_extra_chunks = nprocs_file - (nprocs*init_chunk_number); for (int i = 0; i < nprocs; i++) { if (i < num_extra_chunks) nproc_chunk_number[i] = init_chunk_number+1; else nproc_chunk_number[i] = init_chunk_number; } int all_written_send_sizes_index = 0; bigint current_offset = 0; for (int i=0;idestroy(all_written_send_sizes); 
memory->destroy(nproc_chunk_number); } // scatter chunk sizes and offsets to all procs MPI_Scatter(nproc_chunk_sizes, 1, MPI_LMP_BIGINT, &assignedChunkSize , 1, MPI_LMP_BIGINT, 0,world); MPI_Scatter(nproc_chunk_offsets, 1, MPI_LMP_BIGINT, &assignedChunkOffset , 1, MPI_LMP_BIGINT, 0,world); memory->destroy(nproc_chunk_sizes); memory->destroy(nproc_chunk_offsets); } } flag = read_int(); } // if MPI-IO file, broadcast the end of the header offset // this allows all ranks to compute offset to their data if (mpiioflag) { if (me == 0) headerOffset = ftell(fp); MPI_Bcast(&headerOffset,1,MPI_LMP_BIGINT,0,world); } } // ---------------------------------------------------------------------- // ---------------------------------------------------------------------- // low-level fread methods // ---------------------------------------------------------------------- // ---------------------------------------------------------------------- /* ---------------------------------------------------------------------- read strlen(MAGIC_STRING)+1 chars on proc 0 and bcast the count read, then the chars error on all procs if fewer chars were read or they do not match MAGIC_STRING ------------------------------------------------------------------------- */ void ReadRestart::magic_string() { int n = strlen(MAGIC_STRING) + 1; char *str = new char[n]; int count; if (me == 0) count = fread(str,sizeof(char),n,fp); MPI_Bcast(&count,1,MPI_INT,0,world); if (count < n) error->all(FLERR,"Invalid LAMMPS restart file"); MPI_Bcast(str,n,MPI_CHAR,0,world); if (strcmp(str,MAGIC_STRING) != 0) error->all(FLERR,"Invalid LAMMPS restart file"); delete [] str; } /* ---------------------------------------------------------------------- read the endian flag on proc 0 and bcast it return if it equals ENDIAN, else error on all procs: byte ordering is swapped (ENDIANSWAP) or not recognized NOTE(review): the fread() result is not checked here ------------------------------------------------------------------------- */ void ReadRestart::endian() { int endian; if (me == 0) fread(&endian,sizeof(int),1,fp); MPI_Bcast(&endian,1,MPI_INT,0,world); if (endian == ENDIAN) return; if (endian == ENDIANSWAP) error->all(FLERR,"Restart file byte ordering is swapped"); else error->all(FLERR,"Restart file byte ordering is not recognized"); } /*
---------------------------------------------------------------------- ------------------------------------------------------------------------- */ int ReadRestart::version_numeric() { int vn; if (me == 0) fread(&vn,sizeof(int),1,fp); MPI_Bcast(&vn,1,MPI_INT,0,world); if (vn != VERSION_NUMERIC) return 1; return 0; } /* ---------------------------------------------------------------------- read an int from restart file and bcast it ------------------------------------------------------------------------- */ int ReadRestart::read_int() { int value; if (me == 0) fread(&value,sizeof(int),1,fp); MPI_Bcast(&value,1,MPI_INT,0,world); return value; } /* ---------------------------------------------------------------------- read a bigint from restart file and bcast it ------------------------------------------------------------------------- */ bigint ReadRestart::read_bigint() { bigint value; if (me == 0) fread(&value,sizeof(bigint),1,fp); MPI_Bcast(&value,1,MPI_LMP_BIGINT,0,world); return value; } /* ---------------------------------------------------------------------- read a double from restart file and bcast it ------------------------------------------------------------------------- */ double ReadRestart::read_double() { double value; if (me == 0) fread(&value,sizeof(double),1,fp); MPI_Bcast(&value,1,MPI_DOUBLE,0,world); return value; } /* ---------------------------------------------------------------------- read a char string (including NULL) and bcast it str is allocated here, ptr is returned, caller must deallocate ------------------------------------------------------------------------- */ char *ReadRestart::read_string() { int n; if (me == 0) fread(&n,sizeof(int),1,fp); MPI_Bcast(&n,1,MPI_INT,0,world); char *value = new char[n]; if (me == 0) fread(value,sizeof(char),n,fp); MPI_Bcast(value,n,MPI_CHAR,0,world); return value; } /* ---------------------------------------------------------------------- read vector of N ints from restart file and bcast them do not 
rely on the caller to bcast them, proc 0 reads the values and the bcast to all procs is done here ------------------------------------------------------------------------- */ void ReadRestart::read_int_vec(int n, int *vec) { if (me == 0) fread(vec,sizeof(int),n,fp); MPI_Bcast(vec,n,MPI_INT,0,world); } /* ---------------------------------------------------------------------- read vector of N doubles from restart file on proc 0 and bcast them to all procs, the caller does not need to bcast them again ------------------------------------------------------------------------- */ void ReadRestart::read_double_vec(int n, double *vec) { if (me == 0) fread(vec,sizeof(double),n,fp); MPI_Bcast(vec,n,MPI_DOUBLE,0,world); } diff --git a/src/replicate.cpp b/src/replicate.cpp index 26f3fca7e..7300da096 100644 --- a/src/replicate.cpp +++ b/src/replicate.cpp @@ -1,422 +1,422 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */ #include "stdlib.h" #include "string.h" #include "replicate.h" #include "atom.h" #include "atom_vec.h" #include "atom_vec_hybrid.h" #include "force.h" #include "domain.h" #include "comm.h" #include "special.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; #define LB_FACTOR 1.1 #define EPSILON 1.0e-6 enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files /* ---------------------------------------------------------------------- */ Replicate::Replicate(LAMMPS *lmp) : Pointers(lmp) {} /* ---------------------------------------------------------------------- */ void Replicate::command(int narg, char **arg) { int i,j,m,n; if (domain->box_exist == 0) error->all(FLERR,"Replicate command before simulation box is defined"); if (narg != 3) error->all(FLERR,"Illegal replicate command"); int me = comm->me; int nprocs = comm->nprocs; if (me == 0 && screen) fprintf(screen,"Replicating atoms ...\n"); // nrep = total # of replications int nx = force->inumeric(FLERR,arg[0]); int ny = force->inumeric(FLERR,arg[1]); int nz = force->inumeric(FLERR,arg[2]); int nrep = nx*ny*nz; // error and warning checks if (nx <= 0 || ny <= 0 || nz <= 0) error->all(FLERR,"Illegal replicate command"); if (domain->dimension == 2 && nz != 1) error->all(FLERR,"Cannot replicate 2d simulation in z dimension"); if ((nx > 1 && domain->xperiodic == 0) || (ny > 1 && domain->yperiodic == 0) || (nz > 1 && domain->zperiodic == 0)) { if (comm->me == 0) error->warning(FLERR,"Replicating in a non-periodic dimension"); } if (atom->nextra_grow || atom->nextra_restart || atom->nextra_store) error->all(FLERR,"Cannot replicate with fixes that store atom quantities"); // maxtag = largest atom tag across all existing atoms tagint maxtag = 0; if (atom->tag_enable) { for (i = 0; i < atom->nlocal; i++) maxtag = MAX(atom->tag[i],maxtag); tagint maxtag_all; 
MPI_Allreduce(&maxtag,&maxtag_all,1,MPI_LMP_TAGINT,MPI_MAX,world); maxtag = maxtag_all; } // maxmol = largest molecule tag across all existing atoms tagint maxmol = 0; if (atom->molecule_flag) { for (i = 0; i < atom->nlocal; i++) maxmol = MAX(atom->molecule[i],maxmol); tagint maxmol_all; MPI_Allreduce(&maxmol,&maxmol_all,1,MPI_LMP_TAGINT,MPI_MAX,world); maxmol = maxmol_all; } // unmap existing atoms via image flags for (i = 0; i < atom->nlocal; i++) domain->unmap(atom->x[i],atom->image[i]); // communication buffer for all my atom's info // max_size = largest buffer needed by any proc // must do before new Atom class created, // since size_restart() uses atom->nlocal int max_size; int send_size = atom->avec->size_restart(); MPI_Allreduce(&send_size,&max_size,1,MPI_INT,MPI_MAX,world); double *buf; memory->create(buf,max_size,"replicate:buf"); // old = original atom class // atom = new replicated atom class Atom *old = atom; atom = new Atom(lmp); atom->settings(old); - atom->create_avec(old->atom_style,old->avec->nargcopy,old->avec->argcopy); + atom->create_avec(old->atom_style,old->avec->nargcopy,old->avec->argcopy,0); // check that new system will not be too large // new tags cannot exceed MAXTAGINT // new system sizes cannot exceed MAXBIGINT if (atom->tag_enable) { bigint maxnewtag = maxtag + (nrep-1)*old->natoms; if (maxnewtag < 0 || maxnewtag >= MAXTAGINT) error->all(FLERR,"Replicated system atom IDs are too big"); } if (nrep*old->natoms < 0 || nrep*old->natoms >= MAXBIGINT || nrep*old->nbonds < 0 || nrep*old->nbonds >= MAXBIGINT || nrep*old->nangles < 0 || nrep*old->nangles >= MAXBIGINT || nrep*old->ndihedrals < 0 || nrep*old->ndihedrals >= MAXBIGINT || nrep*old->nimpropers < 0 || nrep*old->nimpropers >= MAXBIGINT) error->all(FLERR,"Replicated system is too big"); // assign atom and topology counts in new class from old one atom->natoms = old->natoms * nrep; atom->nbonds = old->nbonds * nrep; atom->nangles = old->nangles * nrep; atom->ndihedrals = 
old->ndihedrals * nrep; atom->nimpropers = old->nimpropers * nrep; atom->ntypes = old->ntypes; atom->nbondtypes = old->nbondtypes; atom->nangletypes = old->nangletypes; atom->ndihedraltypes = old->ndihedraltypes; atom->nimpropertypes = old->nimpropertypes; atom->bond_per_atom = old->bond_per_atom; atom->angle_per_atom = old->angle_per_atom; atom->dihedral_per_atom = old->dihedral_per_atom; atom->improper_per_atom = old->improper_per_atom; // store old simulation box int triclinic = domain->triclinic; double old_xprd = domain->xprd; double old_yprd = domain->yprd; double old_zprd = domain->zprd; double old_xy = domain->xy; double old_xz = domain->xz; double old_yz = domain->yz; // setup new simulation box domain->boxhi[0] = domain->boxlo[0] + nx*old_xprd; domain->boxhi[1] = domain->boxlo[1] + ny*old_yprd; domain->boxhi[2] = domain->boxlo[2] + nz*old_zprd; if (triclinic) { domain->xy *= ny; domain->xz *= nz; domain->yz *= nz; } // new problem setup using new box boundaries if (nprocs == 1) n = static_cast (atom->natoms); else n = static_cast (LB_FACTOR * atom->natoms / nprocs); atom->allocate_type_arrays(); atom->avec->grow(n); n = atom->nmax; domain->print_box(" "); domain->set_initial_box(); domain->set_global_box(); comm->set_proc_grid(); domain->set_local_box(); // copy type arrays to new atom class if (atom->mass) { for (int itype = 1; itype <= atom->ntypes; itype++) { atom->mass_setflag[itype] = old->mass_setflag[itype]; if (atom->mass_setflag[itype]) atom->mass[itype] = old->mass[itype]; } } // set bounds for my proc // if periodic and I am lo/hi proc, adjust bounds by EPSILON // insures all replicated atoms will be owned even with round-off double epsilon[3]; if (triclinic) epsilon[0] = epsilon[1] = epsilon[2] = EPSILON; else { epsilon[0] = domain->prd[0] * EPSILON; epsilon[1] = domain->prd[1] * EPSILON; epsilon[2] = domain->prd[2] * EPSILON; } double sublo[3],subhi[3]; if (triclinic == 0) { sublo[0] = domain->sublo[0]; subhi[0] = domain->subhi[0]; sublo[1] = 
domain->sublo[1]; subhi[1] = domain->subhi[1]; sublo[2] = domain->sublo[2]; subhi[2] = domain->subhi[2]; } else { sublo[0] = domain->sublo_lamda[0]; subhi[0] = domain->subhi_lamda[0]; sublo[1] = domain->sublo_lamda[1]; subhi[1] = domain->subhi_lamda[1]; sublo[2] = domain->sublo_lamda[2]; subhi[2] = domain->subhi_lamda[2]; } if (comm->layout != LAYOUT_TILED) { if (domain->xperiodic) { if (comm->myloc[0] == 0) sublo[0] -= epsilon[0]; if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] += epsilon[0]; } if (domain->yperiodic) { if (comm->myloc[1] == 0) sublo[1] -= epsilon[1]; if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] += epsilon[1]; } if (domain->zperiodic) { if (comm->myloc[2] == 0) sublo[2] -= epsilon[2]; if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] += epsilon[2]; } } else { if (domain->xperiodic) { if (comm->mysplit[0][0] == 0.0) sublo[0] -= epsilon[0]; if (comm->mysplit[0][1] == 1.0) subhi[0] += epsilon[0]; } if (domain->yperiodic) { if (comm->mysplit[1][0] == 0.0) sublo[1] -= epsilon[1]; if (comm->mysplit[1][1] == 1.0) subhi[1] += epsilon[1]; } if (domain->zperiodic) { if (comm->mysplit[2][0] == 0.0) sublo[2] -= epsilon[2]; if (comm->mysplit[2][1] == 1.0) subhi[2] += epsilon[2]; } } // loop over all procs // if this iteration of loop is me: // pack my unmapped atom data into buf // bcast it to all other procs // performs 3d replicate loop with while loop over atoms in buf // x = new replicated position, remapped into simulation box // unpack atom into new atom class from buf if I own it // adjust tag, mol #, coord, topology info as needed AtomVec *old_avec = old->avec; AtomVec *avec = atom->avec; int ix,iy,iz; tagint atom_offset,mol_offset; imageint image; double x[3],lamda[3]; double *coord; int tag_enable = atom->tag_enable; for (int iproc = 0; iproc < nprocs; iproc++) { if (me == iproc) { n = 0; for (i = 0; i < old->nlocal; i++) n += old_avec->pack_restart(i,&buf[n]); } MPI_Bcast(&n,1,MPI_INT,iproc,world); 
MPI_Bcast(buf,n,MPI_DOUBLE,iproc,world); for (ix = 0; ix < nx; ix++) { for (iy = 0; iy < ny; iy++) { for (iz = 0; iz < nz; iz++) { // while loop over one proc's atom list m = 0; while (m < n) { image = ((imageint) IMGMAX << IMG2BITS) | ((imageint) IMGMAX << IMGBITS) | IMGMAX; if (triclinic == 0) { x[0] = buf[m+1] + ix*old_xprd; x[1] = buf[m+2] + iy*old_yprd; x[2] = buf[m+3] + iz*old_zprd; } else { x[0] = buf[m+1] + ix*old_xprd + iy*old_xy + iz*old_xz; x[1] = buf[m+2] + iy*old_yprd + iz*old_yz; x[2] = buf[m+3] + iz*old_zprd; } domain->remap(x,image); if (triclinic) { domain->x2lamda(x,lamda); coord = lamda; } else coord = x; if (coord[0] >= sublo[0] && coord[0] < subhi[0] && coord[1] >= sublo[1] && coord[1] < subhi[1] && coord[2] >= sublo[2] && coord[2] < subhi[2]) { m += avec->unpack_restart(&buf[m]); i = atom->nlocal - 1; if (tag_enable) atom_offset = iz*ny*nx*maxtag + iy*nx*maxtag + ix*maxtag; else atom_offset = 0; mol_offset = iz*ny*nx*maxmol + iy*nx*maxmol + ix*maxmol; atom->x[i][0] = x[0]; atom->x[i][1] = x[1]; atom->x[i][2] = x[2]; atom->tag[i] += atom_offset; atom->image[i] = image; if (atom->molecular) { if (atom->molecule[i] > 0) atom->molecule[i] += mol_offset; if (atom->molecular == 1) { if (atom->avec->bonds_allow) for (j = 0; j < atom->num_bond[i]; j++) atom->bond_atom[i][j] += atom_offset; if (atom->avec->angles_allow) for (j = 0; j < atom->num_angle[i]; j++) { atom->angle_atom1[i][j] += atom_offset; atom->angle_atom2[i][j] += atom_offset; atom->angle_atom3[i][j] += atom_offset; } if (atom->avec->dihedrals_allow) for (j = 0; j < atom->num_dihedral[i]; j++) { atom->dihedral_atom1[i][j] += atom_offset; atom->dihedral_atom2[i][j] += atom_offset; atom->dihedral_atom3[i][j] += atom_offset; atom->dihedral_atom4[i][j] += atom_offset; } if (atom->avec->impropers_allow) for (j = 0; j < atom->num_improper[i]; j++) { atom->improper_atom1[i][j] += atom_offset; atom->improper_atom2[i][j] += atom_offset; atom->improper_atom3[i][j] += atom_offset; 
atom->improper_atom4[i][j] += atom_offset; } } } } else m += static_cast (buf[m]); } } } } } // free communication buffer and old atom class memory->destroy(buf); delete old; // check that all atoms were assigned to procs bigint natoms; bigint nblocal = atom->nlocal; MPI_Allreduce(&nblocal,&natoms,1,MPI_LMP_BIGINT,MPI_SUM,world); if (me == 0) { if (screen) fprintf(screen," " BIGINT_FORMAT " atoms\n",natoms); if (logfile) fprintf(logfile," " BIGINT_FORMAT " atoms\n",natoms); } if (natoms != atom->natoms) error->all(FLERR,"Replicate did not assign all atoms correctly"); if (me == 0) { if (atom->nbonds) { if (screen) fprintf(screen," " BIGINT_FORMAT " bonds\n",atom->nbonds); if (logfile) fprintf(logfile," " BIGINT_FORMAT " bonds\n",atom->nbonds); } if (atom->nangles) { if (screen) fprintf(screen," " BIGINT_FORMAT " angles\n", atom->nangles); if (logfile) fprintf(logfile," " BIGINT_FORMAT " angles\n", atom->nangles); } if (atom->ndihedrals) { if (screen) fprintf(screen," " BIGINT_FORMAT " dihedrals\n", atom->ndihedrals); if (logfile) fprintf(logfile," " BIGINT_FORMAT " dihedrals\n", atom->ndihedrals); } if (atom->nimpropers) { if (screen) fprintf(screen," " BIGINT_FORMAT " impropers\n", atom->nimpropers); if (logfile) fprintf(logfile," " BIGINT_FORMAT " impropers\n", atom->nimpropers); } } // check that atom IDs are valid atom->tag_check(); // create global mapping of atoms if (atom->map_style) { atom->map_init(); atom->map_set(); } // create special bond lists for molecular systems if (atom->molecular == 1) { Special special(lmp); special.build(); } } diff --git a/src/suffix.h b/src/suffix.h index 2a150ed5e..43493d620 100644 --- a/src/suffix.h +++ b/src/suffix.h @@ -1,29 +1,30 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. 
Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #ifndef LMP_SUFFIX_H #define LMP_SUFFIX_H namespace LAMMPS_NS { namespace Suffix { static const int NONE = 0; static const int OPT = 1<<0; static const int GPU = 1<<1; static const int CUDA = 1<<2; static const int OMP = 1<<3; + static const int INTEL = 1<<4; } } #endif diff --git a/src/update.cpp b/src/update.cpp index a2017db06..610cce1ab 100644 --- a/src/update.cpp +++ b/src/update.cpp @@ -1,478 +1,496 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ #include "string.h" #include "stdlib.h" #include "update.h" #include "integrate.h" #include "min.h" #include "style_integrate.h" #include "style_minimize.h" #include "neighbor.h" #include "neigh_list.h" #include "force.h" #include "modify.h" #include "fix.h" #include "domain.h" #include "region.h" #include "compute.h" #include "output.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ Update::Update(LAMMPS *lmp) : Pointers(lmp) { char *str; ntimestep = 0; atime = 0.0; atimestep = 0; first_update = 0; whichflag = 0; firststep = laststep = 0; beginstep = endstep = 0; setupflag = 0; multireplica = 0; restrict_output = 0; eflag_global = vflag_global = -1; unit_style = NULL; set_units("lj"); integrate_style = NULL; integrate = NULL; minimize_style = NULL; minimize = NULL; str = (char *) "verlet"; - create_integrate(1,&str,lmp->suffix); + create_integrate(1,&str,1); str = (char *) "cg"; create_minimize(1,&str); } /* ---------------------------------------------------------------------- */ Update::~Update() { delete [] unit_style; delete [] integrate_style; delete integrate; delete [] minimize_style; delete minimize; } /* ---------------------------------------------------------------------- */ void Update::init() { // if USER-CUDA mode is enabled: // integrate/minimize style must be CUDA variant if (whichflag == 1 && lmp->cuda) if (strstr(integrate_style,"cuda") == NULL) error->all(FLERR,"USER-CUDA mode requires CUDA variant of run style"); if (whichflag == 2 && lmp->cuda) if (strstr(minimize_style,"cuda") == NULL) error->all(FLERR,"USER-CUDA mode requires CUDA variant of min style"); // init the appropriate integrate and/or minimize class // if neither (e.g. 
from write_restart) then just return if (whichflag == 0) return; if (whichflag == 1) integrate->init(); else if (whichflag == 2) minimize->init(); // only set first_update if a run or minimize is being performed first_update = 1; } /* ---------------------------------------------------------------------- */ void Update::set_units(const char *style) { // physical constants from: // http://physics.nist.gov/cuu/Constants/Table/allascii.txt // using thermochemical calorie = 4.184 J if (strcmp(style,"lj") == 0) { force->boltz = 1.0; force->hplanck = 0.18292026; // using LJ parameters for argon force->mvv2e = 1.0; force->ftm2v = 1.0; force->mv2d = 1.0; force->nktv2p = 1.0; force->qqr2e = 1.0; force->qe2f = 1.0; force->vxmu2f = 1.0; force->xxt2kmu = 1.0; force->e_mass = 0.0; // not yet set force->hhmrr2e = 0.0; force->mvh2r = 0.0; force->angstrom = 1.0; force->femtosecond = 1.0; force->qelectron = 1.0; dt = 0.005; neighbor->skin = 0.3; } else if (strcmp(style,"real") == 0) { force->boltz = 0.0019872067; force->hplanck = 95.306976368; force->mvv2e = 48.88821291 * 48.88821291; force->ftm2v = 1.0 / 48.88821291 / 48.88821291; force->mv2d = 1.0 / 0.602214179; force->nktv2p = 68568.415; force->qqr2e = 332.06371; force->qe2f = 23.060549; force->vxmu2f = 1.4393264316e4; force->xxt2kmu = 0.1; force->e_mass = 1.0/1836.1527556560675; force->hhmrr2e = 0.0957018663603261; force->mvh2r = 1.5339009481951; force->angstrom = 1.0; force->femtosecond = 1.0; force->qelectron = 1.0; dt = 1.0; neighbor->skin = 2.0; } else if (strcmp(style,"metal") == 0) { force->boltz = 8.617343e-5; force->hplanck = 4.135667403e-3; force->mvv2e = 1.0364269e-4; force->ftm2v = 1.0 / 1.0364269e-4; force->mv2d = 1.0 / 0.602214179; force->nktv2p = 1.6021765e6; force->qqr2e = 14.399645; force->qe2f = 1.0; force->vxmu2f = 0.6241509647; force->xxt2kmu = 1.0e-4; force->e_mass = 0.0; // not yet set force->hhmrr2e = 0.0; force->mvh2r = 0.0; force->angstrom = 1.0; force->femtosecond = 1.0e-3; force->qelectron = 1.0; dt = 
0.001; neighbor->skin = 2.0; } else if (strcmp(style,"si") == 0) { force->boltz = 1.3806504e-23; force->hplanck = 6.62606896e-34; force->mvv2e = 1.0; force->ftm2v = 1.0; force->mv2d = 1.0; force->nktv2p = 1.0; force->qqr2e = 8.9876e9; force->qe2f = 1.0; force->vxmu2f = 1.0; force->xxt2kmu = 1.0; force->e_mass = 0.0; // not yet set force->hhmrr2e = 0.0; force->mvh2r = 0.0; force->angstrom = 1.0e-10; force->femtosecond = 1.0e-15; force->qelectron = 1.6021765e-19; dt = 1.0e-8; neighbor->skin = 0.001; } else if (strcmp(style,"cgs") == 0) { force->boltz = 1.3806504e-16; force->hplanck = 6.62606896e-27; force->mvv2e = 1.0; force->ftm2v = 1.0; force->mv2d = 1.0; force->nktv2p = 1.0; force->qqr2e = 1.0; force->qe2f = 1.0; force->vxmu2f = 1.0; force->xxt2kmu = 1.0; force->e_mass = 0.0; // not yet set force->hhmrr2e = 0.0; force->mvh2r = 0.0; force->angstrom = 1.0e-8; force->femtosecond = 1.0e-15; force->qelectron = 4.8032044e-10; dt = 1.0e-8; neighbor->skin = 0.1; } else if (strcmp(style,"electron") == 0) { force->boltz = 3.16681534e-6; force->hplanck = 0.1519829846; force->mvv2e = 1.06657236; force->ftm2v = 0.937582899; force->mv2d = 1.0; force->nktv2p = 2.94210108e13; force->qqr2e = 1.0; force->qe2f = 1.94469051e-10; force->vxmu2f = 3.39893149e1; force->xxt2kmu = 3.13796367e-2; force->e_mass = 0.0; // not yet set force->hhmrr2e = 0.0; force->mvh2r = 0.0; force->angstrom = 1.88972612; force->femtosecond = 0.0241888428; force->qelectron = 1.0; dt = 0.001; neighbor->skin = 2.0; } else if (strcmp(style,"micro") == 0) { force->boltz = 1.3806504e-8; force->hplanck = 6.62606896e-13; force->mvv2e = 1.0; force->ftm2v = 1.0; force->mv2d = 1.0; force->nktv2p = 1.0; force->qqr2e = 8.987556e6; force->qe2f = 1.0; force->vxmu2f = 1.0; force->xxt2kmu = 1.0; force->e_mass = 0.0; // not yet set force->hhmrr2e = 0.0; force->mvh2r = 0.0; force->angstrom = 1.0e-4; force->femtosecond = 1.0e-9; force->qelectron = 1.6021765e-7; dt = 2.0; neighbor->skin = 0.1; } else if (strcmp(style,"nano") == 
0) { force->boltz = 0.013806504; force->hplanck = 6.62606896e-4; force->mvv2e = 1.0; force->ftm2v = 1.0; force->mv2d = 1.0; force->nktv2p = 1.0; force->qqr2e = 230.7078669; force->qe2f = 1.0; force->vxmu2f = 1.0; force->xxt2kmu = 1.0; force->e_mass = 0.0; // not yet set force->hhmrr2e = 0.0; force->mvh2r = 0.0; force->angstrom = 1.0e-1; force->femtosecond = 1.0e-6; force->qelectron = 1.0; dt = 0.00045; neighbor->skin = 0.1; } else error->all(FLERR,"Illegal units command"); delete [] unit_style; int n = strlen(style) + 1; unit_style = new char[n]; strcpy(unit_style,style); } /* ---------------------------------------------------------------------- */ -void Update::create_integrate(int narg, char **arg, char *suffix) +void Update::create_integrate(int narg, char **arg, int trysuffix) { if (narg < 1) error->all(FLERR,"Illegal run_style command"); delete [] integrate_style; delete integrate; int sflag; - new_integrate(arg[0],narg-1,&arg[1],suffix,sflag); + new_integrate(arg[0],narg-1,&arg[1],trysuffix,sflag); if (sflag) { char estyle[256]; - sprintf(estyle,"%s/%s",arg[0],suffix); + if (sflag == 1) sprintf(estyle,"%s/%s",arg[0],lmp->suffix); + else sprintf(estyle,"%s/%s",arg[0],lmp->suffix2); int n = strlen(estyle) + 1; integrate_style = new char[n]; strcpy(integrate_style,estyle); } else { int n = strlen(arg[0]) + 1; integrate_style = new char[n]; strcpy(integrate_style,arg[0]); } } /* ---------------------------------------------------------------------- create the Integrate style, first with suffix appended ------------------------------------------------------------------------- */ void Update::new_integrate(char *style, int narg, char **arg, - char *suffix, int &sflag) + int trysuffix, int &sflag) { - int success = 0; + if (trysuffix && lmp->suffix_enable) { + if (lmp->suffix) { + sflag = 1; + char estyle[256]; + sprintf(estyle,"%s/%s",style,lmp->suffix); + int success = 1; - if (suffix && lmp->suffix_enable) { - sflag = 1; - char estyle[256]; - 
sprintf(estyle,"%s/%s",style,suffix); - success = 1; - - if (0) return; + if (0) return; #define INTEGRATE_CLASS #define IntegrateStyle(key,Class) \ - else if (strcmp(estyle,#key) == 0) integrate = new Class(lmp,narg,arg); + else if (strcmp(estyle,#key) == 0) integrate = new Class(lmp,narg,arg); #include "style_integrate.h" #undef IntegrateStyle #undef INTEGRATE_CLASS + + else success = 0; + if (success) return; + } + + if (lmp->suffix2) { + sflag = 2; + char estyle[256]; + sprintf(estyle,"%s/%s",style,lmp->suffix2); + int success = 1; + + if (0) return; - else success = 0; +#define INTEGRATE_CLASS +#define IntegrateStyle(key,Class) \ + else if (strcmp(estyle,#key) == 0) integrate = new Class(lmp,narg,arg); +#include "style_integrate.h" +#undef IntegrateStyle +#undef INTEGRATE_CLASS + + else success = 0; + if (success) return; + } } - if (!success) { - sflag = 0; - - if (0) return; + sflag = 0; + if (0) return; #define INTEGRATE_CLASS #define IntegrateStyle(key,Class) \ - else if (strcmp(style,#key) == 0) integrate = new Class(lmp,narg,arg); + else if (strcmp(style,#key) == 0) integrate = new Class(lmp,narg,arg); #include "style_integrate.h" #undef IntegrateStyle #undef INTEGRATE_CLASS - else error->all(FLERR,"Illegal integrate style"); - } + else error->all(FLERR,"Illegal integrate style"); } /* ---------------------------------------------------------------------- */ void Update::create_minimize(int narg, char **arg) { if (narg != 1) error->all(FLERR,"Illegal min_style command"); delete [] minimize_style; delete minimize; if (0) return; // dummy line to enable else-if macro expansion #define MINIMIZE_CLASS #define MinimizeStyle(key,Class) \ else if (strcmp(arg[0],#key) == 0) minimize = new Class(lmp); #include "style_minimize.h" #undef MINIMIZE_CLASS else error->all(FLERR,"Illegal min_style command"); int n = strlen(arg[0]) + 1; minimize_style = new char[n]; strcpy(minimize_style,arg[0]); } /* ---------------------------------------------------------------------- 
reset timestep as called from input script ------------------------------------------------------------------------- */ void Update::reset_timestep(int narg, char **arg) { if (narg != 1) error->all(FLERR,"Illegal reset_timestep command"); bigint newstep = ATOBIGINT(arg[0]); reset_timestep(newstep); } /* ---------------------------------------------------------------------- reset timestep called from rerun command and input script (indirectly) ------------------------------------------------------------------------- */ void Update::reset_timestep(bigint newstep) { ntimestep = newstep; if (ntimestep < 0) error->all(FLERR,"Timestep must be >= 0"); if (ntimestep > MAXBIGINT) error->all(FLERR,"Too big a timestep"); // set atimestep to new timestep // so future update_time() calls will be correct atimestep = ntimestep; // trigger reset of timestep for output and for fixes that require it // do not allow any timestep-dependent fixes to be defined output->reset_timestep(ntimestep); for (int i = 0; i < modify->nfix; i++) { if (modify->fix[i]->time_depend) error->all(FLERR, "Cannot reset timestep with a time-dependent fix defined"); modify->fix[i]->reset_timestep(ntimestep); } // reset eflag/vflag global so no commands will think eng/virial are current eflag_global = vflag_global = -1; // reset invoked flags of computes, // so no commands will think they are current between runs for (int i = 0; i < modify->ncompute; i++) { modify->compute[i]->invoked_scalar = -1; modify->compute[i]->invoked_vector = -1; modify->compute[i]->invoked_array = -1; modify->compute[i]->invoked_peratom = -1; modify->compute[i]->invoked_local = -1; } // clear timestep list of computes that store future invocation times for (int i = 0; i < modify->ncompute; i++) if (modify->compute[i]->timeflag) modify->compute[i]->clearstep(); // set last_build of all neigh lists to -1 to force rebuild for (int i = 0; i < neighbor->nlist; i++) neighbor->lists[i]->last_build = -1; // NOTE: 7Jun12, adding rerun 
command, don't think this is required //for (int i = 0; i < domain->nregion; i++) // if (domain->regions[i]->dynamic_check()) // error->all(FLERR,"Cannot reset timestep with a dynamic region defined"); } /* ---------------------------------------------------------------------- update elapsed simulation time called at end of runs or when timestep size changes ------------------------------------------------------------------------- */ void Update::update_time() { atime += (ntimestep-atimestep) * dt; atimestep = ntimestep; } /* ---------------------------------------------------------------------- memory usage of update and integrate/minimize ------------------------------------------------------------------------- */ bigint Update::memory_usage() { bigint bytes = 0; if (whichflag == 1) bytes += integrate->memory_usage(); else if (whichflag == 2) bytes += minimize->memory_usage(); return bytes; } diff --git a/src/update.h b/src/update.h index 53bf041de..5c6c15bad 100644 --- a/src/update.h +++ b/src/update.h @@ -1,108 +1,108 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ #ifndef LMP_UPDATE_H #define LMP_UPDATE_H #include "pointers.h" namespace LAMMPS_NS { class Update : protected Pointers { public: double dt; // timestep double etol,ftol; // minimizer tolerances on energy/force bigint ntimestep; // current step (dynamics or min iterations) int nsteps; // # of steps to run (dynamics or min iter) int whichflag; // 0 for unset, 1 for dynamics, 2 for min double atime; // simulation time at atime_step bigint atimestep; // last timestep atime was updated bigint firststep,laststep; // 1st & last step of this run bigint beginstep,endstep; // 1st and last step of multiple runs int first_update; // 0 before initial update, 1 after int max_eval; // max force evaluations for minimizer int restrict_output; // 1 if output should not write dump/restart int setupflag; // set when setup() is computing forces int multireplica; // 1 if min across replicas, else 0 bigint eflag_global,eflag_atom; // timestep global/peratom eng is tallied on bigint vflag_global,vflag_atom; // ditto for virial char *unit_style; class Integrate *integrate; char *integrate_style; class Min *minimize; char *minimize_style; Update(class LAMMPS *); ~Update(); void init(); void set_units(const char *); - void create_integrate(int, char **, char *); + void create_integrate(int, char **, int); void create_minimize(int, char **); void reset_timestep(int, char **); void reset_timestep(bigint); void update_time(); bigint memory_usage(); private: - void new_integrate(char *, int, char **, char *, int &); + void new_integrate(char *, int, char **, int, int &); }; } #endif /* ERROR/WARNING messages: E: USER-CUDA mode requires CUDA variant of run style CUDA mode is enabled, so the run style must include a cuda suffix. E: USER-CUDA mode requires CUDA variant of min style CUDA mode is enabled, so the min style must include a cuda suffix. E: Illegal ... command Self-explanatory. 
Check the input script syntax and compare to the documentation for the command. You can use -echo screen as a command-line option when running LAMMPS to see the offending line. E: Illegal integrate style Self-explanatory. E: Timestep must be >= 0 Specified timestep is invalid. E: Too big a timestep Specified timestep is too large. E: Cannot reset timestep with a time-dependent fix defined You cannot reset the timestep when a fix that keeps track of elapsed time is in place. E: Cannot reset timestep with a dynamic region defined Dynamic regions (see the region command) have a time dependence. Thus you cannot change the timestep when one or more of these are defined. */