diff --git a/src/MAKE/MACHINES/Makefile.stampede b/src/MAKE/MACHINES/Makefile.stampede index e8b363896..3edda8c9f 100755 --- a/src/MAKE/MACHINES/Makefile.stampede +++ b/src/MAKE/MACHINES/Makefile.stampede @@ -1,116 +1,114 @@ # stampede = Intel Compiler, MKL FFT, Offload to Xeon Phi SHELL = /bin/sh # --------------------------------------------------------------------- # compiler/linker settings # specify flags and libraries needed for your compiler CC = mpicc -openmp -DLMP_INTEL_OFFLOAD -DLAMMPS_MEMALIGN=64 MIC_OPT = -offload-option,mic,compiler,"-fp-model fast=2 -mGLOB_default_function_attrs=\"gather_scatter_loop_unroll=4\"" -CCFLAGS = -O3 -xAVX -fno-alias -ansi-alias -restrict -override-limits $(MIC_OPT) +CCFLAGS = -O3 -xhost -fp-model precise -restrict -override-limits $(MIC_OPT) SHFLAGS = -fPIC DEPFLAGS = -M LINK = mpicc -openmp -LINKFLAGS = -O3 -xAVX +LINKFLAGS = -O3 -xhost LIB = SIZE = size ARCHIVE = ar ARFLAGS = -rc SHLIBFLAGS = -shared # --------------------------------------------------------------------- # LAMMPS-specific settings, all OPTIONAL # specify settings for LAMMPS features you will use # if you change any -D setting, do full re-compile after "make clean" # LAMMPS ifdef settings # see possible settings in Section 2.2 (step 4) of manual -LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG +LMP_INC = -DLAMMPS_GZIP # MPI library # see discussion in Section 2.2 (step 5) of manual # MPI wrapper compiler/linker can provide this info # can point to dummy MPI library in src/STUBS as in Makefile.serial # use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts # INC = path for mpi.h, MPI compiler settings # PATH = path for MPI library # LIB = name of MPI library MPI_INC = -DMPICH_SKIP_MPICXX MPI_PATH = MPI_LIB = # FFT library # see discussion in Section 2.2 (step 6) of manaul # can be left blank to use provided KISS FFT library # INC = -DFFT setting, e.g. 
-DFFT_FFTW, FFT compiler settings # PATH = path for FFT library # LIB = name of FFT library -FFT_INC = -DFFT_MKL -DFFT_SINGLE -I$(TACC_MKL_INC) +FFT_INC = -DFFT_MKL -I$(TACC_MKL_INC) FFT_PATH = FFT_LIB = -L$(TACC_MKL_LIB) -lmkl_intel_ilp64 -lmkl_intel_thread -lmkl_core # JPEG and/or PNG library # see discussion in Section 2.2 (step 7) of manual # only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC # INC = path(s) for jpeglib.h and/or png.h # PATH = path(s) for JPEG library and/or PNG library # LIB = name(s) of JPEG library and/or PNG library JPG_INC = JPG_PATH = JPG_LIB = -ljpeg # --------------------------------------------------------------------- # build rules and dependencies # do not edit this section include Makefile.package.settings include Makefile.package EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC) EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH) EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB) -EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS) -EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS) # Path to src files vpath %.cpp .. vpath %.h .. 
# Link target -$(EXE): $(OBJ) $(EXTRA_LINK_DEPENDS) +$(EXE): $(OBJ) $(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE) $(SIZE) $(EXE) # Library targets -lib: $(OBJ) $(EXTRA_LINK_DEPENDS) +lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) -shlib: $(OBJ) $(EXTRA_LINK_DEPENDS) +shlib: $(OBJ) $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \ $(OBJ) $(EXTRA_LIB) $(LIB) # Compilation rules -%.o:%.cpp $(EXTRA_CPP_DEPENDS) +%.o:%.cpp $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< -%.d:%.cpp $(EXTRA_CPP_DEPENDS) +%.d:%.cpp $(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@ -%.o:%.cu $(EXTRA_CPP_DEPENDS) +%.o:%.cu $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< # Individual dependencies DEPENDS = $(OBJ:.o=.d) sinclude $(DEPENDS) diff --git a/src/MC/fix_atom_swap.cpp b/src/MC/fix_atom_swap.cpp index 65214ee55..fe20ce389 100644 --- a/src/MC/fix_atom_swap.cpp +++ b/src/MC/fix_atom_swap.cpp @@ -1,801 +1,799 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing authors: Paul Crozier (SNL) Alexander Stukowski ------------------------------------------------------------------------- */ #include "math.h" #include "float.h" #include "stdlib.h" #include "string.h" #include "fix_atom_swap.h" #include "atom.h" #include "atom_vec.h" #include "atom_vec_hybrid.h" #include "update.h" #include "modify.h" #include "fix.h" #include "comm.h" #include "compute.h" #include "group.h" #include "domain.h" #include "region.h" #include "random_park.h" #include "force.h" #include "pair.h" #include "bond.h" #include "angle.h" #include "dihedral.h" #include "improper.h" #include "kspace.h" #include "math_const.h" #include "memory.h" #include "error.h" #include "thermo.h" #include "output.h" #include "neighbor.h" #include using namespace std; using namespace LAMMPS_NS; using namespace FixConst; using namespace MathConst; /* ---------------------------------------------------------------------- */ FixAtomSwap::FixAtomSwap(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg) { if (narg < 10) error->all(FLERR,"Illegal fix atom/swap command"); dynamic_group_allow = 1; vector_flag = 1; size_vector = 2; global_freq = 1; extvector = 0; restart_global = 1; time_depend = 1; type_list = NULL; qtype = NULL; // required args nevery = force->inumeric(FLERR,arg[3]); ncycles = force->inumeric(FLERR,arg[4]); seed = force->inumeric(FLERR,arg[5]); double temperature = force->numeric(FLERR,arg[6]); beta = 1.0/(force->boltz*temperature); if (ncycles < 0) error->all(FLERR,"Illegal fix atom/swap command"); if (seed <= 0) error->all(FLERR,"Illegal fix atom/swap command"); memory->create(type_list,atom->ntypes,"atom/swap:type_list"); memory->create(mu,atom->ntypes+1,"atom/swap:mu"); for (int i = 1; i <= atom->ntypes; i++) mu[i] = 0.0; // read options from end of input line options(narg-7,&arg[7]); // 
random number generator, same for all procs random_equal = new RanPark(lmp,seed); // random number generator, not the same for all procs random_unequal = new RanPark(lmp,seed); // set up reneighboring force_reneighbor = 1; next_reneighbor = update->ntimestep + 1; // zero out counters nswap_attempts = 0.0; nswap_successes = 0.0; atom_swap_nmax = 0; local_swap_atom_list = NULL; local_swap_iatom_list = NULL; local_swap_jatom_list = NULL; // set comm size needed by this Fix if (atom->q_flag) comm_forward = 2; else comm_forward = 1; } /* ---------------------------------------------------------------------- parse optional parameters at end of input line ------------------------------------------------------------------------- */ void FixAtomSwap::options(int narg, char **arg) { if (narg < 0) error->all(FLERR,"Illegal fix atom/swap command"); regionflag = 0; conserve_ke_flag = 1; semi_grand_flag = 0; nswaptypes = 0; nmutypes = 0; iregion = -1; int iarg = 0; while (iarg < narg) { if (strcmp(arg[iarg],"region") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal fix atom/swap command"); iregion = domain->find_region(arg[iarg+1]); if (iregion == -1) error->all(FLERR,"Region ID for fix atom/swap does not exist"); int n = strlen(arg[iarg+1]) + 1; idregion = new char[n]; strcpy(idregion,arg[iarg+1]); regionflag = 1; iarg += 2; } else if (strcmp(arg[iarg],"ke") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal fix atom/swap command"); if (strcmp(arg[iarg+1],"no") == 0) conserve_ke_flag = 0; else if (strcmp(arg[iarg+1],"yes") == 0) conserve_ke_flag = 1; else error->all(FLERR,"Illegal fix atom/swap command"); iarg += 2; } else if (strcmp(arg[iarg],"semi-grand") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal fix atom/swap command"); if (strcmp(arg[iarg+1],"no") == 0) semi_grand_flag = 0; else if (strcmp(arg[iarg+1],"yes") == 0) semi_grand_flag = 1; else error->all(FLERR,"Illegal fix atom/swap command"); iarg += 2; } else if (strcmp(arg[iarg],"types") == 0) { if (iarg+3 > 
narg) error->all(FLERR,"Illegal fix atom/swap command"); iarg++; while (iarg < narg) { if (isalpha(arg[iarg][0])) break; if (nswaptypes >= atom->ntypes) error->all(FLERR,"Illegal fix atom/swap command"); type_list[nswaptypes] = force->numeric(FLERR,arg[iarg]); nswaptypes++; iarg++; } } else if (strcmp(arg[iarg],"mu") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal fix atom/swap command"); iarg++; while (iarg < narg) { if (isalpha(arg[iarg][0])) break; nmutypes++; if (nmutypes > atom->ntypes) error->all(FLERR,"Illegal fix atom/swap command"); mu[nmutypes] = force->numeric(FLERR,arg[iarg]); iarg++; } } else error->all(FLERR,"Illegal fix atom/swap command"); } } /* ---------------------------------------------------------------------- */ FixAtomSwap::~FixAtomSwap() { memory->destroy(type_list); memory->destroy(mu); memory->destroy(qtype); memory->destroy(sqrt_mass_ratio); if (regionflag) delete [] idregion; delete random_equal; delete random_unequal; } /* ---------------------------------------------------------------------- */ int FixAtomSwap::setmask() { int mask = 0; mask |= PRE_EXCHANGE; return mask; } /* ---------------------------------------------------------------------- */ void FixAtomSwap::init() { char *id_pe = (char *) "thermo_pe"; int ipe = modify->find_compute(id_pe); c_pe = modify->compute[ipe]; int *type = atom->type; if (nswaptypes < 2) error->all(FLERR,"Must specify at least 2 types in fix atom/swap command"); if (semi_grand_flag) { if (nswaptypes != nmutypes) error->all(FLERR,"Need nswaptypes mu values in fix atom/swap command"); } else { if (nswaptypes != 2) error->all(FLERR,"Only 2 types allowed when not using semi-grand in fix atom/swap command"); if (nmutypes != 0) error->all(FLERR,"Mu not allowed when not using semi-grand in fix atom/swap command"); } for (int iswaptype = 0; iswaptype < nswaptypes; iswaptype++) if (type_list[iswaptype] <= 0 || type_list[iswaptype] > atom->ntypes) error->all(FLERR,"Invalid atom type in fix atom/swap 
command"); // this is only required for non-semi-grand // in which case, nswaptypes = 2 if (atom->q_flag && !semi_grand_flag) { double qmax,qmin; int firstall,first; memory->create(qtype,nswaptypes,"atom/swap:qtype"); for (int iswaptype = 0; iswaptype < nswaptypes; iswaptype++) { first = 1; for (int i = 0; i < atom->nlocal; i++) { if (atom->mask[i] & groupbit) { if (type[i] == type_list[iswaptype]) { if (first) { qtype[iswaptype] = atom->q[i]; first = 0; } else if (qtype[iswaptype] != atom->q[i]) error->one(FLERR,"All atoms of a swapped type must have the same charge."); } MPI_Allreduce(&first,&firstall,1,MPI_INT,MPI_MIN,world); if (firstall) error->all(FLERR,"At least one atom of each swapped type must be present to define charges."); if (first) qtype[iswaptype] = -DBL_MAX; MPI_Allreduce(&qtype[iswaptype],&qmax,1,MPI_DOUBLE,MPI_MAX,world); if (first) qtype[iswaptype] = DBL_MAX; MPI_Allreduce(&qtype[iswaptype],&qmin,1,MPI_DOUBLE,MPI_MIN,world); if (qmax != qmin) error->all(FLERR,"All atoms of a swapped type must have same charge."); } } } } memory->create(sqrt_mass_ratio,atom->ntypes+1,atom->ntypes+1,"atom/swap:sqrt_mass_ratio"); for (int itype = 1; itype <= atom->ntypes; itype++) for (int jtype = 1; jtype <= atom->ntypes; jtype++) sqrt_mass_ratio[itype][jtype] = sqrt(atom->mass[itype]/atom->mass[jtype]); // check to see if itype and jtype cutoffs are the same // if not, reneighboring will be needed between swaps double **cutsq = force->pair->cutsq; unequal_cutoffs = false; for (int iswaptype = 0; iswaptype < nswaptypes; iswaptype++) for (int jswaptype = 0; jswaptype < nswaptypes; jswaptype++) for (int ktype = 1; ktype <= atom->ntypes; ktype++) if (cutsq[type_list[iswaptype]][ktype] != cutsq[type_list[jswaptype]][ktype]) unequal_cutoffs = true; // check that no swappable atoms are in atom->firstgroup // swapping such an atom might not leave firstgroup atoms first if (atom->firstgroup >= 0) { int *mask = atom->mask; int firstgroupbit = 
group->bitmask[atom->firstgroup]; int flag = 0; for (int i = 0; i < atom->nlocal; i++) if ((mask[i] == groupbit) && (mask[i] && firstgroupbit)) flag = 1; int flagall; MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); if (flagall) error->all(FLERR,"Cannot do atom/swap on atoms in atom_modify first group"); } } /* ---------------------------------------------------------------------- attempt Monte Carlo swaps ------------------------------------------------------------------------- */ void FixAtomSwap::pre_exchange() { // just return if should not be called on this timestep if (next_reneighbor != update->ntimestep) return; if (domain->triclinic) domain->x2lamda(atom->nlocal); domain->pbc(); comm->exchange(); comm->borders(); if (domain->triclinic) domain->lamda2x(atom->nlocal+atom->nghost); if (modify->n_pre_neighbor) modify->pre_neighbor(); neighbor->build(); energy_stored = energy_full(); int nsuccess = 0; if (semi_grand_flag) { update_semi_grand_atoms_list(); for (int i = 0; i < ncycles; i++) nsuccess += attempt_semi_grand(); } else { update_swap_atoms_list(); for (int i = 0; i < ncycles; i++) nsuccess += attempt_swap(); } nswap_attempts += ncycles; nswap_successes += nsuccess; energy_full(); next_reneighbor = update->ntimestep + nevery; } /* ---------------------------------------------------------------------- Note: atom charges are assumed equal and so are not updated ------------------------------------------------------------------------- */ int FixAtomSwap::attempt_semi_grand() { if (nswap == 0) return 0; double energy_before = energy_stored; int itype,jtype,jswaptype; - double qtmp; - int i = pick_semi_grand_atom(); if (i >= 0) { jswaptype = static_cast (nswaptypes*random_unequal->uniform()); jtype = type_list[jswaptype]; itype = atom->type[i]; while (itype == jtype) { jswaptype = static_cast (nswaptypes*random_unequal->uniform()); jtype = type_list[jswaptype]; } atom->type[i] = jtype; } if (unequal_cutoffs) { if (domain->triclinic) 
domain->x2lamda(atom->nlocal); domain->pbc(); comm->exchange(); comm->borders(); if (domain->triclinic) domain->lamda2x(atom->nlocal+atom->nghost); if (modify->n_pre_neighbor) modify->pre_neighbor(); neighbor->build(); } else { comm->forward_comm_fix(this); } if (force->kspace) force->kspace->qsum_qsq(); double energy_after = energy_full(); int success = 0; if (i >= 0) if (random_unequal->uniform() < exp(-beta*(energy_after - energy_before + mu[jtype] - mu[itype]))) success = 1; int success_all = 0; MPI_Allreduce(&success,&success_all,1,MPI_INT,MPI_MAX,world); if (success_all) { update_semi_grand_atoms_list(); energy_stored = energy_after; if (conserve_ke_flag) { if (i >= 0) { atom->v[i][0] *= sqrt_mass_ratio[itype][jtype]; atom->v[i][1] *= sqrt_mass_ratio[itype][jtype]; atom->v[i][2] *= sqrt_mass_ratio[itype][jtype]; } } return 1; } else { if (i >= 0) { atom->type[i] = itype; } if (force->kspace) force->kspace->qsum_qsq(); energy_stored = energy_before; if (unequal_cutoffs) { if (domain->triclinic) domain->x2lamda(atom->nlocal); domain->pbc(); comm->exchange(); comm->borders(); if (domain->triclinic) domain->lamda2x(atom->nlocal+atom->nghost); if (modify->n_pre_neighbor) modify->pre_neighbor(); neighbor->build(); } else { comm->forward_comm_fix(this); } } return 0; } /* ---------------------------------------------------------------------- ------------------------------------------------------------------------- */ int FixAtomSwap::attempt_swap() { if ((niswap == 0) || (njswap == 0)) return 0; double energy_before = energy_stored; int i = pick_i_swap_atom(); int j = pick_j_swap_atom(); int itype = type_list[0]; int jtype = type_list[1]; if (i >= 0) { atom->type[i] = jtype; if (atom->q_flag) atom->q[i] = qtype[1]; } if (j >= 0) { atom->type[j] = itype; if (atom->q_flag) atom->q[j] = qtype[0]; } if (unequal_cutoffs) { if (domain->triclinic) domain->x2lamda(atom->nlocal); domain->pbc(); comm->exchange(); comm->borders(); if (domain->triclinic) 
domain->lamda2x(atom->nlocal+atom->nghost); if (modify->n_pre_neighbor) modify->pre_neighbor(); neighbor->build(); } else { comm->forward_comm_fix(this); } double energy_after = energy_full(); if (random_equal->uniform() < exp(beta*(energy_before - energy_after))) { update_swap_atoms_list(); energy_stored = energy_after; if (conserve_ke_flag) { if (i >= 0) { atom->v[i][0] *= sqrt_mass_ratio[itype][jtype]; atom->v[i][1] *= sqrt_mass_ratio[itype][jtype]; atom->v[i][2] *= sqrt_mass_ratio[itype][jtype]; } if (j >= 0) { atom->v[j][0] *= sqrt_mass_ratio[jtype][itype]; atom->v[j][1] *= sqrt_mass_ratio[jtype][itype]; atom->v[j][2] *= sqrt_mass_ratio[jtype][itype]; } } return 1; } else { if (i >= 0) { atom->type[i] = type_list[0]; if (atom->q_flag) atom->q[i] = qtype[0]; } if (j >= 0) { atom->type[j] = type_list[1]; if (atom->q_flag) atom->q[j] = qtype[1]; } energy_stored = energy_before; if (unequal_cutoffs) { if (domain->triclinic) domain->x2lamda(atom->nlocal); domain->pbc(); comm->exchange(); comm->borders(); if (domain->triclinic) domain->lamda2x(atom->nlocal+atom->nghost); if (modify->n_pre_neighbor) modify->pre_neighbor(); neighbor->build(); } else { comm->forward_comm_fix(this); } } return 0; } /* ---------------------------------------------------------------------- compute system potential energy ------------------------------------------------------------------------- */ double FixAtomSwap::energy_full() { int eflag = 1; int vflag = 0; if (modify->n_pre_neighbor) modify->pre_neighbor(); if (modify->n_pre_force) modify->pre_force(vflag); if (force->pair) force->pair->compute(eflag,vflag); if (atom->molecular) { if (force->bond) force->bond->compute(eflag,vflag); if (force->angle) force->angle->compute(eflag,vflag); if (force->dihedral) force->dihedral->compute(eflag,vflag); if (force->improper) force->improper->compute(eflag,vflag); } if (force->kspace) force->kspace->compute(eflag,vflag); if (modify->n_post_force) modify->post_force(vflag); if 
(modify->n_end_of_step) modify->end_of_step(); update->eflag_global = update->ntimestep; double total_energy = c_pe->compute_scalar(); return total_energy; } /* ---------------------------------------------------------------------- ------------------------------------------------------------------------- */ int FixAtomSwap::pick_semi_grand_atom() { int i = -1; int iwhichglobal = static_cast (nswap*random_equal->uniform()); if ((iwhichglobal >= nswap_before) && (iwhichglobal < nswap_before + nswap_local)) { int iwhichlocal = iwhichglobal - nswap_before; i = local_swap_atom_list[iwhichlocal]; } return i; } /* ---------------------------------------------------------------------- ------------------------------------------------------------------------- */ int FixAtomSwap::pick_i_swap_atom() { int i = -1; int iwhichglobal = static_cast (niswap*random_equal->uniform()); if ((iwhichglobal >= niswap_before) && (iwhichglobal < niswap_before + niswap_local)) { int iwhichlocal = iwhichglobal - niswap_before; i = local_swap_iatom_list[iwhichlocal]; } return i; } /* ---------------------------------------------------------------------- ------------------------------------------------------------------------- */ int FixAtomSwap::pick_j_swap_atom() { int j = -1; int jwhichglobal = static_cast (njswap*random_equal->uniform()); if ((jwhichglobal >= njswap_before) && (jwhichglobal < njswap_before + njswap_local)) { int jwhichlocal = jwhichglobal - njswap_before; j = local_swap_jatom_list[jwhichlocal]; } return j; } /* ---------------------------------------------------------------------- update the list of gas atoms ------------------------------------------------------------------------- */ void FixAtomSwap::update_semi_grand_atoms_list() { int nlocal = atom->nlocal; double **x = atom->x; if (nlocal > atom_swap_nmax) { memory->sfree(local_swap_atom_list); atom_swap_nmax = atom->nmax; local_swap_atom_list = (int *) memory->smalloc(atom_swap_nmax*sizeof(int), 
"MCSWAP:local_swap_atom_list"); } nswap_local = 0; if (regionflag) { for (int i = 0; i < nlocal; i++) { if (domain->regions[iregion]->match(x[i][0],x[i][1],x[i][2]) == 1) { if (atom->mask[i] & groupbit) { int itype = atom->type[i]; int iswaptype; for (iswaptype = 0; iswaptype < nswaptypes; iswaptype++) if (itype == type_list[iswaptype]) break; if (iswaptype == nswaptypes) continue; local_swap_atom_list[nswap_local] = i; nswap_local++; } } } } else { for (int i = 0; i < nlocal; i++) { if (atom->mask[i] & groupbit) { int itype = atom->type[i]; int iswaptype; for (iswaptype = 0; iswaptype < nswaptypes; iswaptype++) if (itype == type_list[iswaptype]) break; if (iswaptype == nswaptypes) continue; local_swap_atom_list[nswap_local] = i; nswap_local++; } } } MPI_Allreduce(&nswap_local,&nswap,1,MPI_INT,MPI_SUM,world); MPI_Scan(&nswap_local,&nswap_before,1,MPI_INT,MPI_SUM,world); nswap_before -= nswap_local; } /* ---------------------------------------------------------------------- update the list of gas atoms ------------------------------------------------------------------------- */ void FixAtomSwap::update_swap_atoms_list() { int nlocal = atom->nlocal; int *type = atom->type; double **x = atom->x; if (nlocal > atom_swap_nmax) { memory->sfree(local_swap_iatom_list); memory->sfree(local_swap_jatom_list); atom_swap_nmax = atom->nmax; local_swap_iatom_list = (int *) memory->smalloc(atom_swap_nmax*sizeof(int), "MCSWAP:local_swap_iatom_list"); local_swap_jatom_list = (int *) memory->smalloc(atom_swap_nmax*sizeof(int), "MCSWAP:local_swap_jatom_list"); } niswap_local = 0; njswap_local = 0; if (regionflag) { for (int i = 0; i < nlocal; i++) { if (domain->regions[iregion]->match(x[i][0],x[i][1],x[i][2]) == 1) { if (atom->mask[i] & groupbit) { if (type[i] == type_list[0]) { local_swap_iatom_list[niswap_local] = i; niswap_local++; } else if (type[i] == type_list[1]) { local_swap_jatom_list[njswap_local] = i; njswap_local++; } } } } } else { for (int i = 0; i < nlocal; i++) { if 
(atom->mask[i] & groupbit) { if (type[i] == type_list[0]) { local_swap_iatom_list[niswap_local] = i; niswap_local++; } else if (type[i] == type_list[1]) { local_swap_jatom_list[njswap_local] = i; njswap_local++; } } } } MPI_Allreduce(&niswap_local,&niswap,1,MPI_INT,MPI_SUM,world); MPI_Scan(&niswap_local,&niswap_before,1,MPI_INT,MPI_SUM,world); niswap_before -= niswap_local; MPI_Allreduce(&njswap_local,&njswap,1,MPI_INT,MPI_SUM,world); MPI_Scan(&njswap_local,&njswap_before,1,MPI_INT,MPI_SUM,world); njswap_before -= njswap_local; } /* ---------------------------------------------------------------------- */ int FixAtomSwap::pack_forward_comm(int n, int *list, double *buf, int pbc_flag, int *pbc) { int i,j,m; int *type = atom->type; double *q = atom->q; m = 0; if (atom->q_flag) { for (i = 0; i < n; i++) { j = list[i]; buf[m++] = type[j]; buf[m++] = q[j]; } } else { for (i = 0; i < n; i++) { j = list[i]; buf[m++] = type[j]; } } return m; } /* ---------------------------------------------------------------------- */ void FixAtomSwap::unpack_forward_comm(int n, int first, double *buf) { int i,m,last; int *type = atom->type; double *q = atom->q; m = 0; last = first + n; if (atom->q_flag) { for (i = first; i < last; i++) { type[i] = static_cast (buf[m++]); q[i] = buf[m++]; } } else { for (i = first; i < last; i++) type[i] = static_cast (buf[m++]); } } /* ---------------------------------------------------------------------- return acceptance ratio ------------------------------------------------------------------------- */ double FixAtomSwap::compute_vector(int n) { if (n == 0) return nswap_attempts; if (n == 1) return nswap_successes; return 0.0; } /* ---------------------------------------------------------------------- memory usage of local atom-based arrays ------------------------------------------------------------------------- */ double FixAtomSwap::memory_usage() { double bytes = atom_swap_nmax * sizeof(int); return bytes; } /* 
---------------------------------------------------------------------- pack entire state of Fix into one write ------------------------------------------------------------------------- */ void FixAtomSwap::write_restart(FILE *fp) { int n = 0; double list[4]; list[n++] = random_equal->state(); list[n++] = random_unequal->state(); list[n++] = next_reneighbor; if (comm->me == 0) { int size = n * sizeof(double); fwrite(&size,sizeof(int),1,fp); fwrite(list,sizeof(double),n,fp); } } /* ---------------------------------------------------------------------- use state info from restart file to restart the Fix ------------------------------------------------------------------------- */ void FixAtomSwap::restart(char *buf) { int n = 0; double *list = (double *) buf; seed = static_cast (list[n++]); random_equal->reset(seed); seed = static_cast (list[n++]); random_unequal->reset(seed); next_reneighbor = static_cast (list[n++]); } diff --git a/src/USER-FEP/compute_fep.cpp b/src/USER-FEP/compute_fep.cpp index 97763b5ee..dd7d29be4 100644 --- a/src/USER-FEP/compute_fep.cpp +++ b/src/USER-FEP/compute_fep.cpp @@ -1,660 +1,666 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Agilio Padua (Univ Blaise Pascal & CNRS) ------------------------------------------------------------------------- */ #include "stdlib.h" #include "string.h" #include "math.h" #include "mpi.h" #include "comm.h" #include "update.h" #include "atom.h" #include "domain.h" #include "force.h" #include "pair.h" #include "pair_hybrid.h" #include "kspace.h" #include "input.h" #include "fix.h" #include "modify.h" #include "variable.h" #include "timer.h" #include "memory.h" #include "error.h" #include "compute_fep.h" using namespace LAMMPS_NS; enum{PAIR,ATOM}; enum{CHARGE}; /* ---------------------------------------------------------------------- */ ComputeFEP::ComputeFEP(LAMMPS *lmp, int narg, char **arg) : Compute(lmp, narg, arg) { if (narg < 5) error->all(FLERR,"Illegal number of arguments in compute fep"); scalar_flag = 0; vector_flag = 1; size_vector = 3; extvector = 0; vector = new double[3]; fepinitflag = 0; // avoid init to run entirely when called by write_data temp_fep = force->numeric(FLERR,arg[3]); // count # of perturbations npert = 0; int iarg = 4; while (iarg < narg) { if (strcmp(arg[iarg],"pair") == 0) { if (iarg+6 > narg) error->all(FLERR, "Illegal pair attribute in compute fep"); npert++; iarg += 6; } else if (strcmp(arg[iarg],"atom") == 0) { if (iarg+4 > narg) error->all(FLERR, "Illegal atom attribute in compute fep"); npert++; iarg += 4; } else break; } if (npert == 0) error->all(FLERR,"Illegal syntax in compute fep"); perturb = new Perturb[npert]; // parse keywords npert = 0; chgflag = 0; iarg = 4; while (iarg < narg) { if (strcmp(arg[iarg],"pair") == 0) { perturb[npert].which = PAIR; int n = strlen(arg[iarg+1]) + 1; perturb[npert].pstyle = new char[n]; strcpy(perturb[npert].pstyle,arg[iarg+1]); n = strlen(arg[iarg+2]) + 1; perturb[npert].pparam = new char[n]; 
strcpy(perturb[npert].pparam,arg[iarg+2]); force->bounds(arg[iarg+3],atom->ntypes, perturb[npert].ilo,perturb[npert].ihi); force->bounds(arg[iarg+4],atom->ntypes, perturb[npert].jlo,perturb[npert].jhi); if (strstr(arg[iarg+5],"v_") == arg[iarg+5]) { n = strlen(&arg[iarg+5][2]) + 1; perturb[npert].var = new char[n]; strcpy(perturb[npert].var,&arg[iarg+5][2]); } else error->all(FLERR,"Illegal variable in compute fep"); npert++; iarg += 6; } else if (strcmp(arg[iarg],"atom") == 0) { perturb[npert].which = ATOM; if (strcmp(arg[iarg+1],"charge") == 0) { perturb[npert].aparam = CHARGE; chgflag = 1; } else error->all(FLERR,"Illegal atom argument in compute fep"); force->bounds(arg[iarg+2],atom->ntypes, perturb[npert].ilo,perturb[npert].ihi); if (strstr(arg[iarg+3],"v_") == arg[iarg+3]) { int n = strlen(&arg[iarg+3][2]) + 1; perturb[npert].var = new char[n]; strcpy(perturb[npert].var,&arg[iarg+3][2]); } else error->all(FLERR,"Illegal variable in compute fep"); npert++; iarg += 4; } else break; } // optional keywords tailflag = 0; volumeflag = 0; while (iarg < narg) { if (strcmp(arg[iarg],"tail") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal optional keyword " "in compute fep"); if (strcmp(arg[iarg+1],"no") == 0) tailflag = 0; else if (strcmp(arg[iarg+1],"yes") == 0) tailflag = 1; else error->all(FLERR,"Illegal optional keyword in compute fep"); iarg += 2; } else if (strcmp(arg[iarg],"volume") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal optional keyword " "in compute fep"); if (strcmp(arg[iarg+1],"no") == 0) volumeflag = 0; else if (strcmp(arg[iarg+1],"yes") == 0) volumeflag = 1; else error->all(FLERR,"Illegal optional keyword in compute fep"); iarg += 2; } else error->all(FLERR,"Illegal optional keyword in compute fep"); } // allocate pair style arrays int ntype = atom->ntypes; for (int m = 0; m < npert; m++) { if (perturb[m].which == PAIR) memory->create(perturb[m].array_orig,ntype+1,ntype+1,"fep:array_orig"); } // allocate space for charge, force, energy, 
virial arrays + f_orig = NULL; + q_orig = NULL; + peatom_orig = keatom_orig = NULL; + pvatom_orig = kvatom_orig = NULL; + allocate_storage(); fixgpu = NULL; } /* ---------------------------------------------------------------------- */ ComputeFEP::~ComputeFEP() { delete [] vector; for (int m = 0; m < npert; m++) { delete [] perturb[m].var; if (perturb[m].which == PAIR) { delete [] perturb[m].pstyle; delete [] perturb[m].pparam; memory->destroy(perturb[m].array_orig); } } delete [] perturb; deallocate_storage(); } /* ---------------------------------------------------------------------- */ void ComputeFEP::init() { int i,j; if (!fepinitflag) // avoid init to run entirely when called by write_data fepinitflag = 1; else return; // setup and error checks pairflag = 0; for (int m = 0; m < npert; m++) { Perturb *pert = &perturb[m]; pert->ivar = input->variable->find(pert->var); if (pert->ivar < 0) error->all(FLERR,"Variable name for compute fep does not exist"); if (!input->variable->equalstyle(pert->ivar)) error->all(FLERR,"Variable for compute fep is of invalid style"); if (force->pair == NULL) error->all(FLERR,"compute fep pair requires pair interactions"); if (pert->which == PAIR) { pairflag = 1; Pair *pair = force->pair_match(pert->pstyle,1); if (pair == NULL) error->all(FLERR,"compute fep pair style " "does not exist"); void *ptr = pair->extract(pert->pparam,pert->pdim); if (ptr == NULL) error->all(FLERR,"compute fep pair style param not supported"); pert->array = (double **) ptr; // if pair hybrid, test that ilo,ihi,jlo,jhi are valid for sub-style if ((strcmp(force->pair_style,"hybrid") == 0 || strcmp(force->pair_style,"hybrid/overlay") == 0)) { PairHybrid *pair = (PairHybrid *) force->pair; for (i = pert->ilo; i <= pert->ihi; i++) for (j = MAX(pert->jlo,i); j <= pert->jhi; j++) if (!pair->check_ijtype(i,j,pert->pstyle)) error->all(FLERR,"compute fep type pair range is not valid for " "pair hybrid sub-style"); } } else if (pert->which == ATOM) { if (pert->aparam 
== CHARGE) { if (!atom->q_flag) error->all(FLERR,"compute fep requires atom attribute charge"); } } } if (tailflag) { if (force->pair->tail_flag == 0) error->all(FLERR,"Compute fep tail when pair style does not " "compute tail corrections"); } // detect if package gpu is present int ifixgpu = modify->find_fix("package_gpu"); if (ifixgpu >= 0) fixgpu = modify->fix[ifixgpu]; if (comm->me == 0) { if (screen) { fprintf(screen, "FEP settings ...\n"); fprintf(screen, " temperature = %f\n", temp_fep); fprintf(screen, " tail %s\n", (tailflag ? "yes":"no")); for (int m = 0; m < npert; m++) { Perturb *pert = &perturb[m]; if (pert->which == PAIR) fprintf(screen, " %s %s %d-%d %d-%d\n", pert->pstyle, pert->pparam, pert->ilo, pert->ihi, pert->jlo, pert->jhi); else if (pert->which == ATOM) fprintf(screen, " %d-%d charge\n", pert->ilo, pert->ihi); } } if (logfile) { fprintf(logfile, "FEP settings ...\n"); fprintf(logfile, " temperature = %f\n", temp_fep); fprintf(logfile, " tail %s\n", (tailflag ? "yes":"no")); for (int m = 0; m < npert; m++) { Perturb *pert = &perturb[m]; if (pert->which == PAIR) fprintf(logfile, " %s %s %d-%d %d-%d\n", pert->pstyle, pert->pparam, pert->ilo, pert->ihi, pert->jlo, pert->jhi); else if (pert->which == ATOM) fprintf(logfile, " %d-%d charge\n", pert->ilo, pert->ihi); } } } } /* ---------------------------------------------------------------------- */ void ComputeFEP::compute_vector() { double pe0,pe1; eflag = 1; vflag = 0; invoked_vector = update->ntimestep; if (atom->nmax > nmax) { // reallocate working arrays if necessary deallocate_storage(); allocate_storage(); } backup_qfev(); // backup charge, force, energy, virial array values backup_params(); // backup pair parameters timer->stamp(); if (force->pair && force->pair->compute_flag) { force->pair->compute(eflag,vflag); timer->stamp(Timer::PAIR); } if (chgflag && force->kspace && force->kspace->compute_flag) { force->kspace->compute(eflag,vflag); timer->stamp(Timer::KSPACE); } // accumulate 
force/energy/virial from /gpu pair styles if (fixgpu) fixgpu->post_force(vflag); pe0 = compute_epair(); perturb_params(); timer->stamp(); if (force->pair && force->pair->compute_flag) { force->pair->compute(eflag,vflag); timer->stamp(Timer::PAIR); } if (chgflag && force->kspace && force->kspace->compute_flag) { force->kspace->compute(eflag,vflag); timer->stamp(Timer::KSPACE); } // accumulate force/energy/virial from /gpu pair styles // this is required as to empty the answer queue, // otherwise the force compute on the GPU in the next step would be incorrect if (fixgpu) fixgpu->post_force(vflag); pe1 = compute_epair(); restore_qfev(); // restore charge, force, energy, virial array values restore_params(); // restore pair parameters vector[0] = pe1-pe0; vector[1] = exp(-(pe1-pe0)/(force->boltz*temp_fep)); vector[2] = domain->xprd * domain->yprd * domain->zprd; if (volumeflag) vector[1] *= vector[2]; } /* ---------------------------------------------------------------------- obtain pair energy from lammps accumulators ------------------------------------------------------------------------- */ double ComputeFEP::compute_epair() { double eng, eng_pair; eng = 0.0; if (force->pair) eng = force->pair->eng_vdwl + force->pair->eng_coul; MPI_Allreduce(&eng,&eng_pair,1,MPI_DOUBLE,MPI_SUM,world); if (tailflag) { double volume = domain->xprd * domain->yprd * domain->zprd; eng_pair += force->pair->etail / volume; } if (chgflag && force->kspace) eng_pair += force->kspace->energy; return eng_pair; } /* ---------------------------------------------------------------------- apply perturbation to pair, atom parameters based on variable evaluation ------------------------------------------------------------------------- */ void ComputeFEP::perturb_params() { int i,j; for (int m = 0; m < npert; m++) { Perturb *pert = &perturb[m]; double delta = input->variable->compute_equal(pert->ivar); if (pert->which == PAIR) { // modify pair parameters for (i = pert->ilo; i <= pert->ihi; i++) for 
(j = MAX(pert->jlo,i); j <= pert->jhi; j++) pert->array[i][j] = pert->array_orig[i][j] + delta; } else if (pert->which == ATOM) { if (pert->aparam == CHARGE) { // modify charges int *atype = atom->type; double *q = atom->q; int *mask = atom->mask; int natom = atom->nlocal + atom->nghost; for (i = 0; i < natom; i++) if (atype[i] >= pert->ilo && atype[i] <= pert->ihi) if (mask[i] & groupbit) q[i] += delta; } } } // re-initialize pair styles if any PAIR settings were changed // this resets other coeffs that may depend on changed values, // and also offset and tail corrections if (pairflag) force->pair->reinit(); // reset KSpace charges if charges have changed if (chgflag && force->kspace) force->kspace->qsum_qsq(); } /* ---------------------------------------------------------------------- backup pair parameters ------------------------------------------------------------------------- */ void ComputeFEP::backup_params() { int i,j; for (int m = 0; m < npert; m++) { Perturb *pert = &perturb[m]; if (pert->which == PAIR) { for (i = pert->ilo; i <= pert->ihi; i++) for (j = MAX(pert->jlo,i); j <= pert->jhi; j++) pert->array_orig[i][j] = pert->array[i][j]; } } } /* ---------------------------------------------------------------------- restore pair parameters to original values ------------------------------------------------------------------------- */ void ComputeFEP::restore_params() { int i,j; for (int m = 0; m < npert; m++) { Perturb *pert = &perturb[m]; if (pert->which == PAIR) { for (i = pert->ilo; i <= pert->ihi; i++) for (j = MAX(pert->jlo,i); j <= pert->jhi; j++) pert->array[i][j] = pert->array_orig[i][j]; } } if (pairflag) force->pair->reinit(); // reset KSpace charges if charges have changed if (chgflag && force->kspace) force->kspace->qsum_qsq(); } /* ---------------------------------------------------------------------- manage storage for charge, force, energy, virial arrays ------------------------------------------------------------------------- */ void 
ComputeFEP::allocate_storage() { nmax = atom->nmax; memory->create(f_orig,nmax,3,"fep:f_orig"); memory->create(peatom_orig,nmax,"fep:peatom_orig"); memory->create(pvatom_orig,nmax,6,"fep:pvatom_orig"); if (chgflag) { memory->create(q_orig,nmax,"fep:q_orig"); if (force->kspace) { memory->create(keatom_orig,nmax,"fep:keatom_orig"); memory->create(kvatom_orig,nmax,6,"fep:kvatom_orig"); } } } /* ---------------------------------------------------------------------- */ void ComputeFEP::deallocate_storage() { memory->destroy(f_orig); memory->destroy(peatom_orig); memory->destroy(pvatom_orig); - if (chgflag) { - memory->destroy(q_orig); - if (force && force->kspace) { - memory->destroy(keatom_orig); - memory->destroy(kvatom_orig); - } - } + memory->destroy(q_orig); + memory->destroy(keatom_orig); + memory->destroy(kvatom_orig); + + f_orig = NULL; + q_orig = NULL; + peatom_orig = keatom_orig = NULL; + pvatom_orig = kvatom_orig = NULL; } /* ---------------------------------------------------------------------- backup and restore arrays with charge, force, energy, virial ------------------------------------------------------------------------- */ void ComputeFEP::backup_qfev() { int i; int nall = atom->nlocal + atom->nghost; int natom = atom->nlocal; if (force->newton || force->kspace->tip4pflag) natom += atom->nghost; double **f = atom->f; for (i = 0; i < natom; i++) { f_orig[i][0] = f[i][0]; f_orig[i][1] = f[i][1]; f_orig[i][2] = f[i][2]; } eng_vdwl_orig = force->pair->eng_vdwl; eng_coul_orig = force->pair->eng_coul; pvirial_orig[0] = force->pair->virial[0]; pvirial_orig[1] = force->pair->virial[1]; pvirial_orig[2] = force->pair->virial[2]; pvirial_orig[3] = force->pair->virial[3]; pvirial_orig[4] = force->pair->virial[4]; pvirial_orig[5] = force->pair->virial[5]; if (update->eflag_atom) { double *peatom = force->pair->eatom; for (i = 0; i < natom; i++) peatom_orig[i] = peatom[i]; } if (update->vflag_atom) { double **pvatom = force->pair->vatom; for (i = 0; i < natom; i++) 
{ pvatom_orig[i][0] = pvatom[i][0]; pvatom_orig[i][1] = pvatom[i][1]; pvatom_orig[i][2] = pvatom[i][2]; pvatom_orig[i][3] = pvatom[i][3]; pvatom_orig[i][4] = pvatom[i][4]; pvatom_orig[i][5] = pvatom[i][5]; } } if (chgflag) { double *q = atom->q; for (i = 0; i < nall; i++) q_orig[i] = q[i]; if (force->kspace) { energy_orig = force->kspace->energy; kvirial_orig[0] = force->kspace->virial[0]; kvirial_orig[1] = force->kspace->virial[1]; kvirial_orig[2] = force->kspace->virial[2]; kvirial_orig[3] = force->kspace->virial[3]; kvirial_orig[4] = force->kspace->virial[4]; kvirial_orig[5] = force->kspace->virial[5]; if (update->eflag_atom) { double *keatom = force->kspace->eatom; for (i = 0; i < natom; i++) keatom_orig[i] = keatom[i]; } if (update->vflag_atom) { double **kvatom = force->kspace->vatom; for (i = 0; i < natom; i++) { kvatom_orig[i][0] = kvatom[i][0]; kvatom_orig[i][1] = kvatom[i][1]; kvatom_orig[i][2] = kvatom[i][2]; kvatom_orig[i][3] = kvatom[i][3]; kvatom_orig[i][4] = kvatom[i][4]; kvatom_orig[i][5] = kvatom[i][5]; } } } } } /* ---------------------------------------------------------------------- */ void ComputeFEP::restore_qfev() { int i; int nall = atom->nlocal + atom->nghost; int natom = atom->nlocal; if (force->newton || force->kspace->tip4pflag) natom += atom->nghost; double **f = atom->f; for (i = 0; i < natom; i++) { f[i][0] = f_orig[i][0]; f[i][1] = f_orig[i][1]; f[i][2] = f_orig[i][2]; } force->pair->eng_vdwl = eng_vdwl_orig; force->pair->eng_coul = eng_coul_orig; force->pair->virial[0] = pvirial_orig[0]; force->pair->virial[1] = pvirial_orig[1]; force->pair->virial[2] = pvirial_orig[2]; force->pair->virial[3] = pvirial_orig[3]; force->pair->virial[4] = pvirial_orig[4]; force->pair->virial[5] = pvirial_orig[5]; if (update->eflag_atom) { double *peatom = force->pair->eatom; for (i = 0; i < natom; i++) peatom[i] = peatom_orig[i]; } if (update->vflag_atom) { double **pvatom = force->pair->vatom; for (i = 0; i < natom; i++) { pvatom[i][0] = 
pvatom_orig[i][0]; pvatom[i][1] = pvatom_orig[i][1]; pvatom[i][2] = pvatom_orig[i][2]; pvatom[i][3] = pvatom_orig[i][3]; pvatom[i][4] = pvatom_orig[i][4]; pvatom[i][5] = pvatom_orig[i][5]; } } if (chgflag) { double *q = atom->q; for (i = 0; i < nall; i++) q[i] = q_orig[i]; if (force->kspace) { force->kspace->energy = energy_orig; force->kspace->virial[0] = kvirial_orig[0]; force->kspace->virial[1] = kvirial_orig[1]; force->kspace->virial[2] = kvirial_orig[2]; force->kspace->virial[3] = kvirial_orig[3]; force->kspace->virial[4] = kvirial_orig[4]; force->kspace->virial[5] = kvirial_orig[5]; if (update->eflag_atom) { double *keatom = force->kspace->eatom; for (i = 0; i < natom; i++) keatom[i] = keatom_orig[i]; } if (update->vflag_atom) { double **kvatom = force->kspace->vatom; for (i = 0; i < natom; i++) { kvatom[i][0] = kvatom_orig[i][0]; kvatom[i][1] = kvatom_orig[i][1]; kvatom[i][2] = kvatom_orig[i][2]; kvatom[i][3] = kvatom_orig[i][3]; kvatom[i][4] = kvatom_orig[i][4]; kvatom[i][5] = kvatom_orig[i][5]; } } } } } diff --git a/src/USER-INTEL/README b/src/USER-INTEL/README index b9d391fc3..929bd0087 100644 --- a/src/USER-INTEL/README +++ b/src/USER-INTEL/README @@ -1,52 +1,54 @@ -------------------------------- LAMMPS Intel(R) Package -------------------------------- W. Michael Brown (Intel) michael.w.brown at intel.com Anupama Kurpad (Intel) + Biswajit Mishra (Shell) ----------------------------------------------------------------------------- This package is based on the USER-OMP package and provides LAMMPS styles that: 1. include support for single and mixed precision in addition to double. 2. include modifications to support vectorization for key routines 3. 
include modifications to support offload to Intel(R) Xeon Phi(TM) coprocessors ----------------------------------------------------------------------------- When using the suffix command with "intel", intel styles will be used if they exist; if they do not, and the USER-OMP package is installed and an omp version exists, that style will be used. For example, in the case the USER-OMP package is installed, kspace_style pppm/intel 1e-4 is equivalent to: kspace_style pppm/omp 1e-4 because no pppm style has been implemented for the Intel package. ----------------------------------------------------------------------------- In order to use offload to Intel(R) Xeon Phi(TM) coprocessors, the flag -DLMP_INTEL_OFFLOAD should be set in the Makefile. Offload requires the use of Intel compilers. ----------------------------------------------------------------------------- For portability reasons, vectorization directives are currently only enabled for Intel compilers. Using other compilers may result in significantly -lower performance. +lower performance. This behavior can be changed by defining +LMP_SIMD_COMPILER for the preprocessor (see intel_preprocess.h). ----------------------------------------------------------------------------- By default, when running with offload to Intel(R) coprocessors, affinity for host MPI tasks and OpenMP threads is set automatically within the code. This currently requires the use of system calls. To disable at build time, compile with -DINTEL_OFFLOAD_NOAFFINITY. diff --git a/src/USER-INTEL/fix_intel.cpp b/src/USER-INTEL/fix_intel.cpp index c0847a8bf..a67eb58b6 100644 --- a/src/USER-INTEL/fix_intel.cpp +++ b/src/USER-INTEL/fix_intel.cpp @@ -1,796 +1,800 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. 
Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: W. Michael Brown (Intel) Anupama Kurpad (Intel) - Host Affinitization ------------------------------------------------------------------------- */ #include "comm.h" #include "error.h" #include "force.h" #include "neighbor.h" #include "neigh_request.h" #include "pair.h" #include "pair_hybrid.h" #include "pair_hybrid_overlay.h" #include "timer.h" #include "universe.h" #include "update.h" #include "fix_intel.h" #include #include #include #ifdef _LMP_INTEL_OFFLOAD #ifndef INTEL_OFFLOAD_NOAFFINITY #include #endif #endif #include "suffix.h" using namespace LAMMPS_NS; using namespace FixConst; #ifdef __INTEL_OFFLOAD #ifndef _LMP_INTEL_OFFLOAD #warning "Not building Intel package with Xeon Phi offload support." 
#endif #endif enum{NSQ,BIN,MULTI}; /* ---------------------------------------------------------------------- */ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg) { if (narg < 4) error->all(FLERR,"Illegal package intel command"); int ncops = force->inumeric(FLERR,arg[3]); _precision_mode = PREC_MODE_MIXED; _offload_balance = 1.0; _overflow_flag[LMP_OVERFLOW] = 0; _off_overflow_flag[LMP_OVERFLOW] = 0; _offload_affinity_balanced = 0; _offload_threads = 0; _offload_tpc = 4; #ifdef _LMP_INTEL_OFFLOAD if (ncops < 0) error->all(FLERR,"Illegal package intel command"); _offload_affinity_set = 0; _off_force_array_s = 0; _off_force_array_m = 0; _off_force_array_d = 0; _off_ev_array_s = 0; _off_ev_array_d = 0; _balance_fixed = 0.0; _cop = 0; #endif // optional keywords int nomp = 0, no_affinity = 0; _allow_separate_buffers = 1; _offload_ghost = -1; int iarg = 4; while (iarg < narg) { if (strcmp(arg[iarg],"omp") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package intel command"); nomp = force->inumeric(FLERR,arg[iarg+1]); iarg += 2; } else if (strcmp(arg[iarg],"mode") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package intel command"); if (strcmp(arg[iarg+1],"single") == 0) _precision_mode = PREC_MODE_SINGLE; else if (strcmp(arg[iarg+1],"mixed") == 0) _precision_mode = PREC_MODE_MIXED; else if (strcmp(arg[iarg+1],"double") == 0) _precision_mode = PREC_MODE_DOUBLE; else error->all(FLERR,"Illegal package intel command"); iarg += 2; } else if (strcmp(arg[iarg],"balance") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package intel command"); _offload_balance = force->numeric(FLERR,arg[iarg+1]); iarg += 2; } else if (strcmp(arg[iarg], "ghost") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package intel command"); if (strcmp(arg[iarg+1],"yes") == 0) _offload_ghost = 1; else if (strcmp(arg[iarg+1],"no") == 0) _offload_ghost = 0; else error->all(FLERR,"Illegal package intel command"); iarg += 2; } else if (strcmp(arg[iarg], 
"tpc") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package intel command"); _offload_tpc = atoi(arg[iarg+1]); iarg += 2; } else if (strcmp(arg[iarg],"tptask") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package intel command"); _offload_threads = atoi(arg[iarg+1]); iarg += 2; } else if (strcmp(arg[iarg],"no_affinity") == 0) { no_affinity = 1; iarg++; } // undocumented options else if (strcmp(arg[iarg],"offload_affinity_balanced") == 0) { _offload_affinity_balanced = 1; iarg++; } else if (strcmp(arg[iarg],"buffers") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package intel command"); _allow_separate_buffers = atoi(arg[iarg+1]); iarg += 2; } else error->all(FLERR,"Illegal package intel command"); } // if ncops is zero, just run on the cpu if (ncops < 1) { ncops = -1; _offload_balance = 0.0; } // error check if (_offload_balance > 1.0 || _offload_threads < 0 || _offload_tpc <= 0 || _offload_tpc > 4 || nomp < 0) error->all(FLERR,"Illegal package intel command"); #ifdef _LMP_INTEL_OFFLOAD _ncops = ncops; if (_offload_balance != 0.0) { _real_space_comm = MPI_COMM_WORLD; if (no_affinity == 0) if (set_host_affinity(nomp) != 0) error->all(FLERR,"Could not set host affinity for offload tasks"); } int max_offload_threads = 0, offload_cores = 0; if (_offload_balance != 0.0) { #pragma offload target(mic:_cop) mandatory \ out(max_offload_threads,offload_cores) { offload_cores = omp_get_num_procs(); omp_set_num_threads(offload_cores); max_offload_threads = omp_get_max_threads(); } _max_offload_threads = max_offload_threads; _offload_cores = offload_cores; if (_offload_threads == 0) _offload_threads = offload_cores; } #endif // set OpenMP threads // nomp is user setting, default = 0 #if defined(_OPENMP) if (nomp != 0) { omp_set_num_threads(nomp); comm->nthreads = nomp; } else { int nthreads; #pragma omp parallel default(none) shared(nthreads) nthreads = omp_get_num_threads(); comm->nthreads = nthreads; } #endif // set offload params #ifdef 
_LMP_INTEL_OFFLOAD if (_offload_balance < 0.0) { _balance_neighbor = 0.9; _balance_pair = 0.9; } else { _balance_neighbor = _offload_balance; _balance_pair = _offload_balance; } _tscreen = screen; zero_timers(); _setup_time_cleared = false; _timers_allocated = false; #else _offload_balance = 0.0; #endif // set precision if (_precision_mode == PREC_MODE_SINGLE) _single_buffers = new IntelBuffers(lmp); else if (_precision_mode == PREC_MODE_MIXED) _mixed_buffers = new IntelBuffers(lmp); else _double_buffers = new IntelBuffers(lmp); } /* ---------------------------------------------------------------------- */ FixIntel::~FixIntel() { #ifdef _LMP_INTEL_OFFLOAD output_timing_data(); if (_timers_allocated) { double *time1 = off_watch_pair(); double *time2 = off_watch_neighbor(); int *overflow = get_off_overflow_flag(); if (_offload_balance != 0.0 && time1 != NULL && time2 != NULL && overflow != NULL) { #pragma offload_transfer target(mic:_cop) \ nocopy(time1,time2,overflow:alloc_if(0) free_if(1)) } } #endif if (_precision_mode == PREC_MODE_SINGLE) delete _single_buffers; else if (_precision_mode == PREC_MODE_MIXED) delete _mixed_buffers; else delete _double_buffers; } /* ---------------------------------------------------------------------- */ int FixIntel::setmask() { int mask = 0; return mask; } /* ---------------------------------------------------------------------- */ void FixIntel::init() { #ifdef _LMP_INTEL_OFFLOAD output_timing_data(); #endif int nstyles = 0; if (force->pair_match("hybrid", 1) != NULL) { PairHybrid *hybrid = (PairHybrid *) force->pair; for (int i = 0; i < hybrid->nstyles; i++) if (strstr(hybrid->keywords[i], "/intel") != NULL) nstyles++; } else if (force->pair_match("hybrid/overlay", 1) != NULL) { PairHybridOverlay *hybrid = (PairHybridOverlay *) force->pair; for (int i = 0; i < hybrid->nstyles; i++) if (strstr(hybrid->keywords[i], "/intel") != NULL) nstyles++; else force->pair->no_virial_fdotr_compute = 1; } if (nstyles > 1) error->all(FLERR, 
"Currently, cannot use more than one intel style with hybrid."); neighbor->fix_intel = (void *)this; check_neighbor_intel(); if (_precision_mode == PREC_MODE_SINGLE) _single_buffers->zero_ev(); else if (_precision_mode == PREC_MODE_MIXED) _mixed_buffers->zero_ev(); else _double_buffers->zero_ev(); } /* ---------------------------------------------------------------------- */ void FixIntel::setup(int vflag) { if (neighbor->style != BIN) error->all(FLERR, "Currently, neighbor style BIN must be used with Intel package."); if (neighbor->exclude_setting() != 0) error->all(FLERR, "Currently, cannot use neigh_modify exclude with Intel package."); } /* ---------------------------------------------------------------------- */ void FixIntel::pair_init_check() { + #ifdef INTEL_VMASK + atom->sortfreq = 1; + #endif + #ifdef _LMP_INTEL_OFFLOAD if (_offload_balance != 0.0) atom->sortfreq = 1; if (force->newton_pair == 0) _offload_noghost = 0; else if (_offload_ghost == 0) _offload_noghost = 1; set_offload_affinity(); if (!_timers_allocated) { double *time1 = off_watch_pair(); double *time2 = off_watch_neighbor(); int *overflow = get_off_overflow_flag(); if (_offload_balance !=0.0 && time1 != NULL && time2 != NULL && overflow != NULL) { #pragma offload_transfer target(mic:_cop) \ nocopy(time1,time2:length(1) alloc_if(1) free_if(0)) \ in(overflow:length(5) alloc_if(1) free_if(0)) } _timers_allocated = true; } if (update->whichflag == 2 && _offload_balance != 0.0) { if (_offload_balance == 1.0 && _offload_noghost == 0) _sync_at_pair = 1; else _sync_at_pair = 2; } else { _sync_at_pair = 0; if (strstr(update->integrate_style,"intel") == 0) error->all(FLERR, "Specified run_style does not support the Intel package."); } #endif _nthreads = comm->nthreads; if (_offload_balance != 0.0 && comm->me == 0) { #ifndef __INTEL_COMPILER_BUILD_DATE error->warning(FLERR, "Unknown Intel Compiler Version\n"); #else if (__INTEL_COMPILER_BUILD_DATE != 20131008 && __INTEL_COMPILER_BUILD_DATE < 20141023) 
error->warning(FLERR, "Unsupported Intel Compiler."); #endif #if !defined(__INTEL_COMPILER) error->warning(FLERR, "Unsupported Intel Compiler."); #endif } int need_tag = 0; if (atom->molecular) need_tag = 1; // Clear buffers used for pair style char kmode[80]; if (_precision_mode == PREC_MODE_SINGLE) { strcpy(kmode, "single"); get_single_buffers()->free_all_nbor_buffers(); get_single_buffers()->need_tag(need_tag); } else if (_precision_mode == PREC_MODE_MIXED) { strcpy(kmode, "mixed"); get_mixed_buffers()->free_all_nbor_buffers(); get_mixed_buffers()->need_tag(need_tag); } else { strcpy(kmode, "double"); get_double_buffers()->free_all_nbor_buffers(); get_double_buffers()->need_tag(need_tag); } #ifdef _LMP_INTEL_OFFLOAD set_offload_affinity(); #endif if (comm->me == 0) { if (screen) { fprintf(screen, "----------------------------------------------------------\n"); if (_offload_balance != 0.0) { fprintf(screen,"Using Intel Coprocessor with %d threads per core, ", _offload_tpc); fprintf(screen,"%d threads per task\n",_offload_threads); } else { fprintf(screen,"Using Intel Package without Coprocessor.\n"); } fprintf(screen,"Precision: %s\n",kmode); fprintf(screen, "----------------------------------------------------------\n"); } } } /* ---------------------------------------------------------------------- */ void FixIntel::check_neighbor_intel() { #ifdef _LMP_INTEL_OFFLOAD _full_host_list = 0; #endif const int nrequest = neighbor->nrequest; for (int i = 0; i < nrequest; ++i) { #ifdef _LMP_INTEL_OFFLOAD if (_offload_balance != 0.0 && neighbor->requests[i]->intel == 0) { _full_host_list = 1; _offload_noghost = 0; } #endif if (neighbor->requests[i]->skip) error->all(FLERR, "Cannot yet use hybrid styles with Intel package."); } } /* ---------------------------------------------------------------------- */ void FixIntel::sync_coprocessor() { #ifdef _LMP_INTEL_OFFLOAD if (_offload_balance != 0.0) { if (_off_force_array_m != 0) { add_off_results(_off_force_array_m, 
_off_ev_array_d); _off_force_array_m = 0; } else if (_off_force_array_d != 0) { add_off_results(_off_force_array_d, _off_ev_array_d); _off_force_array_d = 0; } else if (_off_force_array_s != 0) { add_off_results(_off_force_array_s, _off_ev_array_s); _off_force_array_s = 0; } } #endif } /* ---------------------------------------------------------------------- */ double FixIntel::memory_usage() { double bytes; if (_precision_mode == PREC_MODE_SINGLE) bytes = _single_buffers->memory_usage(_nthreads); else if (_precision_mode == PREC_MODE_MIXED) bytes = _mixed_buffers->memory_usage(_nthreads); else bytes = _double_buffers->memory_usage(_nthreads); return bytes; } /* ---------------------------------------------------------------------- */ #ifdef _LMP_INTEL_OFFLOAD void FixIntel::output_timing_data() { if (_im_real_space_task == 0 || _offload_affinity_set == 0) return; double timer_total = 0.0; int size, rank; double timers[NUM_ITIMERS]; MPI_Comm_size(_real_space_comm, &size); MPI_Comm_rank(_real_space_comm, &rank); MPI_Allreduce(&_timers, &timers, NUM_ITIMERS, MPI_DOUBLE, MPI_SUM, _real_space_comm); for (int i=0; i < NUM_ITIMERS; i++) { timers[i] /= size; timer_total += timers[i]; } #ifdef TIME_BALANCE double timers_min[NUM_ITIMERS], timers_max[NUM_ITIMERS]; MPI_Allreduce(&_timers, &timers_max, NUM_ITIMERS, MPI_DOUBLE, MPI_MAX, _real_space_comm); MPI_Allreduce(&_timers, &timers_min, NUM_ITIMERS, MPI_DOUBLE, MPI_MIN, _real_space_comm); #endif if (timer_total > 0.0) { double balance_out[2], balance_in[2]; balance_out[0] = _balance_pair; balance_out[1] = _balance_neighbor; MPI_Reduce(balance_out, balance_in, 2, MPI_DOUBLE, MPI_SUM, 0, _real_space_comm); balance_in[0] /= size; balance_in[1] /= size; if (rank == 0 && _tscreen) { fprintf(_tscreen, "\n------------------------------------------------\n"); fprintf(_tscreen, " Offload Timing Data\n"); fprintf(_tscreen, "------------------------------------------------\n"); fprintf(_tscreen, " Data Pack/Cast Seconds %f\n", 
timers[TIME_PACK]); if (_offload_balance != 0.0) { fprintf(_tscreen, " Host Neighbor Seconds %f\n", timers[TIME_HOST_NEIGHBOR]); fprintf(_tscreen, " Host Pair Seconds %f\n", timers[TIME_HOST_PAIR]); fprintf(_tscreen, " Offload Neighbor Seconds %f\n", timers[TIME_OFFLOAD_NEIGHBOR]); fprintf(_tscreen, " Offload Pair Seconds %f\n", timers[TIME_OFFLOAD_PAIR]); fprintf(_tscreen, " Offload Wait Seconds %f\n", timers[TIME_OFFLOAD_WAIT]); fprintf(_tscreen, " Offload Latency Seconds %f\n", timers[TIME_OFFLOAD_LATENCY]); fprintf(_tscreen, " Offload Neighbor Balance %f\n", balance_in[1]); fprintf(_tscreen, " Offload Pair Balance %f\n", balance_in[0]); fprintf(_tscreen, " Offload Ghost Atoms "); if (_offload_noghost) fprintf(_tscreen,"No\n"); else fprintf(_tscreen,"Yes\n"); #ifdef TIME_BALANCE fprintf(_tscreen, " Offload Imbalance Seconds %f\n", timers[TIME_IMBALANCE]); fprintf(_tscreen, " Offload Min/Max Seconds "); for (int i = 0; i < NUM_ITIMERS; i++) fprintf(_tscreen, "[%f, %f] ",timers_min[i],timers_max[i]); fprintf(_tscreen, "\n"); #endif double ht = timers[TIME_HOST_NEIGHBOR] + timers[TIME_HOST_PAIR] + timers[TIME_OFFLOAD_WAIT]; double ct = timers[TIME_OFFLOAD_NEIGHBOR] + timers[TIME_OFFLOAD_PAIR]; double tt = MAX(ht,ct); if (timers[TIME_OFFLOAD_LATENCY] / tt > 0.07 && _separate_coi == 0) error->warning(FLERR, "Leaving a core free can improve performance for offload"); } fprintf(_tscreen, "------------------------------------------------\n"); } zero_timers(); _setup_time_cleared = false; } } /* ---------------------------------------------------------------------- */ int FixIntel::get_ppn(int &node_rank) { int nprocs; int rank; MPI_Comm_size(_real_space_comm, &nprocs); MPI_Comm_rank(_real_space_comm, &rank); int name_length; char node_name[MPI_MAX_PROCESSOR_NAME]; MPI_Get_processor_name(node_name,&name_length); node_name[name_length] = '\0'; char *node_names = new char[MPI_MAX_PROCESSOR_NAME*nprocs]; MPI_Allgather(node_name, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, node_names, 
MPI_MAX_PROCESSOR_NAME, MPI_CHAR, _real_space_comm); int ppn = 0; node_rank = 0; for (int i = 0; i < nprocs; i++) { if (strcmp(node_name, node_names + i * MPI_MAX_PROCESSOR_NAME) == 0) { ppn++; if (i < rank) node_rank++; } } return ppn; } /* ---------------------------------------------------------------------- */ void FixIntel::set_offload_affinity() { _separate_buffers = 0; if (_allow_separate_buffers) if (_offload_balance != 0.0 && _offload_balance < 1.0) _separate_buffers = 1; _im_real_space_task = 1; if (strncmp(update->integrate_style,"verlet/split",12) == 0) { _real_space_comm = world; if (universe->iworld != 0) { _im_real_space_task = 0; return; } } else _real_space_comm = universe->uworld; if (_offload_balance == 0.0) _cop = -1; if (_offload_balance == 0.0 || _offload_affinity_set == 1) return; _offload_affinity_set = 1; int node_rank; int ppn = get_ppn(node_rank); if (ppn % _ncops != 0) error->all(FLERR, "MPI tasks per node must be multiple of offload_cards"); ppn = ppn / _ncops; _cop = node_rank / ppn; node_rank = node_rank % ppn; int max_threads_per_task = _offload_cores / 4 * _offload_tpc / ppn; if (_offload_threads > max_threads_per_task) _offload_threads = max_threads_per_task; if (_offload_threads > _max_offload_threads) _offload_threads = _max_offload_threads; int offload_threads = _offload_threads; int offload_tpc = _offload_tpc; int offload_affinity_balanced = _offload_affinity_balanced; #pragma offload target(mic:_cop) mandatory \ in(node_rank,offload_threads,offload_tpc,offload_affinity_balanced) { omp_set_num_threads(offload_threads); #pragma omp parallel { int tnum = omp_get_thread_num(); kmp_affinity_mask_t mask; kmp_create_affinity_mask(&mask); int proc; if (offload_affinity_balanced) { proc = offload_threads * node_rank + tnum; proc = proc * 4 - (proc / 60) * 240 + proc / 60 + 1; } else { proc = offload_threads * node_rank + tnum; proc += (proc / 4) * (4 - offload_tpc) + 1; } kmp_set_affinity_mask_proc(proc, &mask); if 
(kmp_set_affinity(&mask) != 0) printf("Could not set affinity on rank %d thread %d to %d\n", node_rank, tnum, proc); } } if (_precision_mode == PREC_MODE_SINGLE) _single_buffers->set_off_params(offload_threads, _cop, _separate_buffers); else if (_precision_mode == PREC_MODE_MIXED) _mixed_buffers->set_off_params(offload_threads, _cop, _separate_buffers); else _double_buffers->set_off_params(offload_threads, _cop, _separate_buffers); } /* ---------------------------------------------------------------------- */ int FixIntel::set_host_affinity(const int nomp) { #ifndef INTEL_OFFLOAD_NOAFFINITY _separate_coi = 1; int rank = comm->me; int node_rank; int ppn = get_ppn(node_rank); int cop = node_rank / (ppn / _ncops); // Get a sorted list of logical cores int proc_list[INTEL_MAX_HOST_CORE_COUNT]; int ncores; FILE *p; char cmd[512]; char readbuf[INTEL_MAX_HOST_CORE_COUNT*5]; sprintf(cmd, "lscpu -p=cpu,core,socket | grep -v '#' |" "sort -t, -k 3,3n -k 2,2n | awk -F, '{print $1}'"); p = popen(cmd, "r"); if (p == NULL) return -1; ncores = 0; while(fgets(readbuf, 512, p)) { proc_list[ncores] = atoi(readbuf); ncores++; } pclose(p); // Sanity checks for core list if (ncores < 2) return -1; int nzero = 0; for (int i = 0; i < ncores; i++) { if (proc_list[i] == 0) nzero++; if (proc_list[i] < 0 || proc_list[i] >= ncores) return -1; } if (nzero > 1) return -1; // Determine the OpenMP/MPI configuration char *estring; int nthreads = nomp; if (nthreads == 0) { estring = getenv("OMP_NUM_THREADS"); if (estring != NULL) { nthreads = atoi(estring); if (nthreads < 2) nthreads = 1; } else nthreads = 1; } // Determine how many logical cores for COI and MPI tasks int coi_cores = 0, mpi_cores; int subscription = nthreads * ppn; if (subscription > ncores) { if (rank == 0) error->warning(FLERR, "More MPI tasks/OpenMP threads than available cores"); return 0; } if (subscription == ncores) _separate_coi = 0; if (subscription > ncores / 2) { coi_cores = ncores - subscription; if (coi_cores > 
INTEL_MAX_COI_CORES) coi_cores = INTEL_MAX_COI_CORES; } mpi_cores = (ncores - coi_cores) / ppn; // Get ids of all LWPs that COI spawned and affinitize int lwp = 0, plwp = 0, nlwp = 0, mlwp = 0, fail = 0; cpu_set_t cpuset; pid_t pid = getpid(); if (coi_cores) { sprintf(cmd, "ps -Lp %d -o lwp | awk ' (NR > 2) {print}'", pid); p = popen(cmd, "r"); if (p == NULL) return -1; while(fgets(readbuf, 512, p)) { lwp = atoi(readbuf); int first = coi_cores + node_rank * mpi_cores; CPU_ZERO(&cpuset); for (int i = first; i < first + mpi_cores; i++) CPU_SET(proc_list[i], &cpuset); if (sched_setaffinity(lwp, sizeof(cpu_set_t), &cpuset)) { fail = 1; break; } plwp++; } pclose(p); // Do async offload to create COI threads int sig1, sig2; float *buf1; int pragma_size = 1024; buf1 = (float*) malloc(sizeof(float)*pragma_size); #pragma offload target (mic:0) mandatory \ in(buf1:length(pragma_size) alloc_if(1) free_if(0)) \ signal(&sig1) { buf1[0] = 0.0; } #pragma offload_wait target(mic:0) wait(&sig1) #pragma offload target (mic:0) mandatory \ out(buf1:length(pragma_size) alloc_if(0) free_if(1)) \ signal(&sig2) { buf1[0] = 1.0; } #pragma offload_wait target(mic:0) wait(&sig2) free(buf1); p = popen(cmd, "r"); if (p == NULL) return -1; while(fgets(readbuf, 512, p)) { lwp = atoi(readbuf); nlwp++; if (nlwp <= plwp) continue; CPU_ZERO(&cpuset); for(int i=0; i class IntelBuffers; class FixIntel : public Fix { public: FixIntel(class LAMMPS *, int, char **); virtual ~FixIntel(); virtual int setmask(); virtual void init(); virtual void setup(int); void pair_init_check(); // Get all forces, calculation results from coprocesser void sync_coprocessor(); double memory_usage(); typedef struct { double x,y,z; } lmp_ft; enum {PREC_MODE_SINGLE, PREC_MODE_MIXED, PREC_MODE_DOUBLE}; inline int precision() { return _precision_mode; } inline IntelBuffers * get_single_buffers() { return _single_buffers; } inline IntelBuffers * get_mixed_buffers() { return _mixed_buffers; } inline IntelBuffers * 
get_double_buffers() { return _double_buffers; } protected: IntelBuffers *_single_buffers; IntelBuffers *_mixed_buffers; IntelBuffers *_double_buffers; int _precision_mode, _nthreads; public: inline int* get_overflow_flag() { return _overflow_flag; } inline int* get_off_overflow_flag() { return _off_overflow_flag; } inline void add_result_array(IntelBuffers::vec3_acc_t *f_in, double *ev_in, const int offload, const int eatom = 0, const int vatom = 0); inline void add_result_array(IntelBuffers::vec3_acc_t *f_in, double *ev_in, const int offload, const int eatom = 0, const int vatom = 0); inline void add_result_array(IntelBuffers::vec3_acc_t *f_in, float *ev_in, const int offload, const int eatom = 0, const int vatom = 0); inline void get_buffern(const int offload, int &nlocal, int &nall, int &minlocal); #ifdef _LMP_INTEL_OFFLOAD inline int coprocessor_number() { return _cop; } inline int full_host_list() { return _full_host_list; } void set_offload_affinity(); inline double offload_balance() { return _offload_balance; } inline int offload_end_neighbor() { return _balance_neighbor * atom->nlocal; } inline int offload_end_pair(); inline int host_start_neighbor() { if (_offload_noghost) return 0; else return offload_end_neighbor(); } inline int host_start_pair() { if (_offload_noghost) return 0; else return offload_end_pair(); } inline int offload_nlocal() { return _offload_nlocal; } inline int offload_nall() { return _offload_nall; } inline int offload_min_ghost() { return _offload_min_ghost; } inline int host_min_local() { return _host_min_local; } inline int host_min_ghost() { return _host_min_ghost; } inline int host_used_local() { return _host_used_local; } inline int host_used_ghost() { return _host_used_ghost; } inline int host_nall() { return _host_nall; } inline int separate_buffers() { return _separate_buffers; } inline int offload_noghost() { return _offload_noghost; } inline void set_offload_noghost(const int v) { if (_offload_ghost < 0) _offload_noghost = 
v; } inline void set_neighbor_host_sizes(); inline void zero_timers() { memset(_timers, 0, sizeof(double) * NUM_ITIMERS); } inline void start_watch(const int which) { _stopwatch[which] = MPI_Wtime(); } inline double stop_watch(const int which); inline double * off_watch_pair() { return _stopwatch_offload_pair; } inline double * off_watch_neighbor() { return _stopwatch_offload_neighbor; } inline void balance_stamp(); inline void acc_timers(); #else inline int offload_end_neighbor() { return 0; } inline int offload_end_pair() { return 0; } inline int host_start_neighbor() { return 0; } inline int host_start_pair() { return 0; } inline void zero_timers() {} inline void start_watch(const int which) {} inline double stop_watch(const int which) { return 0.0; } double * off_watch_pair() { return NULL; } double * off_watch_neighbor() { return NULL; } inline void balance_stamp() {} inline void acc_timers() {} inline int separate_buffers() { return 0; } #endif protected: int _overflow_flag[5]; _alignvar(int _off_overflow_flag[5],64); int _allow_separate_buffers, _offload_ghost; #ifdef _LMP_INTEL_OFFLOAD double _balance_pair_time, _balance_other_time; int _offload_nlocal, _offload_nall, _offload_min_ghost, _offload_nghost; int _host_min_local, _host_min_ghost, _host_nall; int _host_used_local, _host_used_ghost; int _separate_buffers, _offload_noghost, _sync_at_pair, _separate_coi; bool _setup_time_cleared, _timers_allocated; void output_timing_data(); FILE *_tscreen; IntelBuffers::vec3_acc_t *_off_force_array_s; IntelBuffers::vec3_acc_t *_off_force_array_m; IntelBuffers::vec3_acc_t *_off_force_array_d; float *_off_ev_array_s; double *_off_ev_array_d; int _off_results_eatom, _off_results_vatom; int _full_host_list, _cop, _ncops; int get_ppn(int &); int set_host_affinity(const int); #endif void check_neighbor_intel(); double _offload_balance, _balance_neighbor, _balance_pair, _balance_fixed; double _timers[NUM_ITIMERS]; double _stopwatch[NUM_ITIMERS]; _alignvar(double 
_stopwatch_offload_neighbor[1],64); _alignvar(double _stopwatch_offload_pair[1],64); template inline void add_results(const ft * _noalias const f_in, const acc_t * _noalias const ev_global, const int eatom, const int vatom, const int offload); template inline void add_oresults(const ft * _noalias const f_in, const acc_t * _noalias const ev_global, const int eatom, const int vatom, const int out_offset, const int nall); int _offload_affinity_balanced, _offload_threads, _offload_tpc; #ifdef _LMP_INTEL_OFFLOAD int _max_offload_threads, _offload_cores, _offload_affinity_set; int _im_real_space_task; MPI_Comm _real_space_comm; template inline void add_off_results(const ft * _noalias const f_in, const acc_t * _noalias const ev_global); #endif }; /* ---------------------------------------------------------------------- */ void FixIntel::get_buffern(const int offload, int &nlocal, int &nall, int &minlocal) { #ifdef _LMP_INTEL_OFFLOAD if (_separate_buffers) { if (offload) { if (neighbor->ago != 0) { nlocal = _offload_nlocal; nall = _offload_nall; } else { nlocal = atom->nlocal; nall = nlocal + atom->nghost; } minlocal = 0; } else { nlocal = atom->nlocal; nall = _host_nall; minlocal = _host_min_local; } return; } if (_offload_noghost && offload) nall = atom->nlocal; else #endif nall = atom->nlocal + atom->nghost; nlocal = atom->nlocal; minlocal = 0; } /* ---------------------------------------------------------------------- */ void FixIntel::add_result_array(IntelBuffers::vec3_acc_t *f_in, double *ev_in, const int offload, const int eatom, const int vatom) { #ifdef _LMP_INTEL_OFFLOAD if (offload) { _off_results_eatom = eatom; _off_results_vatom = vatom; _off_force_array_d = f_in; _off_ev_array_d = ev_in; if (_sync_at_pair == 1) sync_coprocessor(); return; } #endif add_results(f_in, ev_in, eatom, vatom, 0); if (_overflow_flag[LMP_OVERFLOW]) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one"); #ifdef _LMP_INTEL_OFFLOAD if (_sync_at_pair) sync_coprocessor(); 
#endif } /* ---------------------------------------------------------------------- */ void FixIntel::add_result_array(IntelBuffers::vec3_acc_t *f_in, double *ev_in, const int offload, const int eatom, const int vatom) { #ifdef _LMP_INTEL_OFFLOAD if (offload) { _off_results_eatom = eatom; _off_results_vatom = vatom; _off_force_array_m = f_in; _off_ev_array_d = ev_in; if (_sync_at_pair == 1) sync_coprocessor(); return; } #endif add_results(f_in, ev_in, eatom, vatom, 0); if (_overflow_flag[LMP_OVERFLOW]) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one"); #ifdef _LMP_INTEL_OFFLOAD if (_sync_at_pair) sync_coprocessor(); #endif } /* ---------------------------------------------------------------------- */ void FixIntel::add_result_array(IntelBuffers::vec3_acc_t *f_in, float *ev_in, const int offload, const int eatom, const int vatom) { #ifdef _LMP_INTEL_OFFLOAD if (offload) { _off_results_eatom = eatom; _off_results_vatom = vatom; _off_force_array_s = f_in; _off_ev_array_s = ev_in; if (_sync_at_pair == 1) sync_coprocessor(); return; } #endif add_results(f_in, ev_in, eatom, vatom, 0); if (_overflow_flag[LMP_OVERFLOW]) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one"); #ifdef _LMP_INTEL_OFFLOAD if (_sync_at_pair) sync_coprocessor(); #endif } /* ---------------------------------------------------------------------- */ template void FixIntel::add_results(const ft * _noalias const f_in, const acc_t * _noalias const ev_global, const int eatom, const int vatom, const int offload) { start_watch(TIME_PACK); int f_length; #ifdef _LMP_INTEL_OFFLOAD if (_separate_buffers) { if (offload) { add_oresults(f_in, ev_global, eatom, vatom, 0, _offload_nlocal); if (force->newton_pair) { const acc_t * _noalias const enull = 0; int offset = _offload_nlocal; if (atom->torque) offset *= 2; add_oresults(f_in + offset, enull, eatom, vatom, _offload_min_ghost, _offload_nghost); } } else { add_oresults(f_in, ev_global, eatom, vatom, _host_min_local, 
_host_used_local); if (force->newton_pair) { const acc_t * _noalias const enull = 0; int offset = _host_used_local; if (atom->torque) offset *= 2; add_oresults(f_in + offset, enull, eatom, vatom, _host_min_ghost, _host_used_ghost); } } stop_watch(TIME_PACK); return; } if (force->newton_pair && (_offload_noghost == 0 || offload == 0)) f_length = atom->nlocal + atom->nghost; else f_length = atom->nlocal; #else if (force->newton_pair) f_length = atom->nlocal + atom->nghost; else f_length = atom->nlocal; #endif add_oresults(f_in, ev_global, eatom, vatom, 0, f_length); stop_watch(TIME_PACK); } /* ---------------------------------------------------------------------- */ template void FixIntel::add_oresults(const ft * _noalias const f_in, const acc_t * _noalias const ev_global, const int eatom, const int vatom, const int out_offset, const int nall) { lmp_ft * _noalias const f = (lmp_ft *) lmp->atom->f[0] + out_offset; if (atom->torque) { if (f_in[1].w) if (f_in[1].w == 1) error->all(FLERR,"Bad matrix inversion in mldivide3"); else error->all(FLERR, "Sphere particles not yet supported for gayberne/intel"); } #if defined(_OPENMP) #pragma omp parallel default(none) #endif { #if defined(_OPENMP) const int tid = omp_get_thread_num(); #else const int tid = 0; #endif int ifrom, ito; IP_PRE_omp_range_align(ifrom, ito, tid, nall, _nthreads, sizeof(acc_t)); if (atom->torque) { int ii = ifrom * 2; lmp_ft * _noalias const tor = (lmp_ft *) lmp->atom->torque[0] + out_offset; if (eatom) { double * _noalias const lmp_eatom = force->pair->eatom + out_offset; + #if defined(LMP_SIMD_COMPILER) + #pragma novector + #endif for (int i = ifrom; i < ito; i++) { f[i].x += f_in[ii].x; f[i].y += f_in[ii].y; f[i].z += f_in[ii].z; lmp_eatom[i] += f_in[ii].w; tor[i].x += f_in[ii+1].x; tor[i].y += f_in[ii+1].y; tor[i].z += f_in[ii+1].z; ii += 2; } } else { + #if defined(LMP_SIMD_COMPILER) + #pragma novector + #endif for (int i = ifrom; i < ito; i++) { f[i].x += f_in[ii].x; f[i].y += f_in[ii].y; f[i].z 
+= f_in[ii].z; tor[i].x += f_in[ii+1].x; tor[i].y += f_in[ii+1].y; tor[i].z += f_in[ii+1].z; ii += 2; } } } else { if (eatom) { double * _noalias const lmp_eatom = force->pair->eatom + out_offset; + #if defined(LMP_SIMD_COMPILER) + #pragma novector + #endif for (int i = ifrom; i < ito; i++) { f[i].x += f_in[i].x; f[i].y += f_in[i].y; f[i].z += f_in[i].z; lmp_eatom[i] += f_in[i].w; } } else { + #if defined(LMP_SIMD_COMPILER) + #pragma novector + #endif for (int i = ifrom; i < ito; i++) { f[i].x += f_in[i].x; f[i].y += f_in[i].y; f[i].z += f_in[i].z; } } } } if (ev_global != NULL) { force->pair->eng_vdwl += ev_global[0]; force->pair->eng_coul += ev_global[1]; force->pair->virial[0] += ev_global[2]; force->pair->virial[1] += ev_global[3]; force->pair->virial[2] += ev_global[4]; force->pair->virial[3] += ev_global[5]; force->pair->virial[4] += ev_global[6]; force->pair->virial[5] += ev_global[7]; } } #ifdef _LMP_INTEL_OFFLOAD /* ---------------------------------------------------------------------- */ int FixIntel::offload_end_pair() { if (neighbor->ago == 0) return _balance_neighbor * atom->nlocal; else return _balance_pair * atom->nlocal; } /* ---------------------------------------------------------------------- */ double FixIntel::stop_watch(const int which) { double elapsed = MPI_Wtime() - _stopwatch[which]; _timers[which] += elapsed; return elapsed; } /* ---------------------------------------------------------------------- */ void FixIntel::balance_stamp() { if (_offload_balance < 0.0) { double ct = MPI_Wtime(); _balance_other_time = ct; _balance_pair_time = ct - _stopwatch[TIME_HOST_PAIR]; } } /* ---------------------------------------------------------------------- */ void FixIntel::acc_timers() { _timers[TIME_OFFLOAD_PAIR] += *_stopwatch_offload_pair; if (neighbor->ago == 0) { _timers[TIME_OFFLOAD_NEIGHBOR] += *_stopwatch_offload_neighbor; if (_setup_time_cleared == false) { zero_timers(); _setup_time_cleared = true; } } } /* 
---------------------------------------------------------------------- */ void FixIntel::set_neighbor_host_sizes() { _host_min_local = _overflow_flag[LMP_LOCAL_MIN]; _host_min_ghost = _overflow_flag[LMP_GHOST_MIN]; _host_used_local = atom->nlocal - _host_min_local; _host_used_ghost = _overflow_flag[LMP_GHOST_MAX] + 1 - _host_min_ghost; if (_host_used_ghost < 0) _host_used_ghost = 0; _host_nall = atom->nlocal + _host_used_ghost; } /* ---------------------------------------------------------------------- */ template void FixIntel::add_off_results(const ft * _noalias const f_in, const acc_t * _noalias const ev_global) { if (_offload_balance < 0.0) _balance_other_time = MPI_Wtime() - _balance_other_time; start_watch(TIME_OFFLOAD_WAIT); #ifdef _LMP_INTEL_OFFLOAD if (neighbor->ago == 0) { #pragma offload_wait target(mic:_cop) wait(atom->tag, f_in) } else { #pragma offload_wait target(mic:_cop) wait(f_in) } #endif double wait_time = stop_watch(TIME_OFFLOAD_WAIT); if (neighbor->ago == 0) { if (_off_overflow_flag[LMP_OVERFLOW]) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one"); _offload_nlocal = _off_overflow_flag[LMP_LOCAL_MAX] + 1; _offload_min_ghost = _off_overflow_flag[LMP_GHOST_MIN]; _offload_nghost = _off_overflow_flag[LMP_GHOST_MAX] + 1 - _offload_min_ghost; if (_offload_nghost < 0) _offload_nghost = 0; _offload_nall = _offload_nlocal + _offload_nghost; _offload_nlocal; } int nlocal = atom->nlocal; // Load balance? 
if (_offload_balance < 0.0) { if (neighbor->ago == 0) _balance_pair = _balance_neighbor; double mic_time; mic_time = *_stopwatch_offload_pair; if (_balance_pair_time + _balance_other_time < mic_time) { double ft = _balance_pair_time + _balance_other_time + wait_time - mic_time; _balance_fixed = (1.0 - INTEL_LB_MEAN_WEIGHT) * _balance_fixed + INTEL_LB_MEAN_WEIGHT * ft; } double ctps = _balance_pair_time / (1.0-_balance_pair); double otps = mic_time / _balance_pair; double new_balance = (ctps + _balance_other_time - _balance_fixed) / (otps + ctps); if (new_balance < 0.01) new_balance = 0.01; else if (new_balance > 0.99) new_balance = 0.99; _balance_neighbor = (1.0 - INTEL_LB_MEAN_WEIGHT) *_balance_neighbor + INTEL_LB_MEAN_WEIGHT * new_balance; } #ifdef TIME_BALANCE start_watch(TIME_IMBALANCE); MPI_Barrier(_real_space_comm); stop_watch(TIME_IMBALANCE); #endif acc_timers(); if (atom->torque) if (f_in[1].w < 0.0) error->all(FLERR, "Bad matrix inversion in mldivide3"); add_results(f_in, ev_global, _off_results_eatom, _off_results_vatom, 1); } #endif } #endif #endif /* ERROR/WARNING messages: E: The 'package intel' command is required for /intel styles Self-explanatory. W: Could not set host affinity for offload tasks When using offload to a coprocessor, the application will try to set affinity for host MPI tasks and OpenMP threads and will generate a warning if unable to do so successfully. In the unsuccessful case, you might wish to set affinity outside of the application and performance might suffer if hyperthreading is disable on the CPU. E: Neighbor list overflow, boost neigh_modify one Increase the value for neigh_modify one to allow for larger allocations for neighbor list builds. The value required can be different for the Intel package in order to support offload to a coprocessor. E: Bad matrix inversion in mldivide3 This error should not occur unless the matrix is badly formed. 
E: Illegal package intel command The format for the package intel command is incorrect. Please see the documentation. E: fix intel has to operate on group 'all' Self-explanatory. E: Illegal package intel mode requested The format for the package intel command is incorrect. Please see the documentation. E: Specified run_style does not support the Intel package. When using offload to a coprocessor, the Intel package requires a run style with the intel suffix. E: Currently, neighbor style BIN must be used with Intel package. This is the only neighbor style that has been implemented for the Intel package. E: Currently, cannot use neigh_modify exclude with Intel package. This is a current restriction of the Intel package. W: Unknown Intel Compiler Version The compiler version used to build LAMMPS has not been tested with offload to a coprocessor. W: Unsupported Intel Compiler The compiler version used to build LAMMPS is not supported when using offload to a coprocessor. There could be performance or correctness issues. Please use 14.0.1.106 or 15.1.133 or later. E: Currently, cannot use more than one intel style with hybrid. Currently, hybrid pair styles can only use the intel suffix for one of the pair styles. E: Cannot yet use hybrid styles with Intel package. The hybrid pair style configuration is not yet supported by the Intel package. Support is limited to hybrid/overlay or a hybrid style that does not require a skip list. W: Leaving a core/node free can improve performance for offload When each CPU is fully subscribed with MPI tasks and OpenMP threads, context switching with threads used for offload can sometimes decrease performance. If you see this warning, try using fewer MPI tasks/OpenMP threads per node to leave a physical CPU core free on each node. E: MPI tasks per node must be multiple of offload_cards For offload to multiple coprocessors on a single node, the Intel package requires that each coprocessor is used by the same number of MPI tasks. 
W: More MPI tasks/OpenMP threads than available cores Using more MPI tasks/OpenMP threads than available cores will typically decrease performance. */ diff --git a/src/USER-INTEL/intel_preprocess.h b/src/USER-INTEL/intel_preprocess.h index 44534e132..f4c8b5062 100644 --- a/src/USER-INTEL/intel_preprocess.h +++ b/src/USER-INTEL/intel_preprocess.h @@ -1,436 +1,462 @@ /* -*- c++ -*- ------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: W. Michael Brown (Intel) ------------------------------------------------------------------------- */ +#ifdef __INTEL_COMPILER +#define LMP_SIMD_COMPILER +#endif + #ifdef __INTEL_OFFLOAD #ifdef LMP_INTEL_OFFLOAD #define _LMP_INTEL_OFFLOAD #endif #endif #ifndef LMP_INTEL_PREPROCESS_H #define LMP_INTEL_PREPROCESS_H #ifndef LAMMPS_MEMALIGN #error Please set -DLAMMPS_MEMALIGN=64 in CCFLAGS for your LAMMPS makefile. #else #if (LAMMPS_MEMALIGN != 64) #error Please set -DLAMMPS_MEMALIGN=64 in CCFLAGS for your LAMMPS makefile. 
#endif #endif #if defined(_OPENMP) #define _use_omp_pragma(txt) _Pragma(txt) #else #define _use_omp_pragma(txt) #endif -#if defined(__INTEL_COMPILER) +#if defined(LMP_SIMD_COMPILER) #define _use_simd_pragma(txt) _Pragma(txt) #else #define _use_simd_pragma(txt) #endif namespace LAMMPS_NS { enum {LMP_OVERFLOW, LMP_LOCAL_MIN, LMP_LOCAL_MAX, LMP_GHOST_MIN, LMP_GHOST_MAX}; enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR, TIME_OFFLOAD_PAIR, TIME_OFFLOAD_WAIT, TIME_OFFLOAD_LATENCY, TIME_IMBALANCE}; #define NUM_ITIMERS ( TIME_IMBALANCE + 1 ) +#define INTEL_MIC_VECTOR_WIDTH 16 +#define INTEL_VECTOR_WIDTH 4 +#ifdef __AVX__ +#undef INTEL_VECTOR_WIDTH +#define INTEL_VECTOR_WIDTH 8 +#endif +#ifdef __AVX2__ +#undef INTEL_VECTOR_WIDTH +#define INTEL_VECTOR_WIDTH 8 +#endif +#ifdef __AVX512F__ +#undef INTEL_VECTOR_WIDTH +#define INTEL_VECTOR_WIDTH 16 +#define INTEL_V512 1 +#define INTEL_VMASK 1 +#else + +#ifdef __MIC__ +#define INTEL_V512 1 +#define INTEL_VMASK 1 +#endif + +#endif + #define INTEL_DATA_ALIGN 64 #define INTEL_ONEATOM_FACTOR 2 -#define INTEL_MIC_VECTOR_WIDTH 16 #define INTEL_MIC_NBOR_PAD INTEL_MIC_VECTOR_WIDTH -#define INTEL_VECTOR_WIDTH 8 #define INTEL_NBOR_PAD INTEL_VECTOR_WIDTH #define INTEL_LB_MEAN_WEIGHT 0.1 #define INTEL_BIGP 1e15 #define INTEL_MAX_HOST_CORE_COUNT 512 #define INTEL_MAX_COI_CORES 2 #define IP_PRE_get_stride(stride, n, datasize, torque) \ { \ int blength = n; \ if (torque) blength *= 2; \ const int bytes = blength * datasize; \ stride = INTEL_DATA_ALIGN - (bytes % INTEL_DATA_ALIGN); \ stride = blength + stride / datasize; \ } #if defined(_OPENMP) #define IP_PRE_omp_range(ifrom, ito, tid, inum, nthreads) \ { \ const int idelta = 1 + inum/nthreads; \ ifrom = tid * idelta; \ ito = ((ifrom + idelta) > inum) ? 
inum : ifrom + idelta; \ } #define IP_PRE_omp_range_id(ifrom, ito, tid, inum, nthreads) \ { \ tid = omp_get_thread_num(); \ IP_PRE_omp_range(ifrom, ito, tid, inum, nthreads); \ } #define IP_PRE_omp_range_align(ifrom, ito, tid, inum, nthreads, \ datasize) \ { \ int chunk_size = INTEL_DATA_ALIGN / datasize; \ int idelta = static_cast(static_cast(inum) \ /chunk_size/nthreads) + 1; \ idelta *= chunk_size; \ ifrom = tid*idelta; \ ito = ifrom + idelta; \ if (ito > inum) ito = inum; \ } #define IP_PRE_omp_range_id_align(ifrom, ito, tid, inum, \ nthreads, datasize) \ { \ tid = omp_get_thread_num(); \ IP_PRE_omp_range_align(ifrom, ito, tid, inum, nthreads, \ datasize); \ } #else #define IP_PRE_omp_range(ifrom, ito, tid, inum, nthreads) \ { \ ifrom = 0; \ ito = inum; \ } #define IP_PRE_omp_range_id(ifrom, ito, tid, inum, nthreads) \ { \ tid = 0; \ ifrom = 0; \ ito = inum; \ } #define IP_PRE_omp_range_align(ifrom, ito, tid, inum, nthreads, \ datasize) \ { \ ifrom = 0; \ ito = inum; \ } #define IP_PRE_omp_range_id_align(ifrom, ito, tid, inum, \ nthreads, datasize) \ { \ tid = 0; \ ifrom = 0; \ ito = inum; \ } #endif #ifdef _LMP_INTEL_OFFLOAD #include __declspec( target (mic)) inline double MIC_Wtime() { double time; struct timeval tv; gettimeofday(&tv, NULL); time = 1.0 * tv.tv_sec + 1.0e-6 * tv.tv_usec; return time; } #define IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, \ nlocal, nall) \ { \ if (fix->separate_buffers() && ago != 0) { \ fix->start_watch(TIME_PACK); \ if (offload) { \ _use_omp_pragma("omp parallel default(none) shared(buffers,nlocal,nall)") \ { \ int ifrom, ito, tid; \ int nthreads = comm->nthreads; \ IP_PRE_omp_range_id_align(ifrom, ito, tid, nlocal, \ nthreads, sizeof(flt_t)); \ buffers->thr_pack_cop(ifrom, ito, 0); \ int nghost = nall - nlocal; \ if (nghost) { \ IP_PRE_omp_range_align(ifrom, ito, tid, nall - nlocal, \ nthreads, sizeof(flt_t)); \ buffers->thr_pack_cop(ifrom + nlocal, ito + nlocal, \ fix->offload_min_ghost() - nlocal, \ ago == 1); 
\ } \ } \ } else { \ buffers->thr_pack_host(fix->host_min_local(), nlocal, 0); \ buffers->thr_pack_host(nlocal, nall, \ fix->host_min_ghost()-nlocal); \ } \ fix->stop_watch(TIME_PACK); \ } \ } #define IP_PRE_get_transfern(ago, newton, evflag, eflag, vflag, \ buffers, offload, fix, separate_flag, \ x_size, q_size, ev_size, f_stride) \ { \ separate_flag = 0; \ if (ago == 0) { \ x_size = 0; \ q_size = nall; \ if (offload) { \ if (fix->separate_buffers()) { \ if (lmp->atom->torque) \ separate_flag = 2; \ else \ separate_flag = 1; \ } else \ separate_flag = 3; \ } \ } else { \ x_size = nall; \ q_size = 0; \ } \ ev_size = 0; \ if (evflag) { \ if (eflag) ev_size = 2; \ if (vflag) ev_size = 8; \ } \ int f_length; \ if (newton) \ f_length = nall; \ else \ f_length = nlocal; \ f_length -= minlocal; \ f_stride = buffers->get_stride(f_length); \ } #define IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, \ ev_global) \ { \ if (offload) { \ tc = buffers->get_off_threads(); \ f_start = buffers->get_off_f(); \ ev_global = buffers->get_ev_global(); \ } else { \ tc = comm->nthreads; \ f_start = buffers->get_f(); \ fix->start_watch(TIME_HOST_PAIR); \ ev_global = buffers->get_ev_global_host(); \ } \ } #define IP_PRE_repack_for_offload(newton, separate_flag, nlocal, nall, \ f_stride, x, q) \ { \ if (separate_flag) { \ if (separate_flag < 3) { \ int all_local = nlocal; \ int ghost_min = overflow[LMP_GHOST_MIN]; \ nlocal = overflow[LMP_LOCAL_MAX] + 1; \ int nghost = overflow[LMP_GHOST_MAX] + 1 - ghost_min; \ if (nghost < 0) nghost = 0; \ nall = nlocal + nghost; \ separate_flag--; \ int flength; \ if (newton) flength = nall; \ else flength = nlocal; \ IP_PRE_get_stride(f_stride, flength, sizeof(FORCE_T), \ separate_flag); \ if (nghost) { \ if (nlocal < all_local || ghost_min > all_local) { \ memmove(x + nlocal, x + ghost_min, \ (nall - nlocal) * sizeof(ATOM_T)); \ if (q != 0) \ memmove((void *)(q + nlocal), (void *)(q + ghost_min), \ (nall - nlocal) * sizeof(flt_t)); \ } \ } \ } \ 
x[nall].x = INTEL_BIGP; \ x[nall].y = INTEL_BIGP; \ x[nall].z = INTEL_BIGP; \ } \ } #else #define MIC_Wtime MPI_Wtime #define IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, \ nlocal, nall) #define IP_PRE_get_transfern(ago, newton, evflag, eflag, vflag, \ buffers, offload, fix, separate_flag, \ x_size, q_size, ev_size, f_stride) \ { \ separate_flag = 0; \ int f_length; \ if (newton) \ f_length = nall; \ else \ f_length = nlocal; \ f_stride = buffers->get_stride(f_length); \ } #define IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, \ ev_global) \ { \ tc = comm->nthreads; \ f_start = buffers->get_f(); \ fix->start_watch(TIME_HOST_PAIR); \ ev_global = buffers->get_ev_global_host(); \ } #define IP_PRE_repack_for_offload(newton, separate_flag, nlocal, nall, \ f_stride, x, q) #endif #define IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair, delx, dely, delz) \ { \ if (vflag == 1) { \ sv0 += ev_pre * delx * delx * fpair; \ sv1 += ev_pre * dely * dely * fpair; \ sv2 += ev_pre * delz * delz * fpair; \ sv3 += ev_pre * delx * dely * fpair; \ sv4 += ev_pre * delx * delz * fpair; \ sv5 += ev_pre * dely * delz * fpair; \ } \ } #define IP_PRE_ev_tally_nbor3(vflag, fj, fk, delx, dely, delz, delr2) \ { \ if (vflag == 1) { \ sv0 += delx * fj[0] + delr2[0] * fk[0]; \ sv1 += dely * fj[1] + delr2[1] * fk[1]; \ sv2 += delz * fj[2] + delr2[2] * fk[2]; \ sv3 += delx * fj[1] + delr2[0] * fk[1]; \ sv4 += delx * fj[2] + delr2[0] * fk[2]; \ sv5 += dely * fj[2] + delr2[1] * fk[2]; \ } \ } #define IP_PRE_ev_tally_nbor3v(vflag, fj0, fj1, fj2, delx, dely, delz) \ { \ if (vflag == 1) { \ sv0 += delx * fj0; \ sv1 += dely * fj1; \ sv2 += delz * fj2; \ sv3 += delx * fj1; \ sv4 += delx * fj2; \ sv5 += dely * fj2; \ } \ } #define IP_PRE_ev_tally_atom(evflag, eflag, vflag, f, fwtmp) \ { \ if (evflag) { \ if (eflag) { \ f[i].w += fwtmp; \ oevdwl += sevdwl; \ } \ if (vflag == 1) { \ ov0 += sv0; \ ov1 += sv1; \ ov2 += sv2; \ ov3 += sv3; \ ov4 += sv4; \ ov5 += sv5; \ } \ } \ } #define 
IP_PRE_ev_tally_atomq(evflag, eflag, vflag, f, fwtmp) \ { \ if (evflag) { \ if (eflag) { \ f[i].w += fwtmp; \ oevdwl += sevdwl; \ oecoul += secoul; \ } \ if (vflag == 1) { \ ov0 += sv0; \ ov1 += sv1; \ ov2 += sv2; \ ov3 += sv3; \ ov4 += sv4; \ ov5 += sv5; \ } \ } \ } #define IP_PRE_fdotr_acc_force(newton, evflag, eflag, vflag, eatom, \ nall, nlocal, minlocal, nthreads, \ f_start, f_stride, x) \ { \ int o_range; \ if (newton) \ o_range = nall; \ else \ o_range = nlocal; \ if (offload == 0) o_range -= minlocal; \ IP_PRE_omp_range_align(iifrom, iito, tid, o_range, nthreads, \ sizeof(acc_t)); \ \ int t_off = f_stride; \ if (eflag && eatom) { \ for (int t = 1; t < nthreads; t++) { \ _use_simd_pragma("vector nontemporal") \ _use_simd_pragma("novector") \ for (int n = iifrom; n < iito; n++) { \ f_start[n].x += f_start[n + t_off].x; \ f_start[n].y += f_start[n + t_off].y; \ f_start[n].z += f_start[n + t_off].z; \ f_start[n].w += f_start[n + t_off].w; \ } \ t_off += f_stride; \ } \ } else { \ for (int t = 1; t < nthreads; t++) { \ _use_simd_pragma("vector nontemporal") \ _use_simd_pragma("novector") \ for (int n = iifrom; n < iito; n++) { \ f_start[n].x += f_start[n + t_off].x; \ f_start[n].y += f_start[n + t_off].y; \ f_start[n].z += f_start[n + t_off].z; \ } \ t_off += f_stride; \ } \ } \ \ if (evflag) { \ if (vflag == 2) { \ const ATOM_T * _noalias const xo = x + minlocal; \ _use_simd_pragma("vector nontemporal") \ _use_simd_pragma("novector") \ for (int n = iifrom; n < iito; n++) { \ ov0 += f_start[n].x * xo[n].x; \ ov1 += f_start[n].y * xo[n].y; \ ov2 += f_start[n].z * xo[n].z; \ ov3 += f_start[n].y * xo[n].x; \ ov4 += f_start[n].z * xo[n].x; \ ov5 += f_start[n].z * xo[n].y; \ } \ } \ } \ } } #endif diff --git a/src/USER-INTEL/neigh_half_bin_intel.cpp b/src/USER-INTEL/neigh_half_bin_intel.cpp index 6c3cfc196..8b4fe4c10 100644 --- a/src/USER-INTEL/neigh_half_bin_intel.cpp +++ b/src/USER-INTEL/neigh_half_bin_intel.cpp @@ -1,1757 +1,2115 @@ /* 
---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: W. Michael Brown (Intel) ------------------------------------------------------------------------- */ #include "neighbor.h" #include "neigh_list.h" #include "atom.h" #include "comm.h" #include "group.h" #include "fix_intel.h" #if defined(_OPENMP) #include #endif using namespace LAMMPS_NS; #ifdef _LMP_INTEL_OFFLOAD #pragma offload_attribute(push,target(mic)) #endif #define ofind_special(which, special, nspecial, i, tag, special_flag) \ { \ which = 0; \ const int n1 = nspecial[i * 3]; \ const int n2 = nspecial[i * 3 + 1]; \ const int n3 = nspecial[i * 3 + 2]; \ const tagint *sptr = special + i * maxspecial; \ for (int s = 0; s < n3; s++) { \ if (sptr[s] == tag) { \ if (s < n1) { \ if (special_flag[1] == 0) which = -1; \ else if (special_flag[1] == 1) which = 0; \ else which = 1; \ } else if (s < n2) { \ if (special_flag[2] == 0) which = -1; \ else if (special_flag[2] == 1) which = 0; \ else which = 2; \ } else { \ if (special_flag[3] == 0) which = -1; \ else if (special_flag[3] == 1) which = 0; \ else which = 3; \ } \ } \ } \ } +#define ominimum_image_check(answer, dx, dy, dz) \ +{ \ + answer = 0; \ + if (xperiodic && fabs(dx) > xprd_half) answer = 1; \ + if (yperiodic && fabs(dy) > yprd_half) answer = 1; \ + if (zperiodic && fabs(dz) > zprd_half) answer = 1; \ +} + +#define 
dminimum_image_check(answer, dx, dy, dz) \ +{ \ + answer = 0; \ + if (domain->xperiodic && fabs(dx) > domain->xprd_half) answer = 1; \ + if (domain->yperiodic && fabs(dy) > domain->yprd_half) answer = 1; \ + if (domain->zperiodic && fabs(dz) > domain->zprd_half) answer = 1; \ +} + #ifdef _LMP_INTEL_OFFLOAD #pragma offload_attribute(pop) #endif template void Neighbor::bin_atoms(void * xin, int * _noalias const atombin) { const ATOM_T * _noalias const x = (const ATOM_T * _noalias const)xin; int nlocal = atom->nlocal; const int nall = nlocal + atom->nghost; const double sboxlo0 = bboxlo[0] + mbinxlo/bininvx; const double sboxlo1 = bboxlo[1] + mbinylo/bininvy; const double sboxlo2 = bboxlo[2] + mbinzlo/bininvz; int i, ibin; for (i = 0; i < mbins; i++) binhead[i] = -1; int *mask = atom->mask; if (includegroup) { int bitmask = group->bitmask[includegroup]; for (i = nall-1; i >= nlocal; i--) { if (mask[i] & bitmask) { ibin = coord2bin(atom->x[i]); bins[i] = binhead[ibin]; binhead[ibin] = i; } } for (i = atom->nfirst-1; i >= 0; i--) { ibin = coord2bin(atom->x[i]); atombin[i] = ibin; bins[i] = binhead[ibin]; binhead[ibin] = i; } } else { for (i = nall-1; i >= nlocal; i--) { ibin = coord2bin(atom->x[i]); bins[i] = binhead[ibin]; binhead[ibin] = i; } for (i = nlocal-1; i >= 0; i--) { ibin = coord2bin(atom->x[i]); atombin[i]=ibin; bins[i] = binhead[ibin]; binhead[ibin] = i; } } } /* ---------------------------------------------------------------------- binned neighbor list construction with partial Newton's 3rd law each owned atom i checks own bin and other bins in stencil pair stored once if i,j are both owned and i < j pair stored by me if j is ghost (also stored by proc owning j) ------------------------------------------------------------------------- */ void Neighbor::half_bin_no_newton_intel(NeighList *list) { const int nlocal = (includegroup) ? 
atom->nfirst : atom->nlocal; list->inum = nlocal; // Get fix for intel stuff FixIntel *fix = static_cast(fix_intel); const int off_end = fix->offload_end_neighbor(); int host_start = off_end;; #ifdef _LMP_INTEL_OFFLOAD if (fix->full_host_list()) host_start = 0; if (exclude) error->all(FLERR, "Exclusion lists not yet supported for Intel offload"); #endif - if (fix->precision() == FixIntel::PREC_MODE_MIXED) { - hbnni(1, list, fix->get_mixed_buffers(), - 0, off_end, fix); - hbnni(0, list, fix->get_mixed_buffers(), - host_start, nlocal,fix); - } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { - hbnni(1, list, fix->get_double_buffers(), - 0, off_end, fix); - hbnni(0, list, fix->get_double_buffers(), - host_start, nlocal, fix); + int need_ic = 0; + if (atom->molecular) + dminimum_image_check(need_ic, cutneighmax, cutneighmax, cutneighmax); + + if (need_ic) { + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + hbnni(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + hbnni(0, list, fix->get_mixed_buffers(), + host_start, nlocal,fix); + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + hbnni(1, list, fix->get_double_buffers(), + 0, off_end, fix); + hbnni(0, list, fix->get_double_buffers(), + host_start, nlocal, fix); + } else { + hbnni(1, list, fix->get_single_buffers(), + 0, off_end, fix); + hbnni(0, list, fix->get_single_buffers(), + host_start, nlocal, fix); + } } else { - hbnni(1, list, fix->get_single_buffers(), - 0, off_end, fix); - hbnni(0, list, fix->get_single_buffers(), - host_start, nlocal, fix); + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + hbnni(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + hbnni(0, list, fix->get_mixed_buffers(), + host_start, nlocal,fix); + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + hbnni(1, list, fix->get_double_buffers(), + 0, off_end, fix); + hbnni(0, list, fix->get_double_buffers(), + host_start, nlocal, fix); + } else { + hbnni(1, list, 
fix->get_single_buffers(), + 0, off_end, fix); + hbnni(0, list, fix->get_single_buffers(), + host_start, nlocal, fix); + } } } -template +template void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in, const int astart, const int aend, void *fix_in) { IntelBuffers *buffers = (IntelBuffers *)buffers_in; FixIntel *fix = (FixIntel *)fix_in; const int nall = atom->nlocal + atom->nghost; int pad = 1; if (offload) { fix->start_watch(TIME_PACK); buffers->grow(nall, atom->nlocal, comm->nthreads, aend); buffers->grow_nbor(list, atom->nlocal, aend); ATOM_T biga; biga.x = INTEL_BIGP; biga.y = INTEL_BIGP; biga.z = INTEL_BIGP; biga.w = 1; buffers->get_x()[nall] = biga; const int nthreads = comm->nthreads; #if defined(_OPENMP) #pragma omp parallel default(none) shared(buffers) #endif { int ifrom, ito, tid; IP_PRE_omp_range_id_align(ifrom, ito, tid, nall, nthreads, sizeof(ATOM_T)); buffers->thr_pack(ifrom, ito, 0); } fix->stop_watch(TIME_PACK); fix->start_watch(TIME_HOST_NEIGHBOR); bin_atoms(buffers->get_x(), buffers->get_atombin()); if (INTEL_MIC_NBOR_PAD > 1) pad = INTEL_MIC_NBOR_PAD * sizeof(float) / sizeof(flt_t); } else { fix->start_watch(TIME_HOST_NEIGHBOR); if (INTEL_NBOR_PAD > 1) pad = INTEL_NBOR_PAD * sizeof(float) / sizeof(flt_t); } const int pad_width = pad; if (aend-astart == 0) { fix->stop_watch(TIME_HOST_NEIGHBOR); return; } const ATOM_T * _noalias const x = buffers->get_x(); int * _noalias const firstneigh = buffers->firstneigh(list); const int molecular = atom->molecular; int *ns = NULL; tagint *s = NULL; int tag_size = 0, special_size; if (buffers->need_tag()) tag_size = nall; if (molecular) { s = atom->special[0]; ns = atom->nspecial[0]; special_size = aend; } else { s = &buffers->_special_holder; ns = &buffers->_nspecial_holder; special_size = 0; } const tagint * _noalias const special = s; const int * _noalias const nspecial = ns; const int maxspecial = atom->maxspecial; const tagint * _noalias const tag = atom->tag; int * _noalias const 
ilist = list->ilist; int * _noalias numneigh = list->numneigh; int * _noalias const cnumneigh = buffers->cnumneigh(list); const int nstencil = list->nstencil; const int * _noalias const stencil = list->stencil; const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0]; const int ntypes = atom->ntypes + 1; const int nlocal = atom->nlocal; #ifndef _LMP_INTEL_OFFLOAD int * const mask = atom->mask; tagint * const molecule = atom->molecule; #endif int tnum; int *overflow; double *timer_compute; if (offload) { timer_compute = fix->off_watch_neighbor(); tnum = buffers->get_off_threads(); overflow = fix->get_off_overflow_flag(); fix->stop_watch(TIME_HOST_NEIGHBOR); fix->start_watch(TIME_OFFLOAD_LATENCY); } else { tnum = comm->nthreads; overflow = fix->get_overflow_flag(); } const int nthreads = tnum; const int maxnbors = buffers->get_max_nbors(); int * _noalias const atombin = buffers->get_atombin(); + const int xperiodic = domain->xperiodic; + const int yperiodic = domain->yperiodic; + const int zperiodic = domain->zperiodic; + const flt_t xprd_half = domain->xprd_half; + const flt_t yprd_half = domain->yprd_half; + const flt_t zprd_half = domain->zprd_half; + // Make sure dummy coordinates to eliminate loop remainder not within cutoff { const flt_t dx = (INTEL_BIGP - bboxhi[0]); const flt_t dy = (INTEL_BIGP - bboxhi[1]); const flt_t dz = (INTEL_BIGP - bboxhi[2]); if (dx * dx + dy * dy + dz * dz < static_cast(cutneighmaxsq)) error->one(FLERR, "Intel package expects no atoms within cutoff of {1e15,1e15,1e15}."); } #ifdef _LMP_INTEL_OFFLOAD const int * _noalias const binhead = this->binhead; const int * _noalias const special_flag = this->special_flag; const int * _noalias const bins = this->bins; const int cop = fix->coprocessor_number(); const int separate_buffers = fix->separate_buffers(); #pragma offload target(mic:cop) if(offload) \ in(x:length(nall+1) alloc_if(0) free_if(0)) \ in(tag:length(tag_size) alloc_if(0) free_if(0)) \ 
in(special:length(special_size*maxspecial) alloc_if(0) free_if(0)) \ in(nspecial:length(special_size*3) alloc_if(0) free_if(0)) \ in(bins:length(nall) alloc_if(0) free_if(0)) \ in(binhead:length(mbins) alloc_if(0) free_if(0)) \ in(cutneighsq:length(0) alloc_if(0) free_if(0)) \ in(firstneigh:length(0) alloc_if(0) free_if(0)) \ in(cnumneigh:length(0) alloc_if(0) free_if(0)) \ out(numneigh:length(0) alloc_if(0) free_if(0)) \ in(ilist:length(0) alloc_if(0) free_if(0)) \ in(atombin:length(aend) alloc_if(0) free_if(0)) \ in(stencil:length(nstencil) alloc_if(0) free_if(0)) \ in(special_flag:length(0) alloc_if(0) free_if(0)) \ - in(maxnbors,nthreads,maxspecial,nstencil,pad_width,offload) \ + in(maxnbors,nthreads,maxspecial,nstencil,pad_width,offload) \ in(separate_buffers, astart, aend, nlocal, molecular, ntypes) \ + in(xperiodic, yperiodic, zperiodic, xprd_half, yprd_half, zprd_half) \ out(overflow:length(5) alloc_if(0) free_if(0)) \ out(timer_compute:length(1) alloc_if(0) free_if(0)) \ signal(tag) #endif { #ifdef __MIC__ *timer_compute = MIC_Wtime(); #endif #ifdef _LMP_INTEL_OFFLOAD overflow[LMP_LOCAL_MIN] = astart; overflow[LMP_LOCAL_MAX] = aend - 1; overflow[LMP_GHOST_MIN] = nall; overflow[LMP_GHOST_MAX] = -1; #endif #if defined(_OPENMP) #pragma omp parallel default(none) shared(numneigh,overflow) #endif { #ifdef _LMP_INTEL_OFFLOAD int lmin = nall, lmax = -1, gmin = nall, gmax = -1; #endif const int num = aend - astart; int tid, ifrom, ito; IP_PRE_omp_range_id(ifrom, ito, tid, num, nthreads); ifrom += astart; ito += astart; int which; const int list_size = (ito + tid + 1) * maxnbors; int ct = (ifrom + tid) * maxnbors; int *neighptr = firstneigh + ct; for (int i = ifrom; i < ito; i++) { int j, k, n, n2, itype, jtype, ibin; double xtmp, ytmp, ztmp, delx, dely, delz, rsq; n = 0; n2 = maxnbors; xtmp = x[i].x; ytmp = x[i].y; ztmp = x[i].z; itype = x[i].w; const int ioffset = ntypes*itype; // loop over all atoms in other bins in stencil including self // only store pair if i 
< j // stores own/own pairs only once // stores own/ghost pairs on both procs ibin = atombin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin + stencil[k]]; j >= 0; j = bins[j]) { if (j <= i) continue; jtype = x[j].w; #ifndef _LMP_INTEL_OFFLOAD if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; #endif delx = xtmp - x[j].x; dely = ytmp - x[j].y; delz = ztmp - x[j].z; rsq = delx * delx + dely * dely + delz * delz; if (rsq <= cutneighsq[ioffset + jtype]) { if (j < nlocal) { - neighptr[n++] = j; + if (need_ic) { + int no_special; + ominimum_image_check(no_special, delx, dely, delz); + if (no_special) + neighptr[n++] = -j - 1; + else + neighptr[n++] = j; + } else + neighptr[n++] = j; #ifdef _LMP_INTEL_OFFLOAD if (j < lmin) lmin = j; if (j > lmax) lmax = j; #endif } else { - neighptr[n2++] = j; + if (need_ic) { + int no_special; + ominimum_image_check(no_special, delx, dely, delz); + if (no_special) + neighptr[n2++] = -j - 1; + else + neighptr[n2++] = j; + } else + neighptr[n2++] = j; #ifdef _LMP_INTEL_OFFLOAD if (j < gmin) gmin = j; if (j > gmax) gmax = j; #endif } } } } ilist[i] = i; cnumneigh[i] = ct; if (n > maxnbors) *overflow = 1; for (k = maxnbors; k < n2; k++) neighptr[n++] = neighptr[k]; while( (n % pad_width) != 0 ) neighptr[n++] = nall; numneigh[i] = n; while((n % (INTEL_DATA_ALIGN / sizeof(int))) != 0) n++; ct += n; neighptr += n; if (ct + n + maxnbors > list_size) { *overflow = 1; ct = (ifrom + tid) * maxnbors; } } if (*overflow == 1) for (int i = ifrom; i < ito; i++) numneigh[i] = 0; #ifdef _LMP_INTEL_OFFLOAD if (separate_buffers) { #if defined(_OPENMP) #pragma omp critical #endif { if (lmin < overflow[LMP_LOCAL_MIN]) overflow[LMP_LOCAL_MIN] = lmin; if (lmax > overflow[LMP_LOCAL_MAX]) overflow[LMP_LOCAL_MAX] = lmax; if (gmin < overflow[LMP_GHOST_MIN]) overflow[LMP_GHOST_MIN] = gmin; if (gmax > overflow[LMP_GHOST_MAX]) overflow[LMP_GHOST_MAX] = gmax; } #pragma omp barrier } int ghost_offset = 0, nall_offset = nall; if 
(separate_buffers) { int nghost = overflow[LMP_GHOST_MAX] + 1 - overflow[LMP_GHOST_MIN]; if (nghost < 0) nghost = 0; if (offload) { ghost_offset = overflow[LMP_GHOST_MIN] - overflow[LMP_LOCAL_MAX] - 1; nall_offset = overflow[LMP_LOCAL_MAX] + 1 + nghost; } else { ghost_offset = overflow[LMP_GHOST_MIN] - nlocal; nall_offset = nlocal + nghost; } } #endif if (molecular) { for (int i = ifrom; i < ito; ++i) { int * _noalias jlist = firstneigh + cnumneigh[i]; const int jnum = numneigh[i]; for (int jj = 0; jj < jnum; jj++) { const int j = jlist[jj]; - ofind_special(which, special, nspecial, i, tag[j], special_flag); + if (need_ic && j < 0) { + which = 0; + jlist[jj] = -j - 1; + } else + ofind_special(which, special, nspecial, i, tag[j], special_flag); #ifdef _LMP_INTEL_OFFLOAD if (j >= nlocal) { if (j == nall) jlist[jj] = nall_offset; else if (which > 0) jlist[jj] = (j-ghost_offset) ^ (which << SBBITS); else jlist[jj]-=ghost_offset; } else #endif if (which > 0) jlist[jj] = j ^ (which << SBBITS); } } } #ifdef _LMP_INTEL_OFFLOAD else if (separate_buffers) { for (int i = ifrom; i < ito; ++i) { int * _noalias jlist = firstneigh + cnumneigh[i]; const int jnum = numneigh[i]; int jj = 0; for (jj = 0; jj < jnum; jj++) if (jlist[jj] >= nlocal) break; while (jj < jnum) { if (jlist[jj] == nall) jlist[jj] = nall_offset; else jlist[jj] -= ghost_offset; jj++; } } } #endif } // end omp #ifdef __MIC__ *timer_compute = MIC_Wtime() - *timer_compute; #endif } // end offload if (offload) { fix->stop_watch(TIME_OFFLOAD_LATENCY); #ifdef _LMP_INTEL_OFFLOAD for (int n = 0; n < aend; n++) { ilist[n] = n; numneigh[n] = 0; } #endif } else { for (int i = astart; i < aend; i++) list->firstneigh[i] = firstneigh + cnumneigh[i]; fix->stop_watch(TIME_HOST_NEIGHBOR); #ifdef _LMP_INTEL_OFFLOAD if (separate_buffers) { fix->start_watch(TIME_PACK); fix->set_neighbor_host_sizes(); buffers->pack_sep_from_single(fix->host_min_local(), fix->host_used_local(), fix->host_min_ghost(), fix->host_used_ghost()); 
fix->stop_watch(TIME_PACK); } #endif } } /* ---------------------------------------------------------------------- binned neighbor list construction with full Newton's 3rd law each owned atom i checks its own bin and other bins in Newton stencil every pair stored exactly once by some processor ------------------------------------------------------------------------- */ void Neighbor::half_bin_newton_intel(NeighList *list) { const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; list->inum = nlocal; // Get fix for intel stuff FixIntel *fix = static_cast(fix_intel); const int off_end = fix->offload_end_neighbor(); int host_start = fix->host_start_neighbor();; int offload_noghost = 0; #ifdef _LMP_INTEL_OFFLOAD if (fix->full_host_list()) host_start = 0; offload_noghost = fix->offload_noghost(); if (exclude) error->all(FLERR, "Exclusion lists not yet supported for Intel offload"); #endif - if (fix->precision() == FixIntel::PREC_MODE_MIXED) { - if (offload_noghost) { - hbni(1, list, fix->get_mixed_buffers(), - 0, off_end, fix); - hbni(0, list, fix->get_mixed_buffers(), - host_start, nlocal, fix, off_end); - } else { - hbni(1, list, fix->get_mixed_buffers(), - 0, off_end, fix); - hbni(0, list, fix->get_mixed_buffers(), - host_start, nlocal, fix); - } - } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { - if (offload_noghost) { - hbni(1, list, fix->get_double_buffers(), - 0, off_end, fix); - hbni(0, list, fix->get_double_buffers(), - host_start, nlocal, fix, off_end); + int need_ic = 0; + if (atom->molecular) + dminimum_image_check(need_ic, cutneighmax, cutneighmax, cutneighmax); + + if (need_ic) { + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + hbni(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + hbni(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + hbni(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + hbni(0, list, 
fix->get_mixed_buffers(), + host_start, nlocal, fix); + } + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + hbni(1, list, fix->get_double_buffers(), + 0, off_end, fix); + hbni(0, list, fix->get_double_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + hbni(1, list, fix->get_double_buffers(), + 0, off_end, fix); + hbni(0, list, fix->get_double_buffers(), + host_start, nlocal, fix); + } } else { - hbni(1, list, fix->get_double_buffers(), - 0, off_end, fix); - hbni(0, list, fix->get_double_buffers(), - host_start, nlocal, fix); + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + hbni(1, list, fix->get_single_buffers(), 0, off_end, + fix); + hbni(0, list, fix->get_single_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + hbni(1, list, fix->get_single_buffers(), 0, off_end, + fix); + hbni(0, list, fix->get_single_buffers(), + host_start, nlocal, fix); + } } } else { - if (offload_noghost) { - hbni(1, list, fix->get_single_buffers(), 0, off_end, fix); - hbni(0, list, fix->get_single_buffers(), - host_start, nlocal, fix, off_end); + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + hbni(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + hbni(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + hbni(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + hbni(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix); + } + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + hbni(1, list, fix->get_double_buffers(), + 0, off_end, fix); + hbni(0, list, fix->get_double_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + hbni(1, list, fix->get_double_buffers(), + 0, off_end, fix); + hbni(0, list, fix->get_double_buffers(), + host_start, nlocal, fix); + } } else { - hbni(1, 
list, fix->get_single_buffers(), 0, off_end, fix); - hbni(0, list, fix->get_single_buffers(), - host_start, nlocal, fix); + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + hbni(1, list, fix->get_single_buffers(), 0, off_end, + fix); + hbni(0, list, fix->get_single_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + hbni(1, list, fix->get_single_buffers(), 0, off_end, + fix); + hbni(0, list, fix->get_single_buffers(), + host_start, nlocal, fix); + } } } } -template +template void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in, const int astart, const int aend, void *fix_in, const int offload_end) { IntelBuffers *buffers = (IntelBuffers *)buffers_in; FixIntel *fix = (FixIntel *)fix_in; const int nall = atom->nlocal + atom->nghost; int pad = 1; if (offload) { fix->start_watch(TIME_PACK); buffers->grow(nall, atom->nlocal, comm->nthreads, aend); buffers->grow_nbor(list, atom->nlocal, aend); ATOM_T biga; biga.x = INTEL_BIGP; biga.y = INTEL_BIGP; biga.z = INTEL_BIGP; biga.w = 1; buffers->get_x()[nall]=biga; const int nthreads = comm->nthreads; #if defined(_OPENMP) #pragma omp parallel default(none) shared(buffers) #endif { int ifrom, ito, tid; IP_PRE_omp_range_id_align(ifrom, ito, tid, nall, nthreads, sizeof(ATOM_T)); buffers->thr_pack(ifrom, ito, 0); } fix->stop_watch(TIME_PACK); fix->start_watch(TIME_HOST_NEIGHBOR); bin_atoms(buffers->get_x(), buffers->get_atombin()); if (INTEL_MIC_NBOR_PAD > 1) pad = INTEL_MIC_NBOR_PAD * sizeof(float) / sizeof(flt_t); } else { fix->start_watch(TIME_HOST_NEIGHBOR); if (INTEL_NBOR_PAD > 1) pad = INTEL_NBOR_PAD * sizeof(float) / sizeof(flt_t); } const int pad_width = pad; if (aend-astart == 0) { fix->stop_watch(TIME_HOST_NEIGHBOR); return; } const ATOM_T * _noalias const x = buffers->get_x(); int * _noalias const firstneigh = buffers->firstneigh(list); int nall_t = nall; if (offload_noghost && offload) nall_t = atom->nlocal; const int e_nall = nall_t; const int molecular = atom->molecular; int 
*ns = NULL; tagint *s = NULL; int tag_size = 0, special_size; if (buffers->need_tag()) tag_size = e_nall; if (molecular) { s = atom->special[0]; ns = atom->nspecial[0]; special_size = aend; } else { s = &buffers->_special_holder; ns = &buffers->_nspecial_holder; special_size = 0; } const tagint * _noalias const special = s; const int * _noalias const nspecial = ns; const int maxspecial = atom->maxspecial; const tagint * _noalias const tag = atom->tag; int * _noalias const ilist = list->ilist; int * _noalias numneigh = list->numneigh; int * _noalias const cnumneigh = buffers->cnumneigh(list); const int nstencil = list->nstencil; const int * _noalias const stencil = list->stencil; const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0]; const int ntypes = atom->ntypes + 1; const int nlocal = atom->nlocal; #ifndef _LMP_INTEL_OFFLOAD int * const mask = atom->mask; tagint * const molecule = atom->molecule; #endif int tnum; int *overflow; double *timer_compute; if (offload) { timer_compute = fix->off_watch_neighbor(); tnum = buffers->get_off_threads(); overflow = fix->get_off_overflow_flag(); fix->stop_watch(TIME_HOST_NEIGHBOR); fix->start_watch(TIME_OFFLOAD_LATENCY); } else { tnum = comm->nthreads; overflow = fix->get_overflow_flag(); } const int nthreads = tnum; const int maxnbors = buffers->get_max_nbors(); int * _noalias const atombin = buffers->get_atombin(); + const int xperiodic = domain->xperiodic; + const int yperiodic = domain->yperiodic; + const int zperiodic = domain->zperiodic; + const flt_t xprd_half = domain->xprd_half; + const flt_t yprd_half = domain->yprd_half; + const flt_t zprd_half = domain->zprd_half; + // Make sure dummy coordinates to eliminate loop remainder not within cutoff { const flt_t dx = (INTEL_BIGP - bboxhi[0]); const flt_t dy = (INTEL_BIGP - bboxhi[1]); const flt_t dz = (INTEL_BIGP - bboxhi[2]); if (dx * dx + dy * dy + dz * dz < static_cast(cutneighmaxsq)) error->one(FLERR, "Intel package expects no atoms within cutoff of 
{1e15,1e15,1e15}."); } #ifdef _LMP_INTEL_OFFLOAD const int * _noalias const binhead = this->binhead; const int * _noalias const special_flag = this->special_flag; const int * _noalias const bins = this->bins; const int cop = fix->coprocessor_number(); const int separate_buffers = fix->separate_buffers(); #pragma offload target(mic:cop) if(offload) \ in(x:length(e_nall+1) alloc_if(0) free_if(0)) \ in(tag:length(tag_size) alloc_if(0) free_if(0)) \ in(special:length(special_size*maxspecial) alloc_if(0) free_if(0)) \ in(nspecial:length(special_size*3) alloc_if(0) free_if(0)) \ in(bins:length(nall) alloc_if(0) free_if(0)) \ in(binhead:length(mbins) alloc_if(0) free_if(0)) \ in(cutneighsq:length(0) alloc_if(0) free_if(0)) \ in(firstneigh:length(0) alloc_if(0) free_if(0)) \ in(cnumneigh:length(0) alloc_if(0) free_if(0)) \ out(numneigh:length(0) alloc_if(0) free_if(0)) \ in(ilist:length(0) alloc_if(0) free_if(0)) \ in(atombin:length(aend) alloc_if(0) free_if(0)) \ in(stencil:length(nstencil) alloc_if(0) free_if(0)) \ in(special_flag:length(0) alloc_if(0) free_if(0)) \ in(maxnbors,nthreads,maxspecial,nstencil,e_nall,offload,pad_width) \ in(offload_end,separate_buffers,astart, aend, nlocal, molecular, ntypes) \ + in(xperiodic, yperiodic, zperiodic, xprd_half, yprd_half, zprd_half) \ out(overflow:length(5) alloc_if(0) free_if(0)) \ out(timer_compute:length(1) alloc_if(0) free_if(0)) \ signal(tag) #endif { #ifdef __MIC__ *timer_compute = MIC_Wtime(); #endif #ifdef _LMP_INTEL_OFFLOAD overflow[LMP_LOCAL_MIN] = astart; overflow[LMP_LOCAL_MAX] = aend - 1; overflow[LMP_GHOST_MIN] = e_nall; overflow[LMP_GHOST_MAX] = -1; #endif #if defined(_OPENMP) #pragma omp parallel default(none) shared(numneigh, overflow) #endif { #ifdef _LMP_INTEL_OFFLOAD int lmin = e_nall, lmax = -1, gmin = e_nall, gmax = -1; #endif const int num = aend - astart; int tid, ifrom, ito; IP_PRE_omp_range_id(ifrom, ito, tid, num, nthreads); ifrom += astart; ito += astart; int which; const int list_size = (ito + tid 
+ 1) * maxnbors; int ct = (ifrom + tid) * maxnbors; int *neighptr = firstneigh + ct; for (int i = ifrom; i < ito; i++) { int j, k, n, n2, itype, jtype, ibin; double xtmp, ytmp, ztmp, delx, dely, delz, rsq; n = 0; n2 = maxnbors; xtmp = x[i].x; ytmp = x[i].y; ztmp = x[i].z; itype = x[i].w; const int ioffset = ntypes * itype; // loop over rest of atoms in i's bin, ghosts are at end of linked list // if j is owned atom, store it, since j is beyond i in linked list // if j is ghost, only store if j coords are "above/to the right" of i for (j = bins[i]; j >= 0; j = bins[j]) { if (j >= nlocal) { if (offload_noghost && offload) continue; if (x[j].z < ztmp) continue; if (x[j].z == ztmp) { if (x[j].y < ytmp) continue; if (x[j].y == ytmp && x[j].x < xtmp) continue; } } else if (offload_noghost && i < offload_end) continue; jtype = x[j].w; #ifndef _LMP_INTEL_OFFLOAD if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; #endif delx = xtmp - x[j].x; dely = ytmp - x[j].y; delz = ztmp - x[j].z; rsq = delx * delx + dely * dely + delz * delz; if (rsq <= cutneighsq[ioffset + jtype]) { if (j < nlocal) { - neighptr[n++] = j; + if (need_ic) { + int no_special; + ominimum_image_check(no_special, delx, dely, delz); + if (no_special) + neighptr[n++] = -j - 1; + else + neighptr[n++] = j; + } else + neighptr[n++] = j; #ifdef _LMP_INTEL_OFFLOAD if (j < lmin) lmin = j; if (j > lmax) lmax = j; #endif } else { - neighptr[n2++] = j; + if (need_ic) { + int no_special; + ominimum_image_check(no_special, delx, dely, delz); + if (no_special) + neighptr[n2++] = -j - 1; + else + neighptr[n2++] = j; + } else + neighptr[n2++] = j; #ifdef _LMP_INTEL_OFFLOAD if (j < gmin) gmin = j; if (j > gmax) gmax = j; #endif } } } // loop over all atoms in other bins in stencil, store every pair ibin = atombin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin + stencil[k]]; j >= 0; j = bins[j]) { if (offload_noghost) { if (j < nlocal) { if (i < offload_end) continue; } else if (offload) continue; 
} jtype = x[j].w; #ifndef _LMP_INTEL_OFFLOAD if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; #endif delx = xtmp - x[j].x; dely = ytmp - x[j].y; delz = ztmp - x[j].z; rsq = delx * delx + dely * dely + delz * delz; if (rsq <= cutneighsq[ioffset + jtype]) { if (j < nlocal) { - neighptr[n++] = j; + if (need_ic) { + int no_special; + ominimum_image_check(no_special, delx, dely, delz); + if (no_special) + neighptr[n++] = -j - 1; + else + neighptr[n++] = j; + } else + neighptr[n++] = j; #ifdef _LMP_INTEL_OFFLOAD if (j < lmin) lmin = j; if (j > lmax) lmax = j; #endif } else { - neighptr[n2++] = j; + if (need_ic) { + int no_special; + ominimum_image_check(no_special, delx, dely, delz); + if (no_special) + neighptr[n2++] = -j - 1; + else + neighptr[n2++] = j; + } else + neighptr[n2++] = j; #ifdef _LMP_INTEL_OFFLOAD if (j < gmin) gmin = j; if (j > gmax) gmax = j; #endif } } } } ilist[i] = i; cnumneigh[i] = ct; if (n > maxnbors) *overflow = 1; for (k = maxnbors; k < n2; k++) neighptr[n++] = neighptr[k]; while( (n % pad_width) != 0 ) neighptr[n++] = e_nall; numneigh[i] = n; while((n % (INTEL_DATA_ALIGN / sizeof(int))) != 0) n++; ct += n; neighptr += n; if (ct + n + maxnbors > list_size) { *overflow = 1; ct = (ifrom + tid) * maxnbors; } } if (*overflow == 1) for (int i = ifrom; i < ito; i++) numneigh[i] = 0; #ifdef _LMP_INTEL_OFFLOAD if (separate_buffers) { #if defined(_OPENMP) #pragma omp critical #endif { if (lmin < overflow[LMP_LOCAL_MIN]) overflow[LMP_LOCAL_MIN] = lmin; if (lmax > overflow[LMP_LOCAL_MAX]) overflow[LMP_LOCAL_MAX] = lmax; if (gmin < overflow[LMP_GHOST_MIN]) overflow[LMP_GHOST_MIN] = gmin; if (gmax > overflow[LMP_GHOST_MAX]) overflow[LMP_GHOST_MAX] = gmax; } #pragma omp barrier } int ghost_offset = 0, nall_offset = e_nall; if (separate_buffers) { int nghost = overflow[LMP_GHOST_MAX] + 1 - overflow[LMP_GHOST_MIN]; - if (nghost < 0) nghost = 0; + if (nghost < 0) nghost = 0; if (offload) { ghost_offset = overflow[LMP_GHOST_MIN] - 
overflow[LMP_LOCAL_MAX] - 1; nall_offset = overflow[LMP_LOCAL_MAX] + 1 + nghost; } else { ghost_offset = overflow[LMP_GHOST_MIN] - nlocal; nall_offset = nlocal + nghost; } } #endif if (molecular) { for (int i = ifrom; i < ito; ++i) { int * _noalias jlist = firstneigh + cnumneigh[i]; const int jnum = numneigh[i]; for (int jj = 0; jj < jnum; jj++) { const int j = jlist[jj]; - ofind_special(which, special, nspecial, i, tag[j], - special_flag); + if (need_ic && j < 0) { + which = 0; + jlist[jj] = -j - 1; + } else + ofind_special(which, special, nspecial, i, tag[j], + special_flag); #ifdef _LMP_INTEL_OFFLOAD if (j >= nlocal) { if (j == e_nall) jlist[jj] = nall_offset; else if (which > 0) jlist[jj] = (j-ghost_offset) ^ (which << SBBITS); else jlist[jj]-=ghost_offset; } else #endif if (which > 0) jlist[jj] = j ^ (which << SBBITS); } } } #ifdef _LMP_INTEL_OFFLOAD else if (separate_buffers) { for (int i = ifrom; i < ito; ++i) { int * _noalias jlist = firstneigh + cnumneigh[i]; const int jnum = numneigh[i]; int jj = 0; for (jj = 0; jj < jnum; jj++) if (jlist[jj] >= nlocal) break; while (jj < jnum) { if (jlist[jj] == e_nall) jlist[jj] = nall_offset; else jlist[jj] -= ghost_offset; jj++; } } } #endif } // end omp #ifdef __MIC__ *timer_compute = MIC_Wtime() - *timer_compute; #endif } // end offload if (offload) { fix->stop_watch(TIME_OFFLOAD_LATENCY); #ifdef _LMP_INTEL_OFFLOAD for (int n = 0; n < aend; n++) { ilist[n] = n; numneigh[n] = 0; } #endif } else { for (int i = astart; i < aend; i++) list->firstneigh[i] = firstneigh + cnumneigh[i]; fix->stop_watch(TIME_HOST_NEIGHBOR); #ifdef _LMP_INTEL_OFFLOAD if (separate_buffers) { fix->start_watch(TIME_PACK); fix->set_neighbor_host_sizes(); buffers->pack_sep_from_single(fix->host_min_local(), fix->host_used_local(), fix->host_min_ghost(), fix->host_used_ghost()); fix->stop_watch(TIME_PACK); } #endif } } /* ---------------------------------------------------------------------- binned neighbor list construction with Newton's 3rd law 
for triclinic each owned atom i checks its own bin and other bins in triclinic stencil every pair stored exactly once by some processor ------------------------------------------------------------------------- */ void Neighbor::half_bin_newton_tri_intel(NeighList *list) { const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; list->inum = nlocal; // Get fix for intel stuff FixIntel *fix = static_cast(fix_intel); const int off_end = fix->offload_end_neighbor(); int host_start = fix->host_start_neighbor(); int offload_noghost = 0; #ifdef _LMP_INTEL_OFFLOAD if (fix->full_host_list()) host_start = 0; offload_noghost = fix->offload_noghost(); if (exclude) error->all(FLERR, "Exclusion lists not yet supported for Intel offload"); #endif - if (fix->precision() == FixIntel::PREC_MODE_MIXED) { - if (offload_noghost) { - hbnti(1, list, fix->get_mixed_buffers(), - 0, off_end, fix); - hbnti(0, list, fix->get_mixed_buffers(), - host_start, nlocal, fix, off_end); - } else { - hbnti(1, list, fix->get_mixed_buffers(), - 0, off_end, fix); - hbnti(0, list, fix->get_mixed_buffers(), - host_start, nlocal, fix); - } - } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { - if (offload_noghost) { - hbnti(1, list, fix->get_double_buffers(), - 0, off_end, fix); - hbnti(0, list, fix->get_double_buffers(), - host_start, nlocal, fix, off_end); + int need_ic = 0; + if (atom->molecular) + dminimum_image_check(need_ic, cutneighmax, cutneighmax, cutneighmax); + + if (need_ic) { + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + hbnti(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + hbnti(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix); + } + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + #ifdef _LMP_INTEL_OFFLOAD + if 
(offload_noghost) { + hbnti(1, list, fix->get_double_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_double_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + hbnti(1, list, fix->get_double_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_double_buffers(), + host_start, nlocal, fix); + } } else { - hbnti(1, list, fix->get_double_buffers(), - 0, off_end, fix); - hbnti(0, list, fix->get_double_buffers(), - host_start, nlocal, fix); + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + hbnti(1, list, fix->get_single_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_single_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + hbnti(1, list, fix->get_single_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_single_buffers(), + host_start, nlocal, fix); + } } } else { - if (offload_noghost) { - hbnti(1, list, fix->get_single_buffers(), - 0, off_end, fix); - hbnti(0, list, fix->get_single_buffers(), - host_start, nlocal, fix, off_end); + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + hbnti(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + hbnti(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix); + } + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + hbnti(1, list, fix->get_double_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_double_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + hbnti(1, list, fix->get_double_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_double_buffers(), + host_start, nlocal, fix); + } } else { - hbnti(1, list, fix->get_single_buffers(), - 0, off_end, fix); - hbnti(0, list, fix->get_single_buffers(), - host_start, nlocal, fix); + 
#ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + hbnti(1, list, fix->get_single_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_single_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + hbnti(1, list, fix->get_single_buffers(), + 0, off_end, fix); + hbnti(0, list, fix->get_single_buffers(), + host_start, nlocal, fix); + } } } } -template +template void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in, const int astart, const int aend, void *fix_in, const int offload_end) { IntelBuffers *buffers = (IntelBuffers *)buffers_in; FixIntel *fix = (FixIntel *)fix_in; const int nall = atom->nlocal + atom->nghost; int pad = 1; if (offload) { fix->start_watch(TIME_PACK); buffers->grow(nall, atom->nlocal, comm->nthreads, aend); buffers->grow_nbor(list, atom->nlocal, aend); ATOM_T biga; biga.x = INTEL_BIGP; biga.y = INTEL_BIGP; biga.z = INTEL_BIGP; biga.w = 1; buffers->get_x()[nall]=biga; const int nthreads = comm->nthreads; #if defined(_OPENMP) #pragma omp parallel default(none) shared(buffers) #endif { int ifrom, ito, tid; IP_PRE_omp_range_id_align(ifrom, ito, tid, nall, nthreads, sizeof(ATOM_T)); buffers->thr_pack(ifrom, ito, 0); } fix->stop_watch(TIME_PACK); fix->start_watch(TIME_HOST_NEIGHBOR); bin_atoms(buffers->get_x(), buffers->get_atombin()); if (INTEL_MIC_NBOR_PAD > 1) pad = INTEL_MIC_NBOR_PAD * sizeof(float) / sizeof(flt_t); } else { fix->start_watch(TIME_HOST_NEIGHBOR); if (INTEL_NBOR_PAD > 1) pad = INTEL_NBOR_PAD * sizeof(float) / sizeof(flt_t); } const int pad_width = pad; if (aend-astart == 0) { fix->stop_watch(TIME_HOST_NEIGHBOR); return; } const ATOM_T * _noalias const x = buffers->get_x(); int * _noalias const firstneigh = buffers->firstneigh(list); int nall_t = nall; if (offload_noghost && offload) nall_t = atom->nlocal; const int e_nall = nall_t; const int molecular = atom->molecular; int *ns = NULL; tagint *s = NULL; int tag_size = 0, special_size; if (buffers->need_tag()) tag_size = e_nall; if 
(molecular) { s = atom->special[0]; ns = atom->nspecial[0]; special_size = aend; } else { s = &buffers->_special_holder; ns = &buffers->_nspecial_holder; special_size = 0; } const tagint * _noalias const special = s; const int * _noalias const nspecial = ns; const int maxspecial = atom->maxspecial; const tagint * _noalias const tag = atom->tag; int * _noalias const ilist = list->ilist; int * _noalias numneigh = list->numneigh; int * _noalias const cnumneigh = buffers->cnumneigh(list); const int nstencil = list->nstencil; const int * _noalias const stencil = list->stencil; const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0]; const int ntypes = atom->ntypes + 1; const int nlocal = atom->nlocal; #ifndef _LMP_INTEL_OFFLOAD int * const mask = atom->mask; tagint * const molecule = atom->molecule; #endif int tnum; int *overflow; double *timer_compute; if (offload) { timer_compute = fix->off_watch_neighbor(); tnum = buffers->get_off_threads(); overflow = fix->get_off_overflow_flag(); fix->stop_watch(TIME_HOST_NEIGHBOR); fix->start_watch(TIME_OFFLOAD_LATENCY); } else { tnum = comm->nthreads; overflow = fix->get_overflow_flag(); } const int nthreads = tnum; const int maxnbors = buffers->get_max_nbors(); int * _noalias const atombin = buffers->get_atombin(); + const int xperiodic = domain->xperiodic; + const int yperiodic = domain->yperiodic; + const int zperiodic = domain->zperiodic; + const flt_t xprd_half = domain->xprd_half; + const flt_t yprd_half = domain->yprd_half; + const flt_t zprd_half = domain->zprd_half; + // Make sure dummy coordinates to eliminate loop remainder not within cutoff { const flt_t dx = (INTEL_BIGP - bboxhi[0]); const flt_t dy = (INTEL_BIGP - bboxhi[1]); const flt_t dz = (INTEL_BIGP - bboxhi[2]); if (dx * dx + dy * dy + dz * dz < static_cast(cutneighmaxsq)) error->one(FLERR, "Intel package expects no atoms within cutoff of {1e15,1e15,1e15}."); } #ifdef _LMP_INTEL_OFFLOAD const int * _noalias const binhead = this->binhead; const 
int * _noalias const special_flag = this->special_flag; const int * _noalias const bins = this->bins; const int cop = fix->coprocessor_number(); const int separate_buffers = fix->separate_buffers(); #pragma offload target(mic:cop) if(offload) \ in(x:length(e_nall+1) alloc_if(0) free_if(0)) \ in(tag:length(tag_size) alloc_if(0) free_if(0)) \ in(special:length(special_size*maxspecial) alloc_if(0) free_if(0)) \ in(nspecial:length(special_size*3) alloc_if(0) free_if(0)) \ in(bins:length(nall) alloc_if(0) free_if(0)) \ in(binhead:length(mbins) alloc_if(0) free_if(0)) \ in(cutneighsq:length(0) alloc_if(0) free_if(0)) \ in(firstneigh:length(0) alloc_if(0) free_if(0)) \ in(cnumneigh:length(0) alloc_if(0) free_if(0)) \ out(numneigh:length(0) alloc_if(0) free_if(0)) \ in(ilist:length(0) alloc_if(0) free_if(0)) \ in(atombin:length(aend) alloc_if(0) free_if(0)) \ in(stencil:length(nstencil) alloc_if(0) free_if(0)) \ in(special_flag:length(0) alloc_if(0) free_if(0)) \ in(maxnbors,nthreads,maxspecial,nstencil,offload_end,pad_width,e_nall) \ in(offload,separate_buffers, astart, aend, nlocal, molecular, ntypes) \ + in(xperiodic, yperiodic, zperiodic, xprd_half, yprd_half, zprd_half) \ out(overflow:length(5) alloc_if(0) free_if(0)) \ out(timer_compute:length(1) alloc_if(0) free_if(0)) \ signal(tag) #endif { #ifdef __MIC__ *timer_compute = MIC_Wtime(); #endif #ifdef _LMP_INTEL_OFFLOAD overflow[LMP_LOCAL_MIN] = astart; overflow[LMP_LOCAL_MAX] = aend - 1; overflow[LMP_GHOST_MIN] = e_nall; overflow[LMP_GHOST_MAX] = -1; #endif #if defined(_OPENMP) #pragma omp parallel default(none) shared(numneigh, overflow) #endif { #ifdef _LMP_INTEL_OFFLOAD int lmin = e_nall, lmax = -1, gmin = e_nall, gmax = -1; #endif const int num = aend-astart; int tid, ifrom, ito; IP_PRE_omp_range_id(ifrom,ito,tid,num,nthreads); ifrom += astart; ito += astart; int which; const int list_size = (ito + tid + 1) * maxnbors; int ct = (ifrom + tid) * maxnbors; int *neighptr = firstneigh + ct; for (int i = ifrom; i < 
ito; i++) { int j, k, n, n2, itype, jtype, ibin; double xtmp, ytmp, ztmp, delx, dely, delz, rsq; n = 0; n2 = maxnbors; xtmp = x[i].x; ytmp = x[i].y; ztmp = x[i].z; itype = x[i].w; const int ioffset = ntypes * itype; // loop over all atoms in bins in stencil // pairs for atoms j "below" i are excluded // below = lower z or (equal z and lower y) or (equal zy and lower x) // (equal zyx and j <= i) // latter excludes self-self interaction but allows superposed atoms ibin = atombin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin + stencil[k]]; j >= 0; j = bins[j]) { if (offload_noghost) { if (j < nlocal) { if (i < offload_end) continue; } else if (offload) continue; } if (x[j].z < ztmp) continue; if (x[j].z == ztmp) { if (x[j].y < ytmp) continue; if (x[j].y == ytmp) { if (x[j].x < xtmp) continue; if (x[j].x == xtmp && j <= i) continue; } } jtype = x[j].w; #ifndef _LMP_INTEL_OFFLOAD if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; #endif delx = xtmp - x[j].x; dely = ytmp - x[j].y; delz = ztmp - x[j].z; rsq = delx * delx + dely * dely + delz * delz; if (rsq <= cutneighsq[ioffset + jtype]) { if (j < nlocal) { - neighptr[n++] = j; + if (need_ic) { + int no_special; + ominimum_image_check(no_special, delx, dely, delz); + if (no_special) + neighptr[n++] = -j - 1; + else + neighptr[n++] = j; + } else + neighptr[n++] = j; #ifdef _LMP_INTEL_OFFLOAD if (j < lmin) lmin = j; if (j > lmax) lmax = j; #endif } else { - neighptr[n2++] = j; + if (need_ic) { + int no_special; + ominimum_image_check(no_special, delx, dely, delz); + if (no_special) + neighptr[n2++] = -j - 1; + else + neighptr[n2++] = j; + } else + neighptr[n2++] = j; #ifdef _LMP_INTEL_OFFLOAD if (j < gmin) gmin = j; if (j > gmax) gmax = j; #endif } } } } ilist[i] = i; cnumneigh[i] = ct; if (n > maxnbors) *overflow = 1; for (k = maxnbors; k < n2; k++) neighptr[n++] = neighptr[k]; while( (n % pad_width) != 0 ) neighptr[n++] = e_nall; numneigh[i] = n; while((n % (INTEL_DATA_ALIGN / sizeof(int))) 
!= 0) n++; ct += n; neighptr += n; if (ct + n + maxnbors > list_size) { *overflow = 1; ct = (ifrom + tid) * maxnbors; } } if (*overflow == 1) for (int i = ifrom; i < ito; i++) numneigh[i] = 0; #ifdef _LMP_INTEL_OFFLOAD if (separate_buffers) { #if defined(_OPENMP) #pragma omp critical #endif { if (lmin < overflow[LMP_LOCAL_MIN]) overflow[LMP_LOCAL_MIN] = lmin; if (lmax > overflow[LMP_LOCAL_MAX]) overflow[LMP_LOCAL_MAX] = lmax; if (gmin < overflow[LMP_GHOST_MIN]) overflow[LMP_GHOST_MIN] = gmin; if (gmax > overflow[LMP_GHOST_MAX]) overflow[LMP_GHOST_MAX] = gmax; } #pragma omp barrier } int ghost_offset = 0, nall_offset = e_nall; if (separate_buffers) { int nghost = overflow[LMP_GHOST_MAX] + 1 - overflow[LMP_GHOST_MIN]; if (nghost < 0) nghost = 0; if (offload) { ghost_offset = overflow[LMP_GHOST_MIN] - overflow[LMP_LOCAL_MAX] - 1; nall_offset = overflow[LMP_LOCAL_MAX] + 1 + nghost; } else { ghost_offset = overflow[LMP_GHOST_MIN] - nlocal; nall_offset = nlocal + nghost; } } #endif if (molecular) { for (int i = ifrom; i < ito; ++i) { int * _noalias jlist = firstneigh + cnumneigh[i]; const int jnum = numneigh[i]; for (int jj = 0; jj < jnum; jj++) { const int j = jlist[jj]; - ofind_special(which, special, nspecial, i, tag[j], special_flag); + if (need_ic && j < 0) { + which = 0; + jlist[jj] = -j - 1; + } else + ofind_special(which, special, nspecial, i, tag[j], special_flag); #ifdef _LMP_INTEL_OFFLOAD if (j >= nlocal) { if (j == e_nall) jlist[jj] = nall_offset; else if (which > 0) jlist[jj] = (j-ghost_offset) ^ (which << SBBITS); else jlist[jj]-=ghost_offset; } else #endif if (which > 0) jlist[jj] = j ^ (which << SBBITS); } } } #ifdef _LMP_INTEL_OFFLOAD else if (separate_buffers) { for (int i = ifrom; i < ito; ++i) { int * _noalias jlist = firstneigh + cnumneigh[i]; const int jnum = numneigh[i]; int jj = 0; for (jj = 0; jj < jnum; jj++) if (jlist[jj] >= nlocal) break; while (jj < jnum) { if (jlist[jj] == e_nall) jlist[jj] = nall_offset; else jlist[jj] -= ghost_offset; 
jj++; } } } #endif } // end omp #ifdef __MIC__ *timer_compute = MIC_Wtime() - *timer_compute; #endif } // end offload if (offload) { fix->stop_watch(TIME_OFFLOAD_LATENCY); #ifdef _LMP_INTEL_OFFLOAD for (int n = 0; n < aend; n++) { ilist[n] = n; numneigh[n] = 0; } #endif } else { for (int i = astart; i < aend; i++) list->firstneigh[i] = firstneigh + cnumneigh[i]; fix->stop_watch(TIME_HOST_NEIGHBOR); #ifdef _LMP_INTEL_OFFLOAD if (separate_buffers) { fix->start_watch(TIME_PACK); fix->set_neighbor_host_sizes(); buffers->pack_sep_from_single(fix->host_min_local(), fix->host_used_local(), fix->host_min_ghost(), fix->host_used_ghost()); fix->stop_watch(TIME_PACK); } #endif } } /* ---------------------------------------------------------------------- binned neighbor list construction for all neighbors every neighbor pair appears in list of both atoms i and j ------------------------------------------------------------------------- */ void Neighbor::full_bin_intel(NeighList *list) { const int nlocal = (includegroup) ? 
atom->nfirst : atom->nlocal; list->inum = nlocal; list->gnum = 0; // Get fix for intel stuff FixIntel *fix = static_cast(fix_intel); const int off_end = fix->offload_end_neighbor(); int host_start = fix->host_start_neighbor();; int offload_noghost = 0; #ifdef _LMP_INTEL_OFFLOAD if (fix->full_host_list()) host_start = 0; offload_noghost = fix->offload_noghost(); if (exclude) error->all(FLERR, "Exclusion lists not yet supported for Intel offload"); #endif - if (fix->precision() == FixIntel::PREC_MODE_MIXED) { - if (offload_noghost) { - fbi(1, list, fix->get_mixed_buffers(), - 0, off_end, fix); - fbi(0, list, fix->get_mixed_buffers(), - host_start, nlocal, fix, off_end); - } else { - fbi(1, list, fix->get_mixed_buffers(), - 0, off_end, fix); - fbi(0, list, fix->get_mixed_buffers(), - host_start, nlocal, fix); - } - } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { - if (offload_noghost) { - fbi(1, list, fix->get_double_buffers(), - 0, off_end, fix); - fbi(0, list, fix->get_double_buffers(), - host_start, nlocal, fix, off_end); + int need_ic = 0; + if (atom->molecular) + dminimum_image_check(need_ic, cutneighmax, cutneighmax, cutneighmax); + + if (need_ic) { + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + fbi(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + fbi(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + fbi(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + fbi(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix); + } + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + fbi(1, list, fix->get_double_buffers(), + 0, off_end, fix); + fbi(0, list, fix->get_double_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + fbi(1, list, fix->get_double_buffers(), + 0, off_end, fix); + fbi(0, list, fix->get_double_buffers(), + host_start, nlocal, fix); + 
} } else { - fbi(1, list, fix->get_double_buffers(), - 0, off_end, fix); - fbi(0, list, fix->get_double_buffers(), - host_start, nlocal, fix); + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + fbi(1, list, fix->get_single_buffers(), 0, off_end, + fix); + fbi(0, list, fix->get_single_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + fbi(1, list, fix->get_single_buffers(), 0, off_end, + fix); + fbi(0, list, fix->get_single_buffers(), + host_start, nlocal, fix); + } } } else { - if (offload_noghost) { - fbi(1, list, fix->get_single_buffers(), 0, off_end, fix); - fbi(0, list, fix->get_single_buffers(), - host_start, nlocal, fix, off_end); + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + fbi(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + fbi(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + fbi(1, list, fix->get_mixed_buffers(), + 0, off_end, fix); + fbi(0, list, fix->get_mixed_buffers(), + host_start, nlocal, fix); + } + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + fbi(1, list, fix->get_double_buffers(), + 0, off_end, fix); + fbi(0, list, fix->get_double_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + fbi(1, list, fix->get_double_buffers(), + 0, off_end, fix); + fbi(0, list, fix->get_double_buffers(), + host_start, nlocal, fix); + } } else { - fbi(1, list, fix->get_single_buffers(), 0, off_end, fix); - fbi(0, list, fix->get_single_buffers(), - host_start, nlocal, fix); + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + fbi(1, list, fix->get_single_buffers(), 0, off_end, + fix); + fbi(0, list, fix->get_single_buffers(), + host_start, nlocal, fix, off_end); + } else + #endif + { + fbi(1, list, fix->get_single_buffers(), 0, off_end, + fix); + fbi(0, list, fix->get_single_buffers(), + host_start, nlocal, fix); + } } } } -template 
+template void Neighbor::fbi(const int offload, NeighList *list, void *buffers_in, const int astart, const int aend, void *fix_in, const int offload_end) { IntelBuffers *buffers = (IntelBuffers *)buffers_in; FixIntel *fix = (FixIntel *)fix_in; const int nall = atom->nlocal + atom->nghost; int pad = 1; if (offload) { fix->start_watch(TIME_PACK); buffers->grow(nall, atom->nlocal, comm->nthreads, aend); buffers->grow_nbor(list, atom->nlocal, aend); ATOM_T biga; biga.x = INTEL_BIGP; biga.y = INTEL_BIGP; biga.z = INTEL_BIGP; biga.w = 1; buffers->get_x()[nall]=biga; const int nthreads = comm->nthreads; #if defined(_OPENMP) #pragma omp parallel default(none) shared(buffers) #endif { int ifrom, ito, tid; IP_PRE_omp_range_id_align(ifrom, ito, tid, nall, nthreads, sizeof(ATOM_T)); buffers->thr_pack(ifrom, ito, 0); } fix->stop_watch(TIME_PACK); fix->start_watch(TIME_HOST_NEIGHBOR); bin_atoms(buffers->get_x(), buffers->get_atombin()); } else { fix->start_watch(TIME_HOST_NEIGHBOR); } const int pad_width = pad; if (aend-astart == 0) { fix->stop_watch(TIME_HOST_NEIGHBOR); return; } const ATOM_T * _noalias const x = buffers->get_x(); int * _noalias const firstneigh = buffers->firstneigh(list); int nall_t = nall; if (offload_noghost && offload) nall_t = atom->nlocal; const int e_nall = nall_t; const int molecular = atom->molecular; int *ns = NULL; tagint *s = NULL; int tag_size = 0, special_size; if (buffers->need_tag()) tag_size = e_nall; if (molecular) { s = atom->special[0]; ns = atom->nspecial[0]; special_size = aend; } else { s = &buffers->_special_holder; ns = &buffers->_nspecial_holder; special_size = 0; } const tagint * _noalias const special = s; const int * _noalias const nspecial = ns; const int maxspecial = atom->maxspecial; const tagint * _noalias const tag = atom->tag; int * _noalias const ilist = list->ilist; int * _noalias numneigh = list->numneigh; int * _noalias const cnumneigh = buffers->cnumneigh(list); const int nstencil = list->nstencil; const int * _noalias 
const stencil = list->stencil; const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0]; const int ntypes = atom->ntypes + 1; const int nlocal = atom->nlocal; #ifndef _LMP_INTEL_OFFLOAD int * const mask = atom->mask; tagint * const molecule = atom->molecule; #endif int tnum; int *overflow; double *timer_compute; if (offload) { timer_compute = fix->off_watch_neighbor(); tnum = buffers->get_off_threads(); overflow = fix->get_off_overflow_flag(); fix->stop_watch(TIME_HOST_NEIGHBOR); fix->start_watch(TIME_OFFLOAD_LATENCY); } else { tnum = comm->nthreads; overflow = fix->get_overflow_flag(); } const int nthreads = tnum; const int maxnbors = buffers->get_max_nbors(); int * _noalias const atombin = buffers->get_atombin(); + const int xperiodic = domain->xperiodic; + const int yperiodic = domain->yperiodic; + const int zperiodic = domain->zperiodic; + const flt_t xprd_half = domain->xprd_half; + const flt_t yprd_half = domain->yprd_half; + const flt_t zprd_half = domain->zprd_half; + // Make sure dummy coordinates to eliminate loop remainder not within cutoff { const flt_t dx = (INTEL_BIGP - bboxhi[0]); const flt_t dy = (INTEL_BIGP - bboxhi[1]); const flt_t dz = (INTEL_BIGP - bboxhi[2]); if (dx * dx + dy * dy + dz * dz < static_cast(cutneighmaxsq)) error->one(FLERR, "Intel package expects no atoms within cutoff of {1e15,1e15,1e15}."); } #ifdef _LMP_INTEL_OFFLOAD const int * _noalias const binhead = this->binhead; const int * _noalias const special_flag = this->special_flag; const int * _noalias const bins = this->bins; const int cop = fix->coprocessor_number(); const int separate_buffers = fix->separate_buffers(); #pragma offload target(mic:cop) if(offload) \ in(x:length(e_nall+1) alloc_if(0) free_if(0)) \ in(tag:length(tag_size) alloc_if(0) free_if(0)) \ in(special:length(special_size*maxspecial) alloc_if(0) free_if(0)) \ in(nspecial:length(special_size*3) alloc_if(0) free_if(0)) \ in(bins:length(nall) alloc_if(0) free_if(0)) \ in(binhead:length(mbins) 
alloc_if(0) free_if(0)) \ in(cutneighsq:length(0) alloc_if(0) free_if(0)) \ in(firstneigh:length(0) alloc_if(0) free_if(0)) \ in(cnumneigh:length(0) alloc_if(0) free_if(0)) \ out(numneigh:length(0) alloc_if(0) free_if(0)) \ in(ilist:length(0) alloc_if(0) free_if(0)) \ in(atombin:length(aend) alloc_if(0) free_if(0)) \ in(stencil:length(nstencil) alloc_if(0) free_if(0)) \ in(special_flag:length(0) alloc_if(0) free_if(0)) \ in(maxnbors,nthreads,maxspecial,nstencil,e_nall,offload) \ in(offload_end,separate_buffers,astart, aend, nlocal, molecular, ntypes) \ + in(xperiodic, yperiodic, zperiodic, xprd_half, yprd_half, zprd_half) \ out(overflow:length(5) alloc_if(0) free_if(0)) \ out(timer_compute:length(1) alloc_if(0) free_if(0)) \ signal(tag) #endif { #ifdef __MIC__ *timer_compute = MIC_Wtime(); #endif #ifdef _LMP_INTEL_OFFLOAD overflow[LMP_LOCAL_MIN] = astart; overflow[LMP_LOCAL_MAX] = aend - 1; overflow[LMP_GHOST_MIN] = e_nall; overflow[LMP_GHOST_MAX] = -1; #endif #if defined(_OPENMP) #pragma omp parallel default(none) shared(numneigh, overflow) #endif { #ifdef _LMP_INTEL_OFFLOAD int lmin = e_nall, lmax = -1, gmin = e_nall, gmax = -1; #endif const int num = aend - astart; int tid, ifrom, ito; IP_PRE_omp_range_id(ifrom, ito, tid, num, nthreads); ifrom += astart; ito += astart; int which; const int list_size = (ito + tid + 1) * maxnbors; int ct = (ifrom + tid) * maxnbors; int *neighptr = firstneigh + ct; for (int i = ifrom; i < ito; i++) { int j, k, n, n2, itype, jtype, ibin; double xtmp, ytmp, ztmp, delx, dely, delz, rsq; n = 0; n2 = maxnbors; xtmp = x[i].x; ytmp = x[i].y; ztmp = x[i].z; itype = x[i].w; const tagint itag = tag[i]; const int ioffset = ntypes * itype; // loop over all atoms in surrounding bins in stencil including self // skip i = j ibin = atombin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin + stencil[k]]; j >= 0; j = bins[j]) { if (i == j) continue; if (offload_noghost) { if (j < nlocal) { if (i < offload_end) continue; } else if (offload) 
continue; } jtype = x[j].w; #ifndef _LMP_INTEL_OFFLOAD if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; #endif delx = xtmp - x[j].x; dely = ytmp - x[j].y; delz = ztmp - x[j].z; rsq = delx * delx + dely * dely + delz * delz; if (rsq <= cutneighsq[ioffset + jtype]) { const int jtag = tag[j]; int flist = 0; if (itag > jtag) { if ((itag+jtag) % 2 == 0) flist = 1; } else if (itag < jtag) { if ((itag+jtag) % 2 == 1) flist = 1; } else { if (x[j].z < ztmp) flist = 1; else if (x[j].z == ztmp && x[j].y < ytmp) flist = 1; else if (x[j].z == ztmp && x[j].y == ytmp && x[j].x < xtmp) flist = 1; } - if (flist) - neighptr[n2++] = j; - else - neighptr[n++] = j; + if (flist) { + if (need_ic) { + int no_special; + ominimum_image_check(no_special, delx, dely, delz); + if (no_special) + neighptr[n2++] = -j - 1; + else + neighptr[n2++] = j; + } else + neighptr[n2++] = j; + } else { + if (need_ic) { + int no_special; + ominimum_image_check(no_special, delx, dely, delz); + if (no_special) + neighptr[n++] = -j - 1; + else + neighptr[n++] = j; + } else + neighptr[n++] = j; + } #ifdef _LMP_INTEL_OFFLOAD if (j < nlocal) { if (j < lmin) lmin = j; if (j > lmax) lmax = j; } else { if (j < gmin) gmin = j; if (j > gmax) gmax = j; } #endif } } } ilist[i] = i; cnumneigh[i] = ct; if (n > maxnbors) *overflow = 1; atombin[i] = n; for (k = maxnbors; k < n2; k++) neighptr[n++] = neighptr[k]; numneigh[i] = n; while((n % (INTEL_DATA_ALIGN / sizeof(int))) != 0) n++; ct += n; neighptr += n; if (ct + n + maxnbors > list_size) { *overflow = 1; ct = (ifrom + tid) * maxnbors; } } if (*overflow == 1) for (int i = ifrom; i < ito; i++) numneigh[i] = 0; #ifdef _LMP_INTEL_OFFLOAD if (separate_buffers) { #if defined(_OPENMP) #pragma omp critical #endif { if (lmin < overflow[LMP_LOCAL_MIN]) overflow[LMP_LOCAL_MIN] = lmin; if (lmax > overflow[LMP_LOCAL_MAX]) overflow[LMP_LOCAL_MAX] = lmax; if (gmin < overflow[LMP_GHOST_MIN]) overflow[LMP_GHOST_MIN] = gmin; if (gmax > overflow[LMP_GHOST_MAX]) 
overflow[LMP_GHOST_MAX] = gmax; } #pragma omp barrier } int ghost_offset = 0, nall_offset = e_nall; if (separate_buffers) { int nghost = overflow[LMP_GHOST_MAX] + 1 - overflow[LMP_GHOST_MIN]; if (nghost < 0) nghost = 0; if (offload) { ghost_offset = overflow[LMP_GHOST_MIN] - overflow[LMP_LOCAL_MAX] - 1; nall_offset = overflow[LMP_LOCAL_MAX] + 1 + nghost; } else { ghost_offset = overflow[LMP_GHOST_MIN] - nlocal; nall_offset = nlocal + nghost; } } #endif if (molecular) { for (int i = ifrom; i < ito; ++i) { int * _noalias jlist = firstneigh + cnumneigh[i]; const int jnum = numneigh[i]; for (int jj = 0; jj < jnum; jj++) { const int j = jlist[jj]; - ofind_special(which, special, nspecial, i, tag[j], - special_flag); + if (need_ic && j < 0) { + which = 0; + jlist[jj] = -j - 1; + } else + ofind_special(which, special, nspecial, i, tag[j], + special_flag); #ifdef _LMP_INTEL_OFFLOAD if (j >= nlocal) { if (j == e_nall) jlist[jj] = nall_offset; else if (which > 0) jlist[jj] = (j-ghost_offset) ^ (which << SBBITS); else jlist[jj]-=ghost_offset; } else #endif if (which > 0) jlist[jj] = j ^ (which << SBBITS); } } } #ifdef _LMP_INTEL_OFFLOAD else if (separate_buffers) { for (int i = ifrom; i < ito; ++i) { int * _noalias jlist = firstneigh + cnumneigh[i]; const int jnum = numneigh[i]; int jj = 0; for (jj = 0; jj < jnum; jj++) { if (jlist[jj] >= nlocal) { if (jlist[jj] == e_nall) jlist[jj] = nall_offset; else jlist[jj] -= ghost_offset; } } } } #endif } // end omp #ifdef __MIC__ *timer_compute = MIC_Wtime() - *timer_compute; #endif } // end offload if (offload) { fix->stop_watch(TIME_OFFLOAD_LATENCY); #ifdef _LMP_INTEL_OFFLOAD for (int n = 0; n < aend; n++) { ilist[n] = n; numneigh[n] = 0; } #endif } else { for (int i = astart; i < aend; i++) list->firstneigh[i] = firstneigh + cnumneigh[i]; fix->stop_watch(TIME_HOST_NEIGHBOR); #ifdef _LMP_INTEL_OFFLOAD if (separate_buffers) { fix->start_watch(TIME_PACK); fix->set_neighbor_host_sizes(); 
buffers->pack_sep_from_single(fix->host_min_local(), fix->host_used_local(), fix->host_min_ghost(), fix->host_used_ghost()); fix->stop_watch(TIME_PACK); } #endif } } diff --git a/src/USER-INTEL/pair_gayberne_intel.cpp b/src/USER-INTEL/pair_gayberne_intel.cpp index ab8c652d1..5eb76d891 100644 --- a/src/USER-INTEL/pair_gayberne_intel.cpp +++ b/src/USER-INTEL/pair_gayberne_intel.cpp @@ -1,1083 +1,1083 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: W. Michael Brown (Intel) ------------------------------------------------------------------------- */ #include "math.h" #include "pair_gayberne_intel.h" #include "math_extra_intel.h" #include "atom.h" #include "comm.h" #include "atom_vec_ellipsoid.h" #include "force.h" #include "memory.h" #include "modify.h" #include "neighbor.h" #include "neigh_list.h" #include "neigh_request.h" #include "suffix.h" using namespace LAMMPS_NS; #define FC_PACKED1_T typename ForceConst::fc_packed1 #define FC_PACKED2_T typename ForceConst::fc_packed2 #define FC_PACKED3_T typename ForceConst::fc_packed3 /* ---------------------------------------------------------------------- */ PairGayBerneIntel::PairGayBerneIntel(LAMMPS *lmp) : PairGayBerne(lmp) { suffix_flag |= Suffix::INTEL; respa_enable = 0; } /* ---------------------------------------------------------------------- */ void PairGayBerneIntel::compute(int eflag, int vflag) { if (fix->precision()==FixIntel::PREC_MODE_MIXED) compute(eflag, vflag, fix->get_mixed_buffers(), force_const_single); else if 
(fix->precision()==FixIntel::PREC_MODE_DOUBLE) compute(eflag, vflag, fix->get_double_buffers(), force_const_double); else compute(eflag, vflag, fix->get_single_buffers(), force_const_single); fix->balance_stamp(); vflag_fdotr = 0; } template void PairGayBerneIntel::compute(int eflag, int vflag, IntelBuffers *buffers, const ForceConst &fc) { if (eflag || vflag) { ev_setup(eflag, vflag); } else evflag = vflag_fdotr = 0; const int inum = list->inum; const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; const int host_start = fix->host_start_pair(); const int offload_end = fix->offload_end_pair(); const int ago = neighbor->ago; if (fix->separate_buffers() == 0) { fix->start_watch(TIME_PACK); const AtomVecEllipsoid::Bonus * const bonus = avec->bonus; const int * const ellipsoid = atom->ellipsoid; QUAT_T * _noalias const quat = buffers->get_quat(); #if defined(_OPENMP) #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc) #endif { int ifrom, ito, tid; IP_PRE_omp_range_id_align(ifrom, ito, tid, nall, nthreads, sizeof(ATOM_T)); if (ago != 0) buffers->thr_pack(ifrom,ito,ago); for (int i = ifrom; i < ito; i++) { int qi = ellipsoid[i]; if (qi > -1) { quat[i].w = bonus[qi].quat[0]; quat[i].i = bonus[qi].quat[1]; quat[i].j = bonus[qi].quat[2]; quat[i].k = bonus[qi].quat[3]; } } } quat[nall].w = (flt_t)1.0; quat[nall].i = (flt_t)0.0; quat[nall].j = (flt_t)0.0; quat[nall].k = (flt_t)0.0; fix->stop_watch(TIME_PACK); } if (evflag || vflag_fdotr) { int ovflag = 0; if (vflag_fdotr) ovflag = 2; else if (vflag) ovflag = 1; if (eflag) { if (force->newton_pair) { eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end); eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum); } else { eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end); eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum); } } else { if (force->newton_pair) { eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end); eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum); } else { 
eval<1,0,0>(1, ovflag, buffers, fc, 0, offload_end); eval<1,0,0>(0, ovflag, buffers, fc, host_start, inum); } } } else { if (force->newton_pair) { eval<0,0,1>(1, 0, buffers, fc, 0, offload_end); eval<0,0,1>(0, 0, buffers, fc, host_start, inum); } else { eval<0,0,0>(1, 0, buffers, fc, 0, offload_end); eval<0,0,0>(0, 0, buffers, fc, host_start, inum); } } } template void PairGayBerneIntel::eval(const int offload, const int vflag, IntelBuffers *buffers, const ForceConst &fc, const int astart, const int aend) { const int inum = aend - astart; if (inum == 0) return; int nlocal, nall, minlocal; fix->get_buffern(offload, nlocal, nall, minlocal); const int ago = neighbor->ago; ATOM_T * _noalias const x = buffers->get_x(offload); QUAT_T * _noalias const quat = buffers->get_quat(offload); const AtomVecEllipsoid::Bonus *bonus = avec->bonus; const int *ellipsoid = atom->ellipsoid; #ifdef _LMP_INTEL_OFFLOAD if (fix->separate_buffers()) { fix->start_watch(TIME_PACK); if (offload) { #pragma omp parallel default(none) \ shared(buffers,nlocal,nall,bonus,ellipsoid) { int ifrom, ito, tid; int nthreads = comm->nthreads; IP_PRE_omp_range_id_align(ifrom, ito, tid, nlocal, nthreads, sizeof(ATOM_T)); if (ago != 0) buffers->thr_pack_cop(ifrom, ito, 0); for (int i = ifrom; i < ito; i++) { int qi = ellipsoid[i]; if (qi > -1) { quat[i].w = bonus[qi].quat[0]; quat[i].i = bonus[qi].quat[1]; quat[i].j = bonus[qi].quat[2]; quat[i].k = bonus[qi].quat[3]; } } int nghost = nall - nlocal; if (nghost) { IP_PRE_omp_range_align(ifrom, ito, tid, nall - nlocal, nthreads, sizeof(ATOM_T)); int offset = 0; ifrom += nlocal; ito += nlocal; if (ago != 0) { offset = fix->offload_min_ghost() - nlocal; buffers->thr_pack_cop(ifrom, ito, offset, ago == 1); } for (int i = ifrom; i < ito; i++) { int qi = ellipsoid[i + offset]; if (qi > -1) { quat[i].w = bonus[qi].quat[0]; quat[i].i = bonus[qi].quat[1]; quat[i].j = bonus[qi].quat[2]; quat[i].k = bonus[qi].quat[3]; } } } } } else { if (ago != 0) 
buffers->thr_pack_host(fix->host_min_local(), nlocal, 0); for (int i = fix->host_min_local(); i < nlocal; i++) { int qi = ellipsoid[i]; if (qi > -1) { quat[i].w = bonus[qi].quat[0]; quat[i].i = bonus[qi].quat[1]; quat[i].j = bonus[qi].quat[2]; quat[i].k = bonus[qi].quat[3]; } } int offset = fix->host_min_ghost() - nlocal; if (ago != 0) buffers->thr_pack_host(nlocal, nall, offset); for (int i = nlocal; i < nall; i++) { int qi = ellipsoid[i + offset]; if (qi > -1) { quat[i].w = bonus[qi].quat[0]; quat[i].i = bonus[qi].quat[1]; quat[i].j = bonus[qi].quat[2]; quat[i].k = bonus[qi].quat[3]; } } } fix->stop_watch(TIME_PACK); } #endif // const int * _noalias const ilist = list->ilist; const int * _noalias const numneigh = list->numneigh; const int * _noalias const cnumneigh = buffers->cnumneigh(list); const int * _noalias const firstneigh = buffers->firstneigh(list); const flt_t * _noalias const special_lj = fc.special_lj; const FC_PACKED1_T * _noalias const ijc = fc.ijc[0]; const FC_PACKED2_T * _noalias const lj34 = fc.lj34[0]; const FC_PACKED3_T * _noalias const ic = fc.ic; const flt_t mu = fc.mu; const flt_t gamma = fc.gamma; const flt_t upsilon = fc.upsilon; flt_t * const rsq_formi = fc.rsq_form[0]; flt_t * const delx_formi = fc.delx_form[0]; flt_t * const dely_formi = fc.dely_form[0]; flt_t * const delz_formi = fc.delz_form[0]; int * const jtype_formi = fc.jtype_form[0]; int * const jlist_formi = fc.jlist_form[0]; const int ntypes = atom->ntypes + 1; const int eatom = this->eflag_atom; // Determine how much data to transfer int x_size, q_size, f_stride, ev_size, separate_flag; IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag, buffers, offload, fix, separate_flag, x_size, q_size, ev_size, f_stride); int tc; FORCE_T * _noalias f_start; acc_t * _noalias ev_global; IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global); const int max_nbors = _max_nbors; const int nthreads = tc; int pad = 1; if (offload) { if (INTEL_MIC_NBOR_PAD > 1) pad = 
INTEL_MIC_NBOR_PAD * sizeof(float) / sizeof(flt_t); } else { if (INTEL_NBOR_PAD > 1) pad = INTEL_NBOR_PAD * sizeof(float) / sizeof(flt_t); } const int pad_width = pad; #ifdef _LMP_INTEL_OFFLOAD int *overflow = fix->get_off_overflow_flag(); double *timer_compute = fix->off_watch_pair(); if (offload) fix->start_watch(TIME_OFFLOAD_LATENCY); #pragma offload target(mic:_cop) if(offload) \ in(special_lj:length(0) alloc_if(0) free_if(0)) \ in(ijc,lj34,ic:length(0) alloc_if(0) free_if(0)) \ in(rsq_formi, delx_formi, dely_formi: length(0) alloc_if(0) free_if(0)) \ in(delz_formi, jtype_formi, jlist_formi: length(0) alloc_if(0) free_if(0))\ in(firstneigh:length(0) alloc_if(0) free_if(0)) \ in(cnumneigh:length(0) alloc_if(0) free_if(0)) \ in(numneigh:length(0) alloc_if(0) free_if(0)) \ in(x:length(x_size) alloc_if(0) free_if(0)) \ in(quat:length(nall+1) alloc_if(0) free_if(0)) \ in(overflow:length(0) alloc_if(0) free_if(0)) \ in(nthreads,inum,nall,ntypes,vflag,eatom,minlocal,separate_flag) \ in(astart,nlocal,f_stride,max_nbors,mu,gamma,upsilon,offload,pad_width) \ out(f_start:length(f_stride) alloc_if(0) free_if(0)) \ out(ev_global:length(ev_size) alloc_if(0) free_if(0)) \ out(timer_compute:length(1) alloc_if(0) free_if(0)) \ signal(f_start) #endif { #ifdef __MIC__ *timer_compute=MIC_Wtime(); #endif #ifdef _LMP_INTEL_OFFLOAD if (separate_flag) { if (separate_flag < 3) { int all_local = nlocal; int ghost_min = overflow[LMP_GHOST_MIN]; nlocal = overflow[LMP_LOCAL_MAX] + 1; int nghost = overflow[LMP_GHOST_MAX] + 1 - ghost_min; if (nghost < 0) nghost = 0; nall = nlocal + nghost; separate_flag--; int flength; if (NEWTON_PAIR) flength = nall; else flength = nlocal; IP_PRE_get_stride(f_stride, flength, sizeof(FORCE_T), separate_flag); if (nghost) { if (nlocal < all_local || ghost_min > all_local) { memmove(x + nlocal, x + ghost_min, (nall - nlocal) * sizeof(ATOM_T)); memmove(quat + nlocal, quat + ghost_min, (nall - nlocal) * sizeof(QUAT_T)); } } } x[nall].x = (flt_t)INTEL_BIGP; 
x[nall].y = (flt_t)INTEL_BIGP; x[nall].z = (flt_t)INTEL_BIGP; quat[nall].w = (flt_t)1.0; quat[nall].i = (flt_t)0.0; quat[nall].j = (flt_t)0.0; quat[nall].k = (flt_t)0.0; } #endif acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5; if (EVFLAG) { oevdwl = (acc_t)0; if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; } // loop over neighbors of my atoms #if defined(_OPENMP) #pragma omp parallel default(none) \ shared(f_start,f_stride,nlocal,nall,minlocal) \ reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5) #endif { int iifrom, iito, tid; IP_PRE_omp_range_id(iifrom, iito, tid, inum, nthreads); iifrom += astart; iito += astart; FORCE_T * _noalias const f = f_start - minlocal * 2 + (tid * f_stride); memset(f + minlocal * 2, 0, f_stride * sizeof(FORCE_T)); flt_t * _noalias const rsq_form = rsq_formi + tid * max_nbors; flt_t * _noalias const delx_form = delx_formi + tid * max_nbors; flt_t * _noalias const dely_form = dely_formi + tid * max_nbors; flt_t * _noalias const delz_form = delz_formi + tid * max_nbors; int * _noalias const jtype_form = jtype_formi + tid * max_nbors; int * _noalias const jlist_form = jlist_formi + tid * max_nbors; int ierror = 0; for (int i = iifrom; i < iito; ++i) { // const int i = ilist[ii]; const int itype = x[i].w; const int ptr_off = itype * ntypes; const FC_PACKED1_T * _noalias const ijci = ijc + ptr_off; const FC_PACKED2_T * _noalias const lj34i = lj34 + ptr_off; const int * _noalias const jlist = firstneigh + cnumneigh[i]; const int jnum = numneigh[i]; const flt_t xtmp = x[i].x; const flt_t ytmp = x[i].y; const flt_t ztmp = x[i].z; flt_t a1_0, a1_1, a1_2, a1_3, a1_4, a1_5, a1_6, a1_7, a1_8; flt_t b1_0, b1_1, b1_2, b1_3, b1_4, b1_5, b1_6, b1_7, b1_8; flt_t g1_0, g1_1, g1_2, g1_3, g1_4, g1_5, g1_6, g1_7, g1_8; if (ijci[itype].form == ELLIPSE_ELLIPSE) { flt_t temp_0,temp_1,temp_2,temp_3,temp_4,temp_5,temp_6,temp_7,temp_8; ME_quat_to_mat_trans(quat[i],a1); ME_diag_times3(ic[itype].well,a1,temp); ME_transpose_times3(a1,temp,b1); 
ME_diag_times3(ic[itype].shape2,a1,temp); ME_transpose_times3(a1,temp,g1); } acc_t fxtmp, fytmp, fztmp, fwtmp, t1tmp, t2tmp, t3tmp; acc_t sevdwl, sv0, sv1, sv2, sv3, sv4, sv5; fxtmp = fytmp = fztmp = t1tmp = t2tmp = t3tmp = (acc_t)0.0; if (EVFLAG) { if (EFLAG) fwtmp = sevdwl = (acc_t)0; if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0; } bool multiple_forms = false; int packed_j = 0; for (int jj = 0; jj < jnum; jj++) { int jm = jlist[jj]; int j = jm & NEIGHMASK; const int jtype = x[j].w; if (ijci[jtype].form == ELLIPSE_ELLIPSE) { flt_t delx = x[j].x-xtmp; flt_t dely = x[j].y-ytmp; flt_t delz = x[j].z-ztmp; flt_t rsq = delx * delx + dely * dely + delz * delz; if (rsq < ijci[jtype].cutsq) { rsq_form[packed_j] = rsq; delx_form[packed_j] = delx; dely_form[packed_j] = dely; delz_form[packed_j] = delz; jtype_form[packed_j] = jtype; jlist_form[packed_j] = jm; packed_j++; } } else multiple_forms = true; } while( (packed_j % pad_width) != 0 ) jlist_form[packed_j++] = nall; // ------------------------------------------------------------- - #ifdef __MIC__ + #ifdef INTEL_V512 __assume(packed_j % INTEL_VECTOR_WIDTH == 0); __assume(packed_j % 8 == 0); __assume(packed_j % INTEL_MIC_VECTOR_WIDTH == 0); #endif - #if defined(__INTEL_COMPILER) + #if defined(LMP_SIMD_COMPILER) #pragma vector aligned #pragma simd reduction(+:fxtmp,fytmp,fztmp,fwtmp,t1tmp,t2tmp,t3tmp, \ sevdwl,sv0,sv1,sv2,sv3,sv4,sv5) #endif for (int jj = 0; jj < packed_j; jj++) { flt_t a2_0, a2_1, a2_2, a2_3, a2_4, a2_5, a2_6, a2_7, a2_8; flt_t b2_0, b2_1, b2_2, b2_3, b2_4, b2_5, b2_6, b2_7, b2_8; flt_t g2_0, g2_1, g2_2, g2_3, g2_4, g2_5, g2_6, g2_7, g2_8; flt_t temp_0,temp_1,temp_2,temp_3,temp_4,temp_5,temp_6,temp_7,temp_8; flt_t fforce_0, fforce_1, fforce_2, ttor_0, ttor_1, ttor_2; flt_t rtor_0, rtor_1, rtor_2; const int sbindex = jlist_form[jj] >> SBBITS & 3; const int j = jlist_form[jj] & NEIGHMASK; flt_t factor_lj = special_lj[sbindex]; const int jtype = jtype_form[jj]; const flt_t sigma = 
ijci[jtype].sigma; const flt_t epsilon = ijci[jtype].epsilon; const flt_t shape2_0 = ic[jtype].shape2[0]; const flt_t shape2_1 = ic[jtype].shape2[1]; const flt_t shape2_2 = ic[jtype].shape2[2]; flt_t one_eng, evdwl; ME_quat_to_mat_trans(quat[j], a2); ME_diag_times3(ic[jtype].well, a2, temp); ME_transpose_times3(a2, temp, b2); ME_diag_times3a(shape2, a2, temp); ME_transpose_times3(a2, temp, g2); flt_t tempv_0, tempv_1, tempv_2, tempv2_0, tempv2_1, tempv2_2; flt_t temp1, temp2, temp3; flt_t r12hat_0, r12hat_1, r12hat_2; ME_normalize3(delx_form[jj], dely_form[jj], delz_form[jj], r12hat); flt_t r = sqrt(rsq_form[jj]); // compute distance of closest approach flt_t g12_0, g12_1, g12_2, g12_3, g12_4, g12_5, g12_6, g12_7, g12_8; ME_plus3(g1, g2, g12); flt_t kappa_0, kappa_1, kappa_2; ME_mldivide3(g12, delx_form[jj], dely_form[jj], delz_form[jj], kappa, ierror); // tempv = G12^-1*r12hat flt_t inv_r = (flt_t)1.0 / r; tempv_0 = kappa_0 * inv_r; tempv_1 = kappa_1 * inv_r; tempv_2 = kappa_2 * inv_r; flt_t sigma12 = ME_dot3(r12hat, tempv); sigma12 = pow((flt_t)0.5 * sigma12,(flt_t) - 0.5); flt_t h12 = r - sigma12; // energy // compute u_r flt_t varrho = sigma / (h12 + gamma * sigma); flt_t varrho6 = pow(varrho, (flt_t)6.0); flt_t varrho12 = varrho6 * varrho6; flt_t u_r = (flt_t)4.0 * epsilon * (varrho12 - varrho6); // compute eta_12 flt_t eta = (flt_t)2.0 * ijci[jtype].lshape; flt_t det_g12 = ME_det3(g12); eta = pow(eta / det_g12, upsilon); // compute chi_12 flt_t b12_0, b12_1, b12_2, b12_3, b12_4, b12_5, b12_6, b12_7, b12_8; flt_t iota_0, iota_1, iota_2; ME_plus3(b1, b2, b12); ME_mldivide3(b12, delx_form[jj], dely_form[jj], delz_form[jj], iota, ierror); // tempv = G12^-1*r12hat tempv_0 = iota_0 * inv_r; tempv_1 = iota_1 * inv_r; tempv_2 = iota_2 * inv_r; flt_t chi = ME_dot3(r12hat, tempv); chi = pow(chi * (flt_t)2.0, mu); // force // compute dUr/dr temp1 = ((flt_t)2.0 * varrho12 * varrho - varrho6 * varrho) / sigma; temp1 = temp1 * (flt_t)24.0 * epsilon; flt_t u_slj = temp1 * 
pow(sigma12, (flt_t)3.0) * (flt_t)0.5; flt_t dUr_0, dUr_1, dUr_2; temp2 = ME_dot3(kappa, r12hat); flt_t uslj_rsq = u_slj / rsq_form[jj]; dUr_0 = temp1 * r12hat_0 + uslj_rsq * (kappa_0 - temp2 * r12hat_0); dUr_1 = temp1 * r12hat_1 + uslj_rsq * (kappa_1 - temp2 * r12hat_1); dUr_2 = temp1 * r12hat_2 + uslj_rsq * (kappa_2 - temp2 * r12hat_2); // compute dChi_12/dr flt_t dchi_0, dchi_1, dchi_2; temp1 = ME_dot3(iota, r12hat); temp2 = (flt_t)-4.0 / rsq_form[jj] * mu * pow(chi, (mu - (flt_t)1.0) / mu); dchi_0 = temp2 * (iota_0 - temp1 * r12hat_0); dchi_1 = temp2 * (iota_1 - temp1 * r12hat_1); dchi_2 = temp2 * (iota_2 - temp1 * r12hat_2); temp1 = -eta * u_r; temp2 = eta * chi; fforce_0 = temp1 * dchi_0 - temp2 * dUr_0; fforce_1 = temp1 * dchi_1 - temp2 * dUr_1; fforce_2 = temp1 * dchi_2 - temp2 * dUr_2; // torque for particle 1 and 2 // compute dUr tempv_0 = -uslj_rsq * kappa_0; tempv_1 = -uslj_rsq * kappa_1; tempv_2 = -uslj_rsq * kappa_2; ME_vecmat(kappa, g1, tempv2); ME_cross3(tempv, tempv2, dUr); flt_t dUr2_0, dUr2_1, dUr2_2; if (NEWTON_PAIR || j < nlocal) { ME_vecmat(kappa, g2, tempv2); ME_cross3(tempv, tempv2, dUr2); } // compute d_chi ME_vecmat(iota, b1, tempv); ME_cross3(tempv, iota, dchi); temp1 = (flt_t)-4.0 / rsq_form[jj]; dchi_0 *= temp1; dchi_1 *= temp1; dchi_2 *= temp1; flt_t dchi2_0, dchi2_1, dchi2_2; if (NEWTON_PAIR || j < nlocal) { ME_vecmat(iota, b2, tempv); ME_cross3(tempv, iota, dchi2); dchi2_0 *= temp1; dchi2_1 *= temp1; dchi2_2 *= temp1; } // compute d_eta flt_t deta_0, deta_1, deta_2; deta_0 = deta_1 = deta_2 = (flt_t)0.0; ME_compute_eta_torque(g12, a1, shape2, temp); temp1 = -eta * upsilon; tempv_0 = temp1 * temp_0; tempv_1 = temp1 * temp_1; tempv_2 = temp1 * temp_2; ME_mv0_cross3(a1, tempv, tempv2); deta_0 += tempv2_0; deta_1 += tempv2_1; deta_2 += tempv2_2; tempv_0 = temp1 * temp_3; tempv_1 = temp1 * temp_4; tempv_2 = temp1 * temp_5; ME_mv1_cross3(a1, tempv, tempv2); deta_0 += tempv2_0; deta_1 += tempv2_1; deta_2 += tempv2_2; tempv_0 = temp1 * 
temp_6; tempv_1 = temp1 * temp_7; tempv_2 = temp1 * temp_8; ME_mv2_cross3(a1, tempv, tempv2); deta_0 += tempv2_0; deta_1 += tempv2_1; deta_2 += tempv2_2; // compute d_eta for particle 2 flt_t deta2_0, deta2_1, deta2_2; if (NEWTON_PAIR || j < nlocal) { deta2_0 = deta2_1 = deta2_2 = (flt_t)0.0; ME_compute_eta_torque(g12, a2, shape2, temp); tempv_0 = temp1 * temp_0; tempv_1 = temp1 * temp_1; tempv_2 = temp1 * temp_2; ME_mv0_cross3(a2, tempv, tempv2); deta2_0 += tempv2_0; deta2_1 += tempv2_1; deta2_2 += tempv2_2; tempv_0 = temp1 * temp_3; tempv_1 = temp1 * temp_4; tempv_2 = temp1 * temp_5; ME_mv1_cross3(a2, tempv, tempv2); deta2_0 += tempv2_0; deta2_1 += tempv2_1; deta2_2 += tempv2_2; tempv_0 = temp1 * temp_6; tempv_1 = temp1 * temp_7; tempv_2 = temp1 * temp_8; ME_mv2_cross3(a2, tempv, tempv2); deta2_0 += tempv2_0; deta2_1 += tempv2_1; deta2_2 += tempv2_2; } // torque temp1 = u_r * eta; temp2 = u_r * chi; temp3 = chi * eta; ttor_0 = (temp1 * dchi_0 + temp2 * deta_0 + temp3 * dUr_0) * (flt_t)-1.0; ttor_1 = (temp1 * dchi_1 + temp2 * deta_1 + temp3 * dUr_1) * (flt_t)-1.0; ttor_2 = (temp1 * dchi_2 + temp2 * deta_2 + temp3 * dUr_2) * (flt_t)-1.0; if (NEWTON_PAIR || j < nlocal) { rtor_0 = (temp1 * dchi2_0 + temp2 * deta2_0 + temp3 * dUr2_0) * (flt_t)-1.0; rtor_1 = (temp1 * dchi2_1 + temp2 * deta2_1 + temp3 * dUr2_1) * (flt_t)-1.0; rtor_2 = (temp1 * dchi2_2 + temp2 * deta2_2 + temp3 * dUr2_2) * (flt_t)-1.0; } one_eng = temp1 * chi; - #ifndef __MIC__ + #ifndef INTEL_VMASK if (jlist_form[jj] == nall) { one_eng = (flt_t)0.0; fforce_0 = 0.0; fforce_1 = 0.0; fforce_2 = 0.0; ttor_0 = 0.0; ttor_1 = 0.0; ttor_2 = 0.0; rtor_0 = 0.0; rtor_1 = 0.0; rtor_2 = 0.0; } #endif fforce_0 *= factor_lj; fforce_1 *= factor_lj; fforce_2 *= factor_lj; ttor_0 *= factor_lj; ttor_1 *= factor_lj; ttor_2 *= factor_lj; - #ifdef __MIC__ + #ifdef INTEL_VMASK if (jlist_form[jj] < nall) { #endif fxtmp += fforce_0; fytmp += fforce_1; fztmp += fforce_2; t1tmp += ttor_0; t2tmp += ttor_1; t3tmp += ttor_2; if 
(NEWTON_PAIR || j < nlocal) { rtor_0 *= factor_lj; rtor_1 *= factor_lj; rtor_2 *= factor_lj; int jp = j * 2; f[jp].x -= fforce_0; f[jp].y -= fforce_1; f[jp].z -= fforce_2; jp++; f[jp].x += rtor_0; f[jp].y += rtor_1; f[jp].z += rtor_2; } if (EVFLAG) { flt_t ev_pre = (flt_t)0; if (NEWTON_PAIR || i < nlocal) ev_pre += (flt_t)0.5; if (NEWTON_PAIR || j < nlocal) ev_pre += (flt_t)0.5; if (EFLAG) { evdwl = factor_lj * one_eng; sevdwl += ev_pre * evdwl; if (eatom) { if (NEWTON_PAIR || i < nlocal) fwtmp += (flt_t)0.5 * evdwl; if (NEWTON_PAIR || j < nlocal) f[j*2].w += (flt_t)0.5 * evdwl; } } if (vflag == 1) { ev_pre *= (flt_t)-1.0; sv0 += ev_pre * delx_form[jj] * fforce_0; sv1 += ev_pre * dely_form[jj] * fforce_1; sv2 += ev_pre * delz_form[jj] * fforce_2; sv3 += ev_pre * delx_form[jj] * fforce_1; sv4 += ev_pre * delx_form[jj] * fforce_2; sv5 += ev_pre * dely_form[jj] * fforce_2; } } // EVFLAG - #ifdef __MIC__ + #ifdef INTEL_VMASK } #endif } // for jj // ------------------------------------------------------------- if (multiple_forms) ierror = 2; int ip = i * 2; f[ip].x += fxtmp; f[ip].y += fytmp; f[ip].z += fztmp; ip++; f[ip].x += t1tmp; f[ip].y += t2tmp; f[ip].z += t3tmp; if (EVFLAG) { if (EFLAG) { if (eatom) f[i * 2].w += fwtmp; oevdwl += sevdwl; } if (vflag == 1) { ov0 += sv0; ov1 += sv1; ov2 += sv2; ov3 += sv3; ov4 += sv4; ov5 += sv5; } } } // for i int o_range; if (NEWTON_PAIR) o_range = nall; else o_range = nlocal; if (offload == 0) o_range -= minlocal; IP_PRE_omp_range_align(iifrom, iito, tid, o_range, nthreads, sizeof(FORCE_T)); const int two_iito = iito * 2; #if defined(_OPENMP) #pragma omp barrier #endif acc_t *facc = &(f_start[0].x); const int sto = two_iito * 4; const int fst4 = f_stride * 4; #if defined(_OPENMP) #pragma omp barrier #endif int t_off = f_stride; if (EFLAG && eatom) { for (int t = 1; t < nthreads; t++) { - #if defined(__INTEL_COMPILER) + #if defined(LMP_SIMD_COMPILER) #pragma vector nontemporal #pragma novector #endif for (int n = iifrom * 2; n < 
two_iito; n++) { f_start[n].x += f_start[n + t_off].x; f_start[n].y += f_start[n + t_off].y; f_start[n].z += f_start[n + t_off].z; f_start[n].w += f_start[n + t_off].w; } t_off += f_stride; } } else { for (int t = 1; t < nthreads; t++) { - #if defined(__INTEL_COMPILER) + #if defined(LMP_SIMD_COMPILER) #pragma vector nontemporal #pragma novector #endif for (int n = iifrom * 2; n < two_iito; n++) { f_start[n].x += f_start[n + t_off].x; f_start[n].y += f_start[n + t_off].y; f_start[n].z += f_start[n + t_off].z; } t_off += f_stride; } } if (EVFLAG) { if (vflag==2) { const ATOM_T * _noalias const xo = x + minlocal; - #if defined(__INTEL_COMPILER) + #if defined(LMP_SIMD_COMPILER) #pragma vector nontemporal #pragma novector #endif for (int n = iifrom; n < iito; n++) { const int nt2 = n * 2; ov0 += f_start[nt2].x * xo[n].x; ov1 += f_start[nt2].y * xo[n].y; ov2 += f_start[nt2].z * xo[n].z; ov3 += f_start[nt2].y * xo[n].x; ov4 += f_start[nt2].z * xo[n].x; ov5 += f_start[nt2].z * xo[n].y; } } } if (ierror) f_start[1].w = ierror; } // omp if (EVFLAG) { if (EFLAG) { ev_global[0] = oevdwl; ev_global[1] = (acc_t)0.0; } if (vflag) { ev_global[2] = ov0; ev_global[3] = ov1; ev_global[4] = ov2; ev_global[5] = ov3; ev_global[6] = ov4; ev_global[7] = ov5; } } #ifdef __MIC__ *timer_compute = MIC_Wtime() - *timer_compute; #endif } // offload if (offload) fix->stop_watch(TIME_OFFLOAD_LATENCY); else fix->stop_watch(TIME_HOST_PAIR); if (EVFLAG) fix->add_result_array(f_start, ev_global, offload,eatom); else fix->add_result_array(f_start, 0, offload); } /* ---------------------------------------------------------------------- */ void PairGayBerneIntel::init_style() { PairGayBerne::init_style(); neighbor->requests[neighbor->nrequest-1]->intel = 1; int ifix = modify->find_fix("package_intel"); if (ifix < 0) error->all(FLERR, "The 'package intel' command is required for /intel styles"); fix = static_cast(modify->fix[ifix]); fix->pair_init_check(); #ifdef _LMP_INTEL_OFFLOAD if 
(force->newton_pair) fix->set_offload_noghost(1); _cop = fix->coprocessor_number(); #endif if (fix->precision() == FixIntel::PREC_MODE_MIXED) pack_force_const(force_const_single, fix->get_mixed_buffers()); else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) pack_force_const(force_const_double, fix->get_double_buffers()); else pack_force_const(force_const_single, fix->get_single_buffers()); } /* ---------------------------------------------------------------------- */ template void PairGayBerneIntel::pack_force_const(ForceConst &fc, IntelBuffers *buffers) { int tp1 = atom->ntypes + 1; _max_nbors = buffers->get_max_nbors(); int mthreads = comm->nthreads; if (mthreads < buffers->get_off_threads()) mthreads = buffers->get_off_threads(); fc.set_ntypes(tp1, _max_nbors, mthreads, memory, _cop); buffers->set_ntypes(tp1); flt_t **cutneighsq = buffers->get_cutneighsq(); // Repeat cutsq calculation because done after call to init_style double cut, cutneigh; for (int i = 1; i <= atom->ntypes; i++) { for (int j = i; j <= atom->ntypes; j++) { if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { cut = init_one(i,j); cutneigh = cut + neighbor->skin; cutsq[i][j] = cutsq[j][i] = cut*cut; cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh; } } } for (int i = 0; i < 4; i++) { fc.special_lj[i] = force->special_lj[i]; fc.special_lj[0] = 1.0; } fc.gamma = gamma; fc.upsilon = upsilon; fc.mu = mu; for (int i = 0; i < tp1; i++) { for (int j = 0; j < tp1; j++) { fc.ijc[i][j].lj1 = lj1[i][j]; fc.ijc[i][j].lj2 = lj2[i][j]; fc.ijc[i][j].cutsq = cutsq[i][j]; fc.ijc[i][j].offset = offset[i][j]; fc.ijc[i][j].sigma = sigma[i][j]; fc.ijc[i][j].epsilon = epsilon[i][j]; fc.ijc[i][j].form = form[i][j]; fc.ijc[i][j].lshape = lshape[i] * lshape[j]; fc.lj34[i][j].lj3 = lj3[i][j]; fc.lj34[i][j].lj4 = lj4[i][j]; } for (int j = 0; j < 4; j++) { fc.ic[i].shape2[j] = shape2[i][j]; fc.ic[i].well[j] = well[i][j]; } } #ifdef _LMP_INTEL_OFFLOAD if (_cop < 0) return; flt_t * 
special_lj = fc.special_lj; FC_PACKED1_T *oijc = fc.ijc[0]; FC_PACKED2_T *olj34 = fc.lj34[0]; FC_PACKED3_T *oic = fc.ic; flt_t * ocutneighsq = cutneighsq[0]; int tp1sq = tp1 * tp1; if (oijc != NULL && oic != NULL) { #pragma offload_transfer target(mic:_cop) \ in(special_lj: length(4) alloc_if(0) free_if(0)) \ in(oijc,olj34: length(tp1sq) alloc_if(0) free_if(0)) \ in(oic: length(tp1) alloc_if(0) free_if(0)) \ in(ocutneighsq: length(tp1sq)) } #endif } /* ---------------------------------------------------------------------- */ template void PairGayBerneIntel::ForceConst::set_ntypes(const int ntypes, const int one_length, const int nthreads, Memory *memory, const int cop) { if (ntypes != _ntypes) { if (_ntypes > 0) { fc_packed3 *oic = ic; #ifdef _LMP_INTEL_OFFLOAD flt_t * ospecial_lj = special_lj; fc_packed1 *oijc = ijc[0]; fc_packed2 *olj34 = lj34[0]; flt_t * orsq_form = rsq_form[0]; flt_t * odelx_form = delx_form[0]; flt_t * odely_form = dely_form[0]; flt_t * odelz_form = delz_form[0]; int * ojtype_form = jtype_form[0]; int * ojlist_form = jlist_form[0]; if (ospecial_lj != NULL && oijc != NULL && olj34 != NULL && orsq_form != NULL && odelx_form != NULL && odely_form != NULL && odelz_form != NULL && ojtype_form != NULL && ojlist_form != NULL && _cop >= 0) { #pragma offload_transfer target(mic:_cop) \ nocopy(ospecial_lj, oijc, olj34, oic: alloc_if(0) free_if(1)) \ nocopy(orsq_form, odelx_form, odely_form: alloc_if(0) free_if(1)) \ nocopy(odelz_form, ojtype_form, ojlist_form: alloc_if(0) free_if(1)) } #endif _memory->destroy(oic); _memory->destroy(ijc); _memory->destroy(lj34); _memory->destroy(rsq_form); _memory->destroy(delx_form); _memory->destroy(dely_form); _memory->destroy(delz_form); _memory->destroy(jtype_form); _memory->destroy(jlist_form); } if (ntypes > 0) { _cop = cop; memory->create(ijc, ntypes, ntypes, "fc.ijc"); memory->create(lj34, ntypes, ntypes, "fc.lj34"); memory->create(ic, ntypes, "fc.ic"); memory->create(rsq_form, nthreads, one_length, "rsq_form"); 
memory->create(delx_form, nthreads, one_length, "delx_form"); memory->create(dely_form, nthreads, one_length, "dely_form"); memory->create(delz_form, nthreads, one_length, "delz_form"); memory->create(jtype_form, nthreads, one_length, "jtype_form"); memory->create(jlist_form, nthreads, one_length, "jlist_form"); for (int zn = 0; zn < nthreads; zn++) for (int zo = 0; zo < one_length; zo++) { rsq_form[zn][zo] = 10.0; delx_form[zn][zo] = 10.0; dely_form[zn][zo] = 10.0; delz_form[zn][zo] = 10.0; jtype_form[zn][zo] = 1; jlist_form[zn][zo] = 0; } #ifdef _LMP_INTEL_OFFLOAD flt_t * ospecial_lj = special_lj; fc_packed1 *oijc = ijc[0]; fc_packed2 *olj34 = lj34[0]; fc_packed3 *oic = ic; flt_t * orsq_form = rsq_form[0]; flt_t * odelx_form = delx_form[0]; flt_t * odely_form = dely_form[0]; flt_t * odelz_form = delz_form[0]; int * ojtype_form = jtype_form[0]; int * ojlist_form = jlist_form[0]; int off_onel = one_length * nthreads; int tp1sq = ntypes*ntypes; if (ospecial_lj != NULL && oijc != NULL && olj34 != NULL && oic != NULL && orsq_form != NULL && odelx_form != NULL && odely_form != NULL && odelz_form != NULL && ojtype_form !=NULL && ojlist_form !=NULL && cop >= 0) { #pragma offload_transfer target(mic:cop) \ nocopy(ospecial_lj: length(4) alloc_if(1) free_if(0)) \ nocopy(oijc,olj34: length(tp1sq) alloc_if(1) free_if(0)) \ nocopy(oic: length(ntypes) alloc_if(1) free_if(0)) \ in(orsq_form: length(off_onel) alloc_if(1) free_if(0)) \ in(odelx_form: length(off_onel) alloc_if(1) free_if(0)) \ in(odely_form: length(off_onel) alloc_if(1) free_if(0)) \ in(odelz_form: length(off_onel) alloc_if(1) free_if(0)) \ in(ojtype_form: length(off_onel) alloc_if(1) free_if(0)) \ in(ojlist_form: length(off_onel) alloc_if(1) free_if(0)) } #endif } } _ntypes = ntypes; _memory = memory; } diff --git a/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp b/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp index 02e7cfc73..88a9012c2 100644 --- a/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp +++ 
b/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp @@ -1,674 +1,674 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: W. Michael Brown (Intel) ------------------------------------------------------------------------- */ #include "math.h" #include "pair_lj_charmm_coul_long_intel.h" #include "atom.h" #include "comm.h" #include "force.h" #include "group.h" #include "kspace.h" #include "memory.h" #include "modify.h" #include "neighbor.h" #include "neigh_list.h" #include "neigh_request.h" #include "memory.h" #include "suffix.h" using namespace LAMMPS_NS; #define LJ_T typename IntelBuffers::vec4_t #define TABLE_T typename ForceConst::table_t /* ---------------------------------------------------------------------- */ PairLJCharmmCoulLongIntel::PairLJCharmmCoulLongIntel(LAMMPS *lmp) : PairLJCharmmCoulLong(lmp) { suffix_flag |= Suffix::INTEL; respa_enable = 0; cut_respa = NULL; } /* ---------------------------------------------------------------------- */ PairLJCharmmCoulLongIntel::~PairLJCharmmCoulLongIntel() { } /* ---------------------------------------------------------------------- */ void PairLJCharmmCoulLongIntel::compute(int eflag, int vflag) { if (fix->precision()==FixIntel::PREC_MODE_MIXED) compute(eflag, vflag, fix->get_mixed_buffers(), force_const_single); else if (fix->precision()==FixIntel::PREC_MODE_DOUBLE) compute(eflag, vflag, fix->get_double_buffers(), force_const_double); else compute(eflag, vflag, fix->get_single_buffers(), force_const_single); fix->balance_stamp(); vflag_fdotr = 0; 
} template void PairLJCharmmCoulLongIntel::compute(int eflag, int vflag, IntelBuffers *buffers, const ForceConst &fc) { if (eflag || vflag) { ev_setup(eflag,vflag); } else evflag = vflag_fdotr = 0; const int inum = list->inum; const int nthreads = comm->nthreads; const int host_start = fix->host_start_pair(); const int offload_end = fix->offload_end_pair(); const int ago = neighbor->ago; if (ago != 0 && fix->separate_buffers() == 0) { fix->start_watch(TIME_PACK); #if defined(_OPENMP) #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc) #endif { int ifrom, ito, tid; IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal+atom->nghost, nthreads, sizeof(ATOM_T)); buffers->thr_pack(ifrom,ito,ago); } fix->stop_watch(TIME_PACK); } // -------------------- Regular version if (evflag || vflag_fdotr) { int ovflag = 0; if (vflag_fdotr) ovflag = 2; else if (vflag) ovflag = 1; if (eflag) { if (force->newton_pair) { eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end); eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum); } else { eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end); eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum); } } else { if (force->newton_pair) { eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end); eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum); } else { eval<1,0,0>(1, ovflag, buffers, fc, 0, offload_end); eval<1,0,0>(0, ovflag, buffers, fc, host_start, inum); } } } else { if (force->newton_pair) { eval<0,0,1>(1, 0, buffers, fc, 0, offload_end); eval<0,0,1>(0, 0, buffers, fc, host_start, inum); } else { eval<0,0,0>(1, 0, buffers, fc, 0, offload_end); eval<0,0,0>(0, 0, buffers, fc, host_start, inum); } } } /* ---------------------------------------------------------------------- */ template void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag, IntelBuffers *buffers, const ForceConst &fc, const int astart, const int aend) { const int inum = aend - astart; if (inum == 0) return; int nlocal, nall, minlocal; 
fix->get_buffern(offload, nlocal, nall, minlocal); const int ago = neighbor->ago; IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall); ATOM_T * _noalias const x = buffers->get_x(offload); flt_t * _noalias const q = buffers->get_q(offload); const int * _noalias const numneigh = list->numneigh; const int * _noalias const cnumneigh = buffers->cnumneigh(list); const int * _noalias const firstneigh = buffers->firstneigh(list); const flt_t * _noalias const special_coul = fc.special_coul; const flt_t * _noalias const special_lj = fc.special_lj; const flt_t qqrd2e = force->qqrd2e; const flt_t inv_denom_lj = (flt_t)1.0/denom_lj; const flt_t * _noalias const cutsq = fc.cutsq[0]; const LJ_T * _noalias const lj = fc.lj[0]; const TABLE_T * _noalias const table = fc.table; const flt_t * _noalias const etable = fc.etable; const flt_t * _noalias const detable = fc.detable; const flt_t * _noalias const ctable = fc.ctable; const flt_t * _noalias const dctable = fc.dctable; const flt_t cut_ljsq = fc.cut_ljsq; const flt_t cut_lj_innersq = fc.cut_lj_innersq; const flt_t cut_coulsq = fc.cut_coulsq; const flt_t g_ewald = fc.g_ewald; const flt_t tabinnersq = fc.tabinnersq; const int ntypes = atom->ntypes + 1; const int eatom = this->eflag_atom; // Determine how much data to transfer int x_size, q_size, f_stride, ev_size, separate_flag; IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag, buffers, offload, fix, separate_flag, x_size, q_size, ev_size, f_stride); int tc; FORCE_T * _noalias f_start; acc_t * _noalias ev_global; IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global); const int nthreads = tc; #ifdef _LMP_INTEL_OFFLOAD int *overflow = fix->get_off_overflow_flag(); double *timer_compute = fix->off_watch_pair(); // Redeclare as local variables for offload const int ncoultablebits = this->ncoultablebits; const int ncoulmask = this->ncoulmask; const int ncoulshiftbits = this->ncoulshiftbits; #ifdef INTEL_ALLOW_TABLE #define ITABLE_IN 
in(table,etable,detable:length(0) alloc_if(0) free_if(0)) \ in(ctable,dctable:length(0) alloc_if(0) free_if(0)) \ in(ncoultablebits,tabinnersq,ncoulmask,ncoulshiftbits) #else #define ITABLE_IN #endif if (offload) fix->start_watch(TIME_OFFLOAD_LATENCY); #pragma offload target(mic:_cop) if(offload) \ in(special_lj,special_coul:length(0) alloc_if(0) free_if(0)) \ in(cutsq,lj:length(0) alloc_if(0) free_if(0)) \ in(firstneigh:length(0) alloc_if(0) free_if(0)) \ in(cnumneigh:length(0) alloc_if(0) free_if(0)) \ in(numneigh:length(0) alloc_if(0) free_if(0)) \ in(x:length(x_size) alloc_if(0) free_if(0)) \ in(q:length(q_size) alloc_if(0) free_if(0)) \ in(overflow:length(0) alloc_if(0) free_if(0)) \ in(nthreads,qqrd2e,g_ewald,inum,nall,ntypes,cut_coulsq,vflag,eatom) \ in(f_stride,separate_flag,offload) \ in(astart,cut_ljsq,cut_lj_innersq,nlocal,inv_denom_lj,minlocal) \ out(f_start:length(f_stride) alloc_if(0) free_if(0)) \ out(ev_global:length(ev_size) alloc_if(0) free_if(0)) \ out(timer_compute:length(1) alloc_if(0) free_if(0)) \ ITABLE_IN signal(f_start) #endif { #ifdef __MIC__ *timer_compute = MIC_Wtime(); #endif IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall, f_stride, x, q); acc_t oevdwl, oecoul, ov0, ov1, ov2, ov3, ov4, ov5; if (EVFLAG) { oevdwl = oecoul = (acc_t)0; if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; } // loop over neighbors of my atoms #if defined(_OPENMP) #pragma omp parallel default(none) \ shared(f_start,f_stride,nlocal,nall,minlocal) \ reduction(+:oevdwl,oecoul,ov0,ov1,ov2,ov3,ov4,ov5) #endif { int iifrom, iito, tid; IP_PRE_omp_range_id(iifrom, iito, tid, inum, nthreads); iifrom += astart; iito += astart; FORCE_T * _noalias const f = f_start - minlocal + (tid * f_stride); memset(f + minlocal, 0, f_stride * sizeof(FORCE_T)); flt_t cutboth = cut_coulsq; for (int i = iifrom; i < iito; ++i) { // const int i = ilist[ii]; const int itype = x[i].w; const int ptr_off = itype * ntypes; const flt_t * _noalias const cutsqi = cutsq + 
ptr_off; const LJ_T * _noalias const lji = lj + ptr_off; const int * _noalias const jlist = firstneigh + cnumneigh[i]; const int jnum = numneigh[i]; acc_t fxtmp,fytmp,fztmp,fwtmp; acc_t sevdwl, secoul, sv0, sv1, sv2, sv3, sv4, sv5; const flt_t xtmp = x[i].x; const flt_t ytmp = x[i].y; const flt_t ztmp = x[i].z; const flt_t qtmp = q[i]; fxtmp = fytmp = fztmp = (acc_t)0; if (EVFLAG) { if (EFLAG) fwtmp = sevdwl = secoul = (acc_t)0; if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0; } - #if defined(__INTEL_COMPILER) + #if defined(LMP_SIMD_COMPILER) #pragma vector aligned #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \ sv0, sv1, sv2, sv3, sv4, sv5) #endif for (int jj = 0; jj < jnum; jj++) { flt_t forcecoul, forcelj, evdwl, ecoul; forcecoul = forcelj = evdwl = ecoul = (flt_t)0.0; const int sbindex = jlist[jj] >> SBBITS & 3; const int j = jlist[jj] & NEIGHMASK; const flt_t delx = xtmp - x[j].x; const flt_t dely = ytmp - x[j].y; const flt_t delz = ztmp - x[j].z; const int jtype = x[j].w; const flt_t rsq = delx * delx + dely * dely + delz * delz; const flt_t r2inv = (flt_t)1.0 / rsq; - #ifdef __MIC__ + #ifdef INTEL_VMASK if (rsq < cut_coulsq) { #endif #ifdef INTEL_ALLOW_TABLE if (!ncoultablebits || rsq <= tabinnersq) { #endif const flt_t A1 = 0.254829592; const flt_t A2 = -0.284496736; const flt_t A3 = 1.421413741; const flt_t A4 = -1.453152027; const flt_t A5 = 1.061405429; const flt_t EWALD_F = 1.12837917; const flt_t INV_EWALD_P = 1.0 / 0.3275911; const flt_t r = sqrt(rsq); const flt_t grij = g_ewald * r; const flt_t expm2 = exp(-grij * grij); const flt_t t = INV_EWALD_P / (INV_EWALD_P + grij); const flt_t erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; const flt_t prefactor = qqrd2e * qtmp * q[j] / r; forcecoul = prefactor * (erfc + EWALD_F * grij * expm2); if (EFLAG) ecoul = prefactor * erfc; if (sbindex) { const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex])* prefactor; forcecoul -= adjust; if (EFLAG) ecoul -= adjust; } #ifdef 
INTEL_ALLOW_TABLE } else { float rsq_lookup = rsq; const int itable = (__intel_castf32_u32(rsq_lookup) & ncoulmask) >> ncoulshiftbits; const flt_t fraction = (rsq_lookup - table[itable].r) * table[itable].dr; const flt_t tablet = table[itable].f + fraction * table[itable].df; forcecoul = qtmp * q[j] * tablet; if (EFLAG) ecoul = qtmp * q[j] * (etable[itable] + fraction * detable[itable]); if (sbindex) { const flt_t table2 = ctable[itable] + fraction * dctable[itable]; const flt_t prefactor = qtmp * q[j] * table2; const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex]) * prefactor; forcecoul -= adjust; if (EFLAG) ecoul -= adjust; } } #endif - #ifdef __MIC__ + #ifdef INTEL_VMASK } #endif - #ifdef __MIC__ + #ifdef INTEL_VMASK if (rsq < cut_ljsq) { #endif flt_t r6inv = r2inv * r2inv * r2inv; forcelj = r6inv * (lji[jtype].x * r6inv - lji[jtype].y); if (EFLAG) evdwl = r6inv*(lji[jtype].z * r6inv - lji[jtype].w); - #ifdef __MIC__ + #ifdef INTEL_VMASK if (rsq > cut_lj_innersq) { #endif const flt_t drsq = cut_ljsq - rsq; const flt_t cut2 = (rsq - cut_lj_innersq) * drsq; const flt_t switch1 = drsq * (drsq * drsq + (flt_t)3.0 * cut2) * inv_denom_lj; const flt_t switch2 = (flt_t)12.0 * rsq * cut2 * inv_denom_lj; if (EFLAG) { - #ifndef __MIC__ + #ifndef INTEL_VMASK if (rsq > cut_lj_innersq) { #endif forcelj = forcelj * switch1 + evdwl * switch2; evdwl *= switch1; - #ifndef __MIC__ + #ifndef INTEL_VMASK } #endif } else { const flt_t philj = r6inv * (lji[jtype].z*r6inv - lji[jtype].w); - #ifndef __MIC__ + #ifndef INTEL_VMASK if (rsq > cut_lj_innersq) #endif forcelj = forcelj * switch1 + philj * switch2; } - #ifdef __MIC__ + #ifdef INTEL_VMASK } #endif if (sbindex) { const flt_t factor_lj = special_lj[sbindex]; forcelj *= factor_lj; if (EFLAG) evdwl *= factor_lj; } - #ifdef __MIC__ + #ifdef INTEL_VMASK } #else if (rsq > cut_coulsq) { forcecoul = (flt_t)0.0; ecoul = (flt_t)0.0; } if (rsq > cut_ljsq) { forcelj = (flt_t)0.0; evdwl = (flt_t)0.0; } #endif - #ifdef __MIC__ + #ifdef 
INTEL_VMASK if (rsq < cut_coulsq) { #endif const flt_t fpair = (forcecoul + forcelj) * r2inv; fxtmp += delx * fpair; fytmp += dely * fpair; fztmp += delz * fpair; if (NEWTON_PAIR || j < nlocal) { f[j].x -= delx * fpair; f[j].y -= dely * fpair; f[j].z -= delz * fpair; } if (EVFLAG) { flt_t ev_pre = (flt_t)0; if (NEWTON_PAIR || i < nlocal) ev_pre += (flt_t)0.5; if (NEWTON_PAIR || j < nlocal) ev_pre += (flt_t)0.5; if (EFLAG) { sevdwl += ev_pre * evdwl; secoul += ev_pre * ecoul; if (eatom) { if (NEWTON_PAIR || i < nlocal) fwtmp += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul; if (NEWTON_PAIR || j < nlocal) f[j].w += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul; } } IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair, delx, dely, delz); } - #ifdef __MIC__ + #ifdef INTEL_VMASK } #endif } // for jj f[i].x += fxtmp; f[i].y += fytmp; f[i].z += fztmp; IP_PRE_ev_tally_atomq(EVFLAG, EFLAG, vflag, f, fwtmp); } // for ii #if defined(_OPENMP) #pragma omp barrier #endif IP_PRE_fdotr_acc_force(NEWTON_PAIR, EVFLAG, EFLAG, vflag, eatom, nall, nlocal, minlocal, nthreads, f_start, f_stride, x); } // end of omp parallel region if (EVFLAG) { if (EFLAG) { ev_global[0] = oevdwl; ev_global[1] = oecoul; } if (vflag) { ev_global[2] = ov0; ev_global[3] = ov1; ev_global[4] = ov2; ev_global[5] = ov3; ev_global[6] = ov4; ev_global[7] = ov5; } } #ifdef __MIC__ *timer_compute = MIC_Wtime() - *timer_compute; #endif } // end of offload region if (offload) fix->stop_watch(TIME_OFFLOAD_LATENCY); else fix->stop_watch(TIME_HOST_PAIR); if (EVFLAG) fix->add_result_array(f_start, ev_global, offload, eatom); else fix->add_result_array(f_start, 0, offload); } /* ---------------------------------------------------------------------- */ void PairLJCharmmCoulLongIntel::init_style() { PairLJCharmmCoulLong::init_style(); neighbor->requests[neighbor->nrequest-1]->intel = 1; int ifix = modify->find_fix("package_intel"); if (ifix < 0) error->all(FLERR, "The 'package intel' command is required for /intel styles"); fix = 
static_cast(modify->fix[ifix]); fix->pair_init_check(); #ifdef _LMP_INTEL_OFFLOAD _cop = fix->coprocessor_number(); #endif if (fix->precision() == FixIntel::PREC_MODE_MIXED) pack_force_const(force_const_single, fix->get_mixed_buffers()); else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) pack_force_const(force_const_double, fix->get_double_buffers()); else pack_force_const(force_const_single, fix->get_single_buffers()); } template void PairLJCharmmCoulLongIntel::pack_force_const(ForceConst &fc, IntelBuffers *buffers) { int tp1 = atom->ntypes + 1; int ntable = 1; if (ncoultablebits) for (int i = 0; i < ncoultablebits; i++) ntable *= 2; fc.set_ntypes(tp1, ntable, memory, _cop); buffers->set_ntypes(tp1); flt_t **cutneighsq = buffers->get_cutneighsq(); // Repeat cutsq calculation because done after call to init_style double cut, cutneigh; if (cut_lj > cut_coul) error->all(FLERR, "Intel varient of lj/charmm/coul/long expects lj cutoff<=coulombic"); for (int i = 1; i <= atom->ntypes; i++) { for (int j = i; j <= atom->ntypes; j++) { if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { cut = init_one(i, j); cutneigh = cut + neighbor->skin; cutsq[i][j] = cutsq[j][i] = cut*cut; cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh; } } } cut_lj_innersq = cut_lj_inner * cut_lj_inner; cut_ljsq = cut_lj * cut_lj; cut_coulsq = cut_coul * cut_coul; cut_bothsq = MAX(cut_ljsq, cut_coulsq); fc.g_ewald = force->kspace->g_ewald; fc.tabinnersq = tabinnersq; fc.cut_coulsq = cut_coulsq; fc.cut_ljsq = cut_ljsq; fc.cut_lj_innersq = cut_lj_innersq; for (int i = 0; i < 4; i++) { fc.special_lj[i] = force->special_lj[i]; fc.special_coul[i] = force->special_coul[i]; fc.special_coul[0] = 1.0; fc.special_lj[0] = 1.0; } for (int i = 0; i < tp1; i++) { for (int j = 0; j < tp1; j++) { fc.lj[i][j].x = lj1[i][j]; fc.lj[i][j].y = lj2[i][j]; fc.lj[i][j].z = lj3[i][j]; fc.lj[i][j].w = lj4[i][j]; fc.cutsq[i][j] = cutsq[i][j]; } } if (ncoultablebits) { for (int i = 0; i < 
ntable; i++) { fc.table[i].r = rtable[i]; fc.table[i].dr = drtable[i]; fc.table[i].f = ftable[i]; fc.table[i].df = dftable[i]; fc.etable[i] = etable[i]; fc.detable[i] = detable[i]; fc.ctable[i] = ctable[i]; fc.dctable[i] = dctable[i]; } } #ifdef _LMP_INTEL_OFFLOAD if (_cop < 0) return; flt_t * special_lj = fc.special_lj; flt_t * special_coul = fc.special_coul; flt_t * cutsq = fc.cutsq[0]; LJ_T * lj = fc.lj[0]; TABLE_T * table = fc.table; flt_t * etable = fc.etable; flt_t * detable = fc.detable; flt_t * ctable = fc.ctable; flt_t * dctable = fc.dctable; flt_t * ocutneighsq = cutneighsq[0]; int tp1sq = tp1 * tp1; #pragma offload_transfer target(mic:_cop) \ in(special_lj, special_coul: length(4) alloc_if(0) free_if(0)) \ in(cutsq,lj: length(tp1sq) alloc_if(0) free_if(0)) \ in(table: length(ntable) alloc_if(0) free_if(0)) \ in(etable,detable,ctable,dctable: length(ntable) alloc_if(0) free_if(0)) \ in(ocutneighsq: length(tp1sq) alloc_if(0) free_if(0)) #endif } /* ---------------------------------------------------------------------- */ template void PairLJCharmmCoulLongIntel::ForceConst::set_ntypes(const int ntypes, const int ntable, Memory *memory, const int cop) { if ( (ntypes != _ntypes || ntable != _ntable) ) { if (_ntypes > 0) { #ifdef _LMP_INTEL_OFFLOAD flt_t * ospecial_lj = special_lj; flt_t * ospecial_coul = special_coul; flt_t * ocutsq = cutsq[0]; typename IntelBuffers::vec4_t * olj = lj[0]; table_t * otable = table; flt_t * oetable = etable; flt_t * odetable = detable; flt_t * octable = ctable; flt_t * odctable = dctable; if (ospecial_lj != NULL && ocutsq != NULL && olj != NULL && otable != NULL && oetable != NULL && odetable != NULL && octable != NULL && odctable != NULL && ospecial_coul != NULL && cop >= 0) { #pragma offload_transfer target(mic:cop) \ nocopy(ospecial_lj, ospecial_coul: alloc_if(0) free_if(1)) \ nocopy(ocutsq, olj: alloc_if(0) free_if(1)) \ nocopy(otable: alloc_if(0) free_if(1)) \ nocopy(oetable, odetable, octable, odctable: alloc_if(0) 
free_if(1)) } #endif _memory->destroy(cutsq); _memory->destroy(lj); _memory->destroy(table); _memory->destroy(etable); _memory->destroy(detable); _memory->destroy(ctable); _memory->destroy(dctable); } if (ntypes > 0) { _cop = cop; memory->create(cutsq,ntypes,ntypes,"fc.cutsq"); memory->create(lj,ntypes,ntypes,"fc.lj"); memory->create(table,ntable,"pair:fc.table"); memory->create(etable,ntable,"pair:fc.etable"); memory->create(detable,ntable,"pair:fc.detable"); memory->create(ctable,ntable,"pair:fc.ctable"); memory->create(dctable,ntable,"pair:fc.dctable"); #ifdef _LMP_INTEL_OFFLOAD flt_t * ospecial_lj = special_lj; flt_t * ospecial_coul = special_coul; flt_t * ocutsq = cutsq[0]; typename IntelBuffers::vec4_t * olj = lj[0]; table_t * otable = table; flt_t * oetable = etable; flt_t * odetable = detable; flt_t * octable = ctable; flt_t * odctable = dctable; int tp1sq = ntypes*ntypes; if (ospecial_lj != NULL && ocutsq != NULL && olj != NULL && otable !=NULL && oetable != NULL && odetable != NULL && octable != NULL && odctable != NULL && ospecial_coul != NULL && cop >= 0) { #pragma offload_transfer target(mic:cop) \ nocopy(ospecial_lj: length(4) alloc_if(1) free_if(0)) \ nocopy(ospecial_coul: length(4) alloc_if(1) free_if(0)) \ nocopy(ocutsq,olj: length(tp1sq) alloc_if(1) free_if(0)) \ nocopy(otable: length(ntable) alloc_if(1) free_if(0)) \ nocopy(oetable,odetable: length(ntable) alloc_if(1) free_if(0)) \ nocopy(octable,odctable: length(ntable) alloc_if(1) free_if(0)) } #endif } } _ntypes=ntypes; _ntable=ntable; _memory=memory; } diff --git a/src/USER-INTEL/pair_lj_cut_intel.cpp b/src/USER-INTEL/pair_lj_cut_intel.cpp index fd47b7e40..9d7e1b068 100644 --- a/src/USER-INTEL/pair_lj_cut_intel.cpp +++ b/src/USER-INTEL/pair_lj_cut_intel.cpp @@ -1,412 +1,412 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, 
sjplimp@sandia.gov This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: W. Michael Brown (Intel) ------------------------------------------------------------------------- */ #include "math.h" #include "pair_lj_cut_intel.h" #include "atom.h" #include "comm.h" #include "force.h" #include "memory.h" #include "modify.h" #include "neighbor.h" #include "neigh_list.h" #include "neigh_request.h" #include "suffix.h" using namespace LAMMPS_NS; #define FC_PACKED1_T typename ForceConst::fc_packed1 #define FC_PACKED2_T typename ForceConst::fc_packed2 /* ---------------------------------------------------------------------- */ PairLJCutIntel::PairLJCutIntel(LAMMPS *lmp) : PairLJCut(lmp) { suffix_flag |= Suffix::INTEL; respa_enable = 0; cut_respa = NULL; } /* ---------------------------------------------------------------------- */ void PairLJCutIntel::compute(int eflag, int vflag) { if (fix->precision() == FixIntel::PREC_MODE_MIXED) compute(eflag, vflag, fix->get_mixed_buffers(), force_const_single); else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) compute(eflag, vflag, fix->get_double_buffers(), force_const_double); else compute(eflag, vflag, fix->get_single_buffers(), force_const_single); fix->balance_stamp(); vflag_fdotr = 0; } template void PairLJCutIntel::compute(int eflag, int vflag, IntelBuffers *buffers, const ForceConst &fc) { if (eflag || vflag) { ev_setup(eflag, vflag); } else evflag = vflag_fdotr = 0; const int inum = list->inum; const int nthreads = comm->nthreads; const int host_start = fix->host_start_pair(); const int offload_end = fix->offload_end_pair(); const int ago = neighbor->ago; if (ago != 0 && fix->separate_buffers() == 0) { fix->start_watch(TIME_PACK); if (ago != 0) { #if defined(_OPENMP) #pragma omp parallel 
default(none) shared(eflag,vflag,buffers,fc) #endif { int ifrom, ito, tid; IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost, nthreads, sizeof(ATOM_T)); buffers->thr_pack(ifrom,ito,ago); } } fix->stop_watch(TIME_PACK); } if (evflag || vflag_fdotr) { int ovflag = 0; if (vflag_fdotr) ovflag = 2; else if (vflag) ovflag = 1; if (eflag) { if (force->newton_pair) { eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end); eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum); } else { eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end); eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum); } } else { if (force->newton_pair) { eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end); eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum); } else { eval<1,0,0>(1, ovflag, buffers, fc, 0, offload_end); eval<1,0,0>(0, ovflag, buffers, fc, host_start, inum); } } } else { if (force->newton_pair) { eval<0,0,1>(1, 0, buffers, fc, 0, offload_end); eval<0,0,1>(0, 0, buffers, fc, host_start, inum); } else { eval<0,0,0>(1, 0, buffers, fc, 0, offload_end); eval<0,0,0>(0, 0, buffers, fc, host_start, inum); } } } template void PairLJCutIntel::eval(const int offload, const int vflag, IntelBuffers *buffers, const ForceConst &fc, const int astart, const int aend) { const int inum = aend - astart; if (inum == 0) return; int nlocal, nall, minlocal; fix->get_buffern(offload, nlocal, nall, minlocal); const int ago = neighbor->ago; IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall); ATOM_T * _noalias const x = buffers->get_x(offload); const int * _noalias const numneigh = list->numneigh; const int * _noalias const cnumneigh = buffers->cnumneigh(list); const int * _noalias const firstneigh = buffers->firstneigh(list); const flt_t * _noalias const special_lj = fc.special_lj; const FC_PACKED1_T * _noalias const ljc12o = fc.ljc12o[0]; const FC_PACKED2_T * _noalias const lj34 = fc.lj34[0]; const int ntypes = atom->ntypes + 1; const int eatom = this->eflag_atom; // 
Determine how much data to transfer int x_size, q_size, f_stride, ev_size, separate_flag; IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag, buffers, offload, fix, separate_flag, x_size, q_size, ev_size, f_stride); int tc; FORCE_T * _noalias f_start; acc_t * _noalias ev_global; IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global); const int nthreads = tc; int *overflow = fix->get_off_overflow_flag(); { #ifdef __MIC__ *timer_compute = MIC_Wtime(); #endif IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall, f_stride, x, 0); acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5; if (EVFLAG) { oevdwl = (acc_t)0; if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; } // loop over neighbors of my atoms #if defined(_OPENMP) #pragma omp parallel default(none) \ shared(f_start,f_stride,nlocal,nall,minlocal) \ reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5) #endif { int iifrom, iito, tid; IP_PRE_omp_range_id(iifrom, iito, tid, inum, nthreads); iifrom += astart; iito += astart; FORCE_T * _noalias const f = f_start - minlocal + (tid * f_stride); memset(f + minlocal, 0, f_stride * sizeof(FORCE_T)); for (int i = iifrom; i < iito; ++i) { const int itype = x[i].w; const int ptr_off = itype * ntypes; const FC_PACKED1_T * _noalias const ljc12oi = ljc12o + ptr_off; const FC_PACKED2_T * _noalias const lj34i = lj34 + ptr_off; const int * _noalias const jlist = firstneigh + cnumneigh[i]; const int jnum = numneigh[i]; acc_t fxtmp, fytmp, fztmp, fwtmp; acc_t sevdwl, sv0, sv1, sv2, sv3, sv4, sv5; const flt_t xtmp = x[i].x; const flt_t ytmp = x[i].y; const flt_t ztmp = x[i].z; fxtmp = fytmp = fztmp = (acc_t)0; if (EVFLAG) { if (EFLAG) fwtmp = sevdwl = (acc_t)0; if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0; } - #if defined(__INTEL_COMPILER) + #if defined(LMP_SIMD_COMPILER) #pragma vector aligned #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \ sv0, sv1, sv2, sv3, sv4, sv5) #endif for (int jj = 0; jj < jnum; jj++) { flt_t forcelj, evdwl; 
forcelj = evdwl = (flt_t)0.0; const int sbindex = jlist[jj] >> SBBITS & 3; const int j = jlist[jj] & NEIGHMASK; const flt_t delx = xtmp - x[j].x; const flt_t dely = ytmp - x[j].y; const flt_t delz = ztmp - x[j].z; const int jtype = x[j].w; const flt_t rsq = delx * delx + dely * dely + delz * delz; - #ifdef __MIC__ + #ifdef INTEL_VMASK if (rsq < ljc12oi[jtype].cutsq) { #endif flt_t factor_lj = special_lj[sbindex]; flt_t r2inv = 1.0 / rsq; flt_t r6inv = r2inv * r2inv * r2inv; - #ifndef __MIC__ + #ifndef INTEL_VMASK if (rsq > ljc12oi[jtype].cutsq) r6inv = (flt_t)0.0; #endif forcelj = r6inv * (ljc12oi[jtype].lj1 * r6inv - ljc12oi[jtype].lj2); flt_t fpair = factor_lj * forcelj * r2inv; fxtmp += delx * fpair; fytmp += dely * fpair; fztmp += delz * fpair; if (NEWTON_PAIR || j < nlocal) { f[j].x -= delx * fpair; f[j].y -= dely * fpair; f[j].z -= delz * fpair; } if (EVFLAG) { flt_t ev_pre = (flt_t)0; if (NEWTON_PAIR || istop_watch(TIME_OFFLOAD_LATENCY); else fix->stop_watch(TIME_HOST_PAIR); if (EVFLAG) fix->add_result_array(f_start, ev_global, offload, eatom); else fix->add_result_array(f_start, 0, offload); } /* ---------------------------------------------------------------------- */ void PairLJCutIntel::init_style() { PairLJCut::init_style(); neighbor->requests[neighbor->nrequest-1]->intel = 1; int ifix = modify->find_fix("package_intel"); if (ifix < 0) error->all(FLERR, "The 'package intel' command is required for /intel styles"); fix = static_cast(modify->fix[ifix]); fix->pair_init_check(); #ifdef _LMP_INTEL_OFFLOAD if (fix->offload_balance() != 0.0) error->all(FLERR, "Offload for lj/cut/intel is not yet available. 
Set balance to 0."); #endif if (fix->precision() == FixIntel::PREC_MODE_MIXED) pack_force_const(force_const_single, fix->get_mixed_buffers()); else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) pack_force_const(force_const_double, fix->get_double_buffers()); else pack_force_const(force_const_single, fix->get_single_buffers()); } /* ---------------------------------------------------------------------- */ template void PairLJCutIntel::pack_force_const(ForceConst &fc, IntelBuffers *buffers) { int tp1 = atom->ntypes + 1; fc.set_ntypes(tp1,memory,_cop); buffers->set_ntypes(tp1); flt_t **cutneighsq = buffers->get_cutneighsq(); // Repeat cutsq calculation because done after call to init_style double cut, cutneigh; for (int i = 1; i <= atom->ntypes; i++) { for (int j = i; j <= atom->ntypes; j++) { if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { cut = init_one(i,j); cutneigh = cut + neighbor->skin; cutsq[i][j] = cutsq[j][i] = cut*cut; cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh; } } } for (int i = 0; i < 4; i++) { fc.special_lj[i] = force->special_lj[i]; fc.special_lj[0] = 1.0; } for (int i = 0; i < tp1; i++) { for (int j = 0; j < tp1; j++) { fc.ljc12o[i][j].lj1 = lj1[i][j]; fc.ljc12o[i][j].lj2 = lj2[i][j]; fc.lj34[i][j].lj3 = lj3[i][j]; fc.lj34[i][j].lj4 = lj4[i][j]; fc.ljc12o[i][j].cutsq = cutsq[i][j]; fc.ljc12o[i][j].offset = offset[i][j]; } } } /* ---------------------------------------------------------------------- */ template void PairLJCutIntel::ForceConst::set_ntypes(const int ntypes, Memory *memory, const int cop) { if (ntypes != _ntypes) { if (_ntypes > 0) { fc_packed1 *oljc12o = ljc12o[0]; fc_packed2 *olj34 = lj34[0]; _memory->destroy(oljc12o); _memory->destroy(olj34); } if (ntypes > 0) { _cop = cop; memory->create(ljc12o,ntypes,ntypes,"fc.c12o"); memory->create(lj34,ntypes,ntypes,"fc.lj34"); } } _ntypes = ntypes; _memory = memory; } diff --git a/src/USER-INTEL/pair_sw_intel.cpp b/src/USER-INTEL/pair_sw_intel.cpp 
index ebd626b5f..884d3436d 100755 --- a/src/USER-INTEL/pair_sw_intel.cpp +++ b/src/USER-INTEL/pair_sw_intel.cpp @@ -1,703 +1,703 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: W. Michael Brown (Intel) ------------------------------------------------------------------------- */ #include "math.h" #include "stdio.h" #include "stdlib.h" #include "string.h" #include "pair_sw_intel.h" #include "atom.h" #include "neighbor.h" #include "neigh_request.h" #include "force.h" #include "comm.h" #include "memory.h" #include "neighbor.h" #include "neigh_list.h" #include "memory.h" #include "error.h" #include "modify.h" #include "suffix.h" using namespace LAMMPS_NS; #define FC_PACKED0_T typename ForceConst::fc_packed0 #define FC_PACKED1_T typename ForceConst::fc_packed1 #define FC_PACKED2_T typename ForceConst::fc_packed2 #define FC_PACKED3_T typename ForceConst::fc_packed3 #define MAXLINE 1024 #define DELTA 4 /* ---------------------------------------------------------------------- */ PairSWIntel::PairSWIntel(LAMMPS *lmp) : PairSW(lmp) { suffix_flag |= Suffix::INTEL; } /* ---------------------------------------------------------------------- */ PairSWIntel::~PairSWIntel() { } /* ---------------------------------------------------------------------- */ void PairSWIntel::compute(int eflag, int vflag) { if (fix->precision() == 
FixIntel::PREC_MODE_MIXED) compute(eflag, vflag, fix->get_mixed_buffers(), force_const_single); else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) compute(eflag, vflag, fix->get_double_buffers(), force_const_double); else compute(eflag, vflag, fix->get_single_buffers(), force_const_single); fix->balance_stamp(); vflag_fdotr = 0; } /* ---------------------------------------------------------------------- */ template void PairSWIntel::compute(int eflag, int vflag, IntelBuffers *buffers, const ForceConst &fc) { if (eflag || vflag) { ev_setup(eflag, vflag); } else evflag = vflag_fdotr = 0; const int inum = list->inum; const int nthreads = comm->nthreads; const int host_start = fix->host_start_pair(); const int offload_end = fix->offload_end_pair(); const int ago = neighbor->ago; if (ago != 0 && fix->separate_buffers() == 0) { fix->start_watch(TIME_PACK); #if defined(_OPENMP) #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc) #endif { int ifrom, ito, tid; IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost, nthreads, sizeof(ATOM_T)); buffers->thr_pack(ifrom, ito, ago); } fix->stop_watch(TIME_PACK); } if (_spq) { if (evflag || vflag_fdotr) { int ovflag = 0; if (vflag_fdotr) ovflag = 2; else if (vflag) ovflag = 1; if (eflag) { eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad); eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum, _host_pad); } else { eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad); eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum, _host_pad); } } else { eval<1,0,0>(1, 0, buffers, fc, 0, offload_end, _offload_pad); eval<1,0,0>(0, 0, buffers, fc, host_start, inum, _host_pad); } } else { if (evflag || vflag_fdotr) { int ovflag = 0; if (vflag_fdotr) ovflag = 2; else if (vflag) ovflag = 1; if (eflag) { eval<0,1,1>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad); eval<0,1,1>(0, ovflag, buffers, fc, host_start, inum, _host_pad); } else { eval<0,1,0>(1, ovflag, buffers, fc, 0, 
offload_end, _offload_pad); eval<0,1,0>(0, ovflag, buffers, fc, host_start, inum, _host_pad); } } else { eval<0,0,0>(1, 0, buffers, fc, 0, offload_end, _offload_pad); eval<0,0,0>(0, 0, buffers, fc, host_start, inum, _host_pad); } } } /* ---------------------------------------------------------------------- */ template void PairSWIntel::eval(const int offload, const int vflag, IntelBuffers *buffers, const ForceConst &fc, const int astart, const int aend, const int pad_width) { const int inum = aend - astart; if (inum == 0) return; int nlocal, nall, minlocal; fix->get_buffern(offload, nlocal, nall, minlocal); const int ago = neighbor->ago; IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall); ATOM_T * _noalias const x = buffers->get_x(offload); const int * _noalias const numneighhalf = buffers->get_atombin(); const int * _noalias const numneigh = list->numneigh; const int * _noalias const cnumneigh = buffers->cnumneigh(list); const int * _noalias const firstneigh = buffers->firstneigh(list); const FC_PACKED0_T * _noalias const p2 = fc.p2[0]; const FC_PACKED1_T * _noalias const p2f = fc.p2f[0]; const FC_PACKED2_T * _noalias const p2e = fc.p2e[0]; const FC_PACKED3_T * _noalias const p3 = fc.p3[0][0]; flt_t * _noalias const ccachex = buffers->get_ccachex(); flt_t * _noalias const ccachey = buffers->get_ccachey(); flt_t * _noalias const ccachez = buffers->get_ccachez(); flt_t * _noalias const ccachew = buffers->get_ccachew(); int * _noalias const ccachei = buffers->get_ccachei(); int * _noalias const ccachej = buffers->get_ccachej(); const int ccache_stride = _ccache_stride; const int ntypes = atom->ntypes + 1; const int eatom = this->eflag_atom; // Determine how much data to transfer int x_size, q_size, f_stride, ev_size, separate_flag; IP_PRE_get_transfern(ago, /* NEWTON_PAIR*/ 1, EVFLAG, EFLAG, vflag, buffers, offload, fix, separate_flag, x_size, q_size, ev_size, f_stride); int tc; FORCE_T * _noalias f_start; acc_t * _noalias ev_global; 
IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global); const int nthreads = tc; #ifdef _LMP_INTEL_OFFLOAD double *timer_compute = fix->off_watch_pair(); int *overflow = fix->get_off_overflow_flag(); if (offload) fix->start_watch(TIME_OFFLOAD_LATENCY); #pragma offload target(mic:_cop) if(offload) \ in(p2,p2f,p2e,p3:length(0) alloc_if(0) free_if(0)) \ in(firstneigh:length(0) alloc_if(0) free_if(0)) \ in(cnumneigh:length(0) alloc_if(0) free_if(0)) \ in(numneigh:length(0) alloc_if(0) free_if(0)) \ in(x:length(x_size) alloc_if(0) free_if(0)) \ in(numneighhalf:length(0) alloc_if(0) free_if(0)) \ in(overflow:length(0) alloc_if(0) free_if(0)) \ in(ccachex,ccachey,ccachez,ccachew:length(0) alloc_if(0) free_if(0)) \ in(ccachei,ccachej:length(0) alloc_if(0) free_if(0)) \ in(ccache_stride,nthreads,inum,nall,ntypes,vflag,eatom,offload) \ in(astart,nlocal,f_stride,minlocal,separate_flag,pad_width) \ out(f_start:length(f_stride) alloc_if(0) free_if(0)) \ out(ev_global:length(ev_size) alloc_if(0) free_if(0)) \ out(timer_compute:length(1) alloc_if(0) free_if(0)) \ signal(f_start) #endif { #ifdef __MIC__ *timer_compute = MIC_Wtime(); #endif IP_PRE_repack_for_offload(1, separate_flag, nlocal, nall, f_stride, x, 0); acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5; if (EVFLAG) { oevdwl = (acc_t)0; if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; } #if defined(_OPENMP) #pragma omp parallel default(none) \ shared(f_start,f_stride,nlocal,nall,minlocal) \ reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5) #endif { int iifrom, iito, tid; IP_PRE_omp_range_id(iifrom, iito, tid, inum, nthreads); iifrom += astart; iito += astart; FORCE_T * _noalias const f = f_start - minlocal + (tid * f_stride); memset(f + minlocal, 0, f_stride * sizeof(FORCE_T)); const int toffs = tid * ccache_stride; flt_t * _noalias const tdelx = ccachex + toffs; flt_t * _noalias const tdely = ccachey + toffs; flt_t * _noalias const tdelz = ccachez + toffs; flt_t * _noalias const trsq = ccachew + toffs; int * 
_noalias const tj = ccachei + toffs; int * _noalias const tjtype = ccachej + toffs; // loop over full neighbor list of my atoms for (int i = iifrom; i < iito; ++i) { const flt_t xtmp = x[i].x; const flt_t ytmp = x[i].y; const flt_t ztmp = x[i].z; const int itype = x[i].w; const int ptr_off = itype * ntypes; const FC_PACKED0_T * _noalias const p2i = p2 + ptr_off; const FC_PACKED1_T * _noalias const p2fi = p2f + ptr_off; const FC_PACKED2_T * _noalias const p2ei = p2e + ptr_off; const FC_PACKED3_T * _noalias const p3i = p3 + ptr_off*ntypes; const int * _noalias const jlist = firstneigh + cnumneigh[i]; const int jnum = numneigh[i]; const int jnumhalf = numneighhalf[i]; acc_t fxtmp, fytmp, fztmp, fwtmp; acc_t sevdwl, sv0, sv1, sv2, sv3, sv4, sv5; fxtmp = fytmp = fztmp = (acc_t)0.0; if (EVFLAG) { if (EFLAG) fwtmp = sevdwl = (acc_t)0; if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0; } int ejnum = 0, ejnumhalf = 0; for (int jj = 0; jj < jnum; jj++) { int j = jlist[jj]; j &= NEIGHMASK; const flt_t delx = x[j].x - xtmp; const flt_t dely = x[j].y - ytmp; const flt_t delz = x[j].z - ztmp; const int jtype = x[j].w; const flt_t rsq1 = delx * delx + dely * dely + delz * delz; if (rsq1 < p2i[jtype].cutsq) { tdelx[ejnum] = delx; tdely[ejnum] = dely; tdelz[ejnum] = delz; trsq[ejnum] = rsq1; tj[ejnum] = j; tjtype[ejnum] = jtype; ejnum++; if (jj < jnumhalf) ejnumhalf++; } } int ejnum_pad = ejnum; while ( (ejnum_pad % pad_width) != 0) { tdelx[ejnum_pad] = (flt_t)0.0; tdely[ejnum_pad] = (flt_t)0.0; tdelz[ejnum_pad] = (flt_t)0.0; trsq[ejnum_pad] = (flt_t)1.0; tj[ejnum_pad] = nall; tjtype[ejnum_pad] = 0; ejnum_pad++; } - #if defined(__INTEL_COMPILER) + #if defined(LMP_SIMD_COMPILER) #pragma vector aligned #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \ sv0, sv1, sv2, sv3, sv4, sv5) #endif for (int jj = 0; jj < ejnum_pad; jj++) { acc_t fjxtmp, fjytmp, fjztmp, fjtmp; fjxtmp = fjytmp = fjztmp = (acc_t)0.0; if (EFLAG) fjtmp = (acc_t)0.0; const flt_t delx = tdelx[jj]; 
const flt_t dely = tdely[jj]; const flt_t delz = tdelz[jj]; const int jtype = tjtype[jj]; const flt_t rsq1 = trsq[jj]; const flt_t r1 = sqrt(rsq1); const flt_t rinvsq1 = (flt_t)1.0 / rsq1; const flt_t rainv1 = (flt_t)1.0 / (r1 - p2fi[jtype].cut); // two-body interactions, skip half of them flt_t rp, rq; if (SPQ == 1) { rp = r1 * r1; rp *= rp; rp = (flt_t)1.0 / rp; rq = (flt_t)1.0; } else { rp = pow(r1, -p2fi[jtype].powerp); rq = pow(r1, -p2fi[jtype].powerq); } const flt_t rainvsq = rainv1 * rainv1 * r1; flt_t expsrainv = exp(p2fi[jtype].sigma * rainv1); if (jj >= ejnumhalf) expsrainv = (flt_t)0.0; const flt_t fpair = (p2fi[jtype].c1 * rp - p2fi[jtype].c2 * rq + (p2fi[jtype].c3 * rp -p2fi[jtype].c4 * rq) * rainvsq) * expsrainv * rinvsq1; fxtmp -= delx * fpair; fytmp -= dely * fpair; fztmp -= delz * fpair; fjxtmp += delx * fpair; fjytmp += dely * fpair; fjztmp += delz * fpair; if (EVFLAG) { if (EFLAG) { flt_t evdwl; evdwl = (p2ei[jtype].c5 * rp - p2ei[jtype].c6 * rq) * expsrainv; sevdwl += evdwl; if (eatom) { fwtmp += (acc_t)0.5 * evdwl; fjtmp += (acc_t)0.5 * evdwl; } } IP_PRE_ev_tally_nbor(vflag, (flt_t)1.0, fpair, -delx, -dely, -delz); } /*---------------------------------------------*/ flt_t gsrainv1 = p2i[jtype].sigma_gamma * rainv1; flt_t gsrainvsq1 = gsrainv1 * rainv1 / r1; flt_t expgsrainv1 = exp(gsrainv1); const int joffset = jtype * ntypes; for (int kk = 0; kk < ejnum; kk++) { flt_t delr2[3]; delr2[0] = tdelx[kk]; delr2[1] = tdely[kk]; delr2[2] = tdelz[kk]; const int ktype = tjtype[kk]; const flt_t rsq2 = trsq[kk]; const flt_t r2 = sqrt(rsq2); const flt_t rinvsq2 = (flt_t)1.0 / rsq2; const flt_t rainv2 = (flt_t)1.0 / (r2 - p2i[ktype].cut); const flt_t gsrainv2 = p2i[ktype].sigma_gamma * rainv2; const flt_t gsrainvsq2 = gsrainv2 * rainv2 / r2; const flt_t expgsrainv2 = exp(gsrainv2); const flt_t rinv12 = (flt_t)1.0 / (r1 * r2); const flt_t cs = (delx * delr2[0] + dely * delr2[1] + delz * delr2[2]) * rinv12; const flt_t delcs = cs - p3i[joffset + 
ktype].costheta; const flt_t delcssq = delcs*delcs; flt_t kfactor; if (jj == kk) kfactor = (flt_t)0.0; else kfactor = (flt_t)1.0; const flt_t facexp = expgsrainv1*expgsrainv2*kfactor; const flt_t facrad = p3i[joffset + ktype].lambda_epsilon * facexp * delcssq; const flt_t frad1 = facrad*gsrainvsq1; const flt_t frad2 = facrad*gsrainvsq2; const flt_t facang = p3i[joffset + ktype].lambda_epsilon2 * facexp * delcs; const flt_t facang12 = rinv12*facang; const flt_t csfacang = cs*facang; const flt_t csfac1 = rinvsq1*csfacang; const flt_t fjx = delx*(frad1+csfac1)-delr2[0]*facang12; const flt_t fjy = dely*(frad1+csfac1)-delr2[1]*facang12; const flt_t fjz = delz*(frad1+csfac1)-delr2[2]*facang12; fxtmp -= fjx; fytmp -= fjy; fztmp -= fjz; fjxtmp += fjx; fjytmp += fjy; fjztmp += fjz; if (EVFLAG) { if (EFLAG) { const flt_t evdwl = facrad * (flt_t)0.5; sevdwl += evdwl; if (eatom) { fwtmp += (acc_t)0.33333333 * evdwl; fjtmp += (acc_t)0.33333333 * facrad; } } IP_PRE_ev_tally_nbor3v(vflag, fjx, fjy, fjz, delx, dely, delz); } } // for kk const int j = tj[jj]; f[j].x += fjxtmp; f[j].y += fjytmp; f[j].z += fjztmp; if (EFLAG) if (eatom) f[j].w += fjtmp; } // for jj f[i].x += fxtmp; f[i].y += fytmp; f[i].z += fztmp; IP_PRE_ev_tally_atom(EVFLAG, EFLAG, vflag, f, fwtmp); } // for ii #if defined(_OPENMP) #pragma omp barrier #endif IP_PRE_fdotr_acc_force(1, EVFLAG, EFLAG, vflag, eatom, nall, nlocal, minlocal, nthreads, f_start, f_stride, x); } // end omp if (EVFLAG) { if (EFLAG) { ev_global[0] = oevdwl; ev_global[1] = (acc_t)0.0; } if (vflag) { ev_global[2] = ov0; ev_global[3] = ov1; ev_global[4] = ov2; ev_global[5] = ov3; ev_global[6] = ov4; ev_global[7] = ov5; } } #ifdef __MIC__ *timer_compute = MIC_Wtime() - *timer_compute; #endif } // end offload if (offload) fix->stop_watch(TIME_OFFLOAD_LATENCY); else fix->stop_watch(TIME_HOST_PAIR); if (EVFLAG) fix->add_result_array(f_start, ev_global, offload, eatom); else fix->add_result_array(f_start, 0, offload); } /* 
---------------------------------------------------------------------- */ void PairSWIntel::allocate() { PairSW::allocate(); } /* ---------------------------------------------------------------------- init specific to this pair style ------------------------------------------------------------------------- */ void PairSWIntel::init_style() { PairSW::init_style(); neighbor->requests[neighbor->nrequest-1]->intel = 1; map[0] = map[1]; int ifix = modify->find_fix("package_intel"); if (ifix < 0) error->all(FLERR, "The 'package intel' command is required for /intel styles"); fix = static_cast(modify->fix[ifix]); fix->pair_init_check(); #ifdef _LMP_INTEL_OFFLOAD _cop = fix->coprocessor_number(); #endif if (fix->precision() == FixIntel::PREC_MODE_MIXED) { pack_force_const(force_const_single, fix->get_mixed_buffers()); fix->get_mixed_buffers()->need_tag(1); } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { pack_force_const(force_const_double, fix->get_double_buffers()); fix->get_double_buffers()->need_tag(1); } else { pack_force_const(force_const_single, fix->get_single_buffers()); fix->get_single_buffers()->need_tag(1); } #ifdef _LMP_INTEL_OFFLOAD if (fix->offload_noghost()) error->all(FLERR,"The 'ghost no' option cannot be used with sw/intel."); #endif #if defined(__INTEL_COMPILER) if (__INTEL_COMPILER_BUILD_DATE < 20141023) error->all(FLERR, "Intel compiler versions before " "15 Update 1 not supported for sw/intel"); #endif } /* ---------------------------------------------------------------------- */ template void PairSWIntel::pack_force_const(ForceConst &fc, IntelBuffers *buffers) { int off_ccache = 0; #ifdef _LMP_INTEL_OFFLOAD if (_cop >= 0) off_ccache = 1; #endif buffers->grow_ccache(off_ccache, comm->nthreads); _ccache_stride = buffers->ccache_stride(); int tp1 = atom->ntypes + 1; fc.set_ntypes(tp1,memory,_cop); buffers->set_ntypes(tp1); flt_t **cutneighsq = buffers->get_cutneighsq(); // Repeat cutsq calculation because done after call to init_style 
double cut, cutneigh; for (int i = 1; i <= atom->ntypes; i++) { for (int j = i; j <= atom->ntypes; j++) { if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { cut = init_one(i,j); cutneigh = cut + neighbor->skin; cutsq[i][j] = cutsq[j][i] = cut*cut; cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh; } } } _spq = 1; for (int ii = 0; ii < tp1; ii++) { int i = map[ii]; for (int jj = 0; jj < tp1; jj++) { int j = map[jj]; if (i < 0 || j < 0 || ii == 0 || jj == 0) { fc.p2[ii][jj].cutsq = 0; fc.p2[ii][jj].cut = 0; fc.p2[ii][jj].sigma_gamma = 0; fc.p2f[ii][jj].cut = 0; fc.p2f[ii][jj].powerp = 0; fc.p2f[ii][jj].powerq = 0; fc.p2f[ii][jj].sigma = 0; fc.p2f[ii][jj].c1 = 0; fc.p2f[ii][jj].c2 = 0; fc.p2f[ii][jj].c3 = 0; fc.p2f[ii][jj].c4 = 0; fc.p2e[ii][jj].c5 = 0; fc.p2e[ii][jj].c6 = 0; } else { int ijparam = elem2param[i][j][j]; fc.p2[ii][jj].cutsq = params[ijparam].cutsq; fc.p2[ii][jj].cut = params[ijparam].cut; fc.p2[ii][jj].sigma_gamma = params[ijparam].sigma_gamma; fc.p2f[ii][jj].cut = params[ijparam].cut; fc.p2f[ii][jj].powerp = params[ijparam].powerp; fc.p2f[ii][jj].powerq = params[ijparam].powerq; fc.p2f[ii][jj].sigma = params[ijparam].sigma; fc.p2f[ii][jj].c1 = params[ijparam].c1; fc.p2f[ii][jj].c2 = params[ijparam].c2; fc.p2f[ii][jj].c3 = params[ijparam].c3; fc.p2f[ii][jj].c4 = params[ijparam].c4; fc.p2e[ii][jj].c5 = params[ijparam].c5; fc.p2e[ii][jj].c6 = params[ijparam].c6; double cutcut = params[ijparam].cut * params[ijparam].cut; if (params[ijparam].cutsq >= cutcut) fc.p2[ii][jj].cutsq *= 0.98; if (params[ijparam].powerp != 4.0 || params[ijparam].powerq != 0.0) _spq = 0; } for (int kk = 0; kk < tp1; kk++) { int k = map[kk]; if (i < 0 || j < 0 || k < 0 || ii == 0 || jj == 0 || kk == 0) { fc.p3[ii][jj][kk].costheta = 0; fc.p3[ii][jj][kk].lambda_epsilon = 0; fc.p3[ii][jj][kk].lambda_epsilon2 = 0; } else { int ijkparam = elem2param[i][j][k]; fc.p3[ii][jj][kk].costheta = params[ijkparam].costheta; fc.p3[ii][jj][kk].lambda_epsilon = 
params[ijkparam].lambda_epsilon; fc.p3[ii][jj][kk].lambda_epsilon2 = params[ijkparam].lambda_epsilon2; } } } } _host_pad = 1; _offload_pad = 1; if (INTEL_NBOR_PAD > 1) _host_pad = INTEL_NBOR_PAD * sizeof(float) / sizeof(flt_t); #ifdef _LMP_INTEL_OFFLOAD if (_cop < 0) return; FC_PACKED0_T *op2 = fc.p2[0]; FC_PACKED1_T *op2f = fc.p2f[0]; FC_PACKED2_T *op2e = fc.p2e[0]; FC_PACKED3_T *op3 = fc.p3[0][0]; flt_t * ocutneighsq = cutneighsq[0]; int tp1sq = tp1 * tp1; int tp1cu = tp1sq * tp1; if (op2 != NULL && op2f != NULL && op2e != NULL && op3 != NULL && ocutneighsq != NULL) { #pragma offload_transfer target(mic:_cop) \ in(op2,op2f,op2e: length(tp1sq) alloc_if(0) free_if(0)) \ in(op3: length(tp1cu) alloc_if(0) free_if(0)) \ in(ocutneighsq: length(tp1sq)) } #endif } /* ---------------------------------------------------------------------- */ template void PairSWIntel::ForceConst::set_ntypes(const int ntypes, Memory *memory, const int cop) { if (ntypes != _ntypes) { if (_ntypes > 0) { fc_packed0 *op2 = p2[0]; fc_packed1 *op2f = p2f[0]; fc_packed2 *op2e = p2e[0]; fc_packed3 *op3 = p3[0][0]; #ifdef _LMP_INTEL_OFFLOAD if (op2 != NULL && op2f != NULL && op2e != NULL && op3 != NULL && _cop >= 0) { #pragma offload_transfer target(mic:_cop) \ nocopy(op2, op2f, op2e, op3: alloc_if(0) free_if(1)) } #endif _memory->destroy(op2); _memory->destroy(op2f); _memory->destroy(op2e); _memory->destroy(op3); } if (ntypes > 0) { _cop = cop; memory->create(p2,ntypes,ntypes,"fc.p2"); memory->create(p2f,ntypes,ntypes,"fc.p2f"); memory->create(p2e,ntypes,ntypes,"fc.p2e"); memory->create(p3,ntypes,ntypes,ntypes,"fc.p3"); #ifdef _LMP_INTEL_OFFLOAD fc_packed0 *op2 = p2[0]; fc_packed1 *op2f = p2f[0]; fc_packed2 *op2e = p2e[0]; fc_packed3 *op3 = p3[0][0]; int tp1sq = ntypes * ntypes; int tp1cu = tp1sq * ntypes; if (op2 != NULL && op2f != NULL && op2e != NULL && op3 != NULL && cop >= 0) { #pragma offload_transfer target(mic:cop) \ nocopy(op2,op2f,op2e: length(tp1sq) alloc_if(1) free_if(0)) \ nocopy(op3: 
length(tp1cu) alloc_if(1) free_if(0)) } #endif } } _ntypes = ntypes; _memory = memory; } diff --git a/src/accelerator_intel.h b/src/accelerator_intel.h index ad856e41e..9398a06f1 100644 --- a/src/accelerator_intel.h +++ b/src/accelerator_intel.h @@ -1,69 +1,69 @@ /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ // NOTE: this file is *supposed* to be included multiple times #ifdef LMP_USER_INTEL // true interface to USER-INTEL // this part is used inside the neighbor.h header file to // add functions to the Neighbor class definition #ifdef LMP_INSIDE_NEIGHBOR_H #ifdef LMP_INTEL_OFFLOAD #ifdef __INTEL_OFFLOAD template friend class IntelBuffers; inline int * special_flag_alloc() { return special_flag; } #endif #endif friend class FixIntel; void *fix_intel; template void bin_atoms(void *, int *); - template + template void hbni(const int, NeighList *, void *, const int, const int, void *, const int offload_end = 0); - template - void hbnni(const int, NeighList *, void *, const int, const int, void *); template + void hbnni(const int, NeighList *, void *, const int, const int, void *); + template void hbnti(const int, NeighList *, void *, const int, const int, void *, const int offload_end = 0); - template + template void fbi(const int, NeighList *, void *, const int, const int, void *, const int offload_end = 0); void half_bin_no_newton_intel(class NeighList *); void half_bin_newton_intel(class NeighList *); void 
half_bin_newton_tri_intel(class NeighList *); void full_bin_intel(class NeighList *); #endif /* !LMP_INSIDE_NEIGHBOR_H */ #else /* !LMP_USER_INTEL */ // needed for compiling Neighbor class when USER-Intel is not installed #ifdef LMP_INSIDE_NEIGHBOR_H void half_bin_no_newton_intel(class NeighList *) {} void half_bin_newton_intel(class NeighList *) {} void half_bin_newton_tri_intel(class NeighList *) {} void full_bin_intel(class NeighList *) {} #endif #endif /* !LMP_USER_INTEL */ diff --git a/src/info.cpp b/src/info.cpp index f36cee7b8..0c2e6b9d4 100644 --- a/src/info.cpp +++ b/src/info.cpp @@ -1,451 +1,684 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "string.h" #include "info.h" +#include "accelerator_cuda.h" +#include "accelerator_kokkos.h" #include "atom.h" #include "comm.h" #include "compute.h" #include "domain.h" #include "dump.h" #include "fix.h" #include "force.h" +#include "pair.h" #include "group.h" #include "input.h" #include "modify.h" #include "neighbor.h" #include "output.h" #include "region.h" #include "universe.h" #include "variable.h" #include "update.h" #include "error.h" #include #ifdef _WIN32 /* PSAPI_VERSION must expand to a plain number; written as "=1" the macro body would be "=1" */ #define PSAPI_VERSION 1 #include #include #include #else #include #include #include #endif #if defined __linux #include #endif namespace LAMMPS_NS { // same as in variable.cpp enum {INDEX,LOOP,WORLD,UNIVERSE,ULOOP,STRING,GETENV, SCALARFILE,ATOMFILE,FORMAT,EQUAL,ATOM,PYTHON}; enum {COMPUTES=1<<0, DUMPS=1<<1, FIXES=1<<2, GROUPS=1<<3, REGIONS=1<<4, CONFIG=1<<5, TIME=1<<6, VARIABLES=1<<7, SYSTEM=1<<8, COMM=1<<9, ALL=~0}; } static const char *varstyles[] = { "index", "loop", "world", "universe", "uloop", "string", "getenv", "file", "atomfile", "format", "equal", "atom", "python", "(unknown)"}; static const char *mapstyles[] = { "none", "array", "hash" }; static const char *commstyles[] = { "brick", "tiled" }; static const char *commlayout[] = { "uniform", "nonuniform", "irregular" }; static const char bstyles[] = "pfsm"; using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ void Info::command(int narg, char **arg) { FILE *out=screen; int flags=0; if (comm->me != 0) return; // parse arguments int idx = 0; while (idx < narg) { if (strncmp(arg[idx],"all",3) == 0) { flags |= ALL; ++idx; } else if ((idx+1 < narg) && (strncmp(arg[idx],"out",3) == 0) && (strncmp(arg[idx+1],"screen",3) == 
0)) { out = screen; idx += 2; } else if ((idx+1 < narg) && (strncmp(arg[idx],"out",3) == 0) && (strncmp(arg[idx+1],"log",3) == 0)) { out = logfile; idx += 2; } else if ((idx+2 < narg) && (strncmp(arg[idx],"out",3) == 0) && (strncmp(arg[idx+1],"append",3) == 0)) { if ((out != screen) && (out != logfile)) fclose(out); out = fopen(arg[idx+2],"a"); idx += 3; } else if ((idx+2 < narg) && (strncmp(arg[idx],"out",3) == 0) && (strncmp(arg[idx+1],"overwrite",3) == 0)) { if ((out != screen) && (out != logfile)) fclose(out); out = fopen(arg[idx+2],"w"); idx += 3; } else if (strncmp(arg[idx],"communication",4) == 0) { flags |= COMM; ++idx; } else if (strncmp(arg[idx],"computes",4) == 0) { flags |= COMPUTES; ++idx; } else if (strncmp(arg[idx],"dumps",3) == 0) { flags |= DUMPS; ++idx; } else if (strncmp(arg[idx],"fixes",3) == 0) { flags |= FIXES; ++idx; } else if (strncmp(arg[idx],"groups",3) == 0) { flags |= GROUPS; ++idx; } else if (strncmp(arg[idx],"regions",3) == 0) { flags |= REGIONS; ++idx; } else if (strncmp(arg[idx],"config",3) == 0) { flags |= CONFIG; ++idx; } else if (strncmp(arg[idx],"time",3) == 0) { flags |= TIME; ++idx; } else if (strncmp(arg[idx],"variables",3) == 0) { flags |= VARIABLES; ++idx; } else if (strncmp(arg[idx],"system",3) == 0) { flags |= SYSTEM; ++idx; } else { error->warning(FLERR,"Ignoring unknown or incorrect info command flag"); ++idx; } } if (out == NULL) return; fputs("\nInfo-Info-Info-Info-Info-Info-Info-Info-Info-Info-Info\n",out); time_t now = time(NULL); fprintf(out,"Printed on %s\n",ctime(&now)); if (flags & CONFIG) { fprintf(out,"\nLAMMPS version: %s / %s\n", universe->version, universe->num_ver); fprintf(out,"sizeof(smallint): %3d-bit\n",(int)sizeof(smallint)*8); fprintf(out,"sizeof(imageint): %3d-bit\n",(int)sizeof(imageint)*8); fprintf(out,"sizeof(tagint): %3d-bit\n",(int)sizeof(tagint)*8); fprintf(out,"sizeof(bigint): %3d-bit\n",(int)sizeof(bigint)*8); #if defined(_WIN32) DWORD fullversion,majorv,minorv,buildv=0; fullversion = 
GetVersion(); majorv = (DWORD) (LOBYTE(LOWORD(fullversion))); minorv = (DWORD) (HIBYTE(LOWORD(fullversion))); if (fullversion < 0x80000000) buildv = (DWORD) (HIWORD(fullversion)); SYSTEM_INFO si; GetSystemInfo(&si); const char *machine; switch (si.wProcessorArchitecture) { case PROCESSOR_ARCHITECTURE_AMD64: machine = (const char *) "x86_64"; break; case PROCESSOR_ARCHITECTURE_ARM: machine = (const char *) "arm"; break; case PROCESSOR_ARCHITECTURE_IA64: machine = (const char *) "ia64"; break; case PROCESSOR_ARCHITECTURE_INTEL: machine = (const char *) "i386"; break; default: machine = (const char *) "(unknown)"; } fprintf(out,"\nOS information: Windows %d.%d (%d) on %s\n", majorv,minorv,buildv,machine); #else struct utsname ut; uname(&ut); fprintf(out,"\nOS information: %s %s on %s\n", ut.sysname, ut.release, ut.machine); #endif fprintf(out,"\nMemory allocation information (MPI rank 0)\n"); #if defined(_WIN32) HANDLE phandle = GetCurrentProcess(); PROCESS_MEMORY_COUNTERS_EX pmc; GetProcessMemoryInfo(phandle,(PROCESS_MEMORY_COUNTERS *)&pmc,sizeof(pmc)); fprintf(out,"Non-shared memory use: %.3g Mbyte\n", (double)pmc.PrivateUsage/1048576.0); fprintf(out,"Maximum working set size: %.3g Mbyte\n", (double)pmc.PeakWorkingSetSize/1048576.0); #else #if defined(__linux) struct mallinfo mi; mi = mallinfo(); fprintf(out,"Total dynamically allocated memory: %.3g Mbyte\n", (double)mi.uordblks/1048576.0); #endif struct rusage ru; if (getrusage(RUSAGE_SELF, &ru) == 0) { fprintf(out,"Maximum resident set size: %.3g Mbyte\n", (double)ru.ru_maxrss/1024.0); } #endif } if (flags & COMM) { int major,minor; MPI_Get_version(&major,&minor); fprintf(out,"\nCommunication information:\n"); fprintf(out,"MPI library level: MPI v%d.%d\n",major,minor); fprintf(out,"Comm style = %s, Comm layout = %s\n", commstyles[comm->style], commlayout[comm->layout]); fprintf(out,"Communicate velocities for ghost atoms = %s\n", comm->ghost_velocity ? 
"yes" : "no"); if (comm->mode == 0) { fprintf(out,"Communication mode = single\n"); fprintf(out,"Communication cutoff = %g\n", MAX(comm->cutghostuser,neighbor->cutneighmax)); } if (comm->mode == 1) { fprintf(out,"Communication mode = multi\n"); double cut; for (int i=1; i <= atom->ntypes && neighbor->cuttype; ++i) { cut = neighbor->cuttype[i]; if (comm->cutusermulti) cut = MAX(cut,comm->cutusermulti[i]); fprintf(out,"Communication cutoff for type %d = %g\n", i, cut); } } fprintf(out,"Nprocs = %d Nthreads = %d\n", comm->nprocs, comm->nthreads); fprintf(out,"Processor grid = %d x %d x %d\n",comm->procgrid[0], comm->procgrid[1], comm->procgrid[2]); } if (flags & SYSTEM) { fprintf(out,"\nSystem information:\n"); fprintf(out,"Units = %s\n",update->unit_style); fprintf(out,"Atom style = %s\n", atom->atom_style); fprintf(out,"Atom map = %s\n", mapstyles[atom->map_style]); if (atom->molecular > 0) { const char *msg; msg = (atom->molecular == 2) ? "template" : "standard"; fprintf(out,"Molecule type = %s\n",msg); } fprintf(out,"Atoms = " BIGINT_FORMAT ", types = %d, style = %s\n", atom->natoms, atom->ntypes, force->pair_style); if (atom->molecular > 0) { const char *msg; msg = force->bond_style ? force->bond_style : "none"; fprintf(out,"Bonds = " BIGINT_FORMAT ", types = %d, style = %s\n", atom->nbonds, atom->nbondtypes, msg); msg = force->angle_style ? force->angle_style : "none"; fprintf(out,"Angles = " BIGINT_FORMAT ", types = %d, style = %s\n", atom->nangles, atom->nangletypes, msg); msg = force->dihedral_style ? force->dihedral_style : "none"; fprintf(out,"Dihedrals = " BIGINT_FORMAT ", types = %d, style = %s\n", atom->ndihedrals, atom->ndihedraltypes, msg); msg = force->improper_style ? 
force->improper_style : "none"; fprintf(out,"Impropers = " BIGINT_FORMAT ", types = %d, style = %s\n", atom->nimpropers, atom->nimpropertypes, msg); const double * const special_lj = force->special_lj; const double * const special_coul = force->special_coul; fprintf(out,"Special bond factors lj = %-10g %-10g %-10g\n" "Special bond factors coul = %-10g %-10g %-10g\n", special_lj[1],special_lj[2],special_lj[3], special_coul[1],special_coul[2],special_coul[3]); } fprintf(out,"Kspace style = %s\n", force->kspace ? force->kspace_style : "none"); if (domain->box_exist) { fprintf(out,"\nDimensions = %d\n",domain->dimension); fprintf(out,"%s box = %g x %g x %g\n", domain->triclinic ? "Triclinic" : "Orthogonal", domain->xprd, domain->yprd, domain->zprd); fprintf(out,"Boundaries = %c,%c %c,%c %c,%c\n", bstyles[domain->boundary[0][0]],bstyles[domain->boundary[0][1]], bstyles[domain->boundary[1][0]],bstyles[domain->boundary[1][1]], bstyles[domain->boundary[2][0]],bstyles[domain->boundary[2][1]]); /* each extent is labelled with its own axis: index 0 = x, 1 = y, 2 = z */ fprintf(out,"Xlo, xhi = %g, %g\n", domain->boxlo[0], domain->boxhi[0]); fprintf(out,"Ylo, yhi = %g, %g\n", domain->boxlo[1], domain->boxhi[1]); fprintf(out,"Zlo, zhi = %g, %g\n", domain->boxlo[2], domain->boxhi[2]); if (domain->triclinic) fprintf(out,"Xy, xz, yz = %g, %g, %g\n", domain->xy, domain->xz, domain->yz); } else { fputs("\nBox has not yet been created\n",out); } } if (flags & GROUPS) { int ngroup = group->ngroup; char **names = group->names; int *dynamic = group->dynamic; fprintf(out,"\nGroup information:\n"); for (int i=0; i < ngroup; ++i) { fprintf(out,"Group[%2d]: %s (%s)\n", i, names[i], dynamic[i] ? "dynamic" : "static"); } } if (flags & REGIONS) { int nreg = domain->nregion; Region **regs = domain->regions; fprintf(out,"\nRegion information:\n"); for (int i=0; i < nreg; ++i) { fprintf(out,"Region[%3d]: %s, style = %s, side = %s\n", i, regs[i]->id, regs[i]->style, regs[i]->interior ? 
"in" : "out"); } } if (flags & COMPUTES) { int ncompute = modify->ncompute; Compute **compute = modify->compute; char **names = group->names; fprintf(out,"\nCompute information:\n"); for (int i=0; i < ncompute; ++i) { fprintf(out,"Compute[%3d]: %s, style = %s, group = %s\n", i, compute[i]->id, compute[i]->style, names[compute[i]->igroup]); } } if (flags & DUMPS) { int ndump = output->ndump; Dump **dump = output->dump; int *nevery = output->every_dump; \ char **vnames = output->var_dump; char **names = group->names; fprintf(out,"\nDump information:\n"); for (int i=0; i < ndump; ++i) { fprintf(out,"Dump[%3d]: %s, file = %s, style = %s, group = %s, ", i, dump[i]->id, dump[i]->filename, dump[i]->style, names[dump[i]->igroup]); if (nevery[i]) { fprintf(out,"every = %d\n", nevery[i]); } else { fprintf(out,"every = %s\n", vnames[i]); } } } if (flags & FIXES) { int nfix = modify->nfix; Fix **fix = modify->fix; char **names = group->names; fprintf(out,"\nFix information:\n"); for (int i=0; i < nfix; ++i) { fprintf(out,"Fix[%3d]: %s, style = %s, group = %s\n", i, fix[i]->id, fix[i]->style, names[fix[i]->igroup]); } } if (flags & VARIABLES) { int nvar = input->variable->nvar; int *style = input->variable->style; char **names = input->variable->names; char ***data = input->variable->data; fprintf(out,"\nVariable information:\n"); for (int i=0; i < nvar; ++i) { int ndata = 1; fprintf(out,"Variable[%3d]: %-10s style = %-10s def =", i,names[i],varstyles[style[i]]); if ((style[i] != LOOP) && (style[i] != ULOOP)) ndata = input->variable->num[i]; for (int j=0; j < ndata; ++j) fprintf(out," %s",data[i][j]); fputs("\n",out); } } if (flags & TIME) { double wallclock = MPI_Wtime() - lmp->initclock; double cpuclock = 0.0; #if defined(_WIN32) // from MSD docs. 
FILETIME ct,et,kt,ut; union { FILETIME ft; uint64_t ui; } cpu; if (GetProcessTimes(GetCurrentProcess(),&ct,&et,&kt,&ut)) { cpu.ft = ut; cpuclock = cpu.ui * 0.0000001; } #else /* POSIX */ struct rusage ru; if (getrusage(RUSAGE_SELF, &ru) == 0) { cpuclock = (double) ru.ru_utime.tv_sec; cpuclock += (double) ru.ru_utime.tv_usec * 0.000001; } #endif /* ! _WIN32 */ int cpuh,cpum,cpus,wallh,wallm,walls; cpus = fmod(cpuclock,60.0); cpuclock = (cpuclock - cpus) / 60.0; cpum = fmod(cpuclock,60.0); cpuh = (cpuclock - cpum) / 60.0; walls = fmod(wallclock,60.0); wallclock = (wallclock - walls) / 60.0; wallm = fmod(wallclock,60.0); wallh = (wallclock - wallm) / 60.0; fprintf(out,"\nTotal time information (MPI rank 0):\n" " CPU time: %4d:%02d:%02d\n" " Wall time: %4d:%02d:%02d\n", cpuh,cpum,cpus,wallh,wallm,walls); } fputs("\nInfo-Info-Info-Info-Info-Info-Info-Info-Info-Info-Info\n\n",out); // close output file pointer if opened locally thus forcing a hard sync. if ((out != screen) && (out != logfile)) fclose(out); }
+
+/* ---------------------------------------------------------------------- */
+
+// the is_active() function returns true if the selected style or name
+// in the selected category is currently in use.
+//
+// category: "package", "newton", "pair", or one of the "*_style" names
+//           handled below
+// name:     package name, flag name, or style name to test
+//
+// returns false if either argument is NULL. an unknown category, or an
+// unknown name within the package/newton/pair categories, is reported
+// through error->all().
+// for the "*_style" categories the name is compared against the current
+// style string, and, if suffixes are enabled, also as "name/suffix" and
+// "name/suffix2".
+
+bool Info::is_active(const char *category, const char *name)
+{
+  if ((category == NULL) || (name == NULL)) return false;
+  const char *style = "none";
+  const int len = strlen(name);
+
+  if (strcmp(category,"package") == 0) {
+    if (strcmp(name,"cuda") == 0) {
+      return (lmp->cuda && lmp->cuda->cuda_exists) ? true : false;
+    } else if (strcmp(name,"gpu") == 0) {
+      return (modify->find_fix("package_gpu") >= 0) ? true : false;
+    } else if (strcmp(name,"intel") == 0) {
+      return (modify->find_fix("package_intel") >= 0) ? true : false;
+    } else if (strcmp(name,"kokkos") == 0) {
+      return (lmp->kokkos && lmp->kokkos->kokkos_exists) ? true : false;
+    } else if (strcmp(name,"omp") == 0) {
+      return (modify->find_fix("package_omp") >= 0) ? true : false;
+    } else error->all(FLERR,"Unknown name for package category");
+
+  } else if (strcmp(category,"newton") == 0) {
+    if (strcmp(name,"pair") == 0) return (force->newton_pair != 0);
+    else if (strcmp(name,"bond") == 0) return (force->newton_bond != 0);
+    else if (strcmp(name,"any") == 0) return (force->newton != 0);
+    else error->all(FLERR,"Unknown name for newton category");
+
+  } else if (strcmp(category,"pair") == 0) {
+    if (force->pair == NULL) return false;
+    if (strcmp(name,"single") == 0) return (force->pair->single_enable != 0);
+    else if (strcmp(name,"respa") == 0) return (force->pair->respa_enable != 0);
+    else if (strcmp(name,"manybody") == 0) return (force->pair->manybody_flag != 0);
+    else if (strcmp(name,"tail") == 0) return (force->pair->tail_flag != 0);
+    else if (strcmp(name,"shift") == 0) return (force->pair->offset_flag != 0);
+    else error->all(FLERR,"Unknown name for pair category");
+
+  } else if (strcmp(category,"comm_style") == 0) {
+    style = commstyles[comm->style];
+  } else if (strcmp(category,"min_style") == 0) {
+    style = update->minimize_style;
+  } else if (strcmp(category,"run_style") == 0) {
+    style = update->integrate_style;
+  } else if (strcmp(category,"atom_style") == 0) {
+    style = atom->atom_style;
+  } else if (strcmp(category,"pair_style") == 0) {
+    style = force->pair_style;
+  } else if (strcmp(category,"bond_style") == 0) {
+    style = force->bond_style;
+  } else if (strcmp(category,"angle_style") == 0) {
+    style = force->angle_style;
+  } else if (strcmp(category,"dihedral_style") == 0) {
+    style = force->dihedral_style;
+  } else if (strcmp(category,"improper_style") == 0) {
+    style = force->improper_style;
+  } else if (strcmp(category,"kspace_style") == 0) {
+    style = force->kspace_style;
+  } else error->all(FLERR,"Unknown category for is_active()");
+
+  // Info::command() treats these style pointers as possibly NULL and
+  // prints "none" for them; do the same here instead of handing a
+  // NULL pointer to strcmp().
+  if (style == NULL) style = "none";
+
+  int match = 0;
+  if (strcmp(style,name) == 0) match = 1;
+
+  if (!match && lmp->suffix_enable) {
+    if (lmp->suffix) {
+      // buffer holds name + '/' + suffix + terminating '\0'
+      char *name_w_suffix = new char [len + 2 + strlen(lmp->suffix)];
+      sprintf(name_w_suffix,"%s/%s",name,lmp->suffix);
+      if (strcmp(style,name_w_suffix) == 0) match = 1;
+      delete[] name_w_suffix;
+    }
+    if (!match && lmp->suffix2) {
+      char *name_w_suffix = new char [len + 2 + strlen(lmp->suffix2)];
+      sprintf(name_w_suffix,"%s/%s",name,lmp->suffix2);
+      if (strcmp(style,name_w_suffix) == 0) match = 1;
+      delete[] name_w_suffix;
+    }
+  }
+  return match ? true : false;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// the is_available() function returns true if the selected style
+// or name in the selected category is available for use (but need
+// not be currently active).
+//
+// category: "command", "compute", "fix", or "pair_style"
+// name:     command or style name to look up in the matching style map
+//
+// returns false if either argument is NULL. an unknown category is
+// reported through error->all().
+// for compute, fix and pair_style the name is also looked up as
+// "name/suffix" and "name/suffix2" if suffixes are enabled.
+
+bool Info::is_available(const char *category, const char *name)
+{
+  if ((category == NULL) || (name == NULL)) return false;
+  const int len = strlen(name);
+
+  if (strcmp(category,"command") == 0) {
+    return (input->command_map->find(name) != input->command_map->end());
+
+  } else if (strcmp(category,"compute") == 0) {
+    int match = 0;
+    if (modify->compute_map->find(name) != modify->compute_map->end())
+      match = 1;
+
+    if (!match && lmp->suffix_enable) {
+      if (lmp->suffix) {
+        char *name_w_suffix = new char [len + 2 + strlen(lmp->suffix)];
+        sprintf(name_w_suffix,"%s/%s",name,lmp->suffix);
+        if (modify->compute_map->find(name_w_suffix) != modify->compute_map->end())
+          match = 1;
+        delete[] name_w_suffix;
+      }
+      if (!match && lmp->suffix2) {
+        char *name_w_suffix = new char [len + 2 + strlen(lmp->suffix2)];
+        sprintf(name_w_suffix,"%s/%s",name,lmp->suffix2);
+        if (modify->compute_map->find(name_w_suffix) != modify->compute_map->end())
+          match = 1;
+        delete[] name_w_suffix;
+      }
+    }
+    return match ? true : false;
+
+  } else if (strcmp(category,"fix") == 0) {
+    int match = 0;
+    if (modify->fix_map->find(name) != modify->fix_map->end())
+      match = 1;
+
+    if (!match && lmp->suffix_enable) {
+      if (lmp->suffix) {
+        char *name_w_suffix = new char [len + 2 + strlen(lmp->suffix)];
+        sprintf(name_w_suffix,"%s/%s",name,lmp->suffix);
+        if (modify->fix_map->find(name_w_suffix) != modify->fix_map->end())
+          match = 1;
+        delete[] name_w_suffix;
+      }
+      if (!match && lmp->suffix2) {
+        char *name_w_suffix = new char [len + 2 + strlen(lmp->suffix2)];
+        sprintf(name_w_suffix,"%s/%s",name,lmp->suffix2);
+        if (modify->fix_map->find(name_w_suffix) != modify->fix_map->end())
+          match = 1;
+        delete[] name_w_suffix;
+      }
+    }
+    return match ? true : false;
+
+  } else if (strcmp(category,"pair_style") == 0) {
+    int match = 0;
+    if (force->pair_map->find(name) != force->pair_map->end())
+      match = 1;
+
+    if (!match && lmp->suffix_enable) {
+      if (lmp->suffix) {
+        char *name_w_suffix = new char [len + 2 + strlen(lmp->suffix)];
+        sprintf(name_w_suffix,"%s/%s",name,lmp->suffix);
+        if (force->pair_map->find(name_w_suffix) != force->pair_map->end())
+          match = 1;
+        delete[] name_w_suffix;
+      }
+      if (!match && lmp->suffix2) {
+        char *name_w_suffix = new char [len + 2 + strlen(lmp->suffix2)];
+        sprintf(name_w_suffix,"%s/%s",name,lmp->suffix2);
+        if (force->pair_map->find(name_w_suffix) != force->pair_map->end())
+          match = 1;
+        delete[] name_w_suffix;
+      }
+    }
+    return match ? true : false;
+
+  } else error->all(FLERR,"Unknown category for is_available()");
+
+  // every control path of a non-void function must return a value;
+  // this line only matters if error->all() were ever to return.
+  return false;
+}
+
+/* ---------------------------------------------------------------------- */
+
+// the is_defined() function returns true if a particular ID of the
+// selected category (e.g. fix ID, group ID, region ID etc.) has been
+// defined and thus can be accessed. It does *NOT* check whether a
+// particular ID has a particular style.
+//
+// category: "compute", "dump", "fix", "group", "region", or "variable"
+// name:     ID (or group/variable name) to search for
+//
+// returns false if either argument is NULL or no entry with that exact
+// name exists. an unknown category is reported through error->all().
+
+bool Info::is_defined(const char *category, const char *name)
+{
+  if ((category == NULL) || (name == NULL)) return false;
+
+  if (strcmp(category,"compute") == 0) {
+    int ncompute = modify->ncompute;
+    Compute **compute = modify->compute;
+    for (int i=0; i < ncompute; ++i) {
+      if (strcmp(compute[i]->id,name) == 0)
+        return true;
+    }
+    return false;
+  } else if (strcmp(category,"dump") == 0) {
+    int ndump = output->ndump;
+    Dump **dump = output->dump;
+    for (int i=0; i < ndump; ++i) {
+      if (strcmp(dump[i]->id,name) == 0)
+        return true;
+    }
+    return false;
+  } else if (strcmp(category,"fix") == 0) {
+    int nfix = modify->nfix;
+    Fix **fix = modify->fix;
+    for (int i=0; i < nfix; ++i) {
+      if (strcmp(fix[i]->id,name) == 0)
+        return true;
+    }
+    return false;
+  } else if (strcmp(category,"group") == 0) {
+    int ngroup = group->ngroup;
+    char **names = group->names;
+    for (int i=0; i < ngroup; ++i) {
+      if (strcmp(names[i],name) == 0)
+        return true;
+    }
+    return false;
+  } else if (strcmp(category,"region") == 0) {
+    int nreg = domain->nregion;
+    Region **regs = domain->regions;
+    for (int i=0; i < nreg; ++i) {
+      if (strcmp(regs[i]->id,name) == 0)
+        return true;
+    }
+    return false;
+  } else if (strcmp(category,"variable") == 0) {
+    int nvar = input->variable->nvar;
+    char **names = input->variable->names;
+    for (int i=0; i < nvar; ++i) {
+      if (strcmp(names[i],name) == 0)
+        return true;
+    }
+    return false;
+  } else error->all(FLERR,"Unknown category for is_defined()");
+
+  // every control path of a non-void function must return a value;
+  // this line only matters if error->all() were ever to return.
+  return false;
+}
diff --git a/src/info.h b/src/info.h index b49876b14..19fca873b 100644 --- a/src/info.h +++ b/src/info.h @@ -1,51 +1,55 @@ /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. 
Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #ifdef COMMAND_CLASS CommandStyle(info,Info) #else #ifndef LMP_INFO_H #define LMP_INFO_H #include "pointers.h" namespace LAMMPS_NS { class Info : protected Pointers { public: Info(class LAMMPS *lmp) : Pointers(lmp) {}; void command(int, char **); + + bool is_active(const char *, const char *); + bool is_defined(const char *, const char *); + bool is_available(const char *, const char *); }; } #endif #endif /* ERROR/WARNING messages: E: Illegal ... command Self-explanatory. Check the input script syntax and compare to the documentation for the command. You can use -echo screen as a command-line option when running LAMMPS to see the offending line. W: Ignoring unknown or incorrect info command flag Self-explanatory. The an unknown argument was given to the info command. Compare your input with the documentation. */ diff --git a/src/input.h b/src/input.h index ade27f75f..f03655657 100644 --- a/src/input.h +++ b/src/input.h @@ -1,374 +1,377 @@ /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ #ifndef LMP_INPUT_H #define LMP_INPUT_H #include "stdio.h" #include "pointers.h" #include #include namespace LAMMPS_NS { class Input : protected Pointers { + friend class Info; public: int narg; // # of command args char **arg; // parsed args for command class Variable *variable; // defined variables Input(class LAMMPS *, int, char **); ~Input(); void file(); // process all input void file(const char *); // process an input script char *one(const char *); // process a single command void substitute(char *&, char *&, int &, int &, int); // substitute for variables in a string private: int me; // proc ID char *command; // ptr to current command int maxarg; // max # of args in arg char *line,*copy,*work; // input line & copy and work string int maxline,maxcopy,maxwork; // max lengths of char strings int echo_screen; // 0 = no, 1 = yes int echo_log; // 0 = no, 1 = yes int nfile,maxfile; // current # and max # of open input files int label_active; // 0 = no label, 1 = looking for label char *labelstr; // label string being looked for int jump_skip; // 1 if skipping next jump, 0 otherwise int ifthenelse_flag; // 1 if executing commands inside an if-then-else FILE **infiles; // list of open input files + protected: typedef void (*CommandCreator)(LAMMPS *, int, char **); std::map *command_map; template static void command_creator(LAMMPS *, int, char **); + private: void parse(); // parse an input text line char *nextword(char *, char **); // find next word in string with quotes int numtriple(char *); // count number of triple quotes void reallocate(char *&, int &, int); // reallocate a char string int execute_command(); // execute a single command void clear(); // input script commands void echo(); void ifthenelse(); void include(); void jump(); void label(); void log(); void next_command(); void partition(); void print(); void python(); void quit(); void shell(); void variable_command(); void 
angle_coeff(); // LAMMPS commands void angle_style(); void atom_modify(); void atom_style(); void bond_coeff(); void bond_style(); void boundary(); void box(); void comm_modify(); void comm_style(); void compute(); void compute_modify(); void dielectric(); void dihedral_coeff(); void dihedral_style(); void dimension(); void dump(); void dump_modify(); void fix(); void fix_modify(); void group_command(); void improper_coeff(); void improper_style(); void kspace_modify(); void kspace_style(); void lattice(); void mass(); void min_modify(); void min_style(); void molecule(); void neigh_modify(); void neighbor_command(); void newton(); void package(); void pair_coeff(); void pair_modify(); void pair_style(); void pair_write(); void processors(); void region(); void reset_timestep(); void restart(); void run_style(); void special_bonds(); void suffix(); void thermo(); void thermo_modify(); void thermo_style(); void timestep(); void timer_command(); void uncompute(); void undump(); void unfix(); void units(); }; } #endif /* ERROR/WARNING messages: E: Label wasn't found in input script Self-explanatory. E: Unknown command: %s The command is not known to LAMMPS. Check the input script. E: Invalid use of library file() function This function is called thru the library interface. This error should not occur. Contact the developers if it does. E: Cannot open input script %s Self-explanatory. E: Unbalanced quotes in input line No matching end double quote was found following a leading double quote. E: Input line quote not followed by whitespace An end quote must be followed by whitespace. E: Invalid variable name Variable name used in an input script line is invalid. E: Invalid immediate variable Syntax of immediate value is incorrect. E: Substitution for illegal variable Input script line contained a variable that could not be substituted for. E: Illegal ... command Self-explanatory. Check the input script syntax and compare to the documentation for the command. 
You can use -echo screen as a command-line option when running LAMMPS to see the offending line. E: Cannot use include command within an if command Self-explanatory. E: Cannot open logfile %s The LAMMPS log file specified in the input script cannot be opened. Check that the path and name are correct. E: Cannot open print file %s Self-explanatory. E: Angle_coeff command before simulation box is defined The angle_coeff command cannot be used before a read_data, read_restart, or create_box command. E: Angle_coeff command before angle_style is defined Coefficients cannot be set in the data file or via the angle_coeff command until an angle_style has been assigned. E: Angle_coeff command when no angles allowed The chosen atom style does not allow for angles to be defined. E: Angle_style command when no angles allowed The chosen atom style does not allow for angles to be defined. E: Atom_style command after simulation box is defined The atom_style command cannot be used after a read_data, read_restart, or create_box command. E: Bond_coeff command before simulation box is defined The bond_coeff command cannot be used before a read_data, read_restart, or create_box command. E: Bond_coeff command before bond_style is defined Coefficients cannot be set in the data file or via the bond_coeff command until a bond_style has been assigned. E: Bond_coeff command when no bonds allowed The chosen atom style does not allow for bonds to be defined. E: Bond_style command when no bonds allowed The chosen atom style does not allow for bonds to be defined. E: Boundary command after simulation box is defined The boundary command cannot be used after a read_data, read_restart, or create_box command. E: Box command after simulation box is defined The box command cannot be used after a read_data, read_restart, or create_box command. E: Dihedral_coeff command before simulation box is defined The dihedral_coeff command cannot be used before a read_data, read_restart, or create_box command.
E: Dihedral_coeff command before dihedral_style is defined Coefficients cannot be set in the data file or via the dihedral_coeff command until a dihedral_style has been assigned. E: Dihedral_coeff command when no dihedrals allowed The chosen atom style does not allow for dihedrals to be defined. E: Dihedral_style command when no dihedrals allowed The chosen atom style does not allow for dihedrals to be defined. E: Dimension command after simulation box is defined The dimension command cannot be used after a read_data, read_restart, or create_box command. E: Improper_coeff command before simulation box is defined The improper_coeff command cannot be used before a read_data, read_restart, or create_box command. E: Improper_coeff command before improper_style is defined Coefficients cannot be set in the data file or via the improper_coeff command until an improper_style has been assigned. E: Improper_coeff command when no impropers allowed The chosen atom style does not allow for impropers to be defined. E: Improper_style command when no impropers allowed The chosen atom style does not allow for impropers to be defined. E: KSpace style has not yet been set Cannot use kspace_modify command until a kspace style is set. E: Mass command before simulation box is defined The mass command cannot be used before a read_data, read_restart, or create_box command. E: Min_style command before simulation box is defined The min_style command cannot be used before a read_data, read_restart, or create_box command. E: Newton bond change after simulation box is defined The newton command cannot be used to change the newton bond value after a read_data, read_restart, or create_box command. E: Package command after simulation box is defined The package command cannot be used after a read_data, read_restart, or create_box command.
E: Package cuda command without USER-CUDA package enabled The USER-CUDA package must be installed via "make yes-user-cuda" before LAMMPS is built, and the "-c on" must be used to enable the package. E: Package gpu command without GPU package installed The GPU package must be installed via "make yes-gpu" before LAMMPS is built. E: Package kokkos command without KOKKOS package enabled The KOKKOS package must be installed via "make yes-kokkos" before LAMMPS is built, and the "-k on" must be used to enable the package. E: Package omp command without USER-OMP package installed The USER-OMP package must be installed via "make yes-user-omp" before LAMMPS is built. E: Package intel command without USER-INTEL package installed The USER-INTEL package must be installed via "make yes-user-intel" before LAMMPS is built. E: Pair_coeff command before simulation box is defined The pair_coeff command cannot be used before a read_data, read_restart, or create_box command. E: Pair_coeff command before pair_style is defined Self-explanatory. E: Pair_modify command before pair_style is defined Self-explanatory. E: Pair_write command before pair_style is defined Self-explanatory. E: Processors command after simulation box is defined The processors command cannot be used after a read_data, read_restart, or create_box command. E: Run_style command before simulation box is defined The run_style command cannot be used before a read_data, read_restart, or create_box command. E: Units command after simulation box is defined The units command cannot be used after a read_data, read_restart, or create_box command. 
*/ diff --git a/src/modify.h b/src/modify.h index be2fe286f..7c11714a7 100644 --- a/src/modify.h +++ b/src/modify.h @@ -1,230 +1,231 @@ /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #ifndef LMP_MODIFY_H #define LMP_MODIFY_H #include "stdio.h" #include "pointers.h" #include #include namespace LAMMPS_NS { class Modify : protected Pointers { + friend class Info; public: int nfix,maxfix; int n_initial_integrate,n_post_integrate,n_pre_exchange,n_pre_neighbor; int n_pre_force,n_post_force; int n_final_integrate,n_end_of_step,n_thermo_energy; int n_initial_integrate_respa,n_post_integrate_respa; int n_pre_force_respa,n_post_force_respa,n_final_integrate_respa; int n_min_pre_exchange,n_min_pre_neighbor; int n_min_pre_force,n_min_post_force,n_min_energy; int restart_pbc_any; // 1 if any fix sets restart_pbc int nfix_restart_global; // stored fix global info from restart file int nfix_restart_peratom; // stored fix peratom info from restart file class Fix **fix; // list of fixes int *fmask; // bit mask for when each fix is applied int ncompute,maxcompute; // list of computes class Compute **compute; Modify(class LAMMPS *); virtual ~Modify(); virtual void init(); virtual void setup(int); virtual void setup_pre_exchange(); virtual void setup_pre_neighbor(); virtual void setup_pre_force(int); virtual void initial_integrate(int); virtual void post_integrate(); virtual void pre_exchange(); virtual void pre_neighbor(); virtual void 
pre_force(int); virtual void post_force(int); virtual void final_integrate(); virtual void end_of_step(); virtual double thermo_energy(); virtual void post_run(); virtual void create_attribute(int); virtual void setup_pre_force_respa(int, int); virtual void initial_integrate_respa(int, int, int); virtual void post_integrate_respa(int, int); virtual void pre_force_respa(int, int, int); virtual void post_force_respa(int, int, int); virtual void final_integrate_respa(int, int); virtual void min_pre_exchange(); virtual void min_pre_neighbor(); virtual void min_pre_force(int); virtual void min_post_force(int); virtual double min_energy(double *); virtual void min_store(); virtual void min_step(double, double *); virtual void min_clearstore(); virtual void min_pushstore(); virtual void min_popstore(); virtual double max_alpha(double *); virtual int min_dof(); virtual int min_reset_ref(); void add_fix(int, char **, int trysuffix=0); void modify_fix(int, char **); void delete_fix(const char *); int find_fix(const char *); int check_package(const char *); void add_compute(int, char **, int trysuffix=0); void modify_compute(int, char **); void delete_compute(const char *); int find_compute(const char *); void clearstep_compute(); void addstep_compute(bigint); void addstep_compute_all(bigint); void write_restart(FILE *); int read_restart(FILE *); void restart_deallocate(); bigint memory_usage(); protected: // lists of fixes to apply at different stages of timestep int *list_initial_integrate,*list_post_integrate; int *list_pre_exchange,*list_pre_neighbor; int *list_pre_force,*list_post_force; int *list_final_integrate,*list_end_of_step,*list_thermo_energy; int *list_initial_integrate_respa,*list_post_integrate_respa; int *list_pre_force_respa,*list_post_force_respa; int *list_final_integrate_respa; int *list_min_pre_exchange,*list_min_pre_neighbor; int *list_min_pre_force,*list_min_post_force; int *list_min_energy; int *end_of_step_every; int n_timeflag; // list of computes 
that store time invocation int *list_timeflag; char **id_restart_global; // stored fix global info char **style_restart_global; // from read-in restart file char **state_restart_global; char **id_restart_peratom; // stored fix peratom info char **style_restart_peratom; // from read-in restart file int *index_restart_peratom; int index_permanent; // fix/compute index returned to library call void list_init(int, int &, int *&); void list_init_end_of_step(int, int &, int *&); void list_init_thermo_energy(int, int &, int *&); void list_init_dofflag(int &, int *&); void list_init_compute(); - private: + protected: typedef Compute *(*ComputeCreator)(LAMMPS *, int, char **); std::map *compute_map; typedef Fix *(*FixCreator)(LAMMPS *, int, char **); std::map *fix_map; template static Compute *compute_creator(LAMMPS *, int, char **); template static Fix *fix_creator(LAMMPS *, int, char **); }; } #endif /* ERROR/WARNING messages: E: Fix %s does not allow use of dynamic group Dynamic groups have not yet been enabled for this fix. E: Compute %s does not allow use of dynamic group Dynamic groups have not yet been enabled for this compute. W: One or more atoms are time integrated more than once This is probably an error since you typically do not want to advance the positions or velocities of an atom more than once per timestep. E: Illegal ... command Self-explanatory. Check the input script syntax and compare to the documentation for the command. You can use -echo screen as a command-line option when running LAMMPS to see the offending line. E: Fix command before simulation box is defined The fix command cannot be used before a read_data, read_restart, or create_box command. E: Could not find fix group ID A group ID used in the fix command does not exist. E: Replacing a fix, but new style != old style A fix ID can be used a 2nd time, but only if the style matches the previous fix. In this case it is assumed you wish to reset a fix's parameters.
This error may mean you are mistakenly re-using a fix ID when you do not intend to. W: Replacing a fix, but new group != old group The ID and style of a fix match for a fix you are changing with a fix command, but the new group you are specifying does not match the old group. E: Unknown fix style The choice of fix style is unknown. E: Could not find fix_modify ID A fix ID used in the fix_modify command does not exist. E: Could not find fix ID to delete Self-explanatory. E: Reuse of compute ID A compute ID cannot be used twice. E: Unknown compute style The choice of compute style is unknown. E: Could not find compute_modify ID Self-explanatory. E: Could not find compute ID to delete Self-explanatory. */ diff --git a/src/pair.h b/src/pair.h index d1a41fbee..bc4db091f 100644 --- a/src/pair.h +++ b/src/pair.h @@ -1,340 +1,341 @@ /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ #ifndef LMP_PAIR_H #define LMP_PAIR_H #include "pointers.h" #include "accelerator_kokkos.h" namespace LAMMPS_NS { class Pair : protected Pointers { friend class AngleSDK; friend class AngleSDKOMP; friend class BondQuartic; friend class BondQuarticOMP; friend class DihedralCharmm; friend class DihedralCharmmOMP; friend class FixGPU; friend class FixOMP; friend class ThrOMP; + friend class Info; public: static int instance_total; // # of Pair classes ever instantiated double eng_vdwl,eng_coul; // accumulated energies double virial[6]; // accumulated virial double *eatom,**vatom; // accumulated per-atom energy/virial double cutforce; // max cutoff for all atom pairs double **cutsq; // cutoff sq for each atom pair int **setflag; // 0/1 = whether each i,j has been set int comm_forward; // size of forward communication (0 if none) int comm_reverse; // size of reverse communication (0 if none) int comm_reverse_off; // size of reverse comm even if newton off int single_enable; // 1 if single() routine exists int restartinfo; // 1 if pair style writes restart info int respa_enable; // 1 if inner/middle/outer rRESPA routines int one_coeff; // 1 if allows only one coeff * * call int manybody_flag; // 1 if a manybody potential int no_virial_fdotr_compute; // 1 if does not invoke virial_fdotr_compute() int writedata; // 1 if writes coeffs to data file int ghostneigh; // 1 if pair style needs neighbors of ghosts double **cutghost; // cutoff for each ghost pair int ewaldflag; // 1 if compatible with Ewald solver int pppmflag; // 1 if compatible with PPPM solver int msmflag; // 1 if compatible with MSM solver int dispersionflag; // 1 if compatible with LJ/dispersion solver int tip4pflag; // 1 if compatible with TIP4P solver int dipoleflag; // 1 if compatible with dipole solver int reinitflag; // 1 if compatible with fix adapt and alike int tail_flag; // pair_modify flag for LJ tail correction double 
etail,ptail; // energy/pressure tail corrections double etail_ij,ptail_ij; int evflag; // energy,virial settings int eflag_either,eflag_global,eflag_atom; int vflag_either,vflag_global,vflag_atom; int ncoultablebits; // size of Coulomb table, accessed by KSpace int ndisptablebits; // size of dispersion table double tabinnersq; double tabinnerdispsq; double *rtable,*drtable,*ftable,*dftable,*ctable,*dctable; double *etable,*detable,*ptable,*dptable,*vtable,*dvtable; double *rdisptable, *drdisptable, *fdisptable, *dfdisptable; double *edisptable, *dedisptable; int ncoulshiftbits,ncoulmask; int ndispshiftbits, ndispmask; int nextra; // # of extra quantities pair style calculates double *pvector; // vector of extra pair quantities int single_extra; // number of extra single values calculated double *svector; // vector of extra single quantities class NeighList *list; // standard neighbor list used by most pairs class NeighList *listhalf; // half list used by some pairs class NeighList *listfull; // full list used by some pairs class NeighList *listgranhistory; // granular history list used by some pairs class NeighList *listinner; // rRESPA lists used by some pairs class NeighList *listmiddle; class NeighList *listouter; unsigned int datamask; unsigned int datamask_ext; int compute_flag; // 0 if skip compute() // KOKKOS host/device flag and data masks ExecutionSpace execution_space; unsigned int datamask_read,datamask_modify; Pair(class LAMMPS *); virtual ~Pair(); // top-level Pair methods void init(); virtual void reinit(); double mix_energy(double, double, double, double); double mix_distance(double, double); void write_file(int, char **); void init_bitmap(double, double, int, int &, int &, int &, int &); virtual void modify_params(int, char **); void compute_dummy(int, int); // need to be public, so can be called by pair_style reaxc void v_tally(int, double *, double *); void ev_tally(int, int, int, int, double, double, double, double, double, double); void 
ev_tally3(int, int, int, double, double, double *, double *, double *, double *); void v_tally3(int, int, int, double *, double *, double *, double *); void v_tally4(int, int, int, int, double *, double *, double *, double *, double *, double *); void ev_tally_xyz(int, int, int, int, double, double, double, double, double, double, double, double); // general child-class methods virtual void compute(int, int) = 0; virtual void compute_inner() {} virtual void compute_middle() {} virtual void compute_outer(int, int) {} virtual double single(int, int, int, int, double, double, double, double& fforce) { fforce = 0.0; return 0.0; } virtual void settings(int, char **) = 0; virtual void coeff(int, char **) = 0; virtual void init_style(); virtual void init_list(int, class NeighList *); virtual double init_one(int, int) {return 0.0;} virtual void init_tables(double, double *); virtual void init_tables_disp(double); virtual void free_tables(); virtual void free_disp_tables(); virtual void write_restart(FILE *) {} virtual void read_restart(FILE *) {} virtual void write_restart_settings(FILE *) {} virtual void read_restart_settings(FILE *) {} virtual void write_data(FILE *) {} virtual void write_data_all(FILE *) {} virtual int pack_forward_comm(int, int *, double *, int, int *) {return 0;} virtual void unpack_forward_comm(int, int, double *) {} virtual int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&, int, int *) {return 0;}; virtual void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&) {} virtual int pack_reverse_comm(int, int, double *) {return 0;} virtual void unpack_reverse_comm(int, int *, double *) {} virtual double memory_usage(); // specific child-class methods for certain Pair styles virtual void *extract(const char *, int &) {return NULL;} virtual void swap_eam(double *, double **) {} virtual void reset_dt() {} virtual void min_xf_pointers(int, double **, double **) {} virtual void min_xf_get(int) {} virtual void 
min_x_set(int) {} virtual unsigned int data_mask() {return datamask;} virtual unsigned int data_mask_ext() {return datamask_ext;} // management of callbacks to be run from ev_tally() protected: int num_tally_compute; class Compute **list_tally_compute; public: void add_tally_callback(class Compute *); void del_tally_callback(class Compute *); protected: int instance_me; // which Pair class instantiation I am enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER}; // mixing options int special_lj[4]; // copied from force->special_lj for Kokkos int allocated; // 0/1 = whether arrays are allocated int suffix_flag; // suffix compatibility flag // pair_modify settings int offset_flag,mix_flag; // flags for offset and mixing double tabinner; // inner cutoff for Coulomb table double tabinner_disp; // inner cutoff for dispersion table // custom data type for accessing Coulomb tables typedef union {int i; float f;} union_int_float_t; double THIRD; int vflag_fdotr; int maxeatom,maxvatom; int copymode; // if set, do not deallocate during destruction // required when classes are used as functors by Kokkos virtual void ev_setup(int, int); void ev_unset(); void ev_tally_full(int, double, double, double, double, double, double); void ev_tally_xyz_full(int, double, double, double, double, double, double, double, double); void ev_tally4(int, int, int, int, double, double *, double *, double *, double *, double *, double *); void ev_tally_tip4p(int, int *, double *, double, double); void v_tally2(int, int, double, double *); void v_tally_tensor(int, int, int, int, double, double, double, double, double, double); void virial_fdotr_compute(); inline int sbmask(int j) { return j >> SBBITS & 3; } }; } #endif /* ERROR/WARNING messages: E: Illegal ... command Self-explanatory. Check the input script syntax and compare to the documentation for the command. You can use -echo screen as a command-line option when running LAMMPS to see the offending line. 
E: Too many total bits for bitmapped lookup table Table size specified via pair_modify command is too large. Note that a value of N generates a 2^N size table. E: Cannot have both pair_modify shift and tail set to yes These 2 options are contradictory. E: Cannot use pair tail corrections with 2d simulations The correction factors are only currently defined for 3d systems. W: Using pair tail corrections with nonperiodic system This is probably a bogus thing to do, since tail corrections are computed by integrating the density of a periodic system out to infinity. W: Using a manybody potential with bonds/angles/dihedrals and special_bond exclusions This is likely not what you want to do. The exclusion settings will eliminate neighbors in the neighbor list, which the manybody potential needs to calculate its terms correctly. E: All pair coeffs are not set All pair coefficients must be set in the data file or by the pair_coeff command before running a simulation. E: Fix adapt interface to this pair style not supported New coding for the pair style would need to be done. E: Pair style requires a KSpace style No kspace style is defined. E: Pair style does not support pair_write The pair style does not have a single() function, so it cannot be invoked by pair_write. E: Invalid atom types in pair_write command Atom types must range from 1 to Ntypes inclusive. E: Invalid style in pair_write command Self-explanatory. Check the input script. E: Invalid cutoffs in pair_write command Inner cutoff must be larger than 0.0 and less than outer cutoff. E: Cannot open pair_write file The specified output file for pair energies and forces cannot be opened. Check that the path and name are correct. E: Bitmapped lookup tables require int/float be same size Cannot use pair tables on this machine, because of word sizes. Use the pair_modify command with table 0 instead.
W: Table inner cutoff >= outer cutoff You specified an inner cutoff for a Coulombic table that is longer than the global cutoff. Probably not what you wanted. E: Too many exponent bits for lookup table Table size specified via pair_modify command does not work with your machine's floating point representation. E: Too many mantissa bits for lookup table Table size specified via pair_modify command does not work with your machine's floating point representation. E: Too few bits for lookup table Table size specified via pair_modify command does not work with your machine's floating point representation. */ diff --git a/src/pair_hybrid.cpp b/src/pair_hybrid.cpp index 490b3a5b1..7ca6c0fb8 100644 --- a/src/pair_hybrid.cpp +++ b/src/pair_hybrid.cpp @@ -1,978 +1,979 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ #include "math.h" #include "stdlib.h" #include "string.h" #include "ctype.h" #include "pair_hybrid.h" #include "atom.h" #include "force.h" #include "pair.h" #include "neighbor.h" #include "neigh_request.h" #include "update.h" #include "comm.h" #include "memory.h" #include "error.h" #include "respa.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ PairHybrid::PairHybrid(LAMMPS *lmp) : Pair(lmp) { nstyles = 0; styles = NULL; keywords = NULL; multiple = NULL; special_lj = NULL; special_coul = NULL; outerflag = 0; respaflag = 0; } /* ---------------------------------------------------------------------- */ PairHybrid::~PairHybrid() { if (nstyles) { for (int m = 0; m < nstyles; m++) { delete styles[m]; delete [] keywords[m]; if (special_lj[m]) delete [] special_lj[m]; if (special_coul[m]) delete [] special_coul[m]; } } delete [] styles; delete [] keywords; delete [] multiple; delete [] special_lj; delete [] special_coul; delete [] svector; if (allocated) { memory->destroy(setflag); memory->destroy(cutsq); memory->destroy(cutghost); memory->destroy(nmap); memory->destroy(map); } } /* ---------------------------------------------------------------------- call each sub-style's compute() or compute_outer() function accumulate sub-style global/peratom energy/virial in hybrid for global vflag = 1: each sub-style computes own virial[6] sum sub-style virial[6] to hybrid's virial[6] for global vflag = 2: call sub-style with adjusted vflag to prevent it calling virial_fdotr_compute() hybrid calls virial_fdotr_compute() on final accumulated f ------------------------------------------------------------------------- */ void PairHybrid::compute(int eflag, int vflag) { int i,j,m,n; // if no_virial_fdotr_compute is set and global component of // incoming vflag = 2, then // reset vflag as if global component were 1 // necessary since one or more 
sub-styles cannot compute virial as F dot r if (no_virial_fdotr_compute && vflag % 4 == 2) vflag = 1 + vflag/4 * 4; if (eflag || vflag) ev_setup(eflag,vflag); else evflag = vflag_fdotr = eflag_global = vflag_global = eflag_atom = vflag_atom = 0; // check if global component of incoming vflag = 2 // if so, reset vflag passed to substyle as if it were 0 // necessary so substyle will not invoke virial_fdotr_compute() int vflag_substyle; if (vflag % 4 == 2) vflag_substyle = vflag/4 * 4; else vflag_substyle = vflag; double *saved_special = save_special(); // check if we are running with r-RESPA using the hybrid keyword Respa *respa = NULL; respaflag = 0; if (strstr(update->integrate_style,"respa")) { respa = (Respa *) update->integrate; if (respa->nhybrid_styles > 0) respaflag = 1; } for (m = 0; m < nstyles; m++) { set_special(m); if (!respaflag || (respaflag && respa->hybrid_compute[m])) { // invoke compute() unless compute flag is turned off or // outerflag is set and sub-style has a compute_outer() method if (styles[m]->compute_flag == 0) continue; if (outerflag && styles[m]->respa_enable) styles[m]->compute_outer(eflag,vflag_substyle); else styles[m]->compute(eflag,vflag_substyle); } restore_special(saved_special); // jump to next sub-style if r-RESPA does not want global accumulated data if (respaflag && !respa->tally_global) continue; if (eflag_global) { eng_vdwl += styles[m]->eng_vdwl; eng_coul += styles[m]->eng_coul; } if (vflag_global) { for (n = 0; n < 6; n++) virial[n] += styles[m]->virial[n]; } if (eflag_atom) { n = atom->nlocal; if (force->newton_pair) n += atom->nghost; double *eatom_substyle = styles[m]->eatom; for (i = 0; i < n; i++) eatom[i] += eatom_substyle[i]; } if (vflag_atom) { n = atom->nlocal; if (force->newton_pair) n += atom->nghost; double **vatom_substyle = styles[m]->vatom; for (i = 0; i < n; i++) for (j = 0; j < 6; j++) vatom[i][j] += vatom_substyle[i][j]; } } delete [] saved_special; if (vflag_fdotr) virial_fdotr_compute(); } /* 
---------------------------------------------------------------------- */ void PairHybrid::compute_inner() { for (int m = 0; m < nstyles; m++) if (styles[m]->respa_enable) styles[m]->compute_inner(); } /* ---------------------------------------------------------------------- */ void PairHybrid::compute_middle() { for (int m = 0; m < nstyles; m++) if (styles[m]->respa_enable) styles[m]->compute_middle(); } /* ---------------------------------------------------------------------- */ void PairHybrid::compute_outer(int eflag, int vflag) { outerflag = 1; compute(eflag,vflag); outerflag = 0; } /* ---------------------------------------------------------------------- allocate all arrays ------------------------------------------------------------------------- */ void PairHybrid::allocate() { allocated = 1; int n = atom->ntypes; memory->create(setflag,n+1,n+1,"pair:setflag"); for (int i = 1; i <= n; i++) for (int j = i; j <= n; j++) setflag[i][j] = 0; memory->create(cutsq,n+1,n+1,"pair:cutsq"); memory->create(cutghost,n+1,n+1,"pair:cutghost"); memory->create(nmap,n+1,n+1,"pair:nmap"); memory->create(map,n+1,n+1,nstyles,"pair:map"); for (int i = 1; i <= n; i++) for (int j = i; j <= n; j++) nmap[i][j] = 0; } /* ---------------------------------------------------------------------- create one pair style for each arg in list ------------------------------------------------------------------------- */ void PairHybrid::settings(int narg, char **arg) { if (narg < 1) error->all(FLERR,"Illegal pair_style command"); // delete old lists, since cannot just change settings if (nstyles) { for (int m = 0; m < nstyles; m++) delete styles[m]; delete [] styles; for (int m = 0; m < nstyles; m++) delete [] keywords[m]; delete [] keywords; } if (allocated) { memory->destroy(setflag); memory->destroy(cutsq); memory->destroy(cutghost); memory->destroy(nmap); memory->destroy(map); } allocated = 0; // allocate list of sub-styles as big as possibly needed if no extra args styles = new Pair*[narg]; 
keywords = new char*[narg]; multiple = new int[narg]; special_lj = new double*[narg]; special_coul = new double*[narg]; // allocate each sub-style // allocate uses suffix, but don't store suffix version in keywords, // else syntax in coeff() will not match // call settings() with set of args that are not pair style names // use force->pair_map to determine which args these are int iarg,jarg,dummy; iarg = 0; nstyles = 0; while (iarg < narg) { if (strcmp(arg[iarg],"hybrid") == 0) error->all(FLERR,"Pair style hybrid cannot have hybrid as an argument"); if (strcmp(arg[iarg],"none") == 0) error->all(FLERR,"Pair style hybrid cannot have none as an argument"); styles[nstyles] = force->new_pair(arg[iarg],1,dummy); force->store_style(keywords[nstyles],arg[iarg],0); special_lj[nstyles] = special_coul[nstyles] = NULL; jarg = iarg + 1; while (jarg < narg && !force->pair_map->count(arg[jarg])) jarg++; styles[nstyles]->settings(jarg-iarg-1,&arg[iarg+1]); iarg = jarg; nstyles++; } // multiple[i] = 1 to M if sub-style used multiple times, else 0 for (int i = 0; i < nstyles; i++) { int count = 0; for (int j = 0; j < nstyles; j++) { if (strcmp(keywords[j],keywords[i]) == 0) count++; if (j == i) multiple[i] = count; } if (count == 1) multiple[i] = 0; } // set pair flags from sub-style flags flags(); } /* ---------------------------------------------------------------------- set top-level pair flags from sub-style flags ------------------------------------------------------------------------- */ void PairHybrid::flags() { int m; // set comm_forward, comm_reverse, comm_reverse_off to max of any sub-style for (m = 0; m < nstyles; m++) { if (styles[m]) comm_forward = MAX(comm_forward,styles[m]->comm_forward); if (styles[m]) comm_reverse = MAX(comm_reverse,styles[m]->comm_reverse); if (styles[m]) comm_reverse_off = MAX(comm_reverse_off, styles[m]->comm_reverse_off); } // single_enable = 1 if any sub-style is set // respa_enable = 1 if any sub-style is set // manybody_flag = 1 if any 
sub-style is set // no_virial_fdotr_compute = 1 if any sub-style is set // ghostneigh = 1 if any sub-style is set - // ewaldflag, pppmflag, msmflag, dispersionflag, tip4pflag = 1 + // ewaldflag, pppmflag, msmflag, dipoleflag, dispersionflag, tip4pflag = 1 // if any sub-style is set // compute_flag = 1 if any sub-style is set single_enable = 0; compute_flag = 0; for (m = 0; m < nstyles; m++) { if (styles[m]->single_enable) single_enable = 1; if (styles[m]->respa_enable) respa_enable = 1; if (styles[m]->manybody_flag) manybody_flag = 1; if (styles[m]->no_virial_fdotr_compute) no_virial_fdotr_compute = 1; if (styles[m]->ghostneigh) ghostneigh = 1; if (styles[m]->ewaldflag) ewaldflag = 1; if (styles[m]->pppmflag) pppmflag = 1; if (styles[m]->msmflag) msmflag = 1; + if (styles[m]->dipoleflag) dipoleflag = 1; if (styles[m]->dispersionflag) dispersionflag = 1; if (styles[m]->tip4pflag) tip4pflag = 1; if (styles[m]->compute_flag) compute_flag = 1; } // single_extra = min of all sub-style single_extra // allocate svector single_extra = styles[0]->single_extra; for (m = 1; m < nstyles; m++) single_extra = MIN(single_extra,styles[m]->single_extra); if (single_extra) { delete [] svector; svector = new double[single_extra]; } } /* ---------------------------------------------------------------------- set coeffs for one or more type pairs ------------------------------------------------------------------------- */ void PairHybrid::coeff(int narg, char **arg) { if (narg < 3) error->all(FLERR,"Incorrect args for pair coefficients"); if (!allocated) allocate(); int ilo,ihi,jlo,jhi; force->bounds(arg[0],atom->ntypes,ilo,ihi); force->bounds(arg[1],atom->ntypes,jlo,jhi); // 3rd arg = pair sub-style name // 4th arg = pair sub-style index if name used multiple times // allow for "none" as valid sub-style name int multflag; int m; for (m = 0; m < nstyles; m++) { multflag = 0; if (strcmp(arg[2],keywords[m]) == 0) { if (multiple[m]) { multflag = 1; if (narg < 4) error->all(FLERR,"Incorrect 
args for pair coefficients"); if (!isdigit(arg[3][0])) error->all(FLERR,"Incorrect args for pair coefficients"); int index = force->inumeric(FLERR,arg[3]); if (index == multiple[m]) break; else continue; } else break; } } int none = 0; if (m == nstyles) { if (strcmp(arg[2],"none") == 0) none = 1; else error->all(FLERR,"Pair coeff for hybrid has invalid style"); } // move 1st/2nd args to 2nd/3rd args // if multflag: move 1st/2nd args to 3rd/4th args // just copy ptrs, since arg[] points into original input line arg[2+multflag] = arg[1]; arg[1+multflag] = arg[0]; // invoke sub-style coeff() starting with 1st remaining arg if (!none) styles[m]->coeff(narg-1-multflag,&arg[1+multflag]); // if sub-style only allows one pair coeff call (with * * and type mapping) // then unset setflag/map assigned to that style before setting it below // in case pair coeff for this sub-style is being called for 2nd time if (!none && styles[m]->one_coeff) for (int i = 1; i <= atom->ntypes; i++) for (int j = i; j <= atom->ntypes; j++) if (nmap[i][j] && map[i][j][0] == m) { setflag[i][j] = 0; nmap[i][j] = 0; } // set setflag and which type pairs map to which sub-style // if sub-style is none: set hybrid setflag, wipe out map // else: set hybrid setflag & map only if substyle setflag is set // previous mappings are wiped out int count = 0; for (int i = ilo; i <= ihi; i++) { for (int j = MAX(jlo,i); j <= jhi; j++) { if (none) { setflag[i][j] = 1; nmap[i][j] = 0; count++; } else if (styles[m]->setflag[i][j]) { setflag[i][j] = 1; nmap[i][j] = 1; map[i][j][0] = m; count++; } } } if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); } /* ---------------------------------------------------------------------- init specific to this pair style ------------------------------------------------------------------------- */ void PairHybrid::init_style() { int i,m,itype,jtype,used,istyle,skip; // error if a sub-style is not used int ntypes = atom->ntypes; for (istyle = 0; istyle < 
nstyles; istyle++) { used = 0; for (itype = 1; itype <= ntypes; itype++) for (jtype = itype; jtype <= ntypes; jtype++) for (m = 0; m < nmap[itype][jtype]; m++) if (map[itype][jtype][m] == istyle) used = 1; if (used == 0) error->all(FLERR,"Pair hybrid sub-style is not used"); } // check if special_lj/special_coul overrides are compatible for (istyle = 0; istyle < nstyles; istyle++) { if (special_lj[istyle]) { for (i = 1; i < 4; ++i) { if (((force->special_lj[i] == 0.0) || (force->special_lj[i] == 1.0)) && (force->special_lj[i] != special_lj[istyle][i])) error->all(FLERR,"Pair_modify special setting incompatible with" " global special_bonds setting"); } } if (special_coul[istyle]) { for (i = 1; i < 4; ++i) { if (((force->special_coul[i] == 0.0) || (force->special_coul[i] == 1.0)) && (force->special_coul[i] != special_coul[istyle][i])) error->all(FLERR,"Pair_modify special setting incompatible with" "global special_bonds setting"); } } } // each sub-style makes its neighbor list request(s) for (istyle = 0; istyle < nstyles; istyle++) styles[istyle]->init_style(); // create skip lists for each pair neigh request // any kind of list can have its skip flag set at this stage for (i = 0; i < neighbor->nrequest; i++) { if (!neighbor->requests[i]->pair) continue; // istyle = associated sub-style for that request for (istyle = 0; istyle < nstyles; istyle++) if (styles[istyle] == neighbor->requests[i]->requestor) break; // allocate iskip and ijskip // initialize so as to skip all pair types // set ijskip = 0 if type pair matches any entry in sub-style map // set ijskip = 0 if mixing will assign type pair to this sub-style // will occur if type pair is currently unassigned // and both I,I and J,J are assigned to single sub-style // and sub-style for both I,I and J,J match istyle // set iskip = 1 only if all ijskip for itype are 1 int *iskip = new int[ntypes+1]; int **ijskip; memory->create(ijskip,ntypes+1,ntypes+1,"pair_hybrid:ijskip"); for (itype = 1; itype <= ntypes; itype++) 
for (jtype = 1; jtype <= ntypes; jtype++) ijskip[itype][jtype] = 1; for (itype = 1; itype <= ntypes; itype++) for (jtype = itype; jtype <= ntypes; jtype++) { for (m = 0; m < nmap[itype][jtype]; m++) if (map[itype][jtype][m] == istyle) ijskip[itype][jtype] = ijskip[jtype][itype] = 0; if (nmap[itype][jtype] == 0 && nmap[itype][itype] == 1 && map[itype][itype][0] == istyle && nmap[jtype][jtype] == 1 && map[jtype][jtype][0] == istyle) ijskip[itype][jtype] = ijskip[jtype][itype] = 0; } for (itype = 1; itype <= ntypes; itype++) { iskip[itype] = 1; for (jtype = 1; jtype <= ntypes; jtype++) if (ijskip[itype][jtype] == 0) iskip[itype] = 0; } // if any skipping occurs // set request->skip and copy iskip and ijskip into request // else delete iskip and ijskip skip = 0; for (itype = 1; itype <= ntypes; itype++) for (jtype = 1; jtype <= ntypes; jtype++) if (ijskip[itype][jtype] == 1) skip = 1; if (skip) { neighbor->requests[i]->skip = 1; neighbor->requests[i]->iskip = iskip; neighbor->requests[i]->ijskip = ijskip; } else { delete [] iskip; memory->destroy(ijskip); } } // combine sub-style neigh list requests and create new ones if needed modify_requests(); } /* ---------------------------------------------------------------------- init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ double PairHybrid::init_one(int i, int j) { // if I,J is not set explicitly: // perform mixing only if I,I sub-style = J,J sub-style // also require I,I and J,J are both assigned to single sub-style if (setflag[i][j] == 0) { if (nmap[i][i] != 1 || nmap[j][j] != 1 || map[i][i][0] != map[j][j][0]) error->one(FLERR,"All pair coeffs are not set"); nmap[i][j] = 1; map[i][j][0] = map[i][i][0]; } // call init/mixing for all sub-styles of I,J // set cutsq in sub-style just as Pair::init() does via call to init_one() // set cutghost for I,J and J,I just as sub-style does // sum tail corrections for I,J // return max cutoff of all 
sub-styles assigned to I,J // if no sub-styles assigned to I,J (pair_coeff none), cutmax = 0.0 returned double cutmax = 0.0; cutghost[i][j] = cutghost[j][i] = 0.0; if (tail_flag) etail_ij = ptail_ij = 0.0; nmap[j][i] = nmap[i][j]; for (int k = 0; k < nmap[i][j]; k++) { map[j][i][k] = map[i][j][k]; double cut = styles[map[i][j][k]]->init_one(i,j); styles[map[i][j][k]]->cutsq[i][j] = styles[map[i][j][k]]->cutsq[j][i] = cut*cut; if (styles[map[i][j][k]]->ghostneigh) cutghost[i][j] = cutghost[j][i] = MAX(cutghost[i][j],styles[map[i][j][k]]->cutghost[i][j]); if (tail_flag) { etail_ij += styles[map[i][j][k]]->etail_ij; ptail_ij += styles[map[i][j][k]]->ptail_ij; } cutmax = MAX(cutmax,cut); } return cutmax; } /* ---------------------------------------------------------------------- combine sub-style neigh list requests and create new ones if needed ------------------------------------------------------------------------- */ void PairHybrid::modify_requests() { int i,j; NeighRequest *irq,*jrq; // loop over pair requests only // if list is skip list and not copy, look for non-skip list of same kind // if one exists, point at that one via otherlist // else make new non-skip request of same kind and point at that one // don't bother to set ID for new request, since pair hybrid ignores list // only exception is half_from_full: // ignore it, turn off skip, since it will derive from its skip parent // after possible new request creation, unset skip flag and otherlist // for these derived lists: granhistory, rRESPA inner/middle // this prevents neighbor from treating them as skip lists // copy list check is for pair style = hybrid/overlay // which invokes this routine for (i = 0; i < neighbor->nrequest; i++) { if (!neighbor->requests[i]->pair) continue; irq = neighbor->requests[i]; if (irq->skip == 0 || irq->copy) continue; if (irq->half_from_full) { irq->skip = 0; continue; } for (j = 0; j < neighbor->nrequest; j++) { if (!neighbor->requests[j]->pair) continue; jrq = 
neighbor->requests[j]; if (irq->same_kind(jrq) && jrq->skip == 0) break; } if (j < neighbor->nrequest) irq->otherlist = j; else { int newrequest = neighbor->request(this,instance_me); neighbor->requests[newrequest]->copy_request(irq); irq->otherlist = newrequest; } if (irq->granhistory || irq->respainner || irq->respamiddle) { irq->skip = 0; irq->otherlist = -1; } } } /* ---------------------------------------------------------------------- proc 0 writes to restart file ------------------------------------------------------------------------- */ void PairHybrid::write_restart(FILE *fp) { fwrite(&nstyles,sizeof(int),1,fp); // each sub-style writes its settings, but no coeff info int n; for (int m = 0; m < nstyles; m++) { n = strlen(keywords[m]) + 1; fwrite(&n,sizeof(int),1,fp); fwrite(keywords[m],sizeof(char),n,fp); styles[m]->write_restart_settings(fp); // write out per style special settings, if present n = (special_lj[m] == NULL) ? 0 : 1; fwrite(&n,sizeof(int),1,fp); if (n) fwrite(special_lj[m],sizeof(double),4,fp); n = (special_coul[m] == NULL) ? 
0 : 1; fwrite(&n,sizeof(int),1,fp); if (n) fwrite(special_coul[m],sizeof(double),4,fp); } } /* ---------------------------------------------------------------------- proc 0 reads from restart file, bcasts ------------------------------------------------------------------------- */ void PairHybrid::read_restart(FILE *fp) { int me = comm->me; if (me == 0) fread(&nstyles,sizeof(int),1,fp); MPI_Bcast(&nstyles,1,MPI_INT,0,world); // allocate list of sub-styles styles = new Pair*[nstyles]; keywords = new char*[nstyles]; multiple = new int[nstyles]; special_lj = new double*[nstyles]; special_coul = new double*[nstyles]; // each sub-style is created via new_pair() // each reads its settings, but no coeff info int n,dummy; for (int m = 0; m < nstyles; m++) { if (me == 0) fread(&n,sizeof(int),1,fp); MPI_Bcast(&n,1,MPI_INT,0,world); keywords[m] = new char[n]; if (me == 0) fread(keywords[m],sizeof(char),n,fp); MPI_Bcast(keywords[m],n,MPI_CHAR,0,world); styles[m] = force->new_pair(keywords[m],0,dummy); styles[m]->read_restart_settings(fp); // read back per style special settings, if present special_lj[m] = special_coul[m] = NULL; if (me == 0) fread(&n,sizeof(int),1,fp); MPI_Bcast(&n,1,MPI_INT,0,world); if (n > 0 ) { special_lj[m] = new double[4]; if (me == 0) fread(special_lj[m],sizeof(double),4,fp); MPI_Bcast(special_lj[m],4,MPI_DOUBLE,0,world); } if (me == 0) fread(&n,sizeof(int),1,fp); MPI_Bcast(&n,1,MPI_INT,0,world); if (n > 0 ) { special_coul[m] = new double[4]; if (me == 0) fread(special_coul[m],sizeof(double),4,fp); MPI_Bcast(special_coul[m],4,MPI_DOUBLE,0,world); } } // multiple[i] = 1 to M if sub-style used multiple times, else 0 for (int i = 0; i < nstyles; i++) { int count = 0; for (int j = 0; j < nstyles; j++) { if (strcmp(keywords[j],keywords[i]) == 0) count++; if (j == i) multiple[i] = count; } if (count == 1) multiple[i] = 0; } // set pair flags from sub-style flags flags(); } /* ---------------------------------------------------------------------- call 
sub-style to compute single interaction error if sub-style does not support single() call since overlay could have multiple sub-styles, sum results explicitly ------------------------------------------------------------------------- */ double PairHybrid::single(int i, int j, int itype, int jtype, double rsq, double factor_coul, double factor_lj, double &fforce) { if (nmap[itype][jtype] == 0) error->one(FLERR,"Invoked pair single on pair style none"); double fone; fforce = 0.0; double esum = 0.0; for (int m = 0; m < nmap[itype][jtype]; m++) { if (rsq < styles[map[itype][jtype][m]]->cutsq[itype][jtype]) { if (styles[map[itype][jtype][m]]->single_enable == 0) error->one(FLERR,"Pair hybrid sub-style does not support single call"); if ((special_lj[map[itype][jtype][m]] != NULL) || (special_coul[map[itype][jtype][m]] != NULL)) error->one(FLERR,"Pair hybrid single calls do not support" " per sub-style special bond values"); esum += styles[map[itype][jtype][m]]-> single(i,j,itype,jtype,rsq,factor_coul,factor_lj,fone); fforce += fone; // copy substyle extra values into hybrid's svector if (single_extra && styles[map[itype][jtype][m]]->single_extra) for (m = 0; m < single_extra; m++) svector[m] = styles[map[itype][jtype][m]]->svector[m]; } } return esum; } /* ---------------------------------------------------------------------- modify parameters of the pair style and its sub-styles ------------------------------------------------------------------------- */ void PairHybrid::modify_params(int narg, char **arg) { if (narg == 0) error->all(FLERR,"Illegal pair_modify command"); // if 1st keyword is pair, apply other keywords to one sub-style if (strcmp(arg[0],"pair") == 0) { if (narg < 2) error->all(FLERR,"Illegal pair_modify command"); int m; for (m = 0; m < nstyles; m++) if (strcmp(arg[1],keywords[m]) == 0) break; if (m == nstyles) error->all(FLERR,"Unknown pair_modify hybrid sub-style"); int iarg = 2; if (multiple[m]) { if (narg < 3) error->all(FLERR,"Illegal pair_modify 
command"); int multiflag = force->inumeric(FLERR,arg[2]); for (m = 0; m < nstyles; m++) if (strcmp(arg[1],keywords[m]) == 0 && multiflag == multiple[m]) break; if (m == nstyles) error->all(FLERR,"Unknown pair_modify hybrid sub-style"); iarg = 3; } // if 2nd keyword (after pair) is special: // invoke modify_special() for the sub-style if (iarg < narg && strcmp(arg[iarg],"special") == 0) { if (narg < iarg+5) error->all(FLERR,"Illegal pair_modify special command"); modify_special(m,narg-iarg,&arg[iarg+1]); iarg += 5; } // apply the remaining keywords to the base pair style itself and the // sub-style except for "pair" and "special". // the former is important for some keywords like "tail" or "compute" if (narg-iarg > 0) { Pair::modify_params(narg-iarg,&arg[iarg]); styles[m]->modify_params(narg-iarg,&arg[iarg]); } // apply all keywords to pair hybrid itself and every sub-style } else { Pair::modify_params(narg,arg); for (int m = 0; m < nstyles; m++) styles[m]->modify_params(narg,arg); } } /* ---------------------------------------------------------------------- store a local per pair style override for special_lj and special_coul ------------------------------------------------------------------------- */ void PairHybrid::modify_special(int m, int narg, char **arg) { double special[4]; int i; special[0] = 1.0; special[1] = force->numeric(FLERR,arg[1]); special[2] = force->numeric(FLERR,arg[2]); special[3] = force->numeric(FLERR,arg[3]); if (strcmp(arg[0],"lj/coul") == 0) { if (!special_lj[m]) special_lj[m] = new double[4]; if (!special_coul[m]) special_coul[m] = new double[4]; for (i = 0; i < 4; ++i) special_lj[m][i] = special_coul[m][i] = special[i]; } else if (strcmp(arg[0],"lj") == 0) { if (!special_lj[m]) special_lj[m] = new double[4]; for (i = 0; i < 4; ++i) special_lj[m][i] = special[i]; } else if (strcmp(arg[0],"coul") == 0) { if (!special_coul[m]) special_coul[m] = new double[4]; for (i = 0; i < 4; ++i) special_coul[m][i] = special[i]; } else 
error->all(FLERR,"Illegal pair_modify special command"); } /* ---------------------------------------------------------------------- override global special bonds settings with per substyle values ------------------------------------------------------------------------- */ void PairHybrid::set_special(int m) { int i; if (special_lj[m]) for (i = 0; i < 4; ++i) force->special_lj[i] = special_lj[m][i]; if (special_coul[m]) for (i = 0; i < 4; ++i) force->special_coul[i] = special_coul[m][i]; } /* ---------------------------------------------------------------------- store global special settings ------------------------------------------------------------------------- */ double * PairHybrid::save_special() { double *saved = new double[8]; for (int i = 0; i < 4; ++i) { saved[i] = force->special_lj[i]; saved[i+4] = force->special_coul[i]; } return saved; } /* ---------------------------------------------------------------------- restore global special settings from saved data ------------------------------------------------------------------------- */ void PairHybrid::restore_special(double *saved) { for (int i = 0; i < 4; ++i) { force->special_lj[i] = saved[i]; force->special_coul[i] = saved[i+4]; } } /* ---------------------------------------------------------------------- extract a ptr to a particular quantity stored by pair pass request thru to sub-styles return first non-NULL result except for cut_coul request for cut_coul, insure all non-NULL results are equal since required by Kspace ------------------------------------------------------------------------- */ void *PairHybrid::extract(const char *str, int &dim) { void *cutptr = NULL; void *ptr; double cutvalue = 0.0; for (int m = 0; m < nstyles; m++) { ptr = styles[m]->extract(str,dim); if (ptr && strcmp(str,"cut_coul") == 0) { double *p_newvalue = (double *) ptr; double newvalue = *p_newvalue; if (cutptr && newvalue != cutvalue) error->all(FLERR, "Coulomb cutoffs of pair hybrid sub-styles do not match"); cutptr = 
ptr; cutvalue = newvalue; } else if (ptr) return ptr; } if (strcmp(str,"cut_coul") == 0) return cutptr; return NULL; } /* ---------------------------------------------------------------------- */ void PairHybrid::reset_dt() { for (int m = 0; m < nstyles; m++) styles[m]->reset_dt(); } /* ---------------------------------------------------------------------- check if itype,jtype maps to sub-style ------------------------------------------------------------------------- */ int PairHybrid::check_ijtype(int itype, int jtype, char *substyle) { for (int m = 0; m < nmap[itype][jtype]; m++) if (strcmp(keywords[map[itype][jtype][m]],substyle) == 0) return 1; return 0; } /* ---------------------------------------------------------------------- memory usage of each sub-style ------------------------------------------------------------------------- */ double PairHybrid::memory_usage() { double bytes = maxeatom * sizeof(double); bytes += maxvatom*6 * sizeof(double); for (int m = 0; m < nstyles; m++) bytes += styles[m]->memory_usage(); return bytes; } diff --git a/src/reader_xyz.cpp b/src/reader_xyz.cpp index 3dbd69d41..b95d917bd 100644 --- a/src/reader_xyz.cpp +++ b/src/reader_xyz.cpp @@ -1,218 +1,224 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing author: Axel Kohlmeyer (Temple U) ------------------------------------------------------------------------- */ #include "string.h" #include "stdlib.h" #include "reader_xyz.h" #include "atom.h" #include "memory.h" #include "error.h" #include "force.h" using namespace LAMMPS_NS; #define MAXLINE 1024 // max line length in dump file enum{ID,TYPE,X,Y,Z}; /* ---------------------------------------------------------------------- */ ReaderXYZ::ReaderXYZ(LAMMPS *lmp) : Reader(lmp) { line = new char[MAXLINE]; fieldindex = NULL; nstep = 0; } /* ---------------------------------------------------------------------- */ ReaderXYZ::~ReaderXYZ() { delete [] line; memory->destroy(fieldindex); } /* ---------------------------------------------------------------------- read and return time stamp from dump file if first read reaches end-of-file, return 1 so caller can open next file only called by proc 0 ------------------------------------------------------------------------- */ int ReaderXYZ::read_time(bigint &ntimestep) { char *eof = fgets(line,MAXLINE,fp); if (eof == NULL) return 1; - int n = strlen(line); - if (n > 0) line[n-1] = '\0'; // strip newline // first line has to have the number of atoms + // truncate the string to the first whitespace, + // so force->bnumeric() does not hiccup + for (int i=0; (i < MAXLINE) && (eof[i] != '\0'); ++i) { + if (eof[i] == '\n' || eof[i] == '\r' || eof[i] == ' ' || eof[i] == '\t') { + eof[i] = '\0'; + break; + } + } natoms = force->bnumeric(FLERR,line); if (natoms < 1) error->one(FLERR,"Dump file is incorrectly formatted"); // skip over comment/title line read_lines(1); // fake time step numbers ntimestep = nstep; // count this frame ++nstep; return 0; } /* ---------------------------------------------------------------------- skip snapshot from timestamp onward only called by proc 
0 ------------------------------------------------------------------------- */ void ReaderXYZ::skip() { // invoke read_lines() in chunks no larger than MAXSMALLINT int nchunk; bigint nremain = natoms; while (nremain) { nchunk = MIN(nremain,MAXSMALLINT); read_lines(nchunk); nremain -= nchunk; } } /* ---------------------------------------------------------------------- read remaining header info: return natoms box bounds, triclinic (inferred), fieldflag (1 if any fields not found), xyz flags = from input scaleflag & wrapflag if fieldflag set: match Nfield fields to per-atom column labels allocate and set fieldindex = which column each field maps to fieldtype = X,VX,IZ etc fieldlabel = user-specified label or NULL if use fieldtype default xyz flag = scaledflag if has fieldlabel name, else set by x,xs,xu,xsu only called by proc 0 ------------------------------------------------------------------------- */ bigint ReaderXYZ::read_header(double box[3][3], int &triclinic, int fieldinfo, int nfield, int *fieldtype, char **fieldlabel, int scaleflag, int wrapflag, int &fieldflag, int &xflag, int &yflag, int &zflag) { nid = 0; // signal that we have no box info at all triclinic = -1; // if no field info requested, just return if (!fieldinfo) return natoms; memory->create(fieldindex,nfield,"read_dump:fieldindex"); // for xyz we know nothing about the style of coordinates, // so caller has to set the proper flags xflag = 2*scaleflag + wrapflag + 1; yflag = 2*scaleflag + wrapflag + 1; zflag = 2*scaleflag + wrapflag + 1; // copy fieldtype list for supported fields fieldflag = 0; for (int i = 0; i < nfield; i++) { if ( (fieldtype[i] == X) || (fieldtype[i] == Y) || (fieldtype[i] == Z) || (fieldtype[i] == ID) || (fieldtype[i] == TYPE) ) { fieldindex[i] = fieldtype[i]; } else { fieldflag = 1; } } return natoms; } /* ---------------------------------------------------------------------- read N atom lines from dump file stores appropriate values in fields array return 0 if success, 1 
if error only called by proc 0 ------------------------------------------------------------------------- */ void ReaderXYZ::read_atoms(int n, int nfield, double **fields) { int i,m; char *eof; int mytype; double myx, myy, myz; for (i = 0; i < n; i++) { eof = fgets(line,MAXLINE,fp); if (eof == NULL) error->one(FLERR,"Unexpected end of dump file"); ++nid; sscanf(line,"%*s%lg%lg%lg", &myx, &myy, &myz); // XXX: we could insert an element2type translation here // XXX: for now we flag unrecognized types as type 0, // XXX: which should trigger an error, if LAMMPS uses it. mytype = atoi(line); for (m = 0; m < nfield; m++) { switch (fieldindex[m]) { case X: fields[i][m] = myx; break; case Y: fields[i][m] = myy; break; case Z: fields[i][m] = myz; break; case ID: fields[i][m] = nid; break; case TYPE: fields[i][m] = mytype; break; } } } } /* ---------------------------------------------------------------------- read N lines from dump file only last one is saved in line only called by proc 0 ------------------------------------------------------------------------- */ void ReaderXYZ::read_lines(int n) { char *eof = NULL; if (n <= 0) return; for (int i = 0; i < n; i++) eof = fgets(line,MAXLINE,fp); if (eof == NULL) error->one(FLERR,"Unexpected end of dump file"); } diff --git a/src/variable.cpp b/src/variable.cpp index b929caa50..2e88da4fd 100644 --- a/src/variable.cpp +++ b/src/variable.cpp @@ -1,4543 +1,4613 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ #include "math.h" #include "stdlib.h" #include "string.h" #include "ctype.h" #include "unistd.h" #include "variable.h" #include "universe.h" #include "atom.h" #include "update.h" #include "group.h" #include "domain.h" #include "comm.h" #include "region.h" #include "modify.h" #include "compute.h" #include "fix.h" #include "fix_store.h" #include "force.h" #include "output.h" #include "thermo.h" #include "random_mars.h" #include "math_const.h" #include "atom_masks.h" #include "python_wrapper.h" #include "memory.h" +#include "info.h" #include "error.h" using namespace LAMMPS_NS; using namespace MathConst; #define VARDELTA 4 #define MAXLEVEL 4 #define MAXLINE 256 #define CHUNK 1024 #define VALUELENGTH 64 // also in python.cpp #define MAXFUNCARG 6 #define MYROUND(a) (( a-floor(a) ) >= .5) ? ceil(a) : floor(a) enum{INDEX,LOOP,WORLD,UNIVERSE,ULOOP,STRING,GETENV, SCALARFILE,ATOMFILE,FORMAT,EQUAL,ATOM,PYTHON}; enum{ARG,OP}; // customize by adding a function // if add before OR, // also set precedence level in constructor and precedence length in *.h enum{DONE,ADD,SUBTRACT,MULTIPLY,DIVIDE,CARAT,MODULO,UNARY, NOT,EQ,NE,LT,LE,GT,GE,AND,OR, SQRT,EXP,LN,LOG,ABS,SIN,COS,TAN,ASIN,ACOS,ATAN,ATAN2, RANDOM,NORMAL,CEIL,FLOOR,ROUND,RAMP,STAGGER,LOGFREQ,LOGFREQ2, STRIDE,STRIDE2,VDISPLACE,SWIGGLE,CWIGGLE,GMASK,RMASK,GRMASK, + IS_ACTIVE,IS_DEFINED,IS_AVAILABLE, VALUE,ATOMARRAY,TYPEARRAY,INTARRAY,BIGINTARRAY}; // customize by adding a special function enum{SUM,XMIN,XMAX,AVE,TRAP,SLOPE}; #define INVOKED_SCALAR 1 #define INVOKED_VECTOR 2 #define INVOKED_ARRAY 4 #define INVOKED_PERATOM 8 #define BIG 1.0e20 /* ---------------------------------------------------------------------- */ Variable::Variable(LAMMPS *lmp) : Pointers(lmp) { MPI_Comm_rank(world,&me); nvar = maxvar = 0; names = NULL; style = NULL; num = NULL; which = NULL; pad = NULL; reader = NULL; data = NULL; eval_in_progress = NULL; randomequal = NULL; 
randomatom = NULL; // customize by assigning a precedence level precedence[DONE] = 0; precedence[OR] = 1; precedence[AND] = 2; precedence[EQ] = precedence[NE] = 3; precedence[LT] = precedence[LE] = precedence[GT] = precedence[GE] = 4; precedence[ADD] = precedence[SUBTRACT] = 5; precedence[MULTIPLY] = precedence[DIVIDE] = precedence[MODULO] = 6; precedence[CARAT] = 7; precedence[UNARY] = precedence[NOT] = 8; // Python wrapper, real or dummy python = new Python(lmp); } /* ---------------------------------------------------------------------- */ Variable::~Variable() { for (int i = 0; i < nvar; i++) { delete [] names[i]; delete reader[i]; if (style[i] == LOOP || style[i] == ULOOP) delete [] data[i][0]; else for (int j = 0; j < num[i]; j++) delete [] data[i][j]; delete [] data[i]; } memory->sfree(names); memory->destroy(style); memory->destroy(num); memory->destroy(which); memory->destroy(pad); memory->sfree(reader); memory->sfree(data); memory->destroy(eval_in_progress); delete randomequal; delete randomatom; delete python; } /* ---------------------------------------------------------------------- called by variable command in input script ------------------------------------------------------------------------- */ void Variable::set(int narg, char **arg) { if (narg < 2) error->all(FLERR,"Illegal variable command"); int replaceflag = 0; // DELETE // doesn't matter if variable no longer exists if (strcmp(arg[1],"delete") == 0) { if (narg != 2) error->all(FLERR,"Illegal variable command"); if (find(arg[0]) >= 0) remove(find(arg[0])); return; // INDEX // num = listed args, which = 1st value, data = copied args } else if (strcmp(arg[1],"index") == 0) { if (narg < 3) error->all(FLERR,"Illegal variable command"); if (find(arg[0]) >= 0) return; if (nvar == maxvar) grow(); style[nvar] = INDEX; num[nvar] = narg - 2; which[nvar] = 0; pad[nvar] = 0; data[nvar] = new char*[num[nvar]]; copy(num[nvar],&arg[2],data[nvar]); // LOOP // 1 arg + pad: num = N, which = 1st value, data = 
single string // 2 args + pad: num = N2, which = N1, data = single string } else if (strcmp(arg[1],"loop") == 0) { if (find(arg[0]) >= 0) return; if (nvar == maxvar) grow(); style[nvar] = LOOP; int nfirst,nlast; if (narg == 3 || (narg == 4 && strcmp(arg[3],"pad") == 0)) { nfirst = 1; nlast = force->inumeric(FLERR,arg[2]); if (nlast <= 0) error->all(FLERR,"Illegal variable command"); if (narg == 4 && strcmp(arg[3],"pad") == 0) { char digits[12]; sprintf(digits,"%d",nlast); pad[nvar] = strlen(digits); } else pad[nvar] = 0; } else if (narg == 4 || (narg == 5 && strcmp(arg[4],"pad") == 0)) { nfirst = force->inumeric(FLERR,arg[2]); nlast = force->inumeric(FLERR,arg[3]); if (nfirst > nlast || nlast < 0) error->all(FLERR,"Illegal variable command"); if (narg == 5 && strcmp(arg[4],"pad") == 0) { char digits[12]; sprintf(digits,"%d",nlast); pad[nvar] = strlen(digits); } else pad[nvar] = 0; } else error->all(FLERR,"Illegal variable command"); num[nvar] = nlast; which[nvar] = nfirst-1; data[nvar] = new char*[1]; data[nvar][0] = NULL; // WORLD // num = listed args, which = partition this proc is in, data = copied args // error check that num = # of worlds in universe } else if (strcmp(arg[1],"world") == 0) { if (narg < 3) error->all(FLERR,"Illegal variable command"); if (find(arg[0]) >= 0) return; if (nvar == maxvar) grow(); style[nvar] = WORLD; num[nvar] = narg - 2; if (num[nvar] != universe->nworlds) error->all(FLERR,"World variable count doesn't match # of partitions"); which[nvar] = universe->iworld; pad[nvar] = 0; data[nvar] = new char*[num[nvar]]; copy(num[nvar],&arg[2],data[nvar]); // UNIVERSE and ULOOP // for UNIVERSE: num = listed args, data = copied args // for ULOOP: num = N, data = single string // which = partition this proc is in // universe proc 0 creates lock file // error check that all other universe/uloop variables are same length } else if (strcmp(arg[1],"universe") == 0 || strcmp(arg[1],"uloop") == 0) { if (strcmp(arg[1],"universe") == 0) { if (narg < 3) 
error->all(FLERR,"Illegal variable command"); if (find(arg[0]) >= 0) return; if (nvar == maxvar) grow(); style[nvar] = UNIVERSE; num[nvar] = narg - 2; pad[nvar] = 0; data[nvar] = new char*[num[nvar]]; copy(num[nvar],&arg[2],data[nvar]); } else if (strcmp(arg[1],"uloop") == 0) { if (narg < 3 || narg > 4 || (narg == 4 && strcmp(arg[3],"pad") != 0)) error->all(FLERR,"Illegal variable command"); if (find(arg[0]) >= 0) return; if (nvar == maxvar) grow(); style[nvar] = ULOOP; num[nvar] = force->inumeric(FLERR,arg[2]); data[nvar] = new char*[1]; data[nvar][0] = NULL; if (narg == 4) { char digits[12]; sprintf(digits,"%d",num[nvar]); pad[nvar] = strlen(digits); } else pad[nvar] = 0; } if (num[nvar] < universe->nworlds) error->all(FLERR,"Universe/uloop variable count < # of partitions"); which[nvar] = universe->iworld; if (universe->me == 0) { FILE *fp = fopen("tmp.lammps.variable","w"); if (fp == NULL) error->one(FLERR,"Cannot open temporary file for world counter."); fprintf(fp,"%d\n",universe->nworlds); fclose(fp); fp = NULL; } for (int jvar = 0; jvar < nvar; jvar++) if (num[jvar] && (style[jvar] == UNIVERSE || style[jvar] == ULOOP) && num[nvar] != num[jvar]) error->all(FLERR, "All universe/uloop variables must have same # of values"); // STRING // replace pre-existing var if also style STRING (allows it to be reset) // num = 1, which = 1st value // data = 1 value, string to eval } else if (strcmp(arg[1],"string") == 0) { if (narg != 3) error->all(FLERR,"Illegal variable command"); int ivar = find(arg[0]); if (ivar >= 0) { if (style[ivar] != STRING) error->all(FLERR,"Cannot redefine variable as a different style"); delete [] data[ivar][0]; copy(1,&arg[2],data[ivar]); replaceflag = 1; } else { if (nvar == maxvar) grow(); style[nvar] = STRING; num[nvar] = 1; which[nvar] = 0; pad[nvar] = 0; data[nvar] = new char*[num[nvar]]; copy(1,&arg[2],data[nvar]); } // GETENV // remove pre-existing var if also style GETENV (allows it to be reset) // num = 1, which = 1st value // data = 
1 value, string to eval } else if (strcmp(arg[1],"getenv") == 0) { if (narg != 3) error->all(FLERR,"Illegal variable command"); if (find(arg[0]) >= 0) { if (style[find(arg[0])] != GETENV) error->all(FLERR,"Cannot redefine variable as a different style"); remove(find(arg[0])); } if (nvar == maxvar) grow(); style[nvar] = GETENV; num[nvar] = 1; which[nvar] = 0; pad[nvar] = 0; data[nvar] = new char*[num[nvar]]; copy(1,&arg[2],data[nvar]); data[nvar][1] = new char[VALUELENGTH]; strcpy(data[nvar][1],"(undefined)"); // SCALARFILE for strings or numbers // which = 1st value // data = 1 value, string to eval } else if (strcmp(arg[1],"file") == 0) { if (narg != 3) error->all(FLERR,"Illegal variable command"); if (find(arg[0]) >= 0) return; if (nvar == maxvar) grow(); style[nvar] = SCALARFILE; num[nvar] = 1; which[nvar] = 0; pad[nvar] = 0; data[nvar] = new char*[num[nvar]]; data[nvar][0] = new char[MAXLINE]; reader[nvar] = new VarReader(lmp,arg[0],arg[2],SCALARFILE); int flag = reader[nvar]->read_scalar(data[nvar][0]); if (flag) error->all(FLERR,"File variable could not read value"); // ATOMFILE for numbers // which = 1st value // data = NULL } else if (strcmp(arg[1],"atomfile") == 0) { if (narg != 3) error->all(FLERR,"Illegal variable command"); if (find(arg[0]) >= 0) return; if (nvar == maxvar) grow(); style[nvar] = ATOMFILE; num[nvar] = 1; which[nvar] = 0; pad[nvar] = 0; data[nvar] = new char*[num[nvar]]; data[nvar][0] = NULL; reader[nvar] = new VarReader(lmp,arg[0],arg[2],ATOMFILE); int flag = reader[nvar]->read_peratom(); if (flag) error->all(FLERR,"Atomfile variable could not read values"); // FORMAT // num = 3, which = 1st value // data = 3 values // 1st is name of variable to eval, 2nd is format string, // 3rd is filled on retrieval } else if (strcmp(arg[1],"format") == 0) { if (narg != 4) error->all(FLERR,"Illegal variable command"); if (find(arg[0]) >= 0) return; if (nvar == maxvar) grow(); style[nvar] = FORMAT; num[nvar] = 3; which[nvar] = 0; pad[nvar] = 0; 
data[nvar] = new char*[num[nvar]]; copy(2,&arg[2],data[nvar]); data[nvar][2] = new char[VALUELENGTH]; strcpy(data[nvar][2],"(undefined)"); // EQUAL // replace pre-existing var if also style EQUAL (allows it to be reset) // num = 2, which = 1st value // data = 2 values, 1st is string to eval, 2nd is filled on retrieval } else if (strcmp(arg[1],"equal") == 0) { if (narg != 3) error->all(FLERR,"Illegal variable command"); int ivar = find(arg[0]); if (ivar >= 0) { if (style[ivar] != EQUAL) error->all(FLERR,"Cannot redefine variable as a different style"); delete [] data[ivar][0]; copy(1,&arg[2],data[ivar]); replaceflag = 1; } else { if (nvar == maxvar) grow(); style[nvar] = EQUAL; num[nvar] = 2; which[nvar] = 0; pad[nvar] = 0; data[nvar] = new char*[num[nvar]]; copy(1,&arg[2],data[nvar]); data[nvar][1] = new char[VALUELENGTH]; strcpy(data[nvar][1],"(undefined)"); } // ATOM // replace pre-existing var if also style ATOM (allows it to be reset) // num = 1, which = 1st value // data = 1 value, string to eval } else if (strcmp(arg[1],"atom") == 0) { if (narg != 3) error->all(FLERR,"Illegal variable command"); int ivar = find(arg[0]); if (ivar >= 0) { if (style[ivar] != ATOM) error->all(FLERR,"Cannot redefine variable as a different style"); delete [] data[ivar][0]; copy(1,&arg[2],data[ivar]); replaceflag = 1; } else { if (nvar == maxvar) grow(); style[nvar] = ATOM; num[nvar] = 1; which[nvar] = 0; pad[nvar] = 0; data[nvar] = new char*[num[nvar]]; copy(1,&arg[2],data[nvar]); } // PYTHON // replace pre-existing var if also style PYTHON (allows it to be reset) // num = 2, which = 1st value // data = 2 values, 1st is Python func to invoke, 2nd is filled by invoke } else if (strcmp(arg[1],"python") == 0) { if (narg != 3) error->all(FLERR,"Illegal variable command"); if (!python->python_exists) error->all(FLERR,"LAMMPS is not built with Python embedded"); int ivar = find(arg[0]); if (ivar >= 0) { if (style[ivar] != PYTHON) error->all(FLERR,"Cannot redefine variable as a different 
style"); delete [] data[ivar][0]; copy(1,&arg[2],data[ivar]); replaceflag = 1; } else { if (nvar == maxvar) grow(); style[nvar] = PYTHON; num[nvar] = 2; which[nvar] = 1; pad[nvar] = 0; data[nvar] = new char*[num[nvar]]; copy(1,&arg[2],data[nvar]); data[nvar][1] = new char[VALUELENGTH]; strcpy(data[nvar][1],"(undefined)"); } } else error->all(FLERR,"Illegal variable command"); // set name of variable, if not replacing (EQUAL/ATOM/STRING/PYTHON) // name must be all alphanumeric chars or underscores if (replaceflag) return; int n = strlen(arg[0]) + 1; names[nvar] = new char[n]; strcpy(names[nvar],arg[0]); for (int i = 0; i < n-1; i++) if (!isalnum(names[nvar][i]) && names[nvar][i] != '_') error->all(FLERR,"Variable name must be alphanumeric or " "underscore characters"); nvar++; } /* ---------------------------------------------------------------------- INDEX variable created by command-line argument make it INDEX rather than STRING so cannot be re-defined in input script ------------------------------------------------------------------------- */ void Variable::set(char *name, int narg, char **arg) { char **newarg = new char*[2+narg]; newarg[0] = name; newarg[1] = (char *) "index"; for (int i = 0; i < narg; i++) newarg[2+i] = arg[i]; set(2+narg,newarg); delete [] newarg; } /* ---------------------------------------------------------------------- set existing STRING variable to str return 0 if successful return -1 if variable doesn't exist or isn't a STRING variable called via library interface, so external programs can set variables ------------------------------------------------------------------------- */ int Variable::set_string(char *name, char *str) { int ivar = find(name); if (ivar < 0) return -1; if (style[ivar] != STRING) return -1; delete [] data[ivar][0]; copy(1,&str,data[ivar]); return 0; } /* ---------------------------------------------------------------------- increment variable(s) return 0 if OK if successfully incremented return 1 if any variable is 
exhausted, free the variable to allow re-use ------------------------------------------------------------------------- */ int Variable::next(int narg, char **arg) { int ivar; if (narg == 0) error->all(FLERR,"Illegal next command"); // check that variables exist and are all the same style // exception: UNIVERSE and ULOOP variables can be mixed in same next command for (int iarg = 0; iarg < narg; iarg++) { ivar = find(arg[iarg]); if (ivar < 0) error->all(FLERR,"Invalid variable in next command"); if (style[ivar] == ULOOP && style[find(arg[0])] == UNIVERSE) continue; else if (style[ivar] == UNIVERSE && style[find(arg[0])] == ULOOP) continue; else if (style[ivar] != style[find(arg[0])]) error->all(FLERR,"All variables in next command must be same style"); } // invalid styles: STRING, EQUAL, WORLD, ATOM, GETENV, FORMAT, PYTHON int istyle = style[find(arg[0])]; if (istyle == STRING || istyle == EQUAL || istyle == WORLD || istyle == GETENV || istyle == ATOM || istyle == FORMAT || istyle == PYTHON) error->all(FLERR,"Invalid variable style with next command"); // if istyle = UNIVERSE or ULOOP, insure all such variables are incremented if (istyle == UNIVERSE || istyle == ULOOP) for (int i = 0; i < nvar; i++) { if (style[i] != UNIVERSE && style[i] != ULOOP) continue; int iarg = 0; for (iarg = 0; iarg < narg; iarg++) if (strcmp(arg[iarg],names[i]) == 0) break; if (iarg == narg) error->universe_one(FLERR,"Next command must list all " "universe and uloop variables"); } // increment all variables in list // if any variable is exhausted, set flag = 1 and remove var to allow re-use int flag = 0; if (istyle == INDEX || istyle == LOOP) { for (int iarg = 0; iarg < narg; iarg++) { ivar = find(arg[iarg]); which[ivar]++; if (which[ivar] >= num[ivar]) { flag = 1; remove(ivar); } } } else if (istyle == SCALARFILE) { for (int iarg = 0; iarg < narg; iarg++) { ivar = find(arg[iarg]); int done = reader[ivar]->read_scalar(data[ivar][0]); if (done) { flag = 1; remove(ivar); } } } else if (istyle 
== ATOMFILE) { for (int iarg = 0; iarg < narg; iarg++) { ivar = find(arg[iarg]); int done = reader[ivar]->read_peratom(); if (done) { flag = 1; remove(ivar); } } } else if (istyle == UNIVERSE || istyle == ULOOP) { // wait until lock file can be created and owned by proc 0 of this world // rename() is not atomic in practice, but no known simple fix // means multiple procs can read/write file at the same time (bad!) // random delays help // delay for random fraction of 1 second before first rename() call // delay for random fraction of 1 second before subsequent tries // when successful, read next available index and Bcast it within my world int nextindex; if (me == 0) { int seed = 12345 + universe->me + which[find(arg[0])]; RanMars *random = new RanMars(lmp,seed); int delay = (int) (1000000*random->uniform()); usleep(delay); while (1) { if (!rename("tmp.lammps.variable","tmp.lammps.variable.lock")) break; delay = (int) (1000000*random->uniform()); usleep(delay); } delete random; FILE *fp = fopen("tmp.lammps.variable.lock","r"); fscanf(fp,"%d",&nextindex); //printf("READ %d %d\n",universe->me,nextindex); fclose(fp); fp = fopen("tmp.lammps.variable.lock","w"); fprintf(fp,"%d\n",nextindex+1); //printf("WRITE %d %d\n",universe->me,nextindex+1); fclose(fp); fp = NULL; rename("tmp.lammps.variable.lock","tmp.lammps.variable"); if (universe->uscreen) fprintf(universe->uscreen, "Increment via next: value %d on partition %d\n", nextindex+1,universe->iworld); if (universe->ulogfile) fprintf(universe->ulogfile, "Increment via next: value %d on partition %d\n", nextindex+1,universe->iworld); } MPI_Bcast(&nextindex,1,MPI_INT,0,world); // set all variables in list to nextindex // must increment all UNIVERSE and ULOOP variables here // error check above tested for this for (int iarg = 0; iarg < narg; iarg++) { ivar = find(arg[iarg]); which[ivar] = nextindex; if (which[ivar] >= num[ivar]) { flag = 1; remove(ivar); } } } return flag; } /* 
---------------------------------------------------------------------- search for name in list of variables names return index or -1 if not found ------------------------------------------------------------------------- */ int Variable::find(char *name) { for (int i = 0; i < nvar; i++) if (strcmp(name,names[i]) == 0) return i; return -1; } /* ---------------------------------------------------------------------- initialize one atom's storage values in all VarReaders via fix STORE called when atom is created ------------------------------------------------------------------------- */ void Variable::set_arrays(int i) { for (int i = 0; i < nvar; i++) if (reader[i] && style[i] == ATOMFILE) reader[i]->fixstore->vstore[i] = 0.0; } /* ---------------------------------------------------------------------- called by python command in input script simply pass input script line args to Python class ------------------------------------------------------------------------- */ void Variable::python_command(int narg, char **arg) { if (!python->python_exists) error->all(FLERR,"LAMMPS is not built with Python embedded"); python->command(narg,arg); } /* ---------------------------------------------------------------------- return 1 if variable is EQUAL or PYTHON numeric style, 0 if not this is checked before call to compute_equal() to return a double ------------------------------------------------------------------------- */ int Variable::equalstyle(int ivar) { if (style[ivar] == EQUAL) return 1; if (style[ivar] == PYTHON) { int ifunc = python->variable_match(data[ivar][0],names[ivar],1); if (ifunc < 0) return 0; else return 1; } return 0; } /* ---------------------------------------------------------------------- return 1 if variable is ATOM or ATOMFILE style, 0 if not this is checked before call to compute_atom() to return a vector of doubles ------------------------------------------------------------------------- */ int Variable::atomstyle(int ivar) { if (style[ivar] == ATOM || 
style[ivar] == ATOMFILE) return 1; return 0; } /* ---------------------------------------------------------------------- check if variable with name is PYTHON and matches funcname called by Python class before it invokes a Python function return data storage so Python function can return a value for this variable return NULL if not a match ------------------------------------------------------------------------- */ char *Variable::pythonstyle(char *name, char *funcname) { int ivar = find(name); if (ivar < 0) return NULL; if (style[ivar] != PYTHON) return NULL; if (strcmp(data[ivar][0],funcname) != 0) return NULL; return data[ivar][1]; } /* ---------------------------------------------------------------------- return ptr to the data text associated with a variable if INDEX or WORLD or UNIVERSE or STRING or SCALARFILE, return ptr to stored string if LOOP or ULOOP, write int to data[0] and return ptr to string if EQUAL, evaluate variable and put result in str if FORMAT, evaluate its variable and put formatted result in str if GETENV, query environment and put result in str if PYTHON, evaluate Python function, it will put result in str if ATOM or ATOMFILE, return NULL return NULL if no variable with name, or which value is bad, caller must respond ------------------------------------------------------------------------- */ char *Variable::retrieve(char *name) { int ivar = find(name); if (ivar < 0) return NULL; if (which[ivar] >= num[ivar]) return NULL; if (eval_in_progress[ivar]) error->all(FLERR,"Variable has circular dependency"); eval_in_progress[ivar] = 1; char *str = NULL; if (style[ivar] == INDEX || style[ivar] == WORLD || style[ivar] == UNIVERSE || style[ivar] == STRING || style[ivar] == SCALARFILE) { str = data[ivar][which[ivar]]; } else if (style[ivar] == LOOP || style[ivar] == ULOOP) { char result[16]; if (pad[ivar] == 0) sprintf(result,"%d",which[ivar]+1); else { char padstr[16]; sprintf(padstr,"%%0%dd",pad[ivar]); sprintf(result,padstr,which[ivar]+1); } int 
n = strlen(result) + 1; delete [] data[ivar][0]; data[ivar][0] = new char[n]; strcpy(data[ivar][0],result); str = data[ivar][0]; } else if (style[ivar] == EQUAL) { double answer = evaluate(data[ivar][0],NULL); sprintf(data[ivar][1],"%.15g",answer); str = data[ivar][1]; } else if (style[ivar] == FORMAT) { int jvar = find(data[ivar][0]); if (jvar == -1) return NULL; if (!equalstyle(jvar)) return NULL; double answer = compute_equal(jvar); sprintf(data[ivar][2],data[ivar][1],answer); str = data[ivar][2]; } else if (style[ivar] == GETENV) { const char *result = getenv(data[ivar][0]); if (result == NULL) result = (const char *) ""; int n = strlen(result) + 1; if (n > VALUELENGTH) { delete [] data[ivar][1]; data[ivar][1] = new char[n]; } strcpy(data[ivar][1],result); str = data[ivar][1]; } else if (style[ivar] == PYTHON) { int ifunc = python->variable_match(data[ivar][0],names[ivar],0); if (ifunc < 0) error->all(FLERR,"Python variable does not match Python function"); python->invoke_function(ifunc,data[ivar][1]); str = data[ivar][1]; } else if (style[ivar] == ATOM || style[ivar] == ATOMFILE) return NULL; eval_in_progress[ivar] = 0; return str; } /* ---------------------------------------------------------------------- return result of equal-style variable evaluation can be EQUAL style or PYTHON numeric style for PYTHON, don't need to check python->variable_match() error return, since caller will have already checked via equalstyle() ------------------------------------------------------------------------- */ double Variable::compute_equal(int ivar) { if (eval_in_progress[ivar]) error->all(FLERR,"Variable has circular dependency"); eval_in_progress[ivar] = 1; double value; if (style[ivar] == EQUAL) value = evaluate(data[ivar][0],NULL); else if (style[ivar] == PYTHON) { int ifunc = python->find(data[ivar][0]); if (ifunc < 0) error->all(FLERR,"Python variable has no function"); python->invoke_function(ifunc,data[ivar][1]); value = atof(data[ivar][1]); } 
eval_in_progress[ivar] = 0; return value; } /* ---------------------------------------------------------------------- return result of immediate equal-style variable evaluation called from Input::substitute() don't need to flag eval_in_progress since is an immediate variable ------------------------------------------------------------------------- */ double Variable::compute_equal(char *str) { return evaluate(str,NULL); } /* ---------------------------------------------------------------------- compute result of atom-style and atomfile-style variable evaluation only computed for atoms in igroup, else result is 0.0 answers are placed every stride locations into result if sumflag, add variable values to existing result ------------------------------------------------------------------------- */ void Variable::compute_atom(int ivar, int igroup, double *result, int stride, int sumflag) { Tree *tree; double *vstore; if (eval_in_progress[ivar]) error->all(FLERR,"Variable has circular dependency"); eval_in_progress[ivar] = 1; if (style[ivar] == ATOM) { evaluate(data[ivar][0],&tree); collapse_tree(tree); } else vstore = reader[ivar]->fixstore->vstore; if (result == NULL) { eval_in_progress[ivar] = 0; return; } int groupbit = group->bitmask[igroup]; int *mask = atom->mask; int nlocal = atom->nlocal; if (style[ivar] == ATOM) { if (sumflag == 0) { int m = 0; for (int i = 0; i < nlocal; i++) { if (mask[i] & groupbit) result[m] = eval_tree(tree,i); else result[m] = 0.0; m += stride; } } else { int m = 0; for (int i = 0; i < nlocal; i++) { if (mask[i] & groupbit) result[m] += eval_tree(tree,i); m += stride; } } } else { if (sumflag == 0) { int m = 0; for (int i = 0; i < nlocal; i++) { if (mask[i] & groupbit) result[m] = vstore[i]; else result[m] = 0.0; m += stride; } } else { int m = 0; for (int i = 0; i < nlocal; i++) { if (mask[i] & groupbit) result[m] += vstore[i]; m += stride; } } } if (style[ivar] == ATOM) free_tree(tree); eval_in_progress[ivar] = 0; } /* 
---------------------------------------------------------------------- save copy of EQUAL style ivar formula in copy allocate copy here, later equal_restore() call will free it insure data[ivar][0] is of VALUELENGTH since will be overridden next 3 functions are used by create_atoms to temporarily override variables ------------------------------------------------------------------------- */ void Variable::equal_save(int ivar, char *©) { int n = strlen(data[ivar][0]) + 1; copy = new char[n]; strcpy(copy,data[ivar][0]); delete [] data[ivar][0]; data[ivar][0] = new char[VALUELENGTH]; } /* ---------------------------------------------------------------------- restore formula string of EQUAL style ivar from copy then free copy, allocated in equal_save() ------------------------------------------------------------------------- */ void Variable::equal_restore(int ivar, char *copy) { delete [] data[ivar][0]; int n = strlen(copy) + 1; data[ivar][0] = new char[n]; strcpy(data[ivar][0],copy); delete [] copy; } /* ---------------------------------------------------------------------- override EQUAL style ivar formula with value converted to string data[ivar][0] was set to length 64 in equal_save() ------------------------------------------------------------------------- */ void Variable::equal_override(int ivar, double value) { sprintf(data[ivar][0],"%.15g",value); } /* ---------------------------------------------------------------------- remove Nth variable from list and compact list delete reader explicitly if it exists ------------------------------------------------------------------------- */ void Variable::remove(int n) { delete [] names[n]; if (style[n] == LOOP || style[n] == ULOOP) delete [] data[n][0]; else for (int i = 0; i < num[n]; i++) delete [] data[n][i]; delete [] data[n]; delete reader[n]; for (int i = n+1; i < nvar; i++) { names[i-1] = names[i]; style[i-1] = style[i]; num[i-1] = num[i]; which[i-1] = which[i]; pad[i-1] = pad[i]; reader[i-1] = reader[i]; 
data[i-1] = data[i]; } nvar--; } /* ---------------------------------------------------------------------- make space in arrays for new variable ------------------------------------------------------------------------- */ void Variable::grow() { int old = maxvar; maxvar += VARDELTA; names = (char **) memory->srealloc(names,maxvar*sizeof(char *),"var:names"); memory->grow(style,maxvar,"var:style"); memory->grow(num,maxvar,"var:num"); memory->grow(which,maxvar,"var:which"); memory->grow(pad,maxvar,"var:pad"); reader = (VarReader **) memory->srealloc(reader,maxvar*sizeof(VarReader *),"var:reader"); for (int i = old; i < maxvar; i++) reader[i] = NULL; data = (char ***) memory->srealloc(data,maxvar*sizeof(char **),"var:data"); memory->grow(eval_in_progress,maxvar,"var:eval_in_progress"); for (int i = 0; i < maxvar; i++) eval_in_progress[i] = 0; } /* ---------------------------------------------------------------------- copy narg strings from **from to **to, and allocate space for them ------------------------------------------------------------------------- */ void Variable::copy(int narg, char **from, char **to) { int n; for (int i = 0; i < narg; i++) { n = strlen(from[i]) + 1; to[i] = new char[n]; strcpy(to[i],from[i]); } } /* ---------------------------------------------------------------------- recursive evaluation of a string str str is an equal-style or atom-style formula containing one or more items: number = 0.0, -5.45, 2.8e-4, ... - constant = PI + constant = PI, version, yes, no, on, off thermo keyword = ke, vol, atoms, ... math operation = (),-x,x+y,x-y,x*y,x/y,x^y, x==y,x!=y,xy,x>=y,x&&y,x||y, sqrt(x),exp(x),ln(x),log(x),abs(x), sin(x),cos(x),tan(x),asin(x),atan2(y,x),... group function = count(group), mass(group), xcm(group,x), ... special function = sum(x),min(x), ... atom value = x[i], y[i], vx[i], ... atom vector = x, y, vx, ... 
compute = c_ID, c_ID[i], c_ID[i][j] fix = f_ID, f_ID[i], f_ID[i][j] variable = v_name, v_name[i] equal-style variables passes in tree = NULL: evaluate the formula, return result as a double atom-style variable passes in tree = non-NULL: parse the formula but do not evaluate it create a parse tree and return it ------------------------------------------------------------------------- */ double Variable::evaluate(char *str, Tree **tree) { int op,opprevious; double value1,value2; char onechar; char *ptr; double argstack[MAXLEVEL]; Tree *treestack[MAXLEVEL]; int opstack[MAXLEVEL]; int nargstack = 0; int ntreestack = 0; int nopstack = 0; int i = 0; int expect = ARG; while (1) { onechar = str[i]; // whitespace: just skip if (isspace(onechar)) i++; // ---------------- // parentheses: recursively evaluate contents of parens // ---------------- else if (onechar == '(') { if (expect == OP) error->all(FLERR,"Invalid syntax in variable formula"); expect = OP; char *contents; i = find_matching_paren(str,i,contents); i++; // evaluate contents and push on stack if (tree) { Tree *newtree; evaluate(contents,&newtree); treestack[ntreestack++] = newtree; } else argstack[nargstack++] = evaluate(contents,NULL); delete [] contents; // ---------------- // number: push value onto stack // ---------------- } else if (isdigit(onechar) || onechar == '.') { if (expect == OP) error->all(FLERR,"Invalid syntax in variable formula"); expect = OP; // istop = end of number, including scientific notation int istart = i; while (isdigit(str[i]) || str[i] == '.') i++; if (str[i] == 'e' || str[i] == 'E') { i++; if (str[i] == '+' || str[i] == '-') i++; while (isdigit(str[i])) i++; } int istop = i - 1; int n = istop - istart + 1; char *number = new char[n+1]; strncpy(number,&str[istart],n); number[n] = '\0'; if (tree) { Tree *newtree = new Tree(); newtree->type = VALUE; newtree->value = atof(number); newtree->first = newtree->second = NULL; newtree->extra = 0; treestack[ntreestack++] = newtree; } else 
argstack[nargstack++] = atof(number); delete [] number; // ---------------- // letter: c_ID, c_ID[], c_ID[][], f_ID, f_ID[], f_ID[][], // v_name, v_name[], exp(), xcm(,), x, x[], PI, vol // ---------------- } else if (isalpha(onechar)) { if (expect == OP) error->all(FLERR,"Invalid syntax in variable formula"); expect = OP; // istop = end of word // word = all alphanumeric or underscore int istart = i; while (isalnum(str[i]) || str[i] == '_') i++; int istop = i-1; int n = istop - istart + 1; char *word = new char[n+1]; strncpy(word,&str[istart],n); word[n] = '\0'; // ---------------- // compute // ---------------- if (strncmp(word,"c_",2) == 0) { if (domain->box_exist == 0) error->all(FLERR, "Variable evaluation before simulation box is defined"); int icompute = modify->find_compute(word+2); if (icompute < 0) error->all(FLERR,"Invalid compute ID in variable formula"); Compute *compute = modify->compute[icompute]; // parse zero or one or two trailing brackets // point i beyond last bracket // nbracket = # of bracket pairs // index1,index2 = int inside each bracket pair, possibly an atom ID int nbracket; tagint index1,index2; if (str[i] != '[') nbracket = 0; else { nbracket = 1; ptr = &str[i]; index1 = int_between_brackets(ptr,1); i = ptr-str+1; if (str[i] == '[') { nbracket = 2; ptr = &str[i]; index2 = int_between_brackets(ptr,1); i = ptr-str+1; } } // c_ID = scalar from global scalar if (nbracket == 0 && compute->scalar_flag) { if (update->whichflag == 0) { if (compute->invoked_scalar != update->ntimestep) error->all(FLERR,"Compute used in variable between runs " "is not current"); } else if (!(compute->invoked_flag & INVOKED_SCALAR)) { compute->compute_scalar(); compute->invoked_flag |= INVOKED_SCALAR; } value1 = compute->scalar; if (tree) { Tree *newtree = new Tree(); newtree->type = VALUE; newtree->value = value1; newtree->first = newtree->second = NULL; newtree->nextra = 0; treestack[ntreestack++] = newtree; } else argstack[nargstack++] = value1; // c_ID[i] = 
scalar from global vector } else if (nbracket == 1 && compute->vector_flag) { if (index1 > compute->size_vector && compute->size_vector_variable == 0) error->all(FLERR,"Variable formula compute vector " "is accessed out-of-range"); if (update->whichflag == 0) { if (compute->invoked_vector != update->ntimestep) error->all(FLERR,"Compute used in variable between runs " "is not current"); } else if (!(compute->invoked_flag & INVOKED_VECTOR)) { compute->compute_vector(); compute->invoked_flag |= INVOKED_VECTOR; } if (compute->size_vector_variable && index1 > compute->size_vector) value1 = 0.0; else value1 = compute->vector[index1-1]; if (tree) { Tree *newtree = new Tree(); newtree->type = VALUE; newtree->value = value1; newtree->first = newtree->second = NULL; newtree->nextra = 0; treestack[ntreestack++] = newtree; } else argstack[nargstack++] = value1; // c_ID[i][j] = scalar from global array } else if (nbracket == 2 && compute->array_flag) { if (index1 > compute->size_array_rows && compute->size_array_rows_variable == 0) error->all(FLERR,"Variable formula compute array " "is accessed out-of-range"); if (index2 > compute->size_array_cols) error->all(FLERR,"Variable formula compute array " "is accessed out-of-range"); if (update->whichflag == 0) { if (compute->invoked_array != update->ntimestep) error->all(FLERR,"Compute used in variable between runs " "is not current"); } else if (!(compute->invoked_flag & INVOKED_ARRAY)) { compute->compute_array(); compute->invoked_flag |= INVOKED_ARRAY; } if (compute->size_array_rows_variable && index1 > compute->size_array_rows) value1 = 0.0; else value1 = compute->array[index1-1][index2-1]; if (tree) { Tree *newtree = new Tree(); newtree->type = VALUE; newtree->value = value1; newtree->first = newtree->second = NULL; newtree->nextra = 0; treestack[ntreestack++] = newtree; } else argstack[nargstack++] = value1; // c_ID[i] = scalar from per-atom vector } else if (nbracket == 1 && compute->peratom_flag && compute->size_peratom_cols 
== 0) { if (update->whichflag == 0) { if (compute->invoked_peratom != update->ntimestep) error->all(FLERR,"Compute used in variable between runs " "is not current"); } else if (!(compute->invoked_flag & INVOKED_PERATOM)) { compute->compute_peratom(); compute->invoked_flag |= INVOKED_PERATOM; } peratom2global(1,NULL,compute->vector_atom,1,index1, tree,treestack,ntreestack,argstack,nargstack); // c_ID[i][j] = scalar from per-atom array } else if (nbracket == 2 && compute->peratom_flag && compute->size_peratom_cols > 0) { if (index2 > compute->size_peratom_cols) error->all(FLERR,"Variable formula compute array " "is accessed out-of-range"); if (update->whichflag == 0) { if (compute->invoked_peratom != update->ntimestep) error->all(FLERR,"Compute used in variable between runs " "is not current"); } else if (!(compute->invoked_flag & INVOKED_PERATOM)) { compute->compute_peratom(); compute->invoked_flag |= INVOKED_PERATOM; } if (compute->array_atom) peratom2global(1,NULL,&compute->array_atom[0][index2-1], compute->size_peratom_cols,index1, tree,treestack,ntreestack,argstack,nargstack); else peratom2global(1,NULL,NULL, compute->size_peratom_cols,index1, tree,treestack,ntreestack,argstack,nargstack); // c_ID = vector from per-atom vector } else if (nbracket == 0 && compute->peratom_flag && compute->size_peratom_cols == 0) { if (tree == NULL) error->all(FLERR, "Per-atom compute in equal-style variable formula"); if (update->whichflag == 0) { if (compute->invoked_peratom != update->ntimestep) error->all(FLERR,"Compute used in variable between runs " "is not current"); } else if (!(compute->invoked_flag & INVOKED_PERATOM)) { compute->compute_peratom(); compute->invoked_flag |= INVOKED_PERATOM; } Tree *newtree = new Tree(); newtree->type = ATOMARRAY; newtree->array = compute->vector_atom; newtree->nstride = 1; newtree->selfalloc = 0; newtree->first = newtree->second = NULL; newtree->nextra = 0; treestack[ntreestack++] = newtree; // c_ID[i] = vector from per-atom array } else 
if (nbracket == 1 && compute->peratom_flag && compute->size_peratom_cols > 0) { if (tree == NULL) error->all(FLERR, "Per-atom compute in equal-style variable formula"); if (index1 > compute->size_peratom_cols) error->all(FLERR,"Variable formula compute array " "is accessed out-of-range"); if (update->whichflag == 0) { if (compute->invoked_peratom != update->ntimestep) error->all(FLERR,"Compute used in variable between runs " "is not current"); } else if (!(compute->invoked_flag & INVOKED_PERATOM)) { compute->compute_peratom(); compute->invoked_flag |= INVOKED_PERATOM; } Tree *newtree = new Tree(); newtree->type = ATOMARRAY; if (compute->array_atom) newtree->array = &compute->array_atom[0][index1-1]; else newtree->array = NULL; newtree->nstride = compute->size_peratom_cols; newtree->selfalloc = 0; newtree->first = newtree->second = NULL; newtree->nextra = 0; treestack[ntreestack++] = newtree; } else error->all(FLERR,"Mismatched compute in variable formula"); // ---------------- // fix // ---------------- } else if (strncmp(word,"f_",2) == 0) { if (domain->box_exist == 0) error->all(FLERR, "Variable evaluation before simulation box is defined"); int ifix = modify->find_fix(word+2); if (ifix < 0) error->all(FLERR,"Invalid fix ID in variable formula"); Fix *fix = modify->fix[ifix]; // parse zero or one or two trailing brackets // point i beyond last bracket // nbracket = # of bracket pairs // index1,index2 = int inside each bracket pair, possibly an atom ID int nbracket; tagint index1,index2; if (str[i] != '[') nbracket = 0; else { nbracket = 1; ptr = &str[i]; index1 = int_between_brackets(ptr,1); i = ptr-str+1; if (str[i] == '[') { nbracket = 2; ptr = &str[i]; index2 = int_between_brackets(ptr,1); i = ptr-str+1; } } // f_ID = scalar from global scalar if (nbracket == 0 && fix->scalar_flag) { if (update->whichflag > 0 && update->ntimestep % fix->global_freq) error->all(FLERR,"Fix in variable not computed at compatible time"); value1 = fix->compute_scalar(); if (tree) { 
Tree *newtree = new Tree(); newtree->type = VALUE; newtree->value = value1; newtree->first = newtree->second = NULL; newtree->nextra = 0; treestack[ntreestack++] = newtree; } else argstack[nargstack++] = value1; // f_ID[i] = scalar from global vector } else if (nbracket == 1 && fix->vector_flag) { if (index1 > fix->size_vector && fix->size_vector_variable == 0) error->all(FLERR,"Variable formula fix vector is " "accessed out-of-range"); if (update->whichflag > 0 && update->ntimestep % fix->global_freq) error->all(FLERR,"Fix in variable not computed at compatible time"); value1 = fix->compute_vector(index1-1); if (tree) { Tree *newtree = new Tree(); newtree->type = VALUE; newtree->value = value1; newtree->first = newtree->second = NULL; newtree->nextra = 0; treestack[ntreestack++] = newtree; } else argstack[nargstack++] = value1; // f_ID[i][j] = scalar from global array } else if (nbracket == 2 && fix->array_flag) { if (index1 > fix->size_array_rows && fix->size_array_rows_variable == 0) error->all(FLERR, "Variable formula fix array is accessed out-of-range"); if (index2 > fix->size_array_cols) error->all(FLERR, "Variable formula fix array is accessed out-of-range"); if (update->whichflag > 0 && update->ntimestep % fix->global_freq) error->all(FLERR,"Fix in variable not computed at compatible time"); value1 = fix->compute_array(index1-1,index2-1); if (tree) { Tree *newtree = new Tree(); newtree->type = VALUE; newtree->value = value1; newtree->first = newtree->second = NULL; newtree->nextra = 0; treestack[ntreestack++] = newtree; } else argstack[nargstack++] = value1; // f_ID[i] = scalar from per-atom vector } else if (nbracket == 1 && fix->peratom_flag && fix->size_peratom_cols == 0) { if (update->whichflag > 0 && update->ntimestep % fix->peratom_freq) error->all(FLERR, "Fix in variable not computed at compatible time"); peratom2global(1,NULL,fix->vector_atom,1,index1, tree,treestack,ntreestack,argstack,nargstack); // f_ID[i][j] = scalar from per-atom array } else 
if (nbracket == 2 && fix->peratom_flag && fix->size_peratom_cols > 0) { if (index2 > fix->size_peratom_cols) error->all(FLERR, "Variable formula fix array is accessed out-of-range"); if (update->whichflag > 0 && update->ntimestep % fix->peratom_freq) error->all(FLERR,"Fix in variable not computed at compatible time"); if (fix->array_atom) peratom2global(1,NULL,&fix->array_atom[0][index2-1], fix->size_peratom_cols,index1, tree,treestack,ntreestack,argstack,nargstack); else peratom2global(1,NULL,NULL, fix->size_peratom_cols,index1, tree,treestack,ntreestack,argstack,nargstack); // f_ID = vector from per-atom vector } else if (nbracket == 0 && fix->peratom_flag && fix->size_peratom_cols == 0) { if (tree == NULL) error->all(FLERR,"Per-atom fix in equal-style variable formula"); if (update->whichflag > 0 && update->ntimestep % fix->peratom_freq) error->all(FLERR,"Fix in variable not computed at compatible time"); Tree *newtree = new Tree(); newtree->type = ATOMARRAY; newtree->array = fix->vector_atom; newtree->nstride = 1; newtree->selfalloc = 0; newtree->first = newtree->second = NULL; newtree->nextra = 0; treestack[ntreestack++] = newtree; // f_ID[i] = vector from per-atom array } else if (nbracket == 1 && fix->peratom_flag && fix->size_peratom_cols > 0) { if (tree == NULL) error->all(FLERR,"Per-atom fix in equal-style variable formula"); if (index1 > fix->size_peratom_cols) error->all(FLERR, "Variable formula fix array is accessed out-of-range"); if (update->whichflag > 0 && update->ntimestep % fix->peratom_freq) error->all(FLERR,"Fix in variable not computed at compatible time"); Tree *newtree = new Tree(); newtree->type = ATOMARRAY; if (fix->array_atom) newtree->array = &fix->array_atom[0][index1-1]; else newtree->array = NULL; newtree->nstride = fix->size_peratom_cols; newtree->selfalloc = 0; newtree->first = newtree->second = NULL; newtree->nextra = 0; treestack[ntreestack++] = newtree; } else error->all(FLERR,"Mismatched fix in variable formula"); // 
---------------- // variable // ---------------- } else if (strncmp(word,"v_",2) == 0) { int ivar = find(word+2); if (ivar < 0) error->all(FLERR,"Invalid variable name in variable formula"); if (eval_in_progress[ivar]) error->all(FLERR,"Variable has circular dependency"); // parse zero or one trailing brackets // point i beyond last bracket // nbracket = # of bracket pairs // index = int inside bracket, possibly an atom ID int nbracket; tagint index; if (str[i] != '[') nbracket = 0; else { nbracket = 1; ptr = &str[i]; index = int_between_brackets(ptr,1); i = ptr-str+1; } // v_name = scalar from non atom/atomfile variable if (nbracket == 0 && style[ivar] != ATOM && style[ivar] != ATOMFILE) { char *var = retrieve(word+2); if (var == NULL) error->all(FLERR,"Invalid variable evaluation in variable formula"); if (tree) { Tree *newtree = new Tree(); newtree->type = VALUE; newtree->value = atof(var); newtree->first = newtree->second = NULL; newtree->nextra = 0; treestack[ntreestack++] = newtree; } else argstack[nargstack++] = atof(var); // v_name = per-atom vector from atom-style variable // evaluate the atom-style variable as newtree } else if (nbracket == 0 && style[ivar] == ATOM) { if (tree == NULL) error->all(FLERR, "Atom-style variable in equal-style variable formula"); Tree *newtree; evaluate(data[ivar][0],&newtree); treestack[ntreestack++] = newtree; // v_name = per-atom vector from atomfile-style variable } else if (nbracket == 0 && style[ivar] == ATOMFILE) { if (tree == NULL) error->all(FLERR,"Atomfile-style variable in " "equal-style variable formula"); Tree *newtree = new Tree(); newtree->type = ATOMARRAY; newtree->array = reader[ivar]->fixstore->vstore; newtree->nstride = 1; newtree->selfalloc = 0; newtree->first = newtree->second = NULL; newtree->nextra = 0; treestack[ntreestack++] = newtree; // v_name[N] = scalar from atom-style variable // compute the per-atom variable in result // use peratom2global to extract single value from result } else if (nbracket 
&& style[ivar] == ATOM) { double *result; memory->create(result,atom->nlocal,"variable:result"); compute_atom(ivar,0,result,1,0); peratom2global(1,NULL,result,1,index, tree,treestack,ntreestack,argstack,nargstack); memory->destroy(result); // v_name[N] = scalar from atomfile-style variable } else if (nbracket && style[ivar] == ATOMFILE) { peratom2global(1,NULL,reader[ivar]->fixstore->vstore,1,index, tree,treestack,ntreestack,argstack,nargstack); } else error->all(FLERR,"Mismatched variable in variable formula"); // ---------------- // math/group/special function or atom value/vector or // constant or thermo keyword // ---------------- } else { // ---------------- // math or group or special function // ---------------- if (str[i] == '(') { char *contents; i = find_matching_paren(str,i,contents); i++; if (math_function(word,contents,tree, treestack,ntreestack,argstack,nargstack)); else if (group_function(word,contents,tree, treestack,ntreestack,argstack,nargstack)); else if (special_function(word,contents,tree, treestack,ntreestack,argstack,nargstack)); else error->all(FLERR,"Invalid math/group/special function " "in variable formula"); delete [] contents; // ---------------- // atom value // ---------------- } else if (str[i] == '[') { if (domain->box_exist == 0) error->all(FLERR, "Variable evaluation before simulation box is defined"); ptr = &str[i]; tagint id = int_between_brackets(ptr,1); i = ptr-str+1; peratom2global(0,word,NULL,0,id, tree,treestack,ntreestack,argstack,nargstack); // ---------------- // atom vector // ---------------- } else if (is_atom_vector(word)) { if (domain->box_exist == 0) error->all(FLERR, "Variable evaluation before simulation box is defined"); atom_vector(word,tree,treestack,ntreestack); // ---------------- // constant // ---------------- } else if (is_constant(word)) { value1 = constant(word); if (tree) { Tree *newtree = new Tree(); newtree->type = VALUE; newtree->value = value1; newtree->first = newtree->second = NULL; 
newtree->nextra = 0; treestack[ntreestack++] = newtree; } else argstack[nargstack++] = value1; // ---------------- // thermo keyword // ---------------- } else { if (domain->box_exist == 0) error->all(FLERR, "Variable evaluation before simulation box is defined"); int flag = output->thermo->evaluate_keyword(word,&value1); if (flag) error->all(FLERR,"Invalid thermo keyword in variable formula"); if (tree) { Tree *newtree = new Tree(); newtree->type = VALUE; newtree->value = value1; newtree->first = newtree->second = NULL; newtree->nextra = 0; treestack[ntreestack++] = newtree; } else argstack[nargstack++] = value1; } } delete [] word; // ---------------- // math operator, including end-of-string // ---------------- } else if (strchr("+-*/^<>=!&|%\0",onechar)) { if (onechar == '+') op = ADD; else if (onechar == '-') op = SUBTRACT; else if (onechar == '*') op = MULTIPLY; else if (onechar == '/') op = DIVIDE; else if (onechar == '%') op = MODULO; else if (onechar == '^') op = CARAT; else if (onechar == '=') { if (str[i+1] != '=') error->all(FLERR,"Invalid syntax in variable formula"); op = EQ; i++; } else if (onechar == '!') { if (str[i+1] == '=') { op = NE; i++; } else op = NOT; } else if (onechar == '<') { if (str[i+1] != '=') op = LT; else { op = LE; i++; } } else if (onechar == '>') { if (str[i+1] != '=') op = GT; else { op = GE; i++; } } else if (onechar == '&') { if (str[i+1] != '&') error->all(FLERR,"Invalid syntax in variable formula"); op = AND; i++; } else if (onechar == '|') { if (str[i+1] != '|') error->all(FLERR,"Invalid syntax in variable formula"); op = OR; i++; } else op = DONE; i++; if (op == SUBTRACT && expect == ARG) { opstack[nopstack++] = UNARY; continue; } if (op == NOT && expect == ARG) { opstack[nopstack++] = op; continue; } if (expect == ARG) error->all(FLERR,"Invalid syntax in variable formula"); expect = ARG; // evaluate stack as deep as possible while respecting precedence // before pushing current op onto stack while (nopstack && 
precedence[opstack[nopstack-1]] >= precedence[op]) { opprevious = opstack[--nopstack]; if (tree) { Tree *newtree = new Tree(); newtree->type = opprevious; if (opprevious == UNARY) { newtree->first = treestack[--ntreestack]; newtree->second = NULL; newtree->nextra = 0; } else { newtree->second = treestack[--ntreestack]; newtree->first = treestack[--ntreestack]; newtree->nextra = 0; } treestack[ntreestack++] = newtree; } else { value2 = argstack[--nargstack]; if (opprevious != UNARY && opprevious != NOT) value1 = argstack[--nargstack]; if (opprevious == ADD) argstack[nargstack++] = value1 + value2; else if (opprevious == SUBTRACT) argstack[nargstack++] = value1 - value2; else if (opprevious == MULTIPLY) argstack[nargstack++] = value1 * value2; else if (opprevious == DIVIDE) { if (value2 == 0.0) error->all(FLERR,"Divide by 0 in variable formula"); argstack[nargstack++] = value1 / value2; } else if (opprevious == MODULO) { if (value2 == 0.0) error->all(FLERR,"Modulo 0 in variable formula"); argstack[nargstack++] = fmod(value1,value2); } else if (opprevious == CARAT) { if (value2 == 0.0) error->all(FLERR,"Power by 0 in variable formula"); argstack[nargstack++] = pow(value1,value2); } else if (opprevious == UNARY) { argstack[nargstack++] = -value2; } else if (opprevious == NOT) { if (value2 == 0.0) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } else if (opprevious == EQ) { if (value1 == value2) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } else if (opprevious == NE) { if (value1 != value2) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } else if (opprevious == LT) { if (value1 < value2) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } else if (opprevious == LE) { if (value1 <= value2) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } else if (opprevious == GT) { if (value1 > value2) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } else if (opprevious == GE) { if (value1 >= 
value2) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } else if (opprevious == AND) { if (value1 != 0.0 && value2 != 0.0) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } else if (opprevious == OR) { if (value1 != 0.0 || value2 != 0.0) argstack[nargstack++] = 1.0; else argstack[nargstack++] = 0.0; } } } // if end-of-string, break out of entire formula evaluation loop if (op == DONE) break; // push current operation onto stack opstack[nopstack++] = op; } else error->all(FLERR,"Invalid syntax in variable formula"); } if (nopstack) error->all(FLERR,"Invalid syntax in variable formula"); // for atom-style variable, return remaining tree // for equal-style variable, return remaining arg if (tree) { if (ntreestack != 1) error->all(FLERR,"Invalid syntax in variable formula"); *tree = treestack[0]; return 0.0; } else { if (nargstack != 1) error->all(FLERR,"Invalid syntax in variable formula"); return argstack[0]; } } /* ---------------------------------------------------------------------- one-time collapse of an atom-style variable parse tree tree was created by one-time parsing of formula string via evaluate() only keep tree nodes that depend on ATOMARRAY, TYPEARRAY, INTARRAY, BIGINTARRAY remainder is converted to single VALUE this enables optimal eval_tree loop over atoms customize by adding a function: sqrt(),exp(),ln(),log(),abs(),sin(),cos(),tan(),asin(),acos(),atan(), atan2(y,x),random(x,y,z),normal(x,y,z),ceil(),floor(),round(), ramp(x,y),stagger(x,y),logfreq(x,y,z),logfreq2(x,y,z), stride(x,y,z),vdisplace(x,y),swiggle(x,y,z),cwiggle(x,y,z), gmask(x),rmask(x),grmask(x,y) ---------------------------------------------------------------------- */ double Variable::collapse_tree(Tree *tree) { double arg1,arg2; if (tree->type == VALUE) return tree->value; if (tree->type == ATOMARRAY) return 0.0; if (tree->type == TYPEARRAY) return 0.0; if (tree->type == INTARRAY) return 0.0; if (tree->type == BIGINTARRAY) return 0.0; if (tree->type == 
ADD) { arg1 = collapse_tree(tree->first); arg2 = collapse_tree(tree->second); if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0; tree->type = VALUE; tree->value = arg1 + arg2; return tree->value; } if (tree->type == SUBTRACT) { arg1 = collapse_tree(tree->first); arg2 = collapse_tree(tree->second); if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0; tree->type = VALUE; tree->value = arg1 - arg2; return tree->value; } if (tree->type == MULTIPLY) { arg1 = collapse_tree(tree->first); arg2 = collapse_tree(tree->second); if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0; tree->type = VALUE; tree->value = arg1 * arg2; return tree->value; } if (tree->type == DIVIDE) { arg1 = collapse_tree(tree->first); arg2 = collapse_tree(tree->second); if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0; tree->type = VALUE; if (arg2 == 0.0) error->one(FLERR,"Divide by 0 in variable formula"); tree->value = arg1 / arg2; return tree->value; } if (tree->type == MODULO) { arg1 = collapse_tree(tree->first); arg2 = collapse_tree(tree->second); if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0; tree->type = VALUE; if (arg2 == 0.0) error->one(FLERR,"Modulo 0 in variable formula"); tree->value = fmod(arg1,arg2); return tree->value; } if (tree->type == CARAT) { arg1 = collapse_tree(tree->first); arg2 = collapse_tree(tree->second); if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0; tree->type = VALUE; if (arg2 == 0.0) error->one(FLERR,"Power by 0 in variable formula"); tree->value = pow(arg1,arg2); return tree->value; } if (tree->type == UNARY) { arg1 = collapse_tree(tree->first); if (tree->first->type != VALUE) return 0.0; tree->type = VALUE; tree->value = -arg1; return tree->value; } if (tree->type == NOT) { arg1 = collapse_tree(tree->first); if (tree->first->type != VALUE) return 0.0; tree->type = VALUE; if (arg1 == 0.0) tree->value = 1.0; else tree->value 
= 0.0; return tree->value; } if (tree->type == EQ) { arg1 = collapse_tree(tree->first); arg2 = collapse_tree(tree->second); if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0; tree->type = VALUE; if (arg1 == arg2) tree->value = 1.0; else tree->value = 0.0; return tree->value; } if (tree->type == NE) { arg1 = collapse_tree(tree->first); arg2 = collapse_tree(tree->second); if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0; tree->type = VALUE; if (arg1 != arg2) tree->value = 1.0; else tree->value = 0.0; return tree->value; } if (tree->type == LT) { arg1 = collapse_tree(tree->first); arg2 = collapse_tree(tree->second); if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0; tree->type = VALUE; if (arg1 < arg2) tree->value = 1.0; else tree->value = 0.0; return tree->value; } if (tree->type == LE) { arg1 = collapse_tree(tree->first); arg2 = collapse_tree(tree->second); if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0; tree->type = VALUE; if (arg1 <= arg2) tree->value = 1.0; else tree->value = 0.0; return tree->value; } if (tree->type == GT) { arg1 = collapse_tree(tree->first); arg2 = collapse_tree(tree->second); if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0; tree->type = VALUE; if (arg1 > arg2) tree->value = 1.0; else tree->value = 0.0; return tree->value; } if (tree->type == GE) { arg1 = collapse_tree(tree->first); arg2 = collapse_tree(tree->second); if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0; tree->type = VALUE; if (arg1 >= arg2) tree->value = 1.0; else tree->value = 0.0; return tree->value; } if (tree->type == AND) { arg1 = collapse_tree(tree->first); arg2 = collapse_tree(tree->second); if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0; tree->type = VALUE; if (arg1 != 0.0 && arg2 != 0.0) tree->value = 1.0; else tree->value = 0.0; return tree->value; } if (tree->type == OR) { arg1 = 
collapse_tree(tree->first); arg2 = collapse_tree(tree->second); if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0; tree->type = VALUE; if (arg1 != 0.0 || arg2 != 0.0) tree->value = 1.0; else tree->value = 0.0; return tree->value; } if (tree->type == SQRT) { arg1 = collapse_tree(tree->first); if (tree->first->type != VALUE) return 0.0; tree->type = VALUE; if (arg1 < 0.0) error->one(FLERR,"Sqrt of negative value in variable formula"); tree->value = sqrt(arg1); return tree->value; } if (tree->type == EXP) { arg1 = collapse_tree(tree->first); if (tree->first->type != VALUE) return 0.0; tree->type = VALUE; tree->value = exp(arg1); return tree->value; } if (tree->type == LN) { arg1 = collapse_tree(tree->first); if (tree->first->type != VALUE) return 0.0; tree->type = VALUE; if (arg1 <= 0.0) error->one(FLERR,"Log of zero/negative value in variable formula"); tree->value = log(arg1); return tree->value; } if (tree->type == LOG) { arg1 = collapse_tree(tree->first); if (tree->first->type != VALUE) return 0.0; tree->type = VALUE; if (arg1 <= 0.0) error->one(FLERR,"Log of zero/negative value in variable formula"); tree->value = log10(arg1); return tree->value; } if (tree->type == ABS) { arg1 = collapse_tree(tree->first); if (tree->first->type != VALUE) return 0.0; tree->type = VALUE; tree->value = fabs(arg1); return tree->value; } if (tree->type == SIN) { arg1 = collapse_tree(tree->first); if (tree->first->type != VALUE) return 0.0; tree->type = VALUE; tree->value = sin(arg1); return tree->value; } if (tree->type == COS) { arg1 = collapse_tree(tree->first); if (tree->first->type != VALUE) return 0.0; tree->type = VALUE; tree->value = cos(arg1); return tree->value; } if (tree->type == TAN) { arg1 = collapse_tree(tree->first); if (tree->first->type != VALUE) return 0.0; tree->type = VALUE; tree->value = tan(arg1); return tree->value; } if (tree->type == ASIN) { arg1 = collapse_tree(tree->first); if (tree->first->type != VALUE) return 0.0; tree->type = 
VALUE; if (arg1 < -1.0 || arg1 > 1.0) error->one(FLERR,"Arcsin of invalid value in variable formula"); tree->value = asin(arg1); return tree->value; } if (tree->type == ACOS) { arg1 = collapse_tree(tree->first); if (tree->first->type != VALUE) return 0.0; tree->type = VALUE; if (arg1 < -1.0 || arg1 > 1.0) error->one(FLERR,"Arccos of invalid value in variable formula"); tree->value = acos(arg1); return tree->value; } if (tree->type == ATAN) { arg1 = collapse_tree(tree->first); if (tree->first->type != VALUE) return 0.0; tree->type = VALUE; tree->value = atan(arg1); return tree->value; } if (tree->type == ATAN2) { arg1 = collapse_tree(tree->first); arg2 = collapse_tree(tree->second); if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0; tree->type = VALUE; tree->value = atan2(arg1,arg2); return tree->value; } // random() or normal() do not become a single collapsed value if (tree->type == RANDOM) { collapse_tree(tree->first); collapse_tree(tree->second); if (randomatom == NULL) { int seed = static_cast (collapse_tree(tree->extra[0])); if (seed <= 0) error->one(FLERR,"Invalid math function in variable formula"); randomatom = new RanMars(lmp,seed+me); } return 0.0; } if (tree->type == NORMAL) { collapse_tree(tree->first); double sigma = collapse_tree(tree->second); if (sigma < 0.0) error->one(FLERR,"Invalid math function in variable formula"); if (randomatom == NULL) { int seed = static_cast (collapse_tree(tree->extra[0])); if (seed <= 0) error->one(FLERR,"Invalid math function in variable formula"); randomatom = new RanMars(lmp,seed+me); } return 0.0; } if (tree->type == CEIL) { arg1 = collapse_tree(tree->first); if (tree->first->type != VALUE) return 0.0; tree->type = VALUE; tree->value = ceil(arg1); return tree->value; } if (tree->type == FLOOR) { arg1 = collapse_tree(tree->first); if (tree->first->type != VALUE) return 0.0; tree->type = VALUE; tree->value = floor(arg1); return tree->value; } if (tree->type == ROUND) { arg1 = 
collapse_tree(tree->first); if (tree->first->type != VALUE) return 0.0; tree->type = VALUE; tree->value = MYROUND(arg1); return tree->value; } if (tree->type == RAMP) { arg1 = collapse_tree(tree->first); arg2 = collapse_tree(tree->second); if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0; tree->type = VALUE; double delta = update->ntimestep - update->beginstep; if (delta != 0.0) delta /= update->endstep - update->beginstep; tree->value = arg1 + delta*(arg2-arg1); return tree->value; } if (tree->type == STAGGER) { int ivalue1 = static_cast (collapse_tree(tree->first)); int ivalue2 = static_cast (collapse_tree(tree->second)); if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0; tree->type = VALUE; if (ivalue1 <= 0 || ivalue2 <= 0 || ivalue1 <= ivalue2) error->one(FLERR,"Invalid math function in variable formula"); int lower = update->ntimestep/ivalue1 * ivalue1; int delta = update->ntimestep - lower; if (delta < ivalue2) tree->value = lower+ivalue2; else tree->value = lower+ivalue1; return tree->value; } if (tree->type == LOGFREQ) { int ivalue1 = static_cast (collapse_tree(tree->first)); int ivalue2 = static_cast (collapse_tree(tree->second)); int ivalue3 = static_cast (collapse_tree(tree->extra[0])); if (tree->first->type != VALUE || tree->second->type != VALUE || tree->extra[0]->type != VALUE) return 0.0; tree->type = VALUE; if (ivalue1 <= 0 || ivalue2 <= 0 || ivalue3 <= 0 || ivalue2 >= ivalue3) error->one(FLERR,"Invalid math function in variable formula"); if (update->ntimestep < ivalue1) tree->value = ivalue1; else { int lower = ivalue1; while (update->ntimestep >= ivalue3*lower) lower *= ivalue3; int multiple = update->ntimestep/lower; if (multiple < ivalue2) tree->value = (multiple+1)*lower; else tree->value = lower*ivalue3; } return tree->value; } if (tree->type == LOGFREQ2) { int ivalue1 = static_cast (collapse_tree(tree->first)); int ivalue2 = static_cast (collapse_tree(tree->second)); int ivalue3 = static_cast 
(collapse_tree(tree->extra[0])); if (tree->first->type != VALUE || tree->second->type != VALUE || tree->extra[0]->type != VALUE) return 0.0; tree->type = VALUE; if (ivalue1 <= 0 || ivalue2 <= 0 || ivalue3 <= 0 ) error->all(FLERR,"Invalid math function in variable formula"); if (update->ntimestep < ivalue1) tree->value = ivalue1; else { tree->value = ivalue1; double delta = ivalue1*(ivalue3-1.0)/ivalue2; int count = 0; while (update->ntimestep >= tree->value) { tree->value += delta; count++; if (count % ivalue2 == 0) delta *= ivalue3; } } tree->value = ceil(tree->value); return tree->value; } if (tree->type == STRIDE) { int ivalue1 = static_cast (collapse_tree(tree->first)); int ivalue2 = static_cast (collapse_tree(tree->second)); int ivalue3 = static_cast (collapse_tree(tree->extra[0])); if (tree->first->type != VALUE || tree->second->type != VALUE || tree->extra[0]->type != VALUE) return 0.0; tree->type = VALUE; if (ivalue1 < 0 || ivalue2 < 0 || ivalue3 <= 0 || ivalue1 > ivalue2) error->one(FLERR,"Invalid math function in variable formula"); if (update->ntimestep < ivalue1) tree->value = ivalue1; else if (update->ntimestep < ivalue2) { int offset = update->ntimestep - ivalue1; tree->value = ivalue1 + (offset/ivalue3)*ivalue3 + ivalue3; if (tree->value > ivalue2) tree->value = MAXBIGINT; } else tree->value = MAXBIGINT; return tree->value; } if (tree->type == STRIDE2) { int ivalue1 = static_cast (collapse_tree(tree->first)); int ivalue2 = static_cast (collapse_tree(tree->second)); int ivalue3 = static_cast (collapse_tree(tree->extra[0])); int ivalue4 = static_cast (collapse_tree(tree->extra[1])); int ivalue5 = static_cast (collapse_tree(tree->extra[2])); int ivalue6 = static_cast (collapse_tree(tree->extra[3])); if (tree->first->type != VALUE || tree->second->type != VALUE || tree->extra[0]->type != VALUE || tree->extra[1]->type != VALUE || tree->extra[2]->type != VALUE || tree->extra[3]->type != VALUE) return 0.0; tree->type = VALUE; if (ivalue1 < 0 || ivalue2 < 0 
|| ivalue3 <= 0 || ivalue1 > ivalue2) error->one(FLERR,"Invalid math function in variable formula"); if (ivalue4 < 0 || ivalue5 < 0 || ivalue6 <= 0 || ivalue4 > ivalue5) error->one(FLERR,"Invalid math function in variable formula"); if (ivalue4 < ivalue1 || ivalue5 > ivalue2) error->one(FLERR,"Invalid math function in variable formula"); bigint istep; if (update->ntimestep < ivalue1) istep = ivalue1; else if (update->ntimestep < ivalue2) { if (update->ntimestep < ivalue4 || update->ntimestep > ivalue5) { int offset = update->ntimestep - ivalue1; istep = ivalue1 + (offset/ivalue3)*ivalue3 + ivalue3; if (update->ntimestep < ivalue2 && istep > ivalue4) tree->value = ivalue4; } else { int offset = update->ntimestep - ivalue4; istep = ivalue4 + (offset/ivalue6)*ivalue6 + ivalue6; if (istep > ivalue5) { int offset = ivalue5 - ivalue1; istep = ivalue1 + (offset/ivalue3)*ivalue3 + ivalue3; if (istep > ivalue2) istep = MAXBIGINT; } } } else istep = MAXBIGINT; tree->value = istep; return tree->value; } if (tree->type == VDISPLACE) { double arg1 = collapse_tree(tree->first); double arg2 = collapse_tree(tree->second); if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0; tree->type = VALUE; double delta = update->ntimestep - update->beginstep; tree->value = arg1 + arg2*delta*update->dt; return tree->value; } if (tree->type == SWIGGLE) { double arg1 = collapse_tree(tree->first); double arg2 = collapse_tree(tree->second); double arg3 = collapse_tree(tree->extra[0]); if (tree->first->type != VALUE || tree->second->type != VALUE || tree->extra[0]->type != VALUE) return 0.0; tree->type = VALUE; if (arg3 == 0.0) error->one(FLERR,"Invalid math function in variable formula"); double delta = update->ntimestep - update->beginstep; double omega = 2.0*MY_PI/arg3; tree->value = arg1 + arg2*sin(omega*delta*update->dt); return tree->value; } if (tree->type == CWIGGLE) { double arg1 = collapse_tree(tree->first); double arg2 = collapse_tree(tree->second); double arg3 = 
collapse_tree(tree->extra[0]); if (tree->first->type != VALUE || tree->second->type != VALUE || tree->extra[0]->type != VALUE) return 0.0; tree->type = VALUE; if (arg3 == 0.0) error->one(FLERR,"Invalid math function in variable formula"); double delta = update->ntimestep - update->beginstep; double omega = 2.0*MY_PI/arg3; tree->value = arg1 + arg2*(1.0-cos(omega*delta*update->dt)); return tree->value; } // mask functions do not become a single collapsed value if (tree->type == GMASK) return 0.0; if (tree->type == RMASK) return 0.0; if (tree->type == GRMASK) return 0.0; return 0.0; } /* ---------------------------------------------------------------------- evaluate an atom-style variable parse tree for atom I tree was created by one-time parsing of formula string via evaulate() customize by adding a function: sqrt(),exp(),ln(),log(),sin(),cos(),tan(),asin(),acos(),atan(), atan2(y,x),random(x,y,z),normal(x,y,z),ceil(),floor(),round(), ramp(x,y),stagger(x,y),logfreq(x,y,z),logfreq2(x,y,z), stride(x,y,z),stride2(x,y,z),vdisplace(x,y),swiggle(x,y,z), cwiggle(x,y,z),gmask(x),rmask(x),grmask(x,y) ---------------------------------------------------------------------- */ double Variable::eval_tree(Tree *tree, int i) { double arg,arg1,arg2,arg3; if (tree->type == VALUE) return tree->value; if (tree->type == ATOMARRAY) return tree->array[i*tree->nstride]; if (tree->type == TYPEARRAY) return tree->array[atom->type[i]]; if (tree->type == INTARRAY) return (double) tree->iarray[i*tree->nstride]; if (tree->type == BIGINTARRAY) return (double) tree->barray[i*tree->nstride]; if (tree->type == ADD) return eval_tree(tree->first,i) + eval_tree(tree->second,i); if (tree->type == SUBTRACT) return eval_tree(tree->first,i) - eval_tree(tree->second,i); if (tree->type == MULTIPLY) return eval_tree(tree->first,i) * eval_tree(tree->second,i); if (tree->type == DIVIDE) { double denom = eval_tree(tree->second,i); if (denom == 0.0) error->one(FLERR,"Divide by 0 in variable formula"); return 
eval_tree(tree->first,i) / denom; } if (tree->type == MODULO) { double denom = eval_tree(tree->second,i); if (denom == 0.0) error->one(FLERR,"Modulo 0 in variable formula"); return fmod(eval_tree(tree->first,i),denom); } if (tree->type == CARAT) { double exponent = eval_tree(tree->second,i); if (exponent == 0.0) error->one(FLERR,"Power by 0 in variable formula"); return pow(eval_tree(tree->first,i),exponent); } if (tree->type == UNARY) return -eval_tree(tree->first,i); if (tree->type == NOT) { if (eval_tree(tree->first,i) == 0.0) return 1.0; else return 0.0; } if (tree->type == EQ) { if (eval_tree(tree->first,i) == eval_tree(tree->second,i)) return 1.0; else return 0.0; } if (tree->type == NE) { if (eval_tree(tree->first,i) != eval_tree(tree->second,i)) return 1.0; else return 0.0; } if (tree->type == LT) { if (eval_tree(tree->first,i) < eval_tree(tree->second,i)) return 1.0; else return 0.0; } if (tree->type == LE) { if (eval_tree(tree->first,i) <= eval_tree(tree->second,i)) return 1.0; else return 0.0; } if (tree->type == GT) { if (eval_tree(tree->first,i) > eval_tree(tree->second,i)) return 1.0; else return 0.0; } if (tree->type == GE) { if (eval_tree(tree->first,i) >= eval_tree(tree->second,i)) return 1.0; else return 0.0; } if (tree->type == AND) { if (eval_tree(tree->first,i) != 0.0 && eval_tree(tree->second,i) != 0.0) return 1.0; else return 0.0; } if (tree->type == OR) { if (eval_tree(tree->first,i) != 0.0 || eval_tree(tree->second,i) != 0.0) return 1.0; else return 0.0; } if (tree->type == SQRT) { arg1 = eval_tree(tree->first,i); if (arg1 < 0.0) error->one(FLERR,"Sqrt of negative value in variable formula"); return sqrt(arg1); } if (tree->type == EXP) return exp(eval_tree(tree->first,i)); if (tree->type == LN) { arg1 = eval_tree(tree->first,i); if (arg1 <= 0.0) error->one(FLERR,"Log of zero/negative value in variable formula"); return log(arg1); } if (tree->type == LOG) { arg1 = eval_tree(tree->first,i); if (arg1 <= 0.0) error->one(FLERR,"Log of 
zero/negative value in variable formula"); return log10(arg1); } if (tree->type == ABS) return fabs(eval_tree(tree->first,i)); if (tree->type == SIN) return sin(eval_tree(tree->first,i)); if (tree->type == COS) return cos(eval_tree(tree->first,i)); if (tree->type == TAN) return tan(eval_tree(tree->first,i)); if (tree->type == ASIN) { arg1 = eval_tree(tree->first,i); if (arg1 < -1.0 || arg1 > 1.0) error->one(FLERR,"Arcsin of invalid value in variable formula"); return asin(arg1); } if (tree->type == ACOS) { arg1 = eval_tree(tree->first,i); if (arg1 < -1.0 || arg1 > 1.0) error->one(FLERR,"Arccos of invalid value in variable formula"); return acos(arg1); } if (tree->type == ATAN) return atan(eval_tree(tree->first,i)); if (tree->type == ATAN2) return atan2(eval_tree(tree->first,i),eval_tree(tree->second,i)); if (tree->type == RANDOM) { double lower = eval_tree(tree->first,i); double upper = eval_tree(tree->second,i); if (randomatom == NULL) { int seed = static_cast (eval_tree(tree->extra[0],i)); if (seed <= 0) error->one(FLERR,"Invalid math function in variable formula"); randomatom = new RanMars(lmp,seed+me); } return randomatom->uniform()*(upper-lower)+lower; } if (tree->type == NORMAL) { double mu = eval_tree(tree->first,i); double sigma = eval_tree(tree->second,i); if (sigma < 0.0) error->one(FLERR,"Invalid math function in variable formula"); if (randomatom == NULL) { int seed = static_cast (eval_tree(tree->extra[0],i)); if (seed <= 0) error->one(FLERR,"Invalid math function in variable formula"); randomatom = new RanMars(lmp,seed+me); } return mu + sigma*randomatom->gaussian(); } if (tree->type == CEIL) return ceil(eval_tree(tree->first,i)); if (tree->type == FLOOR) return floor(eval_tree(tree->first,i)); if (tree->type == ROUND) return MYROUND(eval_tree(tree->first,i)); if (tree->type == RAMP) { arg1 = eval_tree(tree->first,i); arg2 = eval_tree(tree->second,i); double delta = update->ntimestep - update->beginstep; if (delta != 0.0) delta /= update->endstep - 
update->beginstep; arg = arg1 + delta*(arg2-arg1); return arg; } if (tree->type == STAGGER) { int ivalue1 = static_cast (eval_tree(tree->first,i)); int ivalue2 = static_cast (eval_tree(tree->second,i)); if (ivalue1 <= 0 || ivalue2 <= 0 || ivalue1 <= ivalue2) error->one(FLERR,"Invalid math function in variable formula"); int lower = update->ntimestep/ivalue1 * ivalue1; int delta = update->ntimestep - lower; if (delta < ivalue2) arg = lower+ivalue2; else arg = lower+ivalue1; return arg; } if (tree->type == LOGFREQ) { int ivalue1 = static_cast (eval_tree(tree->first,i)); int ivalue2 = static_cast (eval_tree(tree->second,i)); int ivalue3 = static_cast (eval_tree(tree->extra[0],i)); if (ivalue1 <= 0 || ivalue2 <= 0 || ivalue3 <= 0 || ivalue2 >= ivalue3) error->one(FLERR,"Invalid math function in variable formula"); if (update->ntimestep < ivalue1) arg = ivalue1; else { int lower = ivalue1; while (update->ntimestep >= ivalue3*lower) lower *= ivalue3; int multiple = update->ntimestep/lower; if (multiple < ivalue2) arg = (multiple+1)*lower; else arg = lower*ivalue3; } return arg; } if (tree->type == LOGFREQ2) { int ivalue1 = static_cast (eval_tree(tree->first,i)); int ivalue2 = static_cast (eval_tree(tree->second,i)); int ivalue3 = static_cast (eval_tree(tree->extra[0],i)); if (ivalue1 <= 0 || ivalue2 <= 0 || ivalue3 <= 0 ) error->all(FLERR,"Invalid math function in variable formula"); if (update->ntimestep < ivalue1) arg = ivalue1; else { arg = ivalue1; double delta = ivalue1*(ivalue3-1.0)/ivalue2; int count = 0; while (update->ntimestep >= arg) { arg += delta; count++; if (count % ivalue2 == 0) delta *= ivalue3; } } arg = ceil(arg); return arg; } if (tree->type == STRIDE) { int ivalue1 = static_cast (eval_tree(tree->first,i)); int ivalue2 = static_cast (eval_tree(tree->second,i)); int ivalue3 = static_cast (eval_tree(tree->extra[0],i)); if (ivalue1 < 0 || ivalue2 < 0 || ivalue3 <= 0 || ivalue1 > ivalue2) error->one(FLERR,"Invalid math function in variable formula"); if 
(update->ntimestep < ivalue1) arg = ivalue1; else if (update->ntimestep < ivalue2) { int offset = update->ntimestep - ivalue1; arg = ivalue1 + (offset/ivalue3)*ivalue3 + ivalue3; if (arg > ivalue2) arg = MAXBIGINT; } else arg = MAXBIGINT; return arg; } if (tree->type == STRIDE2) { int ivalue1 = static_cast (eval_tree(tree->first,i)); int ivalue2 = static_cast (eval_tree(tree->second,i)); int ivalue3 = static_cast (eval_tree(tree->extra[0],i)); int ivalue4 = static_cast (eval_tree(tree->extra[1],i)); int ivalue5 = static_cast (eval_tree(tree->extra[2],i)); int ivalue6 = static_cast (eval_tree(tree->extra[3],i)); if (ivalue1 < 0 || ivalue2 < 0 || ivalue3 <= 0 || ivalue1 > ivalue2) error->one(FLERR,"Invalid math function in variable formula"); if (ivalue4 < 0 || ivalue5 < 0 || ivalue6 <= 0 || ivalue4 > ivalue5) error->one(FLERR,"Invalid math function in variable formula"); if (ivalue4 < ivalue1 || ivalue5 > ivalue2) error->one(FLERR,"Invalid math function in variable formula"); bigint istep; if (update->ntimestep < ivalue1) istep = ivalue1; else if (update->ntimestep < ivalue2) { if (update->ntimestep < ivalue4 || update->ntimestep > ivalue5) { int offset = update->ntimestep - ivalue1; istep = ivalue1 + (offset/ivalue3)*ivalue3 + ivalue3; if (update->ntimestep < ivalue2 && istep > ivalue4) tree->value = ivalue4; } else { int offset = update->ntimestep - ivalue4; istep = ivalue4 + (offset/ivalue6)*ivalue6 + ivalue6; if (istep > ivalue5) { int offset = ivalue5 - ivalue1; istep = ivalue1 + (offset/ivalue3)*ivalue3 + ivalue3; if (istep > ivalue2) istep = MAXBIGINT; } } } else istep = MAXBIGINT; arg = istep; return arg; } if (tree->type == VDISPLACE) { arg1 = eval_tree(tree->first,i); arg2 = eval_tree(tree->second,i); double delta = update->ntimestep - update->beginstep; arg = arg1 + arg2*delta*update->dt; return arg; } if (tree->type == SWIGGLE) { arg1 = eval_tree(tree->first,i); arg2 = eval_tree(tree->second,i); arg3 = eval_tree(tree->extra[0],i); if (arg3 == 0.0) 
error->one(FLERR,"Invalid math function in variable formula"); double delta = update->ntimestep - update->beginstep; double omega = 2.0*MY_PI/arg3; arg = arg1 + arg2*sin(omega*delta*update->dt); return arg; } if (tree->type == CWIGGLE) { arg1 = eval_tree(tree->first,i); arg2 = eval_tree(tree->second,i); arg3 = eval_tree(tree->extra[0],i); if (arg3 == 0.0) error->one(FLERR,"Invalid math function in variable formula"); double delta = update->ntimestep - update->beginstep; double omega = 2.0*MY_PI/arg3; arg = arg1 + arg2*(1.0-cos(omega*delta*update->dt)); return arg; } if (tree->type == GMASK) { if (atom->mask[i] & tree->ivalue1) return 1.0; else return 0.0; } if (tree->type == RMASK) { if (domain->regions[tree->ivalue1]->match(atom->x[i][0], atom->x[i][1], atom->x[i][2])) return 1.0; else return 0.0; } if (tree->type == GRMASK) { if ((atom->mask[i] & tree->ivalue1) && (domain->regions[tree->ivalue2]->match(atom->x[i][0], atom->x[i][1], atom->x[i][2]))) return 1.0; else return 0.0; } return 0.0; } /* ---------------------------------------------------------------------- */ void Variable::free_tree(Tree *tree) { if (tree->first) free_tree(tree->first); if (tree->second) free_tree(tree->second); if (tree->nextra) { for (int i = 0; i < tree->nextra; i++) free_tree(tree->extra[i]); delete [] tree->extra; } if (tree->type == ATOMARRAY && tree->selfalloc) memory->destroy(tree->array); delete tree; } /* ---------------------------------------------------------------------- find matching parenthesis in str, allocate contents = str between parens i = left paren return loc or right paren ------------------------------------------------------------------------- */ int Variable::find_matching_paren(char *str, int i,char *&contents) { // istop = matching ')' at same level, allowing for nested parens int istart = i; int ilevel = 0; while (1) { i++; if (!str[i]) break; if (str[i] == '(') ilevel++; else if (str[i] == ')' && ilevel) ilevel--; else if (str[i] == ')') break; } if 
(!str[i]) error->all(FLERR,"Invalid syntax in variable formula"); int istop = i; int n = istop - istart - 1; contents = new char[n+1]; strncpy(contents,&str[istart+1],n); contents[n] = '\0'; return istop; } /* ---------------------------------------------------------------------- find int between brackets and return it return a tagint, since value can be an atom ID ptr initially points to left bracket return it pointing to right bracket error if no right bracket or brackets are empty or index = 0 if varallow = 0: error if any between-bracket chars are non-digits if varallow = 1: also allow for v_name, where name is variable name ------------------------------------------------------------------------- */ tagint Variable::int_between_brackets(char *&ptr, int varallow) { int varflag; tagint index; char *start = ++ptr; if (varallow && strstr(ptr,"v_") == ptr) { varflag = 1; while (*ptr && *ptr != ']') { if (!isalnum(*ptr) && *ptr != '_') error->all(FLERR,"Variable name between brackets must be " "alphanumeric or underscore characters"); ptr++; } } else { varflag = 0; while (*ptr && *ptr != ']') { if (!isdigit(*ptr)) error->all(FLERR,"Non digit character between brackets in variable"); ptr++; } } if (*ptr != ']') error->all(FLERR,"Mismatched brackets in variable"); if (ptr == start) error->all(FLERR,"Empty brackets in variable"); *ptr = '\0'; // evaluate index as floating point variable or as tagint via ATOTAGINT() if (varflag) { char *id = start+2; int ivar = find(id); if (ivar < 0) error->all(FLERR,"Invalid variable name in variable formula"); char *var = retrieve(id); if (var == NULL) error->all(FLERR,"Invalid variable evaluation in variable formula"); index = static_cast (atof(var)); } else index = ATOTAGINT(start); *ptr = ']'; if (index == 0) error->all(FLERR,"Index between variable brackets must be positive"); return index; } /* ---------------------------------------------------------------------- process a math function in formula push result onto tree or arg 
stack word = math function contents = str between parentheses with comma-separated args return 0 if not a match, 1 if successfully processed customize by adding a math function: sqrt(),exp(),ln(),log(),abs(),sin(),cos(),tan(),asin(),acos(),atan(), atan2(y,x),random(x,y,z),normal(x,y,z),ceil(),floor(),round(), ramp(x,y),stagger(x,y),logfreq(x,y,z),logfreq2(x,y,z), stride(x,y,z),stride2(x,y,z,a,b,c),vdisplace(x,y),swiggle(x,y,z), cwiggle(x,y,z) ------------------------------------------------------------------------- */ int Variable::math_function(char *word, char *contents, Tree **tree, Tree **treestack, int &ntreestack, double *argstack, int &nargstack) { // word not a match to any math function if (strcmp(word,"sqrt") && strcmp(word,"exp") && strcmp(word,"ln") && strcmp(word,"log") && strcmp(word,"abs") && strcmp(word,"sin") && strcmp(word,"cos") && strcmp(word,"tan") && strcmp(word,"asin") && strcmp(word,"acos") && strcmp(word,"atan") && strcmp(word,"atan2") && strcmp(word,"random") && strcmp(word,"normal") && strcmp(word,"ceil") && strcmp(word,"floor") && strcmp(word,"round") && strcmp(word,"ramp") && strcmp(word,"stagger") && strcmp(word,"logfreq") && strcmp(word,"logfreq2") && strcmp(word,"stride") && strcmp(word,"stride2") && strcmp(word,"vdisplace") && strcmp(word,"swiggle") && strcmp(word,"cwiggle")) return 0; // parse contents for comma-separated args // narg = number of args, args = strings between commas char *args[MAXFUNCARG]; int narg = parse_args(contents,args); Tree *newtree; double value1,value2; double values[MAXFUNCARG-2]; if (tree) { newtree = new Tree(); newtree->first = newtree->second = NULL; newtree->nextra = 0; Tree *argtree; evaluate(args[0],&argtree); newtree->first = argtree; if (narg > 1) { evaluate(args[1],&argtree); newtree->second = argtree; if (narg > 2) { newtree->nextra = narg-2; newtree->extra = new Tree*[narg-2]; for (int i = 2; i < narg; i++) { evaluate(args[i],&argtree); newtree->extra[i-2] = argtree; } } } 
treestack[ntreestack++] = newtree; } else { value1 = evaluate(args[0],NULL); if (narg > 1) { value2 = evaluate(args[1],NULL); if (narg > 2) { for (int i = 2; i < narg; i++) values[i-2] = evaluate(args[i],NULL); } } } // individual math functions // customize by adding a function if (strcmp(word,"sqrt") == 0) { if (narg != 1) error->all(FLERR,"Invalid math function in variable formula"); if (tree) newtree->type = SQRT; else { if (value1 < 0.0) error->all(FLERR,"Sqrt of negative value in variable formula"); argstack[nargstack++] = sqrt(value1); } } else if (strcmp(word,"exp") == 0) { if (narg != 1) error->all(FLERR,"Invalid math function in variable formula"); if (tree) newtree->type = EXP; else argstack[nargstack++] = exp(value1); } else if (strcmp(word,"ln") == 0) { if (narg != 1) error->all(FLERR,"Invalid math function in variable formula"); if (tree) newtree->type = LN; else { if (value1 <= 0.0) error->all(FLERR,"Log of zero/negative value in variable formula"); argstack[nargstack++] = log(value1); } } else if (strcmp(word,"log") == 0) { if (narg != 1) error->all(FLERR,"Invalid math function in variable formula"); if (tree) newtree->type = LOG; else { if (value1 <= 0.0) error->all(FLERR,"Log of zero/negative value in variable formula"); argstack[nargstack++] = log10(value1); } } else if (strcmp(word,"abs") == 0) { if (narg != 1) error->all(FLERR,"Invalid math function in variable formula"); if (tree) newtree->type = ABS; else argstack[nargstack++] = fabs(value1); } else if (strcmp(word,"sin") == 0) { if (narg != 1) error->all(FLERR,"Invalid math function in variable formula"); if (tree) newtree->type = SIN; else argstack[nargstack++] = sin(value1); } else if (strcmp(word,"cos") == 0) { if (narg != 1) error->all(FLERR,"Invalid math function in variable formula"); if (tree) newtree->type = COS; else argstack[nargstack++] = cos(value1); } else if (strcmp(word,"tan") == 0) { if (narg != 1) error->all(FLERR,"Invalid math function in variable formula"); if (tree) 
newtree->type = TAN; else argstack[nargstack++] = tan(value1); } else if (strcmp(word,"asin") == 0) { if (narg != 1) error->all(FLERR,"Invalid math function in variable formula"); if (tree) newtree->type = ASIN; else { if (value1 < -1.0 || value1 > 1.0) error->all(FLERR,"Arcsin of invalid value in variable formula"); argstack[nargstack++] = asin(value1); } } else if (strcmp(word,"acos") == 0) { if (narg != 1) error->all(FLERR,"Invalid math function in variable formula"); if (tree) newtree->type = ACOS; else { if (value1 < -1.0 || value1 > 1.0) error->all(FLERR,"Arccos of invalid value in variable formula"); argstack[nargstack++] = acos(value1); } } else if (strcmp(word,"atan") == 0) { if (narg != 1) error->all(FLERR,"Invalid math function in variable formula"); if (tree) newtree->type = ATAN; else argstack[nargstack++] = atan(value1); } else if (strcmp(word,"atan2") == 0) { if (narg != 2) error->all(FLERR,"Invalid math function in variable formula"); if (tree) newtree->type = ATAN2; else argstack[nargstack++] = atan2(value1,value2); } else if (strcmp(word,"random") == 0) { if (narg != 3) error->all(FLERR,"Invalid math function in variable formula"); if (tree) newtree->type = RANDOM; else { if (randomequal == NULL) { int seed = static_cast (values[0]); if (seed <= 0) error->all(FLERR,"Invalid math function in variable formula"); randomequal = new RanMars(lmp,seed); } argstack[nargstack++] = randomequal->uniform()*(value2-value1) + value1; } } else if (strcmp(word,"normal") == 0) { if (narg != 3) error->all(FLERR,"Invalid math function in variable formula"); if (tree) newtree->type = NORMAL; else { if (value2 < 0.0) error->all(FLERR,"Invalid math function in variable formula"); if (randomequal == NULL) { int seed = static_cast (values[0]); if (seed <= 0) error->all(FLERR,"Invalid math function in variable formula"); randomequal = new RanMars(lmp,seed); } argstack[nargstack++] = value1 + value2*randomequal->gaussian(); } } else if (strcmp(word,"ceil") == 0) { if (narg 
!= 1) error->all(FLERR,"Invalid math function in variable formula"); if (tree) newtree->type = CEIL; else argstack[nargstack++] = ceil(value1); } else if (strcmp(word,"floor") == 0) { if (narg != 1) error->all(FLERR,"Invalid math function in variable formula"); if (tree) newtree->type = FLOOR; else argstack[nargstack++] = floor(value1); } else if (strcmp(word,"round") == 0) { if (narg != 1) error->all(FLERR,"Invalid math function in variable formula"); if (tree) newtree->type = ROUND; else argstack[nargstack++] = MYROUND(value1); } else if (strcmp(word,"ramp") == 0) { if (narg != 2) error->all(FLERR,"Invalid math function in variable formula"); if (update->whichflag == 0) error->all(FLERR,"Cannot use ramp in variable formula between runs"); if (tree) newtree->type = RAMP; else { double delta = update->ntimestep - update->beginstep; if (delta != 0.0) delta /= update->endstep - update->beginstep; double value = value1 + delta*(value2-value1); argstack[nargstack++] = value; } } else if (strcmp(word,"stagger") == 0) { if (narg != 2) error->all(FLERR,"Invalid math function in variable formula"); if (tree) newtree->type = STAGGER; else { int ivalue1 = static_cast (value1); int ivalue2 = static_cast (value2); if (ivalue1 <= 0 || ivalue2 <= 0 || ivalue1 <= ivalue2) error->all(FLERR,"Invalid math function in variable formula"); int lower = update->ntimestep/ivalue1 * ivalue1; int delta = update->ntimestep - lower; double value; if (delta < ivalue2) value = lower+ivalue2; else value = lower+ivalue1; argstack[nargstack++] = value; } } else if (strcmp(word,"logfreq") == 0) { if (narg != 3) error->all(FLERR,"Invalid math function in variable formula"); if (tree) newtree->type = LOGFREQ; else { int ivalue1 = static_cast (value1); int ivalue2 = static_cast (value2); int ivalue3 = static_cast (values[0]); if (ivalue1 <= 0 || ivalue2 <= 0 || ivalue3 <= 0 || ivalue2 >= ivalue3) error->all(FLERR,"Invalid math function in variable formula"); double value; if (update->ntimestep < 
ivalue1) value = ivalue1; else { int lower = ivalue1; while (update->ntimestep >= ivalue3*lower) lower *= ivalue3; int multiple = update->ntimestep/lower; if (multiple < ivalue2) value = (multiple+1)*lower; else value = lower*ivalue3; } argstack[nargstack++] = value; } } else if (strcmp(word,"logfreq2") == 0) { if (narg != 3) error->all(FLERR,"Invalid math function in variable formula"); if (tree) newtree->type = LOGFREQ2; else { int ivalue1 = static_cast (value1); int ivalue2 = static_cast (value2); int ivalue3 = static_cast (values[0]); if (ivalue1 <= 0 || ivalue2 <= 0 || ivalue3 <= 0 ) error->all(FLERR,"Invalid math function in variable formula"); double value; if (update->ntimestep < ivalue1) value = ivalue1; else { value = ivalue1; double delta = ivalue1*(ivalue3-1.0)/ivalue2; int count = 0; while (update->ntimestep >= value) { value += delta; count++; if (count % ivalue2 == 0) delta *= ivalue3; } } argstack[nargstack++] = ceil(value); } } else if (strcmp(word,"stride") == 0) { if (narg != 3) error->all(FLERR,"Invalid math function in variable formula"); if (tree) newtree->type = STRIDE; else { int ivalue1 = static_cast (value1); int ivalue2 = static_cast (value2); int ivalue3 = static_cast (values[0]); if (ivalue1 < 0 || ivalue2 < 0 || ivalue3 <= 0 || ivalue1 > ivalue2) error->one(FLERR,"Invalid math function in variable formula"); double value; if (update->ntimestep < ivalue1) value = ivalue1; else if (update->ntimestep < ivalue2) { int offset = update->ntimestep - ivalue1; value = ivalue1 + (offset/ivalue3)*ivalue3 + ivalue3; if (value > ivalue2) value = MAXBIGINT; } else value = MAXBIGINT; argstack[nargstack++] = value; } } else if (strcmp(word,"stride2") == 0) { if (narg != 6) error->all(FLERR,"Invalid math function in variable formula"); if (tree) newtree->type = STRIDE2; else { int ivalue1 = static_cast (value1); int ivalue2 = static_cast (value2); int ivalue3 = static_cast (values[0]); int ivalue4 = static_cast (values[1]); int ivalue5 = static_cast 
(values[2]); int ivalue6 = static_cast (values[3]); if (ivalue1 < 0 || ivalue2 < 0 || ivalue3 <= 0 || ivalue1 > ivalue2) error->one(FLERR,"Invalid math function in variable formula"); if (ivalue4 < 0 || ivalue5 < 0 || ivalue6 <= 0 || ivalue4 > ivalue5) error->one(FLERR,"Invalid math function in variable formula"); if (ivalue4 < ivalue1 || ivalue5 > ivalue2) error->one(FLERR,"Invalid math function in variable formula"); bigint istep; if (update->ntimestep < ivalue1) istep = ivalue1; else if (update->ntimestep < ivalue2) { if (update->ntimestep < ivalue4 || update->ntimestep > ivalue5) { int offset = update->ntimestep - ivalue1; istep = ivalue1 + (offset/ivalue3)*ivalue3 + ivalue3; if (update->ntimestep < ivalue4 && istep > ivalue4) istep = ivalue4; } else { int offset = update->ntimestep - ivalue4; istep = ivalue4 + (offset/ivalue6)*ivalue6 + ivalue6; if (istep > ivalue5) { int offset = ivalue5 - ivalue1; istep = ivalue1 + (offset/ivalue3)*ivalue3 + ivalue3; if (istep > ivalue2) istep = MAXBIGINT; } } } else istep = MAXBIGINT; double value = istep; argstack[nargstack++] = value; } } else if (strcmp(word,"vdisplace") == 0) { if (narg != 2) error->all(FLERR,"Invalid math function in variable formula"); if (update->whichflag == 0) error->all(FLERR,"Cannot use vdisplace in variable formula between runs"); if (tree) newtree->type = VDISPLACE; else { double delta = update->ntimestep - update->beginstep; double value = value1 + value2*delta*update->dt; argstack[nargstack++] = value; } } else if (strcmp(word,"swiggle") == 0) { if (narg != 3) error->all(FLERR,"Invalid math function in variable formula"); if (update->whichflag == 0) error->all(FLERR,"Cannot use swiggle in variable formula between runs"); if (tree) newtree->type = CWIGGLE; else { if (values[0] == 0.0) error->all(FLERR,"Invalid math function in variable formula"); double delta = update->ntimestep - update->beginstep; double omega = 2.0*MY_PI/values[0]; double value = value1 + value2*sin(omega*delta*update->dt); 
argstack[nargstack++] = value; } } else if (strcmp(word,"cwiggle") == 0) { if (narg != 3) error->all(FLERR,"Invalid math function in variable formula"); if (update->whichflag == 0) error->all(FLERR,"Cannot use cwiggle in variable formula between runs"); if (tree) newtree->type = CWIGGLE; else { if (values[0] == 0.0) error->all(FLERR,"Invalid math function in variable formula"); double delta = update->ntimestep - update->beginstep; double omega = 2.0*MY_PI/values[0]; double value = value1 + value2*(1.0-cos(omega*delta*update->dt)); argstack[nargstack++] = value; } } // delete stored args for (int i = 0; i < narg; i++) delete [] args[i]; return 1; } /* ---------------------------------------------------------------------- process a group function in formula with optional region arg push result onto tree or arg stack word = group function contents = str between parentheses with one,two,three args return 0 if not a match, 1 if successfully processed customize by adding a group function with optional region arg: count(group),mass(group),charge(group), xcm(group,dim),vcm(group,dim),fcm(group,dim), bound(group,xmin),gyration(group),ke(group),angmom(group,dim), torque(group,dim),inertia(group,dim),omega(group,dim) ------------------------------------------------------------------------- */ int Variable::group_function(char *word, char *contents, Tree **tree, Tree **treestack, int &ntreestack, double *argstack, int &nargstack) { // word not a match to any group function if (strcmp(word,"count") && strcmp(word,"mass") && strcmp(word,"charge") && strcmp(word,"xcm") && strcmp(word,"vcm") && strcmp(word,"fcm") && strcmp(word,"bound") && strcmp(word,"gyration") && strcmp(word,"ke") && strcmp(word,"angmom") && strcmp(word,"torque") && strcmp(word,"inertia") && strcmp(word,"omega")) return 0; // parse contents for comma-separated args // narg = number of args, args = strings between commas char *args[MAXFUNCARG]; int narg = parse_args(contents,args); // group to operate on int 
igroup = group->find(args[0]); if (igroup == -1) error->all(FLERR,"Group ID in variable formula does not exist"); // match word to group function double value; if (strcmp(word,"count") == 0) { if (narg == 1) value = group->count(igroup); else if (narg == 2) value = group->count(igroup,region_function(args[1])); else error->all(FLERR,"Invalid group function in variable formula"); } else if (strcmp(word,"mass") == 0) { if (narg == 1) value = group->mass(igroup); else if (narg == 2) value = group->mass(igroup,region_function(args[1])); else error->all(FLERR,"Invalid group function in variable formula"); } else if (strcmp(word,"charge") == 0) { if (narg == 1) value = group->charge(igroup); else if (narg == 2) value = group->charge(igroup,region_function(args[1])); else error->all(FLERR,"Invalid group function in variable formula"); } else if (strcmp(word,"xcm") == 0) { atom->check_mass(); double xcm[3]; if (narg == 2) { double masstotal = group->mass(igroup); group->xcm(igroup,masstotal,xcm); } else if (narg == 3) { int iregion = region_function(args[2]); double masstotal = group->mass(igroup,iregion); group->xcm(igroup,masstotal,xcm,iregion); } else error->all(FLERR,"Invalid group function in variable formula"); if (strcmp(args[1],"x") == 0) value = xcm[0]; else if (strcmp(args[1],"y") == 0) value = xcm[1]; else if (strcmp(args[1],"z") == 0) value = xcm[2]; else error->all(FLERR,"Invalid group function in variable formula"); } else if (strcmp(word,"vcm") == 0) { atom->check_mass(); double vcm[3]; if (narg == 2) { double masstotal = group->mass(igroup); group->vcm(igroup,masstotal,vcm); } else if (narg == 3) { int iregion = region_function(args[2]); double masstotal = group->mass(igroup,iregion); group->vcm(igroup,masstotal,vcm,iregion); } else error->all(FLERR,"Invalid group function in variable formula"); if (strcmp(args[1],"x") == 0) value = vcm[0]; else if (strcmp(args[1],"y") == 0) value = vcm[1]; else if (strcmp(args[1],"z") == 0) value = vcm[2]; else 
error->all(FLERR,"Invalid group function in variable formula"); } else if (strcmp(word,"fcm") == 0) { double fcm[3]; if (narg == 2) group->fcm(igroup,fcm); else if (narg == 3) group->fcm(igroup,fcm,region_function(args[2])); else error->all(FLERR,"Invalid group function in variable formula"); if (strcmp(args[1],"x") == 0) value = fcm[0]; else if (strcmp(args[1],"y") == 0) value = fcm[1]; else if (strcmp(args[1],"z") == 0) value = fcm[2]; else error->all(FLERR,"Invalid group function in variable formula"); } else if (strcmp(word,"bound") == 0) { double minmax[6]; if (narg == 2) group->bounds(igroup,minmax); else if (narg == 3) group->bounds(igroup,minmax,region_function(args[2])); else error->all(FLERR,"Invalid group function in variable formula"); if (strcmp(args[1],"xmin") == 0) value = minmax[0]; else if (strcmp(args[1],"xmax") == 0) value = minmax[1]; else if (strcmp(args[1],"ymin") == 0) value = minmax[2]; else if (strcmp(args[1],"ymax") == 0) value = minmax[3]; else if (strcmp(args[1],"zmin") == 0) value = minmax[4]; else if (strcmp(args[1],"zmax") == 0) value = minmax[5]; else error->all(FLERR,"Invalid group function in variable formula"); } else if (strcmp(word,"gyration") == 0) { atom->check_mass(); double xcm[3]; if (narg == 1) { double masstotal = group->mass(igroup); group->xcm(igroup,masstotal,xcm); value = group->gyration(igroup,masstotal,xcm); } else if (narg == 2) { int iregion = region_function(args[1]); double masstotal = group->mass(igroup,iregion); group->xcm(igroup,masstotal,xcm,iregion); value = group->gyration(igroup,masstotal,xcm,iregion); } else error->all(FLERR,"Invalid group function in variable formula"); } else if (strcmp(word,"ke") == 0) { if (narg == 1) value = group->ke(igroup); else if (narg == 2) value = group->ke(igroup,region_function(args[1])); else error->all(FLERR,"Invalid group function in variable formula"); } else if (strcmp(word,"angmom") == 0) { atom->check_mass(); double xcm[3],lmom[3]; if (narg == 2) { double masstotal = 
group->mass(igroup); group->xcm(igroup,masstotal,xcm); group->angmom(igroup,xcm,lmom); } else if (narg == 3) { int iregion = region_function(args[2]); double masstotal = group->mass(igroup,iregion); group->xcm(igroup,masstotal,xcm,iregion); group->angmom(igroup,xcm,lmom,iregion); } else error->all(FLERR,"Invalid group function in variable formula"); if (strcmp(args[1],"x") == 0) value = lmom[0]; else if (strcmp(args[1],"y") == 0) value = lmom[1]; else if (strcmp(args[1],"z") == 0) value = lmom[2]; else error->all(FLERR,"Invalid group function in variable formula"); } else if (strcmp(word,"torque") == 0) { atom->check_mass(); double xcm[3],tq[3]; if (narg == 2) { double masstotal = group->mass(igroup); group->xcm(igroup,masstotal,xcm); group->torque(igroup,xcm,tq); } else if (narg == 3) { int iregion = region_function(args[2]); double masstotal = group->mass(igroup,iregion); group->xcm(igroup,masstotal,xcm,iregion); group->torque(igroup,xcm,tq,iregion); } else error->all(FLERR,"Invalid group function in variable formula"); if (strcmp(args[1],"x") == 0) value = tq[0]; else if (strcmp(args[1],"y") == 0) value = tq[1]; else if (strcmp(args[1],"z") == 0) value = tq[2]; else error->all(FLERR,"Invalid group function in variable formula"); } else if (strcmp(word,"inertia") == 0) { atom->check_mass(); double xcm[3],inertia[3][3]; if (narg == 2) { double masstotal = group->mass(igroup); group->xcm(igroup,masstotal,xcm); group->inertia(igroup,xcm,inertia); } else if (narg == 3) { int iregion = region_function(args[2]); double masstotal = group->mass(igroup,iregion); group->xcm(igroup,masstotal,xcm,iregion); group->inertia(igroup,xcm,inertia,iregion); } else error->all(FLERR,"Invalid group function in variable formula"); if (strcmp(args[1],"xx") == 0) value = inertia[0][0]; else if (strcmp(args[1],"yy") == 0) value = inertia[1][1]; else if (strcmp(args[1],"zz") == 0) value = inertia[2][2]; else if (strcmp(args[1],"xy") == 0) value = inertia[0][1]; else if (strcmp(args[1],"yz") 
== 0) value = inertia[1][2]; else if (strcmp(args[1],"xz") == 0) value = inertia[0][2]; else error->all(FLERR,"Invalid group function in variable formula"); } else if (strcmp(word,"omega") == 0) { atom->check_mass(); double xcm[3],angmom[3],inertia[3][3],omega[3]; if (narg == 2) { double masstotal = group->mass(igroup); group->xcm(igroup,masstotal,xcm); group->angmom(igroup,xcm,angmom); group->inertia(igroup,xcm,inertia); group->omega(angmom,inertia,omega); } else if (narg == 3) { int iregion = region_function(args[2]); double masstotal = group->mass(igroup,iregion); group->xcm(igroup,masstotal,xcm,iregion); group->angmom(igroup,xcm,angmom,iregion); group->inertia(igroup,xcm,inertia,iregion); group->omega(angmom,inertia,omega); } else error->all(FLERR,"Invalid group function in variable formula"); if (strcmp(args[1],"x") == 0) value = omega[0]; else if (strcmp(args[1],"y") == 0) value = omega[1]; else if (strcmp(args[1],"z") == 0) value = omega[2]; else error->all(FLERR,"Invalid group function in variable formula"); } // delete stored args for (int i = 0; i < narg; i++) delete [] args[i]; // save value in tree or on argstack if (tree) { Tree *newtree = new Tree(); newtree->type = VALUE; newtree->value = value; newtree->first = newtree->second = NULL; newtree->nextra = 0; treestack[ntreestack++] = newtree; } else argstack[nargstack++] = value; return 1; } /* ---------------------------------------------------------------------- */ int Variable::region_function(char *id) { int iregion = domain->find_region(id); if (iregion == -1) error->all(FLERR,"Region ID in variable formula does not exist"); // init region in case sub-regions have been deleted domain->regions[iregion]->init(); return iregion; } /* ---------------------------------------------------------------------- process a special function in formula push result onto tree or arg stack word = special function contents = str between parentheses with one,two,three args return 0 if not a match, 1 if successfully 
processed customize by adding a special function: sum(x),min(x),max(x),ave(x),trap(x),slope(x), gmask(x),rmask(x),grmask(x,y),next(x) ------------------------------------------------------------------------- */ int Variable::special_function(char *word, char *contents, Tree **tree, Tree **treestack, int &ntreestack, double *argstack, int &nargstack) { double value,xvalue,sx,sy,sxx,sxy; // word not a match to any special function if (strcmp(word,"sum") && strcmp(word,"min") && strcmp(word,"max") && strcmp(word,"ave") && strcmp(word,"trap") && strcmp(word,"slope") && strcmp(word,"gmask") && strcmp(word,"rmask") && - strcmp(word,"grmask") && strcmp(word,"next")) + strcmp(word,"grmask") && strcmp(word,"next") && + strcmp(word,"is_active") && strcmp(word,"is_defined") && + strcmp(word,"is_available")) return 0; // parse contents for comma-separated args // narg = number of args, args = strings between commas char *args[MAXFUNCARG]; int narg = parse_args(contents,args); // special functions that operate on global vectors if (strcmp(word,"sum") == 0 || strcmp(word,"min") == 0 || strcmp(word,"max") == 0 || strcmp(word,"ave") == 0 || strcmp(word,"trap") == 0 || strcmp(word,"slope") == 0) { int method; if (strcmp(word,"sum") == 0) method = SUM; else if (strcmp(word,"min") == 0) method = XMIN; else if (strcmp(word,"max") == 0) method = XMAX; else if (strcmp(word,"ave") == 0) method = AVE; else if (strcmp(word,"trap") == 0) method = TRAP; else if (strcmp(word,"slope") == 0) method = SLOPE; if (narg != 1) error->all(FLERR,"Invalid special function in variable formula"); Compute *compute = NULL; Fix *fix = NULL; int index,nvec,nstride; char *ptr1,*ptr2; if (strstr(args[0],"c_") == args[0]) { ptr1 = strchr(args[0],'['); if (ptr1) { ptr2 = ptr1; index = (int) int_between_brackets(ptr2,0); *ptr1 = '\0'; } else index = 0; int icompute = modify->find_compute(&args[0][2]); if (icompute < 0) error->all(FLERR,"Invalid compute ID in variable formula"); compute = 
modify->compute[icompute]; if (index == 0 && compute->vector_flag) { if (update->whichflag == 0) { if (compute->invoked_vector != update->ntimestep) error->all(FLERR, "Compute used in variable between runs is not current"); } else if (!(compute->invoked_flag & INVOKED_VECTOR)) { compute->compute_vector(); compute->invoked_flag |= INVOKED_VECTOR; } nvec = compute->size_vector; nstride = 1; } else if (index && compute->array_flag) { if (index > compute->size_array_cols) error->all(FLERR,"Variable formula compute array " "is accessed out-of-range"); if (update->whichflag == 0) { if (compute->invoked_array != update->ntimestep) error->all(FLERR, "Compute used in variable between runs is not current"); } else if (!(compute->invoked_flag & INVOKED_ARRAY)) { compute->compute_array(); compute->invoked_flag |= INVOKED_ARRAY; } nvec = compute->size_array_rows; nstride = compute->size_array_cols; } else error->all(FLERR,"Mismatched compute in variable formula"); } else if (strstr(args[0],"f_") == args[0]) { ptr1 = strchr(args[0],'['); if (ptr1) { ptr2 = ptr1; index = (int) int_between_brackets(ptr2,0); *ptr1 = '\0'; } else index = 0; int ifix = modify->find_fix(&args[0][2]); if (ifix < 0) error->all(FLERR,"Invalid fix ID in variable formula"); fix = modify->fix[ifix]; if (index == 0 && fix->vector_flag) { if (update->whichflag > 0 && update->ntimestep % fix->global_freq) error->all(FLERR,"Fix in variable not computed at compatible time"); nvec = fix->size_vector; nstride = 1; } else if (index && fix->array_flag) { if (index > fix->size_array_cols) error->all(FLERR, "Variable formula fix array is accessed out-of-range"); if (update->whichflag > 0 && update->ntimestep % fix->global_freq) error->all(FLERR,"Fix in variable not computed at compatible time"); nvec = fix->size_array_rows; nstride = fix->size_array_cols; } else error->all(FLERR,"Mismatched fix in variable formula"); } else error->all(FLERR,"Invalid special function in variable formula"); value = 0.0; if (method == 
SLOPE) sx = sy = sxx = sxy = 0.0; if (method == XMIN) value = BIG; if (method == XMAX) value = -BIG; if (compute) { double *vec; if (index) { if (compute->array) vec = &compute->array[0][index-1]; else vec = NULL; } else vec = compute->vector; int j = 0; for (int i = 0; i < nvec; i++) { if (method == SUM) value += vec[j]; else if (method == XMIN) value = MIN(value,vec[j]); else if (method == XMAX) value = MAX(value,vec[j]); else if (method == AVE) value += vec[j]; else if (method == TRAP) value += vec[j]; else if (method == SLOPE) { if (nvec > 1) xvalue = (double) i / (nvec-1); else xvalue = 0.0; sx += xvalue; sy += vec[j]; sxx += xvalue*xvalue; sxy += xvalue*vec[j]; } j += nstride; } if (method == TRAP) value -= 0.5*vec[0] + 0.5*vec[nvec-1]; } if (fix) { double one; for (int i = 0; i < nvec; i++) { if (index) one = fix->compute_array(i,index-1); else one = fix->compute_vector(i); if (method == SUM) value += one; else if (method == XMIN) value = MIN(value,one); else if (method == XMAX) value = MAX(value,one); else if (method == AVE) value += one; else if (method == TRAP) value += one; else if (method == SLOPE) { if (nvec > 1) xvalue = (double) i / (nvec-1); else xvalue = 0.0; sx += xvalue; sy += one; sxx += xvalue*xvalue; sxy += xvalue*one; } } if (method == TRAP) { if (index) value -= 0.5*fix->compute_array(0,index-1) + 0.5*fix->compute_array(nvec-1,index-1); else value -= 0.5*fix->compute_vector(0) + 0.5*fix->compute_vector(nvec-1); } } if (method == AVE) value /= nvec; if (method == SLOPE) { double numerator = sxy - sx*sy; double denominator = sxx - sx*sx; if (denominator != 0.0) value = numerator/denominator / nvec; else value = BIG; } // save value in tree or on argstack if (tree) { Tree *newtree = new Tree(); newtree->type = VALUE; newtree->value = value; newtree->first = newtree->second = NULL; newtree->nextra = 0; treestack[ntreestack++] = newtree; } else argstack[nargstack++] = value; // mask special functions } else if (strcmp(word,"gmask") == 0) { if 
(tree == NULL) error->all(FLERR,"Gmask function in equal-style variable formula"); if (narg != 1) error->all(FLERR,"Invalid special function in variable formula"); int igroup = group->find(args[0]); if (igroup == -1) error->all(FLERR,"Group ID in variable formula does not exist"); Tree *newtree = new Tree(); newtree->type = GMASK; newtree->ivalue1 = group->bitmask[igroup]; newtree->first = newtree->second = NULL; newtree->nextra = 0; treestack[ntreestack++] = newtree; } else if (strcmp(word,"rmask") == 0) { if (tree == NULL) error->all(FLERR,"Rmask function in equal-style variable formula"); if (narg != 1) error->all(FLERR,"Invalid special function in variable formula"); int iregion = region_function(args[0]); domain->regions[iregion]->prematch(); Tree *newtree = new Tree(); newtree->type = RMASK; newtree->ivalue1 = iregion; newtree->first = newtree->second = NULL; newtree->nextra = 0; treestack[ntreestack++] = newtree; } else if (strcmp(word,"grmask") == 0) { if (tree == NULL) error->all(FLERR,"Grmask function in equal-style variable formula"); if (narg != 2) error->all(FLERR,"Invalid special function in variable formula"); int igroup = group->find(args[0]); if (igroup == -1) error->all(FLERR,"Group ID in variable formula does not exist"); int iregion = region_function(args[1]); domain->regions[iregion]->prematch(); Tree *newtree = new Tree(); newtree->type = GRMASK; newtree->ivalue1 = group->bitmask[igroup]; newtree->ivalue2 = iregion; newtree->first = newtree->second = NULL; newtree->nextra = 0; treestack[ntreestack++] = newtree; // special function for file-style or atomfile-style variables } else if (strcmp(word,"next") == 0) { if (narg != 1) error->all(FLERR,"Invalid special function in variable formula"); int ivar = find(args[0]); if (ivar < 0) error->all(FLERR,"Variable ID in variable formula does not exist"); // SCALARFILE has single current value, read next one // save value in tree or on argstack if (style[ivar] == SCALARFILE) { double value = 
atof(data[ivar][0]); int done = reader[ivar]->read_scalar(data[ivar][0]); if (done) remove(ivar); if (tree) { Tree *newtree = new Tree(); newtree->type = VALUE; newtree->value = value; newtree->first = newtree->second = NULL; newtree->nextra = 0; treestack[ntreestack++] = newtree; } else argstack[nargstack++] = value; // ATOMFILE has per-atom values, save values in tree // copy current per-atom values into result so can read next ones // set selfalloc = 1 so result will be deleted by free_tree() after eval } else if (style[ivar] == ATOMFILE) { if (tree == NULL) error->all(FLERR,"Atomfile variable in equal-style variable formula"); double *result; memory->create(result,atom->nlocal,"variable:result"); memcpy(result,reader[ivar]->fixstore->vstore,atom->nlocal*sizeof(double)); int done = reader[ivar]->read_peratom(); if (done) remove(ivar); Tree *newtree = new Tree(); newtree->type = ATOMARRAY; newtree->array = result; newtree->nstride = 1; newtree->selfalloc = 1; newtree->first = newtree->second = NULL; newtree->nextra = 0; treestack[ntreestack++] = newtree; } else error->all(FLERR,"Invalid variable style in special function next"); + + } else if (strcmp(word,"is_active") == 0) { + if (narg != 2) + error->all(FLERR,"Invalid is_active() function in variable formula"); + + Info info(lmp); + value = (info.is_active(args[0],args[1])) ? 1.0 : 0.0; + + // save value in tree or on argstack + + if (tree) { + Tree *newtree = new Tree(); + newtree->type = VALUE; + newtree->value = value; + newtree->first = newtree->second = NULL; + newtree->nextra = 0; + treestack[ntreestack++] = newtree; + } else argstack[nargstack++] = value; + + } else if (strcmp(word,"is_available") == 0) { + if (narg != 2) + error->all(FLERR,"Invalid is_available() function in variable formula"); + + Info info(lmp); + value = (info.is_available(args[0],args[1])) ? 
1.0 : 0.0; + + // save value in tree or on argstack + + if (tree) { + Tree *newtree = new Tree(); + newtree->type = VALUE; + newtree->value = value; + newtree->first = newtree->second = NULL; + newtree->nextra = 0; + treestack[ntreestack++] = newtree; + } else argstack[nargstack++] = value; + + } else if (strcmp(word,"is_defined") == 0) { + if (narg != 2) + error->all(FLERR,"Invalid is_defined() function in variable formula"); + + Info info(lmp); + value = (info.is_defined(args[0],args[1])) ? 1.0 : 0.0; + + // save value in tree or on argstack + + if (tree) { + Tree *newtree = new Tree(); + newtree->type = VALUE; + newtree->value = value; + newtree->first = newtree->second = NULL; + newtree->nextra = 0; + treestack[ntreestack++] = newtree; + } else argstack[nargstack++] = value; } // delete stored args for (int i = 0; i < narg; i++) delete [] args[i]; return 1; } /* ---------------------------------------------------------------------- extract a global value from a per-atom quantity in a formula flag = 0 -> word is an atom vector flag = 1 -> vector is a per-atom compute or fix quantity with nstride id = global ID of atom, converted to local index push result onto tree or arg stack customize by adding an atom vector: id,mass,type,mol,x,y,z,vx,vy,vz,fx,fy,fz,q ------------------------------------------------------------------------- */ void Variable::peratom2global(int flag, char *word, double *vector, int nstride, tagint id, Tree **tree, Tree **treestack, int &ntreestack, double *argstack, int &nargstack) { // error check for ID larger than any atom // int_between_brackets() already checked for ID <= 0 if (atom->map_style == 0) error->all(FLERR, "Indexed per-atom vector in variable formula without atom map"); if (id > atom->map_tag_max) error->all(FLERR,"Variable atom ID is too large"); // if ID does not exist, index will be -1 for all procs, // and mine will be set to 0.0 int index = atom->map(id); double mine; if (index >= 0 && index < atom->nlocal) { if (flag == 
0) { if (strcmp(word,"id") == 0) mine = atom->tag[index]; else if (strcmp(word,"mass") == 0) { if (atom->rmass) mine = atom->rmass[index]; else mine = atom->mass[atom->type[index]]; } else if (strcmp(word,"type") == 0) mine = atom->type[index]; else if (strcmp(word,"mol") == 0) { if (!atom->molecule_flag) error->one(FLERR,"Variable uses atom property that isn't allocated"); mine = atom->molecule[index]; } else if (strcmp(word,"x") == 0) mine = atom->x[index][0]; else if (strcmp(word,"y") == 0) mine = atom->x[index][1]; else if (strcmp(word,"z") == 0) mine = atom->x[index][2]; else if (strcmp(word,"vx") == 0) mine = atom->v[index][0]; else if (strcmp(word,"vy") == 0) mine = atom->v[index][1]; else if (strcmp(word,"vz") == 0) mine = atom->v[index][2]; else if (strcmp(word,"fx") == 0) mine = atom->f[index][0]; else if (strcmp(word,"fy") == 0) mine = atom->f[index][1]; else if (strcmp(word,"fz") == 0) mine = atom->f[index][2]; else if (strcmp(word,"q") == 0) { if (!atom->q_flag) error->one(FLERR,"Variable uses atom property that isn't allocated"); mine = atom->q[index]; } else error->one(FLERR,"Invalid atom vector in variable formula"); } else mine = vector[index*nstride]; } else mine = 0.0; double value; MPI_Allreduce(&mine,&value,1,MPI_DOUBLE,MPI_SUM,world); if (tree) { Tree *newtree = new Tree(); newtree->type = VALUE; newtree->value = value; newtree->first = newtree->second = NULL; newtree->nextra = 0; treestack[ntreestack++] = newtree; } else argstack[nargstack++] = value; } /* ---------------------------------------------------------------------- check if word matches an atom vector return 1 if yes, else 0 customize by adding an atom vector: id,mass,type,mol,x,y,z,vx,vy,vz,fx,fy,fz,q ------------------------------------------------------------------------- */ int Variable::is_atom_vector(char *word) { if (strcmp(word,"id") == 0) return 1; if (strcmp(word,"mass") == 0) return 1; if (strcmp(word,"type") == 0) return 1; if (strcmp(word,"mol") == 0) return 1; if 
(strcmp(word,"x") == 0) return 1; if (strcmp(word,"y") == 0) return 1; if (strcmp(word,"z") == 0) return 1; if (strcmp(word,"vx") == 0) return 1; if (strcmp(word,"vy") == 0) return 1; if (strcmp(word,"vz") == 0) return 1; if (strcmp(word,"fx") == 0) return 1; if (strcmp(word,"fy") == 0) return 1; if (strcmp(word,"fz") == 0) return 1; if (strcmp(word,"q") == 0) return 1; return 0; } /* ---------------------------------------------------------------------- process an atom vector in formula push result onto tree word = atom vector customize by adding an atom vector: id,mass,type,mol,x,y,z,vx,vy,vz,fx,fy,fz,q ------------------------------------------------------------------------- */ void Variable::atom_vector(char *word, Tree **tree, Tree **treestack, int &ntreestack) { if (tree == NULL) error->all(FLERR,"Atom vector in equal-style variable formula"); Tree *newtree = new Tree(); newtree->type = ATOMARRAY; newtree->nstride = 3; newtree->selfalloc = 0; newtree->first = newtree->second = NULL; newtree->nextra = 0; treestack[ntreestack++] = newtree; if (strcmp(word,"id") == 0) { if (sizeof(tagint) == sizeof(smallint)) { newtree->type = INTARRAY; newtree->iarray = (int *) atom->tag; } else { newtree->type = BIGINTARRAY; newtree->barray = (bigint *) atom->tag; } newtree->nstride = 1; } else if (strcmp(word,"mass") == 0) { if (atom->rmass) { newtree->nstride = 1; newtree->array = atom->rmass; } else { newtree->type = TYPEARRAY; newtree->array = atom->mass; } } else if (strcmp(word,"type") == 0) { newtree->type = INTARRAY; newtree->nstride = 1; newtree->iarray = atom->type; } else if (strcmp(word,"mol") == 0) { if (!atom->molecule_flag) error->one(FLERR,"Variable uses atom property that isn't allocated"); if (sizeof(tagint) == sizeof(smallint)) { newtree->type = INTARRAY; newtree->iarray = (int *) atom->molecule; } else { newtree->type = BIGINTARRAY; newtree->barray = (bigint *) atom->molecule; } newtree->nstride = 1; } else if (strcmp(word,"x") == 0) newtree->array = 
&atom->x[0][0]; else if (strcmp(word,"y") == 0) newtree->array = &atom->x[0][1]; else if (strcmp(word,"z") == 0) newtree->array = &atom->x[0][2]; else if (strcmp(word,"vx") == 0) newtree->array = &atom->v[0][0]; else if (strcmp(word,"vy") == 0) newtree->array = &atom->v[0][1]; else if (strcmp(word,"vz") == 0) newtree->array = &atom->v[0][2]; else if (strcmp(word,"fx") == 0) newtree->array = &atom->f[0][0]; else if (strcmp(word,"fy") == 0) newtree->array = &atom->f[0][1]; else if (strcmp(word,"fz") == 0) newtree->array = &atom->f[0][2]; else if (strcmp(word,"q") == 0) { newtree->nstride = 1; newtree->array = atom->q; } } /* ---------------------------------------------------------------------- check if word matches a constant return 1 if yes, else 0 customize by adding a constant: PI, version ------------------------------------------------------------------------- */ int Variable::is_constant(char *word) { if (strcmp(word,"PI") == 0) return 1; if (strcmp(word,"version") == 0) return 1; + if (strcmp(word,"yes") == 0) return 1; + if (strcmp(word,"no") == 0) return 1; + if (strcmp(word,"on") == 0) return 1; + if (strcmp(word,"off") == 0) return 1; + if (strcmp(word,"true") == 0) return 1; + if (strcmp(word,"false") == 0) return 1; return 0; } /* ---------------------------------------------------------------------- process a constant in formula customize by adding a constant: PI, version ------------------------------------------------------------------------- */ double Variable::constant(char *word) { if (strcmp(word,"PI") == 0) return MY_PI; if (strcmp(word,"version") == 0) return atof(universe->num_ver); + if (strcmp(word,"yes") == 0) return 1.0; + if (strcmp(word,"no") == 0) return 0.0; + if (strcmp(word,"on") == 0) return 1.0; + if (strcmp(word,"off") == 0) return 0.0; + if (strcmp(word,"true") == 0) return 1.0; + if (strcmp(word,"false") == 0) return 0.0; return 0.0; } /* ---------------------------------------------------------------------- parse string for 
comma-separated args store copy of each arg in args array max allowed # of args = MAXFUNCARG ------------------------------------------------------------------------- */ int Variable::parse_args(char *str, char **args) { int n; char *ptrnext; int narg = 0; char *ptr = str; while (ptr && narg < MAXFUNCARG) { ptrnext = find_next_comma(ptr); if (ptrnext) *ptrnext = '\0'; n = strlen(ptr) + 1; args[narg] = new char[n]; strcpy(args[narg],ptr); narg++; ptr = ptrnext; if (ptr) ptr++; } if (ptr) error->all(FLERR,"Too many args in variable function"); return narg; } /* ---------------------------------------------------------------------- find next comma in str skip commas inside one or more nested parenthesis only return ptr to comma at level 0, else NULL if not found ------------------------------------------------------------------------- */ char *Variable::find_next_comma(char *str) { int level = 0; for (char *p = str; *p; ++p) { if ('(' == *p) level++; else if (')' == *p) level--; else if (',' == *p && !level) return p; } return NULL; } /* ---------------------------------------------------------------------- debug routine for printing formula tree recursively ------------------------------------------------------------------------- */ void Variable::print_tree(Tree *tree, int level) { printf("TREE %d: %d %g\n",level,tree->type,tree->value); if (tree->first) print_tree(tree->first,level+1); if (tree->second) print_tree(tree->second,level+1); if (tree->nextra) for (int i = 0; i < tree->nextra; i++) print_tree(tree->extra[i],level+1); return; } /* ---------------------------------------------------------------------- recursive evaluation of string str called from "if" command in input script str is a boolean expression containing one or more items: number = 0.0, -5.45, 2.8e-4, ... 
math operation = (),x==y,x!=y,xy,x>=y,x&&y,x||y ------------------------------------------------------------------------- */ double Variable::evaluate_boolean(char *str) { int op,opprevious,flag1,flag2; double value1,value2; char onechar; char *str1,*str2; struct Arg { int flag; // 0 for numeric value, 1 for string double value; // stored numeric value char *str; // stored string }; Arg argstack[MAXLEVEL]; int opstack[MAXLEVEL]; int nargstack = 0; int nopstack = 0; int i = 0; int expect = ARG; while (1) { onechar = str[i]; // whitespace: just skip if (isspace(onechar)) i++; // ---------------- // parentheses: recursively evaluate contents of parens // ---------------- else if (onechar == '(') { if (expect == OP) error->all(FLERR,"Invalid Boolean syntax in if command"); expect = OP; char *contents; i = find_matching_paren(str,i,contents); i++; // evaluate contents and push on stack argstack[nargstack].value = evaluate_boolean(contents); argstack[nargstack].flag = 0; nargstack++; delete [] contents; // ---------------- // number: push value onto stack // ---------------- } else if (isdigit(onechar) || onechar == '.' 
|| onechar == '-') { if (expect == OP) error->all(FLERR,"Invalid Boolean syntax in if command"); expect = OP; // set I to end of number, including scientific notation int istart = i++; while (isdigit(str[i]) || str[i] == '.') i++; if (str[i] == 'e' || str[i] == 'E') { i++; if (str[i] == '+' || str[i] == '-') i++; while (isdigit(str[i])) i++; } onechar = str[i]; str[i] = '\0'; argstack[nargstack].value = atof(&str[istart]); str[i] = onechar; argstack[nargstack++].flag = 0; // ---------------- // string: push string onto stack // ---------------- } else if (isalpha(onechar)) { if (expect == OP) error->all(FLERR,"Invalid Boolean syntax in if command"); expect = OP; // set I to end of string int istart = i++; while (isalnum(str[i]) || str[i] == '_') i++; int n = i - istart + 1; argstack[nargstack].str = new char[n]; onechar = str[i]; str[i] = '\0'; strcpy(argstack[nargstack].str,&str[istart]); str[i] = onechar; argstack[nargstack++].flag = 1; // ---------------- // Boolean operator, including end-of-string // ---------------- } else if (strchr("<>=!&|\0",onechar)) { if (onechar == '=') { if (str[i+1] != '=') error->all(FLERR,"Invalid Boolean syntax in if command"); op = EQ; i++; } else if (onechar == '!') { if (str[i+1] == '=') { op = NE; i++; } else op = NOT; } else if (onechar == '<') { if (str[i+1] != '=') op = LT; else { op = LE; i++; } } else if (onechar == '>') { if (str[i+1] != '=') op = GT; else { op = GE; i++; } } else if (onechar == '&') { if (str[i+1] != '&') error->all(FLERR,"Invalid Boolean syntax in if command"); op = AND; i++; } else if (onechar == '|') { if (str[i+1] != '|') error->all(FLERR,"Invalid Boolean syntax in if command"); op = OR; i++; } else op = DONE; i++; if (op == NOT && expect == ARG) { opstack[nopstack++] = op; continue; } if (expect == ARG) error->all(FLERR,"Invalid Boolean syntax in if command"); expect = ARG; // evaluate stack as deep as possible while respecting precedence // before pushing current op onto stack while (nopstack && 
precedence[opstack[nopstack-1]] >= precedence[op]) { opprevious = opstack[--nopstack]; nargstack--; flag2 = argstack[nargstack].flag; value2 = argstack[nargstack].value; str2 = argstack[nargstack].str; if (opprevious != NOT) { nargstack--; flag1 = argstack[nargstack].flag; value1 = argstack[nargstack].value; str1 = argstack[nargstack].str; } if (opprevious == NOT) { if (flag2) error->all(FLERR,"Invalid Boolean syntax in if command"); if (value2 == 0.0) argstack[nargstack].value = 1.0; else argstack[nargstack].value = 0.0; } else if (opprevious == EQ) { if (flag1 != flag2) error->all(FLERR,"Invalid Boolean syntax in if command"); if (flag2 == 0) { if (value1 == value2) argstack[nargstack].value = 1.0; else argstack[nargstack].value = 0.0; } else { if (strcmp(str1,str2) == 0) argstack[nargstack].value = 1.0; else argstack[nargstack].value = 0.0; delete [] str1; delete [] str2; } } else if (opprevious == NE) { if (flag1 != flag2) error->all(FLERR,"Invalid Boolean syntax in if command"); if (flag2 == 0) { if (value1 != value2) argstack[nargstack].value = 1.0; else argstack[nargstack].value = 0.0; } else { if (strcmp(str1,str2) != 0) argstack[nargstack].value = 1.0; else argstack[nargstack].value = 0.0; delete [] str1; delete [] str2; } } else if (opprevious == LT) { if (flag2) error->all(FLERR,"Invalid Boolean syntax in if command"); if (value1 < value2) argstack[nargstack].value = 1.0; else argstack[nargstack].value = 0.0; } else if (opprevious == LE) { if (flag2) error->all(FLERR,"Invalid Boolean syntax in if command"); if (value1 <= value2) argstack[nargstack].value = 1.0; else argstack[nargstack].value = 0.0; } else if (opprevious == GT) { if (flag2) error->all(FLERR,"Invalid Boolean syntax in if command"); if (value1 > value2) argstack[nargstack].value = 1.0; else argstack[nargstack].value = 0.0; } else if (opprevious == GE) { if (flag2) error->all(FLERR,"Invalid Boolean syntax in if command"); if (value1 >= value2) argstack[nargstack].value = 1.0; else 
argstack[nargstack].value = 0.0; } else if (opprevious == AND) { if (flag2) error->all(FLERR,"Invalid Boolean syntax in if command"); if (value1 != 0.0 && value2 != 0.0) argstack[nargstack].value = 1.0; else argstack[nargstack].value = 0.0; } else if (opprevious == OR) { if (flag2) error->all(FLERR,"Invalid Boolean syntax in if command"); if (value1 != 0.0 || value2 != 0.0) argstack[nargstack].value = 1.0; else argstack[nargstack].value = 0.0; } argstack[nargstack++].flag = 0; } // if end-of-string, break out of entire formula evaluation loop if (op == DONE) break; // push current operation onto stack opstack[nopstack++] = op; } else error->all(FLERR,"Invalid Boolean syntax in if command"); } if (nopstack) error->all(FLERR,"Invalid Boolean syntax in if command"); if (nargstack != 1) error->all(FLERR,"Invalid Boolean syntax in if command"); return argstack[0].value; } /* ---------------------------------------------------------------------- */ unsigned int Variable::data_mask(int ivar) { if (eval_in_progress[ivar]) return EMPTY_MASK; eval_in_progress[ivar] = 1; unsigned int datamask = data_mask(data[ivar][0]); eval_in_progress[ivar] = 0; return datamask; } /* ---------------------------------------------------------------------- */ unsigned int Variable::data_mask(char *str) { unsigned int datamask = EMPTY_MASK; for (unsigned int i = 0; i < strlen(str)-2; i++) { int istart = i; while (isalnum(str[i]) || str[i] == '_') i++; int istop = i-1; int n = istop - istart + 1; char *word = new char[n+1]; strncpy(word,&str[istart],n); word[n] = '\0'; // ---------------- // compute // ---------------- if ((strncmp(word,"c_",2) == 0) && (i>0) && (!isalnum(str[i-1]))) { if (domain->box_exist == 0) error->all(FLERR, "Variable evaluation before simulation box is defined"); int icompute = modify->find_compute(word+2); if (icompute < 0) error->all(FLERR,"Invalid compute ID in variable formula"); datamask &= modify->compute[icompute]->data_mask(); } if ((strncmp(word,"f_",2) == 0) && 
(i>0) && (!isalnum(str[i-1]))) { if (domain->box_exist == 0) error->all(FLERR, "Variable evaluation before simulation box is defined"); int ifix = modify->find_fix(word+2); if (ifix < 0) error->all(FLERR,"Invalid fix ID in variable formula"); datamask &= modify->fix[ifix]->data_mask(); } if ((strncmp(word,"v_",2) == 0) && (i>0) && (!isalnum(str[i-1]))) { int ivar = find(word+2); if (ivar < 0) error->all(FLERR,"Invalid variable name in variable formula"); datamask &= data_mask(ivar); } delete [] word; } return datamask; } /* ---------------------------------------------------------------------- class to read variable values from a file for flag = SCALARFILE, reads one value per line for flag = ATOMFILE, reads set of one value per atom ------------------------------------------------------------------------- */ VarReader::VarReader(LAMMPS *lmp, char *name, char *file, int flag) : Pointers(lmp) { me = comm->me; style = flag; fp = NULL; if (me == 0) { fp = fopen(file,"r"); if (fp == NULL) { char str[128]; sprintf(str,"Cannot open file variable file %s",file); error->one(FLERR,str); } } // if atomfile-style variable, must store per-atom values read from file // allocate a new fix STORE, so they persist // id = variable-ID + VARIABLE_STORE, fix group = all fixstore = NULL; id_fix = NULL; buffer = NULL; if (style == ATOMFILE) { if (atom->map_style == 0) error->all(FLERR, "Cannot use atomfile-style variable unless atom map exists"); int n = strlen(name) + strlen("_VARIABLE_STORE") + 1; id_fix = new char[n]; strcpy(id_fix,name); strcat(id_fix,"_VARIABLE_STORE"); char **newarg = new char*[5]; newarg[0] = id_fix; newarg[1] = (char *) "all"; newarg[2] = (char *) "STORE"; newarg[3] = (char *) "0"; newarg[4] = (char *) "1"; modify->add_fix(5,newarg); fixstore = (FixStore *) modify->fix[modify->nfix-1]; delete [] newarg; buffer = new char[CHUNK*MAXLINE]; } } /* ---------------------------------------------------------------------- */ VarReader::~VarReader() { if (me == 0) { 
fclose(fp); fp = NULL; } // check modify in case all fixes have already been deleted if (fixstore) { if (modify) modify->delete_fix(id_fix); delete [] id_fix; delete [] buffer; } } /* ---------------------------------------------------------------------- read for SCALARFILE style read next value from file into str for file-style variable strip comments, skip blank lines return 0 if successful, 1 if end-of-file ------------------------------------------------------------------------- */ int VarReader::read_scalar(char *str) { int n; char *ptr; // read one string from file if (me == 0) { while (1) { if (fgets(str,MAXLINE,fp) == NULL) n = 0; else n = strlen(str); if (n == 0) break; // end of file str[n-1] = '\0'; // strip newline if ((ptr = strchr(str,'#'))) *ptr = '\0'; // strip comment if (strtok(str," \t\n\r\f") == NULL) continue; // skip if blank n = strlen(str) + 1; break; } } MPI_Bcast(&n,1,MPI_INT,0,world); if (n == 0) return 1; MPI_Bcast(str,n,MPI_CHAR,0,world); return 0; } /* ---------------------------------------------------------------------- read snapshot of per-atom values from file into str for atomfile-style variable return 0 if successful, 1 if end-of-file ------------------------------------------------------------------------- */ int VarReader::read_peratom() { int i,m,n,nchunk,eof; tagint tag; char *ptr,*next; double value; // set all per-atom values to 0.0 // values that appear in file will overwrite this double *vstore = fixstore->vstore; int nlocal = atom->nlocal; for (i = 0; i < nlocal; i++) vstore[i] = 0.0; // read one string from file, convert to Nlines char str[MAXLINE]; if (me == 0) { while (1) { if (fgets(str,MAXLINE,fp) == NULL) n = 0; else n = strlen(str); if (n == 0) break; // end of file str[n-1] = '\0'; // strip newline if ((ptr = strchr(str,'#'))) *ptr = '\0'; // strip comment if (strtok(str," \t\n\r\f") == NULL) continue; // skip if blank n = strlen(str) + 1; break; } } MPI_Bcast(&n,1,MPI_INT,0,world); if (n == 0) return 1; 
MPI_Bcast(str,n,MPI_CHAR,0,world); bigint nlines = force->bnumeric(FLERR,str); tagint map_tag_max = atom->map_tag_max; bigint nread = 0; while (nread < nlines) { nchunk = MIN(nlines-nread,CHUNK); eof = comm->read_lines_from_file(fp,nchunk,MAXLINE,buffer); if (eof) return 1; char *buf = buffer; for (i = 0; i < nchunk; i++) { next = strchr(buf,'\n'); *next = '\0'; sscanf(buf,TAGINT_FORMAT " %lg",&tag,&value); if (tag <= 0 || tag > map_tag_max) error->one(FLERR,"Invalid atom ID in variable file"); if ((m = atom->map(tag)) >= 0) vstore[m] = value; buf = next + 1; } nread += nchunk; } return 0; }