diff --git a/doc/src/Section_accelerate.txt b/doc/src/Section_accelerate.txt
index 881235888..bb0c93b8a 100644
--- a/doc/src/Section_accelerate.txt
+++ b/doc/src/Section_accelerate.txt
@@ -1,391 +1,391 @@
 "Previous Section"_Section_packages.html - "LAMMPS WWW Site"_lws -
 "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc - "Next
 Section"_Section_howto.html :c
 
 :link(lws,http://lammps.sandia.gov)
 :link(ld,Manual.html)
 :link(lc,Section_commands.html#comm)
 
 :line
 
 5. Accelerating LAMMPS performance :h3
 
 This section describes various methods for improving LAMMPS
 performance for different classes of problems running on different
 kinds of machines.
 
 There are two thrusts to the discussion that follows.  The
 first is using code options that implement alternate algorithms
 that can speed-up a simulation.  The second is to use one
 of the several accelerator packages provided with LAMMPS that
 contain code optimized for certain kinds of hardware, including
 multi-core CPUs, GPUs, and Intel Xeon Phi coprocessors.
 
 5.1 "Measuring performance"_#acc_1 :ulb,l
 5.2 "Algorithms and code options to boost performance"_#acc_2 :l
 5.3 "Accelerator packages with optimized styles"_#acc_3 :l
     5.3.1 "GPU package"_accelerate_gpu.html :l
     5.3.2 "USER-INTEL package"_accelerate_intel.html :l
     5.3.3 "KOKKOS package"_accelerate_kokkos.html :l
     5.3.4 "USER-OMP package"_accelerate_omp.html :l
     5.3.5 "OPT package"_accelerate_opt.html :l
 5.4 "Comparison of various accelerator packages"_#acc_4 :l
 :ule
 
 The "Benchmark page"_http://lammps.sandia.gov/bench.html of the LAMMPS
 web site gives performance results for the various accelerator
 packages discussed in Section 5.3, for several of the standard LAMMPS
 benchmark problems, as a function of problem size and number of
 compute nodes, on different hardware platforms.
 
 :line
 :line
 
 5.1 Measuring performance :h4,link(acc_1)
 
 Before trying to make your simulation run faster, you should
 understand how it currently performs and where the bottlenecks are.
 
 The best way to do this is to run your system (actual number of
 atoms) for a modest number of timesteps (say 100 steps) on several
 different processor counts, including a single processor if possible.
 Do this for an equilibrium version of your system, so that the
 100-step timings are representative of a much longer run.  There is
 typically no need to run for 1000s of timesteps to get accurate
 timings; you can simply extrapolate from short runs.
 
 For the set of runs, look at the timing data printed to the screen and
 log file at the end of each LAMMPS run.  "This
 section"_Section_start.html#start_7 of the manual has an overview.
 
 Running on one (or a few) processors should give a good estimate of
 the serial performance and what portions of the timestep are taking
 the most time.  Running the same problem on a few different processor
 counts should give an estimate of parallel scalability.  I.e. if the
 simulation runs 16x faster on 16 processors, it's 100% parallel
 efficient; if it runs 8x faster on 16 processors, it's 50% efficient.
 
 The most important data to look at in the timing info is the timing
 breakdown and relative percentages.  For example, trying different
 options for speeding up the long-range solvers will have little impact
 if they only consume 10% of the run time.  If the pairwise time is
 dominating, you may want to look at GPU or OMP versions of the pair
 style, as discussed below.  Comparing how the percentages change as
 you increase the processor count gives you a sense of how different
 operations within the timestep are scaling.  Note that if you are
 running with a Kspace solver, there is additional output on the
 breakdown of the Kspace time.  For PPPM, this includes the fraction
 spent on FFTs, which can be communication intensive.
 
 Another important detail in the timing info are the histograms of
 atom counts and neighbor counts.  If these vary widely across
 processors, you have a load-imbalance issue.  This often results in
 inaccurate relative timing data, because processors have to wait when
 communication occurs for other processors to catch up.  Thus the
 reported times for "Communication" or "Other" may be higher than they
 really are, due to load-imbalance.  If this is an issue, you can
 uncomment the MPI_Barrier() lines in src/timer.cpp, and recompile
 LAMMPS, to obtain synchronized timings.
 
 :line
 
 5.2 General strategies :h4,link(acc_2)
 
 NOTE: this section 5.2 is still a work in progress
 
 Here is a list of general ideas for improving simulation performance.
 Most of them are only applicable to certain models and certain
 bottlenecks in the current performance, so let the timing data you
 generate be your guide.  It is hard, if not impossible, to predict how
 much difference these options will make, since it is a function of
 problem size, number of processors used, and your machine.  There is
 no substitute for identifying performance bottlenecks, and trying out
 various options.
 
 rRESPA
 2-FFT PPPM
 Staggered PPPM
 single vs double PPPM
 partial charge PPPM
 verlet/split run style
 processor command for proc layout and numa layout
 load-balancing: balance and fix balance :ul
 
 2-FFT PPPM, also called {analytic differentiation} or {ad} PPPM, uses
 2 FFTs instead of the 4 FFTs used by the default {ik differentiation}
 PPPM. However, 2-FFT PPPM also requires a slightly larger mesh size to
 achieve the same accuracy as 4-FFT PPPM. For problems where the FFT
 cost is the performance bottleneck (typically large problems running
 on many processors), 2-FFT PPPM may be faster than 4-FFT PPPM.
 
 Staggered PPPM performs calculations using two different meshes, one
 shifted slightly with respect to the other.  This can reduce force
 aliasing errors and increase the accuracy of the method, but also
 doubles the amount of work required. For high relative accuracy, using
 staggered PPPM allows one to halve the mesh size in each dimension as
 compared to regular PPPM, which can give around a 4x speedup in the
 kspace time. However, for low relative accuracy, using staggered PPPM
 gives little benefit and can be up to 2x slower in the kspace
 time. For example, the rhodopsin benchmark was run on a single
 processor, and results for kspace time vs. relative accuracy for the
 different methods are shown in the figure below.  For this system,
 staggered PPPM (using ik differentiation) becomes useful when using a
 relative accuracy of slightly greater than 1e-5 and above.
 
 :c,image(JPG/rhodo_staggered.jpg)
 
 NOTE: Using staggered PPPM may not give the same increase in accuracy
 of energy and pressure as it does in forces, so some caution must be
 used if energy and/or pressure are quantities of interest, such as
 when using a barostat.
 
 :line
 
 5.3 Packages with optimized styles :h4,link(acc_3)
 
 Accelerated versions of various "pair_style"_pair_style.html,
 "fixes"_fix.html, "computes"_compute.html, and other commands have
 been added to LAMMPS, which will typically run faster than the
 standard non-accelerated versions.  Some require appropriate hardware
 to be present on your system, e.g. GPUs or Intel Xeon Phi
 coprocessors.
 
 All of these commands are in packages provided with LAMMPS.  An
 overview of packages is given in "Section
 packages"_Section_packages.html.
 
 These are the accelerator packages
 currently in LAMMPS, either as standard or user packages:
 
 "GPU Package"_accelerate_gpu.html : for NVIDIA GPUs as well as OpenCL support
 "USER-INTEL Package"_accelerate_intel.html : for Intel CPUs and Intel Xeon Phi
 "KOKKOS Package"_accelerate_kokkos.html : for Nvidia GPUs, Intel Xeon Phi, and OpenMP threading
 "USER-OMP Package"_accelerate_omp.html : for OpenMP threading and generic CPU optimizations
 "OPT Package"_accelerate_opt.html : generic CPU optimizations :tb(s=:)
 
 <!-- RST
 
 .. toctree::
    :maxdepth: 1
    :hidden:
 
    accelerate_gpu
    accelerate_intel
    accelerate_kokkos
    accelerate_omp
    accelerate_opt
 
 END_RST -->
 
 Inverting this list, LAMMPS currently has acceleration support for
 three kinds of hardware, via the listed packages:
 
 Many-core CPUs : "USER-INTEL"_accelerate_intel.html, "KOKKOS"_accelerate_kokkos.html, "USER-OMP"_accelerate_omp.html, "OPT"_accelerate_opt.html packages
 NVIDIA GPUs : "GPU"_accelerate_gpu.html, "KOKKOS"_accelerate_kokkos.html packages
 Intel Phi : "USER-INTEL"_accelerate_intel.html, "KOKKOS"_accelerate_kokkos.html packages :tb(s=:)
 
 Which package is fastest for your hardware may depend on the size
 of the problem you are running and what commands (accelerated and
 non-accelerated) are invoked by your input script.  While these doc
 pages include performance guidelines, there is no substitute for
 trying out the different packages appropriate to your hardware.
 
 Any accelerated style has the same name as the corresponding standard
 style, except that a suffix is appended.  Otherwise, the syntax for
 the command that uses the style is identical, their functionality is
 the same, and the numerical results it produces should also be the
 same, except for precision and round-off effects.
 
 For example, all of these styles are accelerated variants of the
 Lennard-Jones "pair_style lj/cut"_pair_lj.html:
 
 "pair_style lj/cut/gpu"_pair_lj.html
 "pair_style lj/cut/intel"_pair_lj.html
 "pair_style lj/cut/kk"_pair_lj.html
 "pair_style lj/cut/omp"_pair_lj.html
 "pair_style lj/cut/opt"_pair_lj.html :ul
 
 To see what accelerated styles are currently available, see
 "Section 3.5"_Section_commands.html#cmd_5 of the manual.  The
 doc pages for individual commands (e.g. "pair lj/cut"_pair_lj.html or
 "fix nve"_fix_nve.html) also list any accelerated variants available
 for that style.
 
 To use an accelerator package in LAMMPS, and one or more of the styles
 it provides, follow these general steps.  Details vary from package to
 package and are explained in the individual accelerator doc pages,
 listed above:
 
 build the accelerator library |
   only for GPU package |
 install the accelerator package |
   make yes-opt, make yes-user-intel, etc |
 add compile/link flags to Makefile.machine in src/MAKE |
   only for USER-INTEL, KOKKOS, USER-OMP, OPT packages |
 re-build LAMMPS |
   make machine |
 prepare and test a regular LAMMPS simulation |
   lmp_machine -in in.script; mpirun -np 32 lmp_machine -in in.script |
 enable specific accelerator support via '-k on' "command-line switch"_Section_start.html#start_6, |
   only needed for KOKKOS package |
 set any needed options for the package via "-pk" "command-line switch"_Section_start.html#start_6 or "package"_package.html command, |
   only if defaults need to be changed |
 use accelerated styles in your input via "-sf" "command-line switch"_Section_start.html#start_6 or "suffix"_suffix.html command | lmp_machine -in in.script -sf gpu
 :tb(c=2,s=|)
 
-Note that the first 4 steps can be done as a single command, using the
-src/Make.py tool.  This tool is discussed in "Section
+Note that the first 4 steps can be done as a single command with
+suitable make command invocations.  This is discussed in "Section
 4"_Section_packages.html of the manual, and their use is
 illustrated in the individual accelerator sections.  Typically these
 steps only need to be done once, to create an executable that uses one
 or more accelerator packages.
 
 The last 4 steps can all be done from the command-line when LAMMPS is
 launched, without changing your input script, as illustrated in the
 individual accelerator sections.  Or you can add
 "package"_package.html and "suffix"_suffix.html commands to your input
 script.
 
 NOTE: With a few exceptions, you can build a single LAMMPS executable
 with all its accelerator packages installed.  Note however that the
 USER-INTEL and KOKKOS packages require you to choose one of their
 hardware options when building for a specific platform.  I.e. CPU or
 Phi option for the USER-INTEL package.  Or the OpenMP, Cuda, or Phi
 option for the KOKKOS package.
 
 These are the exceptions.  You cannot build a single executable with:
 
 both the USER-INTEL Phi and KOKKOS Phi options
 the USER-INTEL Phi or Kokkos Phi option, and the GPU package :ul
 
 See the examples/accelerate/README and make.list files for sample
 Make.py commands that build LAMMPS with any or all of the accelerator
 packages.  As an example, here is a command that builds with all the
 GPU related packages installed (GPU, KOKKOS with Cuda), including
 settings to build the needed auxiliary GPU libraries for Kepler GPUs:
 
 Make.py -j 16 -p omp gpu kokkos -cc nvcc wrap=mpi \
   -gpu mode=double arch=35 -kokkos cuda arch=35 lib-all file mpi :pre
 
 The examples/accelerate directory also has input scripts that can be
 used with all of the accelerator packages.  See its README file for
 details.
 
 Likewise, the bench directory has FERMI and KEPLER and PHI
 sub-directories with Make.py commands and input scripts for using all
 the accelerator packages on various machines.  See the README files in
 those dirs.
 
 As mentioned above, the "Benchmark
 page"_http://lammps.sandia.gov/bench.html of the LAMMPS web site gives
 performance results for the various accelerator packages for several
 of the standard LAMMPS benchmark problems, as a function of problem
 size and number of compute nodes, on different hardware platforms.
 
 Here is a brief summary of what the various packages provide.  Details
 are in the individual accelerator sections.
 
 Styles with a "gpu" suffix are part of the GPU package, and can be run
 on NVIDIA GPUs.  The speed-up on a GPU depends on a variety of
 factors, discussed in the accelerator sections. :ulb,l
 
 Styles with an "intel" suffix are part of the USER-INTEL
 package. These styles support vectorized single and mixed precision
 calculations, in addition to full double precision.  In extreme cases,
 this can provide speedups over 3.5x on CPUs.  The package also
 supports acceleration in "offload" mode to Intel(R) Xeon Phi(TM)
 coprocessors.  This can result in additional speedup over 2x depending
 on the hardware configuration. :l
 
 Styles with a "kk" suffix are part of the KOKKOS package, and can be
 run using OpenMP on multicore CPUs, on an NVIDIA GPU, or on an Intel
 Xeon Phi in "native" mode.  The speed-up depends on a variety of
 factors, as discussed on the KOKKOS accelerator page. :l
 
 Styles with an "omp" suffix are part of the USER-OMP package and allow
 a pair-style to be run in multi-threaded mode using OpenMP.  This can
 be useful on nodes with high-core counts when using fewer MPI processes
 than cores is advantageous, e.g. when running with PPPM so that FFTs
 are run on fewer MPI processors or when the many MPI tasks would
 overload the available bandwidth for communication. :l
 
 Styles with an "opt" suffix are part of the OPT package and typically
 speed-up the pairwise calculations of your simulation by 5-25% on a
 CPU. :l
 :ule
 
 The individual accelerator package doc pages explain:
 
 what hardware and software the accelerated package requires
 how to build LAMMPS with the accelerated package
 how to run with the accelerated package either via command-line switches or modifying the input script
 speed-ups to expect
 guidelines for best performance
 restrictions :ul
 
 :line
 
 5.4 Comparison of various accelerator packages :h4,link(acc_4)
 
 NOTE: this section still needs to be re-worked with additional KOKKOS
 and USER-INTEL information.
 
 The next section compares and contrasts the various accelerator
 options, since there are multiple ways to perform OpenMP threading,
 run on GPUs, and run on Intel Xeon Phi coprocessors.
 
 All 3 of these packages accelerate a LAMMPS calculation using NVIDIA
 hardware, but they do it in different ways.
 
 As a consequence, for a particular simulation on specific hardware,
 one package may be faster than the other.  We give guidelines below,
 but the best way to determine which package is faster for your input
 script is to try both of them on your machine.  See the benchmarking
 section below for examples where this has been done.
 
 [Guidelines for using each package optimally:]
 
 The GPU package allows you to assign multiple CPUs (cores) to a single
 GPU (a common configuration for "hybrid" nodes that contain multicore
 CPU(s) and GPU(s)) and works effectively in this mode. :ulb,l
 
 The GPU package moves per-atom data (coordinates, forces)
 back-and-forth between the CPU and GPU every timestep.  The
 KOKKOS/CUDA package only does this on timesteps when a CPU calculation
 is required (e.g. to invoke a fix or compute that is non-GPU-ized).
 Hence, if you can formulate your input script to only use GPU-ized
 fixes and computes, and avoid doing I/O too often (thermo output, dump
 file snapshots, restart files), then the data transfer cost of the
 KOKKOS/CUDA package can be very low, causing it to run faster than the
 GPU package. :l
 
 The GPU package is often faster than the KOKKOS/CUDA package, if the
 number of atoms per GPU is smaller.  The crossover point, in terms of
 atoms/GPU at which the KOKKOS/CUDA package becomes faster depends
 strongly on the pair style.  For example, for a simple Lennard Jones
 system the crossover (in single precision) is often about 50K-100K
 atoms per GPU.  When performing double precision calculations the
 crossover point can be significantly smaller. :l
 
 Both packages compute bonded interactions (bonds, angles, etc) on the
 CPU.  If the GPU package is running with several MPI processes
 assigned to one GPU, the cost of computing the bonded interactions is
 spread across more CPUs and hence the GPU package can run faster. :l
 
 When using the GPU package with multiple CPUs assigned to one GPU, its
 performance depends to some extent on high bandwidth between the CPUs
 and the GPU.  Hence its performance is affected if full 16 PCIe lanes
 are not available for each GPU.  In HPC environments this can be the
 case if S2050/70 servers are used, where two devices generally share
 one PCIe 2.0 16x slot.  Also many multi-GPU mainboards do not provide
 full 16 lanes to each of the PCIe 2.0 16x slots. :l
 :ule
 
 [Differences between the two packages:]
 
 The GPU package accelerates only pair force, neighbor list, and PPPM
 calculations. :ulb,l
 
 The GPU package requires neighbor lists to be built on the CPU when using
 exclusion lists, hybrid pair styles, or a triclinic simulation box. :l
 :ule
diff --git a/doc/src/Section_example.txt b/doc/src/Section_example.txt
index 26dc3b969..f8b39be17 100644
--- a/doc/src/Section_example.txt
+++ b/doc/src/Section_example.txt
@@ -1,144 +1,145 @@
 "Previous Section"_Section_howto.html - "LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc - "Next Section"_Section_perf.html :c
 
 :link(lws,http://lammps.sandia.gov)
 :link(ld,Manual.html)
 :link(lc,Section_commands.html#comm)
 
 :line
 
 7. Example problems :h3
 
 The LAMMPS distribution includes an examples sub-directory with many
 sample problems.  Many are 2d models that run quickly and are
 straightforward to visualize, requiring at most a couple of minutes to
 run on a desktop machine.  Each problem has an input script (in.*) and
 produces a log file (log.*) when it runs.  Some use a data file
 (data.*) of initial coordinates as additional input.  A few sample log
 files run on different machines and different numbers of processors are
 included in the directories to compare your answers to.  E.g. a log
 file like log.date.crack.foo.P means the "crack" example was run on P
 processors of machine "foo" on that date (i.e. with that version of
 LAMMPS).
 
 Many of the input files have commented-out lines for creating dump
 files and image files.
 
 If you uncomment the "dump"_dump.html command in the input script, a
 text dump file will be produced, which can be animated by various
 "visualization programs"_http://lammps.sandia.gov/viz.html.
 
 If you uncomment the "dump image"_dump_image.html command in the input
 script, and assuming you have built LAMMPS with a JPG library, JPG
 snapshot images will be produced when the simulation runs.  They can
 be quickly post-processed into a movie using commands described on the
 "dump image"_dump_image.html doc page.
 
 Animations of many of the examples can be viewed on the Movies section
 of the "LAMMPS web site"_lws.
 
 There are two kinds of sub-directories in the examples dir.  Lowercase
 dirs contain one or a few simple, quick-to-run problems.  Uppercase
 dirs contain up to several complex scripts that illustrate a
 particular kind of simulation method or model.  Some of these run for
 longer times, e.g. to measure a particular quantity.
 
 Lists of both kinds of directories are given below.
 
 :line
 
 Lowercase directories :h4
 
 accelerate: run with various acceleration options (OpenMP, GPU, Phi)
+airebo:   polyethylene with AIREBO potential
 balance:  dynamic load balancing, 2d system
 body:     body particles, 2d system
 cmap:     CMAP 5-body contributions to CHARMM force field
 colloid:  big colloid particles in a small particle solvent, 2d system
 comb:     models using the COMB potential
 coreshell: core/shell model using CORESHELL package
 controller: use of fix controller as a thermostat
 crack:    crack propagation in a 2d solid
 deposit:  deposit atoms and molecules on a surface
 dipole:   point dipolar particles, 2d system
 dreiding: methanol via Dreiding FF
 eim:      NaCl using the EIM potential
 ellipse:  ellipsoidal particles in spherical solvent, 2d system
 flow:     Couette and Poiseuille flow in a 2d channel
 friction: frictional contact of spherical asperities between 2d surfaces
 gcmc:     Grand Canonical Monte Carlo (GCMC) via the fix gcmc command
 granregion: use of fix wall/region/gran as boundary on granular particles
 hugoniostat: Hugoniostat shock dynamics
 indent:   spherical indenter into a 2d solid
 kim:      use of potentials in Knowledge Base for Interatomic Models (KIM)
 meam:     MEAM test for SiC and shear (same as shear examples)
 melt:     rapid melt of 3d LJ system
 micelle:  self-assembly of small lipid-like molecules into 2d bilayers
 min:      energy minimization of 2d LJ melt
 mscg:     parameterize a multi-scale coarse-graining (MSCG) model
 msst:     MSST shock dynamics
 nb3b:     use of nonbonded 3-body harmonic pair style
 neb:      nudged elastic band (NEB) calculation for barrier finding
 nemd:     non-equilibrium MD of 2d sheared system
 obstacle: flow around two voids in a 2d channel
 peptide:  dynamics of a small solvated peptide chain (5-mer)
 peri:     Peridynamic model of cylinder impacted by indenter
 pour:     pouring of granular particles into a 3d box, then chute flow
 prd:      parallel replica dynamics of vacancy diffusion in bulk Si
 python:   using embedded Python in a LAMMPS input script
 qeq:      use of the QEQ package for charge equilibration
 reax:     RDX and TATB models using the ReaxFF
 rigid:    rigid bodies modeled as independent or coupled
 shear:    sideways shear applied to 2d solid, with and without a void
 snap:     NVE dynamics for BCC tantalum crystal using SNAP potential
 srd:      stochastic rotation dynamics (SRD) particles as solvent
 streitz:  use of Streitz/Mintmire potential with charge equilibration
 tad:      temperature-accelerated dynamics of vacancy diffusion in bulk Si
 vashishta: use of the Vashishta potential
 voronoi:  Voronoi tessellation via compute voronoi/atom command :tb(s=:)
 
 Here is how you can run and visualize one of the sample problems:
 
 cd indent
 cp ../../src/lmp_linux .           # copy LAMMPS executable to this dir
 lmp_linux -in in.indent            # run the problem :pre
 
 Running the simulation produces the files {dump.indent} and
 {log.lammps}.  You can visualize the dump file of snapshots with a
 variety of 3rd-party tools highlighted on the
 "Visualization"_http://lammps.sandia.gov/viz.html page of the LAMMPS
 web site.
 
 If you uncomment the "dump image"_dump_image.html line(s) in the input
 script a series of JPG images will be produced by the run (assuming
 you built LAMMPS with JPG support; see "Section
 2.2"_Section_start.html#start_2 for details).  These can be viewed
 individually or turned into a movie or animated by tools like
 ImageMagick or QuickTime or various Windows-based tools.  See the
 "dump image"_dump_image.html doc page for more details.  E.g. this
 ImageMagick command would create a GIF file suitable for viewing in a
 browser.
 
 % convert -loop 1 *.jpg foo.gif :pre
 
 :line
 
 Uppercase directories :h4
 
 ASPHERE: various aspherical particle models, using ellipsoids, rigid bodies, line/triangle particles, etc
 COUPLE: examples of how to use LAMMPS as a library
 DIFFUSE: compute diffusion coefficients via several methods
 ELASTIC: compute elastic constants at zero temperature
 ELASTIC_T: compute elastic constants at finite temperature
 KAPPA: compute thermal conductivity via several methods
 MC: using LAMMPS in a Monte Carlo mode to relax the energy of a system
 USER: examples for USER packages and USER-contributed commands
 VISCOSITY: compute viscosity via several methods :tb(s=:)
 
 Nearly all of these directories have README files which give more
 details on how to understand and use their contents.
 
 The USER directory has a large number of sub-directories which
 correspond by name to a USER package.  They contain scripts that
 illustrate how to use the command(s) provided in that package.  Many
 of the sub-directories have their own README files which give further
 instructions.  See the "Section 4"_Section_packages.html doc
 page for more info on specific USER packages.
diff --git a/doc/src/Section_packages.txt b/doc/src/Section_packages.txt
index 6afcb2758..ea7b41b0a 100644
--- a/doc/src/Section_packages.txt
+++ b/doc/src/Section_packages.txt
@@ -1,2658 +1,2658 @@
 "Previous Section"_Section_commands.html - "LAMMPS WWW Site"_lws -
 "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc - "Next
 Section"_Section_accelerate.html :c
 
 :link(lws,http://lammps.sandia.gov)
 :link(ld,Manual.html)
 :link(lc,Section_commands.html#comm)
 
 :line
 
 4. Packages :h3
 
 This section gives an overview of the optional packages that extend
 LAMMPS functionality with instructions on how to build LAMMPS with
 each of them.  Packages are groups of files that enable a specific set
 of features.  For example, force fields for molecular systems or
 granular systems are in packages.  You can see the list of all
 packages and "make" commands to manage them by typing "make package"
 from within the src directory of the LAMMPS distribution.  "Section
 2.3"_Section_start.html#start_3 gives general info on how to install
 and un-install packages as part of the LAMMPS build process.
 
 There are two kinds of packages in LAMMPS, standard and user packages:
 
 "Table of standard packages"_#table_standard
 "Table of user packages"_#table_user :ul
 
 Standard packages are supported by the LAMMPS developers and are
 written in a syntax and style consistent with the rest of LAMMPS.
 This means the developers will answer questions about them, debug and
 fix them if necessary, and keep them compatible with future changes to
 LAMMPS.
 
 User packages have been contributed by users, and begin with the
 "user" prefix.  If they are a single command (single file), they are
 typically in the user-misc package.  User packages don't necessarily
 meet the requirements of the standard packages.  If you have problems
 using a feature provided in a user package, you may need to contact
 the contributor directly to get help.  Information on how to submit
 additions you make to LAMMPS as single files or as a standard or user
 package is given in "this section"_Section_modify.html#mod_15 of the
 manual.
 
 Following the next two tables is a sub-section for each package.  It
 lists authors (if applicable) and summarizes the package contents.  It
 has specific instructions on how to install the package, including (if
 necessary) downloading or building any extra library it requires. It
 also gives links to documentation, example scripts, and
 pictures/movies (if available) that illustrate use of the package.
 
 NOTE: To see the complete list of commands a package adds to LAMMPS,
 just look at the files in its src directory, e.g. "ls src/GRANULAR".
 Files with names that start with fix, compute, atom, pair, bond,
 angle, etc correspond to commands with the same style names.
 
 In these two tables, the "Example" column is a sub-directory in the
 examples directory of the distribution which has an input script that
 uses the package.  E.g. "peptide" refers to the examples/peptide
 directory; USER/atc refers to the examples/USER/atc directory.  The
 "Library" column indicates whether an extra library is needed to build
 and use the package:
 
 dash = no library
 sys = system library: you likely have it on your machine
 int = internal library: provided with LAMMPS, but you may need to build it
 ext = external library: you will need to download and install it on your machine :ul
 
 :line
 :line
 
 [Standard packages] :link(table_standard),p
 
 Package, Description, Doc page, Example, Library
 "ASPHERE"_#ASPHERE, aspherical particle models, "Section 6.6.14"_Section_howto.html#howto_14, ellipse, -
 "BODY"_#BODY, body-style particles, "body"_body.html, body, -
 "CLASS2"_#CLASS2, class 2 force fields, "pair_style lj/class2"_pair_class2.html, -, -
 "COLLOID"_#COLLOID, colloidal particles, "atom_style colloid"_atom_style.html, colloid, -
 "COMPRESS"_#COMPRESS, I/O compression, "dump */gz"_dump.html, -, sys
 "CORESHELL"_#CORESHELL, adiabatic core/shell model, "Section 6.6.25"_Section_howto.html#howto_25, coreshell, -
 "DIPOLE"_#DIPOLE, point dipole particles, "pair_style dipole/cut"_pair_dipole.html, dipole, -
 "GPU"_#GPU, GPU-enabled styles, "Section 5.3.1"_accelerate_gpu.html, WWW bench, int
 "GRANULAR"_#GRANULAR, granular systems, "Section 6.6.6"_Section_howto.html#howto_6, pour, -
-"KIM"_#KIM, openKIM wrapper, "pair_style kim"_pair_kim.html, kim, ext
+"KIM"_#KIM, OpenKIM wrapper, "pair_style kim"_pair_kim.html, kim, ext
 "KOKKOS"_#KOKKOS, Kokkos-enabled styles, "Section 5.3.3"_accelerate_kokkos.html, WWW bench, -
 "KSPACE"_#KSPACE, long-range Coulombic solvers, "kspace_style"_kspace_style.html, peptide, -
 "MANYBODY"_#MANYBODY, many-body potentials, "pair_style tersoff"_pair_tersoff.html, shear, -
 "MC"_#MC, Monte Carlo options, "fix gcmc"_fix_gcmc.html, -, -
 "MEAM"_#MEAM, modified EAM potential, "pair_style meam"_pair_meam.html, meam, int
 "MISC"_#MISC, miscellaneous single-file commands, -, -, -
 "MOLECULE"_#MOLECULE, molecular system force fields, "Section 6.6.3"_Section_howto.html#howto_3, peptide, -
 "MPIIO"_#MPIIO, MPI parallel I/O dump and restart, "dump"_dump.html, -, -
 "MSCG"_#MSCG, multi-scale coarse-graining wrapper, "fix mscg"_fix_mscg.html, mscg, ext
 "OPT"_#OPT, optimized pair styles, "Section 5.3.5"_accelerate_opt.html, WWW bench, -
 "PERI"_#PERI, Peridynamics models, "pair_style peri"_pair_peri.html, peri, -
 "POEMS"_#POEMS, coupled rigid body motion, "fix poems"_fix_poems.html, rigid, int
 "PYTHON"_#PYTHON, embed Python code in an input script, "python"_python.html, python, sys
 "QEQ"_#QEQ, QEq charge equilibration, "fix qeq"_fix_qeq.html, qeq, -
 "REAX"_#REAX, ReaxFF potential (Fortran), "pair_style reax"_pair_reax.html, reax, int
 "REPLICA"_#REPLICA, multi-replica methods, "Section 6.6.5"_Section_howto.html#howto_5, tad, -
 "RIGID"_#RIGID, rigid bodies and constraints, "fix rigid"_fix_rigid.html, rigid, -
 "SHOCK"_#SHOCK, shock loading methods, "fix msst"_fix_msst.html, -, -
 "SNAP"_#SNAP, quantum-fitted potential, "pair snap"_pair_snap.html, snap, -
 "SRD"_#SRD, stochastic rotation dynamics, "fix srd"_fix_srd.html, srd, -
 "VORONOI"_#VORONOI, Voronoi tessellation, "compute voronoi/atom"_compute_voronoi_atom.html, -, ext
 :tb(ea=c,ca1=l)
 
 [USER packages] :link(table_user),p
 
 Package, Description, Doc page, Example, Library
 "USER-ATC"_#USER-ATC, atom-to-continuum coupling, "fix atc"_fix_atc.html, USER/atc, int
 "USER-AWPMD"_#USER-AWPMD, wave-packet MD, "pair_style awpmd/cut"_pair_awpmd.html, USER/awpmd, int
 "USER-CGDNA"_#USER-CGDNA, coarse-grained DNA force fields, src/USER-CGDNA/README, USER/cgdna, -
 "USER-CGSDK"_#USER-CGSDK, SDK coarse-graining model, "pair_style lj/sdk"_pair_sdk.html, USER/cgsdk, -
 "USER-COLVARS"_#USER-COLVARS, collective variables library, "fix colvars"_fix_colvars.html, USER/colvars, int
 "USER-DIFFRACTION"_#USER-DIFFRACTION, virtual x-ray and electron diffraction,"compute xrd"_compute_xrd.html, USER/diffraction, -
 "USER-DPD"_#USER-DPD, reactive dissipative particle dynamics, src/USER-DPD/README, USER/dpd, -
 "USER-DRUDE"_#USER-DRUDE, Drude oscillators, "tutorial"_tutorial_drude.html, USER/drude, -
 "USER-EFF"_#USER-EFF, electron force field,"pair_style eff/cut"_pair_eff.html, USER/eff, -
 "USER-FEP"_#USER-FEP, free energy perturbation,"compute fep"_compute_fep.html, USER/fep, -
 "USER-H5MD"_#USER-H5MD, dump output via HDF5,"dump h5md"_dump_h5md.html, -, ext
 "USER-INTEL"_#USER-INTEL, optimized Intel CPU and KNL styles,"Section 5.3.2"_accelerate_intel.html, WWW bench, -
 "USER-LB"_#USER-LB, Lattice Boltzmann fluid,"fix lb/fluid"_fix_lb_fluid.html, USER/lb, -
 "USER-MANIFOLD"_#USER-MANIFOLD, motion on 2d surfaces,"fix manifoldforce"_fix_manifoldforce.html, USER/manifold, -
 "USER-MEAMC"_#USER-MEAMC, modified EAM potential (C++), "pair_style meam/c"_pair_meam.html, meam, -
 "USER-MGPT"_#USER-MGPT, fast MGPT multi-ion potentials, "pair_style mgpt"_pair_mgpt.html, USER/mgpt, -
 "USER-MISC"_#USER-MISC, single-file contributions, USER-MISC/README, USER/misc, -
 "USER-MOLFILE"_#USER-MOLFILE, "VMD"_vmd_home molfile plug-ins,"dump molfile"_dump_molfile.html, -, ext
 "USER-NETCDF"_#USER-NETCDF, dump output via NetCDF,"dump netcdf"_dump_netcdf.html, -, ext
 "USER-OMP"_#USER-OMP, OpenMP-enabled styles,"Section 5.3.4"_accelerate_omp.html, WWW bench, -
 "USER-PHONON"_#USER-PHONON, phonon dynamical matrix,"fix phonon"_fix_phonon.html, USER/phonon, -
 "USER-QMMM"_#USER-QMMM, QM/MM coupling,"fix qmmm"_fix_qmmm.html, USER/qmmm, ext
 "USER-QTB"_#USER-QTB, quantum nuclear effects,"fix qtb"_fix_qtb.html "fix qbmsst"_fix_qbmsst.html, qtb, -
 "USER-QUIP"_#USER-QUIP, QUIP/libatoms interface,"pair_style quip"_pair_quip.html, USER/quip, ext
 "USER-REAXC"_#USER-REAXC, ReaxFF potential (C/C++) ,"pair_style reaxc"_pair_reaxc.html, reax, -
 "USER-SMD"_#USER-SMD, smoothed Mach dynamics,"SMD User Guide"_PDF/SMD_LAMMPS_userguide.pdf, USER/smd, ext
 "USER-SMTBQ"_#USER-SMTBQ, second moment tight binding QEq potential,"pair_style smtbq"_pair_smtbq.html, USER/smtbq, -
 "USER-SPH"_#USER-SPH, smoothed particle hydrodynamics,"SPH User Guide"_PDF/SPH_LAMMPS_userguide.pdf, USER/sph, -
 "USER-TALLY"_#USER-TALLY, pairwise tally computes,"compute XXX/tally"_compute_tally.html, USER/tally, -
 "USER-VTK"_#USER-VTK, dump output via VTK, "dump vtk"_dump_vtk.html, -, ext
 :tb(ea=c,ca1=l)
 
 :line
 :line
 
 ASPHERE package :link(ASPHERE),h4
 
 [Contents:]
 
 Computes, time-integration fixes, and pair styles for aspherical
 particle models including ellipsoids, 2d lines, and 3d triangles.
 
 [Install or un-install:]
 
 make yes-asphere
 make machine :pre
 
 make no-asphere
 make machine :pre
 
 [Supporting info:]
 
 src/ASPHERE: filenames -> commands
 "Section 6.14"_Section_howto.html#howto_14
 "pair_style gayberne"_pair_gayberne.html
 "pair_style resquared"_pair_resquared.html
 "doc/PDF/pair_gayberne_extra.pdf"_PDF/pair_gayberne_extra.pdf
 "doc/PDF/pair_resquared_extra.pdf"_PDF/pair_resquared_extra.pdf
 examples/ASPHERE
 examples/ellipse
 http://lammps.sandia.gov/movies.html#line
 http://lammps.sandia.gov/movies.html#tri :ul
 
 :line
 
 BODY package :link(BODY),h4
 
 [Contents:]
 
 Body-style particles with internal structure.  Computes,
 time-integration fixes, pair styles, as well as the body styles
 themselves.  See the "body"_body.html doc page for an overview.
 
 [Install or un-install:]
 
 make yes-body
 make machine :pre
 
 make no-body
 make machine :pre
 
 [Supporting info:]
 
 src/BODY: filenames -> commands
 "body"_body.html
 "atom_style body"_atom_style.html
 "fix nve/body"_fix_nve_body.html
 "pair_style body"_pair_body.html
 examples/body :ul
 
 :line
 
 CLASS2 package :link(CLASS2),h4
 
 [Contents:]
 
 Bond, angle, dihedral, improper, and pair styles for the COMPASS
 CLASS2 molecular force field.
 
 [Install or un-install:]
 
 make yes-class2
 make machine :pre
 
 make no-class2
 make machine :pre
 
 [Supporting info:]
 
 src/CLASS2: filenames -> commands
 "bond_style class2"_bond_class2.html
 "angle_style class2"_angle_class2.html
 "dihedral_style class2"_dihedral_class2.html
 "improper_style class2"_improper_class2.html
 "pair_style lj/class2"_pair_class2.html :ul
 
 :line
 
 COLLOID package :link(COLLOID),h4
 
 [Contents:]
 
 Coarse-grained finite-size colloidal particles.  Pair style and fix
 wall styles for colloidal interactions.  Includes the Fast Lubrication
 Dynamics (FLD) method for hydrodynamic interactions, which is a
 simplified approximation to Stokesian dynamics.
 
 [Authors:] This package includes Fast Lubrication Dynamics pair styles
 which were created by Amit Kumar and Michael Bybee from Jonathan
 Higdon's group at UIUC.
 
 [Install or un-install:]
 
 make yes-colloid
 make machine :pre
 
 make no-colloid
 make machine :pre
 
 [Supporting info:]
 
 src/COLLOID: filenames -> commands
 "fix wall/colloid"_fix_wall.html
 "pair_style colloid"_pair_colloid.html
 "pair_style yukawa/colloid"_pair_yukawa_colloid.html
 "pair_style brownian"_pair_brownian.html
 "pair_style lubricate"_pair_lubricate.html
 "pair_style lubricateU"_pair_lubricateU.html
 examples/colloid
 examples/srd :ul
 
 :line
 
 COMPRESS package :link(COMPRESS),h4
 
 [Contents:]
 
 Compressed output of dump files via the zlib compression library,
 using dump styles with a "gz" in their style name.
 
 To use this package you must have the zlib compression library
 available on your system.
 
 [Author:] Axel Kohlmeyer (Temple U).
 
 [Install or un-install:]
 
 Note that building with this package assumes you have the zlib
 compression library available on your system.  The LAMMPS build uses
 the settings in the lib/compress/Makefile.lammps file in the
 compile/link process.  You should only need to edit this file if the
 LAMMPS build fails on your system.
 
 make yes-compress
 make machine :pre
 
 make no-compress
 make machine :pre
 
 [Supporting info:]
 
 src/COMPRESS: filenames -> commands
 src/COMPRESS/README
 lib/compress/README
 "dump atom/gz"_dump.html
 "dump cfg/gz"_dump.html
 "dump custom/gz"_dump.html
 "dump xyz/gz"_dump.html :ul
 
 :line
 
 CORESHELL package :link(CORESHELL),h4
 
 [Contents:]
 
 Compute and pair styles that implement the adiabatic core/shell model
 for polarizability.  The pair styles augment Born, Buckingham, and
 Lennard-Jones styles with core/shell capabilities.  The "compute
 temp/cs"_compute_temp_cs.html command calculates the temperature of a
 system with core/shell particles.  See "Section
 6.26"_Section_howto.html#howto_26 for an overview of how to use this
 package.
 
 [Author:] Hendrik Heenen (Technical U of Munich).
 
 [Install or un-install:]
 
 make yes-coreshell
 make machine :pre
 
 make no-coreshell
 make machine :pre
 
 [Supporting info:]
 
 src/CORESHELL: filenames -> commands
 "Section 6.26"_Section_howto.html#howto_26
 "Section 6.25"_Section_howto.html#howto_25
 "compute temp/cs"_compute_temp_cs.html
 "pair_style born/coul/long/cs"_pair_cs.html
 "pair_style buck/coul/long/cs"_pair_cs.html
 "pair_style lj/cut/coul/long/cs"_pair_lj.html
 examples/coreshell :ul
 
 :line
 
 DIPOLE package :link(DIPOLE),h4
 
 [Contents:]
 
 An atom style and several pair styles for point dipole models with
 short-range or long-range interactions.
 
 [Install or un-install:]
 
 make yes-dipole
 make machine :pre
 
 make no-dipole
 make machine :pre
 
 [Supporting info:]
 
 src/DIPOLE: filenames -> commands
 "atom_style dipole"_atom_style.html
 "pair_style lj/cut/dipole/cut"_pair_dipole.html
 "pair_style lj/cut/dipole/long"_pair_dipole.html
 "pair_style lj/long/dipole/long"_pair_dipole.html
 examples/dipole :ul
 
 :line
 
 GPU package :link(GPU),h4
 
 [Contents:]
 
 Dozens of pair styles and a version of the PPPM long-range Coulombic
 solver optimized for NVIDIA GPUs.  All such styles have a "gpu" as a
 suffix in their style name.  "Section 5.3.1"_accelerate_gpu.html gives
 details of what hardware and Cuda software is required on your system,
 and details on how to build and use this package.  Its styles can be
 invoked at run time via the "-sf gpu" or "-suffix gpu" "command-line
 switches"_Section_start.html#start_6.  See also the "KOKKOS"_#KOKKOS
 package, which has GPU-enabled styles.
 
 [Authors:] Mike Brown (Intel) while at Sandia and ORNL and Trung Nguyen
 (Northwestern U) while at ORNL.
 
 [Install or un-install:]
 
 Before building LAMMPS with this package, you must first build the GPU
 library in lib/gpu from a set of provided C and Cuda files.  You can
 do this manually if you prefer; follow the instructions in
 lib/gpu/README.  You can also do it in one step from the lammps/src
 dir, using a command like these, which simply invoke the
 lib/gpu/Install.py script with the specified args:
 
 make lib-gpu                                # print help message
 make lib-gpu args="-m"                      # build GPU library with default Makefile.linux
 make lib-gpu args="-i xk7 -p single -o xk7.single"      # create new Makefile.xk7.single, altered for single-precision
 make lib-gpu args="-i xk7 -p single -o xk7.single -m"   # ditto, also build GPU library :pre
 
 Note that this procedure starts with one of the existing
 Makefile.machine files in lib/gpu.  It allows you to alter 4 important
 settings in that Makefile, via the -h, -a, -p, -e switches,
 and save the new Makefile, if desired:
 
 CUDA_HOME = where NVIDIA Cuda software is installed on your system
 CUDA_ARCH = what GPU hardware you have (see help message for details)
 CUDA_PRECISION = precision (double, mixed, single)
 EXTRAMAKE = which Makefile.lammps.* file to copy to Makefile.lammps :ul
 
 If the library build is successful, 2 files should be created:
 lib/gpu/libgpu.a and lib/gpu/Makefile.lammps.  The latter has settings
 that enable LAMMPS to link with Cuda libraries.  If the settings in
 Makefile.lammps for your machine are not correct, the LAMMPS build
 will fail.
 
 You can then install/un-install the package and build LAMMPS in the
 usual manner:
 
 make yes-gpu
 make machine :pre
 
 make no-gpu
 make machine :pre
 
 NOTE: If you re-build the GPU library in lib/gpu, you should always
 un-install the GPU package, then re-install it and re-build LAMMPS.
 This is because the compilation of files in the GPU package uses the
 library settings from the lib/gpu/Makefile.machine used to build the
 GPU library.
 
 [Supporting info:]
 
 src/GPU: filenames -> commands
 src/GPU/README
 lib/gpu/README
 "Section 5.3"_Section_accelerate.html#acc_3
 "Section 5.3.1"_accelerate_gpu.html
 "Section 2.6 -sf gpu"_Section_start.html#start_6
 "Section 2.6 -pk gpu"_Section_start.html#start_6
 "package gpu"_package.html
 Pair Styles section of "Section 3.5"_Section_commands.html#cmd_5 for pair styles followed by (g)
 "Benchmarks page"_http://lammps.sandia.gov/bench.html of web site :ul
 
 :line
 
 GRANULAR package :link(GRANULAR),h4
 
 [Contents:]
 
 Pair styles and fixes for finite-size granular particles, which
 interact with each other and boundaries via frictional and dissipative
 potentials.
 
 [Install or un-install:]
 
 make yes-granular
 make machine :pre
 
 make no-granular
 make machine :pre
 
 [Supporting info:]
 
 src/GRANULAR: filenames -> commands
 "Section 6.6"_Section_howto.html#howto_6
 "fix pour"_fix_pour.html
 "fix wall/gran"_fix_wall_gran.html
 "pair_style gran/hooke"_pair_gran.html
 "pair_style gran/hertz/history"_pair_gran.html
 examples/granregion
 examples/pour
 bench/in.chute
 http://lammps.sandia.gov/pictures.html#jamming
 http://lammps.sandia.gov/movies.html#hopper
 http://lammps.sandia.gov/movies.html#dem
 http://lammps.sandia.gov/movies.html#brazil
 http://lammps.sandia.gov/movies.html#granregion :ul
 
 :line
 
 KIM package :link(KIM),h4
 
 [Contents:]
 
 A "pair_style kim"_pair_kim.html command which is a wrapper on the
 Knowledge Base for Interatomic Models (KIM) repository of interatomic
 potentials, enabling any of them to be used in LAMMPS simulations.
 
 To use this package you must have the KIM library available on your
 system.
 
 Information about the KIM project can be found at its website:
 https://openkim.org.  The KIM project is led by Ellad Tadmor and Ryan
 Elliott (U Minnesota) and James Sethna (Cornell U).
 
 [Authors:] Ryan Elliott (U Minnesota) is the main developer for the KIM
 API which the "pair_style kim"_pair_kim.html command uses.  He
 developed the pair style in collaboration with Valeriu Smirichinski (U
 Minnesota).
 
 [Install or un-install:]
 
 Before building LAMMPS with this package, you must first download and
 build the KIM library and include the KIM models that you want to
 use. You can do this manually if you prefer; follow the instructions
 in lib/kim/README.  You can also do it in one step from the lammps/src
 dir, using a command like these, which simply invoke the
 lib/kim/Install.py script with the specified args.
 
 make lib-kim                    # print help message
 make lib-kim args="-b . none"   # install KIM API lib with only example models
 make lib-kim args="-b . Glue_Ercolessi_Adams_Al__MO_324507536345_001"  # ditto plus one model
 make lib-kim args="-b . OpenKIM"   # install KIM API lib with all models
 make lib-kim args="-a EAM_Dynamo_Ackland_W__MO_141627196590_002"       # add one model or model driver :pre
 
 Note that in LAMMPS lingo, a KIM model driver is a pair style
 (e.g. EAM or Tersoff).  A KIM model is a pair style for a particular
 element or alloy and set of parameters, e.g. EAM for Cu with a
 specific EAM potential file.  Also note that installing the KIM API
 library with all its models may take around 30 min to build.  Of
 course you only need to do that once.
 
 See the list of KIM model drivers here:
 https://openkim.org/kim-items/model-drivers/alphabetical
 
 See the list of all KIM models here:
 https://openkim.org/kim-items/models/by-model-drivers
 
 See the list of example KIM models included by default here:
 https://openkim.org/kim-api in the "What is in the KIM API source
 package?" section
 
 You can then install/un-install the package and build LAMMPS in the
 usual manner:
 
 make yes-kim
 make machine :pre
 
 make no-kim
 make machine :pre
 
 [Supporting info:]
 
 src/KIM: filenames -> commands
 src/KIM/README
 lib/kim/README
 "pair_style kim"_pair_kim.html
 examples/kim :ul
 
 :line
 
 KOKKOS package :link(KOKKOS),h4
 
 [Contents:]
 
 Dozens of atom, pair, bond, angle, dihedral, improper, fix, compute
 styles adapted to compile using the Kokkos library which can convert
 them to OpenMP or Cuda code so that they run efficiently on multicore
 CPUs, KNLs, or GPUs.  All the styles have a "kk" as a suffix in their
 style name.  "Section 5.3.3"_accelerate_kokkos.html gives details of
 what hardware and software is required on your system, and how to
 build and use this package.  Its styles can be invoked at run time via
 the "-sf kk" or "-suffix kk" "command-line
 switches"_Section_start.html#start_6.  Also see the "GPU"_#GPU,
 "OPT"_#OPT, "USER-INTEL"_#USER-INTEL, and "USER-OMP"_#USER-OMP
 packages, which have styles optimized for CPUs, KNLs, and GPUs.
 
 You must have a C++11 compatible compiler to use this package.
 
 [Authors:] The KOKKOS package was created primarily by Christian Trott
 and Stan Moore (Sandia), with contributions from other folks as well.
 It uses the open-source "Kokkos library"_https://github.com/kokkos
 which was developed by Carter Edwards, Christian Trott, and others at
 Sandia, and which is included in the LAMMPS distribution in
 lib/kokkos.
 
 [Install or un-install:]
 
 For the KOKKOS package, you have 3 choices when building.  You can
 build with either CPU or KNL or GPU support.  Each choice requires
 additional settings in your Makefile.machine for the KOKKOS_DEVICES
 and KOKKOS_ARCH settings.  See the src/MAKE/OPTIONS/Makefile.kokkos*
 files for examples.
 
 For multicore CPUs using OpenMP:
 
 KOKKOS_DEVICES = OpenMP
 KOKKOS_ARCH = HSW           # HSW = Haswell, SNB = SandyBridge, BDW = Broadwell, etc :pre
 
 For Intel KNLs using OpenMP:
 
 KOKKOS_DEVICES = OpenMP
 KOKKOS_ARCH = KNL :pre
 
 For NVIDIA GPUs using Cuda:
 
 KOKKOS_DEVICES = Cuda
 KOKKOS_ARCH = Pascal60,Power8     # P100 hosted by an IBM Power8, etc
 KOKKOS_ARCH = Kepler37,Power8     # K80 hosted by an IBM Power8, etc :pre
 
 For GPUs, you also need these 2 lines in your Makefile.machine before
 the CC line is defined, in this case for use with OpenMPI mpicxx.  The
 2 lines define a nvcc wrapper compiler, which will use nvcc for
 compiling Cuda files or use a C++ compiler for non-Kokkos, non-Cuda
 files.
 
 KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd)
 export OMPI_CXX = $(KOKKOS_ABSOLUTE_PATH)/config/nvcc_wrapper
 CC =		mpicxx :pre
 
 Once you have an appropriate Makefile.machine, you can
 install/un-install the package and build LAMMPS in the usual manner.
 Note that you cannot build one executable to run on multiple hardware
 targets (CPU or KNL or GPU).  You need to build LAMMPS once for each
 hardware target, to produce a separate executable.  Also note that we
 do not recommend building with other acceleration packages installed
 (GPU, OPT, USER-INTEL, USER-OMP) when also building with KOKKOS.
 
 make yes-kokkos
 make machine :pre
 
 make no-kokkos
 make machine :pre
 
 [Supporting info:]
 
 src/KOKKOS: filenames -> commands
 src/KOKKOS/README
 lib/kokkos/README
 "Section 5.3"_Section_accelerate.html#acc_3
 "Section 5.3.3"_accelerate_kokkos.html
 "Section 2.6 -k on ..."_Section_start.html#start_6
 "Section 2.6 -sf kk"_Section_start.html#start_6
 "Section 2.6 -pk kokkos"_Section_start.html#start_6
 "package kokkos"_package.html
 Styles sections of "Section 3.5"_Section_commands.html#cmd_5 for styles followed by (k)
 "Benchmarks page"_http://lammps.sandia.gov/bench.html of web site :ul
 
 :line
 
 KSPACE package :link(KSPACE),h4
 
 [Contents:]
 
 A variety of long-range Coulombic solvers, as well as pair styles
 which compute the corresponding short-range pairwise Coulombic
 interactions.  These include Ewald, particle-particle particle-mesh
 (PPPM), and multilevel summation method (MSM) solvers.
 
 [Install or un-install:]
 
 Building with this package requires a 1d FFT library be present on
 your system for use by the PPPM solvers.  This can be the KISS FFT
 library provided with LAMMPS, 3rd party libraries like FFTW, or a
 vendor-supplied FFT library.  See step 6 of "Section
 2.2.2"_Section_start.html#start_2_2 of the manual for details on how
 to select different FFT options in your machine Makefile.
 
 make yes-kspace
 make machine :pre
 
 make no-kspace
 make machine :pre
 
 [Supporting info:]
 
 src/KSPACE: filenames -> commands
 "kspace_style"_kspace_style.html
 "doc/PDF/kspace.pdf"_PDF/kspace.pdf
 "Section 6.7"_Section_howto.html#howto_7
 "Section 6.8"_Section_howto.html#howto_8
 "Section 6.9"_Section_howto.html#howto_9
 "pair_style coul"_pair_coul.html
 Pair Styles section of "Section 3.5"_Section_commands.html#cmd_5 with "long" or "msm" in pair style name
 examples/peptide
 bench/in.rhodo :ul
 
 :line
 
 MANYBODY package :link(MANYBODY),h4
 
 [Contents:]
 
 A variety of manybody and bond-order potentials.  These include
 (AI)REBO, BOP, EAM, EIM, Stillinger-Weber, and Tersoff potentials.
 
 [Install or un-install:]
 
 make yes-manybody
 make machine :pre
 
 make no-manybody
 make machine :pre
 
 [Supporting info:]
 
 src/MANYBODY: filenames -> commands
 Pair Styles section of "Section 3.5"_Section_commands.html#cmd_5
 examples/comb
 examples/eim
 examples/nb3d
 examples/shear
 examples/streitz
 examples/vashishta
 bench/in.eam :ul
 
 :line
 
 MC package :link(MC),h4
 
 [Contents:]
 
 Several fixes and a pair style that have Monte Carlo (MC) or MC-like
 attributes.  These include fixes for creating, breaking, and swapping
 bonds, for performing atomic swaps, and performing grand-canonical MC
 (GCMC) in conjunction with dynamics.
 
 [Install or un-install:]
 
 make yes-mc
 make machine :pre
 
 make no-mc
 make machine :pre
 
 [Supporting info:]
 
 src/MC: filenames -> commands
 "fix atom/swap"_fix_atom_swap.html
 "fix bond/break"_fix_bond_break.html
 "fix bond/create"_fix_bond_create.html
 "fix bond/swap"_fix_bond_swap.html
 "fix gcmc"_fix_gcmc.html
 "pair_style dsmc"_pair_dsmc.html
 http://lammps.sandia.gov/movies.html#gcmc :ul
 
 :line
 
 MEAM package :link(MEAM),h4
 
 [Contents:]
 
 A pair style for the modified embedded atom (MEAM) potential.
 
 [Author:] Greg Wagner (Northwestern U) while at Sandia.
 
 [Install or un-install:]
 
 Before building LAMMPS with this package, you must first build the
 MEAM library in lib/meam.  You can do this manually if you prefer;
 follow the instructions in lib/meam/README.  You can also do it in one
 step from the lammps/src dir, using a command like these, which simply
 invoke the lib/meam/Install.py script with the specified args:
 
 make lib-meam                      # print help message
 make lib-meam args="-m gfortran"   # build with GNU Fortran compiler
 make lib-meam args="-m ifort"      # build with Intel ifort compiler :pre
 
 The build should produce two files: lib/meam/libmeam.a and
 lib/meam/Makefile.lammps.  The latter is copied from an existing
 Makefile.lammps.* and has settings needed to link C++ (LAMMPS) with
 Fortran (MEAM library).  Typically the two compilers used for LAMMPS
 and the MEAM library need to be consistent (e.g. both Intel or both
 GNU compilers).  If necessary, you can edit/create a new
 lib/meam/Makefile.machine file for your system, which should define an
 EXTRAMAKE variable to specify a corresponding Makefile.lammps.machine
 file.
 
 You can then install/un-install the package and build LAMMPS in the
 usual manner:
 
 make yes-meam
 make machine :pre
 
 make no-meam
 make machine :pre
 
 NOTE: You should test building the MEAM library with both the Intel
 and GNU compilers to see if a simulation runs faster with one versus
 the other on your system.
 
 [Supporting info:]
 
 src/MEAM: filenames -> commands
 src/MEAM/README
 lib/meam/README
 "pair_style meam"_pair_meam.html
 examples/meam :ul
 
 :line
 
 MISC package :link(MISC),h4
 
 [Contents:]
 
 A variety of compute, fix, pair, dump styles with specialized
 capabilities that don't align with other packages.  Do a directory
 listing, "ls src/MISC", to see the list of commands.
 
 [Install or un-install:]
 
 make yes-misc
 make machine :pre
 
 make no-misc
 make machine :pre
 
 [Supporting info:]
 
 src/MISC: filenames -> commands
 "compute ti"_compute_ti.html
 "fix evaporate"_fix_evaporate.html
 "fix orient/fcc"_fix_orient.html
 "fix ttm"_fix_ttm.html
 "fix thermal/conductivity"_fix_thermal_conductivity.html
 "fix viscosity"_fix_viscosity.html
 examples/KAPPA
 examples/VISCOSITY
 http://lammps.sandia.gov/pictures.html#ttm
 http://lammps.sandia.gov/movies.html#evaporation :ul
 
 :line
 
 MOLECULE package :link(MOLECULE),h4
 
 [Contents:]
 
 A large number of atom, pair, bond, angle, dihedral, improper styles
 that are used to model molecular systems with fixed covalent bonds.
 The pair styles include the Dreiding (hydrogen-bonding) and CHARMM
 force fields, and a TIP4P water model.
 
 [Install or un-install:]
 
 make yes-molecule
 make machine :pre
 
 make no-molecule
 make machine :pre
 
 [Supporting info:]
 
 src/MOLECULE: filenames -> commands
 "atom_style"_atom_style.html
 "bond_style"_bond_style.html
 "angle_style"_angle_style.html
 "dihedral_style"_dihedral_style.html
 "improper_style"_improper_style.html
 "pair_style hbond/dreiding/lj"_pair_hbond_dreiding.html
 "pair_style lj/charmm/coul/charmm"_pair_charmm.html
 "Section 6.3"_Section_howto.html#howto_3
 examples/cmap
 examples/dreiding
 examples/micelle
 examples/peptide
 bench/in.chain
 bench/in.rhodo :ul
 
 :line
 
 MPIIO package :link(MPIIO),h4
 
 [Contents:]
 
 Support for parallel output/input of dump and restart files via the
 MPIIO library.  It adds "dump styles"_dump.html with a "mpiio" in
 their style name.  Restart files with an ".mpiio" suffix are also
 written and read in parallel.
 
 [Install or un-install:]
 
 Note that MPIIO is part of the standard message-passing interface
 (MPI) library, so you should not need any additional compiler or link
 settings, beyond what LAMMPS normally uses for MPI on your system.
 
 make yes-mpiio
 make machine :pre
 
 make no-mpiio
 make machine :pre
 
 [Supporting info:]
 
 src/MPIIO: filenames -> commands
 "dump"_dump.html
 "restart"_restart.html
 "write_restart"_write_restart.html
 "read_restart"_read_restart.html :ul
 
 :line
 
 MSCG package :link(MSCG),h4
 
 [Contents:]
 
 A "fix mscg"_fix_mscg.html command which can parameterize a
 Multi-Scale Coarse-Graining (MSCG) model using the open-source "MS-CG
 library"_mscg_home.
 
 :link(mscg_home,https://github.com/uchicago-voth/MSCG-release)
 
 To use this package you must have the MS-CG library available on your
 system.
 
 [Authors:] The fix was written by Lauren Abbott (Sandia).  The MS-CG
 library was developed by Jacob Wagner in Greg Voth's group at the
 University of Chicago.
 
 [Install or un-install:]
 
 Before building LAMMPS with this package, you must first download and
 build the MS-CG library.  Building the MS-CG library and using it from
 LAMMPS requires a C++11 compatible compiler, and that LAPACK and GSL
 (GNU Scientific Library) libraries be installed on your machine.  See
 the lib/mscg/README and MSCG/Install files for more details.
 
 Assuming these libraries are in place, you can do the download and
 build of MS-CG manually if you prefer; follow the instructions in
 lib/mscg/README.  You can also do it in one step from the lammps/src
 dir, using a command like these, which simply invoke the
 lib/mscg/Install.py script with the specified args:
 
 make lib-mscg                                # print help message
 make lib-mscg args="-g -b -l"                # download and build in default lib/mscg/MSCG-release-master
 make lib-mscg args="-h . MSCG -g -b -l"      # download and build in lib/mscg/MSCG
 make lib-mscg args="-h ~ MSCG -g -b -l"      # download and build in ~/MSCG :pre
 
 Note that the final -l switch is to create 2 symbolic (soft) links,
 "includelink" and "liblink", in lib/mscg to point to the MS-CG src
 dir.  When LAMMPS builds it will use these links.  You should not need
 to edit the lib/mscg/Makefile.lammps file.
 
 You can then install/un-install the package and build LAMMPS in the
 usual manner:
 
 make yes-mscg
 make machine :pre
 
 make no-mscg
 make machine :pre
 
 [Supporting info:]
 
 src/MSCG: filenames -> commands
 src/MSCG/README
 lib/mscg/README
 examples/mscg :ul
 
 :line
 
 OPT package :link(OPT),h4
 
 [Contents:]
 
 A handful of pair styles which are optimized for improved CPU
 performance on single or multiple cores.  These include EAM, LJ,
 CHARMM, and Morse potentials.  The styles have an "opt" suffix in
 their style name.  "Section 5.3.5"_accelerate_opt.html gives details
 of how to build and use this package.  Its styles can be invoked at
 run time via the "-sf opt" or "-suffix opt" "command-line
 switches"_Section_start.html#start_6.  See also the "KOKKOS"_#KOKKOS,
 "USER-INTEL"_#USER-INTEL, and "USER-OMP"_#USER-OMP packages, which
 have styles optimized for CPU performance.
 
 [Authors:] James Fischer (High Performance Technologies), David Richie,
 and Vincent Natoli (Stone Ridge Technology).
 
 [Install or un-install:]
 
 make yes-opt
 make machine :pre
 
 make no-opt
 make machine :pre
 
 NOTE: The compile flag "-restrict" must be used to build LAMMPS with
 the OPT package.  It should be added to the CCFLAGS line of your
 Makefile.machine.  See Makefile.opt in src/MAKE/OPTIONS for an
 example.
 
 CCFLAGS: add -restrict :ul
 
 [Supporting info:]
 
 src/OPT: filenames -> commands
 "Section 5.3"_Section_accelerate.html#acc_3
 "Section 5.3.5"_accelerate_opt.html
 "Section 2.6 -sf opt"_Section_start.html#start_6
 Pair Styles section of "Section 3.5"_Section_commands.html#cmd_5 for pair styles followed by (t)
 "Benchmarks page"_http://lammps.sandia.gov/bench.html of web site :ul
 
 :line
 
 PERI package :link(PERI),h4
 
 [Contents:]
 
 An atom style, several pair styles which implement different
 Peridynamics materials models, and several computes which calculate
 diagnostics.  Peridynamics is a particle-based meshless continuum
 model.
 
 [Authors:] The original package was created by Mike Parks (Sandia).
 Additional Peridynamics models were added by Rezwanur Rahman and John
 Foster (UTSA).
 
 [Install or un-install:]
 
 make yes-peri
 make machine :pre
 
 make no-peri
 make machine :pre
 
 [Supporting info:]
 
 src/PERI: filenames -> commands
 "doc/PDF/PDLammps_overview.pdf"_PDF/PDLammps_overview.pdf
 "doc/PDF/PDLammps_EPS.pdf"_PDF/PDLammps_EPS.pdf
 "doc/PDF/PDLammps_VES.pdf"_PDF/PDLammps_VES.pdf
 "atom_style peri"_atom_style.html
 "pair_style peri/*"_pair_peri.html
 "compute damage/atom"_compute_damage_atom.html
 "compute plasticity/atom"_compute_plasticity_atom.html
 examples/peri
 http://lammps.sandia.gov/movies.html#peri :ul
 
 :line
 
 POEMS package :link(POEMS),h4
 
 [Contents:]
 
 A fix that wraps the Parallelizable Open source Efficient Multibody
 Software (POEMS) library, which is able to simulate the dynamics of
 articulated body systems.  These are systems with multiple rigid
 bodies (collections of particles) whose motion is coupled by
 connections at hinge points.
 
 [Author:] Rudra Mukherjee (JPL) while at RPI.
 
 [Install or un-install:]
 
 Before building LAMMPS with this package, you must first build the
 POEMS library in lib/poems.  You can do this manually if you prefer;
 follow the instructions in lib/poems/README.  You can also do it in
 one step from the lammps/src dir, using a command like these, which
 simply invoke the lib/poems/Install.py script with the specified args:
 
 make lib-poems                      # print help message
 make lib-poems args="-m g++"        # build with GNU g++ compiler
 make lib-poems args="-m icc"        # build with Intel icc compiler :pre
 
 The build should produce two files: lib/poems/libpoems.a and
 lib/poems/Makefile.lammps.  The latter is copied from an existing
 Makefile.lammps.* and has settings needed to build LAMMPS with the
 POEMS library (though typically the settings are just blank).  If
 necessary, you can edit/create a new lib/poems/Makefile.machine file
 for your system, which should define an EXTRAMAKE variable to specify
 a corresponding Makefile.lammps.machine file.
 
 You can then install/un-install the package and build LAMMPS in the
 usual manner:
 
 make yes-poems
 make machine :pre
 
 make no-poems
 make machine :pre
 
 [Supporting info:]
 
 src/POEMS: filenames -> commands
 src/POEMS/README
 lib/poems/README
 "fix poems"_fix_poems.html
 examples/rigid :ul
 
 :line
 
 PYTHON package :link(PYTHON),h4
 
 [Contents:]
 
 A "python"_python.html command which allows you to execute Python code
 from a LAMMPS input script.  The code can be in a separate file or
 embedded in the input script itself.  See "Section
 11.2"_Section_python.html#py_2 for an overview of using Python from
 LAMMPS in this manner and the entire section for other ways to use
 LAMMPS and Python together.
 
 [Install or un-install:]
 
 make yes-python
 make machine :pre
 
 make no-python
 make machine :pre
 
 NOTE: Building with the PYTHON package assumes you have a Python
 shared library available on your system, which needs to be a Python 2
 version, 2.6 or later.  Python 3 is not yet supported.  See the
 lib/python/README for more details.  Note that the build uses the
 lib/python/Makefile.lammps file in the compile/link process.  You
 should only need to create a new Makefile.lammps.* file (and copy it
 to Makefile.lammps) if the LAMMPS build fails.
 
 [Supporting info:]
 
 src/PYTHON: filenames -> commands
 "Section 11"_Section_python.html
 lib/python/README
 examples/python :ul
 
 :line
 
 QEQ package :link(QEQ),h4
 
 [Contents:]
 
 Several fixes for performing charge equilibration (QEq) via different
 algorithms.  These can be used with pair styles that perform QEq as
 part of their formulation.
 
 [Install or un-install:]
 
 make yes-qeq
 make machine :pre
 
 make no-qeq
 make machine :pre
 
 [Supporting info:]
 
 src/QEQ: filenames -> commands
 "fix qeq/*"_fix_qeq.html
 examples/qeq
 examples/streitz :ul
 
 :line
 
 REAX package :link(REAX),h4
 
 [Contents:]
 
 A pair style which wraps a Fortran library which implements the ReaxFF
 potential, which is a universal reactive force field.  See the
 "USER-REAXC package"_#USER-REAXC for an alternate implementation in
 C/C++.  Also a "fix reax/bonds"_fix_reax_bonds.html command for
 monitoring molecules as bonds are created and destroyed.
 
 [Author:] Aidan Thompson (Sandia).
 
 [Install or un-install:]
 
 Before building LAMMPS with this package, you must first build the
 REAX library in lib/reax.  You can do this manually if you prefer;
 follow the instructions in lib/reax/README.  You can also do it in one
 step from the lammps/src dir, using a command like these, which simply
 invoke the lib/reax/Install.py script with the specified args:
 
 make lib-reax                      # print help message
 make lib-reax args="-m gfortran"   # build with GNU Fortran compiler
 make lib-reax args="-m ifort"      # build with Intel ifort compiler :pre
 
 The build should produce two files: lib/reax/libreax.a and
 lib/reax/Makefile.lammps.  The latter is copied from an existing
 Makefile.lammps.* and has settings needed to link C++ (LAMMPS) with
 Fortran (REAX library).  Typically the two compilers used for LAMMPS
 and the REAX library need to be consistent (e.g. both Intel or both
 GNU compilers).  If necessary, you can edit/create a new
 lib/reax/Makefile.machine file for your system, which should define an
 EXTRAMAKE variable to specify a corresponding Makefile.lammps.machine
 file.
 
 You can then install/un-install the package and build LAMMPS in the
 usual manner:
 
 make yes-reax
 make machine :pre
 
 make no-reax
 make machine :pre
 
 [Supporting info:]
 
 src/REAX: filenames -> commands
 lib/reax/README
 "pair_style reax"_pair_reax.html
 "fix reax/bonds"_fix_reax_bonds.html
 examples/reax :ul
 
 :line
 
 REPLICA package :link(REPLICA),h4
 
 [Contents:]
 
 A collection of multi-replica methods which can be used when running
 multiple LAMMPS simulations (replicas).  See "Section
 6.5"_Section_howto.html#howto_5 for an overview of how to run
 multi-replica simulations in LAMMPS.  Methods in the package include
 nudged elastic band (NEB), parallel replica dynamics (PRD),
 temperature accelerated dynamics (TAD), parallel tempering, and a
 verlet/split algorithm for performing long-range Coulombics on one set
 of processors, and the remainder of the force field calculation on
 another set.
 
 [Install or un-install:]
 
 make yes-replica
 make machine :pre
 
 make no-replica
 make machine :pre
 
 [Supporting info:]
 
 src/REPLICA: filenames -> commands
 "Section 6.5"_Section_howto.html#howto_5
 "neb"_neb.html
 "prd"_prd.html
 "tad"_tad.html
 "temper"_temper.html
 "run_style verlet/split"_run_style.html
 examples/neb
 examples/prd
 examples/tad :ul
 
 :line
 
 RIGID package :link(RIGID),h4
 
 [Contents:]
 
 Fixes which enforce rigid constraints on collections of atoms or
 particles.  This includes SHAKE and RATTLE, as well as various
 rigid-body integrators for a few large bodies or many small bodies.
 Also several computes which calculate properties of rigid bodies.
 
 To install/build:
 
 make yes-rigid
 make machine :pre
 
 To un-install/re-build:
 
 make no-rigid
 make machine :pre
 
 [Supporting info:]
 
 src/RIGID: filenames -> commands
 "compute erotate/rigid"_compute_erotate_rigid.html
 "fix shake"_fix_shake.html
 "fix rattle"_fix_shake.html
 "fix rigid/*"_fix_rigid.html
 examples/ASPHERE
 examples/rigid
 bench/in.rhodo
 http://lammps.sandia.gov/movies.html#box
 http://lammps.sandia.gov/movies.html#star :ul
 
 :line
 
 SHOCK package :link(SHOCK),h4
 
 [Contents:]
 
 Fixes for running impact simulations where a shock-wave passes through
 a material.
 
 [Install or un-install:]
 
 make yes-shock
 make machine :pre
 
 make no-shock
 make machine :pre
 
 [Supporting info:]
 
 src/SHOCK: filenames -> commands
 "fix append/atoms"_fix_append_atoms.html
 "fix msst"_fix_msst.html
 "fix nphug"_fix_nphug.html
 "fix wall/piston"_fix_wall_piston.html
 examples/hugoniostat
 examples/msst :ul
 
 :line
 
 SNAP package :link(SNAP),h4
 
 [Contents:]
 
 A pair style for the spectral neighbor analysis potential (SNAP).
 SNAP is a methodology for deriving a highly accurate classical potential
 fit to a large archive of quantum mechanical (DFT) data. Also several
 computes which analyze attributes of the potential.
 
 [Author:] Aidan Thompson (Sandia).
 
 [Install or un-install:]
 
 make yes-snap
 make machine :pre
 
 make no-snap
 make machine :pre
 
 [Supporting info:]
 
 src/SNAP: filenames -> commands
 "pair snap"_pair_snap.html
 "compute sna/atom"_compute_sna_atom.html
 "compute snad/atom"_compute_sna_atom.html
 "compute snav/atom"_compute_sna_atom.html
 examples/snap :ul
 
 :line
 
 SRD package :link(SRD),h4
 
 [Contents:]
 
 A pair of fixes which implement the Stochastic Rotation Dynamics (SRD)
 method for coarse-graining of a solvent, typically around large
 colloidal particles.
 
 To install/build:
 
 make yes-srd
 make machine :pre
 
 To un-install/re-build:
 
 make no-srd
 make machine :pre
 
 [Supporting info:]
 
 src/SRD: filenames -> commands
 "fix srd"_fix_srd.html
 "fix wall/srd"_fix_wall_srd.html
 examples/srd
 examples/ASPHERE
 http://lammps.sandia.gov/movies.html#tri
 http://lammps.sandia.gov/movies.html#line
 http://lammps.sandia.gov/movies.html#poly :ul
 
 :line
 
 VORONOI package :link(VORONOI),h4
 
 [Contents:]
 
 A compute command which calculates the Voronoi tessellation of a
 collection of atoms by wrapping the "Voro++ library"_voro_home.  This
 can be used to calculate the local volume of each atom or its near
 neighbors.
 
 :link(voro_home,http://math.lbl.gov/voro++)
 
 To use this package you must have the Voro++ library available on your
 system.
 
 [Author:] Daniel Schwen (INL) while at LANL.  The open-source Voro++
 library was written by Chris Rycroft (Harvard U) while at UC Berkeley
 and LBNL.
 
 [Install or un-install:]
 
 Before building LAMMPS with this package, you must first download and
 build the Voro++ library.  You can do this manually if you prefer;
 follow the instructions in lib/voronoi/README.  You can also do it in
 one step from the lammps/src dir, using a command like these, which
 simply invoke the lib/voronoi/Install.py script with the specified
 args:
 
 make lib-voronoi                                # print help message
 make lib-voronoi args="-g -b -l"                # download and build in default lib/voronoi/voro++-0.4.6
 make lib-voronoi args="-h . voro++ -g -b -l"    # download and build in lib/voronoi/voro++
 make lib-voronoi args="-h ~ voro++ -g -b -l"    # download and build in ~/voro++ :pre
 
 Note that the final -l switch is to create 2 symbolic (soft) links,
 "includelink" and "liblink", in lib/voronoi to point to the Voro++ src
 dir.  When LAMMPS builds it will use these links.  You should not need
 to edit the lib/voronoi/Makefile.lammps file.
 
 You can then install/un-install the package and build LAMMPS in the
 usual manner:
 
 make yes-voronoi
 make machine :pre
 
 make no-voronoi
 make machine :pre
 
 [Supporting info:]
 
 src/VORONOI: filenames -> commands
 src/VORONOI/README
 lib/voronoi/README
 "compute voronoi/atom"_compute_voronoi_atom.html
 examples/voronoi :ul
 
 :line
 :line
 
 USER-ATC package :link(USER-ATC),h4
 
 [Contents:]
 
 ATC stands for atoms-to-continuum.  This package implements a "fix
 atc"_fix_atc.html command to either couple molecular dynamics with
 continuum finite element equations or perform on-the-fly conversion of
 atomic information to continuum fields.
 
 [Authors:] Reese Jones, Jeremy Templeton, Jon Zimmerman (Sandia).
 
 [Install or un-install:]
 
 Before building LAMMPS with this package, you must first build the ATC
 library in lib/atc.  You can do this manually if you prefer; follow
 the instructions in lib/atc/README.  You can also do it in one step
 from the lammps/src dir, using a command like these, which simply
 invoke the lib/atc/Install.py script with the specified args:
 
 make lib-atc                      # print help message
 make lib-atc args="-m g++"        # build with GNU g++ compiler
 make lib-atc args="-m icc"        # build with Intel icc compiler :pre
 
 The build should produce two files: lib/atc/libatc.a and
 lib/atc/Makefile.lammps.  The latter is copied from an existing
 Makefile.lammps.* and has settings needed to build LAMMPS with the ATC
 library.  If necessary, you can edit/create a new
 lib/atc/Makefile.machine file for your system, which should define an
 EXTRAMAKE variable to specify a corresponding Makefile.lammps.machine
 file.
 
 Note that the Makefile.lammps file has settings for the BLAS and
 LAPACK linear algebra libraries.  As explained in lib/atc/README these
 can either exist on your system, or you can use the files provided in
 lib/linalg.  In the latter case you also need to build the library
 in lib/linalg with a command like these:
 
 make lib-linalg                      # print help message
-make lib-atc args="-m gfortran"      # build with GNU Fortran compiler
+make lib-linalg args="-m gfortran"   # build with GNU Fortran compiler :pre
 
 You can then install/un-install the package and build LAMMPS in the
 usual manner:
 
 make yes-user-atc
 make machine :pre
 
 make no-user-atc
 make machine :pre
 
 [Supporting info:]
 
 src/USER-ATC: filenames -> commands
 src/USER-ATC/README
 "fix atc"_fix_atc.html
 examples/USER/atc
 http://lammps.sandia.gov/pictures.html#atc :ul
 
 :line
 
 USER-AWPMD package :link(USER-AWPMD),h4
 
 [Contents:]
 
 AWPMD stands for Antisymmetrized Wave Packet Molecular Dynamics.  This
 package implements an atom, pair, and fix style which allows electrons
 to be treated as explicit particles in a classical molecular dynamics
 model.
 
 [Author:] Ilya Valuev (JIHT, Russia).
 
 [Install or un-install:]
 
 Before building LAMMPS with this package, you must first build the
 AWPMD library in lib/awpmd.  You can do this manually if you prefer;
 follow the instructions in lib/awpmd/README.  You can also do it in
 one step from the lammps/src dir, using a command like these, which
 simply invoke the lib/awpmd/Install.py script with the specified args:
 
 make lib-awpmd                      # print help message
 make lib-awpmd args="-m g++"        # build with GNU g++ compiler
 make lib-awpmd args="-m icc"        # build with Intel icc compiler :pre
 
 The build should produce two files: lib/awpmd/libawpmd.a and
 lib/awpmd/Makefile.lammps.  The latter is copied from an existing
 Makefile.lammps.* and has settings needed to build LAMMPS with the
 AWPMD library.  If necessary, you can edit/create a new
 lib/awpmd/Makefile.machine file for your system, which should define
 an EXTRAMAKE variable to specify a corresponding
 Makefile.lammps.machine file.
 
 Note that the Makefile.lammps file has settings for the BLAS and
 LAPACK linear algebra libraries.  As explained in lib/awpmd/README
 these can either exist on your system, or you can use the files
 provided in lib/linalg.  In the latter case you also need to build the
 library in lib/linalg with a command like these:
 
 make lib-linalg                      # print help message
 make lib-linalg args="-m gfortran"   # build with GNU Fortran compiler :pre
 
 You can then install/un-install the package and build LAMMPS in the
 usual manner:
 
 make yes-user-awpmd
 make machine :pre
 
 make no-user-awpmd
 make machine :pre
 
 [Supporting info:]
 
 src/USER-AWPMD: filenames -> commands
 src/USER-AWPMD/README
 "pair awpmd/cut"_pair_awpmd.html
 examples/USER/awpmd :ul
 
 :line
 
 USER-CGDNA package :link(USER-CGDNA),h4
 
 [Contents:]
 
 Several pair styles, a bond style, and integration fixes for
 coarse-grained models of single- and double-stranded DNA based on the
 oxDNA model of Doye, Louis and Ouldridge at the University of Oxford.
 This includes Langevin-type rigid-body integrators with improved
 stability.
 
 [Author:] Oliver Henrich (University of Strathclyde, Glasgow).
 
 [Install or un-install:]
 
 make yes-user-cgdna
 make machine :pre
 
 make no-user-cgdna
 make machine :pre
 
 [Supporting info:]
 
 src/USER-CGDNA: filenames -> commands
 src/USER-CGDNA/README
 "pair_style oxdna/*"_pair_oxdna.html
 "pair_style oxdna2/*"_pair_oxdna2.html
 "bond_style oxdna/*"_bond_oxdna.html
 "bond_style oxdna2/*"_bond_oxdna.html
 "fix nve/dotc/langevin"_fix_nve_dotc_langevin.html :ul
 
 :line
 
 USER-CGSDK package :link(USER-CGSDK),h4
 
 [Contents:]
 
 Several pair styles and an angle style which implement the
 coarse-grained SDK model of Shinoda, DeVane, and Klein which enables
 simulation of ionic liquids, electrolytes, lipids and charged amino
 acids.
 
 [Author:] Axel Kohlmeyer (Temple U).
 
 [Install or un-install:]
 
 make yes-user-cgsdk
 make machine :pre
 
 make no-user-cgsdk
 make machine :pre
 
 [Supporting info:]
 
 src/USER-CGSDK: filenames -> commands
 src/USER-CGSDK/README
 "pair_style lj/sdk/*"_pair_sdk.html
 "angle_style sdk"_angle_sdk.html
 examples/USER/cgsdk
 http://lammps.sandia.gov/pictures.html#cg :ul
 
 :line
 
 USER-COLVARS package :link(USER-COLVARS),h4
 
 [Contents:]
 
 COLVARS stands for collective variables, which can be used to
 implement various enhanced sampling methods, including Adaptive
 Biasing Force, Metadynamics, Steered MD, Umbrella Sampling and
 Restraints.  A "fix colvars"_fix_colvars.html command is implemented
 which wraps a COLVARS library that implements these methods.
 
 [Authors:] Axel Kohlmeyer (Temple U).  The COLVARS library was written
 by Giacomo Fiorin (ICMS, Temple University, Philadelphia, PA, USA) and
 Jerome Henin (LISM, CNRS, Marseille, France).
 
 [Install or un-install:]
 
 Before building LAMMPS with this package, you must first build the
 COLVARS library in lib/colvars.  You can do this manually if you
 prefer; follow the instructions in lib/colvars/README.  You can also
 do it in one step from the lammps/src dir, using a command like these,
 which simply invoke the lib/colvars/Install.py script with the
 specified args:
 
 make lib-colvars                      # print help message
 make lib-colvars args="-m g++"        # build with GNU g++ compiler :pre
 
 The build should produce two files: lib/colvars/libcolvars.a and
 lib/colvars/Makefile.lammps.  The latter is copied from an existing
 Makefile.lammps.* and has settings needed to build LAMMPS with the
 COLVARS library (though typically the settings are just blank).  If
 necessary, you can edit/create a new lib/colvars/Makefile.machine file
 for your system, which should define an EXTRAMAKE variable to specify
 a corresponding Makefile.lammps.machine file.
 
 You can then install/un-install the package and build LAMMPS in the
 usual manner:
 
 make yes-user-colvars
 make machine :pre
 
 make no-user-colvars
 make machine :pre
 
 [Supporting info:]
 
 src/USER-COLVARS: filenames -> commands
 "doc/PDF/colvars-refman-lammps.pdf"_PDF/colvars-refman-lammps.pdf
 src/USER-COLVARS/README
 lib/colvars/README
 "fix colvars"_fix_colvars.html
 examples/USER/colvars :ul
 
 :line
 
 USER-DIFFRACTION package :link(USER-DIFFRACTION),h4
 
 [Contents:]
 
 Two computes and a fix for calculating x-ray and electron diffraction
 intensities based on kinematic diffraction theory.
 
 [Author:] Shawn Coleman while at the U Arkansas.
 
 [Install or un-install:]
 
 make yes-user-diffraction
 make machine :pre
 
 make no-user-diffraction
 make machine :pre
 
 [Supporting info:]
 
 src/USER-DIFFRACTION: filenames -> commands
 "compute saed"_compute_saed.html
 "compute xrd"_compute_xrd.html
 "fix saed/vtk"_fix_saed_vtk.html
 examples/USER/diffraction :ul
 
 :line
 
 USER-DPD package :link(USER-DPD),h4
 
 [Contents:]
 
 DPD stands for dissipative particle dynamics.  This package implements
 coarse-grained DPD-based models for energetic, reactive molecular
 crystalline materials.  It includes many pair styles specific to these
 systems, including for reactive DPD, where each particle has internal
 state for multiple species and a coupled set of chemical reaction ODEs
 are integrated each timestep.  Highly accurate time integrators for
 isothermal, isoenergetic, isobaric and isenthalpic conditions are
 included.  These enable long timesteps via the Shardlow splitting
 algorithm.
 
 [Authors:] Jim Larentzos (ARL), Tim Mattox (Engility Corp), and John
 Brennan (ARL).
 
 [Install or un-install:]
 
 make yes-user-dpd
 make machine :pre
 
 make no-user-dpd
 make machine :pre
 
 [Supporting info:]
 
 src/USER-DPD: filenames -> commands
 src/USER-DPD/README
 "compute dpd"_compute_dpd.html
 "compute dpd/atom"_compute_dpd_atom.html
 "fix eos/cv"_fix_eos_cv.html
 "fix eos/table"_fix_eos_table.html
 "fix eos/table/rx"_fix_eos_table_rx.html
 "fix shardlow"_fix_shardlow.html
 "fix rx"_fix_rx.html
 "pair table/rx"_pair_table_rx.html
 "pair dpd/fdt"_pair_dpd_fdt.html
 "pair dpd/fdt/energy"_pair_dpd_fdt.html
 "pair exp6/rx"_pair_exp6_rx.html
 "pair multi/lucy"_pair_multi_lucy.html
 "pair multi/lucy/rx"_pair_multi_lucy_rx.html
 examples/USER/dpd :ul
 
 :line
 
 USER-DRUDE package :link(USER-DRUDE),h4
 
 [Contents:]
 
 Fixes, pair styles, and a compute to simulate thermalized Drude
 oscillators as a model of polarization.  See "Section
 6.27"_Section_howto.html#howto_27 for an overview of how to use the
 package.  There are auxiliary tools for using this package in
 tools/drude.
 
 [Authors:] Alain Dequidt (U Blaise Pascal Clermont-Ferrand), Julien
 Devemy (CNRS), and Agilio Padua (U Blaise Pascal).
 
 [Install or un-install:]
 
 make yes-user-drude
 make machine :pre
 
 make no-user-drude
 make machine :pre
 
 [Supporting info:]
 
 src/USER-DRUDE: filenames -> commands
 "Section 6.27"_Section_howto.html#howto_27
 "Section 6.25"_Section_howto.html#howto_25
 src/USER-DRUDE/README
 "fix drude"_fix_drude.html
 "fix drude/transform/*"_fix_drude_transform.html
 "compute temp/drude"_compute_temp_drude.html
 "pair thole"_pair_thole.html
 "pair lj/cut/thole/long"_pair_thole.html
 examples/USER/drude
 tools/drude :ul
 
 :line
 
 USER-EFF package :link(USER-EFF),h4
 
 [Contents:]
 
 EFF stands for electron force field which allows a classical MD code
 to model electrons as particles of variable radius.  This package
 contains atom, pair, fix and compute styles which implement the eFF as
 described in A. Jaramillo-Botero, J. Su, Q. An, and W.A. Goddard III,
 JCC, 2010.  The eFF potential was first introduced by Su and Goddard,
 in 2007.  There are auxiliary tools for using this package in
 tools/eff; see its README file.
 
 [Author:] Andres Jaramillo-Botero (CalTech).
 
 [Install or un-install:]
 
 make yes-user-eff
 make machine :pre
 
 make no-user-eff
 make machine :pre
 
 [Supporting info:]
 
 src/USER-EFF: filenames -> commands
 src/USER-EFF/README
 "atom_style electron"_atom_style.html
 "fix nve/eff"_fix_nve_eff.html
 "fix nvt/eff"_fix_nh_eff.html
 "fix npt/eff"_fix_nh_eff.html
 "fix langevin/eff"_fix_langevin_eff.html
 "compute temp/eff"_compute_temp_eff.html
 "pair eff/cut"_pair_eff.html
 "pair eff/inline"_pair_eff.html
 examples/USER/eff
 tools/eff/README
 tools/eff
 http://lammps.sandia.gov/movies.html#eff :ul
 
 :line
 
 USER-FEP package :link(USER-FEP),h4
 
 [Contents:]
 
 FEP stands for free energy perturbation.  This package provides
 methods for performing FEP simulations by using a "fix
 adapt/fep"_fix_adapt_fep.html command with soft-core pair potentials,
 which have a "soft" in their style name.  There are auxiliary tools
 for using this package in tools/fep; see its README file.
 
 [Author:] Agilio Padua (Universite Blaise Pascal Clermont-Ferrand)
 
 [Install or un-install:]
 
 make yes-user-fep
 make machine :pre
 
 make no-user-fep
 make machine :pre
 
 [Supporting info:]
 
 src/USER-FEP: filenames -> commands
 src/USER-FEP/README
 "fix adapt/fep"_fix_adapt_fep.html
 "compute fep"_compute_fep.html
 "pair_style */soft"_pair_lj_soft.html
 examples/USER/fep
 tools/fep/README
 tools/fep :ul
 
 :line
 
 USER-H5MD package :link(USER-H5MD),h4
 
 [Contents:]
 
 H5MD stands for HDF5 for MD.  "HDF5"_HDF5 is a portable, binary,
 self-describing file format, used by many scientific simulations.
 H5MD is a format for molecular simulations, built on top of HDF5.
 This package implements a "dump h5md"_dump_h5md.html command to output
 LAMMPS snapshots in this format.
 
 :link(HDF5,http://www.hdfgroup.org/HDF5)
 
 To use this package you must have the HDF5 library available on your
 system.
 
 [Author:] Pierre de Buyl (KU Leuven) created both the package and the
 H5MD format.
 
 [Install or un-install:]
 
 Note that to follow these steps to compile and link to the CH5MD
 library, you need the standard HDF5 software package installed on your
 system, which should include the h5cc compiler and the HDF5 library.
 
 Before building LAMMPS with this package, you must first build the
 CH5MD library in lib/h5md.  You can do this manually if you prefer;
 follow the instructions in lib/h5md/README.  You can also do it in one
 step from the lammps/src dir, using a command like these, which simply
 invoke the lib/h5md/Install.py script with the specified args:
 
 make lib-h5md                     # print help message
 make lib-h5md args="-m h5cc"      # build with h5cc compiler :pre
 
 The build should produce two files: lib/h5md/libch5md.a and
 lib/h5md/Makefile.lammps.  The latter is copied from an existing
 Makefile.lammps.* and has settings needed to build LAMMPS with the
 system HDF5 library.  If necessary, you can edit/create a new
 lib/h5md/Makefile.machine file for your system, which should define an
 EXTRAMAKE variable to specify a corresponding Makefile.lammps.machine
 file.
 
 You can then install/un-install the package and build LAMMPS in the
 usual manner:
 
 make yes-user-h5md
 make machine :pre
 
 make no-user-h5md
 make machine :pre
 
 [Supporting info:]
 
 src/USER-H5MD: filenames -> commands
 src/USER-H5MD/README
 lib/h5md/README
 "dump h5md"_dump_h5md.html :ul
 
 :line
 
 USER-INTEL package :link(USER-INTEL),h4
 
 [Contents:]
 
 Dozens of pair, fix, bond, angle, dihedral, improper, and kspace
 styles which are optimized for Intel CPUs and KNLs (Knights Landing).
 All of them have an "intel" in their style name.  "Section
 5.3.2"_accelerate_intel.html gives details of what hardware and
 compilers are required on your system, and how to build and use this
 package.  Its styles can be invoked at run time via the "-sf intel" or
 "-suffix intel" "command-line switches"_Section_start.html#start_6.
 Also see the "KOKKOS"_#KOKKOS, "OPT"_#OPT, and "USER-OMP"_#USER-OMP
 packages, which have styles optimized for CPUs and KNLs.
 
 You need to have an Intel compiler, version 14 or higher to take full
 advantage of this package.
 
 [Author:] Mike Brown (Intel).
 
 [Install or un-install:]
 
 For the USER-INTEL package, you have 2 choices when building.  You can
 build with either CPU or KNL support.  Each choice requires additional
 settings in your Makefile.machine for CCFLAGS and LINKFLAGS and
 optimized malloc libraries.  See the
 src/MAKE/OPTIONS/Makefile.intel_cpu and src/MAKE/OPTIONS/Makefile.knl
 files for examples.
 
 For CPUs:
 
 OPTFLAGS =      -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
 CCFLAGS =	-g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \
                 -fno-alias -ansi-alias -restrict $(OPTFLAGS)
 LINKFLAGS =	-g -qopenmp $(OPTFLAGS)
 LIB =           -ltbbmalloc -ltbbmalloc_proxy :pre
 
 For KNLs:
 
 OPTFLAGS =      -xMIC-AVX512 -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
 CCFLAGS =	-g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \
                 -fno-alias -ansi-alias -restrict $(OPTFLAGS)
 LINKFLAGS =	-g -qopenmp $(OPTFLAGS)
 LIB =           -ltbbmalloc :pre
 
 Once you have an appropriate Makefile.machine, you can
 install/un-install the package and build LAMMPS in the usual manner.
 Note that you cannot build one executable to run on multiple hardware
 targets (Intel CPUs or KNL).  You need to build LAMMPS once for each
 hardware target, to produce a separate executable.
 
 You should also typically install the USER-OMP package, as it can be
 used in tandem with the USER-INTEL package to good effect, as
 explained in "Section 5.3.2"_accelerate_intel.html.
 
 make yes-user-intel yes-user-omp
 make machine :pre
 
 make no-user-intel no-user-omp
 make machine :pre
 
 [Supporting info:]
 
 src/USER-INTEL: filenames -> commands
 src/USER-INTEL/README
 "Section 5.3"_Section_accelerate.html#acc_3
 "Section 5.3.2"_accelerate_intel.html
 "Section 2.6 -sf intel"_Section_start.html#start_6
 "Section 2.6 -pk intel"_Section_start.html#start_6
 "package intel"_package.html
 Styles sections of "Section 3.5"_Section_commands.html#cmd_5 for styles followed by (i)
 src/USER-INTEL/TEST
 "Benchmarks page"_http://lammps.sandia.gov/bench.html of web site :ul
 
 :line
 
 USER-LB package :link(USER-LB),h4
 
 [Contents:]
 
 Fixes which implement a background Lattice-Boltzmann (LB) fluid, which
 can be used to model MD particles influenced by hydrodynamic forces.
 
 [Authors:] Frances Mackay and Colin Denniston (University of Western
 Ontario).
 
 [Install or un-install:]
 
 make yes-user-lb
 make machine :pre
 
 make no-user-lb
 make machine :pre
 
 [Supporting info:]
 
 src/USER-LB: filenames -> commands
 src/USER-LB/README
 "fix lb/fluid"_fix_lb_fluid.html
 "fix lb/momentum"_fix_lb_momentum.html
 "fix lb/viscous"_fix_lb_viscous.html
 examples/USER/lb :ul
 
 :line
 
 USER-MGPT package :link(USER-MGPT),h4
 
 [Contents:]
 
 A pair style which provides a fast implementation of the quantum-based
 MGPT multi-ion potentials.  The MGPT or model GPT method derives from
 first-principles DFT-based generalized pseudopotential theory (GPT)
 through a series of systematic approximations valid for mid-period
 transition metals with nearly half-filled d bands.  The MGPT method
 was originally developed by John Moriarty at LLNL.  The pair style in
 this package calculates forces and energies using an optimized
 matrix-MGPT algorithm due to Tomas Oppelstrup at LLNL.
 
 [Authors:] Tomas Oppelstrup and John Moriarty (LLNL).
 
 [Install or un-install:]
 
 make yes-user-mgpt
 make machine :pre
 
 make no-user-mgpt
 make machine :pre
 
 [Supporting info:]
 
 src/USER-MGPT: filenames -> commands
 src/USER-MGPT/README
 "pair_style mgpt"_pair_mgpt.html
 examples/USER/mgpt :ul
 
 :line
 
 USER-MISC package :link(USER-MISC),h4
 
 [Contents:]
 
 A potpourri of (mostly) unrelated features contributed to LAMMPS by
 users.  Each feature is a single fix, compute, pair, bond, angle,
 dihedral, improper, or command style.
 
 [Authors:] The author for each style in the package is listed in the
 src/USER-MISC/README file.
 
 [Install or un-install:]
 
 make yes-user-misc
 make machine :pre
 
 make no-user-misc
 make machine :pre
 
 [Supporting info:]
 
 src/USER-MISC: filenames -> commands
 src/USER-MISC/README
 one doc page per individual command listed in src/USER-MISC/README
 examples/USER/misc :ul
 
 :line
 
 USER-MANIFOLD package :link(USER-MANIFOLD),h4
 
 [Contents:]
 
 Several fixes and a "manifold" class which enable simulations of
 particles constrained to a manifold (a 2D surface within the 3D
 simulation box).  This is done by applying the RATTLE constraint
 algorithm to formulate single-particle constraint functions
 g(xi,yi,zi) = 0 and their derivative (i.e. the normal of the manifold)
 n = grad(g).
 
 [Author:] Stefan Paquay (until 2017: Eindhoven University of Technology (TU/e), The
 Netherlands; since 2017: Brandeis University, Waltham, MA, USA)
 
 [Install or un-install:]
 
 make yes-user-manifold
 make machine :pre
 
 make no-user-manifold
 make machine :pre
 
 [Supporting info:]
 
 src/USER-MANIFOLD: filenames -> commands
 src/USER-MANIFOLD/README
 "doc/manifolds"_manifolds.html
 "fix manifoldforce"_fix_manifoldforce.html
 "fix nve/manifold/rattle"_fix_nve_manifold_rattle.html
 "fix nvt/manifold/rattle"_fix_nvt_manifold_rattle.html
 examples/USER/manifold
 http://lammps.sandia.gov/movies.html#manifold :ul
 
 :line
 
 USER-MEAMC package :link(USER-MEAMC),h4
 
 [Contents:]
 
 A pair style for the modified embedded atom (MEAM) potential
 translated from the Fortran version in the "MEAM"_#MEAM package
 to plain C++. In contrast to the MEAM package, no library
 needs to be compiled and the pair style can be instantiated
 multiple times.
 
 [Author:] Sebastian Huetter, (Otto-von-Guericke University Magdeburg)
 based on the Fortran version of Greg Wagner (Northwestern U) while at
 Sandia.
 
 [Install or un-install:]
   
 make yes-user-meamc
 make machine :pre
  
 make no-user-meamc
 make machine :pre
  
 [Supporting info:]
 
 src/USER-MEAMC: filenames -> commands
 src/USER-MEAMC/README
 "pair meam/c"_pair_meam.html
 examples/meam :ul
 
 :line
 
 USER-MOLFILE package :link(USER-MOLFILE),h4
 
 [Contents:]
 
 A "dump molfile"_dump_molfile.html command which uses molfile plugins
 that are bundled with the "VMD"_vmd_home
 molecular visualization and analysis program, to enable LAMMPS to dump
 snapshots in formats compatible with various molecular simulation
 tools.
 
 :link(vmd_home,http://www.ks.uiuc.edu/Research/vmd)
 
 To use this package you must have the desired VMD plugins available on
 your system.
 
 Note that this package only provides the interface code, not the
 plugins themselves, which will be accessed when requesting a specific
 plugin via the "dump molfile"_dump_molfile.html command.  Plugins can
 be obtained from a VMD installation which has to match the platform
 that you are using to compile LAMMPS for. By adding plugins to VMD,
 support for new file formats can be added to LAMMPS (or VMD or other
 programs that use them) without having to recompile the application
 itself.  More information about the VMD molfile plugins can be found
 at
 "http://www.ks.uiuc.edu/Research/vmd/plugins/molfile"_http://www.ks.uiuc.edu/Research/vmd/plugins/molfile.
 
 [Author:] Axel Kohlmeyer (Temple U).
 
 [Install or un-install:]
 
 Note that the lib/molfile/Makefile.lammps file has a setting for a
 dynamic loading library libdl.a that is typically present on
 all systems, which is required for LAMMPS to link with this package.
 If the setting is not valid for your system, you will need to edit the
 Makefile.lammps file.  See lib/molfile/README and
 lib/molfile/Makefile.lammps for details.
 
 make yes-user-molfile
 make machine :pre
 
 make no-user-molfile
 make machine :pre
 
 [Supporting info:]
 
 src/USER-MOLFILE: filenames -> commands
 src/USER-MOLFILE/README
 lib/molfile/README
 "dump molfile"_dump_molfile.html :ul
 
 :line
 
 USER-NETCDF package :link(USER-NETCDF),h4
 
 [Contents:]
 
 Dump styles for writing NetCDF formatted dump files.  NetCDF is a
 portable, binary, self-describing file format developed on top of
 HDF5. The file contents follow the AMBER NetCDF trajectory conventions
 (http://ambermd.org/netcdf/nctraj.xhtml), but include extensions.
 
 To use this package you must have the NetCDF library available on your
 system.
 
 Note that NetCDF files can be directly visualized with the following
 tools:
 
 "Ovito"_ovito (Ovito supports the AMBER convention and the extensions mentioned above)
 "VMD"_vmd_home
 "AtomEye"_atomeye (the libAtoms version of AtomEye contains a NetCDF reader not present in the standard distribution) :ul
 
 :link(ovito,http://www.ovito.org)
 :link(atomeye,http://www.libatoms.org)
 
 [Author:] Lars Pastewka (Karlsruhe Institute of Technology).
 
 [Install or un-install:]
 
 Note that to follow these steps, you need the standard NetCDF software
 package installed on your system.  The lib/netcdf/Makefile.lammps file
 has settings for NetCDF include and library files that LAMMPS needs to
 compile and link with this package.  If the settings are not valid
 for your system, you will need to edit the Makefile.lammps file.  See
 lib/netcdf/README for details.
 
 make yes-user-netcdf
 make machine :pre
 
 make no-user-netcdf
 make machine :pre
 
 [Supporting info:]
 
 src/USER-NETCDF: filenames -> commands
 src/USER-NETCDF/README
 lib/netcdf/README
 "dump netcdf"_dump_netcdf.html :ul
 
 :line
 
 USER-OMP package :link(USER-OMP),h4
 
 [Contents:]
 
 Hundreds of pair, fix, compute, bond, angle, dihedral, improper, and
 kspace styles which are altered to enable threading on many-core CPUs
 via OpenMP directives.  All of them have an "omp" in their style name.
 "Section 5.3.4"_accelerate_omp.html gives details of what hardware and
 compilers are required on your system, and how to build and use this
 package.  Its styles can be invoked at run time via the "-sf omp" or
 "-suffix omp" "command-line switches"_Section_start.html#start_6.
 Also see the "KOKKOS"_#KOKKOS, "OPT"_#OPT, and
 "USER-INTEL"_#USER-INTEL packages, which have styles optimized for
 CPUs.
 
 [Author:] Axel Kohlmeyer (Temple U).
 
 NOTE: The compile flags "-restrict" and "-fopenmp" must be used to
 build LAMMPS with the USER-OMP package, as well as the link flag
 "-fopenmp".  They should be added to the CCFLAGS and LINKFLAGS lines
 of your Makefile.machine.  See src/MAKE/OPTIONS/Makefile.omp for an
 example.
 
 Once you have an appropriate Makefile.machine, you can
 install/un-install the package and build LAMMPS in the usual manner:
 
 [Install or un-install:]
 
 make yes-user-omp
 make machine :pre
 
 make no-user-omp
 make machine :pre
 
 CCFLAGS: add -fopenmp and -restrict
 LINKFLAGS: add -fopenmp :ul
 
 [Supporting info:]
 
 src/USER-OMP: filenames -> commands
 src/USER-OMP/README
 "Section 5.3"_Section_accelerate.html#acc_3
 "Section 5.3.4"_accelerate_omp.html
 "Section 2.6 -sf omp"_Section_start.html#start_6
 "Section 2.6 -pk omp"_Section_start.html#start_6
 "package omp"_package.html
 Styles sections of "Section 3.5"_Section_commands.html#cmd_5 for styles followed by (o)
 "Benchmarks page"_http://lammps.sandia.gov/bench.html of web site :ul
 
 :line
 
 USER-PHONON package :link(USER-PHONON),h4
 
 [Contents:]
 
 A "fix phonon"_fix_phonon.html command that calculates dynamical
 matrices, which can then be used to compute phonon dispersion
 relations, directly from molecular dynamics simulations.
 
 [Author:] Ling-Ti Kong (Shanghai Jiao Tong University).
 
 [Install or un-install:]
 
 make yes-user-phonon
 make machine :pre
 
 make no-user-phonon
 make machine :pre
 
 [Supporting info:]
 
 src/USER-PHONON: filenames -> commands
 src/USER-PHONON/README
 "fix phonon"_fix_phonon.html
 examples/USER/phonon :ul
 
 :line
 
 USER-QMMM package :link(USER-QMMM),h4
 
 [Contents:]
 
 A "fix qmmm"_fix_qmmm.html command which allows LAMMPS to be used in a
 QM/MM simulation, currently only in combination with the "Quantum
 ESPRESSO"_espresso package.
 
 :link(espresso,http://www.quantum-espresso.org)
 
 To use this package you must have Quantum ESPRESSO available on your
 system.
 
 The current implementation only supports an ONIOM style mechanical
 coupling to the Quantum ESPRESSO plane wave DFT package.
 Electrostatic coupling is in preparation and the interface has been
 written in a manner that coupling to other QM codes should be possible
 without changes to LAMMPS itself.
 
 [Author:] Axel Kohlmeyer (Temple U).
 
 [Install or un-install:]
 
 Before building LAMMPS with this package, you must first build the
 QMMM library in lib/qmmm.  You can do this manually if you prefer;
 follow the first two steps explained in lib/qmmm/README.  You can
 also do it in one step from the lammps/src dir, using a command like
 these, which simply invoke the lib/qmmm/Install.py script with the
 specified args:
 
 make lib-qmmm                      # print help message
 make lib-qmmm args="-m gfortran"   # build with GNU Fortran compiler :pre
 
 The build should produce two files: lib/qmmm/libqmmm.a and
 lib/qmmm/Makefile.lammps.  The latter is copied from an existing
 Makefile.lammps.* and has settings needed to build LAMMPS with the
 QMMM library (though typically the settings are just blank).  If
 necessary, you can edit/create a new lib/qmmm/Makefile.machine file
 for your system, which should define an EXTRAMAKE variable to specify
 a corresponding Makefile.lammps.machine file.
 
 You can then install/un-install the package and build LAMMPS in the
 usual manner:
 
 make yes-user-qmmm
 make machine :pre
 
 make no-user-qmmm
 make machine :pre
 
 NOTE: The LAMMPS executable these steps produce is not yet functional
 for a QM/MM simulation.  You must also build Quantum ESPRESSO and
 create a new executable which links LAMMPS and Quantum ESPRESSO
 together.  These are steps 3 and 4 described in the lib/qmmm/README
 file.
 
 [Supporting info:]
 
 src/USER-QMMM: filenames -> commands
 src/USER-QMMM/README
 lib/qmmm/README
 "fix qmmm"_fix_qmmm.html
 lib/qmmm/example-ec/README
 lib/qmmm/example-mc/README :ul
 
 :line
 
 USER-QTB package :link(USER-QTB),h4
 
 [Contents:]
 
 Two fixes which provide a self-consistent quantum treatment of
 vibrational modes in a classical molecular dynamics simulation.  By
 coupling the MD simulation to a colored thermostat, it introduces zero
 point energy into the system, altering the energy power spectrum and
 the heat capacity to account for their quantum nature. This is useful
 when modeling systems at temperatures lower than their classical
 limits or when temperatures ramp across the classical limits in a
 simulation.
 
 [Author:] Yuan Shen (Stanford U).
 
 [Install or un-install:]
 
 make yes-user-qtb
 make machine :pre
 
 make no-user-qtb
 make machine :pre
 
 [Supporting info:]
 
 src/USER-QTB: filenames -> commands
 src/USER-QTB/README
 "fix qtb"_fix_qtb.html
 "fix qbmsst"_fix_qbmsst.html
 examples/USER/qtb :ul
 
 :line
 
 USER-QUIP package :link(USER-QUIP),h4
 
 [Contents:]
 
 A "pair_style quip"_pair_quip.html command which wraps the "QUIP
 libAtoms library"_quip, which includes a variety of interatomic
 potentials, including Gaussian Approximation Potential (GAP) models
 developed by the Cambridge University group.
 
 :link(quip,https://github.com/libAtoms/QUIP)
 
 To use this package you must have the QUIP libAtoms library available
 on your system.
 
 [Author:] Albert Bartok (Cambridge University)
 
 [Install or un-install:]
 
 Note that to follow these steps to compile and link to the QUIP
 library, you must first download and build QUIP on your system.  It
 can be obtained from GitHub.  See step 1 and step 1.1 in the
 lib/quip/README file for details on how to do this.  Note that it
 requires setting two environment variables, QUIP_ROOT and QUIP_ARCH,
 which will be accessed by the lib/quip/Makefile.lammps file which is
 used when you compile and link LAMMPS with this package.  You should
 only need to edit this file if the LAMMPS build can not use its
 settings to successfully build on your system.
 
 You can then install/un-install the package and build LAMMPS in the
 usual manner:
 
 make yes-user-quip
 make machine :pre
 
 make no-user-quip
 make machine :pre
 
 [Supporting info:]
 
 src/USER-QUIP: filenames -> commands
 src/USER-QUIP/README
 "pair_style quip"_pair_quip.html
 examples/USER/quip :ul
 
 :line
 
 USER-REAXC package :link(USER-REAXC),h4
 
 [Contents:]
 
 A pair style which implements the ReaxFF potential in C/C++ (in
 contrast to the "REAX package"_#REAX and its Fortran library).  ReaxFF
 is a universal reactive force field.  See the src/USER-REAXC/README file
 for more info on differences between the two packages.  Also two fixes
 for monitoring molecules as bonds are created and destroyed.
 
 [Author:] Hasan Metin Aktulga (MSU) while at Purdue University.
 
 [Install or un-install:]
 
 make yes-user-reaxc
 make machine :pre
 
 make no-user-reaxc
 make machine :pre
 
 [Supporting info:]
 
 src/USER-REAXC: filenames -> commands
 src/USER-REAXC/README
 "pair_style reax/c"_pair_reaxc.html
 "fix reax/c/bonds"_fix_reax_bonds.html
 "fix reax/c/species"_fix_reaxc_species.html
 examples/reax :ul
 
 :line
 
 USER-SMD package :link(USER-SMD),h4
 
 [Contents:]
 
 An atom style, fixes, computes, and several pair styles which
 implement smoothed Mach dynamics (SMD) for solids, which is a model
 related to smoothed particle hydrodynamics (SPH) for liquids (see the
 "USER-SPH package"_#USER-SPH).
 
 This package solves solid mechanics problems via a state of the art
 stabilized meshless method with hourglass control.  It can specify
 hydrostatic interactions independently from material strength models,
 i.e. pressure and deviatoric stresses are separated.  It provides many
 material models (Johnson-Cook, plasticity with hardening,
 Mie-Grueneisen, Polynomial EOS) and allows new material models to be
 added.  It implements rigid boundary conditions (walls) which can be
 specified as surface geometries from *.STL files.
 
 [Author:] Georg Ganzenmuller (Fraunhofer-Institute for High-Speed
 Dynamics, Ernst Mach Institute, Germany).
 
 [Install or un-install:]
 
 Before building LAMMPS with this package, you must first download the
 Eigen library.  Eigen is a template library, so you do not need to
 build it, just download it.  You can do this manually if you prefer;
 follow the instructions in lib/smd/README.  You can also do it in one
 step from the lammps/src dir, using a command like these, which simply
 invoke the lib/smd/Install.py script with the specified args:
 
 make lib-smd                            # print help message
-make lib-smd args="-g -l"               # download in default lib/smd/eigen-eigen-*
-make lib-smd args="-h . eigen -g -l"    # download in lib/smd/eigen
+make lib-smd args="-g -l"               # download and build in default lib/smd/eigen-eigen-*
+make lib-smd args="-h . eigen -g -l"    # download and build in lib/smd/eigen
 make lib-smd args="-h ~ eigen -g -l"    # download and build in ~/eigen :pre
 
 Note that the final -l switch is to create a symbolic (soft) link
 named "includelink" in lib/smd to point to the Eigen dir.  When LAMMPS
 builds it will use this link.  You should not need to edit the
 lib/smd/Makefile.lammps file.
 
 You can then install/un-install the package and build LAMMPS in the
 usual manner:
 
 make yes-user-smd
 make machine :pre
 
 make no-user-smd
 make machine :pre
 
 [Supporting info:]
 
 src/USER-SMD: filenames -> commands
 src/USER-SMD/README
 doc/PDF/SMD_LAMMPS_userguide.pdf
 examples/USER/smd
 http://lammps.sandia.gov/movies.html#smd :ul
 
 :line
 
 USER-SMTBQ package :link(USER-SMTBQ),h4
 
 [Contents:]
 
 A pair style which implements a Second Moment Tight Binding model with
 QEq charge equilibration (SMTBQ) potential for the description of
 ionocovalent bonds in oxides.
 
 [Authors:] Nicolas Salles, Emile Maras, Olivier Politano, and Robert
 Tetot (LAAS-CNRS, France).
 
 [Install or un-install:]
 
 make yes-user-smtbq
 make machine :pre
 
 make no-user-smtbq
 make machine :pre
 
 [Supporting info:]
 
 src/USER-SMTBQ: filenames -> commands
 src/USER-SMTBQ/README
 "pair_style smtbq"_pair_smtbq.html
 examples/USER/smtbq :ul
 
 :line
 
 USER-SPH package :link(USER-SPH),h4
 
 [Contents:]
 
 An atom style, fixes, computes, and several pair styles which
 implement smoothed particle hydrodynamics (SPH) for liquids.  See the
 related "USER-SMD package"_#USER-SMD for smoothed Mach dynamics
 (SMD) for solids.
 
 This package contains ideal gas, Lennard-Jones equations of state,
 Tait, and full support for complete (i.e. internal-energy dependent)
 equations of state.  It allows for plain or Monaghan's XSPH integration
 of the equations of motion.  It has options for density continuity or
 density summation to propagate the density field.  It has
 "set"_set.html command options to set the internal energy and density
 of particles from the input script and allows the same quantities to
 be output with thermodynamic output or to dump files via the "compute
 property/atom"_compute_property_atom.html command.
 
 [Author:] Georg Ganzenmuller (Fraunhofer-Institute for High-Speed
 Dynamics, Ernst Mach Institute, Germany).
 
 [Install or un-install:]
 
 make yes-user-sph
 make machine :pre
 
 make no-user-sph
 make machine :pre
 
 [Supporting info:]
 
 src/USER-SPH: filenames -> commands
 src/USER-SPH/README
 doc/PDF/SPH_LAMMPS_userguide.pdf
 examples/USER/sph
 http://lammps.sandia.gov/movies.html#sph :ul
 
 :line
 
 USER-TALLY package :link(USER-TALLY),h4
 
 [Contents:]
 
 Several compute styles that can be called when pairwise interactions
 are calculated to tally information (forces, heat flux, energy,
 stress, etc) about individual interactions.
 
 [Author:] Axel Kohlmeyer (Temple U).
 
 [Install or un-install:]
 
 make yes-user-tally
 make machine :pre
 
 make no-user-tally
 make machine :pre
 
 [Supporting info:]
 
 src/USER-TALLY: filenames -> commands
 src/USER-TALLY/README
 "compute */tally"_compute_tally.html
 examples/USER/tally :ul
 
 :line
 
 USER-VTK package :link(USER-VTK),h4
 
 [Contents:]
 
 A "dump vtk"_dump_vtk.html command which outputs
 snapshot info in the "VTK format"_vtk, enabling visualization by
 "Paraview"_paraview or other visualization packages.
 
 :link(vtk,http://www.vtk.org)
 :link(paraview,http://www.paraview.org)
 
 To use this package you must have the VTK library available on your
 system.
 
 [Authors:] Richard Berger (JKU) and Daniel Queteschiner (DCS Computing).
 
 [Install or un-install:]
 
 The lib/vtk/Makefile.lammps file has settings for accessing VTK files
 and its library, which are required for LAMMPS to build and link with
 this package.  If the settings are not valid for your system, check if
 one of the other lib/vtk/Makefile.lammps.* files is compatible and
 copy it to Makefile.lammps.  If none of the provided files work, you
 will need to edit the Makefile.lammps file.
 
 You can then install/un-install the package and build LAMMPS in the
 usual manner:
 
 make yes-user-vtk
 make machine :pre
 
 make no-user-vtk
 make machine :pre
 
 [Supporting info:]
 
 src/USER-VTK: filenames -> commands
 src/USER-VTK/README
 lib/vtk/README
 "dump vtk"_dump_vtk.html :ul
diff --git a/doc/src/accelerate_gpu.txt b/doc/src/accelerate_gpu.txt
index 68e9fa477..2723b6e97 100644
--- a/doc/src/accelerate_gpu.txt
+++ b/doc/src/accelerate_gpu.txt
@@ -1,254 +1,249 @@
 "Previous Section"_Section_packages.html - "LAMMPS WWW Site"_lws -
 "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
 
 :link(lws,http://lammps.sandia.gov)
 :link(ld,Manual.html)
 :link(lc,Section_commands.html#comm)
 
 :line
 
 "Return to Section accelerate overview"_Section_accelerate.html
 
 5.3.1 GPU package :h5
 
 The GPU package was developed by Mike Brown at ORNL and his
 collaborators, particularly Trung Nguyen (ORNL).  It provides GPU
 versions of many pair styles, including the 3-body Stillinger-Weber
 pair style, and for "kspace_style pppm"_kspace_style.html for
 long-range Coulombics.  It has the following general features:
 
 It is designed to exploit common GPU hardware configurations where one
 or more GPUs are coupled to many cores of one or more multi-core CPUs,
 e.g. within a node of a parallel machine. :ulb,l
 
 Atom-based data (e.g. coordinates, forces) moves back-and-forth
 between the CPU(s) and GPU every timestep. :l
 
 Neighbor lists can be built on the CPU or on the GPU :l
 
 The charge assignment and force interpolation portions of PPPM can be
 run on the GPU.  The FFT portion, which requires MPI communication
 between processors, runs on the CPU. :l
 
 Asynchronous force computations can be performed simultaneously on the
 CPU(s) and GPU. :l
 
 It allows for GPU computations to be performed in single or double
 precision, or in mixed-mode precision, where pairwise forces are
 computed in single precision, but accumulated into double-precision
 force vectors. :l
 
 LAMMPS-specific code is in the GPU package.  It makes calls to a
 generic GPU library in the lib/gpu directory.  This library provides
 NVIDIA support as well as more general OpenCL support, so that the
 same functionality can eventually be supported on a variety of GPU
 hardware. :l
 :ule
 
 Here is a quick overview of how to enable and use the GPU package:
 
 build the library in lib/gpu for your GPU hardware with the desired precision settings
 install the GPU package and build LAMMPS as usual
 use the mpirun command to set the number of MPI tasks/node which determines the number of MPI tasks/GPU
 specify the # of GPUs per node
 use GPU styles in your input script :ul
 
 The latter two steps can be done using the "-pk gpu" and "-sf gpu"
 "command-line switches"_Section_start.html#start_6 respectively.  Or
 the effect of the "-pk" or "-sf" switches can be duplicated by adding
 the "package gpu"_package.html or "suffix gpu"_suffix.html commands
 respectively to your input script.
 
 [Required hardware/software:]
 
 To use this package, you currently need to have an NVIDIA GPU and
 install the NVIDIA Cuda software on your system:
 
 Check if you have an NVIDIA GPU: cat /proc/driver/nvidia/gpus/0/information
 Go to http://www.nvidia.com/object/cuda_get.html
 Install a driver and toolkit appropriate for your system (SDK is not necessary)
 Run lammps/lib/gpu/nvc_get_devices (after building the GPU library, see below) to list supported devices and properties :ul
 
 [Building LAMMPS with the GPU package:]
 
 This requires two steps (a,b): build the GPU library, then build
 LAMMPS with the GPU package.
 
-You can do both these steps in one line, using the src/Make.py script,
-described in "Section 4"_Section_packages.html of the manual.
-Type "Make.py -h" for help.  If run from the src directory, this
-command will create src/lmp_gpu using src/MAKE/Makefile.mpi as the
-starting Makefile.machine:
-
-Make.py -p gpu -gpu mode=single arch=31 -o gpu -a lib-gpu file mpi :pre
+You can do both these steps in one line as described in
+"Section 4"_Section_packages.html of the manual.
 
 Or you can follow these two (a,b) steps:
 
 (a) Build the GPU library
 
 The GPU library is in lammps/lib/gpu.  Select a Makefile.machine (in
 lib/gpu) appropriate for your system.  You should pay special
 attention to 3 settings in this makefile.
 
 CUDA_HOME = needs to be where NVIDIA Cuda software is installed on your system
 CUDA_ARCH = needs to be appropriate to your GPUs
 CUDA_PREC = precision (double, mixed, single) you desire :ul
 
 See lib/gpu/Makefile.linux.double for examples of the ARCH settings
 for different GPU choices, e.g. Fermi vs Kepler.  It also lists the
 possible precision settings:
 
 CUDA_PREC = -D_SINGLE_SINGLE  # single precision for all calculations
 CUDA_PREC = -D_DOUBLE_DOUBLE  # double precision for all calculations
 CUDA_PREC = -D_SINGLE_DOUBLE  # accumulation of forces, etc, in double :pre
 
 The last setting is the mixed mode referred to above.  Note that your
 GPU must support double precision to use either the 2nd or 3rd of
 these settings.
 
 To build the library, type:
 
 make -f Makefile.machine :pre
 
 If successful, it will produce the files libgpu.a and Makefile.lammps.
 
 The latter file has 3 settings that need to be appropriate for the
 paths and settings for the CUDA system software on your machine.
 Makefile.lammps is a copy of the file specified by the EXTRAMAKE
 setting in Makefile.machine.  You can change EXTRAMAKE or create your
 own Makefile.lammps.machine if needed.
 
 Note that to change the precision of the GPU library, you need to
 re-build the entire library.  Do a "clean" first, e.g. "make -f
 Makefile.linux clean", followed by the make command above.
 
 (b) Build LAMMPS with the GPU package
 
 cd lammps/src
 make yes-gpu
 make machine :pre
 
 No additional compile/link flags are needed in Makefile.machine.
 
 Note that if you change the GPU library precision (discussed above)
 and rebuild the GPU library, then you also need to re-install the GPU
 package and re-build LAMMPS, so that all affected files are
 re-compiled and linked to the new GPU library.
 
 [Run with the GPU package from the command line:]
 
 The mpirun or mpiexec command sets the total number of MPI tasks used
 by LAMMPS (one or multiple per compute node) and the number of MPI
 tasks used per node.  E.g. the mpirun command in MPICH does this via
 its -np and -ppn switches.  Ditto for OpenMPI via -np and -npernode.
 
 When using the GPU package, you cannot assign more than one GPU to a
 single MPI task.  However multiple MPI tasks can share the same GPU,
 and in many cases it will be more efficient to run this way.  Likewise
 it may be more efficient to use fewer MPI tasks/node than the available
 # of CPU cores.  Assignment of multiple MPI tasks to a GPU will happen
 automatically if you create more MPI tasks/node than there are
 GPUs/node.  E.g. with 8 MPI tasks/node and 2 GPUs, each GPU will be
 shared by 4 MPI tasks.
 
 Use the "-sf gpu" "command-line switch"_Section_start.html#start_6,
 which will automatically append "gpu" to styles that support it.  Use
 the "-pk gpu Ng" "command-line switch"_Section_start.html#start_6 to
 set Ng = # of GPUs/node to use.
 
 lmp_machine -sf gpu -pk gpu 1 -in in.script                         # 1 MPI task uses 1 GPU
 mpirun -np 12 lmp_machine -sf gpu -pk gpu 2 -in in.script           # 12 MPI tasks share 2 GPUs on a single 16-core (or whatever) node
 mpirun -np 48 -ppn 12 lmp_machine -sf gpu -pk gpu 2 -in in.script   # ditto on 4 16-core nodes :pre
 
 Note that if the "-sf gpu" switch is used, it also issues a default
 "package gpu 1"_package.html command, which sets the number of
 GPUs/node to 1.
 
 Using the "-pk" switch explicitly allows for setting of the number of
 GPUs/node to use and additional options.  Its syntax is the same as
 the "package gpu" command.  See the "package"_package.html
 command doc page for details, including the default values used for
 all its options if it is not specified.
 
 Note that the default for the "package gpu"_package.html command is to
 set the Newton flag to "off" for pairwise interactions.  It does not
 affect the setting for bonded interactions (LAMMPS default is "on").
 The "off" setting for pairwise interaction is currently required for
 GPU package pair styles.
 
 [Or run with the GPU package by editing an input script:]
 
 The discussion above for the mpirun/mpiexec command, MPI tasks/node,
 and use of multiple MPI tasks/GPU is the same.
 
 Use the "suffix gpu"_suffix.html command, or you can explicitly add a
 "gpu" suffix to individual styles in your input script, e.g.
 
 pair_style lj/cut/gpu 2.5 :pre
 
 You must also use the "package gpu"_package.html command to enable the
 GPU package, unless the "-sf gpu" or "-pk gpu" "command-line
 switches"_Section_start.html#start_6 were used.  It specifies the
 number of GPUs/node to use, as well as other options.
 
 [Speed-ups to expect:]
 
 The performance of a GPU versus a multi-core CPU is a function of your
 hardware, which pair style is used, the number of atoms/GPU, and the
 precision used on the GPU (double, single, mixed).
 
 See the "Benchmark page"_http://lammps.sandia.gov/bench.html of the
 LAMMPS web site for performance of the GPU package on various
 hardware, including the Titan HPC platform at ORNL.
 
 You should also experiment with how many MPI tasks per GPU to use to
 give the best performance for your problem and machine.  This is also
 a function of the problem size and the pair style being used.
 Likewise, you should experiment with the precision setting for the GPU
 library to see if single or mixed precision will give accurate
 results, since they will typically be faster.
 
 [Guidelines for best performance:]
 
 Using multiple MPI tasks per GPU will often give the best performance,
 as allowed by most multi-core CPU/GPU configurations. :ulb,l
 
 If the number of particles per MPI task is small (e.g. 100s of
 particles), it can be more efficient to run with fewer MPI tasks per
 GPU, even if you do not use all the cores on the compute node. :l
 
 The "package gpu"_package.html command has several options for tuning
 performance.  Neighbor lists can be built on the GPU or CPU.  Force
 calculations can be dynamically balanced across the CPU cores and
 GPUs.  GPU-specific settings can be made which can be optimized
 for different hardware.  See the "package"_package.html command
 doc page for details. :l
 
 As described by the "package gpu"_package.html command, GPU
 accelerated pair styles can perform computations asynchronously with
 CPU computations. The "Pair" time reported by LAMMPS will be the
 maximum of the time required to complete the CPU pair style
 computations and the time required to complete the GPU pair style
 computations. Any time spent for GPU-enabled pair styles for
 computations that run simultaneously with "bond"_bond_style.html,
 "angle"_angle_style.html, "dihedral"_dihedral_style.html,
 "improper"_improper_style.html, and "long-range"_kspace_style.html
 calculations will not be included in the "Pair" time. :l
 
 When the {mode} setting for the package gpu command is force/neigh,
 the time for neighbor list calculations on the GPU will be added into
 the "Pair" time, not the "Neigh" time.  An additional breakdown of the
 times required for various tasks on the GPU (data copy, neighbor
 calculations, force computations, etc) is output only with the LAMMPS
 screen output (not in the log file) at the end of each run.  These
 timings represent total time spent on the GPU for each routine,
 regardless of asynchronous CPU calculations. :l
 
 The output section "GPU Time Info (average)" reports "Max Mem / Proc".
 This is the maximum memory used at one time on the GPU for data
 storage by a single MPI process. :l
 :ule
 
 [Restrictions:]
 
 None.
diff --git a/doc/src/accelerate_intel.txt b/doc/src/accelerate_intel.txt
index 74ae9d9a4..9eb295e0d 100644
--- a/doc/src/accelerate_intel.txt
+++ b/doc/src/accelerate_intel.txt
@@ -1,517 +1,514 @@
 "Previous Section"_Section_packages.html - "LAMMPS WWW Site"_lws -
 "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
 
 :link(lws,http://lammps.sandia.gov)
 :link(ld,Manual.html)
 :link(lc,Section_commands.html#comm)
 
 :line
 
 "Return to Section accelerate overview"_Section_accelerate.html
 
 5.3.2 USER-INTEL package :h5
 
 The USER-INTEL package is maintained by Mike Brown at Intel
 Corporation.  It provides two methods for accelerating simulations,
 depending on the hardware you have.  The first is acceleration on
 Intel CPUs by running in single, mixed, or double precision with
 vectorization.  The second is acceleration on Intel Xeon Phi
 coprocessors via offloading neighbor list and non-bonded force
 calculations to the Phi.  The same C++ code is used in both cases.
 When offloading to a coprocessor from a CPU, the same routine is run
 twice, once on the CPU and once with an offload flag. This allows
 LAMMPS to run on the CPU cores and coprocessor cores simultaneously.
 
 [Currently Available USER-INTEL Styles:]
 
 Angle Styles: charmm, harmonic :ulb,l
 Bond Styles: fene, harmonic :l
 Dihedral Styles: charmm, harmonic, opls :l
 Fixes: nve, npt, nvt, nvt/sllod :l
 Improper Styles: cvff, harmonic :l
 Pair Styles: buck/coul/cut, buck/coul/long, buck, eam, gayberne,
 charmm/coul/long, lj/cut, lj/cut/coul/long, lj/long/coul/long, sw, tersoff :l
 K-Space Styles: pppm, pppm/disp :l
 :ule
 
 [Speed-ups to expect:]
 
 The speedups will depend on your simulation, the hardware, which
 styles are used, the number of atoms, and the floating-point
 precision mode. Performance improvements are shown compared to
 LAMMPS {without using other acceleration packages} as these are
 under active development (and subject to performance changes). The
 measurements were performed using the input files available in
 the src/USER-INTEL/TEST directory with the provided run script.
 These are scalable in size; the results given are with 512K
 particles (524K for Liquid Crystal). Most of the simulations are
 standard LAMMPS benchmarks (indicated by the filename extension in
 parentheses) with modifications to the run length and to add a
 warmup run (for use with offload benchmarks).
 
 :c,image(JPG/user_intel.png)
 
 Results are speedups obtained on Intel Xeon E5-2697v4 processors
 (code-named Broadwell) and Intel Xeon Phi 7250 processors
 (code-named Knights Landing) with "June 2017" LAMMPS built with
 Intel Parallel Studio 2017 update 2. Results are with 1 MPI task
 per physical core. See {src/USER-INTEL/TEST/README} for the raw
 simulation rates and instructions to reproduce.
 
 :line
 
 [Accuracy and order of operations:]
 
 In most molecular dynamics software, parallelization parameters
 (# of MPI, OpenMP, and vectorization) can change the results due
 to changing the order of operations with finite-precision
 calculations. The USER-INTEL package is deterministic. This means
 that the results should be reproducible from run to run with the
 {same} parallel configurations and when using deterministic
 libraries or library settings (MPI, OpenMP, FFT). However, there
 are differences in the USER-INTEL package that can change the
 order of operations compared to LAMMPS without acceleration:
 
 Neighbor lists can be created in a different order :ulb,l
 Bins used for sorting atoms can be oriented differently :l
 The default stencil order for PPPM is 7. By default, LAMMPS will
 calculate other PPPM parameters to fit the desired accuracy with
 this order :l
 The {newton} setting applies to all atoms, not just atoms shared
 between MPI tasks :l
 Vectorization can change the order for adding pairwise forces :l
 :ule
 
 The precision mode (described below) used with the USER-INTEL
 package can change the {accuracy} of the calculations. For the
 default {mixed} precision option, calculations between pairs or
 triplets of atoms are performed in single precision, intended to
 be within the inherent error of MD simulations. All accumulation
 is performed in double precision to prevent the error from growing
 with the number of atoms in the simulation. {Single} precision
 mode should not be used without appropriate validation.
 
 :line
 
 [Quick Start for Experienced Users:]
 
 LAMMPS should be built with the USER-INTEL package installed.
 Simulations should be run with 1 MPI task per physical {core},
 not {hardware thread}.
 
 Edit src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi as necessary. :ulb,l
 Set the environment variable KMP_BLOCKTIME=0 :l
 "-pk intel 0 omp $t -sf intel" added to LAMMPS command-line :l
 $t should be 2 for Intel Xeon CPUs and 2 or 4 for Intel Xeon Phi :l
 For some of the simple 2-body potentials without long-range
 electrostatics, performance and scalability can be better with
 the "newton off" setting added to the input script :l
 For simulations on higher node counts, add "processors * * * grid 
 numa" to the beginning of the input script for better scalability :l
 If using {kspace_style pppm} in the input script, add
 "kspace_modify diff ad" for better performance :l
 :ule
 
 For Intel Xeon Phi CPUs:
 
 Runs should be performed using MCDRAM. :ulb,l
 :ule
 
 For simulations using {kspace_style pppm} on Intel CPUs
 supporting AVX-512:
 
 Add "kspace_modify diff ad" to the input script :ulb,l
 The command-line option should be changed to
 "-pk intel 0 omp $r lrt yes -sf intel" where $r is the number of
 threads minus 1. :l
 Do not use thread affinity (set KMP_AFFINITY=none) :l
 The "newton off" setting may provide better scalability :l
 :ule
 
 For Intel Xeon Phi coprocessors (Offload):
 
 Edit src/MAKE/OPTIONS/Makefile.intel_coprocessor as necessary :ulb,l
 "-pk intel N omp 1" added to command-line where N is the number of
 coprocessors per node. :l
 :ule
 
 :line
 
 [Required hardware/software:]
 
 In order to use offload to coprocessors, an Intel Xeon Phi
 coprocessor and an Intel compiler are required. For this, the
 recommended version of the Intel compiler is 14.0.1.106 or
 versions 15.0.2.044 and higher.
 
 Although any compiler can be used with the USER-INTEL package,
 currently, vectorization directives are disabled by default when
 not using Intel compilers due to lack of standard support and
 observations of decreased performance. The OpenMP standard now
 supports directives for vectorization and we plan to transition the
 code to this standard once it is available in most compilers. We
 expect this to allow improved performance and support with other
 compilers.
 
 For Intel Xeon Phi x200 series processors (code-named Knights
 Landing), there are multiple configuration options for the hardware.
 For best performance, we recommend that the MCDRAM is configured in
 "Flat" mode and with the cluster mode set to "Quadrant" or "SNC4".
 "Cache" mode can also be used, although the performance might be
 slightly lower.
 
 [Notes about Simultaneous Multithreading:]
 
 Modern CPUs often support Simultaneous Multithreading (SMT). On
 Intel processors, this is called Hyper-Threading (HT) technology.
 SMT is hardware support for running multiple threads efficiently on
 a single core. {Hardware threads} or {logical cores} are often used
 to refer to the number of threads that are supported in hardware.
 For example, the Intel Xeon E5-2697v4 processor is described
 as having 36 cores and 72 threads. This means that 36 MPI processes
 or OpenMP threads can run simultaneously on separate cores, but that
 up to 72 MPI processes or OpenMP threads can be running on the CPU
 without costly operating system context switches.
 
 Molecular dynamics simulations will often run faster when making use
 of SMT. If a thread becomes stalled, for example because it is
 waiting on data that has not yet arrived from memory, another thread
 can start running so that the CPU pipeline is still being used
 efficiently. Although benefits can be seen by launching an MPI task
 for every hardware thread, for multinode simulations, we recommend
 that OpenMP threads are used for SMT instead, either with the
 USER-INTEL package, "USER-OMP package"_accelerate_omp.html, or
 "KOKKOS package"_accelerate_kokkos.html. In the example above, up
 to 36X speedups can be observed by using all 36 physical cores with
 LAMMPS. By using all 72 hardware threads, an additional 10-30%
 performance gain can be achieved.
 
 The BIOS on many platforms allows SMT to be disabled, however, we do
 not recommend this on modern processors as there is little to no
 benefit for any software package in most cases. The operating system
 will report every hardware thread as a separate core allowing one to
 determine the number of hardware threads available. On Linux systems,
 this information can normally be obtained with:
 
 cat /proc/cpuinfo :pre
 
 [Building LAMMPS with the USER-INTEL package:]
 
 NOTE: See the src/USER-INTEL/README file for additional flags that
 might be needed for best performance on Intel server processors
 code-named "Skylake".
 
 The USER-INTEL package must be installed into the source directory:
 
 make yes-user-intel :pre
 
 Several example Makefiles for building with the Intel compiler are
 included with LAMMPS in the src/MAKE/OPTIONS/ directory:
 
 Makefile.intel_cpu_intelmpi # Intel Compiler, Intel MPI, No Offload
 Makefile.knl                # Intel Compiler, Intel MPI, No Offload
 Makefile.intel_cpu_mpich    # Intel Compiler, MPICH, No Offload
 Makefile.intel_cpu_openmpi  # Intel Compiler, OpenMPI, No Offload
 Makefile.intel_coprocessor  # Intel Compiler, Intel MPI, Offload :pre
 
 Makefile.knl is identical to Makefile.intel_cpu_intelmpi except that
 it explicitly specifies that vectorization should be for Intel
 Xeon Phi x200 processors making it easier to cross-compile. For
 users with recent installations of Intel Parallel Studio, the
 process can be as simple as:
 
 make yes-user-intel
 source /opt/intel/parallel_studio_xe_2016.3.067/psxevars.sh
 # or psxevars.csh for C-shell
 make intel_cpu_intelmpi :pre
 
-Alternatively, the build can be accomplished with the src/Make.py
-script, described in "Section 4"_Section_packages.html of the
-manual. Type "Make.py -h" for help. For an example:
-
-Make.py -v -p intel omp -intel cpu -a file intel_cpu_intelmpi :pre
+Alternatively, this can be done as a single command with
+suitable make command invocations. This is discussed in "Section
+4"_Section_packages.html of the manual.
 
 Note that if you build with support for a Phi coprocessor, the same
 binary can be used on nodes with or without coprocessors installed.
 However, if you do not have coprocessors on your system, building
 without offload support will produce a smaller binary.
 
 The general requirements for Makefiles with the USER-INTEL package
 are as follows. "-DLAMMPS_MEMALIGN=64" is required for CCFLAGS. When
 using Intel compilers, "-restrict" is required and "-qopenmp" is
 highly recommended for CCFLAGS and LINKFLAGS. LIB should include
 "-ltbbmalloc". For builds supporting offload, "-DLMP_INTEL_OFFLOAD"
 is required for CCFLAGS and "-qoffload" is required for LINKFLAGS.
 Other recommended CCFLAG options for best performance are
 "-O2 -fno-alias -ansi-alias -qoverride-limits -fp-model fast=2
--no-prec-div". The Make.py command will add all of these
-automatically.
+-no-prec-div".
 
 NOTE: The vectorization and math capabilities can differ depending on
 the CPU. For Intel compilers, the "-x" flag specifies the type of
 processor for which to optimize. "-xHost" specifies that the compiler
 should build for the processor used for compiling. For Intel Xeon Phi
 x200 series processors, this option is "-xMIC-AVX512". For fourth
 generation Intel Xeon (v4/Broadwell) processors, "-xCORE-AVX2" should
 be used. For older Intel Xeon processors, "-xAVX" will perform best
 in general for the different simulations in LAMMPS. The default
 in most of the example Makefiles is to use "-xHost", however this
 should not be used when cross-compiling.
 
 [Running LAMMPS with the USER-INTEL package:]
 
 Running LAMMPS with the USER-INTEL package is similar to normal use
 with the exceptions that one should 1) specify that LAMMPS should use
 the USER-INTEL package, 2) specify the number of OpenMP threads, and
 3) optionally specify the specific LAMMPS styles that should use the
 USER-INTEL package. 1) and 2) can be performed from the command-line
 or by editing the input script. 3) requires editing the input script.
 Advanced performance tuning options are also described below to get
 the best performance.
 
 When running on a single node (including runs using offload to a
 coprocessor), best performance is normally obtained by using 1 MPI
 task per physical core and additional OpenMP threads with SMT. For
 Intel Xeon processors, 2 OpenMP threads should be used for SMT.
 For Intel Xeon Phi CPUs, 2 or 4 OpenMP threads should be used
 (best choice depends on the simulation). In cases where the user
 specifies that LRT mode is used (described below), 1 or 3 OpenMP
 threads should be used. For multi-node runs, using 1 MPI task per
 physical core will often perform best, however, depending on the
 machine and scale, users might get better performance by decreasing
 the number of MPI tasks and using more OpenMP threads. For
 performance, the product of the number of MPI tasks and OpenMP
 threads should not exceed the number of available hardware threads in
 almost all cases.
 
 NOTE: Setting core affinity is often used to pin MPI tasks and OpenMP
 threads to a core or group of cores so that memory access can be
 uniform. Unless disabled at build time, affinity for MPI tasks and
 OpenMP threads on the host (CPU) will be set by default
 {when using offload to a coprocessor}. In this case, it is unnecessary
 to use other methods to control affinity (e.g. taskset, numactl,
 I_MPI_PIN_DOMAIN, etc.). This can be disabled with the {no_affinity}
 option to the "package intel"_package.html command or by disabling the
 option at build time (by adding -DINTEL_OFFLOAD_NOAFFINITY to the
 CCFLAGS line of your Makefile). Disabling this option is not
 recommended, especially when running on a machine with Intel
 Hyper-Threading technology disabled.
 
 [Run with the USER-INTEL package from the command line:]
 
 To enable USER-INTEL optimizations for all available styles used in
 the input script, the "-sf intel"
 "command-line switch"_Section_start.html#start_6 can be used without
 any requirement for editing the input script. This switch will
 automatically append "intel" to styles that support it. It also
 invokes a default command: "package intel 1"_package.html. This
 package command is used to set options for the USER-INTEL package.
 The default package command will specify that USER-INTEL calculations
 are performed in mixed precision, that the number of OpenMP threads
 is specified by the OMP_NUM_THREADS environment variable, and that
 if coprocessors are present and the binary was built with offload
 support, that 1 coprocessor per node will be used with automatic
 balancing of work between the CPU and the coprocessor.
 
 You can specify different options for the USER-INTEL package by using
 the "-pk intel Nphi" "command-line switch"_Section_start.html#start_6
 with keyword/value pairs as specified in the documentation. Here,
 Nphi = # of Xeon Phi coprocessors/node (ignored without offload
 support). Common options to the USER-INTEL package include {omp} to
 override any OMP_NUM_THREADS setting and specify the number of OpenMP
 threads, {mode} to set the floating-point precision mode, and
 {lrt} to enable Long-Range Thread mode as described below. See the
 "package intel"_package.html command for details, including the
 default values used for all its options if not specified, and how to
 set the number of OpenMP threads via the OMP_NUM_THREADS environment
 variable if desired.
 
 Examples (see documentation for your MPI/Machine for differences in
 launching MPI applications):
 
 mpirun -np 72 -ppn 36 lmp_machine -sf intel -in in.script                                 # 2 nodes, 36 MPI tasks/node, $OMP_NUM_THREADS OpenMP Threads
 mpirun -np 72 -ppn 36 lmp_machine -sf intel -in in.script -pk intel 0 omp 2 mode double   # Don't use any coprocessors that might be available, use 2 OpenMP threads for each task, use double precision :pre
 
 [Or run with the USER-INTEL package by editing an input script:]
 
 As an alternative to adding command-line arguments, the input script
 can be edited to enable the USER-INTEL package. This requires adding
 the "package intel"_package.html command to the top of the input
 script. For the second example above, this would be:
 
 package intel 0 omp 2 mode double :pre
 
 To enable the USER-INTEL package only for individual styles, you can
 add an "intel" suffix to the individual style, e.g.:
 
 pair_style lj/cut/intel 2.5 :pre
 
 Alternatively, the "suffix intel"_suffix.html command can be added to
 the input script to enable USER-INTEL styles for the commands that
 follow in the input script.
 
 [Tuning for Performance:]
 
 NOTE: The USER-INTEL package will perform better with modifications
 to the input script when "PPPM"_kspace_style.html is used:
 "kspace_modify diff ad"_kspace_modify.html should be added to the
 input script.
 
 Long-Range Thread (LRT) mode is an option to the "package
 intel"_package.html command that can improve performance when using
 "PPPM"_kspace_style.html for long-range electrostatics on processors
 with SMT. It generates an extra pthread for each MPI task. The thread
 is dedicated to performing some of the PPPM calculations and MPI
 communications. On Intel Xeon Phi x200 series CPUs, this will likely
 always improve performance, even on a single node. On Intel Xeon
 processors, using this mode might result in better performance when
 using multiple nodes, depending on the machine. To use this mode,
 specify that the number of OpenMP threads is one less than would
 normally be used for the run and add the "lrt yes" option to the "-pk"
 command-line suffix or "package intel" command. For example, if a run
 would normally perform best with "-pk intel 0 omp 4", instead use
 "-pk intel 0 omp 3 lrt yes". When using LRT, you should set the
 environment variable "KMP_AFFINITY=none". LRT mode is not supported
 when using offload.
 
 NOTE: Changing the "newton"_newton.html setting to off can improve
 performance and/or scalability for simple 2-body potentials such as
 lj/cut or when using LRT mode on processors supporting AVX-512.
 
 Not all styles are supported in the USER-INTEL package. You can mix
 the USER-INTEL package with styles from the "OPT"_accelerate_opt.html
 package or the "USER-OMP package"_accelerate_omp.html. Of course,
 this requires that these packages were installed at build time. This
 can be performed automatically by using "-sf hybrid intel opt" or
 "-sf hybrid intel omp" command-line options. Alternatively, the "opt"
 and "omp" suffixes can be appended manually in the input script. For
 the latter, the "package omp"_package.html command must be in the
 input script or the "-pk omp Nt" "command-line
 switch"_Section_start.html#start_6 must be used where Nt is the
 number of OpenMP threads. The number of OpenMP threads should not be
 set differently for the different packages. Note that the "suffix
 hybrid intel omp"_suffix.html command can also be used within the
 input script to automatically append the "omp" suffix to styles when
 USER-INTEL styles are not available.
 
 NOTE: For simulations on higher node counts, add "processors * * * 
 grid numa"_processors.html to the beginning of the input script for
 better scalability.
 
 When running on many nodes, performance might be better when using
 fewer OpenMP threads and more MPI tasks. This will depend on the
 simulation and the machine. Using the "verlet/split"_run_style.html
 run style might also give better performance for simulations with
 "PPPM"_kspace_style.html electrostatics. Note that this is an
 alternative to LRT mode and the two cannot be used together.
 
 Currently, when using Intel MPI with Intel Xeon Phi x200 series
 CPUs, better performance might be obtained by setting the
 environment variable "I_MPI_SHM_LMT=shm" for Linux kernels that do
 not yet have full support for AVX-512. Runs on Intel Xeon Phi x200
 series processors will always perform better using MCDRAM. Please
 consult your system documentation for the best approach to specify
 that MPI runs are performed in MCDRAM.
 
 [Tuning for Offload Performance:]
 
 The default settings for offload should give good performance.
 
 When using LAMMPS with offload to Intel coprocessors, best performance
 will typically be achieved with concurrent calculations performed on
 both the CPU and the coprocessor. This is achieved by offloading only
 a fraction of the neighbor and pair computations to the coprocessor or
 using "hybrid"_pair_hybrid.html pair styles where only one style uses
 the "intel" suffix. For simulations with long-range electrostatics or
 bond, angle, dihedral, improper calculations, computation and data
 transfer to the coprocessor will run concurrently with computations
 and MPI communications for these calculations on the host CPU. This
 is illustrated in the figure below for the rhodopsin protein benchmark
 running on E5-2697v2 processors with an Intel Xeon Phi 7120p
 coprocessor. In this plot, the vertical axis is time and routines
 running at the same time are running concurrently on both the host and
 the coprocessor.
 
 :c,image(JPG/offload_knc.png)
 
 The fraction of the offloaded work is controlled by the {balance}
 keyword in the "package intel"_package.html command. A balance of 0
 runs all calculations on the CPU.  A balance of 1 runs all
 supported calculations on the coprocessor.  A balance of 0.5 runs half
 of the calculations on the coprocessor.  Setting the balance to -1
 (the default) will enable dynamic load balancing that continuously
 adjusts the fraction of offloaded work throughout the simulation.
 Because data transfer cannot be timed, this option typically produces
 results within 5 to 10 percent of the optimal fixed balance.
 
 If running short benchmark runs with dynamic load balancing, adding a
 short warm-up run (10-20 steps) will allow the load-balancer to find a
 near-optimal setting that will carry over to additional runs.
 
 The default for the "package intel"_package.html command is to have
 all the MPI tasks on a given compute node use a single Xeon Phi
 coprocessor.  In general, running with a large number of MPI tasks on
 each node will perform best with offload.  Each MPI task will
 automatically get affinity to a subset of the hardware threads
 available on the coprocessor.  For example, if your card has 61 cores,
 with 60 cores available for offload and 4 hardware threads per core
 (240 total threads), running with 24 MPI tasks per node will cause
 each MPI task to use a subset of 10 threads on the coprocessor.  Fine
 tuning of the number of threads to use per MPI task or the number of
 threads to use per core can be accomplished with keyword settings of
 the "package intel"_package.html command.
 
 The USER-INTEL package has two modes for deciding which atoms will be
 handled by the coprocessor.  This choice is controlled with the {ghost}
 keyword of the "package intel"_package.html command.  When set to 0,
 ghost atoms (atoms at the borders between MPI tasks) are not offloaded
 to the card.  This allows for overlap of MPI communication of forces
 with computation on the coprocessor when the "newton"_newton.html
 setting is "on".  The default is dependent on the style being used,
 however, better performance may be achieved by setting this option
 explicitly.
 
 When using offload with CPU Hyper-Threading disabled, it may help
 performance to use fewer MPI tasks and OpenMP threads than available
 cores.  This is due to the fact that additional threads are generated
 internally to handle the asynchronous offload tasks.
 
 If pair computations are being offloaded to an Intel Xeon Phi
 coprocessor, a diagnostic line is printed to the screen (not to the
 log file), during the setup phase of a run, indicating that offload
 mode is being used and indicating the number of coprocessor threads
 per MPI task.  Additionally, an offload timing summary is printed at
 the end of each run.  When offloading, the frequency for "atom
 sorting"_atom_modify.html is changed to 1 so that the per-atom data is
 effectively sorted at every rebuild of the neighbor lists. All the
 available coprocessor threads on each Phi will be divided among MPI
 tasks, unless the {tptask} option of the "-pk intel" "command-line
 switch"_Section_start.html#start_6 is used to limit the coprocessor
 threads per MPI task.
 
 [Restrictions:]
 
 When offloading to a coprocessor, "hybrid"_pair_hybrid.html styles
 that require skip lists for neighbor builds cannot be offloaded.
 Using "hybrid/overlay"_pair_hybrid.html is allowed.  Only one intel
 accelerated style may be used with hybrid styles.
 "Special_bonds"_special_bonds.html exclusion lists are not currently
 supported with offload, however, the same effect can often be
 accomplished by setting cutoffs for excluded atom types to 0.  None of
 the pair styles in the USER-INTEL package currently support the
 "inner", "middle", "outer" options for rRESPA integration via the
 "run_style respa"_run_style.html command; only the "pair" option is
 supported.
 
 [References:]
 
 Brown, W.M., Carrillo, J.-M.Y., Mishra, B., Gavhane, N., Thakkar, F.M., De Kraker, A.R., Yamada, M., Ang, J.A., Plimpton, S.J., "Optimizing Classical Molecular Dynamics in LAMMPS," in Intel Xeon Phi Processor High Performance Programming: Knights Landing Edition, J. Jeffers, J. Reinders, A. Sodani, Eds. Morgan Kaufmann. :ulb,l
 
 Brown, W. M., Semin, A., Hebenstreit, M., Khvostov, S., Raman, K., Plimpton, S.J. "Increasing Molecular Dynamics Simulation Rates with an 8-Fold Increase in Electrical Power Efficiency."_http://dl.acm.org/citation.cfm?id=3014915 2016 High Performance Computing, Networking, Storage and Analysis, SC16: International Conference (pp. 82-95). :l
 
 Brown, W.M., Carrillo, J.-M.Y., Gavhane, N., Thakkar, F.M., Plimpton, S.J. Optimizing Legacy Molecular Dynamics Software with Directive-Based Offload. Computer Physics Communications. 2015. 195: p. 95-101. :l
 :ule
 
 
 
 
diff --git a/doc/src/accelerate_kokkos.txt b/doc/src/accelerate_kokkos.txt
index 6ccd69584..712a05300 100644
--- a/doc/src/accelerate_kokkos.txt
+++ b/doc/src/accelerate_kokkos.txt
@@ -1,496 +1,493 @@
 "Previous Section"_Section_packages.html - "LAMMPS WWW Site"_lws -
 "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
 
 :link(lws,http://lammps.sandia.gov)
 :link(ld,Manual.html)
 :link(lc,Section_commands.html#comm)
 
 :line
 
 "Return to Section accelerate overview"_Section_accelerate.html
 
 5.3.3 KOKKOS package :h5
 
 The KOKKOS package was developed primarily by Christian Trott (Sandia)
 with contributions of various styles by others, including Sikandar
 Mashayak (UIUC), Stan Moore (Sandia), and Ray Shan (Sandia).  The
 underlying Kokkos library was written primarily by Carter Edwards,
 Christian Trott, and Dan Sunderland (all Sandia).
 
 The KOKKOS package contains versions of pair, fix, and atom styles
 that use data structures and macros provided by the Kokkos library,
 which is included with LAMMPS in lib/kokkos.
 
 The Kokkos library is part of
 "Trilinos"_http://trilinos.sandia.gov/packages/kokkos and can also be
 downloaded from "Github"_https://github.com/kokkos/kokkos. Kokkos is a
 templated C++ library that provides two key abstractions for an
 application like LAMMPS.  First, it allows a single implementation of
 an application kernel (e.g. a pair style) to run efficiently on
 different kinds of hardware, such as a GPU, Intel Phi, or many-core
 CPU.
 
 The Kokkos library also provides data abstractions to adjust (at
 compile time) the memory layout of basic data structures like 2d and
 3d arrays and allow the transparent utilization of special hardware
 load and store operations.  Such data structures are used in LAMMPS to
 store atom coordinates or forces or neighbor lists.  The layout is
 chosen to optimize performance on different platforms.  Again this
 functionality is hidden from the developer, and does not affect how
 the kernel is coded.
 
 These abstractions are set at build time, when LAMMPS is compiled with
 the KOKKOS package installed.  All Kokkos operations occur within the
 context of an individual MPI task running on a single node of the
 machine.  The total number of MPI tasks used by LAMMPS (one or
 multiple per compute node) is set in the usual manner via the mpirun
 or mpiexec commands, and is independent of Kokkos.
 
 Kokkos currently provides support for 3 modes of execution (per MPI
 task).  These are OpenMP (for many-core CPUs), Cuda (for NVIDIA GPUs),
 and OpenMP (for Intel Phi).  Note that the KOKKOS package supports
 running on the Phi in native mode, not offload mode like the
 USER-INTEL package supports.  You choose the mode at build time to
 produce an executable compatible with specific hardware.
 
 Here is a quick overview of how to use the KOKKOS package
 for CPU acceleration, assuming one or more 16-core nodes.
 More details follow.
 
 use a C++11 compatible compiler
 make yes-kokkos
 make mpi KOKKOS_DEVICES=OpenMP                 # build with the KOKKOS package
-make kokkos_omp                                # or Makefile.kokkos_omp already has variable set
-Make.py -v -p kokkos -kokkos omp -o mpi -a file mpi   # or one-line build via Make.py :pre
+make kokkos_omp                                # or Makefile.kokkos_omp already has variable set :pre
 
 mpirun -np 16 lmp_mpi -k on -sf kk -in in.lj              # 1 node, 16 MPI tasks/node, no threads
 mpirun -np 2 -ppn 1 lmp_mpi -k on t 16 -sf kk -in in.lj   # 2 nodes, 1 MPI task/node, 16 threads/task
 mpirun -np 2 lmp_mpi -k on t 8 -sf kk -in in.lj           # 1 node, 2 MPI tasks/node, 8 threads/task
 mpirun -np 32 -ppn 4 lmp_mpi -k on t 4 -sf kk -in in.lj   # 8 nodes, 4 MPI tasks/node, 4 threads/task :pre
 
 specify variables and settings in your Makefile.machine that enable OpenMP, GPU, or Phi support
 include the KOKKOS package and build LAMMPS
 enable the KOKKOS package and its hardware options via the "-k on" command-line switch use KOKKOS styles in your input script :ul
 
 Here is a quick overview of how to use the KOKKOS package for GPUs,
 assuming one or more nodes, each with 16 cores and a GPU.  More
 details follow.
 
 discuss use of NVCC, which Makefiles to examine
 
 use a C++11 compatible compiler
 KOKKOS_DEVICES = Cuda, OpenMP
 KOKKOS_ARCH = Kepler35
 make yes-kokkos
-make machine
-Make.py -p kokkos -kokkos cuda arch=31 -o kokkos_cuda -a file kokkos_cuda :pre
+make machine :pre
 
 mpirun -np 1 lmp_cuda -k on t 6 -sf kk -in in.lj          # one MPI task, 6 threads on CPU
 mpirun -np 4 -ppn 1 lmp_cuda -k on t 6 -sf kk -in in.lj   # ditto on 4 nodes :pre
 
 mpirun -np 2 lmp_cuda -k on t 8 g 2 -sf kk -in in.lj           # two MPI tasks, 8 threads per CPU
 mpirun -np 32 -ppn 2 lmp_cuda -k on t 8 g 2 -sf kk -in in.lj   # ditto on 16 nodes :pre
 
 Here is a quick overview of how to use the KOKKOS package
 for the Intel Phi:
 
 use a C++11 compatible compiler
 KOKKOS_DEVICES = OpenMP
 KOKKOS_ARCH = KNC
 make yes-kokkos
-make machine
-Make.py -p kokkos -kokkos phi -o kokkos_phi -a file mpi :pre
+make machine :pre
 
 host=MIC, Intel Phi with 61 cores (240 threads/phi via 4x hardware threading):
 mpirun -np 1 lmp_g++ -k on t 240 -sf kk -in in.lj           # 1 MPI task on 1 Phi, 1*240 = 240
 mpirun -np 30 lmp_g++ -k on t 8 -sf kk -in in.lj            # 30 MPI tasks on 1 Phi, 30*8 = 240
 mpirun -np 12 lmp_g++ -k on t 20 -sf kk -in in.lj           # 12 MPI tasks on 1 Phi, 12*20 = 240
 mpirun -np 96 -ppn 12 lmp_g++ -k on t 20 -sf kk -in in.lj   # ditto on 8 Phis :pre
 
 [Required hardware/software:]
 
 Kokkos support within LAMMPS must be built with a C++11 compatible
 compiler.  If using gcc, version 4.7.2 or later is required.
 
 To build with Kokkos support for CPUs, your compiler must support the
 OpenMP interface.  You should have one or more multi-core CPUs so that
 multiple threads can be launched by each MPI task running on a CPU.
 
 To build with Kokkos support for NVIDIA GPUs, NVIDIA Cuda software
 version 7.5 or later must be installed on your system.  See the
 discussion for the "GPU"_accelerate_gpu.html package for details of
 how to check and do this.
 
 NOTE: For good performance of the KOKKOS package on GPUs, you must
 have Kepler generation GPUs (or later).  The Kokkos library exploits
 texture cache options not supported by Tesla generation GPUs (or
 older).
 
 To build with Kokkos support for Intel Xeon Phi coprocessors, your
 system must be configured to use them in "native" mode, not "offload"
 mode like the USER-INTEL package supports.
 
 [Building LAMMPS with the KOKKOS package:]
 
 You must choose at build time whether to build for CPUs (OpenMP),
 GPUs, or Phi.
 
-You can do any of these in one line, using the src/Make.py script,
-described in "Section 4"_Section_packages.html of the manual.
-Type "Make.py -h" for help.  If run from the src directory, these
+You can do any of these in one line, using the suitable make command
+line flags as described in "Section 4"_Section_packages.html of the
+manual. If run from the src directory, these
 commands will create src/lmp_kokkos_omp, lmp_kokkos_cuda, and
 lmp_kokkos_phi.  Note that the OMP and PHI options use
 src/MAKE/Makefile.mpi as the starting Makefile.machine.  The CUDA
 option uses src/MAKE/OPTIONS/Makefile.kokkos_cuda.
 
 The latter two steps can be done using the "-k on", "-pk kokkos" and
 "-sf kk" "command-line switches"_Section_start.html#start_6
 respectively.  Or the effect of the "-pk" or "-sf" switches can be
 duplicated by adding the "package kokkos"_package.html or "suffix
 kk"_suffix.html commands respectively to your input script.
 
 
 Or you can follow these steps:
 
 CPU-only (run all-MPI or with OpenMP threading):
 
 cd lammps/src
 make yes-kokkos
 make kokkos_omp :pre
 
 CPU-only (only MPI, no threading):
 
 cd lammps/src
 make yes-kokkos
 make kokkos_mpi :pre
 
 Intel Xeon Phi (Intel Compiler, Intel MPI):
 
 cd lammps/src
 make yes-kokkos
 make kokkos_phi :pre
 
 CPUs and GPUs (with MPICH):
 
 cd lammps/src
 make yes-kokkos
 make kokkos_cuda_mpich :pre
 
 These examples set the KOKKOS-specific OMP, MIC, CUDA variables on the
 make command line which requires a GNU-compatible make command.  Try
 "gmake" if your system's standard make complains.
 
 NOTE: If you build using make line variables and re-build LAMMPS twice
 with different KOKKOS options and the *same* target, e.g. g++ in the
 first two examples above, then you *must* perform a "make clean-all"
 or "make clean-machine" before each build.  This is to force all the
 KOKKOS-dependent files to be re-compiled with the new options.
 
 NOTE: Currently, there are no precision options with the KOKKOS
 package.  All compilation and computation is performed in double
 precision.
 
 There are other allowed options when building with the KOKKOS package.
 As above, they can be set either as variables on the make command line
 or in Makefile.machine.  This is the full list of options, including
 those discussed above.  Each takes a value shown below.  The
 default value is listed, which is set in the
 lib/kokkos/Makefile.kokkos file.
 
 #Default settings specific options
 #Options: force_uvm,use_ldg,rdc
 
 KOKKOS_DEVICES, values = {OpenMP}, {Serial}, {Pthreads}, {Cuda}, default = {OpenMP}
 KOKKOS_ARCH, values = {KNC}, {SNB}, {HSW}, {Kepler}, {Kepler30}, {Kepler32}, {Kepler35}, {Kepler37}, {Maxwell}, {Maxwell50}, {Maxwell52}, {Maxwell53}, {ARMv8}, {BGQ}, {Power7}, {Power8}, default = {none}
 KOKKOS_DEBUG, values = {yes}, {no}, default = {no}
 KOKKOS_USE_TPLS, values = {hwloc}, {librt}, default = {none}
 KOKKOS_CUDA_OPTIONS, values = {force_uvm}, {use_ldg}, {rdc} :ul
 
 KOKKOS_DEVICES sets the parallelization method used for Kokkos code
 (within LAMMPS).  KOKKOS_DEVICES=OpenMP means that OpenMP will be
 used.  KOKKOS_DEVICES=Pthreads means that pthreads will be used.
 KOKKOS_DEVICES=Cuda means an NVIDIA GPU running CUDA will be used.
 
 If KOKKOS_DEVICES=Cuda, then the low-level Makefile in the src/MAKE
 directory must use "nvcc" as its compiler, via its CC setting.  For
 best performance its CCFLAGS setting should use -O3 and have a
 KOKKOS_ARCH setting that matches the compute capability of your NVIDIA
 hardware and software installation, e.g. KOKKOS_ARCH=Kepler30.  Note
 the minimal required compute capability is 2.0, but this will give
 significantly reduced performance compared to Kepler generation GPUs
 with compute capability 3.x.  For the LINK setting, "nvcc" should not
 be used; instead use g++ or another compiler suitable for linking C++
 applications.  Often you will want to use your MPI compiler wrapper
 for this setting (i.e. mpicxx).  Finally, the low-level Makefile must
 also have a "Compilation rule" for creating *.o files from *.cu files.
 See src/Makefile.cuda for an example of a low-level Makefile with all
 of these settings.
 
 KOKKOS_USE_TPLS=hwloc binds threads to hardware cores, so they do not
 migrate during a simulation.  KOKKOS_USE_TPLS=hwloc should always be
 used if running with KOKKOS_DEVICES=Pthreads for pthreads.  It is not
 necessary for KOKKOS_DEVICES=OpenMP for OpenMP, because OpenMP
 provides alternative methods via environment variables for binding
 threads to hardware cores.  More info on binding threads to cores is
 given in "Section 5.3"_Section_accelerate.html#acc_3.
 
 KOKKOS_ARCH=KNC enables compiler switches needed when compiling for an
 Intel Phi processor.
 
 KOKKOS_USE_TPLS=librt enables use of a more accurate timer mechanism
 on most Unix platforms.  This library is not available on all
 platforms.
 
 KOKKOS_DEBUG is only useful when developing a Kokkos-enabled style
 within LAMMPS.  KOKKOS_DEBUG=yes enables printing of run-time
 debugging information that can be useful.  It also enables runtime
 bounds checking on Kokkos data structures.
 
 KOKKOS_CUDA_OPTIONS are additional options for CUDA.
 
 For more information on Kokkos see the Kokkos programmers' guide here:
 /lib/kokkos/doc/Kokkos_PG.pdf.
 
 [Run with the KOKKOS package from the command line:]
 
 The mpirun or mpiexec command sets the total number of MPI tasks used
 by LAMMPS (one or multiple per compute node) and the number of MPI
 tasks used per node.  E.g. the mpirun command in MPICH does this via
 its -np and -ppn switches.  Ditto for OpenMPI via -np and -npernode.
 
 When using KOKKOS built with host=OMP, you need to choose how many
 OpenMP threads per MPI task will be used (via the "-k" command-line
 switch discussed below).  Note that the product of MPI tasks * OpenMP
 threads/task should not exceed the physical number of cores (on a
 node), otherwise performance will suffer.
 
 When using the KOKKOS package built with device=CUDA, you must use
 exactly one MPI task per physical GPU.
 
 When using the KOKKOS package built with host=MIC for Intel Xeon Phi
 coprocessor support you need to insure there are one or more MPI tasks
 per coprocessor, and choose the number of coprocessor threads to use
 per MPI task (via the "-k" command-line switch discussed below).  The
 product of MPI tasks * coprocessor threads/task should not exceed the
 maximum number of threads the coprocessor is designed to run,
 otherwise performance will suffer.  This value is 240 for current
 generation Xeon Phi(TM) chips, which is 60 physical cores * 4
 threads/core.  Note that with the KOKKOS package you do not need to
 specify how many Phi coprocessors there are per node; each
 coprocessor is simply treated as running some number of MPI tasks.
 
 You must use the "-k on" "command-line
 switch"_Section_start.html#start_6 to enable the KOKKOS package.  It
 takes additional arguments for hardware settings appropriate to your
 system.  Those arguments are "documented
 here"_Section_start.html#start_6.  The two most commonly used
 options are:
 
 -k on t Nt g Ng :pre
 
 The "t Nt" option applies to host=OMP (even if device=CUDA) and
 host=MIC.  For host=OMP, it specifies how many OpenMP threads per MPI
 task to use within a node.  For host=MIC, it specifies how many Xeon Phi
 threads per MPI task to use within a node.  The default is Nt = 1.
 Note that for host=OMP this is effectively MPI-only mode which may be
 fine.  But for host=MIC you will typically end up using far less than
 all the 240 available threads, which could give very poor performance.
 
 The "g Ng" option applies to device=CUDA.  It specifies how many GPUs
 per compute node to use.  The default is 1, so this only needs to be
 specified if you have 2 or more GPUs per compute node.
 
 The "-k on" switch also issues a "package kokkos" command (with no
 additional arguments) which sets various KOKKOS options to default
 values, as discussed on the "package"_package.html command doc page.
 
 Use the "-sf kk" "command-line switch"_Section_start.html#start_6,
 which will automatically append "kk" to styles that support it.  Use
 the "-pk kokkos" "command-line switch"_Section_start.html#start_6 if
 you wish to change any of the default "package kokkos"_package.html
 options set by the "-k on" "command-line
 switch"_Section_start.html#start_6.
 
 
 
 Note that the default for the "package kokkos"_package.html command is
 to use "full" neighbor lists and set the Newton flag to "off" for both
 pairwise and bonded interactions.  This typically gives fastest
 performance.  If the "newton"_newton.html command is used in the input
 script, it can override the Newton flag defaults.
 
 However, when running in MPI-only mode with 1 thread per MPI task, it
 will typically be faster to use "half" neighbor lists and set the
 Newton flag to "on", just as is the case for non-accelerated pair
 styles.  You can do this with the "-pk" "command-line
 switch"_Section_start.html#start_6.
 
 [Or run with the KOKKOS package by editing an input script:]
 
 The discussion above for the mpirun/mpiexec command and setting
 appropriate thread and GPU values for host=OMP or host=MIC or
 device=CUDA are the same.
 
 You must still use the "-k on" "command-line
 switch"_Section_start.html#start_6 to enable the KOKKOS package, and
 specify its additional arguments for hardware options appropriate to
 your system, as documented above.
 
 Use the "suffix kk"_suffix.html command, or you can explicitly add a
 "kk" suffix to individual styles in your input script, e.g.
 
 pair_style lj/cut/kk 2.5 :pre
 
 You only need to use the "package kokkos"_package.html command if you
 wish to change any of its option defaults, as set by the "-k on"
 "command-line switch"_Section_start.html#start_6.
 
 [Speed-ups to expect:]
 
 The performance of KOKKOS running in different modes is a function of
 your hardware, which KOKKOS-enabled styles are used, and the problem
 size.
 
 Generally speaking, the following rules of thumb apply:
 
 When running on CPUs only, with a single thread per MPI task,
 performance of a KOKKOS style is somewhere between the standard
 (un-accelerated) styles (MPI-only mode), and those provided by the
 USER-OMP package.  However the difference between all 3 is small (less
 than 20%). :ulb,l
 
 When running on CPUs only, with multiple threads per MPI task,
 performance of a KOKKOS style is a bit slower than the USER-OMP
 package. :l
 
 When running a large number of atoms per GPU, KOKKOS is typically faster
 than the GPU package. :l
 
 When running on Intel Xeon Phi, KOKKOS is not as fast as
 the USER-INTEL package, which is optimized for that hardware. :l
 :ule
 
 See the "Benchmark page"_http://lammps.sandia.gov/bench.html of the
 LAMMPS web site for performance of the KOKKOS package on different
 hardware.
 
 [Guidelines for best performance:]
 
 Here are guidelines for using the KOKKOS package on the different
 hardware configurations listed above.
 
 Many of the guidelines use the "package kokkos"_package.html command.
 See its doc page for details and default settings.  Experimenting with
 its options can provide a speed-up for specific calculations.
 
 [Running on a multi-core CPU:]
 
 If N is the number of physical cores/node, then the number of MPI
 tasks/node * number of threads/task should not exceed N, and should
 typically equal N.  Note that the default threads/task is 1, as set by
 the "t" keyword of the "-k" "command-line
 switch"_Section_start.html#start_6.  If you do not change this, no
 additional parallelism (beyond MPI) will be invoked on the host
 CPU(s).
 
 You can compare the performance running in different modes:
 
 run with 1 MPI task/node and N threads/task
 run with N MPI tasks/node and 1 thread/task
 run with settings in between these extremes :ul
 
 Examples of mpirun commands in these modes are shown above.
 
 When using KOKKOS to perform multi-threading, it is important for
 performance to bind both MPI tasks to physical cores, and threads to
 physical cores, so they do not migrate during a simulation.
 
 If you are not certain MPI tasks are being bound (check the defaults
 for your MPI installation), binding can be forced with these flags:
 
 OpenMPI 1.8: mpirun -np 2 -bind-to socket -map-by socket ./lmp_openmpi ...
 Mvapich2 2.0: mpiexec -np 2 -bind-to socket -map-by socket ./lmp_mvapich ... :pre
 
 For binding threads with the KOKKOS OMP option, use thread affinity
 environment variables to force binding.  With OpenMP 3.1 (gcc 4.7 or
 later, intel 12 or later) setting the environment variable
 OMP_PROC_BIND=true should be sufficient.  For binding threads with the
 KOKKOS pthreads option, compile LAMMPS with the KOKKOS HWLOC=yes option
 (see "this section"_Section_packages.html#KOKKOS of the manual for
 details).
 
 [Running on GPUs:]
 
 Insure the -arch setting in the machine makefile you are using,
 e.g. src/MAKE/Makefile.cuda, is correct for your GPU hardware/software.
 (see "this section"_Section_packages.html#KOKKOS of the manual for
 details).
 
 The -np setting of the mpirun command should set the number of MPI
 tasks/node to be equal to the # of physical GPUs on the node.
 
 Use the "-k" "command-line switch"_Section_start.html#start_6 to
 specify the number of GPUs per node, and the number of threads per MPI
 task.  As above for multi-core CPUs (and no GPU), if N is the number
 of physical cores/node, then the number of MPI tasks/node * number of
 threads/task should not exceed N.  With one GPU (and one MPI task) it
 may be faster to use less than all the available cores, by setting
 threads/task to a smaller value.  This is because using all the cores
 on a dual-socket node will incur extra cost to copy memory from the
 2nd socket to the GPU.
 
 Examples of mpirun commands that follow these rules are shown above.
 
 NOTE: When using a GPU, you will achieve the best performance if your
 input script does not use any fix or compute styles which are not yet
 Kokkos-enabled.  This allows data to stay on the GPU for multiple
 timesteps, without being copied back to the host CPU.  Invoking a
 non-Kokkos fix or compute, or performing I/O for
 "thermo"_thermo_style.html or "dump"_dump.html output will cause data
 to be copied back to the CPU.
 
 You cannot yet assign multiple MPI tasks to the same GPU with the
 KOKKOS package.  We plan to support this in the future, similar to the
 GPU package in LAMMPS.
 
 You cannot yet use both the host (multi-threaded) and device (GPU)
 together to compute pairwise interactions with the KOKKOS package.  We
 hope to support this in the future, similar to the GPU package in
 LAMMPS.
 
 [Running on an Intel Phi:]
 
 Kokkos only uses Intel Phi processors in their "native" mode, i.e.
 not hosted by a CPU.
 
 As illustrated above, build LAMMPS with OMP=yes (the default) and
 MIC=yes.  The latter insures code is correctly compiled for the Intel
 Phi.  The OMP setting means OpenMP will be used for parallelization on
 the Phi, which is currently the best option within Kokkos.  In the
 future, other options may be added.
 
 Current-generation Intel Phi chips have either 61 or 57 cores.  One
 core should be excluded for running the OS, leaving 60 or 56 cores.
 Each core is hyperthreaded, so there are effectively N = 240 (4*60) or
 N = 224 (4*56) cores to run on.
 
 The -np setting of the mpirun command sets the number of MPI
 tasks/node.  The "-k on t Nt" command-line switch sets the number of
 threads/task as Nt.  The product of these 2 values should be N, i.e.
 240 or 224.  Also, the number of threads/task should be a multiple of
 4 so that logical threads from more than one MPI task do not run on
 the same physical core.
 
 Examples of mpirun commands that follow these rules are shown above.
 
 [Restrictions:]
 
 As noted above, if using GPUs, the number of MPI tasks per compute
 node should be equal to the number of GPUs per compute node.  In the
 future Kokkos will support assigning multiple MPI tasks to a single
 GPU.
 
 Currently Kokkos does not support AMD GPUs due to limits in the
 available backend programming models.  Specifically, Kokkos requires
 extensive C++ support from the Kernel language.  This is expected to
 change in the future.
diff --git a/doc/src/accelerate_omp.txt b/doc/src/accelerate_omp.txt
index 81b7a5adc..fa7bef1a5 100644
--- a/doc/src/accelerate_omp.txt
+++ b/doc/src/accelerate_omp.txt
@@ -1,187 +1,183 @@
 "Previous Section"_Section_packages.html - "LAMMPS WWW Site"_lws -
 "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
 
 :link(lws,http://lammps.sandia.gov)
 :link(ld,Manual.html)
 :link(lc,Section_commands.html#comm)
 
 :line
 
 "Return to Section 5 overview"_Section_accelerate.html
 
 5.3.4 USER-OMP package :h5
 
 The USER-OMP package was developed by Axel Kohlmeyer at Temple
 University.  It provides multi-threaded versions of most pair styles,
 nearly all bonded styles (bond, angle, dihedral, improper), several
 Kspace styles, and a few fix styles.  The package currently uses the
 OpenMP interface for multi-threading.
 
 Here is a quick overview of how to use the USER-OMP package, assuming
 one or more 16-core nodes.  More details follow.
 
 use -fopenmp with CCFLAGS and LINKFLAGS in Makefile.machine
 make yes-user-omp
 make mpi                                   # build with USER-OMP package, if settings added to Makefile.mpi
-make omp                                   # or Makefile.omp already has settings
-Make.py -v -p omp -o mpi -a file mpi       # or one-line build via Make.py :pre
+make omp                                   # or Makefile.omp already has settings :pre
 
 lmp_mpi -sf omp -pk omp 16 < in.script                         # 1 MPI task, 16 threads
 mpirun -np 4 lmp_mpi -sf omp -pk omp 4 -in in.script           # 4 MPI tasks, 4 threads/task
 mpirun -np 32 -ppn 4 lmp_mpi -sf omp -pk omp 4 -in in.script   # 8 nodes, 4 MPI tasks/node, 4 threads/task :pre
 
 [Required hardware/software:]
 
 Your compiler must support the OpenMP interface.  You should have one
 or more multi-core CPUs so that multiple threads can be launched by
 each MPI task running on a CPU.
 
 [Building LAMMPS with the USER-OMP package:]
 
 The lines above illustrate how to include/build with the USER-OMP
 package in two steps, using the "make" command.  Or how to do it with
-one command via the src/Make.py script, described in "Section
-4"_Section_packages.html of the manual.  Type "Make.py -h" for
-help.
+one command as described in "Section 4"_Section_packages.html of the manual.
 
 Note that the CCFLAGS and LINKFLAGS settings in Makefile.machine must
 include "-fopenmp".  Likewise, if you use an Intel compiler, the
-CCFLAGS setting must include "-restrict".  The Make.py command will
-add these automatically.
+CCFLAGS setting must include "-restrict".
 
 [Run with the USER-OMP package from the command line:]
 
 The mpirun or mpiexec command sets the total number of MPI tasks used
 by LAMMPS (one or multiple per compute node) and the number of MPI
 tasks used per node.  E.g. the mpirun command in MPICH does this via
 its -np and -ppn switches.  Ditto for OpenMPI via -np and -npernode.
 
 You need to choose how many OpenMP threads per MPI task will be used
 by the USER-OMP package.  Note that the product of MPI tasks *
 threads/task should not exceed the physical number of cores (on a
 node), otherwise performance will suffer.
 
 As in the lines above, use the "-sf omp" "command-line
 switch"_Section_start.html#start_6, which will automatically append
 "omp" to styles that support it.  The "-sf omp" switch also issues a
 default "package omp 0"_package.html command, which will set the
 number of threads per MPI task via the OMP_NUM_THREADS environment
 variable.
 
 You can also use the "-pk omp Nt" "command-line
 switch"_Section_start.html#start_6, to explicitly set Nt = # of OpenMP
 threads per MPI task to use, as well as additional options.  Its
 syntax is the same as the "package omp"_package.html command whose doc
 page gives details, including the default values used if it is not
 specified.  It also gives more details on how to set the number of
 threads via the OMP_NUM_THREADS environment variable.
 
 [Or run with the USER-OMP package by editing an input script:]
 
 The discussion above for the mpirun/mpiexec command, MPI tasks/node,
 and threads/MPI task is the same.
 
 Use the "suffix omp"_suffix.html command, or you can explicitly add an
 "omp" suffix to individual styles in your input script, e.g.
 
 pair_style lj/cut/omp 2.5 :pre
 
 You must also use the "package omp"_package.html command to enable the
 USER-OMP package.  When you do this you also specify how many threads
 per MPI task to use.  The command doc page explains other options and
 how to set the number of threads via the OMP_NUM_THREADS environment
 variable.
 
 [Speed-ups to expect:]
 
 Depending on which styles are accelerated, you should look for a
 reduction in the "Pair time", "Bond time", "KSpace time", and "Loop
 time" values printed at the end of a run.
 
 You may see a small performance advantage (5 to 20%) when running a
 USER-OMP style (in serial or parallel) with a single thread per MPI
 task, versus running standard LAMMPS with its standard un-accelerated
 styles (in serial or all-MPI parallelization with 1 task/core).  This
 is because many of the USER-OMP styles contain similar optimizations
 to those used in the OPT package, described in "Section
 5.3.5"_accelerate_opt.html.
 
 With multiple threads/task, the optimal choice of number of MPI
 tasks/node and OpenMP threads/task can vary a lot and should always be
 tested via benchmark runs for a specific simulation running on a
 specific machine, paying attention to guidelines discussed in the next
 sub-section.
 
 A description of the multi-threading strategy used in the USER-OMP
 package and some performance examples are "presented
 here"_http://sites.google.com/site/akohlmey/software/lammps-icms/lammps-icms-tms2011-talk.pdf?attredirects=0&d=1
 
 [Guidelines for best performance:]
 
 For many problems on current generation CPUs, running the USER-OMP
 package with a single thread/task is faster than running with multiple
 threads/task.  This is because the MPI parallelization in LAMMPS is
 often more efficient than multi-threading as implemented in the
 USER-OMP package.  The parallel efficiency (in a threaded sense) also
 varies for different USER-OMP styles.
 
 Using multiple threads/task can be more effective under the following
 circumstances:
 
 Individual compute nodes have a significant number of CPU cores but
 the CPU itself has limited memory bandwidth, e.g. for Intel Xeon 53xx
 (Clovertown) and 54xx (Harpertown) quad-core processors.  Running one
 MPI task per CPU core will result in significant performance
 degradation, so that running with 4 or even only 2 MPI tasks per node
 is faster.  Running in hybrid MPI+OpenMP mode will reduce the
 inter-node communication bandwidth contention in the same way, but
 offers an additional speedup by utilizing the otherwise idle CPU
 cores. :ulb,l
 
 The interconnect used for MPI communication does not provide
 sufficient bandwidth for a large number of MPI tasks per node.  For
 example, this applies to running over gigabit ethernet or on Cray XT4
 or XT5 series supercomputers.  As in the aforementioned case, this
 effect worsens when using an increasing number of nodes. :l
 
 The system has a spatially inhomogeneous particle density which does
 not map well to the "domain decomposition scheme"_processors.html or
 "load-balancing"_balance.html options that LAMMPS provides.  This is
 because multi-threading achieves parallelism over the number of
 particles, not via their distribution in space. :l
 
 A machine is being used in "capability mode", i.e. near the point
 where MPI parallelism is maxed out.  For example, this can happen when
 using the "PPPM solver"_kspace_style.html for long-range
 electrostatics on large numbers of nodes.  The scaling of the KSpace
 calculation (see the "kspace_style"_kspace_style.html command) becomes
 the performance-limiting factor.  Using multi-threading allows fewer
 MPI tasks to be invoked and can speed up the long-range solver, while
 increasing overall performance by parallelizing the pairwise and
 bonded calculations via OpenMP.  Likewise additional speedup can
 sometimes be achieved by increasing the length of the Coulombic cutoff
 and thus reducing the work done by the long-range solver.  Using the
 "run_style verlet/split"_run_style.html command, which is compatible
 with the USER-OMP package, is an alternative way to reduce the number
 of MPI tasks assigned to the KSpace calculation. :l
 :ule
 
 Additional performance tips are as follows:
 
 The best parallel efficiency from {omp} styles is typically achieved
 when there is at least one MPI task per physical CPU chip, i.e. socket
 or die. :ulb,l
 
 It is usually most efficient to restrict threading to a single
 socket, i.e. use one or more MPI tasks per socket. :l
 
 NOTE: By default, several current MPI implementations use a processor
 affinity setting that restricts each MPI task to a single CPU core.
 Using multi-threading in this mode will force all threads to share the
 one core and thus is likely to be counterproductive.  Instead, binding
 MPI tasks to a (multi-core) socket, should solve this issue. :l
 :ule
 
 [Restrictions:]
 
 None.
diff --git a/doc/src/accelerate_opt.txt b/doc/src/accelerate_opt.txt
index 5a2a5eac0..845264b52 100644
--- a/doc/src/accelerate_opt.txt
+++ b/doc/src/accelerate_opt.txt
@@ -1,71 +1,67 @@
 "Previous Section"_Section_packages.html - "LAMMPS WWW Site"_lws -
 "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
 
 :link(lws,http://lammps.sandia.gov)
 :link(ld,Manual.html)
 :link(lc,Section_commands.html#comm)
 
 :line
 
 "Return to Section accelerate overview"_Section_accelerate.html
 
 5.3.5 OPT package :h5
 
 The OPT package was developed by James Fischer (High Performance
 Technologies), David Richie, and Vincent Natoli (Stone Ridge
 Technologies).  It contains a handful of pair styles whose compute()
 methods were rewritten in C++ templated form to reduce the overhead
 due to if tests and other conditional code.
 
 Here is a quick overview of how to use the OPT package.  More details
 follow.
 
 make yes-opt
-make mpi                               # build with the OPT package
-Make.py -v -p opt -o mpi -a file mpi   # or one-line build via Make.py :pre
+make mpi                               # build with the OPT package :pre
 
 lmp_mpi -sf opt -in in.script                # run in serial
 mpirun -np 4 lmp_mpi -sf opt -in in.script   # run in parallel :pre
 
 [Required hardware/software:]
 
 None.
 
 [Building LAMMPS with the OPT package:]
 
 The lines above illustrate how to build LAMMPS with the OPT package in
 two steps, using the "make" command.  Or how to do it with one command
-via the src/Make.py script, described in "Section
-4"_Section_packages.html of the manual.  Type "Make.py -h" for
-help.
+as described in "Section 4"_Section_packages.html of the manual.
 
 Note that if you use an Intel compiler to build with the OPT package,
 the CCFLAGS setting in your Makefile.machine must include "-restrict".
-The Make.py command will add this automatically.
 
 [Run with the OPT package from the command line:]
 
 As in the lines above, use the "-sf opt" "command-line
 switch"_Section_start.html#start_6, which will automatically append
 "opt" to styles that support it.
 
 [Or run with the OPT package by editing an input script:]
 
 Use the "suffix opt"_suffix.html command, or you can explicitly add an
 "opt" suffix to individual styles in your input script, e.g.
 
 pair_style lj/cut/opt 2.5 :pre
 
 [Speed-ups to expect:]
 
 You should see a reduction in the "Pair time" value printed at the end
 of a run.  On most machines for reasonable problem sizes, it will be a
 5 to 20% savings.
 
 [Guidelines for best performance:]
 
 Just try out an OPT pair style to see how it performs.
 
 [Restrictions:]
 
 None.
diff --git a/doc/src/compute_voronoi_atom.txt b/doc/src/compute_voronoi_atom.txt
index d084fcee6..a280b2b15 100644
--- a/doc/src/compute_voronoi_atom.txt
+++ b/doc/src/compute_voronoi_atom.txt
@@ -1,228 +1,228 @@
 "LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
 
 :link(lws,http://lammps.sandia.gov)
 :link(ld,Manual.html)
 :link(lc,Section_commands.html#comm)
 
 :line
 
 compute voronoi/atom command :h3
 
 [Syntax:]
 
 compute ID group-ID voronoi/atom keyword arg ... :pre
 
 ID, group-ID are documented in "compute"_compute.html command :ulb,l
 voronoi/atom = style name of this compute command :l
 zero or more keyword/value pairs may be appended :l
 keyword = {only_group} or {occupation} or {surface} or {radius} or {edge_histo} or {edge_threshold}
 or {face_threshold} or {neighbors} or {peratom} :l
   {only_group} = no arg
   {occupation} = no arg
   {surface} arg = sgroup-ID
     sgroup-ID = compute the dividing surface between group-ID and sgroup-ID
       this keyword adds a third column to the compute output
   {radius} arg = v_r
     v_r = radius atom style variable for a poly-disperse Voronoi tessellation
   {edge_histo} arg = maxedge
     maxedge = maximum number of Voronoi cell edges to be accounted in the histogram
   {edge_threshold} arg = minlength
     minlength = minimum length for an edge to be counted
   {face_threshold} arg = minarea
     minarea = minimum area for a face to be counted
   {neighbors} value = {yes} or {no} = store list of all neighbors or no
   {peratom} value = {yes} or {no} = per-atom quantities accessible or no :pre
 :ule
 
 [Examples:]
 
 compute 1 all voronoi/atom
 compute 2 precipitate voronoi/atom surface matrix
 compute 3b precipitate voronoi/atom radius v_r
 compute 4 solute voronoi/atom only_group
 compute 5 defects voronoi/atom occupation
 compute 6 all voronoi/atom neighbors yes :pre
 
 [Description:]
 
 Define a computation that calculates the Voronoi tessellation of the
 atoms in the simulation box.  The tessellation is calculated using all
 atoms in the simulation, but non-zero values are only stored for atoms
 in the group.
 
 By default two per-atom quantities are calculated by this compute.
 The first is the volume of the Voronoi cell around each atom.  Any
 point in an atom's Voronoi cell is closer to that atom than any other.
 The second is the number of faces of the Voronoi cell. This is
 equal to the number of nearest neighbors of the central atom,
 plus any exterior faces (see note below). If the {peratom} keyword
 is set to "no", the per-atom quantities are still calculated,
 but they are not accessible.
 
 :line
 
 If the {only_group} keyword is specified the tessellation is performed
 only with respect to the atoms contained in the compute group. This is
 equivalent to deleting all atoms not contained in the group prior to
 evaluating the tessellation.
 
 If the {surface} keyword is specified a third quantity per atom is
 computed: the Voronoi cell surface of the given atom. {surface} takes
 a group ID as an argument. If a group other than {all} is specified,
 only the Voronoi cell facets facing a neighbor atom from the specified
 group are counted towards the surface area.
 
 In the example above, a precipitate embedded in a matrix, only atoms
 at the surface of the precipitate will have non-zero surface area, and
 only the outward facing facets of the Voronoi cells are counted (the
 hull of the precipitate). The total surface area of the precipitate
 can be obtained by running a "reduce sum" compute on c_2\[3\].
 
 If the {radius} keyword is specified with an atom style variable as
 the argument, a poly-disperse Voronoi tessellation is
 performed. Examples for radius variables are
 
 variable r1 atom (type==1)*0.1+(type==2)*0.4
 compute radius all property/atom radius
 variable r2 atom c_radius :pre
 
 Here v_r1 specifies a per-type radius of 0.1 units for type 1 atoms
 and 0.4 units for type 2 atoms, and v_r2 accesses the radius property
 present in atom_style sphere for granular models.
 
 The {edge_histo} keyword activates the compilation of a histogram of
 number of edges on the faces of the Voronoi cells in the compute
 group. The argument {maxedge} of this keyword is the largest number
 of edges on a single Voronoi cell face expected to occur in the
 sample. This keyword adds the generation of a global vector with
 {maxedge}+1 entries. The last entry in the vector contains the number of
 faces with more than {maxedge} edges. Since the polygon with the
 smallest number of edges is a triangle, entries 1 and 2 of the vector
 will always be zero.
 
 The {edge_threshold} and {face_threshold} keywords allow the
 suppression of edges below a given minimum length and faces below a
 given minimum area. Ultra short edges and ultra small faces can occur
 as artifacts of the Voronoi tessellation. These keywords will affect
 the neighbor count and edge histogram outputs.
 
 If the {occupation} keyword is specified the tessellation is only
 performed for the first invocation of the compute and then stored.
 For all following invocations of the compute the number of atoms in
 each Voronoi cell in the stored tessellation is counted. In this mode
 the compute returns a per-atom array with 2 columns. The first column
 is the number of atoms currently in the Voronoi volume defined by this
 atom at the time of the first invocation of the compute (note that the
 atom may have moved significantly). The second column contains the
 total number of atoms sharing the Voronoi cell of the stored
 tessellation at the location of the current atom. Numbers in column
 one can be any positive integer including zero, while column two
 values will always be greater than zero. Column one data can be used
 to locate vacancies (the coordinates are given by the atom coordinates
 at the time step when the compute was first invoked), while column two
 data can be used to identify interstitial atoms.
 
 If the {neighbors} value is set to yes, then
 this compute creates a local array with 3 columns. There
 is one row for each face of each Voronoi cell. The
 3 columns are the atom ID of the atom that owns the cell,
 the atom ID of the atom in the neighboring cell
 (or zero if the face is external), and the area of the face.
 The array can be accessed by any command that
 uses local values from a compute as input.  See "this
 section"_Section_howto.html#howto_15 for an overview of LAMMPS output
 options. More specifically, the array can be accessed by a
 "dump local"_dump.html command to write a file containing
 all the Voronoi neighbors in a system:
 
 compute 6 all voronoi/atom neighbors yes
 dump d2 all local 1 dump.neighbors index c_6\[1\] c_6\[2\] c_6\[3\] :pre
 
 If the {face_threshold} keyword is used, then only faces
 with areas greater than the threshold are stored.
 
 :line
 
 The Voronoi calculation is performed by the freely available "Voro++
 package"_voronoi, written by Chris Rycroft at UC Berkeley and LBL,
 which must be installed on your system when building LAMMPS for use
 with this compute.  See instructions on obtaining and installing the
 Voro++ software in the src/VORONOI/README file.
 
 :link(voronoi,http://math.lbl.gov/voro++/)
 
 NOTE: The calculation of Voronoi volumes is performed by each
 processor for the atoms it owns, and includes the effect of ghost
 atoms stored by the processor.  This assumes that the Voronoi cells of
 owned atoms are not affected by atoms beyond the ghost atom cut-off
 distance.  This is usually a good assumption for liquid and solid
 systems, but may lead to underestimation of Voronoi volumes in low
 density systems.  By default, the set of ghost atoms stored by each
 processor is determined by the cutoff used for
 "pair_style"_pair_style.html interactions.  The cutoff can be set
 explicitly via the "comm_modify cutoff"_comm_modify.html command.  The
 Voronoi cells for atoms adjacent to empty regions will extend into
 those regions up to the communication cutoff in x, y, or z.  In that
 situation, an exterior face is created at the cutoff distance normal
 to the x, y, or z direction.  For triclinic systems, the exterior face
 is parallel to the corresponding reciprocal lattice vector.
 
 NOTE: The Voro++ package performs its calculation in 3d.  This will
 still work for a 2d LAMMPS simulation, provided all the atoms have the
 same z coordinate. The Voronoi cell of each atom will be a columnar
 polyhedron with constant cross-sectional area along the z direction
 and two exterior faces at the top and bottom of the simulation box. If
 the atoms do not all have the same z coordinate, then the columnar
 cells will be accordingly distorted. The cross-sectional area of each
 Voronoi cell can be obtained by dividing its volume by the z extent of
 the simulation box.  Note that you define the z extent of the
 simulation box for 2d simulations when using the
 "create_box"_create_box.html or "read_data"_read_data.html commands.
 
 [Output info:]
 
 By default, this compute calculates a per-atom array with 2
 columns. In regular dynamic tessellation mode the first column is the
 Voronoi volume, the second is the neighbor count, as described above
 (read above for the output data in case the {occupation} keyword is
 specified).  These values can be accessed by any command that uses
 per-atom values from a compute as input.  See "Section
 6.15"_Section_howto.html#howto_15 for an overview of LAMMPS output
 options. If the {peratom} keyword is set to "no", the per-atom array
 is still created, but it is not accessible.
 
 If the {edge_histo} keyword is used, then this compute generates a
 global vector of length {maxedge}+1, containing a histogram of the
 number of edges per face.
 
 If the {neighbors} value is set to yes, then this compute calculates a
 local array with 3 columns. There is one row for each face of each
 Voronoi cell.
 
 NOTE: Some LAMMPS commands such as the "compute
 reduce"_compute_reduce.html command can accept either a per-atom or
 local quantity. If this compute produces both quantities, the command
 may access the per-atom quantity, even if you want to access the local
 quantity.  This effect can be eliminated by using the {peratom}
 keyword to turn off the production of the per-atom quantities.  For
 the default value {yes} both quantities are produced.  For the value
 {no}, only the local array is produced.
 
 The Voronoi cell volume will be in distance "units"_units.html cubed.
 The Voronoi face area will be in distance "units"_units.html squared.
 
 [Restrictions:]
 
 This compute is part of the VORONOI package.  It is only enabled if
 LAMMPS was built with that package.  See the "Making
 LAMMPS"_Section_start.html#start_3 section for more info.
 
-It also requiers you have a copy of the Voro++ library built and
+It also requires you have a copy of the Voro++ library built and
 installed on your system.  See instructions on obtaining and
 installing the Voro++ software in the src/VORONOI/README file.
 
 [Related commands:]
 
 "dump custom"_dump.html, "dump local"_dump.html
 
 [Default:] {neighbors} no, {peratom} yes
diff --git a/doc/src/fix_msst.txt b/doc/src/fix_msst.txt
index 025c73389..310692669 100644
--- a/doc/src/fix_msst.txt
+++ b/doc/src/fix_msst.txt
@@ -1,193 +1,193 @@
 "LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
 
 :link(lws,http://lammps.sandia.gov)
 :link(ld,Manual.html)
 :link(lc,Section_commands.html#comm)
 
 :line
 
  fix msst command :h3
 
 [Syntax:]
 
 fix ID group-ID msst dir shockvel keyword value ... :pre
 
 ID, group-ID are documented in "fix"_fix.html command :ulb,l
 msst = style name of this fix :l
 dir = {x} or {y} or {z} :l
 shockvel = shock velocity (strictly positive, distance/time units) :l
 zero or more keyword value pairs may be appended :l
 keyword = {q} or {mu} or {p0} or {v0} or {e0} or {tscale} or {beta} or {dftb} :l
   {q} value = cell mass-like parameter (mass^2/distance^4 units)
   {mu} value = artificial viscosity (mass/length/time units)
   {p0} value = initial pressure in the shock equations (pressure units)
   {v0} value = initial simulation cell volume in the shock equations (distance^3 units)
   {e0} value = initial total energy (energy units)
   {tscale} value = reduction in initial temperature (unitless fraction between 0.0 and 1.0) 
   {dftb} value = {yes} or {no} for whether using MSST in conjunction with DFTB+
   {beta} value = scale factor for improved energy conservation :pre
 :ule
 
 [Examples:]
 
 fix 1 all msst y 100.0 q 1.0e5 mu 1.0e5
 fix 2 all msst z 50.0 q 1.0e4 mu 1.0e4  v0 4.3419e+03 p0 3.7797e+03 e0 -9.72360e+02 tscale 0.01
 fix 1 all msst y 100.0 q 1.0e5 mu 1.0e5 dftb yes beta 0.5 :pre
 
 [Description:]
 
 This command performs the Multi-Scale Shock Technique (MSST)
 integration to update positions and velocities each timestep to mimic
 a compressive shock wave passing over the system. See "(Reed)"_#Reed
 for a detailed description of this method.  The MSST varies the cell
 volume and temperature in such a way as to restrain the system to the
 shock Hugoniot and the Rayleigh line. These restraints correspond to
 the macroscopic conservation laws dictated by a shock
 front. {shockvel} determines the steady shock velocity that will be
 simulated.
 
 To perform a simulation, choose a value of {q} that provides volume
 compression on the timescale of 100 fs to 1 ps.  If the volume is not
 compressing, either the shock speed is chosen to be below the material
 sound speed or {p0} has been chosen inaccurately.  Volume compression
 at the start can be sped up by using a non-zero value of {tscale}. Use
 the smallest value of {tscale} that results in compression.
 
 Under some special high-symmetry conditions, the pressure (volume)
 and/or temperature of the system may oscillate for many cycles even
 with an appropriate choice of mass-like parameter {q}. Such
 oscillations have physical significance in some cases.  The optional
 {mu} keyword adds an artificial viscosity that helps break the system
 symmetry to equilibrate to the shock Hugoniot and Rayleigh line more
 rapidly in such cases.
 
 The keyword {tscale} is a factor between 0 and 1 that determines what
 fraction of thermal kinetic energy is converted to compressive strain
 kinetic energy at the start of the simulation.  Setting this parameter
 to a non-zero value may assist in compression at the start of
 simulations where it is slow to occur.
 
 If keywords {e0}, {p0}, or {v0} are not supplied, these quantities will
 be calculated on the first step, after the energy specified by
 {tscale} is removed.  The value of {e0} is not used in the dynamical
 equations, but is used in calculating the deviation from the Hugoniot.
 
 The keyword {beta} is a scaling term that can be added to the MSST
 ionic equations of motion to account for drift in the conserved
 quantity during long timescale simulations, similar to a Berendsen
-thermostat. See "(Reed)"_#Reed and "(Goldman)"_#Goldman for more
+thermostat. See "(Reed)"_#Reed and "(Goldman)"_#Goldman2 for more
 details.  The value of {beta} must be between 0.0 and 1.0 inclusive.
 A value of 0.0 means no contribution, a value of 1.0 means a full
 contribution.
 
 Values of shockvel less than a critical value determined by the
 material response will not have compressive solutions. This will be
 reflected in lack of significant change of the volume in the MSST.
 
 For all pressure styles, the simulation box stays orthogonal in shape.
 Parrinello-Rahman boundary conditions (tilted box) are supported by
 LAMMPS, but are not implemented for MSST.
 
 This fix computes a temperature and pressure and potential energy each
 timestep. To do this, the fix creates its own computes of style "temp",
 "pressure", and "pe", as if these commands had been issued:
 
 compute fix-ID_MSST_temp all temp
 compute fix-ID_MSST_press all pressure fix-ID_MSST_temp
 compute fix-ID_MSST_pe all pe :pre
 
 See the "compute temp"_compute_temp.html and "compute
 pressure"_compute_pressure.html commands for details.  Note that the
 IDs of the new computes are the fix-ID + "_MSST_temp" or "_MSST_press"
 or "_MSST_pe".  The group for the new computes is "all".
 
 :line
 
 The {dftb} keyword is to allow this fix to be used when LAMMPS is
 being driven by DFTB+, a density-functional tight-binding code. If the
 keyword {dftb} is used with a value of {yes}, then the MSST equations
 are altered to account for the electron entropy contribution to the
 Hugoniot relations and total energy.  See "(Reed2)"_#Reed2 and
-"(Goldman)"_#Goldman for details on this contribution.  In this case,
+"(Goldman)"_#Goldman2 for details on this contribution.  In this case,
 you must define a "fix external"_fix_external.html command in your
 input script, which is used to callback to DFTB+ during the LAMMPS
 timestepping.  DFTB+ will communicate its info to LAMMPS via that fix.
 
 :line
 
 [Restart, fix_modify, output, run start/stop, minimize info:]
 
 This fix writes the state of all internal variables to "binary restart
 files"_restart.html.  See the "read_restart"_read_restart.html command
 for info on how to re-specify a fix in an input script that reads a
 restart file, so that the operation of the fix continues in an
 uninterrupted fashion.
 
 The progress of the MSST can be monitored by printing the global
 scalar and global vector quantities computed by the fix.
 
 The scalar is the cumulative energy change due to the fix. This is
 also the energy added to the potential energy by the
 "fix_modify"_fix_modify.html {energy} command.  With this command, the
 thermo keyword {etotal} prints the conserved quantity of the MSST
 dynamic equations. This can be used to test if the MD timestep is
 sufficiently small for accurate integration of the dynamic
 equations. See also "thermo_style"_thermo_style.html command.
 
 The global vector contains four values in this order:
 
 \[{dhugoniot}, {drayleigh}, {lagrangian_speed}, {lagrangian_position}\]
 
 {dhugoniot} is the departure from the Hugoniot (temperature units).
 {drayleigh} is the departure from the Rayleigh line (pressure units).
 {lagrangian_speed} is the laboratory-frame Lagrangian speed (particle velocity) of the computational cell (velocity units).
 {lagrangian_position} is the computational cell position in the reference frame moving at the shock speed. This is usually a good estimate of distance of the computational cell behind the shock front. :ol
 
 To print these quantities to the log file with descriptive column
 headers, the following LAMMPS commands are suggested:
 
 fix              msst all msst z
 fix_modify       msst energy yes
 variable dhug    equal f_msst\[1\]
 variable dray    equal f_msst\[2\]
 variable lgr_vel equal f_msst\[3\]
 variable lgr_pos equal f_msst\[4\]
 thermo_style     custom step temp ke pe lz pzz etotal v_dhug v_dray v_lgr_vel v_lgr_pos f_msst :pre
 
 These fixes compute a global scalar and a global vector of 4
 quantities, which can be accessed by various "output
 commands"_Section_howto.html#howto_15.  The scalar values calculated
 by this fix are "extensive"; the vector values are "intensive".
 
 [Restrictions:]
 
 This fix style is part of the SHOCK package.  It is only enabled if
 LAMMPS was built with that package. See the "Making
 LAMMPS"_Section_start.html#start_3 section for more info.
 
 All cell dimensions must be periodic. This fix can not be used with a
 triclinic cell.  The MSST fix has been tested only for the group-ID
 all.
 
 [Related commands:]
 
 "fix nphug"_fix_nphug.html, "fix deform"_fix_deform.html
 
 [Default:]
 
 The keyword defaults are q = 10, mu = 0, tscale = 0.01, dftb = no,
 beta = 0.0.  Note that p0, v0, and e0 are calculated on the first
 timestep.
 
 :line
 
 :link(Reed)
 [(Reed)] Reed, Fried, and Joannopoulos, Phys. Rev. Lett., 90, 235503
 (2003).
 
 :link(Reed2)
 [(Reed2)] Reed, J. Phys. Chem. C, 116, 2205 (2012).
 
-:link(Goldman)
+:link(Goldman2)
 [(Goldman)] Goldman, Srinivasan, Hamel, Fried, Gaus, and Elstner,
 J. Phys. Chem. C, 117, 7885 (2013).
diff --git a/doc/src/fix_qbmsst.txt b/doc/src/fix_qbmsst.txt
index 468206a57..2c116fb0f 100644
--- a/doc/src/fix_qbmsst.txt
+++ b/doc/src/fix_qbmsst.txt
@@ -1,219 +1,219 @@
 "LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
 
 :link(lws,http://lammps.sandia.gov)
 :link(ld,Manual.html)
 :link(lc,Section_commands.html#comm)
 
 :line
 
 fix qbmsst command :h3
 
 [Syntax:]
 
 fix ID group-ID qbmsst dir shockvel keyword value ... :pre
 
 ID, group-ID are documented in "fix"_fix.html command :ulb,l
 qbmsst = style name of this fix :l
 dir = {x} or {y} or {z} :l
 shockvel = shock velocity (strictly positive, velocity units) :l
 zero or more keyword/value pairs may be appended :l
 keyword = {q} or {mu} or {p0} or {v0} or {e0} or {tscale} or {damp} or {seed} or {f_max} or {N_f} or {eta} or {beta} or {T_init} :l
 
   {q} value = cell mass-like parameter (mass^2/distance^4 units)
   {mu} value = artificial viscosity (mass/distance/time units)
   {p0} value = initial pressure in the shock equations (pressure units)
   {v0} value = initial simulation cell volume in the shock equations (distance^3 units)
   {e0} value = initial total energy (energy units)
   {tscale} value = reduction in initial temperature (unitless fraction between 0.0 and 1.0)
   {damp} value = damping parameter (time units) inverse of friction <i>&gamma;</i>
   {seed} value = random number seed (positive integer)
   {f_max} value = upper cutoff frequency of the vibration spectrum (1/time units)
   {N_f} value = number of frequency bins (positive integer)
   {eta} value = coupling constant between the shock system and the quantum thermal bath (positive unitless)
   {beta} value = the quantum temperature is updated every beta time steps (positive integer)
   {T_init} value = quantum temperature for the initial state (temperature units) :pre
 :ule
 
 [Examples:]
 
 fix 1 all qbmsst z 0.122 q 25 mu 0.9 tscale 0.01 damp 200 seed 35082 f_max 0.3 N_f 100 eta 1 beta 400 T_init 110 (liquid methane modeled with the REAX force field, real units)
 fix 2 all qbmsst z 72 q 40 tscale 0.05 damp 1 seed 47508 f_max 120.0 N_f 100 eta 1.0 beta 500 T_init 300 (quartz modeled with the BKS force field, metal units) :pre
 
 Two example input scripts are given, including shocked alpha quartz
 and shocked liquid methane. The input scripts first equilibrate an
 initial state with the quantum thermal bath at the target temperature
 and then apply the qbmsst to simulate shock compression with quantum
 nuclear correction.  The following two figures plot related quantities
 for shocked alpha quartz.
 
 :c,image(JPG/qbmsst_init.jpg)
 
 Figure 1. Classical temperature <i>T</i><sup>cl</sup> = &sum;
 <i>m<sub>i</sub>v<sub>i</sub><sup>2</sup>/3Nk</i><sub>B</sub> vs. time
 for coupling the alpha quartz initial state with the quantum thermal
 bath at target quantum temperature <i>T</i><sup>qm</sup> = 300 K. The
 NpH ensemble is used for time integration while QTB provides the
 colored random force. <i>T</i><sup>cl</sup> converges at the timescale
 of {damp} which is set to be 1 ps.
 
 :c,image(JPG/qbmsst_shock.jpg)
 
 Figure 2. Quantum temperature and pressure vs. time for simulating
 shocked alpha quartz with the QBMSST. The shock propagates along the z
 direction. Restart of the QBMSST command is demonstrated in the
 example input script. Thermodynamic quantities stay continuous before
 and after the restart.
 
 [Description:]
 
 This command performs the Quantum-Bath coupled Multi-Scale Shock
 Technique (QBMSST) integration. See "(Qi)"_#Qi for a detailed
 description of this method.  The QBMSST provides description of the
 thermodynamics and kinetics of shock processes while incorporating
 quantum nuclear effects.  The {shockvel} setting determines the steady
 shock velocity that will be simulated along direction {dir}.
 
 Quantum nuclear effects "(fix qtb)"_fix_qtb.html can be crucial
 especially when the temperature of the initial state is below the
 classical limit or there is a great change in the zero point energies
 between the initial and final states. Theoretical post processing
 quantum corrections of shock compressed water and methane have been
-reported as much as 30% of the temperatures "(Goldman)"_#Goldman.  A
+reported as much as 30% of the temperatures "(Goldman)"_#Goldman1.  A
 self-consistent method that couples the shock to a quantum thermal
 bath described by a colored noise Langevin thermostat has been
 developed by Qi et al "(Qi)"_#Qi and applied to shocked methane.  The
 onset of chemistry is reported to be at a pressure on the shock
 Hugoniot that is 40% lower than observed with classical molecular
 dynamics.
 
 It is highly recommended that the system be already in an equilibrium
 state with a quantum thermal bath at temperature of {T_init}.  The fix
 command "fix qtb"_fix_qtb.html at constant temperature {T_init} could
 be used before applying this command to introduce self-consistent
 quantum nuclear effects into the initial state.
 
 The parameters {q}, {mu}, {e0}, {p0}, {v0} and {tscale} are described
 in the command "fix msst"_fix_msst.html. The values of {e0}, {p0}, or
 {v0} will be calculated on the first step if not specified.  The
 parameter of {damp}, {f_max}, and {N_f} are described in the command
 "fix qtb"_fix_qtb.html.
 
 The fix qbmsst command couples the shock system to a quantum thermal
 bath with a rate that is proportional to the change of the total
 energy of the shock system, <i>etot</i> - <i>etot</i><sub>0</sub>.
 Here <i>etot</i> consists of both the system energy and a thermal
 term, see "(Qi)"_#Qi, and <i>etot</i><sub>0</sub> = {e0} is the
 initial total energy.
 
 The {eta} (<i>&eta;</i>) parameter is a unitless coupling constant
 between the shock system and the quantum thermal bath. A small {eta}
 value cannot adjust the quantum temperature fast enough during the
 temperature ramping period of shock compression while large {eta}
 leads to big temperature oscillation. A value of {eta} between 0.3 and
 1 is usually appropriate for simulating most systems under shock
 compression. We observe that different values of {eta} lead to almost
 the same final thermodynamic state behind the shock, as expected.
 
 The quantum temperature is updated every {beta} (<i>&beta;</i>) steps
 with an integration time interval {beta} times longer than the
 simulation time step. In that case, <i>etot</i> is taken as its
 average over the past {beta} steps. The temperature of the quantum
 thermal bath <i>T</i><sup>qm</sup> changes dynamically according to
 the following equation where &Delta;<i>t</i> is the MD time step and
 <i>&gamma;</i> is the friction constant which is equal to the inverse
 of the {damp} parameter.
 
 <center><font size="4"> <i>dT</i><sup>qm</sup>/<i>dt =
 &gamma;&eta;</i>&sum;<i><sup>&beta;</sup><sub>l =
 1</sub></i>[<i>etot</i>(<i>t-l</i>&Delta;<i>t</i>)-<i>etot</i><sub>0</sub>]/<i>3&beta;Nk</i><sub>B</sub>
 </font></center>
 
 The parameter {T_init} is the initial temperature of the quantum
 thermal bath and the system before shock loading.
 
 For all pressure styles, the simulation box stays orthorhombic in
 shape. Parrinello-Rahman boundary conditions (tilted box) are
 supported by LAMMPS, but are not implemented for QBMSST.
 
 :line
 
 [Restart, fix_modify, output, run start/stop, minimize info:]
 
 Because the state of the random number generator is not written to
 "binary restart files"_restart.html, this fix cannot be restarted
 "exactly" in an uninterrupted fashion. However, in a statistical
 sense, a restarted simulation should produce similar behaviors of the
 system as if it is not interrupted.  To achieve such a restart, one
 should write explicitly the same value for {q}, {mu}, {damp}, {f_max},
 {N_f}, {eta}, and {beta} and set {tscale} = 0 if the system is
 compressed during the first run.
 
 The progress of the QBMSST can be monitored by printing the global
 scalar and global vector quantities computed by the fix.  The global
 vector contains five values in this order:
 
 \[{dhugoniot}, {drayleigh}, {lagrangian_speed}, {lagrangian_position},
 {quantum_temperature}\]
 
 {dhugoniot} is the departure from the Hugoniot (temperature units).
 {drayleigh} is the departure from the Rayleigh line (pressure units).
 {lagrangian_speed} is the laboratory-frame Lagrangian speed (particle velocity) of the computational cell (velocity units).
 {lagrangian_position} is the computational cell position in the reference frame moving at the shock speed. This is the distance of the computational cell behind the shock front.
 {quantum_temperature} is the temperature of the quantum thermal bath <i>T</i><sup>qm</sup>. :ol
 
 To print these quantities to the log file with descriptive column
 headers, the following LAMMPS commands are suggested. Here the
 "fix_modify"_fix_modify.html energy command is also enabled to allow
 the thermo keyword {etotal} to print the quantity <i>etot</i>.  See
 also the "thermo_style"_thermo_style.html command.
 
 fix             fix_id all msst z
 fix_modify      fix_id energy yes
 variable        dhug    equal f_fix_id\[1\]
 variable        dray    equal f_fix_id\[2\]
 variable        lgr_vel equal f_fix_id\[3\]
 variable        lgr_pos equal f_fix_id\[4\]
 variable        T_qm    equal f_fix_id\[5\]
 thermo_style    custom  step temp ke pe lz pzz etotal v_dhug v_dray v_lgr_vel v_lgr_pos v_T_qm f_fix_id :pre
 
 The global scalar under the entry f_fix_id is the quantity of thermo
 energy as an extra part of <i>etot</i>. This global scalar and the
 vector of 5 quantities can be accessed by various "output
 commands"_Section_howto.html#howto_15. It is worth noting that the
 temp keyword under the "thermo_style"_thermo_style.html command prints
 the instantaneous classical temperature <i>T</i><sup>cl</sup> as
 described in the command "fix qtb"_fix_qtb.html.
 
 :line
 
 [Restrictions:]
 
 This fix style is part of the USER-QTB package.  It is only enabled if
 LAMMPS was built with that package. See the "Making
 LAMMPS"_Section_start.html#start_3 section for more info.
 
 All cell dimensions must be periodic. This fix can not be used with a
 triclinic cell.  The QBMSST fix has been tested only for the group-ID
 all.
 
 :line
 
 [Related commands:]
 
 "fix qtb"_fix_qtb.html, "fix msst"_fix_msst.html
 
 :line
 
 [Default:]
 
 The keyword defaults are q = 10, mu = 0, tscale = 0.01, damp = 1, seed
 = 880302, f_max = 200.0, N_f = 100, eta = 1.0, beta = 100, and
 T_init=300.0. e0, p0, and v0 are calculated on the first step.
 
 :line
 
-:link(Goldman)
+:link(Goldman1)
 [(Goldman)] Goldman, Reed and Fried, J. Chem. Phys. 131, 204103 (2009)
 
 :link(Qi)
 [(Qi)] Qi and Reed, J. Phys. Chem. A 116, 10451 (2012).
diff --git a/doc/src/neigh_modify.txt b/doc/src/neigh_modify.txt
index 5c149d892..c4544cb29 100644
--- a/doc/src/neigh_modify.txt
+++ b/doc/src/neigh_modify.txt
@@ -1,217 +1,218 @@
 "LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
 
 :link(lws,http://lammps.sandia.gov)
 :link(ld,Manual.html)
 :link(lc,Section_commands.html#comm)
 
 :line
 
 neigh_modify command :h3
 
 [Syntax:]
 
 neigh_modify keyword values ... :pre
 
 one or more keyword/value pairs may be listed :ulb,l
 keyword = {delay} or {every} or {check} or {once} or {cluster} or {include} or {exclude} or {page} or {one} or {binsize}
   {delay} value = N
     N = delay building until this many steps since last build
   {every} value = M
     M = build neighbor list every this many steps
   {check} value = {yes} or {no}
     {yes} = only build if some atom has moved half the skin distance or more
     {no} = always build on 1st step that {every} and {delay} are satisfied
   {once} value = {yes} or {no}
     {yes} = only build neighbor list once at start of run and never rebuild
     {no} = rebuild neighbor list according to other settings
   {cluster} value = {yes} or {no}
     {yes} = check bond,angle,etc neighbor list for nearby clusters
     {no} = do not check bond,angle,etc neighbor list for nearby clusters
   {include} value = group-ID
     group-ID = only build pair neighbor lists for atoms in this group
   {exclude} values:
     type M N
       M,N = exclude if one atom in pair is type M, other is type N
     group group1-ID group2-ID
       group1-ID,group2-ID = exclude if one atom is in 1st group, other in 2nd
     molecule/intra group-ID
       group-ID = exclude if both atoms are in the same molecule and in group
     molecule/inter group-ID
       group-ID = exclude if both atoms are in different molecules and in group
     none
       delete all exclude settings
   {page} value = N
     N = number of pairs stored in a single neighbor page
   {one} value = N
     N = max number of neighbors of one atom
   {binsize} value = size
     size = bin size for neighbor list construction (distance units) :pre
 :ule
 
 [Examples:]
 
 neigh_modify every 2 delay 10 check yes page 100000
 neigh_modify exclude type 2 3
 neigh_modify exclude group frozen frozen check no
 neigh_modify exclude group residue1 chain3
 neigh_modify exclude molecule/intra rigid :pre
 
 [Description:]
 
 This command sets parameters that affect the building and use of
 pairwise neighbor lists.  Depending on what pair interactions and
 other commands are defined, a simulation may require one or more
 neighbor lists.
 
 The {every}, {delay}, {check}, and {once} options affect how often
 lists are built as a simulation runs.  The {delay} setting means never
 build new lists until at least N steps after the previous build.  The
 {every} setting means build lists every M steps (after the delay has
 passed).  If the {check} setting is {no}, the lists are built on the
 first step that satisfies the {delay} and {every} settings.  If the
 {check} setting is {yes}, then the {every} and {delay} settings
 determine when a build may possibly be performed, but an actual build
 only occurs if some atom has moved more than half the skin distance
 (specified in the "neighbor"_neighbor.html command) since the last
 build.
 
 If the {once} setting is yes, then the neighbor list is only built
 once at the beginning of each run, and never rebuilt, except on steps
 when a restart file is written, or steps when a fix forces a rebuild
 to occur (e.g. fixes that create or delete atoms, such as "fix
 deposit"_fix_deposit.html or "fix evaporate"_fix_evaporate.html).
 This setting should only be made if you are certain atoms will not
 move far enough that the neighbor list should be rebuilt, e.g. running
 a simulation of a cold crystal.  Note that it is not that expensive to
 check if neighbor lists should be rebuilt.
 
 When the rRESPA integrator is used (see the "run_style"_run_style.html
 command), the {every} and {delay} parameters refer to the longest
 (outermost) timestep.
 
 The {cluster} option does a sanity test every time neighbor lists are
 built for bond, angle, dihedral, and improper interactions, to check
 that each set of 2, 3, or 4 atoms is a cluster of nearby atoms.  It
 does this by computing the distance between pairs of atoms in the
 interaction and insuring they are not further apart than half the
 periodic box length.  If they are, an error is generated, since the
 interaction would be computed between far-away atoms instead of their
 nearby periodic images.  The only way this should happen is if the
 pairwise cutoff is so short that atoms that are part of the same
 interaction are not communicated as ghost atoms.  This is an unusual
 model (e.g. no pair interactions at all) and the problem can be fixed
 by use of the "comm_modify cutoff"_comm_modify.html command.  Note
 that to save time, the default {cluster} setting is {no}, so that this
 check is not performed.
 
 The {include} option limits the building of pairwise neighbor lists to
 atoms in the specified group.  This can be useful for models where a
 large portion of the simulation is particles that do not interact with
 other particles or with each other via pairwise interactions.  The
 group specified with this option must also be specified via the
-"atom_modify first"_atom_modify.html command.
+"atom_modify first"_atom_modify.html command.  Note that specifying
+"all" as the group-ID effectively turns off the {include} option.
 
 The {exclude} option turns off pairwise interactions between certain
 pairs of atoms, by not including them in the neighbor list.  These are
 sample scenarios where this is useful:
 
 In crack simulations, pairwise interactions can be shut off between 2
 slabs of atoms to effectively create a crack. :ulb,l
 
 When a large collection of atoms is treated as frozen, interactions
 between those atoms can be turned off to save needless
 computation. E.g. Using the "fix setforce"_fix_setforce.html command
 to freeze a wall or portion of a bio-molecule. :l
 
 When one or more rigid bodies are specified, interactions within each
 body can be turned off to save needless computation.  See the "fix
 rigid"_fix_rigid.html command for more details. :l
 :ule
 
 The {exclude type} option turns off the pairwise interaction if one
 atom is of type M and the other of type N.  M can equal N.  The
 {exclude group} option turns off the interaction if one atom is in the
 first group and the other is in the second.  Group1-ID can equal
 group2-ID.  The {exclude molecule/intra} option turns off the
 interaction if both atoms are in the specified group and in the same
 molecule, as determined by their molecule ID.  The {exclude
 molecule/inter} option turns off the interaction between pairs of atoms that
 have different molecule IDs and are both in the specified group.
 
 Each of the exclude options can be specified multiple times.  The
 {exclude type} option is the most efficient option to use; it requires
 only a single check, no matter how many times it has been specified.
 The other exclude options are more expensive if specified multiple
 times; they require one check for each time they have been specified.
 
 Note that the exclude options only affect pairwise interactions; see
 the "delete_bonds"_delete_bonds.html command for information on
 turning off bond interactions.
 
 NOTE: Excluding pairwise interactions will not work correctly when
 also using a long-range solver via the
 "kspace_style"_kspace_style.html command.  LAMMPS will give a warning
 to this effect.  This is because the short-range pairwise interaction
 needs to subtract off a term from the total energy for pairs whose
 short-range interaction is excluded, to compensate for how the
 long-range solver treats the interaction.  This is done correctly for
 pairwise interactions that are excluded (or weighted) via the
 "special_bonds"_special_bonds.html command.  But it is not done for
 interactions that are excluded via these neigh_modify exclude options.
 
 The {page} and {one} options affect how memory is allocated for the
 neighbor lists.  For most simulations the default settings for these
 options are fine, but if a very large problem is being run or a very
 long cutoff is being used, these parameters can be tuned.  The indices
 of neighboring atoms are stored in "pages", which are allocated one
 after another as they fill up.  The size of each page is set by the
 {page} value.  A new page is allocated when the next atom's neighbors
 could potentially overflow the list.  This threshold is set by the
 {one} value which tells LAMMPS the maximum number of neighbors one
 atom can have.
 
 NOTE: LAMMPS can crash without an error message if the number of
 neighbors for a single particle is larger than the {page} setting,
 which means it is much, much larger than the {one} setting.  This is
 because LAMMPS doesn't error check these limits for every pairwise
 interaction (too costly), but only after all the particle's neighbors
 have been found.  This problem usually means something is very wrong
 with the way you've setup your problem (particle spacing, cutoff
 length, neighbor skin distance, etc).  If you really expect that many
 neighbors per particle, then boost the {one} and {page} settings
 accordingly.
 
 The {binsize} option allows you to specify what size of bins will be
 used in neighbor list construction to sort and find neighboring atoms.
 By default, for "neighbor style bin"_neighbor.html, LAMMPS uses bins
 that are 1/2 the size of the maximum pair cutoff.  For "neighbor style
 multi"_neighbor.html, the bins are 1/2 the size of the minimum pair
 cutoff.  Typically these are good values for minimizing the
 time for neighbor list construction.  This setting overrides the
 default.  If you make it too big, there is little overhead due to
 looping over bins, but more atoms are checked.  If you make it too
 small, the optimal number of atoms is checked, but bin overhead goes
 up.  If you set the binsize to 0.0, LAMMPS will use the default
 binsize of 1/2 the cutoff.
 
 [Restrictions:]
 
 If the "delay" setting is non-zero, then it must be a multiple of the
 "every" setting.
 
 The molecule/intra and molecule/inter exclude options can only be used
 with atom styles that define molecule IDs.
 
 The value of the {page} setting must be at least 10x larger than the
 {one} setting.  This insures neighbor pages are not mostly empty
 space.
 
 [Related commands:]
 
 "neighbor"_neighbor.html, "delete_bonds"_delete_bonds.html
 
 [Default:]
 
 The option defaults are delay = 10, every = 1, check = yes, once = no,
-cluster = no, include = all, exclude = none, page = 100000, one =
-2000, and binsize = 0.0.
+cluster = no, include = all (same as no include option defined),
+exclude = none, page = 100000, one = 2000, and binsize = 0.0.
diff --git a/doc/src/pair_kim.txt b/doc/src/pair_kim.txt
index 5a623e5ec..c5d910e27 100644
--- a/doc/src/pair_kim.txt
+++ b/doc/src/pair_kim.txt
@@ -1,118 +1,139 @@
 "LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
 
 :link(lws,http://lammps.sandia.gov)
 :link(ld,Manual.html)
 :link(lc,Section_commands.html#comm)
 
 :line
 
 pair_style kim command :h3
 
 [Syntax:]
 
 pair_style kim virialmode model printflag :pre
 
 virialmode = KIMvirial or LAMMPSvirial
 model = name of KIM model (potential)
 printflag = 1/0 do or do not print KIM descriptor file, optional :ul
 
 [Examples:]
 
 pair_style kim KIMvirial model_Ar_P_Morse
 pair_coeff * * Ar Ar :pre
 
 pair_style kim KIMvirial model_Ar_P_Morse 1
 pair_coeff * * Ar Ar :pre
 
 [Description:]
 
 This pair style is a wrapper on the "Knowledge Base for Interatomic
-Models (KIM)"_https://openkim.org repository of interatomic potentials,
-so that they can be used by LAMMPS scripts.
+Models (OpenKIM)"_https://openkim.org repository of interatomic
+potentials, so that they can be used by LAMMPS scripts.
 
-In KIM lingo, a potential is a "model" and a model contains both the
-analytic formulas that define the potential as well as the parameters
-needed to run it for one or more materials, including coefficients and
-cutoffs.
+Note that in LAMMPS lingo, a KIM model driver is a pair style
+(e.g. EAM or Tersoff).  A KIM model is a pair style for a particular
+element or alloy and set of parameters, e.g. EAM for Cu with a
+specific EAM potential file.
+
+See the current list of "KIM model
+drivers"_https://openkim.org/kim-items/model-drivers/alphabetical.
+
+See the current list of all "KIM
+models"_https://openkim.org/kim-items/models/by-model-drivers.
+
+See the list of "example KIM models"_https://openkim.org/kim-api which
+are included in the KIM library by default, in the "What is in the KIM
+API source package?" section.
+
+To use this pair style, you must first download and install the KIM
+API library from the "OpenKIM website"_https://openkim.org.  The "KIM
+section of Section packages"_Section_packages.html#KIM has
+instructions on how to do this with a simple make command, when
+building LAMMPS.
+
+See the examples/kim dir for an input script that uses a KIM model
+(potential) for Lennard-Jones.
+
+:line
 
 The argument {virialmode} determines how the global virial is
 calculated.  If {KIMvirial} is specified, the KIM model performs the
 global virial calculation (if it knows how).  If {LAMMPSvirial} is
 specified, LAMMPS computes the global virial using its fdotr mechanism.
 
 The argument {model} is the name of the KIM model for a specific
 potential as KIM defines it.  In principle, LAMMPS can invoke any KIM
 model.  You should get an error or warning message from either LAMMPS
 or KIM if there is an incompatibility.
 
 The argument {printflag} is optional.  If it is set to a non-zero
 value then a KIM descriptor file is printed when KIM is invoked.  This
 can be useful for debugging.  The default is to not print this file.
 
 Only a single pair_coeff command is used with the {kim} style which
 specifies the mapping of LAMMPS atom types to KIM elements.  This is
 done by specifying N additional arguments after the * * in the
 pair_coeff command, where N is the number of LAMMPS atom types:
 
 N element names = mapping of KIM elements to atom types :ul
 
 As an example, imagine the KIM model supports Si and C atoms.  If your
 LAMMPS simulation has 4 atom types and you want the 1st 3 to be Si,
 and the 4th to be C, you would use the following pair_coeff command:
 
 pair_coeff * * Si Si Si C :pre
 
 The 1st 2 arguments must be * * so as to span all LAMMPS atom types.
 The first three Si arguments map LAMMPS atom types 1,2,3 to Si as
 defined within KIM.  The final C argument maps LAMMPS atom type 4 to C
 as defined within KIM.  If a mapping value is specified as NULL, the
 mapping is not performed.  This can only be used when a {kim}
 potential is used as part of the {hybrid} pair style.  The NULL values
 are placeholders for atom types that will be used with other
 potentials.
 
 :line
 
 In addition to the usual LAMMPS error messages, the KIM library itself
 may generate errors, which should be printed to the screen.  In this
 case it is also useful to check the kim.log file for additional error
 information.  This file kim.log should be generated in the same
 directory where LAMMPS is running.
 
 To download, build, and install the KIM library on your system, see
 the lib/kim/README file.  Once you have done this and built LAMMPS
 with the KIM package installed you can run the example input scripts
 in examples/kim.
 
 :line
 
 [Mixing, shift, table, tail correction, restart, rRESPA info]:
 
 This pair style does not support the "pair_modify"_pair_modify.html
 mix, shift, table, and tail options.
 
 This pair style does not write its information to "binary restart
 files"_restart.html, since KIM stores the potential parameters.
 Thus, you need to re-specify the pair_style and pair_coeff commands in
 an input script that reads a restart file.
 
 This pair style can only be used via the {pair} keyword of the
 "run_style respa"_run_style.html command.  It does not support the
 {inner}, {middle}, {outer} keywords.
 
 :line
 
 [Restrictions:]
 
 This pair style is part of the KIM package.  It is only enabled if
 LAMMPS was built with that package.  See the "Making
 LAMMPS"_Section_start.html#start_3 section for more info.
 
 The current version of pair_style kim is compatible with the
 kim-api package version 1.6.0 and higher.
 
 [Related commands:]
 
 "pair_coeff"_pair_coeff.html
 
 [Default:] none
diff --git a/examples/README b/examples/README
index 0b037f5c3..dc622ef7c 100644
--- a/examples/README
+++ b/examples/README
@@ -1,170 +1,170 @@
 LAMMPS example problems
 
 There are 3 flavors of sub-directories in this directory, each with sample
 problems you can run with LAMMPS.
 
 lower-case directories = simple test problems for LAMMPS and its packages
 upper-case directories = more complex problems
 USER directory with its own sub-directories = tests for USER packages
 
 Each is discussed below.
 
 ------------------------------------------
 
 Lower-case directories
 
 Each of these sub-directories contains a sample problem you can run
 with LAMMPS.  Most are 2d models so that they run quickly, requiring a
 few seconds to a few minutes to run on a desktop machine.  Each
 problem has an input script (in.*) and produces a log file (log.*) and
 (optionally) a dump file (dump.*) or image files (image.*) or movie
 (movie.mpg) when it runs.  Some use a data file (data.*) of initial
 coordinates as additional input.  Some require that you install one or
 more optional LAMMPS packages.
 
 A few sample log file outputs on different machines and different
 numbers of processors are included in the directories to compare your
 answers to.  E.g. a log file like log.crack.date.foo.P means it ran on
 P processors of machine "foo" with the dated version of LAMMPS.  Note
 that these problems should get statistically similar answers when run
 on different machines or different numbers of processors, but not
 identical answers to those in the log or dump files included here.
 See the Errors section of the LAMMPS documentation for more
 discussion.
 
 Most of the example input scripts have commented-out lines that
 produce dump snapshots of the running simulation in any of 3 formats.
 
 If you uncomment the dump command in the input script, a text dump
 file will be produced, which can be animated by various visualization
 programs (see http://lammps.sandia.gov/viz.html) such as Ovito, VMD,
 or AtomEye.
 
 If you uncomment the dump image command in the input script, and
 assuming you have built LAMMPS with a JPG library, JPG snapshot images
 will be produced when the simulation runs.  They can be quickly
 post-processed into a movie using commands described on the dump image
 doc page.
 
 If you uncomment the dump movie command in the input script, and
 assuming you have built LAMMPS with the FFMPEG library, an MPG movie
 will be produced when the simulation runs.  The movie file can be
 played using various viewers, such as mplayer or QuickTime.
 
 Animations of many of these examples can be viewed on the Movies
 section of the LAMMPS WWW Site.
 
 These are the sample problems and their output in the various
 sub-directories:
 
 accelerate: use of all the various accelerator packages
-airebo:   example for using AIREBO and AIREBO-M
+airebo:   polyethylene with AIREBO potential
 balance:  dynamic load balancing, 2d system
 body:     body particles, 2d system
 cmap:     CMAP 5-body contributions to CHARMM force field
 colloid:  big colloid particles in a small particle solvent, 2d system
 comb:	  models using the COMB potential
 coreshell: adiabatic core/shell model
 controller: use of fix controller as a thermostat
 crack:	  crack propagation in a 2d solid
 deposit:  deposition of atoms and molecules onto a 3d substrate
 dipole:   point dipolar particles, 2d system
 dreiding: methanol via Dreiding FF
 eim:      NaCl using the EIM potential
 ellipse:  ellipsoidal particles in spherical solvent, 2d system
 flow:	  Couette and Poiseuille flow in a 2d channel
 friction: frictional contact of spherical asperities between 2d surfaces
 gcmc:     Grand Canonical Monte Carlo (GCMC) via the fix gcmc command
 granregion: use of fix wall/region/gran as boundary on granular particles
 hugoniostat: Hugoniostat shock dynamics
 indent:	  spherical indenter into a 2d solid
 kim:      use of potentials in Knowledge Base for Interatomic Models (KIM)
 meam:	  MEAM test for SiC and shear (same as shear examples)
 melt:	  rapid melt of 3d LJ system
 micelle:  self-assembly of small lipid-like molecules into 2d bilayers
 min:	  energy minimization of 2d LJ melt
 mscg:     parameterize a multi-scale coarse-graining (MSCG) model
 msst:	  MSST shock dynamics
 nb3b:     use of nonbonded 3-body harmonic pair style
 neb:	  nudged elastic band (NEB) calculation for barrier finding
 nemd:	  non-equilibrium MD of 2d sheared system
 obstacle: flow around two voids in a 2d channel
 peptide:  dynamics of a small solvated peptide chain (5-mer)
 peri:	  Peridynamic model of cylinder impacted by indenter
 pour:     pouring of granular particles into a 3d box, then chute flow
 prd:      parallel replica dynamics of vacancy diffusion in bulk Si
 python:   use of PYTHON package to invoke Python code from input script
 qeq:      use of QEQ package for charge equilibration
 reax:     RDX and TATB models using the ReaxFF
 rigid:    rigid bodies modeled as independent or coupled
 shear:    sideways shear applied to 2d solid, with and without a void
 snap:     use of SNAP potential for Ta
 srd:      stochastic rotation dynamics (SRD) particles as solvent
 snap:     NVE dynamics for BCC tantalum crystal using SNAP potential
 streitz:  Streitz-Mintmire potential for Al2O3
 tad:      temperature-accelerated dynamics of vacancy diffusion in bulk Si
 vashishta: models using the Vashishta potential
 voronoi:  Voronoi tessellation via compute voronoi/atom command
 
 Here is how you might run and visualize one of the sample problems:
 
 cd indent
 cp ../../src/lmp_mpi .           # copy LAMMPS executable to this dir
 lmp_mpi -in in.indent              # run the problem
 
 Running the simulation produces the files {dump.indent} and
 {log.lammps}.  You can visualize the dump file as follows:
 
 ../../tools/xmovie/xmovie -scale dump.indent
 
 If you uncomment the dump image line(s) in the input script a series
 of JPG images will be produced by the run.  These can be viewed
 individually or turned into a movie or animated by tools like
 ImageMagick or QuickTime or various Windows-based tools.  See the dump
 image doc page for more details.  E.g. this ImageMagick command would
 create a GIF file suitable for viewing in a browser.
 
 % convert -loop 1 *.jpg foo.gif
 
 ------------------------------------------
 
 Upper-case directories
 
 The ASPHERE directory has examples of how to model aspherical
 particles with or without solvent, in 3 styles LAMMPS provides.
 Namely point ellipsoids, rigid bodies, and generalized aspherical
 bodies built from line/triangle surface facets in 2d/3d.  See the
 ASPHERE/README file to get started.
 
 The COUPLE directory has examples of how to use LAMMPS as a library,
 either by itself or in tandem with another code or library.  See the
 COUPLE/README file to get started.
 
 The ELASTIC directory has an example script for computing elastic
 constants at zero temperature, using an Si example.  See the
 ELASTIC/in.elastic file for more info.
 
 The ELASTIC_T directory has an example script for computing elastic
 constants at finite temperature, using an Si example.  See the
 ELASTIC_T/in.elastic file for more info.
 
 The HEAT directory has example scripts for heat exchange algorithms
 (e.g. used for establishing a thermal gradient), using two different
 methods.  See the HEAT/README file for more info.
 
 The KAPPA directory has example scripts for computing the thermal
 conductivity (kappa) of a LJ liquid using 5 different methods.  See
 the KAPPA/README file for more info.
 
 The MC directory has an example script for using LAMMPS as an
 energy-evaluation engine in an iterative Monte Carlo energy-relaxation
 loop.
 
 The USER directory contains subdirectories of user-provided example
 scripts for user packages.  See the README files in those directories
 for more info.  See the doc/Section_start.html file for more info
 about installing and building user packages.
 
 The VISCOSITY directory has example scripts for computing the
 viscosity of a LJ liquid using 4 different methods.  See the
 VISCOSITY/README file for more info.
diff --git a/examples/USER/quip/in.gap b/examples/USER/quip/in.gap
index 37667e39b..dd049a473 100644
--- a/examples/USER/quip/in.gap
+++ b/examples/USER/quip/in.gap
@@ -1,22 +1,22 @@
 # Test of GAP potential for Si system
 
 units		metal
 boundary	p p p
 
 atom_style	atomic
 
 read_data	data_gap
 
 pair_style	quip
 pair_coeff	* * gap_example.xml "Potential xml_label=GAP_2015_2_20_0_10_54_35_765" 14
 
 neighbor	0.3 bin
 neigh_modify	delay 10
 
 fix		1 all nve
 thermo		10
 timestep	0.001
 
-dump		1 all custom 10 dump.gap id fx fy fz
+#dump		1 all custom 10 dump.gap id fx fy fz
 
 run		40
diff --git a/examples/USER/quip/in.molecular b/examples/USER/quip/in.molecular
index 24d21d676..4253399d7 100644
--- a/examples/USER/quip/in.molecular
+++ b/examples/USER/quip/in.molecular
@@ -1,48 +1,47 @@
 units metal
 atom_style full
 boundary p p p
-processors 1 1 1
 timestep 0.0001 # 0.1 fs
 
 read_data methane-box-8.data
 
 # DISCLAIMER: This potential mixes parameters from methane and silane
 # potentials and is NOT intended to be a realistic representation of either
 # system.  It is meant to demonstrate the use of hybrid QUIP/LAMMPS potentials,
 # including the use of separate 'special_bonds' settings.
 
 pair_style hybrid/overlay lj/cut 8.0 quip
 
 # exclusion setting for quip; cannot be exactly 1.0 1.0 1.0,
 # since that would not flag 1-2, 1-3, and 1-4 pairs in lj/cut
 special_bonds lj/coul 0.999999999 0.999999999 0.999999999
 
 # Intermolecular: OPLS (JACS 118 (45), p. 11225 (1996))
 # Coulomb interactions omitted for simplicity
 pair_coeff 1 1 lj/cut 0.0028619844 3.5 # CT
 pair_coeff 2 2 lj/cut 0.0013009018 2.5 # HC
 pair_coeff 1 2 lj/cut 0.0019295487 2.95
 pair_modify shift no
 # change exclusion settings for lj/cut only: exclude bonded pairs
 pair_modify pair lj/cut special lj/coul 0.0 0.0 0.0
 
 # Intramolecular
 # Tell QUIP to pretend this is silane (which is covered by the parameter file)
-pair_coeff * * quip ip.parms.SW.xml "IP SW" 14 1
+pair_coeff * * quip sw_example.xml "IP SW" 14 1
 bond_style none
 angle_style none
 
 fix 1 all nve
 
 # Include diagnostics that allow us to compare to a pure QUIP run
 compute equip all pair quip
 compute evdw all pair lj/cut
 compute vir all pressure NULL virial
 
 thermo_style custom step epair ke etotal temp press c_vir c_evdw c_equip
 thermo 1
 
 # dump 1 all custom 1 dump.molecular id type x y z fx fy fz
 # dump_modify 1 sort id
 
 run 10
diff --git a/examples/USER/quip/in.sw b/examples/USER/quip/in.sw
index c1367ac80..aaa4217b2 100644
--- a/examples/USER/quip/in.sw
+++ b/examples/USER/quip/in.sw
@@ -1,22 +1,23 @@
 # Test of SW potential for Si system
 
 units		metal
 boundary	p p p
 
 atom_style	atomic
 
 read_data	data_sw
 
 pair_style	quip
 pair_coeff	* * sw_example.xml "IP SW" 14
 
+velocity        all create 10.0 355311
 neighbor	0.3 bin
 neigh_modify	delay 10
 
 fix		1 all nve
 thermo		10
 timestep	0.001
 
-dump		1 all custom 10 dump.sw id fx fy fz
+#dump		1 all custom 10 dump.sw id fx fy fz
 
-run		1
+run		100
diff --git a/examples/USER/quip/log.24Jul17.gap.g++.1 b/examples/USER/quip/log.24Jul17.gap.g++.1
new file mode 100644
index 000000000..348f2ae0c
--- /dev/null
+++ b/examples/USER/quip/log.24Jul17.gap.g++.1
@@ -0,0 +1,76 @@
+LAMMPS (24 Jul 2017)
+  using 1 OpenMP thread(s) per MPI task
+# Test of GAP potential for Si system
+
+units		metal
+boundary	p p p
+
+atom_style	atomic
+
+read_data	data_gap
+  orthogonal box = (0 0 0) to (10.9685 10.9685 10.9685)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  64 atoms
+
+pair_style	quip
+pair_coeff	* * gap_example.xml "Potential xml_label=GAP_2015_2_20_0_10_54_35_765" 14
+
+neighbor	0.3 bin
+neigh_modify	delay 10
+
+fix		1 all nve
+thermo		10
+timestep	0.001
+
+#dump		1 all custom 10 dump.gap id fx fy fz
+
+run		40
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 4.3
+  ghost atom cutoff = 4.3
+  binsize = 2.15, bins = 6 6 6
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair quip, perpetual
+      attributes: full, newton on
+      pair build: full/bin/atomonly
+      stencil: full/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 2.689 | 2.689 | 2.689 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0            0   -10412.677            0   -10412.677   -107490.01 
+      10    173.98393   -10414.096            0   -10412.679   -91270.969 
+      20    417.38493    -10416.08            0   -10412.681   -42816.133 
+      30    434.34789   -10416.217            0    -10412.68      2459.83 
+      40    423.05899   -10416.124            0   -10412.679    22936.209 
+Loop time of 1.83555 on 1 procs for 40 steps with 64 atoms
+
+Performance: 1.883 ns/day, 12.747 hours/ns, 21.792 timesteps/s
+98.1% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 1.8349     | 1.8349     | 1.8349     |   0.0 | 99.96
+Neigh   | 0.00022817 | 0.00022817 | 0.00022817 |   0.0 |  0.01
+Comm    | 0.00013709 | 0.00013709 | 0.00013709 |   0.0 |  0.01
+Output  | 9.8228e-05 | 9.8228e-05 | 9.8228e-05 |   0.0 |  0.01
+Modify  | 8.6308e-05 | 8.6308e-05 | 8.6308e-05 |   0.0 |  0.00
+Other   |            | 0.0001223  |            |       |  0.01
+
+Nlocal:    64 ave 64 max 64 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    303 ave 303 max 303 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    0 ave 0 max 0 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+FullNghs:  1080 ave 1080 max 1080 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 1080
+Ave neighs/atom = 16.875
+Neighbor list builds = 2
+Dangerous builds = 0
+Total wall time: 0:00:01
diff --git a/examples/USER/quip/log.24Jul17.gap.g++.4 b/examples/USER/quip/log.24Jul17.gap.g++.4
new file mode 100644
index 000000000..a8127148b
--- /dev/null
+++ b/examples/USER/quip/log.24Jul17.gap.g++.4
@@ -0,0 +1,76 @@
+LAMMPS (24 Jul 2017)
+  using 1 OpenMP thread(s) per MPI task
+# Test of GAP potential for Si system
+
+units		metal
+boundary	p p p
+
+atom_style	atomic
+
+read_data	data_gap
+  orthogonal box = (0 0 0) to (10.9685 10.9685 10.9685)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  64 atoms
+
+pair_style	quip
+pair_coeff	* * gap_example.xml "Potential xml_label=GAP_2015_2_20_0_10_54_35_765" 14
+
+neighbor	0.3 bin
+neigh_modify	delay 10
+
+fix		1 all nve
+thermo		10
+timestep	0.001
+
+#dump		1 all custom 10 dump.gap id fx fy fz
+
+run		40
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 4.3
+  ghost atom cutoff = 4.3
+  binsize = 2.15, bins = 6 6 6
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair quip, perpetual
+      attributes: full, newton on
+      pair build: full/bin/atomonly
+      stencil: full/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 2.685 | 2.779 | 3.06 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0            0   -10412.677            0   -10412.677   -107490.01 
+      10    173.98393   -10414.096            0   -10412.679   -91270.969 
+      20    417.38493    -10416.08            0   -10412.681   -42816.133 
+      30    434.34789   -10416.217            0    -10412.68      2459.83 
+      40    423.05899   -10416.124            0   -10412.679    22936.209 
+Loop time of 0.837345 on 4 procs for 40 steps with 64 atoms
+
+Performance: 4.127 ns/day, 5.815 hours/ns, 47.770 timesteps/s
+96.0% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.73144    | 0.79214    | 0.83586    |   4.3 | 94.60
+Neigh   | 5.7936e-05 | 6.5327e-05 | 7.1049e-05 |   0.0 |  0.01
+Comm    | 0.00085807 | 0.044631   | 0.10532    |  18.0 |  5.33
+Output  | 0.00013208 | 0.00013494 | 0.00013733 |   0.0 |  0.02
+Modify  | 6.0558e-05 | 7.8678e-05 | 9.5129e-05 |   0.0 |  0.01
+Other   |            | 0.0002971  |            |       |  0.04
+
+Nlocal:    16 ave 18 max 14 min
+Histogram: 1 0 1 0 0 0 0 1 0 1
+Nghost:    174 ave 182 max 167 min
+Histogram: 1 0 0 0 2 0 0 0 0 1
+Neighs:    0 ave 0 max 0 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+FullNghs:  270 ave 294 max 237 min
+Histogram: 1 0 0 0 1 0 0 0 1 1
+
+Total # of neighbors = 1080
+Ave neighs/atom = 16.875
+Neighbor list builds = 2
+Dangerous builds = 0
+Total wall time: 0:00:00
diff --git a/examples/USER/quip/log.24Jul17.molecular.g++.1 b/examples/USER/quip/log.24Jul17.molecular.g++.1
new file mode 100644
index 000000000..28fc63579
--- /dev/null
+++ b/examples/USER/quip/log.24Jul17.molecular.g++.1
@@ -0,0 +1,130 @@
+LAMMPS (24 Jul 2017)
+  using 1 OpenMP thread(s) per MPI task
+units metal
+atom_style full
+boundary p p p
+timestep 0.0001 # 0.1 fs
+
+read_data methane-box-8.data
+  orthogonal box = (-0.499095 -0.270629 0.131683) to (8.4109 8.63937 9.04168)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  40 atoms
+  scanning bonds ...
+  4 = max bonds/atom
+  scanning angles ...
+  6 = max angles/atom
+  reading bonds ...
+  32 bonds
+  reading angles ...
+  48 angles
+  4 = max # of 1-2 neighbors
+  3 = max # of 1-3 neighbors
+  3 = max # of 1-4 neighbors
+  4 = max # of special neighbors
+
+# DISCLAIMER: This potential mixes parameters from methane and silane
+# potentials and is NOT intended to be a realistic representation of either
+# system.  It is meant to demonstrate the use of hybrid QUIP/LAMMPS potentials,
+# including the use of separate 'special_bonds' settings.
+
+pair_style hybrid/overlay lj/cut 8.0 quip
+
+# exclusion setting for quip; cannot be exactly 1.0 1.0 1.0,
+# since that would not flag 1-2, 1-3, and 1-4 pairs in lj/cut
+special_bonds lj/coul 0.999999999 0.999999999 0.999999999
+  4 = max # of 1-2 neighbors
+  3 = max # of 1-3 neighbors
+  3 = max # of 1-4 neighbors
+  4 = max # of special neighbors
+
+# Intermolecular: OPLS (JACS 118 (45), p. 11225 (1996))
+# Coulomb interactions ommitted for simplicity
+pair_coeff 1 1 lj/cut 0.0028619844 3.5 # CT
+pair_coeff 2 2 lj/cut 0.0013009018 2.5 # HC
+pair_coeff 1 2 lj/cut 0.0019295487 2.95
+pair_modify shift no
+# change exclusion settings for lj/cut only: exclude bonded pairs
+pair_modify pair lj/cut special lj/coul 0.0 0.0 0.0
+
+# Intramolecular
+# Tell QUIP to pretend this is silane (which is covered by the parameter file)
+pair_coeff * * quip sw_example.xml "IP SW" 14 1
+bond_style none
+angle_style none
+
+fix 1 all nve
+
+# Include diagnostics that allow us to compare to a pure QUIP run
+compute equip all pair quip
+compute evdw all pair lj/cut
+compute vir all pressure NULL virial
+
+thermo_style custom step epair ke etotal temp press c_vir c_evdw c_equip
+thermo 1
+
+# dump 1 all custom 1 dump.molecular id type x y z fx fy fz
+# dump_modify 1 sort id
+
+run 10
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 10
+  ghost atom cutoff = 10
+  binsize = 5, bins = 2 2 2
+  2 neighbor lists, perpetual/occasional/extra = 2 0 0
+  (1) pair lj/cut, perpetual, half/full from (2)
+      attributes: half, newton on
+      pair build: halffull/newton
+      stencil: none
+      bin: none
+  (2) pair quip, perpetual
+      attributes: full, newton on
+      pair build: full/bin
+      stencil: full/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 8.288 | 8.288 | 8.288 Mbytes
+Step E_pair KinEng TotEng Temp Press c_vir c_evdw c_equip 
+       0   -5.3530213            0   -5.3530213            0    518847.56    518847.56  -0.10904079   -5.2439805 
+       1   -5.9384459   0.58384822   -5.3545977    115.81657     517370.5    516488.87  -0.10783656   -5.8306093 
+       2    -7.669616    2.3104051   -5.3592109    458.30954    512986.36    509497.58  -0.10422283   -7.5653932 
+       3   -10.473314    5.1069211   -5.3663924    1013.0477    505833.04    498121.43 -0.098049469   -10.375264 
+       4   -14.234705     8.859182   -5.3755227    1757.3747    496127.44    482749.79 -0.089147485   -14.145557 
+       5   -18.806851    13.420941   -5.3859098      2662.28    484148.76    463882.72 -0.077305196   -18.729546 
+       6   -24.021727    18.625147   -5.3965797    3694.6259    470219.95    442095.39  -0.06194509   -23.959782 
+       7   -29.702647    24.295529   -5.4071176     4819.446    454683.57    417996.56 -0.042859727   -29.659787 
+       8    -35.67405    30.257258   -5.4167913    6002.0599    437887.03    392197.62 -0.019248651   -35.654801 
+       9   -41.771047    36.345757   -5.4252893    7209.8209    420163.51    365280.27 0.0096063065   -41.780653 
+      10   -47.845522    42.413161   -5.4323614    8413.3973    401821.91     337776.7  0.044743702   -47.890266 
+Loop time of 0.0537777 on 1 procs for 10 steps with 40 atoms
+
+Performance: 1.607 ns/day, 14.938 hours/ns, 185.951 timesteps/s
+90.3% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.053478   | 0.053478   | 0.053478   |   0.0 | 99.44
+Bond    | 1.9073e-06 | 1.9073e-06 | 1.9073e-06 |   0.0 |  0.00
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 7.7724e-05 | 7.7724e-05 | 7.7724e-05 |   0.0 |  0.14
+Output  | 0.00018263 | 0.00018263 | 0.00018263 |   0.0 |  0.34
+Modify  | 1.5974e-05 | 1.5974e-05 | 1.5974e-05 |   0.0 |  0.03
+Other   |            | 2.122e-05  |            |       |  0.04
+
+Nlocal:    40 ave 40 max 40 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    1175 ave 1175 max 1175 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    4768 ave 4768 max 4768 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+FullNghs:  9536 ave 9536 max 9536 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 9536
+Ave neighs/atom = 238.4
+Ave special neighs/atom = 4
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:00
diff --git a/examples/USER/quip/log.24Jul17.molecular.g++.4 b/examples/USER/quip/log.24Jul17.molecular.g++.4
new file mode 100644
index 000000000..a8be8e77b
--- /dev/null
+++ b/examples/USER/quip/log.24Jul17.molecular.g++.4
@@ -0,0 +1,130 @@
+LAMMPS (24 Jul 2017)
+  using 1 OpenMP thread(s) per MPI task
+units metal
+atom_style full
+boundary p p p
+timestep 0.0001 # 0.1 fs
+
+read_data methane-box-8.data
+  orthogonal box = (-0.499095 -0.270629 0.131683) to (8.4109 8.63937 9.04168)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  40 atoms
+  scanning bonds ...
+  4 = max bonds/atom
+  scanning angles ...
+  6 = max angles/atom
+  reading bonds ...
+  32 bonds
+  reading angles ...
+  48 angles
+  4 = max # of 1-2 neighbors
+  3 = max # of 1-3 neighbors
+  3 = max # of 1-4 neighbors
+  4 = max # of special neighbors
+
+# DISCLAIMER: This potential mixes parameters from methane and silane
+# potentials and is NOT intended to be a realistic representation of either
+# system.  It is meant to demonstrate the use of hybrid QUIP/LAMMPS potentials,
+# including the use of separate 'special_bonds' settings.
+
+pair_style hybrid/overlay lj/cut 8.0 quip
+
+# exclusion setting for quip; cannot be exactly 1.0 1.0 1.0,
+# since that would not flag 1-2, 1-3, and 1-4 pairs in lj/cut
+special_bonds lj/coul 0.999999999 0.999999999 0.999999999
+  4 = max # of 1-2 neighbors
+  3 = max # of 1-3 neighbors
+  3 = max # of 1-4 neighbors
+  4 = max # of special neighbors
+
+# Intermolecular: OPLS (JACS 118 (45), p. 11225 (1996))
+# Coulomb interactions ommitted for simplicity
+pair_coeff 1 1 lj/cut 0.0028619844 3.5 # CT
+pair_coeff 2 2 lj/cut 0.0013009018 2.5 # HC
+pair_coeff 1 2 lj/cut 0.0019295487 2.95
+pair_modify shift no
+# change exclusion settings for lj/cut only: exclude bonded pairs
+pair_modify pair lj/cut special lj/coul 0.0 0.0 0.0
+
+# Intramolecular
+# Tell QUIP to pretend this is silane (which is covered by the parameter file)
+pair_coeff * * quip sw_example.xml "IP SW" 14 1
+bond_style none
+angle_style none
+
+fix 1 all nve
+
+# Include diagnostics that allow us to compare to a pure QUIP run
+compute equip all pair quip
+compute evdw all pair lj/cut
+compute vir all pressure NULL virial
+
+thermo_style custom step epair ke etotal temp press c_vir c_evdw c_equip
+thermo 1
+
+# dump 1 all custom 1 dump.molecular id type x y z fx fy fz
+# dump_modify 1 sort id
+
+run 10
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 10
+  ghost atom cutoff = 10
+  binsize = 5, bins = 2 2 2
+  2 neighbor lists, perpetual/occasional/extra = 2 0 0
+  (1) pair lj/cut, perpetual, half/full from (2)
+      attributes: half, newton on
+      pair build: halffull/newton
+      stencil: none
+      bin: none
+  (2) pair quip, perpetual
+      attributes: full, newton on
+      pair build: full/bin
+      stencil: full/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 8.26 | 8.386 | 8.762 Mbytes
+Step E_pair KinEng TotEng Temp Press c_vir c_evdw c_equip 
+       0   -5.3530213            0   -5.3530213            0    518847.56    518847.56  -0.10904079   -5.2439805 
+       1   -5.9384459   0.58384822   -5.3545977    115.81657     517370.5    516488.87  -0.10783656   -5.8306093 
+       2    -7.669616    2.3104051   -5.3592109    458.30954    512986.36    509497.58  -0.10422283   -7.5653932 
+       3   -10.473314    5.1069211   -5.3663924    1013.0477    505833.04    498121.43 -0.098049469   -10.375264 
+       4   -14.234705     8.859182   -5.3755227    1757.3747    496127.44    482749.79 -0.089147485   -14.145557 
+       5   -18.806851    13.420941   -5.3859098      2662.28    484148.76    463882.72 -0.077305196   -18.729546 
+       6   -24.021727    18.625147   -5.3965797    3694.6259    470219.95    442095.39  -0.06194509   -23.959782 
+       7   -29.702647    24.295529   -5.4071176     4819.446    454683.57    417996.56 -0.042859727   -29.659787 
+       8    -35.67405    30.257258   -5.4167913    6002.0599    437887.03    392197.62 -0.019248651   -35.654801 
+       9   -41.771047    36.345757   -5.4252893    7209.8209    420163.51    365280.27 0.0096063065   -41.780653 
+      10   -47.845522    42.413161   -5.4323614    8413.3973    401821.91     337776.7  0.044743702   -47.890266 
+Loop time of 0.0506847 on 4 procs for 10 steps with 40 atoms
+
+Performance: 1.705 ns/day, 14.079 hours/ns, 197.298 timesteps/s
+94.4% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.04216    | 0.045656   | 0.049349   |   1.2 | 90.08
+Bond    | 1.9073e-06 | 2.4438e-06 | 2.861e-06  |   0.0 |  0.00
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.00068545 | 0.004438   | 0.0079191  |   3.9 |  8.76
+Output  | 0.00048304 | 0.00053334 | 0.00060964 |   0.0 |  1.05
+Modify  | 1.1444e-05 | 1.4424e-05 | 1.9312e-05 |   0.0 |  0.03
+Other   |            | 4.047e-05  |            |       |  0.08
+
+Nlocal:    10 ave 15 max 6 min
+Histogram: 1 0 0 1 1 0 0 0 0 1
+Nghost:    878 ave 948 max 812 min
+Histogram: 1 0 1 0 0 0 1 0 0 1
+Neighs:    1192 ave 1764 max 731 min
+Histogram: 1 0 0 1 1 0 0 0 0 1
+FullNghs:  2384 ave 3527 max 1439 min
+Histogram: 1 0 0 1 1 0 0 0 0 1
+
+Total # of neighbors = 9536
+Ave neighs/atom = 238.4
+Ave special neighs/atom = 4
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:00
diff --git a/examples/USER/quip/log.24Jul17.sw.g++.1 b/examples/USER/quip/log.24Jul17.sw.g++.1
new file mode 100644
index 000000000..c8115f4cf
--- /dev/null
+++ b/examples/USER/quip/log.24Jul17.sw.g++.1
@@ -0,0 +1,83 @@
+LAMMPS (24 Jul 2017)
+  using 1 OpenMP thread(s) per MPI task
+# Test of SW potential for Si system
+
+units		metal
+boundary	p p p
+
+atom_style	atomic
+
+read_data	data_sw
+  orthogonal box = (0 0 0) to (5.431 5.431 5.431)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  8 atoms
+
+pair_style	quip
+pair_coeff	* * sw_example.xml "IP SW" 14
+
+velocity        all create 10.0 355311
+neighbor	0.3 bin
+neigh_modify	delay 10
+
+fix		1 all nve
+thermo		10
+timestep	0.001
+
+#dump		1 all custom 10 dump.sw id fx fy fz
+
+run		100
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 4.2258
+  ghost atom cutoff = 4.2258
+  binsize = 2.1129, bins = 3 3 3
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair quip, perpetual
+      attributes: full, newton on
+      pair build: full/bin/atomonly
+      stencil: full/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 2.684 | 2.684 | 2.684 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0           10       -34.68            0   -34.670952    32.206289 
+      10    4.5659178   -34.675073            0   -34.670942    46.253731 
+      20     1.606683   -34.672391            0   -34.670937    44.736892 
+      30    6.7007748   -34.677011            0   -34.670948    16.403049 
+      40     5.682757   -34.676087            0   -34.670945    18.696408 
+      50    2.2140716   -34.672942            0   -34.670939    37.592282 
+      60    5.0475382   -34.675512            0   -34.670944    37.331666 
+      70    7.0990979   -34.677369            0   -34.670946    40.533757 
+      80    5.7306189   -34.676128            0   -34.670943    47.748813 
+      90    5.0895648   -34.675549            0   -34.670944    38.092721 
+     100    4.1070919   -34.674659            0   -34.670943    28.737864 
+Loop time of 0.384233 on 1 procs for 100 steps with 8 atoms
+
+Performance: 22.486 ns/day, 1.067 hours/ns, 260.259 timesteps/s
+94.6% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.38365    | 0.38365    | 0.38365    |   0.0 | 99.85
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.00017333 | 0.00017333 | 0.00017333 |   0.0 |  0.05
+Output  | 0.00014162 | 0.00014162 | 0.00014162 |   0.0 |  0.04
+Modify  | 7.081e-05  | 7.081e-05  | 7.081e-05  |   0.0 |  0.02
+Other   |            | 0.0001957  |            |       |  0.05
+
+Nlocal:    8 ave 8 max 8 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    162 ave 162 max 162 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    0 ave 0 max 0 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+FullNghs:  128 ave 128 max 128 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 128
+Ave neighs/atom = 16
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:00
diff --git a/examples/USER/quip/log.24Jul17.sw.g++.4 b/examples/USER/quip/log.24Jul17.sw.g++.4
new file mode 100644
index 000000000..d7306c705
--- /dev/null
+++ b/examples/USER/quip/log.24Jul17.sw.g++.4
@@ -0,0 +1,83 @@
+LAMMPS (24 Jul 2017)
+  using 1 OpenMP thread(s) per MPI task
+# Test of SW potential for Si system
+
+units		metal
+boundary	p p p
+
+atom_style	atomic
+
+read_data	data_sw
+  orthogonal box = (0 0 0) to (5.431 5.431 5.431)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  8 atoms
+
+pair_style	quip
+pair_coeff	* * sw_example.xml "IP SW" 14
+
+velocity        all create 10.0 355311
+neighbor	0.3 bin
+neigh_modify	delay 10
+
+fix		1 all nve
+thermo		10
+timestep	0.001
+
+#dump		1 all custom 10 dump.sw id fx fy fz
+
+run		100
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 4.2258
+  ghost atom cutoff = 4.2258
+  binsize = 2.1129, bins = 3 3 3
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair quip, perpetual
+      attributes: full, newton on
+      pair build: full/bin/atomonly
+      stencil: full/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 2.698 | 2.698 | 2.698 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0           10       -34.68            0   -34.670952    32.206289 
+      10    4.5659178   -34.675073            0   -34.670942    46.253731 
+      20     1.606683   -34.672391            0   -34.670937    44.736892 
+      30    6.7007748   -34.677011            0   -34.670948    16.403049 
+      40     5.682757   -34.676087            0   -34.670945    18.696408 
+      50    2.2140716   -34.672942            0   -34.670939    37.592282 
+      60    5.0475382   -34.675512            0   -34.670944    37.331666 
+      70    7.0990979   -34.677369            0   -34.670946    40.533757 
+      80    5.7306189   -34.676128            0   -34.670943    47.748813 
+      90    5.0895648   -34.675549            0   -34.670944    38.092721 
+     100    4.1070919   -34.674659            0   -34.670943    28.737864 
+Loop time of 0.423803 on 4 procs for 100 steps with 8 atoms
+
+Performance: 20.387 ns/day, 1.177 hours/ns, 235.959 timesteps/s
+90.6% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.39332    | 0.40011    | 0.40704    |   0.8 | 94.41
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.015632   | 0.022605   | 0.029425   |   3.3 |  5.33
+Output  | 0.00025702 | 0.00028491 | 0.00035429 |   0.0 |  0.07
+Modify  | 7.3671e-05 | 8.1897e-05 | 8.9884e-05 |   0.0 |  0.02
+Other   |            | 0.0007259  |            |       |  0.17
+
+Nlocal:    2 ave 2 max 2 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+Nghost:    113 ave 113 max 113 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+Neighs:    0 ave 0 max 0 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+FullNghs:  32 ave 32 max 32 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 128
+Ave neighs/atom = 16
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:00
diff --git a/lib/.gitignore b/lib/.gitignore
index e153da2c3..cbeae7705 100644
--- a/lib/.gitignore
+++ b/lib/.gitignore
@@ -1 +1,2 @@
 Makefile.lammps
+.depend
diff --git a/lib/Install.py b/lib/Install.py
index 18b426f92..6b9025433 100644
--- a/lib/Install.py
+++ b/lib/Install.py
@@ -1,82 +1,92 @@
 #!/usr/bin/env python
 
 # install.py tool to do a generic build of a library
 # soft linked to by many of the lib/Install.py files
 # used to automate the steps described in the corresponding lib/README
 
-import sys,commands,os
+from __future__ import print_function
+import sys,os,subprocess
 
 # help message
 
 help = """
-Syntax: python Install.py -m machine -e suffix
-  specify -m and optionally -e, order does not matter
+Syntax from src dir: make lib-libname args="-m machine -e suffix"
+Syntax from lib dir: python Install.py -m machine -e suffix
+
+libname = name of lib dir (e.g. atc, h5md, meam, poems, etc)
+specify -m and optionally -e, order does not matter
+
-  -m = peform a clean followed by "make -f Makefile.machine"
+  -m = perform a clean followed by "make -f Makefile.machine"
        machine = suffix of a lib/Makefile.* file
   -e = set EXTRAMAKE variable in Makefile.machine to Makefile.lammps.suffix
        does not alter existing Makefile.machine
+
+Examples:
+
+make lib-poems args="-m g++"    # build POEMS lib with GNU g++ compiler
+make lib-meam args="-m ifort"   # build MEAM lib with Intel ifort compiler
 """
 
 # print error message or help
 
 def error(str=None):
-  if not str: print help
-  else: print "ERROR",str
+  if not str: print(help)
+  else: print("ERROR",str)
   sys.exit()
 
 # parse args
 
 args = sys.argv[1:]
 nargs = len(args)
 if nargs == 0: error()
 
 machine = None
 extraflag = 0
 
 iarg = 0
 while iarg < nargs:
   if args[iarg] == "-m":
     if iarg+2 > nargs: error()
     machine = args[iarg+1]
-    iarg += 2  
+    iarg += 2
   elif args[iarg] == "-e":
     if iarg+2 > nargs: error()
     extraflag = 1
     suffix = args[iarg+1]
-    iarg += 2  
+    iarg += 2
   else: error()
 
 # set lib from working dir
 
 cwd = os.getcwd()
 lib = os.path.basename(cwd)
 
 # create Makefile.auto as copy of Makefile.machine
 # reset EXTRAMAKE if requested
-  
+
 if not os.path.exists("Makefile.%s" % machine):
   error("lib/%s/Makefile.%s does not exist" % (lib,machine))
 
 lines = open("Makefile.%s" % machine,'r').readlines()
 fp = open("Makefile.auto",'w')
 
 for line in lines:
   words = line.split()
   if len(words) == 3 and extraflag and \
         words[0] == "EXTRAMAKE" and words[1] == '=':
     line = line.replace(words[2],"Makefile.lammps.%s" % suffix)
   print >>fp,line,
 
 fp.close()
 
 # make the library via Makefile.auto
 
-print "Building lib%s.a ..." % lib
+print("Building lib%s.a ..." % lib)
 cmd = "make -f Makefile.auto clean; make -f Makefile.auto"
-txt = commands.getoutput(cmd)
-print txt
+txt = subprocess.check_output(cmd,shell=True,stderr=subprocess.STDOUT)
+print(txt.decode('UTF-8'))
 
-if os.path.exists("lib%s.a" % lib): print "Build was successful"
+if os.path.exists("lib%s.a" % lib): print("Build was successful")
 else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib))
 if not os.path.exists("Makefile.lammps"):
-  print "lib/%s/Makefile.lammps was NOT created" % lib
+  print("lib/%s/Makefile.lammps was NOT created" % lib)
diff --git a/lib/atc/Install.py b/lib/atc/Install.py
index 37041d2ea..ffe709d44 120000
--- a/lib/atc/Install.py
+++ b/lib/atc/Install.py
@@ -1 +1 @@
-Install.py
\ No newline at end of file
+../Install.py
\ No newline at end of file
diff --git a/lib/atc/Makefile.g++ b/lib/atc/Makefile.g++
index d15e6cb3b..bb3028392 100644
--- a/lib/atc/Makefile.g++
+++ b/lib/atc/Makefile.g++
@@ -1,51 +1,57 @@
+# library build -*- makefile -*-
 SHELL = /bin/sh
 
 # which file will be copied to Makefile.lammps
 
 EXTRAMAKE = Makefile.lammps.installed
 
 # ------ FILES ------
+
 SRC = $(wildcard *.cpp)
 INC = $(wildcard *.h)
 
 # ------ DEFINITIONS ------
 
 LIB = libatc.a
 OBJ =   $(SRC:.cpp=.o)
 
 # ------ SETTINGS ------
 
 # include any MPI settings needed for the ATC library to build with
 # must be the same MPI library that LAMMPS is built with
 
 CC =	        g++
 CCFLAGS =       -O -g -fPIC -I../../src -DMPICH_IGNORE_CXX_SEEK
 ARCHIVE =	ar
 ARCHFLAG =	-rc
 DEPFLAGS =      -M
 LINK =         	g++
 LINKFLAGS =	-O
 USRLIB =
 SYSLIB =
 
 # ------ MAKE PROCEDURE ------
 
 lib: 	$(OBJ)
 	$(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ)
 	@cp $(EXTRAMAKE) Makefile.lammps
 
 # ------ COMPILE RULES ------
 
 %.o:%.cpp
 	$(CC) $(CCFLAGS) -c $<
 %.d:%.cpp
 	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@		
 
 # ------ DEPENDENCIES ------
 
 DEPENDS = $(OBJ:.o=.d)
 
 # ------ CLEAN ------
 
+.PHONY: clean lib
+
 clean:
 	-rm *.o *.d *~ $(LIB)
+
+sinclude $(DEPENDS)
diff --git a/lib/atc/Makefile.lammps b/lib/atc/Makefile.lammps
deleted file mode 100644
index c8cd66af2..000000000
--- a/lib/atc/Makefile.lammps
+++ /dev/null
@@ -1,5 +0,0 @@
-# Settings that the LAMMPS build will import when this package library is used
-
-user-atc_SYSINC =
-user-atc_SYSLIB = -lblas -llapack
-user-atc_SYSPATH =
diff --git a/lib/atc/Makefile.mingw32-cross b/lib/atc/Makefile.mingw32-cross
deleted file mode 100644
index 8b3354098..000000000
--- a/lib/atc/Makefile.mingw32-cross
+++ /dev/null
@@ -1,67 +0,0 @@
-# library build -*- makefile -*-
-SHELL = /bin/sh
-
-# which file will be copied to Makefile.lammps
-EXTRAMAKE = Makefile.lammps.linalg
-
-# ------ FILES ------
-
-SRC = $(wildcard *.cpp)
-INC = $(wildcard *.h)
-
-# ------ DEFINITIONS ------
-
-DIR = Obj_mingw32/
-LIB = $(DIR)libatc.a
-OBJ = $(SRC:%.cpp=$(DIR)%.o)
-
-# ------ SETTINGS ------
-
-# include any MPI settings needed for the ATC library to build with
-# the same MPI library that LAMMPS is built with
-
-CC =		i686-w64-mingw32-g++
-CCFLAGS =	-I../../src -I../../src/STUBS -DMPICH_IGNORE_CXX_SEEK	\
-		-O3 -march=i686 -mtune=generic -mfpmath=387 -mpc64	\
-                -ffast-math -funroll-loops -fstrict-aliasing 	\
-		-DLAMMPS_SMALLSMALL -Wno-uninitialized
-ARCHIVE =	i686-w64-mingw32-ar
-ARCHFLAG =	-rcs
-DEPFLAGS =      -M
-LINK =         	$(CC)
-LINKFLAGS =	-O
-USRLIB =
-SYSLIB =
-
-# ------ MAKE PROCEDURE ------
-
-default: $(DIR) $(LIB) Makefile.lammps
-
-$(DIR):
-	mkdir $(DIR)
-
-Makefile.lammps:
-	@cp $(EXTRAMAKE) Makefile.lammps
-
-$(LIB): $(OBJ)
-	$(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ)
-	@cp $(EXTRAMAKE) Makefile.lammps
-
-# ------ COMPILE RULES ------
-
-$(DIR)%.o:%.cpp
-	$(CC) $(CCFLAGS) -c $< -o $@
-$(DIR)%.d:%.cpp
-	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@		
-
-# ------ DEPENDENCIES ------
-
-DEPENDS = $(OBJ:.o=.d)
-
-# ------ CLEAN ------
-
-clean:
-	-rm $(DIR)*.o $(DIR)*.d *~ $(LIB)
-
-$(DEPENDS) : $(DIR)
-sinclude $(DEPENDS)
diff --git a/lib/atc/Makefile.mingw32-cross-mpi b/lib/atc/Makefile.mingw32-cross-mpi
deleted file mode 100644
index c5feeca81..000000000
--- a/lib/atc/Makefile.mingw32-cross-mpi
+++ /dev/null
@@ -1,68 +0,0 @@
-# library build -*- makefile -*-
-SHELL = /bin/sh
-
-# which file will be copied to Makefile.lammps
-EXTRAMAKE = Makefile.lammps.linalg
-
-# ------ FILES ------
-
-SRC = $(wildcard *.cpp)
-INC = $(wildcard *.h)
-
-# ------ DEFINITIONS ------
-
-DIR = Obj_mingw32-mpi/
-LIB = $(DIR)libatc.a
-OBJ = $(SRC:%.cpp=$(DIR)%.o)
-
-# ------ SETTINGS ------
-
-# include any MPI settings needed for the ATC library to build with
-# the same MPI library that LAMMPS is built with
-
-CC =		i686-w64-mingw32-g++
-CCFLAGS =	-I../../tools/mingw-cross/mpich2-win32/include/		\
-		-I../../src -DMPICH_IGNORE_CXX_SEEK	\
-		-O3 -march=i686 -mtune=generic -mfpmath=387 -mpc64	\
-                -ffast-math -funroll-loops -fstrict-aliasing 	\
-		-DLAMMPS_SMALLSMALL -Wno-uninitialized
-ARCHIVE =	i686-w64-mingw32-ar
-ARCHFLAG =	-rcs
-DEPFLAGS =      -M
-LINK =         	$(CC)
-LINKFLAGS =	-O
-USRLIB =
-SYSLIB =
-
-# ------ MAKE PROCEDURE ------
-
-default: $(DIR) $(LIB) Makefile.lammps
-
-$(DIR):
-	mkdir $(DIR)
-
-Makefile.lammps:
-	@cp $(EXTRAMAKE) Makefile.lammps
-
-$(LIB): $(OBJ)
-	$(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ)
-	@cp $(EXTRAMAKE) Makefile.lammps
-
-# ------ COMPILE RULES ------
-
-$(DIR)%.o:%.cpp
-	$(CC) $(CCFLAGS) -c $< -o $@
-$(DIR)%.d:%.cpp
-	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@		
-
-# ------ DEPENDENCIES ------
-
-DEPENDS = $(OBJ:.o=.d)
-
-# ------ CLEAN ------
-
-clean:
-	-rm $(DIR)*.o $(DIR)*.d *~ $(LIB)
-
-$(DEPENDS) : $(DIR)
-sinclude $(DEPENDS)
diff --git a/lib/atc/Makefile.mingw64-cross b/lib/atc/Makefile.mingw64-cross
deleted file mode 100644
index fbd3a0261..000000000
--- a/lib/atc/Makefile.mingw64-cross
+++ /dev/null
@@ -1,67 +0,0 @@
-# library build -*- makefile -*-
-SHELL = /bin/sh
-
-# which file will be copied to Makefile.lammps
-EXTRAMAKE = Makefile.lammps.linalg
-
-# ------ FILES ------
-
-SRC = $(wildcard *.cpp)
-INC = $(wildcard *.h)
-
-# ------ DEFINITIONS ------
-
-DIR = Obj_mingw64/
-LIB = $(DIR)libatc.a
-OBJ = $(SRC:%.cpp=$(DIR)%.o)
-
-# ------ SETTINGS ------
-
-# include any MPI settings needed for the ATC library to build with
-# the same MPI library that LAMMPS is built with
-
-CC =		  x86_64-w64-mingw32-g++
-CCFLAGS =	-I../../src -I../../src/STUBS -DMPICH_IGNORE_CXX_SEEK	\
-		-O3 -march=core2 -mtune=core2 -mpc64 -msse2    \
-                -ffast-math -funroll-loops -fstrict-aliasing		\
-		-DLAMMPS_SMALLBIG -Wno-uninitialized
-ARCHIVE =	x86_64-w64-mingw32-ar
-ARCHFLAG =	-rcs
-DEPFLAGS =      -M
-LINK =         	$(CC)
-LINKFLAGS =	-O
-USRLIB =
-SYSLIB =
-
-# ------ MAKE PROCEDURE ------
-
-default: $(DIR) $(LIB) Makefile.lammps
-
-$(DIR):
-	mkdir $(DIR)
-
-Makefile.lammps:
-	@cp $(EXTRAMAKE) Makefile.lammps
-
-$(LIB): $(OBJ)
-	$(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ)
-	@cp $(EXTRAMAKE) Makefile.lammps
-
-# ------ COMPILE RULES ------
-
-$(DIR)%.o:%.cpp
-	$(CC) $(CCFLAGS) -c $< -o $@
-$(DIR)%.d:%.cpp
-	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@		
-
-# ------ DEPENDENCIES ------
-
-DEPENDS = $(OBJ:.o=.d)
-
-# ------ CLEAN ------
-
-clean:
-	-rm $(DIR)*.o $(DIR)*.d *~ $(LIB)
-
-$(DEPENDS) : $(DIR)
-sinclude $(DEPENDS)
diff --git a/lib/atc/Makefile.mingw64-cross-mpi b/lib/atc/Makefile.mingw64-cross-mpi
deleted file mode 100644
index f8dd64eae..000000000
--- a/lib/atc/Makefile.mingw64-cross-mpi
+++ /dev/null
@@ -1,68 +0,0 @@
-# library build -*- makefile -*-
-SHELL = /bin/sh
-
-# which file will be copied to Makefile.lammps
-EXTRAMAKE = Makefile.lammps.linalg
-
-# ------ FILES ------
-
-SRC = $(wildcard *.cpp)
-INC = $(wildcard *.h)
-
-# ------ DEFINITIONS ------
-
-DIR = Obj_mingw64-mpi/
-LIB = $(DIR)libatc.a
-OBJ = $(SRC:%.cpp=$(DIR)%.o)
-
-# ------ SETTINGS ------
-
-# include any MPI settings needed for the ATC library to build with
-# the same MPI library that LAMMPS is built with
-
-CC =		  x86_64-w64-mingw32-g++
-CCFLAGS =	-I../../tools/mingw-cross/mpich2-win64/include/		\
-		-I../../src -DMPICH_IGNORE_CXX_SEEK	\
-		-O3 -march=core2 -mtune=core2 -mpc64 -msse2    \
-                -ffast-math -funroll-loops -fstrict-aliasing		\
-		-DLAMMPS_SMALLBIG -Wno-uninitialized
-ARCHIVE =	x86_64-w64-mingw32-ar
-ARCHFLAG =	-rcs
-DEPFLAGS =      -M
-LINK =         	$(CC)
-LINKFLAGS =	-O
-USRLIB =
-SYSLIB =
-
-# ------ MAKE PROCEDURE ------
-
-default: $(DIR) $(LIB) Makefile.lammps
-
-$(DIR):
-	mkdir $(DIR)
-
-Makefile.lammps:
-	@cp $(EXTRAMAKE) Makefile.lammps
-
-$(LIB): $(OBJ)
-	$(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ)
-	@cp $(EXTRAMAKE) Makefile.lammps
-
-# ------ COMPILE RULES ------
-
-$(DIR)%.o:%.cpp
-	$(CC) $(CCFLAGS) -c $< -o $@
-$(DIR)%.d:%.cpp
-	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@		
-
-# ------ DEPENDENCIES ------
-
-DEPENDS = $(OBJ:.o=.d)
-
-# ------ CLEAN ------
-
-clean:
-	-rm $(DIR)*.o $(DIR)*.d *~ $(LIB)
-
-$(DEPENDS) : $(DIR)
-sinclude $(DEPENDS)
diff --git a/lib/atc/Makefile.mpic++ b/lib/atc/Makefile.mpi
similarity index 59%
rename from lib/atc/Makefile.mpic++
rename to lib/atc/Makefile.mpi
index c9dfdb79c..ec941efdc 100644
--- a/lib/atc/Makefile.mpic++
+++ b/lib/atc/Makefile.mpi
@@ -1,39 +1,55 @@
 # library build -*- makefile -*-
 SHELL = /bin/sh
 
 # which file will be copied to Makefile.lammps
-EXTRAMAKE = Makefile.lammps.installed
+EXTRAMAKE = Makefile.lammps.linalg
+
 # ------ FILES ------
+
 SRC = $(wildcard *.cpp)
 INC = $(wildcard *.h)
+
 # ------ DEFINITIONS ------
+
 LIB = libatc.a
 OBJ =   $(SRC:.cpp=.o)
+
+default: lib
+
 # ------ SETTINGS ------
 
+.PHONY: clean lib depend
+
 # include any MPI settings needed for the ATC library to build with
 # must be the same MPI library that LAMMPS is built with
 
-CC =	        mpic++
-CCFLAGS =       -O3 -Wall -g -I../../src -fPIC -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1
+CC =	        mpicxx
+CCFLAGS =       -O3 -Wall -g -fPIC
+CPPFLAGS = -I../../src -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1
 ARCHIVE =	ar
 ARCHFLAG =	-rc
-DEPFLAGS =      -M
-LINK =         	$(CC)
-LINKFLAGS =	-O
 # ------ MAKE PROCEDURE ------
+
 lib: 	$(OBJ)
 	$(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ)
 	@cp $(EXTRAMAKE) Makefile.lammps
+
 # ------ COMPILE RULES ------
+
 %.o:%.cpp
-	$(CC) $(CCFLAGS) -c $<
-%.d:%.cpp
-	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@		
+	$(CC) $(CPPFLAGS) $(CCFLAGS) -c $<
+
 # ------ DEPENDENCIES ------
-DEPENDS = $(OBJ:.o=.d)
+
+depend .depend : fastdep.exe $(SRC)
+	@./fastdep.exe $(INCFLAGS) -- $^ > .depend || exit 1
+
+fastdep.exe: ../../src/DEPEND/fastdep.c
+	@cc -O -o $@ $<
+
 # ------ CLEAN ------
+
 clean:
-	-rm *.o *.d *~ $(LIB)
+	-rm -f *.o *~ .depend $(LIB) fastdep.exe
 
 sinclude $(DEPENDS)
diff --git a/lib/atc/Makefile.serial b/lib/atc/Makefile.serial
index 44ce5fd34..70b786a6b 100644
--- a/lib/atc/Makefile.serial
+++ b/lib/atc/Makefile.serial
@@ -1,51 +1,55 @@
 # library build -*- makefile -*-
 SHELL = /bin/sh
 
 # which file will be copied to Makefile.lammps
 EXTRAMAKE = Makefile.lammps.linalg
 
 # ------ FILES ------
 
 SRC = $(wildcard *.cpp)
 INC = $(wildcard *.h)
 
 # ------ DEFINITIONS ------
 
 LIB = libatc.a
 OBJ =   $(SRC:.cpp=.o)
 
+default: lib
+
 # ------ SETTINGS ------
 
+.PHONY: clean lib depend
+
 # include any MPI settings needed for the ATC library to build with
 # must be the same MPI library that LAMMPS is built with
 
 CC =	        g++
-CCFLAGS =       -O -g -fPIC -I../../src -I../../src/STUBS
+CCFLAGS =       -O3 -g -fPIC
+CPPFLAGS = -I../../src -I../../src/STUBS
 ARCHIVE =	ar
 ARCHFLAG =	-rc
-DEPFLAGS =      -M
-LINK =         	$(CC)
-LINKFLAGS =	-O
 # ------ MAKE PROCEDURE ------
 
 lib: 	$(OBJ)
 	$(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ)
 	@cp $(EXTRAMAKE) Makefile.lammps
 
 # ------ COMPILE RULES ------
 
 %.o:%.cpp
-	$(CC) $(CCFLAGS) -c $<
-%.d:%.cpp
-	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@		
+	$(CC) $(CPPFLAGS) $(CCFLAGS) -c $<
 
 # ------ DEPENDENCIES ------
 
-DEPENDS = $(OBJ:.o=.d)
+depend .depend : fastdep.exe $(SRC)
+	@./fastdep.exe $(INCFLAGS) -- $^ > .depend || exit 1
+
+fastdep.exe: ../../src/DEPEND/fastdep.c
+	@cc -O -o $@ $<
 
 # ------ CLEAN ------
 
 clean:
-	-rm *.o *.d *~ $(LIB)
+	-rm -f *.o *~ .depend $(LIB) fastdep.exe
 
-sinclude $(DEPENDS)
+sinclude .depend
diff --git a/lib/awpmd/Install.py b/lib/awpmd/Install.py
index 37041d2ea..ffe709d44 120000
--- a/lib/awpmd/Install.py
+++ b/lib/awpmd/Install.py
@@ -1 +1 @@
-Install.py
\ No newline at end of file
+../Install.py
\ No newline at end of file
diff --git a/lib/awpmd/Makefile.mingw32-cross b/lib/awpmd/Makefile.mingw32-cross
deleted file mode 100644
index 6a9398717..000000000
--- a/lib/awpmd/Makefile.mingw32-cross
+++ /dev/null
@@ -1,80 +0,0 @@
-# library build -*- makefile -*-
-SHELL = /bin/sh
-
-# which file will be copied to Makefile.lammps
-
-EXTRAMAKE = Makefile.lammps.linalg
-
-# ------ FILES ------
-
-SRC = logexc.cpp wpmd.cpp wpmd_split.cpp
-vpath %.cpp ivutils/src
-vpath %.cpp systems/interact/TCP
-
-INC = \
-    cerf.h \
-    cerf2.h \
-    cerf_octave.h \
-    cvector_3.h \
-    lapack_inter.h \
-    logexc.h \
-    pairhash.h \
-    refobj.h \
-    tcpdefs.h \
-    vector_3.h \
-    wavepacket.h \
-    wpmd.h \
-    wpmd_split.h
-
-# ------ DEFINITIONS ------
-DIR = Obj_mingw32/
-LIB = $(DIR)libawpmd.a
-OBJ =  $(SRC:%.cpp=$(DIR)%.o)
-
-# ------ SETTINGS ------
-
-# include any MPI settings needed for the ATC library to build with
-# the same MPI library that LAMMPS is built with
-
-CC =	  i686-w64-mingw32-g++
-CCFLAGS = 	-O2 -march=i686 -mtune=generic -mfpmath=387 -mpc64	\
-		-finline-functions		\
-                -ffast-math -funroll-loops -fstrict-aliasing		\
-                -Wall -W -Wno-uninitialized -Isystems/interact/TCP/ -Isystems/interact -Iivutils/include
-ARCHIVE =	i686-w64-mingw32-ar
-ARCHFLAG =	-rscv
-DEPFLAGS =  -M
-#LINK =         
-#LINKFLAGS =	
-USRLIB =
-SYSLIB =
-
-# ------ MAKE PROCEDURE ------
-
-default: $(DIR) $(LIB) Makefile.lammps
-
-$(DIR):
-	mkdir $(DIR)
-
-Makefile.lammps:
-	@cp $(EXTRAMAKE) Makefile.lammps
-
-$(LIB): 	$(OBJ)
-	$(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ)
-	@cp $(EXTRAMAKE) Makefile.lammps
-
-# ------ COMPILE RULES ------
-
-$(DIR)%.o:%.cpp
-	$(CC) $(CCFLAGS) -c $< -o $@
-$(DIR)%.d:%.cpp
-	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@		
-
-# ------ DEPENDENCIES ------
-
-DEPENDS = $(OBJ:.o=.d)
-
-# ------ CLEAN ------
-
-clean:
-	-rm *.d *~ $(OBJ) $(LIB)
diff --git a/lib/awpmd/Makefile.mingw32-cross-mpi b/lib/awpmd/Makefile.mingw32-cross-mpi
deleted file mode 100644
index cc2a76111..000000000
--- a/lib/awpmd/Makefile.mingw32-cross-mpi
+++ /dev/null
@@ -1,13 +0,0 @@
-# -*- makefile -*- wrapper for non-MPI libraries
-
-SHELL=/bin/sh
-
-all:
-	$(MAKE) $(MFLAGS) mingw32-cross
-	rm -f Obj_mingw32-mpi
-	ln -s Obj_mingw32 Obj_mingw32-mpi
-
-clean:
-	$(MAKE) $(MFLAGS) clean-mingw32-cross
-	rm -f Obj_mingw32-mpi
-
diff --git a/lib/awpmd/Makefile.mingw64-cross b/lib/awpmd/Makefile.mingw64-cross
deleted file mode 100644
index 1f3e60812..000000000
--- a/lib/awpmd/Makefile.mingw64-cross
+++ /dev/null
@@ -1,79 +0,0 @@
-# library build -*- makefile -*-
-SHELL = /bin/sh
-
-# which file will be copied to Makefile.lammps
-
-EXTRAMAKE = Makefile.lammps.linalg
-
-# ------ FILES ------
-
-SRC = logexc.cpp wpmd.cpp wpmd_split.cpp
-vpath %.cpp ivutils/src
-vpath %.cpp systems/interact/TCP
-
-INC = \
-    cerf.h \
-    cerf2.h \
-    cerf_octave.h \
-    cvector_3.h \
-    lapack_inter.h \
-    logexc.h \
-    pairhash.h \
-    refobj.h \
-    tcpdefs.h \
-    vector_3.h \
-    wavepacket.h \
-    wpmd.h \
-    wpmd_split.h
-
-# ------ DEFINITIONS ------
-DIR = Obj_mingw64/
-LIB = $(DIR)libawpmd.a
-OBJ =  $(SRC:%.cpp=$(DIR)%.o)
-
-# ------ SETTINGS ------
-
-# include any MPI settings needed for the ATC library to build with
-# the same MPI library that LAMMPS is built with
-
-CC =	  x86_64-w64-mingw32-g++
-CCFLAGS =  -O3 -march=core2 -mtune=core2 -mpc64 -msse2    \
-                -ffast-math -funroll-loops -fstrict-aliasing		\
-                -Wall -W -Wno-uninitialized -Isystems/interact/TCP/ -Isystems/interact -Iivutils/include
-ARCHIVE =	x86_64-w64-mingw32-ar
-ARCHFLAG =	-rscv
-DEPFLAGS =  -M
-#LINK =         
-#LINKFLAGS =	
-USRLIB =
-SYSLIB =
-
-# ------ MAKE PROCEDURE ------
-
-default: $(DIR) $(LIB) Makefile.lammps
-
-$(DIR):
-	mkdir $(DIR)
-
-Makefile.lammps:
-	@cp $(EXTRAMAKE) Makefile.lammps
-
-$(LIB): 	$(OBJ)
-	$(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ)
-	@cp $(EXTRAMAKE) Makefile.lammps
-
-# ------ COMPILE RULES ------
-
-$(DIR)%.o:%.cpp
-	$(CC) $(CCFLAGS) -c $< -o $@
-$(DIR)%.d:%.cpp
-	$(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@		
-
-# ------ DEPENDENCIES ------
-
-DEPENDS = $(OBJ:.o=.d)
-
-# ------ CLEAN ------
-
-clean:
-	-rm *.d *~ $(OBJ) $(LIB)
diff --git a/lib/awpmd/Makefile.mingw64-cross-mpi b/lib/awpmd/Makefile.mingw64-cross-mpi
deleted file mode 100644
index 1ec1a0995..000000000
--- a/lib/awpmd/Makefile.mingw64-cross-mpi
+++ /dev/null
@@ -1,13 +0,0 @@
-# -*- makefile -*- wrapper for non-MPI libraries
-
-SHELL=/bin/sh
-
-all:
-	$(MAKE) $(MFLAGS) mingw64-cross
-	rm -f Obj_mingw64-mpi
-	ln -s Obj_mingw64 Obj_mingw64-mpi
-
-clean:
-	$(MAKE) $(MFLAGS) clean-mingw64-cross
-	rm -f Obj_mingw64-mpi
-
diff --git a/lib/colvars/Install.py b/lib/colvars/Install.py
index af658fa26..2fc207710 100644
--- a/lib/colvars/Install.py
+++ b/lib/colvars/Install.py
@@ -1,142 +1,142 @@
 #!/usr/bin/env python
 
 # Install.py tool to do automate build of Colvars
 
 from __future__ import print_function
 import sys,os,subprocess
 
 # help message
 
 help = """
 Syntax from src dir: make lib-colvars args="-m machine -e suffix"
 Syntax from lib/colvars dir: python Install.py -m machine -e suffix
 
 specify -m and optionally -e, order does not matter
 
   -m = peform a clean followed by "make -f Makefile.machine"
        machine = suffix of a lib/colvars/Makefile.* or of a
          src/MAKE/MACHINES/Makefile.* file
   -e = set EXTRAMAKE variable in Makefile.machine to Makefile.lammps.suffix
        does not alter existing Makefile.machine
 
 Examples:
 
 make lib-colvars args="-m g++"     # build COLVARS lib with GNU g++ compiler
 """
 
 # print error message or help
 
 def error(str=None):
   if not str: print(help)
-  else: print("ERROR"),str
+  else: print("ERROR",str)
   sys.exit()
 
 # parse args
 
 args = sys.argv[1:]
 nargs = len(args)
 if nargs == 0: error()
 
 machine = None
 extraflag = False
 
 iarg = 0
 while iarg < nargs:
   if args[iarg] == "-m":
     if iarg+2 > len(args): error()
     machine = args[iarg+1]
     iarg += 2  
   elif args[iarg] == "-e":
     if iarg+2 > len(args): error()
     extraflag = True
     suffix = args[iarg+1]
     iarg += 2  
   else: error()
 
 # set lib from working dir
 
 cwd = os.getcwd()
 lib = os.path.basename(cwd)
 
 def get_lammps_machine_flags(machine):
   """Parse Makefile.machine from LAMMPS, return dictionary of compiler flags"""
   if not os.path.exists("../../src/MAKE/MACHINES/Makefile.%s" % machine):
     error("Cannot locate src/MAKE/MACHINES/Makefile.%s" % machine)
   lines = open("../../src/MAKE/MACHINES/Makefile.%s" % machine,
                'r').readlines()
   machine_flags = {}
   for line in lines:
     line = line.partition('#')[0]
     line = line.rstrip()
     words = line.split()
     if (len(words) > 2):
       if ((words[0] == 'CC') or (words[0] == 'CCFLAGS') or
           (words[0] == 'SHFLAGS') or (words[0] == 'ARCHIVE') or
           (words[0] == 'ARFLAGS') or (words[0] == 'SHELL')):
         machine_flags[words[0]] = ' '.join(words[2:])
   return machine_flags
 
 def gen_colvars_makefile_machine(machine, machine_flags):
   """Generate Makefile.machine for Colvars given the compiler flags"""
   machine_makefile = open("Makefile.%s" % machine, 'w')
   machine_makefile.write('''# -*- makefile -*- to build Colvars module with %s
 
 COLVARS_LIB = libcolvars.a
 COLVARS_OBJ_DIR =
 
 CXX =		%s
 CXXFLAGS =	%s %s
 AR =		%s
 ARFLAGS =	%s
 SHELL =		%s
 
 include Makefile.common
 
 .PHONY: default clean
 
 default: $(COLVARS_LIB) Makefile.lammps
 
 clean:
 	-rm -f $(COLVARS_OBJS) $(COLVARS_LIB)
 ''' % (machine, machine_flags['CC'],
        machine_flags['CCFLAGS'], machine_flags['SHFLAGS'] ,
        machine_flags['ARCHIVE'], machine_flags['ARFLAGS'],
        machine_flags['SHELL']))
 
 if not os.path.exists("Makefile.%s" % machine):
   machine_flags = get_lammps_machine_flags(machine)
   gen_colvars_makefile_machine(machine, machine_flags)
 if not os.path.exists("Makefile.%s" % machine):
   error("lib/%s/Makefile.%s does not exist" % (lib,machine))
 
 # create Makefile.auto as copy of Makefile.machine
 # reset EXTRAMAKE if requested
 
 lines = open("Makefile.%s" % machine,'r').readlines()
 fp = open("Makefile.auto",'w')
 for line in lines:
   words = line.split()
   if len(words) == 3 and extraflag and \
         words[0] == "EXTRAMAKE" and words[1] == '=':
     line = line.replace(words[2],"Makefile.lammps.%s" % suffix)
   fp.write(line)
 fp.close()
 
 # make the library via Makefile.auto
 
 try:
   import multiprocessing
   n_cpus = multiprocessing.cpu_count()
 except:
   n_cpus = 1
 
 print("Building lib%s.a ..." % lib)
 cmd = ["make -f Makefile.auto clean"]
 print(subprocess.check_output(cmd, shell=True).decode())
 cmd = ["make -f Makefile.auto -j%d" % n_cpus]
 print(subprocess.check_output(cmd, shell=True).decode())
 
 if os.path.exists("lib%s.a" % lib): print("Build was successful")
 else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib))
 if not os.path.exists("Makefile.lammps"):
   print("lib/%s/Makefile.lammps was NOT created" % lib)
diff --git a/lib/gpu/Install.py b/lib/gpu/Install.py
index d396be5e1..c6cd1f302 100644
--- a/lib/gpu/Install.py
+++ b/lib/gpu/Install.py
@@ -1,146 +1,155 @@
 #!/usr/bin/env python
 
 # Install.py tool to build the GPU library
 # used to automate the steps described in the README file in this dir
 
 import sys,os,re,commands
 
 # help message
 
 help = """
-Syntax: python Install.py -i isuffix -h hdir -a arch -p precision -e esuffix -m -o osuffix
-  specify one or more options, order does not matter
-  copies an existing Makefile.isuffix in lib/gpu to Makefile.auto 
-  optionally edits these variables in Makefile.auto:
-    CUDA_HOME, CUDA_ARCH, CUDA_PRECISION, EXTRAMAKE
-  optionally uses Makefile.auto to build the GPU library -> libgpu.a
-    and to copy a Makefile.lammps.esuffix -> Makefile.lammps
-  optionally copies Makefile.auto to a new Makefile.osuffix
+Syntax from src dir: make lib-gpu args="-i isuffix -h hdir -a arch -p precision -e esuffix -m -o osuffix"
+Syntax from lib dir: python Install.py -i isuffix -h hdir -a arch -p precision -e esuffix -m -o osuffix
+
+specify one or more options, order does not matter
+
+copies an existing Makefile.isuffix in lib/gpu to Makefile.auto 
+optionally edits these variables in Makefile.auto:
+  CUDA_HOME, CUDA_ARCH, CUDA_PRECISION, EXTRAMAKE
+optionally uses Makefile.auto to build the GPU library -> libgpu.a
+  and to copy a Makefile.lammps.esuffix -> Makefile.lammps
+optionally copies Makefile.auto to a new Makefile.osuffix
 
   -i = use Makefile.isuffix as starting point, copy to Makefile.auto
        default isuffix = linux
   -h = set CUDA_HOME variable in Makefile.auto to hdir
        hdir = path to NVIDIA Cuda software, e.g. /usr/local/cuda
   -a = set CUDA_ARCH variable in Makefile.auto to arch
        use arch = ?? for K40 (Tesla)
        use arch = 37 for dual K80 (Tesla)
        use arch = 60 for P100 (Pascal)
   -p = set CUDA_PRECISION variable in Makefile.auto to precision
        use precision = double or mixed or single
   -e = set EXTRAMAKE variable in Makefile.auto to Makefile.lammps.esuffix
   -m = make the GPU library using Makefile.auto
        first performs a "make clean"
        produces libgpu.a if successful
        also copies EXTRAMAKE file -> Makefile.lammps
          -e can set which Makefile.lammps.esuffix file is copied
   -o = copy final Makefile.auto to Makefile.osuffix
+
+Examples:
+
+make lib-gpu args="-m"      # build GPU lib with default Makefile.linux
+make lib-gpu args="-i xk7 -p single -o xk7.single"      # create new Makefile.xk7.single, altered for single-precision
+make lib-gpu args="-i xk7 -p single -o xk7.single -m"   # ditto, also build GPU lib
 """
 
 # print error message or help
 
 def error(str=None):
   if not str: print help
   else: print "ERROR",str
   sys.exit()
 
 # parse args
 
 args = sys.argv[1:]
 nargs = len(args)
 if nargs == 0: error()
 
 isuffix = "linux"
 hflag = aflag = pflag = eflag = 0
 makeflag = 0
 outflag = 0
 
 iarg = 0
 while iarg < nargs:
   if args[iarg] == "-i":
     if iarg+2 > nargs: error()
     isuffix = args[iarg+1]
     iarg += 2
   elif args[iarg] == "-h":
     if iarg+2 > nargs: error()
     hflag = 1
     hdir = args[iarg+1]
     iarg += 2
   elif args[iarg] == "-a":
     if iarg+2 > nargs: error()
     aflag = 1
     arch = args[iarg+1]
     iarg += 2
   elif args[iarg] == "-p":
     if iarg+2 > nargs: error()
     pflag = 1
     precision = args[iarg+1]
     iarg += 2
   elif args[iarg] == "-e":
     if iarg+2 > nargs: error()
     eflag = 1
     lmpsuffix = args[iarg+1]
     iarg += 2
   elif args[iarg] == "-m":
     makeflag = 1
     iarg += 1
   elif args[iarg] == "-o":
     if iarg+2 > nargs: error()
     outflag = 1
     osuffix = args[iarg+1]
     iarg += 2
   else: error()
 
 if pflag:
   if precision == "double": precstr = "-D_DOUBLE_DOUBLE"
   elif precision == "mixed": precstr = "-D_SINGLE_DOUBLE"
   elif precision == "single": precstr = "-D_SINGLE_SINGLE"
   else: error("Invalid precision setting")
   
 # create Makefile.auto
 # reset EXTRAMAKE, CUDA_HOME, CUDA_ARCH, CUDA_PRECISION if requested
   
 if not os.path.exists("Makefile.%s" % isuffix):
   error("lib/gpu/Makefile.%s does not exist" % isuffix)
 
 lines = open("Makefile.%s" % isuffix,'r').readlines()
 fp = open("Makefile.auto",'w')
 
 for line in lines:
   words = line.split()
   if len(words) != 3:
     print >>fp,line,
     continue
   
   if hflag and words[0] == "CUDA_HOME" and words[1] == '=':
     line = line.replace(words[2],hdir)
   if aflag and words[0] == "CUDA_ARCH" and words[1] == '=':
     line = line.replace(words[2],"-arch=sm_%s" % arch)
   if pflag and words[0] == "CUDA_PRECISION" and words[1] == '=':
     line = line.replace(words[2],precstr)
   if eflag and words[0] == "EXTRAMAKE" and words[1] == '=':
     line = line.replace(words[2],"Makefile.lammps.%s" % lmpsuffix)
     
   print >>fp,line,
 
 fp.close()
 
 # perform make
 # make operations copies EXTRAMAKE file to Makefile.lammps
 
 if makeflag:
   print "Building libgpu.a ..."
   cmd = "rm -f libgpu.a"
   commands.getoutput(cmd)
   cmd = "make -f Makefile.auto clean; make -f Makefile.auto"
   commands.getoutput(cmd)
   if not os.path.exists("libgpu.a"):
     error("Build of lib/gpu/libgpu.a was NOT successful")
   if not os.path.exists("Makefile.lammps"):
     error("lib/gpu/Makefile.lammps was NOT created")
 
 # copy new Makefile.auto to Makefile.osuffix
 
 if outflag:
   print "Creating new Makefile.%s" % osuffix
   cmd = "cp Makefile.auto Makefile.%s" % osuffix
   commands.getoutput(cmd)
diff --git a/lib/h5md/Install.py b/lib/h5md/Install.py
index 37041d2ea..ffe709d44 120000
--- a/lib/h5md/Install.py
+++ b/lib/h5md/Install.py
@@ -1 +1 @@
-Install.py
\ No newline at end of file
+../Install.py
\ No newline at end of file
diff --git a/lib/h5md/Makefile.mpi b/lib/h5md/Makefile.mpi
new file mode 120000
index 000000000..df682a954
--- /dev/null
+++ b/lib/h5md/Makefile.mpi
@@ -0,0 +1 @@
+Makefile.h5cc
\ No newline at end of file
diff --git a/lib/h5md/Makefile.serial b/lib/h5md/Makefile.serial
new file mode 120000
index 000000000..df682a954
--- /dev/null
+++ b/lib/h5md/Makefile.serial
@@ -0,0 +1 @@
+Makefile.h5cc
\ No newline at end of file
diff --git a/lib/kim/.gitignore b/lib/kim/.gitignore
index 3be8ecbdd..c1f57fe64 100644
--- a/lib/kim/.gitignore
+++ b/lib/kim/.gitignore
@@ -1,2 +1,3 @@
 /Makefile.KIM_DIR
 /Makefile.KIM_Config
+/installed-kim-api-*
diff --git a/lib/kim/Install.py b/lib/kim/Install.py
index bcd22dcbb..315bb4e11 100644
--- a/lib/kim/Install.py
+++ b/lib/kim/Install.py
@@ -1,150 +1,276 @@
 #!/usr/bin/env python
 
 # install.py tool to setup the kim-api library
 # used to automate the steps described in the README file in this dir
+from __future__ import print_function
+import sys,os,re,subprocess
 
-import sys,os,re,urllib,commands
+# transparently use either urllib or an external tool
+try:
+  import ssl
+  try: from urllib.request import urlretrieve as geturl
+  except: from urllib import urlretrieve as geturl
+except:
+  def geturl(url,fname):
+    cmd = "curl -o %s %s" % (fname,url)
+    txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
+    return txt
 
 help = """
-Syntax: install.py -v version -c kim-dir -b kim-model-name -a kim-name
-        specify one or more options, order does not matter
-        -v = version of kim-api to download and work with
-             default = kim-api-v1.8.2 (current as of June 2017)
-        -c = create Makefile.KIM_DIR within lammps lib/kim to configure lammps
-             for use with the kim-api library installed at "kim-dir" (absolute
-             path).  default = this dir
-        -b = build kim-api and kim model where kim-model-name can be a specific
-             openkim.org model name (such as
-             "EAM_Dynamo_Ackland_W__MO_141627196590_002") or the keyword
-             "OpenKIM" to install all compatible models from the openkim.org
-             site.
-        -a = add kim-name openkim.org item (model driver or model) to existing
-             kim-api instalation.
+Syntax from src dir: make lib-kim args="-v version  -a kim-name"
+Syntax from lib dir: python Install.py -v version  -a kim-name
+
+specify one or more options, order does not matter
+
+  -v = version of KIM API library to use
+       default = kim-api-v1.8.2 (current as of June 2017)
+  -b = download and build base KIM API library with example Models (default)
+       this will delete any previous installation in the current folder
+  -n = do NOT download and build base KIM API library. Use an existing installation
+  -p = specify location of KIM API installation (implies -n)
+  -a = add single KIM model or model driver with kim-name
+       to existing KIM API lib (see example below).
+       If kim-name = everything, then rebuild KIM API library with
+       all available OpenKIM Models (this implies -b).
+  -vv = be more verbose about what is happening while the script runs
+
+Examples:
+
+make lib-kim           # install KIM API lib with only example models
+make lib-kim args="-a Glue_Ercolessi_Adams_Al__MO_324507536345_001"  # Ditto plus one model
+make lib-kim args="-a everything"   # install KIM API lib with all models
+make lib-kim args="-n -a EAM_Dynamo_Ackland_W__MO_141627196590_002"   # only add one model or model driver
+
+See the list of KIM model drivers here:
+https://openkim.org/kim-items/model-drivers/alphabetical
+
+See the list of all KIM models here:
+https://openkim.org/kim-items/models/by-model-drivers
+
+See the list of example KIM models included by default here:
+https://openkim.org/kim-api
+in the "What is in the KIM API source package?" section
 """
 
 def error():
-  print help
+  print(help)
   sys.exit()
 
+# expand to full path name
+# process leading '~' or relative path
+
+def fullpath(path):
+  return os.path.abspath(os.path.expanduser(path))
+
 # parse args
 
-args = sys.argv
+args = sys.argv[1:]
+nargs = len(args)
 
 thisdir = os.environ['PWD']
-dir = thisdir
 version = "kim-api-v1.8.2"
 
-dirflag = 0
-buildflag = 0
-addflag = 0
+buildflag = True
+everythingflag = False
+addflag = False
+verboseflag = False
+pathflag = False
 
-iarg = 1
+iarg = 0
 while iarg < len(args):
   if args[iarg] == "-v":
     if iarg+2 > len(args): error()
     version = args[iarg+1]
     iarg += 2
-  elif args[iarg] == "-c":
-    dirflag = 1
-    if iarg+2 > len(args): error()
-    dir = args[iarg+1]
-    iarg += 2
   elif args[iarg] == "-b":
-    buildflag = 1
+    buildflag = True
+    iarg += 1
+  elif args[iarg] == "-n":
+    buildflag = False
+    iarg += 1
+  elif args[iarg] == "-p":
     if iarg+2 > len(args): error()
-    modelname = args[iarg+1]
+    kimdir = fullpath(args[iarg+1])
+    pathflag = True
+    buildflag = False
     iarg += 2
   elif args[iarg] == "-a":
-    addflag = 1
+    addflag = True
     if iarg+2 > len(args): error()
     addmodelname = args[iarg+1]
+    if addmodelname == "everything":
+      buildflag = True
+      everythingflag = True
+      addflag = False
     iarg += 2
+  elif args[iarg] == "-vv":
+    verboseflag = True
+    iarg += 1
   else: error()
 
 thisdir = os.path.abspath(thisdir)
-dir = os.path.abspath(dir)
 url = "https://s3.openkim.org/kim-api/%s.tgz" % version
 
-# download and unpack tarball
+# set KIM API directory
 
+if pathflag:
+  if not os.path.isdir(kimdir):
+    print("\nkim-api is not installed at %s" % kimdir)
+    error()
 
-if not os.path.isfile("%s/Makefile.KIM_DIR" % thisdir):
-  open("%s/Makefile.KIM_DIR" % thisdir, 'w').write("KIM_INSTALL_DIR=%s" % dir)
-  open("%s/Makefile.KIM_Config" % thisdir, 'w').write("include %s/lib/kim-api/Makefile.KIM_Config" % dir)
-  print "Created %s/Makefile.KIM_DIR : using %s" % (thisdir,dir)
+  # configure LAMMPS to use existing kim-api installation
+  with open("%s/Makefile.KIM_DIR" % thisdir, 'w') as mkfile:
+    mkfile.write("KIM_INSTALL_DIR=%s\n\n" % kimdir)
+    mkfile.write(".DUMMY: print_dir\n\n")
+    mkfile.write("print_dir:\n")
+    mkfile.write("	@printf $(KIM_INSTALL_DIR)\n")
+
+  with open("%s/Makefile.KIM_Config" % thisdir, 'w') as cfgfile:
+    cfgfile.write("include %s/lib/kim-api/Makefile.KIM_Config" % kimdir)
+
+  print("Created %s/Makefile.KIM_DIR\n  using %s" % (thisdir,kimdir))
 else:
-  if dirflag == 1:
-    open("%s/Makefile.KIM_DIR" % thisdir, 'w').write("KIM_INSTALL_DIR=%s" % dir)
-    open("%s/Makefile.KIM_Config" % thisdir, 'w').write("include %s/lib/kim-api/Makefile.KIM_Config" % dir)
-    print "Updated %s/Makefile.KIM_DIR : using %s" % (thisdir,dir)
+  kimdir = os.path.join(os.path.abspath(thisdir), "installed-" + version)
+
+# download KIM tarball, unpack, build KIM
+if buildflag:
+
+  # check to see if an installed kim-api already exists and wipe it out.
+
+  if os.path.isdir(kimdir):
+    print("kim-api is already installed at %s.\nRemoving it for re-install" % kimdir)
+    cmd = "rm -rf %s" % kimdir
+    subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
 
+  # configure LAMMPS to use kim-api to be installed
 
-if buildflag == 1:
-  # download kim-api
-  print "Downloading kim-api tarball ..."
-  urllib.urlretrieve(url,"%s/%s.tgz" % (thisdir,version))
-  print "Unpacking kim-api tarball ..."
+  with open("%s/Makefile.KIM_DIR" % thisdir, 'w') as mkfile:
+    mkfile.write("KIM_INSTALL_DIR=%s\n\n" % kimdir)
+    mkfile.write(".DUMMY: print_dir\n\n")
+    mkfile.write("print_dir:\n")
+    mkfile.write("	@printf $(KIM_INSTALL_DIR)\n")
+
+  with open("%s/Makefile.KIM_Config" % thisdir, 'w') as cfgfile:
+    cfgfile.write("include %s/lib/kim-api/Makefile.KIM_Config" % kimdir)
+
+  print("Created %s/Makefile.KIM_DIR\n  using %s" % (thisdir,kimdir))
+
+  # download entire kim-api tarball
+
+  print("Downloading kim-api tarball ...")
+  geturl(url,"%s/%s.tgz" % (thisdir,version))
+  print("Unpacking kim-api tarball ...")
   cmd = "cd %s; rm -rf %s; tar zxvf %s.tgz" % (thisdir,version,version)
-  txt = commands.getstatusoutput(cmd)
-  if txt[0] != 0: error()
+  subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
 
   # configure kim-api
-  print "Configuring kim-api ..."
-  cmd = "cd %s/%s; ./configure --prefix='%s'" % (thisdir,version,dir)
-  txt = commands.getstatusoutput(cmd)
-  print txt[1]
-  if txt[0] != 0: error()
+
+  print("Configuring kim-api ...")
+  cmd = "cd %s/%s; ./configure --prefix='%s'" % (thisdir,version,kimdir)
+  subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
 
   # build kim-api
-  print "Configuring model : %s" % modelname
-  cmd = "cd %s/%s; make add-%s" % (thisdir,version,modelname)
-  txt = commands.getstatusoutput(cmd)
-  print txt[1]
-  if txt[0] != 0: error()
-  #
-  print "Building kim-api ..."
+
+  print("Configuring example Models")
+  cmd = "cd %s/%s; make add-examples" % (thisdir,version)
+  txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
+  if verboseflag: print (txt.decode("UTF-8"))
+
+  if everythingflag:
+    print("Configuring all OpenKIM models, this will take a while ...")
+    cmd = "cd %s/%s; make add-OpenKIM" % (thisdir,version)
+    txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
+    if verboseflag: print(txt.decode("UTF-8"))
+
+  print("Building kim-api ...")
   cmd = "cd %s/%s; make" % (thisdir,version)
-  txt = commands.getstatusoutput(cmd)
-  print txt[1]
-  if txt[0] != 0: error()
+  txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
+  if verboseflag: print(txt.decode("UTF-8"))
 
   # install kim-api
-  print "Installing kim-api ..."
+
+  print("Installing kim-api ...")
   cmd = "cd %s/%s; make install" % (thisdir,version)
-  txt = commands.getstatusoutput(cmd)
-  print txt[1]
-  if txt[0] != 0: error()
-  #
+  txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
+  if verboseflag: print(txt.decode("UTF-8"))
+
   cmd = "cd %s/%s; make install-set-default-to-v1" %(thisdir,version)
-  txt = commands.getstatusoutput(cmd)
-  print txt[1]
-  if txt[0] != 0: error()
+  txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
+  if verboseflag: print(txt.decode("UTF-8"))
 
   # remove source files
-  print "Removing kim-api source and build files ..."
+
+  print("Removing kim-api source and build files ...")
   cmd = "cd %s; rm -rf %s; rm -rf %s.tgz" % (thisdir,version,version)
-  txt = commands.getstatusoutput(cmd)
-  print txt[1]
-  if txt[0] != 0: error()
+  subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
 
-if addflag == 1:
-  # download model
+# add a single model (and possibly its driver) to existing KIM installation
+
+if addflag:
+
+  if not os.path.isdir(kimdir):
+    print("\nkim-api is not installed")
+    error()
+
+  # download single model
+
+  print("Downloading tarball for %s..." % addmodelname)
   url = "https://openkim.org/download/%s.tgz" % addmodelname
-  print "Downloading item tarball ..."
-  urllib.urlretrieve(url,"%s/%s.tgz" % (thisdir,addmodelname))
-  print "Unpacking item tarball ..."
+  geturl(url,"%s/%s.tgz" % (thisdir,addmodelname))
+
+  print("Unpacking item tarball ...")
   cmd = "cd %s; tar zxvf %s.tgz" % (thisdir,addmodelname)
-  txt = commands.getstatusoutput(cmd)
-  if txt[0] != 0: error()
-  #
-  print "Building item ..."
+  subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
+
+  print("Building item ...")
   cmd = "cd %s/%s; make; make install" %(thisdir,addmodelname)
-  txt = commands.getstatusoutput(cmd)
-  print txt[1]
-  if txt[0] != 0: error()
-  #
-  print "Removing kim item source and build files ..."
+  try:
+    txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
+  except subprocess.CalledProcessError as e:
+
+    # Error: but first, check to see if it needs a driver
+    firstRunOutput = e.output.decode("UTF-8")
+
+    cmd = "cd %s/%s; make kim-item-type" % (thisdir,addmodelname)
+    txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
+    txt = txt.decode("UTF-8")
+    if txt == "ParameterizedModel":
+
+      # Get and install driver
+
+      cmd = "cd %s/%s; make model-driver-name" % (thisdir,addmodelname)
+      txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
+      adddrivername = txt.decode("UTF-8").strip()
+      print("First installing model driver: %s..." % adddrivername)
+      cmd = "cd %s; python Install.py -n -a %s" % (thisdir,adddrivername)
+      try:
+        txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
+      except subprocess.CalledProcessError as e:
+        print(e.output)
+        sys.exit()
+
+      if verboseflag: print(txt.decode("UTF-8"))
+
+      # now install the model that needed the driver
+
+      print("Now installing model : %s" % addmodelname)
+      cmd = "cd %s; python Install.py -n -a %s" % (thisdir,addmodelname)
+      try:
+        txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
+      except subprocess.CalledProcessError as e:
+        print(e.output)
+        sys.exit()
+      print(txt.decode("UTF-8"))
+      sys.exit()
+    else:
+      print(firstRunOutput)
+      print("Error, unable to build and install OpenKIM item: %s" \
+            % addmodelname)
+      sys.exit()
+
+  # success the first time
+
+  if verboseflag: print(txt.decode("UTF-8"))
+  print("Removing kim item source and build files ...")
   cmd = "cd %s; rm -rf %s; rm -rf %s.tgz" %(thisdir,addmodelname,addmodelname)
-  txt = commands.getstatusoutput(cmd)
-  print txt[1]
-  if txt[0] != 0: error()
+  subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
diff --git a/lib/kim/README b/lib/kim/README
index 00d6ea8fa..7a4230dc2 100644
--- a/lib/kim/README
+++ b/lib/kim/README
@@ -1,74 +1,76 @@
 This directory contains build settings for the KIM API library which
 is required to use the KIM package and its pair_style kim command in a
 LAMMPS input script.
 
 Information about the KIM project can be found at https://openkim.org.
 The KIM project is lead by Ellad Tadmor and Ryan Elliott (U Minn) and
 James Sethna (Cornell U).  Ryan Elliott is the main developer for the
 KIM API and he also maintains the code that implements the pair_style
 kim command.
 
-To download, build, and install the KIM API on your system, follow
-these steps.  You can use the install.py script to automate these steps.
+You can type "make lib-kim" from the src directory to see help on
+how to download and build this library via make commands, or you can
+do the same thing by typing "python Install.py" from within this
+directory, or you can do it manually by following the instructions
+below.
 
 -----------------
 
 Instructions:
 
-
 1. Configure lammps for use with the kim-api library installed in this directory
 
 $ printf "KIM_INSTALL_DIR=${PWD}\n" > ./Makefile.KIM_DIR
 $ printf "include ${PWD}/lib/kim-api/Makefile.KIM_Config\n" > ./Makefile.KIM_Config
 
 2. Download and unpack the kim-api
 
 # replace X.Y.Z as appropriate here and below
 $ wget http://s3.openkim.org/kim-api/kim-api-vX.Y.Z.tgz
 $ tar zxvf kim-api-vX.Y.Z.tgz
 
 # configure the kim-api
 $ cd kim-api-vX.Y.Z
 $ ./configure --prefix=${PWD}/../
 
 # setup the desired kim item
 $ make add-Pair_Johnson_Fe__MO_857282754307_002
 
 3. Build and install the kim-api and model
 
 $ make
 $ make install
 
 # replace X with the KIM API major version number
 $ make install-set-default-to-vX
 $ cd ../
 
 4. Remove source and build files
 
 $ rm -rf kim-api-vX.Y.Z
 $ rm -rf kim-api-vX.Y.Z.tgz
 
 5. To add additional items do the following (replace the kim item name with your
    desired value)
 
 $ wget https://openkim.org/download/EAM_Johnson_NearestNeighbor_Cu__MO_887933271505_001.tgz
 $ tar zxvf EAM_Johnson_NearestNeighbor_Cu__MO_887933271505_001.tgz
 $ cd EAM_Johnson_NearestNeighbor_Cu__MO_887933271505_001
 $ make
 $ make install
 $ cd ..
 $ rm -rf EAM_Johnson_NearestNeighbor_Cu__MO_887933271505_001
 $ rm -rf EAM_Johnson_NearestNeighbor_Cu__MO_887933271505_001.tgz
 
 -----------------
 
 When these steps are complete you can build LAMMPS with the KIM
 package installed:
 
-$ cd ../../src
+$ cd lammps/src
 $ make yes-kim
 $ make g++ (or whatever target you wish)
 
 Note that the Makefile.lammps and Makefile.KIM_DIR files in this directory
 are required to allow the LAMMPS build to find the necessary KIM files.
 You should not normally need to edit this file.
diff --git a/lib/linalg/Install.py b/lib/linalg/Install.py
index c7076ca52..560afecec 100644
--- a/lib/linalg/Install.py
+++ b/lib/linalg/Install.py
@@ -1,52 +1,58 @@
 #!/usr/bin/env python
 
 # install.py tool to do build of the linear algebra library
 # used to automate the steps described in the README file in this dir
 
 import sys,commands,os
 
 # help message
 
 help = """
-Syntax: python Install.py -m machine
+Syntax from src dir: make lib-linalg args="-m machine"
+Syntax from lib dir: python Install.py -m machine
+
   -m = peform a clean followed by "make -f Makefile.machine"
        machine = suffix of a lib/Makefile.* file
+
+Example:
+
+make lib-linalg args="-m gfortran"   # build with GNU Fortran compiler
 """
 
 # print error message or help
 
 def error(str=None):
   if not str: print help
   else: print "ERROR",str
   sys.exit()
 
 # parse args
 
 args = sys.argv[1:]
 nargs = len(args)
 if nargs == 0: error()
 
 machine = None
 
 iarg = 0
 while iarg < nargs:
   if args[iarg] == "-m":
     if iarg+2 > nargs: error()
     machine = args[iarg+1]
     iarg += 2  
   else: error()
 
 # set lib from working dir
 
 cwd = os.getcwd()
 lib = os.path.basename(cwd)
 
 # make the library
 
 print "Building lib%s.a ..." % lib
 cmd = "make -f Makefile.%s clean; make -f Makefile.%s" % (machine,machine)
 txt = commands.getoutput(cmd)
 print txt
 
 if os.path.exists("lib%s.a" % lib): print "Build was successful"
 else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib))
diff --git a/lib/meam/Install.py b/lib/meam/Install.py
index 37041d2ea..ffe709d44 120000
--- a/lib/meam/Install.py
+++ b/lib/meam/Install.py
@@ -1 +1 @@
-Install.py
\ No newline at end of file
+../Install.py
\ No newline at end of file
diff --git a/lib/mscg/Install.py b/lib/mscg/Install.py
index e54723261..7b10be189 100644
--- a/lib/mscg/Install.py
+++ b/lib/mscg/Install.py
@@ -1,122 +1,129 @@
 #!/usr/bin/env python
 
 # Install.py tool to download, unpack, build, and link to the MS-CG library
 # used to automate the steps described in the README file in this dir
 
 import sys,os,re,commands
 
 # help message
 
 help = """
-Syntax: python Install.py -h hpath hdir -g -b [suffix] -l
-  specify one or more options, order does not matter
+Syntax from src dir: make lib-mscg args="-h hpath hdir -g -b [suffix] -l"
+Syntax from lib dir: python Install.py -h hpath hdir -g -b [suffix] -l
+
+specify one or more options, order does not matter
+
   -h = set home dir of MS-CG to be hpath/hdir
        hpath can be full path, contain '~' or '.' chars
        default hpath = . = lib/mscg
        default hdir = MSCG-release-master = what GitHub zipfile unpacks to
   -g = grab (download) zipfile from MS-CG GitHub website
        unpack it to hpath/hdir
        hpath must already exist
        if hdir already exists, it will be deleted before unpack
   -b = build MS-CG library in its src dir
        optional suffix specifies which src/Make/Makefile.suffix to use
        default suffix = g++_simple
   -l = create 2 softlinks (includelink,liblink) in lib/mscg to MS-CG src dir
+
+Example:
+
+make lib-mscg args="-g -b -l"   # download/build in lib/mscg/MSCG-release-master
 """
 
 # settings
 
 url = "https://github.com/uchicago-voth/MSCG-release/archive/master.zip"
 zipfile = "MS-CG-master.zip"
 zipdir = "MSCG-release-master"
 
 # print error message or help
 
 def error(str=None):
   if not str: print help
   else: print "ERROR",str
   sys.exit()
 
 # expand to full path name
 # process leading '~' or relative path
   
 def fullpath(path):
   return os.path.abspath(os.path.expanduser(path))
   
 # parse args
 
 args = sys.argv[1:]
 nargs = len(args)
 if nargs == 0: error()
 
 homepath = "."
 homedir = zipdir
 
 grabflag = 0
 buildflag = 0
 msuffix = "g++_simple"
 linkflag = 0
 
 iarg = 0
 while iarg < nargs:
   if args[iarg] == "-h":
     if iarg+3 > nargs: error()
     homepath = args[iarg+1]
     homedir = args[iarg+2]
     iarg += 3
   elif args[iarg] == "-g":
     grabflag = 1
     iarg += 1
   elif args[iarg] == "-b":
     buildflag = 1
     if iarg+1 < nargs and args[iarg+1][0] != '-':
       msuffix = args[iarg+1]
       iarg += 1
     iarg += 1
   elif args[iarg] == "-l":
     linkflag = 1
     iarg += 1
   else: error()
 
 homepath = fullpath(homepath)
 if not os.path.isdir(homepath): error("MS-CG path does not exist")
 homedir = "%s/%s" % (homepath,homedir)
 
 # download and unpack MS-CG zipfile
 
 if grabflag:
   print "Downloading MS-CG ..."
   cmd = "curl -L %s > %s/%s" % (url,homepath,zipfile)
   print cmd
   print commands.getoutput(cmd)
 
   print "Unpacking MS-CG zipfile ..."
   if os.path.exists("%s/%s" % (homepath,zipdir)):
     commands.getoutput("rm -rf %s/%s" % (homepath,zipdir))
   cmd = "cd %s; unzip %s" % (homepath,zipfile)
   commands.getoutput(cmd)
   if os.path.basename(homedir) != zipdir:
     if os.path.exists(homedir): commands.getoutput("rm -rf %s" % homedir)
     os.rename("%s/%s" % (homepath,zipdir),homedir)
 
 # build MS-CG
 
 if buildflag:
   print "Building MS-CG ..."
   cmd = "cd %s/src; cp Make/Makefile.%s .; make -f Makefile.%s" % \
       (homedir,msuffix,msuffix)
   txt = commands.getoutput(cmd)
   print txt
 
 # create 2 links in lib/mscg to MS-CG src dir
 
 if linkflag:
   print "Creating links to MS-CG include and lib files"
   if os.path.isfile("includelink") or os.path.islink("includelink"):
     os.remove("includelink")
   if os.path.isfile("liblink") or os.path.islink("liblink"):
     os.remove("liblink")
   cmd = "ln -s %s/src includelink" % homedir
   commands.getoutput(cmd)
   cmd = "ln -s %s/src liblink" % homedir
   commands.getoutput(cmd)
diff --git a/lib/poems/Install.py b/lib/poems/Install.py
index 37041d2ea..ffe709d44 120000
--- a/lib/poems/Install.py
+++ b/lib/poems/Install.py
@@ -1 +1 @@
-Install.py
\ No newline at end of file
+../Install.py
\ No newline at end of file
diff --git a/lib/qmmm/Install.py b/lib/qmmm/Install.py
index 37041d2ea..ffe709d44 120000
--- a/lib/qmmm/Install.py
+++ b/lib/qmmm/Install.py
@@ -1 +1 @@
-Install.py
\ No newline at end of file
+../Install.py
\ No newline at end of file
diff --git a/lib/quip/.gitignore b/lib/quip/.gitignore
new file mode 100644
index 000000000..d6797a67f
--- /dev/null
+++ b/lib/quip/.gitignore
@@ -0,0 +1 @@
+/QUIP
diff --git a/lib/quip/Makefile.lammps b/lib/quip/Makefile.lammps
index 19ff20b07..e471d3f6f 100644
--- a/lib/quip/Makefile.lammps
+++ b/lib/quip/Makefile.lammps
@@ -1,30 +1,40 @@
 # Settings that the LAMMPS build will import when this package library is used
 
-# include ${QUIP_ROOT}/Makefiles/Makefile.${QUIP_ARCH}
-
-F95=$(shell egrep 'F95[ ]*=' ${QUIP_ROOT}/arch/Makefile.${QUIP_ARCH} | sed 's/.*F95[ ]*=[ ]*//')
-
+# try to guess settings assuming there is a configured QUIP git checkout inside the lib/quip directory
+QUIPDIR=$(abspath ../../lib/quip/QUIP)
 ifeq (${QUIP_ROOT},)
-$(error Environment variable QUIP_ROOT must be set.)
+  QUIP_ROOT=$(shell test -d $(QUIPDIR) && echo $(QUIPDIR))
+  ifeq (${QUIP_ARCH},)
+    QUIP_ARCH=$(notdir $(wildcard $(QUIP_ROOT)/build/*))
+  endif
+else
+# uncomment and set manually or set the corresponding environment variables
+#  QUIP_ROOT=
+#  QUIP_ARCH=
 endif
 
+ifeq (${QUIP_ROOT},)
+$(error Environment or make variable QUIP_ROOT must be set.)
+endif
 ifeq (${QUIP_ARCH},)
-$(error Environment variable QUIP_ARCH must be set.)
+$(error Environment or make variable QUIP_ARCH must be set.)
 endif
 
+F95=$(shell egrep 'F95[ ]*=' ${QUIP_ROOT}/arch/Makefile.${QUIP_ARCH} | sed 's/.*F95[ ]*=[ ]*//')
 include ${QUIP_ROOT}/build/${QUIP_ARCH}/Makefile.inc
 include ${QUIP_ROOT}/Makefile.rules
 
 quip_SYSLIB = -lquip
 quip_SYSLIB += ${NETCDF_SYSLIBS}
 quip_SYSLIB += ${MATH_LINKOPTS}
 
 ifeq (${F95},gfortran)
 	quip_SYSLIB += -lgfortran
 else ifeq (${F95},ifort)
 	quip_SYSLIB += -lifcore -lifport
 else
 $(error fortran compiler >>${F95}<< not recognised. Edit lib/quip/Makefile.lammps to specify the fortran library your linker should link to)
 endif
 
 quip_SYSPATH = -L${QUIP_ROOT}/build/${QUIP_ARCH}
+
diff --git a/lib/quip/README b/lib/quip/README
index 94039cfa1..e6cc3903b 100644
--- a/lib/quip/README
+++ b/lib/quip/README
@@ -1,91 +1,102 @@
 QUIP library
 
 Albert Bartok-Partay
 apbartok at gmail dot com
 2014
 
 This library provides a plug-in for calling QUIP potentials from
 LAMMPS. The QUIP package should be built separately, and then the
 resulting libraries can be linked to the LAMMPS code. In case of some
 potentials, such as BOP or GAP, third-party packages are needed, which
 must be downloaded and compiled separately. NB: although GAP has to be
 downloaded separately as it is licensed under a different license
 agreement, it is compiled together with the rest of QUIP.
 
 Building LAMMPS with QUIP support:
 
 1) Building QUIP
 1.1) Obtaining QUIP
 
-The most current release of QUIP can be obtained from github: 
+The most current release of QUIP can be obtained from github:
 
 $ git clone https://github.com/libAtoms/QUIP.git QUIP
 
 If GAP is needed, it may be downloaded from the `Software' section of
 libatoms.org, after accepting the terms and conditions of the Academic
 License Agreement. Extract the tarball under the /path/to/QUIP/src/ directory.
 
 1.2) Building QUIP
 
 There is a README file in the top-level QUIP directory, but here are
 the main steps.
 
 The arch directory contains a selection of machine- and
 compiler-specific makefiles,
 e.g. Makefile.linux_x86_64_gfortran. Decide which one is most
 appropriate for your system, and edit if necessary. The configuring
 step will use the makefile based on the QUIP_ARCH environment
 variable, i.e. Makefile.${QUIP_ARCH}. The script will create a build
 directory, build/${QUIP_ARCH}, and all the building will happen
 there. First it will ask you some questions about where you keep
 libraries and other stuff.
 
 Please note: if you are building QUIP to link it to LAMMPS, the serial version
 of QUIP must be compiled. For example, QUIP_ARCH may be:
 darwin_x86_64_gfortran
 linux_x86_64_gfortran
 linux_x86_64_ifort_icc etc.
 
 If you don't use something it is asking for, just leave it blank. NB
 make sure to answer `y' to `Do you want to compile with GAP prediction
 support ? [y/n]'. The answers will be stored in Makefile.inc in the
 build/${QUIP_ARCH} directory, and you can edit them later (e.g. to
 change optimisation or debug options). Note that the default state is
 usually with rather heavy debugging on, including bounds checking,
 which makes the code quite slow. The make command has to be executed
 from the top-level directory. Making `libquip' ensures all the
 necessary libraries will be built.
 
 for example:
 
 $ cd QUIP
-$ export QUIP_ROOT=/path/to/QUIP
+$ export QUIP_ROOT=${PWD}
 $ export QUIP_ARCH=linux_x86_64_gfortran
 $ make config
 $ make libquip
 
 Optionally, do
 $ make test
 to run a test suite.
 
 2) Building LAMMPS
 
-LAMMPS is now shipped with the interface necessary to use QUIP potentials, but
-it should be enabled first. Enter the LAMMPS directory:
+Edit Makefile.lammps in the lib/quip folder, if necessary.  If you
+have cloned, configured, and built QUIP inside this folder, QUIP_ROOT
+and QUIP_ARCH should be autodetected, even without having to set
+the environment variables.  Otherwise export the environment variables
+as shown above or edit Makefile.lammps.
+
+LAMMPS ships with a user package containing the interface necessary
+to use QUIP potentials, but it needs to be added to the compilation
+first. To do that, enter the LAMMPS source directory and type:
 
-$ cd LAMMPS
-$ cd src
 $ make yes-user-quip
 
 2.2) Build LAMMPS according to the instructions on the LAMMPS website.
 
-3) There are two example sets in examples/USER/quip:
+3) There are three example sets in examples/USER/quip:
 
 - a set of input files to compute the energy of an 8-atom cubic
   diamond cell of silicon with the Stillinger-Weber potential. Use
   this to benchmark that the interface is working correctly.
 
+- a set of input files demonstrating the use of the QUIP pair style
+  for a molecular system with pair style hybrid/overlay and different
+  exclusion settings for different pair styles. This input is
+  for DEMONSTRATION purposes only, and does not simulate a physically
+  meaningful system.
+
 - a set of input files to demonstrate how GAP potentials are specified
   in a LAMMPS input file to run a short MD. The GAP parameter file
   gap_example.xml is intended for TESTING purposes only. Potentials can be
   downloaded from http://www.libatoms.org or obtained from the authors of QUIP.
diff --git a/lib/reax/Install.py b/lib/reax/Install.py
index 37041d2ea..ffe709d44 120000
--- a/lib/reax/Install.py
+++ b/lib/reax/Install.py
@@ -1 +1 @@
-Install.py
\ No newline at end of file
+../Install.py
\ No newline at end of file
diff --git a/lib/smd/.gitignore b/lib/smd/.gitignore
new file mode 100644
index 000000000..4ab7a789e
--- /dev/null
+++ b/lib/smd/.gitignore
@@ -0,0 +1,5 @@
+# ignore these entries with git
+/eigen.tar.gz
+/eigen-eigen-*
+/includelink
+/eigen3
diff --git a/lib/smd/Install.py b/lib/smd/Install.py
index dc0a3187c..337f993be 100644
--- a/lib/smd/Install.py
+++ b/lib/smd/Install.py
@@ -1,103 +1,117 @@
 #!/usr/bin/env python
 
 # Install.py tool to download, unpack, and point to the Eigen library
 # used to automate the steps described in the README file in this dir
 
-import sys,os,re,glob,commands
+from __future__ import print_function
+import sys,os,re,glob,subprocess
+try: from urllib.request import urlretrieve as geturl
+except: from urllib import urlretrieve as geturl
 
 # help message
 
 help = """
-Syntax: python Install.py -h hpath hdir -g -l
-  specify one or more options, order does not matter
-  -h = set home dir of Eigen to be hpath/hdir
-       hpath can be full path, contain '~' or '.' chars
-       default hpath = . = lib/smd
-       default hdir = "ee" = what tarball unpacks to (eigen-eigen-*)
-  -g = grab (download) tarball from http://eigen.tuxfamily.org website
-       unpack it to hpath/hdir
-       hpath must already exist
-       if hdir already exists, it will be deleted before unpack
-  -l = create softlink (includelink) in lib/smd to Eigen src dir
+Syntax from src dir: make lib-smd
+                 or: make lib-smd args="-p /usr/include/eigen3"
+
+Syntax from lib dir: python Install.py
+                 or: python Install.py -p /usr/include/eigen3
+                 or: python Install.py -v 3.3.4 -b
+
+specify one or more options, order does not matter
+
+  -b = download and unpack/configure the Eigen library (default)
+  -p = specify folder holding an existing installation of Eigen
+  -v = set version of Eigen library to download and set up (default = 3.3.4)
+
+
+Example:
+
+make lib-smd args="-b"   # download/build in default lib/smd/eigen-eigen-*
 """
 
 # settings
 
-url = "http://bitbucket.org/eigen/eigen/get/3.3.3.tar.gz"
+version = '3.3.4'
 tarball = "eigen.tar.gz"
 
 # print error message or help
 
 def error(str=None):
-  if not str: print help
-  else: print "ERROR",str
+  if not str: print(help)
+  else: print("ERROR",str)
   sys.exit()
 
 # expand to full path name
 # process leading '~' or relative path
-  
+
 def fullpath(path):
   return os.path.abspath(os.path.expanduser(path))
-  
+
 # parse args
 
 args = sys.argv[1:]
 nargs = len(args)
-if nargs == 0: error()
 
 homepath = "."
-homedir = "ee"
+homedir = "eigen3"
 
-grabflag = 0
-linkflag = 0
+grabflag = True
+buildflag = True
+pathflag = False
+linkflag = True
 
 iarg = 0
 while iarg < nargs:
-  if args[iarg] == "-h":
-    if iarg+3 > nargs: error()
-    homepath = args[iarg+1]
-    homedir = args[iarg+2]
-    iarg += 3
-  elif args[iarg] == "-g":
-    grabflag = 1
-    iarg += 1
-  elif args[iarg] == "-l":
-    linkflag = 1
+  if args[iarg] == "-v":
+    if iarg+2 > nargs: error()
+    version = args[iarg+1]
+    iarg += 2
+  elif args[iarg] == "-p":
+    if iarg+2 > nargs: error()
+    eigenpath = fullpath(args[iarg+1])
+    pathflag = True
+    buildflag = False
+    iarg += 2
+  elif args[iarg] == "-b":
+    buildflag = True
     iarg += 1
   else: error()
 
 homepath = fullpath(homepath)
-if not os.path.isdir(homepath): error("Eigen path does not exist")
+
+if (pathflag):
+  if not os.path.isdir(eigenpath): error("Eigen path does not exist")
+
+if (buildflag and pathflag):
+    error("Cannot use -b and -p flag at the same time")
 
 # download and unpack Eigen tarball
-# glob to find name of dir it unpacks to
+# use glob to find name of dir it unpacks to
 
-if grabflag:
-  print "Downloading Eigen ..."
-  cmd = "curl -L %s > %s/%s" % (url,homepath,tarball)
-  print cmd
-  print commands.getoutput(cmd)
+if buildflag:
+  print("Downloading Eigen ...")
+  url = "http://bitbucket.org/eigen/eigen/get/%s.tar.gz" % version
+  geturl(url,"%s/%s" % (homepath,tarball))
 
-  print "Unpacking Eigen tarball ..."
+  print("Unpacking Eigen tarball ...")
   edir = glob.glob("%s/eigen-eigen-*" % homepath)
   for one in edir:
-    if os.path.isdir(one): commands.getoutput("rm -rf %s" % one)
-  cmd = "cd %s; tar zxvf %s" % (homepath,tarball)
-  commands.getoutput(cmd)
-  if homedir != "ee":
-    if os.path.exists(homedir): commands.getoutput("rm -rf %s" % homedir)
-    edir = glob.glob("%s/eigen-eigen-*" % homepath)
-    os.rename(edir[0],"%s/%s" % (homepath,homedir))
+    if os.path.isdir(one):
+      subprocess.check_output("rm -rf %s" % one,stderr=subprocess.STDOUT,shell=True)
+  cmd = 'cd "%s"; tar -xzvf %s' % (homepath,tarball)
+  subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
+  edir = glob.glob("%s/eigen-eigen-*" % homepath)
+  os.rename(edir[0],"%s/%s" % (homepath,homedir))
+  os.remove(tarball)
 
 # create link in lib/smd to Eigen src dir
 
 if linkflag:
-  print "Creating link to Eigen files"
+  print("Creating link to Eigen files")
   if os.path.isfile("includelink") or os.path.islink("includelink"):
     os.remove("includelink")
-  if homedir == "ee":
-    edir = glob.glob("%s/eigen-eigen-*" % homepath)
-    linkdir = edir[0]
+  if pathflag: linkdir = eigenpath
   else: linkdir = "%s/%s" % (homepath,homedir)
   cmd = "ln -s %s includelink" % linkdir
-  commands.getoutput(cmd)
+  subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
diff --git a/lib/voronoi/.gitignore b/lib/voronoi/.gitignore
new file mode 100644
index 000000000..6ca01c094
--- /dev/null
+++ b/lib/voronoi/.gitignore
@@ -0,0 +1,4 @@
+# files to ignore
+/liblink
+/includelink
+/voro++-*
diff --git a/lib/voronoi/Install.py b/lib/voronoi/Install.py
index 7d847183b..17bba5e8e 100644
--- a/lib/voronoi/Install.py
+++ b/lib/voronoi/Install.py
@@ -1,118 +1,128 @@
 #!/usr/bin/env python
 
 # Install.py tool to download, unpack, build, and link to the Voro++ library
 # used to automate the steps described in the README file in this dir
 
-import sys,os,re,urllib,commands
+from __future__ import print_function
+import sys,os,re,subprocess
+try: from urllib.request import urlretrieve as geturl
+except: from urllib import urlretrieve as geturl
 
 # help message
 
 help = """
-Syntax: python Install.py -v version -h hpath hdir -g -b -l
-  specify one or more options, order does not matter
-  -v = version of Voro++ to download and build
-       default version = voro++-0.4.6 (current as of Jan 2015)
-  -h = set home dir of Voro++ to be hpath/hdir
-       hpath can be full path, contain '~' or '.' chars
-       default hpath = . = lib/voronoi
-       default hdir = voro++-0.4.6 = what tarball unpacks to
-  -g = grab (download) tarball from math.lbl.gov/voro++ website
-       unpack it to hpath/hdir
-       hpath must already exist
-       if hdir already exists, it will be deleted before unpack
-  -b = build Voro++ library in its src dir
-  -l = create 2 softlinks (includelink,liblink) in lib/voronoi to Voro++ src dir
+Syntax from src dir: make lib-voronoi
+                 or: make lib-voronoi args="-p /usr/local/voro++-0.4.6"
+                 or: make lib-voronoi args="-v voro++-0.4.6 -b"
+Syntax from lib dir: python Install.py -v voro++-0.4.6 -b
+                 or: python Install.py
+                 or: python Install.py -p /usr/local/voro++-0.4.6
+
+specify one or more options, order does not matter
+
+  -b = download and build the Voro++ library (default)
+  -p = specify folder of existing Voro++ installation 
+  -v = set version of Voro++ to download and build (default voro++-0.4.6)
+
+Example:
+
+make lib-voronoi args="-b"   # download/build in lib/voronoi/voro++-0.4.6
 """
 
 # settings
 
 version = "voro++-0.4.6"
 url = "http://math.lbl.gov/voro++/download/dir/%s.tar.gz" % version
 
 # print error message or help
 
 def error(str=None):
-  if not str: print help
-  else: print "ERROR",str
+  if not str: print(help)
+  else: print("ERROR",str)
   sys.exit()
 
 # expand to full path name
 # process leading '~' or relative path
-  
+
 def fullpath(path):
   return os.path.abspath(os.path.expanduser(path))
-  
+
 # parse args
 
 args = sys.argv[1:]
 nargs = len(args)
-if nargs == 0: error()
 
 homepath = "."
 homedir = version
 
-grabflag = 0
-buildflag = 0
-linkflag = 0
+grabflag = True
+buildflag = True
+pathflag = False
+linkflag = True
 
 iarg = 0
 while iarg < nargs:
   if args[iarg] == "-v":
     if iarg+2 > nargs: error()
     version = args[iarg+1]
     iarg += 2
-  elif args[iarg] == "-h":
-    if iarg+3 > nargs: error()
-    homepath = args[iarg+1]
-    homedir = args[iarg+2]
-    iarg += 3
-  elif args[iarg] == "-g":
-    grabflag = 1
-    iarg += 1
+  elif args[iarg] == "-p":
+    if iarg+2 > nargs: error()
+    voropath = fullpath(args[iarg+1])  # -p takes the folder of an existing Voro++ installation
+    pathflag = True
+    buildflag = grabflag = False  # existing install: skip download/unpack (it could rm -rf voropath) and build
+    iarg += 2
   elif args[iarg] == "-b":
-    buildflag = 1
-    iarg += 1
-  elif args[iarg] == "-l":
-    linkflag = 1
+    buildflag = True
     iarg += 1
   else: error()
 
 homepath = fullpath(homepath)
-if not os.path.isdir(homepath): error("Voro++ path does not exist")
-homedir = "%s/%s" % (homepath,homedir)
+homedir = "%s/%s" % (homepath,version)
+
+if (pathflag):
+    if not os.path.isdir(voropath): error("Voro++ path does not exist")
+    homedir = voropath
+
+if (buildflag and pathflag):
+    error("Cannot use -b and -p flag at the same time")
 
 # download and unpack Voro++ tarball
 
 if grabflag:
-  print "Downloading Voro++ ..."
-  urllib.urlretrieve(url,"%s/%s.tar.gz" % (homepath,version))
-  
-  print "Unpacking Voro++ tarball ..."
+  print("Downloading Voro++ ...")
+  url = "http://math.lbl.gov/voro++/download/dir/%s.tar.gz" % version  # recompute: -v may have changed version
+  geturl(url,"%s/%s.tar.gz" % (homepath,version))
+  print("Unpacking Voro++ tarball ...")
   if os.path.exists("%s/%s" % (homepath,version)):
-    commands.getoutput("rm -rf %s/%s" % (homepath,version))
-  cmd = "cd %s; tar zxvf %s.tar.gz" % (homepath,version)
-  commands.getoutput(cmd)
+    cmd = 'rm -rf "%s/%s"' % (homepath,version)
+    subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
+  cmd = 'cd "%s"; tar -xzvf %s.tar.gz' % (homepath,version)
+  subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
+  os.remove("%s/%s.tar.gz" % (homepath,version))
   if os.path.basename(homedir) != version:
-    if os.path.exists(homedir): commands.getoutput("rm -rf %s" % homedir)
+    if os.path.exists(homedir):
+      cmd = 'rm -rf "%s"' % homedir
+      subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
     os.rename("%s/%s" % (homepath,version),homedir)
 
 # build Voro++
 
 if buildflag:
-  print "Building Voro++ ..."
-  cmd = "cd %s; make" % homedir
-  txt = commands.getoutput(cmd)
-  print txt
+  print("Building Voro++ ...")
+  cmd = 'cd "%s"; make' % homedir
+  txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
+  print(txt.decode('UTF-8'))
 
 # create 2 links in lib/voronoi to Voro++ src dir
 
 if linkflag:
-  print "Creating links to Voro++ include and lib files"
+  print("Creating links to Voro++ include and lib files")
   if os.path.isfile("includelink") or os.path.islink("includelink"):
     os.remove("includelink")
   if os.path.isfile("liblink") or os.path.islink("liblink"):
     os.remove("liblink")
-  cmd = "ln -s %s/src includelink" % homedir
-  commands.getoutput(cmd)
-  cmd = "ln -s %s/src liblink" % homedir
-  commands.getoutput(cmd)
+  cmd = 'ln -s "%s/src" includelink' % homedir  # plain string: with shell=True extra list items are not part of the command
+  subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
+  cmd = 'ln -s "%s/src" liblink' % homedir  # liblink points at the same src dir as includelink
+  subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
diff --git a/lib/voronoi/README b/lib/voronoi/README
index 9863632be..2ca11c922 100644
--- a/lib/voronoi/README
+++ b/lib/voronoi/README
@@ -1,63 +1,63 @@
 This directory contains links to the Voro++ library which is required
 to use the VORONOI package and its compute voronoi/atom command in a
 LAMMPS input script.
 
 The Voro++ library is available at http://math.lbl.gov/voro++ and was
 developed by Chris H. Rycroft while at UC Berkeley / Lawrence Berkeley
 Laboratory.
 
 You can type "make lib-voronoi" from the src directory to see help on
 how to download and build this library via make commands, or you can
 do the same thing by typing "python Install.py" from within this
 directory, or you can do it manually by following the instructions
 below.
 
 -----------------
 
 Instructions:
 
 1.  Download Voro++ at http://math.lbl.gov/voro++/download
     either as a tarball or via SVN, and unpack the
     tarball either in this /lib/voronoi directory
     or somewhere else on your system.
 
 2.  compile Voro++ from within its home directory
-    % make 
+    % make
 
 3.  There is no need to install Voro++ if you only wish
     to use it from LAMMPS.  You can install it if you
     wish to use it stand-alone or from other codes:
     a) install under the default /usr/local
        % sudo make install
     b) install under a user-writeable location by first
          changing the PREFIX variable in the config.mk file, then
        % make install
 
 4.  Create two soft links in this dir (lib/voronoi)
     to the Voro++ src directory is.  E.g if you built Voro++ in this dir:
       % ln -s voro++-0.4.6/src includelink
       % ln -s voro++-0.4.6/src liblink
     These links could instead be set to the include and lib
     directories created by a Voro++ install, e.g.
       % ln -s /usr/local/include includelink
       % ln -s /usr/local/lib liblink
 
 -----------------
 
 When these steps are complete you can build LAMMPS
 with the VORONOI package installed:
 
 % cd lammps/src
 % make yes-voronoi
 % make g++ (or whatever target you wish)
 
 Note that if you download and unpack a new LAMMPS tarball, the
 "includelink" and "liblink" files will be lost and you will need to
 re-create them (step 4).  If you built Voro++ in this directory (as
 opposed to somewhere else on your system) and did not install it
 somewhere else, you will also need to repeat steps 1,2,3.
 
 The Makefile.lammps file in this directory is there for compatibility
 with the way other libraries under the lib dir are linked with by
 LAMMPS.  However, Voro++ requires no auxiliary files or settings, so
 its variables are blank.
diff --git a/src/.gitignore b/src/.gitignore
index e26f3c6ca..80166e260 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -1,1082 +1,1084 @@
 /Makefile.package
 /Makefile.package.settings
 /MAKE/MINE
 /Make.py.last
 /lmp_*
 
 /style_*.h
 
 /*_gpu.h
 /*_gpu.cpp
 /*_intel.h
 /*_intel.cpp
 /*_kokkos.h
 /*_kokkos.cpp
 /*_omp.h
 /*_omp.cpp
 /*_tally.h
 /*_tally.cpp
 /*_rx.h
 /*_rx.cpp
 /*_ssa.h
 /*_ssa.cpp
 
 /kokkos.cpp
 /kokkos.h
 /kokkos_type.h
 /kokkos_few.h
 
 /manifold*.cpp
 /manifold*.h
 /fix_*manifold*.cpp
 /fix_*manifold*.h
 
 /meam*.h
 /meam*.cpp
 /pair_meamc.cpp
 /pair_meamc.h
 
 /fix_qeq*.cpp
 /fix_qeq*.h
 
 /compute_test_nbl.cpp
 /compute_test_nbl.h
 /pair_multi_lucy.cpp
 /pair_multi_lucy.h
 
 /colvarproxy_lammps.cpp
 /colvarproxy_lammps.h
 /fix_colvars.cpp
 /fix_colvars.h
 /dump_molfile.cpp
 /dump_molfile.h
 /molfile_interface.cpp
 /molfile_interface.h
 /type_detector.h
 
 /intel_buffers.cpp
 /intel_buffers.h
 /intel_intrinsics.h
 /intel_preprocess.h
 /intel_simd.h
 
 /compute_sna_atom.cpp
 /compute_sna_atom.h
 /compute_snad_atom.cpp
 /compute_snad_atom.h
 /compute_snav_atom.cpp
 /compute_snav_atom.h
 /openmp_snap.h
 /pair_snap.cpp
 /pair_snap.h
 /sna.cpp
 /sna.h
 
 /atom_vec_wavepacket.cpp
 /atom_vec_wavepacket.h
 /fix_nve_awpmd.cpp
 /fix_nve_awpmd.h
 /pair_awpmd_cut.cpp
 /pair_awpmd_cut.h
 
 /dihedral_charmmfsw.cpp
 /dihedral_charmmfsw.h
 /pair_lj_charmmfsw_coul_charmmfsh.cpp
 /pair_lj_charmmfsw_coul_charmmfsh.h
 /pair_lj_charmmfsw_coul_long.cpp
 /pair_lj_charmmfsw_coul_long.h
 
 /angle_cg_cmm.cpp
 /angle_cg_cmm.h
 /angle_charmm.cpp
 /angle_charmm.h
 /angle_class2.cpp
 /angle_class2.h
 /angle_cosine.cpp
 /angle_cosine.h
 /angle_cosine_delta.cpp
 /angle_cosine_delta.h
 /angle_cosine_periodic.cpp
 /angle_cosine_periodic.h
 /angle_cosine_shift.cpp
 /angle_cosine_shift.h
 /angle_cosine_shift_exp.cpp
 /angle_cosine_shift_exp.h
 /angle_cosine_squared.cpp
 /angle_cosine_squared.h
 /angle_dipole.cpp
 /angle_dipole.h
 /angle_fourier.cpp
 /angle_fourier.h
 /angle_fourier_simple.cpp
 /angle_fourier_simple.h
 /angle_harmonic.cpp
 /angle_harmonic.h
 /angle_quartic.cpp
 /angle_quartic.h
 /angle_sdk.cpp
 /angle_sdk.h
 /angle_table.cpp
 /angle_table.h
 /atom_vec_angle.cpp
 /atom_vec_angle.h
 /atom_vec_bond.cpp
 /atom_vec_bond.h
 /atom_vec_colloid.cpp
 /atom_vec_colloid.h
 /atom_vec_dipole.cpp
 /atom_vec_dipole.h
 /atom_vec_dpd.cpp
 /atom_vec_dpd.h
 /atom_vec_electron.cpp
 /atom_vec_electron.h
 /atom_vec_ellipsoid.cpp
 /atom_vec_ellipsoid.h
 /atom_vec_full.cpp
 /atom_vec_full.h
 /atom_vec_full_hars.cpp
 /atom_vec_full_hars.h
 /atom_vec_granular.cpp
 /atom_vec_granular.h
 /atom_vec_meso.cpp
 /atom_vec_meso.h
 /atom_vec_molecular.cpp
 /atom_vec_molecular.h
 /atom_vec_peri.cpp
 /atom_vec_peri.h
 /atom_vec_template.cpp
 /atom_vec_template.h
 /body_nparticle.cpp
 /body_nparticle.h
 /bond_class2.cpp
 /bond_class2.h
 /bond_fene.cpp
 /bond_fene.h
 /bond_fene_expand.cpp
 /bond_fene_expand.h
 /bond_harmonic.cpp
 /bond_harmonic.h
 /bond_harmonic_shift.cpp
 /bond_harmonic_shift.h
 /bond_harmonic_shift_cut.cpp
 /bond_harmonic_shift_cut.h
 /bond_morse.cpp
 /bond_morse.h
 /bond_nonlinear.cpp
 /bond_nonlinear.h
 /bond_oxdna_fene.cpp
 /bond_oxdna_fene.h
 /bond_oxdna2_fene.cpp
 /bond_oxdna2_fene.h
 /bond_quartic.cpp
 /bond_quartic.h
 /bond_table.cpp
 /bond_table.h
 /cg_cmm_parms.cpp
 /cg_cmm_parms.h
 /commgrid.cpp
 /commgrid.h
 /compute_ackland_atom.cpp
 /compute_ackland_atom.h
 /compute_basal_atom.cpp
 /compute_basal_atom.h
 /compute_body_local.cpp
 /compute_body_local.h
 /compute_cnp_atom.cpp
 /compute_cnp_atom.h
 /compute_damage_atom.cpp
 /compute_damage_atom.h
 /compute_dilatation_atom.cpp
 /compute_dilatation_atom.h
 /compute_dpd.cpp
 /compute_dpd.h
 /compute_dpd_atom.cpp
 /compute_dpd_atom.h
 /compute_erotate_asphere.cpp
 /compute_erotate_asphere.h
 /compute_erotate_rigid.cpp
 /compute_erotate_rigid.h
 /compute_event_displace.cpp
 /compute_event_displace.h
 /compute_fep.cpp
 /compute_fep.h
 /compute_force_tally.cpp
 /compute_force_tally.h
 /compute_heat_flux_tally.cpp
 /compute_heat_flux_tally.h
 /compute_ke_atom_eff.cpp
 /compute_ke_atom_eff.h
 /compute_ke_eff.cpp
 /compute_ke_eff.h
 /compute_ke_rigid.cpp
 /compute_ke_rigid.h
 /compute_meso_e_atom.cpp
 /compute_meso_e_atom.h
 /compute_meso_rho_atom.cpp
 /compute_meso_rho_atom.h
 /compute_meso_t_atom.cpp
 /compute_meso_t_atom.h
 /compute_msd_nongauss.cpp
 /compute_msd_nongauss.h
 /compute_pe_tally.cpp
 /compute_pe_tally.h
 /compute_plasticity_atom.cpp
 /compute_plasticity_atom.h
 /compute_pressure_grem.cpp
 /compute_pressure_grem.h
 /compute_rigid_local.cpp
 /compute_rigid_local.h
 /compute_spec_atom.cpp
 /compute_spec_atom.h
 /compute_stress_tally.cpp
 /compute_stress_tally.h
 /compute_temp_asphere.cpp
 /compute_temp_asphere.h
 /compute_temp_body.cpp
 /compute_temp_body.h
 /compute_temp_deform_eff.cpp
 /compute_temp_deform_eff.h
 /compute_temp_eff.cpp
 /compute_temp_eff.h
 /compute_temp_region_eff.cpp
 /compute_temp_region_eff.h
 /compute_temp_rotate.cpp
 /compute_temp_rotate.h
 /compute_ti.cpp
 /compute_ti.h
 /compute_voronoi_atom.cpp
 /compute_voronoi_atom.h
 /dihedral_charmm.cpp
 /dihedral_charmm.h
 /dihedral_class2.cpp
 /dihedral_class2.h
 /dihedral_cosine_shift_exp.cpp
 /dihedral_cosine_shift_exp.h
 /dihedral_fourier.cpp
 /dihedral_fourier.h
 /dihedral_harmonic.cpp
 /dihedral_harmonic.h
 /dihedral_helix.cpp
 /dihedral_helix.h
 /dihedral_hybrid.cpp
 /dihedral_hybrid.h
 /dihedral_multi_harmonic.cpp
 /dihedral_multi_harmonic.h
 /dihedral_nharmonic.cpp
 /dihedral_nharmonic.h
 /dihedral_opls.cpp
 /dihedral_opls.h
 /dihedral_quadratic.cpp
 /dihedral_quadratic.h
 /dihedral_spherical.cpp
 /dihedral_spherical.h
 /dihedral_table.cpp
 /dihedral_table.h
 /dump_atom_gz.cpp
 /dump_atom_gz.h
 /dump_xyz_gz.cpp
 /dump_xyz_gz.h
 /dump_atom_mpiio.cpp
 /dump_atom_mpiio.h
 /dump_cfg_gz.cpp
 /dump_cfg_gz.h
 /dump_cfg_mpiio.cpp
 /dump_cfg_mpiio.h
 /dump_custom_gz.cpp
 /dump_custom_gz.h
 /dump_custom_mpiio.cpp
 /dump_custom_mpiio.h
 /dump_h5md.cpp
 /dump_h5md.h
 /dump_netcdf.cpp
 /dump_netcdf.h
 /dump_netcdf_mpiio.cpp
 /dump_netcdf_mpiio.h
 /dump_vtk.cpp
 /dump_vtk.h
 /dump_xtc.cpp
 /dump_xtc.h
 /dump_xyz_mpiio.cpp
 /dump_xyz_mpiio.h
 /ewald.cpp
 /ewald.h
 /ewald_cg.cpp
 /ewald_cg.h
 /ewald_disp.cpp
 /ewald_disp.h
 /ewald_n.cpp
 /ewald_n.h
 /fft3d.cpp
 /fft3d.h
 /fft3d_wrap.cpp
 /fft3d_wrap.h
 /fix_adapt_fep.cpp
 /fix_adapt_fep.h
 /fix_addtorque.cpp
 /fix_addtorque.h
 /fix_append_atoms.cpp
 /fix_append_atoms.h
 /fix_atc.cpp
 /fix_atc.h
 /fix_ave_correlate_long.cpp
 /fix_ave_correlate_long.h
 /fix_bond_break.cpp
 /fix_bond_break.h
 /fix_bond_create.cpp
 /fix_bond_create.h
 /fix_bond_swap.cpp
 /fix_bond_swap.h
 /fix_cmap.cpp
 /fix_cmap.h
 /fix_deposit.cpp
 /fix_deposit.h
 /fix_dpd_energy.cpp
 /fix_dpd_energy.h
 /fix_efield.cpp
 /fix_efield.h
 /fix_eos_cv.cpp
 /fix_eos_cv.h
 /fix_eos_table.cpp
 /fix_eos_table.h
 /fix_evaporate.cpp
 /fix_evaporate.h
 /fix_filter_corotate.cpp
 /fix_filter_corotate.h
 /fix_viscosity.cpp
 /fix_viscosity.h
 /fix_ehex.cpp
 /fix_ehex.h
 /fix_event.cpp
 /fix_event.h
 /fix_event_prd.cpp
 /fix_event_prd.h
 /fix_event_tad.cpp
 /fix_event_tad.h
 /fix_flow_gauss.cpp
 /fix_flow_gauss.h
 /fix_freeze.cpp
 /fix_freeze.h
 /fix_gcmc.cpp
 /fix_gcmc.h
 /fix_gld.cpp
 /fix_gld.h
 /fix_gle.cpp
 /fix_gle.h
 /fix_gpu.cpp
 /fix_gpu.h
 /fix_grem.cpp
 /fix_grem.h
 /fix_imd.cpp
 /fix_imd.h
 /fix_ipi.cpp
 /fix_ipi.h
 /fix_lambdah_calc.cpp
 /fix_lambdah_calc.h
 /fix_langevin_eff.cpp
 /fix_langevin_eff.h
 /fix_lb_fluid.cpp
 /fix_lb_fluid.h
 /fix_lb_momentum.cpp
 /fix_lb_momentum.h
 /fix_lb_pc.cpp
 /fix_lb_pc.h
 /fix_lb_rigid_pc_sphere.cpp
 /fix_lb_rigid_pc_sphere.h
 /fix_lb_viscous.cpp
 /fix_lb_viscous.h
 /fix_load_report.cpp
 /fix_load_report.h
 /fix_meso.cpp
 /fix_meso.h
 /fix_meso_stationary.cpp
 /fix_meso_stationary.h
 /fix_mscg.cpp
 /fix_mscg.h
 /fix_msst.cpp
 /fix_msst.h
 /fix_neb.cpp
 /fix_neb.h
 /fix_nh_asphere.cpp
 /fix_nh_asphere.h
 /fix_nph_asphere.cpp
 /fix_nph_asphere.h
 /fix_npt_asphere.cpp
 /fix_npt_asphere.h
 /fix_nve_asphere.cpp
 /fix_nve_asphere.h
 /fix_nve_asphere_noforce.cpp
 /fix_nve_asphere_noforce.h
 /fix_nve_dot.cpp
 /fix_nve_dot.h
 /fix_nve_dotc_langevin.cpp
 /fix_nve_dotc_langevin.h
 /fix_nh_body.cpp
 /fix_nh_body.h
 /fix_nph_body.cpp
 /fix_nph_body.h
 /fix_npt_body.cpp
 /fix_npt_body.h
 /fix_nvk.cpp
 /fix_nvk.h
 /fix_nvt_body.cpp
 /fix_nvt_body.h
 /fix_nve_body.cpp
 /fix_nve_body.h
 /fix_nvt_asphere.cpp
 /fix_nvt_asphere.h
 /fix_nh_eff.cpp
 /fix_nh_eff.h
 /fix_nph_eff.cpp
 /fix_nph_eff.h
 /fix_nphug.cpp
 /fix_nphug.h
 /fix_npt_eff.cpp
 /fix_npt_eff.h
 /fix_nve_eff.cpp
 /fix_nve_eff.h
 /fix_nve_line.cpp
 /fix_nve_line.h
 /fix_nvt_eff.cpp
 /fix_nvt_eff.h
 /fix_nvt_sllod_eff.cpp
 /fix_nvt_sllod_eff.h
 /fix_nve_tri.cpp
 /fix_nve_tri.h
 /fix_oneway.cpp
 /fix_oneway.h
 /fix_orient_bcc.cpp
 /fix_orient_bcc.h
 /fix_orient_fcc.cpp
 /fix_orient_fcc.h
 /fix_peri_neigh.cpp
 /fix_peri_neigh.h
 /fix_phonon.cpp
 /fix_phonon.h
 /fix_poems.cpp
 /fix_poems.h
 /fix_pour.cpp
 /fix_pour.h
 /fix_qeq_comb.cpp
 /fix_qeq_comb.h
 /fix_qeq_reax.cpp
 /fix_qeq_fire.cpp
 /fix_qeq_fire.h
 /fix_qeq_reax.h
 /fix_qmmm.cpp
 /fix_qmmm.h
 /fix_reax_bonds.cpp
 /fix_reax_bonds.h
 /fix_reaxc.cpp
 /fix_reaxc.h
 /fix_reaxc_bonds.cpp
 /fix_reaxc_bonds.h
 /fix_reaxc_species.cpp
 /fix_reaxc_species.h
 /fix_rigid.cpp
 /fix_rigid.h
 /fix_rigid_nh.cpp
 /fix_rigid_nh.h
 /fix_rigid_nph.cpp
 /fix_rigid_nph.h
 /fix_rigid_npt.cpp
 /fix_rigid_npt.h
 /fix_rigid_nve.cpp
 /fix_rigid_nve.h
 /fix_rigid_nvt.cpp
 /fix_rigid_nvt.h
 /fix_rigid_nh_small.cpp
 /fix_rigid_nh_small.h
 /fix_rigid_nph_small.cpp
 /fix_rigid_nph_small.h
 /fix_rigid_npt_small.cpp
 /fix_rigid_npt_small.h
 /fix_rigid_nve_small.cpp
 /fix_rigid_nve_small.h
 /fix_rigid_nvt_small.cpp
 /fix_rigid_nvt_small.h
 /fix_rigid_small.cpp
 /fix_rigid_small.h
 /fix_shake.cpp
 /fix_shake.h
 /fix_shardlow.cpp
 /fix_shardlow.h
 /fix_smd.cpp
 /fix_smd.h
 /fix_species.cpp
 /fix_species.h
 /fix_spring_pull.cpp
 /fix_spring_pull.h
 /fix_srd.cpp
 /fix_srd.h
 /fix_temp_rescale_eff.cpp
 /fix_temp_rescale_eff.h
 /fix_thermal_conductivity.cpp
 /fix_thermal_conductivity.h
 /fix_ti_rs.cpp
 /fix_ti_rs.h
 /fix_ti_spring.cpp
 /fix_ti_spring.h
 /fix_ttm.cpp
 /fix_ttm.h
 /fix_tune_kspace.cpp
 /fix_tune_kspace.h
 /fix_wall_colloid.cpp
 /fix_wall_colloid.h
 /fix_wall_ees.cpp
 /fix_wall_ees.h
 /fix_wall_region_ees.cpp
 /fix_wall_region_ees.h
 /fix_wall_gran.cpp
 /fix_wall_gran.h
 /fix_wall_gran_region.cpp
 /fix_wall_gran_region.h
 /fix_wall_piston.cpp
 /fix_wall_piston.h
 /fix_wall_srd.cpp
 /fix_wall_srd.h
 /gpu_extra.h
 /gridcomm.cpp
 /gridcomm.h
 /group_ndx.cpp
 /group_ndx.h
 /ndx_group.cpp
 /ndx_group.h
 /improper_class2.cpp
 /improper_class2.h
 /improper_cossq.cpp
 /improper_cossq.h
 /improper_cvff.cpp
 /improper_cvff.h
 /improper_distance.cpp
 /improper_distance.h
 /improper_fourier.cpp
 /improper_fourier.h
 /improper_harmonic.cpp
 /improper_harmonic.h
 /improper_hybrid.cpp
 /improper_hybrid.h
 /improper_ring.cpp
 /improper_ring.h
 /improper_umbrella.cpp
 /improper_umbrella.h
 /kissfft.h
 /lj_sdk_common.h
 /math_complex.h
 /math_vector.h
 /mgpt_*.cpp
 /mgpt_*.h
 /msm.cpp
 /msm.h
 /msm_cg.cpp
 /msm_cg.h
 /neb.cpp
 /neb.h
 
 /pair_adp.cpp
 /pair_adp.h
 /pair_agni.cpp
 /pair_agni.h
 /pair_airebo.cpp
 /pair_airebo.h
 /pair_airebo_morse.cpp
 /pair_airebo_morse.h
 /pair_body.cpp
 /pair_body.h
 /pair_bop.cpp
 /pair_bop.h
 /pair_born_coul_long.cpp
 /pair_born_coul_long.h
 /pair_born_coul_msm.cpp
 /pair_born_coul_msm.h
 /pair_brownian.cpp
 /pair_brownian.h
 /pair_brownian_poly.cpp
 /pair_brownian_poly.h
 /pair_buck_coul_long.cpp
 /pair_buck_coul_long.h
 /pair_buck_coul_msm.cpp
 /pair_buck_coul_msm.h
 /pair_buck_coul.cpp
 /pair_buck_coul.h
 /pair_buck_long_coul_long.cpp
 /pair_buck_long_coul_long.h
 /pair_cdeam.cpp
 /pair_cdeam.h
 /pair_cg_cmm.cpp
 /pair_cg_cmm.h
 /pair_cg_cmm_coul_cut.cpp
 /pair_cg_cmm_coul_cut.h
 /pair_cg_cmm_coul_long.cpp
 /pair_cg_cmm_coul_long.h
 /pair_cmm_common.cpp
 /pair_cmm_common.h
 /pair_cg_cmm_coul_msm.cpp
 /pair_cg_cmm_coul_msm.h
 /pair_comb.cpp
 /pair_comb.h
 /pair_comb3.cpp
 /pair_comb3.h
 /pair_colloid.cpp
 /pair_colloid.h
 /pair_coul_diel.cpp
 /pair_coul_diel.h
 /pair_coul_long.cpp
 /pair_coul_long.h
 /pair_coul_msm.cpp
 /pair_coul_msm.h
 /pair_dipole_cut.cpp
 /pair_dipole_cut.h
 /pair_dipole_sf.cpp
 /pair_dipole_sf.h
 /pair_dpd_mt.cpp
 /pair_dpd_mt.h
 /pair_dsmc.cpp
 /pair_dsmc.h
 /pair_eam.cpp
 /pair_eam.h
 /pair_eam_opt.cpp
 /pair_eam_opt.h
 /pair_eam_alloy.cpp
 /pair_eam_alloy.h
 /pair_eam_alloy_opt.cpp
 /pair_eam_alloy_opt.h
 /pair_eam_fs.cpp
 /pair_eam_fs.h
 /pair_eam_fs_opt.cpp
 /pair_eam_fs_opt.h
 /pair_edip.cpp
 /pair_edip.h
 /pair_edip_multi.cpp
 /pair_edip_multi.h
 /pair_eff_cut.cpp
 /pair_eff_cut.h
 /pair_eff_inline.h
 /pair_eim.cpp
 /pair_eim.h
 /pair_gauss_cut.cpp
 /pair_gauss_cut.h
 /pair_gayberne.cpp
 /pair_gayberne.h
 /pair_gran_easy.cpp
 /pair_gran_easy.h
 /pair_gran_hertz_history.cpp
 /pair_gran_hertz_history.h
 /pair_gran_hooke.cpp
 /pair_gran_hooke.h
 /pair_gran_hooke_history.cpp
 /pair_gran_hooke_history.h
 /pair_gw.cpp
 /pair_gw.h
 /pair_gw_zbl.cpp
 /pair_gw_zbl.h
 /pair_hbond_dreiding_lj.cpp
 /pair_hbond_dreiding_lj.h
 /pair_hbond_dreiding_morse.cpp
 /pair_hbond_dreiding_morse.h
 /pair_kolmogorov_crespi_z.cpp
 /pair_kolmogorov_crespi_z.h
 /pair_lcbop.cpp
 /pair_lcbop.h
 /pair_line_lj.cpp
 /pair_line_lj.h
 /pair_list.cpp
 /pair_list.h
 /pair_lj_charmm_coul_charmm.cpp
 /pair_lj_charmm_coul_charmm.h
 /pair_lj_charmm_coul_charmm_implicit.cpp
 /pair_lj_charmm_coul_charmm_implicit.h
 /pair_lj_charmm_coul_long.cpp
 /pair_lj_charmm_coul_long.h
 /pair_lj_charmm_coul_long_opt.cpp
 /pair_lj_charmm_coul_long_opt.h
 /pair_lj_charmm_coul_long_soft.cpp
 /pair_lj_charmm_coul_long_soft.h
 /pair_lj_charmm_coul_msm.cpp
 /pair_lj_charmm_coul_msm.h
 /pair_lj_class2.cpp
 /pair_lj_class2.h
 /pair_lj_class2_coul_cut.cpp
 /pair_lj_class2_coul_cut.h
 /pair_lj_class2_coul_long.cpp
 /pair_lj_class2_coul_long.h
 /pair_lj_coul.cpp
 /pair_lj_coul.h
 /pair_coul_cut_soft.cpp
 /pair_coul_cut_soft.h
 /pair_coul_long_soft.cpp
 /pair_coul_long_soft.h
 /pair_lj_cut_coul_cut_soft.cpp
 /pair_lj_cut_coul_cut_soft.h
 /pair_lj_cut_tip4p_cut.cpp
 /pair_lj_cut_tip4p_cut.h
 /pair_lj_cut_coul_long.cpp
 /pair_lj_cut_coul_long.h
 /pair_lj_cut_coul_long_opt.cpp
 /pair_lj_cut_coul_long_opt.h
 /pair_lj_cut_coul_long_soft.cpp
 /pair_lj_cut_coul_long_soft.h
 /pair_lj_cut_coul_msm.cpp
 /pair_lj_cut_coul_msm.h
 /pair_lj_cut_dipole_cut.cpp
 /pair_lj_cut_dipole_cut.h
 /pair_lj_cut_dipole_long.cpp
 /pair_lj_cut_dipole_long.h
 /pair_lj_cut_*hars_*.cpp
 /pair_lj_cut_*hars_*.h
 /pair_lj_cut_soft.cpp
 /pair_lj_cut_soft.h
 /pair_lj_cut_tip4p_long.cpp
 /pair_lj_cut_tip4p_long.h
 /pair_lj_cut_tip4p_long_opt.cpp
 /pair_lj_cut_tip4p_long_opt.h
 /pair_lj_cut_tip4p_long_soft.cpp
 /pair_lj_cut_tip4p_long_soft.h
 /pair_lj_long_coul_long.cpp
 /pair_lj_long_coul_long.h
 /pair_lj_long_coul_long_opt.cpp
 /pair_lj_long_coul_long_opt.h
 /pair_lj_long_dipole_long.cpp
 /pair_lj_long_dipole_long.h
 /pair_lj_long_tip4p_long.cpp
 /pair_lj_long_tip4p_long.h
 /pair_lj_cut_opt.cpp
 /pair_lj_cut_opt.h
 /pair_lj_cut_tgpu.cpp
 /pair_lj_cut_tgpu.h
 /pair_lj_sdk.cpp
 /pair_lj_sdk.h
 /pair_lj_sdk_coul_long.cpp
 /pair_lj_sdk_coul_long.h
 /pair_lj_sdk_coul_msm.cpp
 /pair_lj_sdk_coul_msm.h
 /pair_lj_sf_dipole_sf.cpp
 /pair_lj_sf_dipole_sf.h
 /pair_lubricateU.cpp
 /pair_lubricateU.h
 /pair_lubricateU_poly.cpp
 /pair_lubricateU_poly.h
 /pair_lubricate_poly.cpp
 /pair_lubricate_poly.h
 /pair_lubricate.cpp
 /pair_lubricate.h
 /pair_meam.cpp
 /pair_meam.h
 /pair_meam_spline.cpp
 /pair_meam_spline.h
 /pair_meam_sw_spline.cpp
 /pair_meam_sw_spline.h
 /pair_morse_opt.cpp
 /pair_morse_opt.h
 /pair_morse_soft.cpp
 /pair_morse_soft.h
 /pair_nb3b_harmonic.cpp
 /pair_nb3b_harmonic.h
 /pair_nm_cut.cpp
 /pair_nm_cut.h
 /pair_nm_cut_coul_cut.cpp
 /pair_nm_cut_coul_cut.h
 /pair_nm_cut_coul_long.cpp
 /pair_nm_cut_coul_long.h
 /pair_oxdna_*.cpp
 /pair_oxdna_*.h
 /pair_oxdna2_*.cpp
 /pair_oxdna2_*.h
 /mf_oxdna.h
 /pair_peri_eps.cpp
 /pair_peri_eps.h
 /pair_peri_lps.cpp
 /pair_peri_lps.h
 /pair_peri_pmb.cpp
 /pair_peri_pmb.h
 /pair_peri_ves.cpp
 /pair_peri_ves.h
+/pair_quip.cpp
+/pair_quip.h
 /pair_reax.cpp
 /pair_reax.h
 /pair_reax_fortran.h
 /pair_reaxc.cpp
 /pair_reaxc.h
 /pair_rebo.cpp
 /pair_rebo.h
 /pair_resquared.cpp
 /pair_resquared.h
 /pair_sph_heatconduction.cpp
 /pair_sph_heatconduction.h
 /pair_sph_idealgas.cpp
 /pair_sph_idealgas.h
 /pair_sph_lj.cpp
 /pair_sph_lj.h
 /pair_sph_rhosum.cpp
 /pair_sph_rhosum.h
 /pair_sph_taitwater.cpp
 /pair_sph_taitwater.h
 /pair_sph_taitwater_morris.cpp
 /pair_sph_taitwater_morris.h
 /pair_sw.cpp
 /pair_sw.h
 /pair_tersoff.cpp
 /pair_tersoff.h
 /pair_tersoff_mod.cpp
 /pair_tersoff_mod.h
 /pair_tersoff_mod_c.cpp
 /pair_tersoff_mod_c.h
 /pair_tersoff_table.cpp
 /pair_tersoff_table.h
 /pair_tersoff_zbl.cpp
 /pair_tersoff_zbl.h
 /pair_tip4p_cut.cpp
 /pair_tip4p_cut.h
 /pair_tip4p_long.cpp
 /pair_tip4p_long.h
 /pair_tip4p_long_soft.cpp
 /pair_tip4p_long_soft.h
 /pair_tri_lj.cpp
 /pair_tri_lj.h
 /pair_yukawa_colloid.cpp
 /pair_yukawa_colloid.h
 /pair_momb.cpp
 /pair_momb.h
 /pppm.cpp
 /pppm.h
 /pppm_cg.cpp
 /pppm_cg.h
 /pppm_disp.cpp
 /pppm_disp.h
 /pppm_disp_tip4p.cpp
 /pppm_disp_tip4p.h
 /pppm_old.cpp
 /pppm_old.h
 /pppm_proxy.cpp
 /pppm_proxy.h
 /pppm_stagger.cpp
 /pppm_stagger.h
 /pppm_tip4p.cpp
 /pppm_tip4p.h
 /pppm_tip4p_proxy.cpp
 /pppm_tip4p_proxy.h
 /pppm_tip4p_cg.cpp
 /pppm_tip4p_cg.h
 /prd.cpp
 /prd.h
 /python_impl.cpp
 /python_impl.h
 /python_compat.h
 /fix_python.cpp
 /fix_python.h
 /pair_python.cpp
 /pair_python.h
 /reader_molfile.cpp
 /reader_molfile.h
 /reaxc_allocate.cpp
 /reaxc_allocate.h
 /reaxc_basic_comm.cpp
 /reaxc_basic_comm.h
 /reaxc_bond_orders.cpp
 /reaxc_bond_orders.h
 /reaxc_bonds.cpp
 /reaxc_bonds.h
 /reaxc_control.cpp
 /reaxc_control.h
 /reaxc_defs.h
 /reaxc_ffield.cpp
 /reaxc_ffield.h
 /reaxc_forces.cpp
 /reaxc_forces.h
 /reaxc_hydrogen_bonds.cpp
 /reaxc_hydrogen_bonds.h
 /reaxc_init_md.cpp
 /reaxc_init_md.h
 /reaxc_io_tools.cpp
 /reaxc_io_tools.h
 /reaxc_list.cpp
 /reaxc_list.h
 /reaxc_lookup.cpp
 /reaxc_lookup.h
 /reaxc_multi_body.cpp
 /reaxc_multi_body.h
 /reaxc_nonbonded.cpp
 /reaxc_nonbonded.h
 /reaxc_reset_tools.cpp
 /reaxc_reset_tools.h
 /reaxc_system_props.cpp
 /reaxc_system_props.h
 /reaxc_tool_box.cpp
 /reaxc_tool_box.h
 /reaxc_torsion_angles.cpp
 /reaxc_torsion_angles.h
 /reaxc_traj.cpp
 /reaxc_traj.h
 /reaxc_types.h
 /reaxc_valence_angles.cpp
 /reaxc_valence_angles.h
 /reaxc_vector.cpp
 /reaxc_vector.h
 /remap.cpp
 /remap.h
 /remap_wrap.cpp
 /remap_wrap.h
 /restart_mpiio.cpp
 /restart_mpiio.h
 /smd_kernels.h
 /smd_material_models.cpp
 /smd_material_models.h
 /smd_math.h
 /tad.cpp
 /tad.h
 /temper.cpp
 /temper.h
 /temper_grem.cpp
 /temper_grem.h
 /thr_data.cpp
 /thr_data.h
 /verlet_split.cpp
 /verlet_split.h
 /write_dump.cpp
 /write_dump.h
 /xdr_compat.cpp
 /xdr_compat.h
 
 /atom_vec_smd.cpp
 /atom_vec_smd.h
 /compute_saed.cpp
 /compute_saed.h
 /compute_saed_consts.h
 /compute_smd_contact_radius.cpp
 /compute_smd_contact_radius.h
 /compute_smd_damage.cpp
 /compute_smd_damage.h
 /compute_smd_hourglass_error.cpp
 /compute_smd_hourglass_error.h
 /compute_smd_internal_energy.cpp
 /compute_smd_internal_energy.h
 /compute_smd_plastic_strain.cpp
 /compute_smd_plastic_strain.h
 /compute_smd_plastic_strain_rate.cpp
 /compute_smd_plastic_strain_rate.h
 /compute_smd_rho.cpp
 /compute_smd_rho.h
 /compute_smd_tlsph_defgrad.cpp
 /compute_smd_tlsph_defgrad.h
 /compute_smd_tlsph_dt.cpp
 /compute_smd_tlsph_dt.h
 /compute_smd_tlsph_num_neighs.cpp
 /compute_smd_tlsph_num_neighs.h
 /compute_smd_tlsph_shape.cpp
 /compute_smd_tlsph_shape.h
 /compute_smd_tlsph_strain.cpp
 /compute_smd_tlsph_strain.h
 /compute_smd_tlsph_strain_rate.cpp
 /compute_smd_tlsph_strain_rate.h
 /compute_smd_tlsph_stress.cpp
 /compute_smd_tlsph_stress.h
 /compute_smd_triangle_mesh_vertices.cpp
 /compute_smd_triangle_mesh_vertices.h
 /compute_smd_ulsph_effm.cpp
 /compute_smd_ulsph_effm.h
 /compute_smd_ulsph_num_neighs.cpp
 /compute_smd_ulsph_num_neighs.h
 /compute_smd_ulsph_strain.cpp
 /compute_smd_ulsph_strain.h
 /compute_smd_ulsph_strain_rate.cpp
 /compute_smd_ulsph_strain_rate.h
 /compute_smd_ulsph_stress.cpp
 /compute_smd_ulsph_stress.h
 /compute_smd_vol.cpp
 /compute_smd_vol.h
 /compute_temp_cs.cpp
 /compute_temp_cs.h
 /compute_temp_drude.cpp
 /compute_temp_drude.h
 /compute_xrd.cpp
 /compute_xrd.h
 /compute_xrd_consts.h
 /fix_atom_swap.cpp
 /fix_atom_swap.h
 /fix_ave_spatial_sphere.cpp
 /fix_ave_spatial_sphere.h
 /fix_drude.cpp
 /fix_drude.h
 /fix_drude_transform.cpp
 /fix_drude_transform.h
 /fix_langevin_drude.cpp
 /fix_langevin_drude.h
 /fix_pimd.cpp
 /fix_pimd.h
 /fix_qbmsst.cpp
 /fix_qbmsst.h
 /fix_qtb.cpp
 /fix_qtb.h
 /fix_rattle.cpp
 /fix_rattle.h
 /fix_saed_vtk.cpp
 /fix_saed_vtk.h
 /fix_smd_adjust_dt.cpp
 /fix_smd_adjust_dt.h
 /fix_smd_integrate_tlsph.cpp
 /fix_smd_integrate_tlsph.h
 /fix_smd_integrate_ulsph.cpp
 /fix_smd_integrate_ulsph.h
 /fix_smd_move_triangulated_surface.cpp
 /fix_smd_move_triangulated_surface.h
 /fix_smd_setvel.cpp
 /fix_smd_setvel.h
 /fix_smd_tlsph_reference_configuration.cpp
 /fix_smd_tlsph_reference_configuration.h
 /fix_smd_wall_surface.cpp
 /fix_smd_wall_surface.h
 /fix_srp.cpp
 /fix_srp.h
 /fix_tfmc.cpp
 /fix_tfmc.h
 /fix_ttm_mod.cpp
 /fix_ttm_mod.h
 /pair_born_coul_long_cs.cpp
 /pair_born_coul_long_cs.h
 /pair_born_coul_dsf_cs.cpp
 /pair_born_coul_dsf_cs.h
 /pair_buck_coul_long_cs.cpp
 /pair_buck_coul_long_cs.h
 /pair_coul_long_cs.cpp
 /pair_coul_long_cs.h
 /pair_lj_cut_thole_long.cpp
 /pair_lj_cut_thole_long.h
 /pair_plum_hb.cpp
 /pair_plum_hb.h
 /pair_plum_hp.cpp
 /pair_plum_hp.h
 /pair_polymorphic.cpp
 /pair_polymorphic.h
 /pair_smd_hertz.cpp
 /pair_smd_hertz.h
 /pair_smd_tlsph.cpp
 /pair_smd_tlsph.h
 /pair_smd_triangulated_surface.cpp
 /pair_smd_triangulated_surface.h
 /pair_smd_ulsph.cpp
 /pair_smd_ulsph.h
 /pair_srp.cpp
 /pair_srp.h
 /pair_thole.cpp
 /pair_thole.h
 /pair_buck_mdf.cpp
 /pair_buck_mdf.h
 /pair_dpd_conservative.cpp
 /pair_dpd_conservative.h
 /pair_dpd_fdt.cpp
 /pair_dpd_fdt.h
 /pair_dpd_fdt_energy.cpp
 /pair_dpd_fdt_energy.h
 /pair_lennard_mdf.cpp
 /pair_lennard_mdf.h
 /pair_lj_cut_coul_long_cs.cpp
 /pair_lj_cut_coul_long_cs.h
 /pair_lj_mdf.cpp
 /pair_lj_mdf.h
 /pair_mgpt.cpp
 /pair_mgpt.h
 /pair_morse_smooth_linear.cpp
 /pair_morse_smooth_linear.h
 /pair_smtbq.cpp
 /pair_smtbq.h
 /pair_vashishta*.cpp
 /pair_vashishta*.h
 
diff --git a/src/Makefile b/src/Makefile
index c7b20dcb1..3d1085e0b 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,383 +1,384 @@
 # LAMMPS multiple-machine -*- Makefile -*-
 
 SHELL = /bin/bash
 PYTHON = python
 
 #.IGNORE:
 
 # Definitions
 
 ROOT =	 lmp
 EXE =	 lmp_$@
 ARLIB =  liblammps_$@.a
 SHLIB =	 liblammps_$@.so
 ARLINK = liblammps.a
 SHLINK = liblammps.so
 
 OBJDIR =   Obj_$@
 OBJSHDIR = Obj_shared_$@
 
 SRC =	$(wildcard *.cpp)
 INC =	$(wildcard *.h)
 OBJ = 	$(SRC:.cpp=.o)
 
 SRCLIB = $(filter-out main.cpp,$(SRC))
 OBJLIB = $(filter-out main.o,$(OBJ))
 
 # Command-line options for mode: exe (default), shexe, lib, shlib
 
 mode = exe
 objdir = $(OBJDIR)
 
 ifeq ($(mode),shexe)
 objdir = $(OBJSHDIR)
 endif
 
 ifeq ($(mode),lib)
 objdir = $(OBJDIR)
 endif
 
 ifeq ($(mode),shlib)
 objdir = $(OBJSHDIR)
 endif
 
 # Package variables
 
 # PACKAGE    = standard packages
 # PACKUSER   = user packages
 # PACKLIB    = all packages that require an additional lib
 #              should be PACKSYS + PACKINT + PACKEXT
 # PACKSYS    = subset that require a common system library
 #              include MPIIO and LB b/c require full MPI, not just STUBS
 # PACKINT    = subset that require an internal (provided) library
 # PACKEXT    = subset that require an external (downloaded) library
 
 PACKAGE = asphere body class2 colloid compress coreshell dipole gpu \
 	  granular kim kokkos kspace manybody mc meam misc molecule \
 	  mpiio mscg opt peri poems \
 	  python qeq reax replica rigid shock snap srd voronoi
 
 PACKUSER = user-atc user-awpmd user-cgdna user-cgsdk user-colvars \
 	   user-diffraction user-dpd user-drude user-eff user-fep user-h5md \
 	   user-intel user-lb user-manifold user-meamc user-mgpt user-misc user-molfile \
 	   user-netcdf user-omp user-phonon user-qmmm user-qtb \
 	   user-quip user-reaxc user-smd user-smtbq user-sph user-tally \
 	   user-vtk
 
 PACKLIB = compress gpu kim kokkos meam mpiio mscg poems \
 	  python reax voronoi \
 	  user-atc user-awpmd user-colvars user-h5md user-lb user-molfile \
 	  user-netcdf user-qmmm user-quip user-smd user-vtk
 
 PACKSYS = compress mpiio python user-lb
 
 PACKINT = gpu kokkos meam poems reax user-atc user-awpmd user-colvars
 
 PACKEXT = kim mscg voronoi \
 	  user-h5md user-molfile user-netcdf user-qmmm user-quip \
 	  user-smd user-vtk
 
 PACKALL = $(PACKAGE) $(PACKUSER)
 
 PACKAGEUC = $(shell echo $(PACKAGE) | tr a-z A-Z)
 PACKUSERUC = $(shell echo $(PACKUSER) | tr a-z A-Z)
 
 YESDIR = $(shell echo $(@:yes-%=%) | tr a-z A-Z)
 NODIR  = $(shell echo $(@:no-%=%) | tr a-z A-Z)
 LIBDIR = $(shell echo $(@:lib-%=%))
 LIBUSERDIR = $(shell echo $(@:lib-user-%=%))
 
 # List of all targets; 'help' is the first rule, so a bare 'make' prints this list
 
 help:
 	@echo ''
 	@echo 'make clean-all           delete all object files'
 	@echo 'make clean-machine       delete object files for one machine'
 	@echo 'make mpi-stubs           build dummy MPI library in STUBS'
 	@echo 'make install-python      install LAMMPS wrapper in Python'
 	@echo 'make tar                 create lmp_src.tar.gz for src dir and packages'
 	@echo ''
 	@echo 'make package                 list available packages and their dependencies'
 	@echo 'make package-status (ps)     status of all packages'
 	@echo 'make yes-package             install a single pkg in src dir'
 	@echo 'make no-package              remove a single pkg from src dir'
 	@echo 'make yes-all                 install all pkgs in src dir'
 	@echo 'make no-all                  remove all pkgs from src dir'
 	@echo 'make yes-standard (yes-std)  install all standard pkgs'
 	@echo 'make no-standard (no-std)    remove all standard pkgs'
 	@echo 'make yes-user                install all user pkgs'
 	@echo 'make no-user                 remove all user pkgs'
 	@echo 'make yes-lib       install all pkgs with libs (included or ext)'
 	@echo 'make no-lib        remove all pkgs with libs (included or ext)'
 	@echo 'make yes-ext                 install all pkgs with external libs'
 	@echo 'make no-ext                  remove all pkgs with external libs'
 	@echo ''
 	@echo 'make package-update (pu) replace src files with updated package files'
 	@echo 'make package-overwrite   replace package files with src files'
 	@echo 'make package-diff (pd)   diff src files against package files'
 	@echo ''
-	@echo 'make lib-package         download/build/install a package library'
+	@echo 'make lib-package         help for download/build/install a package library'
+	@echo 'make lib-package args="..."    download/build/install a package library'
 	@echo 'make purge               purge obsolete copies of source files'
 	@echo ''
 	@echo 'make machine             build LAMMPS for machine'
 	@echo 'make mode=lib machine    build LAMMPS as static lib for machine'
 	@echo 'make mode=shlib machine  build LAMMPS as shared lib for machine'
 	@echo 'make mode=shexe machine  build LAMMPS as shared exe for machine'
 	@echo 'make makelist            create Makefile.list used by old makes'
 	@echo 'make -f Makefile.list machine     build LAMMPS for machine (old)'
 	@echo ''
 	@echo 'machine is one of these from src/MAKE:'
 	@echo ''
 	@files="`ls MAKE/Makefile.*`"; \
 	  for file in $$files; do head -1 $$file; done
 	@echo ''
 	@echo '... or one of these from src/MAKE/OPTIONS:'
 	@echo ''
 	@files="`ls MAKE/OPTIONS/Makefile.*`"; \
 	  for file in $$files; do head -1 $$file; done
 	@echo ''
 	@echo '... or one of these from src/MAKE/MACHINES:'
 	@echo ''
 	@files="`ls MAKE/MACHINES/Makefile.*`"; \
 	  for file in $$files; do head -1 $$file; done
 	@echo ''
 	@echo '... or one of these from src/MAKE/MINE:'
 	@echo ''
 	@files="`ls MAKE/MINE/Makefile.* 2>/dev/null`"; \
 	  for file in $$files; do head -1 $$file; done
 	@echo ''
 
 # Build LAMMPS in one of 4 modes
 # exe =   exe with static compile in Obj_machine (default)
 # shexe = exe with shared compile in Obj_shared_machine
 # lib =   static lib in Obj_machine
 # shlib = shared lib in Obj_shared_machine
 
 .DEFAULT:
 	@if [ $@ = "serial" -a ! -f STUBS/libmpi_stubs.a ]; \
 	  then $(MAKE) mpi-stubs; fi
 	@test -f MAKE/Makefile.$@ -o -f MAKE/OPTIONS/Makefile.$@ -o \
 	  -f MAKE/MACHINES/Makefile.$@ -o -f MAKE/MINE/Makefile.$@
 	@if [ ! -d $(objdir) ]; then mkdir $(objdir); fi
 	@$(SHELL) Make.sh style
 	@if [ -f MAKE/MACHINES/Makefile.$@ ]; \
 	  then cp MAKE/MACHINES/Makefile.$@ $(objdir)/Makefile; fi
 	@if [ -f MAKE/OPTIONS/Makefile.$@ ]; \
 	  then cp MAKE/OPTIONS/Makefile.$@ $(objdir)/Makefile; fi
 	@if [ -f MAKE/Makefile.$@ ]; \
 	  then cp MAKE/Makefile.$@ $(objdir)/Makefile; fi
 	@if [ -f MAKE/MINE/Makefile.$@ ]; \
 	  then cp MAKE/MINE/Makefile.$@ $(objdir)/Makefile; fi
 	@if [ ! -e Makefile.package ]; \
 	  then cp Makefile.package.empty Makefile.package; fi
 	@if [ ! -e Makefile.package.settings ]; \
 	  then cp Makefile.package.settings.empty Makefile.package.settings; fi
 	@cp Makefile.package Makefile.package.settings $(objdir)
 	@cd $(objdir); rm -f .depend; \
 	$(MAKE) $(MFLAGS) "SRC = $(SRC)" "INC = $(INC)" depend || :
 ifeq ($(mode),exe)
 	@cd $(objdir); \
 	$(MAKE) $(MFLAGS) "OBJ = $(OBJ)" "INC = $(INC)" "SHFLAGS =" \
 	  "EXE = ../$(EXE)" ../$(EXE)
 endif
 ifeq ($(mode),shexe)
 	@cd $(objdir); \
 	$(MAKE) $(MFLAGS) "OBJ = $(OBJ)" "INC = $(INC)" \
 	  "EXE = ../$(EXE)" ../$(EXE)
 endif
 ifeq ($(mode),lib)
 	@cd $(objdir); \
 	$(MAKE) $(MFLAGS) "OBJ = $(OBJLIB)" "INC = $(INC)" "SHFLAGS =" \
 	  "EXE = ../$(ARLIB)" lib
 	@rm -f $(ARLINK)
 	@ln -s $(ARLIB) $(ARLINK)
 endif
 ifeq ($(mode),shlib)
 	@cd $(objdir); \
 	$(MAKE) $(MFLAGS) "OBJ = $(OBJLIB)" "INC = $(INC)" \
 	  "EXE = ../$(SHLIB)" shlib
 	@rm -f $(SHLINK)
 	@ln -s $(SHLIB) $(SHLINK)
 endif
 
 # Remove machine-specific object files
 
 clean:
 	@echo 'make clean-all           delete all object files'
 	@echo 'make clean-machine       delete object files for one machine'
 
 clean-all:
 	rm -rf Obj_*
 clean-%:
 	rm -rf Obj_$(@:clean-%=%) Obj_shared_$(@:clean-%=%)
 
 # Create Makefile.list
 
 makelist:
 	@$(SHELL) Make.sh style
 	@$(SHELL) Make.sh Makefile.list
 
 # Make MPI STUBS library
 
 mpi-stubs:
 	@cd STUBS; $(MAKE) clean; $(MAKE)
 
 # install LAMMPS shared lib and Python wrapper for Python usage
 # include python package settings to 
 #   automatically adapt name of python interpreter
 
 sinclude ../lib/python/Makefile.lammps
 install-python:
 	@$(PYTHON) ../python/install.py
 
 # Create a tarball of src dir and packages
 # (--exclude is position-sensitive in GNU tar, so it must precede the paths)
 
 tar:
 	@cd STUBS; $(MAKE) clean
 	@cd ..; tar cvzf src/$(ROOT)_src.tar.gz --exclude='*/.svn' \
 	  src/Make* src/Package.sh src/Depend.sh src/Install.sh \
 	  src/MAKE src/DEPEND src/*.cpp src/*.h src/STUBS \
 	  $(patsubst %,src/%,$(PACKAGEUC)) $(patsubst %,src/%,$(PACKUSERUC))
 	@cd STUBS; $(MAKE)
 	@echo "Created $(ROOT)_src.tar.gz"
 
 # Package management
 
 # Print the package groups defined above and a summary of the package targets
 package:
 	@echo 'Standard packages:' $(PACKAGE)
 	@echo ''
 	@echo 'User-contributed packages:' $(PACKUSER)
 	@echo ''
 	@echo 'Packages that need system libraries:' $(PACKSYS)
 	@echo ''
 	@echo 'Packages that need provided libraries:' $(PACKINT)
 	@echo ''
 	@echo 'Packages that need external libraries:' $(PACKEXT)
 	@echo ''
 	@echo 'make package                 list available packages'
 	@echo 'make package-status (ps)     status of all packages'
 	@echo 'make yes-package             install a single pkg in src dir'
 	@echo 'make no-package              remove a single pkg from src dir'
 	@echo 'make yes-all                 install all pkgs in src dir'
 	@echo 'make no-all                  remove all pkgs from src dir'
 	@echo 'make yes-standard (yes-std)  install all standard pkgs'
 	@echo 'make no-standard (no-std)    remove all standard pkgs'
 	@echo 'make yes-user                install all user pkgs'
 	@echo 'make no-user                 remove all user pkgs'
 	@echo 'make yes-lib       install all pkgs with libs (included or ext)'
 	@echo 'make no-lib        remove all pkgs with libs (included or ext)'
 	@echo 'make yes-ext                 install all pkgs with external libs'
 	@echo 'make no-ext                  remove all pkgs with external libs'
 	@echo ''
 	@echo 'make package-update (pu)  replace src files with package files'
 	@echo 'make package-overwrite    replace package files with src files'
 	@echo 'make package-diff (pd)    diff src files against package files'
 	@echo ''
 	@echo 'make lib-package      build and/or download a package library'
 
 yes-all:
 	@for p in $(PACKALL); do $(MAKE) yes-$$p; done
 
 no-all:
 	@for p in $(PACKALL); do $(MAKE) no-$$p; done
 
 yes-standard yes-std:
 	@for p in $(PACKAGE); do $(MAKE) yes-$$p; done
 
 no-standard no-std:
 	@for p in $(PACKAGE); do $(MAKE) no-$$p; done
 
 yes-user:
 	@for p in $(PACKUSER); do $(MAKE) yes-$$p; done
 
 no-user:
 	@for p in $(PACKUSER); do $(MAKE) no-$$p; done
 
 yes-lib:
 	@for p in $(PACKLIB); do $(MAKE) yes-$$p; done
 
 no-lib:
 	@for p in $(PACKLIB); do $(MAKE) no-$$p; done
 
 yes-ext:
 	@for p in $(PACKEXT); do $(MAKE) yes-$$p; done
 
 no-ext:
 	@for p in $(PACKEXT); do $(MAKE) no-$$p; done
 
 # yes-<pkg>: install a single package into the src dir
 # - creates Makefile.package and Makefile.package.settings from their
 #   .empty templates when they do not exist yet
 # - $(YESDIR) is the upper-cased package name; if that directory is missing
 #   only a message is printed
 # - runs the package's own Install.sh when it has one, otherwise the generic
 #   ../Install.sh, with argument 1, then Depend.sh from the src dir
 yes-%:
 	@if [ ! -e Makefile.package ]; \
 	  then cp Makefile.package.empty Makefile.package; fi
 	@if [ ! -e Makefile.package.settings ]; \
 	  then cp Makefile.package.settings.empty Makefile.package.settings; fi
 	@if [ ! -e $(YESDIR) ]; then \
 	  echo "Package $(@:yes-%=%) does not exist"; \
 	elif [ -e $(YESDIR)/Install.sh ]; then \
 	  echo "Installing package $(@:yes-%=%)"; \
 	  cd $(YESDIR); $(SHELL) Install.sh 1; cd ..; \
 		$(SHELL) Depend.sh $(YESDIR) 1; \
 	else \
 	  echo "Installing package $(@:yes-%=%)"; \
 	  cd $(YESDIR); $(SHELL) ../Install.sh 1; cd ..; \
 		$(SHELL) Depend.sh $(YESDIR) 1; \
 	fi;
 
 # no-<pkg>: remove a single package from the src dir
 # - $(NODIR) is the upper-cased package name; if that directory is missing
 #   only a message is printed
 # - runs the package's own Install.sh when it has one, otherwise the generic
 #   ../Install.sh, with argument 0, then Depend.sh from the src dir
 no-%:
 	@if [ ! -e $(NODIR) ]; then \
 	  echo "Package $(@:no-%=%) does not exist"; \
 	elif [ -e $(NODIR)/Install.sh ]; then \
 	  echo "Uninstalling package $(@:no-%=%)"; \
 	  cd $(NODIR); $(SHELL) Install.sh 0; cd ..; \
 		$(SHELL) Depend.sh $(NODIR) 0; \
 	else \
 	  echo "Uninstalling package $(@:no-%=%)"; \
 	  cd $(NODIR); $(SHELL) ../Install.sh 0; cd ..; \
 		$(SHELL) Depend.sh $(NODIR) 0; \
         fi;
 
 # download/build/install a package library, run by the same $(PYTHON) as install-python
 
 lib-%:
 	@if [ -e ../lib/$(LIBDIR)/Install.py ]; then \
 	  echo "Installing lib $(@:lib-%=%)"; \
 	  cd ../lib/$(LIBDIR); $(PYTHON) Install.py $(args); \
 	elif [ -e ../lib/$(LIBUSERDIR)/Install.py ]; then \
 	  echo "Installing lib $(@:lib-user-%=%)"; \
 	  cd ../lib/$(LIBUSERDIR); $(PYTHON) Install.py $(args); \
 	else \
 	  echo "Install script for lib $(@:lib-%=%) does not exist"; \
 	fi;
 
 # status = list src files that differ from package files
 # update = replace src files with newer package files
 # overwrite = overwrite package files with newer src files
 # diff = show differences between src and package files
 # purge = delete obsolete and auto-generated package files
 
 package-status ps:
 	@for p in $(PACKAGEUC); do $(SHELL) Package.sh $$p status; done
 	@echo ''
 	@for p in $(PACKUSERUC); do $(SHELL) Package.sh $$p status; done
 
 package-update pu:
 	@for p in $(PACKAGEUC); do $(SHELL) Package.sh $$p update; done
 	@echo ''
 	@for p in $(PACKUSERUC); do $(SHELL) Package.sh $$p update; done
 
 package-overwrite:
 	@for p in $(PACKAGEUC); do $(SHELL) Package.sh $$p overwrite; done
 	@echo ''
 	@for p in $(PACKUSERUC); do $(SHELL) Package.sh $$p overwrite; done
 
 package-diff pd:
 	@for p in $(PACKAGEUC); do $(SHELL) Package.sh $$p diff; done
 	@echo ''
 	@for p in $(PACKUSERUC); do $(SHELL) Package.sh $$p diff; done
 
 purge: Purge.list
 	@echo 'Purging obsolete and auto-generated source files'
 	@for f in `grep -v '#' Purge.list` ;		\
 	    do test -f $$f && rm $$f && echo $$f || : ;		\
 	done
diff --git a/src/balance.cpp b/src/balance.cpp
index c184a72d3..8f994466a 100644
--- a/src/balance.cpp
+++ b/src/balance.cpp
@@ -1,1337 +1,1337 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors, for weighted balancing: 
      Axel Kohlmeyer (Temple U), Iain Bethune (EPCC)
 ------------------------------------------------------------------------- */
 
 //#define BALANCE_DEBUG 1
 
 #include <mpi.h>
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include "balance.h"
 #include "atom.h"
 #include "comm.h"
 #include "rcb.h"
 #include "irregular.h"
 #include "domain.h"
 #include "force.h"
 #include "update.h"
 #include "group.h"
 #include "modify.h"
 #include "fix_store.h"
 #include "imbalance.h"
 #include "imbalance_group.h"
 #include "imbalance_time.h"
 #include "imbalance_neigh.h"
 #include "imbalance_store.h"
 #include "imbalance_var.h"
 #include "timer.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 enum{XYZ,SHIFT,BISECTION};
 enum{NONE,UNIFORM,USER};
 enum{X,Y,Z};
 enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED};    // several files
 
 /* ----------------------------------------------------------------------
    constructor: query MPI rank/size and flag all storage as unallocated
 ------------------------------------------------------------------------- */
 
 Balance::Balance(LAMMPS *lmp) : Pointers(lmp)
 {
   MPI_Comm_size(world,&nprocs);
   MPI_Comm_rank(world,&me);
 
   // nothing is allocated yet, the destructor relies on these settings
 
   user_xsplit = NULL;
   user_ysplit = NULL;
   user_zsplit = NULL;
 
   proccost = NULL;
   allproccost = NULL;
   shift_allocate = 0;
 
   rcb = NULL;
   fixstore = NULL;
 
   imbalances = NULL;
   nimbalance = 0;
 
   // no output file is open
 
   firststep = 1;
   fp = NULL;
 }
 
 /* ----------------------------------------------------------------------
    destructor: release all storage owned by this instance
 ------------------------------------------------------------------------- */
 
 Balance::~Balance()
 {
   // per-processor cost arrays
 
   memory->destroy(proccost);
   memory->destroy(allproccost);
 
   // user-specified splits from the x/y/z keywords
 
   delete [] user_xsplit;
   delete [] user_ysplit;
   delete [] user_zsplit;
 
   // work arrays exist only if shift_setup_static() was invoked
 
   if (shift_allocate) {
     delete [] bdim;
     delete [] onecost;
     delete [] allcost;
     delete [] sum;
     delete [] target;
     delete [] lo;
     delete [] hi;
     delete [] losum;
     delete [] hisum;
   }
 
   delete rcb;
 
   // Imbalance instances created by options()
 
   int n = 0;
   while (n < nimbalance) delete imbalances[n++];
   delete [] imbalances;
 
   // remove the weight-storage fix
   // check nfix in case all fixes have already been deleted
 
   if (fixstore) {
     if (modify->nfix) modify->delete_fix(fixstore->id);
   }
   fixstore = NULL;
 
   // close output file if one was opened
 
   if (fp) fclose(fp);
 }
 
 /* ----------------------------------------------------------------------
    called as balance command in input script
 ------------------------------------------------------------------------- */
 
 void Balance::command(int narg, char **arg)
 {
   if (domain->box_exist == 0)
     error->all(FLERR,"Balance command before simulation box is defined");
 
   if (me == 0 && screen) fprintf(screen,"Balancing ...\n");
 
   // parse required arguments
 
   if (narg < 2) error->all(FLERR,"Illegal balance command");
 
   thresh = force->numeric(FLERR,arg[0]);
 
   int dimension = domain->dimension;
   int *procgrid = comm->procgrid;
   style = -1;
   xflag = yflag = zflag = NONE;
 
   int iarg = 1;
   while (iarg < narg) {
     if (strcmp(arg[iarg],"x") == 0) {
       if (style != -1 && style != XYZ)
         error->all(FLERR,"Illegal balance command");
       style = XYZ;
       if (strcmp(arg[iarg+1],"uniform") == 0) {
         if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
         xflag = UNIFORM;
         iarg += 2;
       } else {
         if (1 + procgrid[0]-1 > narg)
           error->all(FLERR,"Illegal balance command");
         xflag = USER;
         delete [] user_xsplit;
         user_xsplit = new double[procgrid[0]+1];
         user_xsplit[0] = 0.0;
         iarg++;
         for (int i = 1; i < procgrid[0]; i++)
           user_xsplit[i] = force->numeric(FLERR,arg[iarg++]);
         user_xsplit[procgrid[0]] = 1.0;
       }
     } else if (strcmp(arg[iarg],"y") == 0) {
       if (style != -1 && style != XYZ)
         error->all(FLERR,"Illegal balance command");
       style = XYZ;
       if (strcmp(arg[iarg+1],"uniform") == 0) {
         if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
         yflag = UNIFORM;
         iarg += 2;
       } else {
         if (1 + procgrid[1]-1 > narg)
           error->all(FLERR,"Illegal balance command");
         yflag = USER;
         delete [] user_ysplit;
         user_ysplit = new double[procgrid[1]+1];
         user_ysplit[0] = 0.0;
         iarg++;
         for (int i = 1; i < procgrid[1]; i++)
           user_ysplit[i] = force->numeric(FLERR,arg[iarg++]);
         user_ysplit[procgrid[1]] = 1.0;
       }
     } else if (strcmp(arg[iarg],"z") == 0) {
       if (style != -1 && style != XYZ)
         error->all(FLERR,"Illegal balance command");
       style = XYZ;
       if (strcmp(arg[iarg+1],"uniform") == 0) {
         if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
         zflag = UNIFORM;
         iarg += 2;
       } else {
         if (1 + procgrid[2]-1 > narg)
           error->all(FLERR,"Illegal balance command");
         zflag = USER;
         delete [] user_zsplit;
         user_zsplit = new double[procgrid[2]+1];
         user_zsplit[0] = 0.0;
         iarg++;
         for (int i = 1; i < procgrid[2]; i++)
           user_zsplit[i] = force->numeric(FLERR,arg[iarg++]);
         user_zsplit[procgrid[2]] = 1.0;
       }
 
     } else if (strcmp(arg[iarg],"shift") == 0) {
       if (style != -1) error->all(FLERR,"Illegal balance command");
       if (iarg+4 > narg) error->all(FLERR,"Illegal balance command");
       style = SHIFT;
       if (strlen(arg[iarg+1]) > 3) error->all(FLERR,"Illegal balance command");
       strcpy(bstr,arg[iarg+1]);
       nitermax = force->inumeric(FLERR,arg[iarg+2]);
       if (nitermax <= 0) error->all(FLERR,"Illegal balance command");
       stopthresh = force->numeric(FLERR,arg[iarg+3]);
       if (stopthresh < 1.0) error->all(FLERR,"Illegal balance command");
       iarg += 4;
 
     } else if (strcmp(arg[iarg],"rcb") == 0) {
       if (style != -1) error->all(FLERR,"Illegal balance command");
       style = BISECTION;
       iarg++;
 
     } else break;
   }
 
   // error checks
 
   if (style == XYZ) {
     if (zflag != NONE  && dimension == 2)
       error->all(FLERR,"Cannot balance in z dimension for 2d simulation");
 
     if (xflag == USER)
       for (int i = 1; i <= procgrid[0]; i++)
         if (user_xsplit[i-1] >= user_xsplit[i])
           error->all(FLERR,"Illegal balance command");
     if (yflag == USER)
       for (int i = 1; i <= procgrid[1]; i++)
         if (user_ysplit[i-1] >= user_ysplit[i])
           error->all(FLERR,"Illegal balance command");
     if (zflag == USER)
       for (int i = 1; i <= procgrid[2]; i++)
         if (user_zsplit[i-1] >= user_zsplit[i])
           error->all(FLERR,"Illegal balance command");
   }
 
   if (style == SHIFT) {
     const int blen=strlen(bstr);
     for (int i = 0; i < blen; i++) {
       if (bstr[i] != 'x' && bstr[i] != 'y' && bstr[i] != 'z')
         error->all(FLERR,"Balance shift string is invalid");
       if (bstr[i] == 'z' && dimension == 2)
         error->all(FLERR,"Balance shift string is invalid");
       for (int j = i+1; j < blen; j++)
         if (bstr[i] == bstr[j])
           error->all(FLERR,"Balance shift string is invalid");
     }
   }
 
   if (style == BISECTION && comm->style == 0)
     error->all(FLERR,"Balance rcb cannot be used with comm_style brick");
 
   // process remaining optional args
 
   options(iarg,narg,arg);
   if (wtflag) weight_storage(NULL);
 
   // insure particles are in current box & update box via shrink-wrap
   // init entire system since comm->setup is done
   // comm::init needs neighbor::init needs pair::init needs kspace::init, etc
   // must reset atom map after exchange() since it clears it
 
   MPI_Barrier(world);
   double start_time = MPI_Wtime();
 
   lmp->init();
 
   if (domain->triclinic) domain->x2lamda(atom->nlocal);
   domain->pbc();
   domain->reset_box();
   comm->setup();
   comm->exchange();
   if (atom->map_style) atom->map_set();
   if (domain->triclinic) domain->lamda2x(atom->nlocal);
 
   // imbinit = initial imbalance
 
   double maxinit;
   init_imbalance(0);
   set_weights();
   double imbinit = imbalance_factor(maxinit);
 
   // no load-balance if imbalance doesn't exceed threshold
   // unless switching from tiled to non tiled layout, then force rebalance
 
   if (comm->layout == LAYOUT_TILED && style != BISECTION) {
   } else if (imbinit < thresh) return;
 
   // debug output of initial state
 
 #ifdef BALANCE_DEBUG
   if (outflag) dumpout(update->ntimestep);
 #endif
 
   int niter = 0;
   
   // perform load-balance
   // style XYZ = explicit setting of cutting planes of logical 3d grid
 
   if (style == XYZ) {
     if (comm->layout == LAYOUT_UNIFORM) {
       if (xflag == USER || yflag == USER || zflag == USER)
         comm->layout = LAYOUT_NONUNIFORM;
     } else if (comm->style == LAYOUT_NONUNIFORM) {
       if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
         comm->layout = LAYOUT_UNIFORM;
     } else if (comm->style == LAYOUT_TILED) {
       if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
         comm->layout = LAYOUT_UNIFORM;
       else comm->layout = LAYOUT_NONUNIFORM;
     }
 
     if (xflag == UNIFORM) {
       for (int i = 0; i < procgrid[0]; i++)
         comm->xsplit[i] = i * 1.0/procgrid[0];
       comm->xsplit[procgrid[0]] = 1.0;
     } else if (xflag == USER)
       for (int i = 0; i <= procgrid[0]; i++) comm->xsplit[i] = user_xsplit[i];
 
     if (yflag == UNIFORM) {
       for (int i = 0; i < procgrid[1]; i++)
         comm->ysplit[i] = i * 1.0/procgrid[1];
       comm->ysplit[procgrid[1]] = 1.0;
     } else if (yflag == USER)
       for (int i = 0; i <= procgrid[1]; i++) comm->ysplit[i] = user_ysplit[i];
 
     if (zflag == UNIFORM) {
       for (int i = 0; i < procgrid[2]; i++)
         comm->zsplit[i] = i * 1.0/procgrid[2];
       comm->zsplit[procgrid[2]] = 1.0;
     } else if (zflag == USER)
       for (int i = 0; i <= procgrid[2]; i++) comm->zsplit[i] = user_zsplit[i];
   }
 
   // style SHIFT = adjust cutting planes of logical 3d grid
 
   if (style == SHIFT) {
     comm->layout = LAYOUT_NONUNIFORM;
     shift_setup_static(bstr);
     niter = shift();
   }
 
   // style BISECTION = recursive coordinate bisectioning
 
   if (style == BISECTION) {
     comm->layout = LAYOUT_TILED;
     bisection(1);
   }
 
   // reset proc sub-domains
   // for either brick or tiled comm style
 
   if (domain->triclinic) domain->set_lamda_box();
   domain->set_local_box();
 
   // move particles to new processors via irregular()
 
   if (domain->triclinic) domain->x2lamda(atom->nlocal);
   Irregular *irregular = new Irregular(lmp);
   if (wtflag) fixstore->disable = 0;
   if (style == BISECTION) irregular->migrate_atoms(1,1,rcb->sendproc);
   else irregular->migrate_atoms(1);
   if (wtflag) fixstore->disable = 1;
   delete irregular;
   if (domain->triclinic) domain->lamda2x(atom->nlocal);
 
   // output of final result
 
   if (outflag) dumpout(update->ntimestep);
 
   // check if any particles were lost
 
   bigint natoms;
   bigint nblocal = atom->nlocal;
   MPI_Allreduce(&nblocal,&natoms,1,MPI_LMP_BIGINT,MPI_SUM,world);
   if (natoms != atom->natoms) {
     char str[128];
     sprintf(str,"Lost atoms via balance: original " BIGINT_FORMAT
             " current " BIGINT_FORMAT,atom->natoms,natoms);
     error->all(FLERR,str);
   }
 
   // imbfinal = final imbalance
 
   double maxfinal;
   double imbfinal = imbalance_factor(maxfinal);
 
   // stats output
 
   double stop_time = MPI_Wtime();
 
   if (me == 0) {
     if (screen) {
       fprintf(screen,"  rebalancing time: %g seconds\n",stop_time-start_time);
       fprintf(screen,"  iteration count = %d\n",niter);
       for (int i = 0; i < nimbalance; ++i) imbalances[i]->info(screen);
       fprintf(screen,"  initial/final max load/proc = %g %g\n",
               maxinit,maxfinal);
       fprintf(screen,"  initial/final imbalance factor = %g %g\n",
               imbinit,imbfinal);
     }
     if (logfile) {
       fprintf(logfile,"  rebalancing time: %g seconds\n",stop_time-start_time);
       fprintf(logfile,"  iteration count = %d\n",niter);
       for (int i = 0; i < nimbalance; ++i) imbalances[i]->info(logfile);
       fprintf(logfile,"  initial/final max load/proc = %g %g\n",
               maxinit,maxfinal);
       fprintf(logfile,"  initial/final imbalance factor = %g %g\n",
               imbinit,imbfinal);
     }
   }
 
   if (style != BISECTION) {
     if (me == 0) {
       if (screen) {
         fprintf(screen,"  x cuts:");
         for (int i = 0; i <= comm->procgrid[0]; i++)
           fprintf(screen," %g",comm->xsplit[i]);
         fprintf(screen,"\n");
         fprintf(screen,"  y cuts:");
         for (int i = 0; i <= comm->procgrid[1]; i++)
           fprintf(screen," %g",comm->ysplit[i]);
         fprintf(screen,"\n");
         fprintf(screen,"  z cuts:");
         for (int i = 0; i <= comm->procgrid[2]; i++)
           fprintf(screen," %g",comm->zsplit[i]);
         fprintf(screen,"\n");
       }
       if (logfile) {
         fprintf(logfile,"  x cuts:");
         for (int i = 0; i <= comm->procgrid[0]; i++)
           fprintf(logfile," %g",comm->xsplit[i]);
         fprintf(logfile,"\n");
         fprintf(logfile,"  y cuts:");
         for (int i = 0; i <= comm->procgrid[1]; i++)
           fprintf(logfile," %g",comm->ysplit[i]);
         fprintf(logfile,"\n");
         fprintf(logfile,"  z cuts:");
         for (int i = 0; i <= comm->procgrid[2]; i++)
           fprintf(logfile," %g",comm->zsplit[i]);
         fprintf(logfile,"\n");
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    process optional command args for Balance and FixBalance
 ------------------------------------------------------------------------- */
 
 void Balance::options(int iarg, int narg, char **arg)
 {
   // count max number of weight settings
 
   nimbalance = 0;
   for (int i = iarg; i < narg; i++)
     if (strcmp(arg[i],"weight") == 0) nimbalance++;
   if (nimbalance) imbalances = new Imbalance*[nimbalance];
   nimbalance = 0;
 
   wtflag = 0;
   varflag = 0;
   oldrcb = 0;
   outflag = 0;
   int outarg = 0;
   fp = NULL;
 
   while (iarg < narg) {
     if (strcmp(arg[iarg],"weight") == 0) {
       wtflag = 1;
       Imbalance *imb;
       int nopt = 0;
       if (strcmp(arg[iarg+1],"group") == 0) {
         imb = new ImbalanceGroup(lmp);
         nopt = imb->options(narg-iarg,arg+iarg+2);
         imbalances[nimbalance++] = imb;
       } else if (strcmp(arg[iarg+1],"time") == 0) {
         imb = new ImbalanceTime(lmp);
         nopt = imb->options(narg-iarg,arg+iarg+2);
         imbalances[nimbalance++] = imb;
       } else if (strcmp(arg[iarg+1],"neigh") == 0) {
         imb = new ImbalanceNeigh(lmp);
         nopt = imb->options(narg-iarg,arg+iarg+2);
         imbalances[nimbalance++] = imb;
       } else if (strcmp(arg[iarg+1],"var") == 0) {
         varflag = 1;
         imb = new ImbalanceVar(lmp);
         nopt = imb->options(narg-iarg,arg+iarg+2);
         imbalances[nimbalance++] = imb;
       } else if (strcmp(arg[iarg+1],"store") == 0) {
         imb = new ImbalanceStore(lmp);
         nopt = imb->options(narg-iarg,arg+iarg+2);
         imbalances[nimbalance++] = imb;
       } else {
         error->all(FLERR,"Unknown (fix) balance weight method");
       }
       iarg += 2+nopt;
 
     } else if (strcmp(arg[iarg],"old") == 0) {
       oldrcb = 1;
       iarg++;
     } else if (strcmp(arg[iarg],"out") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal (fix) balance command");
       outflag = 1;
       outarg = iarg+1;
       iarg += 2;
     } else error->all(FLERR,"Illegal (fix) balance command");
   }
 
   // output file
 
   if (outflag && comm->me == 0) {
     fp = fopen(arg[outarg],"w");
     if (fp == NULL) error->one(FLERR,"Cannot open (fix) balance output file");
   }
 }
 
 /* ----------------------------------------------------------------------
    allocate per-particle weight storage via FixStore
    use prefix to distinguish Balance vs FixBalance storage
    prefix = NULL for Balance, else string prepended to the fix ID
    fix could already be allocated if fix balance is re-specified
    sets fixstore and leaves it disabled
 ------------------------------------------------------------------------- */
 
 void Balance::weight_storage(char *prefix)
 {
   char *fixargs[6];
 
   // fix ID = optional prefix + IMBALANCE_WEIGHTS
 
   if (prefix) {
     int n = strlen(prefix) + 32;
     fixargs[0] = new char[n];
     strcpy(fixargs[0],prefix);
     strcat(fixargs[0],"IMBALANCE_WEIGHTS");
   } else fixargs[0] = (char *) "IMBALANCE_WEIGHTS";
 
   fixargs[1] = (char *) "all";
   fixargs[2] = (char *) "STORE";
   fixargs[3] = (char *) "peratom";
   fixargs[4] = (char *) "0";
   fixargs[5] = (char *) "1";
 
   // re-use an existing fix with this ID, otherwise create a new one
   // index 0 is a valid fix index, so only a negative value means not found
   // NOTE(review): assumes find_fix() returns -1 if no fix matches - confirm
 
   int ifix = modify->find_fix(fixargs[0]);
   if (ifix < 0) {
     modify->add_fix(6,fixargs);
     fixstore = (FixStore *) modify->fix[modify->nfix-1];
   } else fixstore = (FixStore *) modify->fix[ifix];
 
   fixstore->disable = 1;
 
   if (prefix) delete [] fixargs[0];
 }
 
 /* ----------------------------------------------------------------------
    call init() of every Imbalance instance, no-op without weights
    flag is passed through: 0 = called from Balance, 1 = from FixBalance
 ------------------------------------------------------------------------- */
 
 void Balance::init_imbalance(int flag)
 {
   if (wtflag) {
     int n = 0;
     while (n < nimbalance) {
       imbalances[n]->init(flag);
       n++;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    set weight for each particle
    via list of Nimbalance classes
 ------------------------------------------------------------------------- */
 
 void Balance::set_weights()
 {
   if (!wtflag) return;
   weight = fixstore->vstore;
 
   int nlocal = atom->nlocal;
   for (int i = 0; i < nlocal; i++) weight[i] = 1.0;
   for (int n = 0; n < nimbalance; n++) imbalances[n]->compute(weight);
 }
 
 /* ----------------------------------------------------------------------
    calculate imbalance factor based on particle count or particle weights
    return max = max load per proc
    return imbalance = max load per proc / ave load per proc
 ------------------------------------------------------------------------- */
 
 double Balance::imbalance_factor(double &maxcost)
 {
   double mycost,totalcost;
 
   if (wtflag) {
     weight = fixstore->vstore;
     int nlocal = atom->nlocal;
 
     mycost = 0.0;
     for (int i = 0; i < nlocal; i++) mycost += weight[i];
 
   } else mycost = atom->nlocal;
 
   MPI_Allreduce(&mycost,&maxcost,1,MPI_DOUBLE,MPI_MAX,world);
   MPI_Allreduce(&mycost,&totalcost,1,MPI_DOUBLE,MPI_SUM,world);
 
   double imbalance = 1.0;
   if (maxcost > 0.0) imbalance = maxcost / (totalcost/nprocs);
   return imbalance;
 }
 
 /* ----------------------------------------------------------------------
    load balance via recursive coordinate bisection in the RCB class
    sortflag is handed to RCB::invert()
    stores the resulting cut and sub-box of this proc in Comm
    return list of procs to send my atoms to
 ------------------------------------------------------------------------- */
 
 int *Balance::bisection(int sortflag)
 {
   if (!rcb) rcb = new RCB(lmp);
 
   // NOTE: this logic is specific to orthogonal boxes, not triclinic
 
   int dim = domain->dimension;
   double *boxlo = domain->boxlo;
   double *boxhi = domain->boxhi;
   double *prd = domain->prd;
 
   // shrink-wrap simulation box around atoms for input to RCB
   // leads to better-shaped sub-boxes when atoms are far from box boundaries
   // entries 0-2 hold the minimum, entries 3-5 the maximum in x,y,z
 
   double shrink[6],shrinkall[6];
 
   for (int d = 0; d < 3; d++) {
     shrink[d] = boxhi[d];
     shrink[d+3] = boxlo[d];
   }
 
   double **x = atom->x;
   int nlocal = atom->nlocal;
   for (int i = 0; i < nlocal; i++)
     for (int d = 0; d < 3; d++) {
       shrink[d] = MIN(shrink[d],x[i][d]);
       shrink[d+3] = MAX(shrink[d+3],x[i][d]);
     }
 
   // negate the maxima so a single MPI_MIN reduction handles all 6 values
 
   for (int d = 3; d < 6; d++) shrink[d] = -shrink[d];
   MPI_Allreduce(shrink,shrinkall,6,MPI_DOUBLE,MPI_MIN,world);
   for (int d = 3; d < 6; d++) shrinkall[d] = -shrinkall[d];
 
   double *shrinklo = &shrinkall[0];
   double *shrinkhi = &shrinkall[3];
 
   // invoke RCB
   // then invert() to create list of proc assignments for my atoms
   // NOTE: (3/2017) can remove undocumented "old" option at some point
   //       ditto in rcb.cpp
 
   if (wtflag) weight = fixstore->vstore;
 
   if (oldrcb) {
     if (wtflag)
       rcb->compute_old(dim,atom->nlocal,atom->x,weight,shrinklo,shrinkhi);
     else rcb->compute_old(dim,atom->nlocal,atom->x,NULL,shrinklo,shrinkhi);
   } else {
     if (wtflag)
       rcb->compute(dim,atom->nlocal,atom->x,weight,shrinklo,shrinkhi);
     else rcb->compute(dim,atom->nlocal,atom->x,NULL,shrinklo,shrinkhi);
   }
 
   rcb->invert(sortflag);
 
   // reset RCB lo/hi bounding box to full simulation box as needed
   // i.e. wherever my sub-box touches the shrink-wrapped box
 
   double *rcblo = rcb->lo;
   double *rcbhi = rcb->hi;
 
   for (int d = 0; d < 3; d++)
     if (rcblo[d] == shrinklo[d]) rcblo[d] = boxlo[d];
   for (int d = 0; d < 3; d++)
     if (rcbhi[d] == shrinkhi[d]) rcbhi[d] = boxhi[d];
 
   // store RCB cut, dim, lo/hi box in CommTiled
   // cut and lo/hi are stored in fractional form
   // OK if changes by epsilon from what RCB used since atoms
   //   will subsequently migrate to new owning procs by exchange() anyway
   // ditto for atoms exactly on lo/hi RCB box boundaries due to ties
 
   comm->rcbnew = 1;
 
   int idim = rcb->cutdim;
   if (idim < 0) comm->rcbcutfrac = 0.0;
   else comm->rcbcutfrac = (rcb->cut - boxlo[idim]) / prd[idim];
   comm->rcbcutdim = idim;
 
   // upper bound is set to exactly 1.0 where my sub-box reaches the box edge
 
   double (*mysplit)[2] = comm->mysplit;
 
   for (int d = 0; d < 3; d++) {
     mysplit[d][0] = (rcblo[d] - boxlo[d]) / prd[d];
     if (rcbhi[d] == boxhi[d]) mysplit[d][1] = 1.0;
     else mysplit[d][1] = (rcbhi[d] - boxlo[d]) / prd[d];
   }
 
   // return list of procs to send my atoms to
 
   return rcb->sendproc;
 }
 
 /* ----------------------------------------------------------------------
    allocate work arrays for shift balancing and decode the dimension string
    str = sequence of the characters x,y,z
    invoked by command() and by shift_setup()
    leaves rho = 0, the setting for static balancing
 ------------------------------------------------------------------------- */
 
 void Balance::shift_setup_static(char *str)
 {
   shift_allocate = 1;
 
   memory->create(proccost,nprocs,"balance:proccost");
   memory->create(allproccost,nprocs,"balance:allproccost");
 
   // translate each character of str into a dimension index
 
   ndim = strlen(str);
   bdim = new int[ndim];
 
   for (int idim = 0; idim < ndim; idim++) {
     switch (str[idim]) {
     case 'x': bdim[idim] = X; break;
     case 'y': bdim[idim] = Y; break;
     case 'z': bdim[idim] = Z; break;
     default: break;
     }
   }
 
   // work arrays are sized by the largest proc count in any dimension
 
   int *procgrid = comm->procgrid;
   int nmax = MAX(procgrid[0],procgrid[1]);
   nmax = MAX(nmax,procgrid[2]);
 
   onecost = new double[nmax];
   allcost = new double[nmax];
   sum = new double[nmax+1];
   target = new double[nmax+1];
   lo = new double[nmax+1];
   hi = new double[nmax+1];
   losum = new double[nmax+1];
   hisum = new double[nmax+1];
 
   // if current layout is TILED, set initial uniform splits in Comm
   // this gives starting point to subsequent shift balancing
 
   if (comm->layout == LAYOUT_TILED) {
     double *xs = comm->xsplit;
     double *ys = comm->ysplit;
     double *zs = comm->zsplit;
     const int nx = procgrid[0];
     const int ny = procgrid[1];
     const int nz = procgrid[2];
 
     for (int i = 0; i < nx; i++) xs[i] = i * 1.0/nx;
     for (int i = 0; i < ny; i++) ys[i] = i * 1.0/ny;
     for (int i = 0; i < nz; i++) zs[i] = i * 1.0/nz;
     zs[nz] = 1.0;
     ys[ny] = 1.0;
     xs[nx] = 1.0;
   }
 
   rho = 0;
 }
 
 /* ----------------------------------------------------------------------
    setup shift load balance operations for repeated use
    called from fix balance
    performs the static setup, stores iteration limit and stop threshold,
    then switches to rho = 1 for dynamic balancing
 ------------------------------------------------------------------------- */
 
 void Balance::shift_setup(char *str, int nitermax_in, double thresh_in)
 {
   // static setup first, it resets rho to 0
 
   shift_setup_static(str);
 
   stopthresh = thresh_in;
   nitermax = nitermax_in;
   rho = 1;
 }
 
 /* ----------------------------------------------------------------------
    load balance by changing xyz split proc boundaries in Comm
    called one time from input script command or many times from fix balance
    return niter = iteration count
 ------------------------------------------------------------------------- */
 
 int Balance::shift()
 {
-  int i,j,k,m,np,max;
+  int i,j,k,m,np;
   double mycost,totalcost;
   double *split;
 
   // sum, target, lo, hi, losum, hisum are work arrays
   // allocated in shift_setup_static()
 
   // no balancing if no atoms
 
   bigint natoms = atom->natoms;
   if (natoms == 0) return 0;
 
   // set delta for 1d balancing = root of threshold
   // root = # of dimensions being balanced on
 
   double delta = pow(stopthresh,1.0/ndim) - 1.0;
   int *procgrid = comm->procgrid;
 
   // all balancing done in lamda coords
 
   domain->x2lamda(atom->nlocal);
 
   // loop over dimensions in balance string
 
   int niter = 0;
   for (int idim = 0; idim < ndim; idim++) {
 
     // split = ptr to xyz split in Comm
     // entries of bdim that are not X, Y or Z are skipped
 
     if (bdim[idim] == X) split = comm->xsplit;
     else if (bdim[idim] == Y) split = comm->ysplit;
     else if (bdim[idim] == Z) split = comm->zsplit;
     else continue;
 
     // initial count and sum
 
     np = procgrid[bdim[idim]];
     tally(bdim[idim],np,split);
 
     // target[i] = desired sum at split I
     // total cost is the sum of weights, or the atom count if unweighted
 
     if (wtflag) {
       weight = fixstore->vstore;
       int nlocal = atom->nlocal;
       mycost = 0.0;
       for (i = 0; i < nlocal; i++) mycost += weight[i];
     } else mycost = atom->nlocal;
 
     MPI_Allreduce(&mycost,&totalcost,1,MPI_DOUBLE,MPI_SUM,world);
 
     for (i = 0; i < np; i++) target[i] = totalcost/np * i;
     target[np] = totalcost;
 
     // lo[i] = closest split <= split[i] with a sum <= target
     // hi[i] = closest split >= split[i] with a sum >= target
 
     lo[0] = hi[0] = 0.0;
     lo[np] = hi[np] = 1.0;
     losum[0] = hisum[0] = 0.0;
     losum[np] = hisum[np] = totalcost;
 
     for (i = 1; i < np; i++) {
       for (j = i; j >= 0; j--)
         if (sum[j] <= target[i]) {
           lo[i] = split[j];
           losum[i] = sum[j];
           break;
         }
       for (j = i; j <= np; j++)
         if (sum[j] >= target[i]) {
           hi[i] = split[j];
           hisum[i] = sum[j];
           break;
         }
     }
 
     // iterate until balanced
 
 #ifdef BALANCE_DEBUG
     if (me == 0) debug_shift_output(idim,0,np,split);
 #endif
 
     int doneflag;
     int change = 1;
     for (m = 0; m < nitermax; m++) {
       change = adjust(np,split);
       tally(bdim[idim],np,split);
       niter++;
 
 #ifdef BALANCE_DEBUG
       if (me == 0) debug_shift_output(idim,m+1,np,split);
       if (outflag) dumpout(update->ntimestep);
 #endif
 
       // stop if no change in splits, b/c all targets are met exactly
 
       if (!change) break;
 
       // stop if all split sums are within delta of targets
       // this is a 1d test of particle count per slice
       // assumption is that this is sufficient accuracy
       //   for 3d imbalance factor to reach threshold
 
       doneflag = 1;
       for (i = 1; i < np; i++)
         if (fabs(1.0*(sum[i]-target[i]))/target[i] > delta) doneflag = 0;
       if (doneflag) break;
     }
 
     // eliminate final adjacent splits that are duplicates
     // can happen if particle distribution is narrow and Nitermax is small
     // set lo = midpt between splits
     // spread duplicates out evenly between bounding midpts with non-duplicates
     // i,j = lo/hi indices of set of duplicate splits
     // delta = new spacing between duplicates
     // bounding midpts = lo[i-1] and lo[j]
     // note that delta and lo are overwritten here and no longer hold
     //   the convergence tolerance and the lower bounds
 
     int duplicate = 0;
     for (i = 1; i < np-1; i++)
       if (split[i] == split[i+1]) duplicate = 1;
     if (duplicate) {
       for (i = 0; i < np; i++)
         lo[i] = 0.5 * (split[i] + split[i+1]);
       i = 1;
       while (i < np-1) {
         j = i+1;
         while (split[j] == split[i]) j++;
         j--;
         if (j > i) {
           delta = (lo[j] - lo[i-1]) / (j-i+2);
           for (k = i; k <= j; k++)
             split[k] = lo[i-1] + (k-i+1)*delta;
         }
         i = j+1;
       }
     }
 
     // sanity check on bad duplicate or inverted splits
     // zero or negative width sub-domains will break Comm class
     // should never happen if recursive multisection algorithm is correct
 
     int bad = 0;
     for (i = 0; i < np; i++)
       if (split[i] >= split[i+1]) bad = 1;
     if (bad) error->all(FLERR,"Balance produced bad splits");
     /*
       if (me == 0) {
       printf("BAD SPLITS %d %d %d\n",np+1,niter,delta);
       for (i = 0; i < np+1; i++)
       printf(" %g",split[i]);
       printf("\n");
       }
     */
 
     // stop at this point in bstr if imbalance factor < threshold
     // this is a true 3d test of particle count per processor
 
     double imbfactor = imbalance_splits();
     if (imbfactor <= stopthresh) break;
   }
 
   // restore real coords
 
   domain->lamda2x(atom->nlocal);
 
   return niter;
 }
 
 /* ----------------------------------------------------------------------
    accumulate the cost of the atoms in each slice along dimension dim
    n = # of slices, split = n+1 cuts that bound the n slices
    cost of an atom = its weight, or 1.0 if unweighted
    binary() locates the slice of each atom
    on return allcost = cost per slice summed over all procs,
      sum = cumulative cost below each of the n+1 splits
 ------------------------------------------------------------------------- */
 
 void Balance::tally(int dim, int n, double *split)
 {
   double **x = atom->x;
   const int natom = atom->nlocal;
 
   for (int islice = 0; islice < n; islice++) onecost[islice] = 0.0;
 
   // per-slice cost of my atoms
 
   if (wtflag) weight = fixstore->vstore;
 
   for (int iatom = 0; iatom < natom; iatom++) {
     const int islice = binary(x[iatom][dim],n,split);
     if (wtflag) onecost[islice] += weight[iatom];
     else onecost[islice] += 1.0;
   }
 
   // per-slice cost of all atoms
 
   MPI_Allreduce(onecost,allcost,n,MPI_DOUBLE,MPI_SUM,world);
 
   // running total up to each split
 
   sum[0] = 0.0;
   for (int islice = 0; islice < n; islice++)
     sum[islice+1] = sum[islice] + allcost[islice];
 }
 
 /* ----------------------------------------------------------------------
    adjust cuts between N slices in a dim via recursive multisectioning method
    split = current N+1 cuts, with 0.0 and 1.0 at end points
    sum = cumulative count up to each split
    target = desired cumulative count up to each split
    lo/hi = split values that bound current split
    update lo/hi to reflect sums at current split values
    overwrite split with new cuts
      guaranteed that splits will remain in ascending order,
      though adjacent values may be identical
    recursive bisectioning zooms in on each cut by halving lo/hi
    return 0 if no changes in any splits, b/c they are all perfect
 ------------------------------------------------------------------------- */
 
 int Balance::adjust(int n, double *split)
 {
   int i;
   double fraction;
 
   // reset lo/hi based on current sum and splits
   // insure lo is monotonically increasing, ties are OK
   // insure hi is monotonically decreasing, ties are OK
   // this effectively uses info from nearby splits
   // to possibly tighten bounds on lo/hi
 
   for (i = 1; i < n; i++) {
     if (sum[i] <= target[i]) {
       lo[i] = split[i];
       losum[i] = sum[i];
     }
     if (sum[i] >= target[i]) {
       hi[i] = split[i];
       hisum[i] = sum[i];
     }
   }
   for (i = 1; i < n; i++)
     if (lo[i] < lo[i-1]) {
       lo[i] = lo[i-1];
       losum[i] = losum[i-1];
     }
   for (i = n-1; i > 0; i--)
     if (hi[i] > hi[i+1]) {
       hi[i] = hi[i+1];
       hisum[i] = hisum[i+1];
     }
 
   int change = 0;
   for (int i = 1; i < n; i++)
     if (sum[i] != target[i]) {
       change = 1;
       if (rho == 0) split[i] = 0.5 * (lo[i]+hi[i]);
       else {
         fraction = 1.0*(target[i]-losum[i]) / (hisum[i]-losum[i]);
         split[i] = lo[i] + fraction * (hi[i]-lo[i]);
       }
     }
   return change;
 }
 
 /* ----------------------------------------------------------------------
    calculate imbalance based on processor splits in 3 dims
    atoms must be in lamda coords (0-1) before called
    map particles to 3d grid of procs
    return imbalance factor = max load per proc / ave load per proc
 ------------------------------------------------------------------------- */
 
 double Balance::imbalance_splits()
 {
   double *xsplit = comm->xsplit;
   double *ysplit = comm->ysplit;
   double *zsplit = comm->zsplit;
 
   int nx = comm->procgrid[0];
   int ny = comm->procgrid[1];
   int nz = comm->procgrid[2];
 
   for (int i = 0; i < nprocs; i++) proccost[i] = 0.0;
 
   double **x = atom->x;
   int nlocal = atom->nlocal;
   int ix,iy,iz;
 
   if (wtflag) {
     weight = fixstore->vstore;
     for (int i = 0; i < nlocal; i++) {
       ix = binary(x[i][0],nx,xsplit);
       iy = binary(x[i][1],ny,ysplit);
       iz = binary(x[i][2],nz,zsplit);
       proccost[iz*nx*ny + iy*nx + ix] += weight[i];
     }
   } else {
     for (int i = 0; i < nlocal; i++) {
       ix = binary(x[i][0],nx,xsplit);
       iy = binary(x[i][1],ny,ysplit);
       iz = binary(x[i][2],nz,zsplit);
       proccost[iz*nx*ny + iy*nx + ix] += 1.0;
     }
   }
 
   // one proc's particles may map to many partitions, so must Allreduce
 
   MPI_Allreduce(proccost,allproccost,nprocs,MPI_DOUBLE,MPI_SUM,world);
 
   double maxcost = 0.0;
   double totalcost = 0.0;
   for (int i = 0; i < nprocs; i++) {
     maxcost = MAX(maxcost,allproccost[i]);
     totalcost += allproccost[i];
   }
 
   double imbalance = 1.0;
   if (maxcost > 0.0) imbalance = maxcost / (totalcost/nprocs);
   return imbalance;
 }
 
 /* ----------------------------------------------------------------------
    binary search for where value falls in N-length vec
    note that vec actually has N+1 values, but ignore last one
    values in vec are monotonically increasing, but adjacent values can be ties
    value may be outside range of vec limits
    always return index from 0 to N-1 inclusive
    return 0 if value < vec[0]
    return N-1 if value >= vec[N-1]
    return index = 1 to N-2 inclusive if vec[index] <= value < vec[index+1]
    note that for adjacent tie values, index of lower tie is not returned
      since never satisfies 2nd condition that value < vec[index+1]
 ------------------------------------------------------------------------- */
 
 int Balance::binary(double value, int n, double *vec)
 {
   // locate the slice of vec[0..n-1] that holds value
   // returns an index in 0..n-1 for any input
   // values below vec[0] map to 0, values at or above vec[n-1] map to n-1
 
   int lo = 0;
   int hi = n-1;
 
   if (value < vec[lo]) return lo;
   if (value >= vec[hi]) return hi;
 
   // insure vec[lo] <= value < vec[hi] at every iteration
   // done when lo,hi are adjacent
   // plain else (not "else if (value >= ...)") so that lo or hi always moves:
   //   a NaN value fails both comparisons and would otherwise loop forever
 
   int index = (lo+hi)/2;
   while (lo < hi-1) {
     if (value < vec[index]) hi = index;
     else lo = index;
     index = (lo+hi)/2;
   }
 
   return index;
 }
 
 /* ----------------------------------------------------------------------
    write dump snapshot of line segments in Pizza.py mdump mesh format
    write xy lines around each proc's sub-domain for 2d
    write xyz cubes around each proc's sub-domain for 3d
    only called by proc 0
    NOTE: only implemented for orthogonal boxes, not triclinic
 ------------------------------------------------------------------------- */
 
 void Balance::dumpout(bigint tstep)
 {
   // write one snapshot of all proc sub-domains to fp
   // tstep = timestep value written into the snapshot headers
   // every proc takes part in the Allgather, only proc 0 writes
 
   int dimension = domain->dimension;
   int triclinic = domain->triclinic;
 
   // Allgather each proc's sub-box
   // could use Gather, but that requires MPI to alloc memory
   // triclinic sub-boxes are gathered in lamda coords
 
   double *lo,*hi;
   if (triclinic == 0) {
     lo = domain->sublo;
     hi = domain->subhi;
   } else {
     lo = domain->sublo_lamda;
     hi = domain->subhi_lamda;
   }
 
   // box = xlo,ylo,zlo,xhi,yhi,zhi of this proc
 
   double box[6];
   box[0] = lo[0]; box[1] = lo[1]; box[2] = lo[2];
   box[3] = hi[0]; box[4] = hi[1]; box[5] = hi[2];
 
   double **boxall;
   memory->create(boxall,nprocs,6,"balance:dumpout");
   MPI_Allgather(box,6,MPI_DOUBLE,&boxall[0][0],6,MPI_DOUBLE,world);
 
   if (me) {
     memory->destroy(boxall);
     return;
   }
 
   // proc 0 writes out nodal coords
   // some will be duplicates
 
   double *boxlo = domain->boxlo;
   double *boxhi = domain->boxhi;
 
   fprintf(fp,"ITEM: TIMESTEP\n");
   fprintf(fp,BIGINT_FORMAT "\n",tstep);
   fprintf(fp,"ITEM: NUMBER OF NODES\n");
   if (dimension == 2) fprintf(fp,"%d\n",4*nprocs);
   else fprintf(fp,"%d\n",8*nprocs);
   fprintf(fp,"ITEM: BOX BOUNDS\n");
   fprintf(fp,"%g %g\n",boxlo[0],boxhi[0]);
   fprintf(fp,"%g %g\n",boxlo[1],boxhi[1]);
   fprintf(fp,"%g %g\n",boxlo[2],boxhi[2]);
   fprintf(fp,"ITEM: NODES\n");
 
   // node IDs run from 1, with 4 (2d) or 8 (3d) consecutive IDs per proc
 
   if (triclinic == 0) {
     if (dimension == 2) {
       int m = 0;
       for (int i = 0; i < nprocs; i++) {
         fprintf(fp,"%d %d %g %g %g\n",m+1,1,boxall[i][0],boxall[i][1],0.0);
         fprintf(fp,"%d %d %g %g %g\n",m+2,1,boxall[i][3],boxall[i][1],0.0);
         fprintf(fp,"%d %d %g %g %g\n",m+3,1,boxall[i][3],boxall[i][4],0.0);
         fprintf(fp,"%d %d %g %g %g\n",m+4,1,boxall[i][0],boxall[i][4],0.0);
         m += 4;
       }
     } else {
       int m = 0;
       for (int i = 0; i < nprocs; i++) {
         fprintf(fp,"%d %d %g %g %g\n",m+1,1,
                 boxall[i][0],boxall[i][1],boxall[i][2]);
         fprintf(fp,"%d %d %g %g %g\n",m+2,1,
                 boxall[i][3],boxall[i][1],boxall[i][2]);
         fprintf(fp,"%d %d %g %g %g\n",m+3,1,
                 boxall[i][3],boxall[i][4],boxall[i][2]);
         fprintf(fp,"%d %d %g %g %g\n",m+4,1,
                 boxall[i][0],boxall[i][4],boxall[i][2]);
         fprintf(fp,"%d %d %g %g %g\n",m+5,1,
                 boxall[i][0],boxall[i][1],boxall[i][5]);
         fprintf(fp,"%d %d %g %g %g\n",m+6,1,
                 boxall[i][3],boxall[i][1],boxall[i][5]);
         fprintf(fp,"%d %d %g %g %g\n",m+7,1,
                 boxall[i][3],boxall[i][4],boxall[i][5]);
         fprintf(fp,"%d %d %g %g %g\n",m+8,1,
                 boxall[i][0],boxall[i][4],boxall[i][5]);
         m += 8;
       }
     }
 
   } else {
 
     // NOTE(review): presumably lamda_box_corners() fills domain->corners
     //   with the corner points of the given lamda sub-box - confirm
 
     double (*bc)[3] = domain->corners;
 
     if (dimension == 2) {
       int m = 0;
       for (int i = 0; i < nprocs; i++) {
         domain->lamda_box_corners(&boxall[i][0],&boxall[i][3]);
         for (int k = 0; k < 4; k++)
           fprintf(fp,"%d %d %g %g %g\n",m+k+1,1,bc[k][0],bc[k][1],0.0);
         m += 4;
       }
     } else {
       int m = 0;
       for (int i = 0; i < nprocs; i++) {
         domain->lamda_box_corners(&boxall[i][0],&boxall[i][3]);
 
         // third value is the z coord bc[k][2]
         // previously bc[k][1] (the y coord) was written a second time
 
         for (int k = 0; k < 8; k++)
           fprintf(fp,"%d %d %g %g %g\n",m+k+1,1,
                   bc[k][0],bc[k][1],bc[k][2]);
         m += 8;
       }
     }
   }
 
   // write out one square/cube per processor for 2d/3d
   // each element lists the 4 or 8 node IDs written above for that proc
 
   fprintf(fp,"ITEM: TIMESTEP\n");
   fprintf(fp,BIGINT_FORMAT "\n",tstep);
   if (dimension == 2) fprintf(fp,"ITEM: NUMBER OF SQUARES\n");
   else fprintf(fp,"ITEM: NUMBER OF CUBES\n");
   fprintf(fp,"%d\n",nprocs);
   if (dimension == 2) fprintf(fp,"ITEM: SQUARES\n");
   else fprintf(fp,"ITEM: CUBES\n");
 
   if (dimension == 2) {
     int m = 0;
     for (int i = 0; i < nprocs; i++) {
       fprintf(fp,"%d %d %d %d %d %d\n",i+1,1,m+1,m+2,m+3,m+4);
       m += 4;
     }
   } else {
     int m = 0;
     for (int i = 0; i < nprocs; i++) {
       fprintf(fp,"%d %d %d %d %d %d %d %d %d %d\n",
               i+1,1,m+1,m+2,m+3,m+4,m+5,m+6,m+7,m+8);
       m += 8;
     }
   }
 
   memory->destroy(boxall);
 }
 
 /* ----------------------------------------------------------------------
    debug output for Idim and count
    only called by proc 0
 ------------------------------------------------------------------------- */
 
 #ifdef BALANCE_DEBUG
 void Balance::debug_shift_output(int idim, int m, int np, double *split)
 {
   // print the state of the shift balancer for one dimension to stderr
   // idim = index into bdim[], m = iteration number printed in the header
   // np = number of slices, split = np+1 cuts in that dimension
   // NOTE(review): count/sum/target/losum/hisum are printed with
   //   BIGINT_FORMAT - confirm these members are still bigint
 
   int i;
   const char *dim = NULL;
 
   double *boxlo = domain->boxlo;
   double *prd = domain->prd;
 
   // dim stays NULL if bdim[idim] is none of X,Y,Z
 
   if (bdim[idim] == X) dim = "X";
   else if (bdim[idim] == Y) dim = "Y";
   else if (bdim[idim] == Z) dim = "Z";
   fprintf(stderr,"Dimension %s, Iteration %d\n",dim,m);
 
   // per-slice values have np entries, per-cut values have np+1 entries
 
   fprintf(stderr,"  Count:");
   for (i = 0; i < np; i++) fprintf(stderr," " BIGINT_FORMAT,count[i]);
   fprintf(stderr,"\n");
   fprintf(stderr,"  Sum:");
   for (i = 0; i <= np; i++) fprintf(stderr," " BIGINT_FORMAT,sum[i]);
   fprintf(stderr,"\n");
   fprintf(stderr,"  Target:");
   for (i = 0; i <= np; i++) fprintf(stderr," " BIGINT_FORMAT,target[i]);
   fprintf(stderr,"\n");
 
   // cut position = box lower bound + split * box length in that dimension
 
   fprintf(stderr,"  Actual cut:");
   for (i = 0; i <= np; i++)
     fprintf(stderr," %g",boxlo[bdim[idim]] + split[i]*prd[bdim[idim]]);
   fprintf(stderr,"\n");
   fprintf(stderr,"  Split:");
   for (i = 0; i <= np; i++) fprintf(stderr," %g",split[i]);
   fprintf(stderr,"\n");
   fprintf(stderr,"  Low:");
   for (i = 0; i <= np; i++) fprintf(stderr," %g",lo[i]);
   fprintf(stderr,"\n");
   fprintf(stderr,"  Low-sum:");
   for (i = 0; i <= np; i++) fprintf(stderr," " BIGINT_FORMAT,losum[i]);
   fprintf(stderr,"\n");
   fprintf(stderr,"  Hi:");
   for (i = 0; i <= np; i++) fprintf(stderr," %g",hi[i]);
   fprintf(stderr,"\n");
   fprintf(stderr,"  Hi-sum:");
   for (i = 0; i <= np; i++) fprintf(stderr," " BIGINT_FORMAT,hisum[i]);
   fprintf(stderr,"\n");
 
   // width of each slice
 
   fprintf(stderr,"  Delta:");
   for (i = 0; i < np; i++) fprintf(stderr," %g",split[i+1]-split[i]);
   fprintf(stderr,"\n");
 
   // imbalance = largest slice count * np / target[np]
 
   bigint max = 0;
   for (i = 0; i < np; i++) max = MAX(max,count[i]);
   fprintf(stderr,"  Imbalance factor: %g\n",1.0*max*np/target[np]);
 }
 #endif
diff --git a/src/variable.cpp b/src/variable.cpp
index 6e16597c6..a8f195dbc 100644
--- a/src/variable.cpp
+++ b/src/variable.cpp
@@ -1,4986 +1,4988 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
 #include <unistd.h>
 #include "variable.h"
 #include "universe.h"
 #include "atom.h"
 #include "update.h"
 #include "group.h"
 #include "domain.h"
 #include "comm.h"
 #include "region.h"
 #include "modify.h"
 #include "compute.h"
 #include "fix.h"
 #include "fix_store.h"
 #include "force.h"
 #include "output.h"
 #include "thermo.h"
 #include "random_mars.h"
 #include "math_const.h"
 #include "atom_masks.h"
 #include "python.h"
 #include "memory.h"
 #include "info.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 #define VARDELTA 4
 #define MAXLEVEL 4
 #define MAXLINE 256
 #define CHUNK 1024
 #define VALUELENGTH 64               // also in python.cpp
 #define MAXFUNCARG 6
 
 // round to nearest integer value, fractional part of .5 or more goes to ceil
 // argument and whole expansion are parenthesized so the macro can be used
 //   inside larger expressions and with compound arguments
 // NOTE: argument is evaluated more than once, do not pass side effects
 #define MYROUND(a) ((((a)-floor(a)) >= .5) ? ceil(a) : floor(a))
 
 enum{INDEX,LOOP,WORLD,UNIVERSE,ULOOP,STRING,GETENV,
      SCALARFILE,ATOMFILE,FORMAT,EQUAL,ATOM,VECTOR,PYTHON,INTERNAL};
 enum{ARG,OP};
 
 // customize by adding a function
 // if add before XOR:
 // also set precedence level in constructor and precedence length in *.h
 
 enum{DONE,ADD,SUBTRACT,MULTIPLY,DIVIDE,CARAT,MODULO,UNARY,
      NOT,EQ,NE,LT,LE,GT,GE,AND,OR,XOR,
      SQRT,EXP,LN,LOG,ABS,SIN,COS,TAN,ASIN,ACOS,ATAN,ATAN2,
      RANDOM,NORMAL,CEIL,FLOOR,ROUND,RAMP,STAGGER,LOGFREQ,LOGFREQ2,
      STRIDE,STRIDE2,VDISPLACE,SWIGGLE,CWIGGLE,GMASK,RMASK,GRMASK,
      IS_ACTIVE,IS_DEFINED,IS_AVAILABLE,
      VALUE,ATOMARRAY,TYPEARRAY,INTARRAY,BIGINTARRAY,VECTORARRAY};
 
 // customize by adding a special function
 
 enum{SUM,XMIN,XMAX,AVE,TRAP,SLOPE};
 
 #define INVOKED_SCALAR 1
 #define INVOKED_VECTOR 2
 #define INVOKED_ARRAY 4
 #define INVOKED_PERATOM 8
 
 #define BIG 1.0e20
 
 /* ---------------------------------------------------------------------- */
 
 Variable::Variable(LAMMPS *lmp) : Pointers(lmp)
 {
   MPI_Comm_rank(world,&me);
 
   // no variables defined yet, all per-variable storage is unallocated
 
   maxvar = 0;
   nvar = 0;
 
   names = NULL;
   style = NULL;
   num = NULL;
   which = NULL;
   pad = NULL;
   reader = NULL;
   data = NULL;
   dvalue = NULL;
   vecs = NULL;
 
   eval_in_progress = NULL;
 
   // random number generators start out unset
 
   randomatom = NULL;
   randomequal = NULL;
 
   // customize by assigning a precedence level
   // listed from lowest (0) to highest (8) level
 
   precedence[DONE] = 0;
 
   precedence[OR] = 1;
   precedence[XOR] = 1;
 
   precedence[AND] = 2;
 
   precedence[EQ] = 3;
   precedence[NE] = 3;
 
   precedence[LT] = 4;
   precedence[LE] = 4;
   precedence[GT] = 4;
   precedence[GE] = 4;
 
   precedence[ADD] = 5;
   precedence[SUBTRACT] = 5;
 
   precedence[MULTIPLY] = 6;
   precedence[DIVIDE] = 6;
   precedence[MODULO] = 6;
 
   precedence[CARAT] = 7;
 
   precedence[UNARY] = 8;
   precedence[NOT] = 8;
 }
 
 /* ---------------------------------------------------------------------- */
 
 Variable::~Variable()
 {
   // release everything owned by each defined variable
 
   for (int ivar = 0; ivar < nvar; ivar++) {
     delete [] names[ivar];
     delete reader[ivar];
 
     // LOOP and ULOOP variables hold a single string, whatever num[] says
     // all other styles hold num[] strings
 
     int nstrings = num[ivar];
     if (style[ivar] == LOOP || style[ivar] == ULOOP) nstrings = 1;
     for (int istr = 0; istr < nstrings; istr++) delete [] data[ivar][istr];
     delete [] data[ivar];
 
     if (style[ivar] == VECTOR) memory->destroy(vecs[ivar].values);
   }
 
   // release the per-variable arrays themselves
 
   memory->sfree(names);
   memory->destroy(style);
   memory->destroy(num);
   memory->destroy(which);
   memory->destroy(pad);
   memory->sfree(reader);
   memory->sfree(data);
   memory->sfree(dvalue);
   memory->sfree(vecs);
 
   memory->destroy(eval_in_progress);
 
   delete randomequal;
   delete randomatom;
 }
 
 /* ----------------------------------------------------------------------
    called by variable command in input script
 ------------------------------------------------------------------------- */
 
 void Variable::set(int narg, char **arg)
 {
   if (narg < 2) error->all(FLERR,"Illegal variable command");
 
   int replaceflag = 0;
 
   // DELETE
   // doesn't matter if variable no longer exists
 
   if (strcmp(arg[1],"delete") == 0) {
     if (narg != 2) error->all(FLERR,"Illegal variable command");
     if (find(arg[0]) >= 0) remove(find(arg[0]));
     return;
 
   // INDEX
   // num = listed args, which = 1st value, data = copied args
 
   } else if (strcmp(arg[1],"index") == 0) {
     if (narg < 3) error->all(FLERR,"Illegal variable command");
     if (find(arg[0]) >= 0) return;
     if (nvar == maxvar) grow();
     style[nvar] = INDEX;
     num[nvar] = narg - 2;
     which[nvar] = 0;
     pad[nvar] = 0;
     data[nvar] = new char*[num[nvar]];
     copy(num[nvar],&arg[2],data[nvar]);
 
   // LOOP
   // 1 arg + pad: num = N, which = 1st value, data = single string
   // 2 args + pad: num = N2, which = N1, data = single string
 
   } else if (strcmp(arg[1],"loop") == 0) {
     if (find(arg[0]) >= 0) return;
     if (nvar == maxvar) grow();
     style[nvar] = LOOP;
     int nfirst,nlast;
     if (narg == 3 || (narg == 4 && strcmp(arg[3],"pad") == 0)) {
       nfirst = 1;
       nlast = force->inumeric(FLERR,arg[2]);
       if (nlast <= 0) error->all(FLERR,"Illegal variable command");
       if (narg == 4 && strcmp(arg[3],"pad") == 0) {
         char digits[12];
         sprintf(digits,"%d",nlast);
         pad[nvar] = strlen(digits);
       } else pad[nvar] = 0;
     } else if (narg == 4 || (narg == 5 && strcmp(arg[4],"pad") == 0)) {
       nfirst = force->inumeric(FLERR,arg[2]);
       nlast = force->inumeric(FLERR,arg[3]);
       if (nfirst > nlast || nlast < 0)
         error->all(FLERR,"Illegal variable command");
       if (narg == 5 && strcmp(arg[4],"pad") == 0) {
         char digits[12];
         sprintf(digits,"%d",nlast);
         pad[nvar] = strlen(digits);
       } else pad[nvar] = 0;
     } else error->all(FLERR,"Illegal variable command");
     num[nvar] = nlast;
     which[nvar] = nfirst-1;
     data[nvar] = new char*[1];
     data[nvar][0] = NULL;
 
   // WORLD
   // num = listed args, which = partition this proc is in, data = copied args
   // error check that num = # of worlds in universe
 
   } else if (strcmp(arg[1],"world") == 0) {
     if (narg < 3) error->all(FLERR,"Illegal variable command");
     if (find(arg[0]) >= 0) return;
     if (nvar == maxvar) grow();
     style[nvar] = WORLD;
     num[nvar] = narg - 2;
     if (num[nvar] != universe->nworlds)
       error->all(FLERR,"World variable count doesn't match # of partitions");
     which[nvar] = universe->iworld;
     pad[nvar] = 0;
     data[nvar] = new char*[num[nvar]];
     copy(num[nvar],&arg[2],data[nvar]);
 
   // UNIVERSE and ULOOP
   // for UNIVERSE: num = listed args, data = copied args
   // for ULOOP: num = N, data = single string
   // which = partition this proc is in
   // universe proc 0 creates lock file
   // error check that all other universe/uloop variables are same length
 
   } else if (strcmp(arg[1],"universe") == 0 || strcmp(arg[1],"uloop") == 0) {
     if (strcmp(arg[1],"universe") == 0) {
       if (narg < 3) error->all(FLERR,"Illegal variable command");
       if (find(arg[0]) >= 0) return;
       if (nvar == maxvar) grow();
       style[nvar] = UNIVERSE;
       num[nvar] = narg - 2;
       pad[nvar] = 0;
       data[nvar] = new char*[num[nvar]];
       copy(num[nvar],&arg[2],data[nvar]);
     } else if (strcmp(arg[1],"uloop") == 0) {
       if (narg < 3 || narg > 4 || (narg == 4 && strcmp(arg[3],"pad") != 0))
         error->all(FLERR,"Illegal variable command");
       if (find(arg[0]) >= 0) return;
       if (nvar == maxvar) grow();
       style[nvar] = ULOOP;
       num[nvar] = force->inumeric(FLERR,arg[2]);
       data[nvar] = new char*[1];
       data[nvar][0] = NULL;
       if (narg == 4) {
         char digits[12];
         sprintf(digits,"%d",num[nvar]);
         pad[nvar] = strlen(digits);
       } else pad[nvar] = 0;
     }
 
     if (num[nvar] < universe->nworlds)
       error->all(FLERR,"Universe/uloop variable count < # of partitions");
     which[nvar] = universe->iworld;
 
     if (universe->me == 0) {
       FILE *fp = fopen("tmp.lammps.variable","w");
       if (fp == NULL)
         error->one(FLERR,"Cannot open temporary file for world counter.");
       fprintf(fp,"%d\n",universe->nworlds);
       fclose(fp);
       fp = NULL;
     }
 
     for (int jvar = 0; jvar < nvar; jvar++)
       if (num[jvar] && (style[jvar] == UNIVERSE || style[jvar] == ULOOP) &&
           num[nvar] != num[jvar])
         error->all(FLERR,
                    "All universe/uloop variables must have same # of values");
 
   // STRING
   // replace pre-existing var if also style STRING (allows it to be reset)
   // num = 1, which = 1st value
   // data = 1 value, string to eval
 
   } else if (strcmp(arg[1],"string") == 0) {
     if (narg != 3) error->all(FLERR,"Illegal variable command");
     int ivar = find(arg[0]);
     if (ivar >= 0) {
       if (style[ivar] != STRING)
         error->all(FLERR,"Cannot redefine variable as a different style");
       delete [] data[ivar][0];
       copy(1,&arg[2],data[ivar]);
       replaceflag = 1;
     } else {
       if (nvar == maxvar) grow();
       style[nvar] = STRING;
       num[nvar] = 1;
       which[nvar] = 0;
       pad[nvar] = 0;
       data[nvar] = new char*[num[nvar]];
       copy(1,&arg[2],data[nvar]);
     }
 
   // GETENV
   // remove pre-existing var if also style GETENV (allows it to be reset)
   // num = 1, which = 1st value
   // data = 1 value, string to eval
 
   } else if (strcmp(arg[1],"getenv") == 0) {
     if (narg != 3) error->all(FLERR,"Illegal variable command");
     if (find(arg[0]) >= 0) {
       if (style[find(arg[0])] != GETENV)
         error->all(FLERR,"Cannot redefine variable as a different style");
       remove(find(arg[0]));
     }
     if (nvar == maxvar) grow();
     style[nvar] = GETENV;
     num[nvar] = 1;
     which[nvar] = 0;
     pad[nvar] = 0;
     data[nvar] = new char*[num[nvar]];
     copy(1,&arg[2],data[nvar]);
     data[nvar][1] = new char[VALUELENGTH];
     strcpy(data[nvar][1],"(undefined)");
 
   // SCALARFILE for strings or numbers
   // which = 1st value
   // data = 1 value, string to eval
 
   } else if (strcmp(arg[1],"file") == 0) {
     if (narg != 3) error->all(FLERR,"Illegal variable command");
     if (find(arg[0]) >= 0) return;
     if (nvar == maxvar) grow();
     style[nvar] = SCALARFILE;
     num[nvar] = 1;
     which[nvar] = 0;
     pad[nvar] = 0;
     data[nvar] = new char*[num[nvar]];
     data[nvar][0] = new char[MAXLINE];
     reader[nvar] = new VarReader(lmp,arg[0],arg[2],SCALARFILE);
     int flag = reader[nvar]->read_scalar(data[nvar][0]);
     if (flag) error->all(FLERR,"File variable could not read value");
 
   // ATOMFILE for numbers
   // which = 1st value
   // data = NULL
 
   } else if (strcmp(arg[1],"atomfile") == 0) {
     if (narg != 3) error->all(FLERR,"Illegal variable command");
     if (find(arg[0]) >= 0) return;
     if (nvar == maxvar) grow();
     style[nvar] = ATOMFILE;
     num[nvar] = 1;
     which[nvar] = 0;
     pad[nvar] = 0;
     data[nvar] = new char*[num[nvar]];
     data[nvar][0] = NULL;
     reader[nvar] = new VarReader(lmp,arg[0],arg[2],ATOMFILE);
     int flag = reader[nvar]->read_peratom();
     if (flag) error->all(FLERR,"Atomfile variable could not read values");
 
   // FORMAT
   // num = 3, which = 1st value
   // data = 3 values
   //   1st is name of variable to eval, 2nd is format string,
   //   3rd is filled on retrieval
 
   } else if (strcmp(arg[1],"format") == 0) {
     if (narg != 4) error->all(FLERR,"Illegal variable command");
     if (find(arg[0]) >= 0) return;
     if (nvar == maxvar) grow();
     style[nvar] = FORMAT;
     num[nvar] = 3;
     which[nvar] = 0;
     pad[nvar] = 0;
     data[nvar] = new char*[num[nvar]];
     copy(2,&arg[2],data[nvar]);
     data[nvar][2] = new char[VALUELENGTH];
     strcpy(data[nvar][2],"(undefined)");
 
   // EQUAL
   // replace pre-existing var if also style EQUAL (allows it to be reset)
   // num = 2, which = 1st value
   // data = 2 values, 1st is string to eval, 2nd is filled on retrieval
 
   } else if (strcmp(arg[1],"equal") == 0) {
     if (narg != 3) error->all(FLERR,"Illegal variable command");
     int ivar = find(arg[0]);
     if (ivar >= 0) {
       if (style[ivar] != EQUAL)
         error->all(FLERR,"Cannot redefine variable as a different style");
       delete [] data[ivar][0];
       copy(1,&arg[2],data[ivar]);
       replaceflag = 1;
     } else {
       if (nvar == maxvar) grow();
       style[nvar] = EQUAL;
       num[nvar] = 2;
       which[nvar] = 0;
       pad[nvar] = 0;
       data[nvar] = new char*[num[nvar]];
       copy(1,&arg[2],data[nvar]);
       data[nvar][1] = new char[VALUELENGTH];
       strcpy(data[nvar][1],"(undefined)");
     }
 
   // ATOM
   // replace pre-existing var if also style ATOM (allows it to be reset)
   // num = 1, which = 1st value
   // data = 1 value, string to eval
 
   } else if (strcmp(arg[1],"atom") == 0) {
     if (narg != 3) error->all(FLERR,"Illegal variable command");
     int ivar = find(arg[0]);
     if (ivar >= 0) {
       if (style[ivar] != ATOM)
         error->all(FLERR,"Cannot redefine variable as a different style");
       delete [] data[ivar][0];
       copy(1,&arg[2],data[ivar]);
       replaceflag = 1;
     } else {
       if (nvar == maxvar) grow();
       style[nvar] = ATOM;
       num[nvar] = 1;
       which[nvar] = 0;
       pad[nvar] = 0;
       data[nvar] = new char*[num[nvar]];
       copy(1,&arg[2],data[nvar]);
     }
 
   // VECTOR
   // replace pre-existing var if also style VECTOR (allows it to be reset)
   // num = 1, which = 1st value
   // data = 1 value, string to eval
 
   } else if (strcmp(arg[1],"vector") == 0) {
     if (narg != 3) error->all(FLERR,"Illegal variable command");
     int ivar = find(arg[0]);
     if (ivar >= 0) {
       if (style[ivar] != VECTOR)
         error->all(FLERR,"Cannot redefine variable as a different style");
       delete [] data[ivar][0];
       copy(1,&arg[2],data[ivar]);
       replaceflag = 1;
     } else {
       if (nvar == maxvar) grow();
       style[nvar] = VECTOR;
       num[nvar] = 1;
       which[nvar] = 0;
       pad[nvar] = 0;
       data[nvar] = new char*[num[nvar]];
       copy(1,&arg[2],data[nvar]);
     }
 
   // PYTHON
   // replace pre-existing var if also style PYTHON (allows it to be reset)
   // num = 2, which = 1st value
   // data = 2 values, 1st is Python func to invoke, 2nd is filled by invoke
 
   } else if (strcmp(arg[1],"python") == 0) {
     if (narg != 3) error->all(FLERR,"Illegal variable command");
     if (!python->is_enabled())
       error->all(FLERR,"LAMMPS is not built with Python embedded");
     int ivar = find(arg[0]);
     if (ivar >= 0) {
       if (style[ivar] != PYTHON)
         error->all(FLERR,"Cannot redefine variable as a different style");
       delete [] data[ivar][0];
       copy(1,&arg[2],data[ivar]);
       replaceflag = 1;
     } else {
       if (nvar == maxvar) grow();
       style[nvar] = PYTHON;
       num[nvar] = 2;
       which[nvar] = 1;
       pad[nvar] = 0;
       data[nvar] = new char*[num[nvar]];
       copy(1,&arg[2],data[nvar]);
       data[nvar][1] = new char[VALUELENGTH];
       strcpy(data[nvar][1],"(undefined)");
     }
 
   // INTERNAL
   // replace pre-existing var if also style INTERNAL (allows it to be reset)
   // num = 1, for string representation of dvalue, set by retrieve()
   // dvalue = numeric initialization from 2nd arg, reset by internal_set()
 
   } else if (strcmp(arg[1],"internal") == 0) {
     if (narg != 3) error->all(FLERR,"Illegal variable command");
     int ivar = find(arg[0]);
     if (ivar >= 0) {
       if (style[ivar] != INTERNAL)
         error->all(FLERR,"Cannot redefine variable as a different style");
       dvalue[nvar] = force->numeric(FLERR,arg[2]);
       replaceflag = 1;
     } else {
       if (nvar == maxvar) grow();
       style[nvar] = INTERNAL;
       num[nvar] = 1;
       which[nvar] = 0;
       pad[nvar] = 0;
       data[nvar] = new char*[num[nvar]];
       data[nvar][0] = new char[VALUELENGTH];
       dvalue[nvar] = force->numeric(FLERR,arg[2]);
     }
 
   } else error->all(FLERR,"Illegal variable command");
 
   // set name of variable, if not replacing one flagged with replaceflag
   // name must be all alphanumeric chars or underscores
 
   if (replaceflag) return;
 
   int n = strlen(arg[0]) + 1;
   names[nvar] = new char[n];
   strcpy(names[nvar],arg[0]);
 
   for (int i = 0; i < n-1; i++)
     if (!isalnum(names[nvar][i]) && names[nvar][i] != '_')
       error->all(FLERR,"Variable name must be alphanumeric or "
                  "underscore characters");
   nvar++;
 }
 
 /* ----------------------------------------------------------------------
    INDEX variable created by command-line argument
    make it INDEX rather than STRING so cannot be re-defined in input script
 ------------------------------------------------------------------------- */
 
 void Variable::set(char *name, int narg, char **arg)
 {
   // build the arg list "name index arg[0] arg[1] ..." and hand it
   // to the input-script version of set()
   // the strings themselves are not copied here, only the pointers
 
   const int ntotal = narg + 2;
   char **fullargs = new char*[ntotal];
 
   fullargs[0] = name;
   fullargs[1] = (char *) "index";
   for (int iarg = 2; iarg < ntotal; iarg++) fullargs[iarg] = arg[iarg-2];
 
   set(ntotal,fullargs);
 
   delete [] fullargs;
 }
 
 /* ----------------------------------------------------------------------
    set existing STRING variable to str
    return 0 if successful
    return -1 if variable doesn't exist or isn't a STRING variable
    called via library interface, so external programs can set variables
 ------------------------------------------------------------------------- */
 
 int Variable::set_string(char *name, char *str)
 {
   // only an existing STRING variable can be reset this way
 
   const int ivar = find(name);
   if (ivar < 0 || style[ivar] != STRING) return -1;
 
   // discard the old string and store a copy of the new one
 
   delete [] data[ivar][0];
   copy(1,&str,data[ivar]);
   return 0;
 }
 
 /* ----------------------------------------------------------------------
    increment variable(s)
    return 0 if all variables were successfully incremented
    return 1 if any variable is exhausted, free the variable to allow re-use
 ------------------------------------------------------------------------- */
 
 int Variable::next(int narg, char **arg)
 {
   int ivar;
 
   if (narg == 0) error->all(FLERR,"Illegal next command");
 
   // check that variables exist and are all the same style
   // exception: UNIVERSE and ULOOP variables can be mixed in same next command
 
   for (int iarg = 0; iarg < narg; iarg++) {
     ivar = find(arg[iarg]);
     if (ivar < 0) error->all(FLERR,"Invalid variable in next command");
     if (style[ivar] == ULOOP && style[find(arg[0])] == UNIVERSE) continue;
     else if (style[ivar] == UNIVERSE && style[find(arg[0])] == ULOOP) continue;
     else if (style[ivar] != style[find(arg[0])])
       error->all(FLERR,"All variables in next command must be same style");
   }
 
   // invalid styles: STRING, EQUAL, WORLD, ATOM, VECTOR, GETENV,
   //                 FORMAT, PYTHON, INTERNAL
 
   int istyle = style[find(arg[0])];
   if (istyle == STRING || istyle == EQUAL || istyle == WORLD ||
       istyle == GETENV || istyle == ATOM || istyle == VECTOR || 
       istyle == FORMAT || istyle == PYTHON || istyle == INTERNAL)
     error->all(FLERR,"Invalid variable style with next command");
 
   // if istyle = UNIVERSE or ULOOP, insure all such variables are incremented
 
   if (istyle == UNIVERSE || istyle == ULOOP)
     for (int i = 0; i < nvar; i++) {
       if (style[i] != UNIVERSE && style[i] != ULOOP) continue;
       int iarg = 0;
       for (iarg = 0; iarg < narg; iarg++)
         if (strcmp(arg[iarg],names[i]) == 0) break;
       if (iarg == narg)
         error->universe_one(FLERR,"Next command must list all "
                             "universe and uloop variables");
     }
 
   // increment all variables in list
   // if any variable is exhausted, set flag = 1 and remove var to allow re-use
 
   int flag = 0;
 
   if (istyle == INDEX || istyle == LOOP) {
     for (int iarg = 0; iarg < narg; iarg++) {
       ivar = find(arg[iarg]);
       which[ivar]++;
       if (which[ivar] >= num[ivar]) {
         flag = 1;
         remove(ivar);
       }
     }
 
   } else if (istyle == SCALARFILE) {
 
     for (int iarg = 0; iarg < narg; iarg++) {
       ivar = find(arg[iarg]);
       int done = reader[ivar]->read_scalar(data[ivar][0]);
       if (done) {
         flag = 1;
         remove(ivar);
       }
     }
 
   } else if (istyle == ATOMFILE) {
 
     for (int iarg = 0; iarg < narg; iarg++) {
       ivar = find(arg[iarg]);
       int done = reader[ivar]->read_peratom();
       if (done) {
         flag = 1;
         remove(ivar);
       }
     }
 
   } else if (istyle == UNIVERSE || istyle == ULOOP) {
 
     // wait until lock file can be created and owned by proc 0 of this world
     // rename() is not atomic in practice, but no known simple fix
     //   means multiple procs can read/write file at the same time (bad!)
     // random delays help
     // delay for random fraction of 1 second before first rename() call
     // delay for random fraction of 1 second before subsequent tries
     // when successful, read next available index and Bcast it within my world
 
     int nextindex;
     if (me == 0) {
       int seed = 12345 + universe->me + which[find(arg[0])];
       RanMars *random = new RanMars(lmp,seed);
       int delay = (int) (1000000*random->uniform());
       usleep(delay);
       while (1) {
         if (!rename("tmp.lammps.variable","tmp.lammps.variable.lock")) break;
         delay = (int) (1000000*random->uniform());
         usleep(delay);
       }
       delete random;
 
       FILE *fp = fopen("tmp.lammps.variable.lock","r");
       fscanf(fp,"%d",&nextindex);
       //printf("READ %d %d\n",universe->me,nextindex);
       fclose(fp);
       fp = fopen("tmp.lammps.variable.lock","w");
       fprintf(fp,"%d\n",nextindex+1);
       //printf("WRITE %d %d\n",universe->me,nextindex+1);
       fclose(fp);
       fp = NULL;
       rename("tmp.lammps.variable.lock","tmp.lammps.variable");
       if (universe->uscreen)
         fprintf(universe->uscreen,
                 "Increment via next: value %d on partition %d\n",
                 nextindex+1,universe->iworld);
       if (universe->ulogfile)
         fprintf(universe->ulogfile,
                 "Increment via next: value %d on partition %d\n",
                 nextindex+1,universe->iworld);
     }
     MPI_Bcast(&nextindex,1,MPI_INT,0,world);
 
     // set all variables in list to nextindex
     // must increment all UNIVERSE and ULOOP variables here
     // error check above tested for this
 
     for (int iarg = 0; iarg < narg; iarg++) {
       ivar = find(arg[iarg]);
       which[ivar] = nextindex;
       if (which[ivar] >= num[ivar]) {
         flag = 1;
         remove(ivar);
       }
     }
   }
 
   return flag;
 }
 
 /* ----------------------------------------------------------------------
    search for name in list of variables names
    return index or -1 if not found
 ------------------------------------------------------------------------- */
 
 int Variable::find(char *name)
 {
   if(name==NULL) return -1;
   for (int i = 0; i < nvar; i++)
     if (strcmp(name,names[i]) == 0) return i;
   return -1;
 }
 
 /* ----------------------------------------------------------------------
    initialize one atom's storage values in all VarReaders via fix STORE
    called when atom is created
 ------------------------------------------------------------------------- */
 
 void Variable::set_arrays(int i)
 {
   for (int i = 0; i < nvar; i++)
     if (reader[i] && style[i] == ATOMFILE)
       reader[i]->fixstore->vstore[i] = 0.0;
 }
 
 /* ----------------------------------------------------------------------
    called by python command in input script
    simply pass input script line args to Python class
 ------------------------------------------------------------------------- */
 
 void Variable::python_command(int narg, char **arg)
 {
   // report an error if the Python class says it is not enabled
   const bool embedded = python->is_enabled();
   if (!embedded)
     error->all(FLERR,"LAMMPS is not built with Python embedded");
 
   // hand the input script args to the Python class unchanged
   python->command(narg,arg);
 }
 
 /* ----------------------------------------------------------------------
    return 1 if variable is EQUAL or INTERNAL or PYTHON numeric style, 0 if not
    this is checked before call to compute_equal() to return a double
 ------------------------------------------------------------------------- */
 
 int Variable::equalstyle(int ivar)
 {
   if (style[ivar] == EQUAL || style[ivar] == INTERNAL) return 1;
   if (style[ivar] == PYTHON) {
     int ifunc = python->variable_match(data[ivar][0],names[ivar],1);
     if (ifunc < 0) return 0;
     else return 1;
   }
   return 0;
 }
 
 /* ----------------------------------------------------------------------
    return 1 if variable is ATOM or ATOMFILE style, 0 if not
    this is checked before call to compute_atom() to return a vector of doubles
 ------------------------------------------------------------------------- */
 
 int Variable::atomstyle(int ivar)
 {
   if (style[ivar] == ATOM || style[ivar] == ATOMFILE) return 1;
   return 0;
 }
 
 /* ----------------------------------------------------------------------
    return 1 if variable is VECTOR style, 0 if not
    this is checked before call to compute_vector() to return a vector of doubles
 ------------------------------------------------------------------------- */
 
 int Variable::vectorstyle(int ivar)
 {
   // map the style test onto the 1/0 flag callers expect
   return (style[ivar] == VECTOR) ? 1 : 0;
 }
 
 /* ----------------------------------------------------------------------
    check if variable with name is PYTHON and matches funcname
    called by Python class before it invokes a Python function
    return data storage so Python function can return a value for this variable
    return NULL if not a match
 ------------------------------------------------------------------------- */
 
 char *Variable::pythonstyle(char *name, char *funcname)
 {
   int ivar = find(name);
 
   // a match requires, in this order: the variable exists, it is PYTHON
   //   style, and the function name stored in data[ivar][0] equals funcname
   // && short-circuits, so data[] is never indexed with an invalid ivar
   const bool match = (ivar >= 0) && (style[ivar] == PYTHON) &&
     (strcmp(data[ivar][0],funcname) == 0);
   if (!match) return NULL;
 
   // storage the Python function can return its value into
   return data[ivar][1];
 }
 
 /* ----------------------------------------------------------------------
    return 1 if variable is INTERNAL style, 0 if not
    this is checked before call to set_internal() to assure it can be set
 ------------------------------------------------------------------------- */
 
 int Variable::internalstyle(int ivar)
 {
   // map the style test onto the 1/0 flag callers expect
   return (style[ivar] == INTERNAL) ? 1 : 0;
 }
 
 /* ----------------------------------------------------------------------
    return ptr to the data text associated with a variable
    if INDEX or WORLD or UNIVERSE or STRING or SCALARFILE,
      return ptr to stored string
    if LOOP or ULOOP, write int to data[0] and return ptr to string
    if EQUAL, evaluate variable and put result in str
    if FORMAT, evaluate its variable and put formatted result in str
    if GETENV, query environment and put result in str
    if PYTHON, evaluate Python function, it will put result in str
    if INTERNAL, convert dvalue and put result in str
    if ATOM or ATOMFILE or VECTOR, return NULL
    return NULL if no variable with name, or which value is bad,
      caller must respond
 ------------------------------------------------------------------------- */
 
 char *Variable::retrieve(char *name)
 {
   // unknown variable, or one whose "which" index is out of range:
   //   return NULL, caller must respond
 
   int ivar = find(name);
   if (ivar < 0) return NULL;
   if (which[ivar] >= num[ivar]) return NULL;
 
   // flag detects a variable whose evaluation re-enters itself
   // it must be cleared again on every path that returns from here,
   //   including the NULL returns below, otherwise the next retrieve()
   //   of the same variable would falsely report a circular dependency
 
   if (eval_in_progress[ivar])
     error->all(FLERR,"Variable has circular dependency");
   eval_in_progress[ivar] = 1;
 
   char *str = NULL;
   if (style[ivar] == INDEX || style[ivar] == WORLD ||
       style[ivar] == UNIVERSE || style[ivar] == STRING ||
       style[ivar] == SCALARFILE) {
 
     // stored string selected by the current index
 
     str = data[ivar][which[ivar]];
 
   } else if (style[ivar] == LOOP || style[ivar] == ULOOP) {
 
     // format 1-based loop counter, zero-padded to pad[ivar] digits if set
     // data[ivar][0] is re-allocated to hold the new string
 
     char result[16];
     if (pad[ivar] == 0) sprintf(result,"%d",which[ivar]+1);
     else {
       char padstr[16];
       sprintf(padstr,"%%0%dd",pad[ivar]);
       sprintf(result,padstr,which[ivar]+1);
     }
     int n = strlen(result) + 1;
     delete [] data[ivar][0];
     data[ivar][0] = new char[n];
     strcpy(data[ivar][0],result);
     str = data[ivar][0];
 
   } else if (style[ivar] == EQUAL) {
 
     // evaluate formula in data[ivar][0], print result into data[ivar][1]
 
     double answer = evaluate(data[ivar][0],NULL);
     sprintf(data[ivar][1],"%.15g",answer);
     str = data[ivar][1];
 
   } else if (style[ivar] == FORMAT) {
 
     // data[ivar][0] = name of variable to format, data[ivar][1] = format
     // referenced variable must exist and be numeric, else return NULL
     // clear the in-progress flag first so the failure is not sticky
 
     int jvar = find(data[ivar][0]);
     if (jvar == -1 || !equalstyle(jvar)) {
       eval_in_progress[ivar] = 0;
       return NULL;
     }
     double answer = compute_equal(jvar);
     sprintf(data[ivar][2],data[ivar][1],answer);
     str = data[ivar][2];
 
   } else if (style[ivar] == GETENV) {
 
     // unset environment variable is treated as an empty string
     // buffer is re-allocated only if value does not fit in VALUELENGTH
 
     const char *result = getenv(data[ivar][0]);
     if (result == NULL) result = (const char *) "";
     int n = strlen(result) + 1;
     if (n > VALUELENGTH) {
       delete [] data[ivar][1];
       data[ivar][1] = new char[n];
     }
     strcpy(data[ivar][1],result);
     str = data[ivar][1];
 
   } else if (style[ivar] == PYTHON) {
 
     int ifunc = python->variable_match(data[ivar][0],names[ivar],0);
     if (ifunc < 0)
       error->all(FLERR,"Python variable does not match Python function");
     python->invoke_function(ifunc,data[ivar][1]);
     str = data[ivar][1];
 
     // if Python func returns a string longer than VALUELENGTH
     // then the Python class stores the result, query it via long_string()
 
     char *strlong = python->long_string(ifunc);
     if (strlong) str = strlong;
 
   } else if (style[ivar] == INTERNAL) {
 
     // convert stored double to its string form
 
     sprintf(data[ivar][0],"%.15g",dvalue[ivar]);
     str = data[ivar][0];
 
   } else if (style[ivar] == ATOM || style[ivar] == ATOMFILE ||
              style[ivar] == VECTOR) {
 
     // these styles have no string form: NULL is returned
     // fall through so the in-progress flag is cleared like any other path
 
     str = NULL;
   }
 
   eval_in_progress[ivar] = 0;
 
   return str;
 }
 
 /* ----------------------------------------------------------------------
    return result of equal-style variable evaluation
    can be EQUAL or INTERNAL style or PYTHON numeric style
    for PYTHON, don't need to check python->variable_match() error return,
      since caller will have already checked via equalstyle()
 ------------------------------------------------------------------------- */
 
 double Variable::compute_equal(int ivar)
 {
   if (eval_in_progress[ivar])
     error->all(FLERR,"Variable has circular dependency");
   eval_in_progress[ivar] = 1;
 
   double value = 0.0;
   if (style[ivar] == EQUAL) value = evaluate(data[ivar][0],NULL);
   else if (style[ivar] == INTERNAL) value = dvalue[ivar];
   else if (style[ivar] == PYTHON) {
     int ifunc = python->find(data[ivar][0]);
     if (ifunc < 0) error->all(FLERR,"Python variable has no function");
     python->invoke_function(ifunc,data[ivar][1]);
     value = atof(data[ivar][1]);
   }
 
   eval_in_progress[ivar] = 0;
   return value;
 }
 
 /* ----------------------------------------------------------------------
    return result of immediate equal-style variable evaluation
    called from Input::substitute()
    don't need to flag eval_in_progress since is an immediate variable
 ------------------------------------------------------------------------- */
 
 double Variable::compute_equal(char *str)
 {
   // evaluate the formula text directly, no parse tree is requested
   const double answer = evaluate(str,NULL);
   return answer;
 }
 
 /* ----------------------------------------------------------------------
    compute result of atom-style and atomfile-style variable evaluation
    only computed for atoms in igroup, else result is 0.0
    answers are placed every stride locations into result
    if sumflag, add variable values to existing result
 ------------------------------------------------------------------------- */
 
 void Variable::compute_atom(int ivar, int igroup,
                             double *result, int stride, int sumflag)
 {
   // ivar = index of variable to evaluate
   // igroup = index of group, only atoms in it receive a computed value
   // result = output vector, written at every stride-th location
   //          if NULL, the formula is still evaluated but nothing is stored
   // sumflag = 0 to overwrite result, else add to existing result
 
   // initialized so that neither is ever used or freed uninitialized
 
   Tree *tree = NULL;
   double *vstore = NULL;
 
   if (eval_in_progress[ivar])
     error->all(FLERR,"Variable has circular dependency");
   eval_in_progress[ivar] = 1;
 
   // ATOM style: parse formula into a tree
   // otherwise: use per-atom values held by the variable's reader
 
   if (style[ivar] == ATOM) {
     treetype = ATOM;
     evaluate(data[ivar][0],&tree);
     collapse_tree(tree);
   } else vstore = reader[ivar]->fixstore->vstore;
 
   // no output requested
   // the tree built above must still be freed, else it is leaked
 
   if (result == NULL) {
     if (style[ivar] == ATOM) free_tree(tree);
     eval_in_progress[ivar] = 0;
     return;
   }
 
   int groupbit = group->bitmask[igroup];
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
   // m = location in result for atom i
   // when overwriting, atoms outside the group are set to 0.0
   // when summing, atoms outside the group are left untouched
 
   if (style[ivar] == ATOM) {
     if (sumflag == 0) {
       int m = 0;
       for (int i = 0; i < nlocal; i++) {
         if (mask[i] & groupbit) result[m] = eval_tree(tree,i);
         else result[m] = 0.0;
         m += stride;
       }
 
     } else {
       int m = 0;
       for (int i = 0; i < nlocal; i++) {
         if (mask[i] & groupbit) result[m] += eval_tree(tree,i);
         m += stride;
       }
     }
 
   } else {
     if (sumflag == 0) {
       int m = 0;
       for (int i = 0; i < nlocal; i++) {
         if (mask[i] & groupbit) result[m] = vstore[i];
         else result[m] = 0.0;
         m += stride;
       }
 
     } else {
       int m = 0;
       for (int i = 0; i < nlocal; i++) {
         if (mask[i] & groupbit) result[m] += vstore[i];
         m += stride;
       }
     }
   }
 
   if (style[ivar] == ATOM) free_tree(tree);
   eval_in_progress[ivar] = 0;
 }
 
 /* ----------------------------------------------------------------------
    compute result of vector-style variable evaluation
    return length of vector and result pointer to vector values
      if length == 0 or -1 (mismatch), generate an error
    if variable already computed on this timestep, just return
    else evaluate the formula and its length, store results in VecVar entry
 ------------------------------------------------------------------------- */
 
 int Variable::compute_vector(int ivar, double **result)
 {
   Tree *tree;
   if (vecs[ivar].currentstep == update->ntimestep) {
     *result = vecs[ivar].values;
     return vecs[ivar].n;
   }
 
   if (eval_in_progress[ivar])
     error->all(FLERR,"Variable has circular dependency");
   eval_in_progress[ivar] = 1;
 
   treetype = VECTOR;
   evaluate(data[ivar][0],&tree);
   collapse_tree(tree);
   int nlen = size_tree_vector(tree);
   if (nlen == 0) error->all(FLERR,"Vector-style variable has zero length");
   if (nlen < 0) error->all(FLERR,
                            "Inconsistent lengths in vector-style variable");
 
   // (re)allocate space for results if necessary
 
   if (nlen > vecs[ivar].nmax) {
     memory->destroy(vecs[ivar].values);
     vecs[ivar].nmax = nlen;
     memory->create(vecs[ivar].values,vecs[ivar].nmax,"variable:values");
   }
 
   vecs[ivar].n = nlen;
   vecs[ivar].currentstep = update->ntimestep;
   double *vec = vecs[ivar].values;
   for (int i = 0; i < nlen; i++)
     vec[i] = eval_tree(tree,i);
 
   free_tree(tree);
   eval_in_progress[ivar] = 0;
 
   *result = vec;
   return nlen;
 }
 
 /* ----------------------------------------------------------------------
    set value stored by INTERNAL style ivar
 ------------------------------------------------------------------------- */
 
 void Variable::internal_set(int ivar, double value)
 {
   // overwrite the numeric value stored for variable ivar
   double *slot = &dvalue[ivar];
   *slot = value;
 }
 
 /* ----------------------------------------------------------------------
    remove Nth variable from list and compact list
    delete reader explicitly if it exists
 ------------------------------------------------------------------------- */
 
 void Variable::remove(int n)
 {
   // free storage owned by variable n
   // for LOOP and ULOOP only data[n][0] is freed,
   //   for all other styles each of the num[n] strings is freed
 
   delete [] names[n];
   if (style[n] == LOOP || style[n] == ULOOP) delete [] data[n][0];
   else for (int i = 0; i < num[n]; i++) delete [] data[n][i];
   delete [] data[n];
   delete reader[n];
 
   // compact list by shifting every later variable down one slot
   // NOTE(review): the per-variable numeric value and vector-cache arrays
   //   that grow() also allocates are not shifted here - confirm whether
   //   INTERNAL and VECTOR variables stored after n need that as well
 
   for (int i = n+1; i < nvar; i++) {
     names[i-1] = names[i];
     style[i-1] = style[i];
     num[i-1] = num[i];
     which[i-1] = which[i];
     pad[i-1] = pad[i];
     reader[i-1] = reader[i];
     data[i-1] = data[i];
   }
   nvar--;
 
   // vacated last slot still holds either the pointers just freed (if n was
   //   the last variable) or duplicates of the entry now stored at nvar-1
   // reset them so a stale pointer cannot be deleted a second time
 
   names[nvar] = NULL;
   data[nvar] = NULL;
   reader[nvar] = NULL;
 }
 
 /* ----------------------------------------------------------------------
   make space in arrays for new variable
 ------------------------------------------------------------------------- */
 
 void Variable::grow()
 {
   int old = maxvar;
   maxvar += VARDELTA;
   names = (char **) memory->srealloc(names,maxvar*sizeof(char *),"var:names");
   memory->grow(style,maxvar,"var:style");
   memory->grow(num,maxvar,"var:num");
   memory->grow(which,maxvar,"var:which");
   memory->grow(pad,maxvar,"var:pad");
 
   reader = (VarReader **)
     memory->srealloc(reader,maxvar*sizeof(VarReader *),"var:reader");
   for (int i = old; i < maxvar; i++) reader[i] = NULL;
 
   data = (char ***) memory->srealloc(data,maxvar*sizeof(char **),"var:data");
   memory->grow(dvalue,maxvar,"var:dvalue");
 
   vecs = (VecVar *) memory->srealloc(vecs,maxvar*sizeof(VecVar),"var:vecvar");
   for (int i = old; i < maxvar; i++) {
     vecs[i].nmax = 0;
     vecs[i].currentstep = -1;
     vecs[i].values = NULL;
   }
 
   memory->grow(eval_in_progress,maxvar,"var:eval_in_progress");
   for (int i = 0; i < maxvar; i++) eval_in_progress[i] = 0;
 }
 
 /* ----------------------------------------------------------------------
    copy narg strings from **from to **to, and allocate space for them
 ------------------------------------------------------------------------- */
 
 void Variable::copy(int narg, char **from, char **to)
 {
   // each destination string is newly allocated with new[],
   //   sized to the source string plus its terminating null
 
   for (int iarg = 0; iarg < narg; iarg++) {
     const char *src = from[iarg];
     const int nbytes = strlen(src) + 1;
     char *dest = new char[nbytes];
     to[iarg] = dest;
     strcpy(dest,src);
   }
 }
 
 /* ----------------------------------------------------------------------
    recursive evaluation of a string str
    str is an equal-style or atom-style or vector-style formula
      containing one or more items:
      number = 0.0, -5.45, 2.8e-4, ...
      constant = PI, version, yes, no, on, off
      thermo keyword = ke, vol, atoms, ...
      math operation = (),-x,x+y,x-y,x*y,x/y,x^y,
                       x==y,x!=y,x<y,x<=y,x>y,x>=y,x&&y,x||y,
                       sqrt(x),exp(x),ln(x),log(x),abs(x),
                       sin(x),cos(x),tan(x),asin(x),atan2(y,x),...
      group function = count(group), mass(group), xcm(group,x), ...
      special function = sum(x),min(x), ...
      atom value = x[i], y[i], vx[i], ...
      atom vector = x, y, vx, ...
      compute = c_ID, c_ID[i], c_ID[i][j]
      fix = f_ID, f_ID[i], f_ID[i][j]
      variable = v_name, v_name[i]
    equal-style variables pass in tree = NULL:
      evaluate the formula, return result as a double
    atom-style and vector-style variables pass in tree = non-NULL:
      parse the formula but do not evaluate it
      create a parse tree and return it
 ------------------------------------------------------------------------- */
 
 double Variable::evaluate(char *str, Tree **tree)
 {
   int op,opprevious;
   double value1,value2;
   char onechar;
   char *ptr;
 
   double argstack[MAXLEVEL];
   Tree *treestack[MAXLEVEL];
   int opstack[MAXLEVEL];
   int nargstack = 0;
   int ntreestack = 0;
   int nopstack = 0;
 
   int i = 0;
   int expect = ARG;
 
   while (1) {
     onechar = str[i];
 
     // whitespace: just skip
 
     if (isspace(onechar)) i++;
 
     // ----------------
     // parentheses: recursively evaluate contents of parens
     // ----------------
 
     else if (onechar == '(') {
       if (expect == OP) error->all(FLERR,"Invalid syntax in variable formula");
       expect = OP;
 
       char *contents;
       i = find_matching_paren(str,i,contents);
       i++;
 
       // evaluate contents and push on stack
 
       if (tree) {
         Tree *newtree;
         evaluate(contents,&newtree);
         treestack[ntreestack++] = newtree;
       } else argstack[nargstack++] = evaluate(contents,NULL);
 
       delete [] contents;
 
     // ----------------
     // number: push value onto stack
     // ----------------
 
     } else if (isdigit(onechar) || onechar == '.') {
       if (expect == OP) error->all(FLERR,"Invalid syntax in variable formula");
       expect = OP;
 
       // istop = end of number, including scientific notation
 
       int istart = i;
       while (isdigit(str[i]) || str[i] == '.') i++;
       if (str[i] == 'e' || str[i] == 'E') {
         i++;
         if (str[i] == '+' || str[i] == '-') i++;
         while (isdigit(str[i])) i++;
       }
       int istop = i - 1;
 
       int n = istop - istart + 1;
       char *number = new char[n+1];
       strncpy(number,&str[istart],n);
       number[n] = '\0';
 
       if (tree) {
         Tree *newtree = new Tree();
         newtree->type = VALUE;
         newtree->value = atof(number);
         newtree->first = newtree->second = NULL;
         newtree->extra = 0;
         treestack[ntreestack++] = newtree;
       } else argstack[nargstack++] = atof(number);
 
       delete [] number;
 
     // ----------------
     // letter: c_ID, c_ID[], c_ID[][], f_ID, f_ID[], f_ID[][],
     //         v_name, v_name[], exp(), xcm(,), x, x[], PI, vol
     // ----------------
 
     } else if (isalpha(onechar)) {
       if (expect == OP) error->all(FLERR,"Invalid syntax in variable formula");
       expect = OP;
 
       // istop = end of word
       // word = all alphanumeric or underscore
 
       int istart = i;
       while (isalnum(str[i]) || str[i] == '_') i++;
       int istop = i-1;
 
       int n = istop - istart + 1;
       char *word = new char[n+1];
       strncpy(word,&str[istart],n);
       word[n] = '\0';
 
       // ----------------
       // compute
       // ----------------
 
       if (strncmp(word,"c_",2) == 0 || strncmp(word,"C_",2) == 0) {
         if (domain->box_exist == 0)
           error->all(FLERR,
                      "Variable evaluation before simulation box is defined");
 
 	// uppercase used to force access of 
 	// global vector vs global scalar, and global array vs global vector
 
 	int lowercase = 1;
 	if (word[0] == 'C') lowercase = 0;
 
         int icompute = modify->find_compute(word+2);
         if (icompute < 0)
           error->all(FLERR,"Invalid compute ID in variable formula");
         Compute *compute = modify->compute[icompute];
 
         // parse zero or one or two trailing brackets
         // point i beyond last bracket
         // nbracket = # of bracket pairs
         // index1,index2 = int inside each bracket pair, possibly an atom ID
 
         int nbracket;
         tagint index1,index2;
         if (str[i] != '[') nbracket = 0;
         else {
           nbracket = 1;
           ptr = &str[i];
           index1 = int_between_brackets(ptr,1);
           i = ptr-str+1;
           if (str[i] == '[') {
             nbracket = 2;
             ptr = &str[i];
             index2 = int_between_brackets(ptr,1);
             i = ptr-str+1;
           }
         }
 
         // c_ID = scalar from global scalar, must be lowercase
 
         if (nbracket == 0 && compute->scalar_flag && lowercase) {
 
           if (update->whichflag == 0) {
             if (compute->invoked_scalar != update->ntimestep)
               error->all(FLERR,"Compute used in variable between runs "
                          "is not current");
           } else if (!(compute->invoked_flag & INVOKED_SCALAR)) {
             compute->compute_scalar();
             compute->invoked_flag |= INVOKED_SCALAR;
           }
 
           value1 = compute->scalar;
           if (tree) {
             Tree *newtree = new Tree();
             newtree->type = VALUE;
             newtree->value = value1;
             newtree->first = newtree->second = NULL;
             newtree->nextra = 0;
             treestack[ntreestack++] = newtree;
           } else argstack[nargstack++] = value1;
 
         // c_ID[i] = scalar from global vector, must be lowercase
 
         } else if (nbracket == 1 && compute->vector_flag && lowercase) {
 
           if (index1 > compute->size_vector &&
               compute->size_vector_variable == 0)
             error->all(FLERR,"Variable formula compute vector "
                        "is accessed out-of-range");
           if (update->whichflag == 0) {
             if (compute->invoked_vector != update->ntimestep)
               error->all(FLERR,"Compute used in variable between runs "
                          "is not current");
           } else if (!(compute->invoked_flag & INVOKED_VECTOR)) {
             compute->compute_vector();
             compute->invoked_flag |= INVOKED_VECTOR;
           }
 
           if (compute->size_vector_variable &&
               index1 > compute->size_vector) value1 = 0.0;
           else value1 = compute->vector[index1-1];
           if (tree) {
             Tree *newtree = new Tree();
             newtree->type = VALUE;
             newtree->value = value1;
             newtree->first = newtree->second = NULL;
             newtree->nextra = 0;
             treestack[ntreestack++] = newtree;
           } else argstack[nargstack++] = value1;
 
         // c_ID[i][j] = scalar from global array, must be lowercase
 
         } else if (nbracket == 2 && compute->array_flag && lowercase) {
 
           if (index1 > compute->size_array_rows &&
               compute->size_array_rows_variable == 0)
             error->all(FLERR,"Variable formula compute array "
                        "is accessed out-of-range");
           if (index2 > compute->size_array_cols)
             error->all(FLERR,"Variable formula compute array "
                        "is accessed out-of-range");
           if (update->whichflag == 0) {
             if (compute->invoked_array != update->ntimestep)
               error->all(FLERR,"Compute used in variable between runs "
                          "is not current");
           } else if (!(compute->invoked_flag & INVOKED_ARRAY)) {
             compute->compute_array();
             compute->invoked_flag |= INVOKED_ARRAY;
           }
 
           if (compute->size_array_rows_variable &&
               index1 > compute->size_array_rows) value1 = 0.0;
           else value1 = compute->array[index1-1][index2-1];
           if (tree) {
             Tree *newtree = new Tree();
             newtree->type = VALUE;
             newtree->value = value1;
             newtree->first = newtree->second = NULL;
             newtree->nextra = 0;
             treestack[ntreestack++] = newtree;
           } else argstack[nargstack++] = value1;
 
         // c_ID = vector from global vector, lowercase or uppercase
 	
         } else if (nbracket == 0 && compute->vector_flag) {
 
           if (tree == NULL)
             error->all(FLERR,
                        "Compute global vector in equal-style variable formula");
           if (treetype == ATOM)
             error->all(FLERR,
                        "Compute global vector in atom-style variable formula");
 	  if (compute->size_vector == 0) 
             error->all(FLERR,"Variable formula compute vector is zero length");
           if (update->whichflag == 0) {
             if (compute->invoked_vector != update->ntimestep)
               error->all(FLERR,"Compute used in variable between runs "
                          "is not current");
           } else if (!(compute->invoked_flag & INVOKED_VECTOR)) {
             compute->compute_vector();
             compute->invoked_flag |= INVOKED_VECTOR;
           }
 
           Tree *newtree = new Tree();
           newtree->type = VECTORARRAY;
           newtree->array = compute->vector;
 	  newtree->nvector = compute->size_vector;
           newtree->nstride = 1;
           newtree->selfalloc = 0;
           newtree->first = newtree->second = NULL;
           newtree->nextra = 0;
           treestack[ntreestack++] = newtree;
 
         // c_ID[i] = vector from global array, lowercase or uppercase
 	
         } else if (nbracket == 1 && compute->array_flag) {
 
           if (tree == NULL)
             error->all(FLERR,
                        "Compute global vector in equal-style variable formula");
           if (treetype == ATOM)
             error->all(FLERR,
                        "Compute global vector in atom-style variable formula");
 	  if (compute->size_array_rows == 0) 
             error->all(FLERR,"Variable formula compute array is zero length");
           if (update->whichflag == 0) {
             if (compute->invoked_array != update->ntimestep)
               error->all(FLERR,"Compute used in variable between runs "
                          "is not current");
           } else if (!(compute->invoked_flag & INVOKED_ARRAY)) {
             compute->compute_array();
             compute->invoked_flag |= INVOKED_ARRAY;
           }
 
           Tree *newtree = new Tree();
           newtree->type = VECTORARRAY;
           newtree->array = &compute->array[0][index1-1];
 	  newtree->nvector = compute->size_array_rows;
           newtree->nstride = compute->size_array_cols;
           newtree->selfalloc = 0;
           newtree->first = newtree->second = NULL;
           newtree->nextra = 0;
           treestack[ntreestack++] = newtree;
 
         // c_ID[i] = scalar from per-atom vector
 
         } else if (nbracket == 1 && compute->peratom_flag &&
                    compute->size_peratom_cols == 0) {
 
           if (update->whichflag == 0) {
             if (compute->invoked_peratom != update->ntimestep)
               error->all(FLERR,"Compute used in variable between runs "
                          "is not current");
           } else if (!(compute->invoked_flag & INVOKED_PERATOM)) {
             compute->compute_peratom();
             compute->invoked_flag |= INVOKED_PERATOM;
           }
 
           peratom2global(1,NULL,compute->vector_atom,1,index1,
                          tree,treestack,ntreestack,argstack,nargstack);
 
         // c_ID[i][j] = scalar from per-atom array
 
         } else if (nbracket == 2 && compute->peratom_flag &&
                    compute->size_peratom_cols > 0) {
 
           if (index2 > compute->size_peratom_cols)
             error->all(FLERR,"Variable formula compute array "
                        "is accessed out-of-range");
           if (update->whichflag == 0) {
             if (compute->invoked_peratom != update->ntimestep)
               error->all(FLERR,"Compute used in variable between runs "
                          "is not current");
           } else if (!(compute->invoked_flag & INVOKED_PERATOM)) {
             compute->compute_peratom();
             compute->invoked_flag |= INVOKED_PERATOM;
           }
 
           if (compute->array_atom)
             peratom2global(1,NULL,&compute->array_atom[0][index2-1],
                            compute->size_peratom_cols,index1,
                            tree,treestack,ntreestack,argstack,nargstack);
           else
             peratom2global(1,NULL,NULL,
                            compute->size_peratom_cols,index1,
                            tree,treestack,ntreestack,argstack,nargstack);
 
         // c_ID = vector from per-atom vector
 
         } else if (nbracket == 0 && compute->peratom_flag &&
                    compute->size_peratom_cols == 0) {
 
           if (tree == NULL)
             error->all(FLERR,
                        "Per-atom compute in equal-style variable formula");
           if (treetype == VECTOR)
             error->all(FLERR,
                        "Per-atom compute in vector-style variable formula");
           if (update->whichflag == 0) {
             if (compute->invoked_peratom != update->ntimestep)
               error->all(FLERR,"Compute used in variable between runs "
                          "is not current");
           } else if (!(compute->invoked_flag & INVOKED_PERATOM)) {
             compute->compute_peratom();
             compute->invoked_flag |= INVOKED_PERATOM;
           }
 
           Tree *newtree = new Tree();
           newtree->type = ATOMARRAY;
           newtree->array = compute->vector_atom;
           newtree->nstride = 1;
           newtree->selfalloc = 0;
           newtree->first = newtree->second = NULL;
           newtree->nextra = 0;
           treestack[ntreestack++] = newtree;
 
         // c_ID[i] = vector from per-atom array
 
         } else if (nbracket == 1 && compute->peratom_flag &&
                    compute->size_peratom_cols > 0) {
 
           if (tree == NULL)
             error->all(FLERR,
                        "Per-atom compute in equal-style variable formula");
           if (treetype == VECTOR)
             error->all(FLERR,
                        "Per-atom compute in vector-style variable formula");
           if (index1 > compute->size_peratom_cols)
             error->all(FLERR,"Variable formula compute array "
                        "is accessed out-of-range");
           if (update->whichflag == 0) {
             if (compute->invoked_peratom != update->ntimestep)
               error->all(FLERR,"Compute used in variable between runs "
                          "is not current");
           } else if (!(compute->invoked_flag & INVOKED_PERATOM)) {
             compute->compute_peratom();
             compute->invoked_flag |= INVOKED_PERATOM;
           }
 
           Tree *newtree = new Tree();
           newtree->type = ATOMARRAY;
           if (compute->array_atom)
             newtree->array = &compute->array_atom[0][index1-1];
           else
             newtree->array = NULL;
           newtree->nstride = compute->size_peratom_cols;
           newtree->selfalloc = 0;
           newtree->first = newtree->second = NULL;
           newtree->nextra = 0;
           treestack[ntreestack++] = newtree;
 
         } else error->all(FLERR,"Mismatched compute in variable formula");
 
       // ----------------
       // fix
       // ----------------
 
       } else if (strncmp(word,"f_",2) == 0 || strncmp(word,"F_",2) == 0) {
         if (domain->box_exist == 0)
           error->all(FLERR,
                      "Variable evaluation before simulation box is defined");
 
 	// uppercase used to force access of 
 	// global vector vs global scalar, and global array vs global vector
 
 	int lowercase = 1;
 	if (word[0] == 'F') lowercase = 0;
 
         int ifix = modify->find_fix(word+2);
         if (ifix < 0) error->all(FLERR,"Invalid fix ID in variable formula");
         Fix *fix = modify->fix[ifix];
 
         // parse zero or one or two trailing brackets
         // point i beyond last bracket
         // nbracket = # of bracket pairs
         // index1,index2 = int inside each bracket pair, possibly an atom ID
 
         int nbracket;
         tagint index1,index2;
         if (str[i] != '[') nbracket = 0;
         else {
           nbracket = 1;
           ptr = &str[i];
           index1 = int_between_brackets(ptr,1);
           i = ptr-str+1;
           if (str[i] == '[') {
             nbracket = 2;
             ptr = &str[i];
             index2 = int_between_brackets(ptr,1);
             i = ptr-str+1;
           }
         }
 
         // f_ID = scalar from global scalar, must be lowercase
 
         if (nbracket == 0 && fix->scalar_flag && lowercase) {
 
           if (update->whichflag > 0 && update->ntimestep % fix->global_freq)
             error->all(FLERR,"Fix in variable not computed at compatible time");
 
           value1 = fix->compute_scalar();
           if (tree) {
             Tree *newtree = new Tree();
             newtree->type = VALUE;
             newtree->value = value1;
             newtree->first = newtree->second = NULL;
             newtree->nextra = 0;
             treestack[ntreestack++] = newtree;
           } else argstack[nargstack++] = value1;
 
         // f_ID[i] = scalar from global vector, must be lowercase
 
         } else if (nbracket == 1 && fix->vector_flag && lowercase) {
 
           if (index1 > fix->size_vector &&
               fix->size_vector_variable == 0)
             error->all(FLERR,"Variable formula fix vector is "
                        "accessed out-of-range");
           if (update->whichflag > 0 && update->ntimestep % fix->global_freq)
             error->all(FLERR,"Fix in variable not computed at compatible time");
 
           value1 = fix->compute_vector(index1-1);
           if (tree) {
             Tree *newtree = new Tree();
             newtree->type = VALUE;
             newtree->value = value1;
             newtree->first = newtree->second = NULL;
             newtree->nextra = 0;
             treestack[ntreestack++] = newtree;
           } else argstack[nargstack++] = value1;
 
         // f_ID[i][j] = scalar from global array, must be lowercase
 
         } else if (nbracket == 2 && fix->array_flag && lowercase) {
 
           if (index1 > fix->size_array_rows &&
               fix->size_array_rows_variable == 0)
             error->all(FLERR,
                        "Variable formula fix array is accessed out-of-range");
           if (index2 > fix->size_array_cols)
             error->all(FLERR,
                        "Variable formula fix array is accessed out-of-range");
           if (update->whichflag > 0 && update->ntimestep % fix->global_freq)
             error->all(FLERR,"Fix in variable not computed at compatible time");
 
           value1 = fix->compute_array(index1-1,index2-1);
           if (tree) {
             Tree *newtree = new Tree();
             newtree->type = VALUE;
             newtree->value = value1;
             newtree->first = newtree->second = NULL;
             newtree->nextra = 0;
             treestack[ntreestack++] = newtree;
           } else argstack[nargstack++] = value1;
 
         // f_ID = vector from global vector, lowercase or uppercase
 
         } else if (nbracket == 0 && fix->vector_flag) {
 
           if (update->whichflag > 0 && update->ntimestep % fix->global_freq)
             error->all(FLERR,"Fix in variable not computed at compatible time");
           if (tree == NULL)
             error->all(FLERR,"Fix global vector in "
                        "equal-style variable formula");
           if (treetype == ATOM)
             error->all(FLERR,"Fix global vector in "
                        "atom-style variable formula");
 	  if (fix->size_vector == 0) 
             error->all(FLERR,"Variable formula fix vector is zero length");
 
 	  int nvec = fix->size_vector;
 	  double *vec;
 	  memory->create(vec,nvec,"variable:values");
 	  for (int m = 0; m < nvec; m++)
 	    vec[m] = fix->compute_vector(m);
 
           Tree *newtree = new Tree();
           newtree->type = VECTORARRAY;
           newtree->array = vec;
 	  newtree->nvector = nvec;
           newtree->nstride = 1;
           newtree->selfalloc = 1;
           newtree->first = newtree->second = NULL;
           newtree->nextra = 0;
           treestack[ntreestack++] = newtree;
 
         // f_ID[i] = vector from global array, lowercase or uppercase
 
         } else if (nbracket == 1 && fix->array_flag) {
 
           if (update->whichflag > 0 && update->ntimestep % fix->global_freq)
             error->all(FLERR,"Fix in variable not computed at compatible time");
           if (tree == NULL)
             error->all(FLERR,"Fix global vector in "
                        "equal-style variable formula");
           if (treetype == ATOM)
             error->all(FLERR,"Fix global vector in "
                        "atom-style variable formula");
 	  if (fix->size_array_rows == 0) 
             error->all(FLERR,"Variable formula fix array is zero length");
 
 	  int nvec = fix->size_array_rows;
 	  double *vec;
 	  memory->create(vec,nvec,"variable:values");
 	  for (int m = 0; m < nvec; m++)
 	    vec[m] = fix->compute_array(m,index1-1);
 
           Tree *newtree = new Tree();
           newtree->type = VECTORARRAY;
           newtree->array = vec;
 	  newtree->nvector = nvec;
           newtree->nstride = 1;
           newtree->selfalloc = 1;
           newtree->first = newtree->second = NULL;
           newtree->nextra = 0;
           treestack[ntreestack++] = newtree;
 
         // f_ID[i] = scalar from per-atom vector
 
         } else if (nbracket == 1 && fix->peratom_flag &&
                    fix->size_peratom_cols == 0) {
 
           if (update->whichflag > 0 &&
               update->ntimestep % fix->peratom_freq)
             error->all(FLERR,
                        "Fix in variable not computed at compatible time");
 
           peratom2global(1,NULL,fix->vector_atom,1,index1,
                          tree,treestack,ntreestack,argstack,nargstack);
 
         // f_ID[i][j] = scalar from per-atom array
 
         } else if (nbracket == 2 && fix->peratom_flag &&
                    fix->size_peratom_cols > 0) {
 
           if (index2 > fix->size_peratom_cols)
             error->all(FLERR,
                        "Variable formula fix array is accessed out-of-range");
           if (update->whichflag > 0 &&
               update->ntimestep % fix->peratom_freq)
             error->all(FLERR,"Fix in variable not computed at compatible time");
 
           if (fix->array_atom)
             peratom2global(1,NULL,&fix->array_atom[0][index2-1],
                            fix->size_peratom_cols,index1,
                            tree,treestack,ntreestack,argstack,nargstack);
           else
             peratom2global(1,NULL,NULL,
                            fix->size_peratom_cols,index1,
                            tree,treestack,ntreestack,argstack,nargstack);
 
         // f_ID = vector from per-atom vector
 
         } else if (nbracket == 0 && fix->peratom_flag &&
                    fix->size_peratom_cols == 0) {
 
           if (tree == NULL)
             error->all(FLERR,"Per-atom fix in equal-style variable formula");
           if (update->whichflag > 0 &&
               update->ntimestep % fix->peratom_freq)
             error->all(FLERR,"Fix in variable not computed at compatible time");
 
           Tree *newtree = new Tree();
           newtree->type = ATOMARRAY;
           newtree->array = fix->vector_atom;
           newtree->nstride = 1;
           newtree->selfalloc = 0;
           newtree->first = newtree->second = NULL;
           newtree->nextra = 0;
           treestack[ntreestack++] = newtree;
 
         // f_ID[i] = vector from per-atom array
 
         } else if (nbracket == 1 && fix->peratom_flag &&
                    fix->size_peratom_cols > 0) {
 
           if (tree == NULL)
             error->all(FLERR,"Per-atom fix in equal-style variable formula");
           if (index1 > fix->size_peratom_cols)
             error->all(FLERR,
                        "Variable formula fix array is accessed out-of-range");
           if (update->whichflag > 0 &&
               update->ntimestep % fix->peratom_freq)
             error->all(FLERR,"Fix in variable not computed at compatible time");
 
           Tree *newtree = new Tree();
           newtree->type = ATOMARRAY;
           if (fix->array_atom)
             newtree->array = &fix->array_atom[0][index1-1];
           else
             newtree->array = NULL;
           newtree->nstride = fix->size_peratom_cols;
           newtree->selfalloc = 0;
           newtree->first = newtree->second = NULL;
           newtree->nextra = 0;
           treestack[ntreestack++] = newtree;
 
         } else error->all(FLERR,"Mismatched fix in variable formula");
 
       // ----------------
       // variable
       // ----------------
 
       } else if (strncmp(word,"v_",2) == 0) {
 
         int ivar = find(word+2);
         if (ivar < 0)
           error->all(FLERR,"Invalid variable name in variable formula");
         if (eval_in_progress[ivar])
           error->all(FLERR,"Variable has circular dependency");
 
         // parse zero or one trailing brackets
         // point i beyond last bracket
         // nbracket = # of bracket pairs
         // index = int inside bracket, possibly an atom ID
 
         int nbracket;
         tagint index;
         if (str[i] != '[') nbracket = 0;
         else {
           nbracket = 1;
           ptr = &str[i];
           index = int_between_brackets(ptr,1);
           i = ptr-str+1;
         }
 
         // v_name = scalar from internal-style variable
         // access value directly
 
         if (nbracket == 0 && style[ivar] == INTERNAL) {
 
           value1 = dvalue[ivar];
           if (tree) {
             Tree *newtree = new Tree();
             newtree->type = VALUE;
             newtree->value = value1;
             newtree->first = newtree->second = NULL;
             newtree->nextra = 0;
             treestack[ntreestack++] = newtree;
           } else argstack[nargstack++] = value1;
 
         // v_name = scalar from non atom/atomfile & non vector-style variable
         // access value via retrieve()
 
         } else if (nbracket == 0 && style[ivar] != ATOM && 
                    style[ivar] != ATOMFILE && style[ivar] != VECTOR) {
 
           char *var = retrieve(word+2);
           if (var == NULL)
             error->all(FLERR,"Invalid variable evaluation in variable formula");
           if (tree) {
             Tree *newtree = new Tree();
             newtree->type = VALUE;
             newtree->value = atof(var);
             newtree->first = newtree->second = NULL;
             newtree->nextra = 0;
             treestack[ntreestack++] = newtree;
           } else argstack[nargstack++] = atof(var);
 
         // v_name = per-atom vector from atom-style variable
         // evaluate the atom-style variable as newtree
 
         } else if (nbracket == 0 && style[ivar] == ATOM) {
 
           if (tree == NULL)
             error->all(FLERR,
                        "Atom-style variable in equal-style variable formula");
 	  if (treetype == VECTOR)
             error->all(FLERR,
                        "Atom-style variable in vector-style variable formula");
 
           Tree *newtree;
           evaluate(data[ivar][0],&newtree);
           treestack[ntreestack++] = newtree;
 
         // v_name = per-atom vector from atomfile-style variable
 
         } else if (nbracket == 0 && style[ivar] == ATOMFILE) {
 
           if (tree == NULL)
             error->all(FLERR,"Atomfile-style variable in "
                        "equal-style variable formula");
 	  if (treetype == VECTOR)
             error->all(FLERR,"Atomfile-style variable in "
                        "vector-style variable formula");
 
           Tree *newtree = new Tree();
           newtree->type = ATOMARRAY;
           newtree->array = reader[ivar]->fixstore->vstore;
           newtree->nstride = 1;
           newtree->selfalloc = 0;
           newtree->first = newtree->second = NULL;
           newtree->nextra = 0;
           treestack[ntreestack++] = newtree;
 
         // v_name = vector from vector-style variable
         // evaluate the vector-style variable, put result in newtree
 
 	} else if (nbracket == 0 && style[ivar] == VECTOR) {
 
           if (tree == NULL)
             error->all(FLERR,
                        "Vector-style variable in equal-style variable formula");
           if (treetype == ATOM)
             error->all(FLERR,
                        "Vector-style variable in atom-style variable formula");
 
 	  double *vec;
 	  int nvec = compute_vector(ivar,&vec);
 
 	  Tree *newtree = new Tree();
 	  newtree->type = VECTORARRAY;
 	  newtree->array = vec;
 	  newtree->nvector = nvec;
 	  newtree->nstride = 1;
 	  newtree->selfalloc = 0;
 	  newtree->first = newtree->second = NULL;
 	  newtree->nextra = 0;
 	  treestack[ntreestack++] = newtree;
 
         // v_name[N] = scalar from atom-style variable
         // compute the per-atom variable in result
         // use peratom2global to extract single value from result
 
         } else if (nbracket && style[ivar] == ATOM) {
 
           double *result;
           memory->create(result,atom->nlocal,"variable:result");
           compute_atom(ivar,0,result,1,0);
           peratom2global(1,NULL,result,1,index,
                          tree,treestack,ntreestack,argstack,nargstack);
           memory->destroy(result);
 
         // v_name[N] = scalar from atomfile-style variable
 
         } else if (nbracket && style[ivar] == ATOMFILE) {
 
           peratom2global(1,NULL,reader[ivar]->fixstore->vstore,1,index,
                          tree,treestack,ntreestack,argstack,nargstack);
 
         // v_name[N] = scalar from vector-style variable
         // compute the vector-style variable, extract single value
 
         } else if (nbracket && style[ivar] == VECTOR) {
 
 	  double *vec;
 	  int nvec = compute_vector(ivar,&vec);
 	  if (index <= 0 || index > nvec)
 	    error->all(FLERR,"Invalid index into vector-style variable");
 	  int m = index;   // convert from tagint to int
 
           if (tree) {
             Tree *newtree = new Tree();
             newtree->type = VALUE;
             newtree->value = vec[m-1];
             newtree->first = newtree->second = NULL;
             newtree->nextra = 0;
             treestack[ntreestack++] = newtree;
           } else argstack[nargstack++] = vec[m-1];
 
         } else error->all(FLERR,"Mismatched variable in variable formula");
 
       // ----------------
       // math/group/special function or atom value/vector or
       // constant or thermo keyword
       // ----------------
 
       } else {
 
         // ----------------
         // math or group or special function
         // ----------------
 
         if (str[i] == '(') {
           char *contents;
           i = find_matching_paren(str,i,contents);
           i++;
 
           if (math_function(word,contents,tree,
                             treestack,ntreestack,argstack,nargstack));
           else if (group_function(word,contents,tree,
                                   treestack,ntreestack,argstack,nargstack));
           else if (special_function(word,contents,tree,
                                     treestack,ntreestack,argstack,nargstack));
           else error->all(FLERR,"Invalid math/group/special function "
                           "in variable formula");
           delete [] contents;
 
         // ----------------
         // atom value
         // ----------------
 
         } else if (str[i] == '[') {
           if (domain->box_exist == 0)
             error->all(FLERR,
                        "Variable evaluation before simulation box is defined");
 
           ptr = &str[i];
           tagint id = int_between_brackets(ptr,1);
           i = ptr-str+1;
 
           peratom2global(0,word,NULL,0,id,
                          tree,treestack,ntreestack,argstack,nargstack);
 
         // ----------------
         // atom vector
         // ----------------
 
         } else if (is_atom_vector(word)) {
           if (domain->box_exist == 0)
             error->all(FLERR,
                        "Variable evaluation before simulation box is defined");
 
           atom_vector(word,tree,treestack,ntreestack);
 
         // ----------------
         // constant
         // ----------------
 
         } else if (is_constant(word)) {
           value1 = constant(word);
           if (tree) {
             Tree *newtree = new Tree();
             newtree->type = VALUE;
             newtree->value = value1;
             newtree->first = newtree->second = NULL;
             newtree->nextra = 0;
             treestack[ntreestack++] = newtree;
           } else argstack[nargstack++] = value1;
 
         // ----------------
         // thermo keyword
         // ----------------
 
         } else {
           if (domain->box_exist == 0)
             error->all(FLERR,
                        "Variable evaluation before simulation box is defined");
 
           int flag = output->thermo->evaluate_keyword(word,&value1);
           if (flag)
             error->all(FLERR,"Invalid thermo keyword in variable formula");
           if (tree) {
             Tree *newtree = new Tree();
             newtree->type = VALUE;
             newtree->value = value1;
             newtree->first = newtree->second = NULL;
             newtree->nextra = 0;
             treestack[ntreestack++] = newtree;
           } else argstack[nargstack++] = value1;
         }
       }
 
       delete [] word;
 
     // ----------------
     // math operator, including end-of-string
     // ----------------
 
     } else if (strchr("+-*/^<>=!&|%\0",onechar)) {
       if (onechar == '+') op = ADD;
       else if (onechar == '-') op = SUBTRACT;
       else if (onechar == '*') op = MULTIPLY;
       else if (onechar == '/') op = DIVIDE;
       else if (onechar == '%') op = MODULO;
       else if (onechar == '^') op = CARAT;
       else if (onechar == '=') {
         if (str[i+1] != '=')
           error->all(FLERR,"Invalid syntax in variable formula");
         op = EQ;
         i++;
       } else if (onechar == '!') {
         if (str[i+1] == '=') {
           op = NE;
           i++;
         } else op = NOT;
       } else if (onechar == '<') {
         if (str[i+1] != '=') op = LT;
         else {
           op = LE;
           i++;
         }
       } else if (onechar == '>') {
         if (str[i+1] != '=') op = GT;
         else {
           op = GE;
           i++;
         }
       } else if (onechar == '&') {
         if (str[i+1] != '&')
           error->all(FLERR,"Invalid syntax in variable formula");
         op = AND;
         i++;
       } else if (onechar == '|') {
         if (str[i+1] == '|') op = OR;
         else if (str[i+1] == '^') op = XOR;
         else error->all(FLERR,"Invalid syntax in variable formula");
         i++;
       } else op = DONE;
 
       i++;
 
       if (op == SUBTRACT && expect == ARG) {
         opstack[nopstack++] = UNARY;
         continue;
       }
       if (op == NOT && expect == ARG) {
         opstack[nopstack++] = op;
         continue;
       }
 
       if (expect == ARG) error->all(FLERR,"Invalid syntax in variable formula");
       expect = ARG;
 
       // evaluate stack as deep as possible while respecting precedence
       // before pushing current op onto stack
 
       while (nopstack && precedence[opstack[nopstack-1]] >= precedence[op]) {
         opprevious = opstack[--nopstack];
 
         if (tree) {
           Tree *newtree = new Tree();
           newtree->type = opprevious;
           if (opprevious == UNARY) {
             newtree->first = treestack[--ntreestack];
             newtree->second = NULL;
             newtree->nextra = 0;
           } else {
             newtree->second = treestack[--ntreestack];
             newtree->first = treestack[--ntreestack];
             newtree->nextra = 0;
           }
           treestack[ntreestack++] = newtree;
 
         } else {
           value2 = argstack[--nargstack];
           if (opprevious != UNARY && opprevious != NOT)
             value1 = argstack[--nargstack];
 
           if (opprevious == ADD)
             argstack[nargstack++] = value1 + value2;
           else if (opprevious == SUBTRACT)
             argstack[nargstack++] = value1 - value2;
           else if (opprevious == MULTIPLY)
             argstack[nargstack++] = value1 * value2;
           else if (opprevious == DIVIDE) {
             if (value2 == 0.0)
               error->all(FLERR,"Divide by 0 in variable formula");
             argstack[nargstack++] = value1 / value2;
           } else if (opprevious == MODULO) {
             if (value2 == 0.0)
               error->all(FLERR,"Modulo 0 in variable formula");
             argstack[nargstack++] = fmod(value1,value2);
           } else if (opprevious == CARAT) {
             if (value2 == 0.0)
-              error->all(FLERR,"Power by 0 in variable formula");
-            argstack[nargstack++] = pow(value1,value2);
+              argstack[nargstack++] = 1.0;
+            else if ((value1 == 0.0) && (value2 < 0.0))
+              error->all(FLERR,"Invalid power expression in variable formula");
+            else argstack[nargstack++] = pow(value1,value2);
           } else if (opprevious == UNARY) {
             argstack[nargstack++] = -value2;
           } else if (opprevious == NOT) {
             if (value2 == 0.0) argstack[nargstack++] = 1.0;
             else argstack[nargstack++] = 0.0;
           } else if (opprevious == EQ) {
             if (value1 == value2) argstack[nargstack++] = 1.0;
             else argstack[nargstack++] = 0.0;
           } else if (opprevious == NE) {
             if (value1 != value2) argstack[nargstack++] = 1.0;
             else argstack[nargstack++] = 0.0;
           } else if (opprevious == LT) {
             if (value1 < value2) argstack[nargstack++] = 1.0;
             else argstack[nargstack++] = 0.0;
           } else if (opprevious == LE) {
             if (value1 <= value2) argstack[nargstack++] = 1.0;
             else argstack[nargstack++] = 0.0;
           } else if (opprevious == GT) {
             if (value1 > value2) argstack[nargstack++] = 1.0;
             else argstack[nargstack++] = 0.0;
           } else if (opprevious == GE) {
             if (value1 >= value2) argstack[nargstack++] = 1.0;
             else argstack[nargstack++] = 0.0;
           } else if (opprevious == AND) {
             if (value1 != 0.0 && value2 != 0.0) argstack[nargstack++] = 1.0;
             else argstack[nargstack++] = 0.0;
           } else if (opprevious == OR) {
             if (value1 != 0.0 || value2 != 0.0) argstack[nargstack++] = 1.0;
             else argstack[nargstack++] = 0.0;
           } else if (opprevious == XOR) {
             if ((value1 == 0.0 && value2 != 0.0) || 
                 (value1 != 0.0 && value2 == 0.0)) argstack[nargstack++] = 1.0;
             else argstack[nargstack++] = 0.0;
           }
         }
       }
 
       // if end-of-string, break out of entire formula evaluation loop
 
       if (op == DONE) break;
 
       // push current operation onto stack
 
       opstack[nopstack++] = op;
 
     } else error->all(FLERR,"Invalid syntax in variable formula");
   }
 
   if (nopstack) error->all(FLERR,"Invalid syntax in variable formula");
 
   // for atom-style variable, return remaining tree
   // for equal-style variable, return remaining arg
 
   if (tree) {
     if (ntreestack != 1) error->all(FLERR,"Invalid syntax in variable formula");
     *tree = treestack[0];
     return 0.0;
   } else {
     if (nargstack != 1) error->all(FLERR,"Invalid syntax in variable formula");
     return argstack[0];
   }
 }
 
 /* ----------------------------------------------------------------------
    one-time collapse of an atom-style variable parse tree
    tree was created by one-time parsing of formula string via evaluate()
    only keep tree nodes that depend on
      ATOMARRAY, TYPEARRAY, INTARRAY, BIGINTARRAY, VECTOR
    remainder is converted to single VALUE
    this enables optimal eval_tree loop over atoms
    customize by adding a function:
      sqrt(),exp(),ln(),log(),abs(),sin(),cos(),tan(),asin(),acos(),atan(),
      atan2(y,x),random(x,y,z),normal(x,y,z),ceil(),floor(),round(),
      ramp(x,y),stagger(x,y),logfreq(x,y,z),logfreq2(x,y,z),
      stride(x,y,z),vdisplace(x,y),swiggle(x,y,z),cwiggle(x,y,z),
      gmask(x),rmask(x),grmask(x,y)
 ---------------------------------------------------------------------- */
 
 double Variable::collapse_tree(Tree *tree)
 {
   double arg1,arg2;
 
   if (tree->type == VALUE) return tree->value;
   if (tree->type == ATOMARRAY) return 0.0;
   if (tree->type == TYPEARRAY) return 0.0;
   if (tree->type == INTARRAY) return 0.0;
   if (tree->type == BIGINTARRAY) return 0.0;
   if (tree->type == VECTORARRAY) return 0.0;
 
   if (tree->type == ADD) {
     arg1 = collapse_tree(tree->first);
     arg2 = collapse_tree(tree->second);
     if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0;
     tree->type = VALUE;
     tree->value = arg1 + arg2;
     return tree->value;
   }
 
   if (tree->type == SUBTRACT) {
     arg1 = collapse_tree(tree->first);
     arg2 = collapse_tree(tree->second);
     if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0;
     tree->type = VALUE;
     tree->value = arg1 - arg2;
     return tree->value;
   }
 
   if (tree->type == MULTIPLY) {
     arg1 = collapse_tree(tree->first);
     arg2 = collapse_tree(tree->second);
     if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0;
     tree->type = VALUE;
     tree->value = arg1 * arg2;
     return tree->value;
   }
 
   if (tree->type == DIVIDE) {
     arg1 = collapse_tree(tree->first);
     arg2 = collapse_tree(tree->second);
     if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0;
     tree->type = VALUE;
     if (arg2 == 0.0) error->one(FLERR,"Divide by 0 in variable formula");
     tree->value = arg1 / arg2;
     return tree->value;
   }
 
   if (tree->type == MODULO) {
     arg1 = collapse_tree(tree->first);
     arg2 = collapse_tree(tree->second);
     if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0;
     tree->type = VALUE;
     if (arg2 == 0.0) error->one(FLERR,"Modulo 0 in variable formula");
     tree->value = fmod(arg1,arg2);
     return tree->value;
   }
 
   if (tree->type == CARAT) {
     arg1 = collapse_tree(tree->first);
     arg2 = collapse_tree(tree->second);
     if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0;
     tree->type = VALUE;
     if (arg2 == 0.0) error->one(FLERR,"Power by 0 in variable formula");
     tree->value = pow(arg1,arg2);
     return tree->value;
   }
 
   if (tree->type == UNARY) {
     arg1 = collapse_tree(tree->first);
     if (tree->first->type != VALUE) return 0.0;
     tree->type = VALUE;
     tree->value = -arg1;
     return tree->value;
   }
 
   if (tree->type == NOT) {
     arg1 = collapse_tree(tree->first);
     if (tree->first->type != VALUE) return 0.0;
     tree->type = VALUE;
     if (arg1 == 0.0) tree->value = 1.0;
     else tree->value = 0.0;
     return tree->value;
   }
 
   if (tree->type == EQ) {
     arg1 = collapse_tree(tree->first);
     arg2 = collapse_tree(tree->second);
     if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0;
     tree->type = VALUE;
     if (arg1 == arg2) tree->value = 1.0;
     else tree->value = 0.0;
     return tree->value;
   }
 
   if (tree->type == NE) {
     arg1 = collapse_tree(tree->first);
     arg2 = collapse_tree(tree->second);
     if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0;
     tree->type = VALUE;
     if (arg1 != arg2) tree->value = 1.0;
     else tree->value = 0.0;
     return tree->value;
   }
 
   if (tree->type == LT) {
     arg1 = collapse_tree(tree->first);
     arg2 = collapse_tree(tree->second);
     if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0;
     tree->type = VALUE;
     if (arg1 < arg2) tree->value = 1.0;
     else tree->value = 0.0;
     return tree->value;
   }
 
   if (tree->type == LE) {
     arg1 = collapse_tree(tree->first);
     arg2 = collapse_tree(tree->second);
     if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0;
     tree->type = VALUE;
     if (arg1 <= arg2) tree->value = 1.0;
     else tree->value = 0.0;
     return tree->value;
   }
 
   if (tree->type == GT) {
     arg1 = collapse_tree(tree->first);
     arg2 = collapse_tree(tree->second);
     if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0;
     tree->type = VALUE;
     if (arg1 > arg2) tree->value = 1.0;
     else tree->value = 0.0;
     return tree->value;
   }
 
   if (tree->type == GE) {
     arg1 = collapse_tree(tree->first);
     arg2 = collapse_tree(tree->second);
     if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0;
     tree->type = VALUE;
     if (arg1 >= arg2) tree->value = 1.0;
     else tree->value = 0.0;
     return tree->value;
   }
 
   if (tree->type == AND) {
     arg1 = collapse_tree(tree->first);
     arg2 = collapse_tree(tree->second);
     if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0;
     tree->type = VALUE;
     if (arg1 != 0.0 && arg2 != 0.0) tree->value = 1.0;
     else tree->value = 0.0;
     return tree->value;
   }
 
   if (tree->type == OR) {
     arg1 = collapse_tree(tree->first);
     arg2 = collapse_tree(tree->second);
     if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0;
     tree->type = VALUE;
     if (arg1 != 0.0 || arg2 != 0.0) tree->value = 1.0;
     else tree->value = 0.0;
     return tree->value;
   }
 
   if (tree->type == XOR) {
     arg1 = collapse_tree(tree->first);
     arg2 = collapse_tree(tree->second);
     if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0;
     tree->type = VALUE;
     if ((arg1 == 0.0 && arg2 != 0.0) || (arg1 != 0.0 && arg2 == 0.0))
       tree->value = 1.0;
     else tree->value = 0.0;
     return tree->value;
   }
 
   if (tree->type == SQRT) {
     arg1 = collapse_tree(tree->first);
     if (tree->first->type != VALUE) return 0.0;
     tree->type = VALUE;
     if (arg1 < 0.0)
       error->one(FLERR,"Sqrt of negative value in variable formula");
     tree->value = sqrt(arg1);
     return tree->value;
   }
 
   if (tree->type == EXP) {
     arg1 = collapse_tree(tree->first);
     if (tree->first->type != VALUE) return 0.0;
     tree->type = VALUE;
     tree->value = exp(arg1);
     return tree->value;
   }
 
   if (tree->type == LN) {
     arg1 = collapse_tree(tree->first);
     if (tree->first->type != VALUE) return 0.0;
     tree->type = VALUE;
     if (arg1 <= 0.0)
       error->one(FLERR,"Log of zero/negative value in variable formula");
     tree->value = log(arg1);
     return tree->value;
   }
 
   if (tree->type == LOG) {
     arg1 = collapse_tree(tree->first);
     if (tree->first->type != VALUE) return 0.0;
     tree->type = VALUE;
     if (arg1 <= 0.0)
       error->one(FLERR,"Log of zero/negative value in variable formula");
     tree->value = log10(arg1);
     return tree->value;
   }
 
   if (tree->type == ABS) {
     arg1 = collapse_tree(tree->first);
     if (tree->first->type != VALUE) return 0.0;
     tree->type = VALUE;
     tree->value = fabs(arg1);
     return tree->value;
   }
 
   if (tree->type == SIN) {
     arg1 = collapse_tree(tree->first);
     if (tree->first->type != VALUE) return 0.0;
     tree->type = VALUE;
     tree->value = sin(arg1);
     return tree->value;
   }
 
   if (tree->type == COS) {
     arg1 = collapse_tree(tree->first);
     if (tree->first->type != VALUE) return 0.0;
     tree->type = VALUE;
     tree->value = cos(arg1);
     return tree->value;
   }
 
   if (tree->type == TAN) {
     arg1 = collapse_tree(tree->first);
     if (tree->first->type != VALUE) return 0.0;
     tree->type = VALUE;
     tree->value = tan(arg1);
     return tree->value;
   }
 
   if (tree->type == ASIN) {
     arg1 = collapse_tree(tree->first);
     if (tree->first->type != VALUE) return 0.0;
     tree->type = VALUE;
     if (arg1 < -1.0 || arg1 > 1.0)
       error->one(FLERR,"Arcsin of invalid value in variable formula");
     tree->value = asin(arg1);
     return tree->value;
   }
 
   if (tree->type == ACOS) {
     arg1 = collapse_tree(tree->first);
     if (tree->first->type != VALUE) return 0.0;
     tree->type = VALUE;
     if (arg1 < -1.0 || arg1 > 1.0)
       error->one(FLERR,"Arccos of invalid value in variable formula");
     tree->value = acos(arg1);
     return tree->value;
   }
 
   if (tree->type == ATAN) {
     arg1 = collapse_tree(tree->first);
     if (tree->first->type != VALUE) return 0.0;
     tree->type = VALUE;
     tree->value = atan(arg1);
     return tree->value;
   }
 
   if (tree->type == ATAN2) {
     arg1 = collapse_tree(tree->first);
     arg2 = collapse_tree(tree->second);
     if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0;
     tree->type = VALUE;
     tree->value = atan2(arg1,arg2);
     return tree->value;
   }
 
   // random() or normal() do not become a single collapsed value
 
   if (tree->type == RANDOM) {
     collapse_tree(tree->first);
     collapse_tree(tree->second);
     if (randomatom == NULL) {
       int seed = static_cast<int> (collapse_tree(tree->extra[0]));
       if (seed <= 0)
         error->one(FLERR,"Invalid math function in variable formula");
       randomatom = new RanMars(lmp,seed+me);
     }
     return 0.0;
   }
 
   if (tree->type == NORMAL) {
     collapse_tree(tree->first);
     double sigma = collapse_tree(tree->second);
     if (sigma < 0.0)
       error->one(FLERR,"Invalid math function in variable formula");
     if (randomatom == NULL) {
       int seed = static_cast<int> (collapse_tree(tree->extra[0]));
       if (seed <= 0)
         error->one(FLERR,"Invalid math function in variable formula");
       randomatom = new RanMars(lmp,seed+me);
     }
     return 0.0;
   }
 
   if (tree->type == CEIL) {
     arg1 = collapse_tree(tree->first);
     if (tree->first->type != VALUE) return 0.0;
     tree->type = VALUE;
     tree->value = ceil(arg1);
     return tree->value;
   }
 
   if (tree->type == FLOOR) {
     arg1 = collapse_tree(tree->first);
     if (tree->first->type != VALUE) return 0.0;
     tree->type = VALUE;
     tree->value = floor(arg1);
     return tree->value;
   }
 
   if (tree->type == ROUND) {
     arg1 = collapse_tree(tree->first);
     if (tree->first->type != VALUE) return 0.0;
     tree->type = VALUE;
     tree->value = MYROUND(arg1);
     return tree->value;
   }
 
   if (tree->type == RAMP) {
     arg1 = collapse_tree(tree->first);
     arg2 = collapse_tree(tree->second);
     if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0;
     tree->type = VALUE;
     double delta = update->ntimestep - update->beginstep;
     if (delta != 0.0) delta /= update->endstep - update->beginstep;
     tree->value = arg1 + delta*(arg2-arg1);
     return tree->value;
   }
 
   if (tree->type == STAGGER) {
     int ivalue1 = static_cast<int> (collapse_tree(tree->first));
     int ivalue2 = static_cast<int> (collapse_tree(tree->second));
     if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0;
     tree->type = VALUE;
     if (ivalue1 <= 0 || ivalue2 <= 0 || ivalue1 <= ivalue2)
       error->one(FLERR,"Invalid math function in variable formula");
     int lower = update->ntimestep/ivalue1 * ivalue1;
     int delta = update->ntimestep - lower;
     if (delta < ivalue2) tree->value = lower+ivalue2;
     else tree->value = lower+ivalue1;
     return tree->value;
   }
 
   if (tree->type == LOGFREQ) {
     int ivalue1 = static_cast<int> (collapse_tree(tree->first));
     int ivalue2 = static_cast<int> (collapse_tree(tree->second));
     int ivalue3 = static_cast<int> (collapse_tree(tree->extra[0]));
     if (tree->first->type != VALUE || tree->second->type != VALUE ||
         tree->extra[0]->type != VALUE) return 0.0;
     tree->type = VALUE;
     if (ivalue1 <= 0 || ivalue2 <= 0 || ivalue3 <= 0 || ivalue2 >= ivalue3)
       error->one(FLERR,"Invalid math function in variable formula");
     if (update->ntimestep < ivalue1) tree->value = ivalue1;
     else {
       int lower = ivalue1;
       while (update->ntimestep >= ivalue3*lower) lower *= ivalue3;
       int multiple = update->ntimestep/lower;
       if (multiple < ivalue2) tree->value = (multiple+1)*lower;
       else tree->value = lower*ivalue3;
     }
     return tree->value;
   }
 
   if (tree->type == LOGFREQ2) {
     int ivalue1 = static_cast<int> (collapse_tree(tree->first));
     int ivalue2 = static_cast<int> (collapse_tree(tree->second));
     int ivalue3 = static_cast<int> (collapse_tree(tree->extra[0]));
     if (tree->first->type != VALUE || tree->second->type != VALUE ||
         tree->extra[0]->type != VALUE) return 0.0;
     tree->type = VALUE;
     if (ivalue1 <= 0 || ivalue2 <= 0 || ivalue3 <= 0 )
       error->all(FLERR,"Invalid math function in variable formula");
     if (update->ntimestep < ivalue1) tree->value = ivalue1;
     else {
       tree->value = ivalue1;
       double delta = ivalue1*(ivalue3-1.0)/ivalue2;
       int count = 0;
       while (update->ntimestep >= tree->value) {
 	tree->value += delta;
 	count++;
 	if (count % ivalue2 == 0) delta *= ivalue3;
       }
     }
     tree->value = ceil(tree->value);
     return tree->value;
   }
 
   if (tree->type == STRIDE) {
     int ivalue1 = static_cast<int> (collapse_tree(tree->first));
     int ivalue2 = static_cast<int> (collapse_tree(tree->second));
     int ivalue3 = static_cast<int> (collapse_tree(tree->extra[0]));
     if (tree->first->type != VALUE || tree->second->type != VALUE ||
         tree->extra[0]->type != VALUE) return 0.0;
     tree->type = VALUE;
     if (ivalue1 < 0 || ivalue2 < 0 || ivalue3 <= 0 || ivalue1 > ivalue2)
       error->one(FLERR,"Invalid math function in variable formula");
     if (update->ntimestep < ivalue1) tree->value = ivalue1;
     else if (update->ntimestep < ivalue2) {
       int offset = update->ntimestep - ivalue1;
       tree->value = ivalue1 + (offset/ivalue3)*ivalue3 + ivalue3;
       if (tree->value > ivalue2) tree->value = MAXBIGINT;
     } else tree->value = MAXBIGINT;
     return tree->value;
   }
 
   if (tree->type == STRIDE2) {
     int ivalue1 = static_cast<int> (collapse_tree(tree->first));
     int ivalue2 = static_cast<int> (collapse_tree(tree->second));
     int ivalue3 = static_cast<int> (collapse_tree(tree->extra[0]));
     int ivalue4 = static_cast<int> (collapse_tree(tree->extra[1]));
     int ivalue5 = static_cast<int> (collapse_tree(tree->extra[2]));
     int ivalue6 = static_cast<int> (collapse_tree(tree->extra[3]));
     if (tree->first->type != VALUE || tree->second->type != VALUE ||
         tree->extra[0]->type != VALUE || tree->extra[1]->type != VALUE ||
 	tree->extra[2]->type != VALUE || tree->extra[3]->type != VALUE)
       return 0.0;
     tree->type = VALUE;
     if (ivalue1 < 0 || ivalue2 < 0 || ivalue3 <= 0 || ivalue1 > ivalue2)
       error->one(FLERR,"Invalid math function in variable formula");
     if (ivalue4 < 0 || ivalue5 < 0 || ivalue6 <= 0 || ivalue4 > ivalue5)
       error->one(FLERR,"Invalid math function in variable formula");
     if (ivalue4 < ivalue1 || ivalue5 > ivalue2)
       error->one(FLERR,"Invalid math function in variable formula");
     bigint istep;
     if (update->ntimestep < ivalue1) istep = ivalue1;
     else if (update->ntimestep < ivalue2) {
       if (update->ntimestep < ivalue4 || update->ntimestep > ivalue5) {
         int offset = update->ntimestep - ivalue1;
         istep = ivalue1 + (offset/ivalue3)*ivalue3 + ivalue3;
         if (update->ntimestep < ivalue2 && istep > ivalue4)
           tree->value = ivalue4;
       } else {
         int offset = update->ntimestep - ivalue4;
         istep = ivalue4 + (offset/ivalue6)*ivalue6 + ivalue6;
         if (istep > ivalue5) {
           int offset = ivalue5 - ivalue1;
           istep = ivalue1 + (offset/ivalue3)*ivalue3 + ivalue3;
           if (istep > ivalue2) istep = MAXBIGINT;
         }
       }
     } else istep = MAXBIGINT;
     tree->value = istep;
     return tree->value;
   }
 
   if (tree->type == VDISPLACE) {
     double arg1 = collapse_tree(tree->first);
     double arg2 = collapse_tree(tree->second);
     if (tree->first->type != VALUE || tree->second->type != VALUE) return 0.0;
     tree->type = VALUE;
     double delta = update->ntimestep - update->beginstep;
     tree->value = arg1 + arg2*delta*update->dt;
     return tree->value;
   }
 
   if (tree->type == SWIGGLE) {
     double arg1 = collapse_tree(tree->first);
     double arg2 = collapse_tree(tree->second);
     double arg3 = collapse_tree(tree->extra[0]);
     if (tree->first->type != VALUE || tree->second->type != VALUE ||
         tree->extra[0]->type != VALUE) return 0.0;
     tree->type = VALUE;
     if (arg3 == 0.0)
       error->one(FLERR,"Invalid math function in variable formula");
     double delta = update->ntimestep - update->beginstep;
     double omega = 2.0*MY_PI/arg3;
     tree->value = arg1 + arg2*sin(omega*delta*update->dt);
     return tree->value;
   }
 
   if (tree->type == CWIGGLE) {
     double arg1 = collapse_tree(tree->first);
     double arg2 = collapse_tree(tree->second);
     double arg3 = collapse_tree(tree->extra[0]);
     if (tree->first->type != VALUE || tree->second->type != VALUE ||
         tree->extra[0]->type != VALUE) return 0.0;
     tree->type = VALUE;
     if (arg3 == 0.0)
       error->one(FLERR,"Invalid math function in variable formula");
     double delta = update->ntimestep - update->beginstep;
     double omega = 2.0*MY_PI/arg3;
     tree->value = arg1 + arg2*(1.0-cos(omega*delta*update->dt));
     return tree->value;
   }
 
   // mask functions do not become a single collapsed value
 
   if (tree->type == GMASK) return 0.0;
   if (tree->type == RMASK) return 0.0;
   if (tree->type == GRMASK) return 0.0;
 
   return 0.0;
 }
 
 /* ----------------------------------------------------------------------
    evaluate an atom-style or vector-style variable parse tree
    index I = atom I or vector index I
    tree was created by one-time parsing of formula string via evaluate()
    return value of this tree node for index I, recursing into its children
    a node whose type is not recognized evaluates to 0.0
    invalid operands are reported via error->one()
    customize by adding a function:
      sqrt(),exp(),ln(),log(),abs(),sin(),cos(),tan(),asin(),acos(),atan(),
      atan2(y,x),random(x,y,z),normal(x,y,z),ceil(),floor(),round(),
      ramp(x,y),stagger(x,y),logfreq(x,y,z),logfreq2(x,y,z),
      stride(x,y,z),stride2(x,y,z,a,b,c),vdisplace(x,y),swiggle(x,y,z),
      cwiggle(x,y,z),gmask(x),rmask(x),grmask(x,y)
 ---------------------------------------------------------------------- */
 
 double Variable::eval_tree(Tree *tree, int i)
 {
   double arg,arg1,arg2,arg3;
 
   // leaf nodes: constant or array lookup
   // TYPEARRAY is indexed by atom type, all other arrays by I times stride
 
   if (tree->type == VALUE) return tree->value;
   if (tree->type == ATOMARRAY) return tree->array[i*tree->nstride];
   if (tree->type == TYPEARRAY) return tree->array[atom->type[i]];
   if (tree->type == INTARRAY) return (double) tree->iarray[i*tree->nstride];
   if (tree->type == BIGINTARRAY) return (double) tree->barray[i*tree->nstride];
   if (tree->type == VECTORARRAY) return tree->array[i*tree->nstride];
 
   // arithmetic operators
 
   if (tree->type == ADD)
     return eval_tree(tree->first,i) + eval_tree(tree->second,i);
   if (tree->type == SUBTRACT)
     return eval_tree(tree->first,i) - eval_tree(tree->second,i);
   if (tree->type == MULTIPLY)
     return eval_tree(tree->first,i) * eval_tree(tree->second,i);
   if (tree->type == DIVIDE) {
     double denom = eval_tree(tree->second,i);
     if (denom == 0.0) error->one(FLERR,"Divide by 0 in variable formula");
     return eval_tree(tree->first,i) / denom;
   }
   if (tree->type == MODULO) {
     double denom = eval_tree(tree->second,i);
     if (denom == 0.0) error->one(FLERR,"Modulo 0 in variable formula");
     return fmod(eval_tree(tree->first,i),denom);
   }
   if (tree->type == CARAT) {
     double exponent = eval_tree(tree->second,i);
     if (exponent == 0.0) error->one(FLERR,"Power by 0 in variable formula");
     return pow(eval_tree(tree->first,i),exponent);
   }
   if (tree->type == UNARY) return -eval_tree(tree->first,i);
 
   // logical and comparison operators, result is 1.0 (true) or 0.0 (false)
 
   if (tree->type == NOT) {
     if (eval_tree(tree->first,i) == 0.0) return 1.0;
     else return 0.0;
   }
   if (tree->type == EQ) {
     if (eval_tree(tree->first,i) == eval_tree(tree->second,i)) return 1.0;
     else return 0.0;
   }
   if (tree->type == NE) {
     if (eval_tree(tree->first,i) != eval_tree(tree->second,i)) return 1.0;
     else return 0.0;
   }
   if (tree->type == LT) {
     if (eval_tree(tree->first,i) < eval_tree(tree->second,i)) return 1.0;
     else return 0.0;
   }
   if (tree->type == LE) {
     if (eval_tree(tree->first,i) <= eval_tree(tree->second,i)) return 1.0;
     else return 0.0;
   }
   if (tree->type == GT) {
     if (eval_tree(tree->first,i) > eval_tree(tree->second,i)) return 1.0;
     else return 0.0;
   }
   if (tree->type == GE) {
     if (eval_tree(tree->first,i) >= eval_tree(tree->second,i)) return 1.0;
     else return 0.0;
   }
   if (tree->type == AND) {
     if (eval_tree(tree->first,i) != 0.0 && eval_tree(tree->second,i) != 0.0)
       return 1.0;
     else return 0.0;
   }
   if (tree->type == OR) {
     if (eval_tree(tree->first,i) != 0.0 || eval_tree(tree->second,i) != 0.0)
       return 1.0;
     else return 0.0;
   }
   if (tree->type == XOR) {
     // evaluate each operand exactly once, so that an operand which draws
     // a random number is seen with the same value by both halves of the test
     arg1 = eval_tree(tree->first,i);
     arg2 = eval_tree(tree->second,i);
     if ((arg1 == 0.0 && arg2 != 0.0) || (arg1 != 0.0 && arg2 == 0.0))
       return 1.0;
     else return 0.0;
   }
 
   // math functions, domain errors are flagged before calling libm
 
   if (tree->type == SQRT) {
     arg1 = eval_tree(tree->first,i);
     if (arg1 < 0.0)
       error->one(FLERR,"Sqrt of negative value in variable formula");
     return sqrt(arg1);
   }
   if (tree->type == EXP)
     return exp(eval_tree(tree->first,i));
   if (tree->type == LN) {
     arg1 = eval_tree(tree->first,i);
     if (arg1 <= 0.0)
       error->one(FLERR,"Log of zero/negative value in variable formula");
     return log(arg1);
   }
   if (tree->type == LOG) {
     arg1 = eval_tree(tree->first,i);
     if (arg1 <= 0.0)
       error->one(FLERR,"Log of zero/negative value in variable formula");
     return log10(arg1);
   }
   if (tree->type == ABS)
     return fabs(eval_tree(tree->first,i));
 
   if (tree->type == SIN)
     return sin(eval_tree(tree->first,i));
   if (tree->type == COS)
     return cos(eval_tree(tree->first,i));
   if (tree->type == TAN)
     return tan(eval_tree(tree->first,i));
 
   if (tree->type == ASIN) {
     arg1 = eval_tree(tree->first,i);
     if (arg1 < -1.0 || arg1 > 1.0)
       error->one(FLERR,"Arcsin of invalid value in variable formula");
     return asin(arg1);
   }
   if (tree->type == ACOS) {
     arg1 = eval_tree(tree->first,i);
     if (arg1 < -1.0 || arg1 > 1.0)
       error->one(FLERR,"Arccos of invalid value in variable formula");
     return acos(arg1);
   }
   if (tree->type == ATAN)
     return atan(eval_tree(tree->first,i));
   if (tree->type == ATAN2)
     return atan2(eval_tree(tree->first,i),eval_tree(tree->second,i));
 
   // random number functions
   // the shared generator randomatom is created on first use,
   //   seeded with the 3rd argument plus me
 
   if (tree->type == RANDOM) {
     double lower = eval_tree(tree->first,i);
     double upper = eval_tree(tree->second,i);
     if (randomatom == NULL) {
       int seed = static_cast<int> (eval_tree(tree->extra[0],i));
       if (seed <= 0)
         error->one(FLERR,"Invalid math function in variable formula");
       randomatom = new RanMars(lmp,seed+me);
     }
     return randomatom->uniform()*(upper-lower)+lower;
   }
   if (tree->type == NORMAL) {
     double mu = eval_tree(tree->first,i);
     double sigma = eval_tree(tree->second,i);
     if (sigma < 0.0)
       error->one(FLERR,"Invalid math function in variable formula");
     if (randomatom == NULL) {
       int seed = static_cast<int> (eval_tree(tree->extra[0],i));
       if (seed <= 0)
         error->one(FLERR,"Invalid math function in variable formula");
       randomatom = new RanMars(lmp,seed+me);
     }
     return mu + sigma*randomatom->gaussian();
   }
 
   if (tree->type == CEIL)
     return ceil(eval_tree(tree->first,i));
   if (tree->type == FLOOR)
     return floor(eval_tree(tree->first,i));
   if (tree->type == ROUND)
     return MYROUND(eval_tree(tree->first,i));
 
   // timestep-dependent functions, all read the current update->ntimestep
 
   if (tree->type == RAMP) {
     arg1 = eval_tree(tree->first,i);
     arg2 = eval_tree(tree->second,i);
     // delta = fraction of the beginstep to endstep interval elapsed so far
     double delta = update->ntimestep - update->beginstep;
     if (delta != 0.0) delta /= update->endstep - update->beginstep;
     arg = arg1 + delta*(arg2-arg1);
     return arg;
   }
 
   if (tree->type == STAGGER) {
     int ivalue1 = static_cast<int> (eval_tree(tree->first,i));
     int ivalue2 = static_cast<int> (eval_tree(tree->second,i));
     if (ivalue1 <= 0 || ivalue2 <= 0 || ivalue1 <= ivalue2)
       error->one(FLERR,"Invalid math function in variable formula");
     // lower = largest multiple of ivalue1 that is <= current timestep
     int lower = update->ntimestep/ivalue1 * ivalue1;
     int delta = update->ntimestep - lower;
     if (delta < ivalue2) arg = lower+ivalue2;
     else arg = lower+ivalue1;
     return arg;
   }
 
   if (tree->type == LOGFREQ) {
     int ivalue1 = static_cast<int> (eval_tree(tree->first,i));
     int ivalue2 = static_cast<int> (eval_tree(tree->second,i));
     int ivalue3 = static_cast<int> (eval_tree(tree->extra[0],i));
     if (ivalue1 <= 0 || ivalue2 <= 0 || ivalue3 <= 0 || ivalue2 >= ivalue3)
       error->one(FLERR,"Invalid math function in variable formula");
     if (update->ntimestep < ivalue1) arg = ivalue1;
     else {
       // lower = ivalue1 scaled by the largest power of ivalue3
       //   that does not exceed the current timestep
       int lower = ivalue1;
       while (update->ntimestep >= ivalue3*lower) lower *= ivalue3;
       int multiple = update->ntimestep/lower;
       if (multiple < ivalue2) arg = (multiple+1)*lower;
       else arg = lower*ivalue3;
     }
     return arg;
   }
 
   if (tree->type == LOGFREQ2) {
     int ivalue1 = static_cast<int> (eval_tree(tree->first,i));
     int ivalue2 = static_cast<int> (eval_tree(tree->second,i));
     int ivalue3 = static_cast<int> (eval_tree(tree->extra[0],i));
     // error->one() like every other branch of this per-index evaluation
     if (ivalue1 <= 0 || ivalue2 <= 0 || ivalue3 <= 0 )
       error->one(FLERR,"Invalid math function in variable formula");
     if (update->ntimestep < ivalue1) arg = ivalue1;
     else {
       arg = ivalue1;
       double delta = ivalue1*(ivalue3-1.0)/ivalue2;
       int count = 0;
       // advance in steps of delta until past the current timestep
       // delta grows by a factor of ivalue3 after every ivalue2 steps
       while (update->ntimestep >= arg) {
         arg += delta;
         count++;
         if (count % ivalue2 == 0) delta *= ivalue3;
       }
     }
     arg = ceil(arg);
     return arg;
   }
 
   if (tree->type == STRIDE) {
     int ivalue1 = static_cast<int> (eval_tree(tree->first,i));
     int ivalue2 = static_cast<int> (eval_tree(tree->second,i));
     int ivalue3 = static_cast<int> (eval_tree(tree->extra[0],i));
     if (ivalue1 < 0 || ivalue2 < 0 || ivalue3 <= 0 || ivalue1 > ivalue2)
       error->one(FLERR,"Invalid math function in variable formula");
     if (update->ntimestep < ivalue1) arg = ivalue1;
     else if (update->ntimestep < ivalue2) {
       int offset = update->ntimestep - ivalue1;
       arg = ivalue1 + (offset/ivalue3)*ivalue3 + ivalue3;
       // MAXBIGINT is returned once the stride range is exhausted
       if (arg > ivalue2) arg = MAXBIGINT;
     } else arg = MAXBIGINT;
     return arg;
   }
 
   if (tree->type == STRIDE2) {
     // ivalue1,2,3 = start,stop,increment of the outer stride
     // ivalue4,5,6 = start,stop,increment of the inner stride
     int ivalue1 = static_cast<int> (eval_tree(tree->first,i));
     int ivalue2 = static_cast<int> (eval_tree(tree->second,i));
     int ivalue3 = static_cast<int> (eval_tree(tree->extra[0],i));
     int ivalue4 = static_cast<int> (eval_tree(tree->extra[1],i));
     int ivalue5 = static_cast<int> (eval_tree(tree->extra[2],i));
     int ivalue6 = static_cast<int> (eval_tree(tree->extra[3],i));
     if (ivalue1 < 0 || ivalue2 < 0 || ivalue3 <= 0 || ivalue1 > ivalue2)
       error->one(FLERR,"Invalid math function in variable formula");
     if (ivalue4 < 0 || ivalue5 < 0 || ivalue6 <= 0 || ivalue4 > ivalue5)
       error->one(FLERR,"Invalid math function in variable formula");
     if (ivalue4 < ivalue1 || ivalue5 > ivalue2)
       error->one(FLERR,"Invalid math function in variable formula");
     bigint istep;
     if (update->ntimestep < ivalue1) istep = ivalue1;
     else if (update->ntimestep < ivalue2) {
       if (update->ntimestep < ivalue4 || update->ntimestep > ivalue5) {
         // outside the inner range: advance along the outer stride
         int offset = update->ntimestep - ivalue1;
         istep = ivalue1 + (offset/ivalue3)*ivalue3 + ivalue3;
         // when still before the inner range, the next outer step must not
         //   jump past the first inner step
         // result goes into istep, the parse tree node is left unmodified
         if (update->ntimestep < ivalue4 && istep > ivalue4) istep = ivalue4;
       } else {
         // inside the inner range: advance along the inner stride
         int offset = update->ntimestep - ivalue4;
         istep = ivalue4 + (offset/ivalue6)*ivalue6 + ivalue6;
         if (istep > ivalue5) {
           // inner range exhausted: resume with first outer step after it
           int offset = ivalue5 - ivalue1;
           istep = ivalue1 + (offset/ivalue3)*ivalue3 + ivalue3;
           if (istep > ivalue2) istep = MAXBIGINT;
         }
       }
     } else istep = MAXBIGINT;
     arg = istep;
     return arg;
   }
 
   if (tree->type == VDISPLACE) {
     arg1 = eval_tree(tree->first,i);
     arg2 = eval_tree(tree->second,i);
     double delta = update->ntimestep - update->beginstep;
     arg = arg1 + arg2*delta*update->dt;
     return arg;
   }
 
   if (tree->type == SWIGGLE) {
     arg1 = eval_tree(tree->first,i);
     arg2 = eval_tree(tree->second,i);
     arg3 = eval_tree(tree->extra[0],i);
     // arg3 is the divisor of omega below, so it may not be zero
     if (arg3 == 0.0)
       error->one(FLERR,"Invalid math function in variable formula");
     double delta = update->ntimestep - update->beginstep;
     double omega = 2.0*MY_PI/arg3;
     arg = arg1 + arg2*sin(omega*delta*update->dt);
     return arg;
   }
 
   if (tree->type == CWIGGLE) {
     arg1 = eval_tree(tree->first,i);
     arg2 = eval_tree(tree->second,i);
     arg3 = eval_tree(tree->extra[0],i);
     // arg3 is the divisor of omega below, so it may not be zero
     if (arg3 == 0.0)
       error->one(FLERR,"Invalid math function in variable formula");
     double delta = update->ntimestep - update->beginstep;
     double omega = 2.0*MY_PI/arg3;
     arg = arg1 + arg2*(1.0-cos(omega*delta*update->dt));
     return arg;
   }
 
   // mask functions: 1.0 if atom I matches, else 0.0
   // ivalue1 is ANDed with the atom mask for GMASK and GRMASK,
   //   region index is ivalue1 for RMASK and ivalue2 for GRMASK
 
   if (tree->type == GMASK) {
     if (atom->mask[i] & tree->ivalue1) return 1.0;
     else return 0.0;
   }
 
   if (tree->type == RMASK) {
     if (domain->regions[tree->ivalue1]->match(atom->x[i][0],
                                               atom->x[i][1],
                                               atom->x[i][2])) return 1.0;
     else return 0.0;
   }
 
   if (tree->type == GRMASK) {
     if ((atom->mask[i] & tree->ivalue1) &&
         (domain->regions[tree->ivalue2]->match(atom->x[i][0],
                                                atom->x[i][1],
                                                atom->x[i][2]))) return 1.0;
     else return 0.0;
   }
 
   return 0.0;
 }
 
 /* ----------------------------------------------------------------------
    walk the entire tree and determine the common length of all vectors
      referenced by a vector-style variable
    return N if every vector found has the same length N
    return 0 if no vector length was found, caller flags as error
    return -1 if vector lengths are inconsistent, caller flags as error
 ------------------------------------------------------------------------- */
 
 int Variable::size_tree_vector(Tree *tree)
 {
   // length contributed by this node itself, 0 if it is not a vector
 
   int length = (tree->type == VECTORARRAY) ? tree->nvector : 0;
 
   // merge in the result of each child sub-tree
 
   if (tree->first) {
     int sub = size_tree_vector(tree->first);
     length = compare_tree_vector(length,sub);
   }
   if (tree->second) {
     int sub = size_tree_vector(tree->second);
     length = compare_tree_vector(length,sub);
   }
   for (int m = 0; m < tree->nextra; m++) {
     int sub = size_tree_vector(tree->extra[m]);
     length = compare_tree_vector(length,sub);
   }
 
   return length;
 }
 
 /* ----------------------------------------------------------------------
    merge the sizes of two vectors for vector-style variable
    0 = no size, negative = error
    return the positive size if both agree or only one of them has a size
    return 0 if neither has a size
    return -1 if either is already an error or the positive sizes differ
 ------------------------------------------------------------------------- */
 
 int Variable::compare_tree_vector(int i, int j)
 {
   // an error on either side propagates
 
   if (i < 0 || j < 0) return -1;
 
   // at most one side has a size, keep the other side as is
 
   if (i == 0) return j;
   if (j == 0) return i;
 
   // both sides have a size, they must agree
 
   return (i == j) ? i : -1;
 }
 
 /* ----------------------------------------------------------------------
    recursively delete a parse tree: children first, then the node itself
    also deletes the extra[] list and, if selfalloc is set, the node's array
 ---------------------------------------------------------------------- */
 
 void Variable::free_tree(Tree *tree)
 {
   // release both child sub-trees before touching this node
 
   if (tree->first) free_tree(tree->first);
   if (tree->second) free_tree(tree->second);
 
   // release every extra sub-tree, then the list that held them
 
   if (tree->nextra) {
     Tree **extras = tree->extra;
     const int nextras = tree->nextra;
     for (int m = 0; m < nextras; m++) free_tree(extras[m]);
     delete [] extras;
   }
 
   // array is only freed when this node is flagged as having allocated it
 
   if (tree->selfalloc) memory->destroy(tree->array);
   delete tree;
 }
 
 /* ----------------------------------------------------------------------
    find matching parenthesis in str, allocate contents = str between parens
    i = left paren
    return loc or right paren
 ------------------------------------------------------------------------- */
 
 int Variable::find_matching_paren(char *str, int i,char *&contents)
 {
   // istop = matching ')' at same level, allowing for nested parens
 
   int istart = i;
   int ilevel = 0;
   while (1) {
     i++;
     if (!str[i]) break;
     if (str[i] == '(') ilevel++;
     else if (str[i] == ')' && ilevel) ilevel--;
     else if (str[i] == ')') break;
   }
   if (!str[i]) error->all(FLERR,"Invalid syntax in variable formula");
   int istop = i;
 
   int n = istop - istart - 1;
   contents = new char[n+1];
   strncpy(contents,&str[istart+1],n);
   contents[n] = '\0';
 
   return istop;
 }
 
 /* ----------------------------------------------------------------------
    find int between brackets and return it
    return a tagint, since value can be an atom ID
    ptr initially points to left bracket
    return it pointing to right bracket
    error if no right bracket or brackets are empty or index <= 0
    if varallow = 0: error if any between-bracket chars are non-digits
    if varallow = 1: also allow for v_name, where name is variable name
      name is looked up via find() and evaluated via retrieve(),
      its value is truncated to a tagint
 ------------------------------------------------------------------------- */
 
 tagint Variable::int_between_brackets(char *&ptr, int varallow)
 {
   int varflag;
   tagint index;
 
   // start = first char after the left bracket
 
   char *start = ++ptr;
 
   // advance ptr to the right bracket, validating each char on the way
   // chars are cast to unsigned char since calling isalnum()/isdigit()
   //   with a negative char value is undefined behavior
 
   if (varallow && strstr(ptr,"v_") == ptr) {
     varflag = 1;
     while (*ptr && *ptr != ']') {
       if (!isalnum(static_cast<unsigned char> (*ptr)) && *ptr != '_')
         error->all(FLERR,"Variable name between brackets must be "
                    "alphanumeric or underscore characters");
       ptr++;
     }
 
   } else {
     varflag = 0;
     while (*ptr && *ptr != ']') {
       if (!isdigit(static_cast<unsigned char> (*ptr)))
         error->all(FLERR,"Non digit character between brackets in variable");
       ptr++;
     }
   }
 
   if (*ptr != ']') error->all(FLERR,"Mismatched brackets in variable");
   if (ptr == start) error->all(FLERR,"Empty brackets in variable");
 
   // temporarily terminate the string at the right bracket
   //   so that start is a standalone token, restored below
 
   *ptr = '\0';
 
   // evaluate index as floating point variable or as tagint via ATOTAGINT()
 
   if (varflag) {
     // skip the leading "v_" to get the variable name
     char *id = start+2;
     int ivar = find(id);
     if (ivar < 0)
       error->all(FLERR,"Invalid variable name in variable formula");
 
     char *var = retrieve(id);
     if (var == NULL)
       error->all(FLERR,"Invalid variable evaluation in variable formula");
     index = static_cast<tagint> (atof(var));
 
   } else index = ATOTAGINT(start);
 
   *ptr = ']';
 
   // a variable can evaluate to a negative number, so reject that as well
 
   if (index <= 0)
     error->all(FLERR,"Index between variable brackets must be positive");
   return index;
 }
 
 /* ----------------------------------------------------------------------
    process a math function in formula
    push result onto tree or arg stack
    word = math function
    contents = str between parentheses with comma-separated args
    return 0 if not a match, 1 if successfully processed
    customize by adding a math function:
      sqrt(),exp(),ln(),log(),abs(),sin(),cos(),tan(),asin(),acos(),atan(),
      atan2(y,x),random(x,y,z),normal(x,y,z),ceil(),floor(),round(),
      ramp(x,y),stagger(x,y),logfreq(x,y,z),logfreq2(x,y,z),
      stride(x,y,z),stride2(x,y,z,a,b,c),vdisplace(x,y),swiggle(x,y,z),
      cwiggle(x,y,z)
 ------------------------------------------------------------------------- */
 
 int Variable::math_function(char *word, char *contents, Tree **tree,
                             Tree **treestack, int &ntreestack,
                             double *argstack, int &nargstack)
 {
   // process one math function call found in a variable formula
   // word = function name, contents = text between its parentheses
   // if tree is non-NULL: build a Tree node whose children are the parsed
   //   args, push it onto treestack, and let the matching branch set its type
   // if tree is NULL: evaluate each arg to a number and push the
   //   function result onto argstack
   // return 0 if word is not a math function, 1 if it was processed
 
   // word not a match to any math function
 
   if (strcmp(word,"sqrt") && strcmp(word,"exp") &&
       strcmp(word,"ln") && strcmp(word,"log") &&
       strcmp(word,"abs") &&
       strcmp(word,"sin") && strcmp(word,"cos") &&
       strcmp(word,"tan") && strcmp(word,"asin") &&
       strcmp(word,"acos") && strcmp(word,"atan") &&
       strcmp(word,"atan2") && strcmp(word,"random") &&
       strcmp(word,"normal") && strcmp(word,"ceil") &&
       strcmp(word,"floor") && strcmp(word,"round") &&
       strcmp(word,"ramp") && strcmp(word,"stagger") &&
       strcmp(word,"logfreq") && strcmp(word,"logfreq2") &&
       strcmp(word,"stride") && strcmp(word,"stride2") &&
       strcmp(word,"vdisplace") && strcmp(word,"swiggle") &&
       strcmp(word,"cwiggle"))
     return 0;
 
   // parse contents for comma-separated args
   // narg = number of args, args = strings between commas
 
   char *args[MAXFUNCARG];
   int narg = parse_args(contents,args);
 
   // value1,value2 = first two numeric args, values[] = any args beyond those
   // these are only set when tree is NULL
 
   Tree *newtree;
   double value1,value2;
   double values[MAXFUNCARG-2];
 
   if (tree) {
 
     // tree mode: 1st and 2nd args become first/second children,
     // any remaining args are stored in the extra[] array of the node
 
     newtree = new Tree();
     newtree->first = newtree->second = NULL;
     newtree->nextra = 0;
     Tree *argtree;
     evaluate(args[0],&argtree);
     newtree->first = argtree;
     if (narg > 1) {
       evaluate(args[1],&argtree);
       newtree->second = argtree;
       if (narg > 2) {
         newtree->nextra = narg-2;
         newtree->extra = new Tree*[narg-2];
         for (int i = 2; i < narg; i++) {
           evaluate(args[i],&argtree);
           newtree->extra[i-2] = argtree;
         }
       }
     }
     treestack[ntreestack++] = newtree;
 
   } else {
 
     // scalar mode: evaluate every arg to a number right away
 
     value1 = evaluate(args[0],NULL);
     if (narg > 1) {
       value2 = evaluate(args[1],NULL);
       if (narg > 2) {
         for (int i = 2; i < narg; i++)
           values[i-2] = evaluate(args[i],NULL);
       }
     }
   }
 
   // individual math functions
   // customize by adding a function
   // each branch checks the arg count, then either sets the node type
   //   (tree mode) or computes the result and pushes it (scalar mode)
 
   if (strcmp(word,"sqrt") == 0) {
     if (narg != 1)
       error->all(FLERR,"Invalid math function in variable formula");
     if (tree) newtree->type = SQRT;
     else {
       if (value1 < 0.0)
         error->all(FLERR,"Sqrt of negative value in variable formula");
       argstack[nargstack++] = sqrt(value1);
     }
 
   } else if (strcmp(word,"exp") == 0) {
     if (narg != 1)
       error->all(FLERR,"Invalid math function in variable formula");
     if (tree) newtree->type = EXP;
     else argstack[nargstack++] = exp(value1);
   } else if (strcmp(word,"ln") == 0) {
     if (narg != 1)
       error->all(FLERR,"Invalid math function in variable formula");
     if (tree) newtree->type = LN;
     else {
       if (value1 <= 0.0)
         error->all(FLERR,"Log of zero/negative value in variable formula");
       argstack[nargstack++] = log(value1);
     }
   } else if (strcmp(word,"log") == 0) {
     if (narg != 1)
       error->all(FLERR,"Invalid math function in variable formula");
     if (tree) newtree->type = LOG;
     else {
       if (value1 <= 0.0)
         error->all(FLERR,"Log of zero/negative value in variable formula");
       argstack[nargstack++] = log10(value1);
     }
   } else if (strcmp(word,"abs") == 0) {
     if (narg != 1)
       error->all(FLERR,"Invalid math function in variable formula");
     if (tree) newtree->type = ABS;
     else argstack[nargstack++] = fabs(value1);
 
   } else if (strcmp(word,"sin") == 0) {
     if (narg != 1)
       error->all(FLERR,"Invalid math function in variable formula");
     if (tree) newtree->type = SIN;
     else argstack[nargstack++] = sin(value1);
   } else if (strcmp(word,"cos") == 0) {
     if (narg != 1)
       error->all(FLERR,"Invalid math function in variable formula");
     if (tree) newtree->type = COS;
     else argstack[nargstack++] = cos(value1);
   } else if (strcmp(word,"tan") == 0) {
     if (narg != 1)
       error->all(FLERR,"Invalid math function in variable formula");
     if (tree) newtree->type = TAN;
     else argstack[nargstack++] = tan(value1);
 
   } else if (strcmp(word,"asin") == 0) {
     if (narg != 1)
       error->all(FLERR,"Invalid math function in variable formula");
     if (tree) newtree->type = ASIN;
     else {
       if (value1 < -1.0 || value1 > 1.0)
         error->all(FLERR,"Arcsin of invalid value in variable formula");
       argstack[nargstack++] = asin(value1);
     }
   } else if (strcmp(word,"acos") == 0) {
     if (narg != 1)
       error->all(FLERR,"Invalid math function in variable formula");
     if (tree) newtree->type = ACOS;
     else {
       if (value1 < -1.0 || value1 > 1.0)
         error->all(FLERR,"Arccos of invalid value in variable formula");
       argstack[nargstack++] = acos(value1);
     }
   } else if (strcmp(word,"atan") == 0) {
     if (narg != 1)
       error->all(FLERR,"Invalid math function in variable formula");
     if (tree) newtree->type = ATAN;
     else argstack[nargstack++] = atan(value1);
   } else if (strcmp(word,"atan2") == 0) {
     if (narg != 2)
       error->all(FLERR,"Invalid math function in variable formula");
     if (tree) newtree->type = ATAN2;
     else argstack[nargstack++] = atan2(value1,value2);
 
   } else if (strcmp(word,"random") == 0) {
     if (narg != 3)
       error->all(FLERR,"Invalid math function in variable formula");
     if (tree) newtree->type = RANDOM;
     else {
 
       // 3rd arg is the seed; it is only consulted the first time,
       // when the shared generator has not been created yet
 
       if (randomequal == NULL) {
         int seed = static_cast<int> (values[0]);
         if (seed <= 0)
           error->all(FLERR,"Invalid math function in variable formula");
         randomequal = new RanMars(lmp,seed);
       }
       argstack[nargstack++] = randomequal->uniform()*(value2-value1) + value1;
     }
   } else if (strcmp(word,"normal") == 0) {
     if (narg != 3)
       error->all(FLERR,"Invalid math function in variable formula");
     if (tree) newtree->type = NORMAL;
     else {
       if (value2 < 0.0)
         error->all(FLERR,"Invalid math function in variable formula");
       if (randomequal == NULL) {
         int seed = static_cast<int> (values[0]);
         if (seed <= 0)
           error->all(FLERR,"Invalid math function in variable formula");
         randomequal = new RanMars(lmp,seed);
       }
       argstack[nargstack++] = value1 + value2*randomequal->gaussian();
     }
 
   } else if (strcmp(word,"ceil") == 0) {
     if (narg != 1)
       error->all(FLERR,"Invalid math function in variable formula");
     if (tree) newtree->type = CEIL;
     else argstack[nargstack++] = ceil(value1);
 
   } else if (strcmp(word,"floor") == 0) {
     if (narg != 1)
       error->all(FLERR,"Invalid math function in variable formula");
     if (tree) newtree->type = FLOOR;
     else argstack[nargstack++] = floor(value1);
 
   } else if (strcmp(word,"round") == 0) {
     if (narg != 1)
       error->all(FLERR,"Invalid math function in variable formula");
     if (tree) newtree->type = ROUND;
     else argstack[nargstack++] = MYROUND(value1);
 
   } else if (strcmp(word,"ramp") == 0) {
     if (narg != 2)
       error->all(FLERR,"Invalid math function in variable formula");
     if (update->whichflag == 0)
       error->all(FLERR,"Cannot use ramp in variable formula between runs");
     if (tree) newtree->type = RAMP;
     else {
 
       // delta = fraction of the beginstep-to-endstep interval elapsed
       // the division is skipped when delta is 0 (ntimestep == beginstep)
 
       double delta = update->ntimestep - update->beginstep;
       if (delta != 0.0) delta /= update->endstep - update->beginstep;
       double value = value1 + delta*(value2-value1);
       argstack[nargstack++] = value;
     }
 
   } else if (strcmp(word,"stagger") == 0) {
     if (narg != 2)
       error->all(FLERR,"Invalid math function in variable formula");
     if (tree) newtree->type = STAGGER;
     else {
       int ivalue1 = static_cast<int> (value1);
       int ivalue2 = static_cast<int> (value2);
       if (ivalue1 <= 0 || ivalue2 <= 0 || ivalue1 <= ivalue2)
         error->all(FLERR,"Invalid math function in variable formula");
 
       // lower = largest multiple of ivalue1 that is <= current timestep
 
       int lower = update->ntimestep/ivalue1 * ivalue1;
       int delta = update->ntimestep - lower;
       double value;
       if (delta < ivalue2) value = lower+ivalue2;
       else value = lower+ivalue1;
       argstack[nargstack++] = value;
     }
 
   } else if (strcmp(word,"logfreq") == 0) {
     if (narg != 3)
       error->all(FLERR,"Invalid math function in variable formula");
     if (tree) newtree->type = LOGFREQ;
     else {
       int ivalue1 = static_cast<int> (value1);
       int ivalue2 = static_cast<int> (value2);
       int ivalue3 = static_cast<int> (values[0]);
       if (ivalue1 <= 0 || ivalue2 <= 0 || ivalue3 <= 0 || ivalue2 >= ivalue3)
         error->all(FLERR,"Invalid math function in variable formula");
       double value;
       if (update->ntimestep < ivalue1) value = ivalue1;
       else {
 
         // lower = ivalue1 scaled by the largest power of ivalue3
         // that does not exceed the current timestep
 
         int lower = ivalue1;
         while (update->ntimestep >= ivalue3*lower) lower *= ivalue3;
         int multiple = update->ntimestep/lower;
         if (multiple < ivalue2) value = (multiple+1)*lower;
         else value = lower*ivalue3;
       }
       argstack[nargstack++] = value;
     }
 
   } else if (strcmp(word,"logfreq2") == 0) {
     if (narg != 3)
       error->all(FLERR,"Invalid math function in variable formula");
     if (tree) newtree->type = LOGFREQ2;
     else {
       int ivalue1 = static_cast<int> (value1);
       int ivalue2 = static_cast<int> (value2);
       int ivalue3 = static_cast<int> (values[0]);
       if (ivalue1 <= 0 || ivalue2 <= 0 || ivalue3 <= 0 )
         error->all(FLERR,"Invalid math function in variable formula");
       double value;
       if (update->ntimestep < ivalue1) value = ivalue1;
       else {
 
         // step forward from ivalue1 in increments of delta until the
         // current timestep is passed; delta grows by a factor of
         // ivalue3 after every ivalue2 increments
 
         value = ivalue1;
         double delta = ivalue1*(ivalue3-1.0)/ivalue2;
         int count = 0;
         while (update->ntimestep >= value) {
           value += delta;
           count++;
           if (count % ivalue2 == 0) delta *= ivalue3;
         }
       }
       argstack[nargstack++] = ceil(value);
     }
 
   } else if (strcmp(word,"stride") == 0) {
     if (narg != 3)
       error->all(FLERR,"Invalid math function in variable formula");
     if (tree) newtree->type = STRIDE;
     else {
       int ivalue1 = static_cast<int> (value1);
       int ivalue2 = static_cast<int> (value2);
       int ivalue3 = static_cast<int> (values[0]);
       if (ivalue1 < 0 || ivalue2 < 0 || ivalue3 <= 0 || ivalue1 > ivalue2)
         error->one(FLERR,"Invalid math function in variable formula");
 
       // result is the next step of the ivalue1..ivalue2 sequence with
       // spacing ivalue3, or MAXBIGINT once the sequence is exhausted
 
       double value;
       if (update->ntimestep < ivalue1) value = ivalue1;
       else if (update->ntimestep < ivalue2) {
         int offset = update->ntimestep - ivalue1;
         value = ivalue1 + (offset/ivalue3)*ivalue3 + ivalue3;
         if (value > ivalue2) value = MAXBIGINT;
       } else value = MAXBIGINT;
       argstack[nargstack++] = value;
     }
 
   } else if (strcmp(word,"stride2") == 0) {
     if (narg != 6)
       error->all(FLERR,"Invalid math function in variable formula");
     if (tree) newtree->type = STRIDE2;
     else {
 
       // args 1-3 = outer start,stop,spacing
       // args 4-6 = inner start,stop,spacing, nested within the outer range
 
       int ivalue1 = static_cast<int> (value1);
       int ivalue2 = static_cast<int> (value2);
       int ivalue3 = static_cast<int> (values[0]);
       int ivalue4 = static_cast<int> (values[1]);
       int ivalue5 = static_cast<int> (values[2]);
       int ivalue6 = static_cast<int> (values[3]);
       if (ivalue1 < 0 || ivalue2 < 0 || ivalue3 <= 0 || ivalue1 > ivalue2)
         error->one(FLERR,"Invalid math function in variable formula");
       if (ivalue4 < 0 || ivalue5 < 0 || ivalue6 <= 0 || ivalue4 > ivalue5)
         error->one(FLERR,"Invalid math function in variable formula");
       if (ivalue4 < ivalue1 || ivalue5 > ivalue2)
         error->one(FLERR,"Invalid math function in variable formula");
       bigint istep;
       if (update->ntimestep < ivalue1) istep = ivalue1;
       else if (update->ntimestep < ivalue2) {
         if (update->ntimestep < ivalue4 || update->ntimestep > ivalue5) {
 
           // outside the inner range: advance with the outer spacing,
           // but do not step over the start of the inner range
 
           int offset = update->ntimestep - ivalue1;
           istep = ivalue1 + (offset/ivalue3)*ivalue3 + ivalue3;
           if (update->ntimestep < ivalue4 && istep > ivalue4) istep = ivalue4;
         } else {
 
           // inside the inner range: advance with the inner spacing,
           // falling back to the outer sequence when leaving the range
 
           int offset = update->ntimestep - ivalue4;
           istep = ivalue4 + (offset/ivalue6)*ivalue6 + ivalue6;
           if (istep > ivalue5) {
             int offset = ivalue5 - ivalue1;
             istep = ivalue1 + (offset/ivalue3)*ivalue3 + ivalue3;
             if (istep > ivalue2) istep = MAXBIGINT;
           }
         }
       } else istep = MAXBIGINT;
       double value = istep;
       argstack[nargstack++] = value;
     }
 
   } else if (strcmp(word,"vdisplace") == 0) {
     if (narg != 2)
       error->all(FLERR,"Invalid math function in variable formula");
     if (update->whichflag == 0)
       error->all(FLERR,"Cannot use vdisplace in variable formula between runs");
     if (tree) newtree->type = VDISPLACE;
     else {
       double delta = update->ntimestep - update->beginstep;
       double value = value1 + value2*delta*update->dt;
       argstack[nargstack++] = value;
     }
 
   } else if (strcmp(word,"swiggle") == 0) {
     if (narg != 3)
       error->all(FLERR,"Invalid math function in variable formula");
     if (update->whichflag == 0)
       error->all(FLERR,"Cannot use swiggle in variable formula between runs");
 
     // node must be tagged SWIGGLE so the tree path agrees with the sine
     // form computed below; tagging it CWIGGLE would make a tree-evaluated
     // swiggle() indistinguishable from cwiggle()
 
     if (tree) newtree->type = SWIGGLE;
     else {
       if (values[0] == 0.0)
         error->all(FLERR,"Invalid math function in variable formula");
       double delta = update->ntimestep - update->beginstep;
       double omega = 2.0*MY_PI/values[0];
       double value = value1 + value2*sin(omega*delta*update->dt);
       argstack[nargstack++] = value;
     }
 
   } else if (strcmp(word,"cwiggle") == 0) {
     if (narg != 3)
       error->all(FLERR,"Invalid math function in variable formula");
     if (update->whichflag == 0)
       error->all(FLERR,"Cannot use cwiggle in variable formula between runs");
     if (tree) newtree->type = CWIGGLE;
     else {
       if (values[0] == 0.0)
         error->all(FLERR,"Invalid math function in variable formula");
       double delta = update->ntimestep - update->beginstep;
       double omega = 2.0*MY_PI/values[0];
       double value = value1 + value2*(1.0-cos(omega*delta*update->dt));
       argstack[nargstack++] = value;
     }
   }
 
   // delete stored args
 
   for (int i = 0; i < narg; i++) delete [] args[i];
 
   return 1;
 }
 
 /* ----------------------------------------------------------------------
    process a group function in formula with optional region arg
    push result onto tree or arg stack
    word = group function
    contents = str between parentheses with one,two,three args
    return 0 if not a match, 1 if successfully processed
    customize by adding a group function with optional region arg:
      count(group),mass(group),charge(group),
      xcm(group,dim),vcm(group,dim),fcm(group,dim),
      bound(group,xmin),gyration(group),ke(group),angmom(group,dim),
      torque(group,dim),inertia(group,dim),omega(group,dim)
 ------------------------------------------------------------------------- */
 
 int Variable::group_function(char *word, char *contents, Tree **tree,
                              Tree **treestack, int &ntreestack,
                              double *argstack, int &nargstack)
 {
   // word not a match to any group function
 
   if (strcmp(word,"count") && strcmp(word,"mass") &&
       strcmp(word,"charge") && strcmp(word,"xcm") &&
       strcmp(word,"vcm") && strcmp(word,"fcm") &&
       strcmp(word,"bound") && strcmp(word,"gyration") &&
       strcmp(word,"ke") && strcmp(word,"angmom") &&
       strcmp(word,"torque") && strcmp(word,"inertia") &&
       strcmp(word,"omega"))
     return 0;
 
   // parse contents for comma-separated args
   // narg = number of args, args = strings between commas
 
   char *args[MAXFUNCARG];
   int narg = parse_args(contents,args);
 
   // group to operate on
 
   int igroup = group->find(args[0]);
   if (igroup == -1)
     error->all(FLERR,"Group ID in variable formula does not exist");
 
   // match word to group function
 
   double value;
 
   if (strcmp(word,"count") == 0) {
     if (narg == 1) value = group->count(igroup);
     else if (narg == 2) value = group->count(igroup,region_function(args[1]));
     else error->all(FLERR,"Invalid group function in variable formula");
 
   } else if (strcmp(word,"mass") == 0) {
     if (narg == 1) value = group->mass(igroup);
     else if (narg == 2) value = group->mass(igroup,region_function(args[1]));
     else error->all(FLERR,"Invalid group function in variable formula");
 
   } else if (strcmp(word,"charge") == 0) {
     if (narg == 1) value = group->charge(igroup);
     else if (narg == 2) value = group->charge(igroup,region_function(args[1]));
     else error->all(FLERR,"Invalid group function in variable formula");
 
   } else if (strcmp(word,"xcm") == 0) {
     atom->check_mass(FLERR);
     double xcm[3];
     if (narg == 2) {
       double masstotal = group->mass(igroup);
       group->xcm(igroup,masstotal,xcm);
     } else if (narg == 3) {
       int iregion = region_function(args[2]);
       double masstotal = group->mass(igroup,iregion);
       group->xcm(igroup,masstotal,xcm,iregion);
     } else error->all(FLERR,"Invalid group function in variable formula");
     if (strcmp(args[1],"x") == 0) value = xcm[0];
     else if (strcmp(args[1],"y") == 0) value = xcm[1];
     else if (strcmp(args[1],"z") == 0) value = xcm[2];
     else error->all(FLERR,"Invalid group function in variable formula");
 
   } else if (strcmp(word,"vcm") == 0) {
     atom->check_mass(FLERR);
     double vcm[3];
     if (narg == 2) {
       double masstotal = group->mass(igroup);
       group->vcm(igroup,masstotal,vcm);
     } else if (narg == 3) {
       int iregion = region_function(args[2]);
       double masstotal = group->mass(igroup,iregion);
       group->vcm(igroup,masstotal,vcm,iregion);
     } else error->all(FLERR,"Invalid group function in variable formula");
     if (strcmp(args[1],"x") == 0) value = vcm[0];
     else if (strcmp(args[1],"y") == 0) value = vcm[1];
     else if (strcmp(args[1],"z") == 0) value = vcm[2];
     else error->all(FLERR,"Invalid group function in variable formula");
 
   } else if (strcmp(word,"fcm") == 0) {
     double fcm[3];
     if (narg == 2) group->fcm(igroup,fcm);
     else if (narg == 3) group->fcm(igroup,fcm,region_function(args[2]));
     else error->all(FLERR,"Invalid group function in variable formula");
     if (strcmp(args[1],"x") == 0) value = fcm[0];
     else if (strcmp(args[1],"y") == 0) value = fcm[1];
     else if (strcmp(args[1],"z") == 0) value = fcm[2];
     else error->all(FLERR,"Invalid group function in variable formula");
 
   } else if (strcmp(word,"bound") == 0) {
     double minmax[6];
     if (narg == 2) group->bounds(igroup,minmax);
     else if (narg == 3) group->bounds(igroup,minmax,region_function(args[2]));
     else error->all(FLERR,"Invalid group function in variable formula");
     if (strcmp(args[1],"xmin") == 0) value = minmax[0];
     else if (strcmp(args[1],"xmax") == 0) value = minmax[1];
     else if (strcmp(args[1],"ymin") == 0) value = minmax[2];
     else if (strcmp(args[1],"ymax") == 0) value = minmax[3];
     else if (strcmp(args[1],"zmin") == 0) value = minmax[4];
     else if (strcmp(args[1],"zmax") == 0) value = minmax[5];
     else error->all(FLERR,"Invalid group function in variable formula");
 
   } else if (strcmp(word,"gyration") == 0) {
     atom->check_mass(FLERR);
     double xcm[3];
     if (narg == 1) {
       double masstotal = group->mass(igroup);
       group->xcm(igroup,masstotal,xcm);
       value = group->gyration(igroup,masstotal,xcm);
     } else if (narg == 2) {
       int iregion = region_function(args[1]);
       double masstotal = group->mass(igroup,iregion);
       group->xcm(igroup,masstotal,xcm,iregion);
       value = group->gyration(igroup,masstotal,xcm,iregion);
     } else error->all(FLERR,"Invalid group function in variable formula");
 
   } else if (strcmp(word,"ke") == 0) {
     if (narg == 1) value = group->ke(igroup);
     else if (narg == 2) value = group->ke(igroup,region_function(args[1]));
     else error->all(FLERR,"Invalid group function in variable formula");
 
   } else if (strcmp(word,"angmom") == 0) {
     atom->check_mass(FLERR);
     double xcm[3],lmom[3];
     if (narg == 2) {
       double masstotal = group->mass(igroup);
       group->xcm(igroup,masstotal,xcm);
       group->angmom(igroup,xcm,lmom);
     } else if (narg == 3) {
       int iregion = region_function(args[2]);
       double masstotal = group->mass(igroup,iregion);
       group->xcm(igroup,masstotal,xcm,iregion);
       group->angmom(igroup,xcm,lmom,iregion);
     } else error->all(FLERR,"Invalid group function in variable formula");
     if (strcmp(args[1],"x") == 0) value = lmom[0];
     else if (strcmp(args[1],"y") == 0) value = lmom[1];
     else if (strcmp(args[1],"z") == 0) value = lmom[2];
     else error->all(FLERR,"Invalid group function in variable formula");
 
   } else if (strcmp(word,"torque") == 0) {
     atom->check_mass(FLERR);
     double xcm[3],tq[3];
     if (narg == 2) {
       double masstotal = group->mass(igroup);
       group->xcm(igroup,masstotal,xcm);
       group->torque(igroup,xcm,tq);
     } else if (narg == 3) {
       int iregion = region_function(args[2]);
       double masstotal = group->mass(igroup,iregion);
       group->xcm(igroup,masstotal,xcm,iregion);
       group->torque(igroup,xcm,tq,iregion);
     } else error->all(FLERR,"Invalid group function in variable formula");
     if (strcmp(args[1],"x") == 0) value = tq[0];
     else if (strcmp(args[1],"y") == 0) value = tq[1];
     else if (strcmp(args[1],"z") == 0) value = tq[2];
     else error->all(FLERR,"Invalid group function in variable formula");
 
   } else if (strcmp(word,"inertia") == 0) {
     atom->check_mass(FLERR);
     double xcm[3],inertia[3][3];
     if (narg == 2) {
       double masstotal = group->mass(igroup);
       group->xcm(igroup,masstotal,xcm);
       group->inertia(igroup,xcm,inertia);
     } else if (narg == 3) {
       int iregion = region_function(args[2]);
       double masstotal = group->mass(igroup,iregion);
       group->xcm(igroup,masstotal,xcm,iregion);
       group->inertia(igroup,xcm,inertia,iregion);
     } else error->all(FLERR,"Invalid group function in variable formula");
     if (strcmp(args[1],"xx") == 0) value = inertia[0][0];
     else if (strcmp(args[1],"yy") == 0) value = inertia[1][1];
     else if (strcmp(args[1],"zz") == 0) value = inertia[2][2];
     else if (strcmp(args[1],"xy") == 0) value = inertia[0][1];
     else if (strcmp(args[1],"yz") == 0) value = inertia[1][2];
     else if (strcmp(args[1],"xz") == 0) value = inertia[0][2];
     else error->all(FLERR,"Invalid group function in variable formula");
 
   } else if (strcmp(word,"omega") == 0) {
     atom->check_mass(FLERR);
     double xcm[3],angmom[3],inertia[3][3],omega[3];
     if (narg == 2) {
       double masstotal = group->mass(igroup);
       group->xcm(igroup,masstotal,xcm);
       group->angmom(igroup,xcm,angmom);
       group->inertia(igroup,xcm,inertia);
       group->omega(angmom,inertia,omega);
     } else if (narg == 3) {
       int iregion = region_function(args[2]);
       double masstotal = group->mass(igroup,iregion);
       group->xcm(igroup,masstotal,xcm,iregion);
       group->angmom(igroup,xcm,angmom,iregion);
       group->inertia(igroup,xcm,inertia,iregion);
       group->omega(angmom,inertia,omega);
     } else error->all(FLERR,"Invalid group function in variable formula");
     if (strcmp(args[1],"x") == 0) value = omega[0];
     else if (strcmp(args[1],"y") == 0) value = omega[1];
     else if (strcmp(args[1],"z") == 0) value = omega[2];
     else error->all(FLERR,"Invalid group function in variable formula");
   }
 
   // delete stored args
 
   for (int i = 0; i < narg; i++) delete [] args[i];
 
   // save value in tree or on argstack
 
   if (tree) {
     Tree *newtree = new Tree();
     newtree->type = VALUE;
     newtree->value = value;
     newtree->first = newtree->second = NULL;
     newtree->nextra = 0;
     treestack[ntreestack++] = newtree;
   } else argstack[nargstack++] = value;
 
   return 1;
 }
 
 /* ----------------------------------------------------------------------
    look up a region by ID for use in a formula
    error out if no region with that ID exists
    return the index of the region, after calling its init()
 ------------------------------------------------------------------------- */
 
 int Variable::region_function(char *id)
 {
   const int ireg = domain->find_region(id);
   if (ireg == -1)
     error->all(FLERR,"Region ID in variable formula does not exist");
 
   // re-init the region, since its sub-regions may have been deleted
 
   domain->regions[ireg]->init();
   return ireg;
 }
 
 /* ----------------------------------------------------------------------
    process a special function in formula
    push result onto tree or arg stack
    word = special function
    contents = str between parentheses with one,two,three args
    return 0 if not a match, 1 if successfully processed
    customize by adding a special function:
       sum(x),min(x),max(x),ave(x),trap(x),slope(x),
       gmask(x),rmask(x),grmask(x,y),next(x),
       is_active(...),is_defined(...),is_available(...)
 ------------------------------------------------------------------------- */
 
 int Variable::special_function(char *word, char *contents, Tree **tree,
                                Tree **treestack, int &ntreestack,
                                double *argstack, int &nargstack)
 {
   double value,xvalue,sx,sy,sxx,sxy;
 
   // word not a match to any special function
 
   if (strcmp(word,"sum") && strcmp(word,"min") && strcmp(word,"max") &&
       strcmp(word,"ave") && strcmp(word,"trap") && strcmp(word,"slope") &&
       strcmp(word,"gmask") && strcmp(word,"rmask") &&
       strcmp(word,"grmask") && strcmp(word,"next") &&
       strcmp(word,"is_active") && strcmp(word,"is_defined") &&
       strcmp(word,"is_available"))
     return 0;
 
   // parse contents for comma-separated args
   // narg = number of args, args = strings between commas
 
   char *args[MAXFUNCARG];
   int narg = parse_args(contents,args);
 
   // special functions that operate on global vectors
 
   if (strcmp(word,"sum") == 0 || strcmp(word,"min") == 0 ||
       strcmp(word,"max") == 0 || strcmp(word,"ave") == 0 ||
       strcmp(word,"trap") == 0 || strcmp(word,"slope") == 0) {
 
     int method;
     if (strcmp(word,"sum") == 0) method = SUM;
     else if (strcmp(word,"min") == 0) method = XMIN;
     else if (strcmp(word,"max") == 0) method = XMAX;
     else if (strcmp(word,"ave") == 0) method = AVE;
     else if (strcmp(word,"trap") == 0) method = TRAP;
     else if (strcmp(word,"slope") == 0) method = SLOPE;
 
     if (narg != 1)
       error->all(FLERR,"Invalid special function in variable formula");
 
     Compute *compute = NULL;
     Fix *fix = NULL;
     int ivar = -1;
     int index,nvec,nstride;
     char *ptr1,*ptr2;
 
     // argument is compute
 
     if (strstr(args[0],"c_") == args[0]) {
       ptr1 = strchr(args[0],'[');
       if (ptr1) {
         ptr2 = ptr1;
         index = (int) int_between_brackets(ptr2,0);
         *ptr1 = '\0';
       } else index = 0;
 
       int icompute = modify->find_compute(&args[0][2]);
       if (icompute < 0)
         error->all(FLERR,"Invalid compute ID in variable formula");
       compute = modify->compute[icompute];
       if (index == 0 && compute->vector_flag) {
         if (update->whichflag == 0) {
           if (compute->invoked_vector != update->ntimestep)
             error->all(FLERR,
                        "Compute used in variable between runs is not current");
         } else if (!(compute->invoked_flag & INVOKED_VECTOR)) {
           compute->compute_vector();
           compute->invoked_flag |= INVOKED_VECTOR;
         }
         nvec = compute->size_vector;
         nstride = 1;
       } else if (index && compute->array_flag) {
         if (index > compute->size_array_cols)
           error->all(FLERR,"Variable formula compute array "
                      "is accessed out-of-range");
         if (update->whichflag == 0) {
           if (compute->invoked_array != update->ntimestep)
             error->all(FLERR,
                        "Compute used in variable between runs is not current");
         } else if (!(compute->invoked_flag & INVOKED_ARRAY)) {
           compute->compute_array();
           compute->invoked_flag |= INVOKED_ARRAY;
         }
         nvec = compute->size_array_rows;
         nstride = compute->size_array_cols;
       } else error->all(FLERR,"Mismatched compute in variable formula");
 
     // argument is fix
 
     } else if (strstr(args[0],"f_") == args[0]) {
       ptr1 = strchr(args[0],'[');
       if (ptr1) {
         ptr2 = ptr1;
         index = (int) int_between_brackets(ptr2,0);
         *ptr1 = '\0';
       } else index = 0;
 
       int ifix = modify->find_fix(&args[0][2]);
       if (ifix < 0) error->all(FLERR,"Invalid fix ID in variable formula");
       fix = modify->fix[ifix];
       if (index == 0 && fix->vector_flag) {
         if (update->whichflag > 0 && update->ntimestep % fix->global_freq)
           error->all(FLERR,"Fix in variable not computed at compatible time");
         nvec = fix->size_vector;
         nstride = 1;
       } else if (index && fix->array_flag) {
         if (index > fix->size_array_cols)
           error->all(FLERR,
                      "Variable formula fix array is accessed out-of-range");
         if (update->whichflag > 0 && update->ntimestep % fix->global_freq)
           error->all(FLERR,"Fix in variable not computed at compatible time");
         nvec = fix->size_array_rows;
         nstride = fix->size_array_cols;
       } else error->all(FLERR,"Mismatched fix in variable formula");
 
     // argument is vector-style variable
 
     } else if (strstr(args[0],"v_") == args[0]) {
       ptr1 = strchr(args[0],'[');
       if (ptr1) {
         ptr2 = ptr1;
         index = (int) int_between_brackets(ptr2,0);
         *ptr1 = '\0';
       } else index = 0;
 
       if (index) 
 	error->all(FLERR,"Invalid special function in variable formula");
       ivar = find(&args[0][2]);
       if (ivar < 0) 
         error->all(FLERR,"Invalid special function in variable formula");
       if (style[ivar] != VECTOR) 
 	error->all(FLERR,
                    "Mis-matched special function variable in variable formula");
       if (eval_in_progress[ivar]) 
         error->all(FLERR,"Variable has circular dependency");
 
       double *vec;
       nvec = compute_vector(ivar,&vec);
       nstride = 1;
 
     } else error->all(FLERR,"Invalid special function in variable formula");
 
     value = 0.0;
     if (method == SLOPE) sx = sy = sxx = sxy = 0.0;
     if (method == XMIN) value = BIG;
     if (method == XMAX) value = -BIG;
 
     if (compute) {
       double *vec;
       if (index) {
         if (compute->array) vec = &compute->array[0][index-1];
         else vec = NULL;
       } else vec = compute->vector;
 
       int j = 0;
       for (int i = 0; i < nvec; i++) {
         if (method == SUM) value += vec[j];
         else if (method == XMIN) value = MIN(value,vec[j]);
         else if (method == XMAX) value = MAX(value,vec[j]);
         else if (method == AVE) value += vec[j];
         else if (method == TRAP) value += vec[j];
         else if (method == SLOPE) {
           if (nvec > 1) xvalue = (double) i / (nvec-1);
           else xvalue = 0.0;
           sx += xvalue;
           sy += vec[j];
           sxx += xvalue*xvalue;
           sxy += xvalue*vec[j];
         }
         j += nstride;
       }
       if (method == TRAP) value -= 0.5*vec[0] + 0.5*vec[nvec-1];
     }
 
     if (fix) {
       double one;
       for (int i = 0; i < nvec; i++) {
         if (index) one = fix->compute_array(i,index-1);
         else one = fix->compute_vector(i);
         if (method == SUM) value += one;
         else if (method == XMIN) value = MIN(value,one);
         else if (method == XMAX) value = MAX(value,one);
         else if (method == AVE) value += one;
         else if (method == TRAP) value += one;
         else if (method == SLOPE) {
           if (nvec > 1) xvalue = (double) i / (nvec-1);
           else xvalue = 0.0;
           sx += xvalue;
           sy += one;
           sxx += xvalue*xvalue;
           sxy += xvalue*one;
         }
       }
       if (method == TRAP) {
         if (index) value -= 0.5*fix->compute_array(0,index-1) +
                      0.5*fix->compute_array(nvec-1,index-1);
         else value -= 0.5*fix->compute_vector(0) +
                0.5*fix->compute_vector(nvec-1);
       }
     }
 
     if (ivar >= 0) {
       double one;
       double *vec = vecs[ivar].values;
       for (int i = 0; i < nvec; i++) {
 	one = vec[i];
         if (method == SUM) value += one;
         else if (method == XMIN) value = MIN(value,one);
         else if (method == XMAX) value = MAX(value,one);
         else if (method == AVE) value += one;
         else if (method == TRAP) value += one;
         else if (method == SLOPE) {
           if (nvec > 1) xvalue = (double) i / (nvec-1);
           else xvalue = 0.0;
           sx += xvalue;
           sy += one;
           sxx += xvalue*xvalue;
           sxy += xvalue*one;
         }
       }
       if (method == TRAP) value -= 0.5*vec[0] + 0.5*vec[nvec-1];
     }
 
     if (method == AVE) value /= nvec;
 
     if (method == SLOPE) {
       double numerator = sxy - sx*sy;
       double denominator = sxx - sx*sx;
       if (denominator != 0.0) value = numerator/denominator / nvec;
       else value = BIG;
     }
 
     // save value in tree or on argstack
 
     if (tree) {
       Tree *newtree = new Tree();
       newtree->type = VALUE;
       newtree->value = value;
       newtree->first = newtree->second = NULL;
       newtree->nextra = 0;
       treestack[ntreestack++] = newtree;
     } else argstack[nargstack++] = value;
 
   // mask special functions
 
   } else if (strcmp(word,"gmask") == 0) {
     if (tree == NULL)
       error->all(FLERR,"Gmask function in equal-style variable formula");
     if (narg != 1)
       error->all(FLERR,"Invalid special function in variable formula");
 
     int igroup = group->find(args[0]);
     if (igroup == -1)
       error->all(FLERR,"Group ID in variable formula does not exist");
 
     Tree *newtree = new Tree();
     newtree->type = GMASK;
     newtree->ivalue1 = group->bitmask[igroup];
     newtree->first = newtree->second = NULL;
     newtree->nextra = 0;
     treestack[ntreestack++] = newtree;
 
   } else if (strcmp(word,"rmask") == 0) {
     if (tree == NULL)
       error->all(FLERR,"Rmask function in equal-style variable formula");
     if (narg != 1)
       error->all(FLERR,"Invalid special function in variable formula");
 
     int iregion = region_function(args[0]);
     domain->regions[iregion]->prematch();
 
     Tree *newtree = new Tree();
     newtree->type = RMASK;
     newtree->ivalue1 = iregion;
     newtree->first = newtree->second = NULL;
     newtree->nextra = 0;
     treestack[ntreestack++] = newtree;
 
   } else if (strcmp(word,"grmask") == 0) {
     if (tree == NULL)
       error->all(FLERR,"Grmask function in equal-style variable formula");
     if (narg != 2)
       error->all(FLERR,"Invalid special function in variable formula");
 
     int igroup = group->find(args[0]);
     if (igroup == -1)
       error->all(FLERR,"Group ID in variable formula does not exist");
     int iregion = region_function(args[1]);
     domain->regions[iregion]->prematch();
 
     Tree *newtree = new Tree();
     newtree->type = GRMASK;
     newtree->ivalue1 = group->bitmask[igroup];
     newtree->ivalue2 = iregion;
     newtree->first = newtree->second = NULL;
     newtree->nextra = 0;
     treestack[ntreestack++] = newtree;
 
   // special function for file-style or atomfile-style variables
 
   } else if (strcmp(word,"next") == 0) {
     if (narg != 1)
       error->all(FLERR,"Invalid special function in variable formula");
 
     int ivar = find(args[0]);
     if (ivar < 0)
       error->all(FLERR,"Variable ID in variable formula does not exist");
 
     // SCALARFILE has single current value, read next one
     // save value in tree or on argstack
 
     if (style[ivar] == SCALARFILE) {
       double value = atof(data[ivar][0]);
       int done = reader[ivar]->read_scalar(data[ivar][0]);
       if (done) remove(ivar);
 
       if (tree) {
         Tree *newtree = new Tree();
         newtree->type = VALUE;
         newtree->value = value;
         newtree->first = newtree->second = NULL;
         newtree->nextra = 0;
         treestack[ntreestack++] = newtree;
       } else argstack[nargstack++] = value;
 
     // ATOMFILE has per-atom values, save values in tree
     // copy current per-atom values into result so can read next ones
     // set selfalloc = 1 so result will be deleted by free_tree() after eval
 
     } else if (style[ivar] == ATOMFILE) {
       if (tree == NULL)
         error->all(FLERR,"Atomfile variable in equal-style variable formula");
 
       double *result;
       memory->create(result,atom->nlocal,"variable:result");
       memcpy(result,reader[ivar]->fixstore->vstore,atom->nlocal*sizeof(double));
 
       int done = reader[ivar]->read_peratom();
       if (done) remove(ivar);
 
       Tree *newtree = new Tree();
       newtree->type = ATOMARRAY;
       newtree->array = result;
       newtree->nstride = 1;
       newtree->selfalloc = 1;
       newtree->first = newtree->second = NULL;
       newtree->nextra = 0;
       treestack[ntreestack++] = newtree;
 
     } else error->all(FLERR,"Invalid variable style in special function next");
 
   } else if (strcmp(word,"is_active") == 0) {
     if (narg != 2)
       error->all(FLERR,"Invalid is_active() function in variable formula");
 
     Info info(lmp);
     value = (info.is_active(args[0],args[1])) ? 1.0 : 0.0;
 
     // save value in tree or on argstack
 
     if (tree) {
       Tree *newtree = new Tree();
       newtree->type = VALUE;
       newtree->value = value;
       newtree->first = newtree->second = NULL;
       newtree->nextra = 0;
       treestack[ntreestack++] = newtree;
     } else argstack[nargstack++] = value;
 
   } else if (strcmp(word,"is_available") == 0) {
     if (narg != 2)
       error->all(FLERR,"Invalid is_available() function in variable formula");
 
     Info info(lmp);
     value = (info.is_available(args[0],args[1])) ? 1.0 : 0.0;
 
     // save value in tree or on argstack
 
     if (tree) {
       Tree *newtree = new Tree();
       newtree->type = VALUE;
       newtree->value = value;
       newtree->first = newtree->second = NULL;
       newtree->nextra = 0;
       treestack[ntreestack++] = newtree;
     } else argstack[nargstack++] = value;
 
   } else if (strcmp(word,"is_defined") == 0) {
     if (narg != 2)
       error->all(FLERR,"Invalid is_defined() function in variable formula");
 
     Info info(lmp);
     value = (info.is_defined(args[0],args[1])) ? 1.0 : 0.0;
 
     // save value in tree or on argstack
 
     if (tree) {
       Tree *newtree = new Tree();
       newtree->type = VALUE;
       newtree->value = value;
       newtree->first = newtree->second = NULL;
       newtree->nextra = 0;
       treestack[ntreestack++] = newtree;
     } else argstack[nargstack++] = value;
   }
 
   // delete stored args
 
   for (int i = 0; i < narg; i++) delete [] args[i];
 
   return 1;
 }
 
 /* ----------------------------------------------------------------------
    extract a global value from a per-atom quantity in a formula
    flag = 0 -> word is an atom vector
    flag = 1 -> vector is a per-atom compute or fix quantity with nstride
    id = global ID of atom, converted to local index
    push result onto tree or arg stack
    customize by adding an atom vector:
      id,mass,type,mol,x,y,z,vx,vy,vz,fx,fy,fz,q
 ------------------------------------------------------------------------- */
 
 void Variable::peratom2global(int flag, char *word,
                               double *vector, int nstride, tagint id,
                               Tree **tree, Tree **treestack, int &ntreestack,
                               double *argstack, int &nargstack)
 {
   // error check for ID larger than any atom
   // int_between_brackets() already checked for ID <= 0
 
   if (atom->map_style == 0)
     error->all(FLERR,
                "Indexed per-atom vector in variable formula without atom map");
 
   if (id > atom->map_tag_max)
     error->all(FLERR,"Variable atom ID is too large");
 
   // if ID does not exist, index will be -1 for all procs,
   // and mine will be set to 0.0
 
   int index = atom->map(id);
 
   double mine;
   if (index >= 0 && index < atom->nlocal) {
 
     if (flag == 0) {
       if (strcmp(word,"id") == 0) mine = atom->tag[index];
       else if (strcmp(word,"mass") == 0) {
         if (atom->rmass) mine = atom->rmass[index];
         else mine = atom->mass[atom->type[index]];
       }
       else if (strcmp(word,"type") == 0) mine = atom->type[index];
       else if (strcmp(word,"mol") == 0) {
         if (!atom->molecule_flag)
           error->one(FLERR,"Variable uses atom property that isn't allocated");
         mine = atom->molecule[index];
       }
       else if (strcmp(word,"x") == 0) mine = atom->x[index][0];
       else if (strcmp(word,"y") == 0) mine = atom->x[index][1];
       else if (strcmp(word,"z") == 0) mine = atom->x[index][2];
       else if (strcmp(word,"vx") == 0) mine = atom->v[index][0];
       else if (strcmp(word,"vy") == 0) mine = atom->v[index][1];
       else if (strcmp(word,"vz") == 0) mine = atom->v[index][2];
       else if (strcmp(word,"fx") == 0) mine = atom->f[index][0];
       else if (strcmp(word,"fy") == 0) mine = atom->f[index][1];
       else if (strcmp(word,"fz") == 0) mine = atom->f[index][2];
       else if (strcmp(word,"q") == 0) {
         if (!atom->q_flag)
           error->one(FLERR,"Variable uses atom property that isn't allocated");
         mine = atom->q[index];
       }
       else error->one(FLERR,"Invalid atom vector in variable formula");
 
     } else mine = vector[index*nstride];
 
   } else mine = 0.0;
 
   double value;
   MPI_Allreduce(&mine,&value,1,MPI_DOUBLE,MPI_SUM,world);
 
   if (tree) {
     Tree *newtree = new Tree();
     newtree->type = VALUE;
     newtree->value = value;
     newtree->first = newtree->second = NULL;
     newtree->nextra = 0;
     treestack[ntreestack++] = newtree;
   } else argstack[nargstack++] = value;
 }
 
 /* ----------------------------------------------------------------------
    check if word matches an atom vector
    return 1 if yes, else 0
    customize by adding an atom vector:
      id,mass,type,mol,x,y,z,vx,vy,vz,fx,fy,fz,q
 ------------------------------------------------------------------------- */
 
 int Variable::is_atom_vector(char *word)
 {
   // names of all recognized atom vectors
   // extend this table when adding a new atom vector
 
   static const char * const known[] = {
     "id","mass","type","mol",
     "x","y","z","vx","vy","vz","fx","fy","fz","q"
   };
   const int nknown = sizeof(known) / sizeof(known[0]);
 
   // exact, case-sensitive match against each table entry
 
   for (int k = 0; k < nknown; k++)
     if (strcmp(word,known[k]) == 0) return 1;
 
   return 0;
 }
 
 /* ----------------------------------------------------------------------
    process an atom vector in formula
    push result onto tree
    word = atom vector
    customize by adding an atom vector:
      id,mass,type,mol,x,y,z,vx,vy,vz,fx,fy,fz,q
 ------------------------------------------------------------------------- */
 
 void Variable::atom_vector(char *word, Tree **tree,
                            Tree **treestack, int &ntreestack)
 {
   // per-atom data can only be represented as a tree node,
   // so a missing tree is an error
 
   if (tree == NULL)
     error->all(FLERR,"Atom vector in equal-style variable formula");
 
   // create the node with defaults for a column of x, v, or f:
   // ATOMARRAY of doubles with stride 3, memory not owned by the node
   // branches below override type and stride where the data differs
 
   Tree *newtree = new Tree();
   newtree->type = ATOMARRAY;
   newtree->nstride = 3;
   newtree->selfalloc = 0;
   newtree->first = newtree->second = NULL;
   newtree->nextra = 0;
   treestack[ntreestack++] = newtree;
 
   // IDs are tagints: pick the integer node type that matches their size
 
   if (strcmp(word,"id") == 0) {
     if (sizeof(tagint) == sizeof(smallint)) {
       newtree->type = INTARRAY;
       newtree->iarray = (int *) atom->tag;
     } else {
       newtree->type = BIGINTARRAY;
       newtree->barray = (bigint *) atom->tag;
     }
     newtree->nstride = 1;
 
   // mass is per-atom if rmass exists, else looked up via TYPEARRAY
 
   } else if (strcmp(word,"mass") == 0) {
     if (atom->rmass) {
       newtree->nstride = 1;
       newtree->array = atom->rmass;
     } else {
       newtree->type = TYPEARRAY;
       newtree->array = atom->mass;
     }
 
   } else if (strcmp(word,"type") == 0) {
     newtree->type = INTARRAY;
     newtree->nstride = 1;
     newtree->iarray = atom->type;
 
   // molecule IDs are tagints as well, and only exist if molecule_flag is set
 
   } else if (strcmp(word,"mol") == 0) {
     if (!atom->molecule_flag)
       error->one(FLERR,"Variable uses atom property that isn't allocated");
     if (sizeof(tagint) == sizeof(smallint)) {
       newtree->type = INTARRAY;
       newtree->iarray = (int *) atom->molecule;
     } else {
       newtree->type = BIGINTARRAY;
       newtree->barray = (bigint *) atom->molecule;
     }
     newtree->nstride = 1;
   }
 
   // coordinate, velocity, force components:
   // point at the requested column of the first atom, default stride 3
 
   else if (strcmp(word,"x") == 0) newtree->array = &atom->x[0][0];
   else if (strcmp(word,"y") == 0) newtree->array = &atom->x[0][1];
   else if (strcmp(word,"z") == 0) newtree->array = &atom->x[0][2];
   else if (strcmp(word,"vx") == 0) newtree->array = &atom->v[0][0];
   else if (strcmp(word,"vy") == 0) newtree->array = &atom->v[0][1];
   else if (strcmp(word,"vz") == 0) newtree->array = &atom->v[0][2];
   else if (strcmp(word,"fx") == 0) newtree->array = &atom->f[0][0];
   else if (strcmp(word,"fy") == 0) newtree->array = &atom->f[0][1];
   else if (strcmp(word,"fz") == 0) newtree->array = &atom->f[0][2];
 
   // charge only exists if q_flag is set
   // same check and message as used for "q" in peratom2global()
 
   else if (strcmp(word,"q") == 0) {
     if (!atom->q_flag)
       error->one(FLERR,"Variable uses atom property that isn't allocated");
     newtree->nstride = 1;
     newtree->array = atom->q;
   }
 }
 
 /* ----------------------------------------------------------------------
    check if word matches a constant
    return 1 if yes, else 0
    customize by adding a constant: PI, version, yes, no, on, off, true, false
 ------------------------------------------------------------------------- */
 
 int Variable::is_constant(char *word)
 {
   // all recognized constant names: PI, version, and the logical keywords
   // extend this table when adding a new constant
 
   static const char * const known[] = {
     "PI","version","yes","no","on","off","true","false"
   };
   const int nknown = sizeof(known) / sizeof(known[0]);
 
   // exact, case-sensitive match against each table entry
 
   for (int k = 0; k < nknown; k++)
     if (strcmp(word,known[k]) == 0) return 1;
 
   return 0;
 }
 
 /* ----------------------------------------------------------------------
    process a constant in formula
    customize by adding a constant: PI, version, yes, no, on, off, true, false
 ------------------------------------------------------------------------- */
 
 double Variable::constant(char *word)
 {
   // numeric constants
 
   if (!strcmp(word,"PI")) return MY_PI;
   if (!strcmp(word,"version")) return atof(universe->num_ver);
 
   // logical keywords: yes, on, true evaluate to 1.0
   // no, off, false and any unrecognized word evaluate to 0.0
 
   const bool affirmative =
     !strcmp(word,"yes") || !strcmp(word,"on") || !strcmp(word,"true");
 
   return affirmative ? 1.0 : 0.0;
 }
 
 /* ----------------------------------------------------------------------
    parse string for comma-separated args
    store copy of each arg in args array
    max allowed # of args = MAXFUNCARG
 ------------------------------------------------------------------------- */
 
 int Variable::parse_args(char *str, char **args)
 {
   int n;
   char *ptrnext;
 
   int narg = 0;
   char *ptr = str;
 
   while (ptr && narg < MAXFUNCARG) {
     ptrnext = find_next_comma(ptr);
     if (ptrnext) *ptrnext = '\0';
     n = strlen(ptr) + 1;
     args[narg] = new char[n];
     strcpy(args[narg],ptr);
     narg++;
     ptr = ptrnext;
     if (ptr) ptr++;
   }
 
   if (ptr) error->all(FLERR,"Too many args in variable function");
   return narg;
 }
 
 
 /* ----------------------------------------------------------------------
    find next comma in str
    skip commas inside one or more nested parentheses
    only return ptr to comma at level 0, else NULL if not found
 ------------------------------------------------------------------------- */
 
 char *Variable::find_next_comma(char *str)
 {
   // depth = number of currently open parentheses
 
   int depth = 0;
   char *cur = str;
 
   while (*cur != '\0') {
     switch (*cur) {
     case '(':
       depth++;
       break;
     case ')':
       depth--;
       break;
     case ',':
       // only a comma outside all parentheses separates args
       if (depth == 0) return cur;
       break;
     default:
       break;
     }
     cur++;
   }
 
   // no top-level comma before end of string
 
   return NULL;
 }
 
 /* ----------------------------------------------------------------------
    debug routine for printing formula tree recursively
 ------------------------------------------------------------------------- */
 
 void Variable::print_tree(Tree *tree, int level)
 {
   // print this node: depth, node type, stored value
 
   printf("TREE %d: %d %g\n",level,tree->type,tree->value);
 
   // then descend into first, second, and any extra children, one level deeper
 
   const int deeper = level + 1;
   if (tree->first) print_tree(tree->first,deeper);
   if (tree->second) print_tree(tree->second,deeper);
   for (int k = 0; k < tree->nextra; k++) print_tree(tree->extra[k],deeper);
 }
 
 /* ----------------------------------------------------------------------
    recursive evaluation of string str
    called from "if" command in input script
    str is a boolean expression containing one or more items:
      number = 0.0, -5.45, 2.8e-4, ...
      string = alphanumeric chars and underscores, only usable with == and !=
      math operation = (),!x,x==y,x!=y,x<y,x<=y,x>y,x>=y,x&&y,x||y,x|^y
 ------------------------------------------------------------------------- */
 
 double Variable::evaluate_boolean(char *str)
 {
   // evaluate the Boolean expression in str and return its numeric result
   // operators produce 1.0 (true) or 0.0 (false)
   // a bare number is returned as is
   // operands are numbers, parenthesized sub-expressions, or strings
   // strings are only valid as operands of == and !=, and both operands
   //   of those two operators must be of the same kind
   // any malformed expression triggers error->all()
 
   int op,opprevious,flag1,flag2;
   double value1,value2;
   char onechar;
   char *str1,*str2;
 
   // one entry of the operand stack
 
   struct Arg {
     int flag;          // 0 for numeric value, 1 for string
     double value;      // stored numeric value
     char *str;         // stored string
   };
 
   Arg argstack[MAXLEVEL];
   int opstack[MAXLEVEL];
   int nargstack = 0;
   int nopstack = 0;
 
   int i = 0;
   int expect = ARG;
 
   while (1) {
     onechar = str[i];
 
     // whitespace: just skip
 
     if (isspace(onechar)) i++;
 
     // ----------------
     // parentheses: recursively evaluate contents of parens
     // ----------------
 
     else if (onechar == '(') {
       if (expect == OP)
         error->all(FLERR,"Invalid Boolean syntax in if command");
       expect = OP;
 
       char *contents;
       i = find_matching_paren(str,i,contents);
       i++;
 
       // evaluate contents and push on stack
 
       argstack[nargstack].value = evaluate_boolean(contents);
       argstack[nargstack].flag = 0;
       nargstack++;
 
       delete [] contents;
 
     // ----------------
     // number: push value onto stack
     // ----------------
 
     } else if (isdigit(onechar) || onechar == '.' || onechar == '-') {
       if (expect == OP)
         error->all(FLERR,"Invalid Boolean syntax in if command");
       expect = OP;
 
       // set I to end of number, including scientific notation
 
       int istart = i++;
       while (isdigit(str[i]) || str[i] == '.') i++;
       if (str[i] == 'e' || str[i] == 'E') {
         i++;
         if (str[i] == '+' || str[i] == '-') i++;
         while (isdigit(str[i])) i++;
       }
 
       // temporarily terminate str after the number so atof() sees only it
 
       onechar = str[i];
       str[i] = '\0';
       argstack[nargstack].value = atof(&str[istart]);
       str[i] = onechar;
 
       argstack[nargstack++].flag = 0;
 
     // ----------------
     // string: push string onto stack
     // ----------------
 
     } else if (isalpha(onechar)) {
       if (expect == OP)
         error->all(FLERR,"Invalid Boolean syntax in if command");
       expect = OP;
 
       // set I to end of string
 
       int istart = i++;
       while (isalnum(str[i]) || str[i] == '_') i++;
 
       // store a copy of the string, it is freed when == or != consumes it
 
       int n = i - istart + 1;
       argstack[nargstack].str = new char[n];
       onechar = str[i];
       str[i] = '\0';
       strcpy(argstack[nargstack].str,&str[istart]);
       str[i] = onechar;
 
       argstack[nargstack++].flag = 1;
 
     // ----------------
     // Boolean operator, including end-of-string
     // strchr() also matches the terminating '\0' of its search string
     // ----------------
 
     } else if (strchr("<>=!&|\0",onechar)) {
       if (onechar == '=') {
         if (str[i+1] != '=')
           error->all(FLERR,"Invalid Boolean syntax in if command");
         op = EQ;
         i++;
       } else if (onechar == '!') {
         if (str[i+1] == '=') {
           op = NE;
           i++;
         } else op = NOT;
       } else if (onechar == '<') {
         if (str[i+1] != '=') op = LT;
         else {
           op = LE;
           i++;
         }
       } else if (onechar == '>') {
         if (str[i+1] != '=') op = GT;
         else {
           op = GE;
           i++;
         }
       } else if (onechar == '&') {
         if (str[i+1] != '&')
           error->all(FLERR,"Invalid Boolean syntax in if command");
         op = AND;
         i++;
       } else if (onechar == '|') {
         if (str[i+1] == '|') op = OR;
         else if (str[i+1] == '^') op = XOR;
         else error->all(FLERR,"Invalid Boolean syntax in if command");
         i++;
       } else op = DONE;
 
       i++;
 
       // unary NOT precedes its operand, so push it and keep expecting an arg
 
       if (op == NOT && expect == ARG) {
         opstack[nopstack++] = op;
         continue;
       }
 
       if (expect == ARG)
         error->all(FLERR,"Invalid Boolean syntax in if command");
       expect = ARG;
 
       // evaluate stack as deep as possible while respecting precedence
       // before pushing current op onto stack
 
       while (nopstack && precedence[opstack[nopstack-1]] >= precedence[op]) {
         opprevious = opstack[--nopstack];
 
         // pop second operand, and first operand unless op is unary NOT
 
         nargstack--;
         flag2 = argstack[nargstack].flag;
         value2 = argstack[nargstack].value;
         str2 = argstack[nargstack].str;
         if (opprevious != NOT) {
           nargstack--;
           flag1 = argstack[nargstack].flag;
           value1 = argstack[nargstack].value;
           str1 = argstack[nargstack].str;
         }
 
         // only == and != accept strings
         // all other binary operators require both operands to be numeric,
         //   since a string operand has no valid numeric value
 
         if (opprevious == NOT) {
           if (flag2) error->all(FLERR,"Invalid Boolean syntax in if command");
           if (value2 == 0.0) argstack[nargstack].value = 1.0;
           else argstack[nargstack].value = 0.0;
         } else if (opprevious == EQ) {
           if (flag1 != flag2)
             error->all(FLERR,"Invalid Boolean syntax in if command");
           if (flag2 == 0) {
             if (value1 == value2) argstack[nargstack].value = 1.0;
             else argstack[nargstack].value = 0.0;
           } else {
             if (strcmp(str1,str2) == 0) argstack[nargstack].value = 1.0;
             else argstack[nargstack].value = 0.0;
             delete [] str1;
             delete [] str2;
           }
         } else if (opprevious == NE) {
           if (flag1 != flag2)
             error->all(FLERR,"Invalid Boolean syntax in if command");
           if (flag2 == 0) {
             if (value1 != value2) argstack[nargstack].value = 1.0;
             else argstack[nargstack].value = 0.0;
           } else {
             if (strcmp(str1,str2) != 0) argstack[nargstack].value = 1.0;
             else argstack[nargstack].value = 0.0;
             delete [] str1;
             delete [] str2;
           }
         } else if (opprevious == LT) {
           if (flag1 || flag2)
             error->all(FLERR,"Invalid Boolean syntax in if command");
           if (value1 < value2) argstack[nargstack].value = 1.0;
           else argstack[nargstack].value = 0.0;
         } else if (opprevious == LE) {
           if (flag1 || flag2)
             error->all(FLERR,"Invalid Boolean syntax in if command");
           if (value1 <= value2) argstack[nargstack].value = 1.0;
           else argstack[nargstack].value = 0.0;
         } else if (opprevious == GT) {
           if (flag1 || flag2)
             error->all(FLERR,"Invalid Boolean syntax in if command");
           if (value1 > value2) argstack[nargstack].value = 1.0;
           else argstack[nargstack].value = 0.0;
         } else if (opprevious == GE) {
           if (flag1 || flag2)
             error->all(FLERR,"Invalid Boolean syntax in if command");
           if (value1 >= value2) argstack[nargstack].value = 1.0;
           else argstack[nargstack].value = 0.0;
         } else if (opprevious == AND) {
           if (flag1 || flag2)
             error->all(FLERR,"Invalid Boolean syntax in if command");
           if (value1 != 0.0 && value2 != 0.0) argstack[nargstack].value = 1.0;
           else argstack[nargstack].value = 0.0;
         } else if (opprevious == OR) {
           if (flag1 || flag2)
             error->all(FLERR,"Invalid Boolean syntax in if command");
           if (value1 != 0.0 || value2 != 0.0) argstack[nargstack].value = 1.0;
           else argstack[nargstack].value = 0.0;
         } else if (opprevious == XOR) {
           if (flag1 || flag2)
             error->all(FLERR,"Invalid Boolean syntax in if command");
           if ((value1 == 0.0 && value2 != 0.0) ||
               (value1 != 0.0 && value2 == 0.0))
             argstack[nargstack].value = 1.0;
           else argstack[nargstack].value = 0.0;
         }
 
         // result of every operation is numeric
 
         argstack[nargstack++].flag = 0;
       }
 
       // if end-of-string, break out of entire formula evaluation loop
 
       if (op == DONE) break;
 
       // push current operation onto stack
 
       opstack[nopstack++] = op;
 
     } else error->all(FLERR,"Invalid Boolean syntax in if command");
   }
 
   if (nopstack) error->all(FLERR,"Invalid Boolean syntax in if command");
   if (nargstack != 1) error->all(FLERR,"Invalid Boolean syntax in if command");
 
   // a single string with no operator has no numeric value to return
 
   if (argstack[0].flag)
     error->all(FLERR,"Invalid Boolean syntax in if command");
 
   return argstack[0].value;
 }
 
 /* ----------------------------------------------------------------------
    class to read variable values from a file
    for flag = SCALARFILE, reads one value per line
    for flag = ATOMFILE, reads set of one value per atom
 ------------------------------------------------------------------------- */
 
 VarReader::VarReader(LAMMPS *lmp, char *name, char *file, int flag) :
   Pointers(lmp)
 {
   // name = ID of the variable, used to build the ID of the storage fix
   // file = name of file to read values from
   // flag = SCALARFILE or ATOMFILE, stored as style
 
   me = comm->me;
   style = flag;
   fp = NULL;
 
   // only proc 0 opens the file
   // snprintf() bounds the message so a long file name cannot overflow str
 
   if (me == 0) {
     fp = fopen(file,"r");
     if (fp == NULL) {
       char str[128];
       snprintf(str,sizeof(str),"Cannot open file variable file %s",file);
       error->one(FLERR,str);
     }
   }
 
   // if atomfile-style variable, must store per-atom values read from file
   // allocate a new fix STORE, so they persist
   // id = variable-ID + _VARIABLE_STORE, fix group = all
 
   fixstore = NULL;
   id_fix = NULL;
   buffer = NULL;
 
   if (style == ATOMFILE) {
     if (atom->map_style == 0)
       error->all(FLERR,
                  "Cannot use atomfile-style variable unless atom map exists");
 
     int n = strlen(name) + strlen("_VARIABLE_STORE") + 1;
     id_fix = new char[n];
     strcpy(id_fix,name);
     strcat(id_fix,"_VARIABLE_STORE");
 
     // args for the fix: ID, group, style STORE, then "peratom 0 1"
     // the new fix is taken from the last slot of modify->fix
 
     char **newarg = new char*[6];
     newarg[0] = id_fix;
     newarg[1] = (char *) "all";
     newarg[2] = (char *) "STORE";
     newarg[3] = (char *) "peratom";
     newarg[4] = (char *) "0";
     newarg[5] = (char *) "1";
     modify->add_fix(6,newarg);
     fixstore = (FixStore *) modify->fix[modify->nfix-1];
     delete [] newarg;
 
     // buffer holds up to CHUNK lines of MAXLINE chars for read_peratom()
 
     buffer = new char[CHUNK*MAXLINE];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 VarReader::~VarReader()
 {
   // the file is only open on proc 0
 
   if (me == 0) {
     fclose(fp);
     fp = NULL;
   }
 
   // nothing else was allocated unless a storage fix was created
 
   if (!fixstore) return;
 
   // check modify in case all fixes have already been deleted
 
   if (modify) modify->delete_fix(id_fix);
 
   delete [] id_fix;
   delete [] buffer;
 }
 
 /* ----------------------------------------------------------------------
    read for SCALARFILE style
    read next value from file into str for file-style variable
    strip comments, skip blank lines
    return 0 if successful, 1 if end-of-file
 ------------------------------------------------------------------------- */
 
 int VarReader::read_scalar(char *str)
 {
   // str = buffer that receives the next value as text on every proc
   //       proc 0 reads up to MAXLINE chars into it
   // return 0 if a value was read, 1 if end-of-file was reached
 
   int n;
   char *ptr;
 
   // read one string from file
   // only proc 0 reads, n is the string length including terminator, 0 at EOF
 
   if (me == 0) {
     while (1) {
       if (fgets(str,MAXLINE,fp) == NULL) n = 0;
       else n = strlen(str);
       if (n == 0) break;                                 // end of file
 
       // strip newline only if one is present, so that a final line
       // without a trailing newline does not lose its last character
 
       if (str[n-1] == '\n') str[n-1] = '\0';
       if ((ptr = strchr(str,'#'))) *ptr = '\0';          // strip comment
       if (strtok(str," \t\n\r\f") == NULL) continue;     // skip if blank
       n = strlen(str) + 1;
       break;
     }
   }
 
   // communicate length first, so all procs agree on EOF, then the string
 
   MPI_Bcast(&n,1,MPI_INT,0,world);
   if (n == 0) return 1;
   MPI_Bcast(str,n,MPI_CHAR,0,world);
   return 0;
 }
 
 /* ----------------------------------------------------------------------
    read snapshot of per-atom values from file
    into str for atomfile-style variable
    return 0 if successful, 1 if end-of-file
 ------------------------------------------------------------------------- */
 
 int VarReader::read_peratom()
 {
   int i,m,n,nchunk,eof;
   tagint tag;
   char *ptr,*next;
   double value;
 
   // set all per-atom values to 0.0
   // values that appear in file will overwrite this
 
   double *vstore = fixstore->vstore;
 
   int nlocal = atom->nlocal;
   for (i = 0; i < nlocal; i++) vstore[i] = 0.0;
 
   // read one string from file, convert to Nlines
 
   char str[MAXLINE];
   if (me == 0) {
     while (1) {
       if (fgets(str,MAXLINE,fp) == NULL) n = 0;
       else n = strlen(str);
       if (n == 0) break;                                 // end of file
       str[n-1] = '\0';                                   // strip newline
       if ((ptr = strchr(str,'#'))) *ptr = '\0';          // strip comment
       if (strtok(str," \t\n\r\f") == NULL) continue;     // skip if blank
       n = strlen(str) + 1;
       break;
     }
   }
 
   MPI_Bcast(&n,1,MPI_INT,0,world);
   if (n == 0) return 1;
 
   MPI_Bcast(str,n,MPI_CHAR,0,world);
   bigint nlines = force->bnumeric(FLERR,str);
   tagint map_tag_max = atom->map_tag_max;
 
   bigint nread = 0;
   while (nread < nlines) {
     nchunk = MIN(nlines-nread,CHUNK);
     eof = comm->read_lines_from_file(fp,nchunk,MAXLINE,buffer);
     if (eof) return 1;
 
     char *buf = buffer;
     for (i = 0; i < nchunk; i++) {
       next = strchr(buf,'\n');
       *next = '\0';
       sscanf(buf,TAGINT_FORMAT " %lg",&tag,&value);
       if (tag <= 0 || tag > map_tag_max)
         error->one(FLERR,"Invalid atom ID in variable file");
       if ((m = atom->map(tag)) >= 0) vstore[m] = value;
       buf = next + 1;
     }
 
     nread += nchunk;
   }
 
   return 0;
 }