diff --git a/doc/src/Section_howto.txt b/doc/src/Section_howto.txt
index aedc2fc64..e83f5399c 100644
--- a/doc/src/Section_howto.txt
+++ b/doc/src/Section_howto.txt
@@ -1,2938 +1,2945 @@
 "Previous Section"_Section_accelerate.html - "LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc - "Next Section"_Section_example.html :c
 
 :link(lws,http://lammps.sandia.gov)
 :link(ld,Manual.html)
 :link(lc,Section_commands.html#comm)
 
 :line
 
 6. How-to discussions :h3
 
 This section describes how to perform common tasks using LAMMPS.
 
 6.1 "Restarting a simulation"_#howto_1
 6.2 "2d simulations"_#howto_2
 6.3 "CHARMM, AMBER, and DREIDING force fields"_#howto_3
 6.4 "Running multiple simulations from one input script"_#howto_4
 6.5 "Multi-replica simulations"_#howto_5
 6.6 "Granular models"_#howto_6
 6.7 "TIP3P water model"_#howto_7
 6.8 "TIP4P water model"_#howto_8
 6.9 "SPC water model"_#howto_9
 6.10 "Coupling LAMMPS to other codes"_#howto_10
 6.11 "Visualizing LAMMPS snapshots"_#howto_11
 6.12 "Triclinic (non-orthogonal) simulation boxes"_#howto_12
 6.13 "NEMD simulations"_#howto_13
 6.14 "Finite-size spherical and aspherical particles"_#howto_14
 6.15 "Output from LAMMPS (thermo, dumps, computes, fixes, variables)"_#howto_15
 6.16 "Thermostatting, barostatting and computing temperature"_#howto_16
 6.17 "Walls"_#howto_17
 6.18 "Elastic constants"_#howto_18
 6.19 "Library interface to LAMMPS"_#howto_19
 6.20 "Calculating thermal conductivity"_#howto_20
 6.21 "Calculating viscosity"_#howto_21
 6.22 "Calculating a diffusion coefficient"_#howto_22
 6.23 "Using chunks to calculate system properties"_#howto_23
 6.24 "Setting parameters for the kspace_style pppm/disp command"_#howto_24
 6.25 "Polarizable models"_#howto_25
 6.26 "Adiabatic core/shell model"_#howto_26
 6.27 "Drude induced dipoles"_#howto_27 :all(b)
 
 The example input scripts included in the LAMMPS distribution and
 highlighted in "Section 7"_Section_example.html also show how to
 setup and run various kinds of simulations.
 
 :line
 :line
 
 6.1 Restarting a simulation :link(howto_1),h4
 
 There are 3 ways to continue a long LAMMPS simulation.  Multiple
 "run"_run.html commands can be used in the same input script.  Each
 run will continue from where the previous run left off.  Or binary
 restart files can be saved to disk using the "restart"_restart.html
 command.  At a later time, these binary files can be read via a
 "read_restart"_read_restart.html command in a new script.  Or they can
 be converted to text data files using the "-r command-line
 switch"_Section_start.html#start_7 and read by a
 "read_data"_read_data.html command in a new script.
 
 Here we give examples of 2 scripts that read either a binary restart
 file or a converted data file and then issue a new run command to
 continue where the previous run left off.  They illustrate what
 settings must be made in the new script.  Details are discussed in the
 documentation for the "read_restart"_read_restart.html and
 "read_data"_read_data.html commands.
 
 Look at the {in.chain} input script provided in the {bench} directory
 of the LAMMPS distribution to see the original script that these 2
 scripts are based on.  If that script had the line
 
 restart         50 tmp.restart :pre
 
 added to it, it would produce 2 binary restart files (tmp.restart.50
 and tmp.restart.100) as it ran.
 
 This script could be used to read the 1st restart file and re-run the
 last 50 timesteps:
 
 read_restart    tmp.restart.50 :pre
 
 neighbor        0.4 bin
 neigh_modify    every 1 delay 1 :pre
 
 fix             1 all nve
 fix             2 all langevin 1.0 1.0 10.0 904297 :pre
 
 timestep        0.012 :pre
 
 run             50 :pre
 
 Note that the following commands do not need to be repeated because
 their settings are included in the restart file: {units, atom_style,
 special_bonds, pair_style, bond_style}.  However these commands do
 need to be used, since their settings are not in the restart file:
 {neighbor, fix, timestep}.
 
 If you actually use this script to perform a restarted run, you will
 notice that the thermodynamic data match at step 50 (if you also put a
 "thermo 50" command in the original script), but do not match at step
 100.  This is because the "fix langevin"_fix_langevin.html command
 uses random numbers in a way that does not allow for perfect restarts.
 
 As an alternate approach, the restart file could be converted to a data
 file as follows:
 
 lmp_g++ -r tmp.restart.50 tmp.restart.data :pre
 
 Then, this script could be used to re-run the last 50 steps:
 
 units           lj
 atom_style      bond
 pair_style      lj/cut 1.12
 pair_modify     shift yes
 bond_style      fene
 special_bonds   0.0 1.0 1.0 :pre
 
 read_data       tmp.restart.data :pre
 
 neighbor        0.4 bin
 neigh_modify    every 1 delay 1 :pre
 
 fix             1 all nve
 fix             2 all langevin 1.0 1.0 10.0 904297 :pre
 
 timestep        0.012 :pre
 
 reset_timestep  50
 run             50 :pre
 
 Note that nearly all the settings specified in the original {in.chain}
 script must be repeated, except the {pair_coeff} and {bond_coeff}
 commands since the new data file lists the force field coefficients.
 Also, the "reset_timestep"_reset_timestep.html command is used to tell
 LAMMPS the current timestep.  This value is stored in restart files,
 but not in data files.
 
 :line
 
 6.2 2d simulations :link(howto_2),h4
 
 Use the "dimension"_dimension.html command to specify a 2d simulation.
 
 Make the simulation box periodic in z via the "boundary"_boundary.html
 command.  This is the default.
 
 If using the "create box"_create_box.html command to define a
 simulation box, set the z dimensions narrow, but finite, so that the
 create_atoms command will tile the 3d simulation box with a single z
 plane of atoms - e.g.
 
 "create box"_create_box.html 1 -10 10 -10 10 -0.25 0.25 :pre
 
 If using the "read data"_read_data.html command to read in a file of
 atom coordinates, set the "zlo zhi" values to be finite but narrow,
 similar to the create_box command settings just described.  For each
 atom in the file, assign a z coordinate so it falls inside the
 z-boundaries of the box - e.g. 0.0.
 
 Use the "fix enforce2d"_fix_enforce2d.html command as the last
 defined fix to ensure that the z-components of velocities and forces
 are zeroed out every timestep.  The reason to make it the last fix is
 so that any forces induced by other fixes will be zeroed out.
 
 Many of the example input scripts included in the LAMMPS distribution
 are for 2d models.
 
 NOTE: Some models in LAMMPS treat particles as finite-size spheres, as
 opposed to point particles.  See the "atom_style
 sphere"_atom_style.html and "fix nve/sphere"_fix_nve_sphere.html
 commands for details.  By default, for 2d simulations, such particles
 will still be modeled as 3d spheres, not 2d discs (circles), meaning
 their moment of inertia will be that of a sphere.  If you wish to
 model them as 2d discs, see the "set density/disc"_set.html command
 and the {disc} option for the "fix nve/sphere"_fix_nve_sphere.html,
 "fix nvt/sphere"_fix_nvt_sphere.html, "fix
 nph/sphere"_fix_nph_sphere.html, "fix npt/sphere"_fix_npt_sphere.html
 commands.
 
 :line
 
 6.3 CHARMM, AMBER, and DREIDING force fields :link(howto_3),h4
 
 A force field has 2 parts: the formulas that define it and the
 coefficients used for a particular system.  Here we only discuss
 formulas implemented in LAMMPS that correspond to formulas commonly
 used in the CHARMM, AMBER, and DREIDING force fields.  Setting
 coefficients is done in the input data file via the
 "read_data"_read_data.html command or in the input script with
 commands like "pair_coeff"_pair_coeff.html or
 "bond_coeff"_bond_coeff.html.  See "Section 9"_Section_tools.html
 for additional tools that can use CHARMM or AMBER to assign force
 field coefficients and convert their output into LAMMPS input.
 
 See "(MacKerell)"_#howto-MacKerell for a description of the CHARMM force
 field.  See "(Cornell)"_#howto-Cornell for a description of the AMBER force
 field.
 
 :link(charmm,http://www.scripps.edu/brooks)
 :link(amber,http://amber.scripps.edu)
 
 These style choices compute force field formulas that are consistent
 with common options in CHARMM or AMBER.  See each command's
 documentation for the formula it computes.
 
 "bond_style"_bond_harmonic.html harmonic
 "angle_style"_angle_charmm.html charmm
 "dihedral_style"_dihedral_charmm.html charmm
 "pair_style"_pair_charmm.html lj/charmm/coul/charmm
 "pair_style"_pair_charmm.html lj/charmm/coul/charmm/implicit
 "pair_style"_pair_charmm.html lj/charmm/coul/long :ul
 
 "special_bonds"_special_bonds.html charmm
 "special_bonds"_special_bonds.html amber :ul
 
 DREIDING is a generic force field developed by the "Goddard
 group"_http://www.wag.caltech.edu at Caltech and is useful for
 predicting structures and dynamics of organic, biological and
 main-group inorganic molecules. The philosophy in DREIDING is to use
 general force constants and geometry parameters based on simple
 hybridization considerations, rather than individual force constants
 and geometric parameters that depend on the particular combinations of
 atoms involved in the bond, angle, or torsion terms. DREIDING has an
 "explicit hydrogen bond term"_pair_hbond_dreiding.html to describe
 interactions involving a hydrogen atom on very electronegative atoms
 (N, O, F).
 
 See "(Mayo)"_#howto-Mayo for a description of the DREIDING force field.
 
 These style choices compute force field formulas that are consistent
 with the DREIDING force field.  See each command's
 documentation for the formula it computes.
 
 "bond_style"_bond_harmonic.html harmonic
 "bond_style"_bond_morse.html morse :ul
 
 "angle_style"_angle_harmonic.html harmonic
 "angle_style"_angle_cosine.html cosine
 "angle_style"_angle_cosine_periodic.html cosine/periodic :ul
 
 "dihedral_style"_dihedral_charmm.html charmm
 "improper_style"_improper_umbrella.html umbrella :ul
 
 "pair_style"_pair_buck.html buck
 "pair_style"_pair_buck.html buck/coul/cut
 "pair_style"_pair_buck.html buck/coul/long
 "pair_style"_pair_lj.html lj/cut
 "pair_style"_pair_lj.html lj/cut/coul/cut
 "pair_style"_pair_lj.html lj/cut/coul/long :ul
 
 "pair_style"_pair_hbond_dreiding.html hbond/dreiding/lj
 "pair_style"_pair_hbond_dreiding.html hbond/dreiding/morse :ul
 
 "special_bonds"_special_bonds.html dreiding :ul
 
 :line
 
 6.4 Running multiple simulations from one input script :link(howto_4),h4
 
 This can be done in several ways.  See the documentation for
 individual commands for more details on how these examples work.
 
 If "multiple simulations" means continue a previous simulation for
 more timesteps, then you simply use the "run"_run.html command
 multiple times.  For example, this script
 
 units lj
 atom_style atomic
 read_data data.lj
 run 10000
 run 10000
 run 10000
 run 10000
 run 10000 :pre
 
 would run 5 successive simulations of the same system for a total of
 50,000 timesteps.
 
 If you wish to run totally different simulations, one after the other,
 the "clear"_clear.html command can be used in between them to
 re-initialize LAMMPS.  For example, this script
 
 units lj
 atom_style atomic
 read_data data.lj
 run 10000
 clear
 units lj
 atom_style atomic
 read_data data.lj.new
 run 10000 :pre
 
 would run 2 independent simulations, one after the other.
 
 For large numbers of independent simulations, you can use
 "variables"_variable.html and the "next"_next.html and
 "jump"_jump.html commands to loop over the same input script
 multiple times with different settings.  For example, this
 script, named in.polymer
 
 variable d index run1 run2 run3 run4 run5 run6 run7 run8
 shell cd $d
 read_data data.polymer
 run 10000
 shell cd ..
 clear
 next d
 jump in.polymer :pre
 
 would run 8 simulations in different directories, using a data.polymer
 file in each directory.  The same concept could be used to run the
 same system at 8 different temperatures, using a temperature variable
 and storing the output in different log and dump files, for example
 
 variable a loop 8
 variable t index 0.8 0.85 0.9 0.95 1.0 1.05 1.1 1.15
 log log.$a
 read_data data.polymer
 velocity all create $t 352839
 fix 1 all nvt $t $t 100.0
 dump 1 all atom 1000 dump.$a
 run 100000
 clear
 next t
 next a
 jump in.polymer :pre
 
 All of the above examples work whether you are running on 1 or
 multiple processors, but assume you are running LAMMPS on a single
 partition of processors.  LAMMPS can be run on multiple partitions via
 the "-partition" command-line switch as described in "this
 section"_Section_start.html#start_7 of the manual.
 
 In the last 2 examples, if LAMMPS were run on 3 partitions, the same
 scripts could be used if the "index" and "loop" variables were
 replaced with {universe}-style variables, as described in the
 "variable"_variable.html command.  Also, the "next t" and "next a"
 commands would need to be replaced with a single "next a t" command.
 With these modifications, the 8 simulations of each script would run
 on the 3 partitions one after the other until all were finished.
 Initially, 3 simulations would be started simultaneously, one on each
 partition.  When one finished, that partition would then start
 the 4th simulation, and so forth, until all 8 were completed.
 
 :line
 
 6.5 Multi-replica simulations :link(howto_5),h4
 
 Several commands in LAMMPS run multi-replica simulations, meaning
 that multiple instances (replicas) of your simulation are run
 simultaneously, with small amounts of data exchanged between replicas
 periodically.
 
 These are the relevant commands:
 
 "neb"_neb.html for nudged elastic band calculations
 "prd"_prd.html for parallel replica dynamics
 "tad"_tad.html for temperature accelerated dynamics
 "temper"_temper.html for parallel tempering
 "fix pimd"_fix_pimd.html for path-integral molecular dynamics (PIMD) :ul
 
 NEB is a method for finding transition states and barrier energies.
 PRD and TAD are methods for performing accelerated dynamics to find
 and perform infrequent events.  Parallel tempering or replica exchange
 runs different replicas at a series of temperatures to facilitate
 rare-event sampling.
 
 These commands can only be used if LAMMPS was built with the REPLICA
 package.  See the "Making LAMMPS"_Section_start.html#start_3 section
 for more info on packages.
 
 PIMD runs different replicas whose individual particles are coupled
 together by springs to model a system of ring-polymers.
 
 This command can only be used if LAMMPS was built with the USER-MISC
 package.  See the "Making LAMMPS"_Section_start.html#start_3 section
 for more info on packages.
 
 In all these cases, you must run with one or more processors per
 replica.  The processors assigned to each replica are determined at
 run-time by using the "-partition command-line
 switch"_Section_start.html#start_7 to launch LAMMPS on multiple
 partitions, which in this context are the same as replicas.  E.g.
 these commands:
 
 mpirun -np 16 lmp_linux -partition 8x2 -in in.temper
 mpirun -np 8 lmp_linux -partition 8x1 -in in.neb :pre
 
 would each run 8 replicas, on either 16 or 8 processors.  Note the use
 of the "-in command-line switch"_Section_start.html#start_7 to specify
 the input script which is required when running in multi-replica mode.
 
 Also note that with MPI installed on a machine (e.g. your desktop),
 you can run on more (virtual) processors than you have physical
 processors.  Thus the above commands could be run on a
 single-processor (or few-processor) desktop so that you can run
 a multi-replica simulation on more replicas than you have
 physical processors.
 
 :line
 
 6.6 Granular models :link(howto_6),h4
 
 Granular systems are composed of spherical particles with a diameter,
 as opposed to point particles.  This means they have an angular
 velocity and torque can be imparted to them to cause them to rotate.
 
 To run a simulation of a granular model, you will want to use
 the following commands:
 
 "atom_style sphere"_atom_style.html
 "fix nve/sphere"_fix_nve_sphere.html
 "fix gravity"_fix_gravity.html :ul
 
 This compute
 
 "compute erotate/sphere"_compute_erotate_sphere.html :ul
 
 calculates rotational kinetic energy which can be "output with
 thermodynamic info"_Section_howto.html#howto_15.
 
 Use one of these 3 pair potentials, which compute forces and torques
 between interacting pairs of particles:
 
 "pair_style"_pair_style.html gran/history
 "pair_style"_pair_style.html gran/no_history
 "pair_style"_pair_style.html gran/hertzian :ul
 
 These commands implement fix options specific to granular systems:
 
 "fix freeze"_fix_freeze.html
 "fix pour"_fix_pour.html
 "fix viscous"_fix_viscous.html
 "fix wall/gran"_fix_wall_gran.html :ul
 
 The fix style {freeze} zeroes both the force and torque of frozen
 atoms, and should be used for granular systems instead of the fix style
 {setforce}.
 
 For computational efficiency, you can eliminate needless pairwise
 computations between frozen atoms by using this command:
 
 "neigh_modify"_neigh_modify.html exclude :ul
 
 NOTE: By default, for 2d systems, granular particles are still modeled
 as 3d spheres, not 2d discs (circles), meaning their moment of inertia
 will be the same as in 3d.  If you wish to model granular particles in
 2d as 2d discs, see the note on this topic in "Section
 6.2"_Section_howto.html#howto_2, where 2d simulations are discussed.
 
 :line
 
 6.7 TIP3P water model :link(howto_7),h4
 
 The TIP3P water model as implemented in CHARMM
 "(MacKerell)"_#howto-MacKerell specifies a 3-site rigid water molecule with
 charges and Lennard-Jones parameters assigned to each of the 3 atoms.
 In LAMMPS the "fix shake"_fix_shake.html command can be used to hold
 the two O-H bonds and the H-O-H angle rigid.  A bond style of
 {harmonic} and an angle style of {harmonic} or {charmm} should also be
 used.
 
 These are the additional parameters (in real units) to set for O and H
 atoms and the water molecule to run a rigid TIP3P-CHARMM model with a
 cutoff.  The K values can be used if a flexible TIP3P model (without
 fix shake) is desired.  If the LJ epsilon and sigma for HH and OH are
 set to 0.0, it corresponds to the original 1983 TIP3P model
 "(Jorgensen)"_#Jorgensen1.
 
 O mass = 15.9994
 H mass = 1.008
 O charge = -0.834
 H charge = 0.417
 LJ epsilon of OO = 0.1521
 LJ sigma of OO = 3.1507
 LJ epsilon of HH = 0.0460
 LJ sigma of HH = 0.4000
 LJ epsilon of OH = 0.0836
 LJ sigma of OH = 1.7753
 K of OH bond = 450
 r0 of OH bond = 0.9572
 K of HOH angle = 55
 theta of HOH angle = 104.52 :all(b),p
 
 These are the parameters to use for TIP3P with a long-range Coulombic
 solver (e.g. Ewald or PPPM in LAMMPS), see "(Price)"_#Price1 for
 details:
 
 O mass = 15.9994
 H mass = 1.008
 O charge = -0.830
 H charge = 0.415
 LJ epsilon of OO = 0.102
 LJ sigma of OO = 3.188
 LJ epsilon, sigma of OH, HH = 0.0
 K of OH bond = 450
 r0 of OH bond = 0.9572
 K of HOH angle = 55
 theta of HOH angle = 104.52 :all(b),p
 
 Wikipedia also has a nice article on "water
 models"_http://en.wikipedia.org/wiki/Water_model.
 
 :line
 
 6.8 TIP4P water model :link(howto_8),h4
 
 The four-point TIP4P rigid water model extends the traditional
 three-point TIP3P model by adding an additional site, usually
 massless, where the charge associated with the oxygen atom is placed.
 This site M is located at a fixed distance away from the oxygen along
 the bisector of the HOH bond angle.  A bond style of {harmonic} and an
 angle style of {harmonic} or {charmm} should also be used.
 
 A TIP4P model is run with LAMMPS using either this command
 for a cutoff model:
 
 "pair_style lj/cut/tip4p/cut"_pair_lj.html :ul
 
 or these two commands for a long-range model:
 
 "pair_style lj/cut/tip4p/long"_pair_lj.html
 "kspace_style pppm/tip4p"_kspace_style.html :ul
 
 For both models, the bond lengths and bond angles should be held fixed
 using the "fix shake"_fix_shake.html command.
 
 These are the additional parameters (in real units) to set for O and H
 atoms and the water molecule to run a rigid TIP4P model with a cutoff
 "(Jorgensen)"_#Jorgensen1.  Note that the OM distance is specified in
 the "pair_style"_pair_style.html command, not as part of the pair
 coefficients.
 
 O mass = 15.9994
 H mass = 1.008
 O charge = -1.040
 H charge = 0.520
 r0 of OH bond = 0.9572
 theta of HOH angle = 104.52
 OM distance = 0.15
 LJ epsilon of O-O = 0.1550
 LJ sigma of O-O = 3.1536
 LJ epsilon, sigma of OH, HH = 0.0
 Coulombic cutoff = 8.5 :all(b),p
 
 For the TIP4P/Ice model (J Chem Phys, 122, 234511 (2005);
 http://dx.doi.org/10.1063/1.1931662) these values can be used:
 
 O mass = 15.9994
 H mass =  1.008
 O charge = -1.1794
 H charge =  0.5897
 r0 of OH bond = 0.9572
 theta of HOH angle = 104.52
 OM distance = 0.1577
 LJ epsilon of O-O = 0.21084
 LJ sigma of O-O = 3.1668
 LJ epsilon, sigma of OH, HH = 0.0
 Coulombic cutoff = 8.5 :all(b),p
 
 For the TIP4P/2005 model (J Chem Phys, 123, 234505 (2005);
 http://dx.doi.org/10.1063/1.2121687), these values can be used:
 
 O mass = 15.9994
 H mass =  1.008
 O charge = -1.1128
 H charge = 0.5564
 r0 of OH bond = 0.9572
 theta of HOH angle = 104.52
 OM distance = 0.1546
 LJ epsilon of O-O = 0.1852
 LJ sigma of O-O = 3.1589
 LJ epsilon, sigma of OH, HH = 0.0
 Coulombic cutoff = 8.5 :all(b),p
 
 These are the parameters to use for TIP4P with a long-range Coulombic
 solver (e.g. Ewald or PPPM in LAMMPS):
 
 O mass = 15.9994
 H mass = 1.008
 O charge = -1.0484
 H charge = 0.5242
 r0 of OH bond = 0.9572
 theta of HOH angle = 104.52
 OM distance = 0.1250
 LJ epsilon of O-O = 0.16275
 LJ sigma of O-O = 3.16435
 LJ epsilon, sigma of OH, HH = 0.0 :all(b),p
 
 Note that when using the TIP4P pair style, the neighbor list
 cutoff for Coulomb interactions is effectively extended by a distance
 2 * (OM distance), to account for the offset distance of the
 fictitious charges on O atoms in water molecules.  Thus it is
 typically best in an efficiency sense to use a LJ cutoff >= Coulomb
 cutoff + 2*(OM distance), to shrink the size of the neighbor list.
 This leads to slightly larger cost for the long-range calculation, so
 you can test the trade-off for your model.  The OM distance and the LJ
 and Coulombic cutoffs are set in the "pair_style
 lj/cut/tip4p/long"_pair_lj.html command.
 
 Wikipedia also has a nice article on "water
 models"_http://en.wikipedia.org/wiki/Water_model.
 
 :line
 
 6.9 SPC water model :link(howto_9),h4
 
 The SPC water model specifies a 3-site rigid water molecule with
 charges and Lennard-Jones parameters assigned to each of the 3 atoms.
 In LAMMPS the "fix shake"_fix_shake.html command can be used to hold
 the two O-H bonds and the H-O-H angle rigid.  A bond style of
 {harmonic} and an angle style of {harmonic} or {charmm} should also be
 used.
 
 These are the additional parameters (in real units) to set for O and H
 atoms and the water molecule to run a rigid SPC model.
 
 O mass = 15.9994
 H mass = 1.008
 O charge = -0.820
 H charge = 0.410
 LJ epsilon of OO = 0.1553
 LJ sigma of OO = 3.166
 LJ epsilon, sigma of OH, HH = 0.0
 r0 of OH bond = 1.0
 theta of HOH angle = 109.47 :all(b),p
 
 Note that as originally proposed, the SPC model was run with a 9
 Angstrom cutoff for both LJ and Coulombic terms.  It can also be used
 with long-range Coulombics (Ewald or PPPM in LAMMPS), without changing
 any of the parameters above, though it becomes a different model in
 that mode of usage.
 
 The SPC/E (extended) water model is the same, except
 the partial charge assignments change:
 
 O charge = -0.8476
 H charge = 0.4238 :all(b),p
 
 See the "(Berendsen)"_#howto-Berendsen reference for more details on both
 the SPC and SPC/E models.
 
 Wikipedia also has a nice article on "water
 models"_http://en.wikipedia.org/wiki/Water_model.
 
 :line
 
 6.10 Coupling LAMMPS to other codes :link(howto_10),h4
 
 LAMMPS is designed to allow it to be coupled to other codes.  For
 example, a quantum mechanics code might compute forces on a subset of
 atoms and pass those forces to LAMMPS.  Or a continuum finite element
 (FE) simulation might use atom positions as boundary conditions on FE
 nodal points, compute a FE solution, and return interpolated forces on
 MD atoms.
 
 LAMMPS can be coupled to other codes in at least 3 ways.  Each has
 advantages and disadvantages, which you'll have to think about in the
 context of your application.
 
 (1) Define a new "fix"_fix.html command that calls the other code.  In
 this scenario, LAMMPS is the driver code.  During its timestepping,
 the fix is invoked, and can make library calls to the other code,
 which has been linked to LAMMPS as a library.  This is the way the
 "POEMS"_poems package that performs constrained rigid-body motion on
 groups of atoms is hooked to LAMMPS.  See the
 "fix poems"_fix_poems.html command for more details.  See "this
 section"_Section_modify.html of the documentation for info on how to add
 a new fix to LAMMPS.
 
 :link(poems,http://www.rpi.edu/~anderk5/lab)
 
 (2) Define a new LAMMPS command that calls the other code.  This is
 conceptually similar to method (1), but in this case LAMMPS and the
 other code are on a more equal footing.  Note that now the other code
 is not called during the timestepping of a LAMMPS run, but between
 runs.  The LAMMPS input script can be used to alternate LAMMPS runs
 with calls to the other code, invoked via the new command.  The
 "run"_run.html command facilitates this with its {every} option, which
 makes it easy to run a few steps, invoke the command, run a few steps,
 invoke the command, etc.
 
 In this scenario, the other code can be called as a library, as in
 (1), or it could be a stand-alone code, invoked by a system() call
 made by the command (assuming your parallel machine allows one or more
 processors to start up another program).  In the latter case the
 stand-alone code could communicate with LAMMPS thru files that the
 command writes and reads.
 
 See "Section 10"_Section_modify.html of the documentation for how
 to add a new command to LAMMPS.
 
 (3) Use LAMMPS as a library called by another code.  In this case the
 other code is the driver and calls LAMMPS as needed.  Or a wrapper
 code could link and call both LAMMPS and another code as libraries.
 Again, the "run"_run.html command has options that allow it to be
 invoked with minimal overhead (no setup or clean-up) if you wish to do
 multiple short runs, driven by another program.
 
 Examples of driver codes that call LAMMPS as a library are included in
 the examples/COUPLE directory of the LAMMPS distribution; see
 examples/COUPLE/README for more details:
 
 simple: simple driver programs in C++ and C which invoke LAMMPS as a
 library :ulb,l
 
 lammps_quest: coupling of LAMMPS and "Quest"_quest, to run classical
 MD with quantum forces calculated by a density functional code :l
 
 lammps_spparks: coupling of LAMMPS and "SPPARKS"_spparks, to couple
 a kinetic Monte Carlo model for grain growth using MD to calculate
 strain induced across grain boundaries :l
 :ule
 
 :link(quest,http://dft.sandia.gov/Quest)
 :link(spparks,http://www.sandia.gov/~sjplimp/spparks.html)
 
 "This section"_Section_start.html#start_5 of the documentation
 describes how to build LAMMPS as a library.  Once this is done, you
 can interface with LAMMPS either via C++, C, Fortran, or Python (or
 any other language that supports a vanilla C-like interface).  For
 example, from C++ you could create one (or more) "instances" of
 LAMMPS, pass it an input script to process, or execute individual
 commands, all by invoking the correct class methods in LAMMPS.  From C
 or Fortran you can make function calls to do the same things.  See
 "Section 11"_Section_python.html of the manual for a description
 of the Python wrapper provided with LAMMPS that operates through the
 LAMMPS library interface.
 
 The files src/library.cpp and library.h contain the C-style interface
 to LAMMPS.  See "Section 6.19"_Section_howto.html#howto_19 of the
 manual for a description of the interface and how to extend it for
 your needs.
 
 Note that the lammps_open() function that creates an instance of
 LAMMPS takes an MPI communicator as an argument.  This means that
 instance of LAMMPS will run on the set of processors in the
 communicator.  Thus the calling code can run LAMMPS on all or a subset
 of processors.  For example, a wrapper script might decide to
 alternate between LAMMPS and another code, allowing them both to run
 on all the processors.  Or it might allocate half the processors to
 LAMMPS and half to the other code and run both codes simultaneously
 before syncing them up periodically.  Or it might instantiate multiple
 instances of LAMMPS to perform different calculations.
 
 :line
 
 6.11 Visualizing LAMMPS snapshots :link(howto_11),h4
 
 LAMMPS itself does not do visualization, but snapshots from LAMMPS
 simulations can be visualized (and analyzed) in a variety of ways.
 
 LAMMPS snapshots are created by the "dump"_dump.html command which can
 create files in several formats.  The native LAMMPS dump format is a
 text file (see "dump atom" or "dump custom") which can be visualized
 by the "xmovie"_Section_tools.html#xmovie program, included with the
 LAMMPS package.  This produces simple, fast 2d projections of 3d
 systems, and can be useful for rapid debugging of simulation geometry
 and atom trajectories.
 
 Several programs included with LAMMPS as auxiliary tools can convert
 native LAMMPS dump files to other formats.  See the
 "Section 9"_Section_tools.html doc page for details.  The first is
 the "ch2lmp tool"_Section_tools.html#charmm, which contains a
 lammps2pdb Perl script which converts LAMMPS dump files into PDB
 files.  The second is the "lmp2arc tool"_Section_tools.html#arc which
 converts LAMMPS dump files into Accelrys' Insight MD program files.
 The third is the "lmp2cfg tool"_Section_tools.html#cfg which converts
 LAMMPS dump files into CFG files which can be read into the
 "AtomEye"_atomeye visualizer.
 
 A Python-based toolkit distributed by our group can read native LAMMPS
 dump files, including custom dump files with additional columns of
 user-specified atom information, and convert them to various formats
 or pipe them into visualization software directly.  See the "Pizza.py
 WWW site"_pizza for details.  Specifically, Pizza.py can convert
 LAMMPS dump files into PDB, XYZ, "Ensight"_ensight, and VTK formats.
 Pizza.py can pipe LAMMPS dump files directly into the Raster3d and
 RasMol visualization programs.  Pizza.py has tools that do interactive
 3d OpenGL visualization and one that creates SVG images of dump file
 snapshots.
 
 LAMMPS can create XYZ files directly (via "dump xyz") which is a
 simple text-based file format used by many visualization programs
 including "VMD"_vmd.
 
 LAMMPS can create DCD files directly (via "dump dcd") which can be
 read by "VMD"_vmd in conjunction with a CHARMM PSF file.  Using this
 form of output avoids the need to convert LAMMPS snapshots to PDB
 files.  See the "dump"_dump.html command for more information on DCD
 files.
 
 LAMMPS can create XTC files directly (via "dump xtc") which is a GROMACS
 file format which can also be read by "VMD"_vmd for visualization.
 See the "dump"_dump.html command for more information on XTC files.
 
 :link(pizza,http://www.sandia.gov/~sjplimp/pizza.html)
 :link(vmd,http://www.ks.uiuc.edu/Research/vmd)
 :link(ensight,http://www.ensight.com)
 :link(atomeye,http://mt.seas.upenn.edu/Archive/Graphics/A)
 
 :line
 
 6.12 Triclinic (non-orthogonal) simulation boxes :link(howto_12),h4
 
 By default, LAMMPS uses an orthogonal simulation box to encompass the
 particles.  The "boundary"_boundary.html command sets the boundary
 conditions of the box (periodic, non-periodic, etc).  The orthogonal
 box has its "origin" at (xlo,ylo,zlo) and is defined by 3 edge vectors
 starting from the origin given by [a] = (xhi-xlo,0,0); [b] =
 (0,yhi-ylo,0); [c] = (0,0,zhi-zlo).  The 6 parameters
 (xlo,xhi,ylo,yhi,zlo,zhi) are defined at the time the simulation box
 is created, e.g. by the "create_box"_create_box.html or
 "read_data"_read_data.html or "read_restart"_read_restart.html
 commands.  Additionally, LAMMPS defines box size parameters lx,ly,lz
 where lx = xhi-xlo, and similarly in the y and z dimensions.  The 6
 parameters, as well as lx,ly,lz, can be output via the "thermo_style
 custom"_thermo_style.html command.
 
 LAMMPS also allows simulations to be performed in triclinic
 (non-orthogonal) simulation boxes shaped as a parallelepiped with
 triclinic symmetry.  The parallelepiped has its "origin" at
 (xlo,ylo,zlo) and is defined by 3 edge vectors starting from the
 origin given by [a] = (xhi-xlo,0,0); [b] = (xy,yhi-ylo,0); [c] =
 (xz,yz,zhi-zlo).  {xy,xz,yz} can be 0.0 or positive or negative values
 and are called "tilt factors" because they are the amount of
 displacement applied to faces of an originally orthogonal box to
 transform it into the parallelepiped.  In LAMMPS the triclinic
 simulation box edge vectors [a], [b], and [c] cannot be arbitrary
 vectors.  As indicated, [a] must lie on the positive x axis.  [b] must
 lie in the xy plane, with strictly positive y component. [c] may have
 any orientation with strictly positive z component.  The requirement
 that [a], [b], and [c] have strictly positive x, y, and z components,
 respectively, ensures that [a], [b], and [c] form a complete
 right-handed basis.  These restrictions impose no loss of generality,
 since it is possible to rotate/invert any set of 3 crystal basis
 vectors so that they conform to the restrictions.
 
 For example, assume that the 3 vectors [A],[B],[C] are the edge
 vectors of a general parallelepiped, where there is no restriction on
 [A],[B],[C] other than they form a complete right-handed basis i.e.
 [A] x [B] . [C] > 0.  The equivalent LAMMPS [a],[b],[c] are a linear
 rotation of [A], [B], and [C] and can be computed as follows:
 
 :c,image(Eqs/transform.jpg)
 
 where A = | [A] | indicates the scalar length of [A]. The hat symbol (^)
 indicates the corresponding unit vector. {beta} and {gamma} are angles
 between the vectors described below. Note that by construction,
 [a], [b], and [c] have strictly positive x, y, and z components, respectively.
 If it should happen that
 [A], [B], and [C] form a left-handed basis, then the above equations
 are not valid for [c]. In this case, it is necessary
 to first apply an inversion. This can be achieved
 by interchanging two basis vectors or by changing the sign of one of them.
 
 For consistency, the same rotation/inversion applied to the basis vectors
 must also be applied to atom positions, velocities,
 and any other vector quantities.
 This can be conveniently achieved by first converting to
 fractional coordinates in the
 old basis and then converting to distance coordinates in the new basis.
 The transformation is given by the following equation:
 
 :c,image(Eqs/rotate.jpg)
 
 where {V} is the volume of the box, [X] is the original vector quantity and
 [x] is the vector in the LAMMPS basis.
 
 There is no requirement that a triclinic box be periodic in any
 dimension, though it typically should be in at least the 2nd dimension
 of the tilt (y in xy) if you want to enforce a shift in periodic
 boundary conditions across that boundary.  Some commands that work
 with triclinic boxes, e.g. the "fix deform"_fix_deform.html and "fix
 npt"_fix_nh.html commands, require periodicity or non-shrink-wrap
 boundary conditions in specific dimensions.  See the command doc pages
 for details.
 
 The 9 parameters (xlo,xhi,ylo,yhi,zlo,zhi,xy,xz,yz) are defined at the
 time the simulation box is created.  This happens in one of 3 ways.
 If the "create_box"_create_box.html command is used with a region of
 style {prism}, then a triclinic box is setup.  See the
 "region"_region.html command for details.  If the
 "read_data"_read_data.html command is used to define the simulation
 box, and the header of the data file contains a line with the "xy xz
 yz" keyword, then a triclinic box is setup.  See the
 "read_data"_read_data.html command for details.  Finally, if the
 "read_restart"_read_restart.html command reads a restart file which
 was written from a simulation using a triclinic box, then a triclinic
 box will be setup for the restarted simulation.
 
 Note that you can define a triclinic box with all 3 tilt factors =
 0.0, so that it is initially orthogonal.  This is necessary if the box
 will become non-orthogonal, e.g. due to the "fix npt"_fix_nh.html or
 "fix deform"_fix_deform.html commands.  Alternatively, you can use the
 "change_box"_change_box.html command to convert a simulation box from
 orthogonal to triclinic and vice versa.
 
 As with orthogonal boxes, LAMMPS defines triclinic box size parameters
 lx,ly,lz where lx = xhi-xlo, and similarly in the y and z dimensions.
 The 9 parameters, as well as lx,ly,lz, can be output via the
 "thermo_style custom"_thermo_style.html command.
 
 To avoid extremely tilted boxes (which would be computationally
 inefficient), LAMMPS normally requires that no tilt factor can skew
 the box more than half the distance of the parallel box length, which
 is the 1st dimension in the tilt factor (x for xz).  This is required
 both when the simulation box is created, e.g. via the
 "create_box"_create_box.html or "read_data"_read_data.html commands,
 as well as when the box shape changes dynamically during a simulation,
 e.g. via the "fix deform"_fix_deform.html or "fix npt"_fix_nh.html
 commands.
 
 For example, if xlo = 2 and xhi = 12, then the x box length is 10 and
 the xy tilt factor must be between -5 and 5.  Similarly, xz must be
 between -(xhi-xlo)/2 and +(xhi-xlo)/2, and yz must be between
 -(yhi-ylo)/2 and +(yhi-ylo)/2.  Note that this is
 not a limitation, since if the maximum tilt factor is 5 (as in this
 example), then configurations with tilt = ..., -15, -5, 5, 15, 25,
 ... are geometrically all equivalent.  If the box tilt exceeds this
 limit during a dynamics run (e.g. via the "fix deform"_fix_deform.html
 command), then the box is "flipped" to an equivalent shape with a tilt
 factor within the bounds, so the run can continue.  See the "fix
 deform"_fix_deform.html doc page for further details.
 
 One exception to this rule is if the 1st dimension in the tilt
 factor (x for xy) is non-periodic.  In that case, the limits on the
 tilt factor are not enforced, since flipping the box in that dimension
 does not change the atom positions due to non-periodicity.  In this
 mode, if you tilt the system to extreme angles, the simulation will
 simply become inefficient, due to the highly skewed simulation box.
 
 The limitation on not creating a simulation box with a tilt factor
 skewing the box more than half the distance of the parallel box length
 can be overridden via the "box"_box.html command.  Setting the {tilt}
 keyword to {large} allows any tilt factors to be specified.
 
 Box flips that may occur using the "fix deform"_fix_deform.html or
 "fix npt"_fix_nh.html commands can be turned off using the {flip no}
 option with either of the commands.
 
 Note that if a simulation box has a large tilt factor, LAMMPS will run
 less efficiently, due to the large volume of communication needed to
 acquire ghost atoms around a processor's irregular-shaped sub-domain.
 For extreme values of tilt, LAMMPS may also lose atoms and generate an
 error.
 
 Triclinic crystal structures are often defined using three lattice
 constants {a}, {b}, and {c}, and three angles {alpha}, {beta} and
 {gamma}. Note that in this nomenclature, the a, b, and c lattice
 constants are the scalar lengths of the edge vectors [a], [b], and [c]
 defined above.  The relationship between these 6 quantities
 (a,b,c,alpha,beta,gamma) and the LAMMPS box sizes (lx,ly,lz) =
 (xhi-xlo,yhi-ylo,zhi-zlo) and tilt factors (xy,xz,yz) is as follows:
 
 :c,image(Eqs/box.jpg)
 
 The inverse relationship can be written as follows:
 
 :c,image(Eqs/box_inverse.jpg)
 
 The values of {a}, {b}, {c} , {alpha}, {beta} , and {gamma} can be printed
 out or accessed by computes using the
 "thermo_style custom"_thermo_style.html keywords
 {cella}, {cellb}, {cellc}, {cellalpha}, {cellbeta}, {cellgamma},
 respectively.
 
 As discussed on the "dump"_dump.html command doc page, when the BOX
 BOUNDS for a snapshot is written to a dump file for a triclinic box,
 an orthogonal bounding box which encloses the triclinic simulation box
 is output, along with the 3 tilt factors (xy, xz, yz) of the triclinic
 box, formatted as follows:
 
 ITEM: BOX BOUNDS xy xz yz
 xlo_bound xhi_bound xy
 ylo_bound yhi_bound xz
 zlo_bound zhi_bound yz :pre
 
 This bounding box is convenient for many visualization programs and is
 calculated from the 9 triclinic box parameters
 (xlo,xhi,ylo,yhi,zlo,zhi,xy,xz,yz) as follows:
 
 xlo_bound = xlo + MIN(0.0,xy,xz,xy+xz)
 xhi_bound = xhi + MAX(0.0,xy,xz,xy+xz)
 ylo_bound = ylo + MIN(0.0,yz)
 yhi_bound = yhi + MAX(0.0,yz)
 zlo_bound = zlo
 zhi_bound = zhi :pre
 
 These formulas can be inverted if you need to convert the bounding box
 back into the triclinic box parameters, e.g. xlo = xlo_bound -
 MIN(0.0,xy,xz,xy+xz).
 
 One use of triclinic simulation boxes is to model solid-state crystals
 with triclinic symmetry.  The "lattice"_lattice.html command can be
 used with non-orthogonal basis vectors to define a lattice that will
 tile a triclinic simulation box via the
 "create_atoms"_create_atoms.html command.
 
 A second use is to run Parrinello-Rahman dynamics via the "fix
 npt"_fix_nh.html command, which will adjust the xy, xz, yz tilt
 factors to compensate for off-diagonal components of the pressure
 tensor.  The analog for an "energy minimization"_minimize.html is
 the "fix box/relax"_fix_box_relax.html command.
 
 A third use is to shear a bulk solid to study the response of the
 material.  The "fix deform"_fix_deform.html command can be used for
 this purpose.  It allows dynamic control of the xy, xz, yz tilt
 factors as a simulation runs.  This is discussed in the next section
 on non-equilibrium MD (NEMD) simulations.
 
 :line
 
 6.13 NEMD simulations :link(howto_13),h4
 
 Non-equilibrium molecular dynamics or NEMD simulations are typically
 used to measure a fluid's rheological properties such as viscosity.
 In LAMMPS, such simulations can be performed by first setting up a
 non-orthogonal simulation box (see the preceding Howto section).
 
 A shear strain can be applied to the simulation box at a desired
 strain rate by using the "fix deform"_fix_deform.html command.  The
 "fix nvt/sllod"_fix_nvt_sllod.html command can be used to thermostat
 the sheared fluid and integrate the SLLOD equations of motion for the
 system.  Fix nvt/sllod uses "compute
 temp/deform"_compute_temp_deform.html to compute a thermal temperature
 by subtracting out the streaming velocity of the shearing atoms.  The
 velocity profile or other properties of the fluid can be monitored via
 the "fix ave/chunk"_fix_ave_chunk.html command.
 
 As discussed in the previous section on non-orthogonal simulation
 boxes, the amount of tilt or skew that can be applied is limited by
 LAMMPS for computational efficiency to be 1/2 of the parallel box
 length.  However, "fix deform"_fix_deform.html can continuously strain
 a box by an arbitrary amount.  As discussed in the "fix
 deform"_fix_deform.html command, when the tilt value reaches a limit,
 the box is flipped to the opposite limit which is an equivalent tiling
 of periodic space.  The strain rate can then continue to change as
 before.  In a long NEMD simulation these box re-shaping events may
 occur many times.
 
 In a NEMD simulation, the "remap" option of "fix
 deform"_fix_deform.html should be set to "remap v", since that is what
 "fix nvt/sllod"_fix_nvt_sllod.html assumes to generate a velocity
 profile consistent with the applied shear strain rate.
 
 An alternative method for calculating viscosities is provided via the
 "fix viscosity"_fix_viscosity.html command.
 
 NEMD simulations can also be used to measure transport properties of a fluid
 through a pore or channel. Simulations of steady-state flow can be performed
 using the "fix flow/gauss"_fix_flow_gauss.html command.
 
 :line
 
 6.14 Finite-size spherical and aspherical particles :link(howto_14),h4
 
 Typical MD models treat atoms or particles as point masses.  Sometimes
 it is desirable to have a model with finite-size particles such as
 spheroids or ellipsoids or generalized aspherical bodies.  The
 difference is that such particles have a moment of inertia, rotational
 energy, and angular momentum.  Rotation is induced by torque coming
 from interactions with other particles.
 
 LAMMPS has several options for running simulations with these kinds of
 particles.  The following aspects are discussed in turn:
 
 atom styles
 pair potentials
 time integration
 computes, thermodynamics, and dump output
 rigid bodies composed of finite-size particles :ul
 
 Example input scripts for these kinds of models are in the body,
 colloid, dipole, ellipse, line, peri, pour, and tri directories of the
 "examples directory"_Section_example.html in the LAMMPS distribution.
 
 Atom styles :h5
 
 There are several "atom styles"_atom_style.html that allow for
 definition of finite-size particles: sphere, dipole, ellipsoid, line,
 tri, peri, and body.
 
 The sphere style defines particles that are spheroids and each
 particle can have a unique diameter and mass (or density).  These
 particles store an angular velocity (omega) and can be acted upon by
 torque.  The "set" command can be used to modify the diameter and mass
 of individual particles, after they are created.
 
 The dipole style does not actually define finite-size particles, but
 is often used in conjunction with spherical particles, via a command
 like
 
 atom_style hybrid sphere dipole :pre
 
 This is because when dipoles interact with each other, they induce
 torques, and a particle must be finite-size (i.e. have a moment of
 inertia) in order to respond and rotate.  See the "atom_style
 dipole"_atom_style.html command for details.  The "set" command can be
 used to modify the orientation and length of the dipole moment of
 individual particles, after they are created.
 
 The ellipsoid style defines particles that are ellipsoids and thus can
 be aspherical.  Each particle has a shape, specified by 3 diameters,
 and mass (or density).  These particles store an angular momentum and
 their orientation (quaternion), and can be acted upon by torque.  They
 do not store an angular velocity (omega), which can be in a different
 direction than angular momentum, rather they compute it as needed.
 The "set" command can be used to modify the diameter, orientation, and
 mass of individual particles, after they are created.  The doc page
 for the "set" command also has a brief explanation of what quaternions
 are.
 
 The line style defines line segment particles with two end points and
 a mass (or density).  They can be used in 2d simulations, and they can
 be joined together to form rigid bodies which represent arbitrary
 polygons.
 
 The tri style defines triangular particles with three corner points
 and a mass (or density).  They can be used in 3d simulations, and they
 can be joined together to form rigid bodies which represent arbitrary
 particles with a triangulated surface.
 
 The peri style is used with "Peridynamic models"_pair_peri.html and
 defines particles as having a volume, that is used internally in the
 "pair_style peri"_pair_peri.html potentials.
 
 The body style allows for definition of particles which can represent
 complex entities, such as surface meshes of discrete points,
 collections of sub-particles, deformable objects, etc.  The body style
 is discussed in more detail on the "body"_body.html doc page.
 
 Note that if one of these atom styles is used (or multiple styles via
 the "atom_style hybrid"_atom_style.html command), not all particles in
 the system are required to be finite-size or aspherical.
 
 For example, in the ellipsoid style, if the 3 shape parameters are set
 to the same value, the particle will be a sphere rather than an
 ellipsoid.  If the 3 shape parameters are all set to 0.0 or if the
 diameter is set to 0.0, it will be a point particle.  In the line or
 tri style, if the lineflag or triflag is specified as 0, then it
 will be a point particle.
 
 Some of the pair styles used to compute pairwise interactions between
 finite-size particles also compute the correct interaction with point
 particles as well, e.g. the interaction between a point particle and a
 finite-size particle or between two point particles.  If necessary,
 "pair_style hybrid"_pair_hybrid.html can be used to ensure the correct
 interactions are computed for the appropriate style of interactions.
 Likewise, using groups to partition particles (ellipsoids versus
 spheres versus point particles) will allow you to use the appropriate
 time integrators and temperature computations for each class of
 particles.  See the doc pages for various commands for details.
 
 Also note that for "2d simulations"_dimension.html, atom styles sphere
 and ellipsoid still use 3d particles, rather than circular disks or
 ellipses.  This means they have the same moment of inertia as the 3d
 object.  When temperature is computed, the correct degrees of freedom
 are used for rotation in a 2d versus 3d system.
 
 Pair potentials :h5
 
 When a system with finite-size particles is defined, the particles
 will only rotate and experience torque if the force field computes
 such interactions.  These are the various "pair
 styles"_pair_style.html that generate torque:
 
 "pair_style gran/history"_pair_gran.html
 "pair_style gran/hertzian"_pair_gran.html
 "pair_style gran/no_history"_pair_gran.html
 "pair_style dipole/cut"_pair_dipole.html
 "pair_style gayberne"_pair_gayberne.html
 "pair_style resquared"_pair_resquared.html
 "pair_style brownian"_pair_brownian.html
 "pair_style lubricate"_pair_lubricate.html
 "pair_style line/lj"_pair_line_lj.html
 "pair_style tri/lj"_pair_tri_lj.html
 "pair_style body"_pair_body.html :ul
 
 The granular pair styles are used with spherical particles.  The
 dipole pair style is used with the dipole atom style, which could be
 applied to spherical or ellipsoidal particles.  The GayBerne and
 REsquared potentials require ellipsoidal particles, though they will
 also work if the 3 shape parameters are the same (a sphere).  The
 Brownian and lubrication potentials are used with spherical particles.
 The line, tri, and body potentials are used with line segment,
 triangular, and body particles respectively.
 
 Time integration :h5
 
 There are several fixes that perform time integration on finite-size
 spherical particles, meaning the integrators update the rotational
 orientation and angular velocity or angular momentum of the particles:
 
 "fix nve/sphere"_fix_nve_sphere.html
 "fix nvt/sphere"_fix_nvt_sphere.html
 "fix npt/sphere"_fix_npt_sphere.html :ul
 
 Likewise, there are 3 fixes that perform time integration on
 ellipsoidal particles:
 
 "fix nve/asphere"_fix_nve_asphere.html
 "fix nvt/asphere"_fix_nvt_asphere.html
 "fix npt/asphere"_fix_npt_asphere.html :ul
 
 The advantage of these fixes is that those which thermostat the
 particles include the rotational degrees of freedom in the temperature
 calculation and thermostatting.  The "fix langevin"_fix_langevin.html
 command can also be used with its {omega} or {angmom} options to
 thermostat the rotational degrees of freedom for spherical or
 ellipsoidal particles.  Other thermostatting fixes only operate on the
 translational kinetic energy of finite-size particles.
 
 These fixes perform constant NVE time integration on line segment,
 triangular, and body particles:
 
 "fix nve/line"_fix_nve_line.html
 "fix nve/tri"_fix_nve_tri.html
 "fix nve/body"_fix_nve_body.html :ul
 
 Note that for mixtures of point and finite-size particles, these
 integration fixes can only be used with "groups"_group.html which
 contain finite-size particles.
 
 Computes, thermodynamics, and dump output :h5
 
 There are several computes that calculate the temperature or
 rotational energy of spherical or ellipsoidal particles:
 
 "compute temp/sphere"_compute_temp_sphere.html
 "compute temp/asphere"_compute_temp_asphere.html
 "compute erotate/sphere"_compute_erotate_sphere.html
 "compute erotate/asphere"_compute_erotate_asphere.html :ul
 
 These include rotational degrees of freedom in their computation.  If
 you wish the thermodynamic output of temperature or pressure to use
 one of these computes (e.g. for a system entirely composed of
 finite-size particles), then the compute can be defined and the
 "thermo_modify"_thermo_modify.html command used.  Note that by default
 thermodynamic quantities will be calculated with a temperature that
 only includes translational degrees of freedom.  See the
 "thermo_style"_thermo_style.html command for details.
 
 These commands can be used to output various attributes of finite-size
 particles:
 
 "dump custom"_dump.html
 "compute property/atom"_compute_property_atom.html
 "dump local"_dump.html
 "compute body/local"_compute_body_local.html :ul
 
 Attributes include the dipole moment, the angular velocity, the
 angular momentum, the quaternion, the torque, the end-point and
 corner-point coordinates (for line and tri particles), and
 sub-particle attributes of body particles.
 
 Rigid bodies composed of finite-size particles :h5
 
 The "fix rigid"_fix_rigid.html command treats a collection of
 particles as a rigid body, computes its inertia tensor, sums the total
 force and torque on the rigid body each timestep due to forces on its
 constituent particles, and integrates the motion of the rigid body.
 
 If any of the constituent particles of a rigid body are finite-size
 particles (spheres or ellipsoids or line segments or triangles), then
 their contribution to the inertia tensor of the body is different than
 if they were point particles.  This means the rotational dynamics of
 the rigid body will be different.  Thus a model of a dimer is
 different if the dimer consists of two point masses versus two
 spheroids, even if the two particles have the same mass.  Finite-size
 particles that experience torque due to their interaction with other
 particles will also impart that torque to a rigid body they are part
 of.
 
 See the "fix rigid"_fix_rigid.html command for examples of complex
 rigid-body models it is possible to define in LAMMPS.
 
 Note that the "fix shake"_fix_shake.html command can also be used to
 treat 2, 3, or 4 particles as a rigid body, but it always assumes the
 particles are point masses.
 
 Also note that body particles cannot be modeled with the "fix
 rigid"_fix_rigid.html command.  Body particles are treated by LAMMPS
 as single particles, though they can store internal state, such as a
 list of sub-particles.  Individual body particles are typically treated
 as rigid bodies, and their motion integrated with a command like "fix
 nve/body"_fix_nve_body.html.  Interactions between pairs of body
 particles are computed via a command like "pair_style
 body"_pair_body.html.
 
 :line
 
 6.15 Output from LAMMPS (thermo, dumps, computes, fixes, variables) :link(howto_15),h4
 
 There are four basic kinds of LAMMPS output:
 
 "Thermodynamic output"_thermo_style.html, which is a list
 of quantities printed every few timesteps to the screen and logfile. :ulb,l
 
 "Dump files"_dump.html, which contain snapshots of atoms and various
 per-atom values and are written at a specified frequency. :l
 
 Certain fixes can output user-specified quantities to files: "fix
 ave/time"_fix_ave_time.html for time averaging, "fix
 ave/chunk"_fix_ave_chunk.html for spatial or other averaging, and "fix
 print"_fix_print.html for single-line output of
 "variables"_variable.html.  Fix print can also output to the
 screen. :l
 
 "Restart files"_restart.html. :l
 :ule
 
 A simulation prints one set of thermodynamic output and (optionally)
 restart files.  It can generate any number of dump files and fix
 output files, depending on what "dump"_dump.html and "fix"_fix.html
 commands you specify.
 
 As discussed below, LAMMPS gives you a variety of ways to determine
 what quantities are computed and printed when the thermodynamics,
 dump, or fix commands listed above perform output.  Throughout this
 discussion, note that users can also "add their own computes and fixes
 to LAMMPS"_Section_modify.html which can then generate values that can
 then be output with these commands.
 
 The following sub-sections discuss different LAMMPS commands related
 to output and the kind of data they operate on and produce:
 
 "Global/per-atom/local data"_#global
 "Scalar/vector/array data"_#scalar
 "Thermodynamic output"_#thermo
 "Dump file output"_#dump
 "Fixes that write output files"_#fixoutput
 "Computes that process output quantities"_#computeoutput
 "Fixes that process output quantities"_#fixprocoutput
 "Computes that generate values to output"_#compute
 "Fixes that generate values to output"_#fix
 "Variables that generate values to output"_#variable
 "Summary table of output options and data flow between commands"_#table :ul
 
 Global/per-atom/local data :h5,link(global)
 
 Various output-related commands work with three different styles of
 data: global, per-atom, or local.  A global datum is one or more
 system-wide values, e.g. the temperature of the system.  A per-atom
 datum is one or more values per atom, e.g. the kinetic energy of each
 atom.  Local datums are calculated by each processor based on the
 atoms it owns, but there may be zero or more per atom, e.g. a list of
 bond distances.
 
 Scalar/vector/array data :h5,link(scalar)
 
 Global, per-atom, and local datums can each come in three kinds: a
 single scalar value, a vector of values, or a 2d array of values.  The
 doc page for a "compute" or "fix" or "variable" that generates data
 will specify both the style and kind of data it produces, e.g. a
 per-atom vector.
 
 When a quantity is accessed, as in many of the output commands
 discussed below, it can be referenced via the following bracket
 notation, where ID in this case is the ID of a compute.  The leading
 "c_" would be replaced by "f_" for a fix, or "v_" for a variable:
 
 c_ID | entire scalar, vector, or array
 c_ID\[I\] | one element of vector, one column of array
 c_ID\[I\]\[J\] | one element of array :tb(s=|)
 
 In other words, using one bracket reduces the dimension of the data
 once (vector -> scalar, array -> vector).  Using two brackets reduces
 the dimension twice (array -> scalar).  Thus a command that uses
 scalar values as input can typically also process elements of a vector
 or array.
 
 Thermodynamic output :h5,link(thermo)
 
 The frequency and format of thermodynamic output is set by the
 "thermo"_thermo.html, "thermo_style"_thermo_style.html, and
 "thermo_modify"_thermo_modify.html commands.  The
 "thermo_style"_thermo_style.html command also specifies what values
 are calculated and written out.  Pre-defined keywords can be specified
 (e.g. press, etotal, etc).  Three additional kinds of keywords can
 also be specified (c_ID, f_ID, v_name), where a "compute"_compute.html
 or "fix"_fix.html or "variable"_variable.html provides the value to be
 output.  In each case, the compute, fix, or variable must generate
 global values for input to the "thermo_style custom"_thermo_style.html
 command.
 
 Note that thermodynamic output values can be "extensive" or
 "intensive".  The former scale with the number of atoms in the system
 (e.g. total energy), the latter do not (e.g. temperature).  The
 setting for "thermo_modify norm"_thermo_modify.html determines whether
 extensive quantities are normalized or not.  Computes and fixes
 produce either extensive or intensive values; see their individual doc
 pages for details.  "Equal-style variables"_variable.html produce only
 intensive values; you can include a division by "natoms" in the
 formula if desired, to make an extensive calculation produce an
 intensive result.
 
 Dump file output :h5,link(dump)
 
 Dump file output is specified by the "dump"_dump.html and
 "dump_modify"_dump_modify.html commands.  There are several
 pre-defined formats (dump atom, dump xtc, etc).
 
 There is also a "dump custom"_dump.html format where the user
 specifies what values are output with each atom.  Pre-defined atom
 attributes can be specified (id, x, fx, etc).  Three additional kinds
 of keywords can also be specified (c_ID, f_ID, v_name), where a
 "compute"_compute.html or "fix"_fix.html or "variable"_variable.html
 provides the values to be output.  In each case, the compute, fix, or
 variable must generate per-atom values for input to the "dump
 custom"_dump.html command.
 
 There is also a "dump local"_dump.html format where the user specifies
 what local values to output.  A pre-defined index keyword can be
 specified to enumerate the local values.  Two additional kinds of
 keywords can also be specified (c_ID, f_ID), where a
 "compute"_compute.html or "fix"_fix.html
 provides the values to be output.  In each case, the compute or fix
 must generate local values for input to the "dump local"_dump.html
 command.
 
 Fixes that write output files :h5,link(fixoutput)
 
 Several fixes take various quantities as input and can write output
 files: "fix ave/time"_fix_ave_time.html, "fix
 ave/chunk"_fix_ave_chunk.html, "fix ave/histo"_fix_ave_histo.html,
 "fix ave/correlate"_fix_ave_correlate.html, and "fix
 print"_fix_print.html.
 
 The "fix ave/time"_fix_ave_time.html command enables direct output to
 a file and/or time-averaging of global scalars or vectors.  The user
 specifies one or more quantities as input.  These can be global
 "compute"_compute.html values, global "fix"_fix.html values, or
 "variables"_variable.html of any style except the atom style which
 produces per-atom values.  Since a variable can refer to keywords used
 by the "thermo_style custom"_thermo_style.html command (like temp or
 press) and individual per-atom values, a wide variety of quantities
 can be time averaged and/or output in this way.  If the inputs are one
 or more scalar values, then the fix generates a global scalar or vector
 of output.  If the inputs are one or more vector values, then the fix
 generates a global vector or array of output.  The time-averaged
 output of this fix can also be used as input to other output commands.
 
 The "fix ave/chunk"_fix_ave_chunk.html command enables direct output
 to a file of chunk-averaged per-atom quantities like those output in
 dump files.  Chunks can represent spatial bins or other collections of
 atoms, e.g. individual molecules.  The per-atom quantities can be atom
 density (mass or number) or atom attributes such as position,
 velocity, force.  They can also be per-atom quantities calculated by a
 "compute"_compute.html, by a "fix"_fix.html, or by an atom-style
 "variable"_variable.html.  The chunk-averaged output of this fix can
 also be used as input to other output commands.
 
 The "fix ave/histo"_fix_ave_histo.html command enables direct output
 to a file of histogrammed quantities, which can be global or per-atom
 or local quantities.  The histogram output of this fix can also be
 used as input to other output commands.
 
 The "fix ave/correlate"_fix_ave_correlate.html command enables direct
 output to a file of time-correlated quantities, which can be global
 values.  The correlation matrix output of this fix can also be used as
 input to other output commands.
 
 The "fix print"_fix_print.html command can generate a line of output
 written to the screen and log file or to a separate file, periodically
 during a running simulation.  The line can contain one or more
 "variable"_variable.html values for any style variable except the
 vector or atom styles.  As explained above, variables themselves can
 contain references to global values generated by "thermodynamic
 keywords"_thermo_style.html, "computes"_compute.html,
 "fixes"_fix.html, or other "variables"_variable.html, or to per-atom
 values for a specific atom.  Thus the "fix print"_fix_print.html
 command is a means to output a wide variety of quantities separate
 from normal thermodynamic or dump file output.
 
 Computes that process output quantities :h5,link(computeoutput)
 
 The "compute reduce"_compute_reduce.html and "compute
 reduce/region"_compute_reduce.html commands take one or more per-atom
 or local vector quantities as inputs and "reduce" them (sum, min, max,
 ave) to scalar quantities.  These are produced as output values which
 can be used as input to other output commands.
 
 The "compute slice"_compute_slice.html command takes one or more global
 vector or array quantities as inputs and extracts a subset of their
 values to create a new vector or array.  These are produced as output
 values which can be used as input to other output commands.
 
 The "compute property/atom"_compute_property_atom.html command takes a
 list of one or more pre-defined atom attributes (id, x, fx, etc) and
 stores the values in a per-atom vector or array.  These are produced
 as output values which can be used as input to other output commands.
 The list of atom attributes is the same as for the "dump
 custom"_dump.html command.
 
 The "compute property/local"_compute_property_local.html command takes
 a list of one or more pre-defined local attributes (bond info, angle
 info, etc) and stores the values in a local vector or array.  These
 are produced as output values which can be used as input to other
 output commands.
 
 Fixes that process output quantities :h5,link(fixprocoutput)
 
 The "fix vector"_fix_vector.html command can create global vectors as
 output from global scalars as input, accumulating them one element at
 a time.
 
 The "fix ave/atom"_fix_ave_atom.html command performs time-averaging
 of per-atom vectors.  The per-atom quantities can be atom attributes
 such as position, velocity, force.  They can also be per-atom
 quantities calculated by a "compute"_compute.html, by a
 "fix"_fix.html, or by an atom-style "variable"_variable.html.  The
 time-averaged per-atom output of this fix can be used as input to
 other output commands.
 
 The "fix store/state"_fix_store_state.html command can archive one or
 more per-atom attributes at a particular time, so that the old values
 can be used in a future calculation or output.  The list of atom
 attributes is the same as for the "dump custom"_dump.html command,
 including per-atom quantities calculated by a "compute"_compute.html,
 by a "fix"_fix.html, or by an atom-style "variable"_variable.html.
 The output of this fix can be used as input to other output commands.
 
 Computes that generate values to output :h5,link(compute)
 
 Every "compute"_compute.html in LAMMPS produces either global or
 per-atom or local values.  The values can be scalars or vectors or
 arrays of data.  These values can be output using the other commands
 described in this section.  The doc page for each compute command
 describes what it produces.  Computes that produce per-atom or local
 values have the word "atom" or "local" in their style name.  Computes
 without the word "atom" or "local" produce global values.
 
 Fixes that generate values to output :h5,link(fix)
 
 Some "fixes"_fix.html in LAMMPS produce either global or per-atom or
 local values which can be accessed by other commands.  The values can
 be scalars or vectors or arrays of data.  These values can be output
 using the other commands described in this section.  The doc page for
 each fix command tells whether it produces any output quantities and
 describes them.
 
 Variables that generate values to output :h5,link(variable)
 
 "Variables"_variable.html defined in an input script can store one or
 more strings.  But equal-style, vector-style, and atom-style or
 atomfile-style variables generate a global scalar value, global vector
 of values, or a per-atom vector, respectively, when accessed.  The
 formulas used to define these variables can contain references to the
 thermodynamic keywords and to global and per-atom data generated by
 computes, fixes, and other variables.  The values generated by
 variables can be used as input to and thus output by the other
 commands described in this section.
 
 Summary table of output options and data flow between commands :h5,link(table)
 
 This table summarizes the various commands that can be used for
 generating output from LAMMPS.  Each command produces output data of
 some kind and/or writes data to a file.  Most of the commands can take
 data from other commands as input.  Thus you can link many of these
 commands together in pipeline form, where data produced by one command
 is used as input to another command and eventually written to the
 screen or to a file.  Note that to hook two commands together the
 output and input data types must match, e.g. global/per-atom/local
 data and scalar/vector/array data.
 
 Also note that, as described above, when a command takes a scalar as
 input, that could be an element of a vector or array.  Likewise a
 vector input could be a column of an array.
 
 Command: Input: Output:
 "thermo_style custom"_thermo_style.html: global scalars: screen, log file:
 "dump custom"_dump.html: per-atom vectors: dump file:
 "dump local"_dump.html: local vectors: dump file:
 "fix print"_fix_print.html: global scalar from variable: screen, file:
 "print"_print.html: global scalar from variable: screen:
 "computes"_compute.html: N/A: global/per-atom/local scalar/vector/array:
 "fixes"_fix.html: N/A: global/per-atom/local scalar/vector/array:
 "variables"_variable.html: global scalars and vectors, per-atom vectors: global scalar and vector, per-atom vector:
 "compute reduce"_compute_reduce.html: per-atom/local vectors: global scalar/vector:
 "compute slice"_compute_slice.html: global vectors/arrays: global vector/array:
 "compute property/atom"_compute_property_atom.html: per-atom vectors: per-atom vector/array:
 "compute property/local"_compute_property_local.html: local vectors: local vector/array:
 "fix vector"_fix_vector.html: global scalars: global vector:
 "fix ave/atom"_fix_ave_atom.html: per-atom vectors: per-atom vector/array:
 "fix ave/time"_fix_ave_time.html: global scalars/vectors: global scalar/vector/array, file:
 "fix ave/chunk"_fix_ave_chunk.html: per-atom vectors: global array, file:
 "fix ave/histo"_fix_ave_histo.html: global/per-atom/local scalars and vectors: global array, file:
 "fix ave/correlate"_fix_ave_correlate.html: global scalars: global array, file:
 "fix store/state"_fix_store_state.html: per-atom vectors: per-atom vector/array :tb(c=3,s=:)
 
 :line
 
 6.16 Thermostatting, barostatting, and computing temperature :link(howto_16),h4
 
 Thermostatting means controlling the temperature of particles in an MD
 simulation.  Barostatting means controlling the pressure.  Since the
 pressure includes a kinetic component due to particle velocities, both
 these operations require calculation of the temperature.  Typically a
 target temperature (T) and/or pressure (P) is specified by the user,
 and the thermostat or barostat attempts to equilibrate the system to
 the requested T and/or P.
 
 Temperature is computed as kinetic energy divided by some number of
 degrees of freedom (and the Boltzmann constant).  Since kinetic energy
 is a function of particle velocity, there is often a need to
 distinguish between a particle's advection velocity (due to some
 aggregate motion of particles) and its thermal velocity.  The sum of
 the two is the particle's total velocity, but the latter is often what
 is wanted to compute a temperature.
 
 LAMMPS has several options for computing temperatures, any of which
 can be used in thermostatting and barostatting.  These "compute
 commands"_compute.html calculate temperature, and the "compute
 pressure"_compute_pressure.html command calculates pressure.
 
 "compute temp"_compute_temp.html
 "compute temp/sphere"_compute_temp_sphere.html
 "compute temp/asphere"_compute_temp_asphere.html
 "compute temp/com"_compute_temp_com.html
 "compute temp/deform"_compute_temp_deform.html
 "compute temp/partial"_compute_temp_partial.html
 "compute temp/profile"_compute_temp_profile.html
 "compute temp/ramp"_compute_temp_ramp.html
 "compute temp/region"_compute_temp_region.html :ul
 
 All but the first 3 calculate velocity biases directly (e.g. advection
 velocities) that are removed when computing the thermal temperature.
 "Compute temp/sphere"_compute_temp_sphere.html and "compute
 temp/asphere"_compute_temp_asphere.html compute kinetic energy for
 finite-size particles that includes rotational degrees of freedom.
 They both allow for velocity biases indirectly, via an optional extra
 argument, another temperature compute that subtracts a velocity bias.
 This allows the translational velocity of spherical or aspherical
 particles to be adjusted in prescribed ways.
 
 Thermostatting in LAMMPS is performed by "fixes"_fix.html, or in one
 case by a pair style.  Several thermostatting fixes are available:
 Nose-Hoover (nvt), Berendsen, CSVR, Langevin, and direct rescaling
 (temp/rescale).  Dissipative particle dynamics (DPD) thermostatting
 can be invoked via the {dpd/tstat} pair style:
 
 "fix nvt"_fix_nh.html
 "fix nvt/sphere"_fix_nvt_sphere.html
 "fix nvt/asphere"_fix_nvt_asphere.html
 "fix nvt/sllod"_fix_nvt_sllod.html
 "fix temp/berendsen"_fix_temp_berendsen.html
 "fix temp/csvr"_fix_temp_csvr.html
 "fix langevin"_fix_langevin.html
 "fix temp/rescale"_fix_temp_rescale.html
 "pair_style dpd/tstat"_pair_dpd.html :ul
 
 "Fix nvt"_fix_nh.html only thermostats the translational velocity of
 particles.  "Fix nvt/sllod"_fix_nvt_sllod.html also does this, except
 that it subtracts out a velocity bias due to a deforming box and
 integrates the SLLOD equations of motion.  See the "NEMD
 simulations"_#howto_13 section of this page for further details.  "Fix
 nvt/sphere"_fix_nvt_sphere.html and "fix
 nvt/asphere"_fix_nvt_asphere.html thermostat not only translation
 velocities but also rotational velocities for spherical and aspherical
 particles.
 
 DPD thermostatting alters pairwise interactions in a manner analogous
 to the per-particle thermostatting of "fix
 langevin"_fix_langevin.html.
 
 Any of the thermostatting fixes can use temperature computes that
 remove bias which has two effects.  First, the current calculated
 temperature, which is compared to the requested target temperature, is
 calculated with the velocity bias removed.  Second, the thermostat
 adjusts only the thermal temperature component of the particle's
 velocities, which are the velocities with the bias removed.  The
 removed bias is then added back to the adjusted velocities.  See the
 doc pages for the individual fixes and for the
 "fix_modify"_fix_modify.html command for instructions on how to assign
 a temperature compute to a thermostatting fix.  For example, you can
 apply a thermostat to only the x and z components of velocity by using
 it in conjunction with "compute
 temp/partial"_compute_temp_partial.html.  Or you could thermostat only
 the thermal temperature of a streaming flow of particles without
 affecting the streaming velocity, by using "compute
 temp/profile"_compute_temp_profile.html.
 
 NOTE: Only the nvt fixes perform time integration, meaning they update
 the velocities and positions of particles due to forces and velocities
 respectively.  The other thermostat fixes only adjust velocities; they
 do NOT perform time integration updates.  Thus they should be used in
 conjunction with a constant NVE integration fix such as these:
 
 "fix nve"_fix_nve.html
 "fix nve/sphere"_fix_nve_sphere.html
 "fix nve/asphere"_fix_nve_asphere.html :ul
 
 Barostatting in LAMMPS is also performed by "fixes"_fix.html.  Two
 barostatting methods are currently available: Nose-Hoover (npt and
 nph) and Berendsen:
 
 "fix npt"_fix_nh.html
 "fix npt/sphere"_fix_npt_sphere.html
 "fix npt/asphere"_fix_npt_asphere.html
 "fix nph"_fix_nh.html
 "fix press/berendsen"_fix_press_berendsen.html :ul
 
 The "fix npt"_fix_nh.html commands include a Nose-Hoover thermostat
 and barostat.  "Fix nph"_fix_nh.html is just a Nose/Hoover barostat;
 it does no thermostatting.  Both "fix nph"_fix_nh.html and "fix
 press/berendsen"_fix_press_berendsen.html can be used in conjunction
 with any of the thermostatting fixes.
 
 As with the thermostats, "fix npt"_fix_nh.html and "fix
 nph"_fix_nh.html only use translational motion of the particles in
 computing T and P and performing thermo/barostatting.  "Fix
 npt/sphere"_fix_npt_sphere.html and "fix
 npt/asphere"_fix_npt_asphere.html thermo/barostat using not only
 translation velocities but also rotational velocities for spherical
 and aspherical particles.
 
 All of the barostatting fixes use the "compute
 pressure"_compute_pressure.html compute to calculate a current
 pressure.  By default, this compute is created with a simple "compute
 temp"_compute_temp.html (see the last argument of the "compute
 pressure"_compute_pressure.html command), which is used to calculate
 the kinetic component of the pressure.  The barostatting fixes can
 also use temperature computes that remove bias for the purpose of
 computing the kinetic component which contributes to the current
 pressure.  See the doc pages for the individual fixes and for the
 "fix_modify"_fix_modify.html command for instructions on how to assign
 a temperature or pressure compute to a barostatting fix.
 
 NOTE: As with the thermostats, the Nose/Hoover methods ("fix
 npt"_fix_nh.html and "fix nph"_fix_nh.html) perform time integration.
 "Fix press/berendsen"_fix_press_berendsen.html does NOT, so it should
 be used with one of the constant NVE fixes or with one of the NVT
 fixes.
 
 Finally, thermodynamic output, which can be setup via the
 "thermo_style"_thermo_style.html command, often includes temperature
 and pressure values.  As explained on the doc page for the
 "thermo_style"_thermo_style.html command, the default T and P are
 setup by the thermo command itself.  They are NOT the ones associated
 with any thermostatting or barostatting fix you have defined or with
 any compute that calculates a temperature or pressure.  Thus if you
 want to view these values of T and P, you need to specify them
 explicitly via a "thermo_style custom"_thermo_style.html command.  Or
 you can use the "thermo_modify"_thermo_modify.html command to
 re-define what temperature or pressure compute is used for default
 thermodynamic output.
 
 :line
 
 6.17 Walls :link(howto_17),h4
 
 Walls in an MD simulation are typically used to bound particle motion,
 i.e. to serve as a boundary condition.
 
 Walls in LAMMPS can be rough (made of particles) or idealized
 surfaces.  Ideal walls can be smooth, generating forces only in the
 normal direction, or frictional, generating forces also in the
 tangential direction.
 
 Rough walls, built of particles, can be created in various ways.  The
 particles themselves can be generated like any other particle, via the
 "lattice"_lattice.html and "create_atoms"_create_atoms.html commands,
 or read in via the "read_data"_read_data.html command.
 
 Their motion can be constrained by many different commands, so that
 they do not move at all, move together as a group at constant velocity
 or in response to a net force acting on them, move in a prescribed
 fashion (e.g. rotate around a point), etc.  Note that if a time
 integration fix like "fix nve"_fix_nve.html or "fix nvt"_fix_nh.html
 is not used with the group that contains wall particles, their
 positions and velocities will not be updated.
 
 "fix aveforce"_fix_aveforce.html - set force on particles to average value, so they move together
 "fix setforce"_fix_setforce.html - set force on particles to a value, e.g. 0.0
 "fix freeze"_fix_freeze.html - freeze particles for use as granular walls
 "fix nve/noforce"_fix_nve_noforce.html - advect particles by their velocity, but without force
 "fix move"_fix_move.html - prescribe motion of particles by a linear velocity, oscillation, rotation, variable :ul
 
 The "fix move"_fix_move.html command offers the most generality, since
 the motion of individual particles can be specified with a
 "variable"_variable.html formula which depends on time and/or the
 particle position.
 
 For rough walls, it may be useful to turn off pairwise interactions
 between wall particles via the "neigh_modify
 exclude"_neigh_modify.html command.
 
 Rough walls can also be created by specifying frozen particles that do
 not move and do not interact with mobile particles, and then tethering
 other particles to the fixed particles, via a "bond"_bond_style.html.
 The bonded particles do interact with other mobile particles.
 
 Idealized walls can be specified via several fix commands.  "Fix
 wall/gran"_fix_wall_gran.html creates frictional walls for use with
 granular particles; all the other commands create smooth walls.
 
 "fix wall/reflect"_fix_wall_reflect.html - reflective flat walls
 "fix wall/lj93"_fix_wall.html - flat walls, with Lennard-Jones 9/3 potential
 "fix wall/lj126"_fix_wall.html - flat walls, with Lennard-Jones 12/6 potential
 "fix wall/colloid"_fix_wall.html - flat walls, with "pair_style colloid"_pair_colloid.html potential
 "fix wall/harmonic"_fix_wall.html - flat walls, with repulsive harmonic spring potential
 "fix wall/region"_fix_wall_region.html - use region surface as wall
 "fix wall/gran"_fix_wall_gran.html - flat or curved walls with "pair_style granular"_pair_gran.html potential :ul
 
 The {lj93}, {lj126}, {colloid}, and {harmonic} styles all allow the
 flat walls to move with a constant velocity, or oscillate in time.
 The "fix wall/region"_fix_wall_region.html command offers the most
 generality, since the region surface is treated as a wall, and the
 geometry of the region can be a simple primitive volume (e.g. a
 sphere, or cube, or plane), or a complex volume made from the union
 and intersection of primitive volumes.  "Regions"_region.html can also
 specify a volume "interior" or "exterior" to the specified primitive
 shape or {union} or {intersection}.  "Regions"_region.html can also be
 "dynamic" meaning they move with constant velocity, oscillate, or
 rotate.
 
 The only frictional idealized walls currently in LAMMPS are flat or
 curved surfaces specified by the "fix wall/gran"_fix_wall_gran.html
 command.  At some point we plan to allow region surfaces to be used as
 frictional walls, as well as triangulated surfaces.
 
 :line
 
 6.18 Elastic constants :link(howto_18),h4
 
 Elastic constants characterize the stiffness of a material. The formal
 definition is provided by the linear relation that holds between the
 stress and strain tensors in the limit of infinitesimal deformation.
 In tensor notation, this is expressed as s_ij = C_ijkl * e_kl, where
 the repeated indices imply summation. s_ij are the elements of the
 symmetric stress tensor. e_kl are the elements of the symmetric strain
 tensor. C_ijkl are the elements of the fourth rank tensor of elastic
 constants. In three dimensions, this tensor has 3^4=81 elements. Using
 Voigt notation, the tensor can be written as a 6x6 matrix, where C_ij
 is now the derivative of s_i w.r.t. e_j. Because s_i is itself a
 derivative w.r.t. e_i, it follows that C_ij is also symmetric, with at
 most 7*6/2 = 21 distinct elements.
 
 At zero temperature, it is easy to estimate these derivatives by
 deforming the simulation box in one of the six directions using the
 "change_box"_change_box.html command and measuring the change in the
 stress tensor. A general-purpose script that does this is given in the
 examples/elastic directory described in "this
 section"_Section_example.html.
 
 Calculating elastic constants at finite temperature is more
 challenging, because it is necessary to run a simulation that performs
 time averages of differential properties. One way to do this is to
 measure the change in average stress tensor in an NVT simulation when
 the cell volume undergoes a finite deformation. In order to balance
 the systematic and statistical errors in this method, the magnitude of
 the deformation must be chosen judiciously, and care must be taken to
 fully equilibrate the deformed cell before sampling the stress
 tensor. Another approach is to sample the triclinic cell fluctuations
 that occur in an NPT simulation. This method can also be slow to
 converge and requires careful post-processing "(Shinoda)"_#Shinoda1.
 
 :line
 
 6.19 Library interface to LAMMPS :link(howto_19),h4
 
 As described in "Section 2.5"_Section_start.html#start_5, LAMMPS
 can be built as a library, so that it can be called by another code,
 used in a "coupled manner"_Section_howto.html#howto_10 with other
 codes, or driven through a "Python interface"_Section_python.html.
 
 All of these methodologies use a C-style interface to LAMMPS that is
 provided in the files src/library.cpp and src/library.h.  The
 functions therein have a C-style argument list, but contain C++ code
 you could write yourself in a C++ application that was invoking LAMMPS
 directly.  The C++ code in the functions illustrates how to invoke
 internal LAMMPS operations.  Note that LAMMPS classes are defined
 within a LAMMPS namespace (LAMMPS_NS) if you use them from another C++
 application.
 
 Library.cpp contains these functions for creating and destroying an
 instance of LAMMPS and sending it commands to execute.  See the
 documentation in the src/library.cpp file for details:
 
 void lammps_open(int, char **, MPI_Comm, void **)
 void lammps_open_no_mpi(int, char **, void **)
 void lammps_close(void *)
 int lammps_version(void *)
 void lammps_file(void *, char *)
 char *lammps_command(void *, char *)
 void lammps_commands_list(void *, int, char **)
 void lammps_commands_string(void *, char *)
 void lammps_free(void *) :pre
 
 The lammps_open() function is used to initialize LAMMPS, passing in a
 list of strings as if they were "command-line
 arguments"_Section_start.html#start_7 when LAMMPS is run in
 stand-alone mode from the command line, and a MPI communicator for
 LAMMPS to run under.  It returns a ptr to the LAMMPS object that is
 created, and which is used in subsequent library calls.  The
 lammps_open() function can be called multiple times, to create
 multiple instances of LAMMPS.
 
 LAMMPS will run on the set of processors in the communicator.  This
 means the calling code can run LAMMPS on all or a subset of
 processors.  For example, a wrapper script might decide to alternate
 between LAMMPS and another code, allowing them both to run on all the
 processors.  Or it might allocate half the processors to LAMMPS and
 half to the other code and run both codes simultaneously before
 syncing them up periodically.  Or it might instantiate multiple
 instances of LAMMPS to perform different calculations.
 
 The lammps_open_no_mpi() function is similar except that no MPI
 communicator is passed from the caller.  Instead, MPI_COMM_WORLD is
 used to instantiate LAMMPS, and MPI is initialized if necessary.
 
 The lammps_close() function is used to shut down an instance of LAMMPS
 and free all its memory.
 
 The lammps_version() function can be used to determine the specific
 version of the underlying LAMMPS code. This is particularly useful
 when loading LAMMPS as a shared library via dlopen(). The code using
 the library interface can then use this information to adapt to
 changes to the LAMMPS command syntax between versions. The returned
 LAMMPS version code is an integer (e.g. 2 Sep 2015 results in
 20150902) that grows with every new LAMMPS version.
 
 The lammps_file(), lammps_command(), lammps_commands_list(), and
 lammps_commands_string() functions are used to pass one or more
 commands to LAMMPS to execute, the same as if they were coming from an
 input script.
 
 Via these functions, the calling code can read or generate a series of
 LAMMPS commands one or multiple at a time and pass it thru the library
 interface to setup a problem and then run it in stages.  The caller
 can interleave the command function calls with operations it performs,
 calls to extract information from or set information within LAMMPS, or
 calls to another code's library.
 
 The lammps_file() function passes the filename of an input script.
 The lammps_command() function passes a single command as a string.
 The lammps_commands_list() function passes multiple commands in a
 char** list.  In both lammps_command() and lammps_commands_list(),
 individual commands may or may not have a trailing newline.  The
 lammps_commands_string() function passes multiple commands
 concatenated into one long string, separated by newline characters.
 In both lammps_commands_list() and lammps_commands_string(), a single
 command can be spread across multiple lines, if the last printable
 character of all but the last line is "&", the same as if the lines
 appeared in an input script.
 
 The lammps_free() function is a clean-up function to free memory that
 the library allocated previously via other function calls.  See
 comments in src/library.cpp file for which other functions need this
 clean-up.
 
 Library.cpp also contains these functions for extracting information
 from LAMMPS and setting values within LAMMPS.  Again, see the
 documentation in the src/library.cpp file for details, including
 which quantities can be queried by name:
 
 void *lammps_extract_global(void *, char *)
 void lammps_extract_box(void *, double *, double *, 
                         double *, double *, double *, int *, int *)
 void *lammps_extract_atom(void *, char *)
 void *lammps_extract_compute(void *, char *, int, int)
 void *lammps_extract_fix(void *, char *, int, int, int, int)
 void *lammps_extract_variable(void *, char *, char *) :pre
 
 void lammps_reset_box(void *, double *, double *, double, double, double)
 int lammps_set_variable(void *, char *, char *) :pre
 
 double lammps_get_thermo(void *, char *)
 int lammps_get_natoms(void *)
 void lammps_gather_atoms(void *, double *)
 void lammps_scatter_atoms(void *, double *)
 void lammps_create_atoms(void *, int, tagint *, int *, double *, double *,
                          imageint *, int) :pre
 
 The extract functions return a pointer to various global or per-atom
 quantities stored in LAMMPS or to values calculated by a compute, fix,
 or variable.  The pointer returned by the extract_global() function
 can be used as a permanent reference to a value which may change.  For
-the other extract functions, the underlying storage may be reallocated
-as LAMMPS runs, so you need to re-call the function to assure a
-current pointer or returned value(s).
+the extract_atom() method, see the extract() method in the
+src/atom.cpp file for a list of valid per-atom properties.  New names
+could easily be added if the property you want is not listed.  For the
+other extract functions, the underlying storage may be reallocated as
+LAMMPS runs, so you need to re-call the function to assure a current
+pointer or returned value(s).
 
 The lammps_reset_box() function resets the size and shape of the
 simulation box, e.g. as part of restoring a previously extracted and
 saved state of a simulation.
 
 The lammps_set_variable() function can set an existing string-style
 variable to a new string value, so that subsequent LAMMPS commands can
 access the variable.
 
 The lammps_get_thermo() function returns the current value of a thermo
 keyword as a double precision value.
 
 The lammps_get_natoms() function returns the total number of atoms in
 the system and can be used by the caller to allocate space for the
 lammps_gather_atoms() and lammps_scatter_atoms() functions.  The
-gather function collects atom info of the requested type (atom coords,
-types, forces, etc) from all processors, orders them by atom ID, and
-returns a full list to each calling processor.  The scatter function
-does the inverse.  It distributes the same kinds of values, 
+gather function collects peratom info of the requested type (atom
+coords, types, forces, etc) from all processors, orders them by atom
+ID, and returns a full list to each calling processor.  The scatter
+function does the inverse.  It distributes the same peratom values,
 passed by the caller, to each atom owned by individual processors.
+Both methods are thus a means to extract or assign (overwrite) any
+peratom quantities within LAMMPS.  See the extract() method in the
+src/atom.cpp file for a list of valid per-atom properties.  New names
+could easily be added if the property you want is not listed.
 
 The lammps_create_atoms() function takes a list of N atoms as input
 with atom types and coords (required), and optionally atom IDs and
 velocities and image flags.  It uses the coords of each atom to assign
 it as a new atom to the processor that owns it.  This function is
 useful to add atoms to a simulation or (in tandem with
 lammps_reset_box()) to restore a previously extracted and saved state
 of a simulation.  Additional properties for the new atoms can then be
 assigned via the lammps_scatter_atoms() or lammps_extract_atom()
 functions.
 
 The examples/COUPLE and python directories have example C++ and C and
 Python codes which show how a driver code can link to LAMMPS as a
 library, run LAMMPS on a subset of processors, grab data from LAMMPS,
 change it, and put it back into LAMMPS.
 
 NOTE: You can write code for additional functions as needed to define
 how your code talks to LAMMPS and add them to src/library.cpp and
 src/library.h, as well as to the "Python
 interface"_Section_python.html.  The added functions can access or
 change any LAMMPS data you wish.
 
 :line
 
 6.20 Calculating thermal conductivity :link(howto_20),h4
 
 The thermal conductivity kappa of a material can be measured in at
 least 4 ways using various options in LAMMPS.  See the examples/KAPPA
 directory for scripts that implement the 4 methods discussed here for
 a simple Lennard-Jones fluid model.  Also, see "this
 section"_Section_howto.html#howto_21 of the manual for an analogous
 discussion for viscosity.
 
 The thermal conductivity tensor kappa is a measure of the propensity
 of a material to transmit heat energy in a diffusive manner as given
 by Fourier's law
 
 J = -kappa grad(T)
 
 where J is the heat flux in units of energy per area per time and
 grad(T) is the spatial gradient of temperature.  The thermal
 conductivity thus has units of energy per distance per time per degree
 K and is often approximated as an isotropic quantity, i.e. as a
 scalar.
 
 The first method is to setup two thermostatted regions at opposite
 ends of a simulation box, or one in the middle and one at the end of a
 periodic box.  By holding the two regions at different temperatures
 with a "thermostatting fix"_Section_howto.html#howto_16, the energy
 added to the hot region should equal the energy subtracted from the
 cold region and be proportional to the heat flux moving between the
 regions.  See the papers by "Ikeshoji and Hafskjold"_#howto-Ikeshoji
 and "Wirnsberger et al"_#howto-Wirnsberger for details of this idea.
 Note that thermostatting fixes such as "fix nvt"_fix_nh.html, "fix
 langevin"_fix_langevin.html, and "fix
 temp/rescale"_fix_temp_rescale.html store the cumulative energy they
 add/subtract.
 
 Alternatively, as a second method, the "fix heat"_fix_heat.html or
 "fix ehex"_fix_ehex.html commands can be used in place of thermostats
 on each of two regions to add/subtract specified amounts of energy to
 both regions.  In both cases, the resulting temperatures of the two
 regions can be monitored with the "compute
 temp/region"_compute_temp_region.html command and
 the temperature profile of the intermediate region can be monitored
 with the "fix ave/chunk"_fix_ave_chunk.html and "compute
 ke/atom"_compute_ke_atom.html commands.
 
 The third method is to perform a reverse non-equilibrium MD simulation
 using the "fix thermal/conductivity"_fix_thermal_conductivity.html
 command which implements the rNEMD algorithm of Muller-Plathe.
 Kinetic energy is swapped between atoms in two different layers of the
 simulation box.  This induces a temperature gradient between the two
 layers which can be monitored with the "fix
 ave/chunk"_fix_ave_chunk.html and "compute
 ke/atom"_compute_ke_atom.html commands.  The fix tallies the
 cumulative energy transfer that it performs.  See the "fix
 thermal/conductivity"_fix_thermal_conductivity.html command for
 details.
 
 The fourth method is based on the Green-Kubo (GK) formula which
 relates the ensemble average of the auto-correlation of the heat flux
 to kappa.  The heat flux can be calculated from the fluctuations of
 per-atom potential and kinetic energies and per-atom stress tensor in
 a steady-state equilibrated simulation.  This is in contrast to the
 three preceding non-equilibrium methods, where energy flows continuously
 between hot and cold regions of the simulation box.
 
 The "compute heat/flux"_compute_heat_flux.html command can calculate
 the needed heat flux and describes how to implement the Green-Kubo
 formalism using additional LAMMPS commands, such as the "fix
 ave/correlate"_fix_ave_correlate.html command to calculate the needed
 auto-correlation.  See the doc page for the "compute
 heat/flux"_compute_heat_flux.html command for an example input script
 that calculates the thermal conductivity of solid Ar via the GK
 formalism.
 
 :line
 
 6.21 Calculating viscosity :link(howto_21),h4
 
 The shear viscosity eta of a fluid can be measured in at least 5 ways
 using various options in LAMMPS.  See the examples/VISCOSITY directory
 for scripts that implement the 5 methods discussed here for a simple
 Lennard-Jones fluid model.  Also, see "this
 section"_Section_howto.html#howto_20 of the manual for an analogous
 discussion for thermal conductivity.
 
 Eta is a measure of the propensity of a fluid to transmit momentum in
 a direction perpendicular to the direction of velocity or momentum
 flow.  Alternatively it is the resistance the fluid has to being
 sheared.  It is given by
 
 J = -eta grad(Vstream)
 
 where J is the momentum flux in units of momentum per area per time
 and grad(Vstream) is the spatial gradient of the velocity of the fluid
 moving in another direction, normal to the area through which the
 momentum flows.  Viscosity thus has units of pressure-time.
 
 The first method is to perform a non-equilibrium MD (NEMD) simulation
 by shearing the simulation box via the "fix deform"_fix_deform.html
 command, and using the "fix nvt/sllod"_fix_nvt_sllod.html command to
 thermostat the fluid via the SLLOD equations of motion.
 Alternatively, as a second method, one or more moving walls can be
 used to shear the fluid in between them, again with some kind of
 thermostat that modifies only the thermal (non-shearing) components of
 velocity to prevent the fluid from heating up.
 
 In both cases, the velocity profile set up in the fluid by this
 procedure can be monitored by the "fix
 ave/chunk"_fix_ave_chunk.html command, which determines
 grad(Vstream) in the equation above.  E.g. the derivative in the
 y-direction of the Vx component of fluid motion or grad(Vstream) =
 dVx/dy.  The Pxy off-diagonal component of the pressure or stress
 tensor, as calculated by the "compute pressure"_compute_pressure.html
 command, can also be monitored, which is the J term in the equation
 above.  See "this section"_Section_howto.html#howto_13 of the manual
 for details on NEMD simulations.
 
 The third method is to perform a reverse non-equilibrium MD simulation
 using the "fix viscosity"_fix_viscosity.html command which implements
 the rNEMD algorithm of Muller-Plathe.  Momentum in one dimension is
 swapped between atoms in two different layers of the simulation box in
 a different dimension.  This induces a velocity gradient which can be
 monitored with the "fix ave/chunk"_fix_ave_chunk.html command.
 The fix tallies the cumulative momentum transfer that it performs.
 See the "fix viscosity"_fix_viscosity.html command for details.
 
 The fourth method is based on the Green-Kubo (GK) formula which
 relates the ensemble average of the auto-correlation of the
 stress/pressure tensor to eta.  This can be done in a fully
 equilibrated simulation which is in contrast to the three preceding
 non-equilibrium methods, where momentum flows continuously through the
 simulation box.
 
 Here is an example input script that calculates the viscosity of
 liquid Ar via the GK formalism:
 
 # Sample LAMMPS input script for viscosity of liquid Ar :pre
 
 units       real
 variable    T equal 86.4956
 variable    V equal vol
 variable    dt equal 4.0
 variable    p equal 400     # correlation length
 variable    s equal 5       # sample interval
 variable    d equal $p*$s   # dump interval :pre
 
 # convert from LAMMPS real units to SI :pre
 
 variable    kB equal 1.3806504e-23    # \[J/K\] Boltzmann
 variable    atm2Pa equal 101325.0
 variable    A2m equal 1.0e-10
 variable    fs2s equal 1.0e-15
 variable    convert equal $\{atm2Pa\}*$\{atm2Pa\}*$\{fs2s\}*$\{A2m\}*$\{A2m\}*$\{A2m\} :pre
 
 # setup problem :pre
 
 dimension    3
 boundary     p p p
 lattice      fcc 5.376 orient x 1 0 0 orient y 0 1 0 orient z 0 0 1
 region       box block 0 4 0 4 0 4
 create_box   1 box
 create_atoms 1 box
 mass         1 39.948
 pair_style   lj/cut 13.0
 pair_coeff   * * 0.2381 3.405
 timestep     $\{dt\}
 thermo       $d :pre
 
 # equilibration and thermalization :pre
 
 velocity     all create $T 102486 mom yes rot yes dist gaussian
 fix          NVT all nvt temp $T $T 10 drag 0.2
 run          8000 :pre
 
 # viscosity calculation, switch to NVE if desired :pre
 
 #unfix       NVT
 #fix         NVE all nve :pre
 
 reset_timestep 0
 variable     pxy equal pxy
 variable     pxz equal pxz
 variable     pyz equal pyz
 fix          SS all ave/correlate $s $p $d &
              v_pxy v_pxz v_pyz type auto file S0St.dat ave running
 variable     scale equal $\{convert\}/($\{kB\}*$T)*$V*$s*$\{dt\}
 variable     v11 equal trap(f_SS\[3\])*$\{scale\}
 variable     v22 equal trap(f_SS\[4\])*$\{scale\}
 variable     v33 equal trap(f_SS\[5\])*$\{scale\}
 thermo_style custom step temp press v_pxy v_pxz v_pyz v_v11 v_v22 v_v33
 run          100000
 variable     v equal (v_v11+v_v22+v_v33)/3.0
 variable     ndens equal count(all)/vol
 print        "average viscosity: $v \[Pa.s\] @ $T K, $\{ndens\} /A^3" :pre
 
 The fifth method is related to the above Green-Kubo method,
 but uses the Einstein formulation, analogous to the Einstein
 mean-square-displacement formulation for self-diffusivity. The
 time-integrated momentum fluxes play the role of Cartesian
 coordinates, whose mean-square displacement increases linearly
 with time at sufficiently long times.
 
 :line
 
 6.22 Calculating a diffusion coefficient :link(howto_22),h4
 
 The diffusion coefficient D of a material can be measured in at least
 2 ways using various options in LAMMPS.  See the examples/DIFFUSE
 directory for scripts that implement the 2 methods discussed here for
 a simple Lennard-Jones fluid model.
 
 The first method is to measure the mean-squared displacement (MSD) of
 the system, via the "compute msd"_compute_msd.html command.  The slope
 of the MSD versus time is proportional to the diffusion coefficient.
 The instantaneous MSD values can be accumulated in a vector via the
 "fix vector"_fix_vector.html command, and a line fit to the vector to
 compute its slope via the "variable slope"_variable.html function, and
 thus extract D.
 
 The second method is to measure the velocity auto-correlation function
 (VACF) of the system, via the "compute vacf"_compute_vacf.html
 command.  The time-integral of the VACF is proportional to the
 diffusion coefficient.  The instantaneous VACF values can be
 accumulated in a vector via the "fix vector"_fix_vector.html command,
 and time integrated via the "variable trap"_variable.html function,
 and thus extract D.
 
 :line
 
 6.23 Using chunks to calculate system properties :link(howto_23),h4
 
 In LAMMPS, "chunks" are collections of atoms, as defined by the
 "compute chunk/atom"_compute_chunk_atom.html command, which assigns
 each atom to a chunk ID (or to no chunk at all).  The number of chunks
 and the assignment of chunk IDs to atoms can be static or change over
 time.  Examples of "chunks" are molecules or spatial bins or atoms
 with similar values (e.g. coordination number or potential energy).
 
 The per-atom chunk IDs can be used as input to two other kinds of
 commands, to calculate various properties of a system:
 
 "fix ave/chunk"_fix_ave_chunk.html
 any of the "compute */chunk"_compute.html commands :ul
 
 Here, each of the 3 kinds of chunk-related commands is briefly
 overviewed.  Then some examples are given of how to compute different
 properties with chunk commands.
 
 Compute chunk/atom command: :h5
 
 This compute can assign atoms to chunks of various styles.  Only atoms
 in the specified group and optional specified region are assigned to a
 chunk.  Here are some possible chunk definitions:
 
 atoms in same molecule | chunk ID = molecule ID |
 atoms of same atom type | chunk ID = atom type |
 all atoms with same atom property (charge, radius, etc) | chunk ID = output of compute property/atom |
 atoms in same cluster | chunk ID = output of "compute cluster/atom"_compute_cluster_atom.html command |
 atoms in same spatial bin | chunk ID = bin ID |
 atoms in same rigid body | chunk ID = molecule ID used to define rigid bodies |
 atoms with similar potential energy | chunk ID = output of "compute pe/atom"_compute_pe_atom.html |
 atoms with same local defect structure | chunk ID = output of "compute centro/atom"_compute_centro_atom.html or "compute coord/atom"_compute_coord_atom.html command :tb(s=|,c=2)
 
 Note that chunk IDs are integer values, so for atom properties or
 computes that produce a floating point value, they will be truncated
 to an integer.  You could also use the compute in a variable that
 scales the floating point value to spread it across multiple integers.
 
 Spatial bins can be of various kinds, e.g. 1d bins = slabs, 2d bins =
 pencils, 3d bins = boxes, spherical bins, cylindrical bins.
 
 This compute also calculates the number of chunks {Nchunk}, which is
 used by other commands to tally per-chunk data.  {Nchunk} can be a
 static value or change over time (e.g. the number of clusters).  The
 chunk ID for an individual atom can also be static (e.g. a molecule
 ID), or dynamic (e.g. what spatial bin an atom is in as it moves).
 
 Note that this compute allows the per-atom output of other
 "computes"_compute.html, "fixes"_fix.html, and
 "variables"_variable.html to be used to define chunk IDs for each
 atom.  This means you can write your own compute or fix to output a
 per-atom quantity to use as chunk ID.  See
 "Section 10"_Section_modify.html of the documentation for how to
 do this.  You can also define a "per-atom variable"_variable.html in
 the input script that uses a formula to generate a chunk ID for each
 atom.
 
 Fix ave/chunk command: :h5
 
 This fix takes the ID of a "compute
 chunk/atom"_compute_chunk_atom.html command as input.  For each chunk,
 it then sums one or more specified per-atom values over the atoms in
 each chunk.  The per-atom values can be any atom property, such as
 velocity, force, charge, potential energy, kinetic energy, stress,
 etc.  Additional keywords are defined for per-chunk properties like
 density and temperature.  More generally any per-atom value generated
 by other "computes"_compute.html, "fixes"_fix.html, and "per-atom
 variables"_variable.html, can be summed over atoms in each chunk.
 
 Similar to other averaging fixes, this fix allows the summed per-chunk
 values to be time-averaged in various ways, and output to a file.  The
 fix produces a global array as output with one row of values per
 chunk.
 
 Compute */chunk commands: :h5
 
 Currently the following computes operate on chunks of atoms to produce
 per-chunk values.
 
 "compute com/chunk"_compute_com_chunk.html
 "compute gyration/chunk"_compute_gyration_chunk.html
 "compute inertia/chunk"_compute_inertia_chunk.html
 "compute msd/chunk"_compute_msd_chunk.html
 "compute property/chunk"_compute_property_chunk.html
 "compute temp/chunk"_compute_temp_chunk.html
 "compute torque/chunk"_compute_torque_chunk.html
 "compute vcm/chunk"_compute_vcm_chunk.html :ul
 
 They each take the ID of a "compute
 chunk/atom"_compute_chunk_atom.html command as input.  As their names
 indicate, they calculate the center-of-mass, radius of gyration,
 moments of inertia, mean-squared displacement, temperature, torque,
 and velocity of center-of-mass for each chunk of atoms.  The "compute
 property/chunk"_compute_property_chunk.html command can tally the
 count of atoms in each chunk and extract other per-chunk properties.
 
 The reason these various calculations are not part of the "fix
 ave/chunk command"_fix_ave_chunk.html, is that each requires a more
 complicated operation than simply summing and averaging over per-atom
 values in each chunk.  For example, many of them require calculation
 of a center of mass, which requires summing mass*position over the
 atoms and then dividing by summed mass.
 
 All of these computes produce a global vector or global array as
 output, with one or more values per chunk.  They can be used
 in various ways:
 
 As input to the "fix ave/time"_fix_ave_time.html command, which can
 write the values to a file and optionally time average them. :ulb,l
 
 As input to the "fix ave/histo"_fix_ave_histo.html command to
 histogram values across chunks.  E.g. a histogram of cluster sizes or
 molecule diffusion rates. :l
 
 As input to special functions of "equal-style
 variables"_variable.html, like sum() and max().  E.g. to find the
 largest cluster or fastest diffusing molecule. :l
 :ule
 
 Example calculations with chunks :h5
 
 Here are examples using chunk commands to calculate various
 properties:
 
 (1) Average velocity in each of 1000 2d spatial bins:
 
 compute cc1 all chunk/atom bin/2d x 0.0 0.1 y lower 0.01 units reduced
 fix 1 all ave/chunk 100 10 1000 cc1 vx vy file tmp.out :pre
 
 (2) Temperature in each spatial bin, after subtracting a flow
 velocity:
 
 compute cc1 all chunk/atom bin/2d x 0.0 0.1 y lower 0.1 units reduced
 compute vbias all temp/profile 1 0 0 y 10
 fix 1 all ave/chunk 100 10 1000 cc1 temp bias vbias file tmp.out :pre
 
 (3) Center of mass of each molecule:
 
 compute cc1 all chunk/atom molecule
 compute myChunk all com/chunk cc1
 fix 1 all ave/time 100 1 100 c_myChunk\[*\] file tmp.out mode vector :pre
 
 (4) Total force on each molecule and ave/max across all molecules:
 
 compute cc1 all chunk/atom molecule
 fix 1 all ave/chunk 1000 1 1000 cc1 fx fy fz file tmp.out
 variable xave equal ave(f_1\[2\])
 variable xmax equal max(f_1\[2\])
 thermo 1000
 thermo_style custom step temp v_xave v_xmax :pre
 
 (5) Histogram of cluster sizes:
 
 compute cluster all cluster/atom 1.0
 compute cc1 all chunk/atom c_cluster compress yes
 compute size all property/chunk cc1 count
 fix 1 all ave/histo 100 1 100 0 20 20 c_size mode vector ave running beyond ignore file tmp.histo :pre
 
 :line
 
 6.24 Setting parameters for the "kspace_style pppm/disp"_kspace_style.html command :link(howto_24),h4
 
 The PPPM method computes interactions by splitting the pair potential
 into two parts, one of which is computed in a normal pairwise fashion,
 the so-called real-space part, and one of which is computed using the
 Fourier transform, the so called reciprocal-space or kspace part.  For
 both parts, the potential is not computed exactly but is approximated.
 Thus, there is an error in both parts of the computation, the
 real-space and the kspace error. The just mentioned facts are true
 both for the PPPM for Coulomb as well as dispersion interactions. The
 deciding difference - and also the reason why the parameters for
 pppm/disp have to be selected with more care - is the impact of the
 errors on the results: The kspace error of the PPPM for Coulomb and
 dispersion interaction and the real-space error of the PPPM for
 Coulomb interaction have the character of noise. In contrast, the
 real-space error of the PPPM for dispersion has a clear physical
 interpretation: the underprediction of cohesion. As a consequence, the
 real-space error has a much stronger effect than the kspace error on
 simulation results for pppm/disp.  Parameters must thus be chosen in a
 way that this error is much smaller than the kspace error.
 
 When using pppm/disp and not making any specifications on the PPPM
 parameters via the "kspace_modify"_kspace_modify.html command, parameters will be tuned
 such that the real-space error and the kspace error are equal.  This
 will result in simulations that are either inaccurate or slow, neither
 of which is desirable. For selecting parameters for the pppm/disp
 that provide fast and accurate simulations, there are two approaches,
 which both have their up- and downsides.
 
 The first approach is to set desired real-space and kspace accuracies
 via the {kspace_modify force/disp/real} and {kspace_modify
 force/disp/kspace} commands. Note that the accuracies have to be
 specified in force units and are thus dependent on the chosen unit
 settings. For real units, 0.0001 and 0.002 seem to provide reasonably
 accurate and efficient computations for the real-space and kspace
 accuracies.  0.002 and 0.05 work well for most systems using lj
 units. PPPM parameters will be generated based on the desired
 accuracies. The upside of this approach is that it usually provides a
 good set of parameters and will work for both the {kspace_modify diff
 ad} and {kspace_modify diff ik} options.  The downside of the method
 is that setting the PPPM parameters will take some time during the
 initialization of the simulation.
 
 The second approach is to set the parameters for the pppm/disp
 explicitly using the {kspace_modify mesh/disp}, {kspace_modify
 order/disp}, and {kspace_modify gewald/disp} commands. This approach
 requires a more experienced user who understands well the impact of
 the choice of parameters on the simulation accuracy and
 performance. This approach provides a fast initialization of the
 simulation. However, it is sensitive to errors: A combination of
 parameters that will perform well for one system might result in
 far-from-optimal conditions for other simulations. For example,
 parameters that provide accurate and fast computations for
 all-atomistic force fields can provide insufficient accuracy for
 united-atomistic force fields (because the latter typically have
 larger dispersion coefficients).
 
 To avoid inaccurate or inefficient simulations, the pppm/disp stops
 simulations with an error message if no action is taken to control the
 PPPM parameters. If the automatic parameter generation is desired and
 real-space and kspace accuracies are desired to be equal, this error
 message can be suppressed using the {kspace_modify disp/auto yes}
 command.
 
 A reasonable approach that combines the upsides of both methods is to
 make the first run using the {kspace_modify force/disp/real} and
 {kspace_modify force/disp/kspace} commands, write down the PPPM
 parameters from the output, and specify these parameters using the
 second approach in subsequent runs (which have the same composition,
 force field, and approximately the same volume).
 
 Concerning the performance of the pppm/disp there are two more things
 to consider. The first is that when using the pppm/disp, the cutoff
 parameter no longer affects the accuracy of the simulation
 (provided that gewald/disp is adjusted when changing the cutoff).
 The performance can thus be increased by examining different values
 for the cutoff parameter. A lower bound for the cutoff is only set by
 the truncation error of the repulsive term of pair potentials.
 
 The second is that the mixing rule of the pair style has an impact on
 the computation time when using the pppm/disp. Fastest computations
 are achieved when using the geometric mixing rule. Using the
 arithmetic mixing rule substantially increases the computational cost.
 The computational overhead can be reduced using the {kspace_modify
 mix/disp geom} and {kspace_modify splittol} commands. The first
 command simply enforces geometric mixing of the dispersion
 coefficients in kspace computations.  This introduces some error in
 the computations but will also significantly speed up the
 simulations. The second keyword sets the accuracy with which the
 dispersion coefficients are approximated using a matrix factorization
 approach.  This may result in better accuracy than using the first
 command, but will usually also not provide an equally good increase of
 efficiency.
 
 Finally, pppm/disp can also be used when no mixing rules apply.
 This can be achieved using the {kspace_modify mix/disp none} command.
 Note that the code does not check automatically whether any mixing
 rule is fulfilled. If mixing rules do not apply, the user will have
 to specify this command explicitly.
 
 :line
 
 6.25 Polarizable models :link(howto_25),h4
 
 In polarizable force fields the charge distributions in molecules and
 materials respond to their electrostatic environments. Polarizable
 systems can be simulated in LAMMPS using three methods:
 
 the fluctuating charge method, implemented in the "QEQ"_fix_qeq.html
 package, :ulb,l
 the adiabatic core-shell method, implemented in the
 "CORESHELL"_#howto_26 package, :l
 the thermalized Drude dipole method, implemented in the
 "USER-DRUDE"_#howto_27 package. :l
 :ule
 
 The fluctuating charge method calculates instantaneous charges on
 interacting atoms based on the electronegativity equalization
 principle. It is implemented in the "fix qeq"_fix_qeq.html which is
 available in several variants. It is a relatively efficient technique
 since no additional particles are introduced. This method allows for
 charge transfer between molecules or atom groups. However, because the
 charges are located at the interaction sites, off-plane components of
 polarization cannot be represented in planar molecules or atom groups.
 
 The two other methods share the same basic idea: polarizable atoms are
 split into one core atom and one satellite particle (called shell or
 Drude particle) attached to it by a harmonic spring.  Both atoms bear
 a charge and they represent collectively an induced electric dipole.
 These techniques are computationally more expensive than the QEq
 method because of additional particles and bonds. These two
 charge-on-spring methods differ in certain features, with the
 core-shell model being normally used for ionic/crystalline materials,
 whereas the so-called Drude model is normally used for molecular
 systems and fluid states.
 
 The core-shell model is applicable to crystalline materials where the
 high symmetry around each site leads to stable trajectories of the
 core-shell pairs. However, bonded atoms in molecules can be so close
 that a core would interact too strongly or even capture the Drude
 particle of a neighbor. The Drude dipole model is relatively more
 complex in order to remediate this and other issues. Specifically, the
 Drude model includes specific thermostatting of the core-Drude pairs
 and short-range damping of the induced dipoles.
 
 The three polarization methods can be implemented through a
 self-consistent calculation of charges or induced dipoles at each
 timestep. In the fluctuating charge scheme this is done by the matrix
 inversion method in "fix qeq/point"_fix_qeq.html, but for core-shell
 or Drude-dipoles the relaxed-dipoles technique would require a slow
 iterative procedure. These self-consistent solutions yield accurate
 trajectories since the additional degrees of freedom representing
 polarization are massless.  An alternative is to attribute a mass to
 the additional degrees of freedom and perform time integration using
 an extended Lagrangian technique. For the fluctuating charge scheme
 this is done by "fix qeq/dynamic"_fix_qeq.html, and for the
 charge-on-spring models by the methods outlined in the next two
 sections. The assignment of masses to the additional degrees of
 freedom can lead to unphysical trajectories if care is not exerted in
 choosing the parameters of the polarizable models and the simulation
 conditions.
 
 In the core-shell model the vibration of the shells is kept faster
 than the ionic vibrations to mimic the fast response of the
 polarizable electrons.  But in molecular systems thermalizing the
 core-Drude pairs at temperatures comparable to the rest of the
 simulation leads to several problems (kinetic energy transfer, too
 short a timestep, etc.).  In order to avoid these problems the relative
 motion of the Drude particles with respect to their cores is kept
 "cold" so the vibration of the core-Drude pairs is very slow,
 approaching the self-consistent regime.  In both models the
 temperature is regulated using the velocities of the center of mass of
 core+shell (or Drude) pairs, but in the Drude model the actual
 relative core-Drude particle motion is thermostatted separately as
 well.
 
 :line
 
 6.26 Adiabatic core/shell model :link(howto_26),h4
 
 The adiabatic core-shell model by "Mitchell and
 Fincham"_#MitchellFincham is a simple method for adding
 polarizability to a system.  In order to mimic the electron shell of
 an ion, a satellite particle is attached to it. This way the ions are
 split into a core and a shell where the latter is meant to react to
 the electrostatic environment inducing polarizability.
 
 Technically, shells are attached to the cores by a spring force f =
 k*r where k is a parametrized spring constant and r is the distance
 between the core and the shell. The charges of the core and the shell
 add up to the ion charge, thus q(ion) = q(core) + q(shell). This
 setup introduces the ion polarizability (alpha) given by
 alpha = q(shell)^2 / k. In a
 similar fashion the mass of the ion is distributed on the core and the
 shell with the core having the larger mass.
 
 To run this model in LAMMPS, "atom_style"_atom_style.html {full} can
 be used since atom charge and bonds are needed.  Each kind of
 core/shell pair requires two atom types and a bond type.  The core and
 shell of a core/shell pair should be bonded to each other with a
 harmonic bond that provides the spring force. For example, a data file
 for NaCl, as found in examples/coreshell, has this format:
 
 432   atoms  # core and shell atoms
 216   bonds  # number of core/shell springs :pre
 
 4     atom types  # 2 cores and 2 shells for Na and Cl
 2     bond types :pre
 
 0.0 24.09597 xlo xhi
 0.0 24.09597 ylo yhi
 0.0 24.09597 zlo zhi :pre
 
 Masses       # core/shell mass ratio = 0.1 :pre
 
 1 20.690784  # Na core
 2 31.90500   # Cl core
 3 2.298976   # Na shell
 4 3.54500    # Cl shell :pre
 
 Atoms :pre
 
 1    1    2   1.5005    0.00000000   0.00000000   0.00000000 # core of core/shell pair 1
 2    1    4  -2.5005    0.00000000   0.00000000   0.00000000 # shell of core/shell pair 1
 3    2    1   1.5056    4.01599500   4.01599500   4.01599500 # core of core/shell pair 2
 4    2    3  -0.5056    4.01599500   4.01599500   4.01599500 # shell of core/shell pair 2
 (...) :pre
 
 Bonds   # Bond topology for spring forces :pre
 
 1     2     1     2   # spring for core/shell pair 1
 2     2     3     4   # spring for core/shell pair 2
 (...) :pre
 
 Non-Coulombic (e.g. Lennard-Jones) pairwise interactions are only
 defined between the shells.  Coulombic interactions are defined
 between all cores and shells.  If desired, additional bonds can be
 specified between cores.
 
 The "special_bonds"_special_bonds.html command should be used to
 turn off the Coulombic interaction within core/shell pairs, since that
 interaction is set by the bond spring.  This is done using the
 "special_bonds"_special_bonds.html command with a 1-2 weight = 0.0,
 which is the default value.  It needs to be considered whether one has
 to adjust the "special_bonds"_special_bonds.html weighting according
 to the molecular topology since the interactions of the shells are
 bypassed over an extra bond.
 
 Note that this core/shell implementation does not require all ions to
 be polarized.  One can mix core/shell pairs and ions without a
 satellite particle if desired.
 
 Since the core/shell model permits distances of r = 0.0 between the
 core and shell, a pair style with a "cs" suffix needs to be used to
 implement a valid long-range Coulombic correction.  Several such pair
 styles are provided in the CORESHELL package.  See "this doc
 page"_pair_cs.html for details.  All of the core/shell enabled pair
 styles require the use of a long-range Coulombic solver, as specified
 by the "kspace_style"_kspace_style.html command.  Either the PPPM or
 Ewald solvers can be used.
 
 For the NaCl example problem, these pair style and bond style settings
 are used:
 
 pair_style      born/coul/long/cs 20.0 20.0
 pair_coeff      * *      0.0 1.000   0.00  0.00   0.00
 pair_coeff      3 3    487.0 0.23768 0.00  1.05   0.50 #Na-Na
 pair_coeff      3 4 145134.0 0.23768 0.00  6.99   8.70 #Na-Cl
 pair_coeff      4 4 405774.0 0.23768 0.00 72.40 145.40 #Cl-Cl :pre
 
 bond_style      harmonic
 bond_coeff      1 63.014 0.0
 bond_coeff      2 25.724 0.0 :pre
 
 When running dynamics with the adiabatic core/shell model, the
 following issues should be considered.  The relative motion of
 the core and shell particles corresponds to the polarization, 
 whereby an instantaneous relaxation of the shells is approximated
 and a fast core/shell spring frequency ensures a nearly constant
 internal kinetic energy during the simulation. 
 Thermostats can alter this polarization behaviour, by scaling the
 internal kinetic energy, meaning the shell will not react freely to 
 its electrostatic environment. 
 Therefore it is typically desirable to decouple the relative motion of 
 the core/shell pair, which is an imaginary degree of freedom, from the
 real physical system.  To do that, the "compute
 temp/cs"_compute_temp_cs.html command can be used, in conjunction with
 any of the thermostat fixes, such as "fix nvt"_fix_nh.html or "fix
 langevin"_fix_langevin.html.  This compute uses the center-of-mass velocity
 of the core/shell pairs to calculate a temperature, and ensures that
 velocity is what is rescaled for thermostatting purposes.  This
 compute also works for a system with both core/shell pairs and
 non-polarized ions (ions without an attached satellite particle).  The
 "compute temp/cs"_compute_temp_cs.html command requires input of two
 groups, one for the core atoms, another for the shell atoms.
 Non-polarized ions which might also be included in the treated system
 should not be included into either of these groups, they are taken
 into account by the {group-ID} (2nd argument) of the compute.  The
 groups can be defined using the "group {type}"_group.html command.
 Note that to perform thermostatting using this definition of
 temperature, the "fix modify temp"_fix_modify.html command should be
 used to assign the compute to the thermostat fix.  Likewise the
 "thermo_modify temp"_thermo_modify.html command can be used to make
 this temperature be output for the overall system.
 
 For the NaCl example, this can be done as follows:
 
 group cores type 1 2
 group shells type 3 4
 compute CSequ all temp/cs cores shells
 fix thermoberendsen all temp/berendsen 1427 1427 0.4    # thermostat for the true physical system
 fix thermostatequ all nve                               # integrator as needed for the berendsen thermostat
 fix_modify thermoberendsen temp CSequ
 thermo_modify temp CSequ                                # output of center-of-mass derived temperature :pre
 
 The pressure for the core/shell system is computed via the regular 
 LAMMPS convention by "treating the cores and shells as individual 
 particles"_#MitchellFincham2. For the thermo output of the pressure 
 as well as for the application of a barostat, it is necessary to 
 use an additional "pressure"_compute_pressure.html compute based on the 
 default "temperature"_compute_temp.html and specifying it as a second 
 argument in "fix modify"_fix_modify.html and 
 "thermo_modify"_thermo_modify.html resulting in:
 
 (...)
 compute CSequ all temp/cs cores shells
 compute thermo_press_lmp all pressure thermo_temp       # pressure for individual particles
 thermo_modify temp CSequ press thermo_press_lmp         # modify thermo to regular pressure
 fix press_bar all npt temp 300 300 0.04 iso 0 0 0.4
 fix_modify press_bar temp CSequ press thermo_press_lmp  # pressure modification for correct kinetic scalar :pre
 
 If "compute temp/cs"_compute_temp_cs.html is used, the decoupled
 relative motion of the core and the shell should in theory be
 stable.  However numerical fluctuation can introduce a small
 momentum to the system, which is noticeable over long trajectories.
 Therefore it is recommended to use the "fix
 momentum"_fix_momentum.html command in combination with "compute
 temp/cs"_compute_temp_cs.html when equilibrating the system to
 prevent any drift.
 
 When initializing the velocities of a system with core/shell pairs, it
 is also desirable to not introduce energy into the relative motion of
 the core/shell particles, but only assign a center-of-mass velocity to
 the pairs.  This can be done by using the {bias} keyword of the
 "velocity create"_velocity.html command and assigning the "compute
 temp/cs"_compute_temp_cs.html command to the {temp} keyword of the
 "velocity"_velocity.html command, e.g.
 
 velocity all create 1427 134 bias yes temp CSequ
 velocity all scale 1427 temp CSequ :pre
 
 To maintain the correct polarizability of the core/shell pairs, the 
 kinetic energy of the internal motion shall remain nearly constant. 
 Therefore the choice of spring force and mass ratio needs to ensure 
 much faster relative motion of the 2 atoms within the core/shell pair 
 than their center-of-mass velocity. This allows the shells to 
 effectively react instantaneously to the electrostatic environment and 
 limits energy transfer to or from the core/shell oscillators.
 This fast movement also dictates the timestep that can be used.
 
 The primary literature of the adiabatic core/shell model suggests that
 the fast relative motion of the core/shell pairs only allows negligible
 energy transfer to the environment. 
 The mentioned energy transfer will typically lead to a small drift
 in total energy over time.  This internal energy can be monitored
 using the "compute chunk/atom"_compute_chunk_atom.html and "compute
 temp/chunk"_compute_temp_chunk.html commands.  The internal kinetic
 energies of each core/shell pair can then be summed using the sum()
 special function of the "variable"_variable.html command.  Or they can
 be time/averaged and output using the "fix ave/time"_fix_ave_time.html
 command.  To use these commands, each core/shell pair must be defined
 as a "chunk".  If each core/shell pair is defined as its own molecule,
 the molecule ID can be used to define the chunks.  If cores are bonded
 to each other to form larger molecules, the chunks can be identified
 by the "fix property/atom"_fix_property_atom.html via assigning a
 core/shell ID to each atom using a special field in the data file read
 by the "read_data"_read_data.html command.  This field can then be
 accessed by the "compute property/atom"_compute_property_atom.html
 command, to use as input to the "compute
 chunk/atom"_compute_chunk_atom.html command to define the core/shell
 pairs as chunks.
 
 For example if core/shell pairs are the only molecules:
 
 read_data NaCl_CS_x0.1_prop.data 
 compute prop all property/atom molecule
 compute cs_chunk all chunk/atom c_prop
 compute cstherm all temp/chunk cs_chunk temp internal com yes cdof 3.0     # note the chosen degrees of freedom for the core/shell pairs
 fix ave_chunk all ave/time 10 1 10 c_cstherm file chunk.dump mode vector :pre
 
 For example if core/shell pairs and other molecules are present:
 
 fix csinfo all property/atom i_CSID                       # property/atom command
 read_data NaCl_CS_x0.1_prop.data fix csinfo NULL CS-Info  # atom property added in the data-file
 compute prop all property/atom i_CSID
 (...) :pre
 
 The additional section in the data file would be formatted like this:
 
 CS-Info         # header of additional section :pre
 
 1   1           # column 1 = atom ID, column 2 = core/shell ID
 2   1
 3   2
 4   2
 5   3
 6   3
 7   4
 8   4
 (...) :pre
 
 :line
 
 6.27 Drude induced dipoles :link(howto_27),h4
 
 The thermalized Drude model, similarly to the "core-shell"_#howto_26
 model, represents induced dipoles by a pair of charges (the core atom
 and the Drude particle) connected by a harmonic spring. The Drude
 model has a number of features aimed at its use in molecular systems
 ("Lamoureux and Roux"_#howto-Lamoureux):
 
 Thermostatting of the additional degrees of freedom associated with the
 induced dipoles at very low temperature, in terms of the reduced
 coordinates of the Drude particles with respect to their cores. This
 makes the trajectory close to that of relaxed induced dipoles. :ulb,l
 
 Consistent definition of 1-2 to 1-4 neighbors. A core-Drude particle
 pair represents a single (polarizable) atom, so the special screening
 factors in a covalent structure should be the same for the core and
 the Drude particle.  Drude particles have to inherit the 1-2, 1-3, 1-4
 special neighbor relations from their respective cores. :l
 
 Stabilization of the interactions between induced dipoles. Drude
 dipoles on covalently bonded atoms interact too strongly due to the
 short distances, so an atom may capture the Drude particle of a
 neighbor, or the induced dipoles within the same molecule may align
 too much. To avoid this, damping at short range can be done by Thole
 functions (for which there are physical grounds). This Thole damping
 is applied to the point charges composing the induced dipole (the
 charge of the Drude particle and the opposite charge on the core, not
 to the total charge of the core atom). :l
 :ule
 
 A detailed tutorial covering the usage of Drude induced dipoles in
 LAMMPS is "available here"_tutorial_drude.html.
 
 As with the core-shell model, the cores and Drude particles should
 appear in the data file as standard atoms. The same holds for the
 springs between them, which are described by standard harmonic bonds.
 The nature of the atoms (core, Drude particle or non-polarizable) is
 specified via the "fix drude"_fix_drude.html command.  The special
 list of neighbors is automatically refactored to account for the
 equivalence of core and Drude particles as regards special 1-2 to 1-4
 screening. It may be necessary to use the {extra} keyword of the
 "special_bonds"_special_bonds.html command. If using "fix
 shake"_fix_shake.html, make sure no Drude particle is in this fix
 group.
 
 There are two ways to thermostat the Drude particles at a low
 temperature: use either "fix langevin/drude"_fix_langevin_drude.html
 for a Langevin thermostat, or "fix
 drude/transform/*"_fix_drude_transform.html for a Nose-Hoover
 thermostat. The former requires use of the command "comm_modify vel
 yes"_comm_modify.html. The latter requires two separate integration
 fixes like {nvt} or {npt}. The correct temperatures of the reduced
 degrees of freedom can be calculated using the "compute
 temp/drude"_compute_temp_drude.html. This also requires use of the
 command {comm_modify vel yes}.
 
 Short-range damping of the induced dipole interactions can be achieved
 using Thole functions through the "pair style
 thole"_pair_thole.html in "pair_style hybrid/overlay"_pair_hybrid.html
 with a Coulomb pair style. It may be useful to use {coul/long/cs} or
 similar from the CORESHELL package if the core and Drude particle come
 too close, which can cause numerical issues.
 
 :line
 :line
 
 :link(howto-Berendsen)
 [(Berendsen)] Berendsen, Grigera, Straatsma, J Phys Chem, 91,
 6269-6271 (1987).
 
 :link(howto-Cornell)
 [(Cornell)] Cornell, Cieplak, Bayly, Gould, Merz, Ferguson,
 Spellmeyer, Fox, Caldwell, Kollman, JACS 117, 5179-5197 (1995).
 
 :link(Horn)
 [(Horn)] Horn, Swope, Pitera, Madura, Dick, Hura, and Head-Gordon,
 J Chem Phys, 120, 9665 (2004).
 
 :link(howto-Ikeshoji)
 [(Ikeshoji)] Ikeshoji and Hafskjold, Molecular Physics, 81, 251-261
 (1994).
 
 :link(howto-Wirnsberger)
 [(Wirnsberger)] Wirnsberger, Frenkel, and Dellago, J Chem Phys, 143, 124104
 (2015).
 
 :link(howto-MacKerell)
 [(MacKerell)] MacKerell, Bashford, Bellott, Dunbrack, Evanseck, Field,
 Fischer, Gao, Guo, Ha, et al, J Phys Chem, 102, 3586 (1998).
 
 :link(howto-Mayo)
 [(Mayo)] Mayo, Olafson, Goddard III, J Phys Chem, 94, 8897-8909
 (1990).
 
 :link(Jorgensen1)
 [(Jorgensen)] Jorgensen, Chandrasekhar, Madura, Impey, Klein, J Chem
 Phys, 79, 926 (1983).
 
 :link(Price1)
 [(Price)] Price and Brooks, J Chem Phys, 121, 10096 (2004).
 
 :link(Shinoda1)
 [(Shinoda)] Shinoda, Shiga, and Mikami, Phys Rev B, 69, 134103 (2004).
 
 :link(MitchellFincham)
 [(Mitchell and Fincham)] Mitchell, Fincham, J Phys Condensed Matter,
 5, 1031-1038 (1993).
 
 :link(MitchellFincham2)
 [(Fincham)] Fincham, Mackrodt and Mitchell, J Phys Condensed Matter,
 6, 393-404 (1994).
 
 :link(howto-Lamoureux)
 [(Lamoureux and Roux)] G. Lamoureux, B. Roux, J. Chem. Phys 119, 3025 (2003)
diff --git a/doc/src/Section_python.txt b/doc/src/Section_python.txt
index 50807e2d9..b994a5640 100644
--- a/doc/src/Section_python.txt
+++ b/doc/src/Section_python.txt
@@ -1,838 +1,846 @@
 "Previous Section"_Section_modify.html - "LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc - "Next Section"_Section_errors.html :c
 
 :link(lws,http://lammps.sandia.gov)
 :link(ld,Manual.html)
 :link(lc,Section_commands.html#comm)
 
 :line
 
 11. Python interface to LAMMPS :h3
 
 LAMMPS can work together with Python in three ways.  First, Python can
 wrap LAMMPS through the "LAMMPS library
 interface"_Section_howto.html#howto_19, so that a Python script can
 create one or more instances of LAMMPS and launch one or more
 simulations.  In Python lingo, this is "extending" Python with LAMMPS.
 
 Second, the low-level Python interface can be used indirectly through the
 PyLammps and IPyLammps wrapper classes in Python. These wrappers try to
 simplify the usage of LAMMPS in Python by providing an object-based interface
 to common LAMMPS functionality. It also reduces the amount of code necessary to
 parameterize LAMMPS scripts through Python and makes variables and computes
 directly accessible. See "PyLammps interface"_#py_9 for more details.
 
 Third, LAMMPS can use the Python interpreter, so that a LAMMPS input
 script can invoke Python code, and pass information back-and-forth
 between the input script and Python functions you write.  The Python
 code can also callback to LAMMPS to query or change its attributes.
 In Python lingo, this is "embedding" Python in LAMMPS.
 
 This section describes how to use these three approaches.
 
 11.1 "Overview of running LAMMPS from Python"_#py_1
 11.2 "Overview of using Python from a LAMMPS script"_#py_2
 11.3 "Building LAMMPS as a shared library"_#py_3
 11.4 "Installing the Python wrapper into Python"_#py_4
 11.5 "Extending Python with MPI to run in parallel"_#py_5
 11.6 "Testing the Python-LAMMPS interface"_#py_6
 11.7 "Using LAMMPS from Python"_#py_7
 11.8 "Example Python scripts that use LAMMPS"_#py_8
 11.9 "PyLammps interface"_#py_9 :ul
 
 If you are not familiar with it, "Python"_http://www.python.org is a
 powerful scripting and programming language which can essentially do
 anything that faster, lower-level languages like C or C++ can do, but
 typically with much fewer lines of code.  When used in embedded mode,
 Python can perform operations that the simplistic LAMMPS input script
 syntax cannot.  Python can also be used as a "glue" language to
 drive a program through its library interface, or to hook multiple
 pieces of software together, such as a simulation package plus a
 visualization package, or to run a coupled multiscale or multiphysics
 model.
 
 See "Section 6.10"_Section_howto.html#howto_10 of the manual and
 the couple directory of the distribution for more ideas about coupling
 LAMMPS to other codes.  See "Section
 6.19"_Section_howto.html#howto_19 for a description of the LAMMPS
 library interface provided in src/library.cpp and src/library.h, and
 how to extend it for your needs.  As described below, that interface
 is what is exposed to Python either when calling LAMMPS from Python or
 when calling Python from a LAMMPS input script and then calling back
 to LAMMPS from Python code.  The library interface is designed to be
 easy to add functions to.  Thus the Python interface to LAMMPS is also
 easy to extend.
 
 If you create interesting Python scripts that run LAMMPS or
 interesting Python functions that can be called from a LAMMPS input
 script, that you think would be useful to other users, please "email
 them to the developers"_http://lammps.sandia.gov/authors.html.  We can
 include them in the LAMMPS distribution.
 
 :line
 :line
 
 11.1 Overview of running LAMMPS from Python :link(py_1),h4
 
 The LAMMPS distribution includes a python directory with all you need
 to run LAMMPS from Python.  The python/lammps.py file wraps the LAMMPS
 library interface, with one wrapper function per LAMMPS library
 function.  This file makes it possible to do the following either
 from a Python script, or interactively from a Python prompt: create
 one or more instances of LAMMPS, invoke LAMMPS commands or give it an
 input script, run LAMMPS incrementally, extract LAMMPS results, and
 modify internal LAMMPS variables.  From a Python script you can do
 this in serial or parallel.  Running Python interactively in parallel
 does not generally work, unless you have a version of Python that
 extends standard Python to enable multiple instances of Python to read
 what you type.
 
 To do all of this, you must first build LAMMPS as a shared library,
 then insure that your Python can find the python/lammps.py file and
 the shared library.  These steps are explained in subsequent sections
 11.3 and 11.4.  Sections 11.5 and 11.6 discuss using MPI from a
 parallel Python program and how to test that you are ready to use
 LAMMPS from Python.  Section 11.7 lists all the functions in the
 current LAMMPS library interface and how to call them from Python.
 
 Section 11.8 gives some examples of coupling LAMMPS to other tools via
 Python.  For example, LAMMPS can easily be coupled to a GUI or other
 visualization tools that display graphs or animations in real time as
 LAMMPS runs.  Examples of such scripts are included in the python
 directory.
 
 Two advantages of using Python to run LAMMPS are how concise the
 language is, and that it can be run interactively, enabling rapid
 development and debugging of programs.  If you use it to mostly invoke
 costly operations within LAMMPS, such as running a simulation for a
 reasonable number of timesteps, then the overhead cost of invoking
 LAMMPS thru Python will be negligible.
 
 The Python wrapper for LAMMPS uses the amazing and magical (to me)
 "ctypes" package in Python, which auto-generates the interface code
 needed between Python and a set of C interface routines for a library.
 Ctypes is part of standard Python for versions 2.5 and later.  You can
 check which version of Python you have installed, by simply typing
 "python" at a shell prompt.
 
 :line
 
 11.2 Overview of using Python from a LAMMPS script :link(py_2),h4
 
 NOTE: It is not currently possible to use the "python"_python.html
 command described in this section with Python 3, only with Python 2.
 The C API changed from Python 2 to 3 and the LAMMPS code is not
 compatible with both.
 
 LAMMPS has a "python"_python.html command which can be used in an
 input script to define and execute a Python function that you write
 the code for.  The Python function can also be assigned to a LAMMPS
 python-style variable via the "variable"_variable.html command.  Each
 time the variable is evaluated, either in the LAMMPS input script
 itself, or by another LAMMPS command that uses the variable, this will
 trigger the Python function to be invoked.
 
 The Python code for the function can be included directly in the input
 script or in an auxiliary file.  The function can have arguments which
 are mapped to LAMMPS variables (also defined in the input script) and
 it can return a value to a LAMMPS variable.  This is thus a mechanism
 for your input script to pass information to a piece of Python code,
 ask Python to execute the code, and return information to your input
 script.
 
 Note that a Python function can be arbitrarily complex.  It can import
 other Python modules, instantiate Python classes, call other Python
 functions, etc.  The Python code that you provide can contain more
 code than the single function.  It can contain other functions or
 Python classes, as well as global variables or other mechanisms for
 storing state between calls from LAMMPS to the function.
 
 The Python function you provide can consist of "pure" Python code that
 only performs operations provided by standard Python.  However, the
 Python function can also "call back" to LAMMPS through its
 Python-wrapped library interface, in the manner described in the
 previous section 11.1.  This means it can issue LAMMPS input script
 commands or query and set internal LAMMPS state.  As an example, this
 can be useful in an input script to create a more complex loop with
 branching logic, than can be created using the simple looping and
 branching logic enabled by the "next"_next.html and "if"_if.html
 commands.
 
 See the "python"_python.html doc page and the "variable"_variable.html
 doc page for its python-style variables for more info, including
 examples of Python code you can write for both pure Python operations
 and callbacks to LAMMPS.
 
 To run pure Python code from LAMMPS, you only need to build LAMMPS
 with the PYTHON package installed:
 
 make yes-python
 make machine :pre
 
 Note that this will link LAMMPS with the Python library on your
 system, which typically requires several auxiliary system libraries to
 also be linked.  The list of these libraries and the paths to find
 them are specified in the lib/python/Makefile.lammps file.  You need
 to insure that file contains the correct information for your version
 of Python and your machine to successfully build LAMMPS.  See the
 lib/python/README file for more info.
 
 If you want to write Python code with callbacks to LAMMPS, then you
 must also follow the steps overviewed in the preceding section (11.1)
 for running LAMMPS from Python.  I.e. you must build LAMMPS as a
 shared library and insure that Python can find the python/lammps.py
 file and the shared library.
 
 :line
 
 11.3 Building LAMMPS as a shared library :link(py_3),h4
 
 Instructions on how to build LAMMPS as a shared library are given in
 "Section 2.5"_Section_start.html#start_5.  A shared library is one
 that is dynamically loadable, which is what Python requires to wrap
 LAMMPS.  On Linux this is a library file that ends in ".so", not ".a".
 
 From the src directory, type
 
 make foo mode=shlib :pre
 
 where foo is the machine target name, such as linux or g++ or serial.
 This should create the file liblammps_foo.so in the src directory, as
 well as a soft link liblammps.so, which is what the Python wrapper will
 load by default.  Note that if you are building multiple machine
 versions of the shared library, the soft link is always set to the
 most recently built version.
 
 NOTE: If you are building LAMMPS with an MPI or FFT library or other
 auxiliary libraries (used by various packages), then all of these
 extra libraries must also be shared libraries.  If the LAMMPS
 shared-library build fails with an error complaining about this, see
 "Section 2.5"_Section_start.html#start_5 for more details.
 
 :line
 
 11.4 Installing the Python wrapper into Python :link(py_4),h4
 
 For Python to invoke LAMMPS, there are 2 files it needs to know about:
 
 python/lammps.py
 src/liblammps.so :ul
 
 Lammps.py is the Python wrapper on the LAMMPS library interface.
 Liblammps.so is the shared LAMMPS library that Python loads, as
 described above.
 
 You can insure Python can find these files in one of two ways:
 
 set two environment variables
 run the python/install.py script :ul
 
 If you set the paths to these files as environment variables, you only
 have to do it once.  For the csh or tcsh shells, add something like
 this to your ~/.cshrc file, one line for each of the two files:
 
 setenv PYTHONPATH $\{PYTHONPATH\}:/home/sjplimp/lammps/python
 setenv LD_LIBRARY_PATH $\{LD_LIBRARY_PATH\}:/home/sjplimp/lammps/src :pre
 
 If you use the python/install.py script, you need to invoke it every
 time you rebuild LAMMPS (as a shared library) or make changes to the
 python/lammps.py file.
 
 You can invoke install.py from the python directory as
 
 % python install.py \[libdir\] \[pydir\] :pre
 
 The optional libdir is where to copy the LAMMPS shared library to; the
 default is /usr/local/lib.  The optional pydir is where to copy the
 lammps.py file to; the default is the site-packages directory of the
 version of Python that is running the install script.
 
 Note that libdir must be a location that is in your default
 LD_LIBRARY_PATH, like /usr/local/lib or /usr/lib.  And pydir must be a
 location that Python looks in by default for imported modules, like
 its site-packages dir.  If you want to copy these files to
 non-standard locations, such as within your own user space, you will
 need to set your PYTHONPATH and LD_LIBRARY_PATH environment variables
 accordingly, as above.
 
 If the install.py script does not allow you to copy files into system
 directories, prefix the python command with "sudo".  If you do this,
 make sure that the Python that root runs is the same as the Python you
 run.  E.g. you may need to do something like
 
 % sudo /usr/local/bin/python install.py \[libdir\] \[pydir\] :pre
 
 You can also invoke install.py from the make command in the src
 directory as
 
 % make install-python :pre
 
 In this mode you cannot append optional arguments.  Again, you may
 need to prefix this with "sudo".  In this mode you cannot control
 which Python is invoked by root.
 
 Note that if you want Python to be able to load different versions of
 the LAMMPS shared library (see "this section"_#py_5 below), you will
 need to manually copy files like liblammps_g++.so into the appropriate
 system directory.  This is not needed if you set the LD_LIBRARY_PATH
 environment variable as described above.
 
 :line
 
 11.5 Extending Python with MPI to run in parallel :link(py_5),h4
 
 If you wish to run LAMMPS in parallel from Python, you need to extend
 your Python with an interface to MPI.  This also allows you to
 make MPI calls directly from Python in your script, if you desire.
 
 There are several Python packages available that purport to wrap MPI
 as a library and allow MPI functions to be called from Python. However,
 development on most of them seems to be halted except on:
 
 "mpi4py"_https://bitbucket.org/mpi4py/mpi4py
 "PyPar"_https://github.com/daleroberts/pypar :ul
 
 Both packages, PyPar and mpi4py, have been successfully tested with
 LAMMPS.  PyPar is simpler and easy to set up and use, but supports
 only a subset of MPI.  Mpi4py is more MPI-feature complete, but also a
 bit more complex to use.  As of version 2.0.0, mpi4py is the only
 python MPI wrapper that allows passing a custom MPI communicator to
 the LAMMPS constructor, which means one can easily run one or more
 LAMMPS instances on subsets of the total MPI ranks.
 
 :line
 
 PyPar requires the ubiquitous "Numpy package"_http://numpy.scipy.org
 be installed in your Python.  After launching Python, type
 
 import numpy :pre
 
 to see if it is installed.  If not, here is how to install it (version
 1.3.0b1 as of April 2009).  Unpack the numpy tarball and from its
 top-level directory, type
 
 python setup.py build
 sudo python setup.py install :pre
 
 The "sudo" is only needed if required to copy Numpy files into your
 Python distribution's site-packages directory.
 
 To install PyPar (version pypar-2.1.4_94 as of Aug 2012), unpack it
 and from its "source" directory, type
 
 python setup.py build
 sudo python setup.py install :pre
 
 Again, the "sudo" is only needed if required to copy PyPar files into
 your Python distribution's site-packages directory.
 
 If you have successfully installed PyPar, you should be able to run
 Python and type
 
 import pypar :pre
 
 without error.  You should also be able to run python in parallel
 on a simple test script
 
 % mpirun -np 4 python test.py :pre
 
 where test.py contains the lines
 
 import pypar
 print "Proc %d out of %d procs" % (pypar.rank(),pypar.size()) :pre
 
 and see one line of output for each processor you run on.
 
 NOTE: To use PyPar and LAMMPS in parallel from Python, you must insure
 both are using the same version of MPI.  If you only have one MPI
 installed on your system, this is not an issue, but it can be if you
 have multiple MPIs.  Your LAMMPS build is explicit about which MPI it
 is using, since you specify the details in your low-level
 src/MAKE/Makefile.foo file.  PyPar uses the "mpicc" command to find
 information about the MPI it uses to build against.  And it tries to
 load "libmpi.so" from the LD_LIBRARY_PATH.  This may or may not find
 the MPI library that LAMMPS is using.  If you have problems running
 both PyPar and LAMMPS together, this is an issue you may need to
 address, e.g. by moving other MPI installations so that PyPar finds
 the right one.
 
 :line
 
 To install mpi4py (version mpi4py-2.0.0 as of Oct 2015), unpack it
 and from its main directory, type
 
 python setup.py build
 sudo python setup.py install :pre
 
 Again, the "sudo" is only needed if required to copy mpi4py files into
 your Python distribution's site-packages directory. To install with
 user privilege into the user local directory type
 
 python setup.py install --user :pre
 
 If you have successfully installed mpi4py, you should be able to run
 Python and type
 
 from mpi4py import MPI :pre
 
 without error.  You should also be able to run python in parallel
 on a simple test script
 
 % mpirun -np 4 python test.py :pre
 
 where test.py contains the lines
 
 from mpi4py import MPI
 comm = MPI.COMM_WORLD
 print "Proc %d out of %d procs" % (comm.Get_rank(),comm.Get_size()) :pre
 
 and see one line of output for each processor you run on.
 
 NOTE: To use mpi4py and LAMMPS in parallel from Python, you must
 insure both are using the same version of MPI.  If you only have one
 MPI installed on your system, this is not an issue, but it can be if
 you have multiple MPIs.  Your LAMMPS build is explicit about which MPI
 it is using, since you specify the details in your low-level
 src/MAKE/Makefile.foo file.  Mpi4py uses the "mpicc" command to find
 information about the MPI it uses to build against.  And it tries to
 load "libmpi.so" from the LD_LIBRARY_PATH.  This may or may not find
 the MPI library that LAMMPS is using.  If you have problems running
 both mpi4py and LAMMPS together, this is an issue you may need to
 address, e.g. by moving other MPI installations so that mpi4py finds
 the right one.
 
 :line
 
 11.6 Testing the Python-LAMMPS interface :link(py_6),h4
 
 To test if LAMMPS is callable from Python, launch Python interactively
 and type:
 
 >>> from lammps import lammps
 >>> lmp = lammps() :pre
 
 If you get no errors, you're ready to use LAMMPS from Python.  If the
 2nd command fails, the most common error to see is
 
 OSError: Could not load LAMMPS dynamic library :pre
 
 which means Python was unable to load the LAMMPS shared library.  This
 typically occurs if the system can't find the LAMMPS shared library or
 one of the auxiliary shared libraries it depends on, or if something
 about the library is incompatible with your Python.  The error message
 should give you an indication of what went wrong.
 
 You can also test the load directly in Python as follows, without
 first importing from the lammps.py file:
 
 >>> from ctypes import CDLL
 >>> CDLL("liblammps.so") :pre
 
 If an error occurs, carefully go thru the steps in "Section
 2.5"_Section_start.html#start_5 and above about building a shared
 library and about insuring Python can find the necessary two files
 it needs.
 
 [Test LAMMPS and Python in serial:] :h5
 
 To run a LAMMPS test in serial, type these lines into Python
 interactively from the bench directory:
 
 >>> from lammps import lammps
 >>> lmp = lammps()
 >>> lmp.file("in.lj") :pre
 
 Or put the same lines in the file test.py and run it as
 
 % python test.py :pre
 
 Either way, you should see the results of running the in.lj benchmark
 on a single processor appear on the screen, the same as if you had
 typed something like:
 
 lmp_g++ -in in.lj :pre
 
 [Test LAMMPS and Python in parallel:] :h5
 
 To run LAMMPS in parallel, assuming you have installed the
 "PyPar"_https://github.com/daleroberts/pypar package as discussed
 above, create a test.py file containing these lines:
 
 import pypar
 from lammps import lammps
 lmp = lammps()
 lmp.file("in.lj")
 print "Proc %d out of %d procs has" % (pypar.rank(),pypar.size()),lmp
 pypar.finalize() :pre
 
 To run LAMMPS in parallel, assuming you have installed the
 "mpi4py"_https://bitbucket.org/mpi4py/mpi4py package as discussed
 above, create a test.py file containing these lines:
 
 from mpi4py import MPI
 from lammps import lammps
 lmp = lammps()
 lmp.file("in.lj")
 me = MPI.COMM_WORLD.Get_rank()
 nprocs = MPI.COMM_WORLD.Get_size()
 print "Proc %d out of %d procs has" % (me,nprocs),lmp
 MPI.Finalize() :pre
 
 You can run either script in parallel as:
 
 % mpirun -np 4 python test.py :pre
 
 and you should see the same output as if you had typed
 
 % mpirun -np 4 lmp_g++ -in in.lj :pre
 
 Note that if you leave out the 3 lines from test.py that specify PyPar
 commands you will instantiate and run LAMMPS independently on each of
 the P processors specified in the mpirun command.  In this case you
 should get 4 sets of output, each showing that a LAMMPS run was made
 on a single processor, instead of one set of output showing that
 LAMMPS ran on 4 processors.  If the 1-processor outputs occur, it
 means that PyPar is not working correctly.
 
 Also note that once you import the PyPar module, PyPar initializes MPI
 for you, and you can use MPI calls directly in your Python script, as
 described in the PyPar documentation.  The last line of your Python
 script should be pypar.finalize(), to insure MPI is shut down
 correctly.
 
 [Running Python scripts:] :h5
 
 Note that any Python script (not just for LAMMPS) can be invoked in
 one of several ways:
 
 % python foo.script
 % python -i foo.script
 % foo.script :pre
 
 The last command requires that the first line of the script be
 something like this:
 
 #!/usr/local/bin/python
 #!/usr/local/bin/python -i :pre
 
 where the path points to where you have Python installed, and that you
 have made the script file executable:
 
 % chmod +x foo.script :pre
 
 Without the "-i" flag, Python will exit when the script finishes.
 With the "-i" flag, you will be left in the Python interpreter when
 the script finishes, so you can type subsequent commands.  As
 mentioned above, you can only run Python interactively when running
 Python on a single processor, not in parallel.
 
 :line
 :line
 
 11.7 Using LAMMPS from Python :link(py_7),h4
 
 As described above, the Python interface to LAMMPS consists of a
 Python "lammps" module, the source code for which is in
 python/lammps.py, which creates a "lammps" object, with a set of
 methods that can be invoked on that object.  The sample Python code
 below assumes you have first imported the "lammps" module in your
 Python script, as follows:
 
 from lammps import lammps :pre
 
 These are the methods defined by the lammps module.  If you look at
 the files src/library.cpp and src/library.h you will see that they
 correspond one-to-one with calls you can make to the LAMMPS library
 from a C++ or C or Fortran program, and which are described in
 "Section 6.19"_Section_howto.html#howto_19 of the manual.
 
 lmp = lammps()           # create a LAMMPS object using the default liblammps.so library
                          # 4 optional args are allowed: name, cmdargs, ptr, comm
 lmp = lammps(ptr=lmpptr) # use lmpptr as previously created LAMMPS object
 lmp = lammps(comm=split) # create a LAMMPS object with a custom communicator, requires mpi4py 2.0.0 or later
 lmp = lammps(name="g++")   # create a LAMMPS object using the liblammps_g++.so library
 lmp = lammps(name="g++",cmdargs=list)    # add LAMMPS command-line args, e.g. list = \["-echo","screen"\] :pre
 
 lmp.close()              # destroy a LAMMPS object :pre
 
 version = lmp.version()  # return the numerical version id, e.g. LAMMPS 2 Sep 2015 -> 20150902 :pre
 
 lmp.file(file)           # run an entire input script, file = "in.lj"
 lmp.command(cmd)         # invoke a single LAMMPS command, cmd = "run 100"
 lmp.commands_list(cmdlist)     # invoke commands in cmdlist = \["run 10", "run 20"\]
 lmp.commands_string(multicmd)  # invoke commands in multicmd = "run 10\nrun 20" :pre
 
 xlo = lmp.extract_global(name,type)  # extract a global quantity
                                      # name = "boxxlo", "nlocal", etc
                                      # type = 0 = int
                                      #        1 = double :pre
 
 coords = lmp.extract_atom(name,type)      # extract a per-atom quantity
                                           # name = "x", "type", etc
                                           # type = 0 = vector of ints
                                           #        1 = array of ints
                                           #        2 = vector of doubles
                                           #        3 = array of doubles :pre
 
 eng = lmp.extract_compute(id,style,type)  # extract value(s) from a compute
 v3 = lmp.extract_fix(id,style,type,i,j)   # extract value(s) from a fix
                                           # id = ID of compute or fix
                                           # style = 0 = global data
                                           #         1 = per-atom data
                                           #         2 = local data
                                           # type = 0 = scalar
                                           #        1 = vector
                                           #        2 = array
                                           # i,j = indices of value in global vector or array :pre
 
 var = lmp.extract_variable(name,group,flag)  # extract value(s) from a variable
                                              # name = name of variable
                                              # group = group ID (ignored for equal-style variables)
                                              # flag = 0 = equal-style variable
                                              #        1 = atom-style variable :pre
 
 flag = lmp.set_variable(name,value)       # set existing named string-style variable to value, flag = 0 if successful
 value = lmp.get_thermo(name)              # return current value of a thermo keyword :pre
 
 natoms = lmp.get_natoms()                 # total # of atoms as int
-data = lmp.gather_atoms(name,type,count)  # return atom attribute of all atoms gathered into data, ordered by atom ID
+data = lmp.gather_atoms(name,type,count)  # return per-atom property of all atoms gathered into data, ordered by atom ID
                                           # name = "x", "charge", "type", etc
                                           # count = # of per-atom values, 1 or 3, etc
-lmp.scatter_atoms(name,type,count,data)   # scatter atom attribute of all atoms from data, ordered by atom ID
+lmp.scatter_atoms(name,type,count,data)   # scatter per-atom property to all atoms from data, ordered by atom ID
                                           # name = "x", "charge", "type", etc
                                           # count = # of per-atom values, 1 or 3, etc :pre
 
 :line
 
 The lines
 
 from lammps import lammps
 lmp = lammps() :pre
 
 create an instance of LAMMPS, wrapped in a Python class by the lammps
 Python module, and return an instance of the Python class as lmp.  It
 is used to make all subsequent calls to the LAMMPS library.
 
 Additional arguments to lammps() can be used to tell Python the name
 of the shared library to load or to pass arguments to the LAMMPS
 instance, the same as if LAMMPS were launched from a command-line
 prompt.
 
 If the ptr argument is set like this:
 
 lmp = lammps(ptr=lmpptr) :pre
 
 then lmpptr must be an argument passed to Python via the LAMMPS
 "python"_python.html command, when it is used to define a Python
 function that is invoked by the LAMMPS input script.  This mode of
 using Python with LAMMPS is described above in 11.2.  The variable
 lmpptr refers to the instance of LAMMPS that called the embedded
 Python interpreter.  Using it as an argument to lammps() allows the
 returned Python class instance "lmp" to make calls to that instance of
 LAMMPS.  See the "python"_python.html command doc page for examples
 using this syntax.
 
 Note that you can create multiple LAMMPS objects in your Python
 script, and coordinate and run multiple simulations, e.g.
 
 from lammps import lammps
 lmp1 = lammps()
 lmp2 = lammps()
 lmp1.file("in.file1")
 lmp2.file("in.file2") :pre
 
 The file(), command(), commands_list(), and commands_string() methods
 allow an input script, a single command, or multiple commands to be
 invoked.
 
 The extract_global(), extract_atom(), extract_compute(),
 extract_fix(), and extract_variable() methods return values or
 pointers to data structures internal to LAMMPS.
 
 For extract_global() see the src/library.cpp file for the list of
 valid names.  New names could easily be added.  A double or integer is
 returned.  You need to specify the appropriate data type via the type
 argument.
 
 For extract_atom(), a pointer to internal LAMMPS atom-based data is
 returned, which you can use via normal Python subscripting.  See the
 extract() method in the src/atom.cpp file for a list of valid names.
-Again, new names could easily be added.  A pointer to a vector of
-doubles or integers, or a pointer to an array of doubles (double **)
-or integers (int **) is returned.  You need to specify the appropriate
-data type via the type argument.
+Again, new names could easily be added if the property you want is not
+listed.  A pointer to a vector of doubles or integers, or a pointer to
+an array of doubles (double **) or integers (int **) is returned.  You
+need to specify the appropriate data type via the type argument.
 
 For extract_compute() and extract_fix(), the global, per-atom, or
 local data calculated by the compute or fix can be accessed.  What is
 returned depends on whether the compute or fix calculates a scalar or
 vector or array.  For a scalar, a single double value is returned.  If
 the compute or fix calculates a vector or array, a pointer to the
 internal LAMMPS data is returned, which you can use via normal Python
 subscripting.  The one exception is that for a fix that calculates a
 global vector or array, a single double value from the vector or array
 is returned, indexed by I (vector) or I and J (array).  I,J are
 zero-based indices.  The I,J arguments can be left out if not needed.
 See "Section 6.15"_Section_howto.html#howto_15 of the manual for a
 discussion of global, per-atom, and local data, and of scalar, vector,
 and array data types.  See the doc pages for individual
 "computes"_compute.html and "fixes"_fix.html for a description of what
 they calculate and store.
 
 For extract_variable(), an "equal-style or atom-style
 variable"_variable.html is evaluated and its result returned.
 
 For equal-style variables a single double value is returned and the
 group argument is ignored.  For atom-style variables, a vector of
 doubles is returned, one value per atom, which you can use via normal
 Python subscripting. The values will be zero for atoms not in the
 specified group.
 
 The get_natoms() method returns the total number of atoms in the
 simulation, as an int.
 
-The gather_atoms() method returns a ctypes vector of ints or doubles
-as specified by type, of length count*natoms, for the property of all
-the atoms in the simulation specified by name, ordered by count and
-then by atom ID.  The vector can be used via normal Python
-subscripting.  If atom IDs are not consecutively ordered within
-LAMMPS, a None is returned as indication of an error.
+The gather_atoms() method allows any per-atom property (coordinates,
+velocities, etc) to be extracted from LAMMPS.  It returns a ctypes
+vector of ints or doubles as specified by type, of length
+count*natoms, for the named property for all atoms in the simulation.
+The data is ordered by count and then by atom ID.  See the extract()
+method in the src/atom.cpp file for a list of valid names.  Again, new
+names could easily be added if the property you want is missing.  The
+vector can be used via normal Python subscripting.  If atom IDs are
+not consecutively ordered within LAMMPS, a None is returned as
+indication of an error.
 
 Note that the data structure gather_atoms("x") returns is different
 from the data structure returned by extract_atom("x") in four ways.
 (1) Gather_atoms() returns a vector which you index as x\[i\];
 extract_atom() returns an array which you index as x\[i\]\[j\].  (2)
 Gather_atoms() orders the atoms by atom ID while extract_atom() does
 not.  (3) Gather_atoms() returns a list of all atoms in the
 simulation; extract_atom() returns just the atoms local to each
 processor.  (4) Finally, the gather_atoms() data structure is a copy
 of the atom coords stored internally in LAMMPS, whereas extract_atom()
 returns an array that effectively points directly to the internal
 data.  This means you can change values inside LAMMPS from Python by
 assigning new values to the extract_atom() array.  To do this with
 the gather_atoms() vector, you need to change values in the vector,
 then invoke the scatter_atoms() method.
 
-The scatter_atoms() method takes a vector of ints or doubles as
-specified by type, of length count*natoms, for the property of all the
-atoms in the simulation specified by name, ordered by bount and then
-by atom ID.  It uses the vector of data to overwrite the corresponding
-properties for each atom inside LAMMPS.  This requires LAMMPS to have
-its "map" option enabled; see the "atom_modify"_atom_modify.html
-command for details.  If it is not, or if atom IDs are not
-consecutively ordered, no coordinates are reset.
+The scatter_atoms() method allows any per-atom property (coordinates,
+velocities, etc) to be inserted into LAMMPS, overwriting the current
+property.  It takes a vector of ints or doubles as specified by type,
+of length count*natoms, for the named property for all atoms in the
+simulation.  The data should be ordered by count and then by atom ID.
+See the extract() method in the src/atom.cpp file for a list of valid
+names.  Again, new names could easily be added if the property you
+want is missing.  It uses the vector of data to overwrite the
+corresponding properties for each atom inside LAMMPS.  This requires
+LAMMPS to have its "map" option enabled; see the
+"atom_modify"_atom_modify.html command for details.  If it is not, or
+if atom IDs are not consecutively ordered, no coordinates are reset.
 
 The array of coordinates passed to scatter_atoms() must be a ctypes
 vector of ints or doubles, allocated and initialized something like
 this:
 
 from ctypes import *
 natoms = lmp.get_natoms()
 n3 = 3*natoms
 x = (n3*c_double)()
 x\[0\] = x coord of atom with ID 1
 x\[1\] = y coord of atom with ID 1
 x\[2\] = z coord of atom with ID 1
 x\[3\] = x coord of atom with ID 2
 ...
 x\[n3-1\] = z coord of atom with ID natoms
-lmp.scatter_coords("x",1,3,x) :pre
+lmp.scatter_atoms("x",1,3,x) :pre
 
 Alternatively, you can just change values in the vector returned by
 gather_atoms("x",1,3), since it is a ctypes vector of doubles.
 
 :line
 
 As noted above, these Python class methods correspond one-to-one with
 the functions in the LAMMPS library interface in src/library.cpp and
 library.h.  This means you can extend the Python wrapper via the
 following steps:
 
 Add a new interface function to src/library.cpp and
 src/library.h. :ulb,l
 
 Rebuild LAMMPS as a shared library. :l
 
 Add a wrapper method to python/lammps.py for this interface
 function. :l
 
 You should now be able to invoke the new interface function from a
 Python script.  Isn't ctypes amazing? :l
 :ule
 
 :line
 :line
 
 11.8 Example Python scripts that use LAMMPS :link(py_8),h4
 
 These are the Python scripts included as demos in the python/examples
 directory of the LAMMPS distribution, to illustrate the kinds of
 things that are possible when Python wraps LAMMPS.  If you create your
 own scripts, send them to us and we can include them in the LAMMPS
 distribution.
 
 trivial.py, read/run a LAMMPS input script thru Python,
 demo.py, invoke various LAMMPS library interface routines,
 simple.py, run in parallel, similar to examples/COUPLE/simple/simple.cpp,
 split.py, same as simple.py but running in parallel on a subset of procs,
 gui.py, GUI go/stop/temperature-slider to control LAMMPS,
 plot.py, real-time temperature plot with GnuPlot via Pizza.py,
 viz_tool.py, real-time viz via some viz package,
 vizplotgui_tool.py, combination of viz_tool.py and plot.py and gui.py :tb(c=2)
 
 :line
 
 For the viz_tool.py and vizplotgui_tool.py commands, replace "tool"
 with "gl" or "atomeye" or "pymol" or "vmd", depending on what
 visualization package you have installed.
 
 Note that for GL, you need to be able to run the Pizza.py GL tool,
 which is included in the pizza sub-directory.  See the "Pizza.py doc
 pages"_pizza for more info:
 
 :link(pizza,http://www.sandia.gov/~sjplimp/pizza.html)
 
 Note that for AtomEye, you need version 3, and there is a line in the
 scripts that specifies the path and name of the executable.  See the
 AtomEye WWW pages "here"_atomeye or "here"_atomeye3 for more details:
 
 http://mt.seas.upenn.edu/Archive/Graphics/A
 http://mt.seas.upenn.edu/Archive/Graphics/A3/A3.html :pre
 
 :link(atomeye,http://mt.seas.upenn.edu/Archive/Graphics/A)
 :link(atomeye3,http://mt.seas.upenn.edu/Archive/Graphics/A3/A3.html)
 
 The latter link is to AtomEye 3 which has the scripting
 capability needed by these Python scripts.
 
 Note that for PyMol, you need to have built and installed the
 open-source version of PyMol in your Python, so that you can import it
 from a Python script.  See the PyMol WWW pages "here"_pymolhome or
 "here"_pymolopen for more details:
 
 http://www.pymol.org
 http://sourceforge.net/scm/?type=svn&group_id=4546 :pre
 
 :link(pymolhome,http://www.pymol.org)
 :link(pymolopen,http://sourceforge.net/scm/?type=svn&group_id=4546)
 
 The latter link is to the open-source version.
 
 Note that for VMD, you need a fairly current version (1.8.7 works for
 me) and there are some lines in the pizza/vmd.py script for 4 PIZZA
 variables that have to match the VMD installation on your system.
 
 :line
 
 See the python/README file for instructions on how to run them and the
 source code for individual scripts for comments about what they do.
 
 Here are screenshots of the vizplotgui_tool.py script in action for
 different visualization package options.  Click to see larger images:
 
 :image(JPG/screenshot_gl_small.jpg,JPG/screenshot_gl.jpg)
 :image(JPG/screenshot_atomeye_small.jpg,JPG/screenshot_atomeye.jpg)
 :image(JPG/screenshot_pymol_small.jpg,JPG/screenshot_pymol.jpg)
 :image(JPG/screenshot_vmd_small.jpg,JPG/screenshot_vmd.jpg)
 
 11.9 PyLammps interface :link(py_9),h4
 
 Please see the "PyLammps Tutorial"_tutorial_pylammps.html.
diff --git a/doc/src/compute_modify.txt b/doc/src/compute_modify.txt
index acf14526a..637f9b5e4 100644
--- a/doc/src/compute_modify.txt
+++ b/doc/src/compute_modify.txt
@@ -1,64 +1,70 @@
 "LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
 
 :link(lws,http://lammps.sandia.gov)
 :link(ld,Manual.html)
 :link(lc,Section_commands.html#comm)
 
 :line
 
 compute_modify command :h3
 
 [Syntax:]
 
 compute_modify compute-ID keyword value ... :pre
 
 compute-ID = ID of the compute to modify :ulb,l
 one or more keyword/value pairs may be listed :l
-keyword = {extra} or {dynamic} :l
-  {extra} value = N
+keyword = {extra/dof} or {extra} or {dynamic/dof} or {dynamic} :l
+  {extra/dof} value = N
     N = # of extra degrees of freedom to subtract
-  {dynamic} value = {yes} or {no}
-    yes/no = do or do not recompute the number of atoms contributing to the temperature :pre
+  {extra} syntax is identical to {extra/dof}, will be disabled at some point
+  {dynamic/dof} value = {yes} or {no}
+    yes/no = do or do not recompute the number of atoms contributing to the temperature
+  {dynamic} syntax is identical to {dynamic/dof}, will be disabled at some point :pre
 :ule
 
 [Examples:]
 
-compute_modify myTemp extra 0
-compute_modify newtemp dynamic yes extra 600 :pre
+compute_modify myTemp extra/dof 0
+compute_modify newtemp dynamic/dof yes extra/dof 600 :pre
 
 [Description:]
 
 Modify one or more parameters of a previously defined compute.  Not
 all compute styles support all parameters.
 
-The {extra} keyword refers to how many degrees-of-freedom are
-subtracted (typically from 3N) as a normalizing factor in a
-temperature computation.  Only computes that compute a temperature use
-this option.  The default is 2 or 3 for "2d or 3d
+The {extra/dof} or {extra} keyword refers to how many
+degrees-of-freedom are subtracted (typically from 3N) as a normalizing
+factor in a temperature computation.  Only computes that compute a
+temperature use this option.  The default is 2 or 3 for "2d or 3d
 systems"_dimension.html which is a correction factor for an ensemble
 of velocities with zero total linear momentum. For compute
 temp/partial, if one or more velocity components are excluded, the
 value used for {extra} is scaled accordingly. You can use a negative
 number for the {extra} parameter if you need to add
 degrees-of-freedom.  See the "compute
 temp/asphere"_compute_temp_asphere.html command for an example.
 
-The {dynamic} keyword determines whether the number of atoms N in the
-compute group is re-computed each time a temperature is computed.
-Only compute styles that calculate a temperature use this option.  By
-default, N is assumed to be constant.  If you are adding atoms to the
-system (see the "fix pour"_fix_pour.html or "fix
-deposit"_fix_deposit.html commands) or expect atoms to be lost
-(e.g. due to evaporation), then this option should be used to insure
-the temperature is correctly normalized.
+The {dynamic/dof} or {dynamic} keyword determines whether the number
+of atoms N in the compute group is re-computed each time a temperature
+is computed.  Only compute styles that calculate a temperature use
+this option.  By default, N is assumed to be constant.  If you are
+adding atoms to the system (see the "fix pour"_fix_pour.html, "fix
+deposit"_fix_deposit.html and "fix gcmc"_fix_gcmc.html commands) or
+expect atoms to be lost (e.g. due to evaporation), then this option
+should be used to insure the temperature is correctly normalized.
+
+NOTE: The {extra} and {dynamic} keywords should not be used as they
+are deprecated (March 2017) and will eventually be disabled.  Instead,
+use the equivalent {extra/dof} and {dynamic/dof} keywords.
 
 [Restrictions:] none
 
 [Related commands:]
 
 "compute"_compute.html
 
 [Default:]
 
-The option defaults are extra = 2 or 3 for 2d or 3d systems and
-dynamic = no.
+The option defaults are extra/dof = 2 or 3 for 2d or 3d systems and
+dynamic/dof = no.
diff --git a/doc/src/compute_sna_atom.txt b/doc/src/compute_sna_atom.txt
index 0332ab9c1..e2df70647 100644
--- a/doc/src/compute_sna_atom.txt
+++ b/doc/src/compute_sna_atom.txt
@@ -1,241 +1,250 @@
 "LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
 
 :link(lws,http://lammps.sandia.gov)
 :link(ld,Manual.html)
 :link(lc,Section_commands.html#comm)
 
 :line
 
 compute sna/atom command :h3
 compute snad/atom command :h3
 compute snav/atom command :h3
 
 [Syntax:]
 
 compute ID group-ID sna/atom rcutfac rfac0 twojmax R_1 R_2 ... w_1 w_2 ... keyword values ...
 compute ID group-ID snad/atom rcutfac rfac0 twojmax R_1 R_2 ... w_1 w_2 ... keyword values ...
 compute ID group-ID snav/atom rcutfac rfac0 twojmax R_1 R_2 ... w_1 w_2 ... keyword values ... :pre
 
 ID, group-ID are documented in "compute"_compute.html command :ulb,l
 sna/atom = style name of this compute command :l
 rcutfac = scale factor applied to all cutoff radii (positive real) :l
 rfac0 = parameter in distance to angle conversion (0 < rfac0 < 1) :l
 twojmax = band limit for bispectrum components (non-negative integer) :l
 R_1, R_2,... = list of cutoff radii, one for each type (distance units) :l
 w_1, w_2,... = list of neighbor weights, one for each type  :l
 zero or more keyword/value pairs may be appended :l
-keyword = {diagonal} or {rmin0} or {switchflag} :l
+keyword = {diagonal} or {rmin0} or {switchflag} or {bzeroflag} :l
   {diagonal} value = {0} or {1} or {2} or {3}
      {0} = all j1, j2, j <= twojmax, j2 <= j1
      {1} = subset satisfying j1 == j2
      {2} = subset satisfying j1 == j2 == j
      {3} = subset satisfying j2 <= j1 <= j
   {rmin0} value = parameter in distance to angle conversion (distance units)
   {switchflag} value = {0} or {1}
      {0} = do not use switching function
-     {1} = use switching function :pre
+     {1} = use switching function
+  {bzeroflag} value = {0} or {1}
+     {0} = do not subtract B0
+     {1} = subtract B0 :pre
 :ule
 
 [Examples:]
 
 compute b all sna/atom 1.4 0.99363 6 2.0 2.4 0.75 1.0 diagonal 3 rmin0 0.0
 compute db all snad/atom 1.4 0.95 6 2.0 1.0
 compute vb all snav/atom 1.4 0.95 6 2.0 1.0 :pre
 
 [Description:]
 
 Define a computation that calculates a set of bispectrum components
 for each atom in a group.
 
 Bispectrum components of an atom are order parameters characterizing
 the radial and angular distribution of neighbor atoms. The detailed
 mathematical definition is given in the paper by Thompson et
 al. "(Thompson)"_#Thompson20141.
 
 The position of a neighbor atom {i'} relative to a central atom {i} is
 a point within the 3D ball of radius {R_ii' = rcutfac*(R_i + R_i')}.
 
 Bartok et al. "(Bartok)"_#Bartok20101, proposed mapping this 3D ball
 onto the 3-sphere, the surface of the unit ball in a four-dimensional
 space.  The radial distance {r} within {R_ii'} is mapped on to a third
 polar angle {theta0} defined by,
 
 :c,image(Eqs/compute_sna_atom1.jpg)
 
 In this way, all possible neighbor positions are mapped on to a subset
 of the 3-sphere.  Points south of the latitude {theta0max=rfac0*Pi}
 are excluded.
 
 The natural basis for functions on the 3-sphere is formed by the 4D
 hyperspherical harmonics {U^j_m,m'(theta, phi, theta0).}  These
 functions are better known as {D^j_m,m',} the elements of the Wigner
 {D}-matrices "(Meremianin"_#Meremianin2006,
 "Varshalovich)"_#Varshalovich1987.
 
 The density of neighbors on the 3-sphere can be written as a sum of
 Dirac-delta functions, one for each neighbor, weighted by species and
 radial distance. Expanding this density function as a generalized
 Fourier series in the basis functions, we can write each Fourier
 coefficient as
 
 :c,image(Eqs/compute_sna_atom2.jpg)
 
 The {w_i'} neighbor weights are dimensionless numbers that are chosen
 to distinguish atoms of different types, while the central atom is
 arbitrarily assigned a unit weight.  The function {fc(r)} ensures that
 the contribution of each neighbor atom goes smoothly to zero at
 {R_ii'}:
 
 :c,image(Eqs/compute_sna_atom4.jpg)
 
 The expansion coefficients {u^j_m,m'} are complex-valued and they are
 not directly useful as descriptors, because they are not invariant
 under rotation of the polar coordinate frame. However, the following
 scalar triple products of expansion coefficients can be shown to be
 real-valued and invariant under rotation "(Bartok)"_#Bartok20101.
 
 :c,image(Eqs/compute_sna_atom3.jpg)
 
 The constants {H^jmm'_j1m1m1'_j2m2m2'} are coupling coefficients,
 analogous to Clebsch-Gordan coefficients for rotations on the
 2-sphere. These invariants are the components of the bispectrum and
 these are the quantities calculated by the compute {sna/atom}. They
 characterize the strength of density correlations at three points on
 the 3-sphere. The j2=0 subset form the power spectrum, which
 characterizes the correlations of two points. The lowest-order
 components describe the coarsest features of the density function,
 while higher-order components reflect finer detail.  Note that the
 central atom is included in the expansion, so three point-correlations
 can be either due to three neighbors, or two neighbors and the central
 atom.
 
 Compute {snad/atom} calculates the derivative of the bispectrum components
 summed separately for each atom type:
 
 :c,image(Eqs/compute_sna_atom5.jpg)
 
 The sum is over all atoms {i'} of atom type {I}.  For each atom {i},
 this compute evaluates the above expression for each direction, each
 atom type, and each bispectrum component.  See section below on output
 for a detailed explanation.
 
 Compute {snav/atom} calculates the virial contribution due to the
 derivatives:
 
 :c,image(Eqs/compute_sna_atom6.jpg)
 
 Again, the sum is over all atoms {i'} of atom type {I}.  For each atom
 {i}, this compute evaluates the above expression for each of the six
 virial components, each atom type, and each bispectrum component.  See
 section below on output for a detailed explanation.
 
 The value of all bispectrum components will be zero for atoms not in
 the group. Neighbor atoms not in the group do not contribute to the
 bispectrum of atoms in the group.
 
 The neighbor list needed to compute this quantity is constructed each
 time the calculation is performed (i.e. each time a snapshot of atoms
 is dumped).  Thus it can be inefficient to compute/dump this quantity
 too frequently.
 
 The argument {rcutfac} is a scale factor that controls the ratio of
 atomic radius to radial cutoff distance.
 
 The argument {rfac0} and the optional keyword {rmin0} define the
 linear mapping from radial distance to polar angle {theta0} on the
 3-sphere.
 
 The argument {twojmax} and the keyword {diagonal} define which
 bispectrum components are generated. See section below on output for a
 detailed explanation of the number of bispectrum components and the
 order in which they are listed.
 
 The keyword {switchflag} can be used to turn off the switching
 function.
 
+The keyword {bzeroflag} determines whether or not {B0}, the bispectrum
+components of an atom with no neighbors, are subtracted from
+the calculated bispectrum components. This optional keyword is only
+available for compute {sna/atom}, as {snad/atom} and {snav/atom}
+are unaffected by the removal of constant terms.
+
 NOTE: If you have a bonded system, then the settings of
 "special_bonds"_special_bonds.html command can remove pairwise
 interactions between atoms in the same bond, angle, or dihedral.  This
 is the default setting for the "special_bonds"_special_bonds.html
 command, and means those pairwise interactions do not appear in the
 neighbor list.  Because this compute uses the neighbor list, it also means
 those pairs will not be included in the calculation.  One way to get
 around this, is to write a dump file, and use the "rerun"_rerun.html
 command to compute the bispectrum components for snapshots in the dump
 file.  The rerun script can use a "special_bonds"_special_bonds.html
 command that includes all pairs in the neighbor list.
 
 :line
 
 [Output info:]
 
 Compute {sna/atom} calculates a per-atom array, each column
 corresponding to a particular bispectrum component.  The total number
 of columns and the identities of the bispectrum component contained in
 each column depend on the values of {twojmax} and {diagonal}, as
 described by the following piece of python code:
 
 for j1 in range(0,twojmax+1):
     if(diagonal==2):
         print j1/2.,j1/2.,j1/2.
     elif(diagonal==1):
         for j in range(0,min(twojmax,2*j1)+1,2):
             print j1/2.,j1/2.,j/2.
     elif(diagonal==0):
         for j2 in range(0,j1+1):
             for j in range(j1-j2,min(twojmax,j1+j2)+1,2):
                 print j1/2.,j2/2.,j/2.
     elif(diagonal==3):
         for j2 in range(0,j1+1):
             for j in range(j1-j2,min(twojmax,j1+j2)+1,2):
                 if (j>=j1): print j1/2.,j2/2.,j/2. :pre
 
 Compute {snad/atom} evaluates a per-atom array. The columns are
 arranged into {ntypes} blocks, listed in order of atom type {I}.  Each
 block contains three sub-blocks corresponding to the {x}, {y}, and {z}
 components of the atom position.  Each of these sub-blocks contains
 one column for each bispectrum component, the same as for compute
 {sna/atom}.
 
 Compute {snav/atom} evaluates a per-atom array. The columns are
 arranged into {ntypes} blocks, listed in order of atom type {I}.  Each
 block contains six sub-blocks corresponding to the {xx}, {yy}, {zz},
 {yz}, {xz}, and {xy} components of the virial tensor in Voigt
 notation.  Each of these sub-blocks contains one column for each
 bispectrum component, the same as for compute {sna/atom}.
 
 These values can be accessed by any command that uses per-atom values
 from a compute as input.  See "Section
 6.15"_Section_howto.html#howto_15 for an overview of LAMMPS output
 options.
 
 [Restrictions:]
 
 These computes are part of the SNAP package.  They are only enabled if
 LAMMPS was built with that package.  See the "Making
 LAMMPS"_Section_start.html#start_3 section for more info.
 
 [Related commands:]
 
 "pair_style snap"_pair_snap.html
 
 [Default:]
 
 The optional keyword defaults are {diagonal} = 0, {rmin0} = 0,
-{switchflag} = 1.
+{switchflag} = 1, {bzeroflag} = 0.
 
 :line
 
 :link(Thompson20141)
 [(Thompson)] Thompson, Swiler, Trott, Foiles, Tucker, under review, preprint
 available at "arXiv:1409.3880"_http://arxiv.org/abs/1409.3880
 
 :link(Bartok20101)
 [(Bartok)] Bartok, Payne, Risi, Csanyi, Phys Rev Lett, 104, 136403 (2010).
 
 :link(Meremianin2006)
 [(Meremianin)] Meremianin, J. Phys. A,  39, 3099 (2006).
 
 :link(Varshalovich1987)
 [(Varshalovich)] Varshalovich, Moskalev, Khersonskii, Quantum Theory
 of Angular Momentum, World Scientific, Singapore (1987).
diff --git a/doc/src/fix_deposit.txt b/doc/src/fix_deposit.txt
index a1dd5f643..477c14ea8 100644
--- a/doc/src/fix_deposit.txt
+++ b/doc/src/fix_deposit.txt
@@ -1,285 +1,291 @@
 "LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
 
 :link(lws,http://lammps.sandia.gov)
 :link(ld,Manual.html)
 :link(lc,Section_commands.html#comm)
 
 :line
 
 fix deposit command :h3
 
 [Syntax:]
 
 fix ID group-ID deposit N type M seed keyword values ... :pre
 
 ID, group-ID are documented in "fix"_fix.html command :ulb,l
 deposit = style name of this fix command :l
 N = # of atoms or molecules to insert :l
 type = atom type to assign to inserted atoms (offset for molecule insertion) :l
 M = insert a single atom or molecule every M steps :l
 seed = random # seed (positive integer) :l
 one or more keyword/value pairs may be appended to args :l
 keyword = {region} or {id} or {global} or {local} or {near} or {gaussian} or {attempt} or {rate} or {vx} or {vy} or {vz} or {target} or {mol} or {molfrac} or {rigid} or {shake} or {units} :l
   {region} value = region-ID
     region-ID = ID of region to use as insertion volume
   {id} value = {max} or {next}
     max = atom ID for new atom(s) is max ID of all current atoms plus one
     next = atom ID for new atom(s) increments by one for every deposition
   {global} values = lo hi
     lo,hi = put new atom/molecule a distance lo-hi above all other atoms (distance units)
   {local} values = lo hi delta
     lo,hi = put new atom/molecule a distance lo-hi above any nearby atom beneath it (distance units)
     delta = lateral distance within which a neighbor is considered "nearby" (distance units)
   {near} value = R
     R = only insert atom/molecule if further than R from existing particles (distance units)
   {gaussian} values = xmid ymid zmid sigma
     xmid,ymid,zmid = center of the gaussian distribution (distance units)
     sigma = width of gaussian distribution (distance units)
   {attempt} value = Q
     Q = attempt a single insertion up to Q times
   {rate} value = V
     V = z velocity (y in 2d) at which insertion volume moves (velocity units)
   {vx} values = vxlo vxhi
     vxlo,vxhi = range of x velocities for inserted atom/molecule (velocity units)
   {vy} values = vylo vyhi
     vylo,vyhi = range of y velocities for inserted atom/molecule (velocity units)
   {vz} values = vzlo vzhi
     vzlo,vzhi = range of z velocities for inserted atom/molecule (velocity units)
   {target} values = tx ty tz
     tx,ty,tz = location of target point (distance units)
   {mol} value = template-ID
     template-ID = ID of molecule template specified in a separate "molecule"_molecule.html command
   {molfrac} values = f1 f2 ... fN
     f1 to fN = relative probability of creating each of N molecules in template-ID
   {rigid} value = fix-ID
     fix-ID = ID of "fix rigid/small"_fix_rigid.html command
   {shake} value = fix-ID
     fix-ID = ID of "fix shake"_fix_shake.html command
   {units} value = {lattice} or {box}
     lattice = the geometry is defined in lattice units
     box = the geometry is defined in simulation box units :pre
 :ule
 
 [Examples:]
 
 fix 3 all deposit 1000 2 100 29494 region myblock local 1.0 1.0 1.0 units box
 fix 2 newatoms deposit 10000 1 500 12345 region disk near 2.0 vz -1.0 -0.8
 fix 4 sputter deposit 1000 2 500 12235 region sphere vz -1.0 -1.0 target 5.0 5.0 0.0 units lattice
 fix 5 insert deposit 200 2 100 777 region disk gaussian 5.0 5.0 9.0 1.0 units box :pre
 
 [Description:]
 
 Insert a single atom or molecule into the simulation domain every M
 timesteps until N atoms or molecules have been inserted.  This is
 useful for simulating deposition onto a surface.  For the remainder of
 this doc page, a single inserted atom or molecule is referred to as a
 "particle".
 
 If inserted particles are individual atoms, they are assigned the
 specified atom type.  If they are molecules, the type of each atom in
 the inserted molecule is specified in the file read by the
 "molecule"_molecule.html command, and those values are added to the
 specified atom type.  E.g. if the file specifies atom types 1,2,3, and
 those are the atom types you want for inserted molecules, then specify
 {type} = 0.  If you specify {type} = 2, the atoms in the inserted
 molecule will have atom types 3,4,5.
 
 All atoms in the inserted particle are assigned to two groups: the
 default group "all" and the group specified in the fix deposit command
 (which can also be "all").
 
 If you are computing temperature values which include inserted
 particles, you will want to use the
 "compute_modify"_compute_modify.html dynamic option, which insures the
 current number of atoms is used as a normalizing factor each time the
 temperature is computed.
 
 Care must be taken that inserted particles are not too near existing
 atoms, using the options described below.  When inserting particles
 above a surface in a non-periodic box (see the
 "boundary"_boundary.html command), the possibility of a particle
 escaping the surface and flying upward should be considered, since the
 particle may be lost or the box size may grow infinitely large.  A
 "fix wall/reflect"_fix_wall_reflect.html command can be used to
 prevent this behavior.  Note that if a shrink-wrap boundary is used,
 it is OK to insert the new particle outside the box, however the box
 will immediately be expanded to include the new particle. When
 simulating a sputtering experiment it is probably more realistic to
 ignore those atoms using the "thermo_modify"_thermo_modify.html
 command with the {lost ignore} option and a fixed
 "boundary"_boundary.html.
 
 The fix deposit command must use the {region} keyword to define an
 insertion volume.  The specified region must have been previously
 defined with a "region"_region.html command.  It must be defined with
 side = {in}.
 
 NOTE: LAMMPS checks that the specified region is wholly inside the
 simulation box.  It can do this correctly for orthonormal simulation
 boxes.  However for "triclinic boxes"_Section_howto.html#howto_12, it
 only tests against the larger orthonormal box that bounds the tilted
 simulation box.  If the specified region includes volume outside the
 tilted box, then an insertion will likely fail, leading to a "lost
 atoms" error.  Thus for triclinic boxes you should insure the
 specified region is wholly inside the simulation box.
 
 The locations of inserted particles are taken from uniformly distributed
 random numbers, unless the {gaussian} keyword is used. Then the
 individual coordinates are taken from a gaussian distribution of
 width {sigma} centered on {xmid,ymid,zmid}.
 
 Individual atoms are inserted, unless the {mol} keyword is used.  It
 specifies a {template-ID} previously defined using the
 "molecule"_molecule.html command, which reads files that define one or
 more molecules.  The coordinates, atom types, charges, etc, as well as
 any bond/angle/etc and special neighbor information for the molecule
 can be specified in the molecule file.  See the
 "molecule"_molecule.html command for details.  The only settings
 required to be in each file are the coordinates and types of atoms in
 the molecule.
 
 If the molecule template contains more than one molecule, the relative
 probability of depositing each molecule can be specified by the
 {molfrac} keyword.  N relative probabilities, each from 0.0 to 1.0, are
 specified, where N is the number of molecules in the template.  Each
 time a molecule is deposited, a random number is used to sample from
 the list of relative probabilities.  The N values must sum to 1.0.
 
 If you wish to insert molecules via the {mol} keyword, that will be
 treated as rigid bodies, use the {rigid} keyword, specifying as its
 value the ID of a separate "fix rigid/small"_fix_rigid.html
 command which also appears in your input script.
 
+NOTE: If you wish the new rigid molecules (and other rigid molecules)
+to be thermostatted correctly via "fix rigid/small/nvt"_fix_rigid.html
+or "fix rigid/small/npt"_fix_rigid.html, then you need to use the
+"fix_modify dynamic/dof yes" command for the rigid fix.  This is to
+inform that fix that the molecule count will vary dynamically.
+
 If you wish to insert molecules via the {mol} keyword, that will have
 their bonds or angles constrained via SHAKE, use the {shake} keyword,
 specifying as its value the ID of a separate "fix
 shake"_fix_shake.html command which also appears in your input script.
 
 Each timestep a particle is inserted, the coordinates for its atoms
 are chosen as follows.  For insertion of individual atoms, the
 "position" referred to in the following description is the coordinate
 of the atom.  For insertion of a molecule, the "position" is the
 geometric center of the molecule; see the "molecule"_molecule.html doc
 page for details.  A random rotation of the molecule around its center
 point is performed, which determines the coordinates of all the
 individual atoms.
 
 A random position within the region insertion volume is generated.  If
 neither the {global} nor {local} keyword is used, the random position
 is the trial position.  If the {global} keyword is used, the random
 x,y values are used, but the z position of the new particle is set
 above the highest current atom in the simulation by a distance
 randomly chosen between lo/hi.  (For a 2d simulation, this is done for
 the y position.)  If the {local} keyword is used, the z position is
 set a distance between lo/hi above the highest current atom in the
 simulation that is "nearby" the chosen x,y position.  In this context,
 "nearby" means the lateral distance (in x,y) between the new and old
 particles is less than the {delta} setting.
 
 Once a trial x,y,z position has been selected, the insertion is only
 performed if no current atom in the simulation is within a distance R
 of any atom in the new particle, including the effect of periodic
 boundary conditions if applicable.  R is defined by the {near}
 keyword.  Note that the default value for R is 0.0, which will allow
 atoms to strongly overlap if you are inserting where other atoms are
 present.  This distance test is performed independently for each atom
 in an inserted molecule, based on the randomly rotated configuration
 of the molecule.  If this test fails, a new random position within the
 insertion volume is chosen and another trial is made.  Up to Q
 attempts are made.  If the particle is not successfully inserted,
 LAMMPS prints a warning message.
 
 NOTE: If you are inserting finite size particles or a molecule or
 rigid body consisting of finite-size particles, then you should
 typically set R larger than the distance at which any inserted
 particle may overlap with either a previously inserted particle or an
 existing particle.  LAMMPS will issue a warning if R is smaller than
 this value, based on the radii of existing and inserted particles.
 
 The {rate} option moves the insertion volume in the z direction (3d)
 or y direction (2d).  This enables particles to be inserted from a
 successively higher height over time.  Note that this parameter is
 ignored if the {global} or {local} keywords are used, since those
 options choose a z-coordinate for insertion independently.
 
 The vx, vy, and vz components of velocity for the inserted particle
 are set using the values specified for the {vx}, {vy}, and {vz}
 keywords.  Note that normally, new particles should be assigned a
 negative vertical velocity so that they move towards the surface.  For
 molecules, the same velocity is given to every particle (no rotation
 or bond vibration).
 
 If the {target} option is used, the velocity vector of the inserted
 particle is changed so that it points from the insertion position
 towards the specified target point.  The magnitude of the velocity is
 unchanged.  This can be useful, for example, for simulating a
 sputtering process.  E.g. the target point can be far away, so that
 all incident particles strike the surface as if they are in an
 incident beam of particles at a prescribed angle.
 
 The {id} keyword determines how atom IDs and molecule IDs are assigned
 to newly deposited particles.  Molecule IDs are only assigned if
 molecules are being inserted.  For the {max} setting, the atom and
 molecule IDs of all current atoms are checked.  Atoms in the new
 particle are assigned IDs starting with the current maximum plus one.
 If a molecule is inserted it is assigned an ID = current maximum plus
 one.  This means that if particles leave the system, the new IDs may
 replace the lost ones.  For the {next} setting, the maximum ID of any
 atom and molecule is stored at the time the fix is defined.  Each time
 a new particle is added, this value is incremented to assign IDs to
 the new atom(s) or molecule.  Thus atom and molecule IDs for deposited
 particles will be consecutive even if particles leave the system over
 time.
 
 The {units} keyword determines the meaning of the distance units used
 for the other deposition parameters.  A {box} value selects standard
 distance units as defined by the "units"_units.html command,
 e.g. Angstroms for units = real or metal.  A {lattice} value means the
 distance units are in lattice spacings.  The "lattice"_lattice.html
 command must have been previously used to define the lattice spacing.
 Note that the units choice affects all the keyword values that have
 units of distance or velocity.
 
 NOTE: If you are monitoring the temperature of a system where the atom
 count is changing due to adding particles, you typically should use
 the "compute_modify dynamic yes"_compute_modify.html command for the
 temperature compute you are using.
 
 [Restart, fix_modify, output, run start/stop, minimize info:]
 
 This fix writes the state of the deposition to "binary restart
 files"_restart.html.  This includes information about how many
 particles have been deposited, the random number generator seed, the
 next timestep for deposition, etc.  See the
 "read_restart"_read_restart.html command for info on how to re-specify
 a fix in an input script that reads a restart file, so that the
 operation of the fix continues in an uninterrupted fashion.
 
 None of the "fix_modify"_fix_modify.html options are relevant to this
 fix.  No global or per-atom quantities are stored by this fix for
 access by various "output commands"_Section_howto.html#howto_15.  No
 parameter of this fix can be used with the {start/stop} keywords of
 the "run"_run.html command.  This fix is not invoked during "energy
 minimization"_minimize.html.
 
 [Restrictions:]
 
 This fix is part of the MISC package.  It is only enabled if LAMMPS
 was built with that package.  See the "Making
 LAMMPS"_Section_start.html#start_3 section for more info.
 
 The specified insertion region cannot be a "dynamic" region, as
 defined by the "region"_region.html command.
 
 [Related commands:]
 
 "fix pour"_fix_pour.html, "region"_region.html
 
 [Default:]
 
 Insertions are performed for individual atoms, i.e. no {mol} setting
 is defined.  If the {mol} keyword is used, the default for {molfrac}
 is an equal probability for all molecules in the template.
 Additional option defaults are id = max, delta = 0.0, near = 0.0,
 attempt = 10, rate = 0.0, vx = 0.0 0.0, vy = 0.0 0.0, vz = 0.0 0.0,
 and units = lattice.
diff --git a/doc/src/fix_gcmc.txt b/doc/src/fix_gcmc.txt
index 4eab458a6..1a419628e 100644
--- a/doc/src/fix_gcmc.txt
+++ b/doc/src/fix_gcmc.txt
@@ -1,368 +1,377 @@
 "LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
 
 :link(lws,http://lammps.sandia.gov)
 :link(ld,Manual.html)
 :link(lc,Section_commands.html#comm)
 
 :line
 
 fix gcmc command :h3
 
 [Syntax:]
 
 fix ID group-ID gcmc N X M type seed T mu displace keyword values ... :pre
 
 ID, group-ID are documented in "fix"_fix.html command :ulb,l
 gcmc = style name of this fix command :l
 N = invoke this fix every N steps :l
 X = average number of GCMC exchanges to attempt every N steps :l
 M = average number of MC moves to attempt every N steps :l
 type = atom type for inserted atoms (must be 0 if mol keyword used) :l
 seed = random # seed (positive integer) :l
 T = temperature of the ideal gas reservoir (temperature units) :l
 mu = chemical potential of the ideal gas reservoir (energy units) :l
 displace = maximum Monte Carlo translation distance (length units) :l
 zero or more keyword/value pairs may be appended to args :l
 keyword = {mol}, {rigid}, {shake}, {region}, {maxangle}, {pressure}, {fugacity_coeff}, {full_energy}, {charge}, {group}, {grouptype}, {intra_energy}, {tfac_insert}, or {overlap_cutoff}
   {mol} value = template-ID
     template-ID = ID of molecule template specified in a separate "molecule"_molecule.html command
+  {rigid} value = fix-ID
+    fix-ID = ID of "fix rigid/small"_fix_rigid.html command
   {shake} value = fix-ID
     fix-ID = ID of "fix shake"_fix_shake.html command
   {region} value = region-ID
     region-ID = ID of region where MC moves are allowed
   {maxangle} value = maximum molecular rotation angle (degrees)
   {pressure} value = pressure of the gas reservoir (pressure units)
   {fugacity_coeff} value = fugacity coefficient of the gas reservoir (unitless)
   {full_energy} = compute the entire system energy when performing MC moves
   {charge} value = charge of inserted atoms (charge units)
   {group} value = group-ID
     group-ID = group-ID for inserted atoms (string)
   {grouptype} values = type group-ID
     type = atom type (int)
     group-ID = group-ID for inserted atoms (string)
   {intra_energy} value = intramolecular energy (energy units)
   {tfac_insert} value = scale up/down temperature of inserted atoms (unitless)
   {overlap_cutoff} value = maximum pair distance for overlap rejection (distance units) :pre
 :ule
 
 [Examples:]
 
 fix 2 gas gcmc 10 1000 1000 2 29494 298.0 -0.5 0.01
 fix 3 water gcmc 10 100 100 0 3456543 3.0 -2.5 0.1 mol my_one_water maxangle 180 full_energy
 fix 4 my_gas gcmc 1 10 10 1 123456543 300.0 -12.5 1.0 region disk :pre
 
 [Description:]
 
 This fix performs grand canonical Monte Carlo (GCMC) exchanges of
 atoms or molecules of the given type with an imaginary ideal gas reservoir at
 the specified T and chemical potential (mu) as discussed in
 "(Frenkel)"_#Frenkel. If used with the "fix nvt"_fix_nh.html command,
 simulations in the grand canonical ensemble (muVT, constant chemical
 potential, constant volume, and constant temperature) can be
 performed.  Specific uses include computing isotherms in microporous
 materials, or computing vapor-liquid coexistence curves.
 
 Every N timesteps the fix attempts a number of GCMC exchanges (insertions
 or deletions) of gas atoms or molecules of
 the given type between the simulation cell and the imaginary
 reservoir. It also attempts a number of Monte Carlo
 moves (translations and molecule rotations) of gas of the given type
 within the simulation cell or region.  The average number of
 attempted GCMC exchanges is X. The average number of attempted MC moves is M.
 M should typically be chosen to be
 approximately equal to the expected number of gas atoms or molecules
 of the given type within the simulation cell or region,
 which will result in roughly one
 MC translation per atom or molecule per MC cycle.
 
 For MC moves of molecular gasses, rotations and translations are each
 attempted with 50% probability. For MC moves of atomic gasses,
 translations are attempted 100% of the time. For MC exchanges of
 either molecular or atomic gasses, deletions and insertions are each
 attempted with 50% probability.
 
 All inserted particles are always assigned to two groups: the default group
 "all" and the group specified in the fix gcmc command (which can also
 be "all"). In addition, particles are also added to any groups specified
 by the {group} and {grouptype} keywords.
 If inserted particles are individual atoms, they are
 assigned the atom type given by the type argument.  If they are molecules,
 the type argument has no effect and must be set to zero. Instead,
 the type of each atom in the inserted molecule is specified
 in the file read by the "molecule"_molecule.html command.
 
 This fix cannot be used to perform MC insertions of gas atoms or
 molecules other than the exchanged type, but MC deletions,
 translations, and rotations can be performed on any atom/molecule in
 the fix group.  All atoms in the simulation cell can be moved using
 regular time integration translations, e.g. via
 "fix nvt"_fix_nh.html, resulting in a hybrid GCMC+MD simulation. A
 smaller-than-usual timestep size may be needed when running such a
 hybrid simulation, especially if the inserted molecules are not well
 equilibrated.
 
 This command may optionally use the {region} keyword to define an
 exchange and move volume.  The specified region must have been
 previously defined with a "region"_region.html command.  It must be
 defined with side = {in}.  Insertion attempts occur only within the
 specified region. For non-rectangular regions, random trial
 points are generated within the rectangular bounding box until a point is found
 that lies inside the region. If no valid point is generated after 1000 trials,
 no insertion is performed, but it is counted as an attempted insertion.
 Move and deletion attempt candidates are selected
 from gas atoms or molecules within the region. If there are no candidates,
 no move or deletion is performed, but it is counted as an attempted move
 or deletion. If an attempted move places the atom or molecule center-of-mass outside
 the specified region, a new attempted move is generated. This process is repeated
 until the atom or molecule center-of-mass is inside the specified region.
 
 If used with "fix nvt"_fix_nh.html, the temperature of the imaginary
 reservoir, T, should be set to be equivalent to the target temperature
 used in fix nvt. Otherwise, the imaginary reservoir
 will not be in thermal equilibrium with the simulation cell. Also,
 it is important that the temperature used by fix nvt be dynamic,
 which can be achieved as follows:
 
 compute mdtemp mdatoms temp
 compute_modify mdtemp dynamic yes
 fix mdnvt mdatoms nvt temp 300.0 300.0 10.0
 fix_modify mdnvt temp mdtemp :pre
 
 Note that neighbor lists are re-built every timestep that this fix is
 invoked, so you should not set N to be too small.  However, periodic
 rebuilds are necessary in order to avoid dangerous rebuilds and missed
 interactions. Specifically, avoid performing so many MC translations
 per timestep that atoms can move beyond the neighbor list skin
 distance. See the "neighbor"_neighbor.html command for details.
 
 When an atom or molecule is to be inserted, its
 coordinates are chosen at a random position within the current
 simulation cell or region, and new atom velocities are randomly chosen from
 the specified temperature distribution given by T. The effective
 temperature for new atom velocities can be increased or decreased
 using the optional keyword {tfac_insert} (see below). Relative
 coordinates for atoms in a molecule are taken from the template
 molecule provided by the user. The center of mass of the molecule
 is placed at the insertion point. The orientation of the molecule
 is chosen at random by rotating about this point.
 
 Individual atoms are inserted, unless the {mol} keyword is used.  It
 specifies a {template-ID} previously defined using the
 "molecule"_molecule.html command, which reads a file that defines the
 molecule.  The coordinates, atom types, charges, etc, as well as any
 bond/angle/etc and special neighbor information for the molecule can
 be specified in the molecule file.  See the "molecule"_molecule.html
 command for details.  The only settings required to be in this file
 are the coordinates and types of atoms in the molecule.
 
 When not using the {mol} keyword, you should ensure you do not delete
 atoms that are bonded to other atoms, or LAMMPS will
 soon generate an error when it tries to find bonded neighbors.  LAMMPS will
 warn you if any of the atoms eligible for deletion have a non-zero
 molecule ID, but does not check for this at the time of deletion.
 
+If you wish to insert molecules via the {mol} keyword, that will be
+treated as rigid bodies, use the {rigid} keyword, specifying as its
+value the ID of a separate "fix rigid/small"_fix_rigid.html
+command which also appears in your input script.
+
+NOTE: If you wish the new rigid molecules (and other rigid molecules)
+to be thermostatted correctly via "fix rigid/small/nvt"_fix_rigid.html
+or "fix rigid/small/npt"_fix_rigid.html, then you need to use the
+"fix_modify dynamic/dof yes" command for the rigid fix.  This is to
+inform that fix that the molecule count will vary dynamically.
+
 If you wish to insert molecules via the {mol} keyword, that will have
 their bonds or angles constrained via SHAKE, use the {shake} keyword,
 specifying as its value the ID of a separate "fix
 shake"_fix_shake.html command which also appears in your input script.
 
 Optionally, users may specify the maximum rotation angle for
 molecular rotations using the {maxangle} keyword and specifying
 the angle in degrees. Rotations are performed by generating a random
 point on the unit sphere and a random rotation angle on the
 range \[0,maxangle). The molecule is then rotated by that angle about an
 axis passing through the molecule center of mass. The axis is parallel
 to the unit vector defined by the point on the unit sphere.
 The same procedure is used for randomly rotating molecules when they
 are inserted, except that the maximum angle is 360 degrees.
 
 Note that fix GCMC does not use configurational bias
 MC or any other kind of sampling of intramolecular degrees of freedom.
 Inserted molecules can have different orientations, but they will all
 have the same intramolecular configuration,
 which was specified in the molecule command input.
 
 For atomic gasses, inserted atoms have the specified atom type, but
 deleted atoms are any atoms that have been inserted or that belong
 to the user-specified fix group. For molecular gasses, exchanged
 molecules use the same atom types as in the template molecule
 supplied by the user.  In both cases, exchanged
 atoms/molecules are assigned to two groups: the default group "all"
 and the group specified in the fix gcmc command (which can also be
 "all").
 
 The gas reservoir pressure can be specified using the {pressure}
 keyword, in which case the user-specified chemical potential is
 ignored. For non-ideal gas reservoirs, the user may also specify the
 fugacity coefficient using the {fugacity_coeff} keyword.
 
 The {full_energy} option means that fix GCMC will compute the total
 potential energy of the entire simulated system. The total system
 energy before and after the proposed GCMC move is then used in the
 Metropolis criterion to determine whether or not to accept the
 proposed GCMC move. By default, this option is off, in which case
 only partial energies are computed to determine the difference in
 energy that would be caused by the proposed GCMC move.
 
 The {full_energy} option is needed for systems with complicated
 potential energy calculations, including the following:
 
   long-range electrostatics (kspace)
   many-body pair styles
   hybrid pair styles
   eam pair styles
   triclinic systems
   need to include potential energy contributions from other fixes :ul
 
 In these cases, LAMMPS will automatically apply the {full_energy}
 keyword and issue a warning message.
 
 When the {mol} keyword is used, the {full_energy} option also includes
 the intramolecular energy of inserted and deleted molecules. If this
 is not desired, the {intra_energy} keyword can be used to define an
 amount of energy that is subtracted from the final energy when a molecule
 is inserted, and added to the initial energy when a molecule is
 deleted. For molecules that have a non-zero intramolecular energy, this
 will ensure roughly the same behavior whether or not the {full_energy}
 option is used.
 
 Inserted atoms and molecules are assigned random velocities based on the
 specified temperature T. Because the relative velocity of
 all atoms in the molecule is zero, this may result in inserted molecules
 that are systematically too cold. In addition, the intramolecular potential
 energy of the inserted molecule may cause the kinetic energy
 of the molecule to quickly increase or decrease after insertion.
 The {tfac_insert} keyword allows the user to counteract these effects
 by changing the temperature used to assign velocities to
 inserted atoms and molecules by a constant factor. For a
 particular application, some experimentation may be required
 to find a value of {tfac_insert} that results in inserted molecules that
 equilibrate quickly to the correct temperature.
 
 Some fixes have an associated potential energy. Examples of such fixes
 include: "efield"_fix_efield.html, "gravity"_fix_gravity.html,
 "addforce"_fix_addforce.html, "langevin"_fix_langevin.html,
 "restrain"_fix_restrain.html, "temp/berendsen"_fix_temp_berendsen.html,
 "temp/rescale"_fix_temp_rescale.html, and "wall fixes"_fix_wall.html.
 For that energy to be included in the total potential energy of the
 system (the quantity used when performing GCMC moves),
 you MUST enable the "fix_modify"_fix_modify.html {energy} option for
 that fix.  The doc pages for individual "fix"_fix.html commands
 specify if this should be done.
 
 Use the {charge} option to insert atoms with a user-specified point
 charge. Note that doing so will cause the system to become non-neutral.
 LAMMPS issues a warning when using long-range electrostatics (kspace)
 with non-neutral systems. See the
 "compute group/group"_compute_group_group.html documentation for more
 details about simulating non-neutral systems with kspace on.
 
 Use of this fix typically will cause the number of atoms to fluctuate,
 therefore, you will want to use the
 "compute_modify"_compute_modify.html command to insure that the
 current number of atoms is used as a normalizing factor each time
 temperature is computed.  Here is the necessary command:
 
 compute_modify thermo_temp dynamic yes :pre
 
 With some pair_styles, such as "Buckingham"_pair_buck.html,
 "Born-Mayer-Huggins"_pair_born.html and "ReaxFF"_pair_reax_c.html,
 two atoms placed close to each other may have an arbitrarily large,
 negative potential energy due to the functional form of the potential.
 While these unphysical configurations are inaccessible
 to typical dynamical trajectories,
 they can be generated by Monte Carlo moves. The {overlap_cutoff}
 keyword suppresses these moves by effectively assigning an
 infinite positive energy to all new configurations that place any
 pair of atoms closer than the specified overlap cutoff distance.
 
 If LJ units are used, note that a value of 0.18292026 is used by this
 fix as the reduced value for Planck's constant.  This value was
 derived from LJ parameters for argon, where h* = h/sqrt(sigma^2 *
 epsilon * mass), sigma = 3.429 angstroms, epsilon/k = 121.85 K, and
 mass = 39.948 amu.
 
 The {group} keyword assigns all inserted atoms to the "group"_group.html
 of the group-ID value. The {grouptype} keyword assigns all
 inserted atoms of the specified type to the "group"_group.html
 of the group-ID value.
 
 [Restart, fix_modify, output, run start/stop, minimize info:]
 
 This fix writes the state of the fix to "binary restart
 files"_restart.html.  This includes information about the random
 number generator seed, the next timestep for MC exchanges, etc.  See
 the "read_restart"_read_restart.html command for info on how to
 re-specify a fix in an input script that reads a restart file, so that
 the operation of the fix continues in an uninterrupted fashion.
 
 None of the "fix_modify"_fix_modify.html options are relevant to this
 fix.
 
 This fix computes a global vector of length 8, which can be accessed
 by various "output commands"_Section_howto.html#howto_15.  The vector
 values are the following global cumulative quantities:
 
 1 = translation attempts
 2 = translation successes
 3 = insertion attempts
 4 = insertion successes
 5 = deletion attempts
 6 = deletion successes
 7 = rotation attempts
 8 = rotation successes :ul
 
 The vector values calculated by this fix are "extensive".
 
 No parameter of this fix can be used with the {start/stop} keywords of
 the "run"_run.html command.  This fix is not invoked during "energy
 minimization"_minimize.html.
 
 [Restrictions:]
 
 This fix is part of the MC package.  It is only enabled if LAMMPS was
 built with that package.  See the "Making
 LAMMPS"_Section_start.html#start_3 section for more info.
 
 Do not set "neigh_modify once yes" or else this fix will never be
 called.  Reneighboring is required.
 
 Can be run in parallel, but aspects of the GCMC part will not scale
 well in parallel. Only usable for 3D simulations.
 
 Note that very lengthy simulations involving insertions/deletions of
 billions of gas molecules may run out of atom or molecule IDs and
 trigger an error, so it is better to run multiple shorter-duration
 simulations. Likewise, very large molecules have not been tested
 and may turn out to be problematic.
 
 Use of multiple fix gcmc commands in the same input script can be
 problematic if using a template molecule. The issue is that the
 user-referenced template molecule in the second fix gcmc command
 may no longer exist since it might have been deleted by the first
 fix gcmc command. An existing template molecule will need to be
 referenced by the user for each subsequent fix gcmc command.
 
-Because molecule insertion does not work in combination with
-fix rigid, simulataneous use of fix rigid or fix rigid/small
-with this fix is not allowed.
-
 [Related commands:]
 
 "fix atom/swap"_fix_atom_swap.html,
 "fix nvt"_fix_nh.html, "neighbor"_neighbor.html,
 "fix deposit"_fix_deposit.html, "fix evaporate"_fix_evaporate.html,
 "delete_atoms"_delete_atoms.html
 
 [Default:]
 
 The option defaults are mol = no, maxangle = 10, overlap_cutoff = 0.0,
 and full_energy = no,
 except for the situations where full_energy is required, as
 listed above.
 
 :line
 
 :link(Frenkel)
 [(Frenkel)] Frenkel and Smit, Understanding Molecular Simulation,
 Academic Press, London, 2002.
diff --git a/doc/src/fix_modify.txt b/doc/src/fix_modify.txt
index 9c95cdc45..5e097bb34 100644
--- a/doc/src/fix_modify.txt
+++ b/doc/src/fix_modify.txt
@@ -1,91 +1,114 @@
 "LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
 
 :link(lws,http://lammps.sandia.gov)
 :link(ld,Manual.html)
 :link(lc,Section_commands.html#comm)
 
 :line
 
 fix_modify command :h3
 
 [Syntax:]
 
 fix_modify fix-ID keyword value ... :pre
 
 fix-ID = ID of the fix to modify :ulb,l
 one or more keyword/value pairs may be appended :l
-keyword = {temp} or {press} or {energy} or {respa} :l
+keyword = {temp} or {press} or {energy} or {respa} or {dynamic/dof} :l
   {temp} value = compute ID that calculates a temperature
   {press} value = compute ID that calculates a pressure
   {energy} value = {yes} or {no}
-  {respa} value = {1} to {max respa level} or {0} (for outermost level) :pre
+  {respa} value = {1} to {max respa level} or {0} (for outermost level)
+  {dynamic/dof} value = {yes} or {no}
+    yes/no = do or do not recompute the number of degrees of freedom (DOF) contributing to the temperature :pre
 :ule
 
 [Examples:]
 
 fix_modify 3 temp myTemp press myPress
 fix_modify 1 energy yes
 fix_modify tether respa 2 :pre
 
 [Description:]
 
 Modify one or more parameters of a previously defined fix.  Only
 specific fix styles support specific parameters.  See the doc pages
 for individual fix commands for info on which ones support which
 fix_modify parameters.
 
 The {temp} keyword is used to determine how a fix computes
 temperature.  The specified compute ID must have been previously
 defined by the user via the "compute"_compute.html command and it must
 be a style of compute that calculates a temperature.  All fixes that
 compute temperatures define their own compute by default, as described
 in their documentation.  Thus this option allows the user to override
 the default method for computing T.
 
 The {press} keyword is used to determine how a fix computes pressure.
 The specified compute ID must have been previously defined by the user
 via the "compute"_compute.html command and it must be a style of
 compute that calculates a pressure.  All fixes that compute pressures
 define their own compute by default, as described in their
 documentation.  Thus this option allows the user to override the
 default method for computing P.
 
 For fixes that calculate a contribution to the potential energy of the
 system, the {energy} keyword will include that contribution in
 thermodynamic output of potential energy.  This is because the {energy
 yes} setting must be specified to include the fix's global or per-atom
 energy in the calculation performed by the "compute
 pe"_compute_pe.html or "compute pe/atom"_compute_pe_atom.html
 commands.  See the "thermo_style"_thermo_style.html command for info
 on how potential energy is output.  For fixes that tally a global
 energy, it can be printed by using the keyword f_ID in the
 thermo_style custom command, where ID is the fix-ID of the appropriate
 fix.
 
 NOTE: You must also specify the {energy yes} setting for a fix if you
 are using it when performing an "energy minimization"_minimize.html
 and if you want the energy and forces it produces to be part of the
 optimization criteria.
 
 For fixes that set or modify forces, it may be possible to select at
 which "r-RESPA"_run_style.html level the fix operates via the {respa}
 keyword. The RESPA level at which the fix is active can be selected.
 This is a number ranging from 1 to the number of levels. If the RESPA
 level is larger than the current maximum, the outermost level will be
 used, which is also the default setting. This default can be restored
 using a value of {0} for the RESPA level. The affected fix has to be
 enabled to support this feature; if not, {fix_modify} will report an
 error. Active fixes with a custom RESPA level setting are reported
 with their specified level at the beginning of a r-RESPA run.
 
+The {dynamic/dof} keyword determines whether the number of atoms N in
+the fix group and their associated degrees of freedom are re-computed
+each time a temperature is computed.  Only fix styles that calculate
+their own internal temperature use this option.  Currently this is
+only the "fix rigid/nvt/small"_fix_rigid.html and "fix
+rigid/npt/small"_fix_rigid.html commands for the purpose of
+thermostatting rigid body translation and rotation.  By default, N and
+their DOF are assumed to be constant.  If you are adding atoms or
+molecules to the system (see the "fix pour"_fix_pour.html, "fix
+deposit"_fix_deposit.html, and "fix gcmc"_fix_gcmc.html commands) or
+expect atoms or molecules to be lost (e.g. due to exiting the
+simulation box or via "fix evaporate"_fix_evaporate.html), then
+this option should be used to ensure the temperature is correctly
+normalized.
+
+NOTE: Other thermostatting fixes, such as "fix nvt"_fix_nh.html, do
+not use the {dynamic/dof} keyword because they use a temperature
+compute to calculate temperature.  See the "compute_modify
+dynamic/dof"_compute_modify.html command for a similar way to ensure
+correct temperature normalization for those thermostats.
+
 [Restrictions:] none
 
 [Related commands:]
 
 "fix"_fix.html, "compute temp"_compute_temp.html, "compute
 pressure"_compute_pressure.html, "thermo_style"_thermo_style.html
 
 [Default:]
 
 The option defaults are temp = ID defined by fix, press = ID defined
 by fix, energy = no, respa = 0.
diff --git a/doc/src/fix_pour.txt b/doc/src/fix_pour.txt
index f2c10184d..54f78287e 100644
--- a/doc/src/fix_pour.txt
+++ b/doc/src/fix_pour.txt
@@ -1,264 +1,270 @@
 "LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
 
 :link(lws,http://lammps.sandia.gov)
 :link(ld,Manual.html)
 :link(lc,Section_commands.html#comm)
 
 :line
 
 fix pour command :h3
 
 [Syntax:]
 
 fix ID group-ID pour N type seed keyword values ... :pre
 
 ID, group-ID are documented in "fix"_fix.html command :ulb,l
 pour = style name of this fix command :l
 N = # of particles to insert :l
 type = atom type to assign to inserted particles (offset for molecule insertion) :l
 seed = random # seed (positive integer) :l
 one or more keyword/value pairs may be appended to args :l
 keyword = {region} or {diam} or {vol} or {rate} or {dens} or {vel} or {mol} or {rigid} or {shake} or {ignore} :l
   {region} value = region-ID
     region-ID = ID of region to use as insertion volume
   {diam} values = dstyle args
     dstyle = {one} or {range} or {poly}
       {one} args = D
         D = single diameter for inserted particles (distance units)
       {range} args = Dlo Dhi
         Dlo,Dhi = range of diameters for inserted particles (distance units)
       {poly} args = Npoly D1 P1 D2 P2 ...
         Npoly = # of (D,P) pairs
         D1,D2,... = diameter for subset of inserted particles (distance units)
         P1,P2,... = percentage of inserted particles with this diameter (0-1)
   {id} values = idflag
     idflag = {max} or {next} = how to choose IDs for inserted particles and molecules
   {vol} values = fraction Nattempt
     fraction = desired volume fraction for filling insertion volume
     Nattempt = max # of insertion attempts per particle
   {rate} value = V
     V = z velocity (3d) or y velocity (2d) at which
         insertion volume moves (velocity units)
   {dens} values = Rholo Rhohi
     Rholo,Rhohi = range of densities for inserted particles (mass/volume units)
   {vel} values (3d) = vxlo vxhi vylo vyhi vz
   {vel} values (2d) = vxlo vxhi vy
     vxlo,vxhi = range of x velocities for inserted particles (velocity units)
     vylo,vyhi = range of y velocities for inserted particles (velocity units)
     vz = z velocity (3d) assigned to inserted particles (velocity units)
     vy = y velocity (2d) assigned to inserted particles (velocity units)
   {mol} value = template-ID
     template-ID = ID of molecule template specified in a separate "molecule"_molecule.html command
   {molfrac} values = f1 f2 ... fN
     f1 to fN = relative probability of creating each of N molecules in template-ID
   {rigid} value = fix-ID
     fix-ID = ID of "fix rigid/small"_fix_rigid.html command
   {shake} value = fix-ID
     fix-ID = ID of "fix shake"_fix_shake.html command
   {ignore} value = none
     skip any line or triangle particles when detecting possible
       overlaps with inserted particles :pre
 :ule
 
 [Examples:]
 
 fix 3 all pour 1000 2 29494 region myblock
 fix 2 all pour 10000 1 19985583 region disk vol 0.33 100 rate 1.0 diam range 0.9 1.1
 fix 2 all pour 10000 1 19985583 region disk diam poly 2 0.7 0.4 1.5 0.6
 fix ins all pour 500 1 4767548 vol 0.8 10 region slab mol object rigid myRigid :pre
 
 [Description:]
 
 Insert finite-size particles or molecules into the simulation box
 every few timesteps within a specified region until N particles or
 molecules have been inserted.  This is typically used to model the
 pouring of granular particles into a container under the influence of
 gravity.  For the remainder of this doc page, a single inserted atom
 or molecule is referred to as a "particle".
 
 If inserted particles are individual atoms, they are assigned the
 specified atom type.  If they are molecules, the type of each atom in
 the inserted molecule is specified in the file read by the
 "molecule"_molecule.html command, and those values are added to the
 specified atom type.  E.g. if the file specifies atom types 1,2,3, and
 those are the atom types you want for inserted molecules, then specify
 {type} = 0.  If you specify {type} = 2, the in the inserted molecule
 will have atom types 3,4,5.
 
 All atoms in the inserted particle are assigned to two groups: the
 default group "all" and the group specified in the fix pour command
 (which can also be "all").
 
 This command must use the {region} keyword to define an insertion
 volume.  The specified region must have been previously defined with a
 "region"_region.html command.  It must be of type {block} or a z-axis
 {cylinder} and must be defined with side = {in}.  The cylinder style
 of region can only be used with 3d simulations.
 
 Individual atoms are inserted, unless the {mol} keyword is used.  It
 specifies a {template-ID} previously defined using the
 "molecule"_molecule.html command, which reads a file that defines the
 molecule.  The coordinates, atom types, center-of-mass, moments of
 inertia, etc, as well as any bond/angle/etc and special neighbor
 information for the molecule can be specified in the molecule file.
 See the "molecule"_molecule.html command for details.  The only
 settings required to be in this file are the coordinates and types of
 atoms in the molecule.
 
 If the molecule template contains more than one molecule, the relative
 probability of depositing each molecule can be specified by the
 {molfrac} keyword.  N relative probabilities, each from 0.0 to 1.0, are
 specified, where N is the number of molecules in the template.  Each
 time a molecule is inserted, a random number is used to sample from
 the list of relative probabilities.  The N values must sum to 1.0.
 
 If you wish to insert molecules via the {mol} keyword, that will be
 treated as rigid bodies, use the {rigid} keyword, specifying as its
 value the ID of a separate "fix rigid/small"_fix_rigid.html
 command which also appears in your input script.
 
+NOTE: If you wish the new rigid molecules (and other rigid molecules)
+to be thermostatted correctly via "fix rigid/nvt/small"_fix_rigid.html
+or "fix rigid/npt/small"_fix_rigid.html, then you need to use the
+"fix_modify dynamic/dof yes"_fix_modify.html command for the rigid
+fix.  This informs that fix that the molecule count varies dynamically.
+
 If you wish to insert molecules via the {mol} keyword, that will have
 their bonds or angles constrained via SHAKE, use the {shake} keyword,
 specifying as its value the ID of a separate "fix
 shake"_fix_shake.html command which also appears in your input script.
 
 Each timestep particles are inserted, they are placed randomly inside
 the insertion volume so as to mimic a stream of poured particles.  If
 they are molecules they are also oriented randomly.  Each atom in the
 particle is tested for overlaps with existing particles, including
 effects due to periodic boundary conditions if applicable.  If an
 overlap is detected, another random insertion attempt is made; see the
 {vol} keyword discussion below.  The larger the volume of the
 insertion region, the more particles that can be inserted at any one
 timestep.  Particles are inserted again after enough time has elapsed
 that the previously inserted particles fall out of the insertion
 volume under the influence of gravity.  Insertions continue every so
 many timesteps until the desired # of particles has been inserted.
 
 NOTE: If you are monitoring the temperature of a system where the
 particle count is changing due to adding particles, you typically
 should use the "compute_modify dynamic yes"_compute_modify.html
 command for the temperature compute you are using.
 
 :line
 
 All other keywords are optional with defaults as shown below.
 
 The {diam} option is only used when inserting atoms and specifies the
 diameters of inserted particles.  There are 3 styles: {one}, {range},
 or {poly}.  For {one}, all particles will have diameter {D}.  For
 {range}, the diameter of each particle will be chosen randomly and
 uniformly between the specified {Dlo} and {Dhi} bounds.  For {poly}, a
 series of {Npoly} diameters is specified.  For each diameter a
 percentage value from 0.0 to 1.0 is also specified.  The {Npoly}
 percentages must sum to 1.0.  For the example shown above with "diam 2
 0.7 0.4 1.5 0.6", all inserted particles will have a diameter of 0.7
 or 1.5.  40% of the particles will be small; 60% will be large.
 
 Note that for molecule insertion, the diameters of individual atoms in
 the molecule can be specified in the file read by the
 "molecule"_molecule.html command.  If not specified, the diameter of
 each atom in the molecule has a default diameter of 1.0.
 
 The {id} option has two settings which are used to determine the atom
 or molecule IDs to assign to inserted particles/molecules.  In both
 cases a check is done of the current system to find the maximum
 current atom and molecule ID of any existing particle.  Newly inserted
 particles and molecules are assigned IDs that increment those max
 values.  For the {max} setting, which is the default, this check is
 done at every insertion step, which allows for particles to leave the
 system, and their IDs to potentially be re-used.  For the {next}
 setting this check is done only once when the fix is specified, which
 can be more efficient if you are sure particles will not be added in
 some other way.
 
 The {vol} option specifies what volume fraction of the insertion
 volume will be filled with particles.  For particles with a size
 specified by the {diam range} keyword, they are assumed to all be of
 maximum diameter {Dhi} for purposes of computing their contribution to
 the volume fraction.
 
 The higher the volume fraction value, the more particles are inserted
 each timestep.  Since inserted particles cannot overlap, the maximum
 volume fraction should be no higher than about 0.6.  Each timestep
 particles are inserted, LAMMPS will make up to a total of M tries to
 insert the new particles without overlaps, where M = # of inserted
 particles * Nattempt.  If LAMMPS is unsuccessful at completing all
 insertions, it prints a warning.
 
 The {dens} and {vel} options enable inserted particles to have a range
 of densities or xy velocities.  The specific values for a particular
 inserted particle will be chosen randomly and uniformly between the
 specified bounds.  Internally, the density value for a particle is
 converted to a mass, based on the radius (volume) of the particle.
 The {vz} or {vy} value for option {vel} assigns a z-velocity (3d) or
 y-velocity (2d) to each inserted particle.
 
 The {rate} option moves the insertion volume in the z direction (3d)
 or y direction (2d).  This enables pouring particles from a
 successively higher height over time.
 
 The {ignore} option is useful when running a simulation that used line
 segment (2d) or triangle (3d) particles, typically to define
 boundaries for spherical granular particles to interact with.  See the
 "atom_style line or tri"_atom_style.html command for details.  Lines
 and triangles store their size, and if the size is large it may
 overlap (in a spherical sense) with the insertion region, even if the
 line/triangle is oriented such that there is no actual overlap.  This
 can prevent particles from being inserted.  The {ignore} keyword
 causes the overlap check to skip any line or triangle particles.
 Obviously you should only use it if there is in fact no overlap of the
 line or triangle particles with the insertion region.
 
 :line
 
 [Restart, fix_modify, output, run start/stop, minimize info:]
 
 No information about this fix is written to "binary restart
 files"_restart.html.  This means you must be careful when restarting a
 pouring simulation, when the restart file was written in the middle of
 the pouring operation.  Specifically, you should use a new fix pour
 command in the input script for the restarted simulation that
 continues the operation.  You will need to adjust the arguments of the
 original fix pour command to do this.
 
 Also note that because the state of the random number generator is not
 saved in restart files, you cannot do "exact" restarts with this fix,
 where the simulation continues on the same as if no restart had taken
 place.  However, in a statistical sense, a restarted simulation should
 produce the same behavior if you adjust the fix pour parameters
 appropriately.
 
 None of the "fix_modify"_fix_modify.html options are relevant to this
 fix.  No global or per-atom quantities are stored by this fix for
 access by various "output commands"_Section_howto.html#howto_15.  No
 parameter of this fix can be used with the {start/stop} keywords of
 the "run"_run.html command.  This fix is not invoked during "energy
 minimization"_minimize.html.
 
 [Restrictions:]
 
 This fix is part of the GRANULAR package.  It is only enabled if
 LAMMPS was built with that package.  See the "Making
 LAMMPS"_Section_start.html#start_3 section for more info.
 
 For 3d simulations, a gravity fix in the -z direction must be defined
 for use in conjunction with this fix.  For 2d simulations, gravity
 must be defined in the -y direction.
 
 The specified insertion region cannot be a "dynamic" region, as
 defined by the "region"_region.html command.
 
 [Related commands:]
 
 "fix deposit"_fix_deposit.html, "fix gravity"_fix_gravity.html,
 "region"_region.html
 
 [Default:]
 
 Insertions are performed for individual particles, i.e. no {mol}
 setting is defined.  If the {mol} keyword is used, the default for
 {molfrac} is an equal probabilities for all molecules in the template.
 Additional option defaults are diam = one 1.0, dens = 1.0 1.0, vol =
 0.25 50, rate = 0.0, vel = 0.0 0.0 0.0 0.0 0.0 (for 3d), vel = 0.0 0.0 0.0
 (for 2d), and id = max.
diff --git a/doc/src/pair_snap.txt b/doc/src/pair_snap.txt
index a24c6c316..ab7313832 100644
--- a/doc/src/pair_snap.txt
+++ b/doc/src/pair_snap.txt
@@ -1,192 +1,193 @@
 "LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
 
 :link(lws,http://lammps.sandia.gov)
 :link(ld,Manual.html)
 :link(lc,Section_commands.html#comm)
 
 :line
 
 pair_style snap command :h3
 
 [Syntax:]
 
 pair_style snap :pre
 
 [Examples:]
 
 pair_style snap
 pair_coeff * * InP.snapcoeff In P InP.snapparam In In P P :pre
 
 [Description:]
 
 Style {snap} computes interactions
 using the spectral neighbor analysis potential (SNAP)
 "(Thompson)"_#Thompson20142. Like the GAP framework of Bartok et al.
 "(Bartok2010)"_#Bartok20102, "(Bartok2013)"_#Bartok2013
 it uses bispectrum components
 to characterize the local neighborhood of each atom
 in a very general way. The mathematical definition of the
 bispectrum calculation used by SNAP is identical
 to that used by "compute sna/atom"_compute_sna_atom.html.
 In SNAP, the total energy is decomposed into a sum over
 atom energies. The energy of atom {i } is
 expressed as a weighted sum over bispectrum components.
 
 :c,image(Eqs/pair_snap.jpg)
 
 where {B_k^i} is the {k}-th bispectrum component of atom {i},
 and {beta_k^alpha_i} is the corresponding linear coefficient
 that depends on {alpha_i}, the SNAP element of atom {i}. The
 number of bispectrum components used and their definitions
 depend on the values of {twojmax} and {diagonalstyle}
 defined in the SNAP parameter file described below.
 The bispectrum calculation is described in more detail
 in "compute sna/atom"_compute_sna_atom.html.
 
 Note that unlike for other potentials, cutoffs for SNAP potentials are
 not set in the pair_style or pair_coeff command; they are specified in
 the SNAP potential files themselves.
 
 Only a single pair_coeff command is used with the {snap} style which
 specifies two SNAP files and the list SNAP element(s) to be
 extracted.
 The SNAP elements are mapped to LAMMPS atom types by specifying
 N additional arguments after the 2nd filename in the pair_coeff
 command, where N is the number of LAMMPS atom types:
 
 SNAP element file
 Elem1, Elem2, ...
 SNAP parameter file
 N element names = mapping of SNAP elements to atom types :ul
 
 As an example, if a LAMMPS indium phosphide simulation has 4 atoms
 types, with the first two being indium and the 3rd and 4th being
 phophorous, the pair_coeff command would look like this:
 
 pair_coeff * * snap InP.snapcoeff In P InP.snapparam In In P P :pre
 
 The 1st 2 arguments must be * * so as to span all LAMMPS atom types.
 The two filenames are for the element and parameter files, respectively.
 The 'In' and 'P' arguments (between the file names) are the two elements
 which will be extracted from the element file. The
 two trailing 'In' arguments map LAMMPS atom types 1 and 2 to the
 SNAP 'In' element. The two trailing 'P' arguments map LAMMPS atom types
 3 and 4 to the SNAP 'P' element.
 
 If a SNAP mapping value is
 specified as NULL, the mapping is not performed.
 This can be used when a {snap} potential is used as part of the
 {hybrid} pair style.  The NULL values are placeholders for atom types
 that will be used with other potentials.
 
 The name of the SNAP element file usually ends in the
 ".snapcoeff" extension. It may contain coefficients
 for many SNAP elements.
 Only those elements listed in the pair_coeff command are extracted.
 The name of the SNAP parameter file usually ends in the ".snapparam"
 extension. It contains a small number
 of parameters that define the overall form of the SNAP potential.
 See the "pair_coeff"_pair_coeff.html doc page for alternate ways
 to specify the path for these files.
 
 Quite commonly,
 SNAP potentials are combined with one or more other LAMMPS pair styles
 using the {hybrid/overlay} pair style. As an example, the SNAP
 tantalum potential provided in the LAMMPS potentials directory
 combines the {snap} and {zbl} pair styles. It is invoked
 by the following commands:
 
         variable zblcutinner equal 4
         variable zblcutouter equal 4.8
         variable zblz equal 73
         pair_style hybrid/overlay &
         zbl $\{zblcutinner\} $\{zblcutouter\} snap
         pair_coeff * * zbl 0.0
         pair_coeff 1 1 zbl $\{zblz\}
         pair_coeff * * snap ../potentials/Ta06A.snapcoeff Ta &
         ../potentials/Ta06A.snapparam Ta :pre
 
 It is convenient to keep these commands in a separate file that can
 be inserted in any LAMMPS input script using the "include"_include.html
 command.
 
 The top of the SNAP element file can contain any number of blank and comment
 lines (start with #), but follows a strict
 format after that. The first non-blank non-comment
 line must contain two integers:
 
 nelem  = Number of elements
 ncoeff = Number of coefficients :ul
 
 This is followed by one block for each of the {nelem} elements.
 The first line of each block contains three entries:
 
 Element symbol (text string)
 R = Element radius (distance units)
 w = Element weight (dimensionless) :ul
 
 This line is followed by {ncoeff} coefficients, one per line.
 
 The SNAP parameter file can contain blank and comment lines (start
 with #) anywhere. Each non-blank non-comment line must contain one
 keyword/value pair. The required keywords are {rcutfac} and
 {twojmax}. Optional keywords are {rfac0}, {rmin0}, {diagonalstyle},
-and {switchflag}.
+{switchflag}, and {bzeroflag}.
 
 The default values for these keywords are
 
 {rfac0} = 0.99363
 {rmin0} = 0.0
 {diagonalstyle} = 3
-{switchflag} = 0 :ul
+{switchflag} = 0
+{bzeroflag} = 1 :ul
 
 Detailed definitions of these keywords are given on the "compute
 sna/atom"_compute_sna_atom.html doc page.
 
 :line
 
 [Mixing, shift, table, tail correction, restart, rRESPA info]:
 
 For atom type pairs I,J and I != J, where types I and J correspond to
 two different element types, mixing is performed by LAMMPS with
 user-specifiable parameters as described above.  You never need to
 specify a pair_coeff command with I != J arguments for this style.
 
 This pair style does not support the "pair_modify"_pair_modify.html
 shift, table, and tail options.
 
 This pair style does not write its information to "binary restart
 files"_restart.html, since it is stored in potential files.  Thus, you
 need to re-specify the pair_style and pair_coeff commands in an input
 script that reads a restart file.
 
 This pair style can only be used via the {pair} keyword of the
 "run_style respa"_run_style.html command.  It does not support the
 {inner}, {middle}, {outer} keywords.
 
 :line
 
 [Restrictions:]
 
 This style is part of the SNAP package.  It is only enabled if
 LAMMPS was built with that package.  See the "Making
 LAMMPS"_Section_start.html#start_3 section for more info.
 
 [Related commands:]
 
 "compute sna/atom"_compute_sna_atom.html,
 "compute snad/atom"_compute_sna_atom.html,
 "compute snav/atom"_compute_sna_atom.html
 
 [Default:] none
 
 :line
 
 :link(Thompson20142)
 [(Thompson)] Thompson, Swiler, Trott, Foiles, Tucker, J Comp Phys, 285, 316 (2015).
 
 :link(Bartok20102)
 [(Bartok2010)] Bartok, Payne, Risi, Csanyi, Phys Rev Lett, 104, 136403 (2010).
 
 :link(Bartok2013)
 [(Bartok2013)] Bartok, Gillan, Manby, Csanyi, Phys Rev B 87, 184115 (2013).
diff --git a/examples/accelerate/in.lc b/examples/accelerate/in.lc
index 66e3916fa..4dbdbb855 100644
--- a/examples/accelerate/in.lc
+++ b/examples/accelerate/in.lc
@@ -1,57 +1,57 @@
 # Gay-Berne benchmark
 # biaxial ellipsoid mesogens in isotropic phase
 # shape: 2 1.5 1
 # cutoff 4.0 with skin 0.8
 # NPT, T=2.4, P=8.0
 
 variable        x index 1
 variable        y index 1
 variable        z index 1
 variable        t index 100
 
 variable        i equal $x*32
 variable        j equal $y*32
 variable        k equal $z*32
 
 units	        lj
 atom_style      ellipsoid
 
 # create lattice of ellipsoids
 
 lattice	      sc 0.22
 region	      box block 0 $i 0 $j 0 $k
 create_box    1 box
 create_atoms  1 box
 
 set           type 1 mass 1.5
 set           type 1 shape 1 1.5 2
 set	      group all quat/random 982381
 
 compute	       rot all temp/asphere
 group	       spheroid type 1
 variable       dof equal count(spheroid)+3
-compute_modify rot extra ${dof}
+compute_modify rot extra/dof ${dof}
 
 velocity      all create 2.4 41787 loop geom
 
 pair_style    gayberne 1.0 3.0 1.0 4.0
 pair_coeff    1 1 1.0 1.0 1.0 0.5 0.2 1.0 0.5 0.2
 
 neighbor      0.8 bin
 
 timestep      0.002
 thermo	      100
 
 # equilibration run
 
 fix	       1 all npt/asphere temp 2.4 2.4 0.1 iso 5.0 8.0 0.1
-compute_modify 1_temp extra ${dof}
+compute_modify 1_temp extra/dof ${dof}
 run	       200
 
 # dynamics run
 
 reset_timestep 0
 unfix          1
 fix            1 all nve/asphere
 
 run	       $t
diff --git a/examples/deposit/in.deposit.atom b/examples/deposit/in.deposit.atom
index 8f2904039..3e48276bb 100644
--- a/examples/deposit/in.deposit.atom
+++ b/examples/deposit/in.deposit.atom
@@ -1,51 +1,51 @@
 # sample surface deposition script for atoms
 
 units		lj
 atom_style      atomic
 boundary        p p f
 
 lattice		fcc 1.0
 region          box block 0 5 0 5 0 10
 create_box      2 box
 
 region		substrate block INF INF INF INF INF 3
 create_atoms	1 region substrate
 
 pair_style	lj/cut 2.5
 pair_coeff	* * 1.0 1.0
 pair_coeff	1 2 1.0 1.0 5.0
 mass		* 1.0
 
 neigh_modify	delay 0
 
 group		addatoms type 2
 region          mobile block 0 5 0 5 2 INF
 group		mobile region mobile
 
 compute		add addatoms temp
-compute_modify	add dynamic yes extra 0
+compute_modify	add dynamic/dof yes extra/dof 0
 
 fix		1 addatoms nve
 fix		2 mobile langevin 1.0 1.0 0.1 587283
 fix		3 mobile nve
 
 region          slab block 0 5 0 5 8 9
 fix		4 addatoms deposit 100 2 100 12345 region slab near 1.0 &
                 vz -1.0 -1.0
 fix		5 addatoms wall/reflect zhi EDGE
 
 thermo_style	custom step atoms temp epair etotal press
 thermo          100
 thermo_modify	temp add
 
 #dump		1 all atom 50 dump.deposit.atom
 
 #dump		2 all image 50 image.*.jpg type type &
 #		axes yes 0.8 0.02 view 80 -30
 #dump_modify	2 pad 5
 
 #dump		3 all movie 50 movie.mpg type type &
 #		axes yes 0.8 0.02 view 80 -30
 #dump_modify	3 pad 5
 
 run             10000
diff --git a/examples/deposit/in.deposit.molecule b/examples/deposit/in.deposit.molecule
index 870d74072..16b0b9a72 100644
--- a/examples/deposit/in.deposit.molecule
+++ b/examples/deposit/in.deposit.molecule
@@ -1,55 +1,55 @@
 # sample surface deposition script for molecules
 
 units		lj
 atom_style      bond
 boundary        p p f
 
 lattice		fcc 1.0
 region          box block 0 5 0 5 0 10
 create_box      3 box bond/types 1 extra/bond/per/atom 1
 
 region		substrate block INF INF INF INF INF 3
 create_atoms	1 region substrate
 
 pair_style	lj/cut 2.5
 pair_coeff	* * 1.0 1.0
 pair_coeff	1 2 1.0 1.0 5.0
 mass		* 1.0
 
 bond_style	harmonic
 bond_coeff      1 5.0 1.0
 
 neigh_modify	delay 0
 
 group		addatoms type 2
 region          mobile block 0 5 0 5 2 INF
 group		mobile region mobile
 
 compute		add addatoms temp
-compute_modify	add dynamic yes extra 0
+compute_modify	add dynamic/dof yes extra/dof 0
 
 fix		1 addatoms nve
 fix		2 mobile langevin 0.1 0.1 0.1 587283
 fix		3 mobile nve
 
 molecule        dimer molecule.dimer
 region          slab block 0 5 0 5 8 9
 fix		4 addatoms deposit 100 0 100 12345 region slab near 1.0 &
                 mol dimer vz -1.0 -1.0
 fix		5 addatoms wall/reflect zhi EDGE
 
 thermo_style	custom step atoms temp epair etotal press
 thermo          100
 thermo_modify	temp add lost/bond ignore lost warn
 
 #dump		1 all atom 50 dump.deposit.atom
 
 #dump		2 all image 50 image.*.jpg type type &
 #		axes yes 0.8 0.02 view 80 -30
 #dump_modify	2 pad 5
 
 #dump		3 all movie 50 movie.mpg type type &
 #		axes yes 0.8 0.02 view 80 -30
 #dump_modify	3 pad 5
 
 run             10000
diff --git a/examples/deposit/in.deposit.molecule.shake b/examples/deposit/in.deposit.molecule.shake
index a1c141088..7bd701c92 100644
--- a/examples/deposit/in.deposit.molecule.shake
+++ b/examples/deposit/in.deposit.molecule.shake
@@ -1,56 +1,56 @@
 # sample surface deposition script for molecules
 
 units		lj
 atom_style      bond
 boundary        p p f
 
 lattice		fcc 1.0
 region          box block 0 5 0 5 0 10
 create_box      3 box bond/types 1 extra/bond/per/atom 1
 
 region		substrate block INF INF INF INF INF 3
 create_atoms	1 region substrate
 
 pair_style	lj/cut 2.5
 pair_coeff	* * 1.0 1.0
 pair_coeff	1 2 1.0 1.0 5.0
 mass		* 1.0
 
 bond_style	harmonic
 bond_coeff      1 5.0 1.0
 
 neigh_modify	delay 0
 
 group		addatoms type 2
 region          mobile block 0 5 0 5 2 INF
 group		mobile region mobile
 
 compute		add addatoms temp
-compute_modify	add dynamic yes extra 0
+compute_modify	add dynamic/dof yes extra/dof 0
 
 fix		1 addatoms nve
 fix		2 mobile langevin 0.1 0.1 0.1 587283
 fix		3 mobile nve
 
 molecule        dimer molecule.dimer.shake
 region          slab block 0 5 0 5 8 9
 fix		4 addatoms deposit 100 0 100 12345 region slab near 1.0 &
                 mol dimer vz -1.0 -1.0 shake 6
 fix		5 addatoms wall/reflect zhi EDGE
 fix             6 all shake 0.0001 20 1000 b 1 mol dimer
 
 thermo_style	custom step atoms temp epair etotal press
 thermo          100
 thermo_modify	temp add lost/bond ignore lost warn
 
 #dump		1 all atom 50 dump.deposit.atom
 
 #dump		2 all image 50 image.*.jpg type type &
 #		axes yes 0.8 0.02 view 80 -30
 #dump_modify	2 pad 5
 
 #dump		3 all movie 50 tmp.mpg type type &
 #		axes yes 0.8 0.02 view 80 -30
 #dump_modify	3 pad 5
 
 run             10000
diff --git a/examples/ellipse/in.ellipse.gayberne b/examples/ellipse/in.ellipse.gayberne
index 19e4e2414..fe783ac6d 100644
--- a/examples/ellipse/in.ellipse.gayberne
+++ b/examples/ellipse/in.ellipse.gayberne
@@ -1,66 +1,66 @@
 # GayBerne ellipsoids in LJ background fluid
 
 units	     lj
 atom_style   ellipsoid
 dimension    2
 
 lattice	     sq 0.02
 region	     box block 0 20 0 20 -0.5 0.5
 create_box   2 box
 create_atoms 1 box
 
 set	     group all type/fraction 2 0.1 95392
 set 	     type 1 mass 1.0
 set 	     type 2 mass 1.5
 set 	     type 1 shape 1 1 1
 set 	     type 2 shape 3 1 1
 set	     group all quat/random 18238
 
 compute	     rot all temp/asphere
 group	     spheroid type 1
 variable     dof equal count(spheroid)+2
-compute_modify rot extra ${dof}
+compute_modify rot extra/dof ${dof}
 
 velocity     all create 2.4 87287 loop geom
 
 pair_style   gayberne 1.0 3.0 1.0 4.0
 pair_coeff   1 1 3.0 1.0 1 1 1 1 1 1 2.5
 pair_coeff   1 2 3.0 1.0 1 1 1 0 0 0
 pair_coeff   2 2 1.0 1.0 1 1 0.2 0 0 0
 
 neighbor     0.8 bin
 
 thermo_style custom step c_rot epair etotal press vol
 thermo	     100
 
 timestep     0.002
 
 compute	     q all property/atom quatw quati quatj quatk
 
 #dump	     1 all custom 100 dump.ellipse.gayberne &
 #	     id type x y z c_q[1] c_q[2] c_q[3] c_q[4]
 
 #dump	     2 all image 100 image.*.jpg type type &
 #	     zoom 1.6 center d 0.5 0.5 0.5
 #dump_modify  2 pad 4 adiam 1 1.0 adiam 2 2.0
 
 #dump	     3 all movie 100 movie.mpg type type &
 #	     zoom 1.6 center d 0.5 0.5 0.5
 #dump_modify  3 pad 4 adiam 1 1.0 adiam 2 2.0
 
 fix	     1 all npt/asphere temp 2.0 2.0 0.1 iso 0.0 1.0 1.0 &
 	       mtk no pchain 0 tchain 1
 fix	     2 all enforce2d
 
-compute_modify 1_temp extra ${dof}
+compute_modify 1_temp extra/dof ${dof}
 
 # equilibrate to shrink box around dilute system
 
 run	     2000
 
 # run dynamics on dense system
 
 unfix	     1
 fix	     1 all nve/asphere
 
 run	     2000
diff --git a/examples/ellipse/in.ellipse.resquared b/examples/ellipse/in.ellipse.resquared
index df52aef66..82398987f 100644
--- a/examples/ellipse/in.ellipse.resquared
+++ b/examples/ellipse/in.ellipse.resquared
@@ -1,66 +1,66 @@
 # RESquared ellipsoids in LJ background fluid
 
 units	     lj
 atom_style   ellipsoid
 dimension    2
 
 lattice	     sq 0.02
 region	     box block 0 20 0 20 -0.5 0.5
 create_box   2 box
 create_atoms 1 box
 
 set	     group all type/fraction 2 0.1 95392
 set 	     type 1 mass 1.0
 set 	     type 2 mass 1.5
 set 	     type 1 shape 1 1 1
 set 	     type 2 shape 3 1 1
 set	     group all quat/random 18238
 
 compute	     rot all temp/asphere
 group	     spheroid type 1
 variable     dof equal count(spheroid)+2
-compute_modify rot extra ${dof}
+compute_modify rot extra/dof ${dof}
 
 velocity     all create 2.4 87287 loop geom
 
 pair_style   resquared 4.0
 pair_coeff   1 1 3.0 1 1 1 1 1 1 1 2.5
 pair_coeff   1 2 3.0 1 1 1 1 0 0 0
 pair_coeff   2 2 1.0 1 1 1 0.2 0 0 0
 
 neighbor     0.8 bin
 
 thermo_style custom step c_rot epair etotal press vol
 thermo	     100
 
 timestep     0.002
 
 compute	     q all property/atom quatw quati quatj quatk
 
 #dump	     1 all custom 100 dump.ellipse.resquared &
 #	     id type x y z c_q[1] c_q[2] c_q[3] c_q[4]
 
 #dump	     2 all image 100 image.*.jpg type type &
 #	     zoom 1.6 center d 0.5 0.5 0.5
 #dump_modify  2 pad 4 adiam 1 1.0 adiam 2 2.0
 
 #dump	     3 all movie 100 movie.mpg type type &
 #	     zoom 1.6 center d 0.5 0.5 0.5
 #dump_modify  3 pad 4 adiam 1 1.0 adiam 2 2.0
 
 fix	     1 all npt/asphere temp 2.0 2.0 0.1 iso 0.0 1.0 1.0 &
 	       mtk no pchain 0 tchain 1
 fix	     2 all enforce2d
 
-compute_modify 1_temp extra ${dof}
+compute_modify 1_temp extra/dof ${dof}
 
 # equilibrate to shrink box around dilute system
 
 run	     2000
 
 # run dynamics on dense system
 
 unfix	     1
 fix	     1 all nve/asphere
 
 run	     2000
diff --git a/examples/granregion/in.granregion.box b/examples/granregion/in.granregion.box
index 91f06744d..f1f20ad79 100644
--- a/examples/granregion/in.granregion.box
+++ b/examples/granregion/in.granregion.box
@@ -1,66 +1,66 @@
 # pouring spheres into container box
 
 units		lj
 atom_style      sphere
 boundary        f f f
 dimension	3
 comm_modify     vel yes
 
 region          box block -10 10 -10 10 -10 10 units box
 create_box      2 box
 
 pair_style	hybrid gran/hooke 4000.0 NULL 100.0 NULL 0.5 1
 pair_coeff	* * gran/hooke
 
 region          container block -6 6 -6 6 -6 6 units box
 fix		container all wall/gran/region hooke/history &
 		4000.0 NULL 100.0 NULL 0.5 1 region container
 
 neighbor	0.3 bin
 neigh_modify	delay 0 every 1 check yes
 
 fix             2 all nve/sphere
 fix		3 all gravity 1.0 vector 0 0 -1
 
 region          slab block -2 2 -2 2 -2 2 units box
 fix             ins all pour 100 2 4767548 vol 0.4 10 &
 	        diam one 1.0 region slab ignore
 
 timestep	0.005
 
 compute         1 all temp
-compute_modify  1 dynamic yes
+compute_modify  1 dynamic/dof yes
 
 compute         2 all temp/sphere
-compute_modify  2 dynamic yes
+compute_modify  2 dynamic/dof yes
 
 thermo		100
 thermo_style	custom step atoms temp c_1 c_2 press
 thermo_modify   lost ignore
-compute_modify  thermo_temp dynamic yes
+compute_modify  thermo_temp dynamic/dof yes
 
 #dump		2 all image 100 image.*.jpg type type &
 #		zoom 1.4 adiam 1.0 box no 0.0 axes yes 0.9 0.03
 #dump_modify	2 pad 5
 
 run	        5000
 
 region          container delete
 variable        theta equal (step-5000)*(4.0*PI/5000)
 region          container block -6 6 -6 6 -6 6 units box &
                 rotate v_theta 0 0 0 0 0 1
 run	        5000
 
 region          container delete
 region          container block -6 6 -6 6 -6 6 units box
 run             5000
 
 region          container delete
 variable        theta equal (step-15000)*(4.0*PI/5000)
 region          container block -6 6 -6 6 -6 6 units box &
                 rotate v_theta 0 0 0 1 1 1
 run	        5000
 
 region          container delete
 region          container block -6 6 -6 6 -6 6 units box
 run             5000
diff --git a/examples/pour/in.pour b/examples/pour/in.pour
index 9b24b2906..332ccf8b6 100644
--- a/examples/pour/in.pour
+++ b/examples/pour/in.pour
@@ -1,52 +1,52 @@
 # Pour granular particles into chute container, then induce flow
 
 atom_style	sphere
 boundary	p p fm
 newton		off
 comm_modify	vel yes
 
 region		reg block -10 10 -10 10 -0.5 16 units box
 create_box	1 reg
 
 neighbor	0.2 bin
 neigh_modify	delay 0
 
 # IMPORTANT NOTE: these values of stiffness (2000) and timestep (0.001)
 # are used in this example file to produce a quick simulation and movie.
 # More appropriate values for realistic simulations would be
 # k = 2.0e5 and dt = 0.0001, as in bench/in.chute.
 
 pair_style      gran/hooke/history 2000.0 NULL 50.0 NULL 0.5 0
 pair_coeff	* *
 
 timestep	0.001
 
 fix		1 all nve/sphere
 fix		2 all gravity 1.0 spherical 0.0 -180.0
 fix		zlower all wall/gran hooke/history 2000.0 NULL 50.0 NULL 0.5 0 &
 		zplane 0.0 2000.0 
 
 region		slab block -9.0 9.0 -9.0 9.0 10.0 15.0 units box
 fix		ins all pour 3000 1 300719 vol 0.13 50 region slab
 
 compute		1 all erotate/sphere
 thermo_style	custom step atoms ke c_1 vol
 thermo		1000
 thermo_modify	lost ignore norm no
-compute_modify	thermo_temp dynamic yes
+compute_modify	thermo_temp dynamic/dof yes
 
 #dump		id all atom 1000 dump.pour
 
 #dump		2 all image 1000 image.*.jpg type type &
 #		axes yes 0.8 0.02 view 80 -30
 #dump_modify	2 pad 5
 
 #dump		3 all movie 1000 movie.mpg type type &
 #		axes yes 0.8 0.02 view 80 -30
 #dump_modify	3 pad 5
 
 run		25000
 
 unfix		ins
 fix		2 all gravity 1.0 chute 26.0
 run		25000
diff --git a/examples/pour/in.pour.2d b/examples/pour/in.pour.2d
index 02fc794ff..5fa057c74 100644
--- a/examples/pour/in.pour.2d
+++ b/examples/pour/in.pour.2d
@@ -1,54 +1,54 @@
 # Pour 2d granular particles into container
 
 dimension	2
 atom_style	sphere
 boundary	f fm p
 newton		off
 comm_modify	vel yes
 
 region		reg block 0 100 0 50 -0.5 0.5 units box
 create_box	1 reg
 
 neighbor	0.2 bin
 neigh_modify	delay 0
 
 # IMPORTANT NOTE: these values of stiffness (4000) and timestep (0.001)
 # are used in this example file to produce a quick simulation and movie.
 # More appropriate values for realistic simulations would be
 # k = 4.0e5 and dt = 0.0001, as in bench/in.chute (if it were Hertzian).
 
 pair_style      gran/hertz/history 4000.0 NULL 100.0 NULL 0.5 0
 pair_coeff	* *
 
 timestep	0.001
 
 fix             1 all nve/sphere
 fix		2 all gravity 1.0 spherical 0.0 -180.0
 fix		xwalls all wall/gran hertz/history 4000.0 NULL 100.0 NULL 0 1 &
 		xplane 0 100
 fix		ywalls all wall/gran hertz/history 4000.0 NULL 100.0 NULL 0 1 &
 		yplane 0 NULL
  
 region          slab block 1.0 99.0 30 34.5 -0.5 0.5 units box
 fix             ins all pour 1000 1 4767548 vol 0.4 10 &
 	        diam range 0.5 1.0 region slab
 
 fix             3 all enforce2d
 
 compute		1 all erotate/sphere
 thermo_style	custom step atoms ke c_1 vol
 thermo		1000
 thermo_modify	lost ignore norm no
-compute_modify	thermo_temp dynamic yes
+compute_modify	thermo_temp dynamic/dof yes
 
 #dump		id all atom 250 dump.pour
 
 #dump		2 all image 500 image.*.jpg type type &
 #		zoom 1.6 adiam 1.5
 #dump_modify	2 pad 5
 
 #dump		3 all movie 500 movie.mpg type type &
 #		zoom 1.6 adiam 1.5
 #dump_modify	3 pad 5
 
 run		25000
diff --git a/examples/pour/in.pour.2d.molecule b/examples/pour/in.pour.2d.molecule
index 84869eb23..a65c72875 100644
--- a/examples/pour/in.pour.2d.molecule
+++ b/examples/pour/in.pour.2d.molecule
@@ -1,71 +1,71 @@
 # Pour 2d granular particles into container
 
 dimension	2
 atom_style	sphere
 atom_modify     map array
 boundary	f fm p
 newton		off
 comm_modify	vel yes cutoff 2.5
 
 fix             prop all property/atom mol ghost yes
 
 region		reg block 0 100 0 50 -0.5 0.5 units box
 create_box	1 reg
 
 neighbor	0.2 bin
 neigh_modify	delay 0
 
 # IMPORTANT NOTE: these values of stiffness (4000) and timestep (0.001)
 # are used in this example file to produce a quick simulation and movie.
 # More appropriate values for realistic simulations would be
 # k = 4.0e5 and dt = 0.0001, as in bench/in.chute (if it were Hertzian).
 
 pair_style      gran/hertz/history 4000.0 NULL 100.0 NULL 0.5 0
 pair_coeff	* *
 
 timestep	0.001
 
 fix		2 all gravity 1.0 spherical 0.0 -180.0
 fix		xwalls all wall/gran hertz/history 4000.0 NULL 100.0 NULL 0 1 &
 		xplane 0 100
 fix		ywalls all wall/gran hertz/history 4000.0 NULL 100.0 NULL 0 1 &
 		yplane 0 NULL
 
 molecule        object molecule.vshape
 fix             3 all rigid/small molecule mol object
 
 # insure region size + molecule size does not overlap wall
 
 region          slab block 3.0 97.0 30 34.5 -0.5 0.5 units box
 fix             ins all pour 500 0 4767548 vol 0.8 10 &
 	        region slab mol object rigid 3
 
 fix             4 all enforce2d
 
 compute		1 all erotate/sphere
 compute         Tsphere all temp/sphere
 thermo_style	custom step atoms ke c_1 vol
 thermo_modify	lost ignore norm no temp Tsphere
-compute_modify	Tsphere dynamic yes
+compute_modify	Tsphere dynamic/dof yes
 
 thermo		1000
 
 #dump		id all atom 100 tmp.dump
 
 #variable        colors string &
 #                "red green blue yellow white &
 #                purple pink orange lime gray"
 #variable	mol2 atom mol%10
 #dump		2 all image 250 image.*.jpg v_mol2 type &
 #		zoom 1.6 adiam 1.5
 #dump_modify	2 pad 5 amap 0 10 sa 1 10 ${colors}
 
 #variable        colors string &
 #                "red green blue yellow white &
 #                purple pink orange lime gray"
 #variable	mol3 atom mol%10
 #dump		3 all movie 250 movie.mpg v_mol3 type &
 #		zoom 1.6 adiam 1.5
 #dump_modify	3 pad 5 amap 0 10 sa 1 10 ${colors}
 
 run	        25000
diff --git a/examples/snap/Ta06A.snapparam b/examples/snap/Ta06A.snapparam
index 062725334..7b30312f5 100644
--- a/examples/snap/Ta06A.snapparam
+++ b/examples/snap/Ta06A.snapparam
@@ -1,14 +1,15 @@
 # DATE: 2014-09-05 CONTRIBUTOR: Aidan Thompson athomps@sandia.gov CITATION: Thompson, Swiler, Trott, Foiles and Tucker, arxiv.org, 1409.3880 (2014) 
 
 # LAMMPS SNAP parameters for Ta_Cand06A
 
 # required
 rcutfac 4.67637
 twojmax 6
 
 # optional
 
 gamma 1
 rfac0 0.99363
 rmin0 0
 diagonalstyle 3
+bzeroflag 0
diff --git a/examples/snap/W_2940_2017_2.snapparam b/examples/snap/W_2940_2017_2.snapparam
index e0b20005e..f17961bdd 100644
--- a/examples/snap/W_2940_2017_2.snapparam
+++ b/examples/snap/W_2940_2017_2.snapparam
@@ -1,12 +1,13 @@
 # DATE: 2017-02-20 CONTRIBUTOR: Mitchell Wood mitwood@sandia.gov CITATION: Wood, M. A. and Thompson, A. P. "Quantum-Accurate Molecular Dynamics Potential for Tungsten" arXiv:1702.07042 [physics.comp-ph]
 # 
 # required
 rcutfac 4.73442
 twojmax 8
 
 # optional
 
 gamma 1
 rfac0 0.99363
 rmin0 0
 diagonalstyle 3
+bzeroflag 0
diff --git a/src/MC/fix_gcmc.cpp b/src/MC/fix_gcmc.cpp
index e638cd887..780df25bd 100644
--- a/src/MC/fix_gcmc.cpp
+++ b/src/MC/fix_gcmc.cpp
@@ -1,2416 +1,2447 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Paul Crozier, Aidan Thompson (SNL)
 ------------------------------------------------------------------------- */
 
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include "fix_gcmc.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "atom_vec_hybrid.h"
 #include "molecule.h"
 #include "update.h"
 #include "modify.h"
 #include "fix.h"
 #include "comm.h"
 #include "compute.h"
 #include "group.h"
 #include "domain.h"
 #include "region.h"
 #include "random_park.h"
 #include "force.h"
 #include "pair.h"
 #include "bond.h"
 #include "angle.h"
 #include "dihedral.h"
 #include "improper.h"
 #include "kspace.h"
 #include "math_extra.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 #include "thermo.h"
 #include "output.h"
 #include "neighbor.h"
 #include <iostream>
 
 using namespace std;
 using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
 
 // large energy value used to signal overlap
 
 #define MAXENERGYSIGNAL 1.0e100
 
 // this must be lower than MAXENERGYSIGNAL
 // by a large amount, so that it is still
 // less than total energy when negative
-// energy changes are adddd to MAXENERGYSIGNAL
+// energy changes are added to MAXENERGYSIGNAL
 
 #define MAXENERGYTEST 1.0e50
 
 enum{ATOM,MOLECULE};
 
 /* ---------------------------------------------------------------------- */
 
 FixGCMC::FixGCMC(LAMMPS *lmp, int narg, char **arg) :
   Fix(lmp, narg, arg),
   idregion(NULL), full_flag(0), ngroups(0), groupstrings(NULL), ngrouptypes(0), grouptypestrings(NULL),
   grouptypebits(NULL), grouptypes(NULL), local_gas_list(NULL), atom_coord(NULL), random_equal(NULL), random_unequal(NULL), 
-  coords(NULL), imageflags(NULL), idshake(NULL)
+  coords(NULL), imageflags(NULL), idrigid(NULL), idshake(NULL), fixrigid(NULL), fixshake(NULL)
 {
   if (narg < 11) error->all(FLERR,"Illegal fix gcmc command");
 
   if (atom->molecular == 2)
     error->all(FLERR,"Fix gcmc does not (yet) work with atom_style template");
 
   dynamic_group_allow = 1;
 
   vector_flag = 1;
   size_vector = 8;
   global_freq = 1;
   extvector = 0;
   restart_global = 1;
   time_depend = 1;
 
   // required args
 
   nevery = force->inumeric(FLERR,arg[3]);
   nexchanges = force->inumeric(FLERR,arg[4]);
   nmcmoves = force->inumeric(FLERR,arg[5]);
   ngcmc_type = force->inumeric(FLERR,arg[6]);
   seed = force->inumeric(FLERR,arg[7]);
   reservoir_temperature = force->numeric(FLERR,arg[8]);
   chemical_potential = force->numeric(FLERR,arg[9]);
   displace = force->numeric(FLERR,arg[10]);
 
   if (nevery <= 0) error->all(FLERR,"Illegal fix gcmc command");
   if (nexchanges < 0) error->all(FLERR,"Illegal fix gcmc command");
   if (nmcmoves < 0) error->all(FLERR,"Illegal fix gcmc command");
   if (seed <= 0) error->all(FLERR,"Illegal fix gcmc command");
   if (reservoir_temperature < 0.0)
     error->all(FLERR,"Illegal fix gcmc command");
   if (displace < 0.0) error->all(FLERR,"Illegal fix gcmc command");
 
   // read options from end of input line
 
   options(narg-11,&arg[11]);
 
   // random number generator, same for all procs
 
   random_equal = new RanPark(lmp,seed);
 
   // random number generator, not the same for all procs
 
   random_unequal = new RanPark(lmp,seed);
 
   // error checks on region and its extent being inside simulation box
 
   region_xlo = region_xhi = region_ylo = region_yhi =
     region_zlo = region_zhi = 0.0;
   if (regionflag) {
     if (domain->regions[iregion]->bboxflag == 0)
       error->all(FLERR,"Fix gcmc region does not support a bounding box");
     if (domain->regions[iregion]->dynamic_check())
       error->all(FLERR,"Fix gcmc region cannot be dynamic");
 
     region_xlo = domain->regions[iregion]->extent_xlo;
     region_xhi = domain->regions[iregion]->extent_xhi;
     region_ylo = domain->regions[iregion]->extent_ylo;
     region_yhi = domain->regions[iregion]->extent_yhi;
     region_zlo = domain->regions[iregion]->extent_zlo;
     region_zhi = domain->regions[iregion]->extent_zhi;
 
     if (region_xlo < domain->boxlo[0] || region_xhi > domain->boxhi[0] ||
         region_ylo < domain->boxlo[1] || region_yhi > domain->boxhi[1] ||
         region_zlo < domain->boxlo[2] || region_zhi > domain->boxhi[2])
       error->all(FLERR,"Fix gcmc region extends outside simulation box");
 
     // estimate region volume using MC trials
 
     double coord[3];
     int inside = 0;
     int attempts = 10000000;
     for (int i = 0; i < attempts; i++) {
       coord[0] = region_xlo + random_equal->uniform() * (region_xhi-region_xlo);
       coord[1] = region_ylo + random_equal->uniform() * (region_yhi-region_ylo);
       coord[2] = region_zlo + random_equal->uniform() * (region_zhi-region_zlo);
       if (domain->regions[iregion]->match(coord[0],coord[1],coord[2]) != 0)
         inside++;
     }
 
     double max_region_volume = (region_xhi - region_xlo)*
      (region_yhi - region_ylo)*(region_zhi - region_zlo);
 
     region_volume = max_region_volume*static_cast<double> (inside)/
      static_cast<double> (attempts);
   }
 
   // error check and further setup for mode = MOLECULE
 
   if (mode == MOLECULE) {
     if (onemols[imol]->xflag == 0)
       error->all(FLERR,"Fix gcmc molecule must have coordinates");
     if (onemols[imol]->typeflag == 0)
       error->all(FLERR,"Fix gcmc molecule must have atom types");
     if (ngcmc_type != 0)
       error->all(FLERR,"Atom type must be zero in fix gcmc mol command");
     if (onemols[imol]->qflag == 1 && atom->q == NULL)
       error->all(FLERR,"Fix gcmc molecule has charges, but atom style does not");
 
     if (atom->molecular == 2 && onemols != atom->avec->onemols)
       error->all(FLERR,"Fix gcmc molecule template ID must be same "
                  "as atom_style template ID");
     onemols[imol]->check_attributes(0);
   }
 
   if (charge_flag && atom->q == NULL)
     error->all(FLERR,"Fix gcmc atom has charge, but atom style does not");
 
+  if (rigidflag && mode == ATOM)
+    error->all(FLERR,"Cannot use fix gcmc rigid and not molecule");
   if (shakeflag && mode == ATOM)
     error->all(FLERR,"Cannot use fix gcmc shake and not molecule");
+  if (rigidflag && shakeflag)
+    error->all(FLERR,"Cannot use fix gcmc rigid and shake");
 
   // setup of coords and imageflags array
 
   if (mode == ATOM) natoms_per_molecule = 1;
   else natoms_per_molecule = onemols[imol]->natoms;
   memory->create(coords,natoms_per_molecule,3,"gcmc:coords");
   memory->create(imageflags,natoms_per_molecule,"gcmc:imageflags");
   memory->create(atom_coord,natoms_per_molecule,3,"gcmc:atom_coord");
 
   // compute the number of MC cycles that occur nevery timesteps
 
   ncycles = nexchanges + nmcmoves;
 
   // set up reneighboring
 
   force_reneighbor = 1;
   next_reneighbor = update->ntimestep + 1;
 
   // zero out counters
 
   ntranslation_attempts = 0.0;
   ntranslation_successes = 0.0;
   nrotation_attempts = 0.0;
   nrotation_successes = 0.0;
   ndeletion_attempts = 0.0;
   ndeletion_successes = 0.0;
   ninsertion_attempts = 0.0;
   ninsertion_successes = 0.0;
 
   gcmc_nmax = 0;
   local_gas_list = NULL;
 }
 
 /* ----------------------------------------------------------------------
    parse optional parameters at end of input line
 ------------------------------------------------------------------------- */
 
 void FixGCMC::options(int narg, char **arg)
 {
   if (narg < 0) error->all(FLERR,"Illegal fix gcmc command");
 
   // defaults
 
   mode = ATOM;
   max_rotation_angle = 10*MY_PI/180;
   regionflag = 0;
   iregion = -1;
   region_volume = 0;
   max_region_attempts = 1000;
   molecule_group = 0;
   molecule_group_bit = 0;
   molecule_group_inversebit = 0;
   exclusion_group = 0;
   exclusion_group_bit = 0;
   pressure_flag = false;
   pressure = 0.0;
   fugacity_coeff = 1.0;
+  rigidflag = 0;
   shakeflag = 0;
   charge = 0.0;
   charge_flag = false;
   full_flag = false;
-  idshake = NULL;
   ngroups = 0;
   int ngroupsmax = 0;
   groupstrings = NULL;
   ngrouptypes = 0;
   int ngrouptypesmax = 0;
   grouptypestrings = NULL;
   grouptypes = NULL;
   grouptypebits = NULL;
   energy_intra = 0.0;
   tfac_insert = 1.0;
   overlap_cutoff = 0.0;
   overlap_flag = 0;
 
   int iarg = 0;
   while (iarg < narg) {
   if (strcmp(arg[iarg],"mol") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command");
       imol = atom->find_molecule(arg[iarg+1]);
       if (imol == -1)
         error->all(FLERR,"Molecule template ID for fix gcmc does not exist");
       if (atom->molecules[imol]->nset > 1 && comm->me == 0)
         error->warning(FLERR,"Molecule template for "
                        "fix gcmc has multiple molecules");
       mode = MOLECULE;
       onemols = atom->molecules;
       nmol = onemols[imol]->nset;
       iarg += 2;
     } else if (strcmp(arg[iarg],"region") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command");
       iregion = domain->find_region(arg[iarg+1]);
       if (iregion == -1)
         error->all(FLERR,"Region ID for fix gcmc does not exist");
       int n = strlen(arg[iarg+1]) + 1;
       idregion = new char[n];
       strcpy(idregion,arg[iarg+1]);
       regionflag = 1;
       iarg += 2;
     } else if (strcmp(arg[iarg],"maxangle") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command");
       max_rotation_angle = force->numeric(FLERR,arg[iarg+1]);
       max_rotation_angle *= MY_PI/180;
       iarg += 2;
     } else if (strcmp(arg[iarg],"pressure") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command");
       pressure = force->numeric(FLERR,arg[iarg+1]);
       pressure_flag = true;
       iarg += 2;
     } else if (strcmp(arg[iarg],"fugacity_coeff") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command");
       fugacity_coeff = force->numeric(FLERR,arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"charge") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command");
       charge = force->numeric(FLERR,arg[iarg+1]);
       charge_flag = true;
       iarg += 2;
+    } else if (strcmp(arg[iarg],"rigid") == 0) {
+      if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command");
+      int n = strlen(arg[iarg+1]) + 1;
+      delete [] idrigid;
+      idrigid = new char[n];
+      strcpy(idrigid,arg[iarg+1]);
+      rigidflag = 1;
+      iarg += 2;
     } else if (strcmp(arg[iarg],"shake") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command");
       int n = strlen(arg[iarg+1]) + 1;
       delete [] idshake;
       idshake = new char[n];
       strcpy(idshake,arg[iarg+1]);
       shakeflag = 1;
       iarg += 2;
     } else if (strcmp(arg[iarg],"full_energy") == 0) {
       full_flag = true;
       iarg += 1;
     } else if (strcmp(arg[iarg],"group") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command");
       if (ngroups >= ngroupsmax) {
         ngroupsmax = ngroups+1;
         groupstrings = (char **)
           memory->srealloc(groupstrings,
                            ngroupsmax*sizeof(char *),
                            "fix_gcmc:groupstrings");
       }
       int n = strlen(arg[iarg+1]) + 1;
       groupstrings[ngroups] = new char[n];
       strcpy(groupstrings[ngroups],arg[iarg+1]);
       ngroups++;
       iarg += 2;
     } else if (strcmp(arg[iarg],"grouptype") == 0) {
       if (iarg+3 > narg) error->all(FLERR,"Illegal fix gcmc command");
       if (ngrouptypes >= ngrouptypesmax) {
         ngrouptypesmax = ngrouptypes+1;
         grouptypes = (int*) memory->srealloc(grouptypes,ngrouptypesmax*sizeof(int),
                          "fix_gcmc:grouptypes");
         grouptypestrings = (char**)
           memory->srealloc(grouptypestrings,
                            ngrouptypesmax*sizeof(char *),
                            "fix_gcmc:grouptypestrings");
       }
       grouptypes[ngrouptypes] = atoi(arg[iarg+1]);
       int n = strlen(arg[iarg+2]) + 1;
       grouptypestrings[ngrouptypes] = new char[n];
       strcpy(grouptypestrings[ngrouptypes],arg[iarg+2]);
       ngrouptypes++;
       iarg += 3;
     } else if (strcmp(arg[iarg],"intra_energy") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command");
       energy_intra = force->numeric(FLERR,arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"tfac_insert") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command");
       tfac_insert = force->numeric(FLERR,arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"overlap_cutoff") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command");
       overlap_cutoff = force->numeric(FLERR,arg[iarg+1]);
       overlap_flag = 1;
       iarg += 2;
     } else error->all(FLERR,"Illegal fix gcmc command");
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 FixGCMC::~FixGCMC()
 {
   if (regionflag) delete [] idregion;
   delete random_equal;
   delete random_unequal;
 
   memory->destroy(local_gas_list);
   memory->destroy(atom_coord);
   memory->destroy(coords);
   memory->destroy(imageflags);
 
+  delete [] idrigid;
   delete [] idshake;
 
   if (ngroups > 0) {
     for (int igroup = 0; igroup < ngroups; igroup++)
       delete [] groupstrings[igroup];
     memory->sfree(groupstrings);
   }
 
   if (ngrouptypes > 0) {
     memory->destroy(grouptypes);
     memory->destroy(grouptypebits);
     for (int igroup = 0; igroup < ngrouptypes; igroup++)
       delete [] grouptypestrings[igroup];
     memory->sfree(grouptypestrings);
   }
   if (full_flag && group) {
     int igroupall = group->find("all");
     neighbor->exclusion_group_group_delete(exclusion_group,igroupall);
   }
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixGCMC::setmask()
 {
   int mask = 0;
   mask |= PRE_EXCHANGE;
   return mask;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixGCMC::init()
 {
 
   triclinic = domain->triclinic;
 
   // decide whether to switch to the full_energy option
 
   if (!full_flag) {
     if ((force->kspace) ||
         (force->pair == NULL) ||
         (force->pair->single_enable == 0) ||
         (force->pair_match("hybrid",0)) ||
         (force->pair_match("eam",0))
         ) {
       full_flag = true;
       if (comm->me == 0)
         error->warning(FLERR,"Fix gcmc using full_energy option");
     }
   }
 
   if (full_flag) {
     char *id_pe = (char *) "thermo_pe";
     int ipe = modify->find_compute(id_pe);
     c_pe = modify->compute[ipe];
   }
 
   int *type = atom->type;
 
   if (mode == ATOM) {
     if (ngcmc_type <= 0 || ngcmc_type > atom->ntypes)
       error->all(FLERR,"Invalid atom type in fix gcmc command");
   }
 
   // if mode == ATOM, warn if any deletable atom has a mol ID
 
   if ((mode == ATOM) && atom->molecule_flag) {
     tagint *molecule = atom->molecule;
     int flag = 0;
     for (int i = 0; i < atom->nlocal; i++)
       if (type[i] == ngcmc_type)
         if (molecule[i]) flag = 1;
     int flagall;
     MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world);
     if (flagall && comm->me == 0)
       error->all(FLERR,
        "Fix gcmc cannot exchange individual atoms belonging to a molecule");
   }
 
   // if mode == MOLECULE, check for unset mol IDs
 
   if (mode == MOLECULE) {
     tagint *molecule = atom->molecule;
     int *mask = atom->mask;
     int flag = 0;
     for (int i = 0; i < atom->nlocal; i++)
       if (mask[i] == groupbit)
         if (molecule[i] == 0) flag = 1;
     int flagall;
     MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world);
     if (flagall && comm->me == 0)
       error->all(FLERR,
        "All mol IDs should be set for fix gcmc group atoms");
   }
 
   if (((mode == MOLECULE) && (atom->molecule_flag == 0)) ||
       ((mode == MOLECULE) && (!atom->tag_enable || !atom->map_style)))
     error->all(FLERR,
                "Fix gcmc molecule command requires that "
                "atoms have molecule attributes");
 
+  // if rigidflag defined, check for rigid/small fix
+  // its molecule template must be same as this one
+
+  fixrigid = NULL;
+  if (rigidflag) {
+    int ifix = modify->find_fix(idrigid);
+    if (ifix < 0) error->all(FLERR,"Fix gcmc rigid fix does not exist");
+    fixrigid = modify->fix[ifix];
+    int tmp;
+    if (onemols != (Molecule **) fixrigid->extract("onemol",tmp))
+      error->all(FLERR,
+                 "Fix gcmc and fix rigid/small not using "
+                 "same molecule template ID");
+  }
+
   // if shakeflag defined, check for SHAKE fix
   // its molecule template must be same as this one
 
   fixshake = NULL;
   if (shakeflag) {
     int ifix = modify->find_fix(idshake);
     if (ifix < 0) error->all(FLERR,"Fix gcmc shake fix does not exist");
     fixshake = modify->fix[ifix];
     int tmp;
     if (onemols != (Molecule **) fixshake->extract("onemol",tmp))
       error->all(FLERR,"Fix gcmc and fix shake not using "
                  "same molecule template ID");
   }
 
-  // check for fix rigid
-
-  for (int irigid = 0; irigid < modify->nfix; irigid++) {
-    if (strncmp(modify->fix[irigid]->style,"rigid",5) == 0)
-      error->all(FLERR,"Fix gcmc can not currently be used with any rigid fix");
-  }
-
   if (domain->dimension == 2)
     error->all(FLERR,"Cannot use fix gcmc in a 2d simulation");
 
   // create a new group for interaction exclusions
   // used for attempted atom or molecule deletions
   // skip if already exists from previous init()
 
   if (full_flag && !exclusion_group_bit) {
     char **group_arg = new char*[4];
 
     // create unique group name for atoms to be excluded
 
     int len = strlen(id) + 30;
     group_arg[0] = new char[len];
     sprintf(group_arg[0],"FixGCMC:gcmc_exclusion_group:%s",id);
     group_arg[1] = (char *) "subtract";
     group_arg[2] = (char *) "all";
     group_arg[3] = (char *) "all";
     group->assign(4,group_arg);
     exclusion_group = group->find(group_arg[0]);
     if (exclusion_group == -1)
       error->all(FLERR,"Could not find fix gcmc exclusion group ID");
     exclusion_group_bit = group->bitmask[exclusion_group];
 
     // neighbor list exclusion setup
     // turn off interactions between group all and the exclusion group
 
     int narg = 4;
     char **arg = new char*[narg];;
     arg[0] = (char *) "exclude";
     arg[1] = (char *) "group";
     arg[2] = group_arg[0];
     arg[3] = (char *) "all";
     neighbor->modify_params(narg,arg);
     delete [] group_arg[0];
     delete [] group_arg;
     delete [] arg;
   }
 
   // create a new group for temporary use with selected molecules
 
   if (mode == MOLECULE) {
     char **group_arg = new char*[3];
     // create unique group name for atoms to be rotated
     int len = strlen(id) + 30;
     group_arg[0] = new char[len];
     sprintf(group_arg[0],"FixGCMC:rotation_gas_atoms:%s",id);
     group_arg[1] = (char *) "molecule";
     char digits[12];
     sprintf(digits,"%d",-1);
     group_arg[2] = digits;
     group->assign(3,group_arg);
     molecule_group = group->find(group_arg[0]);
     if (molecule_group == -1)
       error->all(FLERR,"Could not find fix gcmc rotation group ID");
     molecule_group_bit = group->bitmask[molecule_group];
     molecule_group_inversebit = molecule_group_bit ^ ~0;
     delete [] group_arg[0];
     delete [] group_arg;
   }
 
   // get all of the needed molecule data if mode == MOLECULE,
   // otherwise just get the gas mass
 
   if (mode == MOLECULE) {
 
     onemols[imol]->compute_mass();
     onemols[imol]->compute_com();
     gas_mass = onemols[imol]->masstotal;
     for (int i = 0; i < onemols[imol]->natoms; i++) {
       onemols[imol]->x[i][0] -= onemols[imol]->com[0];
       onemols[imol]->x[i][1] -= onemols[imol]->com[1];
       onemols[imol]->x[i][2] -= onemols[imol]->com[2];
     }
 
   } else gas_mass = atom->mass[ngcmc_type];
 
   if (gas_mass <= 0.0)
     error->all(FLERR,"Illegal fix gcmc gas mass <= 0");
 
   // check that no deletable atoms are in atom->firstgroup
   // deleting such an atom would not leave firstgroup atoms first
 
   if (atom->firstgroup >= 0) {
     int *mask = atom->mask;
     int firstgroupbit = group->bitmask[atom->firstgroup];
 
     int flag = 0;
     for (int i = 0; i < atom->nlocal; i++)
       if ((mask[i] == groupbit) && (mask[i] && firstgroupbit)) flag = 1;
 
     int flagall;
     MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world);
 
     if (flagall)
       error->all(FLERR,"Cannot do GCMC on atoms in atom_modify first group");
   }
 
   // compute beta, lambda, sigma, and the zz factor
 
   beta = 1.0/(force->boltz*reservoir_temperature);
   double lambda = sqrt(force->hplanck*force->hplanck/
                        (2.0*MY_PI*gas_mass*force->mvv2e*
                         force->boltz*reservoir_temperature));
   sigma = sqrt(force->boltz*reservoir_temperature*tfac_insert/gas_mass/force->mvv2e);
   zz = exp(beta*chemical_potential)/(pow(lambda,3.0));
   if (pressure_flag) zz = pressure*fugacity_coeff*beta/force->nktv2p;
 
   imagezero = ((imageint) IMGMAX << IMG2BITS) |
              ((imageint) IMGMAX << IMGBITS) | IMGMAX;
 
   // construct group bitmask for all new atoms
   // aggregated over all group keywords
 
   groupbitall = 1 | groupbit;
   for (int igroup = 0; igroup < ngroups; igroup++) {
     int jgroup = group->find(groupstrings[igroup]);
     if (jgroup == -1)
       error->all(FLERR,"Could not find specified fix gcmc group ID");
     groupbitall |= group->bitmask[jgroup];
   }
 
   // construct group type bitmasks
   // not aggregated over all group keywords
 
   if (ngrouptypes > 0) {
     memory->create(grouptypebits,ngrouptypes,"fix_gcmc:grouptypebits");
     for (int igroup = 0; igroup < ngrouptypes; igroup++) {
       int jgroup = group->find(grouptypestrings[igroup]);
       if (jgroup == -1)
         error->all(FLERR,"Could not find specified fix gcmc group ID");
       grouptypebits[igroup] = group->bitmask[jgroup];
     }
   }
 
 }
 
 /* ----------------------------------------------------------------------
    attempt Monte Carlo translations, rotations, insertions, and deletions
    done before exchange, borders, reneighbor
    so that ghost atoms and neighbor lists will be correct
 ------------------------------------------------------------------------- */
 
 void FixGCMC::pre_exchange()
 {
   // just return if should not be called on this timestep
 
   if (next_reneighbor != update->ntimestep) return;
 
   xlo = domain->boxlo[0];
   xhi = domain->boxhi[0];
   ylo = domain->boxlo[1];
   yhi = domain->boxhi[1];
   zlo = domain->boxlo[2];
   zhi = domain->boxhi[2];
   if (triclinic) {
     sublo = domain->sublo_lamda;
     subhi = domain->subhi_lamda;
   } else {
     sublo = domain->sublo;
     subhi = domain->subhi;
   }
 
   if (regionflag) volume = region_volume;
   else volume = domain->xprd * domain->yprd * domain->zprd;
 
   if (triclinic) domain->x2lamda(atom->nlocal);
   domain->pbc();
   comm->exchange();
   atom->nghost = 0;
   comm->borders();
   if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
   update_gas_atoms_list();
 
   if (full_flag) {
     energy_stored = energy_full();
 
     if (mode == MOLECULE) {
       for (int i = 0; i < ncycles; i++) {
         int random_int_fraction =
           static_cast<int>(random_equal->uniform()*ncycles) + 1;
         if (random_int_fraction <= nmcmoves) {
           if (random_equal->uniform() < 0.5) attempt_molecule_translation_full();
           else attempt_molecule_rotation_full();
         } else {
           if (random_equal->uniform() < 0.5) attempt_molecule_deletion_full();
           else attempt_molecule_insertion_full();
         }
       }
     } else {
       for (int i = 0; i < ncycles; i++) {
         int random_int_fraction =
           static_cast<int>(random_equal->uniform()*ncycles) + 1;
         if (random_int_fraction <= nmcmoves) {
           attempt_atomic_translation_full();
         } else {
           if (random_equal->uniform() < 0.5) attempt_atomic_deletion_full();
           else attempt_atomic_insertion_full();
         }
       }
     }
     if (triclinic) domain->x2lamda(atom->nlocal);
     domain->pbc();
     comm->exchange();
     atom->nghost = 0;
     comm->borders();
     if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
 
   } else {
 
     if (mode == MOLECULE) {
       for (int i = 0; i < ncycles; i++) {
         int random_int_fraction =
           static_cast<int>(random_equal->uniform()*ncycles) + 1;
         if (random_int_fraction <= nmcmoves) {
           if (random_equal->uniform() < 0.5) attempt_molecule_translation();
           else attempt_molecule_rotation();
         } else {
           if (random_equal->uniform() < 0.5) attempt_molecule_deletion();
           else attempt_molecule_insertion();
         }
       }
     } else {
       for (int i = 0; i < ncycles; i++) {
         int random_int_fraction =
           static_cast<int>(random_equal->uniform()*ncycles) + 1;
         if (random_int_fraction <= nmcmoves) {
           attempt_atomic_translation();
         } else {
           if (random_equal->uniform() < 0.5) attempt_atomic_deletion();
           else attempt_atomic_insertion();
         }
       }
     }
   }
   next_reneighbor = update->ntimestep + nevery;
 }
 
 /* ----------------------------------------------------------------------
 ------------------------------------------------------------------------- */
 
 void FixGCMC::attempt_atomic_translation()
 {
   ntranslation_attempts += 1.0;
 
   if (ngas == 0) return;
 
   int i = pick_random_gas_atom();
 
   int success = 0;
   if (i >= 0) {
     double **x = atom->x;
     double energy_before = energy(i,ngcmc_type,-1,x[i]);
     double rsq = 1.1;
     double rx,ry,rz;
     rx = ry = rz = 0.0;
     double coord[3];
     while (rsq > 1.0) {
       rx = 2*random_unequal->uniform() - 1.0;
       ry = 2*random_unequal->uniform() - 1.0;
       rz = 2*random_unequal->uniform() - 1.0;
       rsq = rx*rx + ry*ry + rz*rz;
     }
     coord[0] = x[i][0] + displace*rx;
     coord[1] = x[i][1] + displace*ry;
     coord[2] = x[i][2] + displace*rz;
     if (regionflag) {
       while (domain->regions[iregion]->match(coord[0],coord[1],coord[2]) == 0) {
         rsq = 1.1;
         while (rsq > 1.0) {
           rx = 2*random_unequal->uniform() - 1.0;
           ry = 2*random_unequal->uniform() - 1.0;
           rz = 2*random_unequal->uniform() - 1.0;
           rsq = rx*rx + ry*ry + rz*rz;
         }
         coord[0] = x[i][0] + displace*rx;
         coord[1] = x[i][1] + displace*ry;
         coord[2] = x[i][2] + displace*rz;
       }
     }
     if (!domain->inside_nonperiodic(coord))
       error->one(FLERR,"Fix gcmc put atom outside box");
 
     double energy_after = energy(i,ngcmc_type,-1,coord);
 
     if (energy_after < MAXENERGYTEST &&
         random_unequal->uniform() <
         exp(beta*(energy_before - energy_after))) {
       x[i][0] = coord[0];
       x[i][1] = coord[1];
       x[i][2] = coord[2];
       success = 1;
     }
   }
 
   int success_all = 0;
   MPI_Allreduce(&success,&success_all,1,MPI_INT,MPI_MAX,world);
 
   if (success_all) {
     if (triclinic) domain->x2lamda(atom->nlocal);
     domain->pbc();
     comm->exchange();
     atom->nghost = 0;
     comm->borders();
     if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
     update_gas_atoms_list();
     ntranslation_successes += 1.0;
   }
 }
 
 /* ----------------------------------------------------------------------
 ------------------------------------------------------------------------- */
 
 void FixGCMC::attempt_atomic_deletion()
 {
   ndeletion_attempts += 1.0;
 
   if (ngas == 0) return;
 
   // trial is carried out only where the picked index is non-negative
 
   const int victim = pick_random_gas_atom();
 
   int removed = 0;
   if (victim >= 0) {
     const double e_del = energy(victim,ngcmc_type,-1,atom->x[victim]);
     const double threshold = ngas*exp(beta*e_del)/(zz*volume);
     if (random_unequal->uniform() < threshold) {
 
       // overwrite the deleted atom with the last local atom
 
       const int last = atom->nlocal - 1;
       atom->avec->copy(last,victim,1);
       atom->nlocal--;
       removed = 1;
     }
   }
 
   // all procs learn whether any proc accepted the deletion
 
   int removed_any = 0;
   MPI_Allreduce(&removed,&removed_any,1,MPI_INT,MPI_MAX,world);
   if (!removed_any) return;
 
   atom->natoms--;
   if (atom->tag_enable && atom->map_style) atom->map_init();
   atom->nghost = 0;
   if (triclinic) domain->x2lamda(atom->nlocal);
   comm->borders();
   if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
   update_gas_atoms_list();
   ndeletion_successes += 1.0;
 }
 
 /* ----------------------------------------------------------------------
 ------------------------------------------------------------------------- */
 
 void FixGCMC::attempt_atomic_insertion()
 {
   double lamda[3];
 
   ninsertion_attempts += 1.0;
 
   // pick coordinates for insertion point
 
   double coord[3];
   if (regionflag) {
     int region_attempt = 0;
     coord[0] = region_xlo + random_equal->uniform() * (region_xhi-region_xlo);
     coord[1] = region_ylo + random_equal->uniform() * (region_yhi-region_ylo);
     coord[2] = region_zlo + random_equal->uniform() * (region_zhi-region_zlo);
     while (domain->regions[iregion]->match(coord[0],coord[1],coord[2]) == 0) {
       coord[0] = region_xlo + random_equal->uniform() * (region_xhi-region_xlo);
       coord[1] = region_ylo + random_equal->uniform() * (region_yhi-region_ylo);
       coord[2] = region_zlo + random_equal->uniform() * (region_zhi-region_zlo);
       region_attempt++;
       if (region_attempt >= max_region_attempts) return;
     }
     if (triclinic) domain->x2lamda(coord,lamda);
   } else {
     if (triclinic == 0) {
       coord[0] = xlo + random_equal->uniform() * (xhi-xlo);
       coord[1] = ylo + random_equal->uniform() * (yhi-ylo);
       coord[2] = zlo + random_equal->uniform() * (zhi-zlo);
     } else {
       lamda[0] = random_equal->uniform();
       lamda[1] = random_equal->uniform();
       lamda[2] = random_equal->uniform();
 
       // wasteful, but necessary
 
       if (lamda[0] == 1.0) lamda[0] = 0.0;
       if (lamda[1] == 1.0) lamda[1] = 0.0;
       if (lamda[2] == 1.0) lamda[2] = 0.0;
 
       domain->lamda2x(lamda,coord);
     }
   }
 
   int proc_flag = 0;
   if (triclinic == 0) {
     domain->remap(coord);
     if (!domain->inside(coord))
       error->one(FLERR,"Fix gcmc put atom outside box");
     if (coord[0] >= sublo[0] && coord[0] < subhi[0] &&
         coord[1] >= sublo[1] && coord[1] < subhi[1] &&
         coord[2] >= sublo[2] && coord[2] < subhi[2]) proc_flag = 1;
   } else {
     if (lamda[0] >= sublo[0] && lamda[0] < subhi[0] &&
         lamda[1] >= sublo[1] && lamda[1] < subhi[1] &&
         lamda[2] >= sublo[2] && lamda[2] < subhi[2]) proc_flag = 1;
   }
 
   int success = 0;
   if (proc_flag) {
     int ii = -1;
     if (charge_flag) {
       ii = atom->nlocal + atom->nghost;
       if (ii >= atom->nmax) atom->avec->grow(0);
       atom->q[ii] = charge;
     }
     double insertion_energy = energy(ii,ngcmc_type,-1,coord);
 
     if (insertion_energy < MAXENERGYTEST &&
         random_unequal->uniform() <
         zz*volume*exp(-beta*insertion_energy)/(ngas+1)) {
       atom->avec->create_atom(ngcmc_type,coord);
       int m = atom->nlocal - 1;
       
       // add to groups
       // optionally add to type-based groups
       
       atom->mask[m] = groupbitall;
       for (int igroup = 0; igroup < ngrouptypes; igroup++) {
         if (ngcmc_type == grouptypes[igroup])
           atom->mask[m] |= grouptypebits[igroup];
       }
       
       atom->v[m][0] = random_unequal->gaussian()*sigma;
       atom->v[m][1] = random_unequal->gaussian()*sigma;
       atom->v[m][2] = random_unequal->gaussian()*sigma;
       modify->create_attribute(m);
       
       success = 1;
     }
   }
 
   int success_all = 0;
   MPI_Allreduce(&success,&success_all,1,MPI_INT,MPI_MAX,world);
 
   if (success_all) {
     atom->natoms++;
     if (atom->tag_enable) {
       atom->tag_extend();
       if (atom->map_style) atom->map_init();
     }
     atom->nghost = 0;
     if (triclinic) domain->x2lamda(atom->nlocal);
     comm->borders();
     if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
     update_gas_atoms_list();
     ninsertion_successes += 1.0;
   }
 }
 
 /* ----------------------------------------------------------------------
 ------------------------------------------------------------------------- */
 
 void FixGCMC::attempt_molecule_translation()
 {
   ntranslation_attempts += 1.0;
 
   if (ngas == 0) return;
 
   tagint translation_molecule = pick_random_gas_molecule();
   if (translation_molecule == -1) return;
 
   double energy_before_sum = molecule_energy(translation_molecule);
 
   double **x = atom->x;
   double rx,ry,rz;
   double com_displace[3],coord[3];
   double rsq = 1.1;
   while (rsq > 1.0) {
     rx = 2*random_equal->uniform() - 1.0;
     ry = 2*random_equal->uniform() - 1.0;
     rz = 2*random_equal->uniform() - 1.0;
     rsq = rx*rx + ry*ry + rz*rz;
   }
   com_displace[0] = displace*rx;
   com_displace[1] = displace*ry;
   com_displace[2] = displace*rz;
 
   int nlocal = atom->nlocal;
   if (regionflag) {
     int *mask = atom->mask;
     for (int i = 0; i < nlocal; i++) {
       if (atom->molecule[i] == translation_molecule) {
         mask[i] |= molecule_group_bit;
       } else {
         mask[i] &= molecule_group_inversebit;
       }
     }
     double com[3];
     com[0] = com[1] = com[2] = 0.0;
     group->xcm(molecule_group,gas_mass,com);
     coord[0] = com[0] + displace*rx;
     coord[1] = com[1] + displace*ry;
     coord[2] = com[2] + displace*rz;
     while (domain->regions[iregion]->match(coord[0],coord[1],coord[2]) == 0) {
       rsq = 1.1;
       while (rsq > 1.0) {
         rx = 2*random_equal->uniform() - 1.0;
         ry = 2*random_equal->uniform() - 1.0;
         rz = 2*random_equal->uniform() - 1.0;
         rsq = rx*rx + ry*ry + rz*rz;
       }
       coord[0] = com[0] + displace*rx;
       coord[1] = com[1] + displace*ry;
       coord[2] = com[2] + displace*rz;
     }
     com_displace[0] = displace*rx;
     com_displace[1] = displace*ry;
     com_displace[2] = displace*rz;
   }
 
   double energy_after = 0.0;
   for (int i = 0; i < nlocal; i++) {
     if (atom->molecule[i] == translation_molecule) {
       coord[0] = x[i][0] + com_displace[0];
       coord[1] = x[i][1] + com_displace[1];
       coord[2] = x[i][2] + com_displace[2];
       if (!domain->inside_nonperiodic(coord))
         error->one(FLERR,"Fix gcmc put atom outside box");
       energy_after += energy(i,atom->type[i],translation_molecule,coord);
     }
   }
 
   double energy_after_sum = 0.0;
   MPI_Allreduce(&energy_after,&energy_after_sum,1,MPI_DOUBLE,MPI_SUM,world);
 
   if (energy_after_sum < MAXENERGYTEST &&
       random_equal->uniform() <
       exp(beta*(energy_before_sum - energy_after_sum))) {
     for (int i = 0; i < nlocal; i++) {
       if (atom->molecule[i] == translation_molecule) {
         x[i][0] += com_displace[0];
         x[i][1] += com_displace[1];
         x[i][2] += com_displace[2];
       }
     }
     if (triclinic) domain->x2lamda(atom->nlocal);
     domain->pbc();
     comm->exchange();
     atom->nghost = 0;
     comm->borders();
     if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
     update_gas_atoms_list();
     ntranslation_successes += 1.0;
   }
 }
 
 /* ----------------------------------------------------------------------
 ------------------------------------------------------------------------- */
 
 void FixGCMC::attempt_molecule_rotation()
 {
   nrotation_attempts += 1.0;
 
   if (ngas == 0) return;
 
   tagint rotation_molecule = pick_random_gas_molecule();
   if (rotation_molecule == -1) return;
 
   double energy_before_sum = molecule_energy(rotation_molecule);
 
   int nlocal = atom->nlocal;
   int *mask = atom->mask;
   for (int i = 0; i < nlocal; i++) {
     if (atom->molecule[i] == rotation_molecule) {
       mask[i] |= molecule_group_bit;
     } else {
       mask[i] &= molecule_group_inversebit;
     }
   }
 
   double com[3];
   com[0] = com[1] = com[2] = 0.0;
   group->xcm(molecule_group,gas_mass,com);
 
   // generate point in unit cube
   // then restrict to unit sphere
 
   double r[3],rotmat[3][3],quat[4];
   double rsq = 1.1;
   while (rsq > 1.0) {
     r[0] = 2.0*random_equal->uniform() - 1.0;
     r[1] = 2.0*random_equal->uniform() - 1.0;
     r[2] = 2.0*random_equal->uniform() - 1.0;
     rsq = MathExtra::dot3(r, r);
   }
 
   double theta = random_equal->uniform() * max_rotation_angle;
   MathExtra::norm3(r);
   MathExtra::axisangle_to_quat(r,theta,quat);
   MathExtra::quat_to_mat(quat,rotmat);
 
   double **x = atom->x;
   imageint *image = atom->image;
   double energy_after = 0.0;
   int n = 0;
   for (int i = 0; i < nlocal; i++) {
     if (mask[i] & molecule_group_bit) {
       double xtmp[3];
       domain->unmap(x[i],image[i],xtmp);
       xtmp[0] -= com[0];
       xtmp[1] -= com[1];
       xtmp[2] -= com[2];
       MathExtra::matvec(rotmat,xtmp,atom_coord[n]);
       atom_coord[n][0] += com[0];
       atom_coord[n][1] += com[1];
       atom_coord[n][2] += com[2];
       xtmp[0] = atom_coord[n][0];
       xtmp[1] = atom_coord[n][1];
       xtmp[2] = atom_coord[n][2];
       domain->remap(xtmp);
       if (!domain->inside(xtmp))
         error->one(FLERR,"Fix gcmc put atom outside box");
       energy_after += energy(i,atom->type[i],rotation_molecule,xtmp);
       n++;
     }
   }
 
   double energy_after_sum = 0.0;
   MPI_Allreduce(&energy_after,&energy_after_sum,1,MPI_DOUBLE,MPI_SUM,world);
 
   if (energy_after_sum < MAXENERGYTEST &&
       random_equal->uniform() <
       exp(beta*(energy_before_sum - energy_after_sum))) {
     int n = 0;
     for (int i = 0; i < nlocal; i++) {
       if (mask[i] & molecule_group_bit) {
         image[i] = imagezero;
         x[i][0] = atom_coord[n][0];
         x[i][1] = atom_coord[n][1];
         x[i][2] = atom_coord[n][2];
         domain->remap(x[i],image[i]);
         n++;
       }
     }
     if (triclinic) domain->x2lamda(atom->nlocal);
     domain->pbc();
     comm->exchange();
     atom->nghost = 0;
     comm->borders();
     if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
     update_gas_atoms_list();
     nrotation_successes += 1.0;
   }
 }
 
 /* ----------------------------------------------------------------------
 ------------------------------------------------------------------------- */
 
 void FixGCMC::attempt_molecule_deletion()
 {
   ndeletion_attempts += 1.0;
 
   if (ngas == 0) return;
 
   tagint deletion_molecule = pick_random_gas_molecule();
   if (deletion_molecule == -1) return;
 
   double deletion_energy_sum = molecule_energy(deletion_molecule);
 
   if (random_equal->uniform() <
       ngas*exp(beta*deletion_energy_sum)/(zz*volume*natoms_per_molecule)) {
     int i = 0;
     while (i < atom->nlocal) {
       if (atom->molecule[i] == deletion_molecule) {
         atom->avec->copy(atom->nlocal-1,i,1);
         atom->nlocal--;
       } else i++;
     }
     atom->natoms -= natoms_per_molecule;
     if (atom->map_style) atom->map_init();
     atom->nghost = 0;
     if (triclinic) domain->x2lamda(atom->nlocal);
     comm->borders();
     if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
     update_gas_atoms_list();
     ndeletion_successes += 1.0;
   }
 }
 
 /* ----------------------------------------------------------------------
 ------------------------------------------------------------------------- */
 
 void FixGCMC::attempt_molecule_insertion()
 {
   // attempt to insert one copy of molecule template onemols[imol]
   // at a random center-of-mass position with a random orientation
   // each proc evaluates and creates only the atoms inside its sub-domain
 
   double lamda[3];
   ninsertion_attempts += 1.0;
 
   // pick center-of-mass coordinates for the new molecule
   // with a region: sample its bounding box until the point matches,
   // giving up after max_region_attempts redraws
 
   double com_coord[3];
   if (regionflag) {
     int region_attempt = 0;
     com_coord[0] = region_xlo + random_equal->uniform() *
       (region_xhi-region_xlo);
     com_coord[1] = region_ylo + random_equal->uniform() *
       (region_yhi-region_ylo);
     com_coord[2] = region_zlo + random_equal->uniform() *
       (region_zhi-region_zlo);
     while (domain->regions[iregion]->match(com_coord[0],com_coord[1],
                                            com_coord[2]) == 0) {
       com_coord[0] = region_xlo + random_equal->uniform() *
         (region_xhi-region_xlo);
       com_coord[1] = region_ylo + random_equal->uniform() *
         (region_yhi-region_ylo);
       com_coord[2] = region_zlo + random_equal->uniform() *
         (region_zhi-region_zlo);
       region_attempt++;
       if (region_attempt >= max_region_attempts) return;
     }
     if (triclinic) domain->x2lamda(com_coord,lamda);
   } else {
     if (triclinic == 0) {
       com_coord[0] = xlo + random_equal->uniform() * (xhi-xlo);
       com_coord[1] = ylo + random_equal->uniform() * (yhi-ylo);
       com_coord[2] = zlo + random_equal->uniform() * (zhi-zlo);
     } else {
       lamda[0] = random_equal->uniform();
       lamda[1] = random_equal->uniform();
       lamda[2] = random_equal->uniform();
 
       // wasteful, but necessary
 
       if (lamda[0] == 1.0) lamda[0] = 0.0;
       if (lamda[1] == 1.0) lamda[1] = 0.0;
       if (lamda[2] == 1.0) lamda[2] = 0.0;
 
       domain->lamda2x(lamda,com_coord);
     }
   }
 
   // generate point in unit cube
   // then restrict to unit sphere
 
   double r[3],rotmat[3][3],quat[4];
   double rsq = 1.1;
   while (rsq > 1.0) {
     r[0] = 2.0*random_equal->uniform() - 1.0;
     r[1] = 2.0*random_equal->uniform() - 1.0;
     r[2] = 2.0*random_equal->uniform() - 1.0;
     rsq = MathExtra::dot3(r, r);
   }
 
   // random rotation about axis r by an angle between 0 and 2*pi
 
   double theta = random_equal->uniform() * MY_2PI;
   MathExtra::norm3(r);
   MathExtra::axisangle_to_quat(r,theta,quat);
   MathExtra::quat_to_mat(quat,rotmat);
 
   // NOTE(review): procflag is a variable-length array, which is a
   // compiler extension rather than standard C++ - confirm this is acceptable
 
   double insertion_energy = 0.0;
   bool procflag[natoms_per_molecule];
 
   // rotate and translate each template atom into place
   // accumulate the energy of the atoms that fall in this proc's sub-domain
 
   for (int i = 0; i < natoms_per_molecule; i++) {
     MathExtra::matvec(rotmat,onemols[imol]->x[i],atom_coord[i]);
     atom_coord[i][0] += com_coord[0];
     atom_coord[i][1] += com_coord[1];
     atom_coord[i][2] += com_coord[2];
 
     // use temporary variable for remapped position
     // so unmapped position is preserved in atom_coord
 
     double xtmp[3];
     xtmp[0] = atom_coord[i][0];
     xtmp[1] = atom_coord[i][1];
     xtmp[2] = atom_coord[i][2];
     domain->remap(xtmp);
     if (!domain->inside(xtmp))
       error->one(FLERR,"Fix gcmc put atom outside box");
 
     // sub-domain test is done in lamda coords for triclinic boxes
 
     procflag[i] = false;
     if (triclinic == 0) {
       if (xtmp[0] >= sublo[0] && xtmp[0] < subhi[0] &&
           xtmp[1] >= sublo[1] && xtmp[1] < subhi[1] &&
           xtmp[2] >= sublo[2] && xtmp[2] < subhi[2]) procflag[i] = true;
     } else {
       domain->x2lamda(xtmp,lamda);
       if (lamda[0] >= sublo[0] && lamda[0] < subhi[0] &&
           lamda[1] >= sublo[1] && lamda[1] < subhi[1] &&
           lamda[2] >= sublo[2] && lamda[2] < subhi[2]) procflag[i] = true;
     }
 
     // if the template defines charges, store the charge in the slot
     // just past local+ghost atoms and pass that index to energy()
 
     if (procflag[i]) {
       int ii = -1;
       if (onemols[imol]->qflag == 1) {
         ii = atom->nlocal + atom->nghost;
         if (ii >= atom->nmax) atom->avec->grow(0);
         atom->q[ii] = onemols[imol]->q[i];
       }
       insertion_energy += energy(ii,onemols[imol]->type[i],-1,xtmp);
     }
   }
 
   // sum the per-proc energies so every proc makes the same decision
 
   double insertion_energy_sum = 0.0;
   MPI_Allreduce(&insertion_energy,&insertion_energy_sum,1,
                 MPI_DOUBLE,MPI_SUM,world);
 
   if (insertion_energy_sum < MAXENERGYTEST &&
       random_equal->uniform() < zz*volume*natoms_per_molecule*
       exp(-beta*insertion_energy_sum)/(ngas + natoms_per_molecule)) {
       
     // new molecule ID = largest molecule ID on any proc + 1
 
     tagint maxmol = 0;
     for (int i = 0; i < atom->nlocal; i++) maxmol = MAX(maxmol,atom->molecule[i]);
     tagint maxmol_all;
     MPI_Allreduce(&maxmol,&maxmol_all,1,MPI_LMP_TAGINT,MPI_MAX,world);
     maxmol_all++;
     if (maxmol_all >= MAXTAGINT)
       error->all(FLERR,"Fix gcmc ran out of available molecule IDs");
     
     // new atom IDs follow the largest atom ID on any proc
 
     tagint maxtag = 0;
     for (int i = 0; i < atom->nlocal; i++) maxtag = MAX(maxtag,atom->tag[i]);
     tagint maxtag_all;
     MPI_Allreduce(&maxtag,&maxtag_all,1,MPI_LMP_TAGINT,MPI_MAX,world);
     
     int nlocalprev = atom->nlocal;
     
     // one velocity, Gaussian draws scaled by sigma, shared by all new atoms
 
     double vnew[3];
     vnew[0] = random_equal->gaussian()*sigma;
     vnew[1] = random_equal->gaussian()*sigma;
     vnew[2] = random_equal->gaussian()*sigma;
     
     for (int i = 0; i < natoms_per_molecule; i++) {
       if (procflag[i]) {
         atom->avec->create_atom(onemols[imol]->type[i],atom_coord[i]);
         int m = atom->nlocal - 1;
         
         // add to groups
         // optionally add to type-based groups
         
         atom->mask[m] = groupbitall;
         for (int igroup = 0; igroup < ngrouptypes; igroup++) {
           if (ngcmc_type == grouptypes[igroup])
             atom->mask[m] |= grouptypebits[igroup];
         }
         
         // atom was created at its unwrapped position
         // reset image flags, then wrap it into the box
 
         atom->image[m] = imagezero;
         domain->remap(atom->x[m],atom->image[m]);
         atom->molecule[m] = maxmol_all;
 
         // NOTE(review): this error->all() sits inside the procflag branch,
         // so it looks reachable by only a subset of procs - confirm whether
         // the check should be hoisted out of the loop or use error->one()
 
         if (maxtag_all+i+1 >= MAXTAGINT)
           error->all(FLERR,"Fix gcmc ran out of available atom IDs");
         atom->tag[m] = maxtag_all + i + 1;
         atom->v[m][0] = vnew[0];
         atom->v[m][1] = vnew[1];
         atom->v[m][2] = vnew[2];
         
         atom->add_molecule_atom(onemols[imol],i,m,maxtag_all);
         modify->create_attribute(m);
       }
     }
-      
-    if (shakeflag)
+
+    // FixRigidSmall::set_molecule stores rigid body attributes
+    // FixShake::set_molecule stores shake info for molecule
+    
+    if (rigidflag)
+      fixrigid->set_molecule(nlocalprev,maxtag_all,imol,com_coord,vnew,quat);
+    else if (shakeflag)
       fixshake->set_molecule(nlocalprev,maxtag_all,imol,com_coord,vnew,quat);
-      
+
     // update global atom and topology counts, then rebuild map and ghosts
 
     atom->natoms += natoms_per_molecule;
     if (atom->natoms < 0)
       error->all(FLERR,"Too many total atoms");
     atom->nbonds += onemols[imol]->nbonds;
     atom->nangles += onemols[imol]->nangles;
     atom->ndihedrals += onemols[imol]->ndihedrals;
     atom->nimpropers += onemols[imol]->nimpropers;
     if (atom->map_style) atom->map_init();
     atom->nghost = 0;
     if (triclinic) domain->x2lamda(atom->nlocal);
     comm->borders();
     if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
     update_gas_atoms_list();
     ninsertion_successes += 1.0;
   }
 }
 
 /* ----------------------------------------------------------------------
 ------------------------------------------------------------------------- */
 
 void FixGCMC::attempt_atomic_translation_full()
 {
   ntranslation_attempts += 1.0;
 
   if (ngas == 0) return;
 
   double energy_before = energy_stored;
 
   int i = pick_random_gas_atom();
 
   double **x = atom->x;
   double xtmp[3];
 
   xtmp[0] = xtmp[1] = xtmp[2] = 0.0;
 
   tagint tmptag = -1;
 
   if (i >= 0) {
 
     double rsq = 1.1;
     double rx,ry,rz;
     rx = ry = rz = 0.0;
     double coord[3];
     while (rsq > 1.0) {
       rx = 2*random_unequal->uniform() - 1.0;
       ry = 2*random_unequal->uniform() - 1.0;
       rz = 2*random_unequal->uniform() - 1.0;
       rsq = rx*rx + ry*ry + rz*rz;
     }
     coord[0] = x[i][0] + displace*rx;
     coord[1] = x[i][1] + displace*ry;
     coord[2] = x[i][2] + displace*rz;
     if (regionflag) {
       while (domain->regions[iregion]->match(coord[0],coord[1],coord[2]) == 0) {
         rsq = 1.1;
         while (rsq > 1.0) {
           rx = 2*random_unequal->uniform() - 1.0;
           ry = 2*random_unequal->uniform() - 1.0;
           rz = 2*random_unequal->uniform() - 1.0;
           rsq = rx*rx + ry*ry + rz*rz;
         }
         coord[0] = x[i][0] + displace*rx;
         coord[1] = x[i][1] + displace*ry;
         coord[2] = x[i][2] + displace*rz;
       }
     }
     if (!domain->inside_nonperiodic(coord))
       error->one(FLERR,"Fix gcmc put atom outside box");
     xtmp[0] = x[i][0];
     xtmp[1] = x[i][1];
     xtmp[2] = x[i][2];
     x[i][0] = coord[0];
     x[i][1] = coord[1];
     x[i][2] = coord[2];
 
     tmptag = atom->tag[i];
   }
 
   double energy_after = energy_full();
 
   if (energy_after < MAXENERGYTEST &&
       random_equal->uniform() <
       exp(beta*(energy_before - energy_after))) {
     energy_stored = energy_after;
     ntranslation_successes += 1.0;
   } else {
     
     tagint tmptag_all;
     MPI_Allreduce(&tmptag,&tmptag_all,1,MPI_LMP_TAGINT,MPI_MAX,world);
     
     double xtmp_all[3];
     MPI_Allreduce(&xtmp,&xtmp_all,3,MPI_DOUBLE,MPI_SUM,world);
     
     for (int i = 0; i < atom->nlocal; i++) {
       if (tmptag_all == atom->tag[i]) {
         x[i][0] = xtmp_all[0];
         x[i][1] = xtmp_all[1];
         x[i][2] = xtmp_all[2];
       }
     }
     energy_stored = energy_before;
   }
   update_gas_atoms_list();
 }
 
 /* ----------------------------------------------------------------------
 ------------------------------------------------------------------------- */
 
 void FixGCMC::attempt_atomic_deletion_full()
 {
   double q_tmp;
   const int q_flag = atom->q_flag;
 
   ndeletion_attempts += 1.0;
 
   if (ngas == 0) return;
 
   double energy_before = energy_stored;
 
   const int i = pick_random_gas_atom();
 
   int tmpmask;
   if (i >= 0) {
     tmpmask = atom->mask[i];
     atom->mask[i] = exclusion_group_bit;
     if (q_flag) {
       q_tmp = atom->q[i];
       atom->q[i] = 0.0;
     }
   }
   if (force->kspace) force->kspace->qsum_qsq();
   double energy_after = energy_full();
 
   if (random_equal->uniform() <
       ngas*exp(beta*(energy_before - energy_after))/(zz*volume)) {
     if (i >= 0) {
       atom->avec->copy(atom->nlocal-1,i,1);
       atom->nlocal--;
     }
     atom->natoms--;
     if (atom->map_style) atom->map_init();
     ndeletion_successes += 1.0;
     energy_stored = energy_after;
   } else {
     if (i >= 0) {
       atom->mask[i] = tmpmask;
       if (q_flag) atom->q[i] = q_tmp;
     }
     if (force->kspace) force->kspace->qsum_qsq();
     energy_stored = energy_before;
   }
   update_gas_atoms_list();
 }
 
 /* ----------------------------------------------------------------------
    attempt insertion of one gas atom of type ngcmc_type at a random
    position, judged on the change in full system energy
    the atom is created first and removed again if the move is rejected
 ------------------------------------------------------------------------- */
 
 void FixGCMC::attempt_atomic_insertion_full()
 {
   double lamda[3];
   ninsertion_attempts += 1.0;
 
   double energy_before = energy_stored;
 
   // choose trial position
   // with a region: sample the region bounding box until a point matches,
   //   giving up on the whole move once max_region_attempts is reached
   // without a region: sample the box, in lamda coords if triclinic
 
   double coord[3];
   if (regionflag) {
     int region_attempt = 0;
     coord[0] = region_xlo + random_equal->uniform() * (region_xhi-region_xlo);
     coord[1] = region_ylo + random_equal->uniform() * (region_yhi-region_ylo);
     coord[2] = region_zlo + random_equal->uniform() * (region_zhi-region_zlo);
     while (domain->regions[iregion]->match(coord[0],coord[1],coord[2]) == 0) {
       coord[0] = region_xlo + random_equal->uniform() * (region_xhi-region_xlo);
       coord[1] = region_ylo + random_equal->uniform() * (region_yhi-region_ylo);
       coord[2] = region_zlo + random_equal->uniform() * (region_zhi-region_zlo);
       region_attempt++;
       if (region_attempt >= max_region_attempts) return;
     }
     if (triclinic) domain->x2lamda(coord,lamda);
   } else {
     if (triclinic == 0) {
       coord[0] = xlo + random_equal->uniform() * (xhi-xlo);
       coord[1] = ylo + random_equal->uniform() * (yhi-ylo);
       coord[2] = zlo + random_equal->uniform() * (zhi-zlo);
     } else {
       lamda[0] = random_equal->uniform();
       lamda[1] = random_equal->uniform();
       lamda[2] = random_equal->uniform();
 
       // wasteful, but necessary
 
       if (lamda[0] == 1.0) lamda[0] = 0.0;
       if (lamda[1] == 1.0) lamda[1] = 0.0;
       if (lamda[2] == 1.0) lamda[2] = 0.0;
 
       domain->lamda2x(lamda,coord);
     }
   }
 
   // proc_flag = 1 if the trial position lies inside this proc's
   // sub-domain bounds (sublo/subhi), tested in lamda coords if triclinic
 
   int proc_flag = 0;
   if (triclinic == 0) {
     domain->remap(coord);
     if (!domain->inside(coord))
       error->one(FLERR,"Fix gcmc put atom outside box");
     if (coord[0] >= sublo[0] && coord[0] < subhi[0] &&
         coord[1] >= sublo[1] && coord[1] < subhi[1] &&
         coord[2] >= sublo[2] && coord[2] < subhi[2]) proc_flag = 1;
   } else {
     if (lamda[0] >= sublo[0] && lamda[0] < subhi[0] &&
         lamda[1] >= sublo[1] && lamda[1] < subhi[1] &&
         lamda[2] >= sublo[2] && lamda[2] < subhi[2]) proc_flag = 1;
   }
 
   // owning proc creates the atom as its last local atom
 
   if (proc_flag) {
     atom->avec->create_atom(ngcmc_type,coord);
     int m = atom->nlocal - 1;
 
     // add to groups
     // optionally add to type-based groups
 
     atom->mask[m] = groupbitall;
     for (int igroup = 0; igroup < ngrouptypes; igroup++) {
       if (ngcmc_type == grouptypes[igroup])
         atom->mask[m] |= grouptypebits[igroup];
     }
 
     // velocity components are Gaussian draws scaled by sigma
 
     atom->v[m][0] = random_unequal->gaussian()*sigma;
     atom->v[m][1] = random_unequal->gaussian()*sigma;
     atom->v[m][2] = random_unequal->gaussian()*sigma;
     if (charge_flag) atom->q[m] = charge;
     modify->create_attribute(m);
   }
 
   // all procs: update global count, tags and map, rebuild ghost atoms,
   // then evaluate the energy of the trial configuration
 
   atom->natoms++;
   if (atom->tag_enable) {
     atom->tag_extend();
     if (atom->map_style) atom->map_init();
   }
   atom->nghost = 0;
   if (triclinic) domain->x2lamda(atom->nlocal);
   comm->borders();
   if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
   if (force->kspace) force->kspace->qsum_qsq();
   double energy_after = energy_full();
 
   // accept only if the trial energy is below MAXENERGYTEST
   // and the random draw passes the insertion acceptance test
 
   if (energy_after < MAXENERGYTEST &&
       random_equal->uniform() <
       zz*volume*exp(beta*(energy_before - energy_after))/(ngas+1)) {
     
     ninsertion_successes += 1.0;
     energy_stored = energy_after;
   } else {
 
     // rejected: drop the trial atom, which is the last local atom
     // on the proc that created it
 
     atom->natoms--;
     if (proc_flag) atom->nlocal--;
     if (force->kspace) force->kspace->qsum_qsq();
     energy_stored = energy_before;
   }
   update_gas_atoms_list();
 }
 
 /* ----------------------------------------------------------------------
    attempt rigid translation of one randomly picked gas molecule
    by a random vector of length <= displace, judged on the change
    in full system energy
 ------------------------------------------------------------------------- */
 
 void FixGCMC::attempt_molecule_translation_full()
 {
   ntranslation_attempts += 1.0;
 
   if (ngas == 0) return;
 
   // NOTE(review): pick_random_gas_molecule() as written in this file
   //   starts from ID 0 and reduces with MPI_MAX, so it does not appear
   //   to ever return -1 - confirm which sentinel is intended
 
   tagint translation_molecule = pick_random_gas_molecule();
   if (translation_molecule == -1) return;
 
   double energy_before = energy_stored;
 
   // random point inside the unit sphere by rejection from the cube [-1,1]^3
 
   double **x = atom->x;
   double rx,ry,rz;
   double com_displace[3],coord[3];
   double rsq = 1.1;
   while (rsq > 1.0) {
     rx = 2*random_equal->uniform() - 1.0;
     ry = 2*random_equal->uniform() - 1.0;
     rz = 2*random_equal->uniform() - 1.0;
     rsq = rx*rx + ry*ry + rz*rz;
   }
   com_displace[0] = displace*rx;
   com_displace[1] = displace*ry;
   com_displace[2] = displace*rz;
 
   // with a region: mark the molecule's atoms via molecule_group_bit,
   // get its center of mass, and redraw the displacement until the
   // displaced center of mass lies inside the region
   // NOTE(review): this retry loop has no attempt limit
 
   int nlocal = atom->nlocal;
   if (regionflag) {
     int *mask = atom->mask;
     for (int i = 0; i < nlocal; i++) {
       if (atom->molecule[i] == translation_molecule) {
         mask[i] |= molecule_group_bit;
       } else {
         mask[i] &= molecule_group_inversebit;
       }
     }
     double com[3];
     com[0] = com[1] = com[2] = 0.0;
     group->xcm(molecule_group,gas_mass,com);
     coord[0] = com[0] + displace*rx;
     coord[1] = com[1] + displace*ry;
     coord[2] = com[2] + displace*rz;
     while (domain->regions[iregion]->match(coord[0],coord[1],coord[2]) == 0) {
       rsq = 1.1;
       while (rsq > 1.0) {
         rx = 2*random_equal->uniform() - 1.0;
         ry = 2*random_equal->uniform() - 1.0;
         rz = 2*random_equal->uniform() - 1.0;
         rsq = rx*rx + ry*ry + rz*rz;
       }
       coord[0] = com[0] + displace*rx;
       coord[1] = com[1] + displace*ry;
       coord[2] = com[2] + displace*rz;
     }
     com_displace[0] = displace*rx;
     com_displace[1] = displace*ry;
     com_displace[2] = displace*rz;
   }
 
   // shift every local atom of the molecule by the same vector
 
   for (int i = 0; i < nlocal; i++) {
     if (atom->molecule[i] == translation_molecule) {
       x[i][0] += com_displace[0];
       x[i][1] += com_displace[1];
       x[i][2] += com_displace[2];
       if (!domain->inside_nonperiodic(x[i]))
         error->one(FLERR,"Fix gcmc put atom outside box");
     }
   }
 
   double energy_after = energy_full();
 
   // accept only if the trial energy is below MAXENERGYTEST
   // and the random draw passes the Boltzmann-factor test
 
   if (energy_after < MAXENERGYTEST &&
       random_equal->uniform() <
       exp(beta*(energy_before - energy_after))) {
     ntranslation_successes += 1.0;
     energy_stored = energy_after;
   } else {
 
     // rejected: shift the molecule's atoms back
     // NOTE(review): reuses x and nlocal cached before energy_full(),
     //   which calls comm->exchange() - confirm they are still valid
 
     energy_stored = energy_before;
     for (int i = 0; i < nlocal; i++) {
       if (atom->molecule[i] == translation_molecule) {
         x[i][0] -= com_displace[0];
         x[i][1] -= com_displace[1];
         x[i][2] -= com_displace[2];
       }
     }
   }
   update_gas_atoms_list();
 }
 
 /* ----------------------------------------------------------------------
    attempt rotation of one randomly picked gas molecule about its
    center of mass, around a random axis by a random angle up to
    max_rotation_angle, judged on the change in full system energy
 ------------------------------------------------------------------------- */
 
 void FixGCMC::attempt_molecule_rotation_full()
 {
   nrotation_attempts += 1.0;
 
   if (ngas == 0) return;
 
   // NOTE(review): pick_random_gas_molecule() as written in this file
   //   does not appear to ever return -1 - confirm the sentinel
 
   tagint rotation_molecule = pick_random_gas_molecule();
   if (rotation_molecule == -1) return;
 
   double energy_before = energy_stored;
 
   // flag exactly the atoms of the chosen molecule with molecule_group_bit
 
   int nlocal = atom->nlocal;
   int *mask = atom->mask;
   for (int i = 0; i < nlocal; i++) {
     if (atom->molecule[i] == rotation_molecule) {
       mask[i] |= molecule_group_bit;
     } else {
       mask[i] &= molecule_group_inversebit;
     }
   }
 
   double com[3];
   com[0] = com[1] = com[2] = 0.0;
   group->xcm(molecule_group,gas_mass,com);
 
   // generate point in unit cube
   // then restrict to unit sphere
 
   double r[3],rotmat[3][3],quat[4];
   double rsq = 1.1;
   while (rsq > 1.0) {
     r[0] = 2.0*random_equal->uniform() - 1.0;
     r[1] = 2.0*random_equal->uniform() - 1.0;
     r[2] = 2.0*random_equal->uniform() - 1.0;
     rsq = MathExtra::dot3(r, r);
   }
 
   // normalized r is the rotation axis, theta the rotation angle
 
   double theta = random_equal->uniform() * max_rotation_angle;
   MathExtra::norm3(r);
   MathExtra::axisangle_to_quat(r,theta,quat);
   MathExtra::quat_to_mat(quat,rotmat);
 
   // for each flagged local atom: save position (atom_coord) and image
   // flag (image_orig), rotate the unwrapped position about the center
   // of mass, then remap it into the box with fresh image flags
   // NOTE(review): image_orig is a run-time-sized stack array holding
   //   natoms_per_molecule entries
 
   double **x = atom->x;
   imageint *image = atom->image;
   imageint image_orig[natoms_per_molecule];
   int n = 0;
   for (int i = 0; i < nlocal; i++) {
     if (mask[i] & molecule_group_bit) {
       atom_coord[n][0] = x[i][0];
       atom_coord[n][1] = x[i][1];
       atom_coord[n][2] = x[i][2];
       image_orig[n] = image[i];
       double xtmp[3];
       domain->unmap(x[i],image[i],xtmp);
       xtmp[0] -= com[0];
       xtmp[1] -= com[1];
       xtmp[2] -= com[2];
       MathExtra::matvec(rotmat,xtmp,x[i]);
       x[i][0] += com[0];
       x[i][1] += com[1];
       x[i][2] += com[2];
       image[i] = imagezero;
       domain->remap(x[i],image[i]);
       if (!domain->inside(x[i]))
         error->one(FLERR,"Fix gcmc put atom outside box");
       n++;
     }
   }
 
   double energy_after = energy_full();
 
   // accept only if the trial energy is below MAXENERGYTEST
   // and the random draw passes the Boltzmann-factor test
 
   if (energy_after < MAXENERGYTEST &&
       random_equal->uniform() <
       exp(beta*(energy_before - energy_after))) {
     nrotation_successes += 1.0;
     energy_stored = energy_after;
   } else {
 
     // rejected: restore saved positions and image flags,
     // walking the flagged atoms in the same order as when saving
 
     energy_stored = energy_before;
     int n = 0;
     for (int i = 0; i < nlocal; i++) {
       if (mask[i] & molecule_group_bit) {
         x[i][0] = atom_coord[n][0];
         x[i][1] = atom_coord[n][1];
         x[i][2] = atom_coord[n][2];
         image[i] = image_orig[n];
         n++;
       }
     }
   }
   update_gas_atoms_list();
 }
 
 /* ----------------------------------------------------------------------
    attempt deletion of one randomly picked gas molecule
    judged on the change in full system energy, with the stored
    energy corrected by energy_intra
 ------------------------------------------------------------------------- */
 
 void FixGCMC::attempt_molecule_deletion_full()
 {
   ndeletion_attempts += 1.0;
 
   if (ngas == 0) return;
 
   // NOTE(review): pick_random_gas_molecule() as written in this file
   //   does not appear to ever return -1 - confirm the sentinel
 
   tagint deletion_molecule = pick_random_gas_molecule();
   if (deletion_molecule == -1) return;
 
   double energy_before = energy_stored;
 
   // trial state: for each local atom of the molecule, save its mask
   // (indexed by local atom) and charge (indexed by order of occurrence),
   // move it into the exclusion group, negate its bonded interaction
   // types and zero its charge
   // NOTE(review): q_tmp and tmpmask are run-time-sized stack arrays
 
   int m = 0;
   double q_tmp[natoms_per_molecule];
   int tmpmask[atom->nlocal];
   for (int i = 0; i < atom->nlocal; i++) {
     if (atom->molecule[i] == deletion_molecule) {
       tmpmask[i] = atom->mask[i];
       atom->mask[i] = exclusion_group_bit;
       toggle_intramolecular(i);
       if (atom->q_flag) {
         q_tmp[m] = atom->q[i];
         m++;
         atom->q[i] = 0.0;
       }
     }
   }
   if (force->kspace) force->kspace->qsum_qsq();
   double energy_after = energy_full();
 
   // energy_before corrected by energy_intra
 
   double deltaphi = ngas*exp(beta*((energy_before - energy_intra) - energy_after))/(zz*volume*natoms_per_molecule);
 
   if (random_equal->uniform() < deltaphi) {
 
     // accepted: remove the molecule's local atoms by copying the last
     // local atom over each one; i is not advanced after a copy so the
     // atom moved into slot i is examined as well
 
     int i = 0;
     while (i < atom->nlocal) {
       if (atom->molecule[i] == deletion_molecule) {
         atom->avec->copy(atom->nlocal-1,i,1);
         atom->nlocal--;
       } else i++;
     }
     atom->natoms -= natoms_per_molecule;
     if (atom->map_style) atom->map_init();
     ndeletion_successes += 1.0;
     energy_stored = energy_after;
   } else {
 
     // rejected: restore masks, bonded interaction types and charges
 
     energy_stored = energy_before;
     int m = 0;
     for (int i = 0; i < atom->nlocal; i++) {
       if (atom->molecule[i] == deletion_molecule) {
         atom->mask[i] = tmpmask[i];
         toggle_intramolecular(i);
         if (atom->q_flag) {
           atom->q[i] = q_tmp[m];
           m++;
         }
       }
     }
     if (force->kspace) force->kspace->qsum_qsq();
   }
   update_gas_atoms_list();
 }
 
 /* ----------------------------------------------------------------------
    attempt insertion of one gas molecule (template onemols[imol]) at a
    random position with a random orientation, judged on the change in
    full system energy, with the trial energy corrected by energy_intra
    the atoms are created first and removed again if the move is rejected
 ------------------------------------------------------------------------- */
 
 void FixGCMC::attempt_molecule_insertion_full()
 {
   double lamda[3];
   ninsertion_attempts += 1.0;
 
   double energy_before = energy_stored;
 
   // new molecule ID = largest molecule ID over all procs + 1
   // NOTE(review): maxmol_all is a tagint but insertion_molecule is an
   //   int - looks like this could truncate if tagint is wider than int;
   //   confirm
 
   tagint maxmol = 0;
   for (int i = 0; i < atom->nlocal; i++) maxmol = MAX(maxmol,atom->molecule[i]);
   tagint maxmol_all;
   MPI_Allreduce(&maxmol,&maxmol_all,1,MPI_LMP_TAGINT,MPI_MAX,world);
   maxmol_all++;
   if (maxmol_all >= MAXTAGINT)
     error->all(FLERR,"Fix gcmc ran out of available molecule IDs");
   int insertion_molecule = maxmol_all;
 
   // largest atom ID over all procs, new atoms get IDs above it
 
   tagint maxtag = 0;
   for (int i = 0; i < atom->nlocal; i++) maxtag = MAX(maxtag,atom->tag[i]);
   tagint maxtag_all;
   MPI_Allreduce(&maxtag,&maxtag_all,1,MPI_LMP_TAGINT,MPI_MAX,world);
 
   int nlocalprev = atom->nlocal;
 
   // choose trial center-of-mass position
   // with a region: sample the region bounding box until a point matches,
   //   giving up on the whole move once max_region_attempts is reached
   // without a region: sample the box, in lamda coords if triclinic
 
   double com_coord[3];
   if (regionflag) {
     int region_attempt = 0;
     com_coord[0] = region_xlo + random_equal->uniform() *
       (region_xhi-region_xlo);
     com_coord[1] = region_ylo + random_equal->uniform() *
       (region_yhi-region_ylo);
     com_coord[2] = region_zlo + random_equal->uniform() *
       (region_zhi-region_zlo);
     while (domain->regions[iregion]->match(com_coord[0],com_coord[1],
                                            com_coord[2]) == 0) {
       com_coord[0] = region_xlo + random_equal->uniform() *
         (region_xhi-region_xlo);
       com_coord[1] = region_ylo + random_equal->uniform() *
         (region_yhi-region_ylo);
       com_coord[2] = region_zlo + random_equal->uniform() *
         (region_zhi-region_zlo);
       region_attempt++;
       if (region_attempt >= max_region_attempts) return;
     }
     if (triclinic) domain->x2lamda(com_coord,lamda);
   } else {
     if (triclinic == 0) {
       com_coord[0] = xlo + random_equal->uniform() * (xhi-xlo);
       com_coord[1] = ylo + random_equal->uniform() * (yhi-ylo);
       com_coord[2] = zlo + random_equal->uniform() * (zhi-zlo);
     } else {
       lamda[0] = random_equal->uniform();
       lamda[1] = random_equal->uniform();
       lamda[2] = random_equal->uniform();
 
       // wasteful, but necessary
 
       if (lamda[0] == 1.0) lamda[0] = 0.0;
       if (lamda[1] == 1.0) lamda[1] = 0.0;
       if (lamda[2] == 1.0) lamda[2] = 0.0;
 
       domain->lamda2x(lamda,com_coord);
     }
 
   }
 
   // generate point in unit cube
   // then restrict to unit sphere
 
   double r[3],rotmat[3][3],quat[4];
   double rsq = 1.1;
   while (rsq > 1.0) {
     r[0] = 2.0*random_equal->uniform() - 1.0;
     r[1] = 2.0*random_equal->uniform() - 1.0;
     r[2] = 2.0*random_equal->uniform() - 1.0;
     rsq = MathExtra::dot3(r, r);
   }
 
   // normalized r is the rotation axis, theta in [0,2pi) the angle
 
   double theta = random_equal->uniform() * MY_2PI;
   MathExtra::norm3(r);
   MathExtra::axisangle_to_quat(r,theta,quat);
   MathExtra::quat_to_mat(quat,rotmat);
 
   // one velocity, shared by all atoms of the new molecule
 
   double vnew[3];
   vnew[0] = random_equal->gaussian()*sigma;
   vnew[1] = random_equal->gaussian()*sigma;
   vnew[2] = random_equal->gaussian()*sigma;
 
   // place each template atom: rotate its template coords, shift by the
   // trial center of mass, remap into the box, and create it on the proc
   // whose sub-domain contains it
 
   for (int i = 0; i < natoms_per_molecule; i++) {
     double xtmp[3];
     MathExtra::matvec(rotmat,onemols[imol]->x[i],xtmp);
     xtmp[0] += com_coord[0];
     xtmp[1] += com_coord[1];
     xtmp[2] += com_coord[2];
 
     // need to adjust image flags in remap()
 
     imageint imagetmp = imagezero;
     domain->remap(xtmp,imagetmp);
     if (!domain->inside(xtmp))
       error->one(FLERR,"Fix gcmc put atom outside box");
 
     int proc_flag = 0;
     if (triclinic == 0) {
       if (xtmp[0] >= sublo[0] && xtmp[0] < subhi[0] &&
           xtmp[1] >= sublo[1] && xtmp[1] < subhi[1] &&
           xtmp[2] >= sublo[2] && xtmp[2] < subhi[2]) proc_flag = 1;
     } else {
       domain->x2lamda(xtmp,lamda);
       if (lamda[0] >= sublo[0] && lamda[0] < subhi[0] &&
           lamda[1] >= sublo[1] && lamda[1] < subhi[1] &&
           lamda[2] >= sublo[2] && lamda[2] < subhi[2]) proc_flag = 1;
     }
 
     if (proc_flag) {
       atom->avec->create_atom(onemols[imol]->type[i],xtmp);
       int m = atom->nlocal - 1;
 
       // add to groups
       // optionally add to type-based groups
 
       atom->mask[m] = groupbitall;
       for (int igroup = 0; igroup < ngrouptypes; igroup++) {
         if (ngcmc_type == grouptypes[igroup])
           atom->mask[m] |= grouptypebits[igroup];
       }
 
       atom->image[m] = imagetmp;
       atom->molecule[m] = insertion_molecule;
       if (maxtag_all+i+1 >= MAXTAGINT)
         error->all(FLERR,"Fix gcmc ran out of available atom IDs");
       atom->tag[m] = maxtag_all + i + 1;
       atom->v[m][0] = vnew[0];
       atom->v[m][1] = vnew[1];
       atom->v[m][2] = vnew[2];
 
       atom->add_molecule_atom(onemols[imol],i,m,maxtag_all);
       modify->create_attribute(m);
     }
   }
 
-  if (shakeflag)
+  // FixRigidSmall::set_molecule stores rigid body attributes
+  // FixShake::set_molecule stores shake info for molecule
+
+  if (rigidflag)
+    fixrigid->set_molecule(nlocalprev,maxtag_all,imol,com_coord,vnew,quat);
+  else if (shakeflag)
     fixshake->set_molecule(nlocalprev,maxtag_all,imol,com_coord,vnew,quat);
 
   // all procs: update global atom and topology counts and the map,
   // rebuild ghost atoms, then evaluate the trial configuration
 
   atom->natoms += natoms_per_molecule;
   if (atom->natoms < 0)
     error->all(FLERR,"Too many total atoms");
   atom->nbonds += onemols[imol]->nbonds;
   atom->nangles += onemols[imol]->nangles;
   atom->ndihedrals += onemols[imol]->ndihedrals;
   atom->nimpropers += onemols[imol]->nimpropers;
   if (atom->map_style) atom->map_init();
   atom->nghost = 0;
   if (triclinic) domain->x2lamda(atom->nlocal);
   comm->borders();
   if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
   if (force->kspace) force->kspace->qsum_qsq();
   double energy_after = energy_full();
 
   // energy_after corrected by energy_intra
 
   double deltaphi = zz*volume*natoms_per_molecule*
     exp(beta*(energy_before - (energy_after - energy_intra)))/(ngas + natoms_per_molecule);
 
   if (energy_after < MAXENERGYTEST &&
       random_equal->uniform() < deltaphi) {
 
     ninsertion_successes += 1.0;
     energy_stored = energy_after;
 
   } else {
 
     // rejected: roll back the global counts and remove every local atom
     // carrying the new molecule ID, overwriting it with the last local atom
 
     atom->nbonds -= onemols[imol]->nbonds;
     atom->nangles -= onemols[imol]->nangles;
     atom->ndihedrals -= onemols[imol]->ndihedrals;
     atom->nimpropers -= onemols[imol]->nimpropers;
     atom->natoms -= natoms_per_molecule;
 
     energy_stored = energy_before;
     int i = 0;
     while (i < atom->nlocal) {
       if (atom->molecule[i] == insertion_molecule) {
         atom->avec->copy(atom->nlocal-1,i,1);
         atom->nlocal--;
       } else i++;
     }
     if (force->kspace) force->kspace->qsum_qsq();
   }
   update_gas_atoms_list();
 }
 
 /* ----------------------------------------------------------------------
    compute particle's interaction energy with the rest of the system
    i = index of the particle, skipped as a partner
    itype = atom type used for the particle
    imolecule = molecule ID whose atoms are skipped in MOLECULE mode
    coord = position at which the particle is evaluated
    returns sum of pair->single() over local+ghost atoms within the
      pair cutoff, or MAXENERGYSIGNAL if the overlap check triggers
 ------------------------------------------------------------------------- */
 
 double FixGCMC::energy(int i, int itype, tagint imolecule, double *coord)
 {
   double delx,dely,delz,rsq;
 
   double **x = atom->x;
   int *type = atom->type;
   tagint *molecule = atom->molecule;
   int nall = atom->nlocal + atom->nghost;
   pair = force->pair;
   cutsq = force->pair->cutsq;
 
   // fpair receives an output of pair->single() that is not used here
 
   double fpair = 0.0;
   double factor_coul = 1.0;
   double factor_lj = 1.0;
 
   double total_energy = 0.0;
 
   for (int j = 0; j < nall; j++) {
 
     if (i == j) continue;
     if (mode == MOLECULE)
       if (imolecule == molecule[j]) continue;
 
     delx = coord[0] - x[j][0];
     dely = coord[1] - x[j][1];
     delz = coord[2] - x[j][2];
     rsq = delx*delx + dely*dely + delz*delz;
     int jtype = type[j];
 
     // if overlap check requested, if overlap,
-    // return signal value = MAXENERGYSIGNAL 
+    // return signal value for energy 
 
     // NOTE(review): rsq is a squared distance, so overlap_cutoff is
     //   presumably stored squared - confirm where it is set
 
     if (overlap_flag && rsq < overlap_cutoff)
       return MAXENERGYSIGNAL;
     
     if (rsq < cutsq[itype][jtype])
       total_energy +=
         pair->single(i,j,itype,jtype,rsq,factor_coul,factor_lj,fpair);
   }
 
   return total_energy;
 }
 
 /* ----------------------------------------------------------------------
    interaction energy of one gas molecule at its current coordinates
    each proc adds up energy() for the atoms of the molecule it owns,
    then the partial sums are combined over all procs
 ------------------------------------------------------------------------- */
 
 double FixGCMC::molecule_energy(tagint gas_molecule_id)
 {
   double local_sum = 0.0;
   double global_sum = 0.0;
 
   for (int iatom = 0; iatom < atom->nlocal; iatom++) {
     if (atom->molecule[iatom] != gas_molecule_id) continue;
     local_sum += energy(iatom,atom->type[iatom],gas_molecule_id,
                         atom->x[iatom]);
   }
 
   MPI_Allreduce(&local_sum,&global_sum,1,MPI_DOUBLE,MPI_SUM,world);
 
   return global_sum;
 }
 
 /* ----------------------------------------------------------------------
    compute system potential energy
    re-wraps and re-exchanges atoms, rebuilds ghost atoms and neighbor
    lists, runs the pair/bonded/kspace computes and the force-stage
    fix hooks, then returns c_pe->compute_scalar()
    returns MAXENERGYSIGNAL instead if the overlap check triggers
 ------------------------------------------------------------------------- */
 
 double FixGCMC::energy_full()
 {
   // bring atoms back into the periodic box and onto their owning procs,
   // then rebuild ghost atoms and neighbor lists
   // triclinic boxes do this in lamda coords
 
   if (triclinic) domain->x2lamda(atom->nlocal);
   domain->pbc();
   comm->exchange();
   atom->nghost = 0;
   comm->borders();
   if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
   if (modify->n_pre_neighbor) modify->pre_neighbor();
   neighbor->build();
   int eflag = 1;
   int vflag = 0;
 
   // if overlap check requested, if overlap,
-  // return signal value = MAXENERGYSIGNAL 
+  // return signal value for energy 
 
   // all-pairs check of each local atom against higher-indexed local and
   // ghost atoms, skipping pairs within one molecule in MOLECULE mode
   // NOTE(review): molecule[] holds tagint but imolecule is an int -
   //   looks like it could truncate if tagint is wider than int; confirm
   // NOTE(review): rsq is a squared distance, so overlap_cutoff is
   //   presumably stored squared - confirm where it is set
 
   if (overlap_flag) {
     double delx,dely,delz,rsq;
     double **x = atom->x;
     tagint *molecule = atom->molecule;
     int nall = atom->nlocal + atom->nghost;
     for (int i = 0; i < atom->nlocal; i++) {
       int imolecule = molecule[i];
       for (int j = i+1; j < nall; j++) {
 
         if (mode == MOLECULE)
           if (imolecule == molecule[j]) continue;
       
         delx = x[i][0] - x[j][0];
         dely = x[i][1] - x[j][1];
         delz = x[i][2] - x[j][2];
         rsq = delx*delx + dely*dely + delz*delz;
       
         if (rsq < overlap_cutoff) return MAXENERGYSIGNAL;
       }
     }
   }
   
   // clear forces so they don't accumulate over multiple
   // calls within fix gcmc timestep, e.g. for fix shake
   
   // nbytes covers one double per local+ghost atom, memset clears 3x that,
   // starting at the address of f[0][0]
 
   size_t nbytes = sizeof(double) * (atom->nlocal + atom->nghost);
   if (nbytes) memset(&atom->f[0][0],0,3*nbytes);
 
   if (modify->n_pre_force) modify->pre_force(vflag);
 
   if (force->pair) force->pair->compute(eflag,vflag);
 
   if (atom->molecular) {
     if (force->bond) force->bond->compute(eflag,vflag);
     if (force->angle) force->angle->compute(eflag,vflag);
     if (force->dihedral) force->dihedral->compute(eflag,vflag);
     if (force->improper) force->improper->compute(eflag,vflag);
   }
 
   if (force->kspace) force->kspace->compute(eflag,vflag);
 
   // unlike Verlet, not performing a reverse_comm() of forces here
   // b/c GCMC does not care about forces
   // don't think it will mess up energy due to any post_force() fixes
 
   if (modify->n_post_force) modify->post_force(vflag);
   if (modify->n_end_of_step) modify->end_of_step();
 
   // NOTE: all fixes with THERMO_ENERGY mask set and which
   //   operate at pre_force() or post_force() or end_of_step()
   //   and which user has enabled via fix_modify thermo yes,
   //   will contribute to total MC energy via pe->compute_scalar()
 
   // mark the global energy as tallied on this timestep before querying c_pe
 
   update->eflag_global = update->ntimestep;
   double total_energy = c_pe->compute_scalar();
 
   return total_energy;
 }
 
 /* ----------------------------------------------------------------------
    pick one gas atom at random out of the global list
    returns its local index on the proc whose slice of the global list
    contains the pick, -1 on every other proc
 ------------------------------------------------------------------------- */
 
 int FixGCMC::pick_random_gas_atom()
 {
   const int global_pick = static_cast<int> (ngas*random_equal->uniform());
 
   // this proc's gas atoms occupy global slots
   // ngas_before .. ngas_before+ngas_local-1
 
   const int local_pick = global_pick - ngas_before;
   if (local_pick < 0 || local_pick >= ngas_local) return -1;
 
   return local_gas_list[local_pick];
 }
 
 /* ----------------------------------------------------------------------
    pick one gas atom at random out of the global list and return the
    ID of the molecule it belongs to, identical on all procs
    procs that do not own the picked atom contribute 0 to the MAX reduction
 ------------------------------------------------------------------------- */
 
 tagint FixGCMC::pick_random_gas_molecule()
 {
   const int global_pick = static_cast<int> (ngas*random_equal->uniform());
   const int local_pick = global_pick - ngas_before;
 
   tagint my_molecule_id = 0;
   if (local_pick >= 0 && local_pick < ngas_local) {
     const int iatom = local_gas_list[local_pick];
     my_molecule_id = atom->molecule[iatom];
   }
 
   tagint picked_molecule_id = 0;
   MPI_Allreduce(&my_molecule_id,&picked_molecule_id,1,
                 MPI_LMP_TAGINT,MPI_MAX,world);
 
   return picked_molecule_id;
 }
 
 /* ----------------------------------------------------------------------
    flip the sign of every bond, angle, dihedral and improper type
    stored with local atom i, for each interaction class the atom
    style allows
    calling it a second time restores the original types
 ------------------------------------------------------------------------- */
 
 void FixGCMC::toggle_intramolecular(int i)
 {
   if (atom->avec->bonds_allow) {
     const int nbond = atom->num_bond[i];
     int *btype = atom->bond_type[i];
     for (int k = 0; k < nbond; k++) btype[k] = -btype[k];
   }
 
   if (atom->avec->angles_allow) {
     const int nangle = atom->num_angle[i];
     int *atype = atom->angle_type[i];
     for (int k = 0; k < nangle; k++) atype[k] = -atype[k];
   }
 
   if (atom->avec->dihedrals_allow) {
     const int ndihedral = atom->num_dihedral[i];
     int *dtype = atom->dihedral_type[i];
     for (int k = 0; k < ndihedral; k++) dtype[k] = -dtype[k];
   }
 
   if (atom->avec->impropers_allow) {
     const int nimproper = atom->num_improper[i];
     int *itype = atom->improper_type[i];
     for (int k = 0; k < nimproper; k++) itype[k] = -itype[k];
   }
 }
 
 /* ----------------------------------------------------------------------
    update the list of gas atoms
 ------------------------------------------------------------------------- */
 
 void FixGCMC::update_gas_atoms_list()
 {
   int nlocal = atom->nlocal;
   int *mask = atom->mask;
   tagint *molecule = atom->molecule;
   double **x = atom->x;
 
   if (atom->nmax > gcmc_nmax) {
     memory->sfree(local_gas_list);
     gcmc_nmax = atom->nmax;
     local_gas_list = (int *) memory->smalloc(gcmc_nmax*sizeof(int),
      "GCMC:local_gas_list");
   }
 
   ngas_local = 0;
 
   if (regionflag) {
 
     if (mode == MOLECULE) {
 
       tagint maxmol = 0;
       for (int i = 0; i < nlocal; i++) maxmol = MAX(maxmol,molecule[i]);
       tagint maxmol_all;
       MPI_Allreduce(&maxmol,&maxmol_all,1,MPI_LMP_TAGINT,MPI_MAX,world);
       double comx[maxmol_all];
       double comy[maxmol_all];
       double comz[maxmol_all];
       for (int imolecule = 0; imolecule < maxmol_all; imolecule++) {
         for (int i = 0; i < nlocal; i++) {
           if (molecule[i] == imolecule) {
             mask[i] |= molecule_group_bit;
           } else {
             mask[i] &= molecule_group_inversebit;
           }
         }
         double com[3];
         com[0] = com[1] = com[2] = 0.0;
         group->xcm(molecule_group,gas_mass,com);
 
         // remap unwrapped com into periodic box
         
         domain->remap(com);
         comx[imolecule] = com[0];
         comy[imolecule] = com[1];
         comz[imolecule] = com[2];
       }
 
       for (int i = 0; i < nlocal; i++) {
         if (mask[i] & groupbit) {
           if (domain->regions[iregion]->match(comx[molecule[i]],
              comy[molecule[i]],comz[molecule[i]]) == 1) {
             local_gas_list[ngas_local] = i;
             ngas_local++;
           }
         }
       }
 
     } else {
       for (int i = 0; i < nlocal; i++) {
         if (mask[i] & groupbit) {
           if (domain->regions[iregion]->match(x[i][0],x[i][1],x[i][2]) == 1) {
             local_gas_list[ngas_local] = i;
             ngas_local++;
           }
         }
       }
     }
 
   } else {
     for (int i = 0; i < nlocal; i++) {
       if (mask[i] & groupbit) {
         local_gas_list[ngas_local] = i;
         ngas_local++;
       }
     }
   }
 
   MPI_Allreduce(&ngas_local,&ngas,1,MPI_INT,MPI_SUM,world);
   MPI_Scan(&ngas_local,&ngas_before,1,MPI_INT,MPI_SUM,world);
   ngas_before -= ngas_local;
 }
 
 /* ----------------------------------------------------------------------
   return attempt and success counters of the MC moves
   n = 0/1 translation, 2/3 insertion, 4/5 deletion, 6/7 rotation,
   even index = attempts, odd index = successes
   any other index returns 0.0
 ------------------------------------------------------------------------- */
 
 double FixGCMC::compute_vector(int n)
 {
   switch (n) {
   case 0: return ntranslation_attempts;
   case 1: return ntranslation_successes;
   case 2: return ninsertion_attempts;
   case 3: return ninsertion_successes;
   case 4: return ndeletion_attempts;
   case 5: return ndeletion_successes;
   case 6: return nrotation_attempts;
   case 7: return nrotation_successes;
   default: return 0.0;
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage of local atom-based arrays
    counts gcmc_nmax ints, the allocated length of local_gas_list
 ------------------------------------------------------------------------- */
 
 double FixGCMC::memory_usage()
 {
   return static_cast<double> (gcmc_nmax * sizeof(int));
 }
 
 /* ----------------------------------------------------------------------
    pack entire state of Fix into one write
    record = byte count, then state of both random number generators
    and next_reneighbor, all stored as doubles
    every proc packs the values, only proc 0 writes them
 ------------------------------------------------------------------------- */
 
 void FixGCMC::write_restart(FILE *fp)
 {
   double list[4];
   int nvalues = 0;
   list[nvalues++] = random_equal->state();
   list[nvalues++] = random_unequal->state();
   list[nvalues++] = next_reneighbor;
 
   if (comm->me != 0) return;
 
   int nbytes = nvalues * sizeof(double);
   fwrite(&nbytes,sizeof(int),1,fp);
   fwrite(list,sizeof(double),nvalues,fp);
 }
 
 /* ----------------------------------------------------------------------
    use state info from restart file to restart the Fix
    buf holds doubles in the order written by write_restart():
    state of random_equal, state of random_unequal, next_reneighbor
 ------------------------------------------------------------------------- */
 
 void FixGCMC::restart(char *buf)
 {
   double *values = (double *) buf;
 
   // seed is left holding the random_unequal state, as the last one read
 
   seed = static_cast<int> (values[0]);
   random_equal->reset(seed);
 
   seed = static_cast<int> (values[1]);
   random_unequal->reset(seed);
 
   next_reneighbor = static_cast<int> (values[2]);
 }
diff --git a/src/MC/fix_gcmc.h b/src/MC/fix_gcmc.h
index 8261f6e38..9b2184dda 100644
--- a/src/MC/fix_gcmc.h
+++ b/src/MC/fix_gcmc.h
@@ -1,299 +1,299 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef FIX_CLASS
 
 FixStyle(gcmc,FixGCMC)
 
 #else
 
 #ifndef LMP_FIX_GCMC_H
 #define LMP_FIX_GCMC_H
 
 #include <stdio.h>
 #include "fix.h"
 
 namespace LAMMPS_NS {
 
 // Fix style "gcmc" (registered through the FixStyle(gcmc,FixGCMC) entry).
 // Declares separate attempt_* routines for translation, rotation, deletion
 // and insertion of atoms or molecules, each with a matching *_full variant.
 class FixGCMC : public Fix {
  public:
   FixGCMC(class LAMMPS *, int, char **);
   ~FixGCMC();
   int setmask();
   void init();
   void pre_exchange();
   void attempt_atomic_translation();
   void attempt_atomic_deletion();
   void attempt_atomic_insertion();
   void attempt_molecule_translation();
   void attempt_molecule_rotation();
   void attempt_molecule_deletion();
   void attempt_molecule_insertion();
   void attempt_atomic_translation_full();
   void attempt_atomic_deletion_full();
   void attempt_atomic_insertion_full();
   void attempt_molecule_translation_full();
   void attempt_molecule_rotation_full();
   void attempt_molecule_deletion_full();
   void attempt_molecule_insertion_full();
   double energy(int, int, tagint, double *);
   double molecule_energy(tagint);
   double energy_full();
   int pick_random_gas_atom();
   tagint pick_random_gas_molecule();
   void toggle_intramolecular(int);
   void update_gas_atoms_list();
   double compute_vector(int);    // returns the 8 attempt/success counters
   double memory_usage();
   void write_restart(FILE *);    // saves both RNG states + next_reneighbor
   void restart(char *);          // restores what write_restart() saved
 
  private:
   int molecule_group,molecule_group_bit;
   int molecule_group_inversebit;
   int exclusion_group,exclusion_group_bit;
   int ngcmc_type,nevery,seed;
   int ncycles,nexchanges,nmcmoves;
   int ngas;                 // # of gas atoms on all procs
   int ngas_local;           // # of gas atoms on this proc
   int ngas_before;          // # of gas atoms on procs < this proc
   int mode;                 // ATOM or MOLECULE
   int regionflag;           // 0 = anywhere in box, 1 = specific region
   int iregion;              // gcmc region
   char *idregion;           // gcmc region id
   bool pressure_flag;       // true if user specified reservoir pressure
   bool charge_flag;         // true if user specified atomic charge
   bool full_flag;           // true if doing full system energy calculations
 
   int natoms_per_molecule;  // number of atoms in each gas molecule
 
   int groupbitall;          // group bitmask for inserted atoms
   int ngroups;              // number of group-ids for inserted atoms
   char** groupstrings;      // list of group-ids for inserted atoms
   int ngrouptypes;          // number of type-based group-ids for inserted atoms
   char** grouptypestrings;  // list of type-based group-ids for inserted atoms
   int* grouptypebits;       // list of type-based group bitmasks
   int* grouptypes;          // list of type-based group types
 
   // move counters, stored as doubles and exposed via compute_vector()
   // in the order: translation, insertion, deletion, rotation
   double ntranslation_attempts;
   double ntranslation_successes;
   double nrotation_attempts;
   double nrotation_successes;
   double ndeletion_attempts;
   double ndeletion_successes;
   double ninsertion_attempts;
   double ninsertion_successes;
 
   int gcmc_nmax;            // # of int entries counted by memory_usage()
   int max_region_attempts;
   double gas_mass;
   double reservoir_temperature;
   double tfac_insert;
   double chemical_potential;
   double displace;
   double max_rotation_angle;
   double beta,zz,sigma,volume;
   double pressure,fugacity_coeff,charge;
   double xlo,xhi,ylo,yhi,zlo,zhi;
   double region_xlo,region_xhi,region_ylo,region_yhi,region_zlo,region_zhi;
   double region_volume;
   double energy_stored;
   double *sublo,*subhi;
   int *local_gas_list;
   double **cutsq;
   double **atom_coord;
   imageint imagezero;
   double overlap_cutoff;
   int overlap_flag;
   
   double energy_intra;
 
   class Pair *pair;
 
   // two random number generators; the state of each is written to and
   // restored from restart files
   class RanPark *random_equal;
   class RanPark *random_unequal;
 
   class Atom *model_atom;
 
   class Molecule **onemols;
   int imol,nmol;
   double **coords;
   imageint *imageflags;
-  class Fix *fixshake;
-  int shakeflag;
-  char *idshake;
+  class Fix *fixrigid, *fixshake;
+  int rigidflag, shakeflag;
+  char *idrigid, *idshake;
   int triclinic;                         // 0 = orthog box, 1 = triclinic
 
   class Compute *c_pe;
 
   void options(int, char **);
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Fix gcmc does not (yet) work with atom_style template
 
 Self-explanatory.
 
 E: Fix gcmc region does not support a bounding box
 
 Not all regions represent bounded volumes.  You cannot use
 such a region with the fix gcmc command.
 
 E: Fix gcmc region cannot be dynamic
 
 Only static regions can be used with fix gcmc.
 
 E: Fix gcmc region extends outside simulation box
 
 Self-explanatory.
 
 E: Fix gcmc molecule must have coordinates
 
 The defined molecule does not specify coordinates.
 
 E: Fix gcmc molecule must have atom types
 
 The defined molecule does not specify atom types.
 
 E: Atom type must be zero in fix gcmc mol command
 
 Self-explanatory.
 
 E: Fix gcmc molecule has charges, but atom style does not
 
 Self-explanatory.
 
 E: Fix gcmc molecule template ID must be same as atom_style template ID
 
 When using atom_style template, you cannot insert molecules that are
 not in that template.
 
 E: Fix gcmc atom has charge, but atom style does not
 
 Self-explanatory.
 
 E: Cannot use fix gcmc shake and not molecule
 
 Self-explanatory.
 
 E: Molecule template ID for fix gcmc does not exist
 
 Self-explanatory.
 
 W: Molecule template for fix gcmc has multiple molecules
 
 The fix gcmc command will only create molecules of a single type,
 i.e. the first molecule in the template.
 
 E: Region ID for fix gcmc does not exist
 
 Self-explanatory.
 
 W: Fix gcmc using full_energy option
 
 Fix gcmc has automatically turned on the full_energy option since it
 is required for systems like the one specified by the user. User input
 included one or more of the following: kspace, triclinic, a hybrid
 pair style, an eam pair style, or no "single" function for the pair
 style.
 
 E: Invalid atom type in fix gcmc command
 
 The atom type specified in the gcmc command does not exist.
 
 E: Fix gcmc cannot exchange individual atoms belonging to a molecule
 
 This is an error since you should not delete only one atom of a
 molecule.  The user has specified atomic (non-molecular) gas
 exchanges, but an atom belonging to a molecule could be deleted.
 
 E: All mol IDs should be set for fix gcmc group atoms
 
 The molecule flag is on, yet not all molecule ids in the fix group
 have been set to non-zero positive values by the user. This is an
 error since all atoms in the fix gcmc group are eligible for deletion,
 rotation, and translation and therefore must have valid molecule ids.
 
 E: Fix gcmc molecule command requires that atoms have molecule attributes
 
 Should not choose the gcmc molecule feature if no molecules are being
 simulated. The general molecule flag is off, but gcmc's molecule flag
 is on.
 
 E: Fix gcmc shake fix does not exist
 
 Self-explanatory.
 
 E: Fix gcmc and fix shake not using same molecule template ID
 
 Self-explanatory.
 
 E: Fix gcmc can not currently be used with fix rigid or fix rigid/small
 
 Self-explanatory.
 
 E: Cannot use fix gcmc in a 2d simulation
 
 Fix gcmc is set up to run in 3d only. No 2d simulations with fix gcmc
 are allowed.
 
 E: Could not find fix gcmc exclusion group ID
 
 Self-explanatory.
 
 E: Could not find fix gcmc rotation group ID
 
 Self-explanatory.
 
 E: Illegal fix gcmc gas mass <= 0
 
 The computed mass of the designated gas molecule or atom type was less
 than or equal to zero.
 
 E: Cannot do GCMC on atoms in atom_modify first group
 
 This is a restriction due to the way atoms are organized in a list to
 enable the atom_modify first command.
 
 E: Could not find specified fix gcmc group ID
 
 Self-explanatory.
 
 E: Fix gcmc put atom outside box
 
 This should not normally happen.  Contact the developers.
 
 E: Fix gcmc ran out of available molecule IDs
 
 See the setting for tagint in the src/lmptype.h file.
 
 E: Fix gcmc ran out of available atom IDs
 
 See the setting for tagint in the src/lmptype.h file.
 
 E: Too many total atoms
 
 See the setting for bigint in the src/lmptype.h file.
 
 */
diff --git a/src/MISC/fix_deposit.cpp b/src/MISC/fix_deposit.cpp
index d841482f8..9c1082f81 100644
--- a/src/MISC/fix_deposit.cpp
+++ b/src/MISC/fix_deposit.cpp
@@ -1,871 +1,871 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include "fix_deposit.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "molecule.h"
 #include "force.h"
 #include "update.h"
 #include "modify.h"
 #include "fix.h"
 #include "comm.h"
 #include "domain.h"
 #include "lattice.h"
 #include "region.h"
 #include "random_park.h"
 #include "math_extra.h"
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
 
 // insertion mode: single atoms or whole molecules from a template
 enum{ATOM,MOLECULE};
 enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED};    // several files
 // distribution used to pick the random insertion position
 enum{DIST_UNIFORM,DIST_GAUSSIAN};
 
 // tolerance for checking that the molfrac values sum to 1.0
 // must be a small number: with the former value of 1.0e6 the test
 //   sum < 1.0-EPSILON || sum > 1.0+EPSILON
 // could never trigger, so invalid fractions were silently accepted
 #define EPSILON 1.0e-6
 
 /* ---------------------------------------------------------------------- */
 
 // Parse and validate the fix deposit command.
 // Required args: arg[3] = ninsert, arg[4] = ntype, arg[5] = nfreq,
 // arg[6] = seed (must be > 0); everything after that is handed to options().
 // Also caches the region extent, allocates the per-insertion coordinate
 // buffers, applies lattice scaling and schedules the first insertion.
 FixDeposit::FixDeposit(LAMMPS *lmp, int narg, char **arg) :
   Fix(lmp, narg, arg), idregion(NULL), idrigid(NULL),
   idshake(NULL), onemols(NULL), molfrac(NULL), coords(NULL), imageflags(NULL),
   fixrigid(NULL), fixshake(NULL), random(NULL)
 {
   if (narg < 7) error->all(FLERR,"Illegal fix deposit command");
 
   restart_global = 1;
   time_depend = 1;
 
   // required args
 
   ninsert = force->inumeric(FLERR,arg[3]);
   ntype = force->inumeric(FLERR,arg[4]);
   nfreq = force->inumeric(FLERR,arg[5]);
   seed = force->inumeric(FLERR,arg[6]);
 
   if (seed <= 0) error->all(FLERR,"Illegal fix deposit command");
 
   // read options from end of input line
 
   options(narg-7,&arg[7]);
 
   // error check on type
 
   if (mode == ATOM && (ntype <= 0 || ntype > atom->ntypes))
     error->all(FLERR,"Invalid atom type in fix deposit command");
 
   // error checks on region and its extent being inside simulation box
 
   if (iregion == -1) error->all(FLERR,"Must specify a region in fix deposit");
   if (domain->regions[iregion]->bboxflag == 0)
     error->all(FLERR,"Fix deposit region does not support a bounding box");
   if (domain->regions[iregion]->dynamic_check())
     error->all(FLERR,"Fix deposit region cannot be dynamic");
 
   // cache bounding box of region; random positions are drawn from it
 
   xlo = domain->regions[iregion]->extent_xlo;
   xhi = domain->regions[iregion]->extent_xhi;
   ylo = domain->regions[iregion]->extent_ylo;
   yhi = domain->regions[iregion]->extent_yhi;
   zlo = domain->regions[iregion]->extent_zlo;
   zhi = domain->regions[iregion]->extent_zhi;
 
   if (domain->triclinic == 0) {
     if (xlo < domain->boxlo[0] || xhi > domain->boxhi[0] ||
         ylo < domain->boxlo[1] || yhi > domain->boxhi[1] ||
         zlo < domain->boxlo[2] || zhi > domain->boxhi[2])
       error->all(FLERR,"Deposition region extends outside simulation box");
   } else {
     if (xlo < domain->boxlo_bound[0] || xhi > domain->boxhi_bound[0] ||
         ylo < domain->boxlo_bound[1] || yhi > domain->boxhi_bound[1] ||
         zlo < domain->boxlo_bound[2] || zhi > domain->boxhi_bound[2])
       error->all(FLERR,"Deposition region extends outside simulation box");
   }
 
   // atom IDs are required in both ATOM and MOLECULE mode
   // error check and further setup for mode = MOLECULE
 
   if (atom->tag_enable == 0)
     error->all(FLERR,"Cannot use fix_deposit unless atoms have IDs");
 
   if (mode == MOLECULE) {
     for (int i = 0; i < nmol; i++) {
       if (onemols[i]->xflag == 0)
         error->all(FLERR,"Fix deposit molecule must have coordinates");
       if (onemols[i]->typeflag == 0)
         error->all(FLERR,"Fix deposit molecule must have atom types");
       // ntype acts as an offset added to the types stored in the template
       if (ntype+onemols[i]->ntypes <= 0 ||
           ntype+onemols[i]->ntypes > atom->ntypes)
         error->all(FLERR,"Invalid atom type in fix deposit mol command");
 
       if (atom->molecular == 2 && onemols != atom->avec->onemols)
         error->all(FLERR,"Fix deposit molecule template ID must be same "
                    "as atom_style template ID");
       onemols[i]->check_attributes(0);
 
       // fix deposit uses geometric center of molecule for insertion
 
       onemols[i]->compute_center();
     }
   }
 
   // rigid and shake both require molecule mode and are mutually exclusive
 
   if (rigidflag && mode == ATOM)
     error->all(FLERR,"Cannot use fix deposit rigid and not molecule");
   if (shakeflag && mode == ATOM)
     error->all(FLERR,"Cannot use fix deposit shake and not molecule");
   if (rigidflag && shakeflag)
     error->all(FLERR,"Cannot use fix deposit rigid and shake");
 
   // setup of coords and imageflags array
   // sized for the largest molecule in the template (1 for a single atom)
 
   if (mode == ATOM) natom_max = 1;
   else {
     natom_max = 0;
     for (int i = 0; i < nmol; i++)
       natom_max = MAX(natom_max,onemols[i]->natoms);
   }
   memory->create(coords,natom_max,3,"deposit:coords");
   memory->create(imageflags,natom_max,"deposit:imageflags");
 
   // setup scaling
 
   double xscale,yscale,zscale;
   if (scaleflag) {
     xscale = domain->lattice->xlattice;
     yscale = domain->lattice->ylattice;
     zscale = domain->lattice->zlattice;
   }
   else xscale = yscale = zscale = 1.0;
 
   // apply scaling to all input parameters with dist/vel units
   // lo,hi,rate act along the vertical dimension: y in 2d, z in 3d
 
   if (domain->dimension == 2) {
     lo *= yscale;
     hi *= yscale;
     rate *= yscale;
   } else {
     lo *= zscale;
     hi *= zscale;
     rate *= zscale;
   }
   deltasq *= xscale*xscale;
   nearsq *= xscale*xscale;
   vxlo *= xscale;
   vxhi *= xscale;
   vylo *= yscale;
   vyhi *= yscale;
   vzlo *= zscale;
   vzhi *= zscale;
   xmid *= xscale;
   ymid *= yscale;
   zmid *= zscale;
   sigma *= xscale; // same as in region sphere
   tx *= xscale;
   ty *= yscale;
   tz *= zscale;
 
   // find current max atom and molecule IDs if necessary
   // with idnext set this is done once here; otherwise pre_exchange()
   //   recomputes the max IDs before every insertion
 
   if (idnext) find_maxid();
 
   // random number generator, same for all procs
 
   random = new RanPark(lmp,seed);
 
   // set up reneighboring
   // first insertion is attempted on the timestep after the current one
 
   force_reneighbor = 1;
   next_reneighbor = update->ntimestep + 1;
   nfirst = next_reneighbor;
   ninserted = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 FixDeposit::~FixDeposit()
 {
   // per-insertion work buffers obtained via memory->create()
 
   memory->destroy(imageflags);
   memory->destroy(coords);
 
   // ID strings and molecule fractions allocated with new[]
 
   delete [] idregion;
   delete [] idshake;
   delete [] idrigid;
   delete [] molfrac;
 
   // random number generator
 
   delete random;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixDeposit::setmask()
 {
   // the only callback this fix requests is pre_exchange()
 
   return PRE_EXCHANGE;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Re-resolve the region and the optional rigid/shake fixes by ID and check
 // that they use the same molecule template as this fix. For systems with
 // per-atom radii, warn if the "near" distance allows overlapping particles.
 void FixDeposit::init()
 {
   // set index and check validity of region
 
   iregion = domain->find_region(idregion);
   if (iregion == -1)
     error->all(FLERR,"Region ID for fix deposit does not exist");
 
   // if rigidflag defined, check for rigid/small fix
   // its molecule template must be same as this one
 
   fixrigid = NULL;
   if (rigidflag) {
     int ifix = modify->find_fix(idrigid);
-    if (ifix < 0) error->all(FLERR,"Fix pour rigid fix does not exist");
+    if (ifix < 0) error->all(FLERR,"Fix deposit rigid fix does not exist");
     fixrigid = modify->fix[ifix];
     int tmp;
     if (onemols != (Molecule **) fixrigid->extract("onemol",tmp))
       error->all(FLERR,
                  "Fix deposit and fix rigid/small not using "
                  "same molecule template ID");
   }
 
   // if shakeflag defined, check for SHAKE fix
   // its molecule template must be same as this one
 
   fixshake = NULL;
   if (shakeflag) {
     int ifix = modify->find_fix(idshake);
     if (ifix < 0) error->all(FLERR,"Fix deposit shake fix does not exist");
     fixshake = modify->fix[ifix];
     int tmp;
     if (onemols != (Molecule **) fixshake->extract("onemol",tmp))
       error->all(FLERR,"Fix deposit and fix shake not using "
                  "same molecule template ID");
   }
 
   // for finite size spherical particles:
   // warn if near < 2 * maxrad of existing and inserted particles
   //   since may lead to overlaps
   // if inserted molecule does not define diameters,
   //   use AtomVecSphere::create_atom() default radius = 0.5
 
   if (atom->radius_flag) {
     double *radius = atom->radius;
     int nlocal = atom->nlocal;
 
     // largest radius of any existing atom, reduced over all procs
 
     double maxrad = 0.0;
     for (int i = 0; i < nlocal; i++)
       maxrad = MAX(maxrad,radius[i]);
 
     double maxradall;
     MPI_Allreduce(&maxrad,&maxradall,1,MPI_DOUBLE,MPI_MAX,world);
 
     // largest radius of any particle that could be inserted
 
     double maxradinsert = 0.0;
     if (mode == MOLECULE) {
       for (int i = 0; i < nmol; i++) {
         if (onemols[i]->radiusflag)
           maxradinsert = MAX(maxradinsert,onemols[i]->maxradius);
         else maxradinsert = MAX(maxradinsert,0.5);
       }
     } else maxradinsert = 0.5;
 
     // contact distance for inserted-inserted and existing-inserted pairs
     // warning is printed by proc 0 only
 
     double separation = MAX(2.0*maxradinsert,maxradall+maxradinsert);
     if (sqrt(nearsq) < separation && comm->me == 0) {
       char str[128];
       sprintf(str,"Fix deposit near setting < possible overlap separation %g",
               separation);
       error->warning(FLERR,str);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    perform particle insertion
 ------------------------------------------------------------------------- */
 
 // Attempt to insert one atom or one molecule on the scheduled timestep.
 // Makes up to maxattempt tries at finding a position whose atoms are all at
 // least sqrt(nearsq) away from every existing atom, then creates the new
 // atom(s) on the owning proc(s), updates global counts and ID maxima, and
 // schedules the next insertion (or disables further ones when done).
 void FixDeposit::pre_exchange()
 {
   int i,m,n,nlocalprev,imol,natom,flag,flagall;
   double coord[3],lamda[3],delx,dely,delz,rsq;
   double r[3],vnew[3],rotmat[3][3],quat[4];
   double *newcoord;
 
   // just return if should not be called on this timestep
 
   if (next_reneighbor != update->ntimestep) return;
 
   // clear ghost count and any ghost bonus data internal to AtomVec
   // same logic as beginning of Comm::exchange()
   // do it now b/c inserting atoms will overwrite ghost atoms
 
   atom->nghost = 0;
   atom->avec->clear_bonus();
 
   // compute current offset = bottom of insertion volume
 
   double offset = 0.0;
   if (rateflag) offset = (update->ntimestep - nfirst) * update->dt * rate;
 
   double *sublo,*subhi;
   if (domain->triclinic == 0) {
     sublo = domain->sublo;
     subhi = domain->subhi;
   } else {
     sublo = domain->sublo_lamda;
     subhi = domain->subhi_lamda;
   }
 
   // find current max atom and molecule IDs if necessary
 
   if (!idnext) find_maxid();
 
   // attempt an insertion until successful
 
   int dimension = domain->dimension;
 
   int success = 0;
   int attempt = 0;
   while (attempt < maxattempt) {
     attempt++;
 
     // choose random position for new particle within region
     if (distflag == DIST_UNIFORM) {
       do {
         coord[0] = xlo + random->uniform() * (xhi-xlo);
         coord[1] = ylo + random->uniform() * (yhi-ylo);
         coord[2] = zlo + random->uniform() * (zhi-zlo);
       } while (domain->regions[iregion]->match(coord[0],coord[1],coord[2]) == 0);
     } else if (distflag == DIST_GAUSSIAN) {
       do {
         coord[0] = xmid + random->gaussian() * sigma;
         coord[1] = ymid + random->gaussian() * sigma;
         coord[2] = zmid + random->gaussian() * sigma;
       } while (domain->regions[iregion]->match(coord[0],coord[1],coord[2]) == 0);
     } else error->all(FLERR,"Unknown particle distribution in fix deposit");
 
     // adjust vertical coord by offset
 
     if (dimension == 2) coord[1] += offset;
     else coord[2] += offset;
 
     // if global, reset vertical coord to be lo-hi above highest atom
     // if local, reset vertical coord to be lo-hi above highest "nearby" atom
     // local computation computes lateral distance between 2 particles w/ PBC
     // when done, have final coord of atom or center pt of molecule
 
     if (globalflag || localflag) {
       int dim;
       double max,maxall,delx,dely,delz,rsq;
 
       if (dimension == 2) {
         dim = 1;
         max = domain->boxlo[1];
       } else {
         dim = 2;
         max = domain->boxlo[2];
       }
 
       double **x = atom->x;
       int nlocal = atom->nlocal;
       for (i = 0; i < nlocal; i++) {
         if (localflag) {
           delx = coord[0] - x[i][0];
           dely = coord[1] - x[i][1];
           delz = 0.0;
           domain->minimum_image(delx,dely,delz);
           if (dimension == 2) rsq = delx*delx;
           else rsq = delx*delx + dely*dely;
           if (rsq > deltasq) continue;
         }
         if (x[i][dim] > max) max = x[i][dim];
       }
 
       MPI_Allreduce(&max,&maxall,1,MPI_DOUBLE,MPI_MAX,world);
       if (dimension == 2)
         coord[1] = maxall + lo + random->uniform()*(hi-lo);
       else
         coord[2] = maxall + lo + random->uniform()*(hi-lo);
     }
 
     // coords = coords of all atoms
     // for molecule, perform random rotation around center pt
     // apply PBC so final coords are inside box
     // also modify image flags due to PBC
 
     if (mode == ATOM) {
       natom = 1;
       coords[0][0] = coord[0];
       coords[0][1] = coord[1];
       coords[0][2] = coord[2];
       imageflags[0] = ((imageint) IMGMAX << IMG2BITS) |
         ((imageint) IMGMAX << IMGBITS) | IMGMAX;
     } else {
       // molfrac[] holds cumulative fractions ending at 1.0, so this picks
       // the first template whose cumulative fraction is >= rng
       double rng = random->uniform();
       imol = 0;
       while (rng > molfrac[imol]) imol++;
       natom = onemols[imol]->natoms;
       // random rotation axis in 3d, fixed z axis in 2d
       if (dimension == 3) {
         r[0] = random->uniform() - 0.5;
         r[1] = random->uniform() - 0.5;
         r[2] = random->uniform() - 0.5;
       } else {
         r[0] = r[1] = 0.0;
         r[2] = 1.0;
       }
       double theta = random->uniform() * MY_2PI;
       MathExtra::norm3(r);
       MathExtra::axisangle_to_quat(r,theta,quat);
       MathExtra::quat_to_mat(quat,rotmat);
       for (i = 0; i < natom; i++) {
         MathExtra::matvec(rotmat,onemols[imol]->dx[i],coords[i]);
         coords[i][0] += coord[0];
         coords[i][1] += coord[1];
         coords[i][2] += coord[2];
 
         imageflags[i] = ((imageint) IMGMAX << IMG2BITS) |
           ((imageint) IMGMAX << IMGBITS) | IMGMAX;
         domain->remap(coords[i],imageflags[i]);
       }
     }
 
     // check distance between any existing atom and any inserted atom
     // if less than near, try again
     // use minimum_image() to account for PBC
 
     double **x = atom->x;
     int nlocal = atom->nlocal;
 
     flag = 0;
     for (m = 0; m < natom; m++) {
       for (i = 0; i < nlocal; i++) {
         delx = coords[m][0] - x[i][0];
         dely = coords[m][1] - x[i][1];
         delz = coords[m][2] - x[i][2];
         domain->minimum_image(delx,dely,delz);
         rsq = delx*delx + dely*dely + delz*delz;
         if (rsq < nearsq) flag = 1;
       }
     }
     // overlap found on any proc rejects this attempt on all procs
     MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_MAX,world);
     if (flagall) continue;
 
     // proceed with insertion
 
     nlocalprev = atom->nlocal;
 
     // choose random velocity for new particle
     // used for every atom in molecule
 
     vnew[0] = vxlo + random->uniform() * (vxhi-vxlo);
     vnew[1] = vylo + random->uniform() * (vyhi-vylo);
     vnew[2] = vzlo + random->uniform() * (vzhi-vzlo);
 
     // if target specified, change velocity vector accordingly
     // speed is kept, direction is pointed from coord towards (tx,ty,tz)
 
     if (targetflag) {
       double vel = sqrt(vnew[0]*vnew[0] + vnew[1]*vnew[1] + vnew[2]*vnew[2]);
       delx = tx - coord[0];
       dely = ty - coord[1];
       delz = tz - coord[2];
       double rsq = delx*delx + dely*dely + delz*delz;
       if (rsq > 0.0) {
         double rinv = sqrt(1.0/rsq);
         vnew[0] = delx*rinv*vel;
         vnew[1] = dely*rinv*vel;
         vnew[2] = delz*rinv*vel;
       }
     }
 
     // check if new atoms are in my sub-box or above it if I am highest proc
     // if so, add atom to my list via create_atom()
     // initialize additional info about the atoms
     // set group mask to "all" plus fix group
 
     for (m = 0; m < natom; m++) {
       if (domain->triclinic) {
         domain->x2lamda(coords[m],lamda);
         newcoord = lamda;
       } else newcoord = coords[m];
 
       flag = 0;
       if (newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] &&
           newcoord[1] >= sublo[1] && newcoord[1] < subhi[1] &&
           newcoord[2] >= sublo[2] && newcoord[2] < subhi[2]) flag = 1;
       else if (dimension == 3 && newcoord[2] >= domain->boxhi[2]) {
         if (comm->layout != LAYOUT_TILED) {
           if (comm->myloc[2] == comm->procgrid[2]-1 &&
               newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] &&
               newcoord[1] >= sublo[1] && newcoord[1] < subhi[1]) flag = 1;
         } else {
           if (comm->mysplit[2][1] == 1.0 &&
               newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] &&
               newcoord[1] >= sublo[1] && newcoord[1] < subhi[1]) flag = 1;
         }
       } else if (dimension == 2 && newcoord[1] >= domain->boxhi[1]) {
         if (comm->layout != LAYOUT_TILED) {
           if (comm->myloc[1] == comm->procgrid[1]-1 &&
               newcoord[0] >= sublo[0] && newcoord[0] < subhi[0]) flag = 1;
         } else {
           if (comm->mysplit[1][1] == 1.0 &&
               newcoord[0] >= sublo[0] && newcoord[0] < subhi[0]) flag = 1;
         }
       }
 
       if (flag) {
         if (mode == ATOM) atom->avec->create_atom(ntype,coords[m]);
         else atom->avec->create_atom(ntype+onemols[imol]->type[m],coords[m]);
         // new atom is appended at the end of the local list
         n = atom->nlocal - 1;
         atom->tag[n] = maxtag_all + m+1;
         if (mode == MOLECULE) {
           if (atom->molecule_flag) atom->molecule[n] = maxmol_all+1;
           if (atom->molecular == 2) {
             atom->molindex[n] = 0;
             atom->molatom[n] = m;
           }
         }
         atom->mask[n] = 1 | groupbit;
         atom->image[n] = imageflags[m];
         atom->v[n][0] = vnew[0];
         atom->v[n][1] = vnew[1];
         atom->v[n][2] = vnew[2];
         if (mode == MOLECULE) {
 	  onemols[imol]->quat_external = quat;
           atom->add_molecule_atom(onemols[imol],m,n,maxtag_all);
 	}
         modify->create_attribute(n);
       }
     }
 
     // FixRigidSmall::set_molecule stores rigid body attributes
     //   coord is new position of geometric center of mol, not COM
     // FixShake::set_molecule stores shake info for molecule
 
     if (rigidflag)
       fixrigid->set_molecule(nlocalprev,maxtag_all,imol,coord,vnew,quat);
     else if (shakeflag)
       fixshake->set_molecule(nlocalprev,maxtag_all,imol,coord,vnew,quat);
 
     // old code: unsuccessful if no proc performed insertion of an atom
     // don't think that check is necessary
     // if get this far, should always be successful
     // would be hard to undo partial insertion for a molecule
     // better to check how many atoms could be inserted (w/out inserting)
     //   then sum to insure all are inserted, before doing actual insertion
     // MPI_Allreduce(&flag,&success,1,MPI_INT,MPI_MAX,world);
 
     success = 1;
     break;
   }
 
   // warn if not successful b/c too many attempts
 
   if (!success && comm->me == 0)
     error->warning(FLERR,"Particle deposition was unsuccessful",0);
 
   // reset global natoms,nbonds,etc
   // increment maxtag_all and maxmol_all if necessary
   // if global map exists, reset it now instead of waiting for comm
   //   since other pre-exchange fixes may use it
   //   invoke map_init() b/c atom count has grown
   // natom and imol are only set inside the attempt loop,
   //   so they are only read here when an attempt succeeded
 
   if (success) {
     atom->natoms += natom;
     if (atom->natoms < 0)
       error->all(FLERR,"Too many total atoms");
     if (mode == MOLECULE) {
       atom->nbonds += onemols[imol]->nbonds;
       atom->nangles += onemols[imol]->nangles;
       atom->ndihedrals += onemols[imol]->ndihedrals;
       atom->nimpropers += onemols[imol]->nimpropers;
     }
     maxtag_all += natom;
     if (maxtag_all >= MAXTAGINT)
       error->all(FLERR,"New atom IDs exceed maximum allowed ID");
     if (mode == MOLECULE && atom->molecule_flag) maxmol_all++;
     if (atom->map_style) {
       atom->map_init();
       atom->map_set();
     }
   }
 
   // next timestep to insert
   // next_reneighbor = 0 if done
   // a failed attempt does not count towards ninsert
 
   if (success) ninserted++;
   if (ninserted < ninsert) next_reneighbor += nfreq;
   else next_reneighbor = 0;
 }
 
 /* ----------------------------------------------------------------------
    maxtag_all = current max atom ID for all atoms
    maxmol_all = current max molecule ID for all atoms
 ------------------------------------------------------------------------- */
 
 void FixDeposit::find_maxid()
 {
   tagint *tag = atom->tag;
   tagint *molecule = atom->molecule;
   int nlocal = atom->nlocal;
 
   tagint max = 0;
   for (int i = 0; i < nlocal; i++) max = MAX(max,tag[i]);
   MPI_Allreduce(&max,&maxtag_all,1,MPI_LMP_TAGINT,MPI_MAX,world);
 
   if (mode == MOLECULE && molecule) {
     max = 0;
     for (int i = 0; i < nlocal; i++) max = MAX(max,molecule[i]);
     MPI_Allreduce(&max,&maxmol_all,1,MPI_LMP_TAGINT,MPI_MAX,world);
   }
 }
 
 /* ----------------------------------------------------------------------
    parse optional parameters at end of input line
    narg = number of remaining arguments, arg = the argument strings
    all optional settings are first reset to their defaults, then the
      keyword/value groups are consumed left to right
    an unknown keyword or a keyword with too few values is reported
      via error->all()
 ------------------------------------------------------------------------- */
 
 void FixDeposit::options(int narg, char **arg)
 {
   // defaults
 
   iregion = -1;
   idregion = NULL;
   mode = ATOM;
   molfrac = NULL;
   rigidflag = 0;
   idrigid = NULL;
   shakeflag = 0;
   idshake = NULL;
   idnext = 0;
   globalflag = localflag = 0;
   lo = hi = deltasq = 0.0;
   nearsq = 0.0;
   maxattempt = 10;
   rateflag = 0;
   vxlo = vxhi = vylo = vyhi = vzlo = vzhi = 0.0;
   distflag = DIST_UNIFORM;
   sigma = 1.0;
   xmid = ymid = zmid = 0.0;
   scaleflag = 1;
   targetflag = 0;
 
   int iarg = 0;
   while (iarg < narg) {
     if (strcmp(arg[iarg],"region") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix deposit command");
       iregion = domain->find_region(arg[iarg+1]);
       if (iregion == -1)
         error->all(FLERR,"Region ID for fix deposit does not exist");
       int n = strlen(arg[iarg+1]) + 1;
 
       // release ID stored by an earlier "region" keyword on the same line
       // same pattern as the rigid and shake keywords below
 
       delete [] idregion;
       idregion = new char[n];
       strcpy(idregion,arg[iarg+1]);
       iarg += 2;
 
     } else if (strcmp(arg[iarg],"mol") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix deposit command");
       int imol = atom->find_molecule(arg[iarg+1]);
       if (imol == -1)
         error->all(FLERR,"Molecule template ID for fix deposit does not exist");
       mode = MOLECULE;
       onemols = &atom->molecules[imol];
       nmol = onemols[0]->nset;
 
       // default molfrac = cumulative fractions, equal weight per molecule
       // last entry is forced to exactly 1.0
 
       delete [] molfrac;
       molfrac = new double[nmol];
       molfrac[0] = 1.0/nmol;
       for (int i = 1; i < nmol-1; i++) molfrac[i] = molfrac[i-1] + 1.0/nmol;
       molfrac[nmol-1] = 1.0;
       iarg += 2;
     } else if (strcmp(arg[iarg],"molfrac") == 0) {
       if (mode != MOLECULE) error->all(FLERR,"Illegal fix deposit command");
       if (iarg+nmol+1 > narg) error->all(FLERR,"Illegal fix deposit command");
 
       // user fractions are stored as a running sum, which must end at 1.0
 
       molfrac[0] = force->numeric(FLERR,arg[iarg+1]);
       for (int i = 1; i < nmol; i++)
         molfrac[i] = molfrac[i-1] + force->numeric(FLERR,arg[iarg+i+1]);
       if (molfrac[nmol-1] < 1.0-EPSILON || molfrac[nmol-1] > 1.0+EPSILON)
         error->all(FLERR,"Illegal fix deposit command");
       molfrac[nmol-1] = 1.0;
       iarg += nmol+1;
 
     } else if (strcmp(arg[iarg],"rigid") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix deposit command");
       int n = strlen(arg[iarg+1]) + 1;
       delete [] idrigid;
       idrigid = new char[n];
       strcpy(idrigid,arg[iarg+1]);
       rigidflag = 1;
       iarg += 2;
     } else if (strcmp(arg[iarg],"shake") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix deposit command");
       int n = strlen(arg[iarg+1]) + 1;
       delete [] idshake;
       idshake = new char[n];
       strcpy(idshake,arg[iarg+1]);
       shakeflag = 1;
       iarg += 2;
 
     } else if (strcmp(arg[iarg],"id") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix deposit command");
       if (strcmp(arg[iarg+1],"max") == 0) idnext = 0;
       else if (strcmp(arg[iarg+1],"next") == 0) idnext = 1;
       else error->all(FLERR,"Illegal fix deposit command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"global") == 0) {
       if (iarg+3 > narg) error->all(FLERR,"Illegal fix deposit command");
 
       // global and local are mutually exclusive, last one given wins
 
       globalflag = 1;
       localflag = 0;
       lo = force->numeric(FLERR,arg[iarg+1]);
       hi = force->numeric(FLERR,arg[iarg+2]);
       iarg += 3;
     } else if (strcmp(arg[iarg],"local") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix deposit command");
       localflag = 1;
       globalflag = 0;
       lo = force->numeric(FLERR,arg[iarg+1]);
       hi = force->numeric(FLERR,arg[iarg+2]);
 
       // distance is stored squared, parse the argument only once
 
       double delta = force->numeric(FLERR,arg[iarg+3]);
       deltasq = delta*delta;
       iarg += 4;
 
     } else if (strcmp(arg[iarg],"near") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix deposit command");
       double rnear = force->numeric(FLERR,arg[iarg+1]);
       nearsq = rnear*rnear;
       iarg += 2;
     } else if (strcmp(arg[iarg],"attempt") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix deposit command");
       maxattempt = force->inumeric(FLERR,arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"rate") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix deposit command");
       rateflag = 1;
       rate = force->numeric(FLERR,arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"vx") == 0) {
       if (iarg+3 > narg) error->all(FLERR,"Illegal fix deposit command");
       vxlo = force->numeric(FLERR,arg[iarg+1]);
       vxhi = force->numeric(FLERR,arg[iarg+2]);
       iarg += 3;
     } else if (strcmp(arg[iarg],"vy") == 0) {
       if (iarg+3 > narg) error->all(FLERR,"Illegal fix deposit command");
       vylo = force->numeric(FLERR,arg[iarg+1]);
       vyhi = force->numeric(FLERR,arg[iarg+2]);
       iarg += 3;
     } else if (strcmp(arg[iarg],"vz") == 0) {
       if (iarg+3 > narg) error->all(FLERR,"Illegal fix deposit command");
       vzlo = force->numeric(FLERR,arg[iarg+1]);
       vzhi = force->numeric(FLERR,arg[iarg+2]);
       iarg += 3;
     } else if (strcmp(arg[iarg],"units") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix deposit command");
       if (strcmp(arg[iarg+1],"box") == 0) scaleflag = 0;
       else if (strcmp(arg[iarg+1],"lattice") == 0) scaleflag = 1;
       else error->all(FLERR,"Illegal fix deposit command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"gaussian") == 0) {
       if (iarg+5 > narg) error->all(FLERR,"Illegal fix deposit command");
       xmid = force->numeric(FLERR,arg[iarg+1]);
       ymid = force->numeric(FLERR,arg[iarg+2]);
       zmid = force->numeric(FLERR,arg[iarg+3]);
       sigma = force->numeric(FLERR,arg[iarg+4]);
       distflag = DIST_GAUSSIAN;
       iarg += 5;
     } else if (strcmp(arg[iarg],"target") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix deposit command");
       tx = force->numeric(FLERR,arg[iarg+1]);
       ty = force->numeric(FLERR,arg[iarg+2]);
       tz = force->numeric(FLERR,arg[iarg+3]);
       targetflag = 1;
       iarg += 4;
     } else error->all(FLERR,"Illegal fix deposit command");
   }
 }
 
 /* ----------------------------------------------------------------------
    pack entire state of Fix into one write
    four values are stored as doubles, preceded by their total byte count
    only proc 0 writes to fp
 ------------------------------------------------------------------------- */
 
 void FixDeposit::write_restart(FILE *fp)
 {
   // order of values must match the order read back in restart()
 
   const int nvalues = 4;
   double values[nvalues];
   values[0] = random->state();
   values[1] = ninserted;
   values[2] = nfirst;
   values[3] = next_reneighbor;
 
   if (comm->me != 0) return;
 
   int nbytes = nvalues * sizeof(double);
   fwrite(&nbytes,sizeof(int),1,fp);
   fwrite(values,sizeof(double),nvalues,fp);
 }
 
 /* ----------------------------------------------------------------------
    use state info from restart file to restart the Fix
    buf holds four doubles in the order written by write_restart()
 ------------------------------------------------------------------------- */
 
 void FixDeposit::restart(char *buf)
 {
   const double *values = reinterpret_cast<double *> (buf);
 
   seed = static_cast<int> (values[0]);
   ninserted = static_cast<int> (values[1]);
   nfirst = static_cast<int> (values[2]);
   next_reneighbor = static_cast<int> (values[3]);
 
   // re-seed the random number generator with the restored value
 
   random->reset(seed);
 }
 
 /* ----------------------------------------------------------------------
    extract particle radius for atom type = itype
 ------------------------------------------------------------------------- */
 
 void *FixDeposit::extract(const char *str, int &itype)
 {
   if (strcmp(str,"radius") == 0) {
     if (mode == ATOM) {
       if (itype == ntype) oneradius = 0.5;
       else oneradius = 0.0;
 
     } else {
 
       // loop over onemols molecules
       // skip a molecule with no atoms as large as itype
 
       oneradius = 0.0;
       for (int i = 0; i < nmol; i++) {
         if (itype > ntype+onemols[i]->ntypes) continue;
         double *radius = onemols[i]->radius;
         int *type = onemols[i]->type;
         int natoms = onemols[i]->natoms;
 
         // check radii of atoms in Molecule with matching types
         // default to 0.5, if radii not defined in Molecule
         //   same as atom->avec->create_atom(), invoked in pre_exchange()
 
         for (int i = 0; i < natoms; i++)
           if (type[i]+ntype == itype) {
             if (radius) oneradius = MAX(oneradius,radius[i]);
             else oneradius = MAX(oneradius,0.5);
           }
       }
     }
     itype = 0;
     return &oneradius;
   }
 
   return NULL;
 }
diff --git a/src/RIGID/fix_rigid_nh_small.cpp b/src/RIGID/fix_rigid_nh_small.cpp
index b08c80b62..199c04bd9 100644
--- a/src/RIGID/fix_rigid_nh_small.cpp
+++ b/src/RIGID/fix_rigid_nh_small.cpp
@@ -1,1511 +1,1517 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Trung Dac Nguyen (ORNL)
    references: Kamberaj et al., J. Chem. Phys. 122, 224114 (2005)
                Miller et al., J Chem Phys. 116, 8649-8659 (2002)
 ------------------------------------------------------------------------- */
 
 #include <math.h>
 #include <stdio.h>
 #include <string.h>
 #include "fix_rigid_nh_small.h"
 #include "math_extra.h"
 #include "atom.h"
 #include "compute.h"
 #include "domain.h"
 #include "update.h"
 #include "modify.h"
 #include "fix_deform.h"
 #include "group.h"
 #include "comm.h"
 #include "force.h"
 #include "kspace.h"
 #include "output.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathExtra;
 
 enum{NONE,XYZ,XY,YZ,XZ};     // same as in FixRigid
 enum{ISO,ANISO,TRICLINIC};   // same as in FixRigid
 
 #define EPSILON 1.0e-7
 
 enum{FULL_BODY,INITIAL,FINAL,FORCE_TORQUE,VCM_ANGMOM,XCM_MASS,ITENSOR,DOF};
 
 /* ---------------------------------------------------------------------- */
 
 FixRigidNHSmall::FixRigidNHSmall(LAMMPS *lmp, int narg, char **arg) :
   FixRigidSmall(lmp, narg, arg),
   w(NULL), wdti1(NULL), wdti2(NULL), wdti4(NULL),
   q_t(NULL), q_r(NULL), eta_t(NULL), eta_r(NULL),
   eta_dot_t(NULL), eta_dot_r(NULL), f_eta_t(NULL), f_eta_r(NULL),
   q_b(NULL), eta_b(NULL), eta_dot_b(NULL), f_eta_b(NULL),
   rfix(NULL), id_temp(NULL), id_press(NULL),
   temperature(NULL), pressure(NULL)
 {
   // validate settings parsed by the FixRigidSmall constructor
   // barostat period must be positive where p_flag is exactly 1
 
   int badperiod = 0;
   for (int idim = 0; idim < 3; idim++)
     if (p_flag[idim] == 1 && p_period[idim] <= 0.0) badperiod = 1;
   if (badperiod)
     error->all(FLERR,"Fix rigid/small npt/nph period must be > 0.0");
 
   dimension = domain->dimension;
 
   // a 2d run cannot barostat or couple the z dimension
 
   if (dimension == 2) {
     if (p_flag[2])
       error->all(FLERR,"Invalid fix rigid/small npt/nph command "
                  "for a 2d simulation");
     if (pcouple == YZ || pcouple == XZ)
       error->all(FLERR,"Invalid fix rigid/small npt/nph command "
                  "for a 2d simulation");
   }
 
   // every coupled dimension must itself be barostatted
 
   int uncontrolled = 0;
   if (pcouple == XYZ)
     uncontrolled = (p_flag[0] == 0 || p_flag[1] == 0 ||
                     (dimension == 3 && p_flag[2] == 0));
   else if (pcouple == XY)
     uncontrolled = (p_flag[0] == 0 || p_flag[1] == 0);
   else if (pcouple == YZ)
     uncontrolled = (p_flag[1] == 0 || p_flag[2] == 0);
   else if (pcouple == XZ)
     uncontrolled = (p_flag[0] == 0 || p_flag[2] == 0);
   if (uncontrolled)
     error->all(FLERR,"Invalid fix rigid/small npt/nph command "
                "pressure settings");
 
   // require periodicity in tensile dimension
 
   if ((p_flag[0] && domain->xperiodic == 0) ||
       (p_flag[1] && domain->yperiodic == 0) ||
       (p_flag[2] && domain->zperiodic == 0))
     error->all(FLERR,
                "Cannot use fix rigid/small npt/nph on a "
                "non-periodic dimension");
 
   // coupled dimensions must share start, stop and period values
 
   int mismatch = 0;
   if (pcouple == XYZ && dimension == 3)
     mismatch = (p_start[0] != p_start[1] || p_start[0] != p_start[2] ||
                 p_stop[0] != p_stop[1] || p_stop[0] != p_stop[2] ||
                 p_period[0] != p_period[1] || p_period[0] != p_period[2]);
   else if (pcouple == XYZ && dimension == 2)
     mismatch = (p_start[0] != p_start[1] || p_stop[0] != p_stop[1] ||
                 p_period[0] != p_period[1]);
   else if (pcouple == XY)
     mismatch = (p_start[0] != p_start[1] || p_stop[0] != p_stop[1] ||
                 p_period[0] != p_period[1]);
   else if (pcouple == YZ)
     mismatch = (p_start[1] != p_start[2] || p_stop[1] != p_stop[2] ||
                 p_period[1] != p_period[2]);
   else if (pcouple == XZ)
     mismatch = (p_start[0] != p_start[2] || p_stop[0] != p_stop[2] ||
                 p_period[0] != p_period[2]);
   if (mismatch)
     error->all(FLERR,"Invalid fix rigid/small npt/nph command pressure settings");
 
   // thermostat and barostat damping parameters must be positive
 
   int baddamp = (tstat_flag && t_period <= 0.0);
   for (int idim = 0; idim < 3; idim++)
     if (p_flag[idim] && p_period[idim] <= 0.0) baddamp = 1;
   if (baddamp)
     error->all(FLERR,"Fix rigid/small nvt/npt/nph damping parameters "
                "must be > 0.0");
 
   // memory allocation and initialization
 
   if (tstat_flag || pstat_flag) {
     allocate_chain();
     allocate_order();
   }
 
   // thermostat chain starts at rest
   // only the first chain force is zeroed here
 
   if (tstat_flag) {
     eta_t[0] = 0.0;
     eta_r[0] = 0.0;
     eta_dot_t[0] = 0.0;
     eta_dot_r[0] = 0.0;
     f_eta_t[0] = 0.0;
     f_eta_r[0] = 0.0;
 
     for (int ich = 1; ich < t_chain; ich++) {
       eta_t[ich] = 0.0;
       eta_r[ich] = 0.0;
       eta_dot_t[ich] = 0.0;
       eta_dot_r[ich] = 0.0;
     }
   }
 
   // barostat strain rates and its thermostat chain start at rest
 
   if (pstat_flag) {
     for (int idim = 0; idim < 3; idim++) epsilon_dot[idim] = 0.0;
 
     eta_b[0] = 0.0;
     eta_dot_b[0] = 0.0;
     f_eta_b[0] = 0.0;
     for (int ich = 1; ich < p_chain; ich++) {
       eta_b[ich] = 0.0;
       eta_dot_b[ich] = 0.0;
     }
   }
 
   // rigid body pointers
 
   rfix = NULL;
   nrigidfix = 0;
 
   vol0 = 0.0;
   t0 = 1.0;
 
   // computes are looked up later by ID, none created by this fix yet
 
   tcomputeflag = 0;
   pcomputeflag = 0;
 
   id_temp = NULL;
   id_press = NULL;
 }
 
 /* ---------------------------------------------------------------------- */
 
 FixRigidNHSmall::~FixRigidNHSmall()
 {
   // chain and order arrays exist only if a thermostat or barostat is active
 
   const int allocated = (tstat_flag || pstat_flag);
   if (allocated) {
     deallocate_chain();
     deallocate_order();
   }
 
   // delete [] on a NULL pointer is a no-op, so no explicit test is needed
 
   delete [] rfix;
 
   // delete temperature compute if tcomputeflag is set
 
   if (tcomputeflag) modify->delete_compute(id_temp);
   delete [] id_temp;
 
   // pressure compute and its ID are only released when barostatting
 
   if (!pstat_flag) return;
 
   if (pcomputeflag) modify->delete_compute(id_press);
   delete [] id_press;
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixRigidNHSmall::setmask()
 {
   // start from the parent mask
   // add THERMO_ENERGY when a thermostat or barostat is active
 
   const int parent = FixRigidSmall::setmask();
   if (tstat_flag || pstat_flag) return parent | THERMO_ENERGY;
   return parent;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixRigidNHSmall::init()
 {
   FixRigidSmall::init();
 
   // recheck that dilate group has not been deleted
 
   if (allremap == 0) {
     int idilate = group->find(id_dilate);
     if (idilate == -1)
       error->all(FLERR,"Fix rigid npt/nph dilate group ID does not exist");
     dilate_group_bit = group->bitmask[idilate];
   }
 
   // initialize thermostats
   // set timesteps, constants
   // store Yoshida-Suzuki integrator parameters
 
   dtv = update->dt;
   dtf = 0.5 * update->dt * force->ftm2v;
   dtq = 0.5 * update->dt;
 
   boltz = force->boltz;
   nktv2p = force->nktv2p;
   mvv2e = force->mvv2e;
 
   if (force->kspace) kspace_flag = 1;
   else kspace_flag = 0;
 
   // see Table 1 in Kamberaj et al
 
   if (tstat_flag || pstat_flag) {
     if (t_order == 3) {
       w[0] = 1.0 / (2.0 - pow(2.0, 1.0/3.0));
       w[1] = 1.0 - 2.0*w[0];
       w[2] = w[0];
     } else if (t_order == 5) {
       w[0] = 1.0 / (4.0 - pow(4.0, 1.0/3.0));
       w[1] = w[0];
       w[2] = 1.0 - 4.0 * w[0];
       w[3] = w[0];
       w[4] = w[0];
     }
   }
 
   int icompute;
   if (tcomputeflag) {
     icompute = modify->find_compute(id_temp);
     if (icompute < 0)
       error->all(FLERR,"Temperature ID for fix rigid nvt/npt/nph does not exist");
     temperature = modify->compute[icompute];
   }
 
   if (pstat_flag) {
     if (domain->triclinic)
       error->all(FLERR,"Fix rigid npt/nph does not yet allow triclinic box");
 
     // ensure no conflict with fix deform
 
     for (int i = 0; i < modify->nfix; i++)
       if (strcmp(modify->fix[i]->style,"deform") == 0) {
       	int *dimflag = ((FixDeform *) modify->fix[i])->dimflag;
       	if ((p_flag[0] && dimflag[0]) || (p_flag[1] && dimflag[1]) ||
       	    (p_flag[2] && dimflag[2]))
           error->all(FLERR,"Cannot use fix rigid npt/nph and fix deform on "
                      "same component of stress tensor");
       }
 
     // set frequency
 
     p_freq_max = 0.0;
     p_freq_max = MAX(p_freq[0],p_freq[1]);
     p_freq_max = MAX(p_freq_max,p_freq[2]);
 
     // tally the number of dimensions that are barostatted
     // set initial volume and reference cell, if not already done
 
     pdim = p_flag[0] + p_flag[1] + p_flag[2];
     if (vol0 == 0.0) {
       if (dimension == 2) vol0 = domain->xprd * domain->yprd;
       else vol0 = domain->xprd * domain->yprd * domain->zprd;
     }
 
     // set pressure compute ptr
 
     icompute = modify->find_compute(id_press);
     if (icompute < 0)
       error->all(FLERR,"Pressure ID for fix rigid npt/nph does not exist");
     pressure = modify->compute[icompute];
 
     // detect if any rigid fixes exist so rigid bodies move on remap
     // rfix[] = indices to each fix rigid
     // this will include self
 
     if (rfix) delete [] rfix;
     nrigidfix = 0;
     rfix = NULL;
 
     for (int i = 0; i < modify->nfix; i++)
       if (modify->fix[i]->rigid_flag) nrigidfix++;
     if (nrigidfix) {
       rfix = new int[nrigidfix];
       nrigidfix = 0;
       for (int i = 0; i < modify->nfix; i++)
         if (modify->fix[i]->rigid_flag) rfix[nrigidfix++] = i;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    setup of rigid bodies, thermostat and barostat
    calls FixRigidSmall::setup(), determines the degrees of freedom,
      rebuilds each body's quaternion momentum conjqm from its angmom,
      sums translational and rotational kinetic energy over all procs,
      then initializes target temperature/pressure, thermostat and
      barostat masses, chain forces and order/timestep coefficients
 ------------------------------------------------------------------------- */
 
 void FixRigidNHSmall::setup(int vflag)
 {
   FixRigidSmall::setup(vflag);
-
-  // total translational and rotational degrees of freedom
-
-  nf_t = dimension * nlocal_body;
-  if (dimension == 3) {
-    nf_r = dimension * nlocal_body;
-    for (int ibody = 0; ibody < nlocal_body; ibody++) {
-      Body *b = &body[ibody];
-      for (int k = 0; k < dimension; k++)
-        if (fabs(b->inertia[k]) < EPSILON) nf_r--;
-    }
-  } else if (dimension == 2) {
-    nf_r = nlocal_body;
-    for (int ibody = 0; ibody < nlocal_body; ibody++) {
-      Body *b = &body[ibody];
-      if (fabs(b->inertia[2]) < EPSILON) nf_r--;
-    }
-  }
-
-  double nf[2], nfall[2];
-  nf[0] = nf_t;
-  nf[1] = nf_r;
-  MPI_Allreduce(nf,nfall,2,MPI_DOUBLE,MPI_SUM,world);
-  nf_t = nfall[0];
-  nf_r = nfall[1];
-
-  g_f = nf_t + nf_r;
-  onednft = 1.0 + (double)(dimension) / (double)g_f;
-  onednfr = (double) (dimension) / (double)g_f;
-
+  compute_dof();
+  
   double mbody[3];
   akin_t = akin_r = 0.0;
   for (int ibody = 0; ibody < nlocal_body; ibody++) {
     Body *b = &body[ibody];
 
     // rotate angmom into the body frame, then conjqm = 2 * quatvec(quat,mbody)
 
     MathExtra::transpose_matvec(b->ex_space,b->ey_space,b->ez_space,
                                 b->angmom,mbody);
     MathExtra::quatvec(b->quat,mbody,b->conjqm);
     b->conjqm[0] *= 2.0;
     b->conjqm[1] *= 2.0;
     b->conjqm[2] *= 2.0;
     b->conjqm[3] *= 2.0;
 
     // akin_t = sum of m*v^2, akin_r = sum of L.omega (no factor of 1/2)
 
     if (tstat_flag || pstat_flag) {
       akin_t += b->mass*(b->vcm[0]*b->vcm[0] + b->vcm[1]*b->vcm[1] +
         b->vcm[2]*b->vcm[2]);
       akin_r += b->angmom[0]*b->omega[0] + b->angmom[1]*b->omega[1] +
         b->angmom[2]*b->omega[2];
     }
   }
 
   // accumulate translational and rotational kinetic energies
 
   if (tstat_flag || pstat_flag) {
     double ke[2],keall[2];
     ke[0] = akin_t;
     ke[1] = akin_r;
     MPI_Allreduce(ke,keall,2,MPI_DOUBLE,MPI_SUM,world);
     akin_t = keall[0];
     akin_r = keall[1];
   }
 
   // compute target temperature
   // barostat only: use current temperature as target
   //   if it is exactly zero, fall back to 1.0 (lj units) or 300.0
 
   if (tstat_flag) compute_temp_target();
   else if (pstat_flag) {
     t0 = temperature->compute_scalar();
     if (t0 == 0.0) {
       if (strcmp(update->unit_style,"lj") == 0) t0 = 1.0;
       else t0 = 300.0;
     }
     t_target = t0;
   }
 
   // compute target pressure
   // compute current pressure
   // trigger virial computation on next timestep
 
   if (pstat_flag) {
     compute_press_target();
 
     if (pstyle == ISO) {
       temperature->compute_scalar();
       pressure->compute_scalar();
     } else {
       temperature->compute_vector();
       pressure->compute_vector();
     }
 
     couple();
     pressure->addstep(update->ntimestep+1);
   }
 
   // initialize thermostat/barostat settings
   // kt is only consumed inside the tstat_flag/pstat_flag branches below
 
   double kt, t_mass, tb_mass;
   kt = boltz * t_target;
 
   if (tstat_flag) {
     t_mass = kt / (t_freq*t_freq);
     q_t[0] = nf_t * t_mass;
     q_r[0] = nf_r * t_mass;
     for (int i = 1; i < t_chain; i++)
       q_t[i] = q_r[i] = t_mass;
 
     for (int i = 1; i < t_chain; i++) {
       f_eta_t[i] = (q_t[i-1] * eta_dot_t[i-1] * eta_dot_t[i-1] - kt)/q_t[i];
       f_eta_r[i] = (q_r[i-1] * eta_dot_r[i-1] * eta_dot_r[i-1] - kt)/q_r[i];
     }
   }
 
   // initial forces on barostat thermostat variables
 
   if (pstat_flag) {
     for (int i = 0; i < 3; i++)
       if (p_flag[i]) {
         epsilon_mass[i] = (g_f + dimension) * kt / (p_freq[i]*p_freq[i]);
         epsilon[i] = log(vol0)/dimension;
       }
 
     tb_mass = kt / (p_freq_max * p_freq_max);
     q_b[0] = dimension * dimension * tb_mass;
     for (int i = 1; i < p_chain; i++) {
       q_b[i] = tb_mass;
 
       // NOTE(review): numerator uses q_b[i], while the thermostat chain
       //   forces above use q_t[i-1]/q_r[i-1] -- confirm which is intended
 
       f_eta_b[i] = (q_b[i] * eta_dot_b[i-1] * eta_dot_b[i-1] - kt)/q_b[i];
     }
   }
 
   // update order/timestep dependent coefficients
   // wdti1 = weighted sub-step, wdti2 and wdti4 = half and quarter of it
 
   if (tstat_flag || pstat_flag) {
     for (int i = 0; i < t_order; i++) {
       wdti1[i] = w[i] * dtv / t_iter;
       wdti2[i] = wdti1[i] / 2.0;
       wdti4[i] = wdti1[i] / 4.0;
     }
   }
 
   if (pstat_flag) {
     compute_press_target();
     nh_epsilon_dot();
   }
 }
 
 /* ----------------------------------------------------------------------
    perform preforce velocity Verlet integration
    see Kamberaj paper for step references
    for each local body: half-step update of vcm and conjqm, update of xcm,
      no_squish rotation of quat/conjqm, then angmom and omega are rebuilt
    afterwards body info is forward communicated, kinetic energies are
      summed, thermostat/barostat chains are integrated, the box is
      remapped (barostat only) and atom coords/velocities are set
    vflag = virial flag, passed to v_setup() when non-zero
 ------------------------------------------------------------------------- */
 
 void FixRigidNHSmall::initial_integrate(int vflag)
 {
   double tmp,scale_r,scale_t[3],scale_v[3];
   double dtfm,mbody[3],tbody[3],fquat[4];
 
   // dtf2 = twice the force timestep factor dtf, applied to conjqm
 
   double dtf2 = dtf * 2.0;
 
   // compute scale variables
   // scale_t scales vcm, scale_r scales conjqm, scale_v multiplies vcm
   //   in the xcm update when barostatting
 
   scale_t[0] = scale_t[1] = scale_t[2] = 1.0;
   scale_v[0] = scale_v[1] = scale_v[2] = 1.0;
   scale_r = 1.0;
 
   if (tstat_flag) {
     tmp = exp(-dtq * eta_dot_t[0]);
     scale_t[0] = scale_t[1] = scale_t[2] = tmp;
     tmp = exp(-dtq * eta_dot_r[0]);
     scale_r = tmp;
   }
 
   if (pstat_flag) {
     scale_t[0] *= exp(-dtq * (epsilon_dot[0] + mtk_term2));
     scale_t[1] *= exp(-dtq * (epsilon_dot[1] + mtk_term2));
     scale_t[2] *= exp(-dtq * (epsilon_dot[2] + mtk_term2));
     scale_r *= exp(-dtq * (pdim * mtk_term2));
 
     tmp = dtq * epsilon_dot[0];
     scale_v[0] = dtv * exp(tmp) * maclaurin_series(tmp);
     tmp = dtq * epsilon_dot[1];
     scale_v[1] = dtv * exp(tmp) * maclaurin_series(tmp);
     tmp = dtq * epsilon_dot[2];
     scale_v[2] = dtv * exp(tmp) * maclaurin_series(tmp);
   }
 
   // update xcm, vcm, quat, conjqm and angmom
 
   for (int ibody = 0; ibody < nlocal_body; ibody++) {
     Body *b = &body[ibody];
 
     // step 1.1 - update vcm by 1/2 step
     // force kick is applied first, thermostat/barostat scaling second
 
     dtfm = dtf / b->mass;
     b->vcm[0] += dtfm * b->fcm[0];
     b->vcm[1] += dtfm * b->fcm[1];
     b->vcm[2] += dtfm * b->fcm[2];
 
     if (tstat_flag || pstat_flag) {
       b->vcm[0] *= scale_t[0];
       b->vcm[1] *= scale_t[1];
       b->vcm[2] *= scale_t[2];
     }
 
     // step 1.2 - update xcm by full step
 
     if (!pstat_flag) {
       b->xcm[0] += dtv * b->vcm[0];
       b->xcm[1] += dtv * b->vcm[1];
       b->xcm[2] += dtv * b->vcm[2];
     } else {
       b->xcm[0] += scale_v[0] * b->vcm[0];
       b->xcm[1] += scale_v[1] * b->vcm[1];
       b->xcm[2] += scale_v[2] * b->vcm[2];
     }
 
     // step 1.3 - apply torque (body coords) to quaternion momentum
 
     MathExtra::transpose_matvec(b->ex_space,b->ey_space,b->ez_space,
                                 b->torque,tbody);
     MathExtra::quatvec(b->quat,tbody,fquat);
 
     b->conjqm[0] += dtf2 * fquat[0];
     b->conjqm[1] += dtf2 * fquat[1];
     b->conjqm[2] += dtf2 * fquat[2];
     b->conjqm[3] += dtf2 * fquat[3];
 
     if (tstat_flag || pstat_flag) {
       b->conjqm[0] *= scale_r;
       b->conjqm[1] *= scale_r;
       b->conjqm[2] *= scale_r;
       b->conjqm[3] *= scale_r;
     }
 
     // step 1.4 to 1.13 - use no_squish rotate to update p and q
     // symmetric sequence 3,2,1,2,3 with dtq for the outer calls
     //   and dtv for the middle one
 
     no_squish_rotate(3,b->conjqm,b->quat,b->inertia,dtq);
     no_squish_rotate(2,b->conjqm,b->quat,b->inertia,dtq);
     no_squish_rotate(1,b->conjqm,b->quat,b->inertia,dtv);
     no_squish_rotate(2,b->conjqm,b->quat,b->inertia,dtq);
     no_squish_rotate(3,b->conjqm,b->quat,b->inertia,dtq);
 
     // update exyz_space
     // transform p back to angmom
     // update angular velocity
 
     MathExtra::q_to_exyz(b->quat,b->ex_space,b->ey_space,
                          b->ez_space);
     MathExtra::invquatvec(b->quat,b->conjqm,mbody);
     MathExtra::matvec(b->ex_space,b->ey_space,b->ez_space,
                       mbody,b->angmom);
 
     b->angmom[0] *= 0.5;
     b->angmom[1] *= 0.5;
     b->angmom[2] *= 0.5;
 
     MathExtra::angmom_to_omega(b->angmom,b->ex_space,b->ey_space,
                                b->ez_space,b->inertia,b->omega);
   }
 
   // forward communicate updated info of all bodies
   // NOTE(review): 26 is presumably the number of values sent per body
   //   for commflag = INITIAL -- confirm against pack_forward_comm()
 
   commflag = INITIAL;
   comm->forward_comm_fix(this,26);
 
   // accumulate translational and rotational kinetic energies
 
   if (tstat_flag || pstat_flag) {
 
     akin_t = akin_r = 0.0;
     for (int ibody = 0; ibody < nlocal_body; ibody++) {
       Body *b = &body[ibody];
       akin_t += b->mass*(b->vcm[0]*b->vcm[0] + b->vcm[1]*b->vcm[1] +
         b->vcm[2]*b->vcm[2]);
       akin_r += b->angmom[0]*b->omega[0] + b->angmom[1]*b->omega[1] +
         b->angmom[2]*b->omega[2];
     }
 
     double ke[2],keall[2];
     ke[0] = akin_t;
     ke[1] = akin_r;
     MPI_Allreduce(ke,keall,2,MPI_DOUBLE,MPI_SUM,world);
     akin_t = keall[0];
     akin_r = keall[1];
   }
 
   // compute target temperature
   // update thermostat chains using akin_t and akin_r
   // refer to update_nhcp() in Kamberaj et al.
 
   if (tstat_flag) {
     compute_temp_target();
+    if (dynamic) compute_dof();
     nhc_temp_integrate();
   }
 
   // update thermostat chains coupled with barostat
   // refer to update_nhcb() in Kamberaj et al.
 
   if (pstat_flag) {
     nhc_press_integrate();
   }
 
   // virial setup before call to set_xv
 
   if (vflag) v_setup(vflag);
   else evflag = 0;
 
   // remap simulation box by 1/2 step
 
   if (pstat_flag) remap();
 
   // set coords/orient and velocity/rotation of atoms in rigid bodies
   // from quaternion and omega
 
   set_xv();
 
   // remap simulation box by full step
   // redo KSpace coeffs since volume has changed
 
   if (pstat_flag) {
     remap();
     if (kspace_flag) force->kspace->setup();
   }
 }
 
 /* ----------------------------------------------------------------------
    perform postforce half of the integration
    sums atom forces and torques onto their bodies, reverse communicates
      them, adds Langevin contributions if langflag is set, then updates
      vcm and conjqm of each local body and rebuilds angmom and omega
    afterwards body info is forward communicated, atom velocities are set
      and, when barostatting, kinetic energies, pressure and epsilon_dot
      are recomputed
 ------------------------------------------------------------------------- */
 
 void FixRigidNHSmall::final_integrate()
 {
   int i,ibody;
   double tmp,scale_t[3],scale_r;
   double dtfm;
   double mbody[3],tbody[3],fquat[4];
 
   // dtf2 = twice the force timestep factor dtf, applied to conjqm
 
   double dtf2 = dtf * 2.0;
 
   // compute scale variables
 
   scale_t[0] = scale_t[1] = scale_t[2] = 1.0;
   scale_r = 1.0;
 
   if (tstat_flag) {
     tmp = exp(-1.0 * dtq * eta_dot_t[0]);
     scale_t[0] = scale_t[1] = scale_t[2] = tmp;
     scale_r = exp(-1.0 * dtq * eta_dot_r[0]);
   }
 
   if (pstat_flag) {
     scale_t[0] *= exp(-dtq * (epsilon_dot[0] + mtk_term2));
     scale_t[1] *= exp(-dtq * (epsilon_dot[1] + mtk_term2));
     scale_t[2] *= exp(-dtq * (epsilon_dot[2] + mtk_term2));
     scale_r *= exp(-dtq * (pdim * mtk_term2));
   }
 
   // sum over atoms to get force and torque on rigid body
 
   double **x = atom->x;
   double **f = atom->f;
   int nlocal = atom->nlocal;
 
   double dx,dy,dz;
   double unwrap[3];
   double *xcm,*fcm,*tcm;
 
   // zero force and torque of both local and ghost bodies
 
   for (ibody = 0; ibody < nlocal_body+nghost_body; ibody++) {
     fcm = body[ibody].fcm;
     fcm[0] = fcm[1] = fcm[2] = 0.0;
     tcm = body[ibody].torque;
     tcm[0] = tcm[1] = tcm[2] = 0.0;
   }
 
   // atoms with atom2body < 0 are skipped
 
   for (i = 0; i < nlocal; i++) {
     if (atom2body[i] < 0) continue;
     Body *b = &body[atom2body[i]];
 
     fcm = b->fcm;
     fcm[0] += f[i][0];
     fcm[1] += f[i][1];
     fcm[2] += f[i][2];
 
     // torque = (unwrapped atom position - xcm) cross f
 
     domain->unmap(x[i],xcmimage[i],unwrap);
     xcm = b->xcm;
     dx = unwrap[0] - xcm[0];
     dy = unwrap[1] - xcm[1];
     dz = unwrap[2] - xcm[2];
 
     tcm = b->torque;
     tcm[0] += dy*f[i][2] - dz*f[i][1];
     tcm[1] += dz*f[i][0] - dx*f[i][2];
     tcm[2] += dx*f[i][1] - dy*f[i][0];
   }
 
   // extended particles add their torque to torque of body
 
   if (extended) {
     double **torque = atom->torque;
 
     for (i = 0; i < nlocal; i++) {
       if (atom2body[i] < 0) continue;
 
       if (eflags[i] & TORQUE) {
         tcm = body[atom2body[i]].torque;
         tcm[0] += torque[i][0];
         tcm[1] += torque[i][1];
         tcm[2] += torque[i][2];
       }
     }
   }
 
   // reverse communicate fcm, torque of all bodies
   // 6 values per body = 3 force + 3 torque components
 
   commflag = FORCE_TORQUE;
   comm->reverse_comm_fix(this,6);
 
   // include Langevin thermostat forces and torques
   // langextra[ibody][0-2] = force, langextra[ibody][3-5] = torque
   // the loop index declared here shadows the outer ibody
 
   if (langflag) {
     for (int ibody = 0; ibody < nlocal_body; ibody++) {
       fcm = body[ibody].fcm;
       fcm[0] += langextra[ibody][0];
       fcm[1] += langextra[ibody][1];
       fcm[2] += langextra[ibody][2];
       tcm = body[ibody].torque;
       tcm[0] += langextra[ibody][3];
       tcm[1] += langextra[ibody][4];
       tcm[2] += langextra[ibody][5];
     }
   }
 
   // update vcm and angmom
   // include Langevin thermostat forces
   // fflag,tflag = 0 for some dimensions in 2d
   // NOTE(review): fflag and tflag are not referenced in the loop below
 
   for (ibody = 0; ibody < nlocal_body; ibody++) {
     Body *b = &body[ibody];
 
     // update vcm by 1/2 step
     // scaling is applied before the force kick here
 
     dtfm = dtf / b->mass;
     if (tstat_flag || pstat_flag) {
       b->vcm[0] *= scale_t[0];
       b->vcm[1] *= scale_t[1];
       b->vcm[2] *= scale_t[2];
     }
 
     b->vcm[0] += dtfm * b->fcm[0];
     b->vcm[1] += dtfm * b->fcm[1];
     b->vcm[2] += dtfm * b->fcm[2];
 
     // update conjqm, then transform to angmom, set velocity again
     // virial is already setup from initial_integrate
 
     MathExtra::transpose_matvec(b->ex_space,b->ey_space,
                                 b->ez_space,b->torque,tbody);
     MathExtra::quatvec(b->quat,tbody,fquat);
 
     if (tstat_flag || pstat_flag) {
       b->conjqm[0] = scale_r * b->conjqm[0] + dtf2 * fquat[0];
       b->conjqm[1] = scale_r * b->conjqm[1] + dtf2 * fquat[1];
       b->conjqm[2] = scale_r * b->conjqm[2] + dtf2 * fquat[2];
       b->conjqm[3] = scale_r * b->conjqm[3] + dtf2 * fquat[3];
     } else {
       b->conjqm[0] += dtf2 * fquat[0];
       b->conjqm[1] += dtf2 * fquat[1];
       b->conjqm[2] += dtf2 * fquat[2];
       b->conjqm[3] += dtf2 * fquat[3];
     }
 
     MathExtra::invquatvec(b->quat,b->conjqm,mbody);
     MathExtra::matvec(b->ex_space,b->ey_space,b->ez_space,mbody,b->angmom);
 
     b->angmom[0] *= 0.5;
     b->angmom[1] *= 0.5;
     b->angmom[2] *= 0.5;
 
     MathExtra::angmom_to_omega(b->angmom,b->ex_space,b->ey_space,
                                b->ez_space,b->inertia,b->omega);
   }
 
   // forward communicate updated info of all bodies
 
   commflag = FINAL;
   comm->forward_comm_fix(this,10);
 
   // accumulate translational and rotational kinetic energies
   // only done when barostatting, the sums feed nh_epsilon_dot() below
 
   if (pstat_flag) {
 
     akin_t = akin_r = 0.0;
     for (int ibody = 0; ibody < nlocal_body; ibody++) {
       Body *b = &body[ibody];
       akin_t += b->mass*(b->vcm[0]*b->vcm[0] + b->vcm[1]*b->vcm[1] +
         b->vcm[2]*b->vcm[2]);
       akin_r += b->angmom[0]*b->omega[0] + b->angmom[1]*b->omega[1] +
         b->angmom[2]*b->omega[2];
     }
 
     double ke[2],keall[2];
     ke[0] = akin_t;
     ke[1] = akin_r;
     MPI_Allreduce(ke,keall,2,MPI_DOUBLE,MPI_SUM,world);
     akin_t = keall[0];
     akin_r = keall[1];
   }
 
   // set velocity/rotation of atoms in rigid bodies
   // virial is already setup from initial_integrate
 
   set_v();
 
   // compute current temperature
   if (tcomputeflag) t_current = temperature->compute_scalar();
 
   // compute current and target pressures
   // update epsilon dot using akin_t and akin_r
 
   if (pstat_flag) {
     if (pstyle == ISO) {
       temperature->compute_scalar();
       pressure->compute_scalar();
     } else {
       temperature->compute_vector();
       pressure->compute_vector();
     }
     couple();
     pressure->addstep(update->ntimestep+1);
 
     compute_press_target();
 
     nh_epsilon_dot();
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Integrate the translational (eta_t) and rotational (eta_r) thermostat
 // chains, driven by the kinetic energies akin_t/akin_r and the current
 // target temperature t_target.  The update runs t_iter outer iterations
 // of t_order sub-steps, each weighted by wdti1/wdti2/wdti4.
 void FixRigidNHSmall::nhc_temp_integrate()
 {
   int i,j,k;
   double kt,gfkt_t,gfkt_r,tmp,ms,s,s2;
 
   // kt = boltz * target temperature
   // gfkt_t/gfkt_r = kt scaled by the trans/rot degree-of-freedom counts
 
   kt = boltz * t_target;
   gfkt_t = nf_t * kt;
   gfkt_r = nf_r * kt;
 
   // update thermostat masses
   // first chain element is scaled by the DOF count, the rest use t_mass
 
   double t_mass = boltz * t_target / (t_freq * t_freq);
   q_t[0] = nf_t * t_mass;
   q_r[0] = nf_r * t_mass;
   for (i = 1; i < t_chain; i++)
     q_t[i] = q_r[i] = t_mass;
 
   // update force of thermostats coupled to particles
 
   f_eta_t[0] = (akin_t * mvv2e - gfkt_t) / q_t[0];
   f_eta_r[0] = (akin_r * mvv2e - gfkt_r) / q_r[0];
 
   // multiple timestep iteration
 
   for (i = 0; i < t_iter; i++) {
     for (j = 0; j < t_order; j++) {
 
       // update thermostat velocities half step
       // last chain element first, it has no successor to damp it
 
       eta_dot_t[t_chain-1] += wdti2[j] * f_eta_t[t_chain-1];
       eta_dot_r[t_chain-1] += wdti2[j] * f_eta_r[t_chain-1];
 
       // walk from element t_chain-2 down to 0; each element is scaled by
       // s2 = exp(-2*tmp), where tmp comes from the velocity of the next
       // element in the chain
 
       for (k = 1; k < t_chain; k++) {
         tmp = wdti4[j] * eta_dot_t[t_chain-k];
         ms = maclaurin_series(tmp);
         s = exp(-1.0 * tmp);
         s2 = s * s;
         eta_dot_t[t_chain-k-1] = eta_dot_t[t_chain-k-1] * s2 +
           wdti2[j] * f_eta_t[t_chain-k-1] * s * ms;
 
         tmp = wdti4[j] * eta_dot_r[t_chain-k];
         ms = maclaurin_series(tmp);
         s = exp(-1.0 * tmp);
         s2 = s * s;
         eta_dot_r[t_chain-k-1] = eta_dot_r[t_chain-k-1] * s2 +
           wdti2[j] * f_eta_r[t_chain-k-1] * s * ms;
       }
 
       // update thermostat positions a full step
 
       for (k = 0; k < t_chain; k++) {
         eta_t[k] += wdti1[j] * eta_dot_t[k];
         eta_r[k] += wdti1[j] * eta_dot_r[k];
       }
 
       // update thermostat forces
       // force on element k depends on the velocity of element k-1
 
       for (k = 1; k < t_chain; k++) {
         f_eta_t[k] = q_t[k-1] * eta_dot_t[k-1] * eta_dot_t[k-1] - kt;
         f_eta_t[k] /= q_t[k];
         f_eta_r[k] = q_r[k-1] * eta_dot_r[k-1] * eta_dot_r[k-1] - kt;
         f_eta_r[k] /= q_r[k];
       }
 
       // update thermostat velocities a full step
       // walk from element 0 up to t_chain-2, refreshing the force on
       // element k+1 from the just-updated velocity of element k
 
       for (k = 0; k < t_chain-1; k++) {
         tmp = wdti4[j] * eta_dot_t[k+1];
         ms = maclaurin_series(tmp);
         s = exp(-1.0 * tmp);
         s2 = s * s;
         eta_dot_t[k] = eta_dot_t[k] * s2 + wdti2[j] * f_eta_t[k] * s * ms;
         tmp = q_t[k] * eta_dot_t[k] * eta_dot_t[k] - kt;
         f_eta_t[k+1] = tmp / q_t[k+1];
 
         tmp = wdti4[j] * eta_dot_r[k+1];
         ms = maclaurin_series(tmp);
         s = exp(-1.0 * tmp);
         s2 = s * s;
         eta_dot_r[k] = eta_dot_r[k] * s2 + wdti2[j] * f_eta_r[k] * s * ms;
         tmp = q_r[k] * eta_dot_r[k] * eta_dot_r[k] - kt;
         f_eta_r[k+1] = tmp / q_r[k+1];
       }
 
       // last chain element gets its second kick with the refreshed force
 
       eta_dot_t[t_chain-1] += wdti2[j] * f_eta_t[t_chain-1];
       eta_dot_r[t_chain-1] += wdti2[j] * f_eta_r[t_chain-1];
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Integrate the thermostat chain (eta_b, eta_dot_b) that is coupled to
 // the barostat velocities epsilon_dot.  Also refreshes the chain masses
 // q_b and the barostat masses epsilon_mass from the current t_target.
 void FixRigidNHSmall::nhc_press_integrate()
 {
   int i,j,k;
   double tmp,s,s2,ms,kecurrent;
   double kt = boltz * t_target;
   double lkt_press = kt;
 
   // update thermostat masses
   // NOTE(review): the loop below declares its own "int i", which shadows
   // the function-scope i; harmless here, but easy to misread
 
   double tb_mass = kt / (p_freq_max * p_freq_max);
   q_b[0] = dimension * dimension * tb_mass;
   for (int i = 1; i < p_chain; i++) {
     q_b[i] = tb_mass;
     f_eta_b[i] = q_b[i-1] * eta_dot_b[i-1] * eta_dot_b[i-1] - kt;
     f_eta_b[i] /= q_b[i];
   }
 
   // update forces acting on thermostat
   // kecurrent = barostat kinetic term summed over the barostatted
   // dimensions (p_flag) and divided by their count pdim
 
   kecurrent = 0.0;
   for (i = 0; i < 3; i++)
     if (p_flag[i]) {
       epsilon_mass[i] = (g_f + dimension) * kt / (p_freq[i] * p_freq[i]);
       kecurrent += epsilon_mass[i] * epsilon_dot[i] * epsilon_dot[i];
     }
   kecurrent /= pdim;
 
   f_eta_b[0] = (kecurrent - lkt_press) / q_b[0];
 
   // multiple timestep iteration
   // uses the same t_iter/t_order/wdti* settings as nhc_temp_integrate()
 
   for (i = 0; i < t_iter; i++) {
     for (j = 0; j < t_order; j++) {
 
       // update thermostat velocities a half step
 
       eta_dot_b[p_chain-1] += wdti2[j] * f_eta_b[p_chain-1];
 
       // NOTE(review): s is exp(-0.5*tmp) here, whereas nhc_temp_integrate()
       // uses exp(-1.0*tmp) in the otherwise identical update - confirm
       // that the different prefactor is intended
 
       for (k = 1; k < p_chain; k++) {
         tmp = wdti4[j] * eta_dot_b[p_chain-k];
         ms = maclaurin_series(tmp);
         s = exp(-0.5 * tmp);
         s2 = s * s;
         eta_dot_b[p_chain-k-1] = eta_dot_b[p_chain-k-1] * s2 +
           wdti2[j] * f_eta_b[p_chain-k-1] * s * ms;
       }
 
       // update thermostat positions
 
       for (k = 0; k < p_chain; k++)
         eta_b[k] += wdti1[j] * eta_dot_b[k];
 
       // update thermostat forces
 
       for (k = 1; k < p_chain; k++) {
         f_eta_b[k] = q_b[k-1] * eta_dot_b[k-1] * eta_dot_b[k-1] - kt;
         f_eta_b[k] /= q_b[k];
       }
 
       // update thermostat velocities a full step
       // the force on element k+1 is refreshed from the new velocity of k
 
       for (k = 0; k < p_chain-1; k++) {
         tmp = wdti4[j] * eta_dot_b[k+1];
         ms = maclaurin_series(tmp);
         s = exp(-0.5 * tmp);
         s2 = s * s;
         eta_dot_b[k] = eta_dot_b[k] * s2 + wdti2[j] * f_eta_b[k] * s * ms;
         tmp = q_b[k] * eta_dot_b[k] * eta_dot_b[k] - kt;
         f_eta_b[k+1] = tmp / q_b[k+1];
       }
 
       eta_dot_b[p_chain-1] += wdti2[j] * f_eta_b[p_chain-1];
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    compute kinetic energy in the extended Hamiltonian
    conserved quantity = sum of returned energy and potential energy
 -----------------------------------------------------------------------*/
 
 double FixRigidNHSmall::compute_scalar()
 {
   int i,k;
   double kt = boltz * t_target;
   double energy,ke_t,ke_q,tmp,Pkq[4];
 
   double *vcm,*quat;
 
   // compute the kinetic parts of H_NVE in Kameraj et al (JCP 2005, pp 224114)
 
   // translational and rotational kinetic energies
 
   ke_t = 0.0;
   ke_q = 0.0;
 
   for (int i = 0; i < nlocal_body; i++) {
     vcm = body[i].vcm;
     quat = body[i].quat;
     ke_t += body[i].mass * (vcm[0]*vcm[0] + vcm[1]*vcm[1] +
       vcm[2]*vcm[2]);
 
     for (k = 1; k < 4; k++) {
       if (k == 1) {
         Pkq[0] = -quat[1];
         Pkq[1] =  quat[0];
         Pkq[2] =  quat[3];
         Pkq[3] = -quat[2];
       } else if (k == 2) {
         Pkq[0] = -quat[2];
         Pkq[1] = -quat[3];
         Pkq[2] =  quat[0];
         Pkq[3] =  quat[1];
       } else if (k == 3) {
         Pkq[0] = -quat[3];
         Pkq[1] =  quat[2];
         Pkq[2] = -quat[1];
         Pkq[3] =  quat[0];
       }
 
       tmp = body[i].conjqm[0]*Pkq[0] + body[i].conjqm[1]*Pkq[1] +
         body[i].conjqm[2]*Pkq[2] + body[i].conjqm[3]*Pkq[3];
       tmp *= tmp;
 
       if (fabs(body[i].inertia[k-1]) < 1e-6) tmp = 0.0;
       else tmp /= (8.0 * body[i].inertia[k-1]);
       ke_q += tmp;
     }
   }
 
   double ke[2],keall[2];
   ke[0] = ke_t;
   ke[1] = ke_q;
   MPI_Allreduce(ke,keall,2,MPI_DOUBLE,MPI_SUM,world);
   ke_t = keall[0];
   ke_q = keall[1];
 
   energy = (ke_t + ke_q) * mvv2e;
 
   if (tstat_flag) {
 
     // thermostat chain energy: from equation 12 in Kameraj et al (JCP 2005)
 
     energy += kt * (nf_t * eta_t[0] + nf_r * eta_r[0]);
 
     for (i = 1; i < t_chain; i++)
       energy += kt * (eta_t[i] + eta_r[i]);
 
     for (i = 0;  i < t_chain; i++) {
       energy += 0.5 * q_t[i] * (eta_dot_t[i] * eta_dot_t[i]);
       energy += 0.5 * q_r[i] * (eta_dot_r[i] * eta_dot_r[i]);
     }
   }
 
   if (pstat_flag) {
 
     // using equation 22 in Kameraj et al for H_NPT
 
     double e = 0.0;
     for (i = 0; i < 3; i++)
       if (p_flag[i])
         e += epsilon_mass[i] * epsilon_dot[i] * epsilon_dot[i];
     energy += e*(0.5/pdim);
 
     double vol;
     if (dimension == 2) vol = domain->xprd * domain->yprd;
     else vol = domain->xprd * domain->yprd * domain->zprd;
 
     double p0 = (p_target[0] + p_target[1] + p_target[2]) / 3.0;
     energy += p0 * vol / nktv2p;
 
     for (i = 0;  i < p_chain; i++) {
       energy += kt * eta_b[i];
       energy += 0.5 * q_b[i] * (eta_dot_b[i] * eta_dot_b[i]);
     }
   }
 
   return energy;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Fill p_current[] from the pressure compute, averaging the components
 // that are coupled together.
 void FixRigidNHSmall::couple()
 {
   double *tensor = pressure->vector;
 
   // isotropic: all three components take the scalar pressure
 
   if (pstyle == ISO) {
     p_current[0] = p_current[1] = p_current[2] = pressure->scalar;
     return;
   }
 
   if (pcouple == XYZ) {
     const double mean = 1.0/3.0 * (tensor[0] + tensor[1] + tensor[2]);
     p_current[0] = p_current[1] = p_current[2] = mean;
     return;
   }
 
   if (pcouple == XY) {
     const double mean = 0.5 * (tensor[0] + tensor[1]);
     p_current[0] = p_current[1] = mean;
     p_current[2] = tensor[2];
     return;
   }
 
   if (pcouple == YZ) {
     const double mean = 0.5 * (tensor[1] + tensor[2]);
     p_current[1] = p_current[2] = mean;
     p_current[0] = tensor[0];
     return;
   }
 
   if (pcouple == XZ) {
     const double mean = 0.5 * (tensor[0] + tensor[2]);
     p_current[0] = p_current[2] = mean;
     p_current[1] = tensor[1];
     return;
   }
 
   // no coupling: each component is used as computed
 
   for (int d = 0; d < 3; d++) p_current[d] = tensor[d];
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixRigidNHSmall::remap()
 {
   int i;
   double oldlo,oldhi,ctr,expfac;
 
   double **x = atom->x;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
   // epsilon is not used, except for book-keeping
 
   for (i = 0; i < 3; i++) epsilon[i] += dtq * epsilon_dot[i];
 
   // convert pertinent atoms and rigid bodies to lamda coords
 
   if (allremap) domain->x2lamda(nlocal);
   else {
     for (i = 0; i < nlocal; i++)
       if (mask[i] & dilate_group_bit)
         domain->x2lamda(x[i],x[i]);
   }
 
   if (nrigidfix)
     for (i = 0; i < nrigidfix; i++)
       modify->fix[rfix[i]]->deform(0);
 
   // reset global and local box to new size/shape
 
   for (i = 0; i < 3; i++) {
     if (p_flag[i]) {
       oldlo = domain->boxlo[i];
       oldhi = domain->boxhi[i];
       ctr = 0.5 * (oldlo + oldhi);
       expfac = exp(dtq * epsilon_dot[i]);
       domain->boxlo[i] = (oldlo-ctr)*expfac + ctr;
       domain->boxhi[i] = (oldhi-ctr)*expfac + ctr;
     }
   }
 
   domain->set_global_box();
   domain->set_local_box();
 
   // convert pertinent atoms and rigid bodies back to box coords
 
   if (allremap) domain->lamda2x(nlocal);
   else {
     for (i = 0; i < nlocal; i++)
       if (mask[i] & dilate_group_bit)
         domain->lamda2x(x[i],x[i]);
   }
 
   if (nrigidfix)
     for (i = 0; i< nrigidfix; i++)
       modify->fix[rfix[i]]->deform(1);
 }
 
 /* ----------------------------------------------------------------------
    compute target temperature and kinetic energy
 -----------------------------------------------------------------------*/
 
 void FixRigidNHSmall::compute_temp_target()
 {
   // fraction of the run completed so far; stays 0 on the first step
 
   double frac = update->ntimestep - update->beginstep;
   if (frac != 0.0) frac = frac / (update->endstep - update->beginstep);
 
   // linear ramp from t_start to t_stop
 
   const double span = t_stop-t_start;
   t_target = t_start + frac * span;
 }
 
 /* ----------------------------------------------------------------------
    compute hydrostatic target pressure
 -----------------------------------------------------------------------*/
 
 void FixRigidNHSmall::compute_press_target()
 {
   // fraction of the run completed so far; stays 0 on the first step
 
   double frac = update->ntimestep - update->beginstep;
   if (frac != 0.0) frac = frac / (update->endstep - update->beginstep);
 
   // ramp each barostatted component and average them into p_hydro
 
   p_hydro = 0.0;
   for (int d = 0; d < 3; d++) {
     if (!p_flag[d]) continue;
     p_target[d] = p_start[d] + frac * (p_stop[d]-p_start[d]);
     p_hydro += p_target[d];
   }
   p_hydro /= pdim;
 }
 
 /* ----------------------------------------------------------------------
    update epsilon_dot
 -----------------------------------------------------------------------*/
 
 void FixRigidNHSmall::nh_epsilon_dot()
 {
   const double volume = (dimension == 2) ?
     domain->xprd*domain->yprd :
     domain->xprd*domain->yprd*domain->zprd;
 
   // first MTK term: total kinetic energy per degree of freedom
 
   mtk_term1 = (akin_t + akin_r) * mvv2e / g_f;
 
   // damping from the first element of the barostat thermostat chain
 
   const double damp = exp(-1.0 * dtq * eta_dot_b[0]);
 
   // kick, then damp, each barostatted epsilon_dot; the second MTK term
   // is the sum of the updated values divided by g_f
 
   double eps_sum = 0.0;
   for (int d = 0; d < 3; d++) {
     if (!p_flag[d]) continue;
     double force = (p_current[d]-p_hydro)*volume / nktv2p + mtk_term1;
     force /= epsilon_mass[d];
     epsilon_dot[d] += dtq * force;
     epsilon_dot[d] *= damp;
     eps_sum += epsilon_dot[d];
   }
 
   mtk_term2 = eps_sum;
   mtk_term2 /= g_f;
 }
 
+/* ---------------------------------------------------------------------- */
+
+void FixRigidNHSmall::compute_dof()
+{
+  // translational DOFs owned by this proc: one per dimension per body
+
+  nf_t = dimension * nlocal_body;
+
+  // rotational DOFs owned by this proc: 3 per body in 3d, 1 per body in
+  // 2d, minus every principal axis whose moment of inertia vanishes
+
+  if (dimension == 3) {
+    int nvanish = 0;
+    for (int ibody = 0; ibody < nlocal_body; ibody++) {
+      const Body &b = body[ibody];
+      for (int k = 0; k < dimension; k++)
+        if (fabs(b.inertia[k]) < EPSILON) nvanish++;
+    }
+    nf_r = dimension * nlocal_body - nvanish;
+  } else if (dimension == 2) {
+    int nvanish = 0;
+    for (int ibody = 0; ibody < nlocal_body; ibody++) {
+      const Body &b = body[ibody];
+      if (fabs(b.inertia[2]) < EPSILON) nvanish++;
+    }
+    nf_r = nlocal_body - nvanish;
+  }
+
+  // sum both counts over all procs (sent as doubles in one reduction)
+
+  double nf_local[2], nf_global[2];
+  nf_local[0] = nf_t;
+  nf_local[1] = nf_r;
+  MPI_Allreduce(nf_local,nf_global,2,MPI_DOUBLE,MPI_SUM,world);
+  nf_t = nf_global[0];
+  nf_r = nf_global[1];
+
+  // total DOFs and the factors derived from them
+
+  g_f = nf_t + nf_r;
+  const double dim_over_dof = (double)(dimension) / (double)g_f;
+  onednft = 1.0 + dim_over_dof;
+  onednfr = dim_over_dof;
+}
+
 /* ----------------------------------------------------------------------
    pack entire state of Fix into one write
 ------------------------------------------------------------------------- */
 
 void FixRigidNHSmall::write_restart(FILE *fp)
 {
   // nothing to save without a thermostat or barostat
 
   if (!tstat_flag && !pstat_flag) return;
 
   // record layout, all stored as doubles:
   //   tstat_flag [t_chain, then (eta_t,eta_r,eta_dot_t,eta_dot_r) per link]
   //   pstat_flag [epsilon(3), epsilon_dot(3), p_chain,
   //               then (eta_b,eta_dot_b) per link]
 
   int nsize = 2;
   if (tstat_flag) nsize += 1 + 4*t_chain;
   if (pstat_flag) nsize += 7 + 2*p_chain;
 
   double *state;
   memory->create(state,nsize,"rigid_nh:list");
 
   int pos = 0;
 
   state[pos++] = tstat_flag;
   if (tstat_flag) {
     state[pos++] = t_chain;
     for (int m = 0; m < t_chain; m++) {
       state[pos++] = eta_t[m];
       state[pos++] = eta_r[m];
       state[pos++] = eta_dot_t[m];
       state[pos++] = eta_dot_r[m];
     }
   }
 
   state[pos++] = pstat_flag;
   if (pstat_flag) {
     for (int d = 0; d < 3; d++) state[pos++] = epsilon[d];
     for (int d = 0; d < 3; d++) state[pos++] = epsilon_dot[d];
 
     state[pos++] = p_chain;
     for (int m = 0; m < p_chain; m++) {
       state[pos++] = eta_b[m];
       state[pos++] = eta_dot_b[m];
     }
   }
 
   // only proc 0 writes: byte count first, then the payload
 
   if (comm->me == 0) {
     int nbytes = (nsize)*sizeof(double);
     fwrite(&nbytes,sizeof(int),1,fp);
     fwrite(state,sizeof(double),nsize,fp);
   }
 
   memory->destroy(state);
 }
 
 /* ----------------------------------------------------------------------
    use state info from restart file to restart the Fix
 ------------------------------------------------------------------------- */
 
 void FixRigidNHSmall::restart(char *buf)
 {
   int n = 0;
   double *list = (double *) buf;
   int flag = static_cast<int> (list[n++]);
 
   if (flag) {
     int m = static_cast<int> (list[n++]);
     if (tstat_flag && m == t_chain) {
       for (int i = 0; i < t_chain; i++) {
         eta_t[i] = list[n++];
         eta_r[i] = list[n++];
         eta_dot_t[i] = list[n++];
         eta_dot_r[i] = list[n++];
       }
     } else n += 4*m;
   }
 
   flag = static_cast<int> (list[n++]);
   if (flag) {
     epsilon[0] = list[n++];
     epsilon[1] = list[n++];
     epsilon[2] = list[n++];
     epsilon_dot[0] = list[n++];
     epsilon_dot[1] = list[n++];
     epsilon_dot[2] = list[n++];
 
     int m = static_cast<int> (list[n++]);
     if (pstat_flag && m == p_chain) {
       for (int i = 0; i < p_chain; i++) {
         eta_b[i] = list[n++];
         eta_dot_b[i] = list[n++];
       }
     } else n += 2*m;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixRigidNHSmall::modify_param(int narg, char **arg)
 {
   if (strcmp(arg[0],"temp") == 0) {
     if (narg < 2) error->all(FLERR,"Illegal fix_modify command");
     if (tcomputeflag) {
       modify->delete_compute(id_temp);
       tcomputeflag = 0;
     }
     delete [] id_temp;
     int n = strlen(arg[1]) + 1;
     id_temp = new char[n];
     strcpy(id_temp,arg[1]);
 
     int icompute = modify->find_compute(arg[1]);
     if (icompute < 0)
       error->all(FLERR,"Could not find fix_modify temperature ID");
     temperature = modify->compute[icompute];
 
     if (temperature->tempflag == 0)
       error->all(FLERR,
                  "Fix_modify temperature ID does not compute temperature");
     if (temperature->igroup != 0 && comm->me == 0)
       error->warning(FLERR,"Temperature for fix modify is not for group all");
 
     // reset id_temp of pressure to new temperature ID
 
     if (pstat_flag) {
       icompute = modify->find_compute(id_press);
       if (icompute < 0)
         error->all(FLERR,"Pressure ID for fix modify does not exist");
       modify->compute[icompute]->reset_extra_compute_fix(id_temp);
     }
 
     return 2;
 
   } else if (strcmp(arg[0],"press") == 0) {
     if (narg < 2) error->all(FLERR,"Illegal fix_modify command");
     if (!pstat_flag) error->all(FLERR,"Illegal fix_modify command");
     if (pcomputeflag) {
       modify->delete_compute(id_press);
       pcomputeflag = 0;
     }
     delete [] id_press;
     int n = strlen(arg[1]) + 1;
     id_press = new char[n];
     strcpy(id_press,arg[1]);
 
     int icompute = modify->find_compute(arg[1]);
     if (icompute < 0) error->all(FLERR,"Could not find fix_modify pressure ID");
     pressure = modify->compute[icompute];
 
     if (pressure->pressflag == 0)
       error->all(FLERR,"Fix_modify pressure ID does not compute pressure");
     return 2;
   }
 
   return 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Allocate the per-link arrays of the thermostat chains (length t_chain)
 // and of the barostat thermostat chain (length p_chain).
 void FixRigidNHSmall::allocate_chain()
 {
   if (tstat_flag) {
     const int nt = t_chain;
 
     // masses, positions, velocities and forces; trans then rot
 
     q_t = new double[nt];
     q_r = new double[nt];
     eta_t = new double[nt];
     eta_r = new double[nt];
     eta_dot_t = new double[nt];
     eta_dot_r = new double[nt];
     f_eta_t = new double[nt];
     f_eta_r = new double[nt];
   }
 
   if (pstat_flag) {
     const int np = p_chain;
 
     q_b = new double[np];
     eta_b = new double[np];
     eta_dot_b = new double[np];
     f_eta_b = new double[np];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Pin both ends of the temperature ramp to t_new.
 void FixRigidNHSmall::reset_target(double t_new)
 {
   t_stop = t_new;
   t_start = t_stop;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixRigidNHSmall::allocate_order()
 {
   w = new double[t_order];
   wdti1 = new double[t_order];
   wdti2 = new double[t_order];
   wdti4 = new double[t_order];
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Free the chain arrays, using the same flags that guard allocation
 // in allocate_chain().
 void FixRigidNHSmall::deallocate_chain()
 {
   if (tstat_flag) {
     double *tchain[8] = {q_t, q_r, eta_t, eta_r,
                          eta_dot_t, eta_dot_r, f_eta_t, f_eta_r};
     for (int m = 0; m < 8; m++) delete [] tchain[m];
   }
 
   if (pstat_flag) {
     double *pchain[4] = {q_b, eta_b, eta_dot_b, f_eta_b};
     for (int m = 0; m < 4; m++) delete [] pchain[m];
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Free the four weight arrays created by allocate_order().
 void FixRigidNHSmall::deallocate_order()
 {
   double *weights[4] = {w, wdti1, wdti2, wdti4};
   for (int m = 0; m < 4; m++) delete [] weights[m];
 }
-
diff --git a/src/RIGID/fix_rigid_nh_small.h b/src/RIGID/fix_rigid_nh_small.h
index 07510a59e..6b5193327 100644
--- a/src/RIGID/fix_rigid_nh_small.h
+++ b/src/RIGID/fix_rigid_nh_small.h
@@ -1,178 +1,179 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifndef LMP_FIX_RIGID_NH_SMALL_H
 #define LMP_FIX_RIGID_NH_SMALL_H
 
 #include "fix_rigid_small.h"
 
 namespace LAMMPS_NS {
 
 // Extension of FixRigidSmall that carries thermostat-chain and barostat
 // state (masses, positions, velocities, forces) together with the
 // temperature/pressure computes and the helpers that integrate them.
 class FixRigidNHSmall : public FixRigidSmall {
  public:
   FixRigidNHSmall(class LAMMPS *, int, char **);
   virtual ~FixRigidNHSmall();
   virtual int setmask();
   virtual void init();
   virtual void setup(int);
   virtual void initial_integrate(int);
   virtual void final_integrate();
   virtual double compute_scalar();
   int modify_param(int, char **);
   void write_restart(FILE *);
   void restart(char *buf);
   void reset_target(double);
 
  protected:
   double boltz,nktv2p,mvv2e;          // Boltzmann constant, conversion factors
 
   int dimension;                      // # of dimensions
   int nf_t,nf_r;                      // trans/rot degrees of freedom
   double onednft,onednfr;             // factors 1 + dimension/trans(rot) degrees of freedom
   double *w,*wdti1,*wdti2,*wdti4;     // Yoshida-Suzuki coefficients
   double *q_t,*q_r;                   // trans/rot thermostat masses
   double *eta_t,*eta_r;               // trans/rot thermostat positions
   double *eta_dot_t,*eta_dot_r;       // trans/rot thermostat velocities
   double *f_eta_t,*f_eta_r;           // trans/rot thermostat forces
 
   double epsilon_mass[3], *q_b;       // baro/thermo masses
   double epsilon[3],*eta_b;           // baro/thermo positions
   double epsilon_dot[3],*eta_dot_b;   // baro/thermo velocities
   double *f_eta_b;                    // thermo forces
   double akin_t,akin_r;               // translational/rotational kinetic energies
 
   int kspace_flag;                    // 1 if KSpace invoked, 0 if not
   int nrigidfix;                      // number of rigid fixes
   int *rfix;                          // indices of rigid fixes
 
   double vol0;                        // reference volume
   double t0;                          // reference temperature
   int pdim,g_f;                       // number of barostatted dims, total DoFs
   double p_hydro;                     // hydrostatic target pressure
   double p_freq_max;                  // maximum barostat frequency
 
   double mtk_term1,mtk_term2;         // Martyna-Tobias-Klein corrections
 
   double t_target,t_current;          // target and current temperature
   double t_freq;                      // thermostat frequency
 
   char *id_temp,*id_press;            // IDs of temperature/pressure computes
   class Compute *temperature,*pressure;
   int tcomputeflag,pcomputeflag;      // 1 = compute was created by fix. 0 = external
 
   void couple();
   void remap();
   void nhc_temp_integrate();
   void nhc_press_integrate();
 
   virtual void compute_temp_target();
   void compute_press_target();
   void nh_epsilon_dot();
-
+  void compute_dof();
+  
   void allocate_chain();
   void allocate_order();
   void deallocate_chain();
   void deallocate_order();
 
   inline double maclaurin_series(double);
 };
 
 // Even polynomial in x up to x^8:
 // 1 + x^2/6 + x^4/120 + x^6/5040 + x^8/362880
 inline double FixRigidNHSmall::maclaurin_series(double x)
 {
   const double xsq = x * x;
   const double xquad = xsq * xsq;
   return (1.0 + (1.0/6.0) * xsq + (1.0/120.0) * xquad +
           (1.0/5040.0) * xsq * xquad + (1.0/362880.0) * xquad * xquad);
 }
 
 }
 
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Fix rigid/small npt/nph period must be > 0.0
 
 Self-explanatory.
 
 E: Invalid fix rigid/small npt/nph command for a 2d simulation
 
 Cannot control z dimension in a 2d model.
 
 E: Invalid fix rigid/small npt/nph command pressure settings
 
 If multiple dimensions are coupled, those dimensions must be
 specified.
 
 E: Cannot use fix rigid/small npt/nph on a non-periodic dimension
 
 When specifying a diagonal pressure component, the dimension must be
 periodic.
 
 E: Fix rigid/small nvt/npt/nph damping parameters must be > 0.0
 
 Self-explanatory.
 
 E: Fix rigid npt/nph dilate group ID does not exist
 
 Self-explanatory.
 
 E: Temperature ID for fix rigid nvt/npt/nph does not exist
 
 Self-explanatory.
 
 E: Fix rigid npt/nph does not yet allow triclinic box
 
 This is a current restriction in LAMMPS.
 
 E: Cannot use fix rigid npt/nph and fix deform on same component of stress tensor
 
 This would be changing the same box dimension twice.
 
 E: Pressure ID for fix rigid npt/nph does not exist
 
 Self-explanatory.
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Could not find fix_modify temperature ID
 
 The compute ID for computing temperature does not exist.
 
 E: Fix_modify temperature ID does not compute temperature
 
 The compute ID assigned to the fix must compute temperature.
 
 W: Temperature for fix modify is not for group all
 
 The temperature compute is being used with a pressure calculation
 which does operate on group all, so this may be inconsistent.
 
 E: Pressure ID for fix modify does not exist
 
 Self-explanatory.
 
 E: Could not find fix_modify pressure ID
 
 The compute ID for computing pressure does not exist.
 
 E: Fix_modify pressure ID does not compute pressure
 
 The compute ID assigned to the fix must compute pressure.
 
 */
diff --git a/src/SNAP/compute_sna_atom.cpp b/src/SNAP/compute_sna_atom.cpp
index 326d2d620..ad934535a 100644
--- a/src/SNAP/compute_sna_atom.cpp
+++ b/src/SNAP/compute_sna_atom.cpp
@@ -1,280 +1,286 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 #include "sna.h"
 #include <string.h>
 #include <stdlib.h>
 #include "compute_sna_atom.h"
 #include "atom.h"
 #include "update.h"
 #include "modify.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "force.h"
 #include "pair.h"
 #include "comm.h"
 #include "memory.h"
 #include "error.h"
 #include "openmp_snap.h"
 
 using namespace LAMMPS_NS;
 
 // Parse the compute sna/atom arguments and create one SNA object per
 // thread.  Required arguments: arg[3] = rcutfac, arg[4] = rfac0,
 // arg[5] = twojmax, followed by ntypes radii and ntypes weights.
 // Optional keyword/value pairs: diagonal, rmin0, switchflag, bzeroflag.
 ComputeSNAAtom::ComputeSNAAtom(LAMMPS *lmp, int narg, char **arg) :
   Compute(lmp, narg, arg), cutsq(NULL), list(NULL), sna(NULL), 
   radelem(NULL), wjelem(NULL)
 {
   double rmin0, rfac0;
-  int twojmax, switchflag;
+  int twojmax, switchflag, bzeroflag;
   // NOTE(review): radelem and wjelem are already NULL from the
   // initializer list, so these two assignments are redundant
   radelem = NULL;
   wjelem = NULL;
 
   int ntypes = atom->ntypes;
   int nargmin = 6+2*ntypes;
 
   if (narg < nargmin) error->all(FLERR,"Illegal compute sna/atom command");
 
   // default values
 
   diagonalstyle = 0;
   rmin0 = 0.0;
   switchflag = 1;
+  bzeroflag = 0;
 
   // offset by 1 to match up with types
 
   memory->create(radelem,ntypes+1,"sna/atom:radelem");
   memory->create(wjelem,ntypes+1,"sna/atom:wjelem");
 
   // NOTE(review): atof/atoi are used for all numeric arguments, so
   // malformed input is not rejected here
 
   rcutfac = atof(arg[3]);
   rfac0 = atof(arg[4]);
   twojmax = atoi(arg[5]);
 
   for(int i = 0; i < ntypes; i++)
     radelem[i+1] = atof(arg[6+i]);
   for(int i = 0; i < ntypes; i++)
     wjelem[i+1] = atof(arg[6+ntypes+i]);
 
   // construct cutsq
   // pair cutoff = (radelem[i]+radelem[j])*rcutfac; cutmax only tracks
   // the same-type cutoffs
 
   double cut;
   cutmax = 0.0;
   memory->create(cutsq,ntypes+1,ntypes+1,"sna/atom:cutsq");
   for(int i = 1; i <= ntypes; i++) {
     cut = 2.0*radelem[i]*rcutfac;
     if (cut > cutmax) cutmax = cut;
     cutsq[i][i] = cut*cut;
     for(int j = i+1; j <= ntypes; j++) {
       cut = (radelem[i]+radelem[j])*rcutfac;
       cutsq[i][j] = cutsq[j][i] = cut*cut;
     }
   }
 
   // process optional args
 
   int iarg = nargmin;
 
   while (iarg < narg) {
     if (strcmp(arg[iarg],"diagonal") == 0) {
       if (iarg+2 > narg)
 	error->all(FLERR,"Illegal compute sna/atom command");
       diagonalstyle = atoi(arg[iarg+1]);
       if (diagonalstyle < 0 || diagonalstyle > 3)
 	error->all(FLERR,"Illegal compute sna/atom command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"rmin0") == 0) {
       if (iarg+2 > narg)
 	error->all(FLERR,"Illegal compute sna/atom command");
       rmin0 = atof(arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"switchflag") == 0) {
       if (iarg+2 > narg)
 	error->all(FLERR,"Illegal compute sna/atom command");
       switchflag = atoi(arg[iarg+1]);
       iarg += 2;
+    } else if (strcmp(arg[iarg],"bzeroflag") == 0) {
+      if (iarg+2 > narg)
+	error->all(FLERR,"Illegal compute sna/atom command");
+      bzeroflag = atoi(arg[iarg+1]);
+      iarg += 2;
     } else error->all(FLERR,"Illegal compute sna/atom command");
   }
 
   // one SNA object per thread, each created by the thread that uses it
 
   snaptr = new SNA*[comm->nthreads];
 #if defined(_OPENMP)
-#pragma omp parallel default(none) shared(lmp,rfac0,twojmax,rmin0,switchflag)
+#pragma omp parallel default(none) shared(lmp,rfac0,twojmax,rmin0,switchflag,bzeroflag)
 #endif
   {
     int tid = omp_get_thread_num();
 
     // always unset use_shared_arrays since it does not work with computes
     snaptr[tid] = new SNA(lmp,rfac0,twojmax,diagonalstyle,
-                          0 /*use_shared_arrays*/, rmin0,switchflag);
+                          0 /*use_shared_arrays*/, rmin0,switchflag,bzeroflag);
   }
 
   // one per-atom output column per coefficient reported by SNA
 
   ncoeff = snaptr[0]->ncoeff;
   peratom_flag = 1;
   size_peratom_cols = ncoeff;
 
   nmax = 0;
   njmax = 0;
   sna = NULL;
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Release the per-atom array, the per-type tables and every per-thread
 // SNA object.
 ComputeSNAAtom::~ComputeSNAAtom()
 {
   memory->destroy(sna);
   memory->destroy(radelem);
   memory->destroy(wjelem);
   memory->destroy(cutsq);
 
   // the constructor creates each snaptr[] entry with new, so freeing
   // only the pointer array would leak the SNA objects themselves
   // assumes every one of the comm->nthreads slots was filled by the
   // constructor's parallel region and that comm->nthreads has not
   // changed since construction
 
   for (int tid = 0; tid < comm->nthreads; tid++)
     delete snaptr[tid];
   delete [] snaptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeSNAAtom::init()
 {
   // a pair style must exist and must reach at least as far as cutmax
 
   if (!force->pair)
     error->all(FLERR,"Compute sna/atom requires a pair style be defined");
 
   if (force->pair->cutforce < cutmax)
     error->all(FLERR,"Compute sna/atom cutoff is longer than pairwise cutoff");
 
   // request an occasional full neighbor list owned by this compute
 
   const int ireq = neighbor->request(this,instance_me);
   neighbor->requests[ireq]->occasional = 1;
   neighbor->requests[ireq]->full = 1;
   neighbor->requests[ireq]->half = 0;
   neighbor->requests[ireq]->compute = 1;
   neighbor->requests[ireq]->pair = 0;
 
   // warn (on proc 0 only) when several sna/atom computes are defined
 
   int nsna = 0;
   for (int ic = 0; ic < modify->ncompute; ic++) {
     if (strcmp(modify->compute[ic]->style,"sna/atom") != 0) continue;
     nsna++;
   }
   if (comm->me == 0 && nsna > 1)
     error->warning(FLERR,"More than one compute sna/atom");
 
   // every thread initializes its own SNA object
 
 #if defined(_OPENMP)
 #pragma omp parallel default(none)
 #endif
   {
     const int me_thread = omp_get_thread_num();
     snaptr[me_thread]->init();
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeSNAAtom::init_list(int id, NeighList *ptr)
 {
   list = ptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Fill the per-atom array sna[i][0..ncoeff-1] for every atom in the
 // neighbor list: atoms in the compute group get the bvec values from
 // the calling thread's SNA object, all other atoms get zeros.
 void ComputeSNAAtom::compute_peratom()
 {
   invoked_peratom = update->ntimestep;
 
   // grow sna array if necessary
 
   if (atom->nmax > nmax) {
     memory->destroy(sna);
     nmax = atom->nmax;
     memory->create(sna,nmax,size_peratom_cols,"sna/atom:sna");
     array_atom = sna;
   }
 
   // invoke full neighbor list (will copy or build if necessary)
 
   neighbor->build_one(list);
 
   // NOTE(review): the parallel loop below uses default(none) without a
   // shared clause, so it appears to rely on these locals being declared
   // const - keep the declarations as they are unless the pragma is
   // updated too
 
   const int inum = list->inum;
   const int* const ilist = list->ilist;
   const int* const numneigh = list->numneigh;
   int** const firstneigh = list->firstneigh;
   int * const type = atom->type;
 
   // compute sna for each atom in group
   // use full neighbor list to count atoms less than cutoff
 
   double** const x = atom->x;
   const int* const mask = atom->mask;
 
 #if defined(_OPENMP)
 #pragma omp parallel for default(none)
 #endif
   for (int ii = 0; ii < inum; ii++) {
     const int tid = omp_get_thread_num();
     const int i = ilist[ii];
     if (mask[i] & groupbit) {
 
       const double xtmp = x[i][0];
       const double ytmp = x[i][1];
       const double ztmp = x[i][2];
       const int itype = type[i];
       const double radi = radelem[itype];
       const int* const jlist = firstneigh[i];
       const int jnum = numneigh[i];
 
       // insure rij, inside, and typej  are of size jnum
 
       snaptr[tid]->grow_rij(jnum);
 
       // rij[][3] = displacements between atom I and those neighbors
       // inside = indices of neighbors of I within cutoff
       // typej = types of neighbors of I within cutoff
 
       int ninside = 0;
       for (int jj = 0; jj < jnum; jj++) {
 	int j = jlist[jj];
 	j &= NEIGHMASK;
 
 	const double delx = xtmp - x[j][0];
 	const double dely = ytmp - x[j][1];
 	const double delz = ztmp - x[j][2];
 	const double rsq = delx*delx + dely*dely + delz*delz;
 	int jtype = type[j];
 	// keep neighbors inside the type-pair cutoff; the rsq>1e-20 test
 	// skips neighbors at (numerically) zero distance
 	if (rsq < cutsq[itype][jtype] && rsq>1e-20) {
 	  snaptr[tid]->rij[ninside][0] = delx;
 	  snaptr[tid]->rij[ninside][1] = dely;
 	  snaptr[tid]->rij[ninside][2] = delz;
 	  snaptr[tid]->inside[ninside] = j;
 	  snaptr[tid]->wj[ninside] = wjelem[jtype];
 	  snaptr[tid]->rcutij[ninside] = (radi+radelem[jtype])*rcutfac;
 	  ninside++;
 	}
       }
 
       // run the SNA pipeline for atom i and copy its coefficients out
 
       snaptr[tid]->compute_ui(ninside);
       snaptr[tid]->compute_zi();
       snaptr[tid]->compute_bi();
       snaptr[tid]->copy_bi2bvec();
       for (int icoeff = 0; icoeff < ncoeff; icoeff++)
 	sna[i][icoeff] = snaptr[tid]->bvec[icoeff];
     } else {
       // atoms outside the compute group report zeros
       for (int icoeff = 0; icoeff < ncoeff; icoeff++)
 	sna[i][icoeff] = 0.0;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage
 ------------------------------------------------------------------------- */
 
 double ComputeSNAAtom::memory_usage()
 {
   // per-atom output array
   const double sna_bytes = nmax*size_peratom_cols * sizeof(double);
 
   // njmax-sized terms: three doubles and one int per entry
   const double rij_bytes = 3*njmax*sizeof(double);
   const double idx_bytes = njmax*sizeof(int);
 
   // usage reported by one SNA object, counted once per thread
   const double engine_bytes = snaptr[0]->memory_usage()*comm->nthreads;
 
   return sna_bytes + rij_bytes + idx_bytes + engine_bytes;
 }
 
diff --git a/src/SNAP/compute_snad_atom.cpp b/src/SNAP/compute_snad_atom.cpp
index efd4cafbc..73452427b 100644
--- a/src/SNAP/compute_snad_atom.cpp
+++ b/src/SNAP/compute_snad_atom.cpp
@@ -1,336 +1,337 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 #include "sna.h"
 #include <string.h>
 #include <stdlib.h>
 #include "compute_snad_atom.h"
 #include "atom.h"
 #include "update.h"
 #include "modify.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "force.h"
 #include "pair.h"
 #include "comm.h"
 #include "memory.h"
 #include "error.h"
 #include "openmp_snap.h"
 
 using namespace LAMMPS_NS;
 
 // Parse the "compute snad/atom" arguments and create one SNA object per thread.
 //
 // Required arguments, at the positions read below:
 //   arg[3] = rcutfac, arg[4] = rfac0, arg[5] = twojmax,
 //   arg[6 .. 6+ntypes-1]          = per-type radius (radelem),
 //   arg[6+ntypes .. 6+2*ntypes-1] = per-type weight (wjelem).
 // Optional keyword/value pairs: diagonal, rmin0, switchflag.
 // An unknown keyword, or a keyword without a value, raises an error.
 ComputeSNADAtom::ComputeSNADAtom(LAMMPS *lmp, int narg, char **arg) :
   Compute(lmp, narg, arg), cutsq(NULL), list(NULL), snad(NULL), 
   radelem(NULL), wjelem(NULL)
 {
   double rfac0, rmin0;
-  int twojmax, switchflag;
+  int twojmax, switchflag, bzeroflag;
   radelem = NULL;
   wjelem = NULL;
 
   int ntypes = atom->ntypes;
   int nargmin = 6+2*ntypes;
 
   if (narg < nargmin) error->all(FLERR,"Illegal compute snad/atom command");
 
   // default values
 
   diagonalstyle = 0;
   rmin0 = 0.0;
   switchflag = 1;
-
+  bzeroflag = 0;
+  
   // process required arguments
   memory->create(radelem,ntypes+1,"sna/atom:radelem"); // offset by 1 to match up with types
   memory->create(wjelem,ntypes+1,"sna/atom:wjelem");
   rcutfac = atof(arg[3]);
   rfac0 = atof(arg[4]);
   twojmax = atoi(arg[5]);
   for(int i = 0; i < ntypes; i++)
     radelem[i+1] = atof(arg[6+i]);
   for(int i = 0; i < ntypes; i++)
     wjelem[i+1] = atof(arg[6+ntypes+i]);
   // construct cutsq
   // cutsq[i][j] = ((radelem[i]+radelem[j])*rcutfac)^2, stored symmetrically
   double cut;
   memory->create(cutsq,ntypes+1,ntypes+1,"sna/atom:cutsq");
   for(int i = 1; i <= ntypes; i++) {
     cut = 2.0*radelem[i]*rcutfac;
     cutsq[i][i] = cut*cut;
     for(int j = i+1; j <= ntypes; j++) {
       cut = (radelem[i]+radelem[j])*rcutfac;
       cutsq[i][j] = cutsq[j][i] = cut*cut;
     }
   }
 
   // process optional args
   // each recognized keyword consumes itself plus one value
 
   int iarg = nargmin;
 
   while (iarg < narg) {
     if (strcmp(arg[iarg],"diagonal") == 0) {
       if (iarg+2 > narg)
 	error->all(FLERR,"Illegal compute snad/atom command");
       diagonalstyle = atof(arg[iarg+1]);
       if (diagonalstyle < 0 || diagonalstyle > 3)
 	error->all(FLERR,"Illegal compute snad/atom command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"rmin0") == 0) {
       if (iarg+2 > narg)
         error->all(FLERR,"Illegal compute snad/atom command");
       rmin0 = atof(arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"switchflag") == 0) {
       if (iarg+2 > narg)
 	error->all(FLERR,"Illegal compute snad/atom command");
       switchflag = atoi(arg[iarg+1]);
       iarg += 2;
     } else error->all(FLERR,"Illegal compute snad/atom command");
   }
 
   // one SNA object per thread; each thread fills the slot indexed by its
   // own thread number
   // NOTE(review): the array has comm->nthreads slots, so this presumably
   // relies on the parallel region running exactly that many threads - confirm
   snaptr = new SNA*[comm->nthreads];
 #if defined(_OPENMP)
-#pragma omp parallel default(none) shared(lmp,rfac0,twojmax,rmin0,switchflag)
+#pragma omp parallel default(none) shared(lmp,rfac0,twojmax,rmin0,switchflag,bzeroflag)
 #endif
   {
     int tid = omp_get_thread_num();
 
     // always unset use_shared_arrays since it does not work with computes
     // no keyword parsed above changes the bzeroflag value, so the default
     // assigned earlier is what reaches SNA
     snaptr[tid] = new SNA(lmp,rfac0,twojmax,diagonalstyle,
-                          0 /*use_shared_arrays*/, rmin0,switchflag);
+                          0 /*use_shared_arrays*/, rmin0,switchflag,bzeroflag);
   }
 
   // per-atom output: 3 values per coefficient for each atom type,
   // with the same width used for reverse communication
   ncoeff = snaptr[0]->ncoeff;
   peratom_flag = 1;
   size_peratom_cols = 3*ncoeff*atom->ntypes;
   comm_reverse = size_peratom_cols;
   nmax = 0;
   njmax = 0;
   snad = NULL;
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Release the per-atom array, the per-type tables, and the per-thread
 // SNA objects created in the constructor.
 ComputeSNADAtom::~ComputeSNADAtom()
 {
   memory->destroy(snad);
   memory->destroy(radelem);
   memory->destroy(wjelem);
   memory->destroy(cutsq);
 
   // snaptr is an array of owning pointers (one SNA per thread, allocated
   // with new in the constructor); freeing only the array would leak the
   // SNA objects themselves.  Assumes every one of the comm->nthreads
   // slots was filled by the constructor.
   for (int tid = 0; tid < comm->nthreads; tid++)
     delete snaptr[tid];
   delete [] snaptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Verify that a pair style exists, request the neighbor list used by
 // compute_peratom(), warn about duplicate computes of this style, and
 // initialize every per-thread SNA object.
 void ComputeSNADAtom::init()
 {
   if (force->pair == NULL)
     error->all(FLERR,"Compute snad/atom requires a pair style be defined");
 
   // TODO: the comparison against the pair cutoff stays disabled because
   // the cutoff radius here is not a single number:
   //   if (sqrt(cutsq) > force->pair->cutforce)
   //     error->all(FLERR,"Compute snad/atom cutoff is longer than pairwise cutoff");
 
   // request an occasional full neighbor list on behalf of a compute
 
   const int ireq = neighbor->request(this,instance_me);
   neighbor->requests[ireq]->pair = 0;
   neighbor->requests[ireq]->compute = 1;
   neighbor->requests[ireq]->half = 0;
   neighbor->requests[ireq]->full = 1;
   neighbor->requests[ireq]->occasional = 1;
 
   // count the computes of this style and warn once (rank 0) on duplicates
 
   int nsnad = 0;
   for (int icompute = 0; icompute < modify->ncompute; icompute++) {
     if (strcmp(modify->compute[icompute]->style,"snad/atom") != 0) continue;
     nsnad++;
   }
   if (comm->me == 0 && nsnad > 1)
     error->warning(FLERR,"More than one compute snad/atom");
 
   // each thread initializes the SNA object stored under its thread number
 #if defined(_OPENMP)
 #pragma omp parallel default(none)
 #endif
   {
     const int mytid = omp_get_thread_num();
     snaptr[mytid]->init();
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeSNADAtom::init_list(int id, NeighList *ptr)
 {
   list = ptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Fill the per-atom array snad for all atoms in the compute group.
 //
 // Row layout: for a central atom of type t, its contributions go into the
 // column block starting at 3*ncoeff*(t-1); inside that block the columns
 // [0,ncoeff), [ncoeff,2*ncoeff), [2*ncoeff,3*ncoeff) hold components
 // 0, 1 and 2 of dbvec for each coefficient.  Each neighbor pair adds
 // dbvec to the row of atom i and subtracts it from the row of atom j.
 // Rows of local and ghost atoms are zeroed first, and the accumulated
 // values are passed through reverse communication at the end.
 void ComputeSNADAtom::compute_peratom()
 {
   int ntotal = atom->nlocal + atom->nghost;
 
   invoked_peratom = update->ntimestep;
 
   // grow snad array if necessary
 
   if (atom->nmax > nmax) {
     memory->destroy(snad);
     nmax = atom->nmax;
     memory->create(snad,nmax,size_peratom_cols,
 		   "snad/atom:snad");
     array_atom = snad;
   }
 
   // clear local array
   // (covers ghost atoms too, since neighbors j below may be ghosts)
 
   for (int i = 0; i < ntotal; i++)
     for (int icoeff = 0; icoeff < size_peratom_cols; icoeff++) {
       snad[i][icoeff] = 0.0;
     }
 
   // invoke full neighbor list (will copy or build if necessary)
 
   neighbor->build_one(list);
 
   const int inum = list->inum;
   const int* const ilist = list->ilist;
   const int* const numneigh = list->numneigh;
   int** const firstneigh = list->firstneigh;
   int * const type = atom->type;
 
   // compute sna derivatives for each atom in group
   // use full neighbor list to count atoms less than cutoff
 
   double** const x = atom->x;
   const int* const mask = atom->mask;
 
   // NOTE(review): iterations of this loop write to snad[j] for neighbor
   // atoms j, and the same row can be reached from several iterations;
   // no atomic/critical construct guards those updates - confirm that this
   // is safe when the loop is actually run by multiple OpenMP threads
 #if defined(_OPENMP)
 #pragma omp parallel for default(none)
 #endif
   for (int ii = 0; ii < inum; ii++) {
     const int tid = omp_get_thread_num();
     const int i = ilist[ii];
     if (mask[i] & groupbit) {
 
       const double xtmp = x[i][0];
       const double ytmp = x[i][1];
       const double ztmp = x[i][2];
       const int itype = type[i];
       const double radi = radelem[itype];
       const int* const jlist = firstneigh[i];
       const int jnum = numneigh[i];
 
       // start of the column block that belongs to the type of atom i
       const int typeoffset = 3*ncoeff*(atom->type[i]-1);
 
       // ensure rij, inside, wj, and rcutij are of size jnum
 
       snaptr[tid]->grow_rij(jnum);
 
       // rij[][3] = displacements between atom I and those neighbors
       // inside = indices of neighbors of I within cutoff
       // wj = weights for neighbors of I within cutoff
       // rcutij = cutoffs for neighbors of I within cutoff
       // note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi
 
       int ninside = 0;
       for (int jj = 0; jj < jnum; jj++) {
 	int j = jlist[jj];
 	j &= NEIGHMASK;
 
 	const double delx = x[j][0] - xtmp;
 	const double dely = x[j][1] - ytmp;
 	const double delz = x[j][2] - ztmp;
 	const double rsq = delx*delx + dely*dely + delz*delz;
         int jtype = type[j];
 	// keep neighbors inside the type-pair cutoff; the lower bound
 	// skips pairs at (numerically) zero separation
 	if (rsq < cutsq[itype][jtype]&&rsq>1e-20) {
 	  snaptr[tid]->rij[ninside][0] = delx;
 	  snaptr[tid]->rij[ninside][1] = dely;
 	  snaptr[tid]->rij[ninside][2] = delz;
 	  snaptr[tid]->inside[ninside] = j;
 	  snaptr[tid]->wj[ninside] = wjelem[jtype];
 	  snaptr[tid]->rcutij[ninside] = (radi+radelem[jtype])*rcutfac;
 	  ninside++;
 	}
       }
 
       snaptr[tid]->compute_ui(ninside);
       snaptr[tid]->compute_zi();
 
       for (int jj = 0; jj < ninside; jj++) {
 	const int j = snaptr[tid]->inside[jj];
 	snaptr[tid]->compute_duidrj(snaptr[tid]->rij[jj],
 				    snaptr[tid]->wj[jj],
 				    snaptr[tid]->rcutij[jj]);
 	snaptr[tid]->compute_dbidrj();
 	snaptr[tid]->copy_dbi2dbvec();
 
 	// Accumulate -dBi/dRi, -dBi/dRj
 	// both rows use the column block of the central atom's type
 
 	double *snadi = snad[i]+typeoffset;
 	double *snadj = snad[j]+typeoffset;
 
 	for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
 	  snadi[icoeff] += snaptr[tid]->dbvec[icoeff][0];
 	  snadi[icoeff+ncoeff] += snaptr[tid]->dbvec[icoeff][1];
 	  snadi[icoeff+2*ncoeff] += snaptr[tid]->dbvec[icoeff][2];
 	  snadj[icoeff] -= snaptr[tid]->dbvec[icoeff][0];
 	  snadj[icoeff+ncoeff] -= snaptr[tid]->dbvec[icoeff][1];
 	  snadj[icoeff+2*ncoeff] -= snaptr[tid]->dbvec[icoeff][2];
 	}
       }
     }
   }
 
   // communicate snad contributions between neighbor procs
 
   comm->reverse_comm_compute(this);
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Copy the snad rows of atoms first .. first+n-1 into buf, row after row,
 // and return the number of values per atom (comm_reverse).
 int ComputeSNADAtom::pack_reverse_comm(int n, int first, double *buf)
 {
   const int stop = first + n;
   int nbuf = 0;
 
   for (int iatom = first; iatom < stop; iatom++) {
     const double *row = snad[iatom];
     for (int icol = 0; icol < size_peratom_cols; icol++) {
       buf[nbuf] = row[icol];
       nbuf++;
     }
   }
 
   return comm_reverse;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Add the values received in buf to the snad rows of the n atoms whose
 // indices are given in list; buf holds size_peratom_cols values per atom.
 void ComputeSNADAtom::unpack_reverse_comm(int n, int *list, double *buf)
 {
   int nbuf = 0;
 
   for (int k = 0; k < n; k++) {
     double *row = snad[list[k]];
     for (int icol = 0; icol < size_peratom_cols; icol++) {
       row[icol] += buf[nbuf];
       nbuf++;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage
 ------------------------------------------------------------------------- */
 
 double ComputeSNADAtom::memory_usage()
 {
   // Estimate, in bytes, of the memory held by this compute: the per-atom
   // array, the njmax- and ncoeff-sized terms, and one SNA object per thread.
   //
   // The per-atom term is promoted to double before multiplying, because
   // nmax*size_peratom_cols evaluated in integer arithmetic can overflow
   // for large atom counts (size_peratom_cols is 3*ncoeff*ntypes here).
   double bytes = static_cast<double>(nmax)*size_peratom_cols * sizeof(double);
   bytes += 3*njmax*sizeof(double);
   bytes += njmax*sizeof(int);
   bytes += ncoeff*3;
   // all threads are assumed to use the same amount as thread 0
   bytes += snaptr[0]->memory_usage()*comm->nthreads;
   return bytes;
 }
diff --git a/src/SNAP/compute_snav_atom.cpp b/src/SNAP/compute_snav_atom.cpp
index c1398864e..f75b02fba 100644
--- a/src/SNAP/compute_snav_atom.cpp
+++ b/src/SNAP/compute_snav_atom.cpp
@@ -1,346 +1,347 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 #include "sna.h"
 #include <string.h>
 #include <stdlib.h>
 #include "compute_snav_atom.h"
 #include "atom.h"
 #include "update.h"
 #include "modify.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "force.h"
 #include "pair.h"
 #include "comm.h"
 #include "memory.h"
 #include "error.h"
 #include "openmp_snap.h"
 
 using namespace LAMMPS_NS;
 
 // Parse the "compute snav/atom" arguments and create one SNA object per thread.
 //
 // Required arguments, at the positions read below:
 //   arg[3] = rcutfac, arg[4] = rfac0, arg[5] = twojmax,
 //   arg[6 .. 6+ntypes-1]          = per-type radius (radelem),
 //   arg[6+ntypes .. 6+2*ntypes-1] = per-type weight (wjelem).
 // Optional keyword/value pairs: diagonal, rmin0, switchflag.
 // An unknown keyword, or a keyword without a value, raises an error.
 ComputeSNAVAtom::ComputeSNAVAtom(LAMMPS *lmp, int narg, char **arg) :
   Compute(lmp, narg, arg), cutsq(NULL), list(NULL), snav(NULL),
   radelem(NULL), wjelem(NULL)
 {
   double rfac0, rmin0;
-  int twojmax, switchflag;
+  int twojmax, switchflag, bzeroflag;
   radelem = NULL;
   wjelem = NULL;
 
   // number of values stored per coefficient (see size_peratom_cols below)
   nvirial = 6;
 
   int ntypes = atom->ntypes;
   int nargmin = 6+2*ntypes;
 
   if (narg < nargmin) error->all(FLERR,"Illegal compute snav/atom command");
 
   // default values
 
   diagonalstyle = 0;
   rmin0 = 0.0;
   switchflag = 1;
+  bzeroflag = 0;
 
   // process required arguments
   memory->create(radelem,ntypes+1,"sna/atom:radelem"); // offset by 1 to match up with types
   memory->create(wjelem,ntypes+1,"sna/atom:wjelem");
   rcutfac = atof(arg[3]);
   rfac0 = atof(arg[4]);
   twojmax = atoi(arg[5]);
   for(int i = 0; i < ntypes; i++)
     radelem[i+1] = atof(arg[6+i]);
   for(int i = 0; i < ntypes; i++)
     wjelem[i+1] = atof(arg[6+ntypes+i]);
   // construct cutsq
   // cutsq[i][j] = ((radelem[i]+radelem[j])*rcutfac)^2, stored symmetrically
   double cut;
   memory->create(cutsq,ntypes+1,ntypes+1,"sna/atom:cutsq");
   for(int i = 1; i <= ntypes; i++) {
     cut = 2.0*radelem[i]*rcutfac;
     cutsq[i][i] = cut*cut;
     for(int j = i+1; j <= ntypes; j++) {
       cut = (radelem[i]+radelem[j])*rcutfac;
       cutsq[i][j] = cutsq[j][i] = cut*cut;
     }
   }
 
   // process optional args
   // each recognized keyword consumes itself plus one value
 
   int iarg = nargmin;
 
   while (iarg < narg) {
     if (strcmp(arg[iarg],"diagonal") == 0) {
       if (iarg+2 > narg)
 	error->all(FLERR,"Illegal compute snav/atom command");
       diagonalstyle = atof(arg[iarg+1]);
       if (diagonalstyle < 0 || diagonalstyle > 3)
 	error->all(FLERR,"Illegal compute snav/atom command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"rmin0") == 0) {
       if (iarg+2 > narg)
 	error->all(FLERR,"Illegal compute snav/atom command");
       rmin0 = atof(arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"switchflag") == 0) {
       if (iarg+2 > narg)
 	error->all(FLERR,"Illegal compute snav/atom command");
       switchflag = atoi(arg[iarg+1]);
       iarg += 2;
     } else error->all(FLERR,"Illegal compute snav/atom command");
   }
 
   // one SNA object per thread; each thread fills the slot indexed by its
   // own thread number
   // NOTE(review): the array has comm->nthreads slots, so this presumably
   // relies on the parallel region running exactly that many threads - confirm
   snaptr = new SNA*[comm->nthreads];
 #if defined(_OPENMP)
-#pragma omp parallel default(none) shared(lmp,rfac0,twojmax,rmin0,switchflag)
+#pragma omp parallel default(none) shared(lmp,rfac0,twojmax,rmin0,switchflag,bzeroflag)
 #endif
   {
     int tid = omp_get_thread_num();
 
     // always unset use_shared_arrays since it does not work with computes
     // no keyword parsed above changes the bzeroflag value, so the default
     // assigned earlier is what reaches SNA
     snaptr[tid] = new SNA(lmp,rfac0,twojmax,diagonalstyle,
-                          0 /*use_shared_arrays*/, rmin0,switchflag);
+                          0 /*use_shared_arrays*/, rmin0,switchflag,bzeroflag);
   }
 
   // per-atom output: nvirial values per coefficient for each atom type,
   // with the same width used for reverse communication
   ncoeff = snaptr[0]->ncoeff;
   peratom_flag = 1;
   size_peratom_cols = nvirial*ncoeff*atom->ntypes;
   comm_reverse = size_peratom_cols;
 
   nmax = 0;
   njmax = 0;
   snav = NULL;
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Release the per-atom array, the per-type tables, and the per-thread
 // SNA objects created in the constructor.
 ComputeSNAVAtom::~ComputeSNAVAtom()
 {
   memory->destroy(snav);
   memory->destroy(radelem);
   memory->destroy(wjelem);
   memory->destroy(cutsq);
 
   // snaptr is an array of owning pointers (one SNA per thread, allocated
   // with new in the constructor); freeing only the array would leak the
   // SNA objects themselves.  Assumes every one of the comm->nthreads
   // slots was filled by the constructor.
   for (int tid = 0; tid < comm->nthreads; tid++)
     delete snaptr[tid];
   delete [] snaptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Verify that a pair style exists, request the neighbor list used by
 // compute_peratom(), warn about duplicate computes of this style, and
 // initialize every per-thread SNA object.
 void ComputeSNAVAtom::init()
 {
   if (force->pair == NULL)
     error->all(FLERR,"Compute snav/atom requires a pair style be defined");
 
   // TODO: the comparison against the pair cutoff stays disabled because
   // the cutoff radius here is not a single number:
   //   if (sqrt(cutsq) > force->pair->cutforce)
   //     error->all(FLERR,"Compute snav/atom cutoff is longer than pairwise cutoff");
 
   // request an occasional full neighbor list on behalf of a compute
 
   const int ireq = neighbor->request(this,instance_me);
   neighbor->requests[ireq]->pair = 0;
   neighbor->requests[ireq]->compute = 1;
   neighbor->requests[ireq]->half = 0;
   neighbor->requests[ireq]->full = 1;
   neighbor->requests[ireq]->occasional = 1;
 
   // count the computes of this style and warn once (rank 0) on duplicates
 
   int nsnav = 0;
   for (int icompute = 0; icompute < modify->ncompute; icompute++) {
     if (strcmp(modify->compute[icompute]->style,"snav/atom") != 0) continue;
     nsnav++;
   }
   if (comm->me == 0 && nsnav > 1)
     error->warning(FLERR,"More than one compute snav/atom");
 
   // each thread initializes the SNA object stored under its thread number
 #if defined(_OPENMP)
 #pragma omp parallel default(none)
 #endif
   {
     const int mytid = omp_get_thread_num();
     snaptr[mytid]->init();
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void ComputeSNAVAtom::init_list(int id, NeighList *ptr)
 {
   list = ptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Fill the per-atom array snav for all atoms in the compute group.
 //
 // Row layout: for a central atom of type t, its contributions go into the
 // column block starting at nvirial*ncoeff*(t-1); inside that block there
 // are six consecutive groups of ncoeff columns holding, in order, the
 // products dbvec[0]*x, dbvec[1]*y, dbvec[2]*z, dbvec[1]*z, dbvec[0]*z and
 // dbvec[0]*y.  Each neighbor pair adds the products formed with the
 // position of atom i to row i and subtracts the products formed with the
 // position of atom j from row j.  Rows of local and ghost atoms are zeroed
 // first, and the accumulated values are passed through reverse
 // communication at the end.
 void ComputeSNAVAtom::compute_peratom()
 {
   int ntotal = atom->nlocal + atom->nghost;
 
   invoked_peratom = update->ntimestep;
 
   // grow snav array if necessary
 
   if (atom->nmax > nmax) {
     memory->destroy(snav);
     nmax = atom->nmax;
     memory->create(snav,nmax,size_peratom_cols,
 		   "snav/atom:snav");
     array_atom = snav;
   }
 
   // clear local array
   // (covers ghost atoms too, since neighbors j below may be ghosts)
 
   for (int i = 0; i < ntotal; i++)
     for (int icoeff = 0; icoeff < size_peratom_cols; icoeff++) {
       snav[i][icoeff] = 0.0;
     }
 
   // invoke full neighbor list (will copy or build if necessary)
 
   neighbor->build_one(list);
 
   const int inum = list->inum;
   const int* const ilist = list->ilist;
   const int* const numneigh = list->numneigh;
   int** const firstneigh = list->firstneigh;
   int * const type = atom->type;
   // compute sna derivatives for each atom in group
   // use full neighbor list to count atoms less than cutoff
 
   double** const x = atom->x;
   const int* const mask = atom->mask;
 
   // NOTE(review): iterations of this loop write to snav[j] for neighbor
   // atoms j, and the same row can be reached from several iterations;
   // no atomic/critical construct guards those updates - confirm that this
   // is safe when the loop is actually run by multiple OpenMP threads
 #if defined(_OPENMP)
 #pragma omp parallel for default(none)
 #endif
   for (int ii = 0; ii < inum; ii++) {
     const int tid = omp_get_thread_num();
     const int i = ilist[ii];
     if (mask[i] & groupbit) {
 
       const double xtmp = x[i][0];
       const double ytmp = x[i][1];
       const double ztmp = x[i][2];
       const int itype = type[i];
       const double radi = radelem[itype];
 
       const int* const jlist = firstneigh[i];
       const int jnum = numneigh[i];
 
       // start of the column block that belongs to the type of atom i
       const int typeoffset = nvirial*ncoeff*(atom->type[i]-1);
 
       // ensure rij, inside, wj, and rcutij are of size jnum
 
 	  snaptr[tid]->grow_rij(jnum);
 
       // rij[][3] = displacements between atom I and those neighbors
       // inside = indices of neighbors of I within cutoff
       // wj = weights for neighbors of I within cutoff
       // rcutij = cutoffs for neighbors of I within cutoff
       // note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi
 
       int ninside = 0;
       for (int jj = 0; jj < jnum; jj++) {
 	int j = jlist[jj];
 	j &= NEIGHMASK;
 
 	const double delx = x[j][0] - xtmp;
 	const double dely = x[j][1] - ytmp;
 	const double delz = x[j][2] - ztmp;
 	const double rsq = delx*delx + dely*dely + delz*delz;
 	int jtype = type[j];
 	// keep neighbors inside the type-pair cutoff; the lower bound
 	// skips pairs at (numerically) zero separation
 	if (rsq < cutsq[itype][jtype]&&rsq>1e-20) {
 	  snaptr[tid]->rij[ninside][0] = delx;
 	  snaptr[tid]->rij[ninside][1] = dely;
 	  snaptr[tid]->rij[ninside][2] = delz;
 	  snaptr[tid]->inside[ninside] = j;
 	  snaptr[tid]->wj[ninside] = wjelem[jtype];
 	  snaptr[tid]->rcutij[ninside] = (radi+radelem[jtype])*rcutfac;
 	  ninside++;
 	}
       }
 
       snaptr[tid]->compute_ui(ninside);
       snaptr[tid]->compute_zi();
 
       for (int jj = 0; jj < ninside; jj++) {
 	const int j = snaptr[tid]->inside[jj];
 
 	snaptr[tid]->compute_duidrj(snaptr[tid]->rij[jj],
 				    snaptr[tid]->wj[jj],
 				    snaptr[tid]->rcutij[jj]);
 	snaptr[tid]->compute_dbidrj();
 	snaptr[tid]->copy_dbi2dbvec();
 
 	// Accumulate -dBi/dRi*Ri, -dBi/dRj*Rj
 	// both rows use the column block of the central atom's type
 
 	double *snavi = snav[i]+typeoffset;
 	double *snavj = snav[j]+typeoffset;
 
 	for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
 	  snavi[icoeff]          += snaptr[tid]->dbvec[icoeff][0]*xtmp;
 	  snavi[icoeff+ncoeff]   += snaptr[tid]->dbvec[icoeff][1]*ytmp;
 	  snavi[icoeff+2*ncoeff] += snaptr[tid]->dbvec[icoeff][2]*ztmp;
 	  snavi[icoeff+3*ncoeff] += snaptr[tid]->dbvec[icoeff][1]*ztmp;
 	  snavi[icoeff+4*ncoeff] += snaptr[tid]->dbvec[icoeff][0]*ztmp;
 	  snavi[icoeff+5*ncoeff] += snaptr[tid]->dbvec[icoeff][0]*ytmp;
 	  snavj[icoeff]          -= snaptr[tid]->dbvec[icoeff][0]*x[j][0];
 	  snavj[icoeff+ncoeff]   -= snaptr[tid]->dbvec[icoeff][1]*x[j][1];
 	  snavj[icoeff+2*ncoeff] -= snaptr[tid]->dbvec[icoeff][2]*x[j][2];
 	  snavj[icoeff+3*ncoeff] -= snaptr[tid]->dbvec[icoeff][1]*x[j][2];
 	  snavj[icoeff+4*ncoeff] -= snaptr[tid]->dbvec[icoeff][0]*x[j][2];
 	  snavj[icoeff+5*ncoeff] -= snaptr[tid]->dbvec[icoeff][0]*x[j][1];
 	}
       }
     }
   }
 
   // communicate snav contributions between neighbor procs
 
   comm->reverse_comm_compute(this);
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Copy the snav rows of atoms first .. first+n-1 into buf, row after row,
 // and return the number of values per atom (comm_reverse).
 int ComputeSNAVAtom::pack_reverse_comm(int n, int first, double *buf)
 {
   const int stop = first + n;
   int nbuf = 0;
 
   for (int iatom = first; iatom < stop; iatom++) {
     const double *row = snav[iatom];
     for (int icol = 0; icol < size_peratom_cols; icol++) {
       buf[nbuf] = row[icol];
       nbuf++;
     }
   }
 
   return comm_reverse;
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Add the values received in buf to the snav rows of the n atoms whose
 // indices are given in list; buf holds size_peratom_cols values per atom.
 void ComputeSNAVAtom::unpack_reverse_comm(int n, int *list, double *buf)
 {
   int nbuf = 0;
 
   for (int k = 0; k < n; k++) {
     double *row = snav[list[k]];
     for (int icol = 0; icol < size_peratom_cols; icol++) {
       row[icol] += buf[nbuf];
       nbuf++;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    memory usage
 ------------------------------------------------------------------------- */
 
 double ComputeSNAVAtom::memory_usage()
 {
   // Estimate, in bytes, of the memory held by this compute: the per-atom
   // array, the njmax- and ncoeff-sized terms, and one SNA object per thread.
   //
   // The per-atom term is promoted to double before multiplying, because
   // nmax*size_peratom_cols evaluated in integer arithmetic can overflow
   // for large atom counts (size_peratom_cols is nvirial*ncoeff*ntypes here).
   double bytes = static_cast<double>(nmax)*size_peratom_cols * sizeof(double);
   bytes += 3*njmax*sizeof(double);
   bytes += njmax*sizeof(int);
   bytes += ncoeff*nvirial;
   // all threads are assumed to use the same amount as thread 0
   bytes += snaptr[0]->memory_usage()*comm->nthreads;
   return bytes;
 }
diff --git a/src/SNAP/pair_snap.cpp b/src/SNAP/pair_snap.cpp
index dc84b0be0..06c2e4848 100644
--- a/src/SNAP/pair_snap.cpp
+++ b/src/SNAP/pair_snap.cpp
@@ -1,1730 +1,1733 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include "pair_snap.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "force.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "sna.h"
 #include "openmp_snap.h"
 #include "domain.h"
 #include "memory.h"
 #include "error.h"
 
 #include <cmath>
 
 using namespace LAMMPS_NS;
 
 #define MAXLINE 1024
 #define MAXWORD 3
 
 /* ---------------------------------------------------------------------- */
 
 // Put every member into a defined "nothing allocated yet" state and set
 // the pair-style capability flags.
 PairSNAP::PairSNAP(LAMMPS *lmp) : Pair(lmp)
 {
   // pair-style flags
 
   single_enable = 0;
   restartinfo = 0;
   one_coeff = 1;
   manybody_flag = 1;
 
   // per-element tables: none read yet
 
   nelements = 0;
   elements = NULL;
   radelem = NULL;
   wjelem = NULL;
   coeffelem = NULL;
 
   // threading and schedule bookkeeping
 
   nmax = 0;
   nthreads = 1;
   schedule_user = 0;
   schedule_time_guided = -1;
   schedule_time_dynamic = -1;
   ncalls_neigh =-1;
 
   // load-balancing mask and ghost neighbor lists: empty
 
   ilistmask_max = 0;
   ilistmask = NULL;
   ghostinum = 0;
   ghostilist_max = 0;
   ghostilist = NULL;
   ghostnumneigh_max = 0;
   ghostnumneigh = NULL;
   ghostneighs = NULL;
   ghostfirstneigh = NULL;
   ghostneighs_total = 0;
   ghostneighs_max = 0;
 
   // shared per-atom work arrays: empty and switched off
 
   i_max = 0;
   i_neighmax = 0;
   i_numpairs = 0;
   i_rij = NULL;
   i_inside = NULL;
   i_wj = NULL;
   i_rcutij = NULL;
   i_ninside = NULL;
   i_pairs = NULL;
   i_uarraytot_r = NULL;
   i_uarraytot_i = NULL;
   i_zarray_r = NULL;
   i_zarray_i =NULL;
   use_shared_arrays = 0;
 
 #ifdef TIMING_INFO
   for (int itimer = 0; itimer < 4; itimer++)
     timers[itimer] = 0;
 #endif
 
   // Need to set this because restart not handled by PairHybrid
 
   sna = NULL;
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 // Free the element tables, the per-thread SNA objects, and the arrays
 // created when the pair style was allocated.
 PairSNAP::~PairSNAP()
 {
   if (nelements != 0) {
     for (int ielem = 0; ielem < nelements; ielem++)
       delete[] elements[ielem];
     delete[] elements;
     memory->destroy(radelem);
     memory->destroy(wjelem);
     memory->destroy(coeffelem);
   }
 
   // sna may still be NULL (see constructor: restart not handled by PairHybrid)
 
   if (sna != NULL) {
 
 #ifdef TIMING_INFO
     // per-timer maximum and average over threads, then reduced over ranks
     double tmax[5];
     double tavg[5];
     double tavg_mpi[5];
     double tmax_mpi[5];
 
     for (int itimer = 0; itimer < 5; itimer++) {
       tmax[itimer] = 0;
       tavg[itimer] = 0;
       for (int tid = 0; tid<nthreads; tid++) {
         if (sna[tid]->timers[itimer]>tmax[itimer])
           tmax[itimer] = sna[tid]->timers[itimer];
         tavg[itimer] += sna[tid]->timers[itimer];
       }
       tavg[itimer] /= nthreads;
     }
     MPI_Reduce(tavg, tavg_mpi, 5, MPI_DOUBLE, MPI_SUM, 0, world);
     MPI_Reduce(tmax, tmax_mpi, 5, MPI_DOUBLE, MPI_MAX, 0, world);
 #endif
 
     // delete each thread's SNA object, then the pointer array itself
     for (int tid = 0; tid<nthreads; tid++) {
       delete sna[tid];
     }
     delete [] sna;
 
   }
 
   if (allocated) {
     memory->destroy(setflag);
     memory->destroy(cutsq);
     memory->destroy(map);
   }
 
 }
 
 // Dispatch to the threaded/load-balanced implementation when
 // use_optimized is set, otherwise to the straightforward one.
 void PairSNAP::compute(int eflag, int vflag)
 {
   if (!use_optimized) {
     compute_regular(eflag, vflag);
     return;
   }
   compute_optimized(eflag, vflag);
 }
 
 /* ----------------------------------------------------------------------
    This version is a straightforward implementation
    ---------------------------------------------------------------------- */
 
 // Straightforward force/energy evaluation using only the first SNA
 // object (sna[0]).
 //
 // For every atom i in the neighbor list:
 //   1. collect the neighbors inside the type-pair cutoff,
 //   2. compute Ui and Zi (and Bi when gammaoneflag is not set),
 //   3. for each collected neighbor j, compute dBi/dRj, contract it with
 //      the element coefficients to get fij, add fij to f[i] and subtract
 //      it from f[j], and tally the virial if requested,
 //   4. if eflag is set, tally the energy of atom i.
 //
 // eflag, vflag: energy/virial request flags passed on to ev_setup().
 void PairSNAP::compute_regular(int eflag, int vflag)
 {
   int i,j,jnum,ninside;
   double delx,dely,delz,evdwl,rsq;
   double fij[3];
   int *jlist,*numneigh,**firstneigh;
   evdwl = 0.0;
 
   if (eflag || vflag) ev_setup(eflag,vflag);
   else evflag = vflag_fdotr = 0;
 
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
   int nlocal = atom->nlocal;
   int newton_pair = force->newton_pair;
   class SNA* snaptr = sna[0];
 
   numneigh = list->numneigh;
   firstneigh = list->firstneigh;
 
   for (int ii = 0; ii < list->inum; ii++) {
     i = list->ilist[ii];
 
     const double xtmp = x[i][0];
     const double ytmp = x[i][1];
     const double ztmp = x[i][2];
     const int itype = type[i];
     const int ielem = map[itype];
     const double radi = radelem[ielem];
 
     jlist = firstneigh[i];
     jnum = numneigh[i];
 
     // ensure rij, inside, wj, and rcutij are of size jnum
 
     snaptr->grow_rij(jnum);
 
     // rij[][3] = displacements between atom I and those neighbors
     // inside = indices of neighbors of I within cutoff
     // wj = weights for neighbors of I within cutoff
     // rcutij = cutoffs for neighbors of I within cutoff
     // note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi
 
     ninside = 0;
     for (int jj = 0; jj < jnum; jj++) {
       j = jlist[jj];
       j &= NEIGHMASK;
       delx = x[j][0] - xtmp;
       dely = x[j][1] - ytmp;
       delz = x[j][2] - ztmp;
       rsq = delx*delx + dely*dely + delz*delz;
       int jtype = type[j];
       int jelem = map[jtype];
 
       // the lower bound skips pairs at (numerically) zero separation
       if (rsq < cutsq[itype][jtype]&&rsq>1e-20) {
         snaptr->rij[ninside][0] = delx;
         snaptr->rij[ninside][1] = dely;
         snaptr->rij[ninside][2] = delz;
         snaptr->inside[ninside] = j;
         snaptr->wj[ninside] = wjelem[jelem];
         snaptr->rcutij[ninside] = (radi + radelem[jelem])*rcutfac;
         ninside++;
       }
     }
 
     // compute Ui, Zi, and Bi for atom I
     // Bi is only needed here when the force prefactor depends on it
 
     snaptr->compute_ui(ninside);
     snaptr->compute_zi();
     if (!gammaoneflag) {
       snaptr->compute_bi();
       snaptr->copy_bi2bvec();
     }
 
     // for neighbors of I within cutoff:
     // compute dUi/drj and dBi/drj
     // Fij = dEi/dRj = -dEi/dRi => add to Fi, subtract from Fj
 
     double* coeffi = coeffelem[ielem];
 
     for (int jj = 0; jj < ninside; jj++) {
       j = snaptr->inside[jj];
       snaptr->compute_duidrj(snaptr->rij[jj],
                              snaptr->wj[jj],snaptr->rcutij[jj]);
 
       snaptr->compute_dbidrj();
       snaptr->copy_dbi2dbvec();
 
       fij[0] = 0.0;
       fij[1] = 0.0;
       fij[2] = 0.0;
 
       // coeffi[0] is the constant energy term; coeffi[k], k >= 1,
       // multiplies bispectrum component k-1
 
       for (int k = 1; k <= ncoeff; k++) {
         double bgb;
         if (gammaoneflag)
           bgb = coeffi[k];
         else bgb = coeffi[k]*
                gamma*pow(snaptr->bvec[k-1],gamma-1.0);
         fij[0] += bgb*snaptr->dbvec[k-1][0];
         fij[1] += bgb*snaptr->dbvec[k-1][1];
         fij[2] += bgb*snaptr->dbvec[k-1][2];
       }
 
       f[i][0] += fij[0];
       f[i][1] += fij[1];
       f[i][2] += fij[2];
       f[j][0] -= fij[0];
       f[j][1] -= fij[1];
       f[j][2] -= fij[2];
 
       if (evflag)
         ev_tally_xyz(i,j,nlocal,newton_pair,0.0,0.0,
                      fij[0],fij[1],fij[2],
                      snaptr->rij[jj][0],snaptr->rij[jj][1],
                      snaptr->rij[jj][2]);
     }
 
     if (eflag) {
 
       // evdwl = energy of atom I, sum over coeffs_k * Bi_k
 
       evdwl = coeffi[0];
       if (gammaoneflag) {
         snaptr->compute_bi();
         snaptr->copy_bi2bvec();
         for (int k = 1; k <= ncoeff; k++)
           evdwl += coeffi[k]*snaptr->bvec[k-1];
       } else
         for (int k = 1; k <= ncoeff; k++)
           evdwl += coeffi[k]*pow(snaptr->bvec[k-1],gamma);
 
       // energy-only tally: the pair force argument is zero, so pass zero
       // displacements as well.  delx/dely/delz are leftovers of the
       // neighbor loop and may be uninitialized when atom I has no
       // neighbors.
       ev_tally_full(i,2.0*evdwl,0.0,0.0,0.0,0.0,0.0);
     }
 
   }
 
   if (vflag_fdotr) virial_fdotr_compute();
 }
 
 
 /* ----------------------------------------------------------------------
    This version is optimized for threading, micro-load balancing
    ---------------------------------------------------------------------- */
 
 void PairSNAP::compute_optimized(int eflag, int vflag)
 {
   // if reneighboring took place do load_balance if requested
   if (do_load_balance > 0 &&
       (neighbor->ncalls != ncalls_neigh)) {
     ghostinum = 0;
     // reset local ghost neighbor lists
     ncalls_neigh = neighbor->ncalls;
     if (ilistmask_max < list->inum) {
       memory->grow(ilistmask,list->inum,"PairSnap::ilistmask");
       ilistmask_max = list->inum;
     }
     for (int i = 0; i < list->inum; i++)
       ilistmask[i] = 1;
 
     //multiple passes for loadbalancing
     for (int i = 0; i < do_load_balance; i++)
       load_balance();
   }
 
   int numpairs = 0;
   for (int ii = 0; ii < list->inum; ii++) {
     if ((do_load_balance <= 0) || ilistmask[ii]) {
       int i = list->ilist[ii];
       int jnum = list->numneigh[i];
       numpairs += jnum;
     }
   }
 
   if (do_load_balance)
     for (int ii = 0; ii < ghostinum; ii++) {
       int i = ghostilist[ii];
       int jnum = ghostnumneigh[i];
       numpairs += jnum;
     }
 
   // optimized schedule setting
 
   int time_dynamic = 0;
   int time_guided = 0;
 
   if (schedule_user == 0) schedule_user = 4;
 
   switch (schedule_user) {
   case 1:
     omp_set_schedule(omp_sched_static,1);
     break;
   case 2:
     omp_set_schedule(omp_sched_dynamic,1);
     break;
   case 3:
     omp_set_schedule(omp_sched_guided,2);
     break;
   case 4:
     omp_set_schedule(omp_sched_auto,0);
     break;
   case 5:
     if (numpairs < 8*nthreads) omp_set_schedule(omp_sched_dynamic,1);
     else if (schedule_time_guided < 0.0) {
       omp_set_schedule(omp_sched_guided,2);
       if (!eflag && !vflag) time_guided = 1;
     } else if (schedule_time_dynamic<0.0) {
       omp_set_schedule(omp_sched_dynamic,1);
       if (!eflag && !vflag) time_dynamic = 1;
     } else if (schedule_time_guided<schedule_time_dynamic)
       omp_set_schedule(omp_sched_guided,2);
     else
       omp_set_schedule(omp_sched_dynamic,1);
     break;
   }
 
   if (use_shared_arrays)
     build_per_atom_arrays();
 
 #if defined(_OPENMP)
 #pragma omp parallel shared(eflag,vflag,time_dynamic,time_guided) firstprivate(numpairs) default(none)
 #endif
   {
     // begin of pragma omp parallel
 
     int tid = omp_get_thread_num();
     int** pairs_tid_unique = NULL;
 
     int** pairs;
     if (use_shared_arrays) pairs = i_pairs;
     else {
       memory->create(pairs_tid_unique,numpairs,4,"numpairs");
       pairs = pairs_tid_unique;
     }
 
     if (!use_shared_arrays) {
       numpairs = 0;
       for (int ii = 0; ii < list->inum; ii++) {
         if ((do_load_balance <= 0) || ilistmask[ii]) {
           int i = list->ilist[ii];
           int jnum = list->numneigh[i];
           for (int jj = 0; jj<jnum; jj++) {
             pairs[numpairs][0] = i;
             pairs[numpairs][1] = jj;
             pairs[numpairs][2] = -1;
             numpairs++;
           }
         }
       }
 
       for (int ii = 0; ii < ghostinum; ii++) {
         int i = ghostilist[ii];
         int jnum = ghostnumneigh[i];
         for (int jj = 0; jj<jnum; jj++) {
           pairs[numpairs][0] = i;
           pairs[numpairs][1] = jj;
           pairs[numpairs][2] = -1;
           numpairs++;
         }
       }
     }
 
     int ielem;
     int jj,k,jnum,jtype,ninside;
     double delx,dely,delz,evdwl,rsq;
     double fij[3];
     int *jlist,*numneigh,**firstneigh;
     evdwl = 0.0;
 
 #if defined(_OPENMP)
 #pragma omp master
 #endif
     {
       if (eflag || vflag) ev_setup(eflag,vflag);
       else evflag = vflag_fdotr = 0;
     }
 
 #if defined(_OPENMP)
 #pragma omp barrier
     { ; }
 #endif
 
     double **x = atom->x;
     double **f = atom->f;
     int *type = atom->type;
     int nlocal = atom->nlocal;
     int newton_pair = force->newton_pair;
 
     numneigh = list->numneigh;
     firstneigh = list->firstneigh;
 
 #ifdef TIMING_INFO
     // only update micro timers after setup
     static int count=0;
     if (count<2) {
       sna[tid]->timers[0] = 0;
       sna[tid]->timers[1] = 0;
       sna[tid]->timers[2] = 0;
       sna[tid]->timers[3] = 0;
       sna[tid]->timers[4] = 0;
     }
     count++;
 #endif
 
     // did thread start working on interactions of new atom
     int iold = -1;
 
     double starttime, endtime;
     if (time_dynamic || time_guided)
       starttime = MPI_Wtime();
 
 #if defined(_OPENMP)
 #pragma omp for schedule(runtime)
 #endif
     for (int iijj = 0; iijj < numpairs; iijj++) {
       int i = 0;
       if (use_shared_arrays) {
         i = i_pairs[iijj][0];
         if (iold != i) {
           set_sna_to_shared(tid,i_pairs[iijj][3]);
 	  ielem = map[type[i]];
 	}
         iold = i;
       } else {
         i = pairs[iijj][0];
         if (iold != i) {
           iold = i;
           const double xtmp = x[i][0];
           const double ytmp = x[i][1];
           const double ztmp = x[i][2];
           const int itype = type[i];
 	  ielem = map[itype];
 	  const double radi = radelem[ielem];
 
           if (i < nlocal) {
             jlist = firstneigh[i];
             jnum = numneigh[i];
           } else {
             jlist = ghostneighs+ghostfirstneigh[i];
             jnum = ghostnumneigh[i];
           }
 
           // insure rij, inside, wj, and rcutij are of size jnum
 
           sna[tid]->grow_rij(jnum);
 
           // rij[][3] = displacements between atom I and those neighbors
           // inside = indices of neighbors of I within cutoff
           // wj = weights of neighbors of I within cutoff
           // rcutij = cutoffs of neighbors of I within cutoff
           // note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi
 
           ninside = 0;
           for (jj = 0; jj < jnum; jj++) {
             int j = jlist[jj];
             j &= NEIGHMASK;
             delx = x[j][0] - xtmp; //unitialised
             dely = x[j][1] - ytmp;
             delz = x[j][2] - ztmp;
             rsq = delx*delx + dely*dely + delz*delz;
             jtype = type[j];
 	    int jelem = map[jtype];
 
             if (rsq < cutsq[itype][jtype]&&rsq>1e-20) { //unitialised
               sna[tid]->rij[ninside][0] = delx;
               sna[tid]->rij[ninside][1] = dely;
               sna[tid]->rij[ninside][2] = delz;
               sna[tid]->inside[ninside] = j;
               sna[tid]->wj[ninside] = wjelem[jelem];
               sna[tid]->rcutij[ninside] = (radi + radelem[jelem])*rcutfac;
 	      ninside++;
 
               // update index list with inside index
               pairs[iijj + (jj - pairs[iijj][1])][2] =
                 ninside-1; //unitialised
             }
           }
 
           // compute Ui and Zi for atom I
 
           sna[tid]->compute_ui(ninside); //unitialised
           sna[tid]->compute_zi();
         }
       }
 
       // for neighbors of I within cutoff:
       // compute dUi/drj and dBi/drj
       // Fij = dEi/dRj = -dEi/dRi => add to Fi, subtract from Fj
 
       // entry into loop if inside index is set
 
       double* coeffi = coeffelem[ielem];
 
       if (pairs[iijj][2] >= 0) {
         jj = pairs[iijj][2];
         int j = sna[tid]->inside[jj];
         sna[tid]->compute_duidrj(sna[tid]->rij[jj],
 				 sna[tid]->wj[jj],sna[tid]->rcutij[jj]);
 
         sna[tid]->compute_dbidrj();
         sna[tid]->copy_dbi2dbvec();
 	if (!gammaoneflag) {
 	  sna[tid]->compute_bi();
 	  sna[tid]->copy_bi2bvec();
 	}
 
         fij[0] = 0.0;
         fij[1] = 0.0;
         fij[2] = 0.0;
 
         for (k = 1; k <= ncoeff; k++) {
 	  double bgb;
 	  if (gammaoneflag)
 	    bgb = coeffi[k];
 	  else bgb = coeffi[k]*
 		 gamma*pow(sna[tid]->bvec[k-1],gamma-1.0);
 	  fij[0] += bgb*sna[tid]->dbvec[k-1][0];
 	  fij[1] += bgb*sna[tid]->dbvec[k-1][1];
 	  fij[2] += bgb*sna[tid]->dbvec[k-1][2];
         }
 
 #if defined(_OPENMP)
 #pragma omp critical
 #endif
         {
           f[i][0] += fij[0];
           f[i][1] += fij[1];
           f[i][2] += fij[2];
           f[j][0] -= fij[0];
           f[j][1] -= fij[1];
           f[j][2] -= fij[2];
           if (evflag)
             ev_tally_xyz(i,j,nlocal,newton_pair,0.0,0.0,
                          fij[0],fij[1],fij[2],
                          sna[tid]->rij[jj][0],sna[tid]->rij[jj][1],
                          sna[tid]->rij[jj][2]);
         }
       }
 
       // evdwl = energy of atom I, sum over coeffs_k * Bi_k
       // only call this for first pair of each atom i
       // if atom has no pairs, eatom=0, which is wrong
 
       if (eflag&&pairs[iijj][1] == 0) {
 	evdwl = coeffi[0];
 	if (gammaoneflag) {
 	  sna[tid]->compute_bi();
 	  sna[tid]->copy_bi2bvec();
 	  for (int k = 1; k <= ncoeff; k++)
 	    evdwl += coeffi[k]*sna[tid]->bvec[k-1];
 	} else
 	  for (int k = 1; k <= ncoeff; k++)
 	    evdwl += coeffi[k]*pow(sna[tid]->bvec[k-1],gamma);
 
 #if defined(_OPENMP)
 #pragma omp critical
 #endif
         ev_tally_full(i,2.0*evdwl,0.0,0.0,delx,dely,delz);
       }
 
     }
     if (time_dynamic || time_guided)
       endtime = MPI_Wtime();
     if (time_dynamic) schedule_time_dynamic = endtime - starttime;
     if (time_guided) schedule_time_guided = endtime - starttime;
     if (!use_shared_arrays) memory->destroy(pairs);
 
   }// end of pragma omp parallel
 
   if (vflag_fdotr) virial_fdotr_compute();
 
 }
 
 // Test two 3-vectors for coincidence.
 // Returns 1 if their squared separation is below 1e-20, else 0.
 
 inline int PairSNAP::equal(double* x,double* y)
 {
   const double dx = x[0]-y[0];
   const double dy = x[1]-y[1];
   const double dz = x[2]-y[2];
   const double sep2 = dx*dx + dy*dy + dz*dz;
   return (sep2 < 1e-20) ? 1 : 0;
 }
 
 // Squared Euclidean distance between the 3-vectors x and y.
 
 inline double PairSNAP::dist2(double* x,double* y)
 {
   const double dx = x[0]-y[0];
   const double dy = x[1]-y[1];
   const double dz = x[2]-y[2];
   return dx*dx + dy*dy + dz*dz;
 }
 
 // return extra communication cutoff
 // = longest edge (subhi - sublo) of this processor's subdomain
 
 double PairSNAP::extra_cutoff()
 {
   double lo[3],hi[3];
 
   if (domain->triclinic) {
     // triclinic: pass sublo_lamda/subhi_lamda through Domain::lamda2x
     domain->lamda2x(domain->sublo_lamda,lo);
     domain->lamda2x(domain->subhi_lamda,hi);
   } else {
     for (int d = 0; d < 3; d++) {
       lo[d] = domain->sublo[d];
       hi[d] = domain->subhi[d];
     }
   }
 
   // note: for triclinic, probably need something different
   // see Comm::setup()
 
   double longest = 0;
   for (int d = 0; d < 3; d++) {
     const double extent = hi[d] - lo[d];
     longest = MAX(longest,extent);
   }
 
   return longest;
 }
 
 // micro load_balancer: each MPI process will
 // check with each of its 26 neighbors,
 // whether an imbalance exists in the number
 // of atoms to calculate forces for.
 // If it does it will set ilistmask of one of
 // its local atoms to zero, and send its Tag
 // to the neighbor process. The neighboring process
 // will check its ghost list for the
 // ghost atom with the same Tag which is closest
 // to its domain center, and build a
 // neighborlist for this ghost atom. For this to work,
 // the communication cutoff has to be
 // as large as the neighbor cutoff +
 // maximum subdomain length.
 
 // Note that at most one atom is exchanged per processor pair.
 
 // Also note that the local atom assignment
 // doesn't change. This load balancer will cause
 // some ghost atoms to have full neighborlists
 // which are unique to PairSNAP.
 // They are not part of the generally accessible neighborlist.
 // At the same time corresponding local atoms on
 // other MPI processes will not be
 // included in the force computation since
 // their ilistmask is 0. This does not affect
 // any other classes which might
 // access the same general neighborlist.
 // Reverse communication (newton on) of forces is required.
 
 // Currently the load balancer does two passes,
 // since it is exchanging atoms upstream and downstream.
 
 void PairSNAP::load_balance()
 {
   double sublo[3],subhi[3];
   if (domain->triclinic == 0) {
     double* sublotmp = domain->sublo;
     double* subhitmp = domain->subhi;
     for (int dim = 0 ; dim<3 ; dim++) {
       sublo[dim]=sublotmp[dim];
       subhi[dim]=subhitmp[dim];
     }
   } else {
     double* sublotmp = domain->sublo_lamda;
     double* subhitmp = domain->subhi_lamda;
     domain->lamda2x(sublotmp,sublo);
     domain->lamda2x(subhitmp,subhi);
   }
 
   //if (list->inum==0) list->grow(atom->nmax);
 
   int nlocal = ghostinum;
   for (int i=0; i < list->inum; i++)
     if (ilistmask[i]) nlocal++;
   int ***grid2proc = comm->grid2proc;
   int* procgrid = comm->procgrid;
 
   int nlocal_up,nlocal_down;
   MPI_Request request;
 
   double sub_mid[3];
   for (int dim=0; dim<3; dim++)
     sub_mid[dim] = (subhi[dim] + sublo[dim])/2;
 
   if (comm->cutghostuser <
       neighbor->cutneighmax+extra_cutoff())
     error->all(FLERR,"Communication cutoff too small for SNAP micro load balancing");
 
   int nrecv = ghostinum;
   int totalsend = 0;
   int nsend = 0;
   int depth = 1;
 
   for (int dx = -depth; dx < depth+1; dx++)
     for (int dy = -depth; dy < depth+1; dy++)
       for (int dz = -depth; dz < depth+1; dz++) {
 
         if (dx == dy && dy == dz && dz == 0) continue;
 
         int sendloc[3] = {comm->myloc[0],
                           comm->myloc[1], comm->myloc[2]
                          };
         sendloc[0] += dx;
         sendloc[1] += dy;
         sendloc[2] += dz;
         for (int dim = 0; dim < 3; dim++)
           if (sendloc[dim] >= procgrid[dim])
             sendloc[dim] = sendloc[dim] - procgrid[dim];
         for (int dim = 0; dim < 3; dim++)
           if (sendloc[dim] < 0)
             sendloc[dim] = procgrid[dim] + sendloc[dim];
         int recvloc[3] = {comm->myloc[0],
                           comm->myloc[1], comm->myloc[2]
                          };
         recvloc[0] -= dx;
         recvloc[1] -= dy;
         recvloc[2] -= dz;
         for (int dim = 0; dim < 3; dim++)
           if (recvloc[dim] < 0)
             recvloc[dim] = procgrid[dim] + recvloc[dim];
         for (int dim = 0; dim < 3; dim++)
           if (recvloc[dim] >= procgrid[dim])
             recvloc[dim] = recvloc[dim] - procgrid[dim];
 
         int sendproc = grid2proc[sendloc[0]][sendloc[1]][sendloc[2]];
         int recvproc = grid2proc[recvloc[0]][recvloc[1]][recvloc[2]];
 
         // two stage process, first upstream movement, then downstream
 
         MPI_Sendrecv(&nlocal,1,MPI_INT,sendproc,0,
                      &nlocal_up,1,MPI_INT,recvproc,0,world,MPI_STATUS_IGNORE);
         MPI_Sendrecv(&nlocal,1,MPI_INT,recvproc,0,
                      &nlocal_down,1,MPI_INT,sendproc,0,world,MPI_STATUS_IGNORE);
         nsend = 0;
 
         // send upstream
 
         if (nlocal > nlocal_up+1) {
 
           int i = totalsend++;
           while(i < list->inum && ilistmask[i] == 0)
             i = totalsend++;
 
           if (i < list->inum)
             MPI_Isend(&atom->tag[i],1,MPI_INT,recvproc,0,world,&request);
           else {
             int j = -1;
             MPI_Isend(&j,1,MPI_INT,recvproc,0,world,&request);
           }
 
           if (i < list->inum) {
             for (int j = 0; j < list->inum; j++)
               if (list->ilist[j] == i)
                 ilistmask[j] = 0;
             nsend = 1;
           }
         }
 
         // recv downstream
 
         if (nlocal < nlocal_down-1) {
           nlocal++;
           int get_tag = -1;
           MPI_Recv(&get_tag,1,MPI_INT,sendproc,0,world,MPI_STATUS_IGNORE);
 
           // if get_tag -1 the other process didnt have local atoms to send
 
           if (get_tag >= 0) {
             if (ghostinum >= ghostilist_max) {
               memory->grow(ghostilist,ghostinum+10,
                            "PairSnap::ghostilist");
               ghostilist_max = ghostinum+10;
             }
             if (atom->nlocal + atom->nghost >= ghostnumneigh_max) {
               ghostnumneigh_max = atom->nlocal+atom->nghost+100;
               memory->grow(ghostnumneigh,ghostnumneigh_max,
                            "PairSnap::ghostnumneigh");
               memory->grow(ghostfirstneigh,ghostnumneigh_max,
                            "PairSnap::ghostfirstneigh");
             }
 
             // find closest ghost image of the transfered particle
 
             double mindist = 1e200;
             int closestghost = -1;
             for (int j = 0; j < atom->nlocal + atom->nghost; j++)
               if (atom->tag[j] == get_tag)
                 if (dist2(sub_mid, atom->x[j]) < mindist) {
                   closestghost = j;
                   mindist = dist2(sub_mid, atom->x[j]);
                 }
 
             // build neighborlist for this particular
             // ghost atom, and add it to list->ilist
 
             if (ghostneighs_max - ghostneighs_total <
                 neighbor->oneatom) {
               memory->grow(ghostneighs,
                            ghostneighs_total + neighbor->oneatom,
                            "PairSnap::ghostneighs");
               ghostneighs_max = ghostneighs_total + neighbor->oneatom;
             }
 
             int j = closestghost;
 
             ghostilist[ghostinum] = j;
             ghostnumneigh[j] = 0;
             ghostfirstneigh[j] = ghostneighs_total;
 
             ghostinum++;
             int* jlist = ghostneighs + ghostfirstneigh[j];
 
             // find all neighbors by looping
             // over all local and ghost atoms
 
             for (int k = 0; k < atom->nlocal + atom->nghost; k++)
               if (dist2(atom->x[j],atom->x[k]) <
                   neighbor->cutneighmax*neighbor->cutneighmax) {
                 jlist[ghostnumneigh[j]] = k;
                 ghostnumneigh[j]++;
                 ghostneighs_total++;
               }
           }
 
           if (get_tag >= 0) nrecv++;
         }
 
         // decrease nlocal later, so that it is the
         // initial number both for receiving and sending
 
         if (nsend) nlocal--;
 
         // second pass through the grid
 
         MPI_Sendrecv(&nlocal,1,MPI_INT,sendproc,0,
                      &nlocal_up,1,MPI_INT,recvproc,0,world,MPI_STATUS_IGNORE);
         MPI_Sendrecv(&nlocal,1,MPI_INT,recvproc,0,
                      &nlocal_down,1,MPI_INT,sendproc,0,world,MPI_STATUS_IGNORE);
 
         // send downstream
 
         nsend=0;
         if (nlocal > nlocal_down+1) {
           int i = totalsend++;
           while(i < list->inum && ilistmask[i]==0) i = totalsend++;
 
           if (i < list->inum)
             MPI_Isend(&atom->tag[i],1,MPI_INT,sendproc,0,world,&request);
           else {
             int j =- 1;
             MPI_Isend(&j,1,MPI_INT,sendproc,0,world,&request);
           }
 
           if (i < list->inum) {
             for (int j=0; j<list->inum; j++)
               if (list->ilist[j] == i) ilistmask[j] = 0;
             nsend = 1;
           }
         }
 
         // receive upstream
 
         if (nlocal < nlocal_up-1) {
           nlocal++;
           int get_tag = -1;
 
           MPI_Recv(&get_tag,1,MPI_INT,recvproc,0,world,MPI_STATUS_IGNORE);
 
           if (get_tag >= 0) {
             if (ghostinum >= ghostilist_max) {
               memory->grow(ghostilist,ghostinum+10,
                            "PairSnap::ghostilist");
               ghostilist_max = ghostinum+10;
             }
             if (atom->nlocal + atom->nghost >= ghostnumneigh_max) {
               ghostnumneigh_max = atom->nlocal + atom->nghost + 100;
               memory->grow(ghostnumneigh,ghostnumneigh_max,
                            "PairSnap::ghostnumneigh");
               memory->grow(ghostfirstneigh,ghostnumneigh_max,
                            "PairSnap::ghostfirstneigh");
             }
 
             // find closest ghost image of the transfered particle
 
             double mindist = 1e200;
             int closestghost = -1;
             for (int j = 0; j < atom->nlocal + atom->nghost; j++)
               if (atom->tag[j] == get_tag)
                 if (dist2(sub_mid,atom->x[j])<mindist) {
                   closestghost = j;
                   mindist = dist2(sub_mid,atom->x[j]);
                 }
 
             // build neighborlist for this particular ghost atom
 
             if (ghostneighs_max-ghostneighs_total < neighbor->oneatom) {
               memory->grow(ghostneighs,ghostneighs_total + neighbor->oneatom,
                            "PairSnap::ghostneighs");
               ghostneighs_max = ghostneighs_total + neighbor->oneatom;
             }
 
             int j = closestghost;
 
             ghostilist[ghostinum] = j;
             ghostnumneigh[j] = 0;
             ghostfirstneigh[j] = ghostneighs_total;
 
             ghostinum++;
             int* jlist = ghostneighs + ghostfirstneigh[j];
 
             for (int k = 0; k < atom->nlocal + atom->nghost; k++)
               if (dist2(atom->x[j],atom->x[k]) <
                   neighbor->cutneighmax*neighbor->cutneighmax) {
                 jlist[ghostnumneigh[j]] = k;
                 ghostnumneigh[j]++;
                 ghostneighs_total++;
               }
           }
 
           if (get_tag >= 0) nrecv++;
         }
         if (nsend) nlocal--;
       }
 }
 
 // Point the array members of SNA instance snaid at slot i of the
 // per-atom arrays held by this class (i_rij, i_inside, ...).
 // Only pointers are assigned; no data is copied.
 
 void PairSNAP::set_sna_to_shared(int snaid,int i)
 {
   // neighbor arrays of slot i
 
   sna[snaid]->inside = i_inside[i];
   sna[snaid]->rij = i_rij[i];
   sna[snaid]->rcutij = i_rcutij[i];
   sna[snaid]->wj = i_wj[i];
 
   // uarraytot and zarray of slot i (_r and _i variants)
 
   sna[snaid]->uarraytot_r = i_uarraytot_r[i];
   sna[snaid]->uarraytot_i = i_uarraytot_i[i];
   sna[snaid]->zarray_r = i_zarray_r[i];
   sna[snaid]->zarray_i = i_zarray_i[i];
 }
 
 void PairSNAP::build_per_atom_arrays()
 {
 
 #ifdef TIMING_INFO
   clock_gettime(CLOCK_REALTIME,&starttime);
 #endif
 
   int count = 0;
   int neighmax = 0;
   for (int ii = 0; ii < list->inum; ii++)
     if ((do_load_balance <= 0) || ilistmask[ii]) {
       neighmax=MAX(neighmax,list->numneigh[list->ilist[ii]]);
       ++count;
     }
   for (int ii = 0; ii < ghostinum; ii++) {
     neighmax=MAX(neighmax,ghostnumneigh[ghostilist[ii]]);
     ++count;
   }
 
   if (i_max < count || i_neighmax < neighmax) {
     int i_maxt = MAX(count,i_max);
     i_neighmax = MAX(neighmax,i_neighmax);
     memory->destroy(i_rij);
     memory->destroy(i_inside);
     memory->destroy(i_wj);
     memory->destroy(i_rcutij);
     memory->destroy(i_ninside);
     memory->destroy(i_pairs);
     memory->create(i_rij,i_maxt,i_neighmax,3,"PairSNAP::i_rij");
     memory->create(i_inside,i_maxt,i_neighmax,"PairSNAP::i_inside");
     memory->create(i_wj,i_maxt,i_neighmax,"PairSNAP::i_wj");
     memory->create(i_rcutij,i_maxt,i_neighmax,"PairSNAP::i_rcutij");
     memory->create(i_ninside,i_maxt,"PairSNAP::i_ninside");
     memory->create(i_pairs,i_maxt*i_neighmax,4,"PairSNAP::i_pairs");
   }
 
   if (i_max < count) {
     int jdim = sna[0]->twojmax+1;
     memory->destroy(i_uarraytot_r);
     memory->destroy(i_uarraytot_i);
     memory->create(i_uarraytot_r,count,jdim,jdim,jdim,
                    "PairSNAP::i_uarraytot_r");
     memory->create(i_uarraytot_i,count,jdim,jdim,jdim,
                    "PairSNAP::i_uarraytot_i");
     if (i_zarray_r != NULL)
       for (int i = 0; i < i_max; i++) {
         memory->destroy(i_zarray_r[i]);
         memory->destroy(i_zarray_i[i]);
       }
 
     delete [] i_zarray_r;
     delete [] i_zarray_i;
     i_zarray_r = new double*****[count];
     i_zarray_i = new double*****[count];
     for (int i = 0; i < count; i++) {
       memory->create(i_zarray_r[i],jdim,jdim,jdim,jdim,jdim,
                      "PairSNAP::i_zarray_r");
       memory->create(i_zarray_i[i],jdim,jdim,jdim,jdim,jdim,
                      "PairSNAP::i_zarray_i");
     }
   }
 
   if (i_max < count)
     i_max = count;
 
   count = 0;
   i_numpairs = 0;
   for (int ii = 0; ii < list->inum; ii++) {
     if ((do_load_balance <= 0) || ilistmask[ii]) {
       int i = list->ilist[ii];
       int jnum = list->numneigh[i];
       int* jlist = list->firstneigh[i];
       const double xtmp = atom->x[i][0];
       const double ytmp = atom->x[i][1];
       const double ztmp = atom->x[i][2];
       const int itype = atom->type[i];
       const int ielem = map[itype];
       const double radi = radelem[ielem];
       int ninside = 0;
       for (int jj = 0; jj < jnum; jj++) {
         int j = jlist[jj];
         j &= NEIGHMASK;
         const double delx = atom->x[j][0] - xtmp;
         const double dely = atom->x[j][1] - ytmp;
         const double delz = atom->x[j][2] - ztmp;
         const double rsq = delx*delx + dely*dely + delz*delz;
         int jtype = atom->type[j];
 	int jelem = map[jtype];
 
         i_pairs[i_numpairs][0] = i;
         i_pairs[i_numpairs][1] = jj;
         i_pairs[i_numpairs][2] = -1;
         i_pairs[i_numpairs][3] = count;
         if (rsq < cutsq[itype][jtype]&&rsq>1e-20) {
           i_rij[count][ninside][0] = delx;
           i_rij[count][ninside][1] = dely;
           i_rij[count][ninside][2] = delz;
           i_inside[count][ninside] = j;
           i_wj[count][ninside] = wjelem[jelem];
           i_rcutij[count][ninside] = (radi + radelem[jelem])*rcutfac;
 
           // update index list with inside index
           i_pairs[i_numpairs][2] = ninside++;
         }
         i_numpairs++;
       }
       i_ninside[count] = ninside;
       count++;
     }
   }
 
   for (int ii = 0; ii < ghostinum; ii++) {
     int i = ghostilist[ii];
     int jnum = ghostnumneigh[i];
     int* jlist = ghostneighs+ghostfirstneigh[i];
     const double xtmp = atom->x[i][0];
     const double ytmp = atom->x[i][1];
     const double ztmp = atom->x[i][2];
     const int itype = atom->type[i];
     const int ielem = map[itype];
     const double radi = radelem[ielem];
     int ninside = 0;
 
     for (int jj = 0; jj < jnum; jj++) {
       int j = jlist[jj];
       j &= NEIGHMASK;
       const double delx = atom->x[j][0] - xtmp;
       const double dely = atom->x[j][1] - ytmp;
       const double delz = atom->x[j][2] - ztmp;
       const double rsq = delx*delx + dely*dely + delz*delz;
       int jtype = atom->type[j];
       int jelem = map[jtype];
 
       i_pairs[i_numpairs][0] = i;
       i_pairs[i_numpairs][1] = jj;
       i_pairs[i_numpairs][2] = -1;
       i_pairs[i_numpairs][3] = count;
       if (rsq < cutsq[itype][jtype]&&rsq>1e-20) {
         i_rij[count][ninside][0] = delx;
         i_rij[count][ninside][1] = dely;
         i_rij[count][ninside][2] = delz;
         i_inside[count][ninside] = j;
         i_wj[count][ninside] = wjelem[jelem];
         i_rcutij[count][ninside] = (radi + radelem[jelem])*rcutfac;
         // update index list with inside index
         i_pairs[i_numpairs][2] = ninside++;
       }
       i_numpairs++;
     }
     i_ninside[count] = ninside;
     count++;
   }
 #ifdef TIMING_INFO
   clock_gettime(CLOCK_REALTIME,&endtime);
   timers[0]+=(endtime.tv_sec-starttime.tv_sec+1.0*
               (endtime.tv_nsec-starttime.tv_nsec)/1000000000);
 #endif
 #ifdef TIMING_INFO
   clock_gettime(CLOCK_REALTIME,&starttime);
 #endif
 
 #if defined(_OPENMP)
 #pragma omp parallel for shared(count) default(none)
 #endif
   for (int ii=0; ii < count; ii++) {
     int tid = omp_get_thread_num();
     set_sna_to_shared(tid,ii);
     //sna[tid]->compute_ui(i_ninside[ii]);
 #ifdef TIMING_INFO
     clock_gettime(CLOCK_REALTIME,&starttime);
 #endif
     sna[tid]->compute_ui_omp(i_ninside[ii],MAX(int(nthreads/count),1));
 #ifdef TIMING_INFO
     clock_gettime(CLOCK_REALTIME,&endtime);
     sna[tid]->timers[0]+=(endtime.tv_sec-starttime.tv_sec+1.0*
                           (endtime.tv_nsec-starttime.tv_nsec)/1000000000);
 #endif
   }
 
 #ifdef TIMING_INFO
   clock_gettime(CLOCK_REALTIME,&starttime);
 #endif
   for (int ii=0; ii < count; ii++) {
     int tid = 0;//omp_get_thread_num();
     set_sna_to_shared(tid,ii);
     sna[tid]->compute_zi_omp(MAX(int(nthreads/count),1));
   }
 #ifdef TIMING_INFO
   clock_gettime(CLOCK_REALTIME,&endtime);
   sna[0]->timers[1]+=(endtime.tv_sec-starttime.tv_sec+1.0*
                       (endtime.tv_nsec-starttime.tv_nsec)/1000000000);
 #endif
 
 #ifdef TIMING_INFO
   clock_gettime(CLOCK_REALTIME,&endtime);
   timers[1]+=(endtime.tv_sec-starttime.tv_sec+1.0*
               (endtime.tv_nsec-starttime.tv_nsec)/1000000000);
 #endif
 }
 
 /* ----------------------------------------------------------------------
    allocate setflag, cutsq and map, each with ntypes+1 entries
    per dimension, and mark the pair style as allocated
 ------------------------------------------------------------------------- */
 
 void PairSNAP::allocate()
 {
   allocated = 1;
 
   const int ntypes = atom->ntypes;
   const int nrows = ntypes + 1;
 
   memory->create(setflag,nrows,nrows,"pair:setflag");
   memory->create(cutsq,nrows,nrows,"pair:cutsq");
   memory->create(map,nrows,"pair:map");
 }
 
 /* ----------------------------------------------------------------------
    global settings
    parses keyword/value pairs of the pair_style command:
      nthreads, optimized, shared, loadbalance  (integer value)
      schedule  (static, dynamic, guided, auto or determine)
    any other keyword, a keyword without a value or an unknown
    schedule name is an error
 ------------------------------------------------------------------------- */
 
 void PairSNAP::settings(int narg, char **arg)
 {
 
   // set default values for optional arguments
   // schedule_user = 0 means no schedule was requested; it is reset
   // here like the other options so that a value left over from an
   // earlier invocation cannot defeat the checks below
 
   nthreads = -1;
   use_shared_arrays=-1;
   do_load_balance = 0;
   use_optimized = 1;
   schedule_user = 0;
 
   // optional arguments
 
   for (int i=0; i < narg; i++) {
 
     // every keyword is followed by exactly one value
 
     if (i+2>narg) error->all(FLERR,"Illegal pair_style command");
     if (strcmp(arg[i],"nthreads")==0) {
       nthreads=force->inumeric(FLERR,arg[++i]);
 #if defined(LMP_USER_OMP)
       error->all(FLERR,"Must set number of threads via package omp command");
 #else
       omp_set_num_threads(nthreads);
       comm->nthreads=nthreads;
 #endif
       continue;
     }
     if (strcmp(arg[i],"optimized")==0) {
       use_optimized=force->inumeric(FLERR,arg[++i]);
       continue;
     }
     if (strcmp(arg[i],"shared")==0) {
       use_shared_arrays=force->inumeric(FLERR,arg[++i]);
       continue;
     }
     if (strcmp(arg[i],"loadbalance")==0) {
       do_load_balance = force->inumeric(FLERR,arg[++i]);
       if (do_load_balance) {
 
         // NOTE(review): rcutmax is assigned in coeff(); confirm it
         // already holds a meaningful value when settings() runs
 
         double mincutoff = extra_cutoff() +
           rcutmax + neighbor->skin;
         if (comm->cutghostuser < mincutoff) {
           char buffer[255];
 
           // enlarge the communication cutoff and warn on proc 0
 
           double newcutoff = mincutoff + 0.1;
           sprintf(buffer, "Communication cutoff is too small "
                   "for SNAP micro load balancing, increased to %lf",
                   newcutoff);
           if (comm->me==0)
             error->warning(FLERR,buffer);
 
           comm->cutghostuser = newcutoff;
         }
       }
       continue;
     }
     if (strcmp(arg[i],"schedule")==0) {
       i++;
       if (strcmp(arg[i],"static")==0)
         schedule_user = 1;
       else if (strcmp(arg[i],"dynamic")==0)
         schedule_user = 2;
       else if (strcmp(arg[i],"guided")==0)
         schedule_user = 3;
       else if (strcmp(arg[i],"auto")==0)
         schedule_user = 4;
       else if (strcmp(arg[i],"determine")==0)
         schedule_user = 5;
       else
         error->all(FLERR,"Illegal pair_style command");
       continue;
     }
     error->all(FLERR,"Illegal pair_style command");
   }
 
   if (nthreads < 0)
     nthreads = comm->nthreads;
 
   // shared arrays not chosen by the user: enable them only for
   // several threads and at most 2*nthreads local atoms
 
   if (use_shared_arrays < 0) {
     if (nthreads > 1 && atom->nlocal <= 2*nthreads)
       use_shared_arrays = 1;
     else use_shared_arrays = 0;
   }
 
   // check if running non-optimized code with
   // optimization flags set
 
   if (!use_optimized)
     if (nthreads > 1 ||
         use_shared_arrays ||
         do_load_balance ||
         schedule_user)
       error->all(FLERR,"Illegal pair_style command");
 }
 
 /* ----------------------------------------------------------------------
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
 // Expected argument layout, as indexed below:
 //   arg[0], arg[1]        = "*" "*"
 //   arg[2]                = coefficient file name
 //   arg[3..]              = nelements element names
 //   arg[3+nelements]      = parameter file name
 //   arg[4+nelements..]    = one element name (or NULL) per atom type
 // so that narg = 4 + nelements + ntypes.
 
 void PairSNAP::coeff(int narg, char **arg)
 {
   // read SNAP element names between 2 filenames
   // nelements = # of SNAP elements
   // elements = list of unique element names
 
   if (narg < 6) error->all(FLERR,"Incorrect args for pair coefficients");
   if (!allocated) allocate();
 
   // release the element data of a previous call
 
   if (nelements) {
     for (int i = 0; i < nelements; i++)
       delete[] elements[i];
     delete[] elements;
     memory->destroy(radelem);
     memory->destroy(wjelem);
     memory->destroy(coeffelem);
   }
 
   nelements = narg - 4 - atom->ntypes;
   if (nelements < 1) error->all(FLERR,"Incorrect args for pair coefficients");
 
   char* type1 = arg[0];
   char* type2 = arg[1];
   char* coefffilename = arg[2];
   char** elemlist = &arg[3];
   char* paramfilename = arg[3+nelements];
   char** elemtypes = &arg[4+nelements];
 
   // insure I,J args are * *
 
   if (strcmp(type1,"*") != 0 || strcmp(type2,"*") != 0)
     error->all(FLERR,"Incorrect args for pair coefficients");
 
   // keep private copies of the element names
 
   elements = new char*[nelements];
 
   for (int i = 0; i < nelements; i++) {
     char* elemname = elemlist[i];
     int n = strlen(elemname) + 1;
     elements[i] = new char[n];
     strcpy(elements[i],elemname);
   }
 
   // read snapcoeff and snapparam files
 
   read_files(coefffilename,paramfilename);
 
   // read args that map atom types to SNAP elements
   // map[i] = which element the Ith atom type is, -1 if not mapped
   // map[0] is not used
 
   for (int i = 1; i <= atom->ntypes; i++) {
     char* elemname = elemtypes[i-1];
     int jelem;
     for (jelem = 0; jelem < nelements; jelem++)
       if (strcmp(elemname,elements[jelem]) == 0)
 	break;
 
     if (jelem < nelements)
       map[i] = jelem;
     else if (strcmp(elemname,"NULL") == 0) map[i] = -1;
     else error->all(FLERR,"Incorrect args for pair coefficients");
   }
 
   // clear setflag since coeff() called once with I,J = * *
 
   int n = atom->ntypes;
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       setflag[i][j] = 0;
 
   // set setflag i,j for type pairs where both are mapped to elements
 
   int count = 0;
   for (int i = 1; i <= n; i++)
     for (int j = i; j <= n; j++)
       if (map[i] >= 0 && map[j] >= 0) {
         setflag[i][j] = 1;
         count++;
       }
 
   if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
 
   // NOTE(review): a new sna array is allocated on every call and the
   // cleanup at the top of this function does not release an earlier
   // one - confirm it is freed elsewhere or that coeff() runs only once
 
   sna = new SNA*[nthreads];
 
   // allocate memory for per OpenMP thread data which
   // is wrapped into the sna class
 
 #if defined(_OPENMP)
 #pragma omp parallel default(none)
 #endif
   {
     int tid = omp_get_thread_num();
     sna[tid] = new SNA(lmp,rfac0,twojmax,
                        diagonalstyle,use_shared_arrays,
-		       rmin0,switchflag);
+		       rmin0,switchflag,bzeroflag);
     if (!use_shared_arrays)
       sna[tid]->grow_rij(nmax);
   }
 
   // the coefficient count has to match the count reported by SNA
 
   if (ncoeff != sna[0]->ncoeff) {
     printf("ncoeff = %d snancoeff = %d \n",ncoeff,sna[0]->ncoeff);
     error->all(FLERR,"Incorrect SNAP parameter file");
   }
 
   // Calculate maximum cutoff for all elements
 
   rcutmax = 0.0;
   for (int ielem = 0; ielem < nelements; ielem++)
     rcutmax = MAX(2.0*radelem[ielem]*rcutfac,rcutmax);
 
 }
 
 /* ----------------------------------------------------------------------
    init specific to this pair style:
    require newton pair on, request a full neighbor list and
    call init() on the SNA object of every thread
 ------------------------------------------------------------------------- */
 
 void PairSNAP::init_style()
 {
   if (!force->newton_pair)
     error->all(FLERR,"Pair style SNAP requires newton pair on");
 
   // need a full neighbor list
 
   const int ireq = neighbor->request(this,instance_me);
   neighbor->requests[ireq]->half = 0;
   neighbor->requests[ireq]->full = 1;
 
   // each thread initializes the SNA object indexed by its thread id
 
 #if defined(_OPENMP)
 #pragma omp parallel default(none)
 #endif
   {
     sna[omp_get_thread_num()]->init();
   }
 
 }
 
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 
 double PairSNAP::init_one(int i, int j)
 {
   if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
   return (radelem[map[i]] +
   	  radelem[map[j]])*rcutfac;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void PairSNAP::read_files(char *coefffilename, char *paramfilename)
 {
   // Read the SNAP coefficient file and the SNAP parameter file.
   // Proc 0 does all file I/O and broadcasts each line, so every proc
   // parses the same text:
   //   coefffilename -> ncoeff, radelem, wjelem, coeffelem
   //   paramfilename -> rcutfac, twojmax (both required) and
   //                    gamma, rfac0, rmin0, diagonalstyle, switchflag,
   //                    bzeroflag (optional, defaults set below)
   // Format problems are reported through error->all()/error->one().
 
   // open SNAP coefficient file on proc 0
 
   FILE *fpcoeff = NULL;
   if (comm->me == 0) {
     fpcoeff = force->open_potential(coefffilename);
     if (fpcoeff == NULL) {
       char str[128];
       // bounded write: a long file name is truncated, not overflowed
       snprintf(str,sizeof(str),
                "Cannot open SNAP coefficient file %s",coefffilename);
       error->one(FLERR,str);
     }
   }
 
   char line[MAXLINE],*ptr;
   int eof = 0;
 
   // header line: skip blank/comment-only lines until one has words
 
   int n;
   int nwords = 0;
   while (nwords == 0) {
     if (comm->me == 0) {
       ptr = fgets(line,MAXLINE,fpcoeff);
       if (ptr == NULL) {
         eof = 1;
         fclose(fpcoeff);
       } else n = strlen(line) + 1;
     }
     MPI_Bcast(&eof,1,MPI_INT,0,world);
     if (eof) break;
     MPI_Bcast(&n,1,MPI_INT,0,world);
     MPI_Bcast(line,n,MPI_CHAR,0,world);
 
     // strip comment, skip line if blank
 
     if ((ptr = strchr(line,'#'))) *ptr = '\0';
     nwords = atom->count_words(line);
   }
   if (nwords != 2)
     error->all(FLERR,"Incorrect format in SNAP coefficient file");
 
   // words = ptrs to all words in line
   // strip single and double quotes from words
   // a token made only of quote characters is counted as a word above
   // but yields NULL here, so check before dereferencing
 
   char* words[MAXWORD];
   words[0] = strtok(line,"' \t\n\r\f");
   words[1] = strtok(NULL,"' \t\n\r\f");
   if (words[0] == NULL || words[1] == NULL)
     error->all(FLERR,"Incorrect format in SNAP coefficient file");
 
   int nelemfile = atoi(words[0]);
   ncoeff = atoi(words[1])-1;
 
   // Set up element lists
 
   memory->create(radelem,nelements,"pair:radelem");
   memory->create(wjelem,nelements,"pair:wjelem");
   memory->create(coeffelem,nelements,ncoeff+1,"pair:coeffelem");
 
   int *found = new int[nelements];
   for (int ielem = 0; ielem < nelements; ielem++)
     found[ielem] = 0;
 
   // Loop over elements in the SNAP coefficient file
   // each element = one line "name radius weight" + ncoeff+1 coefficient lines
 
   for (int ielemfile = 0; ielemfile < nelemfile; ielemfile++) {
 
     if (comm->me == 0) {
       ptr = fgets(line,MAXLINE,fpcoeff);
       if (ptr == NULL) {
         eof = 1;
         fclose(fpcoeff);
       } else n = strlen(line) + 1;
     }
     MPI_Bcast(&eof,1,MPI_INT,0,world);
     if (eof)
       error->all(FLERR,"Incorrect format in SNAP coefficient file");
     MPI_Bcast(&n,1,MPI_INT,0,world);
     MPI_Bcast(line,n,MPI_CHAR,0,world);
 
     nwords = atom->count_words(line);
     if (nwords != 3)
       error->all(FLERR,"Incorrect format in SNAP coefficient file");
 
     words[0] = strtok(line,"' \t\n\r\f");
     words[1] = strtok(NULL,"' \t\n\r\f");
     words[2] = strtok(NULL,"' \t\n\r\f");
     if (words[0] == NULL || words[1] == NULL || words[2] == NULL)
       error->all(FLERR,"Incorrect format in SNAP coefficient file");
 
     char* elemtmp = words[0];
     double radtmp = atof(words[1]);
     double wjtmp = atof(words[2]);
 
     // skip if element name isn't in element list
     // only proc 0 holds the file, so only it consumes the coefficient lines
 
     int ielem;
     for (ielem = 0; ielem < nelements; ielem++)
       if (strcmp(elemtmp,elements[ielem]) == 0) break;
     if (ielem == nelements) {
       if (comm->me == 0)
         for (int icoeff = 0; icoeff <= ncoeff; icoeff++)
           ptr = fgets(line,MAXLINE,fpcoeff);
       continue;
     }
 
     // skip if element already appeared
 
     if (found[ielem]) {
       if (comm->me == 0)
         for (int icoeff = 0; icoeff <= ncoeff; icoeff++)
           ptr = fgets(line,MAXLINE,fpcoeff);
       continue;
     }
 
     found[ielem] = 1;
     radelem[ielem] = radtmp;
     wjelem[ielem] = wjtmp;
 
 
     if (comm->me == 0) {
       if (screen) fprintf(screen,"SNAP Element = %s, Radius %g, Weight %g \n",
                           elements[ielem], radelem[ielem], wjelem[ielem]);
       if (logfile) fprintf(logfile,"SNAP Element = %s, Radius %g, Weight %g \n",
                           elements[ielem], radelem[ielem], wjelem[ielem]);
     }
 
     for (int icoeff = 0; icoeff <= ncoeff; icoeff++) {
       if (comm->me == 0) {
         ptr = fgets(line,MAXLINE,fpcoeff);
         if (ptr == NULL) {
           eof = 1;
           fclose(fpcoeff);
         } else n = strlen(line) + 1;
       }
 
       MPI_Bcast(&eof,1,MPI_INT,0,world);
       if (eof)
         error->all(FLERR,"Incorrect format in SNAP coefficient file");
       MPI_Bcast(&n,1,MPI_INT,0,world);
       MPI_Bcast(line,n,MPI_CHAR,0,world);
 
       nwords = atom->count_words(line);
       if (nwords != 1)
         error->all(FLERR,"Incorrect format in SNAP coefficient file");
 
       words[0] = strtok(line,"' \t\n\r\f");
       if (words[0] == NULL)
         error->all(FLERR,"Incorrect format in SNAP coefficient file");
 
       coeffelem[ielem][icoeff] = atof(words[0]);
 
     }
   }
 
   // done with the coefficient file; every EOF path above ends in
   // error->all(), so the handle is still open when we get here
 
   if (comm->me == 0) fclose(fpcoeff);
 
   // NOTE(review): an element that never appears in the file leaves its
   // radelem/wjelem/coeffelem entries unset and is not reported here;
   // confirm whether that should be an error
 
   // set flags for required keywords
 
   rcutfacflag = 0;
   twojmaxflag = 0;
 
   // Set defaults for optional keywords
 
   gamma = 1.0;
   gammaoneflag = 1;
   rfac0 = 0.99363;
   rmin0 = 0.0;
   diagonalstyle = 3;
   switchflag = 1;
+  bzeroflag = 0;
   // open SNAP parameter file on proc 0
 
   FILE *fpparam = NULL;
   if (comm->me == 0) {
     fpparam = force->open_potential(paramfilename);
     if (fpparam == NULL) {
       char str[128];
       snprintf(str,sizeof(str),
                "Cannot open SNAP parameter file %s",paramfilename);
       error->one(FLERR,str);
     }
   }
 
   // one "keyword value" pair per non-blank line, until end of file
 
   eof = 0;
   while (1) {
     if (comm->me == 0) {
       ptr = fgets(line,MAXLINE,fpparam);
       if (ptr == NULL) {
         eof = 1;
         fclose(fpparam);
       } else n = strlen(line) + 1;
     }
     MPI_Bcast(&eof,1,MPI_INT,0,world);
     if (eof) break;
     MPI_Bcast(&n,1,MPI_INT,0,world);
     MPI_Bcast(line,n,MPI_CHAR,0,world);
 
     // strip comment, skip line if blank
 
     if ((ptr = strchr(line,'#'))) *ptr = '\0';
     nwords = atom->count_words(line);
     if (nwords == 0) continue;
 
     if (nwords != 2)
       error->all(FLERR,"Incorrect format in SNAP parameter file");
 
     // words = ptrs to all words in line
     // strip single and double quotes from words
 
     char* keywd = strtok(line,"' \t\n\r\f");
     char* keyval = strtok(NULL,"' \t\n\r\f");
     if (keywd == NULL || keyval == NULL)
       error->all(FLERR,"Incorrect format in SNAP parameter file");
 
     if (comm->me == 0) {
       if (screen) fprintf(screen,"SNAP keyword %s %s \n",keywd,keyval);
       if (logfile) fprintf(logfile,"SNAP keyword %s %s \n",keywd,keyval);
     }
 
     if (strcmp(keywd,"rcutfac") == 0) {
       rcutfac = atof(keyval);
       rcutfacflag = 1;
     } else if (strcmp(keywd,"twojmax") == 0) {
       twojmax = atoi(keyval);
       twojmaxflag = 1;
     } else if (strcmp(keywd,"gamma") == 0)
       gamma = atof(keyval);
     else if (strcmp(keywd,"rfac0") == 0)
       rfac0 = atof(keyval);
     else if (strcmp(keywd,"rmin0") == 0)
       rmin0 = atof(keyval);
     else if (strcmp(keywd,"diagonalstyle") == 0)
       diagonalstyle = atoi(keyval);
     else if (strcmp(keywd,"switchflag") == 0)
       switchflag = atoi(keyval);
+    else if (strcmp(keywd,"bzeroflag") == 0)
+      bzeroflag = atoi(keyval);
     else
       error->all(FLERR,"Incorrect SNAP parameter file");
   }
 
   // both required keywords must have been seen
 
   if (rcutfacflag == 0 || twojmaxflag == 0)
     error->all(FLERR,"Incorrect SNAP parameter file");
 
   if (gamma == 1.0) gammaoneflag = 1;
   else gammaoneflag = 0;
 
   delete[] found;
 }
 
 /* ----------------------------------------------------------------------
    memory usage
 ------------------------------------------------------------------------- */
 
 double PairSNAP::memory_usage()
 {
   // per-type tables are sized (ntypes+1) x (ntypes+1)
 
   const int ntp1 = atom->ntypes+1;
 
   // start from the base-class estimate and add this style's terms
 
   double total = Pair::memory_usage();
   total += ntp1*ntp1*sizeof(int);
   total += ntp1*ntp1*sizeof(double);
 
   // terms that scale with nmax
 
   total += 3*nmax*sizeof(double);
   total += nmax*sizeof(int);
 
   // terms that scale with the number of coefficients
 
   total += (2*ncoeff+1)*sizeof(double);
   total += (ncoeff*3)*sizeof(double);
 
   // every thread's SNA object is assumed to cost the same as sna[0]
 
   total += sna[0]->memory_usage()*nthreads;
   return total;
 }
 
diff --git a/src/SNAP/pair_snap.h b/src/SNAP/pair_snap.h
index a6395bfd6..559d3ef57 100644
--- a/src/SNAP/pair_snap.h
+++ b/src/SNAP/pair_snap.h
@@ -1,171 +1,171 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef PAIR_CLASS
 
 PairStyle(snap,PairSNAP)
 
 #else
 
 #ifndef LMP_PAIR_SNAP_H
 #define LMP_PAIR_SNAP_H
 
 #include "pair.h"
 
 namespace LAMMPS_NS {
 
 class PairSNAP : public Pair {
   // Pair style "snap": reads a SNAP coefficient file and a SNAP
   // parameter file (read_files) and drives one SNA object per thread.
 public:
   PairSNAP(class LAMMPS *);
   ~PairSNAP();
   void compute(int, int);
   void compute_regular(int, int);
   void compute_optimized(int, int);
   void settings(int, char **);
   void coeff(int, char **);
   void init_style();            // checks newton pair, requests full neighbor list, inits each SNA
   double init_one(int, int);    // returns (radelem[i elem] + radelem[j elem])*rcutfac
   double memory_usage();
 
 protected:
   int ncoeff;                   // coefficient count from the coefficient file header minus one;
                                 // coeff() requires it to equal sna[0]->ncoeff
   double **bvec, ***dbvec;
   class SNA** sna;              // array of nthreads SNA pointers, indexed by OpenMP thread id
   int nmax;
   int nthreads;
   void allocate();
   void read_files(char *, char *);   // args: coefficient file name, parameter file name
   inline int equal(double* x,double* y);
   inline double dist2(double* x,double* y);
   double extra_cutoff();
   void load_balance();
   void set_sna_to_shared(int snaid,int i);
   void build_per_atom_arrays();
 
   int schedule_user;
   double schedule_time_guided;
   double schedule_time_dynamic;
 
   int ncalls_neigh;
   int do_load_balance;
   int ilistmask_max;
   int* ilistmask;
   int ghostinum;
   int ghostilist_max;
   int* ghostilist;
   int ghostnumneigh_max;
   int* ghostnumneigh;
   int* ghostneighs;
   int* ghostfirstneigh;
   int ghostneighs_total;
   int ghostneighs_max;
 
   int use_optimized;
   int use_shared_arrays;        // passed to each SNA constructor; when 0, coeff() calls grow_rij(nmax)
 
   int i_max;
   int i_neighmax;
   int i_numpairs;
   int **i_pairs;
   double ***i_rij;
   int **i_inside;
   double **i_wj;
   double **i_rcutij;
   int *i_ninside;
   double ****i_uarraytot_r, ****i_uarraytot_i;
   double ******i_zarray_r, ******i_zarray_i;
 
 #ifdef TIMING_INFO
   //  timespec starttime, endtime;
   double timers[4];
 #endif
   double gamma;                 // optional parameter-file keyword, default 1.0
 
   double rcutmax;               // max cutoff for all elements
   int nelements;                // # of unique elements
   char **elements;              // names of unique elements
   double *radelem;              // element radii
   double *wjelem;               // elements weights
   double **coeffelem;           // element bispectrum coefficients
   int *map;                     // mapping from atom types to elements
   // integer settings read from the SNAP parameter file
-  int twojmax, diagonalstyle, switchflag;
+  int twojmax, diagonalstyle, switchflag, bzeroflag;
   // rcutfac, rfac0, rmin0 are read from the SNAP parameter file
   double rcutfac, rfac0, rmin0, wj1, wj2;
   int rcutfacflag, twojmaxflag; // flags for required parameters
   int gammaoneflag;              // 1 if parameter gamma is 1
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Communication cutoff too small for SNAP micro load balancing
 
 This can happen if you change the neighbor skin after your pair_style
 command or if your box dimensions grow during a run. You can set the
 cutoff explicitly via the comm_modify cutoff command.
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Must set number of threads via package omp command
 
 Because you are using the USER-OMP package, set the number of threads
 via its settings, not by the pair_style snap nthreads setting.
 
 W: Communication cutoff is too small for SNAP micro load balancing, increased to %lf
 
 Self-explanatory.
 
 E: Incorrect args for pair coefficients
 
 Self-explanatory.  Check the input script or data file.
 
 E: Incorrect SNAP parameter file
 
 The file cannot be parsed correctly, check its internal syntax.
 
 E: Pair style SNAP requires newton pair on
 
 See the newton command.  This is a restriction to use the SNAP
 potential.
 
 E: All pair coeffs are not set
 
 All pair coefficients must be set in the data file or by the
 pair_coeff command before running a simulation.
 
 E: Cannot open SNAP coefficient file %s
 
 The specified SNAP coefficient file cannot be opened.  Check that the
 path and name are correct.
 
 E: Incorrect format in SNAP coefficient file
 
 Incorrect number of words per line in the coefficient file.
 
 E: Cannot open SNAP parameter file %s
 
 The specified SNAP parameter file cannot be opened.  Check that the
 path and name are correct.
 
 E: Incorrect format in SNAP parameter file
 
 Incorrect number of words per line in the parameter file.
 
 */
diff --git a/src/SNAP/sna.cpp b/src/SNAP/sna.cpp
index 8b16b8933..2c20e78b7 100644
--- a/src/SNAP/sna.cpp
+++ b/src/SNAP/sna.cpp
@@ -1,1955 +1,1972 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Aidan Thompson, Christian Trott, SNL
 ------------------------------------------------------------------------- */
 
 #include "sna.h"
 #include <math.h>
 #include "math_const.h"
 #include "math_extra.h"
 #include <string.h>
 #include <stdlib.h>
 #include "openmp_snap.h"
 
 #include "memory.h"
 #include "error.h"
 #include "comm.h"
 #include "atom.h"
 
 using namespace std;
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
 /* ----------------------------------------------------------------------
 
    this implementation is based on the method outlined
    in Bartok[1], using formulae from VMK[2].
 
    for the Clebsch-Gordan coefficients, we
    convert the VMK half-integral labels
    a, b, c, alpha, beta, gamma
    to array offsets j1, j2, j, m1, m2, m
    using the following relations:
 
    j1 = 2*a
    j2 = 2*b
    j =  2*c
 
    m1 = alpha+a      2*alpha = 2*m1 - j1
    m2 = beta+b    or 2*beta = 2*m2 - j2
    m =  gamma+c      2*gamma = 2*m - j
 
    in this way:
 
    -a <= alpha <= a
    -b <= beta <= b
    -c <= gamma <= c
 
    becomes:
 
    0 <= m1 <= j1
    0 <= m2 <= j2
    0 <= m <= j
 
    and the requirement that
    a+b+c be integral implies that
    j1+j2+j must be even.
    The requirement that:
 
    gamma = alpha+beta
 
    becomes:
 
    2*m - j = 2*m1 - j1 + 2*m2 - j2
 
    Similarly, for the Wigner U-functions U(J,m,m') we
    convert the half-integral labels J,m,m' to
    array offsets j,ma,mb:
 
    j = 2*J
    ma = J+m
    mb = J+m'
 
    so that:
 
    0 <= j <= 2*Jmax
    0 <= ma, mb <= j.
 
    For the bispectrum components B(J1,J2,J) we convert to:
 
    j1 = 2*J1
    j2 = 2*J2
    j = 2*J
 
    and the requirement:
 
    |J1-J2| <= J <= J1+J2, for j1+j2+j integral
 
    becomes:
 
    |j1-j2| <= j <= j1+j2, for j1+j2+j even integer
 
    or
 
    j = |j1-j2|, |j1-j2|+2,...,j1+j2-2,j1+j2
 
    [1] Albert Bartok-Partay, "Gaussian Approximation..."
    Doctoral Thesis, Cambridge University, (2009)
 
    [2] D. A. Varshalovich, A. N. Moskalev, and V. K. Khersonskii,
    "Quantum Theory of Angular Momentum," World Scientific (1988)
 
 ------------------------------------------------------------------------- */
 
 SNA::SNA(LAMMPS* lmp, double rfac0_in,
          int twojmax_in, int diagonalstyle_in, int use_shared_arrays_in,
-         double rmin0_in, int switch_flag_in) : Pointers(lmp)
+         double rmin0_in, int switch_flag_in, int bzero_flag_in) : Pointers(lmp)
 {
   // Stores the settings passed in, sizes and allocates bvec/dbvec from
   // compute_ncoeff(), and builds the (j1,j2,j) index list.
   // The per-neighbor arrays (rij, inside, wj, rcutij) start out NULL with
   // nmax = 0; they are sized later by grow_rij().
 
   // weight of the self term added in compute_ui()
   wself = 1.0;
-
+  
   use_shared_arrays = use_shared_arrays_in;
   rfac0 = rfac0_in;
   rmin0 = rmin0_in;
   switch_flag = switch_flag_in;
   bzero_flag = bzero_flag_in;
 
   twojmax = twojmax_in;
   diagonalstyle = diagonalstyle_in;
 
   // twojmax and diagonalstyle are set before these calls
   ncoeff = compute_ncoeff();
 
   create_twojmax_arrays();
 
   bvec = NULL;
   dbvec = NULL;
   memory->create(bvec, ncoeff, "pair:bvec");
   memory->create(dbvec, ncoeff, 3, "pair:dbvec");
   rij = NULL;
   inside = NULL;
   wj = NULL;
   rcutij = NULL;
   nmax = 0;
   idxj = NULL;
 
   // bzero[j] = wself^3 * (j+1) is what compute_bi() subtracts from
   // every barray[j1][j2][j] when bzero_flag is set
   // NOTE(review): bzero must already hold twojmax+1 entries here;
   // presumably create_twojmax_arrays() allocates it - confirm
+  if (bzero_flag) {
+    double www = wself*wself*wself;
+    for(int j = 0; j <= twojmax; j++)
+      bzero[j] = www*(j+1);
+  }
+  
 #ifdef TIMING_INFO
   timers = new double[20];
   for(int i = 0; i < 20; i++) timers[i] = 0;
   print = 0;
   counter = 0;
 #endif
 
   build_indexlist();
 
+  
 }
 
 /* ---------------------------------------------------------------------- */
 
 SNA::~SNA()
 {
   // arrays are released here only when this instance is not
   // running in shared-array mode
 
   const bool private_arrays = (use_shared_arrays == 0);
 
   if (private_arrays) {
     destroy_twojmax_arrays();
 
     // per-neighbor storage
     memory->destroy(rij);
     memory->destroy(inside);
     memory->destroy(wj);
     memory->destroy(rcutij);
 
     // bispectrum vector and its derivative
     memory->destroy(bvec);
     memory->destroy(dbvec);
   }
 
   // the index list from build_indexlist() is released in either mode
 
   delete[] idxj;
 }
 
 void SNA::build_indexlist()
 {
   if(diagonalstyle == 0) {
     int idxj_count = 0;
 
     for(int j1 = 0; j1 <= twojmax; j1++)
       for(int j2 = 0; j2 <= j1; j2++)
         for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2)
           idxj_count++;
 
     // indexList can be changed here
 
     idxj = new SNA_LOOPINDICES[idxj_count];
     idxj_max = idxj_count;
 
     idxj_count = 0;
 
     for(int j1 = 0; j1 <= twojmax; j1++)
       for(int j2 = 0; j2 <= j1; j2++)
         for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) {
           idxj[idxj_count].j1 = j1;
           idxj[idxj_count].j2 = j2;
           idxj[idxj_count].j = j;
           idxj_count++;
         }
   }
 
   if(diagonalstyle == 1) {
     int idxj_count = 0;
 
     for(int j1 = 0; j1 <= twojmax; j1++)
       for(int j = 0; j <= MIN(twojmax, 2 * j1); j += 2) {
         idxj_count++;
       }
 
     // indexList can be changed here
 
     idxj = new SNA_LOOPINDICES[idxj_count];
     idxj_max = idxj_count;
 
     idxj_count = 0;
 
     for(int j1 = 0; j1 <= twojmax; j1++)
       for(int j = 0; j <= MIN(twojmax, 2 * j1); j += 2) {
         idxj[idxj_count].j1 = j1;
         idxj[idxj_count].j2 = j1;
         idxj[idxj_count].j = j;
         idxj_count++;
       }
   }
 
   if(diagonalstyle == 2) {
     int idxj_count = 0;
 
     for(int j1 = 0; j1 <= twojmax; j1++) {
       idxj_count++;
     }
 
     // indexList can be changed here
 
     idxj = new SNA_LOOPINDICES[idxj_count];
     idxj_max = idxj_count;
 
     idxj_count = 0;
 
     for(int j1 = 0; j1 <= twojmax; j1++) {
       idxj[idxj_count].j1 = j1;
       idxj[idxj_count].j2 = j1;
       idxj[idxj_count].j = j1;
       idxj_count++;
     }
   }
 
   if(diagonalstyle == 3) {
     int idxj_count = 0;
 
     for(int j1 = 0; j1 <= twojmax; j1++)
       for(int j2 = 0; j2 <= j1; j2++)
         for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2)
           if (j >= j1) idxj_count++;
 
     // indexList can be changed here
 
     idxj = new SNA_LOOPINDICES[idxj_count];
     idxj_max = idxj_count;
 
     idxj_count = 0;
 
     for(int j1 = 0; j1 <= twojmax; j1++)
       for(int j2 = 0; j2 <= j1; j2++)
         for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2)
 	  if (j >= j1) {
 	    idxj[idxj_count].j1 = j1;
 	    idxj[idxj_count].j2 = j2;
 	    idxj[idxj_count].j = j;
 	    idxj_count++;
 	  }
   }
 
 }
 /* ---------------------------------------------------------------------- */
 
 void SNA::init()
 {
   // runs the two table-initialization helpers in this order;
   // PairSNAP::init_style() calls this once for each thread's SNA object
   // (presumably these fill the Clebsch-Gordan and rootpq tables - confirm)
   init_clebsch_gordan();
   init_rootpqarray();
 }
 
 
 void SNA::grow_rij(int newnmax)
 {
   // the per-neighbor arrays never shrink
 
   if (nmax >= newnmax) return;
 
   nmax = newnmax;
 
   // in shared-array mode only the new size is recorded;
   // nothing is (re)allocated by this function
 
   if (use_shared_arrays) return;
 
   // old contents are not preserved: free everything, then allocate anew
 
   memory->destroy(rij);
   memory->destroy(inside);
   memory->destroy(wj);
   memory->destroy(rcutij);
 
   memory->create(rij, nmax, 3, "pair:rij");
   memory->create(inside, nmax, "pair:inside");
   memory->create(wj, nmax, "pair:wj");
   memory->create(rcutij, nmax, "pair:rcutij");
 }
 /* ----------------------------------------------------------------------
    compute Ui by summing over neighbors j
 ------------------------------------------------------------------------- */
 
 void SNA::compute_ui(int jnum)
 {
   double rsq, r, x, y, z, z0, theta0;
 
   // utot(j,ma,mb) = 0 for all j,ma,ma
   // utot(j,ma,ma) = 1 for all j,ma
   // for j in neighbors of i:
   //   compute r0 = (x,y,z,z0)
   //   utot(j,ma,mb) += u(r0;j,ma,mb) for all j,ma,mb
 
   zero_uarraytot();
   addself_uarraytot(wself);
 
 #ifdef TIMING_INFO
   clock_gettime(CLOCK_REALTIME, &starttime);
 #endif
 
   for(int j = 0; j < jnum; j++) {
     x = rij[j][0];
     y = rij[j][1];
     z = rij[j][2];
     rsq = x * x + y * y + z * z;
     r = sqrt(rsq);
 
     theta0 = (r - rmin0) * rfac0 * MY_PI / (rcutij[j] - rmin0);
     //    theta0 = (r - rmin0) * rscale0;
     z0 = r / tan(theta0);
 
     compute_uarray(x, y, z, z0, r);
     add_uarraytot(r, wj[j], rcutij[j]);
   }
 
 #ifdef TIMING_INFO
   clock_gettime(CLOCK_REALTIME, &endtime);
   timers[0] += (endtime.tv_sec - starttime.tv_sec + 1.0 *
                 (endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
 #endif
 
 }
 
 void SNA::compute_ui_omp(int jnum, int sub_threads)
 {
   // Same accumulation as compute_ui(), but each neighbor's u array is
   // computed by compute_uarray_omp() inside a parallel region that is
   // asked to use sub_threads threads.
   // x,y,z,z0,r are declared at function scope because they are named in
   // the shared() clause of the pragma below.
   double rsq, r, x, y, z, z0, theta0;
 
   // utot(j,ma,mb) = 0 for all j,ma,mb
   // utot(j,ma,ma) = 1 for all j,ma
   // for j in neighbors of i:
   //   compute r0 = (x,y,z,z0)
   //   utot(j,ma,mb) += u(r0;j,ma,mb) for all j,ma,mb
 
   zero_uarraytot();
   addself_uarraytot(wself);
 
   for(int j = 0; j < jnum; j++) {
     x = rij[j][0];
     y = rij[j][1];
     z = rij[j][2];
     rsq = x * x + y * y + z * z;
     r = sqrt(rsq);
     theta0 = (r - rmin0) * rfac0 * MY_PI / (rcutij[j] - rmin0);
     //    theta0 = (r - rmin0) * rscale0;
     z0 = r / tan(theta0);
     // NOTE(review): called on every iteration and outside any _OPENMP
     // guard; presumably openmp_snap.h provides a stub for serial builds
     // - confirm
     omp_set_num_threads(sub_threads);
 
 #if defined(_OPENMP)
 #pragma omp parallel shared(x,y,z,z0,r,sub_threads) default(none)
 #endif
     {
       compute_uarray_omp(x, y, z, z0, r, sub_threads);
     }
     // accumulation into utot happens after the parallel region has ended
     add_uarraytot(r, wj[j], rcutij[j]);
   }
 
 
 }
 
 /* ----------------------------------------------------------------------
    compute Zi by summing over products of Ui
 ------------------------------------------------------------------------- */
 
 void SNA::compute_zi()
 {
   // Fills zarray_r/zarray_i (real and imaginary parts of Z) from
   // uarraytot_r/uarraytot_i and the coefficients in cgarray.
   // Only entries with j2 <= j1 and 2*mb <= j are written by this routine.
 
   // for j1 = 0,...,twojmax
   //   for j2 = 0,twojmax
   //     for j = |j1-j2|,Min(twojmax,j1+j2),2
   //        for ma = 0,...,j
   //          for mb = 0,...,jmid
   //            z(j1,j2,j,ma,mb) = 0
   //            for ma1 = Max(0,ma+(j1-j2-j)/2),Min(j1,ma+(j1+j2-j)/2)
   //              sumb1 = 0
   //              ma2 = ma-ma1+(j1+j2-j)/2;
   //              for mb1 = Max(0,mb+(j1-j2-j)/2),Min(j1,mb+(j1+j2-j)/2)
   //                mb2 = mb-mb1+(j1+j2-j)/2;
   //                sumb1 += cg(j1,mb1,j2,mb2,j) *
   //                  u(j1,ma1,mb1) * u(j2,ma2,mb2)
   //              z(j1,j2,j,ma,mb) += sumb1*cg(j1,ma1,j2,ma2,j)
 
 #ifdef TIMING_INFO
   clock_gettime(CLOCK_REALTIME, &starttime);
 #endif
 
   // compute_dbidrj() requires full j1/j2/j chunk of z elements
   // use zarray j1/j2 symmetry
 
   for(int j1 = 0; j1 <= twojmax; j1++)
     for(int j2 = 0; j2 <= j1; j2++) {
       // j1 - j2 is non-negative here because j2 <= j1
       for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) {
 	double sumb1_r, sumb1_i;
 	int ma2, mb2;
 	// mb runs over the lower half only: 2*mb <= j
 	for(int mb = 0; 2*mb <= j; mb++)
 	  for(int ma = 0; ma <= j; ma++) {
 	    zarray_r[j1][j2][j][ma][mb] = 0.0;
 	    zarray_i[j1][j2][j][ma][mb] = 0.0;
 
 	    for(int ma1 = MAX(0, (2 * ma - j - j2 + j1) / 2);
 		ma1 <= MIN(j1, (2 * ma - j + j2 + j1) / 2); ma1++) {
 	      sumb1_r = 0.0;
 	      sumb1_i = 0.0;
 
 	      // ma2 is fixed by ma and ma1
 	      ma2 = (2 * ma - j - (2 * ma1 - j1) + j2) / 2;
 
 	      for(int mb1 = MAX(0, (2 * mb - j - j2 + j1) / 2);
               mb1 <= MIN(j1, (2 * mb - j + j2 + j1) / 2); mb1++) {
 
 		mb2 = (2 * mb - j - (2 * mb1 - j1) + j2) / 2;
 		// complex product u(j1)*u(j2): real part, then imaginary part
 		sumb1_r += cgarray[j1][j2][j][mb1][mb2] *
 		  (uarraytot_r[j1][ma1][mb1] * uarraytot_r[j2][ma2][mb2] -
 		   uarraytot_i[j1][ma1][mb1] * uarraytot_i[j2][ma2][mb2]);
 		sumb1_i += cgarray[j1][j2][j][mb1][mb2] *
 		  (uarraytot_r[j1][ma1][mb1] * uarraytot_i[j2][ma2][mb2] +
 		   uarraytot_i[j1][ma1][mb1] * uarraytot_r[j2][ma2][mb2]);
 	      } // end loop over mb1
 
 	      zarray_r[j1][j2][j][ma][mb] +=
 		sumb1_r * cgarray[j1][j2][j][ma1][ma2];
 	      zarray_i[j1][j2][j][ma][mb] +=
 		sumb1_i * cgarray[j1][j2][j][ma1][ma2];
 	    } // end loop over ma1
 	  } // end loop over ma, mb
       } // end loop over j
     } // end loop over j1, j2
 
 #ifdef TIMING_INFO
   clock_gettime(CLOCK_REALTIME, &endtime);
   timers[1] += (endtime.tv_sec - starttime.tv_sec + 1.0 *
                 (endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
 #endif
 }
 
 void SNA::compute_zi_omp(int sub_threads)
 {
   // OpenMP variant of compute_zi(): the outermost (j1) loop carries the
   // "parallel for" pragma.  Unlike compute_zi(), mb runs over the full
   // range 0,...,j here.
 
   // for j1 = 0,...,twojmax
   //   for j2 = 0,twojmax
   //     for j = |j1-j2|,Min(twojmax,j1+j2),2
   //        for ma = 0,...,j
   //          for mb = 0,...,j
   //            z(j1,j2,j,ma,mb) = 0
   //            for ma1 = Max(0,ma+(j1-j2-j)/2),Min(j1,ma+(j1+j2-j)/2)
   //              sumb1 = 0
   //              ma2 = ma-ma1+(j1+j2-j)/2;
   //              for mb1 = Max(0,mb+(j1-j2-j)/2),Min(j1,mb+(j1+j2-j)/2)
   //                mb2 = mb-mb1+(j1+j2-j)/2;
   //                sumb1 += cg(j1,mb1,j2,mb2,j) *
   //                  u(j1,ma1,mb1) * u(j2,ma2,mb2)
   //              z(j1,j2,j,ma,mb) += sumb1*cg(j1,ma1,j2,ma2,j)
 
   // the thread count is only changed when already inside a parallel region
   if(omp_in_parallel())
     omp_set_num_threads(sub_threads);
 
   // compute_dbidrj() requires full j1/j2/j chunk of z elements
   // use zarray j1/j2 symmetry
 
 #if defined(_OPENMP)
 #pragma omp parallel for schedule(auto) default(none)
 #endif
   for(int j1 = 0; j1 <= twojmax; j1++)
     for(int j2 = 0; j2 <= j1; j2++)
       for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) {
 
     // scratch variables are declared inside the loop body
     double sumb1_r, sumb1_i;
     int ma2, mb2;
 
     for(int ma = 0; ma <= j; ma++)
       for(int mb = 0; mb <= j; mb++) {
         zarray_r[j1][j2][j][ma][mb] = 0.0;
         zarray_i[j1][j2][j][ma][mb] = 0.0;
 
         for(int ma1 = MAX(0, (2 * ma - j - j2 + j1) / 2);
             ma1 <= MIN(j1, (2 * ma - j + j2 + j1) / 2); ma1++) {
           sumb1_r = 0.0;
           sumb1_i = 0.0;
 
           ma2 = (2 * ma - j - (2 * ma1 - j1) + j2) / 2;
 
           for(int mb1 = MAX(0, (2 * mb - j - j2 + j1) / 2);
               mb1 <= MIN(j1, (2 * mb - j + j2 + j1) / 2); mb1++) {
 
             mb2 = (2 * mb - j - (2 * mb1 - j1) + j2) / 2;
             // complex product u(j1)*u(j2): real part, then imaginary part
             sumb1_r += cgarray[j1][j2][j][mb1][mb2] *
 	      (uarraytot_r[j1][ma1][mb1] * uarraytot_r[j2][ma2][mb2] -
 	       uarraytot_i[j1][ma1][mb1] * uarraytot_i[j2][ma2][mb2]);
             sumb1_i += cgarray[j1][j2][j][mb1][mb2] *
 	      (uarraytot_r[j1][ma1][mb1] * uarraytot_i[j2][ma2][mb2] +
 	       uarraytot_i[j1][ma1][mb1] * uarraytot_r[j2][ma2][mb2]);
           }
 
           zarray_r[j1][j2][j][ma][mb] +=
             sumb1_r * cgarray[j1][j2][j][ma1][ma2];
           zarray_i[j1][j2][j][ma][mb] +=
             sumb1_i * cgarray[j1][j2][j][ma1][ma2];
         }
       }
   }
 }
 
 /* ----------------------------------------------------------------------
    compute Bi by summing conj(Ui)*Zi
 ------------------------------------------------------------------------- */
 
 void SNA::compute_bi()
 {
   // Fills barray[j1][j2][j] for j2 <= j1 from uarraytot and zarray.
   // Only the columns with 2*mb <= j are summed; the result is doubled,
   // with the centre element of an even-j block weighted by 0.5 first.
   // When bzero_flag is set, bzero[j] is subtracted from each entry.
 
   // for j1 = 0,...,twojmax
   //   for j2 = 0,twojmax
   //     for j = |j1-j2|,Min(twojmax,j1+j2),2
   //        b(j1,j2,j) = 0
   //        for mb = 0,...,jmid
   //          for ma = 0,...,j
   //            b(j1,j2,j) +=
   //              2*Conj(u(j,ma,mb))*z(j1,j2,j,ma,mb)
 
 #ifdef TIMING_INFO
   clock_gettime(CLOCK_REALTIME, &starttime);
 #endif
 
   for(int j1 = 0; j1 <= twojmax; j1++)
     for(int j2 = 0; j2 <= j1; j2++) {
       for(int j = abs(j1 - j2);
           j <= MIN(twojmax, j1 + j2); j += 2) {
         barray[j1][j2][j] = 0.0;
 
 	// columns strictly below the middle (2*mb < j): all ma;
 	// each term is the real part of Conj(u)*z
-	for(int mb = 0; 2*mb < j; mb++) {
-	  for(int ma = 0; ma <= j; ma++) {
+	for(int mb = 0; 2*mb < j; mb++)
+	  for(int ma = 0; ma <= j; ma++)
             barray[j1][j2][j] +=
               uarraytot_r[j][ma][mb] * zarray_r[j1][j2][j][ma][mb] +
 	      uarraytot_i[j][ma][mb] * zarray_i[j1][j2][j][ma][mb];
-	  }
-	}
 
 	// For j even, special treatment for middle column
 	// (mb = j/2): rows ma < mb count fully, the centre ma = mb counts half
 
 	if (j%2 == 0) {
 	  int mb = j/2;
 	  for(int ma = 0; ma < mb; ma++)
 	    barray[j1][j2][j] +=
 	      uarraytot_r[j][ma][mb] * zarray_r[j1][j2][j][ma][mb] +
 	      uarraytot_i[j][ma][mb] * zarray_i[j1][j2][j][ma][mb];
 	  int ma = mb;
 	  barray[j1][j2][j] +=
 	    (uarraytot_r[j][ma][mb] * zarray_r[j1][j2][j][ma][mb] +
 	     uarraytot_i[j][ma][mb] * zarray_i[j1][j2][j][ma][mb])*0.5;
 	}
 
         // the factor 2 from the formula in the header comment
         barray[j1][j2][j] *= 2.0;
+	if (bzero_flag)
+	  barray[j1][j2][j] -= bzero[j];
       }
     }
 
 #ifdef TIMING_INFO
   clock_gettime(CLOCK_REALTIME, &endtime);
   timers[2] += (endtime.tv_sec - starttime.tv_sec + 1.0 *
                 (endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
 #endif
 
 }
 
 /* ----------------------------------------------------------------------
    copy Bi array to a vector
 ------------------------------------------------------------------------- */
 
 void SNA::copy_bi2bvec()
 {
   // Flatten the selected barray[j1][j2][j] entries into bvec.
   // diagonalstyle picks which triples are copied; within each style the
   // entries are written in ascending j1, then j2, then j order.
   // An unrecognized diagonalstyle copies nothing.
 
   int nout = 0;
 
   switch (diagonalstyle) {
 
   case 0:
     // all j2 <= j1, all allowed j
     for (int j1 = 0; j1 <= twojmax; j1++)
       for (int j2 = 0; j2 <= j1; j2++)
         for (int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2)
           bvec[nout++] = barray[j1][j2][j];
     break;
 
   case 1:
     // j2 = j1, so j starts at 0
     for (int j1 = 0; j1 <= twojmax; j1++)
       for (int j = 0; j <= MIN(twojmax, j1 + j1); j += 2)
         bvec[nout++] = barray[j1][j1][j];
     break;
 
   case 2:
     // j = j2 = j1
     for (int j1 = 0; j1 <= twojmax; j1++)
       bvec[nout++] = barray[j1][j1][j1];
     break;
 
   case 3:
     // as style 0, keeping only j >= j1
     for (int j1 = 0; j1 <= twojmax; j1++)
       for (int j2 = 0; j2 <= j1; j2++)
         for (int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2)
           if (j >= j1)
             bvec[nout++] = barray[j1][j2][j];
     break;
 
   default:
     break;
   }
 }
 
 /* ----------------------------------------------------------------------
    calculate derivative of Ui w.r.t. atom j
 ------------------------------------------------------------------------- */
 
 void SNA::compute_duidrj(double* rij, double wj, double rcut)
 {
   // rij = displacement vector to one neighbor (3 components)
   // wj, rcut = that neighbor's weight and cutoff, forwarded unchanged
   // to compute_duarray()
 
   const double x = rij[0];
   const double y = rij[1];
   const double z = rij[2];
   const double rsq = x * x + y * y + z * z;
   const double r = sqrt(rsq);
 
   // theta0 = (r - rmin0) * rscale0,  z0 = r * cos(theta0) / sin(theta0)
 
   const double rscale0 = rfac0 * MY_PI / (rcut - rmin0);
   const double theta0 = (r - rmin0) * rscale0;
   const double cs = cos(theta0);
   const double sn = sin(theta0);
   const double z0 = r * cs / sn;
 
   // derivative of z0 with respect to r
 
   const double dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq;
 
 #ifdef TIMING_INFO
   clock_gettime(CLOCK_REALTIME, &starttime);
 #endif
 
   compute_duarray(x, y, z, z0, r, dz0dr, wj, rcut);
 
 #ifdef TIMING_INFO
   clock_gettime(CLOCK_REALTIME, &endtime);
   timers[3] += (endtime.tv_sec - starttime.tv_sec + 1.0 *
                 (endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
 #endif
 
 }
 
 /* ----------------------------------------------------------------------
    calculate derivative of Bi w.r.t. atom j
    variant using indexlist for j1,j2,j
    variant not using symmetry relation
 ------------------------------------------------------------------------- */
 
 void SNA::compute_dbidrj_nonsymm()
 {
   // Fills dbarray[j1][j2][j][0..2] for every (j1,j2,j) triple listed in idxj,
   // reading duarray_{r,i}, uarraytot_{r,i}, zarray_{r,i} and cgarray.
   // The loop structure below follows this pseudo-code:
   //
   // for j1 = 0,...,twojmax
   //   for j2 = 0,twojmax
   //     for j = |j1-j2|,Min(twojmax,j1+j2),2
   //        dbdr(j1,j2,j) = 0
   //        for ma = 0,...,j
   //          for mb = 0,...,j
   //            dzdr = 0
   //            for ma1 = Max(0,ma+(j1-j2-j)/2),Min(j1,ma+(j1+j2-j)/2)
   //              sumb1 = 0
   //              ma2 = ma-ma1+(j1+j2-j)/2;
   //              for mb1 = Max(0,mb+(j1-j2-j)/2),Min(j1,mb+(j1+j2-j)/2)
   //                mb2 = mb-mb1+(j1+j2-j)/2;
   //                sumb1 += cg(j1,mb1,j2,mb2,j) *
   //                  (dudr(j1,ma1,mb1) * u(j2,ma2,mb2) +
   //                  u(j1,ma1,mb1) * dudr(j2,ma2,mb2))
   //              dzdr += sumb1*cg(j1,ma1,j2,ma2,j)
   //            dbdr(j1,j2,j) +=
   //              Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb) +
   //              Conj(u(j,ma,mb))*dzdr
 
   double* dbdr;
   double* dudr_r, *dudr_i;
   double sumb1_r[3], sumb1_i[3], dzdr_r[3], dzdr_i[3];
   int ma2;
 
 #ifdef TIMING_INFO
   clock_gettime(CLOCK_REALTIME, &starttime);
 #endif
 
   for(int JJ = 0; JJ < idxj_max; JJ++) {
     const int j1 = idxj[JJ].j1;
     const int j2 = idxj[JJ].j2;
     const int j = idxj[JJ].j;
 
     // output slot for this triple; reset before accumulation
     dbdr = dbarray[j1][j2][j];
     dbdr[0] = 0.0;
     dbdr[1] = 0.0;
     dbdr[2] = 0.0;
 
     // cache the j1/j2 layers so the inner loops index only by (ma,mb)
     double** *j1duarray_r = duarray_r[j1];
     double** *j2duarray_r = duarray_r[j2];
     double** *j1duarray_i = duarray_i[j1];
     double** *j2duarray_i = duarray_i[j2];
     double** j1uarraytot_r = uarraytot_r[j1];
     double** j2uarraytot_r = uarraytot_r[j2];
     double** j1uarraytot_i = uarraytot_i[j1];
     double** j2uarraytot_i = uarraytot_i[j2];
     double** j1j2jcgarray = cgarray[j1][j2][j];
 
     for(int ma = 0; ma <= j; ma++)
       for(int mb = 0; mb <= j; mb++) {
         dzdr_r[0] = 0.0;
         dzdr_r[1] = 0.0;
         dzdr_r[2] = 0.0;
         dzdr_i[0] = 0.0;
         dzdr_i[1] = 0.0;
         dzdr_i[2] = 0.0;
 
         // exclusive upper bounds (hence the +1) for the mb1 and ma1 loops
         const int max_mb1 = MIN(j1, (2 * mb - j + j2 + j1) / 2) + 1;
         const int max_ma1 = MIN(j1, (2 * ma - j + j2 + j1) / 2) + 1;
 
         for(int ma1 = MAX(0, (2 * ma - j - j2 + j1) / 2);
             ma1 < max_ma1; ma1++) {
 
           // ma2 is fixed by ma and ma1
           ma2 = (2 * ma - j - (2 * ma1 - j1) + j2) / 2;
           sumb1_r[0] = 0.0;
           sumb1_r[1] = 0.0;
           sumb1_r[2] = 0.0;
           sumb1_i[0] = 0.0;
           sumb1_i[1] = 0.0;
           sumb1_i[2] = 0.0;
 
           //inside loop 54 operations (mul and add)
           // mb2 decreases in lock-step as mb1 increases
           for(int mb1 = MAX(0, (2 * mb - j - j2 + j1) / 2),
               mb2 = mb + (j1 + j2 - j) / 2 - mb1;
               mb1 < max_mb1; mb1++, mb2--) {
 
             double* dudr1_r, *dudr1_i, *dudr2_r, *dudr2_i;
 
             dudr1_r = j1duarray_r[ma1][mb1];
             dudr2_r = j2duarray_r[ma2][mb2];
             dudr1_i = j1duarray_i[ma1][mb1];
             dudr2_i = j2duarray_i[ma2][mb2];
 
             // fold the coupling coefficient into the u factors once per (mb1,mb2)
             const double cga_mb1mb2 = j1j2jcgarray[mb1][mb2];
             const double uat_r_ma2mb2 = cga_mb1mb2 * j2uarraytot_r[ma2][mb2];
             const double uat_r_ma1mb1 = cga_mb1mb2 * j1uarraytot_r[ma1][mb1];
             const double uat_i_ma2mb2 = cga_mb1mb2 * j2uarraytot_i[ma2][mb2];
             const double uat_i_ma1mb1 = cga_mb1mb2 * j1uarraytot_i[ma1][mb1];
 
             // complex products dudr1*u2 + dudr2*u1, split into real/imag parts
             for(int k = 0; k < 3; k++) {
               sumb1_r[k] += dudr1_r[k] * uat_r_ma2mb2;
               sumb1_r[k] -= dudr1_i[k] * uat_i_ma2mb2;
               sumb1_i[k] += dudr1_r[k] * uat_i_ma2mb2;
               sumb1_i[k] += dudr1_i[k] * uat_r_ma2mb2;
 
               sumb1_r[k] += dudr2_r[k] * uat_r_ma1mb1;
               sumb1_r[k] -= dudr2_i[k] * uat_i_ma1mb1;
               sumb1_i[k] += dudr2_r[k] * uat_i_ma1mb1;
               sumb1_i[k] += dudr2_i[k] * uat_r_ma1mb1;
             }
           } // end loop over mb1,mb2
 
           // dzdr += sumb1*cg(j1,ma1,j2,ma2,j)
 
           dzdr_r[0] += sumb1_r[0] * j1j2jcgarray[ma1][ma2];
           dzdr_r[1] += sumb1_r[1] * j1j2jcgarray[ma1][ma2];
           dzdr_r[2] += sumb1_r[2] * j1j2jcgarray[ma1][ma2];
           dzdr_i[0] += sumb1_i[0] * j1j2jcgarray[ma1][ma2];
           dzdr_i[1] += sumb1_i[1] * j1j2jcgarray[ma1][ma2];
           dzdr_i[2] += sumb1_i[2] * j1j2jcgarray[ma1][ma2];
         } // end loop over ma1,ma2
 
         // dbdr(j1,j2,j) +=
         //   Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb) +
         //   Conj(u(j,ma,mb))*dzdr
         // only the real part of each product is accumulated
 
         dudr_r = duarray_r[j][ma][mb];
         dudr_i = duarray_i[j][ma][mb];
 
         for(int k = 0; k < 3; k++)
           dbdr[k] +=
             (dudr_r[k] * zarray_r[j1][j2][j][ma][mb] +
              dudr_i[k] * zarray_i[j1][j2][j][ma][mb]) +
             (uarraytot_r[j][ma][mb] * dzdr_r[k] +
              uarraytot_i[j][ma][mb] * dzdr_i[k]);
       } //end loop over ma mb
 
   } //end loop over j1 j2 j
 
 #ifdef TIMING_INFO
   clock_gettime(CLOCK_REALTIME, &endtime);
   timers[4] += (endtime.tv_sec - starttime.tv_sec + 1.0 *
                 (endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
 #endif
 
 }
 
 /* ----------------------------------------------------------------------
    calculate derivative of Bi w.r.t. atom j
    variant using indexlist for j1,j2,j
    variant using symmetry relation
 ------------------------------------------------------------------------- */
 
 void SNA::compute_dbidrj()
 {
   // Fills dbarray[j1][j2][j][0..2] for every (j1,j2,j) triple listed in idxj
   // as the sum of three contributions, each a reduced sum over half of the
   // (ma,mb) index range that is doubled afterwards:
   //
   // for j1 = 0,...,twojmax
   //   for j2 = 0,twojmax
   //     for j = |j1-j2|,Min(twojmax,j1+j2),2
   //        zdb = 0
   //        for mb = 0,...,jmid
   //          for ma = 0,...,j
   //            zdb +=
   //              Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb)
   //        dbdr(j1,j2,j) += 2*zdb
   //        zdb = 0
   //        for mb1 = 0,...,j1mid
   //          for ma1 = 0,...,j1
   //            zdb +=
   //              Conj(dudr(j1,ma1,mb1))*z(j,j2,j1,ma1,mb1)
   //        dbdr(j1,j2,j) += 2*zdb*(j+1)/(j1+1)
   //        zdb = 0
   //        for mb2 = 0,...,j2mid
   //          for ma2 = 0,...,j2
   //            zdb +=
   //              Conj(dudr(j2,ma2,mb2))*z(j1,j,j2,ma2,mb2)
   //        dbdr(j1,j2,j) += 2*zdb*(j+1)/(j2+1)
 
   double* dbdr;
   double* dudr_r, *dudr_i;
   double sumzdu_r[3];
   double** jjjzarray_r;
   double** jjjzarray_i;
   double jjjmambzarray_r;
   double jjjmambzarray_i;
 
 #ifdef TIMING_INFO
   clock_gettime(CLOCK_REALTIME, &starttime);
 #endif
 
   for(int JJ = 0; JJ < idxj_max; JJ++) {
     const int j1 = idxj[JJ].j1;
     const int j2 = idxj[JJ].j2;
     const int j = idxj[JJ].j;
 
     // output slot for this triple; reset before accumulation
     dbdr = dbarray[j1][j2][j];
     dbdr[0] = 0.0;
     dbdr[1] = 0.0;
     dbdr[2] = 0.0;
 
     // Sum terms Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb)
 
     for(int k = 0; k < 3; k++)
       sumzdu_r[k] = 0.0;
 
     // use zarray j1/j2 symmetry (optional)
     // (always index zarray with the larger of the first two indices first)
 
     if (j1 >= j2) {
       jjjzarray_r = zarray_r[j1][j2][j];
       jjjzarray_i = zarray_i[j1][j2][j];
     } else {
       jjjzarray_r = zarray_r[j2][j1][j];
       jjjzarray_i = zarray_i[j2][j1][j];
     }
 
     // columns strictly left of the middle (2*mb < j), all rows
     for(int mb = 0; 2*mb < j; mb++)
       for(int ma = 0; ma <= j; ma++) {
 
         dudr_r = duarray_r[j][ma][mb];
         dudr_i = duarray_i[j][ma][mb];
 	jjjmambzarray_r = jjjzarray_r[ma][mb];
 	jjjmambzarray_i = jjjzarray_i[ma][mb];
         for(int k = 0; k < 3; k++)
           sumzdu_r[k] +=
             dudr_r[k] * jjjmambzarray_r +
 	    dudr_i[k] * jjjmambzarray_i;
 
       } //end loop over ma mb
 
     // For j even, handle middle column
     // rows above the centre count fully, the centre element counts half
 
     if (j%2 == 0) {
       int mb = j/2;
       for(int ma = 0; ma < mb; ma++) {
         dudr_r = duarray_r[j][ma][mb];
 	dudr_i = duarray_i[j][ma][mb];
 	jjjmambzarray_r = jjjzarray_r[ma][mb];
 	jjjmambzarray_i = jjjzarray_i[ma][mb];
         for(int k = 0; k < 3; k++)
           sumzdu_r[k] +=
             dudr_r[k] * jjjmambzarray_r +
 	    dudr_i[k] * jjjmambzarray_i;
       }
       int ma = mb;
       dudr_r = duarray_r[j][ma][mb];
       dudr_i = duarray_i[j][ma][mb];
       jjjmambzarray_r = jjjzarray_r[ma][mb];
       jjjmambzarray_i = jjjzarray_i[ma][mb];
       for(int k = 0; k < 3; k++)
 	sumzdu_r[k] +=
 	  (dudr_r[k] * jjjmambzarray_r +
 	   dudr_i[k] * jjjmambzarray_i)*0.5;
     } // end if jeven
 
     // factor 2 restores the half of the index range that was skipped
     for(int k = 0; k < 3; k++)
       dbdr[k] += 2.0*sumzdu_r[k];
 
     // Sum over Conj(dudr(j1,ma1,mb1))*z(j,j2,j1,ma1,mb1)
 
     // 1.0 in the denominator forces floating-point division
     double j1fac = (j+1)/(j1+1.0);
 
     for(int k = 0; k < 3; k++)
       sumzdu_r[k] = 0.0;
 
     // use zarray j1/j2 symmetry (optional)
 
     if (j >= j2) {
       jjjzarray_r = zarray_r[j][j2][j1];
       jjjzarray_i = zarray_i[j][j2][j1];
     } else {
       jjjzarray_r = zarray_r[j2][j][j1];
       jjjzarray_i = zarray_i[j2][j][j1];
     }
 
     for(int mb1 = 0; 2*mb1 < j1; mb1++)
       for(int ma1 = 0; ma1 <= j1; ma1++) {
 
         dudr_r = duarray_r[j1][ma1][mb1];
         dudr_i = duarray_i[j1][ma1][mb1];
 	jjjmambzarray_r = jjjzarray_r[ma1][mb1];
 	jjjmambzarray_i = jjjzarray_i[ma1][mb1];
         for(int k = 0; k < 3; k++)
           sumzdu_r[k] +=
             dudr_r[k] * jjjmambzarray_r +
 	    dudr_i[k] * jjjmambzarray_i;
 
       } //end loop over ma1 mb1
 
     // For j1 even, handle middle column
 
     if (j1%2 == 0) {
       int mb1 = j1/2;
       for(int ma1 = 0; ma1 < mb1; ma1++) {
         dudr_r = duarray_r[j1][ma1][mb1];
 	dudr_i = duarray_i[j1][ma1][mb1];
 	jjjmambzarray_r = jjjzarray_r[ma1][mb1];
 	jjjmambzarray_i = jjjzarray_i[ma1][mb1];
         for(int k = 0; k < 3; k++)
           sumzdu_r[k] +=
             dudr_r[k] * jjjmambzarray_r +
 	    dudr_i[k] * jjjmambzarray_i;
       }
       int ma1 = mb1;
       dudr_r = duarray_r[j1][ma1][mb1];
       dudr_i = duarray_i[j1][ma1][mb1];
       jjjmambzarray_r = jjjzarray_r[ma1][mb1];
       jjjmambzarray_i = jjjzarray_i[ma1][mb1];
       for(int k = 0; k < 3; k++)
 	sumzdu_r[k] +=
 	  (dudr_r[k] * jjjmambzarray_r +
 	   dudr_i[k] * jjjmambzarray_i)*0.5;
     } // end if j1even
 
     for(int k = 0; k < 3; k++)
       dbdr[k] += 2.0*sumzdu_r[k]*j1fac;
 
     // Sum over Conj(dudr(j2,ma2,mb2))*z(j1,j,j2,ma2,mb2)
 
     // 1.0 in the denominator forces floating-point division
     double j2fac = (j+1)/(j2+1.0);
 
     for(int k = 0; k < 3; k++)
       sumzdu_r[k] = 0.0;
 
     // use zarray j1/j2 symmetry (optional)
 
     if (j1 >= j) {
       jjjzarray_r = zarray_r[j1][j][j2];
       jjjzarray_i = zarray_i[j1][j][j2];
     } else {
       jjjzarray_r = zarray_r[j][j1][j2];
       jjjzarray_i = zarray_i[j][j1][j2];
     }
 
     for(int mb2 = 0; 2*mb2 < j2; mb2++)
       for(int ma2 = 0; ma2 <= j2; ma2++) {
 
         dudr_r = duarray_r[j2][ma2][mb2];
         dudr_i = duarray_i[j2][ma2][mb2];
 	jjjmambzarray_r = jjjzarray_r[ma2][mb2];
 	jjjmambzarray_i = jjjzarray_i[ma2][mb2];
         for(int k = 0; k < 3; k++)
           sumzdu_r[k] +=
             dudr_r[k] * jjjmambzarray_r +
 	    dudr_i[k] * jjjmambzarray_i;
 
       } //end loop over ma2 mb2
 
     // For j2 even, handle middle column
 
     if (j2%2 == 0) {
       int mb2 = j2/2;
       for(int ma2 = 0; ma2 < mb2; ma2++) {
         dudr_r = duarray_r[j2][ma2][mb2];
 	dudr_i = duarray_i[j2][ma2][mb2];
 	jjjmambzarray_r = jjjzarray_r[ma2][mb2];
 	jjjmambzarray_i = jjjzarray_i[ma2][mb2];
         for(int k = 0; k < 3; k++)
           sumzdu_r[k] +=
             dudr_r[k] * jjjmambzarray_r +
 	    dudr_i[k] * jjjmambzarray_i;
       }
       int ma2 = mb2;
       dudr_r = duarray_r[j2][ma2][mb2];
       dudr_i = duarray_i[j2][ma2][mb2];
       jjjmambzarray_r = jjjzarray_r[ma2][mb2];
       jjjmambzarray_i = jjjzarray_i[ma2][mb2];
       for(int k = 0; k < 3; k++)
 	sumzdu_r[k] +=
 	  (dudr_r[k] * jjjmambzarray_r +
 	   dudr_i[k] * jjjmambzarray_i)*0.5;
     } // end if j2even
 
     for(int k = 0; k < 3; k++)
       dbdr[k] += 2.0*sumzdu_r[k]*j2fac;
 
   } //end loop over j1 j2 j
 
 #ifdef TIMING_INFO
   clock_gettime(CLOCK_REALTIME, &endtime);
   timers[4] += (endtime.tv_sec - starttime.tv_sec + 1.0 *
                 (endtime.tv_nsec - starttime.tv_nsec) / 1000000000);
 #endif
 
 }
 
 /* ----------------------------------------------------------------------
    copy Bi derivatives into a vector
 ------------------------------------------------------------------------- */
 
 void SNA::copy_dbi2dbvec()
 {
   // Flatten dbarray[j1][j2][j][0..2] into consecutive rows of dbvec.
   // Which (j1,j2,j) triples are visited depends on diagonalstyle:
   //   0: all j2 <= j1 and every allowed j
   //   1: j2 == j1 and every allowed j
   //   2: j == j2 == j1
   //   3: all j2 <= j1, but only j >= j1
   // Any other value copies nothing.

   int row = 0;

   for (int j1 = 0; j1 <= twojmax; j1++) {
     switch (diagonalstyle) {

     case 0:
       for (int j2 = 0; j2 <= j1; j2++) {
         const int jlast = MIN(twojmax, j1 + j2);
         for (int j = abs(j1 - j2); j <= jlast; j += 2) {
           for (int k = 0; k < 3; k++)
             dbvec[row][k] = dbarray[j1][j2][j][k];
           row++;
         }
       }
       break;

     case 1: {
       // j2 == j1, so |j1 - j2| == 0
       const int jlast = MIN(twojmax, j1 + j1);
       for (int j = 0; j <= jlast; j += 2) {
         for (int k = 0; k < 3; k++)
           dbvec[row][k] = dbarray[j1][j1][j][k];
         row++;
       }
       break;
     }

     case 2:
       for (int k = 0; k < 3; k++)
         dbvec[row][k] = dbarray[j1][j1][j1][k];
       row++;
       break;

     case 3:
       for (int j2 = 0; j2 <= j1; j2++) {
         const int jlast = MIN(twojmax, j1 + j2);
         for (int j = abs(j1 - j2); j <= jlast; j += 2) {
           if (j < j1) continue;
           for (int k = 0; k < 3; k++)
             dbvec[row][k] = dbarray[j1][j2][j][k];
           row++;
         }
       }
       break;

     default:
       break;
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void SNA::zero_uarraytot()
 {
   // clear real and imaginary parts of every (ma,mb) entry in layers 0..twojmax
   for (int j = 0; j <= twojmax; j++) {
     for (int ma = 0; ma <= j; ma++) {
       double *row_r = uarraytot_r[j][ma];
       double *row_i = uarraytot_i[j][ma];
       for (int mb = 0; mb <= j; mb++) {
         row_r[mb] = 0.0;
         row_i[mb] = 0.0;
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void SNA::addself_uarraytot(double wself_in)
 {
   // overwrite (not accumulate) the diagonal of every layer:
   // real part = wself_in, imaginary part = 0
   for (int j = 0; j <= twojmax; j++) {
     double **layer_r = uarraytot_r[j];
     double **layer_i = uarraytot_i[j];
     for (int m = 0; m <= j; m++) {
       layer_r[m][m] = wself_in;
       layer_i[m][m] = 0.0;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    add Wigner U-functions for one neighbor to the total
 ------------------------------------------------------------------------- */
 
 void SNA::add_uarraytot(double r, double wj, double rcut)
 {
   // per-neighbor weight: compute_sfac(r, rcut) scaled by wj
   double weight = compute_sfac(r, rcut);
   weight *= wj;

   // accumulate the weighted single-neighbor arrays into the totals
   for (int j = 0; j <= twojmax; j++) {
     for (int ma = 0; ma <= j; ma++) {
       double *tot_r = uarraytot_r[j][ma];
       double *tot_i = uarraytot_i[j][ma];
       const double *one_r = uarray_r[j][ma];
       const double *one_i = uarray_i[j][ma];
       for (int mb = 0; mb <= j; mb++) {
         tot_r[mb] += weight * one_r[mb];
         tot_i[mb] += weight * one_i[mb];
       }
     }
   }
 }
 
 void SNA::add_uarraytot_omp(double r, double wj, double rcut)
 {
   // per-neighbor weight: compute_sfac(r, rcut) scaled by wj
   double weight = compute_sfac(r, rcut);
   weight *= wj;

   // same accumulation as add_uarraytot(), with the outer j loop
   // marked as an OpenMP work-sharing loop when OpenMP is enabled
 #if defined(_OPENMP)
 #pragma omp for
 #endif
   for (int j = 0; j <= twojmax; j++) {
     for (int ma = 0; ma <= j; ma++) {
       double *tot_r = uarraytot_r[j][ma];
       double *tot_i = uarraytot_i[j][ma];
       const double *one_r = uarray_r[j][ma];
       const double *one_i = uarray_i[j][ma];
       for (int mb = 0; mb <= j; mb++) {
         tot_r[mb] += weight * one_r[mb];
         tot_i[mb] += weight * one_i[mb];
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    compute Wigner U-functions for one neighbor
 ------------------------------------------------------------------------- */
 
 void SNA::compute_uarray(double x, double y, double z,
                          double z0, double r)
 {
   // Fills uarray_r / uarray_i for layers j = 0..twojmax from the
   // displacement (x,y,z), its length r and the auxiliary coordinate z0.
   // Each layer j is built from layer j-1; only columns with 2*mb <= j are
   // computed by recursion, the rest are filled in by symmetry.

   double r0inv;
   double a_r, b_r, a_i, b_i;
   double rootpq;
 
   // compute Cayley-Klein parameters for unit quaternion
 
   r0inv = 1.0 / sqrt(r * r + z0 * z0);
   a_r = r0inv * z0;
   a_i = -r0inv * z;
   b_r = r0inv * y;
   b_i = -r0inv * x;
 
   // VMK Section 4.8.2
 
   // layer j = 0 is the single element 1 + 0i
   uarray_r[0][0][0] = 1.0;
   uarray_i[0][0][0] = 0.0;
 
   for (int j = 1; j <= twojmax; j++) {
 
     // fill in left side of matrix layer from previous layer
 
     for (int mb = 0; 2*mb <= j; mb++) {
       // row 0 is accumulated with +=, so it must start from zero
       uarray_r[j][0][mb] = 0.0;
       uarray_i[j][0][mb] = 0.0;
 
       for (int ma = 0; ma < j; ma++) {
         // contribution to row ma: rootpq * Conj(a) * u[j-1][ma][mb]
 	rootpq = rootpqarray[j - ma][j - mb];
         uarray_r[j][ma][mb] +=
           rootpq *
           (a_r * uarray_r[j - 1][ma][mb] +
 	   a_i * uarray_i[j - 1][ma][mb]);
         uarray_i[j][ma][mb] +=
           rootpq *
           (a_r * uarray_i[j - 1][ma][mb] -
 	   a_i * uarray_r[j - 1][ma][mb]);
 
         // row ma+1 is assigned (not accumulated) here:
         // -rootpq * Conj(b) * u[j-1][ma][mb];
         // the next ma iteration then adds its own term to it
 	rootpq = rootpqarray[ma + 1][j - mb];
         uarray_r[j][ma + 1][mb] =
           -rootpq *
           (b_r * uarray_r[j - 1][ma][mb] +
 	   b_i * uarray_i[j - 1][ma][mb]);
         uarray_i[j][ma + 1][mb] =
           -rootpq *
           (b_r * uarray_i[j - 1][ma][mb] -
 	   b_i * uarray_r[j - 1][ma][mb]);
       }
     }
 
     // copy left side to right side with inversion symmetry VMK 4.4(2)
     // u[ma-j][mb-j] = (-1)^(ma-mb)*Conj([u[ma][mb])
 
     // mbpar / mapar track the sign (-1)^(ma-mb) incrementally:
     // mapar is +1 when ma and mb have the same parity
     int mbpar = -1;
     for (int mb = 0; 2*mb <= j; mb++) {
       mbpar = -mbpar;
       int mapar = -mbpar;
       for (int ma = 0; ma <= j; ma++) {
     	mapar = -mapar;
     	if (mapar == 1) {
     	  uarray_r[j][j-ma][j-mb] = uarray_r[j][ma][mb];
     	  uarray_i[j][j-ma][j-mb] = -uarray_i[j][ma][mb];
     	} else {
     	  uarray_r[j][j-ma][j-mb] = -uarray_r[j][ma][mb];
     	  uarray_i[j][j-ma][j-mb] = uarray_i[j][ma][mb];
     	}
       }
     }
   }
 }
 
 void SNA::compute_uarray_omp(double x, double y, double z,
                              double z0, double r, int sub_threads)
 {
   // OpenMP work-sharing variant of the uarray recursion.
   // Unlike compute_uarray(), this version does not use the inversion
   // symmetry: columns mb = 0..j-1 are computed by the a/b recursion on
   // column mb of layer j-1, and the last column mb = j is computed
   // separately from column mb-1 of layer j-1.
   // NOTE(review): sub_threads is not referenced in this function body.

   double r0inv;
   double a_r, b_r, a_i, b_i;
   double rootpq;
 
   // compute Cayley-Klein parameters for unit quaternion
 
   r0inv = 1.0 / sqrt(r * r + z0 * z0);
   a_r = r0inv * z0;
   a_i = -r0inv * z;
   b_r = r0inv * y;
   b_i = -r0inv * x;
 
   // VMK Section 4.8.2
 
   // layer j = 0 is the single element 1 + 0i
   uarray_r[0][0][0] = 1.0;
   uarray_i[0][0][0] = 0.0;
 
   for (int j = 1; j <= twojmax; j++) {
     // columns mb = 0..j-1: iterations over mb write disjoint columns
 #if defined(_OPENMP)
 #pragma omp for
 #endif
     for (int mb = 0; mb < j; mb++) {
       // row 0 is accumulated with +=, so it must start from zero
       uarray_r[j][0][mb] = 0.0;
       uarray_i[j][0][mb] = 0.0;
 
       for (int ma = 0; ma < j; ma++) {
         // contribution to row ma: rootpq * Conj(a) * u[j-1][ma][mb]
 	rootpq = rootpqarray[j - ma][j - mb];
         uarray_r[j][ma][mb] +=
 	  rootpq *
           (a_r * uarray_r[j - 1][ma][mb] +
 	   a_i * uarray_i[j - 1][ma][mb]);
         uarray_i[j][ma][mb] +=
 	  rootpq *
           (a_r * uarray_i[j - 1][ma][mb] -
 	   a_i * uarray_r[j - 1][ma][mb]);
 
         // row ma+1 is assigned here: -rootpq * Conj(b) * u[j-1][ma][mb]
 	rootpq = rootpqarray[ma + 1][j - mb];
         uarray_r[j][ma + 1][mb] =
 	  -rootpq *
           (b_r * uarray_r[j - 1][ma][mb] +
 	   b_i * uarray_i[j - 1][ma][mb]);
         uarray_i[j][ma + 1][mb] =
 	  -rootpq *
           (b_r * uarray_i[j - 1][ma][mb] -
 	   b_i * uarray_r[j - 1][ma][mb]);
       }
     }
 
     // last column mb = j, built from column j-1 of the previous layer
     int mb = j;
     uarray_r[j][0][mb] = 0.0;
     uarray_i[j][0][mb] = 0.0;
 
     // NOTE(review): iteration ma assigns row ma+1 and iteration ma+1 then
     // accumulates into that same row, so iterations of this work-shared
     // loop are not independent - confirm this is safe for the intended
     // thread configuration.
 #if defined(_OPENMP)
 #pragma omp for
 #endif
     for (int ma = 0; ma < j; ma++) {
       // contribution to row ma: rootpq * b * u[j-1][ma][mb-1]
       rootpq = rootpqarray[j - ma][mb];
       uarray_r[j][ma][mb] +=
 	rootpq *
         (b_r * uarray_r[j - 1][ma][mb - 1] -
 	 b_i * uarray_i[j - 1][ma][mb - 1]);
       uarray_i[j][ma][mb] +=
 	rootpq *
         (b_r * uarray_i[j - 1][ma][mb - 1] +
 	 b_i * uarray_r[j - 1][ma][mb - 1]);
 
       // row ma+1 is assigned here: rootpq * a * u[j-1][ma][mb-1]
       rootpq = rootpqarray[ma + 1][mb];
       uarray_r[j][ma + 1][mb] =
 	rootpq *
         (a_r * uarray_r[j - 1][ma][mb - 1] -
 	 a_i * uarray_i[j - 1][ma][mb - 1]);
       uarray_i[j][ma + 1][mb] =
 	rootpq *
         (a_r * uarray_i[j - 1][ma][mb - 1] +
 	 a_i * uarray_r[j - 1][ma][mb - 1]);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    compute derivatives of Wigner U-functions for one neighbor
    see comments in compute_uarray()
 ------------------------------------------------------------------------- */
 
 void SNA::compute_duarray(double x, double y, double z,
                           double z0, double r, double dz0dr,
 			  double wj, double rcut)
 {
   // Fills duarray_r / duarray_i (three Cartesian components per element)
   // and, as a by-product, recomputes uarray_r / uarray_i.
   // Inputs: displacement (x,y,z), its length r, auxiliary coordinate z0 and
   // its radial derivative dz0dr, neighbor weight wj and cutoff rcut.
   // The recursion over layers mirrors compute_uarray(); each update is
   // differentiated with the product rule.

   double r0inv;
   double a_r, a_i, b_r, b_i;
   double da_r[3], da_i[3], db_r[3], db_i[3];
   double dz0[3], dr0inv[3], dr0invdr;
   double rootpq;
 
   // unit vector along the displacement
   double rinv = 1.0 / r;
   double ux = x * rinv;
   double uy = y * rinv;
   double uz = z * rinv;
 
   // same a, b parameters as in compute_uarray()
   r0inv = 1.0 / sqrt(r * r + z0 * z0);
   a_r = z0 * r0inv;
   a_i = -z * r0inv;
   b_r = y * r0inv;
   b_i = -x * r0inv;
 
   // radial derivative of r0inv = (r^2 + z0^2)^(-1/2)
   dr0invdr = -pow(r0inv, 3.0) * (r + z0 * dz0dr);
 
   // chain rule: radial derivative times unit vector gives the gradient
   dr0inv[0] = dr0invdr * ux;
   dr0inv[1] = dr0invdr * uy;
   dr0inv[2] = dr0invdr * uz;
 
   dz0[0] = dz0dr * ux;
   dz0[1] = dz0dr * uy;
   dz0[2] = dz0dr * uz;
 
   // gradients of a = (z0 - i z) * r0inv
   for (int k = 0; k < 3; k++) {
     da_r[k] = dz0[k] * r0inv + z0 * dr0inv[k];
     da_i[k] = -z * dr0inv[k];
   }
 
   // explicit z dependence of a_i
   da_i[2] += -r0inv;
 
   // gradients of b = (y - i x) * r0inv
   for (int k = 0; k < 3; k++) {
     db_r[k] = y * dr0inv[k];
     db_i[k] = -x * dr0inv[k];
   }
 
   // explicit x dependence of b_i and y dependence of b_r
   db_i[0] += -r0inv;
   db_r[1] += r0inv;
 
   // layer j = 0: u = 1 + 0i, all derivatives zero
   uarray_r[0][0][0] = 1.0;
   duarray_r[0][0][0][0] = 0.0;
   duarray_r[0][0][0][1] = 0.0;
   duarray_r[0][0][0][2] = 0.0;
   uarray_i[0][0][0] = 0.0;
   duarray_i[0][0][0][0] = 0.0;
   duarray_i[0][0][0][1] = 0.0;
   duarray_i[0][0][0][2] = 0.0;
 
   for (int j = 1; j <= twojmax; j++) {
     // left half of the layer (2*mb <= j) by recursion from layer j-1
     for (int mb = 0; 2*mb <= j; mb++) {
       // row 0 is accumulated with +=, so it must start from zero
       uarray_r[j][0][mb] = 0.0;
       duarray_r[j][0][mb][0] = 0.0;
       duarray_r[j][0][mb][1] = 0.0;
       duarray_r[j][0][mb][2] = 0.0;
       uarray_i[j][0][mb] = 0.0;
       duarray_i[j][0][mb][0] = 0.0;
       duarray_i[j][0][mb][1] = 0.0;
       duarray_i[j][0][mb][2] = 0.0;
 
       for (int ma = 0; ma < j; ma++) {
         // contribution to row ma: rootpq * Conj(a) * u[j-1][ma][mb]
         rootpq = rootpqarray[j - ma][j - mb];
         uarray_r[j][ma][mb] += rootpq *
                                (a_r *  uarray_r[j - 1][ma][mb] +
                                 a_i *  uarray_i[j - 1][ma][mb]);
         uarray_i[j][ma][mb] += rootpq *
                                (a_r *  uarray_i[j - 1][ma][mb] -
                                 a_i *  uarray_r[j - 1][ma][mb]);
 
         // derivative of the term above (product rule on a and u)
         for (int k = 0; k < 3; k++) {
           duarray_r[j][ma][mb][k] +=
             rootpq * (da_r[k] * uarray_r[j - 1][ma][mb] +
                       da_i[k] * uarray_i[j - 1][ma][mb] +
                       a_r * duarray_r[j - 1][ma][mb][k] +
                       a_i * duarray_i[j - 1][ma][mb][k]);
           duarray_i[j][ma][mb][k] +=
             rootpq * (da_r[k] * uarray_i[j - 1][ma][mb] -
                       da_i[k] * uarray_r[j - 1][ma][mb] +
                       a_r * duarray_i[j - 1][ma][mb][k] -
                       a_i * duarray_r[j - 1][ma][mb][k]);
         }
 
         // row ma+1 is assigned here: -rootpq * Conj(b) * u[j-1][ma][mb]
 	rootpq = rootpqarray[ma + 1][j - mb];
         uarray_r[j][ma + 1][mb] =
           -rootpq * (b_r *  uarray_r[j - 1][ma][mb] +
                      b_i *  uarray_i[j - 1][ma][mb]);
         uarray_i[j][ma + 1][mb] =
           -rootpq * (b_r *  uarray_i[j - 1][ma][mb] -
                      b_i *  uarray_r[j - 1][ma][mb]);
 
         // derivative of the term above (product rule on b and u)
         for (int k = 0; k < 3; k++) {
           duarray_r[j][ma + 1][mb][k] =
             -rootpq * (db_r[k] * uarray_r[j - 1][ma][mb] +
                        db_i[k] * uarray_i[j - 1][ma][mb] +
                        b_r * duarray_r[j - 1][ma][mb][k] +
                        b_i * duarray_i[j - 1][ma][mb][k]);
           duarray_i[j][ma + 1][mb][k] =
             -rootpq * (db_r[k] * uarray_i[j - 1][ma][mb] -
                        db_i[k] * uarray_r[j - 1][ma][mb] +
                        b_r * duarray_i[j - 1][ma][mb][k] -
                        b_i * duarray_r[j - 1][ma][mb][k]);
         }
       }
     }
 
     // fill element (j-ma, j-mb) from (ma, mb): conjugate when ma and mb
     // have the same parity (mapar == 1), negated conjugate otherwise;
     // the derivatives follow the same sign pattern
     int mbpar = -1;
     for (int mb = 0; 2*mb <= j; mb++) {
       mbpar = -mbpar;
       int mapar = -mbpar;
       for (int ma = 0; ma <= j; ma++) {
     	mapar = -mapar;
     	if (mapar == 1) {
     	  uarray_r[j][j-ma][j-mb] = uarray_r[j][ma][mb];
     	  uarray_i[j][j-ma][j-mb] = -uarray_i[j][ma][mb];
     	  for (int k = 0; k < 3; k++) {
     	    duarray_r[j][j-ma][j-mb][k] = duarray_r[j][ma][mb][k];
     	    duarray_i[j][j-ma][j-mb][k] = -duarray_i[j][ma][mb][k];
     	  }
     	} else {
     	  uarray_r[j][j-ma][j-mb] = -uarray_r[j][ma][mb];
     	  uarray_i[j][j-ma][j-mb] = uarray_i[j][ma][mb];
     	  for (int k = 0; k < 3; k++) {
     	    duarray_r[j][j-ma][j-mb][k] = -duarray_r[j][ma][mb][k];
     	    duarray_i[j][j-ma][j-mb][k] = duarray_i[j][ma][mb][k];
     	  }
     	}
       }
     }
   }
 
   // weight factor and its radial derivative, both scaled by wj
   double sfac = compute_sfac(r, rcut);
   double dsfac = compute_dsfac(r, rcut);
 
   sfac *= wj;
   dsfac *= wj;
 
   // product rule for d(sfac * u): dsfac * u * unit-vector + sfac * du
   for (int j = 0; j <= twojmax; j++)
     for (int ma = 0; ma <= j; ma++)
       for (int mb = 0; mb <= j; mb++) {
         duarray_r[j][ma][mb][0] = dsfac * uarray_r[j][ma][mb] * ux +
                                   sfac * duarray_r[j][ma][mb][0];
         duarray_i[j][ma][mb][0] = dsfac * uarray_i[j][ma][mb] * ux +
                                   sfac * duarray_i[j][ma][mb][0];
         duarray_r[j][ma][mb][1] = dsfac * uarray_r[j][ma][mb] * uy +
                                   sfac * duarray_r[j][ma][mb][1];
         duarray_i[j][ma][mb][1] = dsfac * uarray_i[j][ma][mb] * uy +
                                   sfac * duarray_i[j][ma][mb][1];
         duarray_r[j][ma][mb][2] = dsfac * uarray_r[j][ma][mb] * uz +
                                   sfac * duarray_r[j][ma][mb][2];
         duarray_i[j][ma][mb][2] = dsfac * uarray_i[j][ma][mb] * uz +
                                   sfac * duarray_i[j][ma][mb][2];
       }
 }
 
 /* ----------------------------------------------------------------------
    memory usage of arrays
 ------------------------------------------------------------------------- */
 
 double SNA::memory_usage()
 {
   // Estimate, in bytes, of the storage used by the twojmax-sized arrays.
   // The sizes are computed in double precision: the previous int products
   // (jdim^5 in particular) overflow a 32-bit int once jdim reaches 74,
   // which is undefined behavior. For all non-overflowing sizes the result
   // is identical, since every intermediate value is exactly representable.
   const double jdim = twojmax + 1;
   const double jdim3 = jdim * jdim * jdim;
   const double jdim5 = jdim3 * jdim * jdim;

   double bytes;
   bytes = jdim5 * sizeof(double);
   bytes += 2 * jdim3 * sizeof(complex<double>);
   bytes += 2 * jdim3 * sizeof(double);
   bytes += jdim3 * 3 * sizeof(complex<double>);
   bytes += jdim3 * 3 * sizeof(double);
   bytes += static_cast<double>(ncoeff) * sizeof(double);
   bytes += jdim5 * sizeof(complex<double>);
   return bytes;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void SNA::create_twojmax_arrays()
 {
   // Allocates every array whose extent depends on twojmax.
   // All dimensions are jdim = twojmax + 1, except rootpqarray, which is
   // one larger in each direction, and the trailing 3 (Cartesian components)
   // on the derivative arrays.
   int jdim = twojmax + 1;
 
   memory->create(cgarray, jdim, jdim, jdim, jdim, jdim,
                  "sna:cgarray");
   memory->create(rootpqarray, jdim+1, jdim+1,
                  "sna:rootpqarray");
   memory->create(barray, jdim, jdim, jdim,
                  "sna:barray");
   memory->create(dbarray, jdim, jdim, jdim, 3,
                  "sna:dbarray");
 
   // real and imaginary parts share one allocation label
   memory->create(duarray_r, jdim, jdim, jdim, 3,
                  "sna:duarray");
   memory->create(duarray_i, jdim, jdim, jdim, 3,
                  "sna:duarray");
 
   memory->create(uarray_r, jdim, jdim, jdim,
                  "sna:uarray");
   memory->create(uarray_i, jdim, jdim, jdim,
                  "sna:uarray");
 
   // bzero is allocated only when bzero_flag is set; otherwise it is NULL
+  if (bzero_flag)
+    memory->create(bzero, jdim,"sna:bzero");
+  else
+    bzero = NULL;
+  
+
   // the total-U and Z arrays are allocated here only when they are not
   // shared (use_shared_arrays is false)
   if(!use_shared_arrays) {
     memory->create(uarraytot_r, jdim, jdim, jdim,
                    "sna:uarraytot");
     memory->create(zarray_r, jdim, jdim, jdim, jdim, jdim,
                    "sna:zarray");
     memory->create(uarraytot_i, jdim, jdim, jdim,
                    "sna:uarraytot");
     memory->create(zarray_i, jdim, jdim, jdim, jdim, jdim,
                    "sna:zarray");
   }
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 void SNA::destroy_twojmax_arrays()
 {
   // Releases the arrays allocated by create_twojmax_arrays(), under the
   // same conditions (bzero_flag, use_shared_arrays) used for allocation.
   memory->destroy(cgarray);
   memory->destroy(rootpqarray);
   memory->destroy(barray);
 
   memory->destroy(dbarray);
 
   memory->destroy(duarray_r);
   memory->destroy(duarray_i);
 
   memory->destroy(uarray_r);
   memory->destroy(uarray_i);
 
   // bzero exists only when bzero_flag is set
+  if (bzero_flag)
+    memory->destroy(bzero);
+
   // the total-U and Z arrays are freed here only when they are not shared
   if(!use_shared_arrays) {
     memory->destroy(uarraytot_r);
     memory->destroy(zarray_r);
     memory->destroy(uarraytot_i);
     memory->destroy(zarray_i);
   }
 }
 
 /* ----------------------------------------------------------------------
    factorial n, wrapper for precomputed table
 ------------------------------------------------------------------------- */
 
 double SNA::factorial(int n)
 {
   if (n < 0 || n > nmaxfactorial) {
     char str[128];
     sprintf(str, "Invalid argument to factorial %d", n);
     error->all(FLERR, str);
   }
 
   return nfac_table[n];
 }
 
 /* ----------------------------------------------------------------------
    factorial n table, size SNA::nmaxfactorial+1
 ------------------------------------------------------------------------- */
 
 // Entry n holds n! as a double, for n = 0 through 167 (nmaxfactorial).
 // Entries up to 17! are written as exact integers; larger entries are
 // written in scientific notation with up to 15 significant digits.
 const double SNA::nfac_table[] = {
   1,
   1,
   2,
   6,
   24,
   120,
   720,
   5040,
   40320,
   362880,
   3628800,
   39916800,
   479001600,
   6227020800,
   87178291200,
   1307674368000,
   20922789888000,
   355687428096000,
   6.402373705728e+15,
   1.21645100408832e+17,
   2.43290200817664e+18,
   5.10909421717094e+19,
   1.12400072777761e+21,
   2.5852016738885e+22,
   6.20448401733239e+23,
   1.5511210043331e+25,
   4.03291461126606e+26,
   1.08888694504184e+28,
   3.04888344611714e+29,
   8.8417619937397e+30,
   2.65252859812191e+32,
   8.22283865417792e+33,
   2.63130836933694e+35,
   8.68331761881189e+36,
   2.95232799039604e+38,
   1.03331479663861e+40,
   3.71993326789901e+41,
   1.37637530912263e+43,
   5.23022617466601e+44,
   2.03978820811974e+46,
   8.15915283247898e+47,
   3.34525266131638e+49,
   1.40500611775288e+51,
   6.04152630633738e+52,
   2.65827157478845e+54,
   1.1962222086548e+56,
   5.50262215981209e+57,
   2.58623241511168e+59,
   1.24139155925361e+61,
   6.08281864034268e+62,
   3.04140932017134e+64,
   1.55111875328738e+66,
   8.06581751709439e+67,
   4.27488328406003e+69,
   2.30843697339241e+71,
   1.26964033536583e+73,
   7.10998587804863e+74,
   4.05269195048772e+76,
   2.35056133128288e+78,
   1.3868311854569e+80,
   8.32098711274139e+81,
   5.07580213877225e+83,
   3.14699732603879e+85,
   1.98260831540444e+87,
   1.26886932185884e+89,
   8.24765059208247e+90,
   5.44344939077443e+92,
   3.64711109181887e+94,
   2.48003554243683e+96,
   1.71122452428141e+98,
   1.19785716699699e+100,
   8.50478588567862e+101,
   6.12344583768861e+103,
   4.47011546151268e+105,
   3.30788544151939e+107,
   2.48091408113954e+109,
   1.88549470166605e+111,
   1.45183092028286e+113,
   1.13242811782063e+115,
   8.94618213078297e+116,
   7.15694570462638e+118,
   5.79712602074737e+120,
   4.75364333701284e+122,
   3.94552396972066e+124,
   3.31424013456535e+126,
   2.81710411438055e+128,
   2.42270953836727e+130,
   2.10775729837953e+132,
   1.85482642257398e+134,
   1.65079551609085e+136,
   1.48571596448176e+138,
   1.3520015276784e+140,
   1.24384140546413e+142,
   1.15677250708164e+144,
   1.08736615665674e+146,
   1.03299784882391e+148,
   9.91677934870949e+149,
   9.61927596824821e+151,
   9.42689044888324e+153,
   9.33262154439441e+155,
   9.33262154439441e+157,
   9.42594775983835e+159,
   9.61446671503512e+161,
   9.90290071648618e+163,
   1.02990167451456e+166,
   1.08139675824029e+168,
   1.14628056373471e+170,
   1.22652020319614e+172,
   1.32464181945183e+174,
   1.44385958320249e+176,
   1.58824554152274e+178,
   1.76295255109024e+180,
   1.97450685722107e+182,
   2.23119274865981e+184,
   2.54355973347219e+186,
   2.92509369349301e+188,
   3.3931086844519e+190,
   3.96993716080872e+192,
   4.68452584975429e+194,
   5.5745857612076e+196,
   6.68950291344912e+198,
   8.09429852527344e+200,
   9.8750442008336e+202,
   1.21463043670253e+205,
   1.50614174151114e+207,
   1.88267717688893e+209,
   2.37217324288005e+211,
   3.01266001845766e+213,
   3.8562048236258e+215,
   4.97450422247729e+217,
   6.46685548922047e+219,
   8.47158069087882e+221,
   1.118248651196e+224,
   1.48727070609069e+226,
   1.99294274616152e+228,
   2.69047270731805e+230,
   3.65904288195255e+232,
   5.01288874827499e+234,
   6.91778647261949e+236,
   9.61572319694109e+238,
   1.34620124757175e+241,
   1.89814375907617e+243,
   2.69536413788816e+245,
   3.85437071718007e+247,
   5.5502938327393e+249,
   8.04792605747199e+251,
   1.17499720439091e+254,
   1.72724589045464e+256,
   2.55632391787286e+258,
   3.80892263763057e+260,
   5.71338395644585e+262,
   8.62720977423323e+264,
   1.31133588568345e+267,
   2.00634390509568e+269,
   3.08976961384735e+271,
   4.78914290146339e+273,
   7.47106292628289e+275,
   1.17295687942641e+278,
   1.85327186949373e+280,
   2.94670227249504e+282,
   4.71472363599206e+284,
   7.59070505394721e+286,
   1.22969421873945e+289,
   2.0044015765453e+291,
   3.28721858553429e+293,
   5.42391066613159e+295,
   9.00369170577843e+297,
   1.503616514865e+300, // nmaxfactorial = 167
 };
 
 /* ----------------------------------------------------------------------
    the function delta given by VMK Eq. 8.2(1)
 ------------------------------------------------------------------------- */
 
 double SNA::deltacg(int j1, int j2, int j)
 {
   // denominator: factorial of (j1+j2+j)/2 + 1
   const double denom = factorial((j1 + j2 + j) / 2 + 1);
 
   // numerator: product of the three factorials built from j1, j2, j
   const double fab = factorial((j1 + j2 - j) / 2);
   const double fac = factorial((j1 - j2 + j) / 2);
   const double fbc = factorial((-j1 + j2 + j) / 2);
 
   return sqrt(fab * fac * fbc / denom);
 }
 
 /* ----------------------------------------------------------------------
    assign Clebsch-Gordan coefficients using
    the quasi-binomial formula VMK 8.2.1(3)
 ------------------------------------------------------------------------- */
 
 void SNA::init_clebsch_gordan()
 {
   // fill cgarray[j1][j2][j][m1][m2] for every (j1,j2,j) with
   // |j1-j2| <= j <= MIN(twojmax,j1+j2), j stepping by 2,
   // and every (m1,m2) pair for which the index m is in [0,j]
 
   for (int j1 = 0; j1 <= twojmax; j1++)
     for (int j2 = 0; j2 <= twojmax; j2++)
       for (int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) {
 
         // deltacg() depends only on (j1,j2,j),
         // so evaluate it once per triple instead of once per (m1,m2)
 
         const double dcg = deltacg(j1, j2, j);
 
         for (int m1 = 0; m1 <= j1; m1++) {
           const int aa2 = 2 * m1 - j1;
 
           for (int m2 = 0; m2 <= j2; m2++) {
 
             // -c <= cc <= c
 
             const int bb2 = 2 * m2 - j2;
             const int m = (aa2 + bb2 + j) / 2;
 
             if (m < 0 || m > j) continue;
 
             // summation limits do not change inside the z loop
 
             const int zlo = MAX(0, MAX(-(j - j2 + aa2)
                                        / 2, -(j - j1 - bb2) / 2));
             const int zhi = MIN((j1 + j2 - j) / 2,
                                 MIN((j1 - aa2) / 2, (j2 + bb2) / 2));
 
             // alternating-sign sum of inverse factorial products
 
             double sum = 0.0;
 
             for (int z = zlo; z <= zhi; z++) {
               const int ifac = z % 2 ? -1 : 1;
               sum += ifac /
                 (factorial(z) *
                  factorial((j1 + j2 - j) / 2 - z) *
                  factorial((j1 - aa2) / 2 - z) *
                  factorial((j2 + bb2) / 2 - z) *
                  factorial((j - j2 + aa2) / 2 + z) *
                  factorial((j - j1 - bb2) / 2 + z));
             }
 
             const int cc2 = 2 * m - j;
             const double sfaccg = sqrt(factorial((j1 + aa2) / 2) *
                                        factorial((j1 - aa2) / 2) *
                                        factorial((j2 + bb2) / 2) *
                                        factorial((j2 - bb2) / 2) *
                                        factorial((j  + cc2) / 2) *
                                        factorial((j  - cc2) / 2) *
                                        (j + 1));
 
             cgarray[j1][j2][j][m1][m2] = sum * dcg * sfaccg;
           }
         }
       }
 }
 
 /* ----------------------------------------------------------------------
    pre-compute table of sqrt[p/q], p, q = 1,twojmax
    the p = 0, q = 0 entries are allocated and skipped for convenience.
 ------------------------------------------------------------------------- */
 
 void SNA::init_rootpqarray()
 {
   // only indices 1..twojmax are written; row 0 and column 0 stay untouched
   for (int p = 1; p <= twojmax; p++) {
     const double numer = static_cast<double>(p);
     for (int q = 1; q <= twojmax; q++) {
       rootpqarray[p][q] = sqrt(numer/q);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    a = j/2
 ------------------------------------------------------------------------- */
 
 void SNA::jtostr(char* str, int j)
 {
   // odd j is written as the fraction "j/2", even j as the integer j/2
   const bool odd = (j % 2 != 0);
 
   if (odd) {
     sprintf(str, "%d/2", j);
   } else {
     sprintf(str, "%d", j / 2);
   }
 }
 
 /* ----------------------------------------------------------------------
    aa = m - j/2
 ------------------------------------------------------------------------- */
 
 void SNA::mtostr(char* str, int j, int m)
 {
   // odd j: write (2m - j) over 2 as a fraction; even j: write the integer m - j/2
   const bool odd = (j % 2 != 0);
 
   if (odd) {
     sprintf(str, "%d/2", 2 * m - j);
   } else {
     sprintf(str, "%d", m - j / 2);
   }
 }
 
 /* ----------------------------------------------------------------------
    list values of Clebsch-Gordan coefficients
    using notation of VMK Table 8.11
 ------------------------------------------------------------------------- */
 
 void SNA::print_clebsch_gordan(FILE* file)
 {
   // one text buffer per printed column
   char txt_a[20], txt_aa[20];
   char txt_b[20], txt_bb[20];
   char txt_c[20], txt_cc[20];
 
   fprintf(file, "a, aa, b, bb, c, cc, c(a,aa,b,bb,c,cc) \n");
 
   for (int j1 = 0; j1 <= twojmax; j1++) {
     jtostr(txt_a, j1);
 
     for (int j2 = 0; j2 <= twojmax; j2++) {
       jtostr(txt_b, j2);
 
       const int jhi = MIN(twojmax, j1 + j2);
 
       for (int j = abs(j1 - j2); j <= jhi; j += 2) {
         jtostr(txt_c, j);
 
         for (int m1 = 0; m1 <= j1; m1++) {
           mtostr(txt_aa, j1, m1);
           const int aa2 = 2 * m1 - j1;
 
           for (int m2 = 0; m2 <= j2; m2++) {
             const int bb2 = 2 * m2 - j2;
             const int m = (aa2 + bb2 + j) / 2;
 
             // a row is printed only when m is a valid index for j
 
             if (m >= 0 && m <= j) {
               mtostr(txt_bb, j2, m2);
               mtostr(txt_cc, j, m);
 
               fprintf(file, "%s\t%s\t%s\t%s\t%s\t%s\t%g\n",
                       txt_a, txt_aa, txt_b, txt_bb, txt_c, txt_cc,
                       cgarray[j1][j2][j][m1][m2]);
             }
           }
         }
       }
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int SNA::compute_ncoeff()
 {
   // count the (j1,j2,j) index triples selected by diagonalstyle
 
   int total = 0;
 
   for (int j1 = 0; j1 <= twojmax; j1++) {
     switch (diagonalstyle) {
 
     case 0:
       // all j2 <= j1, all allowed j
       for (int j2 = 0; j2 <= j1; j2++) {
         const int jhi = MIN(twojmax, j1 + j2);
         for (int j = abs(j1 - j2); j <= jhi; j += 2) total++;
       }
       break;
 
     case 1: {
       // j2 fixed to j1
       const int j2 = j1;
       const int jhi = MIN(twojmax, j1 + j2);
       for (int j = abs(j1 - j2); j <= jhi; j += 2) total++;
       break;
     }
 
     case 2:
       // exactly one entry per j1
       total++;
       break;
 
     case 3:
       // all j2 <= j1, but only j >= j1
       for (int j2 = 0; j2 <= j1; j2++) {
         const int jhi = MIN(twojmax, j1 + j2);
         for (int j = abs(j1 - j2); j <= jhi; j += 2)
           if (j >= j1) total++;
       }
       break;
 
     default:
       // any other style contributes nothing
       break;
     }
   }
 
   return total;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double SNA::compute_sfac(double r, double rcut)
 {
   switch (switch_flag) {
 
   case 0:
     // no switching: full weight everywhere
     return 1.0;
 
   case 1: {
     // cosine switch: 1 up to rmin0, 0 beyond rcut, smooth in between
     if (r <= rmin0) return 1.0;
     if (r > rcut) return 0.0;
 
     const double rcutfac = MY_PI / (rcut - rmin0);
     return 0.5 * (cos((r - rmin0) * rcutfac) + 1.0);
   }
 
   default:
     return 0.0;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 double SNA::compute_dsfac(double r, double rcut)
 {
   // derivative with respect to r of the value returned by compute_sfac()
 
   switch (switch_flag) {
 
   case 0:
     return 0.0;
 
   case 1: {
     // flat on both sides of the switching interval
     if (r <= rmin0 || r > rcut) return 0.0;
 
     const double rcutfac = MY_PI / (rcut - rmin0);
     return -0.5 * sin((r - rmin0) * rcutfac) * rcutfac;
   }
 
   default:
     return 0.0;
   }
 }
 
diff --git a/src/SNAP/sna.h b/src/SNAP/sna.h
index c8bce915f..d05ad0fb8 100644
--- a/src/SNAP/sna.h
+++ b/src/SNAP/sna.h
@@ -1,155 +1,157 @@
 /* -*- c++ -*- -------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Aidan Thompson, Christian Trott, SNL
 ------------------------------------------------------------------------- */
 
 #ifndef LMP_SNA_H
 #define LMP_SNA_H
 
 #include <complex>
 #include "pointers.h"
 #include <ctime>
 
 namespace LAMMPS_NS {
 
 struct SNA_LOOPINDICES {
   // one (j1,j2,j) index triple
   int j1;
   int j2;
   int j;
 };
 
 class SNA : protected Pointers {
 
 public:
   // full constructor; argument meanings are defined by the
   // implementation in sna.cpp, not visible in this header
-  SNA(LAMMPS*, double, int, int, int, double, int);
+  SNA(LAMMPS*, double, int, int, int, double, int, int);
 
   // minimal constructor: only forwards lmp to the Pointers base class
   SNA(LAMMPS* lmp) : Pointers(lmp) {};
   ~SNA();
   void build_indexlist();
   void init();
   double memory_usage();
 
   // NOTE(review): presumably the count returned by compute_ncoeff() - confirm
   int ncoeff;
 
   // functions for bispectrum coefficients
 
   void compute_ui(int);
   void compute_ui_omp(int, int);
   void compute_zi();
   void compute_zi_omp(int);
   void compute_bi();
   void copy_bi2bvec();
 
   // functions for derivatives
 
   void compute_duidrj(double*, double, double);
   void compute_dbidrj();
   void compute_dbidrj_nonsymm();
   void copy_dbi2dbvec();
   double compute_sfac(double, double);
   double compute_dsfac(double, double);
 
   // timing members exist only when compiled with TIMING_INFO defined
 #ifdef TIMING_INFO
   double* timers;
   timespec starttime, endtime;
   int print;
   int counter;
 #endif
 
   //per sna class instance for OMP use
 
   double* bvec, ** dbvec;
   double** rij;
   int* inside;
   double* wj;
   double* rcutij;
   int nmax;
 
   void grow_rij(int);
 
   int twojmax, diagonalstyle;
   double*** uarraytot_r, *** uarraytot_i;
   double***** zarray_r, ***** zarray_i;
   double*** uarraytot_r_b, *** uarraytot_i_b;
   double***** zarray_r_b, ***** zarray_i_b;
   double*** uarray_r, *** uarray_i;
 
 private:
   double rmin0, rfac0;
 
   //use indexlist instead of loops, constructor generates these
   SNA_LOOPINDICES* idxj;
   int idxj_max;
   // data for bispectrum coefficients
 
   double***** cgarray;
   double** rootpqarray;
   double*** barray;
 
   // derivatives of data
 
   double**** duarray_r, **** duarray_i;
   double**** dbarray;
 
   // factorial lookup table and the largest argument it supports
   static const int nmaxfactorial = 167;
   static const double nfac_table[];
   double factorial(int);
 
   void create_twojmax_arrays();
   void destroy_twojmax_arrays();
   void init_clebsch_gordan();
   void init_rootpqarray();
   void jtostr(char*, int);
   void mtostr(char*, int, int);
   void print_clebsch_gordan(FILE*);
   void zero_uarraytot();
   void addself_uarraytot(double);
   void add_uarraytot(double, double, double);
   void add_uarraytot_omp(double, double, double);
   void compute_uarray(double, double, double,
                       double, double);
   void compute_uarray_omp(double, double, double,
                           double, double, int);
   double deltacg(int, int, int);
   int compute_ncoeff();
   void compute_duarray(double, double, double,
                        double, double, double, double, double);
 
   // if number of atoms are small use per atom arrays
   // for twojmax arrays, rij, inside, bvec
   // this will increase the memory footprint considerably,
   // but allows parallel filling and reuse of these arrays
   int use_shared_arrays;
 
   // Sets the style for the switching function
   // 0 = none
   // 1 = cosine
   int switch_flag;
 
   // Self-weight
   double wself;
 
+  int bzero_flag; // 1 if bzero subtracted from barray
+  double *bzero;  // array of B values for isolated atoms
 };
 
 }
 
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Invalid argument to factorial %d
 
 N must be >= 0 and <= 167, otherwise the factorial result is too
 large.
 
 */
diff --git a/src/balance.cpp b/src/balance.cpp
index 52f6072a6..47e7c0969 100644
--- a/src/balance.cpp
+++ b/src/balance.cpp
@@ -1,1336 +1,1338 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors, for weighted balancing: 
      Axel Kohlmeyer (Temple U), Iain Bethune (EPCC)
 ------------------------------------------------------------------------- */
 
 //#define BALANCE_DEBUG 1
 
 #include <mpi.h>
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include "balance.h"
 #include "atom.h"
 #include "comm.h"
 #include "rcb.h"
 #include "irregular.h"
 #include "domain.h"
 #include "force.h"
 #include "update.h"
 #include "group.h"
 #include "modify.h"
 #include "fix_store.h"
 #include "imbalance.h"
 #include "imbalance_group.h"
 #include "imbalance_time.h"
 #include "imbalance_neigh.h"
 #include "imbalance_store.h"
 #include "imbalance_var.h"
 #include "timer.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 // values of Balance::style, one per balancing method keyword (x/y/z, shift, rcb)
 enum{XYZ,SHIFT,BISECTION};
 // values of xflag/yflag/zflag: dimension not specified, uniform cuts, user cuts
 enum{NONE,UNIFORM,USER};
 // NOTE(review): presumably dimension indices used by the shift code - confirm
 enum{X,Y,Z};
 // values compared against comm->layout
 enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED};    // several files
 
 /* ---------------------------------------------------------------------- */
 
 Balance::Balance(LAMMPS *lmp) : Pointers(lmp)
 {
   // proc count and my rank within the world communicator
 
   MPI_Comm_size(world,&nprocs);
   MPI_Comm_rank(world,&me);
 
   // nothing is allocated yet: every owned pointer starts out NULL
 
   user_xsplit = NULL;
   user_ysplit = NULL;
   user_zsplit = NULL;
   proccost = NULL;
   allproccost = NULL;
   rcb = NULL;
   imbalances = NULL;
   fixstore = NULL;
   fp = NULL;
 
   // counters and flags
 
   shift_allocate = 0;
   nimbalance = 0;
   firststep = 1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 Balance::~Balance()
 {
   // per-proc cost arrays are released through the Memory class
 
   memory->destroy(proccost);
   memory->destroy(allproccost);
 
   // user-specified cut positions
 
   delete [] user_xsplit;
   delete [] user_ysplit;
   delete [] user_zsplit;
 
   // work arrays of the shift method exist only if flagged as allocated
 
   if (shift_allocate) {
     delete [] bdim;
     delete [] onecost;
     delete [] allcost;
     delete [] sum;
     delete [] target;
     delete [] lo;
     delete [] hi;
     delete [] losum;
     delete [] hisum;
   }
 
   delete rcb;
 
   // each Imbalance instance first, then the array that held them
 
   int n = 0;
   while (n < nimbalance) {
     delete imbalances[n];
     n++;
   }
   delete [] imbalances;
 
   // check nfix in case all fixes have already been deleted
 
   if (fixstore) {
     if (modify->nfix) modify->delete_fix(fixstore->id);
   }
   fixstore = NULL;
 
   if (fp != NULL) fclose(fp);
 }
 
 /* ----------------------------------------------------------------------
    called as balance command in input script
 ------------------------------------------------------------------------- */
 
 void Balance::command(int narg, char **arg)
 {
   if (domain->box_exist == 0)
     error->all(FLERR,"Balance command before simulation box is defined");
 
   if (me == 0 && screen) fprintf(screen,"Balancing ...\n");
 
   // parse required arguments
 
   if (narg < 2) error->all(FLERR,"Illegal balance command");
 
   thresh = force->numeric(FLERR,arg[0]);
 
   int dimension = domain->dimension;
   int *procgrid = comm->procgrid;
   style = -1;
   xflag = yflag = zflag = NONE;
 
   int iarg = 1;
   while (iarg < narg) {
     if (strcmp(arg[iarg],"x") == 0) {
       if (style != -1 && style != XYZ)
         error->all(FLERR,"Illegal balance command");
       style = XYZ;
       if (strcmp(arg[iarg+1],"uniform") == 0) {
         if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
         xflag = UNIFORM;
         iarg += 2;
       } else {
         if (1 + procgrid[0]-1 > narg)
           error->all(FLERR,"Illegal balance command");
         xflag = USER;
         delete [] user_xsplit;
         user_xsplit = new double[procgrid[0]+1];
         user_xsplit[0] = 0.0;
         iarg++;
         for (int i = 1; i < procgrid[0]; i++)
           user_xsplit[i] = force->numeric(FLERR,arg[iarg++]);
         user_xsplit[procgrid[0]] = 1.0;
       }
     } else if (strcmp(arg[iarg],"y") == 0) {
       if (style != -1 && style != XYZ)
         error->all(FLERR,"Illegal balance command");
       style = XYZ;
       if (strcmp(arg[iarg+1],"uniform") == 0) {
         if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
         yflag = UNIFORM;
         iarg += 2;
       } else {
         if (1 + procgrid[1]-1 > narg)
           error->all(FLERR,"Illegal balance command");
         yflag = USER;
         delete [] user_ysplit;
         user_ysplit = new double[procgrid[1]+1];
         user_ysplit[0] = 0.0;
         iarg++;
         for (int i = 1; i < procgrid[1]; i++)
           user_ysplit[i] = force->numeric(FLERR,arg[iarg++]);
         user_ysplit[procgrid[1]] = 1.0;
       }
     } else if (strcmp(arg[iarg],"z") == 0) {
       if (style != -1 && style != XYZ)
         error->all(FLERR,"Illegal balance command");
       style = XYZ;
       if (strcmp(arg[iarg+1],"uniform") == 0) {
         if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
         zflag = UNIFORM;
         iarg += 2;
       } else {
         if (1 + procgrid[2]-1 > narg)
           error->all(FLERR,"Illegal balance command");
         zflag = USER;
         delete [] user_zsplit;
         user_zsplit = new double[procgrid[2]+1];
         user_zsplit[0] = 0.0;
         iarg++;
         for (int i = 1; i < procgrid[2]; i++)
           user_zsplit[i] = force->numeric(FLERR,arg[iarg++]);
         user_zsplit[procgrid[2]] = 1.0;
       }
 
     } else if (strcmp(arg[iarg],"shift") == 0) {
       if (style != -1) error->all(FLERR,"Illegal balance command");
       if (iarg+4 > narg) error->all(FLERR,"Illegal balance command");
       style = SHIFT;
       if (strlen(arg[iarg+1]) > 3) error->all(FLERR,"Illegal balance command");
       strcpy(bstr,arg[iarg+1]);
       nitermax = force->inumeric(FLERR,arg[iarg+2]);
       if (nitermax <= 0) error->all(FLERR,"Illegal balance command");
       stopthresh = force->numeric(FLERR,arg[iarg+3]);
       if (stopthresh < 1.0) error->all(FLERR,"Illegal balance command");
       iarg += 4;
 
     } else if (strcmp(arg[iarg],"rcb") == 0) {
       if (style != -1) error->all(FLERR,"Illegal balance command");
       style = BISECTION;
       iarg++;
 
     } else break;
   }
 
   // error checks
 
   if (style == XYZ) {
     if (zflag != NONE  && dimension == 2)
       error->all(FLERR,"Cannot balance in z dimension for 2d simulation");
 
     if (xflag == USER)
       for (int i = 1; i <= procgrid[0]; i++)
         if (user_xsplit[i-1] >= user_xsplit[i])
           error->all(FLERR,"Illegal balance command");
     if (yflag == USER)
       for (int i = 1; i <= procgrid[1]; i++)
         if (user_ysplit[i-1] >= user_ysplit[i])
           error->all(FLERR,"Illegal balance command");
     if (zflag == USER)
       for (int i = 1; i <= procgrid[2]; i++)
         if (user_zsplit[i-1] >= user_zsplit[i])
           error->all(FLERR,"Illegal balance command");
   }
 
   if (style == SHIFT) {
     const int blen=strlen(bstr);
     for (int i = 0; i < blen; i++) {
       if (bstr[i] != 'x' && bstr[i] != 'y' && bstr[i] != 'z')
         error->all(FLERR,"Balance shift string is invalid");
       if (bstr[i] == 'z' && dimension == 2)
         error->all(FLERR,"Balance shift string is invalid");
       for (int j = i+1; j < blen; j++)
         if (bstr[i] == bstr[j])
           error->all(FLERR,"Balance shift string is invalid");
     }
   }
 
   if (style == BISECTION && comm->style == 0)
     error->all(FLERR,"Balance rcb cannot be used with comm_style brick");
 
   // process remaining optional args
 
   options(iarg,narg,arg);
   if (wtflag) weight_storage(NULL);
 
   // insure particles are in current box & update box via shrink-wrap
   // init entire system since comm->setup is done
   // comm::init needs neighbor::init needs pair::init needs kspace::init, etc
+  // must reset atom map after exchange() since it clears it
 
   MPI_Barrier(world);
   double start_time = MPI_Wtime();
 
   lmp->init();
 
   if (domain->triclinic) domain->x2lamda(atom->nlocal);
   domain->pbc();
   domain->reset_box();
   comm->setup();
   comm->exchange();
+  if (atom->map_style) atom->map_set();
   if (domain->triclinic) domain->lamda2x(atom->nlocal);
 
   // imbinit = initial imbalance
 
   double maxinit;
   init_imbalance(0);
   set_weights();
   double imbinit = imbalance_factor(maxinit);
 
   // no load-balance if imbalance doesn't exceed threshold
   // unless switching from tiled to non tiled layout, then force rebalance
 
   if (comm->layout == LAYOUT_TILED && style != BISECTION) {
   } else if (imbinit < thresh) return;
 
   // debug output of initial state
 
 #ifdef BALANCE_DEBUG
   if (outflag) dumpout(update->ntimestep);
 #endif
 
   int niter = 0;
   
   // perform load-balance
   // style XYZ = explicit setting of cutting planes of logical 3d grid
 
   if (style == XYZ) {
     if (comm->layout == LAYOUT_UNIFORM) {
       if (xflag == USER || yflag == USER || zflag == USER)
         comm->layout = LAYOUT_NONUNIFORM;
     } else if (comm->style == LAYOUT_NONUNIFORM) {
       if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
         comm->layout = LAYOUT_UNIFORM;
     } else if (comm->style == LAYOUT_TILED) {
       if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
         comm->layout = LAYOUT_UNIFORM;
       else comm->layout = LAYOUT_NONUNIFORM;
     }
 
     if (xflag == UNIFORM) {
       for (int i = 0; i < procgrid[0]; i++)
         comm->xsplit[i] = i * 1.0/procgrid[0];
       comm->xsplit[procgrid[0]] = 1.0;
     } else if (xflag == USER)
       for (int i = 0; i <= procgrid[0]; i++) comm->xsplit[i] = user_xsplit[i];
 
     if (yflag == UNIFORM) {
       for (int i = 0; i < procgrid[1]; i++)
         comm->ysplit[i] = i * 1.0/procgrid[1];
       comm->ysplit[procgrid[1]] = 1.0;
     } else if (yflag == USER)
       for (int i = 0; i <= procgrid[1]; i++) comm->ysplit[i] = user_ysplit[i];
 
     if (zflag == UNIFORM) {
       for (int i = 0; i < procgrid[2]; i++)
         comm->zsplit[i] = i * 1.0/procgrid[2];
       comm->zsplit[procgrid[2]] = 1.0;
     } else if (zflag == USER)
       for (int i = 0; i <= procgrid[2]; i++) comm->zsplit[i] = user_zsplit[i];
   }
 
   // style SHIFT = adjust cutting planes of logical 3d grid
 
   if (style == SHIFT) {
     comm->layout = LAYOUT_NONUNIFORM;
     shift_setup_static(bstr);
     niter = shift();
   }
 
   // style BISECTION = recursive coordinate bisectioning
 
   if (style == BISECTION) {
     comm->layout = LAYOUT_TILED;
     bisection(1);
   }
 
   // reset proc sub-domains
   // for either brick or tiled comm style
 
   if (domain->triclinic) domain->set_lamda_box();
   domain->set_local_box();
 
   // move particles to new processors via irregular()
 
   if (domain->triclinic) domain->x2lamda(atom->nlocal);
   Irregular *irregular = new Irregular(lmp);
   if (wtflag) fixstore->disable = 0;
   if (style == BISECTION) irregular->migrate_atoms(1,1,rcb->sendproc);
   else irregular->migrate_atoms(1);
   if (wtflag) fixstore->disable = 1;
   delete irregular;
   if (domain->triclinic) domain->lamda2x(atom->nlocal);
 
   // output of final result
 
   if (outflag) dumpout(update->ntimestep);
 
   // check if any particles were lost
 
   bigint natoms;
   bigint nblocal = atom->nlocal;
   MPI_Allreduce(&nblocal,&natoms,1,MPI_LMP_BIGINT,MPI_SUM,world);
   if (natoms != atom->natoms) {
     char str[128];
     sprintf(str,"Lost atoms via balance: original " BIGINT_FORMAT
             " current " BIGINT_FORMAT,atom->natoms,natoms);
     error->all(FLERR,str);
   }
 
   // imbfinal = final imbalance
 
   double maxfinal;
   double imbfinal = imbalance_factor(maxfinal);
 
   // stats output
 
   double stop_time = MPI_Wtime();
 
   if (me == 0) {
     if (screen) {
       fprintf(screen,"  rebalancing time: %g seconds\n",stop_time-start_time);
       fprintf(screen,"  iteration count = %d\n",niter);
       for (int i = 0; i < nimbalance; ++i) imbalances[i]->info(screen);
       fprintf(screen,"  initial/final max load/proc = %g %g\n",
               maxinit,maxfinal);
       fprintf(screen,"  initial/final imbalance factor = %g %g\n",
               imbinit,imbfinal);
     }
     if (logfile) {
       fprintf(logfile,"  rebalancing time: %g seconds\n",stop_time-start_time);
       fprintf(logfile,"  iteration count = %d\n",niter);
       for (int i = 0; i < nimbalance; ++i) imbalances[i]->info(logfile);
       fprintf(logfile,"  initial/final max load/proc = %g %g\n",
               maxinit,maxfinal);
       fprintf(logfile,"  initial/final imbalance factor = %g %g\n",
               imbinit,imbfinal);
     }
   }
 
   if (style != BISECTION) {
     if (me == 0) {
       if (screen) {
         fprintf(screen,"  x cuts:");
         for (int i = 0; i <= comm->procgrid[0]; i++)
           fprintf(screen," %g",comm->xsplit[i]);
         fprintf(screen,"\n");
         fprintf(screen,"  y cuts:");
         for (int i = 0; i <= comm->procgrid[1]; i++)
           fprintf(screen," %g",comm->ysplit[i]);
         fprintf(screen,"\n");
         fprintf(screen,"  z cuts:");
         for (int i = 0; i <= comm->procgrid[2]; i++)
           fprintf(screen," %g",comm->zsplit[i]);
         fprintf(screen,"\n");
       }
       if (logfile) {
         fprintf(logfile,"  x cuts:");
         for (int i = 0; i <= comm->procgrid[0]; i++)
           fprintf(logfile," %g",comm->xsplit[i]);
         fprintf(logfile,"\n");
         fprintf(logfile,"  y cuts:");
         for (int i = 0; i <= comm->procgrid[1]; i++)
           fprintf(logfile," %g",comm->ysplit[i]);
         fprintf(logfile,"\n");
         fprintf(logfile,"  z cuts:");
         for (int i = 0; i <= comm->procgrid[2]; i++)
           fprintf(logfile," %g",comm->zsplit[i]);
         fprintf(logfile,"\n");
       }
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    process optional command args for Balance and FixBalance
 ------------------------------------------------------------------------- */
 
 void Balance::options(int iarg, int narg, char **arg)
 {
   // count max number of weight settings
 
   nimbalance = 0;
   for (int i = iarg; i < narg; i++)
     if (strcmp(arg[i],"weight") == 0) nimbalance++;
   if (nimbalance) imbalances = new Imbalance*[nimbalance];
   nimbalance = 0;
 
   wtflag = 0;
   varflag = 0;
   oldrcb = 0;
   outflag = 0;
   int outarg = 0;
   fp = NULL;
 
   while (iarg < narg) {
     if (strcmp(arg[iarg],"weight") == 0) {
       wtflag = 1;
       Imbalance *imb;
       int nopt = 0;
       if (strcmp(arg[iarg+1],"group") == 0) {
         imb = new ImbalanceGroup(lmp);
         nopt = imb->options(narg-iarg,arg+iarg+2);
         imbalances[nimbalance++] = imb;
       } else if (strcmp(arg[iarg+1],"time") == 0) {
         imb = new ImbalanceTime(lmp);
         nopt = imb->options(narg-iarg,arg+iarg+2);
         imbalances[nimbalance++] = imb;
       } else if (strcmp(arg[iarg+1],"neigh") == 0) {
         imb = new ImbalanceNeigh(lmp);
         nopt = imb->options(narg-iarg,arg+iarg+2);
         imbalances[nimbalance++] = imb;
       } else if (strcmp(arg[iarg+1],"var") == 0) {
         varflag = 1;
         imb = new ImbalanceVar(lmp);
         nopt = imb->options(narg-iarg,arg+iarg+2);
         imbalances[nimbalance++] = imb;
       } else if (strcmp(arg[iarg+1],"store") == 0) {
         imb = new ImbalanceStore(lmp);
         nopt = imb->options(narg-iarg,arg+iarg+2);
         imbalances[nimbalance++] = imb;
       } else {
         error->all(FLERR,"Unknown (fix) balance weight method");
       }
       iarg += 2+nopt;
 
     } else if (strcmp(arg[iarg],"old") == 0) {
       oldrcb = 1;
       iarg++;
     } else if (strcmp(arg[iarg],"out") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal (fix) balance command");
       outflag = 1;
       outarg = iarg+1;
       iarg += 2;
     } else error->all(FLERR,"Illegal (fix) balance command");
   }
 
   // output file
 
   if (outflag && comm->me == 0) {
     fp = fopen(arg[outarg],"w");
     if (fp == NULL) error->one(FLERR,"Cannot open (fix) balance output file");
   }
 }
 
 /* ----------------------------------------------------------------------
    allocate per-particle weight storage via FixStore
    use prefix to distinguish Balance vs FixBalance storage
    fix could already be allocated if fix balance is re-specified
 ------------------------------------------------------------------------- */
 
 void Balance::weight_storage(char *prefix)
 {
   // build the argument list for the STORE fix
   // its ID is "IMBALANCE_WEIGHTS", preceded by prefix if one is given
 
   char *fixargs[6];
 
   if (prefix) {
     int n = strlen(prefix) + 32;
     fixargs[0] = new char[n];
     strcpy(fixargs[0],prefix);
     strcat(fixargs[0],"IMBALANCE_WEIGHTS");
   } else fixargs[0] = (char *) "IMBALANCE_WEIGHTS";
 
   fixargs[1] = (char *) "all";
   fixargs[2] = (char *) "STORE";
   fixargs[3] = (char *) "peratom";
   fixargs[4] = (char *) "0";
   fixargs[5] = (char *) "1";
 
   // re-use the fix if it already exists, else create it
   // a negative index means not found, index 0 is a valid existing fix
 
   int ifix = modify->find_fix(fixargs[0]);
   if (ifix < 0) {
     modify->add_fix(6,fixargs);
     fixstore = (FixStore *) modify->fix[modify->nfix-1];
   } else fixstore = (FixStore *) modify->fix[ifix];
 
   // the stored fix starts out disabled
 
   fixstore->disable = 1;
 
   if (prefix) delete [] fixargs[0];
 }
 
 /* ----------------------------------------------------------------------
    invoke init() for each Imbalance class
    flag = 0 for call from Balance, 1 for call from FixBalance
 ------------------------------------------------------------------------- */
 
 void Balance::init_imbalance(int flag)
 {
   // nothing to initialize unless weights are in use
 
   if (wtflag) {
     int n = 0;
     while (n < nimbalance) {
       imbalances[n]->init(flag);
       n++;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    set weight for each particle
    via list of Nimbalance classes
 ------------------------------------------------------------------------- */
 
 void Balance::set_weights()
 {
   if (wtflag) {
     weight = fixstore->vstore;
 
     // every local particle starts at weight 1.0
 
     const int nlocal = atom->nlocal;
     int i = 0;
     while (i < nlocal) weight[i++] = 1.0;
 
     // then each Imbalance instance is handed the weight array in turn
 
     int n = 0;
     while (n < nimbalance) {
       imbalances[n]->compute(weight);
       n++;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    calculate imbalance factor based on particle count or particle weights
    return max = max load per proc
    return imbalance = max load per proc / ave load per proc
 ------------------------------------------------------------------------- */
 
 double Balance::imbalance_factor(double &maxcost)
 {
   // my cost = local particle count, or sum of local weights if weighted
 
   double mycost;
 
   if (!wtflag) {
     mycost = atom->nlocal;
   } else {
     weight = fixstore->vstore;
     const int nlocal = atom->nlocal;
 
     mycost = 0.0;
     int i = 0;
     while (i < nlocal) {
       mycost += weight[i];
       i++;
     }
   }
 
   // max and total cost across all procs
 
   double totalcost;
   MPI_Allreduce(&mycost,&maxcost,1,MPI_DOUBLE,MPI_MAX,world);
   MPI_Allreduce(&mycost,&totalcost,1,MPI_DOUBLE,MPI_SUM,world);
 
   // factor is 1.0 when no proc has any cost
 
   if (maxcost > 0.0) return maxcost / (totalcost/nprocs);
   return 1.0;
 }
 
 /* ----------------------------------------------------------------------
    perform balancing via RCB class
    sortflag = flag for sorting order of received messages by proc ID
    return list of procs to send my atoms to
 ------------------------------------------------------------------------- */
 
 int *Balance::bisection(int sortflag)
 {
   if (!rcb) rcb = new RCB(lmp);
 
   // NOTE: this logic is specific to orthogonal boxes, not triclinic
 
   int dim = domain->dimension;
   double *boxlo = domain->boxlo;
   double *boxhi = domain->boxhi;
   double *prd = domain->prd;
 
   // shrink-wrap simulation box around atoms for input to RCB
   // leads to better-shaped sub-boxes when atoms are far from box boundaries
   // entries 0-2 track the lower extent, entries 3-5 the upper extent
 
   double shrink[6],shrinkall[6];
 
   for (int d = 0; d < 3; d++) {
     shrink[d] = boxhi[d];
     shrink[d+3] = boxlo[d];
   }
 
   double **x = atom->x;
   int nlocal = atom->nlocal;
   for (int i = 0; i < nlocal; i++) {
     for (int d = 0; d < 3; d++) shrink[d] = MIN(shrink[d],x[i][d]);
     for (int d = 0; d < 3; d++) shrink[d+3] = MAX(shrink[d+3],x[i][d]);
   }
 
   // negate the upper extents so a single MIN reduction handles all 6 values
 
   for (int d = 3; d < 6; d++) shrink[d] = -shrink[d];
   MPI_Allreduce(shrink,shrinkall,6,MPI_DOUBLE,MPI_MIN,world);
   for (int d = 3; d < 6; d++) shrinkall[d] = -shrinkall[d];
 
   double *shrinklo = &shrinkall[0];
   double *shrinkhi = &shrinkall[3];
 
   // invoke RCB
   // then invert() to create list of proc assignments for my atoms
   // NOTE: (3/2017) can remove undocumented "old" option at some point
   //       ditto in rcb.cpp
 
   if (wtflag) {
     weight = fixstore->vstore;
     if (oldrcb)
       rcb->compute_old(dim,atom->nlocal,atom->x,weight,shrinklo,shrinkhi);
     else
       rcb->compute(dim,atom->nlocal,atom->x,weight,shrinklo,shrinkhi);
   } else {
     if (oldrcb)
       rcb->compute_old(dim,atom->nlocal,atom->x,NULL,shrinklo,shrinkhi);
     else
       rcb->compute(dim,atom->nlocal,atom->x,NULL,shrinklo,shrinkhi);
   }
 
   rcb->invert(sortflag);
 
   // reset RCB lo/hi bounding box to full simulation box as needed
 
   double *lo = rcb->lo;
   double *hi = rcb->hi;
 
   for (int d = 0; d < 3; d++)
     if (lo[d] == shrinklo[d]) lo[d] = boxlo[d];
   for (int d = 0; d < 3; d++)
     if (hi[d] == shrinkhi[d]) hi[d] = boxhi[d];
 
   // store RCB cut, dim, lo/hi box in CommTiled
   // cut and lo/hi are stored in fractional form relative to the box
   // OK if changes by epsilon from what RCB used since atoms
   //   will subsequently migrate to new owning procs by exchange() anyway
   // ditto for atoms exactly on lo/hi RCB box boundaries due to ties
 
   comm->rcbnew = 1;
 
   int idim = rcb->cutdim;
   if (idim < 0) comm->rcbcutfrac = 0.0;
   else comm->rcbcutfrac = (rcb->cut - boxlo[idim]) / prd[idim];
   comm->rcbcutdim = idim;
 
   // an upper bound equal to the box face maps to exactly 1.0
 
   double (*mysplit)[2] = comm->mysplit;
 
   for (int d = 0; d < 3; d++) {
     mysplit[d][0] = (lo[d] - boxlo[d]) / prd[d];
     if (hi[d] == boxhi[d]) mysplit[d][1] = 1.0;
     else mysplit[d][1] = (hi[d] - boxlo[d]) / prd[d];
   }
 
   // return list of procs to send my atoms to
 
   return rcb->sendproc;
 }
 
 /* ----------------------------------------------------------------------
    setup static load balance operations
    called from command and indirectly initially from fix balance
    set rho = 0 for static balancing
 ------------------------------------------------------------------------- */
 
 void Balance::shift_setup_static(char *str)
 {
   shift_allocate = 1;
 
   memory->create(proccost,nprocs,"balance:proccost");
   memory->create(allproccost,nprocs,"balance:allproccost");
 
   // one balancing dimension per character of str
   // characters other than x,y,z leave their bdim entry unassigned
 
   ndim = strlen(str);
   bdim = new int[ndim];
 
   for (int idim = 0; idim < ndim; idim++) {
     switch (str[idim]) {
     case 'x': bdim[idim] = X; break;
     case 'y': bdim[idim] = Y; break;
     case 'z': bdim[idim] = Z; break;
     }
   }
 
   // work arrays are sized by the largest processor count in any dimension
   // per-slice arrays need nmax entries, per-split arrays need nmax+1
 
   int *procgrid = comm->procgrid;
   int nmax = procgrid[0];
   if (procgrid[1] > nmax) nmax = procgrid[1];
   if (procgrid[2] > nmax) nmax = procgrid[2];
 
   onecost = new double[nmax];
   allcost = new double[nmax];
   sum = new double[nmax+1];
   target = new double[nmax+1];
   lo = new double[nmax+1];
   hi = new double[nmax+1];
   losum = new double[nmax+1];
   hisum = new double[nmax+1];
 
   // if current layout is TILED, set initial uniform splits in Comm
   // this gives starting point to subsequent shift balancing
 
   if (comm->layout == LAYOUT_TILED) {
     double *splits[3] = {comm->xsplit,comm->ysplit,comm->zsplit};
 
     for (int d = 0; d < 3; d++) {
       int np = procgrid[d];
       for (int i = 0; i < np; i++) splits[d][i] = i * 1.0/np;
       splits[d][np] = 1.0;
     }
   }
 
   // rho = 0 selects static balancing
 
   rho = 0;
 }
 
 /* ----------------------------------------------------------------------
    setup shift load balance operations
    called from fix balance
    set rho = 1 to do dynamic balancing after call to shift_setup_static()
 ------------------------------------------------------------------------- */
 
 void Balance::shift_setup(char *str, int nitermax_in, double thresh_in)
 {
   // iteration limit and stopping threshold are not touched
   // by the static setup, so they can be stored up front
 
   stopthresh = thresh_in;
   nitermax = nitermax_in;
 
   // static setup allocates the work arrays and resets rho to 0,
   // hence rho = 1 (dynamic balancing) must be assigned afterwards
 
   shift_setup_static(str);
   rho = 1;
 }
 
 /* ----------------------------------------------------------------------
    load balance by changing xyz split proc boundaries in Comm
    called one time from input script command or many times from fix balance
    return niter = iteration count
 ------------------------------------------------------------------------- */
 
 int Balance::shift()
 {
   // for each dimension listed in bdim, iteratively move the interior
   //   splits of that dimension in Comm until the cumulative cost below
   //   each split is close to its target
   // returns the total number of adjust()/tally() iterations performed
   //   over all dimensions, or 0 if the system has no atoms
 
   int i,j,k,m,np,max;
   double mycost,totalcost;
   double *split;
 
   // no balancing if no atoms
 
   bigint natoms = atom->natoms;
   if (natoms == 0) return 0;
 
   // set delta for 1d balancing = root of threshold
   // root = # of dimensions being balanced on
   // delta is the relative 1d tolerance and must stay constant
   //   for all dimensions processed below
 
   double delta = pow(stopthresh,1.0/ndim) - 1.0;
   int *procgrid = comm->procgrid;
 
   // all balancing done in lamda coords
 
   domain->x2lamda(atom->nlocal);
 
   // loop over dimensions in balance string
 
   int niter = 0;
   for (int idim = 0; idim < ndim; idim++) {
 
     // split = ptr to xyz split in Comm
     // entries of bdim that are not X,Y,Z are skipped
 
     if (bdim[idim] == X) split = comm->xsplit;
     else if (bdim[idim] == Y) split = comm->ysplit;
     else if (bdim[idim] == Z) split = comm->zsplit;
     else continue;
 
     // initial count and sum
 
     np = procgrid[bdim[idim]];
     tally(bdim[idim],np,split);
 
     // target[i] = desired sum at split I
     // total cost = sum of per-atom weights, or atom count without weights
 
     if (wtflag) {
       weight = fixstore->vstore;
       int nlocal = atom->nlocal;
       mycost = 0.0;
       for (i = 0; i < nlocal; i++) mycost += weight[i];
     } else mycost = atom->nlocal;
 
     MPI_Allreduce(&mycost,&totalcost,1,MPI_DOUBLE,MPI_SUM,world);
 
     for (i = 0; i < np; i++) target[i] = totalcost/np * i;
     target[np] = totalcost;
 
     // lo[i] = closest split <= split[i] with a sum <= target
     // hi[i] = closest split >= split[i] with a sum >= target
 
     lo[0] = hi[0] = 0.0;
     lo[np] = hi[np] = 1.0;
     losum[0] = hisum[0] = 0.0;
     losum[np] = hisum[np] = totalcost;
 
     for (i = 1; i < np; i++) {
       for (j = i; j >= 0; j--)
         if (sum[j] <= target[i]) {
           lo[i] = split[j];
           losum[i] = sum[j];
           break;
         }
       for (j = i; j <= np; j++)
         if (sum[j] >= target[i]) {
           hi[i] = split[j];
           hisum[i] = sum[j];
           break;
         }
     }
 
     // iterate until balanced
 
 #ifdef BALANCE_DEBUG
     if (me == 0) debug_shift_output(idim,0,np,split);
 #endif
 
     int doneflag;
     int change = 1;
     for (m = 0; m < nitermax; m++) {
       change = adjust(np,split);
       tally(bdim[idim],np,split);
       niter++;
 
 #ifdef BALANCE_DEBUG
       if (me == 0) debug_shift_output(idim,m+1,np,split);
       if (outflag) dumpout(update->ntimestep);
 #endif
 
       // stop if no change in splits, b/c all targets are met exactly
 
       if (!change) break;
 
       // stop if all split sums are within delta of targets
       // this is a 1d test of particle count per slice
       // assumption is that this is sufficient accuracy
       //   for 3d imbalance factor to reach threshold
 
       doneflag = 1;
       for (i = 1; i < np; i++)
         if (fabs(1.0*(sum[i]-target[i]))/target[i] > delta) doneflag = 0;
       if (doneflag) break;
     }
 
     // eliminate final adjacent splits that are duplicates
     // can happen if particle distribution is narrow and Nitermax is small
     // set lo = midpt between splits
     // spread duplicates out evenly between bounding midpts with non-duplicates
     // i,j = lo/hi indices of set of duplicate splits
     // spacing = new spacing between duplicates
     //   kept in its own variable so that the tolerance delta
     //   is not overwritten for the remaining dimensions
     // bounding midpts = lo[i-1] and lo[j]
 
     int duplicate = 0;
     for (i = 1; i < np-1; i++)
       if (split[i] == split[i+1]) duplicate = 1;
     if (duplicate) {
       for (i = 0; i < np; i++)
         lo[i] = 0.5 * (split[i] + split[i+1]);
       i = 1;
       while (i < np-1) {
         j = i+1;
 
         // scan stops at np so that split[] is never read past its
         //   last entry and the end point split[np] is never moved
 
         while (j < np && split[j] == split[i]) j++;
         j--;
         if (j > i) {
           const double spacing = (lo[j] - lo[i-1]) / (j-i+2);
           for (k = i; k <= j; k++)
             split[k] = lo[i-1] + (k-i+1)*spacing;
         }
         i = j+1;
       }
     }
 
     // sanity check on bad duplicate or inverted splits
     // zero or negative width sub-domains will break Comm class
     // should never happen if recursive multisection algorithm is correct
 
     int bad = 0;
     for (i = 0; i < np; i++)
       if (split[i] >= split[i+1]) bad = 1;
     if (bad) error->all(FLERR,"Balance produced bad splits");
     /*
       if (me == 0) {
       printf("BAD SPLITS %d %d %d\n",np+1,niter,delta);
       for (i = 0; i < np+1; i++)
       printf(" %g",split[i]);
       printf("\n");
       }
     */
 
     // stop at this point in bstr if imbalance factor < threshold
     // this is a true 3d test of particle count per processor
     // max only receives the max-cost output argument, it is not used here
 
     double imbfactor = imbalance_splits(max);
     if (imbfactor <= stopthresh) break;
   }
 
   // restore real coords
 
   domain->lamda2x(atom->nlocal);
 
   return niter;
 }
 
 /* ----------------------------------------------------------------------
    accumulate cost of owned atoms in each slice, based on their dim coordinate
    N = # of slices
    split = N+1 cuts between N slices
    each atom contributes its weight if wtflag is set, else 1.0
    use binary search to find which slice each atom is in
    per-slice costs are summed over all procs into allcost
    on return sum[i] = cumulative cost below split i, for i = 0 to N
 ------------------------------------------------------------------------- */
 
 void Balance::tally(int dim, int n, double *split)
 {
   for (int islice = 0; islice < n; islice++) onecost[islice] = 0.0;
 
   double **x = atom->x;
   const int nlocal = atom->nlocal;
 
   // weight vector is only fetched when weighting is active
 
   if (wtflag) weight = fixstore->vstore;
 
   for (int i = 0; i < nlocal; i++) {
     const int islice = binary(x[i][dim],n,split);
     onecost[islice] += wtflag ? weight[i] : 1.0;
   }
 
   MPI_Allreduce(onecost,allcost,n,MPI_DOUBLE,MPI_SUM,world);
 
   // running total of the global per-slice costs
 
   sum[0] = 0.0;
   for (int islice = 0; islice < n; islice++)
     sum[islice+1] = sum[islice] + allcost[islice];
 }
 
 /* ----------------------------------------------------------------------
    adjust cuts between N slices in a dim via recursive multisectioning method
    split = current N+1 cuts, with 0.0 and 1.0 at end points
    sum = cumulative count up to each split
    target = desired cumulative count up to each split
    lo/hi = split values that bound current split
    update lo/hi to reflect sums at current split values
    overwrite split with new cuts
      guaranteed that splits will remain in ascending order,
      though adjacent values may be identical
    recursive bisectioning zooms in on each cut by halving lo/hi
    return 0 if no changes in any splits, b/c they are all perfect
 ------------------------------------------------------------------------- */
 
 int Balance::adjust(int n, double *split)
 {
   int i;
   double fraction;
 
   // reset lo/hi based on current sum and splits
   // insure lo is monotonically increasing, ties are OK
   // insure hi is monotonically decreasing, ties are OK
   // this effectively uses info from nearby splits
   // to possibly tighten bounds on lo/hi
 
   for (i = 1; i < n; i++) {
     if (sum[i] <= target[i]) {
       lo[i] = split[i];
       losum[i] = sum[i];
     }
     if (sum[i] >= target[i]) {
       hi[i] = split[i];
       hisum[i] = sum[i];
     }
   }
   for (i = 1; i < n; i++)
     if (lo[i] < lo[i-1]) {
       lo[i] = lo[i-1];
       losum[i] = losum[i-1];
     }
   for (i = n-1; i > 0; i--)
     if (hi[i] > hi[i+1]) {
       hi[i] = hi[i+1];
       hisum[i] = hisum[i+1];
     }
 
   int change = 0;
   for (int i = 1; i < n; i++)
     if (sum[i] != target[i]) {
       change = 1;
       if (rho == 0) split[i] = 0.5 * (lo[i]+hi[i]);
       else {
         fraction = 1.0*(target[i]-losum[i]) / (hisum[i]-losum[i]);
         split[i] = lo[i] + fraction * (hi[i]-lo[i]);
       }
     }
   return change;
 }
 
 /* ----------------------------------------------------------------------
    calculate imbalance based on processor splits in 3 dims
    atoms must be in lamda coords (0-1) before called
    map particles to 3d grid of procs
    cost of an atom = its weight if wtflag is set, else 1.0
    return maxcost = max load per proc, truncated to an integer
    return imbalance factor = max load per proc / ave load per proc
      computed from the untruncated max load
      1.0 is returned if the max load is not positive
 ------------------------------------------------------------------------- */
 
 double Balance::imbalance_splits(int &maxcost)
 {
   double *xsplit = comm->xsplit;
   double *ysplit = comm->ysplit;
   double *zsplit = comm->zsplit;
 
   int nx = comm->procgrid[0];
   int ny = comm->procgrid[1];
   int nz = comm->procgrid[2];
 
   for (int i = 0; i < nprocs; i++) proccost[i] = 0.0;
 
   double **x = atom->x;
   int nlocal = atom->nlocal;
   int ix,iy,iz;
 
   // locate the grid cell of each owned atom in each dim
   // cell index runs fastest in x, then y, then z
 
   if (wtflag) weight = fixstore->vstore;
 
   for (int i = 0; i < nlocal; i++) {
     ix = binary(x[i][0],nx,xsplit);
     iy = binary(x[i][1],ny,ysplit);
     iz = binary(x[i][2],nz,zsplit);
     proccost[iz*nx*ny + iy*nx + ix] += wtflag ? weight[i] : 1.0;
   }
 
   // one proc's particles may map to many partitions, so must Allreduce
 
   MPI_Allreduce(proccost,allproccost,nprocs,MPI_DOUBLE,MPI_SUM,world);
 
   // accumulate the max in a double
   // accumulating directly in the integer output argument would truncate
   //   fractional weighted costs before the imbalance ratio is formed
 
   double maxall = 0.0;
   double totalcost = 0.0;
   for (int i = 0; i < nprocs; i++) {
     maxall = MAX(maxall,allproccost[i]);
     totalcost += allproccost[i];
   }
 
   maxcost = static_cast<int> (maxall);
 
   double imbalance = 1.0;
   if (maxall > 0.0) imbalance = maxall / (totalcost/nprocs);
   return imbalance;
 }
 
 /* ----------------------------------------------------------------------
    binary search for where value falls in N-length vec
    note that vec actually has N+1 values, but the last one is ignored
    values in vec are monotonically increasing, but adjacent values can be ties
    value may be outside range of vec limits
    always return index from 0 to N-1 inclusive
    return 0 if value < vec[0]
    return N-1 if value >= vec[N-1]
    else return index = 0 to N-2 inclusive
      such that vec[index] <= value < vec[index+1]
    note that for adjacent tie values, index of lower tie is not returned
      since never satisfies 2nd condition that value < vec[index+1]
 ------------------------------------------------------------------------- */
 
 int Balance::binary(double value, int n, double *vec)
 {
   int left = 0;
   int right = n-1;
 
   // values outside the searched range map to the end slices
 
   if (value < vec[left]) return left;
   if (value >= vec[right]) return right;
 
   // invariant: vec[left] <= value < vec[right]
   // halve the interval until left and right are adjacent
   // the final midpoint then coincides with left
 
   int mid = (left+right)/2;
   for ( ; right-left > 1; mid = (left+right)/2) {
     if (value < vec[mid]) right = mid;
     else if (value >= vec[mid]) left = mid;
   }
 
   return mid;
 }
 
 /* ----------------------------------------------------------------------
    write dump snapshot of line segments in Pizza.py mdump mesh format
    write xy lines around each proc's sub-domain for 2d
    write xyz cubes around each proc's sub-domain for 3d
    must be called by all procs, since the sub-boxes are collected
      with MPI_Allgather(), but only proc 0 writes to the file
    orthogonal box: nodes are the corners of each proc's sub-box
    triclinic box: nodes are taken from domain->corners after calling
      domain->lamda_box_corners() on each proc's lamda sub-box
 ------------------------------------------------------------------------- */
 
 void Balance::dumpout(bigint tstep)
 {
   int dimension = domain->dimension;
   int triclinic = domain->triclinic;
 
   // Allgather each proc's sub-box
   // could use Gather, but that requires MPI to alloc memory
 
   double *lo,*hi;
   if (triclinic == 0) {
     lo = domain->sublo;
     hi = domain->subhi;
   } else {
     lo = domain->sublo_lamda;
     hi = domain->subhi_lamda;
   }
 
   double box[6];
   box[0] = lo[0]; box[1] = lo[1]; box[2] = lo[2];
   box[3] = hi[0]; box[4] = hi[1]; box[5] = hi[2];
 
   double **boxall;
   memory->create(boxall,nprocs,6,"balance:dumpout");
   MPI_Allgather(box,6,MPI_DOUBLE,&boxall[0][0],6,MPI_DOUBLE,world);
 
   // all procs except proc 0 are done after the collective call
 
   if (me) {
     memory->destroy(boxall);
     return;
   }
 
   // proc 0 writes out nodal coords
   // some will be duplicates
 
   double *boxlo = domain->boxlo;
   double *boxhi = domain->boxhi;
 
   fprintf(fp,"ITEM: TIMESTEP\n");
   fprintf(fp,BIGINT_FORMAT "\n",tstep);
   fprintf(fp,"ITEM: NUMBER OF NODES\n");
   if (dimension == 2) fprintf(fp,"%d\n",4*nprocs);
   else fprintf(fp,"%d\n",8*nprocs);
   fprintf(fp,"ITEM: BOX BOUNDS\n");
   fprintf(fp,"%g %g\n",boxlo[0],boxhi[0]);
   fprintf(fp,"%g %g\n",boxlo[1],boxhi[1]);
   fprintf(fp,"%g %g\n",boxlo[2],boxhi[2]);
   fprintf(fp,"ITEM: NODES\n");
 
   if (triclinic == 0) {
     if (dimension == 2) {
       int m = 0;
       for (int i = 0; i < nprocs; i++) {
         fprintf(fp,"%d %d %g %g %g\n",m+1,1,boxall[i][0],boxall[i][1],0.0);
         fprintf(fp,"%d %d %g %g %g\n",m+2,1,boxall[i][3],boxall[i][1],0.0);
         fprintf(fp,"%d %d %g %g %g\n",m+3,1,boxall[i][3],boxall[i][4],0.0);
         fprintf(fp,"%d %d %g %g %g\n",m+4,1,boxall[i][0],boxall[i][4],0.0);
         m += 4;
       }
     } else {
       int m = 0;
       for (int i = 0; i < nprocs; i++) {
         fprintf(fp,"%d %d %g %g %g\n",m+1,1,
                 boxall[i][0],boxall[i][1],boxall[i][2]);
         fprintf(fp,"%d %d %g %g %g\n",m+2,1,
                 boxall[i][3],boxall[i][1],boxall[i][2]);
         fprintf(fp,"%d %d %g %g %g\n",m+3,1,
                 boxall[i][3],boxall[i][4],boxall[i][2]);
         fprintf(fp,"%d %d %g %g %g\n",m+4,1,
                 boxall[i][0],boxall[i][4],boxall[i][2]);
         fprintf(fp,"%d %d %g %g %g\n",m+5,1,
                 boxall[i][0],boxall[i][1],boxall[i][5]);
         fprintf(fp,"%d %d %g %g %g\n",m+6,1,
                 boxall[i][3],boxall[i][1],boxall[i][5]);
         fprintf(fp,"%d %d %g %g %g\n",m+7,1,
                 boxall[i][3],boxall[i][4],boxall[i][5]);
         fprintf(fp,"%d %d %g %g %g\n",m+8,1,
                 boxall[i][0],boxall[i][4],boxall[i][5]);
         m += 8;
       }
     }
 
   } else {
 
     // corners are read from domain->corners after each call to
     //   lamda_box_corners() (presumably that call fills them - confirm)
     // 2d uses the first 4 corners with z = 0.0
     // 3d uses all 8 corners with their own z coordinate bc[k][2]
 
     double (*bc)[3] = domain->corners;
     const int ncorner = (dimension == 2) ? 4 : 8;
 
     int m = 0;
     for (int i = 0; i < nprocs; i++) {
       domain->lamda_box_corners(&boxall[i][0],&boxall[i][3]);
       for (int k = 0; k < ncorner; k++) {
         const double z = (dimension == 2) ? 0.0 : bc[k][2];
         fprintf(fp,"%d %d %g %g %g\n",m+k+1,1,bc[k][0],bc[k][1],z);
       }
       m += ncorner;
     }
   }
 
   // write out one square/cube per processor for 2d/3d
   // each element references the 4 or 8 node IDs written for that proc
 
   fprintf(fp,"ITEM: TIMESTEP\n");
   fprintf(fp,BIGINT_FORMAT "\n",tstep);
   if (dimension == 2) fprintf(fp,"ITEM: NUMBER OF SQUARES\n");
   else fprintf(fp,"ITEM: NUMBER OF CUBES\n");
   fprintf(fp,"%d\n",nprocs);
   if (dimension == 2) fprintf(fp,"ITEM: SQUARES\n");
   else fprintf(fp,"ITEM: CUBES\n");
 
   if (dimension == 2) {
     int m = 0;
     for (int i = 0; i < nprocs; i++) {
       fprintf(fp,"%d %d %d %d %d %d\n",i+1,1,m+1,m+2,m+3,m+4);
       m += 4;
     }
   } else {
     int m = 0;
     for (int i = 0; i < nprocs; i++) {
       fprintf(fp,"%d %d %d %d %d %d %d %d %d %d\n",
               i+1,1,m+1,m+2,m+3,m+4,m+5,m+6,m+7,m+8);
       m += 8;
     }
   }
 
   memory->destroy(boxall);
 }
 
 /* ----------------------------------------------------------------------
    debug output for Idim and count
    only called by proc 0
 ------------------------------------------------------------------------- */
 
 #ifdef BALANCE_DEBUG
 void Balance::debug_shift_output(int idim, int m, int np, double *split)
 {
   // print the balancing state of dimension bdim[idim] at iteration m to stderr
   // np = # of slices, split = np+1 cuts in fractional (0-1) form
   // sum, target, losum, hisum are arrays of doubles, so they are
   //   printed with %g, an integer format would not match the arguments
   // NOTE(review): count[] is not filled by any code visible near this
   //   function, confirm it is still maintained before relying on it
 
   int i;
   const char *dim = NULL;
 
   double *boxlo = domain->boxlo;
   double *prd = domain->prd;
 
   if (bdim[idim] == X) dim = "X";
   else if (bdim[idim] == Y) dim = "Y";
   else if (bdim[idim] == Z) dim = "Z";
   fprintf(stderr,"Dimension %s, Iteration %d\n",dim,m);
 
   fprintf(stderr,"  Count:");
   for (i = 0; i < np; i++) fprintf(stderr," " BIGINT_FORMAT,count[i]);
   fprintf(stderr,"\n");
   fprintf(stderr,"  Sum:");
   for (i = 0; i <= np; i++) fprintf(stderr," %g",sum[i]);
   fprintf(stderr,"\n");
   fprintf(stderr,"  Target:");
   for (i = 0; i <= np; i++) fprintf(stderr," %g",target[i]);
   fprintf(stderr,"\n");
 
   // cuts converted from fractional form to box coords
 
   fprintf(stderr,"  Actual cut:");
   for (i = 0; i <= np; i++)
     fprintf(stderr," %g",boxlo[bdim[idim]] + split[i]*prd[bdim[idim]]);
   fprintf(stderr,"\n");
   fprintf(stderr,"  Split:");
   for (i = 0; i <= np; i++) fprintf(stderr," %g",split[i]);
   fprintf(stderr,"\n");
   fprintf(stderr,"  Low:");
   for (i = 0; i <= np; i++) fprintf(stderr," %g",lo[i]);
   fprintf(stderr,"\n");
   fprintf(stderr,"  Low-sum:");
   for (i = 0; i <= np; i++) fprintf(stderr," %g",losum[i]);
   fprintf(stderr,"\n");
   fprintf(stderr,"  Hi:");
   for (i = 0; i <= np; i++) fprintf(stderr," %g",hi[i]);
   fprintf(stderr,"\n");
   fprintf(stderr,"  Hi-sum:");
   for (i = 0; i <= np; i++) fprintf(stderr," %g",hisum[i]);
   fprintf(stderr,"\n");
 
   // width of each slice in fractional form
 
   fprintf(stderr,"  Delta:");
   for (i = 0; i < np; i++) fprintf(stderr," %g",split[i+1]-split[i]);
   fprintf(stderr,"\n");
 
   bigint max = 0;
   for (i = 0; i < np; i++) max = MAX(max,count[i]);
   fprintf(stderr,"  Imbalance factor: %g\n",1.0*max*np/target[np]);
 }
 #endif
diff --git a/src/comm.cpp b/src/comm.cpp
index b558b3fd8..871675ca8 100644
--- a/src/comm.cpp
+++ b/src/comm.cpp
@@ -1,789 +1,790 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include <mpi.h>
 #include <stdlib.h>
 #include <string.h>
 #include "comm.h"
 #include "universe.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "force.h"
 #include "pair.h"
 #include "modify.h"
 #include "fix.h"
 #include "compute.h"
 #include "domain.h"
 #include "output.h"
 #include "dump.h"
 #include "group.h"
 #include "procmap.h"
 #include "accelerator_kokkos.h"
 #include "memory.h"
 #include "error.h"
 
 #ifdef _OPENMP
 #include <omp.h>
 #endif
 
 using namespace LAMMPS_NS;
 
 #define BUFMIN 1000             // also in comm styles
 
 // values of mode, selected by the comm_modify mode keyword
 enum{SINGLE,MULTI};             // same as in Comm sub-styles
 enum{MULTIPLE};                   // same as in ProcMap
 // values of gridflag, the constructor default is ONELEVEL
 enum{ONELEVEL,TWOLEVEL,NUMA,CUSTOM};
 // values of mapflag, the constructor default is CART
 enum{CART,CARTREORDER,XYZ};
 // values of the layout setting of the Comm class
 enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED};    // several files
 
 /* ---------------------------------------------------------------------- */
 
 Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
 {
   MPI_Comm_rank(world,&me);
   MPI_Comm_size(world,&nprocs);
 
   // defaults for settings that comm_modify can change
 
   mode = 0;
   bordergroup = 0;
   ghost_velocity = 0;
   cutghostuser = 0.0;
   cutusermulti = NULL;
 
   // defaults for the processor grid settings
 
   for (int idim = 0; idim < 3; idim++) {
     user_procgrid[idim] = 0;
     coregrid[idim] = 1;
   }
   gridflag = ONELEVEL;
   mapflag = CART;
   customfile = NULL;
   outfile = NULL;
   send_to_partition = recv_from_partition = -1;
   otherflag = 0;
   maxexchange_atom = maxexchange_fix = 0;
 
   // no grid mapping or split arrays exist yet
 
   grid2proc = NULL;
   xsplit = ysplit = zsplit = NULL;
   rcbnew = 0;
 
   // use of OpenMP threads
   // query OpenMP for number of threads/process set by user at run-time
   // if the OMP_NUM_THREADS environment variable is not set, we default
   // to using 1 thread. This follows the principle of the least surprise,
   // while practically all OpenMP implementations violate it by using
   // as many threads as there are (virtual) CPU cores by default.
 
   nthreads = 1;
 #ifdef _OPENMP
   if (lmp->kokkos) {
     nthreads = lmp->kokkos->num_threads * lmp->kokkos->numa;
   } else if (getenv("OMP_NUM_THREADS") != NULL) {
     nthreads = omp_get_max_threads();
   } else if (me == 0) {
 
     // nthreads keeps its default of 1, only proc 0 reports it
 
     error->message(FLERR,"OMP_NUM_THREADS environment is not set. "
                          "Defaulting to 1 thread.");
   }
 
   // enforce consistent number of threads across all MPI tasks
   // the value of proc 0 is used everywhere
 
   MPI_Bcast(&nthreads,1,MPI_INT,0,world);
   if (!lmp->kokkos) omp_set_num_threads(nthreads);
 
   if (me == 0) {
     FILE *out[2] = {screen,logfile};
     for (int iout = 0; iout < 2; iout++)
       if (out[iout])
         fprintf(out[iout],
                 "  using %d OpenMP thread(s) per MPI task\n",nthreads);
   }
 #endif
 
 }
 
 /* ---------------------------------------------------------------------- */
 
 Comm::~Comm()
 {
   // file name strings
 
   delete [] outfile;
   delete [] customfile;
 
   // arrays released through the Memory class
 
   memory->destroy(cutusermulti);
   memory->destroy(zsplit);
   memory->destroy(ysplit);
   memory->destroy(xsplit);
   memory->destroy(grid2proc);
 }
 
 /* ----------------------------------------------------------------------
    deep copy of arrays from old Comm class to new one
    all public/protected vectors/arrays in parent Comm class must be copied
    called from alternate constructor of child classes
    when new comm style is created from Input
 ------------------------------------------------------------------------- */
 
 void Comm::copy_arrays(Comm *oldcomm)
 {
   // processor grid map and the splits of each dimension
   // sizes are taken from this object's procgrid
   // NOTE(review): presumably procgrid already holds the values of
   //   oldcomm when this is called - confirm against the calling constructor
 
   if (oldcomm->grid2proc) {
     memory->create(grid2proc,procgrid[0],procgrid[1],procgrid[2],
                    "comm:grid2proc");
     memcpy(&grid2proc[0][0][0],&oldcomm->grid2proc[0][0][0],
            (procgrid[0]*procgrid[1]*procgrid[2])*sizeof(int));
 
     memory->create(xsplit,procgrid[0]+1,"comm:xsplit");
     memory->create(ysplit,procgrid[1]+1,"comm:ysplit");
     memory->create(zsplit,procgrid[2]+1,"comm:zsplit");
     memcpy(xsplit,oldcomm->xsplit,(procgrid[0]+1)*sizeof(double));
     memcpy(ysplit,oldcomm->ysplit,(procgrid[1]+1)*sizeof(double));
     memcpy(zsplit,oldcomm->zsplit,(procgrid[2]+1)*sizeof(double));
   }
 
   // per-type cutoffs, ntypes+1 entries
   // memcpy() counts bytes, so the element count is scaled by the
   //   element size to copy the whole array
 
   if (oldcomm->cutusermulti) {
     memory->create(cutusermulti,atom->ntypes+1,"comm:cutusermulti");
     memcpy(cutusermulti,oldcomm->cutusermulti,
            (atom->ntypes+1)*sizeof(double));
   }
 
   // file names are replaced by private copies of the old strings
   // NOTE(review): the tests use this object's own pointers, which
   //   presumably were shallow-copied from oldcomm beforehand - confirm
 
   if (customfile) {
     int n = strlen(oldcomm->customfile) + 1;
     customfile = new char[n];
     strcpy(customfile,oldcomm->customfile);
   }
   if (outfile) {
     int n = strlen(oldcomm->outfile) + 1;
     outfile = new char[n];
     strcpy(outfile,oldcomm->outfile);
   }
 }
 
 /* ----------------------------------------------------------------------
    common to all Comm styles
 ------------------------------------------------------------------------- */
 
 void Comm::init()
 {
   triclinic = domain->triclinic;
   map_style = atom->map_style;
 
   // check warn if any proc's subbox is smaller than neigh skin
   //   since may lead to lost atoms in exchange()
   // really should check every exchange() in case box size is shrinking
   //   but seems overkill to do that (fix balance does perform this check)
 
   domain->subbox_too_small_check(neighbor->skin);
 
   // start from the flags and per-atom sizes of the atom style
 
   comm_x_only = atom->avec->comm_x_only;
   comm_f_only = atom->avec->comm_f_only;
 
   size_forward = atom->avec->size_forward;
   size_reverse = atom->avec->size_reverse;
   size_border = atom->avec->size_border;
 
   // ghost velocities: more than x is communicated
   // and forward/border comm grow by the velocity size
 
   if (ghost_velocity) {
     comm_x_only = 0;
     size_forward += atom->avec->size_velocity;
     size_border += atom->avec->size_velocity;
   }
 
   // fixes may add quantities to border comm
 
   const int nfix = modify->nfix;
   for (int ifix = 0; ifix < nfix; ifix++)
     size_border += modify->fix[ifix]->comm_border;
 
   // per-atom limits for communication
   // maxexchange = max # of datums in exchange comm, set in exchange()
   // maxforward = # of datums in largest forward comm
   // maxreverse = # of datums in largest reverse comm
   // query pair,fix,compute,dump for their requirements
 
   maxforward = MAX(size_forward,size_border);
   maxreverse = size_reverse;
 
   if (force->pair) {
     if (force->pair->comm_forward > maxforward)
       maxforward = force->pair->comm_forward;
     if (force->pair->comm_reverse > maxreverse)
       maxreverse = force->pair->comm_reverse;
   }
 
   for (int ifix = 0; ifix < nfix; ifix++) {
     if (modify->fix[ifix]->comm_forward > maxforward)
       maxforward = modify->fix[ifix]->comm_forward;
     if (modify->fix[ifix]->comm_reverse > maxreverse)
       maxreverse = modify->fix[ifix]->comm_reverse;
   }
 
   for (int icompute = 0; icompute < modify->ncompute; icompute++) {
     if (modify->compute[icompute]->comm_forward > maxforward)
       maxforward = modify->compute[icompute]->comm_forward;
     if (modify->compute[icompute]->comm_reverse > maxreverse)
       maxreverse = modify->compute[icompute]->comm_reverse;
   }
 
   for (int idump = 0; idump < output->ndump; idump++) {
     if (output->dump[idump]->comm_forward > maxforward)
       maxforward = output->dump[idump]->comm_forward;
     if (output->dump[idump]->comm_reverse > maxreverse)
       maxreverse = output->dump[idump]->comm_reverse;
   }
 
   // no reverse comm with newton off
   // except that a pair style can force it via comm_reverse_off
   // the order of these two statements matters
 
   if (force->newton == 0) maxreverse = 0;
   if (force->pair && force->pair->comm_reverse_off > maxreverse)
     maxreverse = force->pair->comm_reverse_off;
 }
 
 /* ----------------------------------------------------------------------
    modify communication params
    invoked from input script by comm_modify command
 ------------------------------------------------------------------------- */
 
 void Comm::modify_params(int narg, char **arg)
 {
   // parse keyword/value groups from arg until all narg entries are consumed
   // any unknown keyword or missing value is reported via error->all()
 
   if (narg < 1) error->all(FLERR,"Illegal comm_modify command");
 
   int iarg = 0;
   while (iarg < narg) {
 
     // mode single|multi
 
     if (strcmp(arg[iarg],"mode") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal comm_modify command");
       if (strcmp(arg[iarg+1],"single") == 0) {
         // need to reset cutghostuser when switching comm mode
         if (mode == MULTI) cutghostuser = 0.0;
         // per-type cutoffs are discarded in single mode
         memory->destroy(cutusermulti);
         cutusermulti = NULL;
         mode = SINGLE;
       } else if (strcmp(arg[iarg+1],"multi") == 0) {
         // need to reset cutghostuser when switching comm mode
         if (mode == SINGLE) cutghostuser = 0.0;
         mode = MULTI;
       } else error->all(FLERR,"Illegal comm_modify command");
       iarg += 2;
 
     // group ID
     // a group with non-zero index must match the atom_modify first group
 
     } else if (strcmp(arg[iarg],"group") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal comm_modify command");
       bordergroup = group->find(arg[iarg+1]);
       if (bordergroup < 0)
         error->all(FLERR,"Invalid group in comm_modify command");
       if (bordergroup && (atom->firstgroupname == NULL ||
                           strcmp(arg[iarg+1],atom->firstgroupname) != 0))
         error->all(FLERR,"Comm_modify group != atom_modify first group");
       iarg += 2;
 
     // cutoff value, not allowed in multi mode
 
     } else if (strcmp(arg[iarg],"cutoff") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal comm_modify command");
       if (mode == MULTI)
-        error->all(FLERR,"Use cutoff/multi keyword to set cutoff in multi mode");
+        error->all(FLERR,
+                   "Use cutoff/multi keyword to set cutoff in multi mode");
       cutghostuser = force->numeric(FLERR,arg[iarg+1]);
       if (cutghostuser < 0.0)
         error->all(FLERR,"Invalid cutoff in comm_modify command");
       iarg += 2;
 
     // cutoff/multi type-range value, not allowed in single mode
     // requires the simulation box to exist, since ntypes sizes the array
 
     } else if (strcmp(arg[iarg],"cutoff/multi") == 0) {
       int i,nlo,nhi;
       double cut;
       if (mode == SINGLE)
         error->all(FLERR,"Use cutoff keyword to set cutoff in single mode");
       if (domain->box_exist == 0)
         error->all(FLERR,
                    "Cannot set cutoff/multi before simulation box is defined");
       const int ntypes = atom->ntypes;
       if (iarg+3 > narg)
         error->all(FLERR,"Illegal comm_modify command");
       // first use: allocate ntypes+1 entries, all initialized to -1.0
       if (cutusermulti == NULL) {
         memory->create(cutusermulti,ntypes+1,"comm:cutusermulti");
         for (i=0; i < ntypes+1; ++i)
           cutusermulti[i] = -1.0;
       }
       force->bounds(FLERR,arg[iarg+1],ntypes,nlo,nhi,1);
       cut = force->numeric(FLERR,arg[iarg+2]);
       // cutghostuser tracks the largest per-type cutoff given so far
       // NOTE(review): it is updated before cut is validated below,
       //   harmless only if error->all() does not return - confirm
       cutghostuser = MAX(cutghostuser,cut);
       if (cut < 0.0)
         error->all(FLERR,"Invalid cutoff in comm_modify command");
       for (i=nlo; i<=nhi; ++i)
         cutusermulti[i] = cut;
       iarg += 3;
 
     // vel yes|no
 
     } else if (strcmp(arg[iarg],"vel") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal comm_modify command");
       if (strcmp(arg[iarg+1],"yes") == 0) ghost_velocity = 1;
       else if (strcmp(arg[iarg+1],"no") == 0) ghost_velocity = 0;
       else error->all(FLERR,"Illegal comm_modify command");
       iarg += 2;
     } else error->all(FLERR,"Illegal comm_modify command");
   }
 }
 
 /* ----------------------------------------------------------------------
    set dimensions for 3d grid of processors, and associated flags
    invoked from input script by processors command
 ------------------------------------------------------------------------- */
 
 void Comm::set_processors(int narg, char **arg)
 {
   // parse the arguments of the processors command:
   //   arg[0..2] = requested proc counts in x,y,z, "*" is stored as 0
   //   remaining args = optional keyword/value groups (grid, map, part, file)
   // any malformed input ends in error->all()
 
   if (narg < 3) error->all(FLERR,"Illegal processors command");
 
   if (strcmp(arg[0],"*") == 0) user_procgrid[0] = 0;
   else user_procgrid[0] = force->inumeric(FLERR,arg[0]);
   if (strcmp(arg[1],"*") == 0) user_procgrid[1] = 0;
   else user_procgrid[1] = force->inumeric(FLERR,arg[1]);
   if (strcmp(arg[2],"*") == 0) user_procgrid[2] = 0;
   else user_procgrid[2] = force->inumeric(FLERR,arg[2]);
 
   if (user_procgrid[0] < 0 || user_procgrid[1] < 0 || user_procgrid[2] < 0)
     error->all(FLERR,"Illegal processors command");
 
   // p is non-zero only if all 3 dims were given explicitly
   // in that case the product must match the actual proc count
 
   int p = user_procgrid[0]*user_procgrid[1]*user_procgrid[2];
   if (p && p != nprocs)
     error->all(FLERR,"Specified processors != physical processors");
 
   int iarg = 3;
   while (iarg < narg) {
     if (strcmp(arg[iarg],"grid") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal processors command");
 
       // each grid style advances iarg by its extra args only
       // the shared "iarg += 2" below skips the keyword and style name
 
       if (strcmp(arg[iarg+1],"onelevel") == 0) {
         gridflag = ONELEVEL;
 
       } else if (strcmp(arg[iarg+1],"twolevel") == 0) {
         // twolevel takes 4 extra args: ncores and a 3d core grid ("*" = 0)
         if (iarg+6 > narg) error->all(FLERR,"Illegal processors command");
         gridflag = TWOLEVEL;
 
         ncores = force->inumeric(FLERR,arg[iarg+2]);
         if (strcmp(arg[iarg+3],"*") == 0) user_coregrid[0] = 0;
         else user_coregrid[0] = force->inumeric(FLERR,arg[iarg+3]);
         if (strcmp(arg[iarg+4],"*") == 0) user_coregrid[1] = 0;
         else user_coregrid[1] = force->inumeric(FLERR,arg[iarg+4]);
         if (strcmp(arg[iarg+5],"*") == 0) user_coregrid[2] = 0;
         else user_coregrid[2] = force->inumeric(FLERR,arg[iarg+5]);
 
         if (ncores <= 0 || user_coregrid[0] < 0 ||
             user_coregrid[1] < 0 || user_coregrid[2] < 0)
           error->all(FLERR,"Illegal processors command");
         iarg += 4;
 
       } else if (strcmp(arg[iarg+1],"numa") == 0) {
         gridflag = NUMA;
 
       } else if (strcmp(arg[iarg+1],"custom") == 0) {
         // custom takes 1 extra arg: a file name, copied into customfile
         // any previously stored name is freed first
         if (iarg+3 > narg) error->all(FLERR,"Illegal processors command");
         gridflag = CUSTOM;
         delete [] customfile;
         int n = strlen(arg[iarg+2]) + 1;
         customfile = new char[n];
         strcpy(customfile,arg[iarg+2]);
         iarg += 1;
 
       } else error->all(FLERR,"Illegal processors command");
       iarg += 2;
 
     } else if (strcmp(arg[iarg],"map") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal processors command");
       if (strcmp(arg[iarg+1],"cart") == 0) mapflag = CART;
       else if (strcmp(arg[iarg+1],"cart/reorder") == 0) mapflag = CARTREORDER;
       else if (strcmp(arg[iarg+1],"xyz") == 0 ||
                strcmp(arg[iarg+1],"xzy") == 0 ||
                strcmp(arg[iarg+1],"yxz") == 0 ||
                strcmp(arg[iarg+1],"yzx") == 0 ||
                strcmp(arg[iarg+1],"zxy") == 0 ||
                strcmp(arg[iarg+1],"zyx") == 0) {
         mapflag = XYZ;
         // copies exactly the 3 permutation chars, strncpy adds no terminator
         // NOTE(review): assumes xyz holds >= 3 chars and consumers read
         //   only xyz[0..2] - confirm against the declaration in comm.h
         strncpy(xyz,arg[iarg+1],3);
       } else error->all(FLERR,"Illegal processors command");
       iarg += 2;
 
     } else if (strcmp(arg[iarg],"part") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal processors command");
       if (universe->nworlds == 1)
         error->all(FLERR,
                    "Cannot use processors part command "
                    "without using partitions");
 
       // isend,irecv are 1-based partition IDs as given by the user
       // universe->iworld and the stored partner IDs are 0-based
 
       int isend = force->inumeric(FLERR,arg[iarg+1]);
       int irecv = force->inumeric(FLERR,arg[iarg+2]);
       if (isend < 1 || isend > universe->nworlds ||
           irecv < 1 || irecv > universe->nworlds || isend == irecv)
         error->all(FLERR,"Invalid partitions in processors part command");
       if (isend-1 == universe->iworld) {
         if (send_to_partition >= 0)
           error->all(FLERR,
                      "Sending partition in processors part command "
                      "is already a sender");
         send_to_partition = irecv-1;
       }
       if (irecv-1 == universe->iworld) {
         if (recv_from_partition >= 0)
           error->all(FLERR,
                      "Receiving partition in processors part command "
                      "is already a receiver");
         recv_from_partition = isend-1;
       }
 
       // only receiver has otherflag dependency
 
       if (strcmp(arg[iarg+3],"multiple") == 0) {
         if (universe->iworld == irecv-1) {
           otherflag = 1;
           other_style = MULTIPLE;
         }
       } else error->all(FLERR,"Illegal processors command");
       iarg += 4;
 
     } else if (strcmp(arg[iarg],"file") == 0) {
       // store a private copy of the output file name, replacing any old one
       if (iarg+2 > narg) error->all(FLERR,"Illegal processors command");
       delete [] outfile;
       int n = strlen(arg[iarg+1]) + 1;
       outfile = new char[n];
       strcpy(outfile,arg[iarg+1]);
       iarg += 2;
 
     } else error->all(FLERR,"Illegal processors command");
   }
 
   // error checks
   // done after all keywords are parsed so keyword order does not matter
 
   if (gridflag == NUMA && mapflag != CART)
     error->all(FLERR,"Processors grid numa and map style are incompatible");
   if (otherflag && (gridflag == NUMA || gridflag == CUSTOM))
     error->all(FLERR,
                "Processors part option and grid style are incompatible");
 }
 
 /* ----------------------------------------------------------------------
    create a 3d grid of procs based on Nprocs and box size & shape
    map processors to grid, setup xyz split for a uniform grid
 ------------------------------------------------------------------------- */
 
 void Comm::set_proc_grid(int outflag)
 {
   // outflag = non-zero to print the grid sizes to screen/logfile on proc 0
   // sequence: optional recv from another partition, build grid via ProcMap,
   //   map procs to grid, optional output, set uniform splits,
   //   optional send to another partition
 
   // recv 3d proc grid of another partition if my 3d grid depends on it
   // proc 0 receives from the root proc of the sending partition,
   //   then broadcasts to the rest of this partition
 
   if (recv_from_partition >= 0) {
     if (me == 0) {
       MPI_Recv(other_procgrid,3,MPI_INT,
                universe->root_proc[recv_from_partition],0,
                universe->uworld,MPI_STATUS_IGNORE);
       MPI_Recv(other_coregrid,3,MPI_INT,
                universe->root_proc[recv_from_partition],0,
                universe->uworld,MPI_STATUS_IGNORE);
     }
     MPI_Bcast(other_procgrid,3,MPI_INT,0,world);
     MPI_Bcast(other_coregrid,3,MPI_INT,0,world);
   }
 
   // create ProcMap class to create 3d grid and map procs to it
 
   ProcMap *pmap = new ProcMap(lmp);
 
   // create 3d grid of processors
   // produces procgrid and coregrid (if relevant)
 
   if (gridflag == ONELEVEL) {
     pmap->onelevel_grid(nprocs,user_procgrid,procgrid,
                         otherflag,other_style,other_procgrid,other_coregrid);
 
   } else if (gridflag == TWOLEVEL) {
     pmap->twolevel_grid(nprocs,user_procgrid,procgrid,
                         ncores,user_coregrid,coregrid,
                         otherflag,other_style,other_procgrid,other_coregrid);
 
   } else if (gridflag == NUMA) {
     pmap->numa_grid(nprocs,user_procgrid,procgrid,coregrid);
 
   } else if (gridflag == CUSTOM) {
     pmap->custom_grid(customfile,nprocs,user_procgrid,procgrid);
   }
 
   // error check on procgrid
   // should not be necessary due to ProcMap
 
   if (procgrid[0]*procgrid[1]*procgrid[2] != nprocs)
     error->all(FLERR,"Bad grid of processors");
   if (domain->dimension == 2 && procgrid[2] != 1)
     error->all(FLERR,"Processor count in z must be 1 for 2d simulation");
 
   // grid2proc[i][j][k] = proc that owns i,j,k location in 3d grid
   // any previous table is released before allocating at the new grid size
 
   if (grid2proc) memory->destroy(grid2proc);
   memory->create(grid2proc,procgrid[0],procgrid[1],procgrid[2],
                  "comm:grid2proc");
 
   // map processor IDs to 3d processor grid
   // produces myloc, procneigh, grid2proc
   // first arg of cart_map is 0 for CART and 1 for CARTREORDER
 
   if (gridflag == ONELEVEL) {
     if (mapflag == CART)
       pmap->cart_map(0,procgrid,myloc,procneigh,grid2proc);
     else if (mapflag == CARTREORDER)
       pmap->cart_map(1,procgrid,myloc,procneigh,grid2proc);
     else if (mapflag == XYZ)
       pmap->xyz_map(xyz,procgrid,myloc,procneigh,grid2proc);
 
   } else if (gridflag == TWOLEVEL) {
     if (mapflag == CART)
       pmap->cart_map(0,procgrid,ncores,coregrid,myloc,procneigh,grid2proc);
     else if (mapflag == CARTREORDER)
       pmap->cart_map(1,procgrid,ncores,coregrid,myloc,procneigh,grid2proc);
     else if (mapflag == XYZ)
       pmap->xyz_map(xyz,procgrid,ncores,coregrid,myloc,procneigh,grid2proc);
 
   } else if (gridflag == NUMA) {
     pmap->numa_map(0,coregrid,myloc,procneigh,grid2proc);
 
   } else if (gridflag == CUSTOM) {
     pmap->custom_map(procgrid,myloc,procneigh,grid2proc);
   }
 
   // print 3d grid info to screen and logfile
   // core grid line is printed only for the grid styles that produce coregrid
 
   if (outflag && me == 0) {
     if (screen) {
       fprintf(screen,"  %d by %d by %d MPI processor grid\n",
               procgrid[0],procgrid[1],procgrid[2]);
       if (gridflag == NUMA || gridflag == TWOLEVEL)
         fprintf(screen,"  %d by %d by %d core grid within node\n",
                 coregrid[0],coregrid[1],coregrid[2]);
     }
     if (logfile) {
       fprintf(logfile,"  %d by %d by %d MPI processor grid\n",
               procgrid[0],procgrid[1],procgrid[2]);
       if (gridflag == NUMA || gridflag == TWOLEVEL)
         fprintf(logfile,"  %d by %d by %d core grid within node\n",
                 coregrid[0],coregrid[1],coregrid[2]);
     }
   }
 
   // print 3d grid details to outfile
 
   if (outfile) pmap->output(outfile,procgrid,grid2proc);
 
   // free ProcMap class
 
   delete pmap;
 
   // set xsplit,ysplit,zsplit for uniform spacings
   // each split array has one more entry than procs in that dim
   // entries run from 0.0 to 1.0, the final 1.0 is assigned explicitly below
 
   memory->destroy(xsplit);
   memory->destroy(ysplit);
   memory->destroy(zsplit);
 
   memory->create(xsplit,procgrid[0]+1,"comm:xsplit");
   memory->create(ysplit,procgrid[1]+1,"comm:ysplit");
   memory->create(zsplit,procgrid[2]+1,"comm:zsplit");
 
   for (int i = 0; i < procgrid[0]; i++) xsplit[i] = i * 1.0/procgrid[0];
   for (int i = 0; i < procgrid[1]; i++) ysplit[i] = i * 1.0/procgrid[1];
   for (int i = 0; i < procgrid[2]; i++) zsplit[i] = i * 1.0/procgrid[2];
 
   xsplit[procgrid[0]] = ysplit[procgrid[1]] = zsplit[procgrid[2]] = 1.0;
 
   // set lamda box params after procs are assigned
   // only set once unless load-balancing occurs
 
   if (domain->triclinic) domain->set_lamda_box();
 
   // send my 3d proc grid to another partition if requested
   // only proc 0 sends, to the root proc of the receiving partition
   // the two sends match the two MPI_Recv calls at the top of this method
 
   if (send_to_partition >= 0) {
     if (me == 0) {
       MPI_Send(procgrid,3,MPI_INT,
                universe->root_proc[send_to_partition],0,
                universe->uworld);
       MPI_Send(coregrid,3,MPI_INT,
                universe->root_proc[send_to_partition],0,
                universe->uworld);
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    map coord x[3] to the proc that owns it under the current decomposition
    x is in box coords (orthogonal) or lamda coords (triclinic)
    layout = UNIFORM: grid indices are computed directly
    layout = NONUNIFORM: grid indices are found by binary search of the splits
    layout = TILED: not handled here, CommTiled has its own method
    igx,igy,igz are set to the logical grid location of the owning proc,
      clamped to the extent of the 3d grid of procs
    return value is the owning proc ID looked up in grid2proc
 ------------------------------------------------------------------------- */
 
 int Comm::coord2proc(double *x, int &igx, int &igy, int &igz)
 {
   double *len = domain->prd;
   double *lo = domain->boxlo;
 
   // refresh triclinic here b/c coord2proc can be called before Comm::init()
   // via Irregular::migrate_atoms()
 
   triclinic = domain->triclinic;
 
   if (layout == LAYOUT_UNIFORM && triclinic == 0) {
     igx = static_cast<int> (procgrid[0] * (x[0]-lo[0]) / len[0]);
     igy = static_cast<int> (procgrid[1] * (x[1]-lo[1]) / len[1]);
     igz = static_cast<int> (procgrid[2] * (x[2]-lo[2]) / len[2]);
 
   } else if (layout == LAYOUT_UNIFORM) {
     igx = static_cast<int> (procgrid[0] * x[0]);
     igy = static_cast<int> (procgrid[1] * x[1]);
     igz = static_cast<int> (procgrid[2] * x[2]);
 
   } else if (layout == LAYOUT_NONUNIFORM && triclinic == 0) {
     igx = binary((x[0]-lo[0])/len[0],procgrid[0],xsplit);
     igy = binary((x[1]-lo[1])/len[1],procgrid[1],ysplit);
     igz = binary((x[2]-lo[2])/len[2],procgrid[2],zsplit);
 
   } else if (layout == LAYOUT_NONUNIFORM) {
     igx = binary(x[0],procgrid[0],xsplit);
     igy = binary(x[1],procgrid[1],ysplit);
     igz = binary(x[2],procgrid[2],zsplit);
   }
 
   // force indices into the valid range of each grid dimension
   // coords outside the box thus map to the nearest edge proc
 
   const int xlast = procgrid[0] - 1;
   const int ylast = procgrid[1] - 1;
   const int zlast = procgrid[2] - 1;
 
   if (igx < 0) igx = 0;
   if (igx > xlast) igx = xlast;
   if (igy < 0) igy = 0;
   if (igy > ylast) igy = ylast;
   if (igz < 0) igz = 0;
   if (igz > zlast) igz = zlast;
 
   return grid2proc[igx][igy][igz];
 }
 
 /* ----------------------------------------------------------------------
    binary search for value in N-length ascending vec
    value may be outside range of vec limits
    always return index from 0 to N-1 inclusive
    return 0 if value < vec[0]
    return N-1 if value >= vec[N-1]
    return index = 0 to N-2 if vec[index] <= value < vec[index+1]
    if value is NaN, all comparisons are false: the search still terminates
      and returns an in-range index, but which one carries no meaning
 ------------------------------------------------------------------------- */
 
 int Comm::binary(double value, int n, double *vec)
 {
   int lo = 0;
   int hi = n-1;
 
   if (value < vec[lo]) return lo;
   if (value >= vec[hi]) return hi;
 
   // ensure vec[lo] <= value < vec[hi] at every iteration
   // done when lo,hi are adjacent
   // plain else (not a second comparison) guarantees the interval shrinks
   //   on every pass, so a NaN value cannot cause an infinite loop
 
   while (lo < hi-1) {
     const int mid = lo + (hi-lo)/2;
     if (value < vec[mid]) hi = mid;
     else lo = mid;
   }
 
   return lo;
 }
 
 /* ----------------------------------------------------------------------
    communicate inbuf around full ring of processors with messtag
    nbytes = size of inbuf = n datums * nper bytes
    callback() is invoked to allow caller to process/update each proc's inbuf
    if self=1 (default), then callback() is invoked on final iteration
      using original inbuf, which may have been updated
    for non-NULL outbuf, final updated inbuf is copied to it
      ok to specify outbuf = inbuf
 ------------------------------------------------------------------------- */
 
 void Comm::ring(int n, int nper, void *inbuf, int messtag,
                 void (*callback)(int, char *), void *outbuf, int self)
 {
   // all procs agree on the largest message that will circulate
 
   int nbytes = n*nper;
   int maxbytes;
   MPI_Allreduce(&nbytes,&maxbytes,1,MPI_INT,MPI_MAX,world);
 
   // nothing to pass around if no proc has any data
 
   if (maxbytes == 0) return;
 
   // current = data this proc holds now, incoming = landing zone for recv
 
   char *current,*incoming;
   memory->create(current,maxbytes,"comm:buf");
   memory->create(incoming,maxbytes,"comm:bufcopy");
   memcpy(current,inbuf,nbytes);
 
   // ring neighbors, with wrap-around at both ends
 
   const int next = (me+1 == nprocs) ? 0 : me+1;
   const int prev = (me-1 < 0) ? nprocs-1 : me-1;
 
   MPI_Request request;
   MPI_Status status;
 
   int loop = 0;
   while (loop < nprocs) {
     // skip the exchange when this proc is its own neighbor (single proc)
     if (me != next) {
       MPI_Irecv(incoming,maxbytes,MPI_CHAR,prev,messtag,world,&request);
       MPI_Send(current,nbytes,MPI_CHAR,next,messtag,world);
       MPI_Wait(&request,&status);
       // size of what arrived becomes the size of what is sent next
       MPI_Get_count(&status,MPI_CHAR,&nbytes);
       memcpy(current,incoming,nbytes);
     }
     // on the last pass the callback runs only if self is set
     if (self || loop < nprocs-1) callback(nbytes/nper,current);
     loop++;
   }
 
   if (outbuf) memcpy(outbuf,current,nbytes);
 
   memory->destroy(current);
   memory->destroy(incoming);
 }
 
 /* ----------------------------------------------------------------------
    proc 0 reads Nlines from file into buf and bcasts buf to all procs
    caller allocates buf to max size needed
    each line is terminated by newline, even if last line in file is not
    return 0 if successful, 1 if get EOF error before read is complete
 ------------------------------------------------------------------------- */
 
 int Comm::read_lines_from_file(FILE *fp, int nlines, int maxline, char *buf)
 {
   int m;
 
   if (me == 0) {
     // append lines back-to-back in buf, m = chars stored so far
     // a failed read discards everything by resetting m to 0
 
     m = 0;
     int iline = 0;
     while (iline < nlines) {
       char *line = &buf[m];
       if (fgets(line,maxline,fp) == NULL) {
         m = 0;
         break;
       }
       m += strlen(line);
       iline++;
     }
 
     // guarantee a trailing newline, then count the terminating null as well
 
     if (m) {
       if (buf[m-1] != '\n') {
         strcpy(&buf[m],"\n");
         m++;
       }
       m++;
     }
   }
 
   // every proc learns the length first, m = 0 signals the read failed
 
   MPI_Bcast(&m,1,MPI_INT,0,world);
   if (m == 0) return 1;
   MPI_Bcast(buf,m,MPI_CHAR,0,world);
   return 0;
 }
 
 /* ----------------------------------------------------------------------
    universe proc 0 reads Nlines from file into buf
      and bcasts buf to all procs in the universe communicator
    caller allocates buf to max size needed
    each line is terminated by newline, even if last line in file is not
    return 0 if successful, 1 if get EOF error before read is complete
 ------------------------------------------------------------------------- */
 
 int Comm::read_lines_from_file_universe(FILE *fp, int nlines, int maxline,
                                         char *buf)
 {
   int m;
 
   const int me_universe = universe->me;
   MPI_Comm uworld = universe->uworld;
 
   if (me_universe == 0) {
     // append lines back-to-back in buf, m = chars stored so far
     // a failed read discards everything by resetting m to 0
 
     m = 0;
     int iline = 0;
     while (iline < nlines) {
       char *line = &buf[m];
       if (fgets(line,maxline,fp) == NULL) {
         m = 0;
         break;
       }
       m += strlen(line);
       iline++;
     }
 
     // guarantee a trailing newline, then count the terminating null as well
 
     if (m) {
       if (buf[m-1] != '\n') {
         strcpy(&buf[m],"\n");
         m++;
       }
       m++;
     }
   }
 
   // every proc learns the length first, m = 0 signals the read failed
 
   MPI_Bcast(&m,1,MPI_INT,0,uworld);
   if (m == 0) return 1;
   MPI_Bcast(buf,m,MPI_CHAR,0,uworld);
   return 0;
 }
diff --git a/src/compute.cpp b/src/compute.cpp
index d1db42a24..00a3984aa 100644
--- a/src/compute.cpp
+++ b/src/compute.cpp
@@ -1,230 +1,234 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include <mpi.h>
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
 #include "compute.h"
 #include "atom.h"
 #include "domain.h"
 #include "force.h"
 #include "comm.h"
 #include "group.h"
 #include "modify.h"
 #include "fix.h"
 #include "atom_masks.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 // DELTA = number of entries tlist is extended by when it is full (addstep)
 // NOTE(review): BIG is not referenced in the visible part of this file
 #define DELTA 4
 #define BIG MAXTAGINT
 
 // allocate space for static class instance variable and initialize it
 // the constructor assigns its current value to instance_me, then increments
 
 int Compute::instance_total = 0;
 
 /* ---------------------------------------------------------------------- */
 
 // base-class constructor for all computes
 // arg[0] = compute ID, arg[1] = group ID, arg[2] = style name
 // stores copies of ID and style, looks up the group,
 //   and sets defaults for the flags child classes may override
 // errors out if narg < 3, the ID has illegal chars, or the group is unknown
 
 Compute::Compute(LAMMPS *lmp, int narg, char **arg) :
   Pointers(lmp),
   id(NULL), style(NULL),
   vector(NULL), array(NULL), vector_atom(NULL),
   array_atom(NULL), vector_local(NULL), array_local(NULL), extlist(NULL),
   tlist(NULL), vbiasall(NULL)
 {
   instance_me = instance_total++;
 
   if (narg < 3) error->all(FLERR,"Illegal compute command");
 
   // compute ID, group, and style
   // ID must be all alphanumeric chars or underscores
 
   int n = strlen(arg[0]) + 1;
   id = new char[n];
   strcpy(id,arg[0]);
 
   // cast to unsigned char before isalnum():
   //   passing a negative char value (non-ASCII byte) is undefined behavior
 
   for (int i = 0; i < n-1; i++)
     if (!isalnum(static_cast<unsigned char> (id[i])) && id[i] != '_')
       error->all(FLERR,
                  "Compute ID must be alphanumeric or underscore characters");
 
   igroup = group->find(arg[1]);
   if (igroup == -1) error->all(FLERR,"Could not find compute group ID");
   groupbit = group->bitmask[igroup];
 
   n = strlen(arg[2]) + 1;
   style = new char[n];
   strcpy(style,arg[2]);
 
   // set child class defaults
 
   scalar_flag = vector_flag = array_flag = 0;
   peratom_flag = local_flag = 0;
   size_vector_variable = size_array_rows_variable = 0;
 
   tempflag = pressflag = peflag = 0;
   pressatomflag = peatomflag = 0;
   create_attribute = 0;
   tempbias = 0;
 
   timeflag = 0;
   comm_forward = comm_reverse = 0;
   dynamic = 0;
   dynamic_group_allow = 1;
 
   invoked_scalar = invoked_vector = invoked_array = -1;
   invoked_peratom = invoked_local = -1;
   invoked_flag = 0;
 
   // set modify defaults
   // these are the values compute_modify can change (see modify_params)
 
   extra_dof = domain->dimension;
   dynamic_user = 0;
   fix_dof = 0;
 
   // setup list of timesteps
   // list starts empty, tlist is grown on demand by addstep()
 
   ntime = maxtime = 0;
 
   // data masks
 
   execution_space = Host;
   datamask_read = ALL_MASK;
   datamask_modify = ALL_MASK;
 
   copymode = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 Compute::~Compute()
 {
   // when copymode is set this instance frees nothing
 
   if (!copymode) {
     delete [] id;
     delete [] style;
     memory->destroy(tlist);
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void Compute::modify_params(int narg, char **arg)
 {
   // process the keyword/value pairs of a compute_modify command
   // every keyword handled here takes exactly one value
   // unknown keywords or missing/invalid values end in error->all()
 
   if (narg == 0) error->all(FLERR,"Illegal compute_modify command");
 
   int iarg = 0;
   while (iarg < narg) {
-    if (strcmp(arg[iarg],"extra") == 0) {
+    // added more specific keywords in Mar17
+    // at some point, remove generic extra and dynamic
+    if (strcmp(arg[iarg],"extra") == 0 || 
+        strcmp(arg[iarg],"extra/dof") == 0) {
       // numeric value is stored in extra_dof
       if (iarg+2 > narg) error->all(FLERR,"Illegal compute_modify command");
       extra_dof = force->numeric(FLERR,arg[iarg+1]);
       iarg += 2;
-    } else if (strcmp(arg[iarg],"dynamic") == 0) {
+    } else if (strcmp(arg[iarg],"dynamic") == 0 || 
+               strcmp(arg[iarg],"dynamic/dof") == 0) {
       // yes/no value is stored as 1/0 in dynamic_user
       if (iarg+2 > narg) error->all(FLERR,"Illegal compute_modify command");
       if (strcmp(arg[iarg+1],"no") == 0) dynamic_user = 0;
       else if (strcmp(arg[iarg+1],"yes") == 0) dynamic_user = 1;
       else error->all(FLERR,"Illegal compute_modify command");
       iarg += 2;
     } else error->all(FLERR,"Illegal compute_modify command");
   }
 }
 
 /* ----------------------------------------------------------------------
    calculate adjustment in DOF due to fixes
 ------------------------------------------------------------------------- */
 
 void Compute::adjust_dof_fix()
 {
   Fix **fix = modify->fix;
   int nfix = modify->nfix;
 
   fix_dof = 0;
   for (int i = 0; i < nfix; i++)
     if (fix[i]->dof_flag)
       fix_dof += fix[i]->dof(igroup);
 }
 
 /* ----------------------------------------------------------------------
    reset extra_dof to its default value
 ------------------------------------------------------------------------- */
 
 void Compute::reset_extra_dof()
 {
   // same value the constructor assigns: the dimensionality of the domain
   extra_dof = domain->dimension;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void Compute::reset_extra_compute_fix(const char *)
 {
   error->all(FLERR,
              "Compute does not allow an extra compute or fix to be reset");
 }
 
 /* ----------------------------------------------------------------------
    add ntimestep to list of timesteps the compute will be called on
    do not add if already in list
    search from top downward, since list of times is in decreasing order
 ------------------------------------------------------------------------- */
 
 void Compute::addstep(bigint ntimestep)
 {
   // walk down from the top of the list to find the insertion slot
   // ipos = index where ntimestep will be stored
   // a duplicate entry means there is nothing to do
 
   int ipos = ntime;
   while (ipos > 0) {
     if (ntimestep == tlist[ipos-1]) return;
     if (ntimestep < tlist[ipos-1]) break;
     ipos--;
   }
 
   // grow the list by DELTA entries when it is full
 
   if (ntime == maxtime) {
     maxtime += DELTA;
     memory->grow(tlist,maxtime,"compute:tlist");
   }
 
   // shift entries at ipos and above up by one, then store the new value
 
   for (int j = ntime; j > ipos; j--) tlist[j] = tlist[j-1];
   tlist[ipos] = ntimestep;
   ntime++;
 }
 
 /* ----------------------------------------------------------------------
    return 1/0 if ntimestep is or is not in list of calling timesteps
    if value(s) on top of list are less than ntimestep, delete them
    search from top downward, since list of times is in decreasing order
 ------------------------------------------------------------------------- */
 
 int Compute::matchstep(bigint ntimestep)
 {
   // inspect the top entry of the list until a verdict is reached
   // entries smaller than ntimestep are dropped by shrinking ntime
 
   while (ntime > 0) {
     const bigint top = tlist[ntime-1];
     if (ntimestep < top) return 0;
     if (ntimestep == top) return 1;
     ntime--;
   }
   return 0;
 }
 
 /* ----------------------------------------------------------------------
    clean out list of timesteps to call the compute on
 ------------------------------------------------------------------------- */
 
 void Compute::clearstep()
 {
   // only the count is reset, tlist and maxtime are left unchanged
   ntime = 0;
 }
diff --git a/src/fix.cpp b/src/fix.cpp
index 0a95bcc69..ca9a69606 100644
--- a/src/fix.cpp
+++ b/src/fix.cpp
@@ -1,311 +1,318 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include <string.h>
 #include <ctype.h>
 #include "fix.h"
 #include "atom.h"
 #include "group.h"
 #include "force.h"
 #include "comm.h"
 #include "atom_masks.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
 // allocate space for static class instance variable and initialize it
 // the constructor assigns its current value to instance_me, then increments
 
 int Fix::instance_total = 0;
 
 /* ---------------------------------------------------------------------- */
 
 // base-class constructor for all fixes
 // arg[0] = fix ID, arg[1] = group ID, arg[2] = style name
 // stores copies of ID and style, looks up the group,
 //   and sets defaults for the flags child classes may override
 // errors out if narg < 3, the ID has illegal chars, or the group is unknown
 
 Fix::Fix(LAMMPS *lmp, int narg, char **arg) :
   Pointers(lmp),
   id(NULL), style(NULL), extlist(NULL), vector_atom(NULL), array_atom(NULL),
   vector_local(NULL), array_local(NULL), eatom(NULL), vatom(NULL)
 {
   instance_me = instance_total++;
 
   // arg[0], arg[1], arg[2] are all read below
 
   if (narg < 3) error->all(FLERR,"Illegal fix command");
 
   // fix ID, group, and style
   // ID must be all alphanumeric chars or underscores
 
   int n = strlen(arg[0]) + 1;
   id = new char[n];
   strcpy(id,arg[0]);
 
   // cast to unsigned char before isalnum():
   //   passing a negative char value (non-ASCII byte) is undefined behavior
 
   for (int i = 0; i < n-1; i++)
     if (!isalnum(static_cast<unsigned char> (id[i])) && id[i] != '_')
       error->all(FLERR,"Fix ID must be alphanumeric or underscore characters");
 
   igroup = group->find(arg[1]);
   if (igroup == -1) error->all(FLERR,"Could not find fix group ID");
   groupbit = group->bitmask[igroup];
 
   n = strlen(arg[2]) + 1;
   style = new char[n];
   strcpy(style,arg[2]);
 
   // defaults for flags that child classes may override
 
   restart_global = restart_peratom = restart_file = 0;
   force_reneighbor = 0;
   box_change_size = box_change_shape = box_change_domain = 0;
   thermo_energy = 0;
   rigid_flag = 0;
   peatom_flag = 0;
   virial_flag = 0;
   no_change_box = 0;
   time_integrate = 0;
   time_depend = 0;
   create_attribute = 0;
   restart_pbc = 0;
   wd_header = wd_section = 0;
   dynamic_group_allow = 0;
+  dynamic = 0;
   dof_flag = 0;
   special_alter_flag = 0;
   enforce2d_flag = 0;
   respa_level_support = 0;
   respa_level = -1;
 
   scalar_flag = vector_flag = array_flag = 0;
   peratom_flag = local_flag = 0;
   size_vector_variable = size_array_rows_variable = 0;
 
   comm_forward = comm_reverse = comm_border = 0;
   restart_reset = 0;
 
   // reasonable defaults
   // however, each fix that uses these values should explicitly set them
 
   nevery = 1;
   global_freq = 1;
 
   // per-atom virial
   // set vflag_atom = 0 b/c some fixes grow vatom in grow_arrays()
   //   which may occur outside of timestepping
 
   maxeatom = maxvatom = 0;
   vflag_atom = 0;
 
   // KOKKOS per-fix data masks
 
   execution_space = Host;
   datamask_read = ALL_MASK;
   datamask_modify = ALL_MASK;
 
   kokkosable = 0;
   copymode = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 Fix::~Fix()
 {
   // when copymode is set this instance frees nothing
 
   if (!copymode) {
     delete [] id;
     delete [] style;
     memory->destroy(eatom);
     memory->destroy(vatom);
   }
 }
 
 /* ----------------------------------------------------------------------
    process params common to all fixes here
    if unknown param, call modify_param specific to the fix
 ------------------------------------------------------------------------- */
 
 void Fix::modify_params(int narg, char **arg)
 {
   // process the keyword/value pairs of a fix_modify command
   // keywords not handled here are passed on to modify_param()
 
   if (narg == 0) error->all(FLERR,"Illegal fix_modify command");
 
   int iarg = 0;
   while (iarg < narg) {
-    if (strcmp(arg[iarg],"energy") == 0) {
+    if (strcmp(arg[iarg],"dynamic/dof") == 0) {
+      if (iarg+2 > narg) error->all(FLERR,"Illegal fix_modify command");
+      if (strcmp(arg[iarg+1],"no") == 0) dynamic = 0;
+      else if (strcmp(arg[iarg+1],"yes") == 0) dynamic = 1;
+      else error->all(FLERR,"Illegal fix_modify command");
+      iarg += 2;
+    } else if (strcmp(arg[iarg],"energy") == 0) {
       // yes/no value is stored as 1/0 in thermo_energy
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix_modify command");
       if (strcmp(arg[iarg+1],"no") == 0) thermo_energy = 0;
       else if (strcmp(arg[iarg+1],"yes") == 0) thermo_energy = 1;
       else error->all(FLERR,"Illegal fix_modify command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"respa") == 0) {
       // only allowed for fixes that set respa_level_support
       // the value given is stored minus 1 in respa_level
       // NOTE(review): a value of 0 is accepted and stores -1,
       //   the same value the constructor sets as default - confirm intent
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix_modify command");
       if (!respa_level_support) error->all(FLERR,"Illegal fix_modify command");
       int lvl = force->inumeric(FLERR,arg[iarg+1]);
       if (lvl < 0) error->all(FLERR,"Illegal fix_modify command");
       respa_level = lvl-1;
       iarg += 2;
     } else {
       // modify_param() returns how many args it consumed
       // a return of 0 is treated as an unrecognized keyword
       int n = modify_param(narg-iarg,&arg[iarg]);
       if (n == 0) error->all(FLERR,"Illegal fix_modify command");
       iarg += n;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    setup for energy, virial computation
    see integrate::ev_set() for values of eflag (0-3) and vflag (0-6)
    fixes call this if use ev_tally()
 ------------------------------------------------------------------------- */
 
 void Fix::ev_setup(int eflag, int vflag)
 {
   int i,n;
 
   evflag = 1;
 
   eflag_either = eflag;
   eflag_global = eflag % 2;
   eflag_atom = eflag / 2;
 
   vflag_either = vflag;
   vflag_global = vflag % 4;
   vflag_atom = vflag / 4;
 
   // reallocate per-atom arrays if necessary
 
   if (eflag_atom && atom->nlocal > maxeatom) {
     maxeatom = atom->nmax;
     memory->destroy(eatom);
     memory->create(eatom,maxeatom,"fix:eatom");
   }
   if (vflag_atom && atom->nlocal > maxvatom) {
     maxvatom = atom->nmax;
     memory->destroy(vatom);
     memory->create(vatom,maxvatom,6,"fix:vatom");
   }
 
   // zero accumulators
   // no global energy variable to zero (unlike pair,bond,angle,etc)
   // fixes tally it individually via fix_modify energy yes and compute_scalar()
 
   if (vflag_global) for (i = 0; i < 6; i++) virial[i] = 0.0;
   if (eflag_atom) {
     n = atom->nlocal;
     for (i = 0; i < n; i++) eatom[i] = 0.0;
   }
   if (vflag_atom) {
     n = atom->nlocal;
     for (i = 0; i < n; i++) {
       vatom[i][0] = 0.0;
       vatom[i][1] = 0.0;
       vatom[i][2] = 0.0;
       vatom[i][3] = 0.0;
       vatom[i][4] = 0.0;
       vatom[i][5] = 0.0;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    setup for virial computation
    see integrate::ev_set() for values of vflag (0-6)
    fixes call this if use v_tally()
 ------------------------------------------------------------------------- */
 
 void Fix::v_setup(int vflag)
 {
   int i,n;
 
   evflag = 1;
 
   vflag_global = vflag % 4;
   vflag_atom = vflag / 4;
 
   // reallocate per-atom array if necessary
 
   if (vflag_atom && atom->nlocal > maxvatom) {
     maxvatom = atom->nmax;
     memory->destroy(vatom);
     memory->create(vatom,maxvatom,6,"fix:vatom");
   }
 
   // zero accumulators
 
   if (vflag_global) for (i = 0; i < 6; i++) virial[i] = 0.0;
   if (vflag_atom) {
     n = atom->nlocal;
     for (i = 0; i < n; i++) {
       vatom[i][0] = 0.0;
       vatom[i][1] = 0.0;
       vatom[i][2] = 0.0;
       vatom[i][3] = 0.0;
       vatom[i][4] = 0.0;
       vatom[i][5] = 0.0;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    tally per-atom energy and global/per-atom virial into accumulators
    n = # of local owned atoms involved, with local indices in list
    eng = total energy for the interaction involving total atoms
    v = total virial for the interaction involving total atoms
    increment per-atom energy of each atom in list by 1/total fraction
    v_tally tallies virial
    this method can be used when fix computes energy/forces in post_force()
      e.g. fix cmap: compute energy and virial only on owned atoms
        whether newton_bond is on or off
      other procs will tally left-over fractions for atoms they own
 ------------------------------------------------------------------------- */
 
 void Fix::ev_tally(int n, int *list, double total, double eng, double *v)
 {
   // each listed atom receives an equal eng/total share of the energy
 
   if (eflag_atom) {
     const double share = eng/total;
     int k = 0;
     while (k < n) {
       eatom[list[k]] += share;
       k++;
     }
   }
 
   // virial part is delegated to v_tally()
 
   v_tally(n,list,total,v);
 }
 
 
 /* ----------------------------------------------------------------------
    tally virial into global and per-atom accumulators
    n = # of local owned atoms involved, with local indices in list
    v = total virial for the interaction involving total atoms
    increment global virial by n/total fraction
    increment per-atom virial of each atom in list by 1/total fraction
    this method can be used when fix computes forces in post_force()
      e.g. fix shake, fix rigid: compute virial only on owned atoms
        whether newton_bond is on or off
      other procs will tally left-over fractions for atoms they own
 ------------------------------------------------------------------------- */
 
 void Fix::v_tally(int n, int *list, double total, double *v)
 {
   // global virial: add the n/total share of each of the 6 components
 
   if (vflag_global) {
     const double share = n/total;
     for (int k = 0; k < 6; k++) virial[k] += share*v[k];
   }
 
   // per-atom virial: each listed atom receives a 1/total share
 
   if (vflag_atom) {
     const double share = 1.0/total;
     for (int i = 0; i < n; i++) {
       const int m = list[i];
       for (int k = 0; k < 6; k++) vatom[m][k] += share*v[k];
     }
   }
 }
diff --git a/src/fix.h b/src/fix.h
index 8005da1ad..d91937848 100644
--- a/src/fix.h
+++ b/src/fix.h
@@ -1,283 +1,285 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifndef LMP_FIX_H
 #define LMP_FIX_H
 
 #include "pointers.h"
 
 namespace LAMMPS_NS {
 
 // Base class for fix styles (e.g. FixMomentum and FixNH derive from it).
 // It declares the per-fix settings and output descriptors as public data,
 // plus a set of virtual hooks whose default bodies are empty or return
 // 0 / 0.0 / NULL.  Only setmask() is pure virtual.
 class Fix : protected Pointers {
  public:
   static int instance_total;     // # of Fix classes ever instantiated
 
   char *id,*style;
   int igroup,groupbit;
 
   int restart_global;            // 1 if Fix saves global state, 0 if not
   int restart_peratom;           // 1 if Fix saves peratom state, 0 if not
   int restart_file;              // 1 if Fix writes own restart file, 0 if not
   int force_reneighbor;          // 1 if Fix forces reneighboring, 0 if not
 
   int box_change_size;           // 1 if Fix changes box size, 0 if not
   int box_change_shape;          // 1 if Fix changes box shape, 0 if not
   int box_change_domain;         // 1 if Fix changes proc sub-domains, 0 if not
 
   bigint next_reneighbor;        // next timestep to force a reneighboring
   int thermo_energy;             // 1 if fix_modify enabled ThEng, 0 if not
   int nevery;                    // how often to call an end_of_step fix
   int rigid_flag;                // 1 if Fix integrates rigid bodies, 0 if not
   int peatom_flag;               // 1 if Fix contributes per-atom eng, 0 if not
   int virial_flag;               // 1 if Fix contributes to virial, 0 if not
   int no_change_box;             // 1 if cannot swap ortho <-> triclinic
   int time_integrate;            // 1 if fix performs time integration, 0 if no
   int time_depend;               // 1 if requires continuous timestepping
   int create_attribute;          // 1 if fix stores attributes that need
                                  //      setting when a new atom is created
   int restart_pbc;               // 1 if fix moves atoms (except integrate)
                                  //      so write_restart must remap to PBC
   int wd_header;                 // # of header values fix writes to data file
   int wd_section;                // # of sections fix writes to data file
   int dynamic_group_allow;       // 1 if can be used with dynamic group, else 0
   int dof_flag;                  // 1 if has dof() method (not min_dof())
   int special_alter_flag;        // 1 if has special_alter() meth for spec lists
   int enforce2d_flag;            // 1 if has enforce2d method
   int respa_level_support;       // 1 if fix supports fix_modify respa
   int respa_level;               // which respa level to apply fix (1-Nrespa)
 
   int scalar_flag;               // 0/1 if compute_scalar() function exists
   int vector_flag;               // 0/1 if compute_vector() function exists
   int array_flag;                // 0/1 if compute_array() function exists
   int size_vector;               // length of global vector
   int size_array_rows;           // rows in global array
   int size_array_cols;           // columns in global array
   int size_vector_variable;      // 1 if vec length is unknown in advance
   int size_array_rows_variable;  // 1 if array rows is unknown in advance
   int global_freq;               // frequency s/v data is available at
 
   int peratom_flag;              // 0/1 if per-atom data is stored
   int size_peratom_cols;         // 0 = vector, N = columns in peratom array
   int peratom_freq;              // frequency per-atom data is available at
 
   int local_flag;                // 0/1 if local data is stored
   int size_local_rows;           // rows in local vector or array
   int size_local_cols;           // 0 = vector, N = columns in local array
   int local_freq;                // frequency local data is available at
 
   int extscalar;            // 0/1 if global scalar is intensive/extensive
   int extvector;            // 0/1/-1 if global vector is all int/ext/extlist
   int *extlist;             // list of 0/1 int/ext for each vec component
   int extarray;             // 0/1 if global array is intensive/extensive
 
   double *vector_atom;           // computed per-atom vector
   double **array_atom;           // computed per-atom array
   double *vector_local;          // computed local vector
   double **array_local;          // computed local array
 
   int comm_forward;              // size of forward communication (0 if none)
   int comm_reverse;              // size of reverse communication (0 if none)
   int comm_border;               // size of border communication (0 if none)
 
   double virial[6];              // accumulated virial
   double *eatom,**vatom;         // accumulated per-atom energy/virial
 
   int restart_reset;             // 1 if restart just re-initialized fix
 
   // KOKKOS host/device flag and data masks
 
   int kokkosable;                // 1 if Kokkos fix
   ExecutionSpace execution_space;
   unsigned int datamask_read,datamask_modify;
 
   Fix(class LAMMPS *, int, char **);
   virtual ~Fix();
   void modify_params(int, char **);
 
   // the only pure virtual method: every derived fix must implement it
   // NOTE(review): presumably returns an OR of the FixConst bit masks
   // declared below in this header -- confirm against callers
 
   virtual int setmask() = 0;
 
   // optional hooks: the defaults below do nothing (or return 0/0.0/NULL),
   // so a derived fix only overrides the ones it needs
 
   virtual void post_constructor() {}
   virtual void init() {}
   virtual void init_list(int, class NeighList *) {}
   virtual void setup(int) {}
   virtual void setup_pre_exchange() {}
   virtual void setup_pre_neighbor() {}
   virtual void setup_pre_force(int) {}
   virtual void setup_pre_reverse(int, int) {}
   virtual void min_setup(int) {}
   virtual void initial_integrate(int) {}
   virtual void post_integrate() {}
   virtual void pre_exchange() {}
   virtual void pre_neighbor() {}
   virtual void pre_force(int) {}
   virtual void pre_reverse(int,int) {}
   virtual void post_force(int) {}
   virtual void final_integrate() {}
   virtual void end_of_step() {}
   virtual void post_run() {}
   virtual void write_restart(FILE *) {}
   virtual void write_restart_file(char *) {}
   virtual void restart(char *) {}
 
   virtual void grow_arrays(int) {}
   virtual void copy_arrays(int, int, int) {}
   virtual void set_arrays(int) {}
   virtual void update_arrays(int, int) {}
   virtual void set_molecule(int, tagint, int, double *, double *, double *) {}
   virtual void clear_bonus() {}
 
   virtual int pack_border(int, int *, double *) {return 0;}
   virtual int unpack_border(int, int, double *) {return 0;}
   virtual int pack_exchange(int, double *) {return 0;}
   virtual int unpack_exchange(int, double *) {return 0;}
   virtual int pack_restart(int, double *) {return 0;}
   virtual void unpack_restart(int, int) {}
   virtual int size_restart(int) {return 0;}
   virtual int maxsize_restart() {return 0;}
 
   virtual void setup_pre_force_respa(int, int) {}
   virtual void initial_integrate_respa(int, int, int) {}
   virtual void post_integrate_respa(int, int) {}
   virtual void pre_force_respa(int, int, int) {}
   virtual void post_force_respa(int, int, int) {}
   virtual void final_integrate_respa(int, int) {}
 
   virtual void min_pre_exchange() {}
   virtual void min_pre_neighbor() {}
   virtual void min_pre_force(int) {}
   virtual void min_pre_reverse(int,int) {}
   virtual void min_post_force(int) {}
 
   virtual double min_energy(double *) {return 0.0;}
   virtual void min_store() {}
   virtual void min_clearstore() {}
   virtual void min_pushstore() {}
   virtual void min_popstore() {}
   virtual int min_reset_ref() {return 0;}
   virtual void min_step(double, double *) {}
   virtual double max_alpha(double *) {return 0.0;}
   virtual int min_dof() {return 0;}
 
   virtual int pack_forward_comm(int, int *, double *, int, int *) {return 0;}
   virtual void unpack_forward_comm(int, int, double *) {}
   virtual int pack_reverse_comm_size(int, int) {return 0;}
   virtual int pack_reverse_comm(int, int, double *) {return 0;}
   virtual void unpack_reverse_comm(int, int *, double *) {}
 
   virtual double compute_scalar() {return 0.0;}
   virtual double compute_vector(int) {return 0.0;}
   virtual double compute_array(int,int) {return 0.0;}
 
   virtual int dof(int) {return 0;}
   virtual void deform(int) {}
   virtual void reset_target(double) {}
   virtual void reset_dt() {}
   virtual void enforce2d() {}
 
   virtual void read_data_header(char *) {}
   virtual void read_data_section(char *, int, char *, tagint) {}
   virtual bigint read_data_skip_lines(char *) {return 0;}
 
   virtual void write_data_header(FILE *, int) {}
   virtual void write_data_section_size(int, int &, int &) {}
   virtual void write_data_section_pack(int, double **) {}
   virtual void write_data_section_keyword(int, FILE *) {}
   virtual void write_data_section(int, FILE *, int, double **, int) {}
 
   virtual void zero_momentum() {}
   virtual void zero_rotation() {}
 
   virtual void rebuild_special() {}
 
   virtual int image(int *&, double **&) {return 0;}
 
   virtual int modify_param(int, char **) {return 0;}
   virtual void *extract(const char *, int &) {return NULL;}
 
   virtual double memory_usage() {return 0.0;}
 
  protected:
   int instance_me;        // which Fix class instantiation I am
 
   // energy/virial tally flags: ev_tally() adds to eatom only if eflag_atom
   // is set; v_tally() adds to virial[] only if vflag_global is set and to
   // vatom only if vflag_atom is set
   // NOTE(review): presumably set by ev_setup()/v_setup() -- confirm
 
   int evflag;
   int eflag_either,eflag_global,eflag_atom;
   int vflag_either,vflag_global,vflag_atom;
   int maxeatom,maxvatom;
 
   int copymode;   // if set, do not deallocate during destruction
                   // required when classes are used as functors by Kokkos
 
+  int dynamic;    // recount atoms for temperature computes
+
   // energy/virial setup and tally helpers, defined outside this header
 
   void ev_setup(int, int);
   void ev_tally(int, int *, double, double, double *);
   void v_setup(int);
   void v_tally(int, int *, double, double *);
 
   // union data struct for packing 32-bit and 64-bit ints into double bufs
   // see atom_vec.h for documentation
 
   union ubuf {
     double d;
     int64_t i;
     ubuf(double arg) : d(arg) {}
     ubuf(int64_t arg) : i(arg) {}
     ubuf(int arg) : i(arg) {}
   };
 };
 
 namespace FixConst {
   // single-bit mask values, one distinct bit per constant
   // names mirror the hooks declared in class Fix
 
   // regular timestep
 
   static const int INITIAL_INTEGRATE       = 1 << 0;
   static const int POST_INTEGRATE          = 1 << 1;
   static const int PRE_EXCHANGE            = 1 << 2;
   static const int PRE_NEIGHBOR            = 1 << 3;
   static const int PRE_FORCE               = 1 << 4;
   static const int PRE_REVERSE             = 1 << 5;
   static const int POST_FORCE              = 1 << 6;
   static const int FINAL_INTEGRATE         = 1 << 7;
   static const int END_OF_STEP             = 1 << 8;
   static const int POST_RUN                = 1 << 9;
   static const int THERMO_ENERGY           = 1 << 10;
 
   // *_RESPA variants
 
   static const int INITIAL_INTEGRATE_RESPA = 1 << 11;
   static const int POST_INTEGRATE_RESPA    = 1 << 12;
   static const int PRE_FORCE_RESPA         = 1 << 13;
   static const int POST_FORCE_RESPA        = 1 << 14;
   static const int FINAL_INTEGRATE_RESPA   = 1 << 15;
 
   // MIN_* variants
 
   static const int MIN_PRE_EXCHANGE        = 1 << 16;
   static const int MIN_PRE_NEIGHBOR        = 1 << 17;
   static const int MIN_PRE_FORCE           = 1 << 18;
   static const int MIN_PRE_REVERSE         = 1 << 19;
   static const int MIN_POST_FORCE          = 1 << 20;
   static const int MIN_ENERGY              = 1 << 21;
 
   // highest bit defined in this namespace
 
   static const int FIX_CONST_LAST          = 1 << 22;
 }
 
 }
 
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Fix ID must be alphanumeric or underscore characters
 
 Self-explanatory.
 
 E: Could not find fix group ID
 
 A group ID used in the fix command does not exist.
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 */
diff --git a/src/fix_momentum.h b/src/fix_momentum.h
index 05fd7ff7c..6e17f75a7 100644
--- a/src/fix_momentum.h
+++ b/src/fix_momentum.h
@@ -1,58 +1,57 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef FIX_CLASS
 
 FixStyle(momentum,FixMomentum)
 
 #else
 
 #ifndef LMP_FIX_MOMENTUM_H
 #define LMP_FIX_MOMENTUM_H
 
 #include "fix.h"
 
 namespace LAMMPS_NS {
 
 // Fix style "momentum", registered by FixStyle(momentum,FixMomentum) at the
 // top of this header.  Derives from Fix and overrides setmask(), init()
 // and end_of_step(); all three are defined outside this header.
 class FixMomentum : public Fix {
  public:
   FixMomentum(class LAMMPS *, int, char **);
   int setmask();
   void init();
   void end_of_step();
 
  protected:
   // NOTE(review): the meaning of these settings is not visible in this
   // header; the names suggest linear/angular momentum options and per-axis
   // x/y/z switches -- confirm in the implementation file
   int linear,angular,rescale;
   int xflag,yflag,zflag;
-  int dynamic;
   double masstotal;
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Fix momentum group has no atoms
 
 Self-explanatory.
 
 */
diff --git a/src/fix_nh.cpp b/src/fix_nh.cpp
index f4a0a71a4..0f1f3395d 100644
--- a/src/fix_nh.cpp
+++ b/src/fix_nh.cpp
@@ -1,2361 +1,2360 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing authors: Mark Stevens (SNL), Aidan Thompson (SNL)
 ------------------------------------------------------------------------- */
 
 #include <string.h>
 #include <stdlib.h>
 #include <math.h>
 #include "fix_nh.h"
 #include "math_extra.h"
 #include "atom.h"
 #include "force.h"
 #include "group.h"
 #include "comm.h"
 #include "neighbor.h"
 #include "irregular.h"
 #include "modify.h"
 #include "fix_deform.h"
 #include "compute.h"
 #include "kspace.h"
 #include "update.h"
 #include "respa.h"
 #include "domain.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
 #define DELTAFLIP 0.1
 #define TILTMAX 1.5
 
 enum{NOBIAS,BIAS};
 enum{NONE,XYZ,XY,YZ,XZ};
 enum{ISO,ANISO,TRICLINIC};
 
 /* ----------------------------------------------------------------------
    NVT,NPH,NPT integrators for improved Nose-Hoover equations of motion
  ---------------------------------------------------------------------- */
 
 FixNH::FixNH(LAMMPS *lmp, int narg, char **arg) : 
   Fix(lmp, narg, arg),
   rfix(NULL), id_dilate(NULL), irregular(NULL), id_temp(NULL), id_press(NULL),
   eta(NULL), eta_dot(NULL), eta_dotdot(NULL),
   eta_mass(NULL), etap(NULL), etap_dot(NULL), etap_dotdot(NULL),
   etap_mass(NULL)
 {
   if (narg < 4) error->all(FLERR,"Illegal fix nvt/npt/nph command");
 
   restart_global = 1;
   dynamic_group_allow = 1;
   time_integrate = 1;
   scalar_flag = 1;
   vector_flag = 1;
   global_freq = 1;
   extscalar = 1;
   extvector = 0;
 
   // default values
 
   pcouple = NONE;
   drag = 0.0;
   allremap = 1;
   id_dilate = NULL;
   mtchain = mpchain = 3;
   nc_tchain = nc_pchain = 1;
   mtk_flag = 1;
   deviatoric_flag = 0;
   nreset_h0 = 0;
   eta_mass_flag = 1;
   omega_mass_flag = 0;
   etap_mass_flag = 0;
   flipflag = 1;
   dipole_flag = 0;
   dlm_flag = 0;
 
   tcomputeflag = 0;
   pcomputeflag = 0;
   id_temp = NULL;
   id_press = NULL;
 
   // turn on tilt factor scaling, whenever applicable
 
   dimension = domain->dimension;
 
   scaleyz = scalexz = scalexy = 0;
   if (domain->yperiodic && domain->xy != 0.0) scalexy = 1;
   if (domain->zperiodic && dimension == 3) {
     if (domain->yz != 0.0) scaleyz = 1;
     if (domain->xz != 0.0) scalexz = 1;
   }
 
   // set fixed-point to default = center of cell
 
   fixedpoint[0] = 0.5*(domain->boxlo[0]+domain->boxhi[0]);
   fixedpoint[1] = 0.5*(domain->boxlo[1]+domain->boxhi[1]);
   fixedpoint[2] = 0.5*(domain->boxlo[2]+domain->boxhi[2]);
 
   // used by FixNVTSllod to preserve non-default value
 
   mtchain_default_flag = 1;
 
   tstat_flag = 0;
   double t_period = 0.0;
 
   double p_period[6];
   for (int i = 0; i < 6; i++) {
     p_start[i] = p_stop[i] = p_period[i] = p_target[i] = 0.0;
     p_flag[i] = 0;
   }
 
   // process keywords
 
   int iarg = 3;
 
   while (iarg < narg) {
     if (strcmp(arg[iarg],"temp") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       tstat_flag = 1;
       t_start = force->numeric(FLERR,arg[iarg+1]);
       t_target = t_start;
       t_stop = force->numeric(FLERR,arg[iarg+2]);
       t_period = force->numeric(FLERR,arg[iarg+3]);
       if (t_start < 0.0 || t_stop <= 0.0)
         error->all(FLERR,
                    "Target temperature for fix nvt/npt/nph cannot be 0.0");
       iarg += 4;
 
     } else if (strcmp(arg[iarg],"iso") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       pcouple = XYZ;
       p_start[0] = p_start[1] = p_start[2] = force->numeric(FLERR,arg[iarg+1]);
       p_stop[0] = p_stop[1] = p_stop[2] = force->numeric(FLERR,arg[iarg+2]);
       p_period[0] = p_period[1] = p_period[2] =
         force->numeric(FLERR,arg[iarg+3]);
       p_flag[0] = p_flag[1] = p_flag[2] = 1;
       if (dimension == 2) {
         p_start[2] = p_stop[2] = p_period[2] = 0.0;
         p_flag[2] = 0;
       }
       iarg += 4;
     } else if (strcmp(arg[iarg],"aniso") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       pcouple = NONE;
       p_start[0] = p_start[1] = p_start[2] = force->numeric(FLERR,arg[iarg+1]);
       p_stop[0] = p_stop[1] = p_stop[2] = force->numeric(FLERR,arg[iarg+2]);
       p_period[0] = p_period[1] = p_period[2] =
         force->numeric(FLERR,arg[iarg+3]);
       p_flag[0] = p_flag[1] = p_flag[2] = 1;
       if (dimension == 2) {
         p_start[2] = p_stop[2] = p_period[2] = 0.0;
         p_flag[2] = 0;
       }
       iarg += 4;
     } else if (strcmp(arg[iarg],"tri") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       pcouple = NONE;
       scalexy = scalexz = scaleyz = 0;
       p_start[0] = p_start[1] = p_start[2] = force->numeric(FLERR,arg[iarg+1]);
       p_stop[0] = p_stop[1] = p_stop[2] = force->numeric(FLERR,arg[iarg+2]);
       p_period[0] = p_period[1] = p_period[2] =
         force->numeric(FLERR,arg[iarg+3]);
       p_flag[0] = p_flag[1] = p_flag[2] = 1;
       p_start[3] = p_start[4] = p_start[5] = 0.0;
       p_stop[3] = p_stop[4] = p_stop[5] = 0.0;
       p_period[3] = p_period[4] = p_period[5] =
         force->numeric(FLERR,arg[iarg+3]);
       p_flag[3] = p_flag[4] = p_flag[5] = 1;
       if (dimension == 2) {
         p_start[2] = p_stop[2] = p_period[2] = 0.0;
         p_flag[2] = 0;
         p_start[3] = p_stop[3] = p_period[3] = 0.0;
         p_flag[3] = 0;
         p_start[4] = p_stop[4] = p_period[4] = 0.0;
         p_flag[4] = 0;
       }
       iarg += 4;
     } else if (strcmp(arg[iarg],"x") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       p_start[0] = force->numeric(FLERR,arg[iarg+1]);
       p_stop[0] = force->numeric(FLERR,arg[iarg+2]);
       p_period[0] = force->numeric(FLERR,arg[iarg+3]);
       p_flag[0] = 1;
       deviatoric_flag = 1;
       iarg += 4;
     } else if (strcmp(arg[iarg],"y") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       p_start[1] = force->numeric(FLERR,arg[iarg+1]);
       p_stop[1] = force->numeric(FLERR,arg[iarg+2]);
       p_period[1] = force->numeric(FLERR,arg[iarg+3]);
       p_flag[1] = 1;
       deviatoric_flag = 1;
       iarg += 4;
     } else if (strcmp(arg[iarg],"z") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       p_start[2] = force->numeric(FLERR,arg[iarg+1]);
       p_stop[2] = force->numeric(FLERR,arg[iarg+2]);
       p_period[2] = force->numeric(FLERR,arg[iarg+3]);
       p_flag[2] = 1;
       deviatoric_flag = 1;
       iarg += 4;
       if (dimension == 2)
         error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation");
 
     } else if (strcmp(arg[iarg],"yz") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       p_start[3] = force->numeric(FLERR,arg[iarg+1]);
       p_stop[3] = force->numeric(FLERR,arg[iarg+2]);
       p_period[3] = force->numeric(FLERR,arg[iarg+3]);
       p_flag[3] = 1;
       deviatoric_flag = 1;
       scaleyz = 0;
       iarg += 4;
       if (dimension == 2)
         error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation");
     } else if (strcmp(arg[iarg],"xz") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       p_start[4] = force->numeric(FLERR,arg[iarg+1]);
       p_stop[4] = force->numeric(FLERR,arg[iarg+2]);
       p_period[4] = force->numeric(FLERR,arg[iarg+3]);
       p_flag[4] = 1;
       deviatoric_flag = 1;
       scalexz = 0;
       iarg += 4;
       if (dimension == 2)
         error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation");
     } else if (strcmp(arg[iarg],"xy") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       p_start[5] = force->numeric(FLERR,arg[iarg+1]);
       p_stop[5] = force->numeric(FLERR,arg[iarg+2]);
       p_period[5] = force->numeric(FLERR,arg[iarg+3]);
       p_flag[5] = 1;
       deviatoric_flag = 1;
       scalexy = 0;
       iarg += 4;
 
     } else if (strcmp(arg[iarg],"couple") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       if (strcmp(arg[iarg+1],"xyz") == 0) pcouple = XYZ;
       else if (strcmp(arg[iarg+1],"xy") == 0) pcouple = XY;
       else if (strcmp(arg[iarg+1],"yz") == 0) pcouple = YZ;
       else if (strcmp(arg[iarg+1],"xz") == 0) pcouple = XZ;
       else if (strcmp(arg[iarg+1],"none") == 0) pcouple = NONE;
       else error->all(FLERR,"Illegal fix nvt/npt/nph command");
       iarg += 2;
 
     } else if (strcmp(arg[iarg],"drag") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       drag = force->numeric(FLERR,arg[iarg+1]);
       if (drag < 0.0) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"dilate") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       if (strcmp(arg[iarg+1],"all") == 0) allremap = 1;
       else {
         allremap = 0;
         delete [] id_dilate;
         int n = strlen(arg[iarg+1]) + 1;
         id_dilate = new char[n];
         strcpy(id_dilate,arg[iarg+1]);
         int idilate = group->find(id_dilate);
         if (idilate == -1)
           error->all(FLERR,"Fix nvt/npt/nph dilate group ID does not exist");
       }
       iarg += 2;
 
     } else if (strcmp(arg[iarg],"tchain") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       mtchain = force->inumeric(FLERR,arg[iarg+1]);
       // used by FixNVTSllod to preserve non-default value
       mtchain_default_flag = 0;
       if (mtchain < 1) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"pchain") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       mpchain = force->inumeric(FLERR,arg[iarg+1]);
       if (mpchain < 0) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"mtk") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       if (strcmp(arg[iarg+1],"yes") == 0) mtk_flag = 1;
       else if (strcmp(arg[iarg+1],"no") == 0) mtk_flag = 0;
       else error->all(FLERR,"Illegal fix nvt/npt/nph command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"tloop") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       nc_tchain = force->inumeric(FLERR,arg[iarg+1]);
       if (nc_tchain < 0) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"ploop") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       nc_pchain = force->inumeric(FLERR,arg[iarg+1]);
       if (nc_pchain < 0) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"nreset") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       nreset_h0 = force->inumeric(FLERR,arg[iarg+1]);
       if (nreset_h0 < 0) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"scalexy") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       if (strcmp(arg[iarg+1],"yes") == 0) scalexy = 1;
       else if (strcmp(arg[iarg+1],"no") == 0) scalexy = 0;
       else error->all(FLERR,"Illegal fix nvt/npt/nph command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"scalexz") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       if (strcmp(arg[iarg+1],"yes") == 0) scalexz = 1;
       else if (strcmp(arg[iarg+1],"no") == 0) scalexz = 0;
       else error->all(FLERR,"Illegal fix nvt/npt/nph command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"scaleyz") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       if (strcmp(arg[iarg+1],"yes") == 0) scaleyz = 1;
       else if (strcmp(arg[iarg+1],"no") == 0) scaleyz = 0;
       else error->all(FLERR,"Illegal fix nvt/npt/nph command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"flip") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       if (strcmp(arg[iarg+1],"yes") == 0) flipflag = 1;
       else if (strcmp(arg[iarg+1],"no") == 0) flipflag = 0;
       else error->all(FLERR,"Illegal fix nvt/npt/nph command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"update") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       if (strcmp(arg[iarg+1],"dipole") == 0) dipole_flag = 1;
       else if (strcmp(arg[iarg+1],"dipole/dlm") == 0) {
         dipole_flag = 1;
         dlm_flag = 1;
       } else error->all(FLERR,"Illegal fix nvt/npt/nph command");
       iarg += 2;
     } else if (strcmp(arg[iarg],"fixedpoint") == 0) {
       if (iarg+4 > narg) error->all(FLERR,"Illegal fix nvt/npt/nph command");
       fixedpoint[0] = force->numeric(FLERR,arg[iarg+1]);
       fixedpoint[1] = force->numeric(FLERR,arg[iarg+2]);
       fixedpoint[2] = force->numeric(FLERR,arg[iarg+3]);
       iarg += 4;
 
     // disc keyword is also parsed in fix/nh/sphere  
 
     } else if (strcmp(arg[iarg],"disc") == 0) {
       iarg++;
 
     } else error->all(FLERR,"Illegal fix nvt/npt/nph command");
   }
 
   // error checks
 
   if (dimension == 2 && (p_flag[2] || p_flag[3] || p_flag[4]))
     error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation");
   if (dimension == 2 && (pcouple == YZ || pcouple == XZ))
     error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation");
   if (dimension == 2 && (scalexz == 1 || scaleyz == 1 ))
     error->all(FLERR,"Invalid fix nvt/npt/nph command for a 2d simulation");
 
   if (pcouple == XYZ && (p_flag[0] == 0 || p_flag[1] == 0))
     error->all(FLERR,"Invalid fix nvt/npt/nph command pressure settings");
   if (pcouple == XYZ && dimension == 3 && p_flag[2] == 0)
     error->all(FLERR,"Invalid fix nvt/npt/nph command pressure settings");
   if (pcouple == XY && (p_flag[0] == 0 || p_flag[1] == 0))
     error->all(FLERR,"Invalid fix nvt/npt/nph command pressure settings");
   if (pcouple == YZ && (p_flag[1] == 0 || p_flag[2] == 0))
     error->all(FLERR,"Invalid fix nvt/npt/nph command pressure settings");
   if (pcouple == XZ && (p_flag[0] == 0 || p_flag[2] == 0))
     error->all(FLERR,"Invalid fix nvt/npt/nph command pressure settings");
 
   // require periodicity in tensile dimension
 
   if (p_flag[0] && domain->xperiodic == 0)
     error->all(FLERR,"Cannot use fix nvt/npt/nph on a non-periodic dimension");
   if (p_flag[1] && domain->yperiodic == 0)
     error->all(FLERR,"Cannot use fix nvt/npt/nph on a non-periodic dimension");
   if (p_flag[2] && domain->zperiodic == 0)
     error->all(FLERR,"Cannot use fix nvt/npt/nph on a non-periodic dimension");
 
   // require periodicity in 2nd dim of off-diagonal tilt component
 
   if (p_flag[3] && domain->zperiodic == 0)
     error->all(FLERR,
                "Cannot use fix nvt/npt/nph on a 2nd non-periodic dimension");
   if (p_flag[4] && domain->zperiodic == 0)
     error->all(FLERR,
                "Cannot use fix nvt/npt/nph on a 2nd non-periodic dimension");
   if (p_flag[5] && domain->yperiodic == 0)
     error->all(FLERR,
                "Cannot use fix nvt/npt/nph on a 2nd non-periodic dimension");
 
   if (scaleyz == 1 && domain->zperiodic == 0)
     error->all(FLERR,"Cannot use fix nvt/npt/nph "
                "with yz scaling when z is non-periodic dimension");
   if (scalexz == 1 && domain->zperiodic == 0)
     error->all(FLERR,"Cannot use fix nvt/npt/nph "
                "with xz scaling when z is non-periodic dimension");
   if (scalexy == 1 && domain->yperiodic == 0)
     error->all(FLERR,"Cannot use fix nvt/npt/nph "
                "with xy scaling when y is non-periodic dimension");
 
   if (p_flag[3] && scaleyz == 1)
     error->all(FLERR,"Cannot use fix nvt/npt/nph with "
                "both yz dynamics and yz scaling");
   if (p_flag[4] && scalexz == 1)
     error->all(FLERR,"Cannot use fix nvt/npt/nph with "
                "both xz dynamics and xz scaling");
   if (p_flag[5] && scalexy == 1)
     error->all(FLERR,"Cannot use fix nvt/npt/nph with "
                "both xy dynamics and xy scaling");
 
   if (!domain->triclinic && (p_flag[3] || p_flag[4] || p_flag[5]))
     error->all(FLERR,"Can not specify Pxy/Pxz/Pyz in "
                "fix nvt/npt/nph with non-triclinic box");
 
   if (pcouple == XYZ && dimension == 3 &&
       (p_start[0] != p_start[1] || p_start[0] != p_start[2] ||
        p_stop[0] != p_stop[1] || p_stop[0] != p_stop[2] ||
        p_period[0] != p_period[1] || p_period[0] != p_period[2]))
     error->all(FLERR,"Invalid fix nvt/npt/nph pressure settings");
   if (pcouple == XYZ && dimension == 2 &&
       (p_start[0] != p_start[1] || p_stop[0] != p_stop[1] ||
        p_period[0] != p_period[1]))
     error->all(FLERR,"Invalid fix nvt/npt/nph pressure settings");
   if (pcouple == XY &&
       (p_start[0] != p_start[1] || p_stop[0] != p_stop[1] ||
        p_period[0] != p_period[1]))
     error->all(FLERR,"Invalid fix nvt/npt/nph pressure settings");
   if (pcouple == YZ &&
       (p_start[1] != p_start[2] || p_stop[1] != p_stop[2] ||
        p_period[1] != p_period[2]))
     error->all(FLERR,"Invalid fix nvt/npt/nph pressure settings");
   if (pcouple == XZ &&
       (p_start[0] != p_start[2] || p_stop[0] != p_stop[2] ||
        p_period[0] != p_period[2]))
     error->all(FLERR,"Invalid fix nvt/npt/nph pressure settings");
 
   if (dipole_flag) {
     if (!atom->sphere_flag)
       error->all(FLERR,"Using update dipole flag requires atom style sphere");
     if (!atom->mu_flag)
       error->all(FLERR,"Using update dipole flag requires atom attribute mu");
   }
 
   if ((tstat_flag && t_period <= 0.0) ||
       (p_flag[0] && p_period[0] <= 0.0) ||
       (p_flag[1] && p_period[1] <= 0.0) ||
       (p_flag[2] && p_period[2] <= 0.0) ||
       (p_flag[3] && p_period[3] <= 0.0) ||
       (p_flag[4] && p_period[4] <= 0.0) ||
       (p_flag[5] && p_period[5] <= 0.0))
     error->all(FLERR,"Fix nvt/npt/nph damping parameters must be > 0.0");
 
   // set pstat_flag and box change and restart_pbc variables
 
   pre_exchange_flag = 0;
   pstat_flag = 0;
   pstyle = ISO;
 
   for (int i = 0; i < 6; i++)
     if (p_flag[i]) pstat_flag = 1;
 
   if (pstat_flag) {
     if (p_flag[0] || p_flag[1] || p_flag[2]) box_change_size = 1;
     if (p_flag[3] || p_flag[4] || p_flag[5]) box_change_shape = 1;
     no_change_box = 1;
     if (allremap == 0) restart_pbc = 1;
 
     // pstyle = TRICLINIC if any off-diagonal term is controlled -> 6 dof
     // else pstyle = ISO if XYZ coupling or XY coupling in 2d -> 1 dof
     // else pstyle = ANISO -> 3 dof
 
     if (p_flag[3] || p_flag[4] || p_flag[5]) pstyle = TRICLINIC;
     else if (pcouple == XYZ || (dimension == 2 && pcouple == XY)) pstyle = ISO;
     else pstyle = ANISO;
 
     // pre_exchange only required if flips can occur due to shape changes
 
     if (flipflag && (p_flag[3] || p_flag[4] || p_flag[5]))
       pre_exchange_flag = 1;
     if (flipflag && (domain->yz != 0.0 || domain->xz != 0.0 ||
                      domain->xy != 0.0))
       pre_exchange_flag = 1;
   }
 
   // convert input periods to frequencies
 
   t_freq = 0.0;
   p_freq[0] = p_freq[1] = p_freq[2] = p_freq[3] = p_freq[4] = p_freq[5] = 0.0;
 
   if (tstat_flag) t_freq = 1.0 / t_period;
   if (p_flag[0]) p_freq[0] = 1.0 / p_period[0];
   if (p_flag[1]) p_freq[1] = 1.0 / p_period[1];
   if (p_flag[2]) p_freq[2] = 1.0 / p_period[2];
   if (p_flag[3]) p_freq[3] = 1.0 / p_period[3];
   if (p_flag[4]) p_freq[4] = 1.0 / p_period[4];
   if (p_flag[5]) p_freq[5] = 1.0 / p_period[5];
 
   // Nose/Hoover temp and pressure init
 
   size_vector = 0;
 
   if (tstat_flag) {
     int ich;
     eta = new double[mtchain];
 
     // add one extra dummy thermostat, set to zero
 
     eta_dot = new double[mtchain+1];
     eta_dot[mtchain] = 0.0;
     eta_dotdot = new double[mtchain];
     for (ich = 0; ich < mtchain; ich++) {
       eta[ich] = eta_dot[ich] = eta_dotdot[ich] = 0.0;
     }
     eta_mass = new double[mtchain];
     size_vector += 2*2*mtchain;
   }
 
   if (pstat_flag) {
     omega[0] = omega[1] = omega[2] = 0.0;
     omega_dot[0] = omega_dot[1] = omega_dot[2] = 0.0;
     omega_mass[0] = omega_mass[1] = omega_mass[2] = 0.0;
     omega[3] = omega[4] = omega[5] = 0.0;
     omega_dot[3] = omega_dot[4] = omega_dot[5] = 0.0;
     omega_mass[3] = omega_mass[4] = omega_mass[5] = 0.0;
     if (pstyle == ISO) size_vector += 2*2*1;
     else if (pstyle == ANISO) size_vector += 2*2*3;
     else if (pstyle == TRICLINIC) size_vector += 2*2*6;
 
     if (mpchain) {
       int ich;
       etap = new double[mpchain];
 
       // add one extra dummy thermostat, set to zero
 
       etap_dot = new double[mpchain+1];
       etap_dot[mpchain] = 0.0;
       etap_dotdot = new double[mpchain];
       for (ich = 0; ich < mpchain; ich++) {
         etap[ich] = etap_dot[ich] =
           etap_dotdot[ich] = 0.0;
       }
       etap_mass = new double[mpchain];
       size_vector += 2*2*mpchain;
     }
 
     if (deviatoric_flag) size_vector += 1;
   }
 
   nrigid = 0;
   rfix = NULL;
 
   if (pre_exchange_flag) irregular = new Irregular(lmp);
   else irregular = NULL;
 
   // initialize vol0,t0 to zero to signal uninitialized
   // values then assigned in init(), if necessary
 
   vol0 = t0 = 0.0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 FixNH::~FixNH()
 {
   // nothing is released when copymode is set
 
   if (copymode) return;
 
   // dilate group ID string, rigid-fix index list, Irregular instance
 
   delete [] id_dilate;
   delete [] rfix;
   delete irregular;
 
   // a temperature compute flagged by tcomputeflag is removed from Modify
   // before its ID string is freed
 
   if (tcomputeflag) modify->delete_compute(id_temp);
   delete [] id_temp;
 
   // thermostat chain arrays exist only when a thermostat is active
 
   if (tstat_flag) {
     delete [] eta_mass;
     delete [] eta_dotdot;
     delete [] eta_dot;
     delete [] eta;
   }
 
   // everything below belongs to the barostat
 
   if (!pstat_flag) return;
 
   if (pcomputeflag) modify->delete_compute(id_press);
   delete [] id_press;
 
   // barostat thermostat chain arrays exist only when mpchain is non-zero
 
   if (mpchain) {
     delete [] etap_mass;
     delete [] etap_dotdot;
     delete [] etap_dot;
     delete [] etap;
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixNH::setmask()
 {
   // flags that are always requested: Verlet and rRESPA integration
   // hooks plus THERMO_ENERGY
 
   const int always = INITIAL_INTEGRATE | FINAL_INTEGRATE | THERMO_ENERGY |
     INITIAL_INTEGRATE_RESPA | FINAL_INTEGRATE_RESPA;
 
   // PRE_EXCHANGE is added only when pre_exchange_flag is set
 
   return pre_exchange_flag ? (always | PRE_EXCHANGE) : always;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixNH::init()
 {
   // recheck that dilate group has not been deleted
 
   if (allremap == 0) {
     int idilate = group->find(id_dilate);
     if (idilate == -1)
       error->all(FLERR,"Fix nvt/npt/nph dilate group ID does not exist");
     dilate_group_bit = group->bitmask[idilate];
   }
 
   // ensure no conflict with fix deform
 
   if (pstat_flag)
     for (int i = 0; i < modify->nfix; i++)
       if (strcmp(modify->fix[i]->style,"deform") == 0) {
         int *dimflag = ((FixDeform *) modify->fix[i])->dimflag;
         if ((p_flag[0] && dimflag[0]) || (p_flag[1] && dimflag[1]) ||
             (p_flag[2] && dimflag[2]) || (p_flag[3] && dimflag[3]) ||
             (p_flag[4] && dimflag[4]) || (p_flag[5] && dimflag[5]))
           error->all(FLERR,"Cannot use fix npt and fix deform on "
                      "same component of stress tensor");
       }
 
   // set temperature and pressure ptrs
 
   int icompute = modify->find_compute(id_temp);
   if (icompute < 0)
     error->all(FLERR,"Temperature ID for fix nvt/npt does not exist");
   temperature = modify->compute[icompute];
 
   if (temperature->tempbias) which = BIAS;
   else which = NOBIAS;
 
   if (pstat_flag) {
     icompute = modify->find_compute(id_press);
     if (icompute < 0)
       error->all(FLERR,"Pressure ID for fix npt/nph does not exist");
     pressure = modify->compute[icompute];
   }
 
   // set timesteps and frequencies
 
   dtv = update->dt;
   dtf = 0.5 * update->dt * force->ftm2v;
   dthalf = 0.5 * update->dt;
   dt4 = 0.25 * update->dt;
   dt8 = 0.125 * update->dt;
   dto = dthalf;
 
   p_freq_max = 0.0;
   if (pstat_flag) {
     p_freq_max = MAX(p_freq[0],p_freq[1]);
     p_freq_max = MAX(p_freq_max,p_freq[2]);
     if (pstyle == TRICLINIC) {
       p_freq_max = MAX(p_freq_max,p_freq[3]);
       p_freq_max = MAX(p_freq_max,p_freq[4]);
       p_freq_max = MAX(p_freq_max,p_freq[5]);
     }
     pdrag_factor = 1.0 - (update->dt * p_freq_max * drag / nc_pchain);
   }
 
   if (tstat_flag)
     tdrag_factor = 1.0 - (update->dt * t_freq * drag / nc_tchain);
 
   // tally the number of dimensions that are barostatted
   // set initial volume and reference cell, if not already done
 
   if (pstat_flag) {
     pdim = p_flag[0] + p_flag[1] + p_flag[2];
     if (vol0 == 0.0) {
       if (dimension == 3) vol0 = domain->xprd * domain->yprd * domain->zprd;
       else vol0 = domain->xprd * domain->yprd;
       h0_inv[0] = domain->h_inv[0];
       h0_inv[1] = domain->h_inv[1];
       h0_inv[2] = domain->h_inv[2];
       h0_inv[3] = domain->h_inv[3];
       h0_inv[4] = domain->h_inv[4];
       h0_inv[5] = domain->h_inv[5];
     }
   }
 
   boltz = force->boltz;
   nktv2p = force->nktv2p;
 
   if (force->kspace) kspace_flag = 1;
   else kspace_flag = 0;
 
   if (strstr(update->integrate_style,"respa")) {
     nlevels_respa = ((Respa *) update->integrate)->nlevels;
     step_respa = ((Respa *) update->integrate)->step;
     dto = 0.5*step_respa[0];
   }
 
   // detect if any rigid fixes exist so rigid bodies move when box is remapped
   // rfix[] = indices to each fix rigid
 
   delete [] rfix;
   nrigid = 0;
   rfix = NULL;
 
   for (int i = 0; i < modify->nfix; i++)
     if (modify->fix[i]->rigid_flag) nrigid++;
   if (nrigid) {
     rfix = new int[nrigid];
     nrigid = 0;
     for (int i = 0; i < modify->nfix; i++)
       if (modify->fix[i]->rigid_flag) rfix[nrigid++] = i;
   }
 }
 
 /* ----------------------------------------------------------------------
    compute T,P before integrator starts
 ------------------------------------------------------------------------- */
 
 void FixNH::setup(int vflag)
 {
   // vflag is not used in this function
 
   // tdof needed by compute_temp_target()
 
   t_current = temperature->compute_scalar();
   tdof = temperature->dof;
 
   // t_target is needed by NVT and NPT in compute_scalar()
   // If no thermostat or using fix nphug,
   // t_target must be defined by other means.
 
   if (tstat_flag && strstr(style,"nphug") == NULL) {
     compute_temp_target();
   } else if (pstat_flag) {
 
     // t0 = reference temperature for masses
     // cannot be done in init() b/c temperature cannot be called there
     // is b/c Modify::init() inits computes after fixes due to dof dependence
     // guesstimate a unit-dependent t0 if actual T = 0.0
     // if it was read in from a restart file, leave it be
 
     // t0 == 0.0 is the "not yet set" marker
 
     if (t0 == 0.0) {
       t0 = temperature->compute_scalar();
       if (t0 == 0.0) {
         if (strcmp(update->unit_style,"lj") == 0) t0 = 1.0;
         else t0 = 300.0;
       }
     }
     t_target = t0;
   }
 
   if (pstat_flag) compute_press_target();
 
   // evaluate the current pressure (scalar for ISO, tensor otherwise),
   // fill p_current via couple(), and request the pressure compute
   // again on the next timestep
 
   if (pstat_flag) {
     if (pstyle == ISO) pressure->compute_scalar();
     else pressure->compute_vector();
     couple();
     pressure->addstep(update->ntimestep+1);
   }
 
   // masses and initial forces on thermostat variables
   // only the first chain mass carries the tdof factor
   // eta_dotdot[0] is not assigned here
 
   if (tstat_flag) {
     eta_mass[0] = tdof * boltz * t_target / (t_freq*t_freq);
     for (int ich = 1; ich < mtchain; ich++)
       eta_mass[ich] = boltz * t_target / (t_freq*t_freq);
     for (int ich = 1; ich < mtchain; ich++) {
       eta_dotdot[ich] = (eta_mass[ich-1]*eta_dot[ich-1]*eta_dot[ich-1] -
                          boltz * t_target) / eta_mass[ich];
     }
   }
 
   // masses and initial forces on barostat variables
 
   if (pstat_flag) {
     double kt = boltz * t_target;
     double nkt = atom->natoms * kt;
 
     // diagonal components: mass set only for barostatted dimensions
 
     for (int i = 0; i < 3; i++)
       if (p_flag[i])
         omega_mass[i] = nkt/(p_freq[i]*p_freq[i]);
 
     // off-diagonal components: only for a triclinic barostat
 
     if (pstyle == TRICLINIC) {
       for (int i = 3; i < 6; i++)
         if (p_flag[i]) omega_mass[i] = nkt/(p_freq[i]*p_freq[i]);
     }
 
     // masses and initial forces on barostat thermostat variables
     // all chain masses use p_freq_max; etap_dotdot[0] is not assigned here
 
     if (mpchain) {
       etap_mass[0] = boltz * t_target / (p_freq_max*p_freq_max);
       for (int ich = 1; ich < mpchain; ich++)
         etap_mass[ich] = boltz * t_target / (p_freq_max*p_freq_max);
       for (int ich = 1; ich < mpchain; ich++)
         etap_dotdot[ich] =
           (etap_mass[ich-1]*etap_dot[ich-1]*etap_dot[ich-1] -
            boltz * t_target) / etap_mass[ich];
     }
-
   }
 }
 
 /* ----------------------------------------------------------------------
    1st half of Verlet update
 ------------------------------------------------------------------------- */
 
 void FixNH::initial_integrate(int vflag)
 {
   // advance the barostat thermostat chain (eta_press_dot), if present
 
   if (pstat_flag && mpchain) nhc_press_integrate();
 
   // refresh the target temperature, then advance eta_dot
 
   if (tstat_flag) {
     compute_temp_target();
     nhc_temp_integrate();
   }
 
   // barostat section:
   // pressure must be recomputed to account for the change in KE
   // t_current is up-to-date, but compute_temperature is not
   // couple() fills the appropriately coupled elements of p_current
   // then the target pressure, omega_dot and velocities are updated
 
   if (pstat_flag) {
     if (pstyle != ISO) {
       temperature->compute_vector();
       pressure->compute_vector();
     } else {
       temperature->compute_scalar();
       pressure->compute_scalar();
     }
     couple();
     pressure->addstep(update->ntimestep+1);
 
     compute_press_target();
     nh_omega_dot();
     nh_v_press();
   }
 
   nve_v();
 
   // with a barostat the position update is bracketed by two
   // 1/2 step box remaps, and KSpace coeffs are redone afterwards
   // since the volume has changed
 
   if (pstat_flag) {
     remap();
     nve_x();
     remap();
     if (kspace_flag) force->kspace->setup();
   } else {
     nve_x();
   }
 }
 
 /* ----------------------------------------------------------------------
    2nd half of Verlet update
 ------------------------------------------------------------------------- */
 
 void FixNH::final_integrate()
 {
   nve_v();
 
   // temperature is re-computed before nh_v_press() only for computes
   // with BIAS on reneighboring steps:
   //   some biases store per-atom values (e.g. temp/profile) which are
   //   invalid if reneigh/comm occurred since temp->compute()
   //   in initial_integrate()
 
   const bool refresh_bias = (which == BIAS && neighbor->ago == 0);
   if (refresh_bias) t_current = temperature->compute_scalar();
 
   if (pstat_flag) nh_v_press();
 
   // new T after velocities were rescaled by nh_v_press()
 
   t_current = temperature->compute_scalar();
   tdof = temperature->dof;
 
   // new P, coupled elements of p_current, then omega_dot
 
   if (pstat_flag) {
     if (pstyle != ISO) pressure->compute_vector();
     else pressure->compute_scalar();
     couple();
     pressure->addstep(update->ntimestep+1);
 
     nh_omega_dot();
   }
 
   // thermostat chain first (eta_dot), barostat chain last (eta_press_dot)
 
   if (tstat_flag) nhc_temp_integrate();
   if (pstat_flag && mpchain) nhc_press_integrate();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixNH::initial_integrate_respa(int vflag, int ilevel, int iloop)
 {
   // timesteps for this rRESPA level
 
   const double dt_level = step_respa[ilevel];
 
   dtv = dt_level;
   dtf = 0.5 * dt_level * force->ftm2v;
   dthalf = 0.5 * dt_level;
 
   // outermost level - update eta_dot and omega_dot, apply to v
   // every level     - NVE update of v
   // x,v updates only performed for atoms in group
 
   const bool outermost = (ilevel == nlevels_respa-1);
 
   if (outermost) {
 
     // update eta_press_dot
 
     if (pstat_flag && mpchain) nhc_press_integrate();
 
     // update eta_dot
 
     if (tstat_flag) {
       compute_temp_target();
       nhc_temp_integrate();
     }
 
     // recompute pressure to account for change in KE
     // t_current is up-to-date, but compute_temperature is not
     // couple() fills the appropriately coupled elements of p_current
     // then the target pressure, omega_dot and velocities are updated
 
     if (pstat_flag) {
       if (pstyle != ISO) {
         temperature->compute_vector();
         pressure->compute_vector();
       } else {
         temperature->compute_scalar();
         pressure->compute_scalar();
       }
       couple();
       pressure->addstep(update->ntimestep+1);
 
       compute_press_target();
       nh_omega_dot();
       nh_v_press();
     }
   }
 
   nve_v();
 
   // innermost level - also update x only for atoms in group
   // if barostat, perform 1/2 step remap before and after
 
   if (ilevel == 0) {
     if (pstat_flag) {
       remap();
       nve_x();
       remap();
     } else {
       nve_x();
     }
   }
 
   // if barostat, redo KSpace coeffs at outermost level,
   // since volume has changed
 
   if (outermost && kspace_flag && pstat_flag)
     force->kspace->setup();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixNH::final_integrate_respa(int ilevel, int iloop)
 {
   // timesteps for this rRESPA level
 
   const double dt_level = step_respa[ilevel];
 
   dtf = 0.5 * dt_level * force->ftm2v;
   dthalf = 0.5 * dt_level;
 
   // inner levels - plain NVE update of v
 
   if (ilevel != nlevels_respa-1) {
     nve_v();
     return;
   }
 
   // outermost level - eta_dot and omega_dot are updated by final_integrate
 
   final_integrate();
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixNH::couple()
 {
   double *tensor = pressure->vector;
 
   if (pstyle == ISO) {
 
     // isotropic barostat: all three components take the scalar pressure
 
     p_current[0] = p_current[1] = p_current[2] = pressure->scalar;
 
   } else {
 
     // start from the uncoupled diagonal of the pressure tensor,
     // then overwrite the coupled components with their average
 
     p_current[0] = tensor[0];
     p_current[1] = tensor[1];
     p_current[2] = tensor[2];
 
     if (pcouple == XYZ) {
       double ave = 1.0/3.0 * (tensor[0] + tensor[1] + tensor[2]);
       p_current[0] = p_current[1] = p_current[2] = ave;
     } else if (pcouple == XY) {
       double ave = 0.5 * (tensor[0] + tensor[1]);
       p_current[0] = p_current[1] = ave;
     } else if (pcouple == YZ) {
       double ave = 0.5 * (tensor[1] + tensor[2]);
       p_current[1] = p_current[2] = ave;
     } else if (pcouple == XZ) {
       double ave = 0.5 * (tensor[0] + tensor[2]);
       p_current[0] = p_current[2] = ave;
     }
   }
 
   // a non-finite diagonal pressure is a fatal error
 
   bool diag_ok = ISFINITE(p_current[0]) && ISFINITE(p_current[1]) &&
     ISFINITE(p_current[2]);
   if (!diag_ok)
     error->all(FLERR,"Non-numeric pressure - simulation unstable");
 
   if (pstyle != TRICLINIC) return;
 
   // switch order from xy-xz-yz to Voigt
 
   p_current[3] = tensor[5];
   p_current[4] = tensor[4];
   p_current[5] = tensor[3];
 
   bool offdiag_ok = ISFINITE(p_current[3]) && ISFINITE(p_current[4]) &&
     ISFINITE(p_current[5]);
   if (!offdiag_ok)
     error->all(FLERR,"Non-numeric pressure - simulation unstable");
 }
 
 /* ----------------------------------------------------------------------
    change box size
    remap all atoms or dilate group atoms depending on allremap flag
    if rigid bodies exist, scale rigid body centers-of-mass
 ------------------------------------------------------------------------- */
 
 void FixNH::remap()
 {
   // applies one box update of duration dto:
   //   1. accumulate omega
   //   2. convert affected atoms (and rigid bodies) to lamda coords
   //   3. update tilt factors and box bounds from omega_dot
   //   4. reset the global/local box and convert coords back
 
   int i;
   double oldlo,oldhi;
   double expfac;
 
   double **x = atom->x;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
   double *h = domain->h;
 
   // omega is not used, except for book-keeping
   // NOTE: the loop declares its own i, shadowing the function-level i
 
   for (int i = 0; i < 6; i++) omega[i] += dto*omega_dot[i];
 
   // convert pertinent atoms and rigid bodies to lamda coords
   // allremap set: all local atoms; otherwise only atoms in the dilate group
 
   if (allremap) domain->x2lamda(nlocal);
   else {
     for (i = 0; i < nlocal; i++)
       if (mask[i] & dilate_group_bit)
         domain->x2lamda(x[i],x[i]);
   }
 
   // NOTE(review): deform(0) here is paired with deform(1) at the end;
   // presumably 0 = before box change, 1 = after - confirm in the rigid fixes
 
   if (nrigid)
     for (i = 0; i < nrigid; i++)
       modify->fix[rfix[i]]->deform(0);
 
   // reset global and local box to new size/shape
 
   // this operation corresponds to applying the
   // translate and scale operations
   // corresponding to the solution of the following ODE:
   //
   // h_dot = omega_dot * h
   //
   // where h_dot, omega_dot and h are all upper-triangular
   // 3x3 tensors. In Voigt notation, the elements of the
   // RHS product tensor are:
   // h_dot = [0*0, 1*1, 2*2, 1*3+3*2, 0*4+5*3+4*2, 0*5+5*1]
   //
   // Ordering of operations preserves time symmetry.
 
   // fractions of dto used by the split off-diagonal updates below
 
   double dto2 = dto/2.0;
   double dto4 = dto/4.0;
   double dto8 = dto/8.0;
 
   // off-diagonal components, first half
   // sequence is h[4], h[3], h[5], h[4]; the second half below
   // repeats the same sequence after the diagonal scaling
 
   if (pstyle == TRICLINIC) {
 
     if (p_flag[4]) {
       expfac = exp(dto8*omega_dot[0]);
       h[4] *= expfac;
       h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]);
       h[4] *= expfac;
     }
 
     if (p_flag[3]) {
       expfac = exp(dto4*omega_dot[1]);
       h[3] *= expfac;
       h[3] += dto2*(omega_dot[3]*h[2]);
       h[3] *= expfac;
     }
 
     if (p_flag[5]) {
       expfac = exp(dto4*omega_dot[0]);
       h[5] *= expfac;
       h[5] += dto2*(omega_dot[5]*h[1]);
       h[5] *= expfac;
     }
 
     if (p_flag[4]) {
       expfac = exp(dto8*omega_dot[0]);
       h[4] *= expfac;
       h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]);
       h[4] *= expfac;
     }
   }
 
   // scale diagonal components
   // scale tilt factors with cell, if set
   // box bounds are scaled about fixedpoint in each barostatted dimension
 
   if (p_flag[0]) {
     oldlo = domain->boxlo[0];
     oldhi = domain->boxhi[0];
     expfac = exp(dto*omega_dot[0]);
     domain->boxlo[0] = (oldlo-fixedpoint[0])*expfac + fixedpoint[0];
     domain->boxhi[0] = (oldhi-fixedpoint[0])*expfac + fixedpoint[0];
   }
 
   if (p_flag[1]) {
     oldlo = domain->boxlo[1];
     oldhi = domain->boxhi[1];
     expfac = exp(dto*omega_dot[1]);
     domain->boxlo[1] = (oldlo-fixedpoint[1])*expfac + fixedpoint[1];
     domain->boxhi[1] = (oldhi-fixedpoint[1])*expfac + fixedpoint[1];
     if (scalexy) h[5] *= expfac;
   }
 
   if (p_flag[2]) {
     oldlo = domain->boxlo[2];
     oldhi = domain->boxhi[2];
     expfac = exp(dto*omega_dot[2]);
     domain->boxlo[2] = (oldlo-fixedpoint[2])*expfac + fixedpoint[2];
     domain->boxhi[2] = (oldhi-fixedpoint[2])*expfac + fixedpoint[2];
     if (scalexz) h[4] *= expfac;
     if (scaleyz) h[3] *= expfac;
   }
 
   // off-diagonal components, second half
 
   if (pstyle == TRICLINIC) {
 
     if (p_flag[4]) {
       expfac = exp(dto8*omega_dot[0]);
       h[4] *= expfac;
       h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]);
       h[4] *= expfac;
     }
 
     if (p_flag[3]) {
       expfac = exp(dto4*omega_dot[1]);
       h[3] *= expfac;
       h[3] += dto2*(omega_dot[3]*h[2]);
       h[3] *= expfac;
     }
 
     if (p_flag[5]) {
       expfac = exp(dto4*omega_dot[0]);
       h[5] *= expfac;
       h[5] += dto2*(omega_dot[5]*h[1]);
       h[5] *= expfac;
     }
 
     if (p_flag[4]) {
       expfac = exp(dto8*omega_dot[0]);
       h[4] *= expfac;
       h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]);
       h[4] *= expfac;
     }
 
   }
 
   // copy the updated tilt factors back into the domain
 
   domain->yz = h[3];
   domain->xz = h[4];
   domain->xy = h[5];
 
   // tilt factor to cell length ratio can not exceed TILTMAX in one step
   // yz is tested against yprd, xz and xy against xprd
 
   if (domain->yz < -TILTMAX*domain->yprd ||
       domain->yz > TILTMAX*domain->yprd ||
       domain->xz < -TILTMAX*domain->xprd ||
       domain->xz > TILTMAX*domain->xprd ||
       domain->xy < -TILTMAX*domain->xprd ||
       domain->xy > TILTMAX*domain->xprd)
     error->all(FLERR,"Fix npt/nph has tilted box too far in one step - "
                "periodic cell is too far from equilibrium state");
 
   domain->set_global_box();
   domain->set_local_box();
 
   // convert pertinent atoms and rigid bodies back to box coords
 
   if (allremap) domain->lamda2x(nlocal);
   else {
     for (i = 0; i < nlocal; i++)
       if (mask[i] & dilate_group_bit)
         domain->lamda2x(x[i],x[i]);
   }
 
   if (nrigid)
     for (i = 0; i < nrigid; i++)
       modify->fix[rfix[i]]->deform(1);
 }
 
 /* ----------------------------------------------------------------------
    pack entire state of Fix into one write
 ------------------------------------------------------------------------- */
 
 void FixNH::write_restart(FILE *fp)
 {
   // every proc packs the state into a temporary buffer of doubles
 
   int nvalues = size_restart_global();
 
   double *buf;
   memory->create(buf,nvalues,"nh:list");
 
   pack_restart_data(buf);
 
   // only proc 0 writes: byte count as an int, followed by the values
 
   if (comm->me == 0) {
     int nbytes = nvalues * sizeof(double);
     fwrite(&nbytes,sizeof(int),1,fp);
     fwrite(buf,sizeof(double),nvalues,fp);
   }
 
   memory->destroy(buf);
 }
 
 /* ----------------------------------------------------------------------
     calculate the number of values to be packed
 ------------------------------------------------------------------------- */
 
 int FixNH::size_restart_global()
 {
   // two values are always counted
 
   int nvalues = 2;
 
   // thermostat: 1 + two entries per chain element
 
   if (tstat_flag) nvalues += 1 + 2*mtchain;
 
   if (!pstat_flag) return nvalues;
 
   // barostat: 16 fixed values + two entries per barostat chain element,
   // plus 6 more when deviatoric_flag is set
 
   nvalues += 16 + 2*mpchain;
   if (deviatoric_flag) nvalues += 6;
 
   return nvalues;
 }
 
 /* ----------------------------------------------------------------------
    pack restart data
 ------------------------------------------------------------------------- */
 
 int FixNH::pack_restart_data(double *list)
 {
   int n = 0;
 
   // thermostat section: flag, then chain length, eta[] and eta_dot[]
 
   list[n++] = tstat_flag;
   if (tstat_flag) {
     list[n++] = mtchain;
     for (int k = 0; k < mtchain; k++) list[n++] = eta[k];
     for (int k = 0; k < mtchain; k++) list[n++] = eta_dot[k];
   }
 
   // barostat section: flag first, nothing more without a barostat
 
   list[n++] = pstat_flag;
   if (!pstat_flag) return n;
 
   // all 6 components of omega and omega_dot, then vol0 and t0
 
   for (int k = 0; k < 6; k++) list[n++] = omega[k];
   for (int k = 0; k < 6; k++) list[n++] = omega_dot[k];
   list[n++] = vol0;
   list[n++] = t0;
 
   // barostat chain length, then etap[] and etap_dot[]
   // both loops are empty when mpchain is zero
 
   list[n++] = mpchain;
   for (int k = 0; k < mpchain; k++) list[n++] = etap[k];
   for (int k = 0; k < mpchain; k++) list[n++] = etap_dot[k];
 
   // deviatoric flag, then h0_inv[] when the flag is set
 
   list[n++] = deviatoric_flag;
   if (deviatoric_flag)
     for (int k = 0; k < 6; k++) list[n++] = h0_inv[k];
 
   return n;
 }
 
 /* ----------------------------------------------------------------------
    use state info from restart file to restart the Fix
 ------------------------------------------------------------------------- */
 
 void FixNH::restart(char *buf)
 {
   int n = 0;
   double *list = (double *) buf;
   int flag = static_cast<int> (list[n++]);
   if (flag) {
     int m = static_cast<int> (list[n++]);
     if (tstat_flag && m == mtchain) {
       for (int ich = 0; ich < mtchain; ich++)
         eta[ich] = list[n++];
       for (int ich = 0; ich < mtchain; ich++)
         eta_dot[ich] = list[n++];
     } else n += 2*m;
   }
   flag = static_cast<int> (list[n++]);
   if (flag) {
     omega[0] = list[n++];
     omega[1] = list[n++];
     omega[2] = list[n++];
     omega[3] = list[n++];
     omega[4] = list[n++];
     omega[5] = list[n++];
     omega_dot[0] = list[n++];
     omega_dot[1] = list[n++];
     omega_dot[2] = list[n++];
     omega_dot[3] = list[n++];
     omega_dot[4] = list[n++];
     omega_dot[5] = list[n++];
     vol0 = list[n++];
     t0 = list[n++];
     int m = static_cast<int> (list[n++]);
     if (pstat_flag && m == mpchain) {
       for (int ich = 0; ich < mpchain; ich++)
         etap[ich] = list[n++];
       for (int ich = 0; ich < mpchain; ich++)
         etap_dot[ich] = list[n++];
     } else n+=2*m;
     flag = static_cast<int> (list[n++]);
     if (flag) {
       h0_inv[0] = list[n++];
       h0_inv[1] = list[n++];
       h0_inv[2] = list[n++];
       h0_inv[3] = list[n++];
       h0_inv[4] = list[n++];
       h0_inv[5] = list[n++];
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixNH::modify_param(int narg, char **arg)
 {
   if (strcmp(arg[0],"temp") == 0) {
     if (narg < 2) error->all(FLERR,"Illegal fix_modify command");
     if (tcomputeflag) {
       modify->delete_compute(id_temp);
       tcomputeflag = 0;
     }
     delete [] id_temp;
     int n = strlen(arg[1]) + 1;
     id_temp = new char[n];
     strcpy(id_temp,arg[1]);
 
     int icompute = modify->find_compute(arg[1]);
     if (icompute < 0)
       error->all(FLERR,"Could not find fix_modify temperature ID");
     temperature = modify->compute[icompute];
 
     if (temperature->tempflag == 0)
       error->all(FLERR,
                  "Fix_modify temperature ID does not compute temperature");
     if (temperature->igroup != 0 && comm->me == 0)
       error->warning(FLERR,"Temperature for fix modify is not for group all");
 
     // reset id_temp of pressure to new temperature ID
 
     if (pstat_flag) {
       icompute = modify->find_compute(id_press);
       if (icompute < 0)
         error->all(FLERR,"Pressure ID for fix modify does not exist");
       modify->compute[icompute]->reset_extra_compute_fix(id_temp);
     }
 
     return 2;
 
   } else if (strcmp(arg[0],"press") == 0) {
     if (narg < 2) error->all(FLERR,"Illegal fix_modify command");
     if (!pstat_flag) error->all(FLERR,"Illegal fix_modify command");
     if (pcomputeflag) {
       modify->delete_compute(id_press);
       pcomputeflag = 0;
     }
     delete [] id_press;
     int n = strlen(arg[1]) + 1;
     id_press = new char[n];
     strcpy(id_press,arg[1]);
 
     int icompute = modify->find_compute(arg[1]);
     if (icompute < 0) error->all(FLERR,"Could not find fix_modify pressure ID");
     pressure = modify->compute[icompute];
 
     if (pressure->pressflag == 0)
       error->all(FLERR,"Fix_modify pressure ID does not compute pressure");
     return 2;
   }
 
   return 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
 double FixNH::compute_scalar()
 {
   int i;
   double volume;
   double energy;
   double kt = boltz * t_target;
   double lkt_press = kt;
   int ich;
   if (dimension == 3) volume = domain->xprd * domain->yprd * domain->zprd;
   else volume = domain->xprd * domain->yprd;
 
   energy = 0.0;
 
   // thermostat chain energy is equivalent to Eq. (2) in
   // Martyna, Tuckerman, Tobias, Klein, Mol Phys, 87, 1117
   // Sum(0.5*p_eta_k^2/Q_k,k=1,M) + L*k*T*eta_1 + Sum(k*T*eta_k,k=2,M),
   // where L = tdof
   //       M = mtchain
   //       p_eta_k = Q_k*eta_dot[k-1]
   //       Q_1 = L*k*T/t_freq^2
   //       Q_k = k*T/t_freq^2, k > 1
 
   if (tstat_flag) {
     energy += ke_target * eta[0] + 0.5*eta_mass[0]*eta_dot[0]*eta_dot[0];
     for (ich = 1; ich < mtchain; ich++)
       energy += kt * eta[ich] + 0.5*eta_mass[ich]*eta_dot[ich]*eta_dot[ich];
   }
 
   // barostat energy is equivalent to Eq. (8) in
   // Martyna, Tuckerman, Tobias, Klein, Mol Phys, 87, 1117
   // Sum(0.5*p_omega^2/W + P*V),
   // where N = natoms
   //       p_omega = W*omega_dot
   //       W = N*k*T/p_freq^2
   //       sum is over barostatted dimensions
 
   if (pstat_flag) {
     for (i = 0; i < 3; i++)
       if (p_flag[i])
         energy += 0.5*omega_dot[i]*omega_dot[i]*omega_mass[i] +
           p_hydro*(volume-vol0) / (pdim*nktv2p);
 
     if (pstyle == TRICLINIC) {
       for (i = 3; i < 6; i++)
         if (p_flag[i])
           energy += 0.5*omega_dot[i]*omega_dot[i]*omega_mass[i];
     }
 
     // extra contributions from thermostat chain for barostat
 
     if (mpchain) {
       energy += lkt_press * etap[0] + 0.5*etap_mass[0]*etap_dot[0]*etap_dot[0];
       for (ich = 1; ich < mpchain; ich++)
         energy += kt * etap[ich] +
           0.5*etap_mass[ich]*etap_dot[ich]*etap_dot[ich];
     }
 
     // extra contribution from strain energy
 
     if (deviatoric_flag) energy += compute_strain_energy();
   }
 
   return energy;
 }
 
 /* ----------------------------------------------------------------------
    return a single element of the following vectors, in this order:
       eta[tchain], eta_dot[tchain], omega[ndof], omega_dot[ndof]
       etap[pchain], etap_dot[pchain], PE_eta[tchain], KE_eta_dot[tchain]
       PE_omega[ndof], KE_omega_dot[ndof], PE_etap[pchain], KE_etap_dot[pchain]
       PE_strain[1]
   if no thermostat exists, related quantities are omitted from the list
   if no barostat exists, related quantities are omitted from the list
   ndof = 1,3,6 degrees of freedom for pstyle = ISO,ANISO,TRI
 ------------------------------------------------------------------------- */
 
 double FixNH::compute_vector(int n)
 {
   int ilen;
 
   if (tstat_flag) {
     ilen = mtchain;
     if (n < ilen) return eta[n];
     n -= ilen;
     ilen = mtchain;
     if (n < ilen) return eta_dot[n];
     n -= ilen;
   }
 
   if (pstat_flag) {
     if (pstyle == ISO) {
       ilen = 1;
       if (n < ilen) return omega[n];
       n -= ilen;
     } else if (pstyle == ANISO) {
       ilen = 3;
       if (n < ilen) return omega[n];
       n -= ilen;
     } else {
       ilen = 6;
       if (n < ilen) return omega[n];
       n -= ilen;
     }
 
     if (pstyle == ISO) {
       ilen = 1;
       if (n < ilen) return omega_dot[n];
       n -= ilen;
     } else if (pstyle == ANISO) {
       ilen = 3;
       if (n < ilen) return omega_dot[n];
       n -= ilen;
     } else {
       ilen = 6;
       if (n < ilen) return omega_dot[n];
       n -= ilen;
     }
 
     if (mpchain) {
       ilen = mpchain;
       if (n < ilen) return etap[n];
       n -= ilen;
       ilen = mpchain;
       if (n < ilen) return etap_dot[n];
       n -= ilen;
     }
   }
 
   double volume;
   double kt = boltz * t_target;
   double lkt_press = kt;
   int ich;
   if (dimension == 3) volume = domain->xprd * domain->yprd * domain->zprd;
   else volume = domain->xprd * domain->yprd;
 
   if (tstat_flag) {
     ilen = mtchain;
     if (n < ilen) {
       ich = n;
       if (ich == 0)
         return ke_target * eta[0];
       else
         return kt * eta[ich];
     }
     n -= ilen;
     ilen = mtchain;
     if (n < ilen) {
       ich = n;
       if (ich == 0)
         return 0.5*eta_mass[0]*eta_dot[0]*eta_dot[0];
       else
         return 0.5*eta_mass[ich]*eta_dot[ich]*eta_dot[ich];
     }
     n -= ilen;
   }
 
   if (pstat_flag) {
     if (pstyle == ISO) {
       ilen = 1;
       if (n < ilen)
         return p_hydro*(volume-vol0) / nktv2p;
       n -= ilen;
     } else if (pstyle == ANISO) {
       ilen = 3;
       if (n < ilen) {
         if (p_flag[n])
           return p_hydro*(volume-vol0) / (pdim*nktv2p);
         else
           return 0.0;
       }
       n -= ilen;
     } else {
       ilen = 6;
       if (n < ilen) {
         if (n > 2) return 0.0;
         else if (p_flag[n])
           return p_hydro*(volume-vol0) / (pdim*nktv2p);
         else
           return 0.0;
       }
       n -= ilen;
     }
 
     if (pstyle == ISO) {
       ilen = 1;
       if (n < ilen)
         return pdim*0.5*omega_dot[n]*omega_dot[n]*omega_mass[n];
       n -= ilen;
     } else if (pstyle == ANISO) {
       ilen = 3;
       if (n < ilen) {
         if (p_flag[n])
           return 0.5*omega_dot[n]*omega_dot[n]*omega_mass[n];
         else return 0.0;
       }
       n -= ilen;
     } else {
       ilen = 6;
       if (n < ilen) {
         if (p_flag[n])
           return 0.5*omega_dot[n]*omega_dot[n]*omega_mass[n];
         else return 0.0;
       }
       n -= ilen;
     }
 
     if (mpchain) {
       ilen = mpchain;
       if (n < ilen) {
         ich = n;
         if (ich == 0) return lkt_press * etap[0];
         else return kt * etap[ich];
       }
       n -= ilen;
       ilen = mpchain;
       if (n < ilen) {
         ich = n;
         if (ich == 0)
           return 0.5*etap_mass[0]*etap_dot[0]*etap_dot[0];
         else
           return 0.5*etap_mass[ich]*etap_dot[ich]*etap_dot[ich];
       }
       n -= ilen;
     }
 
     if (deviatoric_flag) {
       ilen = 1;
       if (n < ilen)
         return compute_strain_energy();
       n -= ilen;
     }
   }
 
   return 0.0;
 }
 
 /* ----------------------------------------------------------------------
    set the start, stop and current target temperature to t_new
 ------------------------------------------------------------------------- */
 
 void FixNH::reset_target(double t_new)
 {
   t_stop = t_new;
   t_start = t_new;
   t_target = t_new;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixNH::reset_dt()
 {
   dtv = update->dt;
   dtf = 0.5 * update->dt * force->ftm2v;
   dthalf = 0.5 * update->dt;
   dt4 = 0.25 * update->dt;
   dt8 = 0.125 * update->dt;
   dto = dthalf;
 
   // If using respa, then remap is performed in innermost level
 
   if (strstr(update->integrate_style,"respa"))
     dto = 0.5*step_respa[0];
 
   if (pstat_flag)
     pdrag_factor = 1.0 - (update->dt * p_freq_max * drag / nc_pchain);
 
   if (tstat_flag)
     tdrag_factor = 1.0 - (update->dt * t_freq * drag / nc_tchain);
 }
 
 /* ----------------------------------------------------------------------
    extract thermostat properties
 ------------------------------------------------------------------------- */
 
 void *FixNH::extract(const char *str, int &dim)
 {
   dim=0;
   if (tstat_flag && strcmp(str,"t_target") == 0) {
     return &t_target;
   } else if (tstat_flag && strcmp(str,"t_start") == 0) {
     return &t_start;
   } else if (tstat_flag && strcmp(str,"t_stop") == 0) {
     return &t_stop;
   } else if (tstat_flag && strcmp(str,"mtchain") == 0) {
     return &mtchain;
   } else if (pstat_flag && strcmp(str,"mpchain") == 0) {
     return &mtchain;
   }
   dim=1;
   if (tstat_flag && strcmp(str,"eta") == 0) {
     return &eta;
   } else if (pstat_flag && strcmp(str,"etap") == 0) {
     return &eta;
   } else if (pstat_flag && strcmp(str,"p_flag") == 0) {
     return &p_flag;
   } else if (pstat_flag && strcmp(str,"p_start") == 0) {
     return &p_start;
   } else if (pstat_flag && strcmp(str,"p_stop") == 0) {
     return &p_stop;
   } else if (pstat_flag && strcmp(str,"p_target") == 0) {
     return &p_target;
   }
   return NULL;
 }
 
 /* ----------------------------------------------------------------------
    perform half-step update of chain thermostat variables
    runs nc_tchain sub-iterations, each scaled by ncfac = 1/nc_tchain
    every sub-iteration updates eta_dot from the end of the chain to the
      front, rescales particle velocities via nh_v_temp(), advances eta,
      then updates eta_dot again from the front to the end of the chain
    tdrag_factor is applied only in the first (end-to-front) sweep
 ------------------------------------------------------------------------- */
 
 void FixNH::nhc_temp_integrate()
 {
   int ich;
   double expfac;
   double kecurrent = tdof * boltz * t_current;
 
   // Update masses, to preserve initial freq, if flag set
 
   if (eta_mass_flag) {
     eta_mass[0] = tdof * boltz * t_target / (t_freq*t_freq);
     for (int ich = 1; ich < mtchain; ich++)
       eta_mass[ich] = boltz * t_target / (t_freq*t_freq);
   }
 
   // force on first chain element is the deviation of the current
   // kinetic energy from its target; zero mass disables the force
 
   if (eta_mass[0] > 0.0)
     eta_dotdot[0] = (kecurrent - ke_target)/eta_mass[0];
   else eta_dotdot[0] = 0.0;
 
   double ncfac = 1.0/nc_tchain;
   for (int iloop = 0; iloop < nc_tchain; iloop++) {
 
     // first sweep: last chain element down to element 1
     // NOTE(review): eta_dot[ich+1] reads index mtchain when ich = mtchain-1;
     // presumably eta_dot holds mtchain+1 entries - confirm at allocation
 
     for (ich = mtchain-1; ich > 0; ich--) {
       expfac = exp(-ncfac*dt8*eta_dot[ich+1]);
       eta_dot[ich] *= expfac;
       eta_dot[ich] += eta_dotdot[ich] * ncfac*dt4;
       eta_dot[ich] *= tdrag_factor;
       eta_dot[ich] *= expfac;
     }
 
     // first chain element, coupled to eta_dot[1]
 
     expfac = exp(-ncfac*dt8*eta_dot[1]);
     eta_dot[0] *= expfac;
     eta_dot[0] += eta_dotdot[0] * ncfac*dt4;
     eta_dot[0] *= tdrag_factor;
     eta_dot[0] *= expfac;
 
     // scale particle velocities by factor_eta
 
     factor_eta = exp(-ncfac*dthalf*eta_dot[0]);
     nh_v_temp();
 
     // rescale temperature due to velocity scaling
     // should not be necessary to explicitly recompute the temperature
 
     t_current *= factor_eta*factor_eta;
     kecurrent = tdof * boltz * t_current;
 
     if (eta_mass[0] > 0.0)
       eta_dotdot[0] = (kecurrent - ke_target)/eta_mass[0];
     else eta_dotdot[0] = 0.0;
 
     // advance chain positions
 
     for (ich = 0; ich < mtchain; ich++)
       eta[ich] += ncfac*dthalf*eta_dot[ich];
 
     // second sweep: element 0 reuses expfac from the first sweep,
     // no drag is applied here
 
     eta_dot[0] *= expfac;
     eta_dot[0] += eta_dotdot[0] * ncfac*dt4;
     eta_dot[0] *= expfac;
 
     // remaining elements: force on element ich comes from the
     // kinetic energy of element ich-1 relative to boltz*t_target
 
     for (ich = 1; ich < mtchain; ich++) {
       expfac = exp(-ncfac*dt8*eta_dot[ich+1]);
       eta_dot[ich] *= expfac;
       eta_dotdot[ich] = (eta_mass[ich-1]*eta_dot[ich-1]*eta_dot[ich-1]
                          - boltz * t_target)/eta_mass[ich];
       eta_dot[ich] += eta_dotdot[ich] * ncfac*dt4;
       eta_dot[ich] *= expfac;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    perform half-step update of chain thermostat variables for barostat
    scale barostat velocities
    runs nc_pchain sub-iterations, each scaled by ncfac = 1/nc_pchain
    every sub-iteration updates etap_dot from the end of the chain to the
      front, advances etap, scales omega_dot by factor_etap, then updates
      etap_dot again from the front to the end of the chain
    pdrag_factor is applied only in the first (end-to-front) sweep
    NOTE(review): etap_mass[0] is used as a divisor without a zero check
      and etap_dot[1] is read unconditionally - presumably callers only
      invoke this when mpchain > 0; confirm at the call sites
 ------------------------------------------------------------------------- */
 
 void FixNH::nhc_press_integrate()
 {
   int ich,i;
   double expfac,factor_etap,kecurrent;
   double kt = boltz * t_target;
   double lkt_press = kt;
 
   // Update masses, to preserve initial freq, if flag set
 
   if (omega_mass_flag) {
     double nkt = atom->natoms * kt;
     for (int i = 0; i < 3; i++)
       if (p_flag[i])
         omega_mass[i] = nkt/(p_freq[i]*p_freq[i]);
 
     if (pstyle == TRICLINIC) {
       for (int i = 3; i < 6; i++)
         if (p_flag[i]) omega_mass[i] = nkt/(p_freq[i]*p_freq[i]);
     }
   }
 
   // chain masses and the forces on chain elements 1..mpchain-1
 
   if (etap_mass_flag) {
     if (mpchain) {
       etap_mass[0] = boltz * t_target / (p_freq_max*p_freq_max);
       for (int ich = 1; ich < mpchain; ich++)
         etap_mass[ich] = boltz * t_target / (p_freq_max*p_freq_max);
       for (int ich = 1; ich < mpchain; ich++)
         etap_dotdot[ich] =
           (etap_mass[ich-1]*etap_dot[ich-1]*etap_dot[ich-1] -
            boltz * t_target) / etap_mass[ich];
     }
   }
 
   // sum of omega_mass*omega_dot^2 over flagged barostat components
   // off-diagonal components are included only for TRICLINIC
 
   kecurrent = 0.0;
   for (i = 0; i < 3; i++)
     if (p_flag[i]) kecurrent += omega_mass[i]*omega_dot[i]*omega_dot[i];
 
   if (pstyle == TRICLINIC) {
     for (i = 3; i < 6; i++)
       if (p_flag[i]) kecurrent += omega_mass[i]*omega_dot[i]*omega_dot[i];
   }
 
   etap_dotdot[0] = (kecurrent - lkt_press)/etap_mass[0];
 
   double ncfac = 1.0/nc_pchain;
   for (int iloop = 0; iloop < nc_pchain; iloop++) {
 
     // first sweep: last chain element down to element 1
     // NOTE(review): etap_dot[ich+1] reads index mpchain when ich = mpchain-1;
     // presumably etap_dot holds mpchain+1 entries - confirm at allocation
 
     for (ich = mpchain-1; ich > 0; ich--) {
       expfac = exp(-ncfac*dt8*etap_dot[ich+1]);
       etap_dot[ich] *= expfac;
       etap_dot[ich] += etap_dotdot[ich] * ncfac*dt4;
       etap_dot[ich] *= pdrag_factor;
       etap_dot[ich] *= expfac;
     }
 
     // first chain element, coupled to etap_dot[1]
 
     expfac = exp(-ncfac*dt8*etap_dot[1]);
     etap_dot[0] *= expfac;
     etap_dot[0] += etap_dotdot[0] * ncfac*dt4;
     etap_dot[0] *= pdrag_factor;
     etap_dot[0] *= expfac;
 
     // advance chain positions
 
     for (ich = 0; ich < mpchain; ich++)
       etap[ich] += ncfac*dthalf*etap_dot[ich];
 
     // scale the flagged barostat velocities
 
     factor_etap = exp(-ncfac*dthalf*etap_dot[0]);
     for (i = 0; i < 3; i++)
       if (p_flag[i]) omega_dot[i] *= factor_etap;
 
     if (pstyle == TRICLINIC) {
       for (i = 3; i < 6; i++)
         if (p_flag[i]) omega_dot[i] *= factor_etap;
     }
 
     // recompute the driving term from the scaled barostat velocities
 
     kecurrent = 0.0;
     for (i = 0; i < 3; i++)
       if (p_flag[i]) kecurrent += omega_mass[i]*omega_dot[i]*omega_dot[i];
 
     if (pstyle == TRICLINIC) {
       for (i = 3; i < 6; i++)
         if (p_flag[i]) kecurrent += omega_mass[i]*omega_dot[i]*omega_dot[i];
     }
 
     etap_dotdot[0] = (kecurrent - lkt_press)/etap_mass[0];
 
     // second sweep: element 0 reuses expfac from the first sweep,
     // no drag is applied here
 
     etap_dot[0] *= expfac;
     etap_dot[0] += etap_dotdot[0] * ncfac*dt4;
     etap_dot[0] *= expfac;
 
     // remaining elements: force on element ich comes from the
     // kinetic energy of element ich-1 relative to boltz*t_target
 
     for (ich = 1; ich < mpchain; ich++) {
       expfac = exp(-ncfac*dt8*etap_dot[ich+1]);
       etap_dot[ich] *= expfac;
       etap_dotdot[ich] =
         (etap_mass[ich-1]*etap_dot[ich-1]*etap_dot[ich-1] - boltz*t_target) /
         etap_mass[ich];
       etap_dot[ich] += etap_dotdot[ich] * ncfac*dt4;
       etap_dot[ich] *= expfac;
     }
   }
 }
 
 /* ----------------------------------------------------------------------
    perform half-step barostat scaling of velocities
    each velocity is scaled twice by a per-dimension factor, with the
      triclinic tilt coupling applied between the two scalings
    for a biased temperature compute the bias is removed before and
      restored after the scaling of each atom
 ------------------------------------------------------------------------- */
 
 void FixNH::nh_v_press()
 {
   double **v = atom->v;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
   if (igroup == atom->firstgroup) nlocal = atom->nfirst;
 
   double factor[3];
   for (int d = 0; d < 3; d++)
     factor[d] = exp(-dt4*(omega_dot[d]+mtk_term2));
 
   // any other value of which leaves the velocities untouched
 
   if (which != NOBIAS && which != BIAS) return;
   const bool biased = (which == BIAS);
 
   for (int i = 0; i < nlocal; i++) {
     if (!(mask[i] & groupbit)) continue;
     double *vi = v[i];
 
     if (biased) temperature->remove_bias(i,vi);
 
     vi[0] *= factor[0];
     vi[1] *= factor[1];
     vi[2] *= factor[2];
     if (pstyle == TRICLINIC) {
       vi[0] += -dthalf*(vi[1]*omega_dot[5] + vi[2]*omega_dot[4]);
       vi[1] += -dthalf*vi[2]*omega_dot[3];
     }
     vi[0] *= factor[0];
     vi[1] *= factor[1];
     vi[2] *= factor[2];
 
     if (biased) temperature->restore_bias(i,vi);
   }
 }
 
 /* ----------------------------------------------------------------------
    perform half-step update of velocities from the current forces
    uses per-atom masses when rmass is set, per-type masses otherwise
 ------------------------------------------------------------------------- */
 
 void FixNH::nve_v()
 {
   double **v = atom->v;
   double **f = atom->f;
   double *rmass = atom->rmass;
   double *mass = atom->mass;
   int *type = atom->type;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
   if (igroup == atom->firstgroup) nlocal = atom->nfirst;
 
   for (int i = 0; i < nlocal; i++) {
     if (!(mask[i] & groupbit)) continue;
 
     const double dtfm = rmass ? dtf / rmass[i] : dtf / mass[type[i]];
     v[i][0] += dtfm*f[i][0];
     v[i][1] += dtfm*f[i][1];
     v[i][2] += dtfm*f[i][2];
   }
 }
 
 /* ----------------------------------------------------------------------
    perform full-step update of positions
    only atoms in the fix group are moved
 ------------------------------------------------------------------------- */
 
 void FixNH::nve_x()
 {
   double **x = atom->x;
   double **v = atom->v;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
   if (igroup == atom->firstgroup) nlocal = atom->nfirst;
 
   for (int i = 0; i < nlocal; i++) {
     if (!(mask[i] & groupbit)) continue;
     for (int d = 0; d < 3; d++)
       x[i][d] += dtv * v[i][d];
   }
 }
 
 /* ----------------------------------------------------------------------
    perform half-step thermostat scaling of velocities
    all three components of each group atom are multiplied by factor_eta
    for a biased temperature compute the bias is removed before and
      restored after the scaling of each atom
 ------------------------------------------------------------------------- */
 
 void FixNH::nh_v_temp()
 {
   double **v = atom->v;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
   if (igroup == atom->firstgroup) nlocal = atom->nfirst;
 
   // any other value of which leaves the velocities untouched
 
   if (which != NOBIAS && which != BIAS) return;
   const bool biased = (which == BIAS);
 
   for (int i = 0; i < nlocal; i++) {
     if (!(mask[i] & groupbit)) continue;
     double *vi = v[i];
 
     if (biased) temperature->remove_bias(i,vi);
     vi[0] *= factor_eta;
     vi[1] *= factor_eta;
     vi[2] *= factor_eta;
     if (biased) temperature->restore_bias(i,vi);
   }
 }
 
 /* ----------------------------------------------------------------------
    compute sigma tensor
    needed whenever p_target or h0_inv changes
 -----------------------------------------------------------------------*/
 
 void FixNH::compute_sigma()
 {
   // if nreset_h0 > 0, reset vol0 and h0_inv
   // every nreset_h0 timesteps
 
   if (nreset_h0 > 0) {
     int delta = update->ntimestep - update->beginstep;
     if (delta % nreset_h0 == 0) {
       if (dimension == 3) vol0 = domain->xprd * domain->yprd * domain->zprd;
       else vol0 = domain->xprd * domain->yprd;
       h0_inv[0] = domain->h_inv[0];
       h0_inv[1] = domain->h_inv[1];
       h0_inv[2] = domain->h_inv[2];
       h0_inv[3] = domain->h_inv[3];
       h0_inv[4] = domain->h_inv[4];
       h0_inv[5] = domain->h_inv[5];
     }
   }
 
   // generate upper-triangular half of
   // sigma = vol0*h0inv*(p_target-p_hydro)*h0inv^t
   // units of sigma are are PV/L^2 e.g. atm.A
   //
   // [ 0 5 4 ]   [ 0 5 4 ] [ 0 5 4 ] [ 0 - - ]
   // [ 5 1 3 ] = [ - 1 3 ] [ 5 1 3 ] [ 5 1 - ]
   // [ 4 3 2 ]   [ - - 2 ] [ 4 3 2 ] [ 4 3 2 ]
 
   sigma[0] =
     vol0*(h0_inv[0]*((p_target[0]-p_hydro)*h0_inv[0] +
                      p_target[5]*h0_inv[5]+p_target[4]*h0_inv[4]) +
           h0_inv[5]*(p_target[5]*h0_inv[0] +
                      (p_target[1]-p_hydro)*h0_inv[5]+p_target[3]*h0_inv[4]) +
           h0_inv[4]*(p_target[4]*h0_inv[0]+p_target[3]*h0_inv[5] +
                      (p_target[2]-p_hydro)*h0_inv[4]));
   sigma[1] =
     vol0*(h0_inv[1]*((p_target[1]-p_hydro)*h0_inv[1] +
                      p_target[3]*h0_inv[3]) +
           h0_inv[3]*(p_target[3]*h0_inv[1] +
                      (p_target[2]-p_hydro)*h0_inv[3]));
   sigma[2] =
     vol0*(h0_inv[2]*((p_target[2]-p_hydro)*h0_inv[2]));
   sigma[3] =
     vol0*(h0_inv[1]*(p_target[3]*h0_inv[2]) +
           h0_inv[3]*((p_target[2]-p_hydro)*h0_inv[2]));
   sigma[4] =
     vol0*(h0_inv[0]*(p_target[4]*h0_inv[2]) +
           h0_inv[5]*(p_target[3]*h0_inv[2]) +
           h0_inv[4]*((p_target[2]-p_hydro)*h0_inv[2]));
   sigma[5] =
     vol0*(h0_inv[0]*(p_target[5]*h0_inv[1]+p_target[4]*h0_inv[3]) +
           h0_inv[5]*((p_target[1]-p_hydro)*h0_inv[1]+p_target[3]*h0_inv[3]) +
           h0_inv[4]*(p_target[3]*h0_inv[1]+(p_target[2]-p_hydro)*h0_inv[3]));
 }
 
 /* ----------------------------------------------------------------------
    compute strain energy = 0.5*Tr(sigma*h*h^t) in energy units
    the three diagonal entries of sigma*h*h^t are accumulated separately
      and the sum is converted to energy units by dividing by nktv2p
 ------------------------------------------------------------------------- */
 
 double FixNH::compute_strain_energy()
 {
   double* h = domain->h;
 
   // diagonal entry 0
 
   const double diag0 =
     sigma[0]*(h[0]*h[0]+h[5]*h[5]+h[4]*h[4]) +
     sigma[5]*(          h[1]*h[5]+h[3]*h[4]) +
     sigma[4]*(                    h[2]*h[4]);
 
   // diagonal entry 1
 
   const double diag1 =
     sigma[5]*(          h[5]*h[1]+h[4]*h[3]) +
     sigma[1]*(          h[1]*h[1]+h[3]*h[3]) +
     sigma[3]*(                    h[2]*h[3]);
 
   // diagonal entry 2
 
   const double diag2 =
     sigma[4]*(                    h[4]*h[2]) +
     sigma[3]*(                    h[3]*h[2]) +
     sigma[2]*(                    h[2]*h[2]);
 
   return 0.5*(diag0+diag1+diag2)/nktv2p;
 }
 
 /* ----------------------------------------------------------------------
    compute deviatoric barostat force = h*sigma*h^t
    reads the current box matrix domain->h and the sigma tensor
    writes the six components of fdev
    indices 0-2 are the diagonal entries, 3-5 the off-diagonal entries,
      laid out as shown in the index diagram below
 -----------------------------------------------------------------------*/
 
 void FixNH::compute_deviatoric()
 {
   // generate upper-triangular part of h*sigma*h^t
   // units of fdev are PV, e.g. atm*A^3
   // [ 0 5 4 ]   [ 0 5 4 ] [ 0 5 4 ] [ 0 - - ]
   // [ 5 1 3 ] = [ - 1 3 ] [ 5 1 3 ] [ 5 1 - ]
   // [ 4 3 2 ]   [ - - 2 ] [ 4 3 2 ] [ 4 3 2 ]
 
   double* h = domain->h;
 
   fdev[0] =
     h[0]*(sigma[0]*h[0]+sigma[5]*h[5]+sigma[4]*h[4]) +
     h[5]*(sigma[5]*h[0]+sigma[1]*h[5]+sigma[3]*h[4]) +
     h[4]*(sigma[4]*h[0]+sigma[3]*h[5]+sigma[2]*h[4]);
   fdev[1] =
     h[1]*(              sigma[1]*h[1]+sigma[3]*h[3]) +
     h[3]*(              sigma[3]*h[1]+sigma[2]*h[3]);
   fdev[2] =
     h[2]*(                            sigma[2]*h[2]);
   fdev[3] =
     h[1]*(                            sigma[3]*h[2]) +
     h[3]*(                            sigma[2]*h[2]);
   fdev[4] =
     h[0]*(                            sigma[4]*h[2]) +
     h[5]*(                            sigma[3]*h[2]) +
     h[4]*(                            sigma[2]*h[2]);
   fdev[5] =
     h[0]*(              sigma[5]*h[1]+sigma[4]*h[3]) +
     h[5]*(              sigma[1]*h[1]+sigma[3]*h[3]) +
     h[4]*(              sigma[3]*h[1]+sigma[2]*h[3]);
 }
 
 /* ----------------------------------------------------------------------
    compute target temperature and kinetic energy
    t_target is interpolated linearly between t_start and t_stop using
      the fraction of the run (beginstep to endstep) already completed
 ------------------------------------------------------------------------- */
 
 void FixNH::compute_temp_target()
 {
   // fraction stays 0 on the first step, which also avoids the division
 
   double frac = 0.0;
   const double elapsed = update->ntimestep - update->beginstep;
   if (elapsed != 0.0) frac = elapsed / (update->endstep - update->beginstep);
 
   t_target = t_start + frac * (t_stop-t_start);
   ke_target = tdof * boltz * t_target;
 }
 
 /* ----------------------------------------------------------------------
    compute hydrostatic target pressure
    flagged diagonal targets are interpolated linearly between p_start
      and p_stop and averaged over pdim to give p_hydro
    for TRICLINIC the off-diagonal targets are interpolated as well
 ------------------------------------------------------------------------- */
 
 void FixNH::compute_press_target()
 {
   // fraction stays 0 on the first step, which also avoids the division
 
   double frac = 0.0;
   const double elapsed = update->ntimestep - update->beginstep;
   if (elapsed != 0.0) frac = elapsed / (update->endstep - update->beginstep);
 
   p_hydro = 0.0;
   for (int i = 0; i < 3; i++) {
     if (!p_flag[i]) continue;
     p_target[i] = p_start[i] + frac * (p_stop[i]-p_start[i]);
     p_hydro += p_target[i];
   }
   if (pdim > 0) p_hydro /= pdim;
 
   if (pstyle == TRICLINIC) {
     for (int i = 3; i < 6; i++)
       p_target[i] = p_start[i] + frac * (p_stop[i]-p_start[i]);
   }
 
   // if deviatoric, recompute sigma each time p_target changes
 
   if (deviatoric_flag) compute_sigma();
 }
 
 /* ----------------------------------------------------------------------
    update omega_dot for all flagged barostat components
    also sets mtk_term1 and mtk_term2, which are zero unless mtk_flag is set
    each updated component is damped by pdrag_factor
 ------------------------------------------------------------------------- */
 
 void FixNH::nh_omega_dot()
 {
   double volume;
   if (dimension == 3) volume = domain->xprd*domain->yprd*domain->zprd;
   else volume = domain->xprd*domain->yprd;
 
   if (deviatoric_flag) compute_deviatoric();
 
   // mtk_term1: kinetic contribution, normalized by pdim * natoms
 
   mtk_term1 = 0.0;
   if (mtk_flag) {
     if (pstyle == ISO) {
       mtk_term1 = tdof * boltz * t_current;
     } else {
       double *mvv_current = temperature->vector;
       for (int i = 0; i < 3; i++)
         if (p_flag[i])
           mtk_term1 += mvv_current[i];
     }
     mtk_term1 /= pdim * atom->natoms;
   }
 
   // diagonal components: driven by the deviation from p_hydro
 
   for (int i = 0; i < 3; i++) {
     if (!p_flag[i]) continue;
 
     double f_omega = (p_current[i]-p_hydro)*volume /
       (omega_mass[i] * nktv2p) + mtk_term1 / omega_mass[i];
     if (deviatoric_flag) f_omega -= fdev[i]/(omega_mass[i] * nktv2p);
     omega_dot[i] += f_omega*dthalf;
     omega_dot[i] *= pdrag_factor;
   }
 
   // mtk_term2: sum of the updated diagonal omega_dot values,
   // normalized by pdim * natoms
 
   mtk_term2 = 0.0;
   if (mtk_flag) {
     for (int i = 0; i < 3; i++)
       if (p_flag[i])
         mtk_term2 += omega_dot[i];
     if (pdim > 0) mtk_term2 /= pdim * atom->natoms;
   }
 
   // off-diagonal components exist only for triclinic boxes
 
   if (pstyle != TRICLINIC) return;
 
   for (int i = 3; i < 6; i++) {
     if (!p_flag[i]) continue;
 
     double f_omega = p_current[i]*volume/(omega_mass[i] * nktv2p);
     if (deviatoric_flag)
       f_omega -= fdev[i]/(omega_mass[i] * nktv2p);
     omega_dot[i] += f_omega*dthalf;
     omega_dot[i] *= pdrag_factor;
   }
 }
 
 /* ----------------------------------------------------------------------
   if any tilt ratios exceed limits, set flip = 1 and compute new tilt values
   do not flip in x or y if non-periodic (can tilt but not flip)
     this is b/c the box length would be changed (dramatically) by flip
   if yz tilt exceeded, adjust C vector by one B vector
   if xz tilt exceeded, adjust C vector by one A vector
   if xy tilt exceeded, adjust B vector by one A vector
   check yz first since it may change xz, then xz check comes after
   if any flip occurs, create new box in domain
   image_flip() adjusts image flags due to box shape change induced by flip
   remap() puts atoms outside the new box back into the new box
   perform irregular on atoms in lamda coords to migrate atoms to new procs
   important that image_flip comes before remap, since remap may change
     image flags to new values, making eqs in doc of Domain:image_flip incorrect
 ------------------------------------------------------------------------- */
 
 void FixNH::pre_exchange()
 {
   const double xprd = domain->xprd;
   const double yprd = domain->yprd;
 
   // flip is only triggered when tilt exceeds 0.5 by DELTAFLIP
   // this avoids immediate re-flipping due to tilt oscillations
 
   const double xtiltmax = (0.5+DELTAFLIP)*xprd;
   const double ytiltmax = (0.5+DELTAFLIP)*yprd;
 
   int flipxy = 0;
   int flipxz = 0;
   int flipyz = 0;
 
   // yz is handled first because a yz flip also shifts xz by xy
 
   if (domain->yperiodic) {
     if (domain->yz < -ytiltmax) {
       domain->yz += yprd;
       domain->xz += domain->xy;
       flipyz = 1;
     } else if (domain->yz >= ytiltmax) {
       domain->yz -= yprd;
       domain->xz -= domain->xy;
       flipyz = -1;
     }
   }
 
   // xz is checked after yz so it sees any shift applied above
 
   if (domain->xperiodic) {
     if (domain->xz < -xtiltmax) {
       domain->xz += xprd;
       flipxz = 1;
     } else if (domain->xz >= xtiltmax) {
       domain->xz -= xprd;
       flipxz = -1;
     }
 
     if (domain->xy < -xtiltmax) {
       domain->xy += xprd;
       flipxy = 1;
     } else if (domain->xy >= xtiltmax) {
       domain->xy -= xprd;
       flipxy = -1;
     }
   }
 
   // nothing else to do unless at least one tilt was flipped
 
   if (!flipxy && !flipxz && !flipyz) return;
 
   domain->set_global_box();
   domain->set_local_box();
 
   // image flags must be adjusted before atoms are remapped
 
   domain->image_flip(flipxy,flipxz,flipyz);
 
   double **x = atom->x;
   imageint *image = atom->image;
   const int nlocal = atom->nlocal;
   for (int i = 0; i < nlocal; i++) domain->remap(x[i],image[i]);
 
   // migrate atoms to their new owning procs in lamda coords
 
   domain->x2lamda(atom->nlocal);
   irregular->migrate_atoms();
   domain->lamda2x(atom->nlocal);
 }
 
 /* ----------------------------------------------------------------------
    memory usage of Irregular
    returns 0.0 when no Irregular instance exists
 ------------------------------------------------------------------------- */
 
 double FixNH::memory_usage()
 {
   if (!irregular) return 0.0;
 
   double bytes = 0.0;
   bytes += irregular->memory_usage();
   return bytes;
 }
diff --git a/src/library.cpp b/src/library.cpp
index da491f715..ca3276ee8 100644
--- a/src/library.cpp
+++ b/src/library.cpp
@@ -1,1057 +1,1058 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 // C or Fortran style library interface to LAMMPS
 // customize by adding new LAMMPS-specific functions
 
 #include <mpi.h>
 #include <string.h>
 #include <stdlib.h>
 #include "library.h"
 #include "lmptype.h"
 #include "lammps.h"
 #include "universe.h"
 #include "input.h"
 #include "atom_vec.h"
 #include "atom.h"
 #include "domain.h"
 #include "update.h"
 #include "group.h"
 #include "input.h"
 #include "variable.h"
 #include "modify.h"
 #include "output.h"
 #include "thermo.h"
 #include "compute.h"
 #include "fix.h"
 #include "comm.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 // ----------------------------------------------------------------------
 // utility macros
 // ----------------------------------------------------------------------
 
 /* ----------------------------------------------------------------------
    macros for optional code path which captures LAMMPS exceptions
    (LAMMPSAbortException and LAMMPSException) and stores the last
    error message. These assume there is a variable lmp
    which is a pointer to the current LAMMPS instance.
    BEGIN_CAPTURE also declares a local variable named error, so it can
    be used only once per scope.
 
    Usage:
 
    BEGIN_CAPTURE
    {
      // code paths which might throw exception
      ...
    }
    END_CAPTURE
 
    If LAMMPS_EXCEPTIONS is not defined, both macros expand to nothing
    and the enclosed block runs as ordinary code.
 ------------------------------------------------------------------------- */
 
 #ifdef LAMMPS_EXCEPTIONS
 // opens the try block; the braces following the macro form its body
 #define BEGIN_CAPTURE \
   Error * error = lmp->error; \
   try
 
 // an abort exception is recorded as ERROR_ABORT when the communicator
 // it carries has more than one process, otherwise as ERROR_NORMAL
 #define END_CAPTURE \
   catch(LAMMPSAbortException & ae) { \
     int nprocs = 0; \
     MPI_Comm_size(ae.universe, &nprocs ); \
     \
     if (nprocs > 1) { \
       error->set_last_error(ae.message.c_str(), ERROR_ABORT); \
     } else { \
       error->set_last_error(ae.message.c_str(), ERROR_NORMAL); \
     } \
   } catch(LAMMPSException & e) { \
     error->set_last_error(e.message.c_str(), ERROR_NORMAL); \
   }
 #else
 // exception support disabled: no capture, no extra locals
 #define BEGIN_CAPTURE
 #define END_CAPTURE
 #endif
 
 // ----------------------------------------------------------------------
 // helper functions, not in library API
 // ----------------------------------------------------------------------
 
 /* ----------------------------------------------------------------------
    concatenate one or more LAMMPS input lines starting at ptr
    removes NULL terminator when last printable char of line = '&'
      by replacing both NULL and '&' with space character
    repeat as many times as needed
    on return, ptr now points to longer line
    ptr must be a token inside a larger writable buffer: the byte after
      the line's terminator is read, and strtok() is re-run on ptr, which
      resets strtok's internal position to the end of the joined line
    if the '&' line is the last one in the buffer, the byte following its
      terminator must itself be a readable NULL, which the caller provides
    an empty string is left untouched
 ------------------------------------------------------------------------- */
 
 void concatenate_lines(char *ptr)
 {
   int nend = strlen(ptr);
   int n = nend-1;
 
   // cast to unsigned char: isspace() is undefined for negative char values
   // n > 0 / n >= 0 guards keep an empty string from indexing ptr[-1]
 
   while (n > 0 && isspace((unsigned char) ptr[n])) n--;
   while (n >= 0 && ptr[n] == '&') {
     ptr[nend] = ' ';
     ptr[n] = ' ';
     strtok(ptr,"\n");
     nend = strlen(ptr);
     n = nend-1;
     while (n > 0 && isspace((unsigned char) ptr[n])) n--;
   }
 }
 
 // ----------------------------------------------------------------------
 // library API functions to create/destroy an instance of LAMMPS
 //   and communicate commands to it
 // ----------------------------------------------------------------------
 
 /* ----------------------------------------------------------------------
    create an instance of LAMMPS and return pointer to it
    pass in command-line args and MPI communicator to run on
    with LAMMPS_EXCEPTIONS defined, a LAMMPSException thrown during
      construction is printed to stderr and *ptr is set to NULL
 ------------------------------------------------------------------------- */
 
 void lammps_open(int argc, char **argv, MPI_Comm communicator, void **ptr)
 {
 #ifdef LAMMPS_EXCEPTIONS
   try {
     *ptr = (void *) new LAMMPS(argc,argv,communicator);
   } catch (LAMMPSException & e) {
     fprintf(stderr, "LAMMPS Exception: %s", e.message.c_str());
     *ptr = (void *) NULL;
   }
 #else
   *ptr = (void *) new LAMMPS(argc,argv,communicator);
 #endif
 }
 
 /* ----------------------------------------------------------------------
    create an instance of LAMMPS and return pointer to it
    caller doesn't know MPI communicator, so use MPI_COMM_WORLD
    initialize MPI if needed, passing it an empty argument list
    with LAMMPS_EXCEPTIONS defined, a LAMMPSException thrown during
      construction is printed to stderr and *ptr is set to NULL
 ------------------------------------------------------------------------- */
 
 void lammps_open_no_mpi(int argc, char **argv, void **ptr)
 {
   int mpi_ready;
   MPI_Initialized(&mpi_ready);
 
   if (!mpi_ready) {
 
     // MPI gets its own empty args, not the ones meant for LAMMPS
 
     int mpi_argc = 0;
     char **mpi_argv = NULL;
     MPI_Init(&mpi_argc,&mpi_argv);
   }
 
 #ifdef LAMMPS_EXCEPTIONS
   try {
     *ptr = (void *) new LAMMPS(argc,argv,MPI_COMM_WORLD);
   } catch (LAMMPSException & e) {
     fprintf(stderr, "LAMMPS Exception: %s", e.message.c_str());
     *ptr = (void*) NULL;
   }
 #else
   *ptr = (void *) new LAMMPS(argc,argv,MPI_COMM_WORLD);
 #endif
 }
 
 /* ----------------------------------------------------------------------
    destruct an instance of LAMMPS
 ------------------------------------------------------------------------- */
 
 void lammps_close(void *ptr)
 {
   LAMMPS *lmp = (LAMMPS *) ptr;
   delete lmp;
 }
 
 /* ----------------------------------------------------------------------
    get the numerical representation of the current LAMMPS version
 ------------------------------------------------------------------------- */
 
 int lammps_version(void *ptr)
 {
   LAMMPS *lmp = (LAMMPS *) ptr;
   return atoi(lmp->universe->num_ver);
 }
 
 /* ----------------------------------------------------------------------
    process an input script in filename str
 ------------------------------------------------------------------------- */
 
 void lammps_file(void *ptr, char *str)
 {
   LAMMPS *lmp = (LAMMPS *) ptr;
 
   BEGIN_CAPTURE
   {
     lmp->input->file(str);
   }
   END_CAPTURE
 }
 
 /* ----------------------------------------------------------------------
    process a single input command in str
    does not matter if str ends in newline
    return command name to caller
 ------------------------------------------------------------------------- */
 
 char *lammps_command(void *ptr, char *str)
 {
   LAMMPS *lmp = (LAMMPS *) ptr;
   char *result = NULL;
 
   BEGIN_CAPTURE
   {
     result = lmp->input->one(str);
   }
   END_CAPTURE
 
   return result;
 }
 
 /* ----------------------------------------------------------------------
    process multiple input commands in cmds = list of strings
    does not matter if each string ends in newline
    create long concatenated string for processing by commands_string()
    insert newlines in concatenated string as needed
    ncmd = number of strings in cmds
 ------------------------------------------------------------------------- */
 
 void lammps_commands_list(void *ptr, int ncmd, char **cmds)
 {
   LAMMPS *lmp = (LAMMPS *) ptr;
 
   // room for every command, one added newline per command, and the terminator
 
   int n = ncmd+1;
   for (int i = 0; i < ncmd; i++) n += strlen(cmds[i]);
 
   char *str = (char *) lmp->memory->smalloc(n,"lib/commands/list:str");
   str[0] = '\0';
   n = 0;
 
   for (int i = 0; i < ncmd; i++) {
     strcpy(&str[n],cmds[i]);
     n += strlen(cmds[i]);
 
     // n == 0 happens when the leading command(s) are empty strings;
     // checking it first avoids reading str[-1]
 
     if (n == 0 || str[n-1] != '\n') {
       str[n] = '\n';
       str[n+1] = '\0';
       n++;
     }
   }
 
   lammps_commands_string(ptr,str);
   lmp->memory->sfree(str);
 }
 
 /* ----------------------------------------------------------------------
    process multiple input commands in single long str, separated by newlines
    single command can span multiple lines via continuation characters
    multi-line commands enabled by triple quotes will not work
    str itself is not modified; a private copy is tokenized
 ------------------------------------------------------------------------- */
 
 void lammps_commands_string(void *ptr, char *str)
 {
   LAMMPS *lmp = (LAMMPS *) ptr;
 
   // make copy of str so can strtok() it
   // the copy carries one spare NULL byte after its terminator:
   // if the final line ends in '&', concatenate_lines() overwrites the
   // terminator with a space and the spare byte then ends the string,
   // so strtok()/strlen() cannot run past the allocation
 
   int n = strlen(str) + 1;
   char *copy = new char[n+1];
   strcpy(copy,str);
   copy[n] = '\0';
 
   BEGIN_CAPTURE
   {
     // each token is first joined with its continuation lines, then executed
 
     char *line = strtok(copy,"\n");
     if (line) concatenate_lines(line);
     while (line) {
       lmp->input->one(line);
       line = strtok(NULL,"\n");
       if (line) concatenate_lines(line);
     }
   }
   END_CAPTURE
 
   delete [] copy;
 }
 
 /* ----------------------------------------------------------------------
    release memory that a library function allocated with malloc()
      and handed back to the caller
    passing NULL is harmless
 ------------------------------------------------------------------------- */
 
 void lammps_free(void *ptr)
 {
   if (ptr != NULL) free(ptr);
 }
 
 // ----------------------------------------------------------------------
 // library API functions to extract info from LAMMPS or set info in LAMMPS
 // ----------------------------------------------------------------------
 
 /* ----------------------------------------------------------------------
    add LAMMPS-specific library functions
    all must receive LAMMPS pointer as argument
    customize by adding a function here and in library.h header file
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    query a LAMMPS setting whose value is an integer
    name = "bigint", "tagint" or "imageint"
      returns sizeof() of the corresponding integer type
    returns -1 for any other name
    ptr is not used
    customize by adding names
 ------------------------------------------------------------------------- */
 
 int lammps_extract_setting(void *ptr, char *name)
 {
   int nbytes = -1;
 
   if (!strcmp(name,"bigint")) nbytes = sizeof(bigint);
   else if (!strcmp(name,"tagint")) nbytes = sizeof(tagint);
   else if (!strcmp(name,"imageint")) nbytes = sizeof(imageint);
 
   return nbytes;
 }
 
 /* ----------------------------------------------------------------------
    extract a pointer to an internal LAMMPS global entity
    name = desired quantity, e.g. dt or boxyhi or natoms
    returns a void pointer to the entity
      which the caller can cast to the proper data type
    returns a NULL if name not listed below
    this function need only be invoked once
      the returned pointer is a permanent valid reference to the quantity
    customize by adding names
 ------------------------------------------------------------------------- */
 
 void *lammps_extract_global(void *ptr, char *name)
 {
   LAMMPS *lmp = (LAMMPS *) ptr;
 
   if (strcmp(name,"dt") == 0) return (void *) &lmp->update->dt;
   if (strcmp(name,"boxlo") == 0) return (void *) lmp->domain->boxlo;
   if (strcmp(name,"boxhi") == 0) return (void *) lmp->domain->boxhi;
   if (strcmp(name,"boxxlo") == 0) return (void *) &lmp->domain->boxlo[0];
   if (strcmp(name,"boxxhi") == 0) return (void *) &lmp->domain->boxhi[0];
   if (strcmp(name,"boxylo") == 0) return (void *) &lmp->domain->boxlo[1];
   if (strcmp(name,"boxyhi") == 0) return (void *) &lmp->domain->boxhi[1];
   if (strcmp(name,"boxzlo") == 0) return (void *) &lmp->domain->boxlo[2];
   if (strcmp(name,"boxzhi") == 0) return (void *) &lmp->domain->boxhi[2];
   if (strcmp(name,"periodicity") == 0) return (void *) lmp->domain->periodicity;
 
   if (strcmp(name,"xy") == 0) return (void *) &lmp->domain->xy;
   if (strcmp(name,"xz") == 0) return (void *) &lmp->domain->xz;
   if (strcmp(name,"yz") == 0) return (void *) &lmp->domain->yz;
   if (strcmp(name,"natoms") == 0) return (void *) &lmp->atom->natoms;
   if (strcmp(name,"nbonds") == 0) return (void *) &lmp->atom->nbonds;
   if (strcmp(name,"nangles") == 0) return (void *) &lmp->atom->nangles;
   if (strcmp(name,"ndihedrals") == 0) return (void *) &lmp->atom->ndihedrals;
   if (strcmp(name,"nimpropers") == 0) return (void *) &lmp->atom->nimpropers;
   if (strcmp(name,"nlocal") == 0) return (void *) &lmp->atom->nlocal;
   if (strcmp(name,"nghost") == 0) return (void *) &lmp->atom->nghost;
   if (strcmp(name,"nmax") == 0) return (void *) &lmp->atom->nmax;
   if (strcmp(name,"ntimestep") == 0) return (void *) &lmp->update->ntimestep;
 
   if (strcmp(name,"units") == 0) return (void *) lmp->update->unit_style;
   if (strcmp(name,"triclinic") == 0) return (void *) &lmp->domain->triclinic;
 
   if (strcmp(name,"q_flag") == 0) return (void *) &lmp->atom->q_flag;
 
   // update->atime can be referenced as a pointer
   // thermo "timer" data cannot be, since it is computed on request
   // lammps_get_thermo() can access all thermo keywords by value
 
   if (strcmp(name,"atime") == 0) return (void *) &lmp->update->atime;
   if (strcmp(name,"atimestep") == 0) return (void *) &lmp->update->atimestep;
 
   return NULL;
 }
 
 /* ----------------------------------------------------------------------
    extract simulation box parameters
    see domain.h for definition of these arguments
    domain->init() call needed to set box_change
 ------------------------------------------------------------------------- */
 
 void lammps_extract_box(void *ptr, double *boxlo, double *boxhi,
                         double *xy, double *yz, double *xz,
                         int *periodicity, int *box_change)
 {
   LAMMPS *lmp = (LAMMPS *) ptr;
   Domain *domain = lmp->domain;
   domain->init();
 
   boxlo[0] = domain->boxlo[0];
   boxlo[1] = domain->boxlo[1];
   boxlo[2] = domain->boxlo[2];
   boxhi[0] = domain->boxhi[0];
   boxhi[1] = domain->boxhi[1];
   boxhi[2] = domain->boxhi[2];
 
   *xy = domain->xy;
   *yz = domain->yz;
   *xz = domain->xz;
 
   periodicity[0] = domain->periodicity[0];
   periodicity[1] = domain->periodicity[1];
   periodicity[2] = domain->periodicity[2];
   
   *box_change = domain->box_change;
 }
 
 /* ----------------------------------------------------------------------
    extract a pointer to an internal LAMMPS atom-based entity
    name = desired quantity, e.g. x or mass
    returns a void pointer to the entity
      which the caller can cast to the proper data type
    returns a NULL if Atom::extract() does not recognize the name
    the returned pointer is not a permanent valid reference to the
      per-atom quantity, since LAMMPS may reallocate per-atom data
    customize by adding names to Atom::extract()
 ------------------------------------------------------------------------- */
 
 void *lammps_extract_atom(void *ptr, char *name)
 {
   LAMMPS *lmp = (LAMMPS *) ptr;
   return lmp->atom->extract(name);
 }
 
 /* ----------------------------------------------------------------------
    extract a pointer to an internal LAMMPS compute-based entity
    the compute is invoked if its value(s) is not current
    id = compute ID
    style = 0 for global data, 1 for per-atom data, 2 for local data
    type = 0 for scalar, 1 for vector, 2 for array
    for global data, returns a pointer to the
      compute's internal data structure for the entity
      caller should cast it to (double *) for a scalar or vector
      caller should cast it to (double **) for an array
    for per-atom or local data, returns a pointer to the
      compute's internal data structure for the entity
      caller should cast it to (double *) for a vector
      caller should cast it to (double **) for an array
    returns a void pointer to the compute's internal data structure
      for the entity which the caller can cast to the proper data type
    returns a NULL if id is not recognized or style/type not supported
    the returned pointer is not a permanent valid reference to the
      compute data, this function should be re-invoked
    IMPORTANT: if the compute is not current it will be invoked
      LAMMPS cannot easily check here if it is valid to invoke the compute,
      so caller must insure that it is OK
 ------------------------------------------------------------------------- */
 
 void *lammps_extract_compute(void *ptr, char *id, int style, int type)
 {
   LAMMPS *lmp = (LAMMPS *) ptr;
 
   BEGIN_CAPTURE
   {
     int icompute = lmp->modify->find_compute(id);
     if (icompute < 0) return NULL;
     Compute *compute = lmp->modify->compute[icompute];
 
     if (style == 0) {
       if (type == 0) {
         if (!compute->scalar_flag) return NULL;
         if (compute->invoked_scalar != lmp->update->ntimestep)
           compute->compute_scalar();
         return (void *) &compute->scalar;
       }
       if (type == 1) {
         if (!compute->vector_flag) return NULL;
         if (compute->invoked_vector != lmp->update->ntimestep)
           compute->compute_vector();
         return (void *) compute->vector;
       }
       if (type == 2) {
         if (!compute->array_flag) return NULL;
         if (compute->invoked_array != lmp->update->ntimestep)
           compute->compute_array();
         return (void *) compute->array;
       }
     }
 
     if (style == 1) {
       if (!compute->peratom_flag) return NULL;
       if (type == 1) {
         if (compute->invoked_peratom != lmp->update->ntimestep)
           compute->compute_peratom();
         return (void *) compute->vector_atom;
       }
       if (type == 2) {
         if (compute->invoked_peratom != lmp->update->ntimestep)
           compute->compute_peratom();
         return (void *) compute->array_atom;
       }
     }
 
     if (style == 2) {
       if (!compute->local_flag) return NULL;
       if (type == 1) {
         if (compute->invoked_local != lmp->update->ntimestep)
           compute->compute_local();
         return (void *) compute->vector_local;
       }
       if (type == 2) {
         if (compute->invoked_local != lmp->update->ntimestep)
           compute->compute_local();
         return (void *) compute->array_local;
       }
     }
   }
   END_CAPTURE
 
   return NULL;
 }
 
 /* ----------------------------------------------------------------------
    extract a pointer to an internal LAMMPS fix-based entity
    id = fix ID
    style = 0 for global data, 1 for per-atom data, 2 for local data
    type = 0 for scalar, 1 for vector, 2 for array
    i,j = indices needed only to specify which global vector or array value
    for global data, returns a pointer to a memory location
      which is allocated by this function
      which the caller can cast to a (double *) which points to the value
    for per-atom or local data, returns a pointer to the
      fix's internal data structure for the entity
      caller should cast it to (double *) for a vector
      caller should cast it to (double **) for an array
    returns a NULL if id is not recognized or style/type not supported
      or if the memory for a global value cannot be allocated
    IMPORTANT: for global data,
      this function allocates a double to store the value in,
      so the caller must free this memory to avoid a leak, e.g.
        double *dptr = (double *) lammps_extract_fix();
        double value = *dptr;
        lammps_free(dptr);
    IMPORTANT: LAMMPS cannot easily check here when info extracted from
      the fix is valid, so caller must ensure that it is OK
 ------------------------------------------------------------------------- */
 
 void *lammps_extract_fix(void *ptr, char *id, int style, int type,
                          int i, int j)
 {
   LAMMPS *lmp = (LAMMPS *) ptr;
 
   BEGIN_CAPTURE
   {
     int ifix = lmp->modify->find_fix(id);
     if (ifix < 0) return NULL;
     Fix *fix = lmp->modify->fix[ifix];
 
     if (style == 0) {
 
       // validate the request and evaluate the fix before allocating,
       // so that no NULL return path can leak the allocated double
 
       double value;
       if (type == 0) {
         if (!fix->scalar_flag) return NULL;
         value = fix->compute_scalar();
       } else if (type == 1) {
         if (!fix->vector_flag) return NULL;
         value = fix->compute_vector(i);
       } else if (type == 2) {
         if (!fix->array_flag) return NULL;
         value = fix->compute_array(i,j);
       } else return NULL;
 
       // ownership of this double passes to the caller, see lammps_free()
 
       double *dptr = (double *) malloc(sizeof(double));
       if (dptr == NULL) return NULL;
       *dptr = value;
       return (void *) dptr;
     }
 
     if (style == 1) {
       if (!fix->peratom_flag) return NULL;
       if (type == 1) return (void *) fix->vector_atom;
       if (type == 2) return (void *) fix->array_atom;
     }
 
     if (style == 2) {
       if (!fix->local_flag) return NULL;
       if (type == 1) return (void *) fix->vector_local;
       if (type == 2) return (void *) fix->array_local;
     }
   }
   END_CAPTURE
 
   return NULL;
 }
 
 /* ----------------------------------------------------------------------
    extract a pointer to an internal LAMMPS evaluated variable
    name = variable name, must be equal-style or atom-style variable
    group = group ID for evaluating an atom-style variable, else NULL
    for equal-style variable, returns a pointer to a memory location
      which is allocated by this function
      which the caller can cast to a (double *) which points to the value
    for atom-style variable, returns a pointer to the
      vector of per-atom values on each processor,
      which the caller can cast to a (double *) which points to the values
    returns a NULL if name is not recognized or not equal-style or atom-style
    IMPORTANT: for both equal-style and atom-style variables,
      this function allocates memory to store the variable data in
      so the caller must free this memory to avoid a leak
      e.g. for equal-style variables
        double *dptr = (double *) lammps_extract_variable();
        double value = *dptr;
        lammps_free(dptr);
      e.g. for atom-style variables
        double *vector = (double *) lammps_extract_variable();
        use the vector values
        lammps_free(vector);
    IMPORTANT: LAMMPS cannot easily check here when it is valid to evaluate
      the variable or any fixes or computes or thermodynamic info it references,
      so caller must insure that it is OK
 ------------------------------------------------------------------------- */
 
 void *lammps_extract_variable(void *ptr, char *name, char *group)
 {
   LAMMPS *lmp = (LAMMPS *) ptr;
 
   BEGIN_CAPTURE
   {
     int ivar = lmp->input->variable->find(name);
     if (ivar < 0) return NULL;
 
     if (lmp->input->variable->equalstyle(ivar)) {
       double *dptr = (double *) malloc(sizeof(double));
       *dptr = lmp->input->variable->compute_equal(ivar);
       return (void *) dptr;
     }
 
     if (lmp->input->variable->atomstyle(ivar)) {
       int igroup = lmp->group->find(group);
       if (igroup < 0) return NULL;
       int nlocal = lmp->atom->nlocal;
       double *vector = (double *) malloc(nlocal*sizeof(double));
       lmp->input->variable->compute_atom(ivar,igroup,vector,1,0);
       return (void *) vector;
     }
   }
   END_CAPTURE
 
   return NULL;
 }
 
 
 /* ----------------------------------------------------------------------
    reset simulation box parameters
    see domain.h for definition of these arguments
    assumes domain->set_initial_box() has been invoked previously
 ------------------------------------------------------------------------- */
 
 void lammps_reset_box(void *ptr, double *boxlo, double *boxhi,
                       double xy, double yz, double xz)
 {
   LAMMPS *lmp = (LAMMPS *) ptr;
   Domain *domain = lmp->domain;
 
   domain->boxlo[0] = boxlo[0];
   domain->boxlo[1] = boxlo[1];
   domain->boxlo[2] = boxlo[2];
   domain->boxhi[0] = boxhi[0];
   domain->boxhi[1] = boxhi[1];
   domain->boxhi[2] = boxhi[2];
 
   domain->xy = xy;
   domain->yz = yz;
   domain->xz = xz;
 
   domain->set_global_box();
   lmp->comm->set_proc_grid();
   domain->set_local_box();
 }
 
 /* ----------------------------------------------------------------------
    set the value of a STRING variable to str
    return -1 if variable doesn't exist or not a STRING variable
    return 0 for success
 ------------------------------------------------------------------------- */
 
 int lammps_set_variable(void *ptr, char *name, char *str)
 {
   LAMMPS *lmp = (LAMMPS *) ptr;
   int err = -1;
 
   BEGIN_CAPTURE
   {
     err = lmp->input->variable->set_string(name,str);
   }
   END_CAPTURE
 
   return err;
 }
 
 /* ----------------------------------------------------------------------
    return the current value of a thermo keyword as a double
    unlike lammps_extract_global() this does not give access to the
      storage of the data in question
    instead it triggers the Thermo class to compute the current value
      and returns it
 ------------------------------------------------------------------------- */
 
 double lammps_get_thermo(void *ptr, char *name)
 {
   LAMMPS *lmp = (LAMMPS *) ptr;
   double dval = 0.0;
 
   BEGIN_CAPTURE
   {
     lmp->output->thermo->evaluate_keyword(name,&dval);
   }
   END_CAPTURE
 
   return dval;
 }
 
 /* ----------------------------------------------------------------------
    return the total number of atoms in the system
    useful before call to lammps_get_atoms() so can pre-allocate vector
 ------------------------------------------------------------------------- */
 
 int lammps_get_natoms(void *ptr)
 {
   LAMMPS *lmp = (LAMMPS *) ptr;
 
   if (lmp->atom->natoms > MAXSMALLINT) return 0;
   int natoms = static_cast<int> (lmp->atom->natoms);
   return natoms;
 }
 
 /* ----------------------------------------------------------------------
    gather the named atom-based entity across all processors
    atom IDs must be consecutive from 1 to N
    name = desired quantity, e.g. x or charge
    type = 0 for integer values, 1 for double values
    count = # of per-atom values, e.g. 1 for type or charge, 3 for x or f
    return atom-based values in 1d data, ordered by count, then by atom ID
      e.g. x[0][0],x[0][1],x[0][2],x[1][0],x[1][1],x[1][2],x[2][0],...
      data must be pre-allocated by caller to correct length
    a warning is printed and data is left untouched if atom IDs are not
      defined or not consecutive, if natoms exceeds MAXSMALLINT,
      or if Atom::extract() does not recognize name
 ------------------------------------------------------------------------- */
 
 void lammps_gather_atoms(void *ptr, char *name,
                          int type, int count, void *data)
 {
   LAMMPS *lmp = (LAMMPS *) ptr;
 
   BEGIN_CAPTURE
   {
     // error if tags are not defined or not consecutive
 
     int flag = 0;
     if (lmp->atom->tag_enable == 0 || lmp->atom->tag_consecutive() == 0)
       flag = 1;
     if (lmp->atom->natoms > MAXSMALLINT) flag = 1;
     if (flag) {
       if (lmp->comm->me == 0)
         lmp->error->warning(FLERR,"Library error in lammps_gather_atoms");
       return;
     }
 
     int natoms = static_cast<int> (lmp->atom->natoms);
 
     int i,j,offset;
     void *vptr = lmp->atom->extract(name);
     if(vptr == NULL) {
         lmp->error->warning(FLERR,"lammps_gather_atoms: unknown property name");
         return;
     }
 
     // copy = Natom length vector of per-atom values
     // use atom ID to insert each atom's values into copy
     // MPI_Allreduce with MPI_SUM to merge into data, ordered by atom ID
     // entries for atoms not owned by this proc stay 0 and do not
     //   contribute to the sum
 
     if (type == 0) {
       int *vector = NULL;
       int **array = NULL;
       if (count == 1) vector = (int *) vptr;
       else array = (int **) vptr;
 
       int *copy;
       lmp->memory->create(copy,count*natoms,"lib/gather:copy");
       for (i = 0; i < count*natoms; i++) copy[i] = 0;
 
       tagint *tag = lmp->atom->tag;
       int nlocal = lmp->atom->nlocal;
 
       if (count == 1) {
         for (i = 0; i < nlocal; i++)
           copy[tag[i]-1] = vector[i];
       } else {
         for (i = 0; i < nlocal; i++) {
           offset = count*(tag[i]-1);
 
           // copy column j of atom i, same as in the double branch below
           // (indexing with [0] here would replicate the first column)
 
           for (j = 0; j < count; j++)
             copy[offset++] = array[i][j];
         }
       }
 
       MPI_Allreduce(copy,data,count*natoms,MPI_INT,MPI_SUM,lmp->world);
       lmp->memory->destroy(copy);
 
     } else {
       double *vector = NULL;
       double **array = NULL;
       if (count == 1) vector = (double *) vptr;
       else array = (double **) vptr;
 
       double *copy;
       lmp->memory->create(copy,count*natoms,"lib/gather:copy");
       for (i = 0; i < count*natoms; i++) copy[i] = 0.0;
 
       tagint *tag = lmp->atom->tag;
       int nlocal = lmp->atom->nlocal;
 
       if (count == 1) {
         for (i = 0; i < nlocal; i++)
           copy[tag[i]-1] = vector[i];
       } else {
         for (i = 0; i < nlocal; i++) {
           offset = count*(tag[i]-1);
           for (j = 0; j < count; j++)
             copy[offset++] = array[i][j];
         }
       }
 
       MPI_Allreduce(copy,data,count*natoms,MPI_DOUBLE,MPI_SUM,lmp->world);
       lmp->memory->destroy(copy);
     }
   }
   END_CAPTURE
 }
 
 /* ----------------------------------------------------------------------
    scatter the named atom-based entity across all processors
    atom IDs must be consecutive from 1 to N
    an atom map must exist (atom->map_style != 0)
    name = desired quantity, e.g. x or charge
    type = 0 for integer values, 1 for double values
    count = # of per-atom values, e.g. 1 for type or charge, 3 for x or f
    data = atom-based values in 1d data, ordered by count, then by atom ID
      e.g. x[0][0],x[0][1],x[0][2],x[1][0],x[1][1],x[1][2],x[2][0],...
    a warning is printed and nothing is changed if the requirements
      above are not met, if natoms exceeds MAXSMALLINT,
      or if Atom::extract() does not recognize name
 ------------------------------------------------------------------------- */
 
 void lammps_scatter_atoms(void *ptr, char *name,
                           int type, int count, void *data)
 {
   LAMMPS *lmp = (LAMMPS *) ptr;
 
   BEGIN_CAPTURE
   {
     // error if tags are not defined or not consecutive or no atom map
 
     int flag = 0;
     if (lmp->atom->tag_enable == 0 || lmp->atom->tag_consecutive() == 0) 
       flag = 1;
     if (lmp->atom->natoms > MAXSMALLINT) flag = 1;
     if (lmp->atom->map_style == 0) flag = 1;
     if (flag) {
       if (lmp->comm->me == 0)
         lmp->error->warning(FLERR,"Library error in lammps_scatter_atoms");
       return;
     }
 
     int natoms = static_cast<int> (lmp->atom->natoms);
 
     int i,j,m,offset;
     void *vptr = lmp->atom->extract(name);
     if(vptr == NULL) {
-        lmp->error->warning(FLERR,"lammps_scatter_atoms: unknown property name");
+        lmp->error->warning(FLERR,
+                            "lammps_scatter_atoms: unknown property name");
         return;
     }
 
     // data = Natom length vector of per-atom values, ordered by atom ID
     // loop over all atom IDs and use atom->map() to get the local index m
     // only atoms for which map() returns m >= 0 are updated on this proc
     // no communication is performed here
 
     if (type == 0) {
       int *vector = NULL;
       int **array = NULL;
       if (count == 1) vector = (int *) vptr;
       else array = (int **) vptr;
       int *dptr = (int *) data;
 
       if (count == 1) {
         for (i = 0; i < natoms; i++)
           if ((m = lmp->atom->map(i+1)) >= 0)
             vector[m] = dptr[i];
       } else {
         for (i = 0; i < natoms; i++)
           if ((m = lmp->atom->map(i+1)) >= 0) {
             // values of atom ID i+1 start at count*i in data
             offset = count*i;
             for (j = 0; j < count; j++)
               array[m][j] = dptr[offset++];
           }
       }
     } else {
       double *vector = NULL;
       double **array = NULL;
       if (count == 1) vector = (double *) vptr;
       else array = (double **) vptr;
       double *dptr = (double *) data;
 
       if (count == 1) {
         for (i = 0; i < natoms; i++)
           if ((m = lmp->atom->map(i+1)) >= 0)
             vector[m] = dptr[i];
       } else {
         for (i = 0; i < natoms; i++) {
           if ((m = lmp->atom->map(i+1)) >= 0) {
             // values of atom ID i+1 start at count*i in data
             offset = count*i;
             for (j = 0; j < count; j++)
               array[m][j] = dptr[offset++];
           }
         }
       }
     }
   }
   END_CAPTURE
 }
 
 /* ----------------------------------------------------------------------
    create N atoms and assign them to procs based on coords
    id = atom IDs (optional, NULL will generate 1 to N)
    type = N-length vector of atom types (required)
    x = 3N-length 1d vector of atom coords (required)
    v = 3N-length 1d vector of atom velocities (optional, NULL if just 0.0)
    image flags can be treated in two ways:
      (a) image = vector of current image flags
          each atom will be remapped into periodic box by domain->ownatom()
          image flag will be incremented accordingly and stored with atom
      (b) image = NULL
          each atom will be remapped into periodic box by domain->ownatom()
          image flag will be set to 0 by atom->avec->create_atom()
    shrinkexceed = 1 allows atoms to be outside a shrinkwrapped boundary
      passed to ownatom() which will assign them to boundary proc
      important if atoms may be (slightly) outside non-periodic dim
      e.g. due to restoring a snapshot from a previous run and previous box
    id and image must be 32-bit integers
    x,v = ordered by xyz, then by atom
      e.g. x[0][0],x[0][1],x[0][2],x[1][0],x[1][1],x[1][2],x[2][0],...
    a warning is printed and nothing is created if the simulation box
      does not exist or atom IDs are not enabled
 ------------------------------------------------------------------------- */
 
 void lammps_create_atoms(void *ptr, int n, tagint *id, int *type,
                          double *x, double *v, imageint *image,
                          int shrinkexceed)
 {
   LAMMPS *lmp = (LAMMPS *) ptr;
 
   BEGIN_CAPTURE
   {
     // error if box does not exist or tags not defined
 
     int flag = 0;
     if (lmp->domain->box_exist == 0) flag = 1;
     if (lmp->atom->tag_enable == 0) flag = 1;
     if (flag) {
       if (lmp->comm->me == 0)
         lmp->error->warning(FLERR,"Library error in lammps_create_atoms");
       return;
     }
 
     // loop over N atoms of entire system
     // if this proc owns it based on coords, invoke create_atom()
     // optionally set atom tags and velocities
 
     Atom *atom = lmp->atom;
     Domain *domain = lmp->domain;
     int nlocal = atom->nlocal;
 
     bigint natoms_prev = atom->natoms;
     int nlocal_prev = nlocal;
     double xdata[3];
 
     for (int i = 0; i < n; i++) {
       xdata[0] = x[3*i];
       xdata[1] = x[3*i+1];
       xdata[2] = x[3*i+2];
       imageint * img = image ? &image[i] : NULL;
       tagint     tag = id    ? id[i]     : -1;
       if (!domain->ownatom(tag, xdata, img, shrinkexceed)) continue;
 
       // nlocal is the index of the atom appended by create_atom()
 
       atom->avec->create_atom(type[i],xdata);
       if (id) atom->tag[nlocal] = id[i];
       else atom->tag[nlocal] = i+1;
       if (v) {
         atom->v[nlocal][0] = v[3*i];
         atom->v[nlocal][1] = v[3*i+1];
         atom->v[nlocal][2] = v[3*i+2];
       }
       if (image) atom->image[nlocal] = image[i];
       nlocal++;
     }
 
     // need to reset atom->natoms inside LAMMPS
 
     bigint ncurrent = nlocal;
     MPI_Allreduce(&ncurrent,&lmp->atom->natoms,1,MPI_LMP_BIGINT,
                   MPI_SUM,lmp->world);
 
     // init per-atom fix/compute/variable values for created atoms
 
     atom->data_fix_compute_variable(nlocal_prev,nlocal);
 
     // if global map exists, reset it
     // invoke map_init() b/c atom count has grown
 
     if (lmp->atom->map_style) {
       lmp->atom->map_init();
       lmp->atom->map_set();
     }
 
     // warn if new natoms is not correct
     // bigint need not be the same type as long, so cast to long long
     //   explicitly to match the %lld conversions on every platform
 
     if (lmp->atom->natoms != natoms_prev + n) {
       char str[128];
       sprintf(str,"Library warning in lammps_create_atoms, "
               "invalid total atoms %lld %lld",
               static_cast<long long> (lmp->atom->natoms),
               static_cast<long long> (natoms_prev+n));
       if (lmp->comm->me == 0)
         lmp->error->warning(FLERR,str);
     }
   }
   END_CAPTURE
 }
 
 // ----------------------------------------------------------------------
 // library API functions for error handling
 // ----------------------------------------------------------------------
 
 #ifdef LAMMPS_EXCEPTIONS
 
 /* ----------------------------------------------------------------------
    check if a new error message
 ------------------------------------------------------------------------- */
 
 int lammps_has_error(void *ptr) {
   LAMMPS *  lmp = (LAMMPS *) ptr;
   Error * error = lmp->error;
   return error->get_last_error() ? 1 : 0;
 }
 
 /* ----------------------------------------------------------------------
    copy the last error message of LAMMPS into a character buffer
    buffer = destination, buffer_size = its total size in bytes
      the copy is truncated to buffer_size-1 chars and always terminated
      nothing is copied if buffer is NULL or buffer_size <= 0
    the stored error message is cleared by this call
    return value encodes which type of error:
    0 = no error message was stored
    1 = normal error (recoverable)
    2 = abort error (non-recoverable)
 ------------------------------------------------------------------------- */
 
 int lammps_get_last_error_message(void *ptr, char * buffer, int buffer_size) {
   LAMMPS *  lmp = (LAMMPS *) ptr;
   Error * error = lmp->error;
 
   if(error->get_last_error()) {
     int error_type = error->get_last_error_type();
 
     // strncpy() does not terminate the destination when the source has
     // buffer_size-1 or more chars, so terminate explicitly
     // the size guard avoids a negative count wrapping to a huge size_t
 
     if (buffer != NULL && buffer_size > 0) {
       strncpy(buffer, error->get_last_error(), buffer_size-1);
       buffer[buffer_size-1] = '\0';
     }
 
     error->set_last_error(NULL, ERROR_NONE);
     return error_type;
   }
   return 0;
 }
 
 #endif
diff --git a/src/special.cpp b/src/special.cpp
index c4de11e09..3fb5ec807 100644
--- a/src/special.cpp
+++ b/src/special.cpp
@@ -1,1153 +1,1155 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include <mpi.h>
 #include <stdio.h>
 #include "special.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "force.h"
 #include "comm.h"
 #include "modify.h"
 #include "fix.h"
 #include "accelerator_kokkos.h"
 #include "memory.h"
 #include "error.h"
 #include "atom_masks.h"
 
 using namespace LAMMPS_NS;
 
 // allocate space for static class variable
 
 Special *Special::sptr;
 
 /* ---------------------------------------------------------------------- */
 
 Special::Special(LAMMPS *lmp) : Pointers(lmp)
 {
   // cache rank of this proc and proc count of the world communicator
 
   MPI_Comm_rank(world,&me);
   MPI_Comm_size(world,&nprocs);
 
   // topology neighbor lists start out unallocated
 
   onetwo = NULL;
   onethree = NULL;
   onefour = NULL;
 }
 
 /* ---------------------------------------------------------------------- */
 
 Special::~Special()
 {
   // release the three topology neighbor lists
 
   memory->destroy(onefour);
   memory->destroy(onethree);
   memory->destroy(onetwo);
 }
 
 /* ----------------------------------------------------------------------
    create 1-2, 1-3, 1-4 lists of topology neighbors
    store in onetwo, onethree, onefour for each atom
    store 3 counters in nspecial[i]
    stops after the 1-2 lists (or the 1-3 lists) if the special_bonds
      weights of all remaining levels are 1.0
    every exit path finishes with dedup(), optional trimming,
      combine(), and fix_alteration()
 ------------------------------------------------------------------------- */
 
 void Special::build()
 {
   int i,j,k,size;
   int max,maxall,nbuf;
   tagint *buf;
 
   MPI_Barrier(world);
 
   int nlocal = atom->nlocal;
 
   tagint *tag = atom->tag;
   int *num_bond = atom->num_bond;
   tagint **bond_atom = atom->bond_atom;
   int **nspecial = atom->nspecial;
 
   // NOTE(review): the weighting factors are echoed to the screen only,
   //   not to the logfile like the other messages below - confirm intended
 
   if (me == 0 && screen) {
     const double * const special_lj   = force->special_lj;
     const double * const special_coul = force->special_coul;
     fprintf(screen,"Finding 1-2 1-3 1-4 neighbors ...\n"
-                   " Special bond factors lj:   %-10g %-10g %-10g\n"
-                   " Special bond factors coul: %-10g %-10g %-10g\n",
+                   "  special bond factors lj:   %-10g %-10g %-10g\n"
+                   "  special bond factors coul: %-10g %-10g %-10g\n",
                    special_lj[1],special_lj[2],special_lj[3],
                    special_coul[1],special_coul[2],special_coul[3]);
   }
 
   // initialize nspecial counters to 0
 
   for (i = 0; i < nlocal; i++) {
     nspecial[i][0] = 0;
     nspecial[i][1] = 0;
     nspecial[i][2] = 0;
   }
 
   // -----------------------------------------------------
   // compute nspecial[i][0] = # of 1-2 neighbors of atom i
   // -----------------------------------------------------
 
   // bond partners stored by atom itself
 
   for (i = 0; i < nlocal; i++) nspecial[i][0] = num_bond[i];
 
   // if newton_bond off, then done
   // else only counted 1/2 of all bonds, so count other half
 
   if (force->newton_bond) {
 
     // nbuf = buffer size needed to hold info for my atoms
     // info for each atom = global tag of 2nd atom in each bond
 
     nbuf = 0;
     for (i = 0; i < nlocal; i++) nbuf += num_bond[i];
     memory->create(buf,nbuf,"special:buf");
 
     // fill buffer with global tags of bond partners of my atoms
 
     size = 0;
     for (i = 0; i < nlocal; i++)
       for (j = 0; j < num_bond[i]; j++)
         buf[size++] = bond_atom[i][j];
 
     // cycle buffer around ring of procs back to self
     // when receive buffer, scan tags for atoms I own
     // when find one, increment nspecial count for that atom
     // sptr gives the static ring callback access to this instance
 
     sptr = this;
     comm->ring(size,sizeof(tagint),buf,1,ring_one,NULL);
 
     memory->destroy(buf);
   }
 
   // ----------------------------------------------------
   // create onetwo[i] = list of 1-2 neighbors for atom i
   // ----------------------------------------------------
 
   // all procs allocate the same 2nd dimension = global max count
 
   max = 0;
   for (i = 0; i < nlocal; i++) max = MAX(max,nspecial[i][0]);
 
   MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world);
 
   if (me == 0) {
     if (screen) fprintf(screen,"  %d = max # of 1-2 neighbors\n",maxall);
     if (logfile) fprintf(logfile,"  %d = max # of 1-2 neighbors\n",maxall);
   }
 
   memory->create(onetwo,nlocal,maxall,"special:onetwo");
 
   // count = accumulating counter
   // it is a class member so that ring_two() can advance it as well
 
   memory->create(count,nlocal,"special:count");
   for (i = 0; i < nlocal; i++) count[i] = 0;
 
   // add bond partners stored by atom to onetwo list
 
   for (i = 0; i < nlocal; i++)
     for (j = 0; j < num_bond[i]; j++)
       onetwo[i][count[i]++] = bond_atom[i][j];
 
   // if newton_bond off, then done
   // else only stored 1/2 of all bonds, so store other half
 
   if (force->newton_bond) {
 
     // nbuf = buffer size needed to hold info for my atoms
     // info for each atom = 2 global tags in each bond
 
     nbuf = 0;
     for (i = 0; i < nlocal; i++) nbuf += 2*num_bond[i];
     memory->create(buf,nbuf,"special:buf");
 
     // fill buffer with global tags of both atoms in bond
 
     size = 0;
     for (i = 0; i < nlocal; i++)
       for (j = 0; j < num_bond[i]; j++) {
         buf[size++] = tag[i];
         buf[size++] = bond_atom[i][j];
       }
 
     // cycle buffer around ring of procs back to self
     // when receive buffer, scan 2nd-atom tags for atoms I own
     // when find one, add 1st-atom tag to onetwo list for 2nd atom
 
     sptr = this;
     comm->ring(size,sizeof(tagint),buf,2,ring_two,NULL);
 
     memory->destroy(buf);
   }
 
   memory->destroy(count);
 
   // -----------------------------------------------------
   // done if special_bond weights for 1-3, 1-4 are set to 1.0
   // -----------------------------------------------------
 
   if (force->special_lj[2] == 1.0 && force->special_coul[2] == 1.0 &&
       force->special_lj[3] == 1.0 && force->special_coul[3] == 1.0) {
     dedup();
     combine();
     fix_alteration();
     return;
   }
 
   // -----------------------------------------------------
   // compute nspecial[i][1] = # of 1-3 neighbors of atom i
   // -----------------------------------------------------
 
   // nbuf = buffer size needed to hold info for my atoms
   // info for each atom = 2 scalars + list of 1-2 neighbors
 
   nbuf = 0;
   for (i = 0; i < nlocal; i++) nbuf += 2 + nspecial[i][0];
   memory->create(buf,nbuf,"special:buf");
 
   // fill buffer with:
   // (1) = counter for 1-3 neighbors, initialized to 0
   // (2) = # of 1-2 neighbors
   // (3:N) = list of 1-2 neighbors
 
   size = 0;
   for (i = 0; i < nlocal; i++) {
     buf[size++] = 0;
     buf[size++] = nspecial[i][0];
     for (j = 0; j < nspecial[i][0]; j++) buf[size++] = onetwo[i][j];
   }
 
   // cycle buffer around ring of procs back to self
   // when receive buffer, scan list of 1-2 neighbors for atoms I own
   // when find one, increment 1-3 count by # of 1-2 neighbors of my atom,
   //   subtracting one since my list will contain original atom
   // buf is also passed as last arg since the updated counters are read
   //   back from it below
 
   sptr = this;
   comm->ring(size,sizeof(tagint),buf,3,ring_three,buf);
 
   // extract count from buffer that has cycled back to me
   // nspecial[i][1] = # of 1-3 neighbors of atom i
 
   j = 0;
   for (i = 0; i < nlocal; i++) {
     nspecial[i][1] = buf[j];
     j += 2 + nspecial[i][0];
   }
 
   memory->destroy(buf);
 
   // ----------------------------------------------------
   // create onethree[i] = list of 1-3 neighbors for atom i
   // ----------------------------------------------------
 
   max = 0;
   for (i = 0; i < nlocal; i++) max = MAX(max,nspecial[i][1]);
   MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world);
 
   if (me == 0) {
     if (screen) fprintf(screen,"  %d = max # of 1-3 neighbors\n",maxall);
     if (logfile) fprintf(logfile,"  %d = max # of 1-3 neighbors\n",maxall);
   }
 
   memory->create(onethree,nlocal,maxall,"special:onethree");
 
   // nbuf = buffer size needed to hold info for my atoms
   // info for each atom = 4 scalars + list of 1-2 neighs + list of 1-3 neighs
 
   nbuf = 0;
   for (i = 0; i < nlocal; i++) nbuf += 4 + nspecial[i][0] + nspecial[i][1];
   memory->create(buf,nbuf,"special:buf");
 
   // fill buffer with:
   // (1) = global tag of original atom
   // (2) = # of 1-2 neighbors
   // (3) = # of 1-3 neighbors
   // (4) = counter for 1-3 neighbors, initialized to 0
   // (5:N) = list of 1-2 neighbors
   // (N+1:2N) space for list of 1-3 neighbors
 
   size = 0;
   for (i = 0; i < nlocal; i++) {
     buf[size++] = tag[i];
     buf[size++] = nspecial[i][0];
     buf[size++] = nspecial[i][1];
     buf[size++] = 0;
     for (j = 0; j < nspecial[i][0]; j++) buf[size++] = onetwo[i][j];
     size += nspecial[i][1];
   }
 
   // cycle buffer around ring of procs back to self
   // when receive buffer, scan list of 1-2 neighbors for atoms I own
   // when find one, add its neighbors to 1-3 list
   //   increment the count stored in item (4) of the atom's record
   //   exclude the atom whose tag = original
   //   this process may include duplicates but they will be culled later
 
   sptr = this;
   comm->ring(size,sizeof(tagint),buf,4,ring_four,buf);
 
   // fill onethree with buffer values that have been returned to me
   // sanity check: accumulated count in item (4) of each record,
   //   i.e. buf[j+3], should equal nspecial[i][1] for each atom
 
   j = 0;
   for (i = 0; i < nlocal; i++) {
     if (buf[j+3] != nspecial[i][1])
       error->one(FLERR,"1-3 bond count is inconsistent");
     j += 4 + nspecial[i][0];
     for (k = 0; k < nspecial[i][1]; k++)
       onethree[i][k] = buf[j++];
   }
 
   memory->destroy(buf);
 
   // done if special_bond weights for 1-4 are set to 1.0
 
   if (force->special_lj[3] == 1.0 && force->special_coul[3] == 1.0) {
     dedup();
     if (force->special_angle) angle_trim();
     combine();
     fix_alteration();
     return;
   }
 
   // -----------------------------------------------------
   // compute nspecial[i][2] = # of 1-4 neighbors of atom i
   // -----------------------------------------------------
 
   // nbuf = buffer size needed to hold info for my atoms
   // info for each atom = 2 scalars + list of 1-3 neighbors
 
   nbuf = 0;
   for (i = 0; i < nlocal; i++) nbuf += 2 + nspecial[i][1];
   memory->create(buf,nbuf,"special:buf");
 
   // fill buffer with:
   // (1) = counter for 1-4 neighbors, initialized to 0
   // (2) = # of 1-3 neighbors
   // (3:N) = list of 1-3 neighbors
 
   size = 0;
   for (i = 0; i < nlocal; i++) {
     buf[size++] = 0;
     buf[size++] = nspecial[i][1];
     for (j = 0; j < nspecial[i][1]; j++) buf[size++] = onethree[i][j];
   }
 
   // cycle buffer around ring of procs back to self
   // when receive buffer, scan list of 1-3 neighbors for atoms I own
   // when find one, increment 1-4 count by # of 1-2 neighbors of my atom
   //   may include duplicates and original atom but they will be culled later
 
   sptr = this;
   comm->ring(size,sizeof(tagint),buf,5,ring_five,buf);
 
   // extract count from buffer that has cycled back to me
   // nspecial[i][2] = # of 1-4 neighbors of atom i
 
   j = 0;
   for (i = 0; i < nlocal; i++) {
     nspecial[i][2] = buf[j];
     j += 2 + nspecial[i][1];
   }
 
   memory->destroy(buf);
 
   // ----------------------------------------------------
   // create onefour[i] = list of 1-4 neighbors for atom i
   // ----------------------------------------------------
 
   max = 0;
   for (i = 0; i < nlocal; i++) max = MAX(max,nspecial[i][2]);
   MPI_Allreduce(&max,&maxall,1,MPI_INT,MPI_MAX,world);
 
   if (me == 0) {
     if (screen) fprintf(screen,"  %d = max # of 1-4 neighbors\n",maxall);
     if (logfile) fprintf(logfile,"  %d = max # of 1-4 neighbors\n",maxall);
   }
 
   memory->create(onefour,nlocal,maxall,"special:onefour");
 
   // nbuf = buffer size needed to hold info for my atoms
   // info for each atom = 3 scalars + list of 1-3 neighs + list of 1-4 neighs
 
   nbuf = 0;
   for (i = 0; i < nlocal; i++)
     nbuf += 3 + nspecial[i][1] + nspecial[i][2];
   memory->create(buf,nbuf,"special:buf");
 
   // fill buffer with:
   // (1) = # of 1-3 neighbors
   // (2) = # of 1-4 neighbors
   // (3) = counter for 1-4 neighbors, initialized to 0
   // (4:N) = list of 1-3 neighbors
   // (N+1:2N) space for list of 1-4 neighbors
 
   size = 0;
   for (i = 0; i < nlocal; i++) {
     buf[size++] = nspecial[i][1];
     buf[size++] = nspecial[i][2];
     buf[size++] = 0;
     for (j = 0; j < nspecial[i][1]; j++) buf[size++] = onethree[i][j];
     size += nspecial[i][2];
   }
 
   // cycle buffer around ring of procs back to self
   // when receive buffer, scan list of 1-3 neighbors for atoms I own
   // when find one, add its neighbors to 1-4 list
   //   incrementing the count stored in item (3) of the atom's record
   //   this process may include duplicates but they will be culled later
 
   sptr = this;
   comm->ring(size,sizeof(tagint),buf,6,ring_six,buf);
 
   // fill onefour with buffer values that have been returned to me
   // sanity check: accumulated count in item (3) of each record,
   //   i.e. buf[j+2], should equal nspecial[i][2] for each atom
 
   j = 0;
   for (i = 0; i < nlocal; i++) {
     if (buf[j+2] != nspecial[i][2])
       error->one(FLERR,"1-4 bond count is inconsistent");
     j += 3 + nspecial[i][1];
     for (k = 0; k < nspecial[i][2]; k++)
       onefour[i][k] = buf[j++];
   }
 
   memory->destroy(buf);
 
   dedup();
   if (force->special_angle) angle_trim();
   if (force->special_dihedral) dihedral_trim();
   combine();
   fix_alteration();
 }
 
 /* ----------------------------------------------------------------------
    remove duplicates within each of onetwo, onethree, onefour individually
 ------------------------------------------------------------------------- */
 
 void Special::dedup()
 {
   int i,j;
   tagint m;
 
   // clear map so it can be used as scratch space
 
   atom->map_clear();
 
   // use map to cull duplicates
   // exclude original atom explicitly
   // adjust onetwo, onethree, onefour values to reflect removed duplicates
   // must unset map for each atom
 
   int **nspecial = atom->nspecial;
   tagint *tag = atom->tag;
   int nlocal = atom->nlocal;
 
   int unique;
 
   for (i = 0; i < nlocal; i++) {
     unique = 0;
     atom->map_one(tag[i],0);
     for (j = 0; j < nspecial[i][0]; j++) {
       m = onetwo[i][j];
       if (atom->map(m) < 0) {
         onetwo[i][unique++] = m;
         atom->map_one(m,0);
       }
     }
     nspecial[i][0] = unique;
     atom->map_one(tag[i],-1);
     for (j = 0; j < unique; j++) atom->map_one(onetwo[i][j],-1);
   }
 
   for (i = 0; i < nlocal; i++) {
     unique = 0;
     atom->map_one(tag[i],0);
     for (j = 0; j < nspecial[i][1]; j++) {
       m = onethree[i][j];
       if (atom->map(m) < 0) {
         onethree[i][unique++] = m;
         atom->map_one(m,0);
       }
     }
     nspecial[i][1] = unique;
     atom->map_one(tag[i],-1);
     for (j = 0; j < unique; j++) atom->map_one(onethree[i][j],-1);
   }
 
   for (i = 0; i < nlocal; i++) {
     unique = 0;
     atom->map_one(tag[i],0);
     for (j = 0; j < nspecial[i][2]; j++) {
       m = onefour[i][j];
       if (atom->map(m) < 0) {
         onefour[i][unique++] = m;
         atom->map_one(m,0);
       }
     }
     nspecial[i][2] = unique;
     atom->map_one(tag[i],-1);
     for (j = 0; j < unique; j++) atom->map_one(onefour[i][j],-1);
   }
 
   // re-create map
 
+  atom->map_init(0);
   atom->nghost = 0;
   atom->map_set();
 }
 
 /* ----------------------------------------------------------------------
    concatenate onetwo, onethree, onefour into master atom->special list
    remove duplicates between 3 lists, leave dup in first list it appears in
    convert nspecial[0], nspecial[1], nspecial[2] into cumulative counters
 ------------------------------------------------------------------------- */
 
 void Special::combine()
 {
   int i,j;
   tagint m;
 
   int me;
   MPI_Comm_rank(world,&me);
 
   int **nspecial = atom->nspecial;
   tagint *tag = atom->tag;
   int nlocal = atom->nlocal;
 
   // ----------------------------------------------------
   // compute culled maxspecial = max # of special neighs of any atom
   // ----------------------------------------------------
 
   // clear map so it can be used as scratch space
 
   atom->map_clear();
 
   // unique = # of unique nspecial neighbors of one atom
   // cull duplicates using map to check for them
   // exclude original atom explicitly
   // must unset map for each atom
 
   int unique;
   int maxspecial = 0;
 
   for (i = 0; i < nlocal; i++) {
     unique = 0;
     atom->map_one(tag[i],0);
 
     for (j = 0; j < nspecial[i][0]; j++) {
       m = onetwo[i][j];
       if (atom->map(m) < 0) {
         unique++;
         atom->map_one(m,0);
       }
     }
     for (j = 0; j < nspecial[i][1]; j++) {
       m = onethree[i][j];
       if (atom->map(m) < 0) {
         unique++;
         atom->map_one(m,0);
       }
     }
     for (j = 0; j < nspecial[i][2]; j++) {
       m = onefour[i][j];
       if (atom->map(m) < 0) {
         unique++;
         atom->map_one(m,0);
       }
     }
 
     maxspecial = MAX(maxspecial,unique);
 
     atom->map_one(tag[i],-1);
     for (j = 0; j < nspecial[i][0]; j++) atom->map_one(onetwo[i][j],-1);
     for (j = 0; j < nspecial[i][1]; j++) atom->map_one(onethree[i][j],-1);
     for (j = 0; j < nspecial[i][2]; j++) atom->map_one(onefour[i][j],-1);
   }
 
   // compute global maxspecial, must be at least 1
   // add in extra factor from special_bonds command
   // allocate correct special array with same nmax, new maxspecial
   // previously allocated one must be destroyed
   // must make AtomVec class update its ptr to special
 
   MPI_Allreduce(&maxspecial,&atom->maxspecial,1,MPI_INT,MPI_MAX,world);
   atom->maxspecial += force->special_extra;
   atom->maxspecial = MAX(atom->maxspecial,1);
 
   if (me == 0) {
     if (screen)
       fprintf(screen,"  %d = max # of special neighbors\n",atom->maxspecial);
     if (logfile)
       fprintf(logfile,"  %d = max # of special neighbors\n",atom->maxspecial);
   }
 
   if (lmp->kokkos) {
     AtomKokkos* atomKK = (AtomKokkos*) atom;
     atomKK->modified(Host,SPECIAL_MASK);
     atomKK->sync(Device,SPECIAL_MASK);
     memory->grow_kokkos(atomKK->k_special,atom->special,
                         atom->nmax,atom->maxspecial,"atom:special");
     atomKK->modified(Device,SPECIAL_MASK);
     atomKK->sync(Host,SPECIAL_MASK);
   } else {
     memory->destroy(atom->special);
     memory->create(atom->special,atom->nmax,atom->maxspecial,"atom:special");
   }
 
   atom->avec->grow_reset();
   tagint **special = atom->special;
 
   // ----------------------------------------------------
   // fill special array with 1-2, 1-3, 1-4 neighs for each atom
   // ----------------------------------------------------
 
   // again use map to cull duplicates
   // exclude original atom explicitly
   // adjust nspecial[i] values to reflect removed duplicates
   // nspecial[i][1] and nspecial[i][2] now become cumulative counters
 
   for (i = 0; i < nlocal; i++) {
     unique = 0;
     atom->map_one(tag[i],0);
 
     for (j = 0; j < nspecial[i][0]; j++) {
       m = onetwo[i][j];
       if (atom->map(m) < 0) {
         special[i][unique++] = m;
         atom->map_one(m,0);
       }
     }
     nspecial[i][0] = unique;
 
     for (j = 0; j < nspecial[i][1]; j++) {
       m = onethree[i][j];
       if (atom->map(m) < 0) {
         special[i][unique++] = m;
         atom->map_one(m,0);
       }
     }
     nspecial[i][1] = unique;
 
     for (j = 0; j < nspecial[i][2]; j++) {
       m = onefour[i][j];
       if (atom->map(m) < 0) {
         special[i][unique++] = m;
         atom->map_one(m,0);
       }
     }
     nspecial[i][2] = unique;
 
     atom->map_one(tag[i],-1);
     for (j = 0; j < nspecial[i][2]; j++) atom->map_one(special[i][j],-1);
   }
 
   // re-create map
 
+  atom->map_init(0);
   atom->nghost = 0;
   atom->map_set();
 }
 
 /* ----------------------------------------------------------------------
    trim list of 1-3 neighbors by checking defined angles
    delete a 1-3 neigh if they are not end atoms of a defined angle
      and if they are not 1,3 or 2,4 atoms of a defined dihedral
 ------------------------------------------------------------------------- */
 
 void Special::angle_trim()
 {
   int i,j,m,n;
 
   int *num_angle = atom->num_angle;
   int *num_dihedral = atom->num_dihedral;
   tagint **angle_atom1 = atom->angle_atom1;
   tagint **angle_atom3 = atom->angle_atom3;
   tagint **dihedral_atom1 = atom->dihedral_atom1;
   tagint **dihedral_atom2 = atom->dihedral_atom2;
   tagint **dihedral_atom3 = atom->dihedral_atom3;
   tagint **dihedral_atom4 = atom->dihedral_atom4;
   int **nspecial = atom->nspecial;
   int nlocal = atom->nlocal;
 
   // stats on old 1-3 neighbor counts
 
   double onethreecount = 0.0;
   for (i = 0; i < nlocal; i++) onethreecount += nspecial[i][1];
   double allcount;
   MPI_Allreduce(&onethreecount,&allcount,1,MPI_DOUBLE,MPI_SUM,world);
 
   if (me == 0) {
     if (screen)
       fprintf(screen,
               "  %g = # of 1-3 neighbors before angle trim\n",allcount);
     if (logfile)
       fprintf(logfile,
               "  %g = # of 1-3 neighbors before angle trim\n",allcount);
   }
 
   // if angles or dihedrals are defined,
   // flag each 1-3 neigh if it appears in an angle or dihedral
 
   if ((num_angle && atom->nangles) || (num_dihedral && atom->ndihedrals)) {
 
     // dflag = flag for 1-3 neighs of all owned atoms
 
     int maxcount = 0;
     for (i = 0; i < nlocal; i++) maxcount = MAX(maxcount,nspecial[i][1]);
     memory->create(dflag,nlocal,maxcount,"special::dflag");
 
     for (i = 0; i < nlocal; i++) {
       n = nspecial[i][1];
       for (j = 0; j < n; j++) dflag[i][j] = 0;
     }
 
     // nbufmax = largest buffer needed to hold info from any proc
     // info for each atom = list of 1,3 atoms in each angle stored by atom
     //   and list of 1,3 and 2,4 atoms in each dihedral stored by atom
 
     int nbuf = 0;
     for (i = 0; i < nlocal; i++) {
       if (num_angle && atom->nangles) nbuf += 2*num_angle[i];
       if (num_dihedral && atom->ndihedrals) nbuf += 2*2*num_dihedral[i];
     }
     int *buf;
     memory->create(buf,nbuf,"special:buf");
 
     // fill buffer with list of 1,3 atoms in each angle
     // and with list of 1,3 and 2,4 atoms in each dihedral
 
     int size = 0;
     if (num_angle && atom->nangles)
       for (i = 0; i < nlocal; i++)
         for (j = 0; j < num_angle[i]; j++) {
           buf[size++] = angle_atom1[i][j];
           buf[size++] = angle_atom3[i][j];
         }
 
     if (num_dihedral && atom->ndihedrals)
       for (i = 0; i < nlocal; i++)
         for (j = 0; j < num_dihedral[i]; j++) {
           buf[size++] = dihedral_atom1[i][j];
           buf[size++] = dihedral_atom3[i][j];
           buf[size++] = dihedral_atom2[i][j];
           buf[size++] = dihedral_atom4[i][j];
         }
 
     // cycle buffer around ring of procs back to self
     // when receive buffer, scan list of 1,3 atoms looking for atoms I own
     // when find one, scan its 1-3 neigh list and mark I,J as in an angle
 
     sptr = this;
     comm->ring(size,sizeof(tagint),buf,7,ring_seven,NULL);
 
     // delete 1-3 neighbors if they are not flagged in dflag
 
     for (i = 0; i < nlocal; i++) {
       m = 0;
       for (j = 0; j < nspecial[i][1]; j++)
         if (dflag[i][j]) onethree[i][m++] = onethree[i][j];
       nspecial[i][1] = m;
     }
 
     // clean up
 
     memory->destroy(dflag);
     memory->destroy(buf);
 
   // if no angles or dihedrals are defined, delete all 1-3 neighs
 
   } else {
     for (i = 0; i < nlocal; i++) nspecial[i][1] = 0;
   }
 
   // stats on new 1-3 neighbor counts
 
   onethreecount = 0.0;
   for (i = 0; i < nlocal; i++) onethreecount += nspecial[i][1];
   MPI_Allreduce(&onethreecount,&allcount,1,MPI_DOUBLE,MPI_SUM,world);
 
   if (me == 0) {
     if (screen)
       fprintf(screen,
               "  %g = # of 1-3 neighbors after angle trim\n",allcount);
     if (logfile)
       fprintf(logfile,
               "  %g = # of 1-3 neighbors after angle trim\n",allcount);
   }
 }
 
 /* ----------------------------------------------------------------------
    trim list of 1-4 neighbors by checking defined dihedrals
    delete a 1-4 neigh if they are not end atoms of a defined dihedral
 ------------------------------------------------------------------------- */
 
 void Special::dihedral_trim()
 {
   int i,j,m,n;
 
   int *num_dihedral = atom->num_dihedral;
   tagint **dihedral_atom1 = atom->dihedral_atom1;
   tagint **dihedral_atom4 = atom->dihedral_atom4;
   int **nspecial = atom->nspecial;
   int nlocal = atom->nlocal;
 
   // stats on old 1-4 neighbor counts
 
   double onefourcount = 0.0;
   for (i = 0; i < nlocal; i++) onefourcount += nspecial[i][2];
   double allcount;
   MPI_Allreduce(&onefourcount,&allcount,1,MPI_DOUBLE,MPI_SUM,world);
 
   if (me == 0) {
     if (screen)
       fprintf(screen,
               "  %g = # of 1-4 neighbors before dihedral trim\n",allcount);
     if (logfile)
       fprintf(logfile,
               "  %g = # of 1-4 neighbors before dihedral trim\n",allcount);
   }
 
   // if dihedrals are defined, flag each 1-4 neigh if it appears in a dihedral
 
   if (num_dihedral && atom->ndihedrals) {
 
     // dflag = flag for 1-4 neighs of all owned atoms
 
     int maxcount = 0;
     for (i = 0; i < nlocal; i++) maxcount = MAX(maxcount,nspecial[i][2]);
     memory->create(dflag,nlocal,maxcount,"special::dflag");
 
     for (i = 0; i < nlocal; i++) {
       n = nspecial[i][2];
       for (j = 0; j < n; j++) dflag[i][j] = 0;
     }
 
     // nbufmax = largest buffer needed to hold info from any proc
     // info for each atom = list of 1,4 atoms in each dihedral stored by atom
 
     int nbuf = 0;
     for (i = 0; i < nlocal; i++) nbuf += 2*num_dihedral[i];
     int *buf;
     memory->create(buf,nbuf,"special:buf");
 
     // fill buffer with list of 1,4 atoms in each dihedral
 
     int size = 0;
     for (i = 0; i < nlocal; i++)
       for (j = 0; j < num_dihedral[i]; j++) {
         buf[size++] = dihedral_atom1[i][j];
         buf[size++] = dihedral_atom4[i][j];
       }
 
     // cycle buffer around ring of procs back to self
     // when receive buffer, scan list of 1,4 atoms looking for atoms I own
     // when find one, scan its 1-4 neigh list and mark I,J as in a dihedral
 
     sptr = this;
     comm->ring(size,sizeof(tagint),buf,8,ring_eight,NULL);
 
     // delete 1-4 neighbors if they are not flagged in dflag
 
     for (i = 0; i < nlocal; i++) {
       m = 0;
       for (j = 0; j < nspecial[i][2]; j++)
         if (dflag[i][j]) onefour[i][m++] = onefour[i][j];
       nspecial[i][2] = m;
     }
 
     // clean up
 
     memory->destroy(dflag);
     memory->destroy(buf);
 
   // if no dihedrals are defined, delete all 1-4 neighs
 
   } else {
     for (i = 0; i < nlocal; i++) nspecial[i][2] = 0;
   }
 
   // stats on new 1-4 neighbor counts
 
   onefourcount = 0.0;
   for (i = 0; i < nlocal; i++) onefourcount += nspecial[i][2];
   MPI_Allreduce(&onefourcount,&allcount,1,MPI_DOUBLE,MPI_SUM,world);
 
   if (me == 0) {
     if (screen)
       fprintf(screen,
               "  %g = # of 1-4 neighbors after dihedral trim\n",allcount);
     if (logfile)
       fprintf(logfile,
               "  %g = # of 1-4 neighbors after dihedral trim\n",allcount);
   }
 }
 
 /* ----------------------------------------------------------------------
    when receive buffer, scan tags for atoms I own
    when find one, increment nspecial count for that atom
 ------------------------------------------------------------------------- */
 
 void Special::ring_one(int ndatum, char *cbuf)
 {
   Atom *atom = sptr->atom;
   int **nspecial = atom->nspecial;
   int nlocal = atom->nlocal;
 
   tagint *buf = (tagint *) cbuf;
   int m;
 
   for (int i = 0; i < ndatum; i++) {
     m = atom->map(buf[i]);
     if (m >= 0 && m < nlocal) nspecial[m][0]++;
   }
 }
 
 /* ----------------------------------------------------------------------
    when receive buffer, scan 2nd-atom tags for atoms I own
    when find one, add 1st-atom tag to onetwo list for 2nd atom
 ------------------------------------------------------------------------- */
 
 void Special::ring_two(int ndatum, char *cbuf)
 {
   Atom *atom = sptr->atom;
   int nlocal = atom->nlocal;
 
   tagint **onetwo = sptr->onetwo;
   int *count = sptr->count;
 
   tagint *buf = (tagint *) cbuf;
   int m;
 
   for (int i = 1; i < ndatum; i += 2) {
     m = atom->map(buf[i]);
     if (m >= 0 && m < nlocal) onetwo[m][count[m]++] = buf[i-1];
   }
 }
 
 /* ----------------------------------------------------------------------
    when receive buffer, scan list of 1-2 neighbors for atoms I own
    when find one, increment 1-3 count by # of 1-2 neighbors of my atom,
      subtracting one since my list will contain original atom
 ------------------------------------------------------------------------- */
 
 void Special::ring_three(int ndatum, char *cbuf)
 {
   Atom *atom = sptr->atom;
   int **nspecial = atom->nspecial;
   int nlocal = atom->nlocal;
 
   tagint *buf = (tagint *) cbuf;
   int i,j,m,n,num12;
 
   i = 0;
   while (i < ndatum) {
     n = buf[i];
     num12 = buf[i+1];
     for (j = 0; j < num12; j++) {
       m = atom->map(buf[i+2+j]);
       if (m >= 0 && m < nlocal)
         n += nspecial[m][0] - 1;
     }
     buf[i] = n;
     i += 2 + num12;
   }
 }
 
 /* ----------------------------------------------------------------------
   when receive buffer, scan list of 1-2 neighbors for atoms I own
   when find one, add its neighbors to 1-3 list
     increment the count in buf(i+4)
     exclude the atom whose tag = original
     this process may include duplicates but they will be culled later
 ------------------------------------------------------------------------- */
 
 void Special::ring_four(int ndatum, char *cbuf)
 {
   Atom *atom = sptr->atom;
   int **nspecial = atom->nspecial;
   int nlocal = atom->nlocal;
 
   tagint **onetwo = sptr->onetwo;
 
   tagint *buf = (tagint *) cbuf;
   tagint original;
   int i,j,k,m,n,num12,num13;
 
   i = 0;
   while (i < ndatum) {
     original = buf[i];
     num12 = buf[i+1];
     num13 = buf[i+2];
     n = buf[i+3];
     for (j = 0; j < num12; j++) {
       m = atom->map(buf[i+4+j]);
       if (m >= 0 && m < nlocal)
         for (k = 0; k < nspecial[m][0]; k++)
           if (onetwo[m][k] != original)
             buf[i+4+num12+(n++)] = onetwo[m][k];
     }
     buf[i+3] = n;
     i += 4 + num12 + num13;
   }
 }
 
 /* ----------------------------------------------------------------------
    when receive buffer, scan list of 1-3 neighbors for atoms I own
    when find one, increment 1-4 count by # of 1-2 neighbors of my atom
      may include duplicates and original atom but they will be culled later
 ------------------------------------------------------------------------- */
 
 void Special::ring_five(int ndatum, char *cbuf)
 {
   Atom *atom = sptr->atom;
   int **nspecial = atom->nspecial;
   int nlocal = atom->nlocal;
 
   tagint *buf = (tagint *) cbuf;
   int i,j,m,n,num13;
 
   i = 0;
   while (i < ndatum) {
     n = buf[i];
     num13 = buf[i+1];
     for (j = 0; j < num13; j++) {
       m = atom->map(buf[i+2+j]);
       if (m >= 0 && m < nlocal) n += nspecial[m][0];
     }
       buf[i] = n;
       i += 2 + num13;
   }
 }
 
 /* ----------------------------------------------------------------------
    when receive buffer, scan list of 1-3 neighbors for atoms I own
    when find one, add its neighbors to 1-4 list
      incrementing the count in buf(i+4)
      this process may include duplicates but they will be culled later
 ------------------------------------------------------------------------- */
 
 void Special::ring_six(int ndatum, char *cbuf)
 {
   Atom *atom = sptr->atom;
   int **nspecial = atom->nspecial;
   int nlocal = atom->nlocal;
 
   tagint **onetwo = sptr->onetwo;
 
   tagint *buf = (tagint *) cbuf;
   int i,j,k,m,n,num13,num14;
 
   i = 0;
   while (i < ndatum) {
     num13 = buf[i];
     num14 = buf[i+1];
     n = buf[i+2];
     for (j = 0; j < num13; j++) {
       m = atom->map(buf[i+3+j]);
       if (m >= 0 && m < nlocal)
         for (k = 0; k < nspecial[m][0]; k++)
           buf[i+3+num13+(n++)] = onetwo[m][k];
     }
     buf[i+2] = n;
     i += 3 + num13 + num14;
   }
 }
 
 /* ----------------------------------------------------------------------
    when receive buffer, scan list of 1,3 atoms looking for atoms I own
    when find one, scan its 1-3 neigh list and mark I,J as in an angle
 ------------------------------------------------------------------------- */
 
 void Special::ring_seven(int ndatum, char *cbuf)
 {
   Atom *atom = sptr->atom;
   int **nspecial = atom->nspecial;
   int nlocal = atom->nlocal;
 
   tagint **onethree = sptr->onethree;
   int **dflag = sptr->dflag;
 
   tagint *buf = (tagint *) cbuf;
   tagint iglobal,jglobal;
   int i,m,ilocal,jlocal;
 
   i = 0;
   while (i < ndatum) {
     iglobal = buf[i];
     jglobal = buf[i+1];
     ilocal = atom->map(iglobal);
     jlocal = atom->map(jglobal);
     if (ilocal >= 0 && ilocal < nlocal)
       for (m = 0; m < nspecial[ilocal][1]; m++)
         if (jglobal == onethree[ilocal][m]) {
           dflag[ilocal][m] = 1;
           break;
         }
     if (jlocal >= 0 && jlocal < nlocal)
       for (m = 0; m < nspecial[jlocal][1]; m++)
         if (iglobal == onethree[jlocal][m]) {
           dflag[jlocal][m] = 1;
           break;
         }
     i += 2;
   }
 }
 
 /* ----------------------------------------------------------------------
    when receive buffer, scan list of 1,4 atoms looking for atoms I own
    when find one, scan its 1-4 neigh list and mark I,J as in a dihedral
 ------------------------------------------------------------------------- */
 
 void Special::ring_eight(int ndatum, char *cbuf)
 {
   Atom *atom = sptr->atom;
   int **nspecial = atom->nspecial;
   int nlocal = atom->nlocal;
 
   tagint **onefour = sptr->onefour;
   int **dflag = sptr->dflag;
 
   tagint *buf = (tagint *) cbuf;
   tagint iglobal,jglobal;
   int i,m,ilocal,jlocal;
 
   i = 0;
   while (i < ndatum) {
     iglobal = buf[i];
     jglobal = buf[i+1];
     ilocal = atom->map(iglobal);
     jlocal = atom->map(jglobal);
     if (ilocal >= 0 && ilocal < nlocal)
       for (m = 0; m < nspecial[ilocal][2]; m++)
         if (jglobal == onefour[ilocal][m]) {
           dflag[ilocal][m] = 1;
           break;
         }
     if (jlocal >= 0 && jlocal < nlocal)
       for (m = 0; m < nspecial[jlocal][2]; m++)
         if (iglobal == onefour[jlocal][m]) {
           dflag[jlocal][m] = 1;
           break;
         }
     i += 2;
   }
 }
 
 /* ----------------------------------------------------------------------
    allow fixes to alter special list
    currently, only fix drude does this
      so that both the Drude core and electron are same level of neighbor
 ------------------------------------------------------------------------- */
 
 void Special::fix_alteration()
 {
   // visit every fix; those with special_alter_flag set get to rebuild
   // the special list (nfix is re-read on each pass)
   int ifix = 0;
   while (ifix < modify->nfix) {
     if (modify->fix[ifix]->special_alter_flag) {
       modify->fix[ifix]->rebuild_special();
     }
     ifix++;
   }
 }