diff --git a/doc/src/fix_neb.txt b/doc/src/fix_neb.txt
index a5c4bf439..cd0fa23e8 100644
--- a/doc/src/fix_neb.txt
+++ b/doc/src/fix_neb.txt
@@ -1,194 +1,208 @@
 "LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
 
 :link(lws,http://lammps.sandia.gov)
 :link(ld,Manual.html)
 :link(lc,Section_commands.html#comm)
 
 :line
 
 fix neb command :h3
 
 [Syntax:]
 
 fix ID group-ID neb Kspring keyword value :pre
 
-ID, group-ID are documented in "fix"_fix.html command
-neb = style name of this fix command
-Kspring = parallel spring constant (force/distance units)
-keyword = {idealpos} or {neigh} or {perp} or {freeend} :ul
- {idealpos} = each replica is attached with a spring to its interpolated ideal position (default)
- {neigh} = each replica is connected with spring to the previous and next replica.
- {perp} value = set spring constant for the perpendicular spring to {value}
- {freeend} flag = set behavior for the end points
-   flag = {ini} or {final} or {finaleini} or {final2eini}
+ID, group-ID are documented in "fix"_fix.html command :ulb,l
+neb = style name of this fix command :l
+Kspring = parallel spring constant (force/distance units or force units) :l
+zero or more keyword/value pairs may be appended :l
+keyword = {nudg_style} or {perp} or {freend} or {freend_k_spring} :l
+  {nudg_style} value = {neigh} or {idealpos}
+    {neigh} = the parallel nudging force is calculated from the distance to neighbouring replicas (in this case, Kspring is in force/distance units)
+    {idealpos} = the parallel nudging force is proportional to the distance between the replica and its interpolated ideal position (in this case Kspring is in force units)
+  {perp} value {none} or kspring2
+    {none} = no perpendicular spring force is applied
+  {freeend} value = {none} or {ini} or {final} or {finaleini} or {final2eini}
+    {none} = no nudging force is apply to the first and last replicas
+    {ini} = set the first replica to be a free end 
+    {final} = set the last replica to be a free end
+    {finaleini} = set the last replica to be a free end and set its target energy as that of the first replica
+    {final2eini} = same as {finaleini} plus prevent intermediate replicas to have a lower energy than the first replica
+  {freeend_kspring}  value = kspring2
+    kspring2 = spring constant of the perpendicular spring force (per distance units)    
+flag = set behavior for the end points
+   flag = 
    :pre
 
 [Examples:]
 
 fix 1 active neb 10.0
 fix 2 all neb 1.0 perp 1.0 freeend final
-fix 1 all neb 1.0 neigh freeend final2eini :pre
+fix 1 all neb 1.0 nudg_style idealpos freeend final2eini freend_kspring 1:pre
 
 [Description:]
 
 Add a nudging force to atoms in the group for a multi-replica
 simulation run via the "neb"_neb.html command to perform a nudged
 elastic band (NEB) calculation for finding the transition state.
 Hi-level explanations of NEB are given with the "neb"_neb.html command
 and in "Section_howto 5"_Section_howto.html#howto_5 of the manual.
 The fix neb command must be used with the "neb" command and defines
 how nudging inter-replica forces are computed.  A NEB calculation is
 divided in two stages. In the first stage n replicas are relaxed
 toward a MEP and in a second stage, the climbing image scheme (see
 "(Henkelman2)"_#Henkelman2) is turned on so that the replica having
 the highest energy relaxes toward the saddle point (i.e. the point of
 highest energy along the MEP).
 
 One purpose of the nudging forces is to keep the replicas equally
 spaced.  During the NEB, the 3N-length vector of interatomic force Fi
 = -Grad(V) of replicas i is altered. For all intermediate replicas
 (i.e. for 1<i<n) except for the climbing replica the force vector
 becomes:
 
 Fi = -Grad(V) + (Grad(V) dot That) That + Fspringparallel + Fspringperp :pre
 
 That is the unit "tangent" vector for replica i and is a function of
 Ri, Ri-1, Ri+1, and the potential energy of the 3 replicas; it points
 roughly in the direction of (Ri+i - Ri-1) (see the
 "(Henkelman1)"_#Henkelman1 paper for details).  Ri are the atomic
 coordinates of replica i; Ri-1 and Ri+1 are the coordinates of its
 neighbor replicas.  The term (Grad(V) dot That) is used to remove the
 component of the gradient parallel to the path which would tend to
 distribute the replica unevenly along the path.  Fspringparallel is an
 artificial spring force which is applied only in the tangent direction
 and which maintains the replicas equally spaced (see below for more
 information).  Fspringperp is an optinal artificial spring which is
 applied only perpendicular to the tangent and which prevent the paths
 from forming too acute kinks (see below for more information).
 
 
 In the second stage of the NEB, the interatomic force Fi for the
 climbing replica (which is the replica of highest energy) becomes:
 
 Fi = -Grad(V) + 2 (Grad(V) dot That) That :pre
 
 By default, the force acting on the first and last replicas is not
 altered so that during the NEB relaxation, these ending replicas relax
 toward local minima. However it is possible to use the key word
 {freeend} to allow either the initial or the final replica to relax
 toward a MEP while constraining its energy.  The interatomic force Fi
 for the free end image becomes :
 
 Fi = -Grad(V)+ (Grad(V) dot That + E-ETarget) That,  {when} Grad(V) dot That < 0
 Fi = -Grad(V)+ (Grad(V) dot That + ETarget- E) That, {when} Grad(V) dot That > 0
 :pre
 
 where E is the energy of the free end replica and ETarget is the
 target energy.
 
 When the value {ini} ({final}) is used after the keyword {freeend},
 the first (last) replica is considered as a free end. The target
 energy is set to the energy of the replica at starting of the NEB
 calculation. When the value {finaleini} or {final2eini} is used the
 last image is considered as a free end and the target energy is equal
 to the energy of the first replica (which can evolve during the NEB
 relaxation).  With the value {finaleini}, when the initial path is too
 far from the MEP, an intermediate repilica might relax "faster" and
 get a lower energy than the last replica. The benefit of the free end
 is then lost since this intermediate replica will relax toward a local
 minima. This behavior can be prevented by using the value {final2eini}
 which remove entirely the contribution of the gradient for all
 intermediate replica which have a lower energy than the initial one
 thus preventing these replicae to over-relax.  After converging a NEB
 with the {final2eini} value it is recommended to check that all
 intermediate replica have a larger energy than the initial
 replica. Finally note that if the last replica converges toward a
 local minimum with a larger energy than the energy of the first
 replica, a free end neb calculation with the value {finaleini} or
 {final2eini} cannot reach the convergence criteria.
 
 :line
 
-The keywords {idealpos} and {neigh} allow to specify how to parallel
-spring force is computed.  If the keyword {idealpos} is used or by
-default, the spring force is computed as suggested in "(E)"_#E :
+The keyword {nudg_style} allow to specify how to parallel
+nudging force is computed. With a value of idealpos, the spring 
+force is computed as suggested in "(E)"_#E :
    
-Fspringparallel=-{Kspring}* (RD-RDideal)/(2 meanDist) :pre
+Fnudgparallel=-{Kspring}* (RD-RDideal)/(2 meanDist) :pre
 
 where RD is the "reaction coordinate" see "neb"_neb.html section, and
 RDideal is the ideal RD for which all the images are equally spaced
 (i.e. RDideal = (i-1)*meanDist when the climbing image is off, where i
 is the replica number). The meanDist is the average distance between
 replicas.
 
-If the keyword {neigh} is used, the parallel spring force is computed
-as in "(Henkelman1)"_#Henkelman1 by connecting each intermediate
-replica with the previous and the next image:
+When {nudg_style} has a value of neigh (or by default), the parallel 
+nudging force is computed as in "(Henkelman1)"_#Henkelman1 by 
+connecting each intermediate replica with the previous and the next 
+image:
 
-Fspringparallel= {Kspring}* (|Ri+1 - Ri| - |Ri - Ri-1|) :pre
+Fnudgparallel= {Kspring}* (|Ri+1 - Ri| - |Ri - Ri-1|) :pre
 
-The parallel spring force associated with the key word idealpos should
+The parallel nudging force associated with the key word idealpos should
 usually be more efficient at keeping the images equally spaced.
 
 :line
 
 The keyword {perp} allows to add a spring force perpendicular to the
 path in order to prevent the path from becoming too kinky. It can
 improve significantly the convergence of the NEB when the resolution
 is poor (i.e. when too few images are used) (see "(Maras)"_#Maras1).
 The perpendicular spring force is given by
 
 Fspringperp = {Kspringperp} * f(Ri-1,Ri,Ri+1) (Ri+1 + Ri-1 - 2 Ri) :pre
 
 f(Ri-1 Ri R+1) is a smooth scalar function of the angle Ri-1 Ri
 Ri+1. It is equal to 0 when the path is straight and is equal to 1
 when the angle Ri-1 Ri Ri+1 is accute. f(Ri-1 Ri R+1) is defined in
 "(Jonsson)"_#Jonsson
 
 :line
 
 [Restart, fix_modify, output, run start/stop, minimize info:]
 
 No information about this fix is written to "binary restart
 files"_restart.html.  None of the "fix_modify"_fix_modify.html options
 are relevant to this fix.  No global or per-atom quantities are stored
 by this fix for access by various "output
 commands"_Section_howto.html#howto_15.  No parameter of this fix can
 be used with the {start/stop} keywords of the "run"_run.html command.
 
 The forces due to this fix are imposed during an energy minimization,
 as invoked by the "minimize"_minimize.html command via the
 "neb"_neb.html command.
 
 [Restrictions:]
 
 This command can only be used if LAMMPS was built with the REPLICA
 package.  See the "Making LAMMPS"_Section_start.html#start_3 section
 for more info on packages.
 
 [Related commands:]
 
 "neb"_neb.html
 
 [Default:]
 
-none
+The option defaults are nudg_style = neigh, perp = none, freeend = none and freend_kspring = 1.
+
+:line
 
 :link(Henkelman1)
 [(Henkelman1)] Henkelman and Jonsson, J Chem Phys, 113, 9978-9985 (2000).
 
 :link(Henkelman2)
 [(Henkelman2)] Henkelman, Uberuaga, Jonsson, J Chem Phys, 113,
 9901-9904 (2000).
 
 :link(E)
 [(E)] E, Ren, Vanden-Eijnden, Phys Rev B, 66, 052301 (2002)
 
 :link(Jonsson)
 [(Jonsson)] Jonsson, Mills and Jacobsen, in Classical and Quantum
 Dynamics in Condensed Phase Simulations, edited by Berne, Ciccotti, and Coker
 World Scientific, Singapore, 1998, p. 385
 
 :link(Maras1)
 [(Maras)] Maras, Trushin, Stukowski, Ala-Nissila, Jonsson,
 Comp Phys Comm, 205, 13-21 (2016)
diff --git a/examples/neb/in.neb.hop1 b/examples/neb/in.neb.hop1
index 495b2faa9..b874d1ba3 100644
--- a/examples/neb/in.neb.hop1
+++ b/examples/neb/in.neb.hop1
@@ -1,66 +1,66 @@
 # 2d NEB surface simulation, hop from surface to become adatom
 
 dimension	2
 boundary	p s p
 
 atom_style	atomic
 neighbor	0.3 bin
 neigh_modify	delay 5
 atom_modify	map array sort 0 0.0
 
 variable	u uloop 20
 
 # create geometry with flat surface
 
 lattice		hex 0.9
 region		box block 0 20 0 10 -0.25 0.25
 
 #create_box	3 box
 #create_atoms	1 box
 #mass		* 1.0
 #write_data      initial.hop1
 
 read_data        initial.hop1
 
 # LJ potentials
 
 pair_style	lj/cut 2.5
 pair_coeff	* * 1.0 1.0 2.5
 pair_modify	shift yes
 
 # initial minimization to relax surface
 
 minimize	1.0e-6 1.0e-4 1000 10000
 reset_timestep	0
 
 # define groups
 
 region	        1 block INF INF INF 1.25 INF INF
 group		lower region 1
 group		mobile subtract all lower
 set		group lower type 2
 
 timestep	0.05
 
 # group of NEB atoms - either block or single atom ID 412
 
 region		surround block 10 18 17 20 0 0 units box
 group		nebatoms region surround
 #group		nebatoms id 412
 set		group nebatoms type 3
 group		nonneb subtract all nebatoms
 
 fix		1 lower setforce 0.0 0.0 0.0
-fix		2 nebatoms neb 1.0 neigh
+fix		2 nebatoms neb 1.0 nudg_style idealpos
 fix		3 all enforce2d
 
 thermo		100
 
 #dump		1 nebatoms atom 10 dump.neb.$u
 #dump		2 nonneb atom 10 dump.nonneb.$u
 
 # run NEB for 2000 steps or to force tolerance
 
 min_style	quickmin
 
 neb		0.0 0.1 1000 1000 100 final final.hop1
diff --git a/examples/neb/in.neb.hop1freeend b/examples/neb/in.neb.hop1freeend
index fc0677f30..fa90e9a98 100644
--- a/examples/neb/in.neb.hop1freeend
+++ b/examples/neb/in.neb.hop1freeend
@@ -1,56 +1,56 @@
 # 2d NEB surface simulation, hop from surface to become adatom
 
 dimension	2
 boundary	p s p
 
 atom_style	atomic
 neighbor	0.3 bin
 neigh_modify	delay 5
 atom_modify	map array sort 0 0.0
 
 variable	u uloop 20
 
 # create geometry with flat surface
 
 lattice		hex 0.9
 region		box block 0 20 0 10 -0.25 0.25
 
 read_data        initial.hop1freeend
 
 # LJ potentials
 
 pair_style	lj/cut 2.5
 pair_coeff	* * 1.0 1.0 2.5
 pair_modify	shift yes
 
 # define groups
 
 region	        1 block INF INF INF 1.25 INF INF
 group		lower region 1
 group		mobile subtract all lower
 set		group lower type 2
 
 timestep	0.05
 
 # group of NEB atoms - either block or single atom ID 412
 
 region		surround block 10 18 17 20 0 0 units box
 group		nebatoms region surround
 #group		nebatoms id 412
 set		group nebatoms type 3
 group		nonneb subtract all nebatoms
 
 fix		1 lower setforce 0.0 0.0 0.0
-fix		2 nebatoms neb 1.0 freeend ini
+fix		2 nebatoms neb 1.0 nudg_style idealpos freeend ini
 fix		3 all enforce2d
 
 thermo		100
 
 #dump		1 nebatoms atom 10 dump.neb.$u
 #dump		2 nonneb atom 10 dump.nonneb.$u
 
 # run NEB for 2000 steps or to force tolerance
 
 min_style	quickmin
 
 neb		0.0 0.1 1000 1000 100 final final.hop1
diff --git a/examples/neb/in.neb.hop2 b/examples/neb/in.neb.hop2
index 9a8986f45..242de759f 100644
--- a/examples/neb/in.neb.hop2
+++ b/examples/neb/in.neb.hop2
@@ -1,68 +1,68 @@
 # 2d NEB surface simulation, hop of adatom on surface
 
 dimension	2
 boundary	p s p
 
 atom_style	atomic
 neighbor	0.3 bin
 neigh_modify	delay 5
 atom_modify	map array sort 0 0.0
 
 variable	u uloop 20
 
 # create geometry with adatom
 
 lattice		hex 0.9
 region		box block 0 20 0 11 -0.25 0.25
 region		box1 block 0 20 0 10 -0.25 0.25
 
 #create_box	3 box
 #create_atoms	1 region box1
 #create_atoms	1 single 11.5 10.5 0
 #mass		* 1.0
 #write_data      initial.hop2
 
 read_data        initial.hop2
 
 # LJ potentials
 
 pair_style	lj/cut 2.5
 pair_coeff	* * 1.0 1.0 2.5
 pair_modify	shift yes
 
 # initial minimization to relax surface
 
 minimize	1.0e-6 1.0e-4 1000 10000
 reset_timestep	0
 
 # define groups
 
 region	        1 block INF INF INF 1.25 INF INF
 group		lower region 1
 group		mobile subtract all lower
 set		group lower type 2
 
 timestep	0.05
 
 # group of NEB atoms - either block or single atom ID 421
 
 region		surround block 10 18 17 21 0 0 units box
 group		nebatoms region surround
 #group		nebatoms id 421
 set		group nebatoms type 3
 group		nonneb subtract all nebatoms
 
 fix		1 lower setforce 0.0 0.0 0.0
-fix		2 nebatoms neb 1.0 neigh
+fix		2 nebatoms neb 1.0 
 fix		3 all enforce2d
 
 thermo		100
 
 #dump		1 nebatoms atom 10 dump.neb.$u
 #dump		2 nonneb atom 10 dump.nonneb.$u
 
 # run NEB for 2000 steps or to force tolerance
 
 min_style	fire
 
 neb		0.0 0.01 1000 1000 100 final final.hop2
diff --git a/examples/neb/in.neb.sivac b/examples/neb/in.neb.sivac
index 2fdf46f9d..941d063b9 100644
--- a/examples/neb/in.neb.sivac
+++ b/examples/neb/in.neb.sivac
@@ -1,78 +1,78 @@
 # NEB simulation of vacancy hopping in silicon crystal
 
 units           metal
 
 atom_style      atomic
 atom_modify     map array
 boundary        p p p
 atom_modify	sort 0 0.0
 
 # coordination number cutoff
 
 variable r equal 2.835
 
 # diamond unit cell
 
 variable a equal 5.431
 lattice         custom $a               &
                 a1 1.0 0.0 0.0          &
                 a2 0.0 1.0 0.0          &
                 a3 0.0 0.0 1.0          &
                 basis 0.0 0.0 0.0       &
                 basis 0.0 0.5 0.5       &
                 basis 0.5 0.0 0.5       &
                 basis 0.5 0.5 0.0       &
                 basis 0.25 0.25 0.25    &
                 basis 0.25 0.75 0.75    &
                 basis 0.75 0.25 0.75    &
                 basis 0.75 0.75 0.25
 
 region          myreg block     0 4 &
                                 0 4 &
                                 0 4
 
 #create_box      1 myreg
 #create_atoms    1 region myreg
 #mass            1       28.06
 #write_data      initial.sivac
 
 read_data       initial.sivac
 
 # make a vacancy
 
 group Si type 1
 
 group del id 300
 delete_atoms group del compress no
 group vacneigh id 174 175 301 304 306 331 337
  
 # choose potential
 
 pair_style      sw
 pair_coeff * * Si.sw Si
 
 # set up neb run
 
 variable	u uloop 20
 
 # only output atoms near vacancy
 
 #dump events vacneigh custom 1000 dump.neb.sivac.$u id type x y z
 
 # initial minimization to relax vacancy
 
 displace_atoms all random 0.1 0.1 0.1 123456
 minimize	1.0e-6 1.0e-4 1000 10000
 
 reset_timestep	0
 
-fix		1 all neb 1.0 neigh
+fix		1 all neb 1.0 
 
 thermo		100
 
 # run NEB for 2000 steps or to force tolerance
 
 timestep        0.01
 min_style	quickmin
 
 neb		0.0 0.01 100 100 10 final final.sivac
diff --git a/src/REPLICA/fix_neb.cpp b/src/REPLICA/fix_neb.cpp
index f66167f2a..b17315ca0 100644
--- a/src/REPLICA/fix_neb.cpp
+++ b/src/REPLICA/fix_neb.cpp
@@ -1,864 +1,871 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #include <mpi.h>
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include "fix_neb.h"
 #include "universe.h"
 #include "update.h"
 #include "atom.h"
 #include "domain.h"
 #include "comm.h"
 #include "modify.h"
 #include "compute.h"
 #include "group.h"
 #include "memory.h"
 #include "error.h"
 #include "force.h"
 #include "math_const.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
 
 enum{SINGLE_PROC_DIRECT,SINGLE_PROC_MAP,MULTI_PROC};
 /* ---------------------------------------------------------------------- */
 
 FixNEB::FixNEB(LAMMPS *lmp, int narg, char **arg) :
   Fix(lmp, narg, arg),
   id_pe(NULL), pe(NULL), nlenall(NULL), xprev(NULL), xnext(NULL),
   fnext(NULL), springF(NULL), tangent(NULL), xsend(NULL), xrecv(NULL),
   fsend(NULL), frecv(NULL), tagsend(NULL), tagrecv(NULL),
   xsendall(NULL), xrecvall(NULL), fsendall(NULL), frecvall(NULL),
   tagsendall(NULL), tagrecvall(NULL), counts(NULL),
   displacements(NULL)
 {
 
-  NEBLongRange=true;
-  StandardNEB=PerpSpring=FreeEndIni=FreeEndFinal=false;
+  NEBLongRange=false;
+  StandardNEB=true;
+  PerpSpring=FreeEndIni=FreeEndFinal=false;
   FreeEndFinalWithRespToEIni=FinalAndInterWithRespToEIni=false;
 
   kspringPerp=0.0;
-
+  kspring2=1.0;
   if (narg < 4)
     error->all(FLERR,"Illegal fix neb command, argument missing");
 
   kspring = force->numeric(FLERR,arg[3]);
   if (kspring <= 0.0)
     error->all(FLERR,"Illegal fix neb command."
                " The spring force was not provided properly");
 
   int iarg =4;
   while (iarg < narg) {
-    if (strcmp (arg[iarg],"idealpos")==0) {
-      NEBLongRange = true;
-      iarg+=1;
-    } else if (strcmp (arg[iarg],"neigh")==0) {
-      NEBLongRange = false;
-      StandardNEB = true;
-      iarg+=1;
-    } else if (strcmp (arg[iarg],"perp")==0) {
+    if (strcmp (arg[iarg],"nudg_style")==0) {
+          if (strcmp (arg[iarg+1],"idealpos")==0) {
+	    NEBLongRange = true;
+	    iarg+=2;}
+	  else if (strcmp (arg[iarg+1],"neigh")==0) {
+	    NEBLongRange = false;
+	    StandardNEB = true;
+	    iarg+=2;}
+	  else error->all(FLERR,"Illegal fix neb command. Unknown keyword");}
+    else if (strcmp (arg[iarg],"perp")==0) {
       PerpSpring=true;
       kspringPerp = force->numeric(FLERR,arg[iarg+1]);
       if (kspringPerp < 0.0)
         error->all(FLERR,"Illegal fix neb command. "
                    "The perpendicular spring force was not provided properly");
-      iarg+=2;
-    } else if (strcmp (arg[iarg],"freeend")==0) {
+      iarg+=2;} 
+    else if (strcmp (arg[iarg],"freeend")==0) {
       if (strcmp (arg[iarg+1],"ini")==0)
         FreeEndIni=true;
       else if (strcmp (arg[iarg+1],"final")==0)
         FreeEndFinal=true;
       else if (strcmp (arg[iarg+1],"finaleini")==0)
         FreeEndFinalWithRespToEIni=true;
       else if (strcmp (arg[iarg+1],"final2eini")==0) {
         FinalAndInterWithRespToEIni=true;
         FreeEndFinalWithRespToEIni=true;}
-      iarg+=2;
-    } else error->all(FLERR,"Illegal fix neb command. Unknown keyword");
+      else if (strcmp (arg[iarg+1],"none")!=0) error->all(FLERR,"Illegal fix neb command. Unknown keyword");
+      iarg+=2;} 
+    else if (strcmp (arg[iarg],"freeend_kspring")==0) {
+      kspring2=force->numeric(FLERR,arg[iarg+1]);
+      iarg+=2; }
+    else error->all(FLERR,"Illegal fix neb command. Unknown keyword");
   }
 
   // nreplica = number of partitions
   // ireplica = which world I am in universe
   // nprocs_universe = # of procs in all replicase
   // procprev,procnext = root proc in adjacent replicas
 
   me = comm->me;
   nprocs = comm->nprocs;
 
   nprocs_universe = universe->nprocs;
   nreplica = universe->nworlds;
   ireplica = universe->iworld;
 
   if (ireplica > 0) procprev = universe->root_proc[ireplica-1];
   else procprev = -1;
   if (ireplica < nreplica-1) procnext = universe->root_proc[ireplica+1];
   else procnext = -1;
 
   uworld = universe->uworld;
   int *iroots = new int[nreplica];
   MPI_Group uworldgroup,rootgroup;
   if (NEBLongRange) {
     for (int i=0; i<nreplica; i++)
       iroots[i]=universe->root_proc[i];
     MPI_Comm_group(uworld, &uworldgroup);
     MPI_Group_incl(uworldgroup, nreplica, iroots, &rootgroup);
     MPI_Comm_create(uworld, rootgroup, &rootworld);
   }
   delete[] iroots;
 
   // create a new compute pe style
   // id = fix-ID + pe, compute group = all
 
   int n = strlen(id) + 4;
   id_pe = new char[n];
   strcpy(id_pe,id);
   strcat(id_pe,"_pe");
 
   char **newarg = new char*[3];
   newarg[0] = id_pe;
   newarg[1] = (char *) "all";
   newarg[2] = (char *) "pe";
   modify->add_compute(3,newarg);
   delete [] newarg;
 
   // initialize local storage
 
   maxlocal = -1;
   ntotal = -1;
 }
 
 /* ---------------------------------------------------------------------- */
 
 FixNEB::~FixNEB()
 {
   modify->delete_compute(id_pe);
   delete [] id_pe;
 
   memory->destroy(xprev);
   memory->destroy(xnext);
   memory->destroy(tangent);
   memory->destroy(fnext);
   memory->destroy(springF);
   memory->destroy(xsend);
   memory->destroy(xrecv);
   memory->destroy(fsend);
   memory->destroy(frecv);
   memory->destroy(tagsend);
   memory->destroy(tagrecv);
 
   memory->destroy(xsendall);
   memory->destroy(xrecvall);
   memory->destroy(fsendall);
   memory->destroy(frecvall);
   memory->destroy(tagsendall);
   memory->destroy(tagrecvall);
 
   memory->destroy(counts);
   memory->destroy(displacements);
 
   if (NEBLongRange) {
     if (rootworld != MPI_COMM_NULL) MPI_Comm_free(&rootworld);
     memory->destroy(nlenall);
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 int FixNEB::setmask()
 {
   int mask = 0;
   mask |= MIN_POST_FORCE;
   return mask;
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixNEB::init()
 {
   int icompute = modify->find_compute(id_pe);
   if (icompute < 0)
     error->all(FLERR,"Potential energy ID for fix neb does not exist");
   pe = modify->compute[icompute];
 
   // turn off climbing mode, NEB command turns it on after init()
 
   rclimber = -1;
 
   // nebatoms = # of atoms in fix group = atoms with inter-replica forces
 
   bigint count = group->count(igroup);
   if (count > MAXSMALLINT) error->all(FLERR,"Too many active NEB atoms");
   nebatoms = count;
 
   // comm mode for inter-replica exchange of coords
 
   if (nreplica == nprocs_universe &&
       nebatoms == atom->natoms && atom->sortfreq == 0)
     cmode = SINGLE_PROC_DIRECT;
   else if (nreplica == nprocs_universe) cmode = SINGLE_PROC_MAP;
   else cmode = MULTI_PROC;
 
   // ntotal = total # of atoms in system, NEB atoms or not
 
   if (atom->natoms > MAXSMALLINT) error->all(FLERR,"Too many atoms for NEB");
   ntotal = atom->natoms;
 
   if (atom->nmax > maxlocal) reallocate();
 
   if (MULTI_PROC && counts == NULL) {
     memory->create(xsendall,ntotal,3,"neb:xsendall");
     memory->create(xrecvall,ntotal,3,"neb:xrecvall");
     memory->create(fsendall,ntotal,3,"neb:fsendall");
     memory->create(frecvall,ntotal,3,"neb:frecvall");
     memory->create(tagsendall,ntotal,"neb:tagsendall");
     memory->create(tagrecvall,ntotal,"neb:tagrecvall");
     memory->create(counts,nprocs,"neb:counts");
     memory->create(displacements,nprocs,"neb:displacements");
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixNEB::min_setup(int vflag)
 {
   min_post_force(vflag);
 
   // trigger potential energy computation on next timestep
 
   pe->addstep(update->ntimestep+1);
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FixNEB::min_post_force(int vflag)
 {
   double vprev,vnext;
   double delxp,delyp,delzp,delxn,delyn,delzn;
   double vIni=0.0;
 
   vprev=vnext=veng=pe->compute_scalar();
 
   if (ireplica < nreplica-1 && me ==0)
     MPI_Send(&veng,1,MPI_DOUBLE,procnext,0,uworld);
   if (ireplica > 0 && me ==0)
     MPI_Recv(&vprev,1,MPI_DOUBLE,procprev,0,uworld,MPI_STATUS_IGNORE);
 
   if (ireplica > 0 && me == 0)
     MPI_Send(&veng,1,MPI_DOUBLE,procprev,0,uworld);
   if (ireplica < nreplica-1 && me == 0)
     MPI_Recv(&vnext,1,MPI_DOUBLE,procnext,0,uworld,MPI_STATUS_IGNORE);
 
   if (cmode == MULTI_PROC) {
     MPI_Bcast(&vprev,1,MPI_DOUBLE,0,world);
     MPI_Bcast(&vnext,1,MPI_DOUBLE,0,world);
   }
 
   if (FreeEndFinal && (update->ntimestep == 0)) EFinalIni = veng;
 
   if (ireplica == 0) vIni=veng;
 
   if (FreeEndFinalWithRespToEIni) {
     if (me == 0) {
       int procFirst;
       procFirst=universe->root_proc[0];
       MPI_Bcast(&vIni,1,MPI_DOUBLE,procFirst,uworld);
     }
     if (cmode == MULTI_PROC) {
       MPI_Bcast(&vIni,1,MPI_DOUBLE,0,world);
     }
   }
   if (FreeEndIni && ireplica == 0) {
     if (me == 0 )
       if (update->ntimestep == 0) {
         EIniIni = veng;
         if (cmode == MULTI_PROC)
           MPI_Bcast(&EIniIni,1,MPI_DOUBLE,0,world);
       }
   }
 
   // communicate atoms to/from adjacent replicas to fill xprev,xnext
   inter_replica_comm();
 
   // trigger potential energy computation on next timestep
 
   pe->addstep(update->ntimestep+1);
 
   double **x = atom->x;
   int *mask = atom->mask;
   double dot = 0.0;
   double prefactor = 0.0;
 
   double **f = atom->f;
   int nlocal = atom->nlocal;
 
   //calculating separation between images
 
   plen = 0.0;
   nlen = 0.0;
   double tlen = 0.0;
   double gradnextlen = 0.0;
 
   dotgrad = gradlen = dotpath = dottangrad = 0.0;
 
   if (ireplica == nreplica-1) {
 
     for (int i = 0; i < nlocal; i++)
       if (mask[i] & groupbit) {
         delxp = x[i][0] - xprev[i][0];
         delyp = x[i][1] - xprev[i][1];
         delzp = x[i][2] - xprev[i][2];
         domain->minimum_image(delxp,delyp,delzp);
         plen += delxp*delxp + delyp*delyp + delzp*delzp;
         dottangrad += delxp* f[i][0]+ delyp*f[i][1]+delzp*f[i][2];
         gradlen += f[i][0]*f[i][0] + f[i][1]*f[i][1] + f[i][2]*f[i][2];
         if (FreeEndFinal||FreeEndFinalWithRespToEIni) {
           tangent[i][0]=delxp;
           tangent[i][1]=delyp;
           tangent[i][2]=delzp;
           tlen += tangent[i][0]*tangent[i][0]
             + tangent[i][1]*tangent[i][1] + tangent[i][2]*tangent[i][2];
           dot += f[i][0]*tangent[i][0]
             + f[i][1]*tangent[i][1] + f[i][2]*tangent[i][2];
         }
       }
 
   } else if (ireplica == 0) {
     for (int i = 0; i < nlocal; i++)
       if (mask[i] & groupbit) {
         delxn = xnext[i][0] - x[i][0];
         delyn = xnext[i][1] - x[i][1];
         delzn = xnext[i][2] - x[i][2];
         domain->minimum_image(delxn,delyn,delzn);
         nlen += delxn*delxn + delyn*delyn + delzn*delzn;
         gradnextlen += fnext[i][0]*fnext[i][0]
           + fnext[i][1]*fnext[i][1] +fnext[i][2] * fnext[i][2];
         dotgrad += f[i][0]*fnext[i][0]
           + f[i][1]*fnext[i][1] + f[i][2]*fnext[i][2];
         dottangrad += delxn*f[i][0]+ delyn*f[i][1] + delzn*f[i][2];
         gradlen += f[i][0]*f[i][0] + f[i][1]*f[i][1] + f[i][2]*f[i][2];
         if (FreeEndIni) {
           tangent[i][0]=delxn;
           tangent[i][1]=delyn;
           tangent[i][2]=delzn;
           tlen += tangent[i][0]*tangent[i][0]
             + tangent[i][1]*tangent[i][1] + tangent[i][2]*tangent[i][2];
           dot += f[i][0]*tangent[i][0]
             + f[i][1]*tangent[i][1] + f[i][2]*tangent[i][2];
         }
       }
   } else {
 
     // not the first or last replica
 
     double vmax = MAX(fabs(vnext-veng),fabs(vprev-veng));
     double vmin = MIN(fabs(vnext-veng),fabs(vprev-veng));
 
 
     for (int i = 0; i < nlocal; i++)
       if (mask[i] & groupbit) {
         delxp = x[i][0] - xprev[i][0];
         delyp = x[i][1] - xprev[i][1];
         delzp = x[i][2] - xprev[i][2];
         domain->minimum_image(delxp,delyp,delzp);
         plen += delxp*delxp + delyp*delyp + delzp*delzp;
 
         delxn = xnext[i][0] - x[i][0];
         delyn = xnext[i][1] - x[i][1];
         delzn = xnext[i][2] - x[i][2];
         domain->minimum_image(delxn,delyn,delzn);
 
         if (vnext > veng && veng > vprev) {
           tangent[i][0]=delxn;
           tangent[i][1]=delyn;
           tangent[i][2]=delzn;
         } else if (vnext < veng && veng < vprev) {
           tangent[i][0]=delxp;
           tangent[i][1]=delyp;
           tangent[i][2]=delzp;
         } else {
           if (vnext > vprev) {
             tangent[i][0] = vmax*delxn + vmin*delxp;
             tangent[i][1] = vmax*delyn + vmin*delyp;
             tangent[i][2] = vmax*delzn + vmin*delzp;
           } else {
             tangent[i][0] = vmin*delxn + vmax*delxp;
             tangent[i][1] = vmin*delyn + vmax*delyp;
             tangent[i][2] = vmin*delzn + vmax*delzp;
           }
         }
 
         nlen += delxn*delxn + delyn*delyn + delzn*delzn;
         tlen += tangent[i][0]*tangent[i][0]
           + tangent[i][1]*tangent[i][1] + tangent[i][2]*tangent[i][2];
         gradlen += f[i][0]*f[i][0] + f[i][1]*f[i][1] + f[i][2]*f[i][2];
         dotpath += delxp*delxn + delyp*delyn + delzp*delzn;
         dottangrad += tangent[i][0]* f[i][0]
           + tangent[i][1]*f[i][1] + tangent[i][2]*f[i][2];
         gradnextlen += fnext[i][0]*fnext[i][0]
           + fnext[i][1]*fnext[i][1] +fnext[i][2] * fnext[i][2];
         dotgrad += f[i][0]*fnext[i][0]
           + f[i][1]*fnext[i][1] + f[i][2]*fnext[i][2];
 
         springF[i][0]=kspringPerp*(delxn-delxp);
         springF[i][1]=kspringPerp*(delyn-delyp);
         springF[i][2]=kspringPerp*(delzn-delzp);
       }
   }
 
 #define BUFSIZE 8
   double bufin[BUFSIZE], bufout[BUFSIZE];
   bufin[0] = nlen;
   bufin[1] = plen;
   bufin[2] = tlen;
   bufin[3] = gradlen;
   bufin[4] = gradnextlen;
   bufin[5] = dotpath;
   bufin[6] = dottangrad;
   bufin[7] = dotgrad;
   MPI_Allreduce(bufin,bufout,BUFSIZE,MPI_DOUBLE,MPI_SUM,world);
   nlen = sqrt(bufout[0]);
   plen = sqrt(bufout[1]);
   tlen = sqrt(bufout[2]);
   gradlen = sqrt(bufout[3]);
   gradnextlen = sqrt(bufout[4]);
   dotpath = bufout[5];
   dottangrad = bufout[6];
   dotgrad = bufout[7];
 
   // normalize tangent vector
 
   if (tlen > 0.0) {
     double tleninv = 1.0/tlen;
     for (int i = 0; i < nlocal; i++)
       if (mask[i] & groupbit) {
         tangent[i][0] *= tleninv;
         tangent[i][1] *= tleninv;
         tangent[i][2] *= tleninv;
       }
   }
 
   // first or last replica has no change to forces, just return
 
   if(ireplica>0 && ireplica<nreplica-1)
     dottangrad = dottangrad/(tlen*gradlen);
   if (ireplica == 0)
     dottangrad = dottangrad/(nlen*gradlen);
   if (ireplica == nreplica-1)
     dottangrad = dottangrad/(plen*gradlen);
   if (ireplica < nreplica-1)
     dotgrad = dotgrad /(gradlen*gradnextlen);
 
 
   if (FreeEndIni && ireplica == 0) {
     if (tlen > 0.0) {
       double dotall;
       MPI_Allreduce(&dot,&dotall,1,MPI_DOUBLE,MPI_SUM,world);
       dot=dotall/tlen;
 
-      if (dot<0) prefactor = -dot - (veng-EIniIni);
-      else prefactor = -dot + (veng-EIniIni);
+      if (dot<0) prefactor = -dot - kspring2*(veng-EIniIni);
+      else prefactor = -dot + kspring2*(veng-EIniIni);
 
       for (int i = 0; i < nlocal; i++)
         if (mask[i] & groupbit) {
           f[i][0] += prefactor *tangent[i][0];
           f[i][1] += prefactor *tangent[i][1];
           f[i][2] += prefactor *tangent[i][2];
         }
     }
   }
 
   if (FreeEndFinal && ireplica == nreplica -1) {
     if (tlen > 0.0) {
       double dotall;
       MPI_Allreduce(&dot,&dotall,1,MPI_DOUBLE,MPI_SUM,world);
       dot=dotall/tlen;
 
-      if (dot<0) prefactor = -dot - (veng-EFinalIni);
-      else prefactor = -dot + (veng-EFinalIni);
+      if (dot<0) prefactor = -dot - kspring2*(veng-EFinalIni);
+      else prefactor = -dot + kspring2*(veng-EFinalIni);
 
       for (int i = 0; i < nlocal; i++)
         if (mask[i] & groupbit) {
           f[i][0] += prefactor *tangent[i][0];
           f[i][1] += prefactor *tangent[i][1];
           f[i][2] += prefactor *tangent[i][2];
         }
     }
   }
 
   if (FreeEndFinalWithRespToEIni&&ireplica == nreplica -1) {
     if (tlen > 0.0) {
       double dotall;
       MPI_Allreduce(&dot,&dotall,1,MPI_DOUBLE,MPI_SUM,world);
       dot=dotall/tlen;
 
-      if (dot<0) prefactor = -dot - (veng-vIni);
-      else prefactor = -dot + (veng-vIni);
+      if (dot<0) prefactor = -dot - kspring2*(veng-vIni);
+      else prefactor = -dot + kspring2*(veng-vIni);
 
       for (int i = 0; i < nlocal; i++)
         if (mask[i] & groupbit) {
           f[i][0] += prefactor *tangent[i][0];
           f[i][1] += prefactor *tangent[i][1];
           f[i][2] += prefactor *tangent[i][2];
         }
     }
   }
 
   double lentot = 0;
   double meanDist,idealPos,lenuntilIm,lenuntilClimber;
   lenuntilClimber=0;
   if (NEBLongRange) {
     if (cmode == SINGLE_PROC_DIRECT || cmode == SINGLE_PROC_MAP) {
       MPI_Allgather(&nlen,1,MPI_DOUBLE,&nlenall[0],1,MPI_DOUBLE,uworld);
     } else {
       if (me == 0)
         MPI_Allgather(&nlen,1,MPI_DOUBLE,&nlenall[0],1,MPI_DOUBLE,rootworld);
       MPI_Bcast(nlenall,nreplica,MPI_DOUBLE,0,world);
     }
 
     lenuntilIm = 0;
     for (int i = 0; i < ireplica; i++)
       lenuntilIm += nlenall[i];
 
     for (int i = 0; i < nreplica; i++)
       lentot += nlenall[i];
 
     meanDist = lentot/(nreplica -1);
 
     if (rclimber>0) {
       for (int i = 0; i < rclimber; i++)
         lenuntilClimber += nlenall[i];
       double meanDistBeforeClimber = lenuntilClimber/rclimber;
       double meanDistAfterClimber =
         (lentot-lenuntilClimber)/(nreplica-rclimber-1);
       if (ireplica<rclimber)
         idealPos = ireplica * meanDistBeforeClimber;
       else
         idealPos = lenuntilClimber+ (ireplica-rclimber)*meanDistAfterClimber;
     } else idealPos = ireplica * meanDist;
   }
 
   if (ireplica == 0 || ireplica == nreplica-1) return ;
 
   double AngularContr;
   dotpath = dotpath/(plen*nlen);
   AngularContr = 0.5 *(1+cos(MY_PI * dotpath));
 
   double dotSpringTangent;
   dotSpringTangent=0;
 
   for (int i = 0; i < nlocal; i++) {
     if (mask[i] & groupbit) {
       dot += f[i][0]*tangent[i][0]
         + f[i][1]*tangent[i][1] + f[i][2]*tangent[i][2];
       dotSpringTangent += springF[i][0]*tangent[i][0]
         +springF[i][1]*tangent[i][1]+springF[i][2]*tangent[i][2];}
   }
 
   double dotSpringTangentall;
   MPI_Allreduce(&dotSpringTangent,&dotSpringTangentall,1,MPI_DOUBLE,MPI_SUM,world);
   dotSpringTangent=dotSpringTangentall;
   double dotall;
   MPI_Allreduce(&dot,&dotall,1,MPI_DOUBLE,MPI_SUM,world);
   dot=dotall;
 
   if (ireplica == rclimber) prefactor = -2.0*dot;
   else {
     if (NEBLongRange) {
       prefactor = -dot - kspring*(lenuntilIm-idealPos)/(2*meanDist);
     } else if (StandardNEB) {
       prefactor = -dot + kspring*(nlen-plen);
     }
 
     if (FinalAndInterWithRespToEIni&& veng<vIni) {
       for (int i = 0; i < nlocal; i++)
         if (mask[i] & groupbit) {
           f[i][0] = 0;
           f[i][1] = 0;
           f[i][2] = 0;
         }
       prefactor =  kspring*(nlen-plen);
       AngularContr=0;
     }
   }
 
   for (int i = 0; i < nlocal; i++)
     if (mask[i] & groupbit) {
       f[i][0] += prefactor*tangent[i][0]
         +AngularContr*(springF[i][0] -dotSpringTangent*tangent[i][0]);
       f[i][1] += prefactor*tangent[i][1]
         + AngularContr*(springF[i][1] - dotSpringTangent*tangent[i][1]);
       f[i][2] += prefactor*tangent[i][2]
         + AngularContr*(springF[i][2] - dotSpringTangent*tangent[i][2]);
     }
 }
 
 /* ----------------------------------------------------------------------
    send/recv NEB atoms to/from adjacent replicas
    received atoms matching my local atoms are stored in xprev,xnext
    replicas 0 and N-1 send but do not receive any atoms
 ------------------------------------------------------------------------- */
 
 
 void FixNEB::inter_replica_comm()
 {
   int i,m;
   MPI_Request request;
   MPI_Request requests[2];
   MPI_Status statuses[2];
 
   // reallocate memory if necessary
 
   if (atom->nmax > maxlocal) reallocate();
 
   double **x = atom->x;
   double **f = atom->f;
   tagint *tag = atom->tag;
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
 
   // -----------------------------------------------------
   // 3 cases: two for single proc per replica
   //          one for multiple procs per replica
   // -----------------------------------------------------
 
   // single proc per replica
   // all atoms are NEB atoms and no atom sorting
   // direct comm of x -> xprev and x -> xnext
 
   if (cmode == SINGLE_PROC_DIRECT) {
     if (ireplica > 0)
       MPI_Irecv(xprev[0],3*nlocal,MPI_DOUBLE,procprev,0,uworld,&request);
     if (ireplica < nreplica-1)
       MPI_Send(x[0],3*nlocal,MPI_DOUBLE,procnext,0,uworld);
     if (ireplica > 0) MPI_Wait(&request,MPI_STATUS_IGNORE);
     if (ireplica < nreplica-1)
       MPI_Irecv(xnext[0],3*nlocal,MPI_DOUBLE,procnext,0,uworld,&request);
     if (ireplica > 0)
       MPI_Send(x[0],3*nlocal,MPI_DOUBLE,procprev,0,uworld);
     if (ireplica < nreplica-1) MPI_Wait(&request,MPI_STATUS_IGNORE);
 
     if (ireplica < nreplica-1)
       MPI_Irecv(fnext[0],3*nlocal,MPI_DOUBLE,procnext,0,uworld,&request);
     if (ireplica > 0)
       MPI_Send(f[0],3*nlocal,MPI_DOUBLE,procprev,0,uworld);
     if (ireplica < nreplica-1) MPI_Wait(&request,MPI_STATUS_IGNORE);
 
     return;
   }
 
   // single proc per replica
   // but only some atoms are NEB atoms or atom sorting is enabled
   // send atom IDs and coords of only NEB atoms to prev/next proc
   // recv procs use atom->map() to match received coords to owned atoms
 
   if (cmode == SINGLE_PROC_MAP) {
     m = 0;
     for (i = 0; i < nlocal; i++)
       if (mask[i] & groupbit) {
         tagsend[m] = tag[i];
         xsend[m][0] = x[i][0];
         xsend[m][1] = x[i][1];
         xsend[m][2] = x[i][2];
         fsend[m][0] = f[i][0];
         fsend[m][1] = f[i][1];
         fsend[m][2] = f[i][2];
         m++;
       }
 
     if (ireplica > 0) {
       MPI_Irecv(xrecv[0],3*nebatoms,MPI_DOUBLE,procprev,0,uworld,&requests[0]);
       MPI_Irecv(tagrecv,nebatoms,MPI_LMP_TAGINT,procprev,0,uworld,&requests[1]);
     }
     if (ireplica < nreplica-1) {
       MPI_Send(xsend[0],3*nebatoms,MPI_DOUBLE,procnext,0,uworld);
       MPI_Send(tagsend,nebatoms,MPI_LMP_TAGINT,procnext,0,uworld);
     }
 
     if (ireplica > 0) {
       MPI_Waitall(2,requests,statuses);
       for (i = 0; i < nebatoms; i++) {
         m = atom->map(tagrecv[i]);
         xprev[m][0] = xrecv[i][0];
         xprev[m][1] = xrecv[i][1];
         xprev[m][2] = xrecv[i][2];
       }
     }
     if (ireplica < nreplica-1) {
       MPI_Irecv(xrecv[0],3*nebatoms,MPI_DOUBLE,procnext,0,uworld,&requests[0]);
       MPI_Irecv(frecv[0],3*nebatoms,MPI_DOUBLE,procnext,0,uworld,&requests[0]);
       MPI_Irecv(tagrecv,nebatoms,MPI_LMP_TAGINT,procnext,0,uworld,&requests[1]);
     }
     if (ireplica > 0) {
       MPI_Send(xsend[0],3*nebatoms,MPI_DOUBLE,procprev,0,uworld);
       MPI_Send(fsend[0],3*nebatoms,MPI_DOUBLE,procprev,0,uworld);
       MPI_Send(tagsend,nebatoms,MPI_LMP_TAGINT,procprev,0,uworld);
     }
 
     if (ireplica < nreplica-1) {
       MPI_Waitall(2,requests,statuses);
       for (i = 0; i < nebatoms; i++) {
         m = atom->map(tagrecv[i]);
         xnext[m][0] = xrecv[i][0];
         xnext[m][1] = xrecv[i][1];
         xnext[m][2] = xrecv[i][2];
         fnext[m][0] = frecv[i][0];
         fnext[m][1] = frecv[i][1];
         fnext[m][2] = frecv[i][2];
       }
     }
 
     return;
   }
 
   // multiple procs per replica
   // MPI_Gather all coords and atom IDs to root proc of each replica
   // send to root of adjacent replicas
   // bcast within each replica
   // each proc extracts info for atoms it owns via atom->map()
 
   m = 0;
   for (i = 0; i < nlocal; i++)
     if (mask[i] & groupbit) {
       tagsend[m] = tag[i];
       xsend[m][0] = x[i][0];
       xsend[m][1] = x[i][1];
       xsend[m][2] = x[i][2];
       fsend[m][0] = f[i][0];
       fsend[m][1] = f[i][1];
       fsend[m][2] = f[i][2];
       m++;
     }
 
   MPI_Gather(&m,1,MPI_INT,counts,1,MPI_INT,0,world);
   displacements[0] = 0;
   for (i = 0; i < nprocs-1; i++)
     displacements[i+1] = displacements[i] + counts[i];
   MPI_Gatherv(tagsend,m,MPI_LMP_TAGINT,
               tagsendall,counts,displacements,MPI_LMP_TAGINT,0,world);
   for (i = 0; i < nprocs; i++) counts[i] *= 3;
   for (i = 0; i < nprocs-1; i++)
     displacements[i+1] = displacements[i] + counts[i];
   if (xsend) {
     MPI_Gatherv(xsend[0],3*m,MPI_DOUBLE,
                 xsendall[0],counts,displacements,MPI_DOUBLE,0,world);
     MPI_Gatherv(fsend[0],3*m,MPI_DOUBLE,
                 fsendall[0],counts,displacements,MPI_DOUBLE,0,world);
   } else {
     MPI_Gatherv(NULL,3*m,MPI_DOUBLE,
                 xsendall[0],counts,displacements,MPI_DOUBLE,0,world);
     MPI_Gatherv(NULL,3*m,MPI_DOUBLE,
                 fsendall[0],counts,displacements,MPI_DOUBLE,0,world);
   }
 
   if (ireplica > 0 && me == 0) {
     MPI_Irecv(xrecvall[0],3*nebatoms,MPI_DOUBLE,procprev,0,uworld,&requests[0]);
     MPI_Irecv(tagrecvall,nebatoms,MPI_LMP_TAGINT,procprev,0,uworld,
               &requests[1]);
   }
   if (ireplica < nreplica-1 && me == 0) {
     MPI_Send(xsendall[0],3*nebatoms,MPI_DOUBLE,procnext,0,uworld);
     MPI_Send(tagsendall,nebatoms,MPI_LMP_TAGINT,procnext,0,uworld);
   }
 
   if (ireplica > 0) {
     if (me == 0) MPI_Waitall(2,requests,statuses);
 
     MPI_Bcast(tagrecvall,nebatoms,MPI_INT,0,world);
     MPI_Bcast(xrecvall[0],3*nebatoms,MPI_DOUBLE,0,world);
 
     for (i = 0; i < nebatoms; i++) {
       m = atom->map(tagrecvall[i]);
       if (m < 0 || m >= nlocal) continue;
       xprev[m][0] = xrecvall[i][0];
       xprev[m][1] = xrecvall[i][1];
       xprev[m][2] = xrecvall[i][2];
     }
   }
 
   if (ireplica < nreplica-1 && me == 0) {
     MPI_Irecv(xrecvall[0],3*nebatoms,MPI_DOUBLE,procnext,0,uworld,&requests[0]);
     MPI_Irecv(frecvall[0],3*nebatoms,MPI_DOUBLE,procnext,0,uworld,&requests[0]);
     MPI_Irecv(tagrecvall,nebatoms,MPI_LMP_TAGINT,procnext,0,uworld,
               &requests[1]);
   }
   if (ireplica > 0 && me == 0) {
     MPI_Send(xsendall[0],3*nebatoms,MPI_DOUBLE,procprev,0,uworld);
     MPI_Send(fsendall[0],3*nebatoms,MPI_DOUBLE,procprev,0,uworld);
     MPI_Send(tagsendall,nebatoms,MPI_LMP_TAGINT,procprev,0,uworld);
   }
 
   if (ireplica < nreplica-1) {
     if (me == 0) MPI_Waitall(2,requests,statuses);
 
     MPI_Bcast(tagrecvall,nebatoms,MPI_INT,0,world);
     MPI_Bcast(xrecvall[0],3*nebatoms,MPI_DOUBLE,0,world);
     MPI_Bcast(frecvall[0],3*nebatoms,MPI_DOUBLE,0,world);
 
     for (i = 0; i < nebatoms; i++) {
       m = atom->map(tagrecvall[i]);
       if (m < 0 || m >= nlocal) continue;
       xnext[m][0] = xrecvall[i][0];
       xnext[m][1] = xrecvall[i][1];
       xnext[m][2] = xrecvall[i][2];
       fnext[m][0] = frecvall[i][0];
       fnext[m][1] = frecvall[i][1];
       fnext[m][2] = frecvall[i][2];
     }
   }
 }
 
 
 /* ----------------------------------------------------------------------
    reallocate xprev,xnext,tangent arrays if necessary
    reallocate communication arrays if necessary
 ------------------------------------------------------------------------- */
 
 void FixNEB::reallocate()
 {
   maxlocal = atom->nmax;
 
   memory->destroy(xprev);
   memory->destroy(xnext);
   memory->destroy(tangent);
   memory->destroy(fnext);
   memory->destroy(springF);
 
   memory->create(xprev,maxlocal,3,"neb:xprev");
   memory->create(xnext,maxlocal,3,"neb:xnext");
   memory->create(tangent,maxlocal,3,"neb:tangent");
   memory->create(fnext,maxlocal,3,"neb:fnext");
   memory->create(springF,maxlocal,3,"neb:springF");
 
   if (cmode != SINGLE_PROC_DIRECT) {
     memory->destroy(xsend);
     memory->destroy(fsend);
     memory->destroy(xrecv);
     memory->destroy(frecv);
     memory->destroy(tagsend);
     memory->destroy(tagrecv);
     memory->create(xsend,maxlocal,3,"neb:xsend");
     memory->create(fsend,maxlocal,3,"neb:fsend");
     memory->create(xrecv,maxlocal,3,"neb:xrecv");
     memory->create(frecv,maxlocal,3,"neb:frecv");
     memory->create(tagsend,maxlocal,"neb:tagsend");
     memory->create(tagrecv,maxlocal,"neb:tagrecv");
   }
 
   if (NEBLongRange) {
     memory->destroy(nlenall);
     memory->create(nlenall,nreplica,"neb:nlenall");
   }
 }
diff --git a/src/REPLICA/fix_neb.h b/src/REPLICA/fix_neb.h
index 1582912da..7e9e6db86 100644
--- a/src/REPLICA/fix_neb.h
+++ b/src/REPLICA/fix_neb.h
@@ -1,96 +1,96 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 #ifdef FIX_CLASS
 
 FixStyle(neb,FixNEB)
 
 #else
 
 #ifndef LMP_FIX_NEB_H
 #define LMP_FIX_NEB_H
 
 #include "fix.h"
 
 namespace LAMMPS_NS {
 
 class FixNEB : public Fix {
  public:
   double veng,plen,nlen,dotpath,dottangrad,gradlen,dotgrad;
   int rclimber;
 
   FixNEB(class LAMMPS *, int, char **);
   ~FixNEB();
   int setmask();
   void init();
   void min_setup(int);
   void min_post_force(int);
 
  private:
   int me,nprocs,nprocs_universe;
-  double kspring,kspringPerp,EIniIni,EFinalIni;
+  double kspring,kspring2,kspringPerp,EIniIni,EFinalIni;
   bool StandardNEB,NEBLongRange,PerpSpring,FreeEndIni,FreeEndFinal;
   bool FreeEndFinalWithRespToEIni,FinalAndInterWithRespToEIni;
   int ireplica,nreplica;
   int procnext,procprev;
   int cmode;
   MPI_Comm uworld;
   MPI_Comm rootworld;
 
 
   char *id_pe;
   class Compute *pe;
 
   int nebatoms;
   int ntotal;                  // total # of atoms, NEB or not
   int maxlocal;                // size of xprev,xnext,tangent arrays
   double *nlenall;
   double **xprev,**xnext,**fnext,**springF;
   double **tangent;
   double **xsend,**xrecv;      // coords to send/recv to/from other replica
   double **fsend,**frecv;      // coords to send/recv to/from other replica
   tagint *tagsend,*tagrecv;    // ditto for atom IDs
 
                                  // info gathered from all procs in my replica
   double **xsendall,**xrecvall;    // coords to send/recv to/from other replica
   double **fsendall,**frecvall;    // force to send/recv to/from other replica
   tagint *tagsendall,*tagrecvall;  // ditto for atom IDs
 
   int *counts,*displacements;   // used for MPI_Gather
 
   void inter_replica_comm();
   void reallocate();
 };
 
 }
 
 #endif
 #endif
 
 /* ERROR/WARNING messages:
 
 E: Illegal ... command
 
 Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.
 
 E: Potential energy ID for fix neb does not exist
 
 Self-explanatory.
 
 E: Atom count changed in fix neb
 
 This is not allowed in a NEB calculation.
 
 */