diff --git a/src/REPLICA/prd.cpp b/src/REPLICA/prd.cpp
index 646851f3a..91f7425fe 100644
--- a/src/REPLICA/prd.cpp
+++ b/src/REPLICA/prd.cpp
@@ -1,955 +1,955 @@
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
    Contributing author: Mike Brown (SNL)
 ------------------------------------------------------------------------- */
 
 // lmptype.h must be first b/c this file uses MAXBIGINT and includes mpi.h
 // due to OpenMPI bug which sets INT64_MAX via its mpi.h
 //   before lmptype.h can set flags to insure it is done correctly
 
 #include "lmptype.h"
 #include <mpi.h>
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include "prd.h"
 #include "universe.h"
 #include "update.h"
 #include "atom.h"
 #include "domain.h"
 #include "region.h"
 #include "comm.h"
 #include "velocity.h"
 #include "integrate.h"
 #include "min.h"
 #include "neighbor.h"
 #include "modify.h"
 #include "compute.h"
 #include "fix.h"
 #include "fix_event_prd.h"
 #include "force.h"
 #include "pair.h"
 #include "random_park.h"
 #include "random_mars.h"
 #include "output.h"
 #include "dump.h"
 #include "finish.h"
 #include "timer.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
 enum{SINGLE_PROC_DIRECT,SINGLE_PROC_MAP,MULTI_PROC};
 
 /* ---------------------------------------------------------------------- */
 
 PRD::PRD(LAMMPS *lmp) : Pointers(lmp) {}
 
 /* ----------------------------------------------------------------------
    perform PRD simulation on one or more replicas
 ------------------------------------------------------------------------- */
 
 void PRD::command(int narg, char **arg)
 {
   int ireplica;
 
   // error checks
 
   if (domain->box_exist == 0)
     error->all(FLERR,"PRD command before simulation box is defined");
   if (universe->nworlds != universe->nprocs &&
       atom->map_style == 0)
     error->all(FLERR,"Cannot use PRD with multi-processor replicas "
                "unless atom map exists");
   if (universe->nworlds == 1 && comm->me == 0)
     error->warning(FLERR,"Running PRD with only one replica");
 
   if (narg < 7) error->universe_all(FLERR,"Illegal prd command");
 
   // read as double so can cast to bigint
 
   int nsteps = force->inumeric(FLERR,arg[0]);
   t_event = force->inumeric(FLERR,arg[1]);
   n_dephase = force->inumeric(FLERR,arg[2]);
   t_dephase = force->inumeric(FLERR,arg[3]);
   t_corr = force->inumeric(FLERR,arg[4]);
 
   char *id_compute = new char[strlen(arg[5])+1];
   strcpy(id_compute,arg[5]);
   int seed = force->inumeric(FLERR,arg[6]);
 
   options(narg-7,&arg[7]);
 
   // total # of timesteps must be multiple of t_event
 
   if (t_event <= 0) error->universe_all(FLERR,"Invalid t_event in prd command");
   if (nsteps % t_event)
     error->universe_all(FLERR,"PRD nsteps must be multiple of t_event");
   if (t_corr % t_event)
     error->universe_all(FLERR,"PRD t_corr must be multiple of t_event");
 
   // local storage
 
   int me_universe = universe->me;
   int nprocs_universe = universe->nprocs;
   int nreplica = universe->nworlds;
   int iworld = universe->iworld;
 
   MPI_Comm_rank(world,&me);
   MPI_Comm_size(world,&nprocs);
 
   // comm_replica = communicator between all proc 0s across replicas
 
   int color = me;
   MPI_Comm_split(universe->uworld,color,0,&comm_replica);
 
   // comm mode for inter-replica exchange of coords
 
   if (nreplica == nprocs_universe && atom->sortfreq == 0) 
     cmode = SINGLE_PROC_DIRECT;
   else if (nreplica == nprocs_universe) cmode = SINGLE_PROC_MAP;
   else cmode = MULTI_PROC;
 
   // workspace for inter-replica communication
 
   natoms = atom->natoms;
 
   tagall = NULL;
   xall = NULL;
   imageall = NULL;
 
   if (cmode != SINGLE_PROC_DIRECT) {
     memory->create(tagall,natoms,"prd:tagall");
     memory->create(xall,natoms,3,"prd:xall");
     memory->create(imageall,natoms,"prd:imageall");
   }
 
   counts = NULL;
   displacements = NULL;
 
   if (cmode == MULTI_PROC) {
     memory->create(counts,nprocs,"prd:counts");
     memory->create(displacements,nprocs,"prd:displacements");
   }
 
   // random_select = same RNG for each replica, for multiple event selection
   // random_clock = same RNG for each replica, for clock updates
   // random_dephase = unique RNG for each replica, for dephasing
 
   random_select = new RanPark(lmp,seed);
   random_clock = new RanPark(lmp,seed+1000);
   random_dephase = new RanMars(lmp,seed+iworld);
 
   // create ComputeTemp class to monitor temperature
 
   char **args = new char*[3];
   args[0] = (char *) "prd_temp";
   args[1] = (char *) "all";
   args[2] = (char *) "temp";
   modify->add_compute(3,args);
   temperature = modify->compute[modify->ncompute-1];
 
   // create Velocity class for velocity creation in dephasing
   // pass it temperature compute, loop_setting, dist_setting settings
 
   atom->check_mass();
   velocity = new Velocity(lmp);
   velocity->init_external("all");
 
   args[0] = (char *) "temp";
   args[1] = (char *) "prd_temp";
   velocity->options(2,args);
   args[0] = (char *) "loop";
   args[1] = (char *) loop_setting;
   if (loop_setting) velocity->options(2,args);
   args[0] = (char *) "dist";
   args[1] = (char *) dist_setting;
   if (dist_setting) velocity->options(2,args);
 
   // create FixEventPRD class to store event and pre-quench states
 
   args[0] = (char *) "prd_event";
   args[1] = (char *) "all";
   args[2] = (char *) "EVENT/PRD";
   modify->add_fix(3,args);
   fix_event = (FixEventPRD *) modify->fix[modify->nfix-1];
 
   // create Finish for timing output
 
   finish = new Finish(lmp);
 
   // string clean-up
 
   delete [] args;
   delete [] loop_setting;
   delete [] dist_setting;
 
   // assign FixEventPRD to event-detection compute
   // necessary so it will know atom coords at last event
 
   int icompute = modify->find_compute(id_compute);
   if (icompute < 0) error->all(FLERR,"Could not find compute ID for PRD");
   compute_event = modify->compute[icompute];
   compute_event->reset_extra_compute_fix("prd_event");
 
   // reset reneighboring criteria since will perform minimizations
 
   neigh_every = neighbor->every;
   neigh_delay = neighbor->delay;
   neigh_dist_check = neighbor->dist_check;
 
   if (neigh_every != 1 || neigh_delay != 0 || neigh_dist_check != 1) {
     if (me == 0)
       error->warning(FLERR,"Resetting reneighboring criteria during PRD");
   }
 
   neighbor->every = 1;
   neighbor->delay = 0;
   neighbor->dist_check = 1;
 
   // initialize PRD as if one long dynamics run
 
   update->whichflag = 1;
   update->nsteps = nsteps;
   update->beginstep = update->firststep = update->ntimestep;
   update->endstep = update->laststep = update->firststep + nsteps;
   update->restrict_output = 1;
   if (update->laststep < 0)
     error->all(FLERR,"Too many timesteps");
 
   lmp->init();
 
   // init minimizer settings and minimizer itself
 
   update->etol = etol;
   update->ftol = ftol;
   update->max_eval = maxeval;
 
   update->minimize->init();
 
   // cannot use PRD with a changing box
 
   if (domain->box_change)
     error->all(FLERR,"Cannot use PRD with a changing box");
 
   // cannot use PRD with time-dependent fixes or regions
 
   for (int i = 0; i < modify->nfix; i++)
     if (modify->fix[i]->time_depend)
       error->all(FLERR,"Cannot use PRD with a time-dependent fix defined");
 
   for (int i = 0; i < domain->nregion; i++)
     if (domain->regions[i]->dynamic_check())
       error->all(FLERR,"Cannot use PRD with a time-dependent region defined");
 
   // perform PRD simulation
 
   if (me_universe == 0 && universe->uscreen)
     fprintf(universe->uscreen,"Setting up PRD ...\n");
 
   if (me_universe == 0) {
     if (universe->uscreen)
       fprintf(universe->uscreen,"Step CPU Clock Event "
               "Correlated Coincident Replica\n");
     if (universe->ulogfile)
       fprintf(universe->ulogfile,"Step CPU Clock Event "
               "Correlated Coincident Replica\n");
   }
 
   // store hot state and quenched event for replica 0
   // use share_event() to copy that info to all replicas
   // this insures all start from same place
 
   // need this line if quench() does only setup_minimal()
   // update->minimize->setup();
 
   fix_event->store_state_quench();
   quench();
   ncoincident = 0;
   share_event(0,0,0);
 
   timer->init();
   timer->barrier_start();
   time_start = timer->get_wall(Timer::TOTAL);
 
   log_event();
 
   // do full init/setup since are starting all replicas after event
   // replica 0 bcasts temp to all replicas if temp_dephase is not set
 
   update->whichflag = 1;
   lmp->init();
   update->integrate->setup();
 
   if (temp_flag == 0) {
     if (universe->iworld == 0) temp_dephase = temperature->compute_scalar();
     MPI_Bcast(&temp_dephase,1,MPI_DOUBLE,universe->root_proc[0],
               universe->uworld);
   }
 
   // main loop: look for events until out of time
   // (1) dephase independently on each proc after event
   // (2) loop: dynamics, store state, quench, check event, restore state
   // (3) share and record event
 
   nbuild = ndanger = 0;
   time_dephase = time_dynamics = time_quench = time_comm = time_output = 0.0;
   bigint clock = 0;
 
   timer->barrier_start();
   time_start = timer->get_wall(Timer::TOTAL);
 
   int istep = 0;
 
   while (istep < nsteps) {
     dephase();
 
     if (stepmode == 0) istep = update->ntimestep - update->beginstep;
     else istep = clock;
 
     ireplica = -1;
     while (istep < nsteps) {
       dynamics(t_event,time_dynamics);
       fix_event->store_state_quench();
       quench();
       clock = clock + t_event*universe->nworlds;
       ireplica = check_event();
       if (ireplica >= 0) break;
       fix_event->restore_state_quench();
       if (stepmode == 0) istep = update->ntimestep - update->beginstep;
       else istep = clock;
     }
     if (ireplica < 0) break;
 
     // decrement clock by random time at which 1 or more events occurred
 
     int frac_t_event = t_event;
     for (int i = 0; i < fix_event->ncoincident; i++) {
       int frac_rand = static_cast<int> (random_clock->uniform() * t_event);
       frac_t_event = MIN(frac_t_event,frac_rand);
     }
     int decrement = (t_event - frac_t_event)*universe->nworlds;
     clock -= decrement;
 
     // share event across replicas
     // NOTE: would be potentially more efficient for correlated events
     //   if don't share until correlated check below has completed
     // this will complicate the dump (always on replica 0)
 
     share_event(ireplica,1,decrement);
     log_event();
 
     int restart_flag = 0;
     if (output->restart_flag && universe->iworld == 0) {
       if (output->restart_every_single &&
           fix_event->event_number % output->restart_every_single == 0)
         restart_flag = 1;
       if (output->restart_every_double &&
           fix_event->event_number % output->restart_every_double == 0)
         restart_flag = 1;
     }
 
     // correlated event loop
     // other procs could be dephasing during this time
 
     int corr_endstep = update->ntimestep + t_corr;
     while (update->ntimestep < corr_endstep) {
       if (update->ntimestep == update->endstep) {
         restart_flag = 0;
         break;
       }
       dynamics(t_event,time_dynamics);
       fix_event->store_state_quench();
       quench();
       clock += t_event;
       int corr_event_check = check_event(ireplica);
       if (corr_event_check >= 0) {
         share_event(ireplica,2,0);
         log_event();
         corr_endstep = update->ntimestep + t_corr;
       } else fix_event->restore_state_quench();
     }
 
     // full init/setup since are starting all replicas after event
     // event replica bcasts temp to all replicas if temp_dephase is not set
 
     update->whichflag = 1;
     lmp->init();
     update->integrate->setup();
 
     timer->barrier_start();
 
     if (t_corr > 0) replicate(ireplica);
     if (temp_flag == 0) {
       if (ireplica == universe->iworld)
         temp_dephase = temperature->compute_scalar();
       MPI_Bcast(&temp_dephase,1,MPI_DOUBLE,universe->root_proc[ireplica],
                       universe->uworld);
     }
 
     timer->barrier_stop();
     time_comm += timer->get_wall(Timer::TOTAL);
 
     // write restart file of hot coords
 
     if (restart_flag) {
       timer->barrier_start();
       output->write_restart(update->ntimestep);
       timer->barrier_stop();
       time_output += timer->get_wall(Timer::TOTAL);
     }
 
     if (stepmode == 0) istep = update->ntimestep - update->beginstep;
     else istep = clock;
   }
 
   if (stepmode) nsteps = update->ntimestep - update->beginstep;
 
   // set total timers and counters so Finish() will process them
 
   timer->set_wall(Timer::TOTAL, time_start);
   timer->barrier_stop();
 
   timer->set_wall(Timer::DEPHASE, time_dephase);
   timer->set_wall(Timer::DYNAMICS, time_dynamics);
   timer->set_wall(Timer::QUENCH, time_quench);
   timer->set_wall(Timer::REPCOMM, time_comm);
   timer->set_wall(Timer::REPOUT, time_output);
 
   neighbor->ncalls = nbuild;
   neighbor->ndanger = ndanger;
 
   if (me_universe == 0) {
     if (universe->uscreen)
       fprintf(universe->uscreen,
               "Loop time of %g on %d procs for %d steps with " BIGINT_FORMAT
               " atoms\n",
               timer->get_wall(Timer::TOTAL),nprocs_universe,
               nsteps,atom->natoms);
     if (universe->ulogfile)
       fprintf(universe->ulogfile,
               "Loop time of %g on %d procs for %d steps with " BIGINT_FORMAT
               " atoms\n",
               timer->get_wall(Timer::TOTAL),nprocs_universe,
               nsteps,atom->natoms);
   }
 
   if (me == 0) {
     if (screen) fprintf(screen,"\nPRD done\n");
     if (logfile) fprintf(logfile,"\nPRD done\n");
   }
 
   finish->end(2);
 
   update->whichflag = 0;
   update->firststep = update->laststep = 0;
   update->beginstep = update->endstep = 0;
   update->restrict_output = 0;
 
   // reset reneighboring criteria
 
   neighbor->every = neigh_every;
   neighbor->delay = neigh_delay;
   neighbor->dist_check = neigh_dist_check;
 
   // clean up
 
   memory->destroy(tagall);
   memory->destroy(xall);
   memory->destroy(imageall);
   memory->destroy(counts);
   memory->destroy(displacements);
 
   delete [] id_compute;
   MPI_Comm_free(&comm_replica);
   delete random_select;
   delete random_clock;
   delete random_dephase;
   delete velocity;
   delete finish;
   modify->delete_compute("prd_temp");
   modify->delete_fix("prd_event");
 
   compute_event->reset_extra_compute_fix(NULL);
 }
 
 /* ----------------------------------------------------------------------
    dephasing = one or more short runs with new random velocities
 ------------------------------------------------------------------------- */
 
 void PRD::dephase()
 {
   bigint ntimestep_hold = update->ntimestep;
 
   // n_dephase iterations of dephasing, each of t_dephase steps
 
   for (int i = 0; i < n_dephase; i++) {
 
     fix_event->store_state_dephase();
 
     // do not proceed to next iteration until an event-free run occurs
 
     int done = 0;
     while (!done) {
       int seed = static_cast<int> (random_dephase->uniform() * MAXSMALLINT);
       if (seed == 0) seed = 1;
       velocity->create(temp_dephase,seed);
 
       dynamics(t_dephase,time_dephase);
       fix_event->store_state_quench();
       quench();
 
       if (compute_event->compute_scalar() > 0.0) {
         fix_event->restore_state_dephase();
         update->ntimestep -= t_dephase;
         log_event();
       } else {
         fix_event->restore_state_quench();
         done = 1;
       }
 
       if (temp_flag == 0) temp_dephase = temperature->compute_scalar();
     }
   }
 
   // reset timestep as if dephase did not occur
   // clear timestep storage from computes, since now invalid
 
   update->ntimestep = ntimestep_hold;
   for (int i = 0; i < modify->ncompute; i++)
     if (modify->compute[i]->timeflag) modify->compute[i]->clearstep();
 }
 
 /* ----------------------------------------------------------------------
    short dynamics run: for event search, decorrelation, or dephasing
 ------------------------------------------------------------------------- */
 
 void PRD::dynamics(int nsteps, double &time_category)
 {
   update->whichflag = 1;
   update->nsteps = nsteps;
 
   lmp->init();
   update->integrate->setup();
   // this may be needed if don't do full init
   //modify->addstep_compute_all(update->ntimestep);
   bigint ncalls = neighbor->ncalls;
 
   timer->barrier_start();
   update->integrate->run(nsteps);
   timer->barrier_stop();
   time_category += timer->get_wall(Timer::TOTAL);
 
   nbuild += neighbor->ncalls - ncalls;
   ndanger += neighbor->ndanger;
 
   update->integrate->cleanup();
   finish->end(0);
 }
 
 /* ----------------------------------------------------------------------
    quench minimization
 ------------------------------------------------------------------------- */
 
 void PRD::quench()
 {
   bigint ntimestep_hold = update->ntimestep;
   bigint endstep_hold = update->endstep;
 
   // need to change whichflag so that minimize->setup() calling
   // modify->setup() will call fix->min_setup()
 
   update->whichflag = 2;
   update->nsteps = maxiter;
   update->endstep = update->laststep = update->firststep + maxiter;
   if (update->laststep < 0)
     error->all(FLERR,"Too many iterations");
 
   // full init works
 
   lmp->init();
   update->minimize->setup();
 
   // partial init does not work
 
   //modify->addstep_compute_all(update->ntimestep);
   //update->minimize->setup_minimal(1);
 
   int ncalls = neighbor->ncalls;
 
   timer->barrier_start();
   update->minimize->run(maxiter);
   timer->barrier_stop();
   time_quench += timer->get_wall(Timer::TOTAL);
 
   if (neighbor->ncalls == ncalls) quench_reneighbor = 0;
   else quench_reneighbor = 1;
 
   update->minimize->cleanup();
   finish->end(0);
 
   // reset timestep as if quench did not occur
   // clear timestep storage from computes, since now invalid
 
   update->ntimestep = ntimestep_hold;
   update->endstep = update->laststep = endstep_hold;
   for (int i = 0; i < modify->ncompute; i++)
     if (modify->compute[i]->timeflag) modify->compute[i]->clearstep();
 }
 
 /* ----------------------------------------------------------------------
    check for an event in any replica
    if replica_num is non-negative only check for event on replica_num
    if multiple events, choose one at random
    return -1 if no event
    else return ireplica = world in which event occured
 ------------------------------------------------------------------------- */
 
 int PRD::check_event(int replica_num)
 {
   int worldflag,universeflag,scanflag,replicaflag,ireplica;
 
   worldflag = 0;
   if (compute_event->compute_scalar() > 0.0) worldflag = 1;
   if (replica_num >= 0 && replica_num != universe->iworld) worldflag = 0;
 
   timer->barrier_start();
 
   if (me == 0) MPI_Allreduce(&worldflag,&universeflag,1,
                              MPI_INT,MPI_SUM,comm_replica);
   MPI_Bcast(&universeflag,1,MPI_INT,0,world);
 
   ncoincident = universeflag;
 
   if (!universeflag) ireplica = -1;
   else {
 
     // multiple events, choose one at random
     // iwhich = random # from 1 to N, N = # of events to choose from
     // scanflag = 1 to N on replicas with an event, 0 on non-event replicas
     // exit with worldflag = 1 on chosen replica, 0 on all others
     // note worldflag is already 0 on replicas that didn't perform event
 
     if (universeflag > 1) {
       int iwhich = static_cast<int>
         (universeflag*random_select->uniform()) + 1;
 
       if (me == 0)
         MPI_Scan(&worldflag,&scanflag,1,MPI_INT,MPI_SUM,comm_replica);
       MPI_Bcast(&scanflag,1,MPI_INT,0,world);
 
       if (scanflag != iwhich) worldflag = 0;
     }
 
     if (worldflag) replicaflag = universe->iworld;
     else replicaflag = 0;
 
     if (me == 0) MPI_Allreduce(&replicaflag,&ireplica,1,
                                MPI_INT,MPI_SUM,comm_replica);
     MPI_Bcast(&ireplica,1,MPI_INT,0,world);
   }
 
   timer->barrier_stop();
   time_comm += timer->get_wall(Timer::TOTAL);
 
   return ireplica;
 }
 
 /* ----------------------------------------------------------------------
    share quenched and hot coords owned by ireplica with all replicas
    all replicas store event in fix_event
    replica 0 dumps event snapshot
    flag = 0 = called before PRD run
    flag = 1 = called during PRD run = not correlated event
    flag = 2 = called during PRD run = correlated event
 ------------------------------------------------------------------------- */
 
 void PRD::share_event(int ireplica, int flag, int decrement)
 {
   timer->barrier_start();
 
   // communicate quenched coords to all replicas and store as event
   // decrement event counter if flag = 0 since not really an event
 
   replicate(ireplica);
   timer->barrier_stop();
   time_comm += timer->get_wall(Timer::TOTAL);
 
   // adjust time for last correlated event check (not on first event)
 
   int corr_adjust = t_corr;
   if (fix_event->event_number < 1 || flag == 2) corr_adjust = 0;
 
   // delta = time since last correlated event check
 
   int delta = update->ntimestep - fix_event->event_timestep - corr_adjust;
 
   // if this is a correlated event, time elapsed only on one partition
 
   if (flag != 2) delta *= universe->nworlds;
   if (delta > 0 && flag != 2) delta -= decrement;
   delta += corr_adjust;
 
   // delta passed to store_event_prd() should make its clock update
   //   be consistent with clock in main PRD loop
   // don't change the clock or timestep if this is a restart
 
   if (flag == 0 && fix_event->event_number != 0)
     fix_event->store_event_prd(fix_event->event_timestep,0);
   else {
     fix_event->store_event_prd(update->ntimestep,delta);
     fix_event->replica_number = ireplica;
     fix_event->correlated_event = 0;
     if (flag == 2) fix_event->correlated_event = 1;
     fix_event->ncoincident = ncoincident;
   }
   if (flag == 0) fix_event->event_number--;
 
   // dump snapshot of quenched coords, only on replica 0
   // must reneighbor and compute forces before dumping
   // since replica 0 possibly has new state from another replica
   // addstep_compute_all insures eng/virial are calculated if needed
 
   if (output->ndump && universe->iworld == 0) {
     timer->barrier_start();
     modify->addstep_compute_all(update->ntimestep);
     update->integrate->setup_minimal(1);
     output->write_dump(update->ntimestep);
     timer->barrier_stop();
     time_output += timer->get_wall(Timer::TOTAL);
   }
 
   // restore and communicate hot coords to all replicas
 
   fix_event->restore_state_quench();
   timer->barrier_start();
   replicate(ireplica);
   timer->barrier_stop();
   time_comm += timer->get_wall(Timer::TOTAL);
 }
 
 /* ----------------------------------------------------------------------
    universe proc 0 prints event info
 ------------------------------------------------------------------------- */
 
 void PRD::log_event()
 {
   timer->set_wall(Timer::TOTAL, time_start);
   if (universe->me == 0) {
     if (universe->uscreen)
       fprintf(universe->uscreen,
               BIGINT_FORMAT " %.3f " BIGINT_FORMAT " %d %d %d %d\n",
               fix_event->event_timestep,
               timer->elapsed(Timer::TOTAL),
               fix_event->clock,
               fix_event->event_number,fix_event->correlated_event,
               fix_event->ncoincident,
               fix_event->replica_number);
     if (universe->ulogfile)
       fprintf(universe->ulogfile,
               BIGINT_FORMAT " %.3f " BIGINT_FORMAT " %d %d %d %d\n",
               fix_event->event_timestep,
               timer->elapsed(Timer::TOTAL),
               fix_event->clock,
               fix_event->event_number,fix_event->correlated_event,
               fix_event->ncoincident,
               fix_event->replica_number);
   }
 }
 
 /* ----------------------------------------------------------------------
   communicate atom coords and image flags in ireplica to all other replicas
   if one proc per replica:
     direct overwrite via bcast
   else atoms could be stored in different order on a proc or on different procs:
     gather to root proc of event replica
     bcast to roots of other replicas
     bcast within each replica
     each proc extracts info for atoms it owns using atom IDs
 ------------------------------------------------------------------------- */
 
 void PRD::replicate(int ireplica)
 {
   int i,m;
 
   // -----------------------------------------------------
   // 3 cases: two for single proc per replica
   //          one for multiple procs per replica
   // -----------------------------------------------------
 
   // single proc per replica, no atom sorting
   // direct bcast of image and x
 
   if (cmode == SINGLE_PROC_DIRECT) {
     MPI_Bcast(atom->x[0],3*atom->nlocal,MPI_DOUBLE,ireplica,comm_replica);
-    MPI_Bcast(atom->image,atom->nlocal,MPI_INT,ireplica,comm_replica);
+    MPI_Bcast(atom->image,atom->nlocal,MPI_LMP_IMAGEINT,ireplica,comm_replica);
     return;
   }
 
   // single proc per replica, atom sorting is enabled
   // bcast atom IDs, x, image via tagall, xall, imageall
   // recv procs use atom->map() to match received info to owned atoms
 
   if (cmode == SINGLE_PROC_MAP) {
     double **x = atom->x;
     tagint *tag = atom->tag;
     imageint *image = atom->image;
     int nlocal = atom->nlocal;
 
     if (universe->iworld == ireplica) {
       memcpy(tagall,tag,nlocal*sizeof(tagint));
       memcpy(xall[0],x[0],3*nlocal*sizeof(double));
       memcpy(imageall,image,nlocal*sizeof(imageint));
     }
 
-    MPI_Bcast(tagall,natoms,MPI_INT,ireplica,comm_replica);
+    MPI_Bcast(tagall,natoms,MPI_LMP_TAGINT,ireplica,comm_replica);
     MPI_Bcast(xall[0],3*natoms,MPI_DOUBLE,ireplica,comm_replica);
-    MPI_Bcast(imageall,natoms,MPI_INT,ireplica,comm_replica);
+    MPI_Bcast(imageall,natoms,MPI_LMP_IMAGEINT,ireplica,comm_replica);
 
     for (i = 0; i < nlocal; i++) {
       m = atom->map(tagall[i]);
       x[m][0] = xall[i][0];
       x[m][1] = xall[i][1];
       x[m][2] = xall[i][2];
       atom->image[m] = imageall[i];
     }
 
     return;
   }
 
   // multiple procs per replica
   // MPI_Gather all atom IDs, x, image to root proc of ireplica
   // bcast to root of other replicas
   // bcast within each replica
   // each proc extracts info for atoms it owns via atom->map()
   // NOTE: assumes imagint and tagint are always the same size
 
   if (universe->iworld == ireplica) {
     MPI_Gather(&atom->nlocal,1,MPI_INT,counts,1,MPI_INT,0,world);
     displacements[0] = 0;
     for (i = 0; i < nprocs-1; i++)
       displacements[i+1] = displacements[i] + counts[i];
     MPI_Gatherv(atom->tag,atom->nlocal,MPI_LMP_TAGINT,
                 tagall,counts,displacements,MPI_LMP_TAGINT,0,world);
-    MPI_Gatherv(atom->image,atom->nlocal,MPI_LMP_TAGINT,
-                imageall,counts,displacements,MPI_LMP_TAGINT,0,world);
+    MPI_Gatherv(atom->image,atom->nlocal,MPI_LMP_IMAGEINT,
+                imageall,counts,displacements,MPI_LMP_IMAGEINT,0,world);
     for (i = 0; i < nprocs; i++) counts[i] *= 3;
     for (i = 0; i < nprocs-1; i++)
       displacements[i+1] = displacements[i] + counts[i];
     MPI_Gatherv(atom->x[0],3*atom->nlocal,MPI_DOUBLE,
                 xall[0],counts,displacements,MPI_DOUBLE,0,world);
   }
   
   if (me == 0) {
-    MPI_Bcast(tagall,natoms,MPI_INT,ireplica,comm_replica);
-    MPI_Bcast(imageall,natoms,MPI_INT,ireplica,comm_replica);
+    MPI_Bcast(tagall,natoms,MPI_LMP_TAGINT,ireplica,comm_replica);
+    MPI_Bcast(imageall,natoms,MPI_LMP_IMAGEINT,ireplica,comm_replica);
     MPI_Bcast(xall[0],3*natoms,MPI_DOUBLE,ireplica,comm_replica);
   }
   
-  MPI_Bcast(tagall,natoms,MPI_INT,0,world);
-  MPI_Bcast(imageall,natoms,MPI_INT,0,world);
+  MPI_Bcast(tagall,natoms,MPI_LMP_TAGINT,0,world);
+  MPI_Bcast(imageall,natoms,MPI_LMP_IMAGEINT,0,world);
   MPI_Bcast(xall[0],3*natoms,MPI_DOUBLE,0,world);
   
   double **x = atom->x;
   int nlocal = atom->nlocal;
   
   for (i = 0; i < natoms; i++) {
     m = atom->map(tagall[i]);
     if (m < 0 || m >= nlocal) continue;
     x[m][0] = xall[i][0];
     x[m][1] = xall[i][1];
     x[m][2] = xall[i][2];
     atom->image[m] = imageall[i];
   }
 }
 
 /* ----------------------------------------------------------------------
    parse optional parameters at end of PRD input line
 ------------------------------------------------------------------------- */
 
 void PRD::options(int narg, char **arg)
 {
   if (narg < 0) error->all(FLERR,"Illegal prd command");
 
   // set defaults
 
   etol = 0.1;
   ftol = 0.1;
   maxiter = 40;
   maxeval = 50;
   temp_flag = 0;
   stepmode = 0;
 
   char *str = (char *) "geom";
   int n = strlen(str) + 1;
   loop_setting = new char[n];
   strcpy(loop_setting,str);
 
   str = (char *) "gaussian";
   n = strlen(str) + 1;
   dist_setting = new char[n];
   strcpy(dist_setting,str);
 
   int iarg = 0;
   while (iarg < narg) {
     if (strcmp(arg[iarg],"min") == 0) {
       if (iarg+5 > narg) error->all(FLERR,"Illegal prd command");
       etol = force->numeric(FLERR,arg[iarg+1]);
       ftol = force->numeric(FLERR,arg[iarg+2]);
       maxiter = force->inumeric(FLERR,arg[iarg+3]);
       maxeval = force->inumeric(FLERR,arg[iarg+4]);
       if (maxiter < 0) error->all(FLERR,"Illegal prd command");
       iarg += 5;
 
     } else if (strcmp(arg[iarg],"temp") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal prd command");
       temp_flag = 1;
       temp_dephase = force->numeric(FLERR,arg[iarg+1]);
       if (temp_dephase <= 0.0) error->all(FLERR,"Illegal prd command");
       iarg += 2;
 
     } else if (strcmp(arg[iarg],"vel") == 0) {
       if (iarg+3 > narg) error->all(FLERR,"Illegal prd command");
       delete [] loop_setting;
       delete [] dist_setting;
 
       if (strcmp(arg[iarg+1],"all") == 0) loop_setting = NULL;
       else if (strcmp(arg[iarg+1],"local") == 0) loop_setting = NULL;
       else if (strcmp(arg[iarg+1],"geom") == 0) loop_setting = NULL;
       else error->all(FLERR,"Illegal prd command");
       int n = strlen(arg[iarg+1]) + 1;
       loop_setting = new char[n];
       strcpy(loop_setting,arg[iarg+1]);
 
       if (strcmp(arg[iarg+2],"uniform") == 0) dist_setting = NULL;
       else if (strcmp(arg[iarg+2],"gaussian") == 0) dist_setting = NULL;
       else error->all(FLERR,"Illegal prd command");
       n = strlen(arg[iarg+2]) + 1;
       dist_setting = new char[n];
       strcpy(dist_setting,arg[iarg+2]);
 
       iarg += 3;
 
     } else if (strcmp(arg[iarg],"time") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal prd command");
       if (strcmp(arg[iarg+1],"steps") == 0) stepmode = 0;
       else if (strcmp(arg[iarg+1],"clock") == 0) stepmode = 1;
       else error->all(FLERR,"Illegal prd command");
       iarg += 2;
 
     } else error->all(FLERR,"Illegal prd command");
   }
 }
diff --git a/src/lmptype.h b/src/lmptype.h
index a8a696450..a8c09bd2a 100644
--- a/src/lmptype.h
+++ b/src/lmptype.h
@@ -1,208 +1,211 @@
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    http://lammps.sandia.gov, Sandia National Laboratories
    Steve Plimpton, sjplimp@sandia.gov
 
    Copyright (2003) Sandia Corporation.  Under the terms of Contract
    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
    certain rights in this software.  This software is distributed under
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 // define integer data types used by LAMMPS and associated size limits
 
 // smallint = variables for on-procesor system (nlocal, nmax, etc)
 // imageint = variables for atom image flags (image)
 // tagint = variables for atom IDs and molecule IDs (tag,molecule)
 // bigint = variables for total system (natoms, ntimestep, etc)
 
 // smallint must be an int, as defined by C compiler
 // imageint can be 32-bit or 64-bit int, must be >= smallint
 // tagint can be 32-bit or 64-bit int, must be >= smallint
 // bigint can be 32-bit or 64-bit int, must be >= imageint,tagint
 
 // MPI_LMP_BIGINT = MPI data type corresponding to a bigint
 
 #ifndef LMP_LMPTYPE_H
 #define LMP_LMPTYPE_H
 
 #ifndef __STDC_LIMIT_MACROS
 #define __STDC_LIMIT_MACROS
 #endif
 
 #ifndef __STDC_FORMAT_MACROS
 #define __STDC_FORMAT_MACROS
 #endif
 
 #include <limits.h>
 #include <stdint.h>
 #include <inttypes.h>
 
 // grrr - IBM Power6 does not provide this def in their system header files
 
 #ifndef PRId64
 #define PRId64 "ld"
 #endif
 
 namespace LAMMPS_NS {
 
 // enum used for KOKKOS host/device flags
 
 enum ExecutionSpace{Host,Device};
 
 // reserve 2 hi bits in molecular system neigh list for special bonds flag
 // max local + ghost atoms per processor = 2^30 - 1
 
 #define SBBITS 30
 #define NEIGHMASK 0x3FFFFFFF
 
 // default to 32-bit smallint and other ints, 64-bit bigint
 
 #if !defined(LAMMPS_SMALLSMALL) && !defined(LAMMPS_BIGBIG) && !defined(LAMMPS_SMALLBIG)
 #define LAMMPS_SMALLBIG
 #endif
 
 // allow user override of LONGLONG to LONG, necessary for some machines/MPI
 
 #ifdef LAMMPS_LONGLONG_TO_LONG
 #define MPI_LL MPI_LONG
 #define ATOLL atoll
 #else
 #define MPI_LL MPI_LONG_LONG
 #define ATOLL atol
 #endif
 
 // for atomic problems that exceed 2 billion (2^31) atoms
 // 32-bit smallint/imageint/tagint, 64-bit bigint
 
 #ifdef LAMMPS_SMALLBIG
 
 typedef int smallint;
 typedef int imageint;
 typedef int tagint;
 typedef int64_t bigint;
 
 #define MAXSMALLINT INT_MAX
 #define MAXTAGINT INT_MAX
 #define MAXBIGINT INT64_MAX
 
 #define MPI_LMP_TAGINT MPI_INT
+#define MPI_LMP_IMAGEINT MPI_INT
 #define MPI_LMP_BIGINT MPI_LL
 
 #define TAGINT_FORMAT "%d"
 #define BIGINT_FORMAT "%" PRId64
 
 #define ATOTAGINT atoi
 #define ATOBIGINT ATOLL
 
 #define IMGMASK 1023
 #define IMGMAX 512
 #define IMGBITS 10
 #define IMG2BITS 20
 
 #endif
 
 // for molecular problems that exceed 2 billion (2^31) atoms
 // or problems where atoms wrap around the periodic box more than 512 times
 // 32-bit smallint, 64-bit imageint/tagint/bigint
 
 #ifdef LAMMPS_BIGBIG
 
 typedef int smallint;
 typedef int64_t imageint;
 typedef int64_t tagint;
 typedef int64_t bigint;
 
 #define MAXSMALLINT INT_MAX
 #define MAXTAGINT INT64_MAX
 #define MAXBIGINT INT64_MAX
 
 #define MPI_LMP_TAGINT MPI_LL
+#define MPI_LMP_IMAGEINT MPI_LL
 #define MPI_LMP_BIGINT MPI_LL
 
 #define TAGINT_FORMAT "%" PRId64
 #define BIGINT_FORMAT "%" PRId64
 
 #define ATOTAGINT ATOLL
 #define ATOBIGINT ATOLL
 
 #define IMGMASK 2097151
 #define IMGMAX 1048576
 #define IMGBITS 21
 #define IMG2BITS 42
 
 #endif
 
 // for machines that do not support 64-bit ints
 // 32-bit smallint/imageint/tagint/bigint
 
 #ifdef LAMMPS_SMALLSMALL
 
 typedef int smallint;
 typedef int imageint;
 typedef int tagint;
 typedef int bigint;
 
 #define MAXSMALLINT INT_MAX
 #define MAXTAGINT INT_MAX
 #define MAXBIGINT INT_MAX
 
 #define MPI_LMP_TAGINT MPI_INT
+#define MPI_LMP_IMAGEINT MPI_INT
 #define MPI_LMP_BIGINT MPI_INT
 
 #define TAGINT_FORMAT "%d"
 #define BIGINT_FORMAT "%d"
 
 #define ATOTAGINT atoi
 #define ATOBIGINT atoi
 
 #define IMGMASK 1023
 #define IMGMAX 512
 #define IMGBITS 10
 #define IMG2BITS 20
 
 #endif
 
 }
 
 // preprocessor macros for compiler specific settings
 // clear previous definitions to avoid redefinition warning
 
 #ifdef _alignvar
 #undef _alignvar
 #endif
 #ifdef _noalias
 #undef _noalias
 #endif
 
 // define stack variable alignment
 
 #if defined(__INTEL_COMPILER)
 #define _alignvar(expr,val) __declspec(align(val)) expr
 #elif defined(__GNUC__)
 #define _alignvar(expr,val) expr __attribute((aligned(val)))
 #else
 #define _alignvar(expr,val) expr
 #endif
 
 // declaration to lift aliasing restrictions
 
 #if defined(__INTEL_COMPILER)
 #define _noalias restrict
 #elif defined(__GNUC__)
 #define _noalias __restrict
 #else
 #define _noalias
 #endif
 
 #define ISFINITE(x) isfinite(x)
 
 // settings to enable LAMMPS to build under Windows
 
 #ifdef _WIN32
 #include "lmpwindows.h"
 #endif
 
 #endif