diff --git a/examples/balance/in.balance.bond.fast b/examples/balance/in.balance.bond.fast index 184943c25..d2cecd2bf 100644 --- a/examples/balance/in.balance.bond.fast +++ b/examples/balance/in.balance.bond.fast @@ -1,63 +1,63 @@ # 2d circle of particles inside a box with LJ walls variable b index 0 variable x index 50 variable y index 20 variable d index 20 variable v index 5 variable w index 2 units lj dimension 2 atom_style bond boundary f f p lattice hex 0.85 region box block 0 $x 0 $y -0.5 0.5 create_box 1 box bond/types 1 extra/bond/per/atom 6 region circle sphere $(v_d/2+1) $(v_d/2/sqrt(3.0)+1) 0.0 $(v_d/2) create_atoms 1 region circle mass 1 1.0 velocity all create 0.5 87287 loop geom velocity all set $v $w 0 sum yes pair_style lj/cut 2.5 pair_coeff 1 1 10.0 1.0 2.5 bond_style harmonic bond_coeff 1 10.0 1.2 # need to preserve 1-3, 1-4 pairwise interactions during hard collisions special_bonds lj/coul 0 1 1 create_bonds all all 1 1.0 1.5 neighbor 0.3 bin neigh_modify delay 0 every 1 check yes fix 1 all nve fix 2 all wall/lj93 xlo 0.0 1 1 2.5 xhi $x 1 1 2.5 fix 3 all wall/lj93 ylo 0.0 1 1 2.5 yhi $y 1 1 2.5 comm_style tiled -comm_modify cutoff 7.5 +comm_modify cutoff 10.0 # because bonds stretch a long ways fix 10 all balance 50 0.9 rcb #compute 1 all property/atom proc #variable p atom (c_1%10)+1 #dump 2 all custom 50 tmp.dump id v_p x y z #dump 3 all image 50 image.*.jpg v_p type bond atom 0.25 & # adiam 1.0 view 0 0 zoom 1.8 subbox yes 0.02 #variable colors string & # "red green blue yellow white & # purple pink orange lime gray" #dump_modify 3 pad 5 amap 0 10 sa 1 10 ${colors} thermo_style custom step temp epair press f_10[3] f_10 thermo 100 run 10000 diff --git a/examples/balance/in.balance.clock.dynamic b/examples/balance/in.balance.clock.dynamic index eed111fdf..a0bebd518 100644 --- a/examples/balance/in.balance.clock.dynamic +++ b/examples/balance/in.balance.clock.dynamic @@ -1,55 +1,54 @@ # 3d Lennard-Jones melt units lj atom_style atomic processors * 1 1 lattice fcc 0.8442 region box block 0 10 0 10 0 10 create_box 3 box create_atoms 1 box mass * 1.0 region long block 3 6 0 10 0 10 set region long type 2 velocity all create 1.0 87287 pair_style lj/cut 2.5 pair_coeff * * 1.0 1.0 2.5 pair_coeff * 2 1.0 1.0 5.0 neighbor 0.3 bin neigh_modify every 2 delay 4 check yes fix p all property/atom d_WEIGHT compute p all property/atom d_WEIGHT fix 0 all balance 50 1.0 shift x 10 1.0 & weight time 1.0 weight store WEIGHT variable maximb equal f_0[1] variable iter equal f_0[2] variable prev equal f_0[3] variable final equal f_0 #fix 3 all print 50 "${iter} ${prev} ${final} ${maximb}" fix 1 all nve #dump id all atom 50 dump.melt #dump id all custom 50 dump.lammpstrj id type x y z c_p #dump 2 all image 25 image.*.jpg type type & # axes yes 0.8 0.02 view 60 -30 #dump_modify 2 pad 3 #dump 3 all movie 25 movie.mpg type type & # axes yes 0.8 0.02 view 60 -30 #dump_modify 3 pad 3 thermo 50 run 500 run 500 fix 0 all balance 50 1.0 shift x 5 1.0 & weight neigh 0.5 weight time 0.66 weight store WEIGHT run 500 run 500 - diff --git a/src/balance.cpp b/src/balance.cpp index 70693f748..7780b5207 100644 --- a/src/balance.cpp +++ b/src/balance.cpp @@ -1,1322 +1,1323 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- Contributing authors, for weighted balancing: Axel Kohlmeyer (Temple U), Iain Bethune (EPCC) ------------------------------------------------------------------------- */ //#define BALANCE_DEBUG 1 #include #include #include #include #include "balance.h" #include "atom.h" #include "comm.h" #include "rcb.h" #include "irregular.h" #include "domain.h" #include "force.h" #include "update.h" #include "group.h" #include "modify.h" #include "fix_store.h" #include "imbalance.h" #include "imbalance_group.h" #include "imbalance_time.h" #include "imbalance_neigh.h" #include "imbalance_store.h" #include "imbalance_var.h" #include "timer.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; enum{XYZ,SHIFT,BISECTION}; enum{NONE,UNIFORM,USER}; enum{X,Y,Z}; enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files /* ---------------------------------------------------------------------- */ Balance::Balance(LAMMPS *lmp) : Pointers(lmp) { MPI_Comm_rank(world,&me); MPI_Comm_size(world,&nprocs); user_xsplit = user_ysplit = user_zsplit = NULL; shift_allocate = 0; proccost = allproccost = NULL; rcb = NULL; nimbalance = 0; imbalances = NULL; fixstore = NULL; fp = NULL; firststep = 1; } /* ---------------------------------------------------------------------- */ Balance::~Balance() { memory->destroy(proccost); memory->destroy(allproccost); delete [] user_xsplit; delete [] user_ysplit; delete [] user_zsplit; if (shift_allocate) { delete [] bdim; delete [] onecost; delete [] allcost; delete [] sum; delete [] target; delete [] lo; delete [] hi; delete [] losum; delete [] hisum; } delete rcb; for (int i = 0; i < nimbalance; i++) delete imbalances[i]; delete [] imbalances; // check nfix in case all fixes have already been deleted if (fixstore && modify->nfix) modify->delete_fix(fixstore->id); fixstore = NULL; if (fp) fclose(fp); } /* ---------------------------------------------------------------------- called as balance command in input script ------------------------------------------------------------------------- */ void Balance::command(int narg, char **arg) { if (domain->box_exist == 0) error->all(FLERR,"Balance command before simulation box is defined"); if (me == 0 && screen) fprintf(screen,"Balancing ...\n"); // parse required arguments if (narg < 2) error->all(FLERR,"Illegal balance command"); thresh = force->numeric(FLERR,arg[0]); int dimension = domain->dimension; int *procgrid = comm->procgrid; style = -1; xflag = yflag = zflag = NONE; int iarg = 1; while (iarg < narg) { if (strcmp(arg[iarg],"x") == 0) { if (style != -1 && style != XYZ) error->all(FLERR,"Illegal balance command"); style = XYZ; if (strcmp(arg[iarg+1],"uniform") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal balance command"); xflag = UNIFORM; iarg += 2; } else { if (1 + procgrid[0]-1 > narg) error->all(FLERR,"Illegal balance command"); xflag = USER; delete [] user_xsplit; user_xsplit = new double[procgrid[0]+1]; user_xsplit[0] = 0.0; iarg++; for (int i = 1; i < procgrid[0]; i++) user_xsplit[i] = force->numeric(FLERR,arg[iarg++]); user_xsplit[procgrid[0]] = 1.0; } } else if (strcmp(arg[iarg],"y") == 0) { if (style != -1 && style != XYZ) error->all(FLERR,"Illegal balance command"); style = XYZ; if (strcmp(arg[iarg+1],"uniform") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal balance command"); yflag = UNIFORM; iarg += 2; } else { if (1 + procgrid[1]-1 > narg) error->all(FLERR,"Illegal balance command"); yflag = USER; delete [] user_ysplit; user_ysplit = new double[procgrid[1]+1]; user_ysplit[0] = 0.0; iarg++; for (int i = 1; i < procgrid[1]; i++) user_ysplit[i] = force->numeric(FLERR,arg[iarg++]); user_ysplit[procgrid[1]] = 1.0; } } else if (strcmp(arg[iarg],"z") == 0) { if (style != -1 && style != XYZ) error->all(FLERR,"Illegal balance command"); style = XYZ; if (strcmp(arg[iarg+1],"uniform") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal balance command"); zflag = UNIFORM; iarg += 2; } else { if (1 + procgrid[2]-1 > narg) error->all(FLERR,"Illegal balance command"); zflag = USER; delete [] user_zsplit; user_zsplit = new double[procgrid[2]+1]; user_zsplit[0] = 0.0; iarg++; for (int i = 1; i < procgrid[2]; i++) user_zsplit[i] = force->numeric(FLERR,arg[iarg++]); user_zsplit[procgrid[2]] = 1.0; } } else if (strcmp(arg[iarg],"shift") == 0) { if (style != -1) error->all(FLERR,"Illegal balance command"); if (iarg+4 > narg) error->all(FLERR,"Illegal balance command"); style = SHIFT; if (strlen(arg[iarg+1]) > 3) error->all(FLERR,"Illegal balance command"); strcpy(bstr,arg[iarg+1]); nitermax = force->inumeric(FLERR,arg[iarg+2]); if (nitermax <= 0) error->all(FLERR,"Illegal balance command"); stopthresh = force->numeric(FLERR,arg[iarg+3]); if (stopthresh < 1.0) error->all(FLERR,"Illegal balance command"); iarg += 4; } else if (strcmp(arg[iarg],"rcb") == 0) { if (style != -1) error->all(FLERR,"Illegal balance command"); style = BISECTION; iarg++; } else break; } // error checks if (style == XYZ) { if (zflag != NONE && dimension == 2) error->all(FLERR,"Cannot balance in z dimension for 2d simulation"); if (xflag == USER) for (int i = 1; i <= procgrid[0]; i++) if (user_xsplit[i-1] >= user_xsplit[i]) error->all(FLERR,"Illegal balance command"); if (yflag == USER) for (int i = 1; i <= procgrid[1]; i++) if (user_ysplit[i-1] >= user_ysplit[i]) error->all(FLERR,"Illegal balance command"); if (zflag == USER) for (int i = 1; i <= procgrid[2]; i++) if (user_zsplit[i-1] >= user_zsplit[i]) error->all(FLERR,"Illegal balance command"); } if (style == SHIFT) { const int blen=strlen(bstr); for (int i = 0; i < blen; i++) { if (bstr[i] != 'x' && bstr[i] != 'y' && bstr[i] != 'z') error->all(FLERR,"Balance shift string is invalid"); if (bstr[i] == 'z' && dimension == 2) error->all(FLERR,"Balance shift string is invalid"); for (int j = i+1; j < blen; j++) if (bstr[i] == bstr[j]) error->all(FLERR,"Balance shift string is invalid"); } } if (style == BISECTION && comm->style == 0) error->all(FLERR,"Balance rcb cannot be used with comm_style brick"); // process remaining optional args options(iarg,narg,arg); if (wtflag) weight_storage(NULL); // insure particles are in current box & update box via shrink-wrap // init entire system since comm->setup is done // comm::init needs neighbor::init needs pair::init needs kspace::init, etc MPI_Barrier(world); double start_time = MPI_Wtime(); lmp->init(); if (domain->triclinic) domain->x2lamda(atom->nlocal); domain->pbc(); domain->reset_box(); comm->setup(); comm->exchange(); if (domain->triclinic) domain->lamda2x(atom->nlocal); // imbinit = initial imbalance double maxinit; - init_imbalance(); + init_imbalance(0); set_weights(); double imbinit = imbalance_factor(maxinit); // no load-balance if imbalance doesn't exceed threshhold // unless switching from tiled to non tiled layout, then force rebalance if (comm->layout == LAYOUT_TILED && style != BISECTION) { } else if (imbinit < thresh) return; // debug output of initial state #ifdef BALANCE_DEBUG if (outflag) dumpout(update->ntimestep); #endif int niter = 0; // perform load-balance // style XYZ = explicit setting of cutting planes of logical 3d grid if (style == XYZ) { if (comm->layout == LAYOUT_UNIFORM) { if (xflag == USER || yflag == USER || zflag == USER) comm->layout = LAYOUT_NONUNIFORM; } else if (comm->style == LAYOUT_NONUNIFORM) { if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM) comm->layout = LAYOUT_UNIFORM; } else if (comm->style == LAYOUT_TILED) { if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM) comm->layout = LAYOUT_UNIFORM; else comm->layout = LAYOUT_NONUNIFORM; } if (xflag == UNIFORM) { for (int i = 0; i < procgrid[0]; i++) comm->xsplit[i] = i * 1.0/procgrid[0]; comm->xsplit[procgrid[0]] = 1.0; } else if (xflag == USER) for (int i = 0; i <= procgrid[0]; i++) comm->xsplit[i] = user_xsplit[i]; if (yflag == UNIFORM) { for (int i = 0; i < procgrid[1]; i++) comm->ysplit[i] = i * 1.0/procgrid[1]; comm->ysplit[procgrid[1]] = 1.0; } else if (yflag == USER) for (int i = 0; i <= procgrid[1]; i++) comm->ysplit[i] = user_ysplit[i]; if (zflag == UNIFORM) { for (int i = 0; i < procgrid[2]; i++) comm->zsplit[i] = i * 1.0/procgrid[2]; comm->zsplit[procgrid[2]] = 1.0; } else if (zflag == USER) for (int i = 0; i <= procgrid[2]; i++) comm->zsplit[i] = user_zsplit[i]; } // style SHIFT = adjust cutting planes of logical 3d grid if (style == SHIFT) { comm->layout = LAYOUT_NONUNIFORM; shift_setup_static(bstr); niter = shift(); } // style BISECTION = recursive coordinate bisectioning if (style == BISECTION) { comm->layout = LAYOUT_TILED; bisection(1); } // reset proc sub-domains // for either brick or tiled comm style if (domain->triclinic) domain->set_lamda_box(); domain->set_local_box(); // move particles to new processors via irregular() if (domain->triclinic) domain->x2lamda(atom->nlocal); Irregular *irregular = new Irregular(lmp); if (wtflag) fixstore->disable = 0; if (style == BISECTION) irregular->migrate_atoms(1,1,rcb->sendproc); else irregular->migrate_atoms(1); if (wtflag) fixstore->disable = 1; delete irregular; if (domain->triclinic) domain->lamda2x(atom->nlocal); // output of final result if (outflag) dumpout(update->ntimestep); // check if any particles were lost bigint natoms; bigint nblocal = atom->nlocal; MPI_Allreduce(&nblocal,&natoms,1,MPI_LMP_BIGINT,MPI_SUM,world); if (natoms != atom->natoms) { char str[128]; sprintf(str,"Lost atoms via balance: original " BIGINT_FORMAT " current " BIGINT_FORMAT,atom->natoms,natoms); error->all(FLERR,str); } // imbfinal = final imbalance double maxfinal; double imbfinal = imbalance_factor(maxfinal); // stats output double stop_time = MPI_Wtime(); if (me == 0) { if (screen) { fprintf(screen," rebalancing time: %g seconds\n",stop_time-start_time); fprintf(screen," iteration count = %d\n",niter); for (int i = 0; i < nimbalance; ++i) imbalances[i]->info(screen); fprintf(screen," initial/final max load/proc = %g %g\n", maxinit,maxfinal); fprintf(screen," initial/final imbalance factor = %g %g\n", imbinit,imbfinal); } if (logfile) { fprintf(logfile," rebalancing time: %g seconds\n",stop_time-start_time); fprintf(logfile," iteration count = %d\n",niter); for (int i = 0; i < nimbalance; ++i) imbalances[i]->info(logfile); fprintf(logfile," initial/final max load/proc = %g %g\n", maxinit,maxfinal); fprintf(logfile," initial/final imbalance factor = %g %g\n", imbinit,imbfinal); } } if (style != BISECTION) { if (me == 0) { if (screen) { fprintf(screen," x cuts:"); for (int i = 0; i <= comm->procgrid[0]; i++) fprintf(screen," %g",comm->xsplit[i]); fprintf(screen,"\n"); fprintf(screen," y cuts:"); for (int i = 0; i <= comm->procgrid[1]; i++) fprintf(screen," %g",comm->ysplit[i]); fprintf(screen,"\n"); fprintf(screen," z cuts:"); for (int i = 0; i <= comm->procgrid[2]; i++) fprintf(screen," %g",comm->zsplit[i]); fprintf(screen,"\n"); } if (logfile) { fprintf(logfile," x cuts:"); for (int i = 0; i <= comm->procgrid[0]; i++) fprintf(logfile," %g",comm->xsplit[i]); fprintf(logfile,"\n"); fprintf(logfile," y cuts:"); for (int i = 0; i <= comm->procgrid[1]; i++) fprintf(logfile," %g",comm->ysplit[i]); fprintf(logfile,"\n"); fprintf(logfile," z cuts:"); for (int i = 0; i <= comm->procgrid[2]; i++) fprintf(logfile," %g",comm->zsplit[i]); fprintf(logfile,"\n"); } } } } /* ---------------------------------------------------------------------- process optional command args for Balance and FixBalance ------------------------------------------------------------------------- */ void Balance::options(int iarg, int narg, char **arg) { // count max number of weight settings nimbalance = 0; for (int i = iarg; i < narg; i++) if (strcmp(arg[i],"weight") == 0) nimbalance++; if (nimbalance) imbalances = new Imbalance*[nimbalance]; nimbalance = 0; wtflag = 0; varflag = 0; outflag = 0; int outarg = 0; fp = NULL; while (iarg < narg) { if (strcmp(arg[iarg],"weight") == 0) { wtflag = 1; Imbalance *imb; int nopt = 0; if (strcmp(arg[iarg+1],"group") == 0) { imb = new ImbalanceGroup(lmp); nopt = imb->options(narg-iarg,arg+iarg+2); imbalances[nimbalance++] = imb; } else if (strcmp(arg[iarg+1],"time") == 0) { imb = new ImbalanceTime(lmp); nopt = imb->options(narg-iarg,arg+iarg+2); imbalances[nimbalance++] = imb; } else if (strcmp(arg[iarg+1],"neigh") == 0) { imb = new ImbalanceNeigh(lmp); nopt = imb->options(narg-iarg,arg+iarg+2); imbalances[nimbalance++] = imb; } else if (strcmp(arg[iarg+1],"var") == 0) { varflag = 1; imb = new ImbalanceVar(lmp); nopt = imb->options(narg-iarg,arg+iarg+2); imbalances[nimbalance++] = imb; } else if (strcmp(arg[iarg+1],"store") == 0) { imb = new ImbalanceStore(lmp); nopt = imb->options(narg-iarg,arg+iarg+2); imbalances[nimbalance++] = imb; } else { error->all(FLERR,"Unknown (fix) balance weight method"); } iarg += 2+nopt; } else if (strcmp(arg[iarg],"out") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal (fix) balance command"); outflag = 1; outarg = iarg+1; iarg += 2; } else error->all(FLERR,"Illegal (fix) balance command"); } // output file if (outflag && comm->me == 0) { fp = fopen(arg[outarg],"w"); if (fp == NULL) error->one(FLERR,"Cannot open (fix) balance output file"); } } /* ---------------------------------------------------------------------- allocate per-particle weight storage via FixStore use prefix to distinguish Balance vs FixBalance storage fix could already be allocated if fix balance is re-specified ------------------------------------------------------------------------- */ void Balance::weight_storage(char *prefix) { char *fixargs[6]; if (prefix) { int n = strlen(prefix) + 32; fixargs[0] = new char[n]; strcpy(fixargs[0],prefix); strcat(fixargs[0],"IMBALANCE_WEIGHTS"); } else fixargs[0] = (char *) "IMBALANCE_WEIGHTS"; fixargs[1] = (char *) "all"; fixargs[2] = (char *) "STORE"; fixargs[3] = (char *) "peratom"; fixargs[4] = (char *) "0"; fixargs[5] = (char *) "1"; int ifix = modify->find_fix(fixargs[0]); if (ifix < 1) { modify->add_fix(6,fixargs); fixstore = (FixStore *) modify->fix[modify->nfix-1]; } else fixstore = (FixStore *) modify->fix[ifix]; fixstore->disable = 1; if (prefix) delete [] fixargs[0]; } /* ---------------------------------------------------------------------- invoke init() for each Imbalance class + flag = 0 for call from Balance, 1 for call from FixBalance ------------------------------------------------------------------------- */ -void Balance::init_imbalance() +void Balance::init_imbalance(int flag) { if (!wtflag) return; - for (int n = 0; n < nimbalance; n++) imbalances[n]->init(); + for (int n = 0; n < nimbalance; n++) imbalances[n]->init(flag); } /* ---------------------------------------------------------------------- set weight for each particle via list of Nimbalance classes ------------------------------------------------------------------------- */ void Balance::set_weights() { if (!wtflag) return; weight = fixstore->vstore; int nlocal = atom->nlocal; for (int i = 0; i < nlocal; i++) weight[i] = 1.0; for (int n = 0; n < nimbalance; n++) imbalances[n]->compute(weight); } /* ---------------------------------------------------------------------- calculate imbalance factor based on particle count or particle weights return max = max load per proc return imbalance = max load per proc / ave load per proc ------------------------------------------------------------------------- */ double Balance::imbalance_factor(double &maxcost) { double mycost,totalcost; if (wtflag) { weight = fixstore->vstore; int nlocal = atom->nlocal; mycost = 0.0; for (int i = 0; i < nlocal; i++) mycost += weight[i]; } else mycost = atom->nlocal; MPI_Allreduce(&mycost,&maxcost,1,MPI_DOUBLE,MPI_MAX,world); MPI_Allreduce(&mycost,&totalcost,1,MPI_DOUBLE,MPI_SUM,world); double imbalance = 1.0; if (maxcost > 0.0) imbalance = maxcost / (totalcost/nprocs); return imbalance; } /* ---------------------------------------------------------------------- perform balancing via RCB class sortflag = flag for sorting order of received messages by proc ID return list of procs to send my atoms to ------------------------------------------------------------------------- */ int *Balance::bisection(int sortflag) { if (!rcb) rcb = new RCB(lmp); // NOTE: this logic is specific to orthogonal boxes, not triclinic int dim = domain->dimension; double *boxlo = domain->boxlo; double *boxhi = domain->boxhi; double *prd = domain->prd; // shrink-wrap simulation box around atoms for input to RCB // leads to better-shaped sub-boxes when atoms are far from box boundaries double shrink[6],shrinkall[6]; shrink[0] = boxhi[0]; shrink[1] = boxhi[1]; shrink[2] = boxhi[2]; shrink[3] = boxlo[0]; shrink[4] = boxlo[1]; shrink[5] = boxlo[2]; double **x = atom->x; int nlocal = atom->nlocal; for (int i = 0; i < nlocal; i++) { shrink[0] = MIN(shrink[0],x[i][0]); shrink[1] = MIN(shrink[1],x[i][1]); shrink[2] = MIN(shrink[2],x[i][2]); shrink[3] = MAX(shrink[3],x[i][0]); shrink[4] = MAX(shrink[4],x[i][1]); shrink[5] = MAX(shrink[5],x[i][2]); } shrink[3] = -shrink[3]; shrink[4] = -shrink[4]; shrink[5] = -shrink[5]; MPI_Allreduce(shrink,shrinkall,6,MPI_DOUBLE,MPI_MIN,world); shrinkall[3] = -shrinkall[3]; shrinkall[4] = -shrinkall[4]; shrinkall[5] = -shrinkall[5]; double *shrinklo = &shrinkall[0]; double *shrinkhi = &shrinkall[3]; // invoke RCB // then invert() to create list of proc assignements for my atoms if (wtflag) { weight = fixstore->vstore; rcb->compute(dim,atom->nlocal,atom->x,weight,shrinklo,shrinkhi); } else rcb->compute(dim,atom->nlocal,atom->x,NULL,shrinklo,shrinkhi); rcb->invert(sortflag); // reset RCB lo/hi bounding box to full simulation box as needed double *lo = rcb->lo; double *hi = rcb->hi; if (lo[0] == shrinklo[0]) lo[0] = boxlo[0]; if (lo[1] == shrinklo[1]) lo[1] = boxlo[1]; if (lo[2] == shrinklo[2]) lo[2] = boxlo[2]; if (hi[0] == shrinkhi[0]) hi[0] = boxhi[0]; if (hi[1] == shrinkhi[1]) hi[1] = boxhi[1]; if (hi[2] == shrinkhi[2]) hi[2] = boxhi[2]; // store RCB cut, dim, lo/hi box in CommTiled // cut and lo/hi need to be in fractional form so can // OK if changes by epsilon from what RCB used since atoms // will subsequently migrate to new owning procs by exchange() anyway // ditto for atoms exactly on lo/hi RCB box boundaries due to ties comm->rcbnew = 1; int idim = rcb->cutdim; if (idim >= 0) comm->rcbcutfrac = (rcb->cut - boxlo[idim]) / prd[idim]; else comm->rcbcutfrac = 0.0; comm->rcbcutdim = idim; double (*mysplit)[2] = comm->mysplit; mysplit[0][0] = (lo[0] - boxlo[0]) / prd[0]; if (hi[0] == boxhi[0]) mysplit[0][1] = 1.0; else mysplit[0][1] = (hi[0] - boxlo[0]) / prd[0]; mysplit[1][0] = (lo[1] - boxlo[1]) / prd[1]; if (hi[1] == boxhi[1]) mysplit[1][1] = 1.0; else mysplit[1][1] = (hi[1] - boxlo[1]) / prd[1]; mysplit[2][0] = (lo[2] - boxlo[2]) / prd[2]; if (hi[2] == boxhi[2]) mysplit[2][1] = 1.0; else mysplit[2][1] = (hi[2] - boxlo[2]) / prd[2]; // return list of procs to send my atoms to return rcb->sendproc; } /* ---------------------------------------------------------------------- setup static load balance operations called from command and indirectly initially from fix balance set rho = 0 for static balancing ------------------------------------------------------------------------- */ void Balance::shift_setup_static(char *str) { shift_allocate = 1; memory->create(proccost,nprocs,"balance:proccost"); memory->create(allproccost,nprocs,"balance:allproccost"); ndim = strlen(str); bdim = new int[ndim]; for (int i = 0; i < ndim; i++) { if (str[i] == 'x') bdim[i] = X; if (str[i] == 'y') bdim[i] = Y; if (str[i] == 'z') bdim[i] = Z; } int max = MAX(comm->procgrid[0],comm->procgrid[1]); max = MAX(max,comm->procgrid[2]); onecost = new double[max]; allcost = new double[max]; sum = new double[max+1]; target = new double[max+1]; lo = new double[max+1]; hi = new double[max+1]; losum = new double[max+1]; hisum = new double[max+1]; // if current layout is TILED, set initial uniform splits in Comm // this gives starting point to subsequent shift balancing if (comm->layout == LAYOUT_TILED) { int *procgrid = comm->procgrid; double *xsplit = comm->xsplit; double *ysplit = comm->ysplit; double *zsplit = comm->zsplit; for (int i = 0; i < procgrid[0]; i++) xsplit[i] = i * 1.0/procgrid[0]; for (int i = 0; i < procgrid[1]; i++) ysplit[i] = i * 1.0/procgrid[1]; for (int i = 0; i < procgrid[2]; i++) zsplit[i] = i * 1.0/procgrid[2]; xsplit[procgrid[0]] = ysplit[procgrid[1]] = zsplit[procgrid[2]] = 1.0; } rho = 0; } /* ---------------------------------------------------------------------- setup shift load balance operations called from fix balance set rho = 1 to do dynamic balancing after call to shift_setup_static() ------------------------------------------------------------------------- */ void Balance::shift_setup(char *str, int nitermax_in, double thresh_in) { shift_setup_static(str); nitermax = nitermax_in; stopthresh = thresh_in; rho = 1; } /* ---------------------------------------------------------------------- load balance by changing xyz split proc boundaries in Comm called one time from input script command or many times from fix balance return niter = iteration count ------------------------------------------------------------------------- */ int Balance::shift() { int i,j,k,m,np,max; double mycost,totalcost; double *split; // no balancing if no atoms bigint natoms = atom->natoms; if (natoms == 0) return 0; // set delta for 1d balancing = root of threshhold // root = # of dimensions being balanced on double delta = pow(stopthresh,1.0/ndim) - 1.0; int *procgrid = comm->procgrid; // all balancing done in lamda coords domain->x2lamda(atom->nlocal); // loop over dimensions in balance string int niter = 0; for (int idim = 0; idim < ndim; idim++) { // split = ptr to xyz split in Comm if (bdim[idim] == X) split = comm->xsplit; else if (bdim[idim] == Y) split = comm->ysplit; else if (bdim[idim] == Z) split = comm->zsplit; else continue; // initial count and sum np = procgrid[bdim[idim]]; tally(bdim[idim],np,split); // target[i] = desired sum at split I if (wtflag) { weight = fixstore->vstore; int nlocal = atom->nlocal; mycost = 0.0; for (i = 0; i < nlocal; i++) mycost += weight[i]; } else mycost = atom->nlocal; MPI_Allreduce(&mycost,&totalcost,1,MPI_DOUBLE,MPI_SUM,world); for (i = 0; i < np; i++) target[i] = totalcost/np * i; target[np] = totalcost; // lo[i] = closest split <= split[i] with a sum <= target // hi[i] = closest split >= split[i] with a sum >= target lo[0] = hi[0] = 0.0; lo[np] = hi[np] = 1.0; losum[0] = hisum[0] = 0.0; losum[np] = hisum[np] = totalcost; for (i = 1; i < np; i++) { for (j = i; j >= 0; j--) if (sum[j] <= target[i]) { lo[i] = split[j]; losum[i] = sum[j]; break; } for (j = i; j <= np; j++) if (sum[j] >= target[i]) { hi[i] = split[j]; hisum[i] = sum[j]; break; } } // iterate until balanced #ifdef BALANCE_DEBUG if (me == 0) debug_shift_output(idim,0,np,split); #endif int doneflag; int change = 1; for (m = 0; m < nitermax; m++) { change = adjust(np,split); tally(bdim[idim],np,split); niter++; #ifdef BALANCE_DEBUG if (me == 0) debug_shift_output(idim,m+1,np,split); if (outflag) dumpout(update->ntimestep); #endif // stop if no change in splits, b/c all targets are met exactly if (!change) break; // stop if all split sums are within delta of targets // this is a 1d test of particle count per slice // assumption is that this is sufficient accuracy // for 3d imbalance factor to reach threshhold doneflag = 1; for (i = 1; i < np; i++) if (fabs(1.0*(sum[i]-target[i]))/target[i] > delta) doneflag = 0; if (doneflag) break; } // eliminate final adjacent splits that are duplicates // can happen if particle distribution is narrow and Nitermax is small // set lo = midpt between splits // spread duplicates out evenly between bounding midpts with non-duplicates // i,j = lo/hi indices of set of duplicate splits // delta = new spacing between duplicates // bounding midpts = lo[i-1] and lo[j] int duplicate = 0; for (i = 1; i < np-1; i++) if (split[i] == split[i+1]) duplicate = 1; if (duplicate) { for (i = 0; i < np; i++) lo[i] = 0.5 * (split[i] + split[i+1]); i = 1; while (i < np-1) { j = i+1; while (split[j] == split[i]) j++; j--; if (j > i) { delta = (lo[j] - lo[i-1]) / (j-i+2); for (k = i; k <= j; k++) split[k] = lo[i-1] + (k-i+1)*delta; } i = j+1; } } // sanity check on bad duplicate or inverted splits // zero or negative width sub-domains will break Comm class // should never happen if recursive multisection algorithm is correct int bad = 0; for (i = 0; i < np; i++) if (split[i] >= split[i+1]) bad = 1; if (bad) error->all(FLERR,"Balance produced bad splits"); /* if (me == 0) { printf("BAD SPLITS %d %d %d\n",np+1,niter,delta); for (i = 0; i < np+1; i++) printf(" %g",split[i]); printf("\n"); } */ // stop at this point in bstr if imbalance factor < threshhold // this is a true 3d test of particle count per processor double imbfactor = imbalance_splits(max); if (imbfactor <= stopthresh) break; } // restore real coords domain->lamda2x(atom->nlocal); return niter; } /* ---------------------------------------------------------------------- count atoms in each slice, based on their dim coordinate N = # of slices split = N+1 cuts between N slices return updated count = particles per slice return updated sum = cummulative count below each of N+1 splits use binary search to find which slice each atom is in ------------------------------------------------------------------------- */ void Balance::tally(int dim, int n, double *split) { for (int i = 0; i < n; i++) onecost[i] = 0.0; double **x = atom->x; int nlocal = atom->nlocal; int index; if (wtflag) { weight = fixstore->vstore; for (int i = 0; i < nlocal; i++) { index = binary(x[i][dim],n,split); onecost[index] += weight[i]; } } else { for (int i = 0; i < nlocal; i++) { index = binary(x[i][dim],n,split); onecost[index] += 1.0; } } MPI_Allreduce(onecost,allcost,n,MPI_DOUBLE,MPI_SUM,world); sum[0] = 0.0; for (int i = 1; i < n+1; i++) sum[i] = sum[i-1] + allcost[i-1]; } /* ---------------------------------------------------------------------- adjust cuts between N slices in a dim via recursive multisectioning method split = current N+1 cuts, with 0.0 and 1.0 at end points sum = cummulative count up to each split target = desired cummulative count up to each split lo/hi = split values that bound current split update lo/hi to reflect sums at current split values overwrite split with new cuts guaranteed that splits will remain in ascending order, though adjacent values may be identical recursive bisectioning zooms in on each cut by halving lo/hi return 0 if no changes in any splits, b/c they are all perfect ------------------------------------------------------------------------- */ int Balance::adjust(int n, double *split) { int i; double fraction; // reset lo/hi based on current sum and splits // insure lo is monotonically increasing, ties are OK // insure hi is monotonically decreasing, ties are OK // this effectively uses info from nearby splits // to possibly tighten bounds on lo/hi for (i = 1; i < n; i++) { if (sum[i] <= target[i]) { lo[i] = split[i]; losum[i] = sum[i]; } if (sum[i] >= target[i]) { hi[i] = split[i]; hisum[i] = sum[i]; } } for (i = 1; i < n; i++) if (lo[i] < lo[i-1]) { lo[i] = lo[i-1]; losum[i] = losum[i-1]; } for (i = n-1; i > 0; i--) if (hi[i] > hi[i+1]) { hi[i] = hi[i+1]; hisum[i] = hisum[i+1]; } int change = 0; for (int i = 1; i < n; i++) if (sum[i] != target[i]) { change = 1; if (rho == 0) split[i] = 0.5 * (lo[i]+hi[i]); else { fraction = 1.0*(target[i]-losum[i]) / (hisum[i]-losum[i]); split[i] = lo[i] + fraction * (hi[i]-lo[i]); } } return change; } /* ---------------------------------------------------------------------- calculate imbalance based on processor splits in 3 dims atoms must be in lamda coords (0-1) before called map particles to 3d grid of procs return maxcost = max load per proc return imbalance factor = max load per proc / ave load per proc ------------------------------------------------------------------------- */ double Balance::imbalance_splits(int &maxcost) { double *xsplit = comm->xsplit; double *ysplit = comm->ysplit; double *zsplit = comm->zsplit; int nx = comm->procgrid[0]; int ny = comm->procgrid[1]; int nz = comm->procgrid[2]; for (int i = 0; i < nprocs; i++) proccost[i] = 0.0; double **x = atom->x; int nlocal = atom->nlocal; int ix,iy,iz; if (wtflag) { weight = fixstore->vstore; for (int i = 0; i < nlocal; i++) { ix = binary(x[i][0],nx,xsplit); iy = binary(x[i][1],ny,ysplit); iz = binary(x[i][2],nz,zsplit); proccost[iz*nx*ny + iy*nx + ix] += weight[i]; } } else { for (int i = 0; i < nlocal; i++) { ix = binary(x[i][0],nx,xsplit); iy = binary(x[i][1],ny,ysplit); iz = binary(x[i][2],nz,zsplit); proccost[iz*nx*ny + iy*nx + ix] += 1.0; } } // one proc's particles may map to many partitions, so must Allreduce MPI_Allreduce(proccost,allproccost,nprocs,MPI_DOUBLE,MPI_SUM,world); maxcost = 0.0; double totalcost = 0.0; for (int i = 0; i < nprocs; i++) { maxcost = MAX(maxcost,allproccost[i]); totalcost += allproccost[i]; } double imbalance = 1.0; if (maxcost > 0.0) imbalance = maxcost / (totalcost/nprocs); return imbalance; } /* ---------------------------------------------------------------------- binary search for where value falls in N-length vec note that vec actually has N+1 values, but ignore last one values in vec are monotonically increasing, but adjacent values can be ties value may be outside range of vec limits always return index from 0 to N-1 inclusive return 0 if value < vec[0] reutrn N-1 if value >= vec[N-1] return index = 1 to N-2 inclusive if vec[index] <= value < vec[index+1] note that for adjacent tie values, index of lower tie is not returned since never satisfies 2nd condition that value < vec[index+1] ------------------------------------------------------------------------- */ int Balance::binary(double value, int n, double *vec) { int lo = 0; int hi = n-1; if (value < vec[lo]) return lo; if (value >= vec[hi]) return hi; // insure vec[lo] <= value < vec[hi] at every iteration // done when lo,hi are adjacent int index = (lo+hi)/2; while (lo < hi-1) { if (value < vec[index]) hi = index; else if (value >= vec[index]) lo = index; index = (lo+hi)/2; } return index; } /* ---------------------------------------------------------------------- write dump snapshot of line segments in Pizza.py mdump mesh format write xy lines around each proc's sub-domain for 2d write xyz cubes around each proc's sub-domain for 3d only called by proc 0 NOTE: only implemented for orthogonal boxes, not triclinic ------------------------------------------------------------------------- */ void Balance::dumpout(bigint tstep) { int dimension = domain->dimension; int triclinic = domain->triclinic; // Allgather each proc's sub-box // could use Gather, but that requires MPI to alloc memory double *lo,*hi; if (triclinic == 0) { lo = domain->sublo; hi = domain->subhi; } else { lo = domain->sublo_lamda; hi = domain->subhi_lamda; } double box[6]; box[0] = lo[0]; box[1] = lo[1]; box[2] = lo[2]; box[3] = hi[0]; box[4] = hi[1]; box[5] = hi[2]; double **boxall; memory->create(boxall,nprocs,6,"balance:dumpout"); MPI_Allgather(box,6,MPI_DOUBLE,&boxall[0][0],6,MPI_DOUBLE,world); if (me) { memory->destroy(boxall); return; } // proc 0 writes out nodal coords // some will be duplicates double *boxlo = domain->boxlo; double *boxhi = domain->boxhi; fprintf(fp,"ITEM: TIMESTEP\n"); fprintf(fp,BIGINT_FORMAT "\n",tstep); fprintf(fp,"ITEM: NUMBER OF NODES\n"); if (dimension == 2) fprintf(fp,"%d\n",4*nprocs); else fprintf(fp,"%d\n",8*nprocs); fprintf(fp,"ITEM: BOX BOUNDS\n"); fprintf(fp,"%g %g\n",boxlo[0],boxhi[0]); fprintf(fp,"%g %g\n",boxlo[1],boxhi[1]); fprintf(fp,"%g %g\n",boxlo[2],boxhi[2]); fprintf(fp,"ITEM: NODES\n"); if (triclinic == 0) { if (dimension == 2) { int m = 0; for (int i = 0; i < nprocs; i++) { fprintf(fp,"%d %d %g %g %g\n",m+1,1,boxall[i][0],boxall[i][1],0.0); fprintf(fp,"%d %d %g %g %g\n",m+2,1,boxall[i][3],boxall[i][1],0.0); fprintf(fp,"%d %d %g %g %g\n",m+3,1,boxall[i][3],boxall[i][4],0.0); fprintf(fp,"%d %d %g %g %g\n",m+4,1,boxall[i][0],boxall[i][4],0.0); m += 4; } } else { int m = 0; for (int i = 0; i < nprocs; i++) { fprintf(fp,"%d %d %g %g %g\n",m+1,1, boxall[i][0],boxall[i][1],boxall[i][2]); fprintf(fp,"%d %d %g %g %g\n",m+2,1, boxall[i][3],boxall[i][1],boxall[i][2]); fprintf(fp,"%d %d %g %g %g\n",m+3,1, boxall[i][3],boxall[i][4],boxall[i][2]); fprintf(fp,"%d %d %g %g %g\n",m+4,1, boxall[i][0],boxall[i][4],boxall[i][2]); fprintf(fp,"%d %d %g %g %g\n",m+5,1, boxall[i][0],boxall[i][1],boxall[i][5]); fprintf(fp,"%d %d %g %g %g\n",m+6,1, boxall[i][3],boxall[i][1],boxall[i][5]); fprintf(fp,"%d %d %g %g %g\n",m+7,1, boxall[i][3],boxall[i][4],boxall[i][5]); fprintf(fp,"%d %d %g %g %g\n",m+8,1, boxall[i][0],boxall[i][4],boxall[i][5]); m += 8; } } } else { double (*bc)[3] = domain->corners; if (dimension == 2) { int m = 0; for (int i = 0; i < nprocs; i++) { domain->lamda_box_corners(&boxall[i][0],&boxall[i][3]); fprintf(fp,"%d %d %g %g %g\n",m+1,1,bc[0][0],bc[0][1],0.0); fprintf(fp,"%d %d %g %g %g\n",m+2,1,bc[1][0],bc[1][1],0.0); fprintf(fp,"%d %d %g %g %g\n",m+3,1,bc[2][0],bc[2][1],0.0); fprintf(fp,"%d %d %g %g %g\n",m+4,1,bc[3][0],bc[3][1],0.0); m += 4; } } else { int m = 0; for (int i = 0; i < nprocs; i++) { domain->lamda_box_corners(&boxall[i][0],&boxall[i][3]); fprintf(fp,"%d %d %g %g %g\n",m+1,1,bc[0][0],bc[0][1],bc[0][1]); fprintf(fp,"%d %d %g %g %g\n",m+2,1,bc[1][0],bc[1][1],bc[1][1]); fprintf(fp,"%d %d %g %g %g\n",m+3,1,bc[2][0],bc[2][1],bc[2][1]); fprintf(fp,"%d %d %g %g %g\n",m+4,1,bc[3][0],bc[3][1],bc[3][1]); fprintf(fp,"%d %d %g %g %g\n",m+5,1,bc[4][0],bc[4][1],bc[4][1]); fprintf(fp,"%d %d %g %g %g\n",m+6,1,bc[5][0],bc[5][1],bc[5][1]); fprintf(fp,"%d %d %g %g %g\n",m+7,1,bc[6][0],bc[6][1],bc[6][1]); fprintf(fp,"%d %d %g %g %g\n",m+8,1,bc[7][0],bc[7][1],bc[7][1]); m += 8; } } } // write out one square/cube per processor for 2d/3d fprintf(fp,"ITEM: TIMESTEP\n"); fprintf(fp,BIGINT_FORMAT "\n",tstep); if (dimension == 2) fprintf(fp,"ITEM: NUMBER OF SQUARES\n"); else fprintf(fp,"ITEM: NUMBER OF CUBES\n"); fprintf(fp,"%d\n",nprocs); if (dimension == 2) fprintf(fp,"ITEM: SQUARES\n"); else fprintf(fp,"ITEM: CUBES\n"); if (dimension == 2) { int m = 0; for (int i = 0; i < nprocs; i++) { fprintf(fp,"%d %d %d %d %d %d\n",i+1,1,m+1,m+2,m+3,m+4); m += 4; } } else { int m = 0; for (int i = 0; i < nprocs; i++) { fprintf(fp,"%d %d %d %d %d %d %d %d %d %d\n", i+1,1,m+1,m+2,m+3,m+4,m+5,m+6,m+7,m+8); m += 8; } } memory->destroy(boxall); } /* ---------------------------------------------------------------------- debug output for Idim and count only called by proc 0 ------------------------------------------------------------------------- */ #ifdef BALANCE_DEBUG void Balance::debug_shift_output(int idim, int m, int np, double *split) { int i; const char *dim = NULL; double *boxlo = domain->boxlo; double *prd = domain->prd; if (bdim[idim] == X) dim = "X"; else if (bdim[idim] == Y) dim = "Y"; else if (bdim[idim] == Z) dim = "Z"; fprintf(stderr,"Dimension %s, Iteration %d\n",dim,m); fprintf(stderr," Count:"); for (i = 0; i < np; i++) fprintf(stderr," " BIGINT_FORMAT,count[i]); fprintf(stderr,"\n"); fprintf(stderr," Sum:"); for (i = 0; i <= np; i++) fprintf(stderr," " BIGINT_FORMAT,sum[i]); fprintf(stderr,"\n"); fprintf(stderr," Target:"); for (i = 0; i <= np; i++) fprintf(stderr," " BIGINT_FORMAT,target[i]); fprintf(stderr,"\n"); fprintf(stderr," Actual cut:"); for (i = 0; i <= np; i++) fprintf(stderr," %g",boxlo[bdim[idim]] + split[i]*prd[bdim[idim]]); fprintf(stderr,"\n"); fprintf(stderr," Split:"); for (i = 0; i <= np; i++) fprintf(stderr," %g",split[i]); fprintf(stderr,"\n"); fprintf(stderr," Low:"); for (i = 0; i <= np; i++) fprintf(stderr," %g",lo[i]); fprintf(stderr,"\n"); fprintf(stderr," Low-sum:"); for (i = 0; i <= np; i++) fprintf(stderr," " BIGINT_FORMAT,losum[i]); fprintf(stderr,"\n"); fprintf(stderr," Hi:"); for (i = 0; i <= np; i++) fprintf(stderr," %g",hi[i]); fprintf(stderr,"\n"); fprintf(stderr," Hi-sum:"); for (i = 0; i <= np; i++) fprintf(stderr," " BIGINT_FORMAT,hisum[i]); fprintf(stderr,"\n"); fprintf(stderr," Delta:"); for (i = 0; i < np; i++) fprintf(stderr," %g",split[i+1]-split[i]); fprintf(stderr,"\n"); bigint max = 0; for (i = 0; i < np; i++) max = MAX(max,count[i]); fprintf(stderr," Imbalance factor: %g\n",1.0*max*np/target[np]); } #endif diff --git a/src/balance.h b/src/balance.h index a9397e6e4..7ca035402 100644 --- a/src/balance.h +++ b/src/balance.h @@ -1,137 +1,137 @@ /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #ifdef COMMAND_CLASS CommandStyle(balance,Balance) #else #ifndef LMP_BALANCE_H #define LMP_BALANCE_H #include #include "pointers.h" namespace LAMMPS_NS { class Balance : protected Pointers { public: class RCB *rcb; class FixStore *fixstore; // per-atom weights stored in FixStore int wtflag; // 1 if particle weighting is used int varflag; // 1 if weight style var(iable) is used int outflag; // 1 for output of balance results to file Balance(class LAMMPS *); ~Balance(); void command(int, char **); void options(int, int, char **); void weight_storage(char *); - void init_imbalance(); + void init_imbalance(int); void set_weights(); double imbalance_factor(double &); void shift_setup(char *, int, double); int shift(); int *bisection(int sortflag = 0); void dumpout(bigint); private: int me,nprocs; double thresh; // threshhold to perform LB int style; // style of LB int xflag,yflag,zflag; // xyz LB flags double *user_xsplit,*user_ysplit,*user_zsplit; // params for xyz LB int nitermax; // params for shift LB double stopthresh; char bstr[4]; int shift_allocate; // 1 if SHIFT vectors have been allocated int ndim; // length of balance string bstr int *bdim; // XYZ for each character in bstr double *onecost; // work vector of counts in one dim double *allcost; // counts for slices in one dim double *sum; // cummulative count for slices in one dim double *target; // target sum for slices in one dim double *lo,*hi; // lo/hi split coords that bound each target double *losum,*hisum; // cummulative counts at lo/hi coords int rho; // 0 for geometric recursion // 1 for density weighted recursion double *proccost; // particle cost per processor double *allproccost; // proccost summed across procs int nimbalance; // number of user-specified weight styles class Imbalance **imbalances; // list of Imb classes, one per weight style double *weight; // ptr to FixStore weight vector FILE *fp; // balance output file int firststep; double imbalance_splits(int &); void shift_setup_static(char *); void tally(int, int, double *); int adjust(int, double *); int binary(double, int, double *); #ifdef BALANCE_DEBUG void debug_shift_output(int, int, int, double *); #endif }; } #endif #endif /* ERROR/WARNING messages: E: Balance command before simulation box is defined The balance command cannot be used before a read_data, read_restart, or create_box command. E: Illegal ... command Self-explanatory. Check the input script syntax and compare to the documentation for the command. You can use -echo screen as a command-line option when running LAMMPS to see the offending line. E: Cannot open balance output file Self-explanatory. E: Cannot balance in z dimension for 2d simulation Self-explanatory. E: Balance shift string is invalid The string can only contain the characters "x", "y", or "z". E: Balance rcb cannot be used with comm_style brick Comm_style tiled must be used instead. E: Lost atoms via balance: original %ld current %ld This should not occur. Report the problem to the developers. E: Balance produced bad splits This should not occur. It means two or more cutting plane locations are on top of each other or out of order. Report the problem to the developers. */ diff --git a/src/fix_balance.cpp b/src/fix_balance.cpp index 59742583e..33054e0e3 100644 --- a/src/fix_balance.cpp +++ b/src/fix_balance.cpp @@ -1,321 +1,335 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #include #include #include "fix_balance.h" #include "balance.h" #include "update.h" #include "atom.h" #include "comm.h" #include "domain.h" #include "neighbor.h" #include "irregular.h" #include "force.h" #include "kspace.h" #include "modify.h" #include "fix_store.h" #include "rcb.h" +#include "timer.h" #include "error.h" using namespace LAMMPS_NS; using namespace FixConst; enum{SHIFT,BISECTION}; enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files /* ---------------------------------------------------------------------- */ FixBalance::FixBalance(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg), balance(NULL), irregular(NULL) { if (narg < 6) error->all(FLERR,"Illegal fix balance command"); box_change_domain = 1; scalar_flag = 1; extscalar = 0; vector_flag = 1; size_vector = 3; extvector = 0; global_freq = 1; // parse required arguments int dimension = domain->dimension; nevery = force->inumeric(FLERR,arg[3]); if (nevery < 0) error->all(FLERR,"Illegal fix balance command"); thresh = force->numeric(FLERR,arg[4]); if (strcmp(arg[5],"shift") == 0) lbstyle = SHIFT; else if (strcmp(arg[5],"rcb") == 0) lbstyle = BISECTION; else error->all(FLERR,"Illegal fix balance command"); int iarg = 5; if (lbstyle == SHIFT) { if (iarg+4 > narg) error->all(FLERR,"Illegal fix balance command"); if (strlen(arg[iarg+1]) > 3) error->all(FLERR,"Illegal fix balance command"); strcpy(bstr,arg[iarg+1]); nitermax = force->inumeric(FLERR,arg[iarg+2]); if (nitermax <= 0) error->all(FLERR,"Illegal fix balance command"); stopthresh = force->numeric(FLERR,arg[iarg+3]); if (stopthresh < 1.0) error->all(FLERR,"Illegal fix balance command"); iarg += 4; } else if (lbstyle == BISECTION) { iarg++; } // error checks if (lbstyle == SHIFT) { int blen = strlen(bstr); for (int i = 0; i < blen; i++) { if (bstr[i] != 'x' && bstr[i] != 'y' && bstr[i] != 'z') error->all(FLERR,"Fix balance shift string is invalid"); if (bstr[i] == 'z' && dimension == 2) error->all(FLERR,"Fix balance shift string is invalid"); for (int j = i+1; j < blen; j++) if (bstr[i] == bstr[j]) error->all(FLERR,"Fix balance shift string is invalid"); } } if (lbstyle == BISECTION && comm->style == 0) error->all(FLERR,"Fix balance rcb cannot be used with comm_style brick"); // create instance of Balance class // if SHIFT, initialize it with params // process remaining optional args via Balance balance = new Balance(lmp); if (lbstyle == SHIFT) balance->shift_setup(bstr,nitermax,thresh); balance->options(iarg,narg,arg); wtflag = balance->wtflag; if (balance->varflag && nevery == 0) error->all(FLERR,"Fix balance nevery = 0 cannot be used with weight var"); // create instance of Irregular class irregular = new Irregular(lmp); // only force reneighboring if nevery > 0 if (nevery) force_reneighbor = 1; - + lastbalance = -1; + // compute initial outputs itercount = 0; pending = 0; imbfinal = imbprev = maxloadperproc = 0.0; } /* ---------------------------------------------------------------------- */ FixBalance::~FixBalance() { delete balance; delete irregular; } /* ---------------------------------------------------------------------- */ int FixBalance::setmask() { int mask = 0; mask |= PRE_EXCHANGE; mask |= PRE_NEIGHBOR; return mask; } /* ---------------------------------------------------------------------- */ void FixBalance::post_constructor() { if (wtflag) balance->weight_storage(id); } /* ---------------------------------------------------------------------- */ void FixBalance::init() { if (force->kspace) kspace_flag = 1; else kspace_flag = 0; - balance->init_imbalance(); + balance->init_imbalance(1); } /* ---------------------------------------------------------------------- */ void FixBalance::setup(int vflag) { // compute final imbalance factor if setup_pre_exchange() invoked balancer // this is called at end of run setup, before output pre_neighbor(); } /* ---------------------------------------------------------------------- */ void FixBalance::setup_pre_exchange() { + // do not allow rebalancing twice on same timestep + // even if wanted to, can mess up elapsed time in ImbalanceTime + + if (update->ntimestep == lastbalance) return; + lastbalance = update->ntimestep; + // insure atoms are in current box & update box via shrink-wrap // has to be be done before rebalance() invokes Irregular::migrate_atoms() // since it requires atoms be inside simulation box // even though pbc() will be done again in Verlet::run() // no exchange() since doesn't matter if atoms are assigned to correct procs if (domain->triclinic) domain->x2lamda(atom->nlocal); domain->pbc(); domain->reset_box(); if (domain->triclinic) domain->lamda2x(atom->nlocal); // perform a rebalance if threshhold exceeded balance->set_weights(); imbnow = balance->imbalance_factor(maxloadperproc); if (imbnow > thresh) rebalance(); // next timestep to rebalance if (nevery) next_reneighbor = (update->ntimestep/nevery)*nevery + nevery; } /* ---------------------------------------------------------------------- perform dynamic load balancing ------------------------------------------------------------------------- */ void FixBalance::pre_exchange() { // return if not a rebalance timestep if (nevery && update->ntimestep < next_reneighbor) return; + // do not allow rebalancing twice on same timestep + // even if wanted to, can mess up elapsed time in ImbalanceTime + + if (update->ntimestep == lastbalance) return; + lastbalance = update->ntimestep; + // insure atoms are in current box & update box via shrink-wrap // no exchange() since doesn't matter if atoms are assigned to correct procs if (domain->triclinic) domain->x2lamda(atom->nlocal); domain->pbc(); domain->reset_box(); if (domain->triclinic) domain->lamda2x(atom->nlocal); // perform a rebalance if threshhold exceeded // if weight variable is used, wrap weight setting in clear/add compute if (balance->varflag) modify->clearstep_compute(); balance->set_weights(); if (balance->varflag) modify->addstep_compute(update->ntimestep + nevery); imbnow = balance->imbalance_factor(maxloadperproc); if (imbnow > thresh) rebalance(); // next timestep to rebalance if (nevery) next_reneighbor = (update->ntimestep/nevery)*nevery + nevery; } /* ---------------------------------------------------------------------- compute final imbalance factor based on nlocal after comm->exchange() only do this if rebalancing just occured ------------------------------------------------------------------------- */ void FixBalance::pre_neighbor() { if (!pending) return; imbfinal = balance->imbalance_factor(maxloadperproc); pending = 0; } /* ---------------------------------------------------------------------- perform dynamic load balancing ------------------------------------------------------------------------- */ void FixBalance::rebalance() { imbprev = imbnow; // invoke balancer and reset comm->uniform flag int *sendproc; if (lbstyle == SHIFT) { itercount = balance->shift(); comm->layout = LAYOUT_NONUNIFORM; } else if (lbstyle == BISECTION) { sendproc = balance->bisection(); comm->layout = LAYOUT_TILED; } // output of new decomposition if (balance->outflag) balance->dumpout(update->ntimestep); // reset proc sub-domains // check and warn if any proc's subbox is smaller than neigh skin // since may lead to lost atoms in exchange() if (domain->triclinic) domain->set_lamda_box(); domain->set_local_box(); domain->subbox_too_small_check(neighbor->skin); // move atoms to new processors via irregular() // only needed if migrate_check() says an atom moves to far // else allow caller's comm->exchange() to do it if (domain->triclinic) domain->x2lamda(atom->nlocal); if (wtflag) balance->fixstore->disable = 0; if (lbstyle == BISECTION) irregular->migrate_atoms(0,1,sendproc); else if (irregular->migrate_check()) irregular->migrate_atoms(); if (wtflag) balance->fixstore->disable = 1; if (domain->triclinic) domain->lamda2x(atom->nlocal); // invoke KSpace setup_grid() to adjust to new proc sub-domains if (kspace_flag) force->kspace->setup_grid(); // pending triggers pre_neighbor() to compute final imbalance factor - // can only be done after atoms migrate in caller's comm->exchange() + // can only be done after atoms migrate in comm->exchange() pending = 1; } /* ---------------------------------------------------------------------- return imbalance factor after last rebalance ------------------------------------------------------------------------- */ double FixBalance::compute_scalar() { return imbfinal; } /* ---------------------------------------------------------------------- return stats for last rebalance ------------------------------------------------------------------------- */ double FixBalance::compute_vector(int i) { if (i == 0) return maxloadperproc; if (i == 1) return (double) itercount; return imbprev; } /* ---------------------------------------------------------------------- return # of bytes of allocated memory ------------------------------------------------------------------------- */ double FixBalance::memory_usage() { double bytes = irregular->memory_usage(); if (balance->rcb) bytes += balance->rcb->memory_usage(); return bytes; } diff --git a/src/fix_balance.h b/src/fix_balance.h index 2c0814019..71222a00a 100644 --- a/src/fix_balance.h +++ b/src/fix_balance.h @@ -1,88 +1,89 @@ /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #ifdef FIX_CLASS FixStyle(balance,FixBalance) #else #ifndef LMP_FIX_BALANCE_H #define LMP_FIX_BALANCE_H #include #include "fix.h" namespace LAMMPS_NS { class FixBalance : public Fix { public: FixBalance(class LAMMPS *, int, char **); ~FixBalance(); int setmask(); void post_constructor(); void init(); void setup(int); void setup_pre_exchange(); void pre_exchange(); void pre_neighbor(); double compute_scalar(); double compute_vector(int); double memory_usage(); private: int nevery,lbstyle,nitermax; double thresh,stopthresh; char bstr[4]; int wtflag; // 1 for weighted balancing double imbnow; // current imbalance factor double imbprev; // imbalance factor before last rebalancing double imbfinal; // imbalance factor after last rebalancing double maxloadperproc; // max load on any processor int itercount; // iteration count of last call to Balance int kspace_flag; // 1 if KSpace solver defined int pending; + bigint lastbalance; // last timestep balancing was attempted class Balance *balance; class Irregular *irregular; void rebalance(); }; } #endif #endif /* ERROR/WARNING messages: E: Illegal ... command Self-explanatory. Check the input script syntax and compare to the documentation for the command. You can use -echo screen as a command-line option when running LAMMPS to see the offending line. E: Fix balance shift string is invalid The string can only contain the characters "x", "y", or "z". E: Fix balance rcb cannot be used with comm_style brick Comm_style tiled must be used instead. E: Cannot open fix balance output file Self-explanatory. */ diff --git a/src/imbalance.h b/src/imbalance.h index 14fd1b59b..ce7b700b9 100644 --- a/src/imbalance.h +++ b/src/imbalance.h @@ -1,45 +1,45 @@ /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #ifndef LMP_IMBALANCE_H #define LMP_IMBALANCE_H #include #include "pointers.h" namespace LAMMPS_NS { class Imbalance : protected Pointers { public: Imbalance(class LAMMPS *); virtual ~Imbalance() {}; // parse options. return number of arguments consumed (required) - virtual int options(int narg, char **arg) = 0; + virtual int options(int, char **) = 0; // reinitialize internal data (needed for fix balance) (optional) - virtual void init() {}; + virtual void init(int) {}; // compute and apply weight factors to local atom array (required) - virtual void compute(double *weights) = 0; + virtual void compute(double *) = 0; // print information about the state of this imbalance compute (required) - virtual void info(FILE *fp) = 0; + virtual void info(FILE *) = 0; // disallow default and copy constructor, assignment operator // private: //Imbalance() {}; //Imbalance(const Imbalance &) {}; //Imbalance &operator=(const Imbalance &) {return *this;}; }; } #endif diff --git a/src/imbalance_time.cpp b/src/imbalance_time.cpp index 27a753785..4450390e7 100644 --- a/src/imbalance_time.cpp +++ b/src/imbalance_time.cpp @@ -1,117 +1,111 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #include #include "imbalance_time.h" #include "atom.h" #include "comm.h" #include "force.h" #include "timer.h" #include "error.h" -// DEBUG -#include "update.h" - using namespace LAMMPS_NS; #define BIG 1.0e20 /* -------------------------------------------------------------------- */ ImbalanceTime::ImbalanceTime(LAMMPS *lmp) : Imbalance(lmp) {} /* -------------------------------------------------------------------- */ int ImbalanceTime::options(int narg, char **arg) { if (narg < 1) error->all(FLERR,"Illegal balance weight command"); factor = force->numeric(FLERR,arg[0]); if (factor <= 0.0) error->all(FLERR,"Illegal balance weight command"); return 1; } /* ---------------------------------------------------------------------- - reset last, needed for fix balance caller + reset last and timers if necessary ------------------------------------------------------------------------- */ -void ImbalanceTime::init() +void ImbalanceTime::init(int flag) { last = 0.0; + + // flag = 1 if called from FixBalance at start of run + // init Timer, so accumulated time not carried over from previous run + // should NOT init Timer if called from Balance, it uses time from last run + + if (flag) timer->init(); } /* -------------------------------------------------------------------- */ void ImbalanceTime::compute(double *weight) { if (!timer->has_normal()) return; // cost = CPU time for relevant timers since last invocation // localwt = weight assigned to each owned atom // just return if no time yet tallied double cost = -last; cost += timer->get_wall(Timer::PAIR); cost += timer->get_wall(Timer::NEIGH); cost += timer->get_wall(Timer::BOND); cost += timer->get_wall(Timer::KSPACE); - /* - printf("TIME %ld %d %g %g: %g %g %g %g\n", - update->ntimestep,atom->nlocal,last,cost, - timer->get_wall(Timer::PAIR), - timer->get_wall(Timer::NEIGH), - timer->get_wall(Timer::BOND), - timer->get_wall(Timer::KSPACE)); - */ - double maxcost; MPI_Allreduce(&cost,&maxcost,1,MPI_DOUBLE,MPI_MAX,world); if (maxcost <= 0.0) return; int nlocal = atom->nlocal; double localwt = 0.0; if (nlocal) localwt = cost/nlocal; if (nlocal && localwt <= 0.0) error->one(FLERR,"Balance weight <= 0.0"); // apply factor if specified != 1.0 // wtlo,wthi = lo/hi values excluding 0.0 due to no atoms on this proc // lo value does not change // newhi = new hi value to give hi/lo ratio factor times larger/smaller // expand/contract all localwt values from lo->hi to lo->newhi if (factor != 1.0) { double wtlo,wthi; if (localwt == 0.0) localwt = BIG; MPI_Allreduce(&localwt,&wtlo,1,MPI_DOUBLE,MPI_MIN,world); if (localwt == BIG) localwt = 0.0; MPI_Allreduce(&localwt,&wthi,1,MPI_DOUBLE,MPI_MAX,world); if (wtlo == wthi) return; double newhi = wthi*factor; localwt = wtlo + ((localwt-wtlo)/(wthi-wtlo)) * (newhi-wtlo); } for (int i = 0; i < nlocal; i++) weight[i] *= localwt; // record time up to this point last += cost; } /* -------------------------------------------------------------------- */ void ImbalanceTime::info(FILE *fp) { fprintf(fp," time weight factor: %g\n",factor); } diff --git a/src/imbalance_time.h b/src/imbalance_time.h index 76ba9aa98..8f20cc42a 100644 --- a/src/imbalance_time.h +++ b/src/imbalance_time.h @@ -1,43 +1,43 @@ /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #ifndef LMP_IMBALANCE_TIME_H #define LMP_IMBALANCE_TIME_H #include "imbalance.h" namespace LAMMPS_NS { class ImbalanceTime : public Imbalance { public: ImbalanceTime(class LAMMPS *); virtual ~ImbalanceTime() {} public: // parse options, return number of arguments consumed virtual int options(int, char **); // reinitialize internal data - virtual void init(); + virtual void init(int); // compute and apply weight factors to local atom array virtual void compute(double *); // print information about the state of this imbalance compute virtual void info(FILE *); private: double factor; // weight factor for time imbalance double last; // combined wall time from last call }; } #endif diff --git a/src/imbalance_var.cpp b/src/imbalance_var.cpp index a5fd9084e..394a9af36 100644 --- a/src/imbalance_var.cpp +++ b/src/imbalance_var.cpp @@ -1,96 +1,96 @@ /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #include #include "imbalance_var.h" #include "atom.h" #include "group.h" #include "input.h" #include "variable.h" #include "memory.h" #include "error.h" // DEBUG #include "update.h" using namespace LAMMPS_NS; /* -------------------------------------------------------------------- */ ImbalanceVar::ImbalanceVar(LAMMPS *lmp) : Imbalance(lmp), name(0) {} /* -------------------------------------------------------------------- */ ImbalanceVar::~ImbalanceVar() { delete [] name; } /* -------------------------------------------------------------------- */ int ImbalanceVar::options(int narg, char **arg) { if (narg < 1) error->all(FLERR,"Illegal balance weight command"); int len = strlen(arg[0]) + 1; name = new char[len]; memcpy(name,arg[0],len); - init(); + init(0); return 1; } /* -------------------------------------------------------------------- */ -void ImbalanceVar::init() +void ImbalanceVar::init(int flag) { id = input->variable->find(name); if (id < 0) { error->all(FLERR,"Variable name for balance weight does not exist"); } else { if (input->variable->atomstyle(id) == 0) error->all(FLERR,"Variable for balance weight has invalid style"); } } /* -------------------------------------------------------------------- */ void ImbalanceVar::compute(double *weight) { const int all = group->find("all"); if (all < 0) return; double *values; const int nlocal = atom->nlocal; memory->create(values,nlocal,"imbalance:values"); input->variable->compute_atom(id,all,values,1,0); int flag = 0; for (int i = 0; i < nlocal; i++) if (values[i] <= 0.0) flag = 1; int flagall; MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); if (flagall) error->one(FLERR,"Balance weight <= 0.0"); for (int i = 0; i < nlocal; i++) weight[i] *= values[i]; memory->destroy(values); } /* -------------------------------------------------------------------- */ void ImbalanceVar::info(FILE *fp) { fprintf(fp," weight variable: %s\n",name); } diff --git a/src/imbalance_var.h b/src/imbalance_var.h index 16c59eb0d..d354679c7 100644 --- a/src/imbalance_var.h +++ b/src/imbalance_var.h @@ -1,43 +1,43 @@ /* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #ifndef LMP_IMBALANCE_VAR_H #define LMP_IMBALANCE_VAR_H #include "imbalance.h" namespace LAMMPS_NS { class ImbalanceVar : public Imbalance { public: ImbalanceVar(class LAMMPS *); virtual ~ImbalanceVar(); public: // parse options. return number of arguments consumed. virtual int options(int, char **); // re-initialize internal data, e.g. variable ID - virtual void init(); + virtual void init(int); // compute per-atom imbalance and apply to weight array virtual void compute(double *); // print information about the state of this imbalance compute (required) virtual void info(FILE *); private: char *name; // variable name int id; // variable index }; } #endif