/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include <mpi.h>
#include <string.h>
#include "rcb.h"
#include "irregular.h"
#include "memory.h"
#include "error.h"
using namespace LAMMPS_NS;
#define MYHUGE 1.0e30
#define TINY 1.0e-6
#define DELTA 16384
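// MYHUGE = sentinel value for min/max searches over dot coords
// TINY = relative slack added to the load-balance tolerance
// DELTA = chunk size for growing the dots array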
// prototypes for non-class functions
void box_merge(void *, void *, int *, MPI_Datatype *);
void median_merge(void *, void *, int *, MPI_Datatype *);
// NOTE: to support a reuse flag, the Tree would need to be summed across procs
/* ---------------------------------------------------------------------- */
RCB::RCB(LAMMPS *lmp) : Pointers(lmp)
{
MPI_Comm_rank(world,&me);
MPI_Comm_size(world,&nprocs);
ndot = maxdot = 0;
dots = NULL;
nlist = maxlist = 0;
dotlist = dotmark = dotmark_select = NULL;
maxbuf = 0;
buf = NULL;
maxrecv = maxsend = 0;
recvproc = recvindex = sendproc = sendindex = NULL;
tree = NULL;
irregular = NULL;
// create MPI data and function types for box and median AllReduce ops
MPI_Type_contiguous(6,MPI_DOUBLE,&box_type);
MPI_Type_commit(&box_type);
MPI_Type_contiguous(sizeof(Median),MPI_CHAR,&med_type);
MPI_Type_commit(&med_type);
MPI_Op_create(box_merge,1,&box_op);
MPI_Op_create(median_merge,1,&med_op);
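// box_type spans the 6 doubles of a BBox (lo[3],hi[3])
// med_type treats a Median struct as raw bytes, so med_op itself
//   interprets the fields; both ops are created as commutative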
reuse = 0;
}
/* ---------------------------------------------------------------------- */
RCB::~RCB()
{
memory->sfree(dots);
memory->destroy(dotlist);
memory->destroy(dotmark);
memory->destroy(dotmark_select);
memory->sfree(buf);
memory->destroy(recvproc);
memory->destroy(recvindex);
memory->destroy(sendproc);
memory->destroy(sendindex);
memory->sfree(tree);
delete irregular;
MPI_Type_free(&med_type);
MPI_Type_free(&box_type);
MPI_Op_free(&box_op);
MPI_Op_free(&med_op);
}
/* ----------------------------------------------------------------------
perform RCB balancing of N particles at coords X in bounding box LO/HI
NEW version: each RCB cut is tested in all dimensions
dimension that produces 2 boxes with largest min size is selected
this is to prevent very narrow boxes from being produced
if wt = NULL, ignore per-particle weights
if wt defined, per-particle weights > 0.0
dimension = 2 or 3
as documented in rcb.h:
sets noriginal,nfinal,nkeep,recvproc,recvindex,lo,hi
all proc particles will be inside or on surface of 3-d box
defined by final lo/hi
// NOTE: worry about re-use of data structs for fix balance?
------------------------------------------------------------------------- */
void RCB::compute(int dimension, int n, double **x, double *wt,
double *bboxlo, double *bboxhi)
{
int i,j,k;
int keep,outgoing,incoming,incoming2;
int dim,markactive;
int indexlo,indexhi;
int first_iteration,breakflag;
double wttot,wtlo,wthi,wtsum,wtok,wtupto,wtmax;
double targetlo,targethi;
double valuemin,valuemax,valuehalf,valuehalf_select,smaller;
double tolerance;
MPI_Comm comm,comm_half;
MPI_Request request,request2;
Median med,medme;
// create list of my Dots
ndot = nkeep = noriginal = n;
if (ndot > maxdot) {
maxdot = ndot;
memory->sfree(dots);
dots = (Dot *) memory->smalloc(ndot*sizeof(Dot),"RCB:dots");
}
for (i = 0; i < ndot; i++) {
dots[i].x[0] = x[i][0];
dots[i].x[1] = x[i][1];
dots[i].x[2] = x[i][2];
dots[i].proc = me;
dots[i].index = i;
}
if (wt)
for (i = 0; i < ndot; i++) dots[i].wt = wt[i];
else
for (i = 0; i < ndot; i++) dots[i].wt = 1.0;
// initial bounding box = simulation box
// includes periodic or shrink-wrapped boundaries
lo = bbox.lo;
hi = bbox.hi;
lo[0] = bboxlo[0];
lo[1] = bboxlo[1];
lo[2] = bboxlo[2];
hi[0] = bboxhi[0];
hi[1] = bboxhi[1];
hi[2] = bboxhi[2];
cut = 0.0;
cutdim = -1;
// initialize counters
counters[0] = 0;
counters[1] = 0;
counters[2] = 0;
counters[3] = ndot;
counters[4] = maxdot;
counters[5] = 0;
counters[6] = 0;
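// counters[0] = # of median iterations
// counters[1] = # of dots sent
// counters[2] = # of dots received
// counters[3] = max # of dots on this proc
// counters[4] = max size of dots array
// counters[5] = # of times a previous cut was reused
// counters[6] = # of times dots array was reallocated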
// create communicator for use in recursion
MPI_Comm_dup(world,&comm);
// recurse until partition is a single proc = me
// proclower,procupper = lower,upper procs in partition
// procmid = 1st proc in upper half of partition
int procpartner,procpartner2;
int procmid;
int proclower = 0;
int procupper = nprocs - 1;
while (proclower != procupper) {
// if odd # of procs, lower partition gets extra one
procmid = proclower + (procupper - proclower) / 2 + 1;
// determine communication partner(s)
// readnumber = # of proc partners to read from
if (me < procmid)
procpartner = me + (procmid - proclower);
else
procpartner = me - (procmid - proclower);
int readnumber = 1;
if (procpartner > procupper) {
readnumber = 0;
procpartner--;
}
if (me == procupper && procpartner != procmid - 1) {
readnumber = 2;
procpartner2 = procpartner + 1;
}
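// example for nprocs = 5: proclower = 0, procupper = 4, procmid = 3
//   procs 0,1,2 form the lower half, procs 3,4 the upper half
//   partners: 0<->3, 1<->4; proc 2's nominal partner (5) is out of range,
//   so it sends to proc 4 and reads from no one (readnumber = 0),
//   while proc 4 reads from both procs 1 and 2 (readnumber = 2)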
// wttot = summed weight of entire partition
// search tolerance = largest single weight (plus epsilon)
// targetlo = desired weight in lower half of partition
// targethi = desired weight in upper half of partition
wtmax = wtsum = 0.0;
if (wt) {
for (i = 0; i < ndot; i++) {
wtsum += dots[i].wt;
if (dots[i].wt > wtmax) wtmax = dots[i].wt;
}
} else {
for (i = 0; i < ndot; i++) wtsum += dots[i].wt;
wtmax = 1.0;
}
MPI_Allreduce(&wtsum,&wttot,1,MPI_DOUBLE,MPI_SUM,comm);
if (wt) MPI_Allreduce(&wtmax,&tolerance,1,MPI_DOUBLE,MPI_MAX,comm);
else tolerance = 1.0;
tolerance *= 1.0 + TINY;
targetlo = wttot * (procmid - proclower) / (procupper + 1 - proclower);
targethi = wttot - targetlo;
// attempt a cut in each dimension
// each cut produces 2 boxes, each with a reduced box length in that dim
// smaller = the smaller of the 2 reduced box lengths in that dimension
// choose to cut in dimension which produces largest smaller value
// this should induce final proc sub-boxes to be as cube-ish as possible
// dim_select = selected cut dimension
// valuehalf_select = valuehalf in that dimension
// dotmark_select = dot markings in that dimension
int dim_select = -1;
double largest = 0.0;
for (dim = 0; dim < dimension; dim++) {
// create active list and mark array for dots
// initialize active list to all dots
if (ndot > maxlist) {
memory->destroy(dotlist);
memory->destroy(dotmark);
memory->destroy(dotmark_select);
maxlist = maxdot;
memory->create(dotlist,maxlist,"RCB:dotlist");
memory->create(dotmark,maxlist,"RCB:dotmark");
memory->create(dotmark_select,maxlist,"RCB:dotmark_select");
}
nlist = ndot;
for (i = 0; i < nlist; i++) dotlist[i] = i;
// median iteration
// zoom in on bisector until correct # of dots in each half of partition
// as each iteration of median-loop begins, require:
// all non-active dots are marked with 0/1 in dotmark
// valuemin <= every active dot <= valuemax
// wtlo, wthi = total wt of non-active dots
// when leaving median-loop, require only:
// valuehalf = correct cut position
// all dots <= valuehalf are marked with 0 in dotmark
// all dots >= valuehalf are marked with 1 in dotmark
// markactive = which side of cut is active = 0/1
// indexlo,indexhi = indices of dot closest to median
wtlo = wthi = 0.0;
valuemin = lo[dim];
valuemax = hi[dim];
first_iteration = 1;
indexlo = indexhi = 0;
while (1) {
// choose bisector value
// use old value on 1st iteration if old cut dimension is the same
// on 2nd option: could push valuehalf towards geometric center
// with "1.0-factor" to force overshoot
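// the weighted guess below is a linear interpolation: it assumes weight is
//   spread uniformly between valuemin and valuemax and places the cut
//   where the remaining target weight (targetlo - wtlo) would fall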
if (first_iteration && reuse && dim == tree[procmid].dim) {
counters[5]++;
valuehalf = tree[procmid].cut;
if (valuehalf < valuemin || valuehalf > valuemax)
valuehalf = 0.5 * (valuemin + valuemax);
} else if (wt)
valuehalf = valuemin + (targetlo - wtlo) /
(wttot - wtlo - wthi) * (valuemax - valuemin);
else
valuehalf = 0.5 * (valuemin + valuemax);
first_iteration = 0;
// initialize local median data structure
medme.totallo = medme.totalhi = 0.0;
medme.valuelo = -MYHUGE;
medme.valuehi = MYHUGE;
medme.wtlo = medme.wthi = 0.0;
medme.countlo = medme.counthi = 0;
medme.proclo = medme.prochi = me;
// mark all active dots on one side or other of bisector
// also set all fields in median data struct
// save indices of closest dots on either side
for (j = 0; j < nlist; j++) {
i = dotlist[j];
if (dots[i].x[dim] <= valuehalf) { // in lower part
medme.totallo += dots[i].wt;
dotmark[i] = 0;
if (dots[i].x[dim] > medme.valuelo) { // my closest dot
medme.valuelo = dots[i].x[dim];
medme.wtlo = dots[i].wt;
medme.countlo = 1;
indexlo = i;
} else if (dots[i].x[dim] == medme.valuelo) { // tied for closest
medme.wtlo += dots[i].wt;
medme.countlo++;
}
}
else { // in upper part
medme.totalhi += dots[i].wt;
dotmark[i] = 1;
if (dots[i].x[dim] < medme.valuehi) { // my closest dot
medme.valuehi = dots[i].x[dim];
medme.wthi = dots[i].wt;
medme.counthi = 1;
indexhi = i;
} else if (dots[i].x[dim] == medme.valuehi) { // tied for closest
medme.wthi += dots[i].wt;
medme.counthi++;
}
}
}
// combine median data struct across current subset of procs
counters[0]++;
MPI_Allreduce(&medme,&med,1,med_type,med_op,comm);
// test median guess for convergence
// move additional dots that are next to cut across it
if (wtlo + med.totallo < targetlo) { // lower half TOO SMALL
wtlo += med.totallo;
valuehalf = med.valuehi;
if (med.counthi == 1) { // only one dot to move
if (wtlo + med.wthi < targetlo) { // move it, keep iterating
if (me == med.prochi) dotmark[indexhi] = 0;
}
else { // only move if beneficial
if (wtlo + med.wthi - targetlo < targetlo - wtlo)
if (me == med.prochi) dotmark[indexhi] = 0;
break; // all done
}
}
else { // multiple dots to move
breakflag = 0;
wtok = 0.0;
if (medme.valuehi == med.valuehi) wtok = medme.wthi;
if (wtlo + med.wthi >= targetlo) { // all done
MPI_Scan(&wtok,&wtupto,1,MPI_DOUBLE,MPI_SUM,comm);
wtmax = targetlo - wtlo;
if (wtupto > wtmax) wtok = wtok - (wtupto - wtmax);
breakflag = 1;
} // wtok = most I can move
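// MPI_Scan gives an inclusive prefix sum, so wtupto = weight that this
//   proc plus all lower-ranked procs could move; trimming wtok by
//   (wtupto - wtmax) keeps the collective amount moved within wtmax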
for (j = 0, wtsum = 0.0; j < nlist && wtsum < wtok; j++) {
i = dotlist[j];
if (dots[i].x[dim] == med.valuehi) { // only move if better
if (wtsum + dots[i].wt - wtok < wtok - wtsum)
dotmark[i] = 0;
wtsum += dots[i].wt;
}
}
if (breakflag) break; // done if moved enough
}
wtlo += med.wthi;
if (targetlo-wtlo <= tolerance) break; // close enough
valuemin = med.valuehi; // iterate again
markactive = 1;
}
else if (wthi + med.totalhi < targethi) { // upper half TOO SMALL
wthi += med.totalhi;
valuehalf = med.valuelo;
if (med.countlo == 1) { // only one dot to move
if (wthi + med.wtlo < targethi) { // move it, keep iterating
if (me == med.proclo) dotmark[indexlo] = 1;
}
else { // only move if beneficial
if (wthi + med.wtlo - targethi < targethi - wthi)
if (me == med.proclo) dotmark[indexlo] = 1;
break; // all done
}
}
else { // multiple dots to move
breakflag = 0;
wtok = 0.0;
if (medme.valuelo == med.valuelo) wtok = medme.wtlo;
if (wthi + med.wtlo >= targethi) { // all done
MPI_Scan(&wtok,&wtupto,1,MPI_DOUBLE,MPI_SUM,comm);
wtmax = targethi - wthi;
if (wtupto > wtmax) wtok = wtok - (wtupto - wtmax);
breakflag = 1;
} // wtok = most I can move
for (j = 0, wtsum = 0.0; j < nlist && wtsum < wtok; j++) {
i = dotlist[j];
if (dots[i].x[dim] == med.valuelo) { // only move if better
if (wtsum + dots[i].wt - wtok < wtok - wtsum)
dotmark[i] = 1;
wtsum += dots[i].wt;
}
}
if (breakflag) break; // done if moved enough
}
wthi += med.wtlo;
if (targethi-wthi <= tolerance) break; // close enough
valuemax = med.valuelo; // iterate again
markactive = 0;
}
else // Goldilocks result: both partitions just right
break;
// shrink the active list
k = 0;
for (j = 0; j < nlist; j++) {
i = dotlist[j];
if (dotmark[i] == markactive) dotlist[k++] = i;
}
nlist = k;
}
// cut produces 2 sub-boxes with reduced size in dim
// compare smaller of the 2 sizes to previous dims
// keep dim that has the largest smaller
smaller = MIN(valuehalf-lo[dim],hi[dim]-valuehalf);
if (smaller > largest) {
largest = smaller;
dim_select = dim;
valuehalf_select = valuehalf;
memcpy(dotmark_select,dotmark,ndot*sizeof(int));
}
}
// copy results for best dim cut into dim,valuehalf,dotmark
dim = dim_select;
valuehalf = valuehalf_select;
memcpy(dotmark,dotmark_select,ndot*sizeof(int));
// found median
// store cut info only if I am procmid
if (me == procmid) {
cut = valuehalf;
cutdim = dim;
}
// use cut to shrink my RCB bounding box
if (me < procmid) hi[dim] = valuehalf;
else lo[dim] = valuehalf;
// outgoing = number of dots to ship to partner
// nkeep = number of dots that have never migrated
markactive = (me < procpartner);
for (i = 0, keep = 0, outgoing = 0; i < ndot; i++)
if (dotmark[i] == markactive) outgoing++;
else if (i < nkeep) keep++;
nkeep = keep;
// alert partner how many dots I'll send, read how many I'll recv
MPI_Send(&outgoing,1,MPI_INT,procpartner,0,world);
incoming = 0;
if (readnumber) {
MPI_Recv(&incoming,1,MPI_INT,procpartner,0,world,MPI_STATUS_IGNORE);
if (readnumber == 2) {
MPI_Recv(&incoming2,1,MPI_INT,procpartner2,0,world,MPI_STATUS_IGNORE);
incoming += incoming2;
}
}
// check if need to alloc more space
int ndotnew = ndot - outgoing + incoming;
if (ndotnew > maxdot) {
while (maxdot < ndotnew) maxdot += DELTA;
dots = (Dot *) memory->srealloc(dots,maxdot*sizeof(Dot),"RCB::dots");
counters[6]++;
}
counters[1] += outgoing;
counters[2] += incoming;
if (ndotnew > counters[3]) counters[3] = ndotnew;
if (maxdot > counters[4]) counters[4] = maxdot;
// malloc comm send buffer
if (outgoing > maxbuf) {
memory->sfree(buf);
maxbuf = outgoing;
buf = (Dot *) memory->smalloc(maxbuf*sizeof(Dot),"RCB:buf");
}
// fill buffer with dots that are marked for sending
// pack down the unmarked ones
keep = outgoing = 0;
for (i = 0; i < ndot; i++) {
if (dotmark[i] == markactive)
memcpy(&buf[outgoing++],&dots[i],sizeof(Dot));
else
memcpy(&dots[keep++],&dots[i],sizeof(Dot));
}
// post receives for dots
if (readnumber > 0) {
MPI_Irecv(&dots[keep],incoming*sizeof(Dot),MPI_CHAR,
procpartner,1,world,&request);
if (readnumber == 2) {
keep += incoming - incoming2;
MPI_Irecv(&dots[keep],incoming2*sizeof(Dot),MPI_CHAR,
procpartner2,1,world,&request2);
}
}
// handshake before sending dots to ensure recvs have been posted
if (readnumber > 0) {
MPI_Send(NULL,0,MPI_INT,procpartner,0,world);
if (readnumber == 2) MPI_Send(NULL,0,MPI_INT,procpartner2,0,world);
}
MPI_Recv(NULL,0,MPI_INT,procpartner,0,world,MPI_STATUS_IGNORE);
// send dots to partner
MPI_Rsend(buf,outgoing*sizeof(Dot),MPI_CHAR,procpartner,1,world);
// wait until all dots are received
if (readnumber > 0) {
MPI_Wait(&request,MPI_STATUS_IGNORE);
if (readnumber == 2) MPI_Wait(&request2,MPI_STATUS_IGNORE);
}
ndot = ndotnew;
// cut partition in half, create new communicators of 1/2 size
int split;
if (me < procmid) {
procupper = procmid - 1;
split = 0;
} else {
proclower = procmid;
split = 1;
}
MPI_Comm_split(comm,split,me,&comm_half);
MPI_Comm_free(&comm);
comm = comm_half;
}
// clean up
MPI_Comm_free(&comm);
// set public variables with results of rebalance
nfinal = ndot;
if (nfinal > maxrecv) {
memory->destroy(recvproc);
memory->destroy(recvindex);
maxrecv = nfinal;
memory->create(recvproc,maxrecv,"RCB:recvproc");
memory->create(recvindex,maxrecv,"RCB:recvindex");
}
for (i = 0; i < nfinal; i++) {
recvproc[i] = dots[i].proc;
recvindex[i] = dots[i].index;
}
}
/* ----------------------------------------------------------------------
perform RCB balancing of N particles at coords X in bounding box LO/HI
OLD version: each RCB cut is made in longest dimension of sub-box
if wt = NULL, ignore per-particle weights
if wt defined, per-particle weights > 0.0
dimension = 2 or 3
as documented in rcb.h:
sets noriginal,nfinal,nkeep,recvproc,recvindex,lo,hi
all proc particles will be inside or on surface of 3-d box
defined by final lo/hi
// NOTE: worry about re-use of data structs for fix balance?
------------------------------------------------------------------------- */
void RCB::compute_old(int dimension, int n, double **x, double *wt,
double *bboxlo, double *bboxhi)
{
int i,j,k;
int keep,outgoing,incoming,incoming2;
int dim,markactive;
int indexlo,indexhi;
int first_iteration,breakflag;
double wttot,wtlo,wthi,wtsum,wtok,wtupto,wtmax;
double targetlo,targethi;
double valuemin,valuemax,valuehalf;
double tolerance;
MPI_Comm comm,comm_half;
MPI_Request request,request2;
Median med,medme;
// create list of my Dots
ndot = nkeep = noriginal = n;
if (ndot > maxdot) {
maxdot = ndot;
memory->sfree(dots);
dots = (Dot *) memory->smalloc(ndot*sizeof(Dot),"RCB:dots");
}
for (i = 0; i < ndot; i++) {
dots[i].x[0] = x[i][0];
dots[i].x[1] = x[i][1];
dots[i].x[2] = x[i][2];
dots[i].proc = me;
dots[i].index = i;
}
if (wt)
for (i = 0; i < ndot; i++) dots[i].wt = wt[i];
else
for (i = 0; i < ndot; i++) dots[i].wt = 1.0;
// initial bounding box = simulation box
// includes periodic or shrink-wrapped boundaries
lo = bbox.lo;
hi = bbox.hi;
lo[0] = bboxlo[0];
lo[1] = bboxlo[1];
lo[2] = bboxlo[2];
hi[0] = bboxhi[0];
hi[1] = bboxhi[1];
hi[2] = bboxhi[2];
cut = 0.0;
cutdim = -1;
// initialize counters
counters[0] = 0;
counters[1] = 0;
counters[2] = 0;
counters[3] = ndot;
counters[4] = maxdot;
counters[5] = 0;
counters[6] = 0;
// create communicator for use in recursion
MPI_Comm_dup(world,&comm);
// recurse until partition is a single proc = me
// proclower,procupper = lower,upper procs in partition
// procmid = 1st proc in upper half of partition
int procpartner,procpartner2;
int procmid;
int proclower = 0;
int procupper = nprocs - 1;
while (proclower != procupper) {
// if odd # of procs, lower partition gets extra one
procmid = proclower + (procupper - proclower) / 2 + 1;
// determine communication partner(s)
// readnumber = # of proc partners to read from
if (me < procmid)
procpartner = me + (procmid - proclower);
else
procpartner = me - (procmid - proclower);
int readnumber = 1;
if (procpartner > procupper) {
readnumber = 0;
procpartner--;
}
if (me == procupper && procpartner != procmid - 1) {
readnumber = 2;
procpartner2 = procpartner + 1;
}
// wttot = summed weight of entire partition
// search tolerance = largest single weight (plus epsilon)
// targetlo = desired weight in lower half of partition
// targethi = desired weight in upper half of partition
wtmax = wtsum = 0.0;
if (wt) {
for (i = 0; i < ndot; i++) {
wtsum += dots[i].wt;
if (dots[i].wt > wtmax) wtmax = dots[i].wt;
}
} else {
for (i = 0; i < ndot; i++) wtsum += dots[i].wt;
wtmax = 1.0;
}
MPI_Allreduce(&wtsum,&wttot,1,MPI_DOUBLE,MPI_SUM,comm);
if (wt) MPI_Allreduce(&wtmax,&tolerance,1,MPI_DOUBLE,MPI_MAX,comm);
else tolerance = 1.0;
tolerance *= 1.0 + TINY;
targetlo = wttot * (procmid - proclower) / (procupper + 1 - proclower);
targethi = wttot - targetlo;
// dim = dimension to bisect on
// do not allow choice of z dimension for 2d system
dim = 0;
if (hi[1]-lo[1] > hi[0]-lo[0]) dim = 1;
if (dimension == 3) {
if (dim == 0 && hi[2]-lo[2] > hi[0]-lo[0]) dim = 2;
if (dim == 1 && hi[2]-lo[2] > hi[1]-lo[1]) dim = 2;
}
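// e.g. for a sub-box of size 10 x 4 x 6, dim = 0 (x) is kept
//   since x is the longest extent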
// create active list and mark array for dots
// initialize active list to all dots
if (ndot > maxlist) {
memory->destroy(dotlist);
memory->destroy(dotmark);
maxlist = maxdot;
memory->create(dotlist,maxlist,"RCB:dotlist");
memory->create(dotmark,maxlist,"RCB:dotmark");
}
nlist = ndot;
for (i = 0; i < nlist; i++) dotlist[i] = i;
// median iteration
// zoom in on bisector until correct # of dots in each half of partition
// as each iteration of median-loop begins, require:
// all non-active dots are marked with 0/1 in dotmark
// valuemin <= every active dot <= valuemax
// wtlo, wthi = total wt of non-active dots
// when leaving median-loop, require only:
// valuehalf = correct cut position
// all dots <= valuehalf are marked with 0 in dotmark
// all dots >= valuehalf are marked with 1 in dotmark
// markactive = which side of cut is active = 0/1
// indexlo,indexhi = indices of dot closest to median
wtlo = wthi = 0.0;
valuemin = lo[dim];
valuemax = hi[dim];
first_iteration = 1;
indexlo = indexhi = 0;
while (1) {
// choose bisector value
// use old value on 1st iteration if old cut dimension is the same
// on 2nd option: could push valuehalf towards geometric center
// with "1.0-factor" to force overshoot
if (first_iteration && reuse && dim == tree[procmid].dim) {
counters[5]++;
valuehalf = tree[procmid].cut;
if (valuehalf < valuemin || valuehalf > valuemax)
valuehalf = 0.5 * (valuemin + valuemax);
} else if (wt)
valuehalf = valuemin + (targetlo - wtlo) /
(wttot - wtlo - wthi) * (valuemax - valuemin);
else
valuehalf = 0.5 * (valuemin + valuemax);
first_iteration = 0;
// initialize local median data structure
medme.totallo = medme.totalhi = 0.0;
medme.valuelo = -MYHUGE;
medme.valuehi = MYHUGE;
medme.wtlo = medme.wthi = 0.0;
medme.countlo = medme.counthi = 0;
medme.proclo = medme.prochi = me;
// mark all active dots on one side or other of bisector
// also set all fields in median data struct
// save indices of closest dots on either side
for (j = 0; j < nlist; j++) {
i = dotlist[j];
if (dots[i].x[dim] <= valuehalf) { // in lower part
medme.totallo += dots[i].wt;
dotmark[i] = 0;
if (dots[i].x[dim] > medme.valuelo) { // my closest dot
medme.valuelo = dots[i].x[dim];
medme.wtlo = dots[i].wt;
medme.countlo = 1;
indexlo = i;
} else if (dots[i].x[dim] == medme.valuelo) { // tied for closest
medme.wtlo += dots[i].wt;
medme.countlo++;
}
}
else { // in upper part
medme.totalhi += dots[i].wt;
dotmark[i] = 1;
if (dots[i].x[dim] < medme.valuehi) { // my closest dot
medme.valuehi = dots[i].x[dim];
medme.wthi = dots[i].wt;
medme.counthi = 1;
indexhi = i;
} else if (dots[i].x[dim] == medme.valuehi) { // tied for closest
medme.wthi += dots[i].wt;
medme.counthi++;
}
}
}
// combine median data struct across current subset of procs
counters[0]++;
MPI_Allreduce(&medme,&med,1,med_type,med_op,comm);
// test median guess for convergence
// move additional dots that are next to cut across it
if (wtlo + med.totallo < targetlo) { // lower half TOO SMALL
wtlo += med.totallo;
valuehalf = med.valuehi;
if (med.counthi == 1) { // only one dot to move
if (wtlo + med.wthi < targetlo) { // move it, keep iterating
if (me == med.prochi) dotmark[indexhi] = 0;
}
else { // only move if beneficial
if (wtlo + med.wthi - targetlo < targetlo - wtlo)
if (me == med.prochi) dotmark[indexhi] = 0;
break; // all done
}
}
else { // multiple dots to move
breakflag = 0;
wtok = 0.0;
if (medme.valuehi == med.valuehi) wtok = medme.wthi;
if (wtlo + med.wthi >= targetlo) { // all done
MPI_Scan(&wtok,&wtupto,1,MPI_DOUBLE,MPI_SUM,comm);
wtmax = targetlo - wtlo;
if (wtupto > wtmax) wtok = wtok - (wtupto - wtmax);
breakflag = 1;
} // wtok = most I can move
for (j = 0, wtsum = 0.0; j < nlist && wtsum < wtok; j++) {
i = dotlist[j];
if (dots[i].x[dim] == med.valuehi) { // only move if better
if (wtsum + dots[i].wt - wtok < wtok - wtsum)
dotmark[i] = 0;
wtsum += dots[i].wt;
}
}
if (breakflag) break; // done if moved enough
}
wtlo += med.wthi;
if (targetlo-wtlo <= tolerance) break; // close enough
valuemin = med.valuehi; // iterate again
markactive = 1;
}
else if (wthi + med.totalhi < targethi) { // upper half TOO SMALL
wthi += med.totalhi;
valuehalf = med.valuelo;
if (med.countlo == 1) { // only one dot to move
if (wthi + med.wtlo < targethi) { // move it, keep iterating
if (me == med.proclo) dotmark[indexlo] = 1;
}
else { // only move if beneficial
if (wthi + med.wtlo - targethi < targethi - wthi)
if (me == med.proclo) dotmark[indexlo] = 1;
break; // all done
}
}
else { // multiple dots to move
breakflag = 0;
wtok = 0.0;
if (medme.valuelo == med.valuelo) wtok = medme.wtlo;
if (wthi + med.wtlo >= targethi) { // all done
MPI_Scan(&wtok,&wtupto,1,MPI_DOUBLE,MPI_SUM,comm);
wtmax = targethi - wthi;
if (wtupto > wtmax) wtok = wtok - (wtupto - wtmax);
breakflag = 1;
} // wtok = most I can move
for (j = 0, wtsum = 0.0; j < nlist && wtsum < wtok; j++) {
i = dotlist[j];
if (dots[i].x[dim] == med.valuelo) { // only move if better
if (wtsum + dots[i].wt - wtok < wtok - wtsum)
dotmark[i] = 1;
wtsum += dots[i].wt;
}
}
if (breakflag) break; // done if moved enough
}
wthi += med.wtlo;
if (targethi-wthi <= tolerance) break; // close enough
valuemax = med.valuelo; // iterate again
markactive = 0;
}
else // Goldilocks result: both partitions just right
break;
// shrink the active list
k = 0;
for (j = 0; j < nlist; j++) {
i = dotlist[j];
if (dotmark[i] == markactive) dotlist[k++] = i;
}
nlist = k;
}
// found median
// store cut info only if I am procmid
if (me == procmid) {
cut = valuehalf;
cutdim = dim;
}
// use cut to shrink my RCB bounding box
if (me < procmid) hi[dim] = valuehalf;
else lo[dim] = valuehalf;
// outgoing = number of dots to ship to partner
// nkeep = number of dots that have never migrated
markactive = (me < procpartner);
for (i = 0, keep = 0, outgoing = 0; i < ndot; i++)
if (dotmark[i] == markactive) outgoing++;
else if (i < nkeep) keep++;
nkeep = keep;
// alert partner how many dots I'll send, read how many I'll recv
MPI_Send(&outgoing,1,MPI_INT,procpartner,0,world);
incoming = 0;
if (readnumber) {
MPI_Recv(&incoming,1,MPI_INT,procpartner,0,world,MPI_STATUS_IGNORE);
if (readnumber == 2) {
MPI_Recv(&incoming2,1,MPI_INT,procpartner2,0,world,MPI_STATUS_IGNORE);
incoming += incoming2;
}
}
// check if need to alloc more space
int ndotnew = ndot - outgoing + incoming;
if (ndotnew > maxdot) {
while (maxdot < ndotnew) maxdot += DELTA;
dots = (Dot *) memory->srealloc(dots,maxdot*sizeof(Dot),"RCB::dots");
counters[6]++;
}
counters[1] += outgoing;
counters[2] += incoming;
if (ndotnew > counters[3]) counters[3] = ndotnew;
if (maxdot > counters[4]) counters[4] = maxdot;
// malloc comm send buffer
if (outgoing > maxbuf) {
memory->sfree(buf);
maxbuf = outgoing;
buf = (Dot *) memory->smalloc(maxbuf*sizeof(Dot),"RCB:buf");
}
// fill buffer with dots that are marked for sending
// pack down the unmarked ones
keep = outgoing = 0;
for (i = 0; i < ndot; i++) {
if (dotmark[i] == markactive)
memcpy(&buf[outgoing++],&dots[i],sizeof(Dot));
else
memcpy(&dots[keep++],&dots[i],sizeof(Dot));
}
// post receives for dots
if (readnumber > 0) {
MPI_Irecv(&dots[keep],incoming*sizeof(Dot),MPI_CHAR,
procpartner,1,world,&request);
if (readnumber == 2) {
keep += incoming - incoming2;
MPI_Irecv(&dots[keep],incoming2*sizeof(Dot),MPI_CHAR,
procpartner2,1,world,&request2);
}
}
// handshake before sending dots to ensure recvs have been posted
if (readnumber > 0) {
MPI_Send(NULL,0,MPI_INT,procpartner,0,world);
if (readnumber == 2) MPI_Send(NULL,0,MPI_INT,procpartner2,0,world);
}
MPI_Recv(NULL,0,MPI_INT,procpartner,0,world,MPI_STATUS_IGNORE);
// send dots to partner
MPI_Rsend(buf,outgoing*sizeof(Dot),MPI_CHAR,procpartner,1,world);
// wait until all dots are received
if (readnumber > 0) {
MPI_Wait(&request,MPI_STATUS_IGNORE);
if (readnumber == 2) MPI_Wait(&request2,MPI_STATUS_IGNORE);
}
ndot = ndotnew;
// cut partition in half, create new communicators of 1/2 size
int split;
if (me < procmid) {
procupper = procmid - 1;
split = 0;
} else {
proclower = procmid;
split = 1;
}
MPI_Comm_split(comm,split,me,&comm_half);
MPI_Comm_free(&comm);
comm = comm_half;
}
// clean up
MPI_Comm_free(&comm);
// set public variables with results of rebalance
nfinal = ndot;
if (nfinal > maxrecv) {
memory->destroy(recvproc);
memory->destroy(recvindex);
maxrecv = nfinal;
memory->create(recvproc,maxrecv,"RCB:recvproc");
memory->create(recvindex,maxrecv,"RCB:recvindex");
}
for (i = 0; i < nfinal; i++) {
recvproc[i] = dots[i].proc;
recvindex[i] = dots[i].index;
}
}
/* ----------------------------------------------------------------------
custom MPI reduce operation
merge of each component of an RCB bounding box
------------------------------------------------------------------------- */
void box_merge(void *in, void *inout, int *len, MPI_Datatype *dptr)
{
RCB::BBox *box1 = (RCB::BBox *) in;
RCB::BBox *box2 = (RCB::BBox *) inout;
for (int i = 0; i < 3; i++) {
if (box1->lo[i] < box2->lo[i]) box2->lo[i] = box1->lo[i];
if (box1->hi[i] > box2->hi[i]) box2->hi[i] = box1->hi[i];
}
}
/* ----------------------------------------------------------------------
custom MPI reduce operation
merge median data structure
on input:
in,inout->totallo, totalhi = weight in both partitions on this proc
valuelo, valuehi = pos of nearest dot(s) to cut on this proc
wtlo, wthi = total wt of dot(s) at that pos on this proc
countlo, counthi = # of dot(s) nearest to cut on this proc
proclo, prochi = not used
on exit:
inout-> totallo, totalhi = total # of active dots in both partitions
valuelo, valuehi = pos of nearest dot(s) to cut
wtlo, wthi = total wt of dot(s) at that position
countlo, counthi = total # of dot(s) nearest to cut
proclo, prochi = one unique proc who owns a nearest dot
all procs must get same proclo,prochi
------------------------------------------------------------------------- */
void median_merge(void *in, void *inout, int *len, MPI_Datatype *dptr)
{
RCB::Median *med1 = (RCB::Median *) in;
RCB::Median *med2 = (RCB::Median *) inout;
med2->totallo += med1->totallo;
if (med1->valuelo > med2->valuelo) {
med2->valuelo = med1->valuelo;
med2->wtlo = med1->wtlo;
med2->countlo = med1->countlo;
med2->proclo = med1->proclo;
}
else if (med1->valuelo == med2->valuelo) {
med2->wtlo += med1->wtlo;
med2->countlo += med1->countlo;
if (med1->proclo < med2->proclo) med2->proclo = med1->proclo;
}
med2->totalhi += med1->totalhi;
if (med1->valuehi < med2->valuehi) {
med2->valuehi = med1->valuehi;
med2->wthi = med1->wthi;
med2->counthi = med1->counthi;
med2->prochi = med1->prochi;
}
else if (med1->valuehi == med2->valuehi) {
med2->wthi += med1->wthi;
med2->counthi += med1->counthi;
if (med1->prochi < med2->prochi) med2->prochi = med1->prochi;
}
}
/* ----------------------------------------------------------------------
invert the RCB rebalance result to convert receive info into send info
sortflag = flag for sorting order of received messages by proc ID
------------------------------------------------------------------------- */
void RCB::invert(int sortflag)
{
// only create Irregular if not previously created
// allows Irregular to persist for multiple RCB calls by fix balance
if (!irregular) irregular = new Irregular(lmp);
// nsend = # of dots to request from other procs
int nsend = nfinal-nkeep;
int *proclist;
memory->create(proclist,nsend,"RCB:proclist");
Invert *sinvert =
(Invert *) memory->smalloc(nsend*sizeof(Invert),"RCB:sinvert");
int m = 0;
for (int i = nkeep; i < nfinal; i++) {
proclist[m] = recvproc[i];
sinvert[m].rindex = recvindex[i];
sinvert[m].sproc = me;
sinvert[m].sindex = i;
m++;
}
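// each Invert entry tells a dot's original owner (recvproc[i]) that the
//   dot it held at local index rindex now lives on proc sproc at index sindex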
// perform inversion via irregular comm
// nrecv = # of my dots to send to other procs
int nrecv = irregular->create_data(nsend,proclist,sortflag);
Invert *rinvert =
(Invert *) memory->smalloc(nrecv*sizeof(Invert),"RCB:rinvert");
irregular->exchange_data((char *) sinvert,sizeof(Invert),(char *) rinvert);
irregular->destroy_data();
// set public variables from requests to send my dots
if (noriginal > maxsend) {
memory->destroy(sendproc);
memory->destroy(sendindex);
maxsend = noriginal;
memory->create(sendproc,maxsend,"RCB:sendproc");
memory->create(sendindex,maxsend,"RCB:sendindex");
}
for (int i = 0; i < nkeep; i++) {
sendproc[recvindex[i]] = me;
sendindex[recvindex[i]] = i;
}
for (int i = 0; i < nrecv; i++) {
m = rinvert[i].rindex;
sendproc[m] = rinvert[i].sproc;
sendindex[m] = rinvert[i].sindex;
}
// clean-up
memory->destroy(proclist);
memory->destroy(sinvert);
memory->destroy(rinvert);
}
/* ----------------------------------------------------------------------
memory use of Irregular
------------------------------------------------------------------------- */
bigint RCB::memory_usage()
{
bigint bytes = 0;
if (irregular) bytes += irregular->memory_usage();
return bytes;
}
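/* ----------------------------------------------------------------------
   example usage (hypothetical sketch, not actual LAMMPS caller code):
   a caller such as fix balance would typically do something like the
   following; names outside this class (domain->dimension, atom->nlocal,
   atom->x) are assumptions about the caller

     RCB *rcb = new RCB(lmp);
     rcb->compute(domain->dimension,atom->nlocal,atom->x,NULL,bboxlo,bboxhi);
     rcb->invert(0);
     // for each original dot i, rcb->sendproc[i] and rcb->sendindex[i]
     // now give the proc and local index that owns it after balancing
     delete rcb;
------------------------------------------------------------------------- */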
// -----------------------------------------------------------------------
// DEBUG methods
// -----------------------------------------------------------------------
/*
// consistency checks on RCB results
void RCB::check()
{
int i,iflag,total1,total2;
double weight,wtmax,wtmin,wtone,tolerance;
// check that total # of dots remained the same
MPI_Allreduce(&ndotorig,&total1,1,MPI_INT,MPI_SUM,world);
MPI_Allreduce(&ndot,&total2,1,MPI_INT,MPI_SUM,world);
if (total1 != total2) {
if (me == 0)
printf("ERROR: Points before RCB = %d, Points after RCB = %d\n",
total1,total2);
}
// check that result is load-balanced within log2(P)*max-wt
weight = wtone = 0.0;
for (i = 0; i < ndot; i++) {
weight += dots[i].wt;
if (dots[i].wt > wtone) wtone = dots[i].wt;
}
MPI_Allreduce(&weight,&wtmin,1,MPI_DOUBLE,MPI_MIN,world);
MPI_Allreduce(&weight,&wtmax,1,MPI_DOUBLE,MPI_MAX,world);
MPI_Allreduce(&wtone,&tolerance,1,MPI_DOUBLE,MPI_MAX,world);
// i = smallest exponent such that 2^i > nprocs (~ log2(nprocs) + 1)
// tolerance = largest-single-weight*log2(nprocs)
for (i = 0; (nprocs >> i) != 0; i++);
tolerance = tolerance * i * (1.0 + TINY);
if (wtmax - wtmin > tolerance) {
if (me == 0)
printf("ERROR: Load-imbalance > tolerance of %g\n",tolerance);
MPI_Barrier(world);
if (weight == wtmin) printf(" Proc %d has weight = %g\n",me,weight);
if (weight == wtmax) printf(" Proc %d has weight = %g\n",me,weight);
}
MPI_Barrier(world);
// check that final set of points is inside RCB box of each proc
iflag = 0;
for (i = 0; i < ndot; i++) {
if (dots[i].x[0] < lo[0] || dots[i].x[0] > hi[0] ||
dots[i].x[1] < lo[1] || dots[i].x[1] > hi[1] ||
dots[i].x[2] < lo[2] || dots[i].x[2] > hi[2])
iflag++;
}
if (iflag > 0)
printf("ERROR: %d points are out-of-box on proc %d\n",iflag,me);
}
// stats for RCB decomposition
void RCB::stats(int flag)
{
int i,iflag,sum,min,max;
double ave,rsum,rmin,rmax;
double weight,wttot,wtmin,wtmax;
if (me == 0) printf("RCB Statistics:\n");
// distribution info
for (i = 0, weight = 0.0; i < ndot; i++) weight += dots[i].wt;
MPI_Allreduce(&weight,&wttot,1,MPI_DOUBLE,MPI_SUM,world);
MPI_Allreduce(&weight,&wtmin,1,MPI_DOUBLE,MPI_MIN,world);
MPI_Allreduce(&weight,&wtmax,1,MPI_DOUBLE,MPI_MAX,world);
if (me == 0) {
printf(" Total weight of dots = %g\n",wttot);
printf(" Weight on each proc: ave = %g, max = %g, min = %g\n",
wttot/nprocs,wtmax,wtmin);
}
if (flag) {
MPI_Barrier(world);
printf(" Proc %d has weight = %g\n",me,weight);
}
for (i = 0, weight = 0.0; i < ndot; i++)
if (dots[i].wt > weight) weight = dots[i].wt;
MPI_Allreduce(&weight,&wtmax,1,MPI_DOUBLE,MPI_MAX,world);
if (me == 0) printf(" Maximum weight of single dot = %g\n",wtmax);
if (flag) {
MPI_Barrier(world);
printf(" Proc %d max weight = %g\n",me,weight);
}
// counter info
MPI_Allreduce(&counters[0],&sum,1,MPI_INT,MPI_SUM,world);
MPI_Allreduce(&counters[0],&min,1,MPI_INT,MPI_MIN,world);
MPI_Allreduce(&counters[0],&max,1,MPI_INT,MPI_MAX,world);
ave = ((double) sum)/nprocs;
if (me == 0)
printf(" Median iter: ave = %g, min = %d, max = %d\n",ave,min,max);
if (flag) {
MPI_Barrier(world);
printf(" Proc %d median count = %d\n",me,counters[0]);
}
MPI_Allreduce(&counters[1],&sum,1,MPI_INT,MPI_SUM,world);
MPI_Allreduce(&counters[1],&min,1,MPI_INT,MPI_MIN,world);
MPI_Allreduce(&counters[1],&max,1,MPI_INT,MPI_MAX,world);
ave = ((double) sum)/nprocs;
if (me == 0)
printf(" Send count: ave = %g, min = %d, max = %d\n",ave,min,max);
if (flag) {
MPI_Barrier(world);
printf(" Proc %d send count = %d\n",me,counters[1]);
}
MPI_Allreduce(&counters[2],&sum,1,MPI_INT,MPI_SUM,world);
MPI_Allreduce(&counters[2],&min,1,MPI_INT,MPI_MIN,world);
MPI_Allreduce(&counters[2],&max,1,MPI_INT,MPI_MAX,world);
ave = ((double) sum)/nprocs;
if (me == 0)
printf(" Recv count: ave = %g, min = %d, max = %d\n",ave,min,max);
if (flag) {
MPI_Barrier(world);
printf(" Proc %d recv count = %d\n",me,counters[2]);
}
MPI_Allreduce(&counters[3],&sum,1,MPI_INT,MPI_SUM,world);
MPI_Allreduce(&counters[3],&min,1,MPI_INT,MPI_MIN,world);
MPI_Allreduce(&counters[3],&max,1,MPI_INT,MPI_MAX,world);
ave = ((double) sum)/nprocs;
if (me == 0)
printf(" Max dots: ave = %g, min = %d, max = %d\n",ave,min,max);
if (flag) {
MPI_Barrier(world);
printf(" Proc %d max dots = %d\n",me,counters[3]);
}
MPI_Allreduce(&counters[4],&sum,1,MPI_INT,MPI_SUM,world);
MPI_Allreduce(&counters[4],&min,1,MPI_INT,MPI_MIN,world);
MPI_Allreduce(&counters[4],&max,1,MPI_INT,MPI_MAX,world);
ave = ((double) sum)/nprocs;
if (me == 0)
printf(" Max memory: ave = %g, min = %d, max = %d\n",ave,min,max);
if (flag) {
MPI_Barrier(world);
printf(" Proc %d max memory = %d\n",me,counters[4]);
}
if (reuse) {
MPI_Allreduce(&counters[5],&sum,1,MPI_INT,MPI_SUM,world);
MPI_Allreduce(&counters[5],&min,1,MPI_INT,MPI_MIN,world);
MPI_Allreduce(&counters[5],&max,1,MPI_INT,MPI_MAX,world);
ave = ((double) sum)/nprocs;
if (me == 0)
printf(" # of Reuse: ave = %g, min = %d, max = %d\n",ave,min,max);
if (flag) {
MPI_Barrier(world);
printf(" Proc %d # of Reuse = %d\n",me,counters[5]);
}
}
MPI_Allreduce(&counters[6],&sum,1,MPI_INT,MPI_SUM,world);
MPI_Allreduce(&counters[6],&min,1,MPI_INT,MPI_MIN,world);
MPI_Allreduce(&counters[6],&max,1,MPI_INT,MPI_MAX,world);
ave = ((double) sum)/nprocs;
if (me == 0)
printf(" # of OverAlloc: ave = %g, min = %d, max = %d\n",ave,min,max);
if (flag) {
MPI_Barrier(world);
printf(" Proc %d # of OverAlloc = %d\n",me,counters[6]);
}
// RCB boxes for each proc
if (flag) {
if (me == 0) printf(" RCB sub-domain boxes:\n");
for (i = 0; i < 3; i++) {
MPI_Barrier(world);
if (me == 0) printf(" Dimension %d\n",i+1);
MPI_Barrier(world);
printf(" Proc = %d: Box = %g %g\n",me,lo[i],hi[i]);
}
}
}
*/
