domain.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <mpi.h>
#include "allvars.h"
#include "proto.h"
/*! \file domain.c
* \brief code for domain decomposition
*
* This file contains the code for the domain decomposition of the
* simulation volume. The domains are constructed from disjoint subsets
* of the leaves of a fiducial top-level tree that covers the full
* simulation volume. Domain boundaries hence run along tree-node
* divisions of a fiducial global BH tree. As a result of this method, the
* tree forces are in principle strictly independent of the way the domains
* are cut. The domain decomposition can be carried out for an arbitrary
* number of CPUs. Individual domains are not cubical, but spatially
* coherent since the leaves are traversed in a Peano-Hilbert order and
* individual domains form segments along this order. This also ensures
* that each domain has a small surface to volume ratio, which minimizes
* communication.
*/
#define TOPNODEFACTOR 20.0
#define REDUC_FAC 0.98
/*! toGo[task*NTask + partner] gives the number of particles in task 'task'
* that have to go to task 'partner'
*/
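/* Example: with NTask = 4, toGo[1 * 4 + 3] is the number of particles task 1
* must send to task 3; toGoSph holds the corresponding counts for SPH
* particles only.
*/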
static int *toGo, *toGoSph;
static int *local_toGo, *local_toGoSph;
static int *list_NumPart;
static int *list_N_gas;
static int *list_load;
static int *list_loadsph;
static double *list_work;
#ifdef STELLAR_PROP
static int *toGoSt;
static int *local_toGoSt;
static int *list_N_stars;
#endif
static long long maxload, maxloadsph;
static struct topnode_exchange
{
peanokey Startkey;
int Count;
}
*toplist, *toplist_local;
/*! This is the main routine for the domain decomposition. It acts as a
* driver routine that allocates various temporary buffers, maps the
* particles back onto the periodic box if needed, and then does the
* domain decomposition, and a final Peano-Hilbert order of all particles
* as a tuning measure.
*/
void domain_Decomposition(void)
{
double t0, t1;
int i;
#ifdef PMGRID
if(All.PM_Ti_endstep == All.Ti_Current)
{
All.NumForcesSinceLastDomainDecomp = 1 + All.TotNumPart * All.TreeDomainUpdateFrequency;
/* make sure that a domain decomposition is done before the PM force is evaluated;
this guarantees that the particles are wrapped into the box */
}
#endif
/* Check whether it is really time for a new domain decomposition */
if(All.NumForcesSinceLastDomainDecomp > All.TotNumPart * All.TreeDomainUpdateFrequency)
{
t0 = second();
#ifdef PERIODIC
do_box_wrapping(); /* map the particles back onto the box */
#endif
All.NumForcesSinceLastDomainDecomp = 0;
#ifdef SFR
rearrange_particle_sequence();
#endif
TreeReconstructFlag = 1; /* ensures that new tree will be constructed */
if(ThisTask == 0)
{
printf("start domain decomposition... \n");
fflush(stdout);
}
Key = malloc(sizeof(peanokey) * All.MaxPart);
KeySorted = malloc(sizeof(peanokey) * All.MaxPart);
toGo = malloc(sizeof(int) * NTask * NTask);
toGoSph = malloc(sizeof(int) * NTask * NTask);
local_toGo = malloc(sizeof(int) * NTask);
local_toGoSph = malloc(sizeof(int) * NTask);
list_NumPart = malloc(sizeof(int) * NTask);
list_N_gas = malloc(sizeof(int) * NTask);
list_load = malloc(sizeof(int) * NTask);
list_loadsph = malloc(sizeof(int) * NTask);
list_work = malloc(sizeof(double) * NTask);
MPI_Allgather(&NumPart, 1, MPI_INT, list_NumPart, 1, MPI_INT, MPI_COMM_WORLD);
MPI_Allgather(&N_gas, 1, MPI_INT, list_N_gas, 1, MPI_INT, MPI_COMM_WORLD);
#ifdef STELLAR_PROP
toGoSt = malloc(sizeof(int) * NTask * NTask);
local_toGoSt = malloc(sizeof(int) * NTask);
list_N_stars = malloc(sizeof(int) * NTask);
MPI_Allgather(&N_stars, 1, MPI_INT, list_N_stars, 1, MPI_INT, MPI_COMM_WORLD);
#endif
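/* maximum number of particles (all types / SPH only) a single task is allowed
* to hold; REDUC_FAC keeps a small safety margin below the allocated array
* sizes All.MaxPart and All.MaxPartSph */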
maxload = All.MaxPart * REDUC_FAC;
maxloadsph = All.MaxPartSph * REDUC_FAC;
#ifdef STELLAR_PROP
#ifdef CHECK_ID_CORRESPONDENCE
if (ThisTask==0)
printf("Check id correspondence before decomposition...\n");
rearrange_particle_sequence();
for(i = N_gas; i < N_gas+N_stars; i++)
{
if( StP[P[i].StPIdx].PIdx != i )
{
printf("\nP/StP correspondance error\n");
printf("(%d) (in domain before) N_stars=%d N_gas=%d i=%d id=%d P[i].StPIdx=%d StP[P[i].StPIdx].PIdx=%d\n\n",ThisTask,N_stars,N_gas,i,P[i].ID,P[i].StPIdx,StP[P[i].StPIdx].PIdx);
endrun(888001);
}
if(StP[P[i].StPIdx].ID != P[i].ID)
{
printf("\nP/StP correspondance error\n");
printf("(%d) (in domain before) N_gas=%d N_stars=%d i=%d Type=%d P.Id=%d P[i].StPIdx=%d StP[P[i].StPIdx].ID=%d \n\n",ThisTask,N_gas,N_stars,i,P[i].Type,P[i].ID, P[i].StPIdx, StP[P[i].StPIdx].ID);
endrun(888002);
}
}
if (ThisTask==0)
printf("Check id correspondence before decomposition done...\n");
#endif
#endif
domain_decompose();
free(list_work);
free(list_loadsph);
free(list_load);
free(list_N_gas);
free(list_NumPart);
free(local_toGoSph);
free(local_toGo);
free(toGoSph);
free(toGo);
#ifdef STELLAR_PROP
free(list_N_stars);
free(local_toGoSt);
free(toGoSt);
#endif
#ifdef STELLAR_PROP
#ifdef CHECK_BLOCK_ORDER
int typenow;
typenow = 0;
for(i = 0; i < NumPart; i++)
{
if ((P[i].Type<typenow)&&(typenow<=ST))
{
printf("\nBlock order error\n");
printf("(%d) i=%d Type=%d typenow=%d\n\n",ThisTask,i,P[i].Type,typenow);
endrun(888003);
}
typenow = P[i].Type;
}
#endif
#endif
#ifdef STELLAR_PROP
#ifdef CHECK_ID_CORRESPONDENCE
if (ThisTask==0)
printf("Check id correspondence after decomposition...\n");
rearrange_particle_sequence();
for(i = N_gas; i < N_gas+N_stars; i++)
{
if( StP[P[i].StPIdx].PIdx != i )
{
printf("\nP/StP correspondance error\n");
printf("(%d) (in domain after) N_stars=%d N_gas=%d i=%d id=%d P[i].StPIdx=%d StP[P[i].StPIdx].PIdx=%d\n\n",ThisTask,N_stars,N_gas,i,P[i].ID,P[i].StPIdx,StP[P[i].StPIdx].PIdx);
endrun(888004);
}
if(StP[P[i].StPIdx].ID != P[i].ID)
{
printf("\nP/StP correspondance error\n");
printf("(%d) (in domain after) N_gas=%d N_stars=%d i=%d Type=%d P.Id=%d P[i].StPIdx=%d StP[P[i].StPIdx].ID=%d \n\n",ThisTask,N_gas,N_stars,i,P[i].Type,P[i].ID, P[i].StPIdx, StP[P[i].StPIdx].ID);
endrun(888005);
}
}
if (ThisTask==0)
printf("Check id correspondence after decomposition done...\n");
#endif
#endif
if(ThisTask == 0)
{
printf("domain decomposition done. \n");
fflush(stdout);
}
t1 = second();
All.CPU_Domain += timediff(t0, t1);
#ifdef PEANOHILBERT
t0 = second();
peano_hilbert_order();
t1 = second();
All.CPU_Peano += timediff(t0, t1);
#endif
free(KeySorted);
free(Key);
}
}
/*! This function carries out the actual domain decomposition for all
* particle types. It will try to balance the work-load for each domain,
* as estimated based on the P[i]-GravCost values. The decomposition will
* respect the maximum allowed memory-imbalance given by the value of
* PartAllocFactor.
*/
void domain_decompose(void)
{
int i, j, status;
int ngrp, task, partner, sendcount, recvcount;
long long sumtogo, sumload;
int maxload, *temp;
double sumwork, maxwork;
#ifdef DETAILED_CPU_DOMAIN
double t0, t1;
#endif
for(i = 0; i < 6; i++)
NtypeLocal[i] = 0;
for(i = 0; i < NumPart; i++)
NtypeLocal[P[i].Type]++;
/* because Ntype[] is of type `long long', we cannot do a simple
* MPI_Allreduce() to sum the total particle numbers
*/
temp = malloc(NTask * 6 * sizeof(int));
MPI_Allgather(NtypeLocal, 6, MPI_INT, temp, 6, MPI_INT, MPI_COMM_WORLD);
for(i = 0; i < 6; i++)
{
Ntype[i] = 0;
for(j = 0; j < NTask; j++)
Ntype[i] += temp[j * 6 + i];
}
free(temp);
#ifndef UNEQUALSOFTENINGS
for(i = 0; i < 6; i++)
if(Ntype[i] > 0)
break;
for(ngrp = i + 1; ngrp < 6; ngrp++)
{
if(Ntype[ngrp] > 0)
if(All.SofteningTable[ngrp] != All.SofteningTable[i])
{
if(ThisTask == 0)
{
fprintf(stdout, "Code was not compiled with UNEQUALSOFTENINGS, but some of the\n");
fprintf(stdout, "softening lengths are unequal nevertheless.\n");
fprintf(stdout, "This is not allowed.\n");
}
endrun(0);
}
}
#endif
/* determine global dimensions of domain grid */
#ifdef DETAILED_CPU_DOMAIN
t0 = second();
#endif
domain_findExtent();
#ifdef DETAILED_CPU_DOMAIN
t1 = second();
All.CPU_Domain_findExtend += timediff(t0, t1);
#endif
#ifdef DETAILED_CPU_DOMAIN
t0 = second();
#endif
domain_determineTopTree();
#ifdef DETAILED_CPU_DOMAIN
t1 = second();
All.CPU_Domain_determineTopTree += timediff(t0, t1);
#endif
/* determine cost distribution in domain grid */
#ifdef DETAILED_CPU_DOMAIN
t0 = second();
#endif
domain_sumCost();
#ifdef DETAILED_CPU_DOMAIN
t1 = second();
All.CPU_Domain_sumCost += timediff(t0, t1);
#endif
/* find the split of the domain grid recursively */
#ifdef DETAILED_CPU_DOMAIN
t0 = second();
#endif
#ifdef SPLIT_DOMAIN_USING_TIME
status = domain_findSplityr(0, NTask, 0, NTopleaves - 1);
#else
status = domain_findSplit(0, NTask, 0, NTopleaves - 1);
#endif
#ifdef DETAILED_CPU_DOMAIN
t1 = second();
All.CPU_Domain_findSplit += timediff(t0, t1);
#endif
if(status != 0)
{
if(ThisTask == 0)
printf("\nNo domain decomposition that stays within memory bounds is possible (status=%d).\n",status);
endrun(0);
}
/* now try to improve the work-load balance of the split */
#ifdef DETAILED_CPU_DOMAIN
t0 = second();
#endif
#ifdef SPLIT_DOMAIN_USING_TIME
domain_shiftSplityr();
#else
domain_shiftSplit();
#endif
#ifdef DETAILED_CPU_DOMAIN
t1 = second();
All.CPU_Domain_shiftSplit += timediff(t0, t1);
#endif
DomainMyStart = DomainStartList[ThisTask];
DomainMyLast = DomainEndList[ThisTask];
if(ThisTask == 0)
{
sumload = maxload = 0;
sumwork = maxwork = 0;
for(i = 0; i < NTask; i++)
{
sumload += list_load[i];
sumwork += list_work[i];
if(list_load[i] > maxload)
maxload = list_load[i];
if(list_work[i] > maxwork)
maxwork = list_work[i];
}
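/* both ratios compare the most loaded task to the average load; a value of 1.0
* corresponds to perfect balance */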
printf("work-load balance=%g memory-balance=%g\n",
maxwork / (sumwork / NTask), maxload / (((double) sumload) / NTask));
}
/* determine for each cpu how many particles have to be shifted to other cpus */
#ifdef DETAILED_CPU_DOMAIN
t0 = second();
#endif
domain_countToGo();
#ifdef DETAILED_CPU_DOMAIN
t1 = second();
All.CPU_Domain_countToGo += timediff(t0, t1);
#endif
#ifdef DETAILED_CPU_DOMAIN
t0 = second();
#endif
for(i = 0, sumtogo = 0; i < NTask * NTask; i++)
sumtogo += toGo[i];
while(sumtogo > 0)
{
if(ThisTask == 0)
{
printf("exchange of %d%09d particles\n", (int) (sumtogo / 1000000000),
(int) (sumtogo % 1000000000));
fflush(stdout);
}
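/* sweep over all tasks in a hypercube-like pattern: in round 'ngrp' every task
* exchanges with partner = task ^ ngrp, so each pair of tasks is treated
* exactly once per sweep */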
for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
{
for(task = 0; task < NTask; task++)
{
partner = task ^ ngrp;
if(partner < NTask && task < partner)
{
/* treat SPH separately */
if(All.TotN_gas > 0)
{
domain_findExchangeNumbers(task, partner, 0, &sendcount, &recvcount);
list_NumPart[task] += recvcount - sendcount;
list_NumPart[partner] -= recvcount - sendcount;
list_N_gas[task] += recvcount - sendcount;
list_N_gas[partner] -= recvcount - sendcount;
toGo[task * NTask + partner] -= sendcount;
toGo[partner * NTask + task] -= recvcount;
toGoSph[task * NTask + partner] -= sendcount;
toGoSph[partner * NTask + task] -= recvcount;
//if (sendcount>0)
// printf("(%d) gas --> %d (%d)\n",ThisTask,sendcount,N_gas);
//if (recvcount>0)
// printf("(%d) gas <-- %d (%d)\n",ThisTask,recvcount,N_gas);
//printf("(%d) N_gas=%d P[N_gas].ID=%d %d (%d %d)\n",ThisTask,N_gas,P[N_gas].ID,P[N_gas].Type,sendcount,recvcount);
if(task == ThisTask) /* actually carry out the exchange */
domain_exchangeParticles(partner, 0, sendcount, recvcount);
if(partner == ThisTask)
domain_exchangeParticles(task, 0, recvcount, sendcount);
//if (sendcount>0)
// printf("(%d) (after) gas --> %d (%d)\n",ThisTask,sendcount,N_gas);
//if (recvcount>0)
// printf("(%d) (after) gas <-- %d (%d)\n",ThisTask,recvcount,N_gas);
//printf("(%d) (after) N_gas=%d P[N_gas].ID=%d %d (%d %d)\n",ThisTask,N_gas,P[N_gas].ID,P[N_gas].Type,sendcount,recvcount);
}
#ifdef STELLAR_PROP
/* treat STARS separately */
if(All.TotN_stars > 0)
{
domain_findExchangeNumbers(task, partner, 1, &sendcount, &recvcount);
list_NumPart[task] += recvcount - sendcount;
list_NumPart[partner] -= recvcount - sendcount;
list_N_stars[task] += recvcount - sendcount;
list_N_stars[partner] -= recvcount - sendcount;
toGo[task * NTask + partner] -= sendcount;
toGo[partner * NTask + task] -= recvcount;
toGoSt[task * NTask + partner] -= sendcount;
toGoSt[partner * NTask + task] -= recvcount;
//if (sendcount>0)
// printf("(%d) stars --> %d (%d)\n",ThisTask,sendcount,N_stars);
//if (recvcount>0)
// printf("(%d) stars <-- %d (%d)\n",ThisTask,recvcount,N_stars);
if(task == ThisTask) /* actually carry out the exchange */
domain_exchangeParticles(partner, 1, sendcount, recvcount);
if(partner == ThisTask)
domain_exchangeParticles(task, 1, recvcount, sendcount);
//if (sendcount>0)
// printf("(%d) stars --> %d (%d)\n",ThisTask,sendcount,N_stars);
//if (recvcount>0)
// printf("(%d) stars <-- %d (%d)\n",ThisTask,recvcount,N_stars);
}
#endif
domain_findExchangeNumbers(task, partner, -1, &sendcount, &recvcount);
list_NumPart[task] += recvcount - sendcount;
list_NumPart[partner] -= recvcount - sendcount;
toGo[task * NTask + partner] -= sendcount;
toGo[partner * NTask + task] -= recvcount;
if(task == ThisTask) /* actually carry out the exchange */
domain_exchangeParticles(partner, -1, sendcount, recvcount);
if(partner == ThisTask)
domain_exchangeParticles(task, -1, recvcount, sendcount);
}
}
}
for(i = 0, sumtogo = 0; i < NTask * NTask; i++)
sumtogo += toGo[i];
}
#ifdef SFR
#ifndef STELLAR_PROP
/* count the new N_stars; when STELLAR_PROP is on,
N_stars has already been updated during the domain decomposition */
int nstars;
for(i = N_gas, nstars = 0; i < NumPart; i++)
{
if (P[i].Type==ST)
nstars++;
}
N_stars = nstars;
#endif
#endif
#ifdef DETAILED_CPU_DOMAIN
t1 = second();
All.CPU_Domain_exchange += timediff(t0, t1);
#endif
}
/*! This function tries to find a split point in a range of cells in the
* domain-grid. The range of cells starts at 'first', and ends at 'last'
* (inclusively). The number of cpus that holds the range is 'ncpu', with
* the first cpu given by 'cpustart'. If more than 2 cpus are to be split,
* the function calls itself recursively. The division tries to achieve a
* best particle-load balance under the constraint that 'maxload' and
* 'maxloadsph' may not be exceeded, and that each cpu holds at least one
* cell from the domaingrid. If such a decomposition cannot be achieved, a
* non-zero error code is returned.
*
* After successful completion, DomainStartList[] and DomainEndList[] contain
* for each task the first and last cell of the domain grid assigned to it,
* and list_load[]/list_loadsph[] hold the corresponding particle loads.
* Also, DomainTask[] contains for each cell the task it was assigned to.
*/
int domain_findSplit(int cpustart, int ncpu, int first, int last)
{
int i, split, ok_left, ok_right;
long long load, sphload, load_leftOfSplit, sphload_leftOfSplit;
int ncpu_leftOfSplit;
double maxAvgLoad_CurrentSplit, maxAvgLoad_NewSplit;
ncpu_leftOfSplit = ncpu / 2;
for(i = first, load = 0, sphload = 0; i <= last; i++)
{
load += DomainCount[i];
sphload += DomainCountSph[i];
}
split = first + ncpu_leftOfSplit;
for(i = first, load_leftOfSplit = sphload_leftOfSplit = 0; i < split; i++)
{
load_leftOfSplit += DomainCount[i];
sphload_leftOfSplit += DomainCountSph[i];
}
/* find the best split point in terms of work-load balance */
while(split < last - (ncpu - ncpu_leftOfSplit - 1) && split > 0)
{
maxAvgLoad_CurrentSplit =
dmax(load_leftOfSplit / ncpu_leftOfSplit, (load - load_leftOfSplit) / (ncpu - ncpu_leftOfSplit));
maxAvgLoad_NewSplit =
dmax((load_leftOfSplit + DomainCount[split]) / ncpu_leftOfSplit,
(load - load_leftOfSplit - DomainCount[split]) / (ncpu - ncpu_leftOfSplit));
if(maxAvgLoad_NewSplit <= maxAvgLoad_CurrentSplit)
{
load_leftOfSplit += DomainCount[split];
sphload_leftOfSplit += DomainCountSph[split];
split++;
}
else
break;
}
/* we will now have to check whether this solution is possible given the restrictions on the maximum load */
for(i = first, load_leftOfSplit = 0, sphload_leftOfSplit = 0; i < split; i++)
{
load_leftOfSplit += DomainCount[i];
sphload_leftOfSplit += DomainCountSph[i];
}
if(load_leftOfSplit > maxload * ncpu_leftOfSplit ||
(load - load_leftOfSplit) > maxload * (ncpu - ncpu_leftOfSplit))
{
/* we did not find a viable split */
printf("(%d) error -1 ",ThisTask);
return -1;
}
if(sphload_leftOfSplit > maxloadsph * ncpu_leftOfSplit ||
(sphload - sphload_leftOfSplit) > maxloadsph * (ncpu - ncpu_leftOfSplit))
{
/* we did not find a viable split */
printf("(%d) error -2 ",ThisTask);
return -2;
}
if(ncpu_leftOfSplit >= 2)
ok_left = domain_findSplit(cpustart, ncpu_leftOfSplit, first, split - 1);
else
ok_left = 0;
if((ncpu - ncpu_leftOfSplit) >= 2)
ok_right = domain_findSplit(cpustart + ncpu_leftOfSplit, ncpu - ncpu_leftOfSplit, split, last);
else
ok_right = 0;
if(ok_left == 0 && ok_right == 0)
{
/* found a viable split */
if(ncpu_leftOfSplit == 1)
{
for(i = first; i < split; i++)
DomainTask[i] = cpustart;
list_load[cpustart] = load_leftOfSplit;
list_loadsph[cpustart] = sphload_leftOfSplit;
DomainStartList[cpustart] = first;
DomainEndList[cpustart] = split - 1;
}
if((ncpu - ncpu_leftOfSplit) == 1)
{
for(i = split; i <= last; i++)
DomainTask[i] = cpustart + ncpu_leftOfSplit;
list_load[cpustart + ncpu_leftOfSplit] = load - load_leftOfSplit;
list_loadsph[cpustart + ncpu_leftOfSplit] = sphload - sphload_leftOfSplit;
DomainStartList[cpustart + ncpu_leftOfSplit] = split;
DomainEndList[cpustart + ncpu_leftOfSplit] = last;
}
return 0;
}
/* we did not find a viable split */
return -3;
}
#ifdef SPLIT_DOMAIN_USING_TIME
/*! Time-based variant of domain_findSplit(): the desired particle number of
* each task is adjusted according to its measured gravity CPU time, and the
* top-level cells are then assigned greedily to match these targets.
*/
int domain_findSplityr(int cpustart, int ncpu, int first, int last)
{
int i, split, ok_left, ok_right;
long long load, sphload, load_leftOfSplit, sphload_leftOfSplit;
int ncpu_leftOfSplit;
double maxAvgLoad_CurrentSplit, maxAvgLoad_NewSplit;
/************************************/
/* find the number of part per proc */
/************************************/
double sumCPU_Gravity,meanCPU_Gravity;
double imb;
double ImbFactor;
int NumPartDiff,sumNumPartDiff,DesiredNumPart;
int *list_DesiredNumPart;
MPI_Allreduce(&CPU_Gravity, &sumCPU_Gravity, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
meanCPU_Gravity = sumCPU_Gravity/NTask;
if (sumCPU_Gravity>0)
{
imb = CPU_Gravity/meanCPU_Gravity;
NumPartDiff = (int) ((meanCPU_Gravity-CPU_Gravity)/meanCPU_Gravity * All.TotNumPart/NTask * 0.5);
ImbFactor=0;
}
else
{
imb = 0;
ImbFactor=0;
NumPartDiff=0;
}
//NumPartDiff = (int)NumPart*ImbFactor;
MPI_Reduce(&NumPartDiff, &sumNumPartDiff, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
if (ThisTask==0)
{
NumPartDiff-= sumNumPartDiff;
}
/* check */
MPI_Reduce(&NumPartDiff, &sumNumPartDiff, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
if (ThisTask==0)
if (sumNumPartDiff != 0)
{
printf("we are in trouble here...\n");
endrun(89897676);
}
DesiredNumPart = NumPart + NumPartDiff;
/* create a list */
list_DesiredNumPart = malloc(sizeof(int) * NTask);
MPI_Allgather(&DesiredNumPart, 1, MPI_INT, list_DesiredNumPart, 1, MPI_INT, MPI_COMM_WORLD);
printf("(%04d) Step=%04d Imbalance : %12g %12g %12g %12g %12d %12d %12d\n",ThisTask,All.NumCurrentTiStep,CPU_Gravity,sumCPU_Gravity/NTask, imb ,ImbFactor,NumPart,NumPartDiff,DesiredNumPart);
/************************************/
/* find the splits */
/************************************/
int task;
int domain;
/* loop over all top leaves */
for(task = 0, domain = 0; task < NTask-1; task++)
{
DomainStartList[task] = domain;
list_load[task] =0;
list_loadsph[task]=0;
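/* keep adding top-level cells while the task is below its desired particle
* number, or while the overshoot from adding one more cell is smaller than
* the shortfall of stopping here */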
while( (list_load[task]+DomainCount[domain] < list_DesiredNumPart[task]) || ((list_load[task]+DomainCount[domain] - list_DesiredNumPart[task]) < (list_DesiredNumPart[task]-list_load[task])) )
{
/* add the domain to the task */
list_load[task] += DomainCount[domain];
list_loadsph[task]+= DomainCountSph[domain];
DomainTask[domain] = task;
DomainEndList[task] = domain;
/* move to next domain */
domain++;
if (domain==NTopleaves)
{
printf("not enought domains...\n");
endrun(77665566);
}
}
}
/* now, do the last task */
task = NTask-1;
DomainStartList[task] = domain;
list_load[task] =0;
list_loadsph[task]=0;
for(; domain < NTopleaves; domain++)
{
list_load[task] += DomainCount[domain];
list_loadsph[task]+= DomainCountSph[domain];
DomainTask[domain] = task;
DomainEndList[task] = domain;
}
free(list_DesiredNumPart);
return 0;
}
#endif
/*! This function tries to improve the domain decomposition found by
* domain_findSplit() with respect to work-load balance. To this end, the
* boundaries in the existing domain-split solution (which was found by
* trying to balance the particle load) are shifted as long as this leads
* to better work-load while still remaining within the allowed
* memory-imbalance constraints.
*/
void domain_shiftSplit(void)
{
int i, task, iter = 0, moved;
double maxw, newmaxw;
for(task = 0; task < NTask; task++)
list_work[task] = 0;
for(i = 0; i < NTopleaves; i++)
list_work[DomainTask[i]] += DomainWork[i];
#ifndef PY_INTERFACE
if (ThisTask==0)
{
fprintf(FdLog,"1 %12g ",All.Time);
for(task = 0; task < NTask; task++)
fprintf(FdLog,"%12g ",list_work[task]);
fprintf(FdLog,"\n");
fflush(FdLog);
}
#endif
do
{
for(task = 0, moved = 0; task < NTask - 1; task++)
{
maxw = dmax(list_work[task], list_work[task + 1]);
if(list_work[task] < list_work[task + 1])
{
newmaxw = dmax(list_work[task] + DomainWork[DomainStartList[task + 1]],
list_work[task + 1] - DomainWork[DomainStartList[task + 1]]);
if(newmaxw <= maxw)
{
if(list_load[task] + DomainCount[DomainStartList[task + 1]] <= maxload)
{
if(list_loadsph[task] + DomainCountSph[DomainStartList[task + 1]] > maxloadsph)
continue;
/* ok, we can move one domain cell from right to left */
list_work[task] += DomainWork[DomainStartList[task + 1]];
list_load[task] += DomainCount[DomainStartList[task + 1]];
list_loadsph[task] += DomainCountSph[DomainStartList[task + 1]];
list_work[task + 1] -= DomainWork[DomainStartList[task + 1]];
list_load[task + 1] -= DomainCount[DomainStartList[task + 1]];
list_loadsph[task + 1] -= DomainCountSph[DomainStartList[task + 1]];
DomainTask[DomainStartList[task + 1]] = task;
DomainStartList[task + 1] += 1;
DomainEndList[task] += 1;
moved++;
}
}
}
else
{
newmaxw = dmax(list_work[task] - DomainWork[DomainEndList[task]],
list_work[task + 1] + DomainWork[DomainEndList[task]]);
if(newmaxw <= maxw)
{
if(list_load[task + 1] + DomainCount[DomainEndList[task]] <= maxload)
{
if(list_loadsph[task + 1] + DomainCountSph[DomainEndList[task]] > maxloadsph)
continue;
/* ok, we can move one domain cell from left to right */
list_work[task] -= DomainWork[DomainEndList[task]];
list_load[task] -= DomainCount[DomainEndList[task]];
list_loadsph[task] -= DomainCountSph[DomainEndList[task]];
list_work[task + 1] += DomainWork[DomainEndList[task]];
list_load[task + 1] += DomainCount[DomainEndList[task]];
list_loadsph[task + 1] += DomainCountSph[DomainEndList[task]];
DomainTask[DomainEndList[task]] = task + 1;
DomainEndList[task] -= 1;
DomainStartList[task + 1] -= 1;
moved++;
}
}
}
}
iter++;
}
while(moved > 0 && iter < 10 * NTopleaves);
#ifndef PY_INTERFACE
if (ThisTask==0)
{
fprintf(FdLog,"2 %12g ",All.Time);
for(task = 0; task < NTask; task++)
fprintf(FdLog,"%12g ",list_work[task]);
fprintf(FdLog,"\n");
fflush(FdLog);
}
#endif
}
#ifdef SPLIT_DOMAIN_USING_TIME
/*! This function tries to improve the domain decomposition found by
* domain_findSplit() with respect to work-load balance. To this end, the
* boundaries in the existing domain-split solution (which was found by
* trying to balance the particle load) are shifted as long as this leads
* to better work-load while still remaining within the allowed
* memory-imbalance constraints.
*/
void domain_shiftSplityr(void)
{
}
#endif
/*! This function counts how many particles have to be exchanged between
* two CPUs according to the domain split. If the CPUs are already quite
* full and hold data from other CPUs as well, not all the particles may
* be exchanged at once. In this case the communication phase has to be
* repeated, until enough of the third-party particles have been moved
* away such that the decomposition can be completed.
*/
void domain_findExchangeNumbers(int task, int partner, int sphflag, int *send, int *recv)
{
int numpartA, numpartsphA, ntobesentA, maxsendA, maxsendA_old;
int numpartB, numpartsphB, ntobesentB, maxsendB, maxsendB_old;
#ifdef STELLAR_PROP
int numpartstA;
int numpartstB;
#endif
numpartA = list_NumPart[task];
numpartsphA = list_N_gas[task];
#ifdef STELLAR_PROP
numpartstA = list_N_stars[task];
#endif
numpartB = list_NumPart[partner];
numpartsphB = list_N_gas[partner];
#ifdef STELLAR_PROP
numpartstB = list_N_stars[partner];
#endif
switch (sphflag)
{
case 0:
ntobesentA = toGoSph[task * NTask + partner];
ntobesentB = toGoSph[partner * NTask + task];
break;
#ifdef STELLAR_PROP
case 1:
ntobesentA = toGoSt[task * NTask + partner];
ntobesentB = toGoSt[partner * NTask + task];
break;
#endif
default:
#ifdef STELLAR_PROP
ntobesentA = toGo[task * NTask + partner] - toGoSph[task * NTask + partner] - toGoSt[task * NTask + partner];
ntobesentB = toGo[partner * NTask + task] - toGoSph[partner * NTask + task] - toGoSt[partner * NTask + task];
#else
ntobesentA = toGo[task * NTask + partner] - toGoSph[task * NTask + partner];
ntobesentB = toGo[partner * NTask + task] - toGoSph[partner * NTask + task];
#endif
break;
}
maxsendA = imin(ntobesentA, All.BunchSizeDomain);
maxsendB = imin(ntobesentB, All.BunchSizeDomain);
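/* iterate to a fixed point: each side may only send as many particles as the
* partner can still store, counting the slots the partner frees by sending
* its own particles away */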
do
{
maxsendA_old = maxsendA;
maxsendB_old = maxsendB;
maxsendA = imin(All.MaxPart - numpartB + maxsendB, maxsendA);
maxsendB = imin(All.MaxPart - numpartA + maxsendA, maxsendB);
}
while((maxsendA != maxsendA_old) || (maxsendB != maxsendB_old));
/* now make also sure that there is enough space for SPH particles */
switch (sphflag)
{
case 0:
do
{
maxsendA_old = maxsendA;
maxsendB_old = maxsendB;
maxsendA = imin(All.MaxPartSph - numpartsphB + maxsendB, maxsendA);
maxsendB = imin(All.MaxPartSph - numpartsphA + maxsendA, maxsendB);
}
while((maxsendA != maxsendA_old) || (maxsendB != maxsendB_old));
break;
#ifdef STELLAR_PROP
case 1:
do
{
maxsendA_old = maxsendA;
maxsendB_old = maxsendB;
maxsendA = imin(All.MaxPartStars - numpartstB + maxsendB, maxsendA);
maxsendB = imin(All.MaxPartStars - numpartstA + maxsendA, maxsendB);
}
while((maxsendA != maxsendA_old) || (maxsendB != maxsendB_old));
break;
#endif
default:
break;
}
*send = maxsendA;
*recv = maxsendB;
}
/*! This function exchanges particles between two CPUs according to the
* domain split. In doing this, the memory boundaries which may restrict
* the exchange process are observed.
*/
void domain_exchangeParticles(int partner, int sphflag, int send_count, int recv_count)
{
int i, no, n, count, rep;
MPI_Status status;
#ifdef STELLAR_PROP
int m;
#endif
for(n = 0, count = 0; count < send_count && n < NumPart; n++)
{
switch (sphflag)
{
case 0:
if(P[n].Type != 0)
continue;
break;
#ifdef STELLAR_PROP
case 1:
if(P[n].Type != 1)
continue;
break;
#endif
default:
#ifdef STELLAR_PROP
if((P[n].Type == 0)||(P[n].Type == 1))
#else
if(P[n].Type == 0)
#endif
continue;
break;
}
no = 0;
while(TopNodes[no].Daughter >= 0)
no = TopNodes[no].Daughter + (Key[n] - TopNodes[no].StartKey) / (TopNodes[no].Size / 8);
no = TopNodes[no].Leaf;
if(DomainTask[no] == partner)
{
switch (sphflag)
{
case 0: /* special reorder routine for SPH particles (need to stay at beginning) */
#ifndef STELLAR_PROP
DomainPartBuf[count] = P[n]; /* copy particle and collect in contiguous memory */
DomainKeyBuf[count] = Key[n];
DomainSphBuf[count] = SphP[n];
P[n] = P[N_gas - 1];
P[N_gas - 1] = P[NumPart - 1];
Key[n] = Key[N_gas - 1];
Key[N_gas - 1] = Key[NumPart - 1];
SphP[n] = SphP[N_gas - 1];
N_gas--;
break;
#else
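/* with STELLAR_PROP the block order gas | stars | other must be preserved:
* the freed gas slot is refilled from the end of the gas block, that slot
* from the end of the star block, and the latter from the very end of the
* particle array; the StP back-link of the star moved to slot N_gas-1 is
* updated below */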
DomainPartBuf[count] = P[n]; /* copy particle and collect in contiguous memory */
DomainKeyBuf[count] = Key[n];
DomainSphBuf[count] = SphP[n];
P[n] = P[N_gas - 1];
P[N_gas - 1] = P[N_gas+N_stars - 1];
P[N_gas+N_stars - 1] = P[NumPart - 1];
Key[n] = Key[N_gas - 1];
Key[N_gas - 1] = Key[N_gas+N_stars - 1];
Key[N_gas+N_stars - 1] = Key[NumPart - 1];
SphP[n] = SphP[N_gas - 1];
StP[P[N_gas - 1].StPIdx].PIdx = N_gas - 1;
N_gas--;
break;
#endif
#ifdef STELLAR_PROP
case 1:
DomainPartBuf[count] = P[n]; /* copy particle and collect in contiguous memory */
DomainKeyBuf[count] = Key[n];
DomainStBuf[count] = StP[P[n].StPIdx];
m = P[n].StPIdx;
//printf("(%d) sending P .... n=%d id=%d\n",ThisTask,n,P[n].ID);
//printf("(%d) replacing P using .... n=%d id=%d\n",ThisTask,N_gas+N_stars - 1,P[N_gas+N_stars - 1].ID);
//printf("(%d) sending Stp .... n=%d id=%d\n",ThisTask,m,StP[m].ID);
//printf("(%d) replacing StP using .... n=%d id=%d\n",ThisTask,N_stars - 1,StP[N_stars - 1].ID);
P[n] = P[N_gas+N_stars - 1];
StP[P[n].StPIdx].PIdx = n; /* create correct link */
P[N_gas+N_stars - 1] = P[NumPart - 1];
Key[n] = Key[N_gas+N_stars - 1];
Key[N_gas+N_stars - 1] = Key[NumPart - 1];
if (m!=(N_stars - 1))
{
StP[m] = StP[N_stars - 1]; /* replace by the last one -> avoid a hole in Stp */
P[StP[m].PIdx].StPIdx = m; /* create correct link */
}
N_stars--;
break;
#endif
default:
DomainPartBuf[count] = P[n]; /* copy particle and collect in contiguous memory */
DomainKeyBuf[count] = Key[n];
P[n] = P[NumPart - 1];
Key[n] = Key[NumPart - 1];
break;
}
count++;
NumPart--;
n--;
}
}
if(count != send_count)
{
printf("Houston, we got a problem... (sphflag=%d)\n",sphflag);
printf("ThisTask=%d count=%d send_count=%d\n", ThisTask, count, send_count);
endrun(88);
}
/* transmit */
for(rep = 0; rep < 2; rep++)
{
if((rep == 0 && ThisTask < partner) || (rep == 1 && ThisTask > partner))
{
if(send_count > 0)
{
MPI_Ssend(&DomainPartBuf[0], send_count * sizeof(struct particle_data), MPI_BYTE, partner,
TAG_PDATA, MPI_COMM_WORLD);
MPI_Ssend(&DomainKeyBuf[0], send_count * sizeof(peanokey), MPI_BYTE, partner, TAG_KEY,
MPI_COMM_WORLD);
if(sphflag==0)
MPI_Ssend(&DomainSphBuf[0], send_count * sizeof(struct sph_particle_data), MPI_BYTE, partner,
TAG_SPHDATA, MPI_COMM_WORLD);
#ifdef STELLAR_PROP
if(sphflag==ST)
MPI_Ssend(&DomainStBuf[0], send_count * sizeof(struct st_particle_data), MPI_BYTE, partner,
TAG_STDATA, MPI_COMM_WORLD);
#endif
}
}
if((rep == 1 && ThisTask < partner) || (rep == 0 && ThisTask > partner))
{
if(recv_count > 0)
{
switch (sphflag)
{
case 0:
#ifndef STELLAR_PROP
if((NumPart - N_gas) > recv_count)
{
for(i = 0; i < recv_count; i++)
{
P[NumPart + i] = P[N_gas + i];
Key[NumPart + i] = Key[N_gas + i];
}
}
else
{
for(i = NumPart - 1; i >= N_gas; i--)
{
P[i + recv_count] = P[i];
Key[i + recv_count] = Key[i];
}
}
#else
/* A : move elts of last block */
if((NumPart - N_gas - N_stars) > recv_count)
{
for(i = 0; i < recv_count; i++)
{
P[NumPart + i] = P[N_gas + N_stars + i];
Key[NumPart + i] = Key[N_gas + N_stars + i];
}
}
else
{
for(i = NumPart - 1; i >= N_gas + N_stars; i--)
{
P[i + recv_count] = P[i];
Key[i + recv_count] = Key[i];
}
}
/* B : move stars */
if (N_stars > 0)
{
if((N_stars) > recv_count)
{
for(i = 0; i < recv_count; i++)
{
P[N_gas + N_stars + i] = P[N_gas + i];
Key[N_gas + N_stars + i] = Key[N_gas + i];
StP[P[N_gas + N_stars + i].StPIdx].PIdx = N_gas + N_stars + i;
}
}
else
{
for(i = N_gas + N_stars - 1; i >= N_gas; i--)
{
P[i + recv_count] = P[i];
Key[i + recv_count] = Key[i];
StP[P[i + recv_count].StPIdx].PIdx = i + recv_count;
}
}
}
#endif
MPI_Recv(&P[N_gas], recv_count * sizeof(struct particle_data), MPI_BYTE, partner, TAG_PDATA,
MPI_COMM_WORLD, &status);
MPI_Recv(&Key[N_gas], recv_count * sizeof(peanokey), MPI_BYTE, partner, TAG_KEY,
MPI_COMM_WORLD, &status);
MPI_Recv(&SphP[N_gas], recv_count * sizeof(struct sph_particle_data), MPI_BYTE, partner,
TAG_SPHDATA, MPI_COMM_WORLD, &status);
N_gas += recv_count;
break;
#ifdef STELLAR_PROP
case 1:
if((NumPart - N_gas - N_stars) > recv_count)
{
for(i = 0; i < recv_count; i++)
{
P[NumPart + i] = P[N_gas + N_stars + i];
Key[NumPart + i] = Key[N_gas + N_stars + i];
}
}
else
{
for(i = NumPart - 1; i >= N_gas + N_stars; i--)
{
P[i + recv_count] = P[i];
Key[i + recv_count] = Key[i];
}
}
MPI_Recv(&P[N_gas+N_stars], recv_count * sizeof(struct particle_data), MPI_BYTE, partner, TAG_PDATA,
MPI_COMM_WORLD, &status);
MPI_Recv(&Key[N_gas+N_stars], recv_count * sizeof(peanokey), MPI_BYTE, partner, TAG_KEY,
MPI_COMM_WORLD, &status);
MPI_Recv(&StP[N_stars], recv_count * sizeof(struct st_particle_data), MPI_BYTE, partner, TAG_STDATA,
MPI_COMM_WORLD, &status);
/* set right links */
for(i = 0; i < recv_count; i++)
{
P[N_gas + N_stars + i].StPIdx = N_stars + i;
StP[N_stars + i].PIdx = N_gas + N_stars + i;
}
N_stars += recv_count;
break;
#endif
default:
MPI_Recv(&P[NumPart], recv_count * sizeof(struct particle_data), MPI_BYTE, partner,
TAG_PDATA, MPI_COMM_WORLD, &status);
MPI_Recv(&Key[NumPart], recv_count * sizeof(peanokey), MPI_BYTE, partner,
TAG_KEY, MPI_COMM_WORLD, &status);
break;
}
NumPart += recv_count;
}
}
}
}
/*! This function determines how many of the particles currently stored
* on the local CPU have to be moved off according to the domain
* decomposition.
*/
void domain_countToGo(void)
{
int n, no;
for(n = 0; n < NTask; n++)
{
local_toGo[n] = 0;
local_toGoSph[n] = 0;
#ifdef STELLAR_PROP
local_toGoSt[n] = 0;
#endif
}
for(n = 0; n < NumPart; n++)
{
no = 0;
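/* descend the top-level tree: at each level pick the daughter octant whose
* key range contains the particle's Peano-Hilbert key, until a leaf is reached */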
while(TopNodes[no].Daughter >= 0)
no = TopNodes[no].Daughter + (Key[n] - TopNodes[no].StartKey) / (TopNodes[no].Size / 8);
no = TopNodes[no].Leaf;
if(DomainTask[no] != ThisTask)
{
local_toGo[DomainTask[no]] += 1;
if(P[n].Type == 0)
local_toGoSph[DomainTask[no]] += 1;
#ifdef STELLAR_PROP
if(P[n].Type == 1)
local_toGoSt[DomainTask[no]] += 1;
#endif
}
}
MPI_Allgather(local_toGo, NTask, MPI_INT, toGo, NTask, MPI_INT, MPI_COMM_WORLD);
MPI_Allgather(local_toGoSph, NTask, MPI_INT, toGoSph, NTask, MPI_INT, MPI_COMM_WORLD);
#ifdef STELLAR_PROP
MPI_Allgather(local_toGoSt, NTask, MPI_INT, toGoSt, NTask, MPI_INT, MPI_COMM_WORLD);
#endif
}
/*! This function walks the global top tree in order to establish the
* number of leaves it has. These leaves are distributed to different
* processors.
*/
void domain_walktoptree(int no)
{
int i;
if(TopNodes[no].Daughter == -1)
{
TopNodes[no].Leaf = NTopleaves;
NTopleaves++;
}
else
{
for(i = 0; i < 8; i++)
domain_walktoptree(TopNodes[no].Daughter + i);
}
}
/*! This routine bins the particles onto the domain-grid, i.e. it sums up the
* total number of particles and the total amount of work in each of the
* domain-cells. This information forms the basis for the actual decision on
* the adopted domain decomposition.
*/
void domain_sumCost(void)
{
int i, n, no;
double *local_DomainWork;
int *local_DomainCount;
int *local_DomainCountSph;
local_DomainWork = malloc(NTopnodes * sizeof(double));
local_DomainCount = malloc(NTopnodes * sizeof(int));
local_DomainCountSph = malloc(NTopnodes * sizeof(int));
NTopleaves = 0;
domain_walktoptree(0);
for(i = 0; i < NTopleaves; i++)
{
local_DomainWork[i] = 0;
local_DomainCount[i] = 0;
local_DomainCountSph[i] = 0;
}
if(ThisTask == 0)
printf("NTopleaves= %d\n", NTopleaves);
for(n = 0; n < NumPart; n++)
{
no = 0;
while(TopNodes[no].Daughter >= 0)
no = TopNodes[no].Daughter + (Key[n] - TopNodes[no].StartKey) / (TopNodes[no].Size / 8);
no = TopNodes[no].Leaf;
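/* work estimate: gravitational cost weighted by the inverse timestep size, so
* particles that are integrated more frequently contribute more work */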
if(P[n].Ti_endstep > P[n].Ti_begstep)
local_DomainWork[no] += (1.0 + P[n].GravCost) / (P[n].Ti_endstep - P[n].Ti_begstep);
else
local_DomainWork[no] += (1.0 + P[n].GravCost);
local_DomainCount[no] += 1;
if(P[n].Type == 0)
local_DomainCountSph[no] += 1;
}
MPI_Allreduce(local_DomainWork, DomainWork, NTopleaves, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
MPI_Allreduce(local_DomainCount, DomainCount, NTopleaves, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
MPI_Allreduce(local_DomainCountSph, DomainCountSph, NTopleaves, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
free(local_DomainCountSph);
free(local_DomainCount);
free(local_DomainWork);
}
/*! This routine finds the extent of the global domain grid.
*/
void domain_findExtent(void)
{
int i, j;
double len, xmin[3], xmax[3], xmin_glob[3], xmax_glob[3];
/* determine local extension */
for(j = 0; j < 3; j++)
{
xmin[j] = MAX_REAL_NUMBER;
xmax[j] = -MAX_REAL_NUMBER;
}
for(i = 0; i < NumPart; i++)
{
for(j = 0; j < 3; j++)
{
if(xmin[j] > P[i].Pos[j])
xmin[j] = P[i].Pos[j];
if(xmax[j] < P[i].Pos[j])
xmax[j] = P[i].Pos[j];
}
}
MPI_Allreduce(xmin, xmin_glob, 3, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
MPI_Allreduce(xmax, xmax_glob, 3, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
len = 0;
for(j = 0; j < 3; j++)
if(xmax_glob[j] - xmin_glob[j] > len)
len = xmax_glob[j] - xmin_glob[j];
len *= 1.001;
#ifdef DOMAIN_AT_ORIGIN
for(j = 0; j < 3; j++)
{
DomainCenter[j] = 0.0;
DomainCorner[j] = - 0.5 * len;
}
#else
for(j = 0; j < 3; j++)
{
DomainCenter[j] = 0.5 * (xmin_glob[j] + xmax_glob[j]);
DomainCorner[j] = 0.5 * (xmin_glob[j] + xmax_glob[j]) - 0.5 * len;
}
#endif
DomainLen = len;
DomainFac = 1.0 / len * (((peanokey) 1) << (BITS_PER_DIMENSION));
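/* DomainFac maps a coordinate offset from DomainCorner onto the integer
* Peano-Hilbert grid of 2^BITS_PER_DIMENSION cells per dimension */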
#ifdef OTHERINFO
if(ThisTask == 0)
{
printf("xmin_glob = %g %g %g\n",xmin_glob[0],xmin_glob[1],xmin_glob[2]);
printf("xmax_glob = %g %g %g\n",xmax_glob[0],xmax_glob[1],xmax_glob[2]);
printf("DomainCenter = %g %g %g\n",DomainCenter[0], DomainCenter[1], DomainCenter[2]);
printf("DomainCorner = %g %g %g\n",DomainCorner[0], DomainCorner[1], DomainCorner[2]);
printf("DomainLen = %g\n",DomainLen);
}
#endif
}
/*! This function constructs the global top-level tree that is used
* for the domain decomposition. This is done by considering the string of
* Peano-Hilbert keys for all particles, which is recursively chopped off
* in pieces of eight segments until each segment holds at most a certain
* number of particles.
*/
void domain_determineTopTree(void)
{
int i, ntop_local, ntop;
int *ntopnodelist, *ntopoffset;
for(i = 0; i < NumPart; i++)
{
KeySorted[i] = Key[i] = peano_hilbert_key((P[i].Pos[0] - DomainCorner[0]) * DomainFac,
(P[i].Pos[1] - DomainCorner[1]) * DomainFac,
(P[i].Pos[2] - DomainCorner[2]) * DomainFac,
BITS_PER_DIMENSION);
}
qsort(KeySorted, NumPart, sizeof(peanokey), domain_compare_key);
NTopnodes = 1;
TopNodes[0].Daughter = -1;
TopNodes[0].Size = PEANOCELLS;
TopNodes[0].StartKey = 0;
TopNodes[0].Count = NumPart;
TopNodes[0].Pstart = 0;
domain_topsplit_local(0, 0);
toplist_local = malloc(NTopnodes * sizeof(struct topnode_exchange));
for(i = 0, ntop_local = 0; i < NTopnodes; i++)
{
if(TopNodes[i].Daughter == -1) /* only use leaves */
{
toplist_local[ntop_local].Startkey = TopNodes[i].StartKey;
toplist_local[ntop_local].Count = TopNodes[i].Count;
ntop_local++;
}
}
ntopnodelist = malloc(sizeof(int) * NTask);
ntopoffset = malloc(sizeof(int) * NTask);
MPI_Allgather(&ntop_local, 1, MPI_INT, ntopnodelist, 1, MPI_INT, MPI_COMM_WORLD);
for(i = 0, ntop = 0, ntopoffset[0] = 0; i < NTask; i++)
{
ntop += ntopnodelist[i];
if(i > 0)
ntopoffset[i] = ntopoffset[i - 1] + ntopnodelist[i - 1];
}
toplist = malloc(ntop * sizeof(struct topnode_exchange));
for(i = 0; i < NTask; i++)
{
ntopnodelist[i] *= sizeof(struct topnode_exchange);
ntopoffset[i] *= sizeof(struct topnode_exchange);
}
MPI_Allgatherv(toplist_local, ntop_local * sizeof(struct topnode_exchange), MPI_BYTE,
toplist, ntopnodelist, ntopoffset, MPI_BYTE, MPI_COMM_WORLD);
qsort(toplist, ntop, sizeof(struct topnode_exchange), domain_compare_toplist);
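/* second pass: rebuild the top-level tree, this time splitting the merged,
* key-sorted list of leaf segments gathered from all tasks */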
NTopnodes = 1;
TopNodes[0].Daughter = -1;
TopNodes[0].Size = PEANOCELLS;
TopNodes[0].StartKey = 0;
TopNodes[0].Count = All.TotNumPart;
TopNodes[0].Pstart = 0;
TopNodes[0].Blocks = ntop;
domain_topsplit(0, 0);
free(toplist);
free(ntopoffset);
free(ntopnodelist);
free(toplist_local);
}
/*! This function is responsible for constructing the local top-level
* Peano-Hilbert segments. A segment is cut into 8 pieces recursively
* until the number of particles in the segment has fallen below
* All.TotNumPart / (TOPNODEFACTOR * 8 * NTask).
*/
void domain_topsplit_local(int node, peanokey startkey)
{
int i, p, sub, bin;
if(TopNodes[node].Size >= 8)
{
TopNodes[node].Daughter = NTopnodes;
for(i = 0; i < 8; i++)
{
if(NTopnodes < MAXTOPNODES)
{
sub = TopNodes[node].Daughter + i;
TopNodes[sub].Size = TopNodes[node].Size / 8;
TopNodes[sub].Count = 0;
TopNodes[sub].Daughter = -1;
TopNodes[sub].StartKey = startkey + i * TopNodes[sub].Size;
TopNodes[sub].Pstart = TopNodes[node].Pstart;
NTopnodes++;
}
else
{
printf("task=%d: We are out of Topnodes. Increasing the constant MAXTOPNODES might help.\n",
ThisTask);
fflush(stdout);
endrun(13213);
}
}
for(p = TopNodes[node].Pstart; p < TopNodes[node].Pstart + TopNodes[node].Count; p++)
{
bin = (KeySorted[p] - startkey) / (TopNodes[node].Size / 8);
if(bin < 0 || bin > 7)
{
printf("task=%d: something odd has happened here. bin=%d\n", ThisTask, bin);
fflush(stdout);
endrun(13123123);
}
sub = TopNodes[node].Daughter + bin;
if(TopNodes[sub].Count == 0)
TopNodes[sub].Pstart = p;
TopNodes[sub].Count++;
}
for(i = 0; i < 8; i++)
{
sub = TopNodes[node].Daughter + i;
//if(TopNodes[sub].Count > All.TotNumPart / (TOPNODEFACTOR * NTask * NTask))
if(TopNodes[sub].Count > All.TotNumPart / (TOPNODEFACTOR * 8 * NTask))
domain_topsplit_local(sub, TopNodes[sub].StartKey);
}
}
}
/*! This function is responsible for constructing the global top-level tree
* segments. Starting from a joint list of all local top-level segments,
* in which multiple occurrences of the same spatial segment have been
* combined, a segment is subdivided into 8 pieces recursively until the
* number of particles in each segment has fallen below All.TotNumPart /
* (TOPNODEFACTOR * NTask).
*/
void domain_topsplit(int node, peanokey startkey)
{
int i, p, sub, bin;
if(TopNodes[node].Size >= 8)
{
TopNodes[node].Daughter = NTopnodes;
for(i = 0; i < 8; i++)
{
if(NTopnodes < MAXTOPNODES)
{
sub = TopNodes[node].Daughter + i;
TopNodes[sub].Size = TopNodes[node].Size / 8;
TopNodes[sub].Count = 0;
TopNodes[sub].Blocks = 0;
TopNodes[sub].Daughter = -1;
TopNodes[sub].StartKey = startkey + i * TopNodes[sub].Size;
TopNodes[sub].Pstart = TopNodes[node].Pstart;
NTopnodes++;
}
else
{
printf("Task=%d: We are out of Topnodes. Increasing the constant MAXTOPNODES might help.\n",
ThisTask);
fflush(stdout);
endrun(137213);
}
}
for(p = TopNodes[node].Pstart; p < TopNodes[node].Pstart + TopNodes[node].Blocks; p++)
{
bin = (toplist[p].Startkey - startkey) / (TopNodes[node].Size / 8);
sub = TopNodes[node].Daughter + bin;
if(bin < 0 || bin > 7)
endrun(77);
if(TopNodes[sub].Blocks == 0)
TopNodes[sub].Pstart = p;
TopNodes[sub].Count += toplist[p].Count;
TopNodes[sub].Blocks++;
}
for(i = 0; i < 8; i++)
{
sub = TopNodes[node].Daughter + i;
if(TopNodes[sub].Count > All.TotNumPart / (TOPNODEFACTOR * NTask))
domain_topsplit(sub, TopNodes[sub].StartKey);
}
}
}
/*! This is a comparison kernel used in a sort routine.
*/
int domain_compare_toplist(const void *a, const void *b)
{
if(((struct topnode_exchange *) a)->Startkey < (((struct topnode_exchange *) b)->Startkey))
return -1;
if(((struct topnode_exchange *) a)->Startkey > (((struct topnode_exchange *) b)->Startkey))
return +1;
return 0;
}
/*! This is a comparison kernel used in a sort routine.
*/
int domain_compare_key(const void *a, const void *b)
{
if(*(peanokey *) a < *(peanokey *) b)
return -1;
if(*(peanokey *) a > *(peanokey *) b)
return +1;
return 0;
}
